Merge branch 'master' of github.com:PegasusWang/python_data_structures_and_algorithms

wangningning · wangningning · commit 03556620f06b · 2018-04-28T13:49:14.000+08:00
diff --git a/README.md b/README.md
@@ -59,8 +59,9 @@
 - 递归
 - 查找：线性查找和二分查找
 - 基本排序算法
-- 高级排序算法: 归并排序、堆排序、快排
+- 高级排序算法: 归并排序、快排
 - 树，二叉树
+- 堆与堆排序
 - 图，dfs 和 bfs
 - python 内置常用数据结构和算法的使用。list, dict, set, collections 模块，heapq 模块
 - 面试笔试常考算法
diff --git a/docs/12_基本排序算法/basic_sort.py b/docs/12_基本排序算法/basic_sort.py
@@ -17,8 +17,9 @@ def bubble_sort(seq):  # O(n^2), n(n-1)/2 = 1/2(n^2 + n)
 def test_bubble_sort():
     seq = list(range(10))  # 注意 python3 返回迭代器，所以我都用 list 强转了，python2 range 返回的就是 list
     random.shuffle(seq)   # shuffle inplace 操作，打乱数组
+    sorted_seq = sorted(seq)  # 注意呦，内置的 sorted 就不是 inplace 的，它返回一个新的数组，不影响传入的参数
     bubble_sort(seq)
-    assert seq == sorted(seq)  # 注意呦，内置的 sorted 就不是 inplace 的，它返回一个新的数组，不影响传入的参数
+    assert seq == sorted_seq
 
 
 def select_sort(seq):
@@ -35,8 +36,9 @@ def select_sort(seq):
 def test_select_sort():
     seq = list(range(10))
     random.shuffle(seq)
+    sorted_seq = sorted(seq)
     select_sort(seq)
-    assert seq == sorted(seq)
+    assert seq == sorted_seq
 
 
 def insertion_sort(seq):
diff --git a/docs/13_高级排序算法/advanced_sorting.md b/docs/13_高级排序算法/advanced_sorting.md
@@ -4,4 +4,7 @@
 
 - [分治法与归并排序](./merge_sort.md)
 - [快速排序](./quick_sort.md)
+
+在讲完二叉树之后，我们会看下它的应用：
+
 - [堆和堆排序](./heap_sort.md)
diff --git a/docs/13_高级排序算法/heap_sort.md b/docs/13_高级排序算法/heap_sort.md
diff --git a/docs/13_高级排序算法/merge_sort.py b/docs/13_高级排序算法/merge_sort.py
@@ -48,4 +48,3 @@ def test_merge_sort():
     seq = list(range(10))
     random.shuffle(seq)
     assert merge_sort(seq) == sorted(seq)
-
diff --git a/docs/13_高级排序算法/quick_sort.md b/docs/13_高级排序算法/quick_sort.md
@@ -0,0 +1,107 @@
+# 快速排序
+
+快速排序名字可不是盖的，很多程序语言标准库实现的内置排序都有它的身影，我们就直奔主题吧。
+和归并排序一样，快排也是一种分而治之的策略。归并排序把数组递归成只有单个元素的数组，之后再不断两两
+合并，最后得到一个有序数组。这里的递归基本条件就是只包含一个元素的数组，当数组只包含一个元素的时候，我们可以认为它本来就是有序的（当然空数组也不用排序）。
+
+快排的工作过程其实比较简单，三步走：
+
+- 选择基准值 pivot
+
+- 将数组分成两个子数组：小于基准值的元素和大于基准值的元素。这个过程称之为 partition
+
+- 对这两个子数组进行快速排序。
+
+根据这个想法我们可以快速写出快排的代码，简直就是在翻译上边的描述：
+
+```py
+def quicksort(array):
+    if len(array) < 2:   # 递归出口，空数组或者只有一个元素的数组都是有序的
+        return array
+    else:
+        pivot_index = 0    # 选择第一个元素作为主元 pivot
+        pivot = array[pivot_index]
+        less_part = [i for i in array[pivot_index+1:] if i <= pivot]
+        great_part = [i for i in array[pivot_index+1:] if i > pivot]
+        return quicksort(less_part) + [pivot] + quicksort(great_part)
+
+
+def test_quicksort():
+    import random
+    seq = list(range(10))
+    random.shuffle(seq)
+    assert quicksort(seq) == sorted(seq)
+```
+是不是很简单，下次面试官让你手写快排你再写不出来就有点过分啦。 当然这个实现有两个不好的地方:
+
+- 第一是它需要额外的存储空间，我们想实现 inplace 原地排序。
+- 第二是它的 partion 操作每次都要两次遍历整个数组，我们想改善一下。
+
+这里我们就来优化一下它，实现 inplace 排序并且改善下 partition 操作。新的代码大概如下：
+
+```py
+def quicksort_inplace(array, beg, end):    # 注意这里我们都用左闭右开区间，end 传入 len(array)
+    if beg < end:    # beg == end 的时候递归出口
+        pivot = partition(array, beg, end)
+        quicksort_inplace(array, beg, pivot)
+        quicksort_inplace(array, pivot+1, end)
+```
+
+能否实现只遍历一次数组就可以完成 partition 操作呢？实际上是可以的。我们设置首位俩个指针 left, right，两个指针不断向中间收拢。如果遇到 left 位置的元素大于 pivot 并且 right 指向的元素小于 pivot，我们就交换这俩元素，当 left > right 的时候推出就行了，这样实现了一次遍历就完成了 partition。如果你感觉懵逼，纸上画画就立马明白了。我们来撸代码实现：
+
+```py
+def partition(array, beg, end):
+    pivot_index = beg
+    pivot = array[pivot_index]
+    left = pivot_index + 1
+    right = end - 1    # 开区间，最后一个元素位置是 end-1     [0, end-1] or [0: end)，括号表示开区间
+
+    while True:
+        # 从左边找到比 pivot 大的
+        while left <= right and array[left] < pivot:
+            left += 1
+
+        while right >= left and array[right] >= pivot:
+            right -= 1
+
+        if left > right:
+            break
+        else:
+            array[left], array[right] = array[right], array[left]
+
+    array[pivot_index], array[right] = array[right], array[pivot_index]
+    return right   # 新的 pivot 位置
+
+
+def test_partition():
+    l = [4, 1, 2, 8]
+    assert partition(l, 0, len(l)) == 2
+    l = [1, 2, 3, 4]
+    assert partition(l, 0, len(l)) == 0
+    l = [4, 3, 2, 1]
+    assert partition(l, 0, len(l))
+```
+
+大功告成，新的快排就实现好了。
+
+# 时间复杂度
+在比较理想的情况下，比如数组每次都被 pivot 均分，我们可以得到递归式：
+
+T(n) = 2T(n/2) + n
+
+上一节我们讲过通过递归树得到它的时间复杂度是 O(nlog(n))。即便是很坏的情况，比如 pivot 每次都把数组按照 1:9 划分，依然是 O(nlog(n))，感兴趣请阅读算法导论相关章节。
+
+![](quicksort_worst.png)
+
+
+# 思考题
+- 请你补充 quicksort_inplace 的单元测试
+- 最坏的情况下快排的时间复杂度是多少？什么时候会发生这种情况？
+- 我们实现的快排是稳定的啵？
+- 选择基准值如果选不好就可能导致复杂度升高，算导中提到了一种『median of 3』策略，就是说选择 pivot 的时候
+从子数组中随机选三个元素，再取它的中位数，你能实现这个想法吗？这里我们的代码很简单地取了第一个元素作为 pivot
+- 利用快排中的 partition 操作，我们还能实现另一个算法，nth_element，快速查找一个无序数组中的第 k 大元素，请你尝试实现它并编写单测
+
+
+# 延伸阅读
+- 《算法导论》第 7 章
diff --git a/docs/13_高级排序算法/quicksort.py b/docs/13_高级排序算法/quicksort.py
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+
+
+def quicksort(array):
+    if len(array) < 2:   # 递归出口，空数组或者只有一个元素的数组都是有序的
+        return array
+    else:
+        pivot_index = 0    # 选择第一个元素作为主元 pivot
+        pivot = array[pivot_index]
+        less_part = [i for i in array[pivot_index+1:] if i <= pivot]
+        great_part = [i for i in array[pivot_index+1:] if i > pivot]
+        return quicksort(less_part) + [pivot] + quicksort(great_part)
+
+
+def test_quicksort():
+    import random
+    seq = list(range(10))
+    random.shuffle(seq)
+    assert quicksort(seq) == sorted(seq)    # 用内置的sorted 『对拍』
+
+
+def quicksort_inplace(array, beg, end):    # 注意这里我们都用左闭右开区间
+    if beg < end:    # beg == end 的时候递归出口
+        pivot = partition(array, beg, end)
+        quicksort_inplace(array, beg, pivot)
+        quicksort_inplace(array, pivot+1, end)
+
+
+def partition(array, beg, end):
+    pivot_index = beg
+    pivot = array[pivot_index]
+    left = pivot_index + 1
+    right = end - 1    # 开区间，最后一个元素位置是 end-1     [0, end-1] or [0: end)，括号表示开区间
+
+    while True:
+        # 从左边找到比 pivot 大的
+        while left <= right and array[left] < pivot:
+            left += 1
+
+        while right >= left and array[right] >= pivot:
+            right -= 1
+
+        if left > right:
+            break
+        else:
+            array[left], array[right] = array[right], array[left]
+
+    array[pivot_index], array[right] = array[right], array[pivot_index]
+    return right   # 新的 pivot 位置
+
+
+def test_partition():
+    l = [4, 1, 2, 8]
+    assert partition(l, 0, len(l)) == 2
+    l = [1, 2, 3, 4]
+    assert partition(l, 0, len(l)) == 0
+    l = [4, 3, 2, 1]
+    assert partition(l, 0, len(l))
+
+
+def test_quicksort_inplace():
+    import random
+    seq = list(range(10))
+    random.shuffle(seq)
+    sorted_seq = sorted(seq)
+    quicksort_inplace(seq, 0, len(seq))
+    assert seq == sorted_seq
diff --git a/docs/13_高级排序算法/quicksort_worst.png b/docs/13_高级排序算法/quicksort_worst.png
diff --git a/docs/index.md b/docs/index.md
@@ -59,8 +59,9 @@
 - 递归
 - 查找：线性查找和二分查找
 - 基本排序算法
-- 高级排序算法: 归并排序、堆排序、快排
+- 高级排序算法: 归并排序、快排
 - 树，二叉树
+- 堆与堆排序
 - 图，dfs 和 bfs
 - python 内置常用数据结构和算法的使用。list, dict, set, collections 模块，heapq 模块
 - 面试笔试常考算法
@@ -173,9 +174,12 @@ Python 抽象程度比较高， 我们能用更少的代码来实现功能，同
 
 安装依赖：
 ```sh
-pip install mkdocs    # 制作电子书
+pip install mkdocs    # 制作电子书, http://markdown-docs-zh.readthedocs.io/zh_CN/latest/
 # https://stackoverflow.com/questions/27882261/mkdocs-and-mathjax/31874157
 pip install https://github.com/mitya57/python-markdown-math/archive/master.zip
+
+# 或者直接
+pip install -r requirements.txt
 ```
 
 编写并查看：
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -26,4 +26,3 @@ pages:
       - 高级排序算法: '13_高级排序算法/advanced_sorting.md'
       - 分治法与归并排序: '13_高级排序算法/merge_sort.md'
       - 快速排序: '13_高级排序算法/quick_sort.md'
-      - 堆与堆排序: '13_高级排序算法/heap_sort.md'