Algorithms and data structure in Python

复杂度

计算执行语句的次数,取分量最大的

常见复杂度排序

Ο(1)<Ο(log2n)<Ο(n)<Ο(nlog2n)<Ο(n2)<…<Ο(2n)

P类问题:多项式时间复杂度的算法 ,NP类问题:指数时间复杂度的算法

捕获函数执行时间,timeit模块

数据结构

线性数据结构:诸如栈,队列,双端队列, 列表

Stacks

特性:LIFO -- last in first out 比如浏览器的返回按钮

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
class Stack:
def __init__(self):
self.items = []

def isEmpty(self):
return self.items == []

def push(self, item):
self.items.append(item)

def pop(self):
return self.items.pop()

def peek(self):
return self.items[len(self.items)-1]

def size(self):
return len(self.items)

例子:符号匹配进制转换

Queues

特性:FIFO 比如操作系统使用不同的队列控制进程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
class Queue:
def __init__(self):
self.items = []

def isEmpty(self):
return self.items == []

def enqueue(self, item):
self.items.insert(0,item)

def dequeue(self):
return self.items.pop()

def size(self):
return len(self.items)

例子:约瑟夫问题

Deques

特性:提供栈和队列的所有能力

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
class Deque:
def __init__(self):
self.items = []

def isEmpty(self):
return self.items == []

def addFront(self, item):
self.items.append(item)

def addRear(self, item):
self.items.insert(0,item)

def removeFront(self):
return self.items.pop()

def removeRear(self):
return self.items.pop(0)

def size(self):
return len(self.items)

例子:回文检查

Lists

无序列表 – 链表

特性:保持项的相对定位

注意:

必须明确链表的第一项的位置
外部引用通常称为链表的头
最后一项需要知道没有下一项

Python实现
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
class Node:  #链表实现的基本构造是节点,包括列表项本身以及下一个节点的引用
def __init__(self,initdata):
self.data = initdata
self.next = None #接地节点

def getData(self):
return self.data

def getNext(self):
return self.next

def setData(self,newdata):
self.data = newdata

def setNext(self,newnext):
self.next = newnext

class UnorderedList:

def __init__(self):
self.head = None #链表的头不包含任何节点对象,只包含对第一个节点的单个引用

def isEmpty(self):
return self.head == None

def add(self,item): #新项放在链表的头部
temp = Node(item)
temp.setNext(self.head)
self.head = temp

def size(self): #链表遍历
current = self.head #外部引用
count = 0
while current != None:
count = count + 1
current = current.getNext() #链表的头等于第一个节点
return count

def search(self,item):
current = self.head
found = False
while current != None and not found:
if current.getData() == item:
found = True
else:
current = current.getNext()
return found

def remove(self,item):
current = self.head #两个外部引用
previous = None #总是传递current后面一个节点
found = False
while not found:
if current.getData() == item:
found = True
else:
previous = current
current = current.getNext() #英寸蠕动

if previous == None: #如果删除的项目是链表的第一项
self.head = current.getNext() #链表头设第二项
else:
previous.setNext(current.getNext()) #删除current项
有序列表
Python实现
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
class OrderedList:
def __init__(self):
self.head = None

def search(self,item):
current = self.head
found = False
stop = False
while current != None and not found and not stop:
if current.getData() == item:
found = True
else:
if current.getData() > item: #大于停止
stop = True
else:
current = current.getNext()
return found

def add(self,item):
current = self.head
previous = None
stop = False
while current != None and not stop:
if current.getData() > item:
stop = True
else:
previous = current
current = current.getNext()

temp = Node(item)
if previous == None:
temp.setNext(self.head)
self.head = temp
else:
temp.setNext(current) #插队
previous.setNext(temp)

递归

递归是一种解决问题的方法,将问题分解为更小的子问题,直到得到一个足够小的问题可以被很简单的解决
三大定律

1.递归算法必须具有基本情况
2.递归算法必须改变其状态并向基本情况靠近
3.递归算法必须以递归方式调用自身

1
2
3
4
5
def listsum(numList):
if len(numList) == 1: #基本情况,算法停止递归的条件
return numList[0]
else:
return numlist[0] + listsum(numList[1:]) #向基本情况靠近,缩短列表,调用自身

排序和搜索

顺序查找 O(n)

1
2
3
4
5
6
7
8
9
10
11
def sequentialSearch(alist, item):
pos = 0
found = False

while pos < len(alist) and not found:
if alist[pos] == item:
found = True
else:
pos = pos+1

return found

二分查找 O(log2n)

用于有序列表

Python实现
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
def binarySearch(alist, item):
first = 0
last = len(alist)-1
found = False

while first<=last and not found:
midpoint = (first + last)//2
if alist[midpoint] == item:
found = True
else:
if item < alist[midpoint]:
last = midpoint-1
else:
first = midpoint+1

return found

#递归版本
def binarySearch(alist, item):
if len(alist) == 0:
return False
else:
midpoint = len(alist)//2
if alist[midpoint]==item:
return True
else:
if item<alist[midpoint]:
return binarySearch(alist[:midpoint],item)
else:
return binarySearch(alist[midpoint+1:],item)

Hash查找 O(1)

哈希表 槽+数值 槽取素数
散列函数(余数法) 扩展:分组求和法 平方取中法
负载因子:λ=项数/表大小

冲突解决
线性探测的开放寻址技术,缺点是聚集的趋势
解决聚集,扩展线性探测技术--使用常量跳过值
二次探测,使用rehash函数
允许每个槽保持对项的引用,使用链接

Python实现
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
class HashTable:
def __init__(self):
self.size = 11
self.slots = [None] * self.size
self.data = [None] * self.size

def put(self,key,data):
hashvalue = self.hashfunction(key,len(self.slots))

if self.slots[hashvalue] == None:
self.slots[hashvalue] = key
self.data[hashvalue] = data
else:
if self.slots[hashvalue] == key:
self.data[hashvalue] = data #replace
else:
nextslot = self.rehash(hashvalue,len(self.slots))
while self.slots[nextslot] != None and \
self.slots[nextslot] != key:
nextslot = self.rehash(nextslot,len(self.slots))

if self.slots[nextslot] == None:
self.slots[nextslot]=key
self.data[nextslot]=data
else:
self.data[nextslot] = data #replace

def hashfunction(self,key,size):
return key%size

def rehash(self,oldhash,size):
return (oldhash+1)%size

def get(self,key):
startslot = self.hashfunction(key,len(self.slots))

data = None
stop = False
found = False
position = startslot
while self.slots[position] != None and \
not found and not stop:
if self.slots[position] == key:
found = True
data = self.data[position]
else:
position=self.rehash(position,len(self.slots))
if position == startslot:
stop = True
return data

def __getitem__(self,key):
return self.get(key)

def __setitem__(self,key,data):
self.put(key,data)

冒泡排序 O(n2)

1
2
3
4
5
6
7
def bubbleSort(alist):
for passnum in range(len(alist)-1,0,-1):
for i in range(passnum):
if alist[i]>alist[i+1]:
temp = alist[i]
alist[i] = alist[i+1]
alist[i+1] = temp

选择排序 O(n2)

1
2
3
4
5
6
7
8
9
10
def selectionSort(alist):
for fillslot in range(len(alist)-1,0,-1):
positionOfMax=0
for location in range(1,fillslot+1):
if alist[location]>alist[positionOfMax]:
positionOfMax = location

temp = alist[fillslot]
alist[fillslot] = alist[positionOfMax]
alist[positionOfMax] = temp

插入排序 O(n2)

1
2
3
4
5
6
7
8
9
10
11
def insertionSort(alist):
for index in range(1,len(alist)):

currentvalue = alist[index]
position = index

while position>0 and alist[position-1]>currentvalue:
alist[position]=alist[position-1]
position = position-1

alist[position]=currentvalue

归并排序 Ο(nlog2n)

Python实现
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
def mergeSort(alist):
if len(alist)>1:
mid = len(alist)//2
lefthalf = alist[:mid]
righthalf = alist[mid:]

mergeSort(lefthalf)
mergeSort(righthalf)

i=0
j=0
k=0
while i < len(lefthalf) and j < len(righthalf):
if lefthalf[i] < righthalf[j]:
alist[k]=lefthalf[i]
i=i+1
else:
alist[k]=righthalf[j]
j=j+1
k=k+1

while i < len(lefthalf):
alist[k]=lefthalf[i]
i=i+1
k=k+1

while j < len(righthalf):
alist[k]=righthalf[j]
j=j+1
k=k+1

快速排序 Ο(nlog2n)

Python实现
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
def quickSort(alist):
quickSortHelper(alist,0,len(alist)-1)

def quickSortHelper(alist,first,last):
if first<last:

splitpoint = partition(alist,first,last)

quickSortHelper(alist,first,splitpoint-1)
quickSortHelper(alist,splitpoint+1,last)

def partition(alist,first,last):
pivotvalue = alist[first]

leftmark = first+1
rightmark = last

done = False
while not done:

while leftmark <= rightmark and alist[leftmark] <= pivotvalue:
leftmark = leftmark + 1

while alist[rightmark] >= pivotvalue and rightmark >= leftmark:
rightmark = rightmark -1

if rightmark < leftmark:
done = True
else:
temp = alist[leftmark]
alist[leftmark] = alist[rightmark]
alist[rightmark] = temp

temp = alist[first]
alist[first] = alist[rightmark]
alist[rightmark] = temp

return rightmark

如文件系统,网页
叶节点:没有子节点的节点
层数:从根结点到该结点所经过的分支数目
高度:树中任何节点的最大层数
二叉树:树中的每个节点最多有两个子节点
二叉堆:具有堆的性质(父节点的键值总是大于或等于(<=)任一子节点的键值),又有二叉树的性质
二叉搜索树:左子树的键值小于父节点,并且右子树大于父代(bst)

Python实现
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
class BinaryTree:
def __init__(self,rootObj):
self.key = rootObj
self.leftChild = None
self.rightChild = None

def insertLeft(self,newNode):
if self.leftChild == None:
self.leftChild = BinaryTree(newNode)
else:
t = BinaryTree(newNode)
t.leftChild = self.leftChild
self.leftChild = t

def insertRight(self,newNode):
pass #同上

def getRightChild(self):
return self.rightChild

def getLeftChild(self):
return self.leftChild

def setRootVal(self,obj):
self.key = obj

def getRootVal(self):
return self.key

树的遍历

前序

1
2
3
4
5
6
7
8
9
10
11
12
13
def preorder(tree):
if tree:
print(tree.getRootVal())
preorder(tree.getLeftChild())
preorder(tree.getRightChild())

# 作为BinaryTree类的方法
def preorder(self):
print(self.key)
if self.leftChild:
self.leftChild.preorder()
if self.rightChild:
self.rightChild.preorder()

中序

1
2
3
4
5
def inorder(tree):
if tree != None:
inorder(tree.getLeftChild())
print(tree.getRootVal())
inorder(tree.getRightChild())

后序

1
2
3
4
5
def postorder(tree):
if tree != None:
postorder(tree.getLeftChild())
postorder(tree.getRightChild())
print(tree.getRootVal())

基于二叉堆的优先队列

Python实现
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
class BinHeap:
def __init__(self):
self.heapList = [0] #默认第一项为零,用于整数除法
self.currentSize = 0

# 插入
def percUp(self,i):
while i // 2 > 0:
if self.heapList[i] < self.heapList[i // 2]:
tmp = self.heapList[i // 2]
self.heapList[i // 2] = self.heapList[i]
self.heapList[i] = tmp
i = i // 2

def insert(self,k):
self.heapList.append(k) #在末尾加,然后不停的比较父节点,调换位置
self.currentSize = self.currentSize + 1
self.percUp(self.currentSize)

# 删除最小
def percDown(self,i): #从根节点一直向下,恢复堆顺序属性
while (i * 2) <= self.currentSize:
mc = self.minChild(i)
if self.heapList[i] > self.heapList[mc]:
tmp = self.heapList[i]
self.heapList[i] = self.heapList[mc]
self.heapList[mc] = tmp
i = mc

def minChild(self,i): #取两个子节点小的
if i * 2 + 1 > self.currentSize:
return i * 2
else:
if self.heapList[i*2] < self.heapList[i*2+1]:
return i * 2
else:
return i * 2 + 1

def delMin(self): #删除第一项,把最后一项放在第一项,保持堆结构属性
retval = self.heapList[1]
self.heapList[1] = self.heapList[self.currentSize]
self.currentSize = self.currentSize - 1
self.heapList.pop() #删除最后一项
self.percDown(1)
return retval

#列表建堆
def buildHeap(self,alist):
i = len(alist) // 2
self.currentSize = len(alist)
self.heapList = [0] + alist[:]
while (i > 0):
self.percDown(i)
i = i - 1

二叉搜索树

Python实现
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
class TreeNode:  #树节点类
def __init__(self,key,val,left=None,right=None,parent=None): #可选参数
self.key = key
self.payload = val
self.leftChild = left
self.rightChild = right
self.parent = parent

def hasLeftChild(self):
return self.leftChild

def hasRightChild(self):
return self.rightChild

def isLeftChild(self):
return self.parent and self.parent.leftChild == self

def isRightChild(self):
return self.parent and self.parent.rightChild == self

def isRoot(self):
return not self.parent

def isLeaf(self):
return not (self.rightChild or self.leftChild)

def hasAnyChildren(self):
return self.rightChild or self.leftChild

def hasBothChildren(self):
return self.rightChild and self.leftChild

def replaceNodeData(self,key,value,lc,rc):
self.key = key
self.payload = value
self.leftChild = lc
self.rightChild = rc
if self.hasLeftChild():
self.leftChild.parent = self
if self.hasRightChild():
self.rightChild.parent = self

def __iter__(self): #中序遍历的生成函数
if self:
if self.hasLeftChild():
for elem in self.leftChiLd:
yield elem
yield self.key
if self.hasRightChild():
for elem in self.rightChild:
yield elem

class BinarySearchTree: #作为二叉搜索树的根的节点的引用的类

def __init__(self):
self.root = None
self.size = 0

def length(self):
return self.size

def __len__(self):
return self.size

# 插入
def put(self,key,val):
if self.root:
self._put(key,val,self.root)
else:
self.root = TreeNode(key,val)
self.size = self.size + 1

def _put(self,key,val,currentNode): #私有递归辅助函数
if key < currentNode.key:
if currentNode.hasLeftChild(): #有左节点,不停的递归
self._put(key,val,currentNode.leftChild)
else: #直到找到可以建立新节点的位置
currentNode.leftChild = TreeNode(key,val,parent=currentNode)
else:
if currentNode.hasRightChild():
self._put(key,val,currentNode.rightChild)
else:
currentNode.rightChild = TreeNode(key,val,parent=currentNode)

def __setitem__(self,k,v): #重载赋值[]
self.put(k,v)

# 获取
def get(self,key):
if self.root:
res = self._get(key,self.root)
if res:
return res.payload
else:
return None
else:
return None

def _get(self,key,currentNode):
if not currentNode:
return None
elif currentNode.key == key:
return currentNode
elif key < currentNode.key:
return self._get(key,currentNode.leftChild)
else:
return self._get(key,currentNode.rightChild)

def __getitem__(self,key): #重载赋值[]
return self.get(key)

def __contains__(self,key): #实现in操作
if self._get(key,self.root):
return True
else:
return False

# 删除
def delete(self,key):
if self.size > 1:
nodeToRemove = self._get(key,self.root)
if nodeToRemove:
self.remove(nodeToRemove) #调用
self.size = self.size-1
else:
raise KeyError('Error, key not in tree')
elif self.size == 1 and self.root.key == key:
self.root = None
self.size = self.size - 1
else:
raise KeyError('Error, key not in tree')

def __delitem__(self,key):
self.delete(key)

def spliceOut(self): #删除后继节点
if self.isLeaf():
if self.isLeftChild():
self.parent.leftChild = None
else:
self.parent.rightChild = None
elif self.hasAnyChildren():
if self.hasLeftChild():
if self.isLeftChild():
self.parent.leftChild = self.leftChild
else:
self.parent.rightChild = self.leftChild
self.leftChild.parent = self.parent
else:
if self.isLeftChild():
self.parent.leftChild = self.rightChild
else:
self.parent.rightChild = self.rightChild
self.rightChild.parent = self.parent

def findSuccessor(self): #找后继节点
succ = None
if self.hasRightChild(): #如果节点有右子节点,则后继节点是右子树中的最小的键
succ = self.rightChild.findMin()
else:
if self.parent:
if self.isLeftChild(): #如果节点没有右子节点并且是父节点的左子节点,则父节点是后继节点
succ = self.parent
else: #如果节点是其父节点的右子节点,并且它本身没有右子节点,则此节点的后继节点是其父节点的后继节点,不包括此节点
self.parent.rightChild = None
succ = self.parent.findSuccessor()
self.parent.rightChild = self
return succ

def findMin(self): #找最小值,树的最左边
current = self
while current.hasLeftChild():
current = current.leftChild
return current

def remove(self,currentNode): #3种情况
if currentNode.isLeaf(): #没有子节点,删除节点并删除对父节点中该节点的引用
if currentNode == currentNode.parent.leftChild:
currentNode.parent.leftChild = None
else:
currentNode.parent.rightChild = None
elif currentNode.hasBothChildren(): #两个节点
succ = currentNode.findSuccessor()
succ.spliceOut()
currentNode.key = succ.key
currentNode.payload = succ.payload

else: #只有一个节点,子节点取代其父,6种情况
if currentNode.hasLeftChild():
if currentNode.isLeftChild(): #当前节点是左节点
currentNode.leftChild.parent = currentNode.parent
currentNode.parent.leftChild = currentNode.leftChild
elif currentNode.isRightChild(): #右节点
currentNode.leftChild.parent = currentNode.parent
currentNode.parent.rightChild = currentNode.leftChild
else: #根节点
currentNode.replaceNodeData(currentNode.leftChild.key,
currentNode.leftChild.payload,
currentNode.leftChild.leftChild,
currentNode.leftChild.rightChild)
else:
if currentNode.isLeftChild():
currentNode.rightChild.parent = currentNode.parent
currentNode.parent.leftChild = currentNode.rightChild
elif currentNode.isRightChild():
currentNode.rightChild.parent = currentNode.parent
currentNode.parent.rightChild = currentNode.rightChild
else:
currentNode.replaceNodeData(currentNode.rightChild.key,
currentNode.rightChild.payload,
currentNode.rightChild.leftChild,
currentNode.rightChild.rightChild)
----------本文完,感谢您的阅读----------