数据结构( Python 语言描述 ) — —第11章:集和字典
- 使用集
- 集是没有特定顺序的项的一个集合,集中的项是唯一的
- 集上可以执行的操作
- 返回集中项的数目
- 测试集是否为空
- 向集中添加一项
- 从集中删除一项
- 测试给定的项是否在集中
- 获取两个集的并集
- 获取两个集的交集
- 获取两个集的差集
- 判断一个集是否是另一个集的子集
- 集上的差集和子集操作是不对称的
- Python 中的 set 类
- 使用示例
-
>>> A = set([0,1,2])
>>> B = set()
>>> 1 in A
True
>>> A & B
set()
>>> B.add(1)
>>> B.add(1)
>>> B.add(5)
>>> B
{1, 5}
>>> A & B
{1}
>>> A | B
{0, 1, 2, 5}
>>> A - B
{0, 2}
>>> B.remove(5)
>>> B
{1}
>>> B.issubset(A)
True
>>> for item in A:
print(item, end=" ")
0 1 2
>>>
- 集和包的区别
- 包也是一个无序的集合,但是包可以包含相同项的多个实例。因此,交集,差集和子集等操作并不适用于包
- 集和字典的关系
- 字典(dictionary)是元素的一个无序集合,其中的元素叫做条目(entry)。
- 每个条目由一个键和相关的值组成
- 添加、修改、删除条目的操作,都是通过键来定位的
- 字典中的键必须是唯一的,但是字典中的值可以是重复的
- 集的实现
- 可以使用数组或链表结构来包含集中的数据项
- 只要在结构中找到了要删除的项,链表结构就支持常数时间的删除
- 但是在集中添加项或者删除项往往需要线性时间的搜索
- 哈希
- 试图以近似随机的方式访问一个数组
- 基于数组和链表的实现
- 集实际上是包含了唯一的数据项和一些额外方法的包。集的最简单实现是将包子类化
- 类图
- AbstractSet类
- 代码
-
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Lijunjie
"""
File: abstractset.py
"""
class AbstractSet(object):
    """Mixin that supplies the generic set operations.

    The concrete class combined with this mixin must be iterable, support
    ``in`` tests and ``+``, provide an ``add`` method, and be constructible
    with no arguments.
    """

    def __or__(self, other):
        """Return the union of self and other, computed as ``self + other``."""
        return self + other

    def __and__(self, other):
        """Return a new set of the items of self that are also in other."""
        common = type(self)()
        for element in self:
            if element not in other:
                continue
            common.add(element)
        return common

    def __sub__(self, other):
        """Return a new set of the items of self that are not in other."""
        remaining = type(self)()
        for element in self:
            if element in other:
                continue
            remaining.add(element)
        return remaining

    def issubset(self, other):
        """Return True if every item of self is in other, else False."""
        return all(element in other for element in self)
- ArraySet类
- 代码
-
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Lijunjie
"""
File: arrayset.py
"""
from arraybag import ArrayBag
from abstractset import AbstractSet
class ArraySet(ArrayBag, AbstractSet):
    """A set layered on the array-based bag.

    ArrayBag supplies the container behaviour; AbstractSet supplies the
    set operators (|, &, -, issubset).
    """

    def __init__(self, sourceCollection=None):
        """Initialise the set, forwarding the optional sourceCollection
        to the ArrayBag constructor."""
        ArrayBag.__init__(self, sourceCollection)

    def add(self, item):
        """Store item unless an equal item is already in the set."""
        if item in self:
            return
        ArrayBag.add(self, item)
- LinkedSet类
-
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Lijunjie
"""
File name: linkedset.py
"""
from linkedbag import LinkedBag
from abstractset import AbstractSet
class LinkedSet(AbstractSet, LinkedBag):
    """A set layered on the link-based bag.

    LinkedBag supplies the container behaviour; AbstractSet supplies the
    set operators (|, &, -, issubset).
    """

    # Constructor method
    def __init__(self, sourceCollection=None):
        """Initialise the set, forwarding the optional sourceCollection
        to the LinkedBag constructor."""
        LinkedBag.__init__(self, sourceCollection)

    # Mutator method
    def add(self, item):
        """Store item unless an equal item is already in the set."""
        if item in self:
            return
        LinkedBag.add(self, item)
- testset函数
-
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Lijunjie
"""
File: testset.py
A simple test program
"""
from arrayset import ArraySet
from linkedset import LinkedSet
def testSet(setType):
    """Print a walkthrough of the set operations for the given set class.

    setType is called with a list to build a populated set and with no
    arguments to build an empty one. Each result is written to standard
    output next to the value that is expected; nothing is returned.
    """
    print("Add [0, 1, 2] to A.")
    A, B = setType([0, 1, 2]), setType()

    # Membership and intersection with an empty set.
    print("1 in A, Expect True: ", end="")
    print(1 in A)
    print("A & B, expect '{}':", A & B)

    # The duplicate 1 must be ignored by add.
    print("Adding 1 1 5 to B")
    for value in (1, 1, 5):
        B.add(value)
    print("B: ", B)

    # Binary set operators.
    print("A & B, expect '{1}': ", A & B)
    print("A | B: ", A | B)
    print("A - B: ", A - B)

    # Removal followed by the subset test.
    print("Remove 5 from B")
    B.remove(5)
    print("Expect '{1}': ", B)
    print("B is s subset of A?, expect True:", B.issubset(A))

    # Iteration.
    print("Overview of A:", end="")
    for element in A:
        print(element, end=" ")
# Run the walkthrough against the linked implementation. To exercise the
# array implementation instead, enable the ArraySet call below.
# testSet(ArraySet)
testSet(LinkedSet)
- 使用字典
- 基于数组和基于链表的算法的实现
- Item 类
- 包含键值对,而且包含一些比较方法,允许测试两个条目的相等性以及在有序的字典中对它们排序
- 代码包含在 abstractdict.py 文件中
- AbstractDict 类
- 需要重写 AbstractCollection中的相关方法,比如 __str__、__add__ 和 __eq__,以支持字典的相关行为,一大部分原因是由于 dict 中没有定义 add 方法。
- AbstractDict 不能直接使用 AbstractCollection 的 __init__ 方法,同样是由于没有定义 add 方法。sourceCollection 的处理需要在 AbstractDict 进行
- 代码
-
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Lijunjie
"""
File: abstractdict.py
"""
from abstractcollection import AbstractCollection
class Item(object):
    """A key/value entry of a dictionary.

    Equality and ordering consider only the key, and only hold between
    objects of exactly the same type; any other comparison yields False.
    """

    def __init__(self, key, value):
        self.key, self.value = key, value

    def __str__(self):
        """Return the entry rendered as ``key:value``."""
        return ":".join((str(self.key), str(self.value)))

    def __eq__(self, other):
        """Return whether other is the same type and has an equal key."""
        return type(self) == type(other) and self.key == other.key

    def __lt__(self, other):
        """Return whether other is the same type and has a greater key."""
        return type(self) == type(other) and self.key < other.key

    def __le__(self, other):
        """Return whether other is the same type and its key is not smaller."""
        return type(self) == type(other) and self.key <= other.key
class AbstractDict(AbstractCollection):
    """Behaviour shared by all dictionary implementations.

    Subclasses provide key iteration (__iter__), __getitem__, __setitem__
    and __contains__; everything here is written in terms of those.
    """

    def __init__(self, sourceCollection=None):
        """Initialise the dictionary.

        sourceCollection, when given and truthy, must yield (key, value)
        pairs; each pair is stored through ``self[key] = value``.
        """
        # The base initialiser is called without the source because the
        # entries are inserted below via item assignment.
        AbstractCollection.__init__(self)
        if not sourceCollection:
            return
        for key, value in sourceCollection:
            self[key] = value

    def __str__(self):
        """Return the entries as ``{key:value, ...}``."""
        body = ", ".join(map(str, self.items()))
        return "{" + body + "}"

    def __add__(self, other):
        """Return a new dictionary with the entries of self and other.

        When a key occurs in both, the value from other is kept.
        """
        def asPair(entry):
            return (entry.key, entry.value)

        merged = type(self)(map(asPair, self.items()))
        for key in other:
            merged[key] = other[key]
        return merged

    def __eq__(self, other):
        """Return True if other has the same type, size, keys and values."""
        if self is other:
            return True
        if type(self) != type(other):
            return False
        if len(self) != len(other):
            return False
        for key in self:
            if key not in other or self[key] != other[key]:
                return False
        return True

    def keys(self):
        """Return an iterator over the keys."""
        return iter(self)

    def values(self):
        """Return an iterator over the values, in key-iteration order."""
        return map(self.__getitem__, self)

    def items(self):
        """Return an iterator over Item(key, value) objects."""
        def asItem(key):
            return Item(key, self[key])

        return map(asItem, self)
- ArrayDict 类
- 跟书中不同,不再使用 _index 辅助函数,而是将其整合到 __contains__ 中
- 代码
-
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Lijunjie
from abstractdict import AbstractDict, Item
from arraylist import ArrayList
class ArrayDict(AbstractDict):
    """A dictionary that keeps its Item entries in an ArrayList.

    __contains__ performs the search and records the position of the
    match in self._index; the other methods call it first and then reuse
    that position.
    """

    def __init__(self, sourceCollection=None):
        """Create the dictionary, copying the (key, value) pairs of
        sourceCollection if it is present."""
        self._items = ArrayList()
        self._index = -1  # position of the last successful lookup, or -1
        AbstractDict.__init__(self, sourceCollection)

    # Accessor
    def __iter__(self):
        """Yield the keys in storage order."""
        position = 0
        while position < len(self):
            yield self._items[position].key
            position += 1

    def __getitem__(self, key):
        """Return the value stored under key.

        Raises KeyError if key is not in the dictionary.
        """
        if key in self:
            return self._items[self._index].value
        raise KeyError("Missing: " + str(key))

    def __contains__(self, item):
        """Return True if item is a key of the dictionary, else False.

        Side effect: self._index is set to the position of the matching
        entry, or to -1 when there is none.
        """
        for position, entry in enumerate(self._items):
            if entry.key == item:
                self._index = position
                return True
        self._index = -1
        return False

    # Mutator
    def __setitem__(self, key, value):
        """Store value under key, replacing the old value if key exists."""
        if key in self:
            self._items[self._index].value = value
            return
        self._items.insert(len(self), Item(key, value))
        self._size += 1

    def pop(self, key):
        """Remove key and return its value.

        Raises KeyError if key is not in the dictionary.
        """
        if key not in self:
            raise KeyError("Missing: " + str(key))
        self._size -= 1
        removed = self._items.pop(self._index)
        return removed.value
- LinkedDict 类
- 跟 ArrayDict 类一样,在 __contains__ 中包含对目标节点的定位
- 代码
-
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Lijunjie
from abstractdict import AbstractDict, Item
from node import Node
class LinkedDict(AbstractDict):
    """A dictionary that keeps its Item entries in a singly linked chain.

    __contains__ performs the search and records the matching node and
    its predecessor in self._foundNode / self._priorNode; the other
    methods call it first and then reuse those references.
    """

    def __init__(self, sourceCollection=None):
        """Create the dictionary, copying the (key, value) pairs of
        sourceCollection if it is present."""
        self._head = None
        self._priorNode = None
        self._foundNode = None
        AbstractDict.__init__(self, sourceCollection)

    # Accessor
    def __iter__(self):
        """Yield the keys, starting from the head of the chain."""
        node = self._head
        while node is not None:
            yield node.data.key
            node = node.next

    def __contains__(self, key):
        """Return True if key is in the dictionary, else False.

        Side effect: self._foundNode is left on the matching node (None
        when absent) and self._priorNode on the node visited just before.
        """
        previous, current = None, self._head
        while current is not None and not (current.data.key == key):
            previous, current = current, current.next
        self._priorNode, self._foundNode = previous, current
        return current is not None

    def __getitem__(self, key):
        """Return the value stored under key.

        Raises KeyError if key is not in the dictionary.
        """
        if key in self:
            return self._foundNode.data.value
        raise KeyError("Missing: " + str(key))

    # Mutator
    def __setitem__(self, key, value):
        """Store value under key, replacing the old value if key exists.

        A new entry is linked in at the head of the chain.
        """
        if key in self:
            self._foundNode.data.value = value
            return
        self._head = Node(Item(key, value), self._head)
        self._size += 1

    def pop(self, key):
        """Remove key and return its value.

        Raises KeyError if key is not in the dictionary.
        """
        if key not in self:
            raise KeyError("Missing: " + str(key))
        self._size -= 1
        target = self._foundNode
        if self._priorNode is None:
            # The match is the first node: advance the head past it.
            self._head = self._head.next
        else:
            self._priorNode.next = target.next
        return target.data.value
- testDict函数
- 代码
-
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Lijunjie
"""
File: testdict.py
"""
from arraydict import ArrayDict
from linkeddict import LinkedDict
def test(dictType):
    """Print a walkthrough of the dictionary operations for dictType.

    dictType is called with a list of (key, value) pairs to build a
    populated dictionary and with no arguments to build an empty one.
    Results are written to standard output; nothing is returned.
    """

    def copyOf(source):
        """Build a fresh dictType holding the same entries as source."""
        duplicate = dictType()
        for key in source:
            duplicate[key] = source[key]
        return duplicate

    def showChain(elements):
        """Print the elements separated by '->' and end the line."""
        for element in elements:
            print(element, end="->")
        print("\b\b")

    # Construction from a list of pairs.
    print("Adding [('Ming', 'AC1644'), ('Qing', 'AC1911'), ('Tang', 'AC960')] to dict A")
    A = dictType([('Ming', 'AC1644'), ('Qing', 'AC1911'), ('Tang', 'AC960')])
    print("Print A: ", str(A))

    # Copy and equality.
    print("\nSet B = A")
    B = copyOf(A)
    print("Print B: ", str(B))
    print("B == A:, expect True: ", B == A)

    # Lookup and insertion.
    print("\nGet Ming's value:", A["Ming"])
    print("\nAdding ('Qin', 'BC221') to A")
    A['Qin'] = 'BC221'
    print("Print A: ", str(A))
    print("B == A, expect False:", B == A)

    # Copy again, then replace a value.
    print("\nSet B == A")
    B = copyOf(A)
    print("Print B: ", str(B))
    print("\nReplace 'Tang' with the value AC907.")
    A['Tang'] = 'AC907'
    print("Print A: ", str(A))
    print("B == A, expect False:", B == A)

    # Removal.
    print("\nRemove Qing.")
    print(A.pop("Qing"))
    print("Print A: ", str(A))
    print("Remove Qin.")
    print(A.pop("Qin"))
    print("Print A: ", str(A))

    # Iterators.
    print("\nTest the iterator of keys.")
    showChain(A)
    showChain(A.keys())
    print("\n Test the iterator of values.")
    showChain(A.values())
    print("\n Test the iterator of items.")
    showChain(A.items())
    print("\nThe length of dictionary A: ", len(A))

    # Preconditions: both operations must raise KeyError for a missing key.
    print("\nTest __getitem__ precondition.")
    try:
        print(A["Song"])
    except KeyError as err:
        print("KeyError: " + str(err))
    print("\nTest pop precondition.")
    try:
        print(A.pop("Yuan"))
    except KeyError as err:
        print("KeyError: " + str(err))
# Run the walkthrough against the linked implementation. To exercise the
# array implementation instead, enable the ArrayDict call below.
# test(ArrayDict)
test(LinkedDict)
- 哈希策略
- 访问集合中的项最快的方法,是通过数组和基于数组的列表所支持的随机访问。假设集或字典的底层数据结构是一个数组,那么通过哈希函数,可以近似随机的访问集中的项或字典中的键
- 哈希函数
- 实现从键到地址的转换
- 哈希函数在一个给定的键上执行,并且返回其在数组中的相对位置
- 使用一种哈希策略的数组,叫做哈希表
- 哈希表的示例
- 冲突
- 不同的键得到相同的索引,称为冲突
- 示例
- 测试函数
-
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Lijunjie
def keysToIndexes(keys, n):
    """Map each key to its slot in an array of length n.

    Returns a list with ``key % n`` for every key, in the given order.
    """
    return [key % n for key in keys]
- 和密度的关系
- 数组的密度降低(即装填因子减小,以增大数组的空间为代价),可以减小冲突的可能性
- 示例
-
>>> keysToIndexes([3, 5, 8, 10], 4 )
[3, 1, 0, 2]
>>> keysToIndexes([3, 4, 8, 10], 4 )
[3, 0, 0, 2]
>>> keysToIndexes([3, 5, 8, 10], 8 )
[3, 5, 0, 2]
>>> keysToIndexes([3, 4, 8, 10], 8 )
[3, 4, 0, 2]
- 数组长度为素数,也有助于降低冲突发生的可能性
- 示例
-
>>> keysToIndexes([10, 20, 30, 40, 50, 60, 70], 15 )
[10, 5, 0, 10, 5, 0, 10]
>>> keysToIndexes([10, 20, 30, 40, 50, 60, 70], 11 )
[10, 9, 8, 7, 6, 5, 4]
- 非数字键的哈希
- 英文字符串
- 可以返回 ASCII 码的加和
- 英文单词中的首字母分布不均匀,以字母 S 开头的单词很多,而以字母 X 开头的单词很少。
- 为了减少首字母造成的潜在偏移,并减少回文所导致的效果,如果字符串的长度大于某一阈值,可以将首字母从字符串中丢弃,然后再加和运算
- 代码
-
def stringHash(item):
    """Generate an integer key from a string.

    The key is the sum of the character codes. If the string is longer
    than four characters and starts with a cased letter, that first
    letter is dropped before summing. If the (possibly shortened) string
    is longer than two characters, twice the code of its last character
    is subtracted from the sum.
    """
    dropFirst = len(item) > 4 and (item[0].islower() or item[0].isupper())
    text = item[1:] if dropFirst else item
    total = sum(map(ord, text))
    if len(text) > 2:
        # Subtract last ASCII
        total -= 2 * ord(text[-1])
    return total
- 运行示例
-
>>> stringHash("cinema")
328
>>> stringHash("iceman")
296
- 为了更好地测试新的哈希函数,可以将 keysToIndexes 函数更新为接收一个哈希函数作为可选的第 3 个参数
- 代码
-
def keysToIndexes(keys, n, hash=lambda key: key):
    """Map each key to its slot in an array of length n.

    hash is applied to every key before taking the remainder modulo n;
    by default the key itself is used. Returns the slots as a list, in
    the order of the given keys.
    """
    return [hash(key) % n for key in keys]
- 运行示例
-
>>> keysToIndexes([3, 5, 8, 10], 4)
[3, 1, 0, 2]
>>> keysToIndexes(["cinema", "iceman"], 2, stringHash)
[0, 0]
>>> keysToIndexes(["cinema", "iceman"], 3, stringHash)
[1, 2]
- Python 自带有标准哈希函数 hash
- 示例
-
>>> keysToIndexes(["cinema", "iceman"], 2, hash)
[1, 0]
>>> keysToIndexes(["cinema", "iceman"], 2, stringHash)
[0, 0]
- 较为高级的哈希函数是高级课程的内容,但不管哈希函数多么高级,哈希表仍然可能会冲突。因此需要研究处理冲突的方法
- 线性探测
- 数组中的每个位置都有三种状态:occupied,EMPTY,DELETED
- 在开始插入的时候,运行哈希函数来计算项的主索引。如果主索引的单元格不可用,算法会将索引向右移动以探测一个可用的单元格
- 代码示例
-
# Linear-probing insertion of item into table.
# Get the home index
index = abs(hash(item)) % len(table)
# Probe until a reusable cell (EMPTY or DELETED) is encountered
while table[index] not in (EMPTY, DELETED):
    # Increment the index and wrap around to the first position if necessary
    index = (index + 1) % len(table)
# A reusable cell is found, so store the item.
# (The original wrote to the misspelled name `tabel`, which raises NameError.)
table[index] = item
- 访问和删除操作
- 对于访问来说,当前的数组单元格为空或它包含目标项的时候,就停止探测过程。
- 删除操作,首先和访问一样进行探测。如果找到了目标项,就将其单元格的值设置为 DELETED
- 问题
- 在几次插入和删除操作后,一些标记为 DELETED 的单元格可能位于给定的项和其主索引之间,从而会增加平均的整体访问时间
- 解决方法
- 移动单元格
- 在删除一个单元格之后,将位于该单元格右侧的成员向左移动。直到碰到一个空的单元格或者当前被占用的单元格或者每一项的主索引。如果删除单元格留下了DELETED空隙,这个过程会填充上这个空隙
- 周期性的重新哈希表
- 例如当装载因子变为 0.5 时,即重新哈希表。这样会将 DELETED 状态的单元格全部变为当前占用的单元格或者空单元格
- 如果表有一些方法记录了访问给定项的频率,即可以按照这个频率递减的顺序来插入项。从而将较为频繁访问的项放置的更加接近于主索引
- 聚簇
- 二次探测
- 避免和线性探测相关的聚簇的一种方式是,从冲突位置将对空位置的索引向前推进一定的距离
- 二次探测通过将主索引增加每一次尝试距离的平方来实现这一点
- 代码
-
# Quadratic-probing insertion of item into table.
# Set the initial key, index, and distance
key = abs(hash(item))
distance = 1
homeIndex = key % len(table)
# Probing starts at the home index; without this assignment the loop
# condition below would read an undefined name.
index = homeIndex
# Stop searching when an unoccupied cell is encountered
while table[index] not in (EMPTY, DELETED):
    # Jump distance ** 2 cells past the home index, wrapping around to
    # the first position if necessary.
    index = (homeIndex + distance ** 2) % len(table)
    distance += 1
# An unoccupied cell is found, so store the item.
# (The original wrote to the misspelled name `tabel`, which raises NameError.)
table[index] = item
- 二次探测的主要问题是会跳过一些单元格,从而导致空间的浪费
- 链化 -- 桶链策略
- 示意图
- 访问和删除操作执行的步骤
- 计算项在数组中的主索引
- 搜索该项在链表中的索引
- 插入项执行的步骤
- 计算项在数组中的主索引
- 如果数组单元格为空,创建一个带有该项的节点,并将该节点赋值给单元格
- 如果不为空,会产生冲突。在该位置已有的项,是链表的头部。在这个链表的头部插入新的项
- 示意代码
-
# Bucket/chaining insertion of item into table.
# Get the home index
index = abs(hash(item)) % len(table)
# Access a bucket and store the item at the head of its linked list.
# The new node's next link is the bucket's current content, so the same
# statement covers both an empty bucket and an existing chain.
table[index] = Node(item, table[index])
- 复杂度分析
- 案例学习:探查哈希策略
- 需求
- 编写一个程序,来探查不同的哈希策略
- 分析
- 设计
- HashTable
- insert 方法假设数组中有用于新项的空间,并且新项不会和已有的项重复
- 代码
-
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Lijunjie
"""
File: hashtable.py
"""
from arrays import Array
class HashTable(object):
    """An open-addressing hash table used to explore probing strategies.

    Only insertion is supported. After each insert the home index, the
    index actually used and the number of probes can be queried.
    """

    # Cell markers. Cells are compared with these by identity, so stored
    # items that merely compare equal to a marker (e.g. the integer 1,
    # which equals True) are not mistaken for free cells. The marker
    # objects themselves (None and True) cannot be stored as items.
    EMPTY = None
    DELETED = True

    def __init__(self, capacity=29, hashFunction=hash, liner=True):
        """Create an empty table.

        capacity     -- number of cells in the table.
        hashFunction -- callable mapping an item to an integer.
        liner        -- True for linear probing, False for quadratic
                        probing (the parameter name is kept as is for
                        compatibility with existing callers).
        """
        self._table = Array(capacity, HashTable.EMPTY)
        self._size = 0
        self._hash = hashFunction
        self._homeIndex = -1
        self._actualIndex = -1
        self._liner = liner
        self._probeCount = 0

    # Accessor method
    def __len__(self):
        """Return the number of items inserted so far."""
        return self._size

    def loadFactor(self):
        """Return the ratio of stored items to the number of cells."""
        return self._size / len(self._table)

    def homeIndex(self):
        """Return the home index of the most recent insertion."""
        return self._homeIndex

    def actualIndex(self):
        """Return the index at which the most recent item was stored."""
        return self._actualIndex

    def probeCount(self):
        """Return the number of probes needed by the most recent insertion."""
        return self._probeCount

    @staticmethod
    def _isAvailable(cell):
        """Return True if cell holds the EMPTY or the DELETED marker."""
        return cell is HashTable.EMPTY or cell is HashTable.DELETED

    # Mutator method
    def insert(self, item):
        """Insert item into the table.

        Precondition: There is at least one empty cell or one previously
        occupied cell reachable by the probe sequence.
        There is not a duplicate item.
        """
        self._probeCount = 0
        capacity = len(self._table)
        # Get the home index
        self._homeIndex = self._hash(item) % capacity
        distance = 1
        index = self._homeIndex
        # Stop searching when an available cell is encountered
        while not HashTable._isAvailable(self._table[index]):
            if self._liner:
                # Linear probing: step to the next cell.
                index = (index + 1) % capacity
            else:
                # Quadratic probing: the offset is measured from the home
                # index, not from the previously probed cell.
                index = (self._homeIndex + distance ** 2) % capacity
                distance += 1
            self._probeCount += 1
        # An available cell is found, so store the item
        self._table[index] = item
        self._size += 1
        self._actualIndex = index
- Profiler
- 代码
-
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Lijunjie
"""
File: profiler.py
"""
from hashtable import HashTable
class Profiler(object):
    """Collects collision and probe statistics for hash table insertions."""

    def __init__(self):
        self._table = None
        self._collisions = 0
        self._probeCount = 0
        self._result = ""

    def test(self, table, data):
        """Insert every item of data into table and gather statistics.

        table must provide loadFactor(), insert(item), homeIndex(),
        actualIndex() and probeCount(). The items of data must be
        integers, because they are formatted with %d. The formatted
        report is kept for __str__; nothing is returned.
        """
        self._table = table
        self._collisions = 0
        self._probeCount = 0
        lines = ["Load Factor | Item Inserted | Home Index | Actual Index | Probes"]
        for item in data:
            # The load factor is sampled before the insertion.
            loadFactor = table.loadFactor()
            table.insert(item)
            probes = table.probeCount()
            self._probeCount += probes
            if probes > 0:
                self._collisions += 1
            lines.append("%8.3f%14d%12d%12d%14d" % (
                loadFactor, item, table.homeIndex(), table.actualIndex(), probes))
        # Without collisions there are no probes either; report 0.0
        # instead of dividing by zero.
        if self._collisions:
            average = self._probeCount / self._collisions
        else:
            average = 0.0
        lines.append("Total collisions: " + str(self._collisions))
        lines.append("Total probes: " + str(self._probeCount))
        lines.append("Average probes per collision: " + str(average))
        self._result = "\n".join(lines)

    def __str__(self):
        """Return the report of the last test, or a notice if none was run."""
        if self._table is None:
            return "No test has been run yet."
        return self._result

    def collisions(self):
        """Return the number of insertions that needed at least one probe."""
        return self._collisions

    def probeCount(self):
        """Return the total number of probes over all insertions."""
        return self._probeCount
def main():
    """Profile the insertion of 10, 20, ..., 70 into an 8-cell table and
    print the report."""
    # An 8-cell table with an identity hash function; the third
    # constructor argument is left at its default (linear probing).
    profiler = Profiler()
    profiler.test(HashTable(8, lambda x: x), list(range(10, 71, 10)))
    print(profiler)


if __name__ == "__main__":
    main()
- 输出示例
-
Load Factor | Item Inserted | Home Index | Actual Index | Probes
0.000 10 2 2 0
0.125 20 4 4 0
0.250 30 6 6 0
0.375 40 0 0 0
0.500 50 2 3 1
0.625 60 4 5 1
0.750 70 6 7 1
Total collisions: 3
Total probes: 3
Average probes per collision: 1.0
- 集的哈希实现
- 采用桶/链策略来处理冲突
- __contains__方法可以将一些实例变量的值设置为可以在插入、访问和删除过程中使用的信息
- 代码实现
-
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Lijunjie
from node import Node
from arrays import Array
from abstractset import AbstractSet
from abstractcollection import AbstractCollection
class HashSet(AbstractCollection, AbstractSet):
    """A hashing implementation of a set using bucket chaining.

    Each cell of self._items holds the head node of a linked chain (or
    None). __contains__ performs the search and records the bucket index,
    the matching node and its predecessor; add and remove call it first
    and then reuse that information.
    """

    DEFAULT_CAPACITY = 3

    def __init__(self, sourceCollection=None, capacity=None):
        """Create the set.

        sourceCollection -- optional collection passed on to the
                            AbstractCollection initialiser.
        capacity         -- number of buckets; DEFAULT_CAPACITY if None.
        """
        if capacity is None:
            self._capacity = HashSet.DEFAULT_CAPACITY
        else:
            self._capacity = capacity
        self._items = Array(self._capacity)
        self._foundNode = self._priorNode = None
        self._index = -1
        AbstractCollection.__init__(self, sourceCollection)

    # Accessor method
    def __contains__(self, item):
        """Return True if item is in the set or False otherwise.

        Side effect: self._index is set to the item's bucket,
        self._foundNode to the matching node (None when absent) and
        self._priorNode to the node visited just before it.
        """
        self._index = hash(item) % len(self._items)
        self._priorNode = None
        self._foundNode = self._items[self._index]
        while self._foundNode is not None:
            if self._foundNode.data == item:
                return True
            self._priorNode = self._foundNode
            self._foundNode = self._foundNode.next
        return False

    def __iter__(self):
        """Yield the items bucket by bucket, following each chain."""
        for node in self._items:
            while node is not None:
                yield node.data
                node = node.next

    def __str__(self):
        """Return the items as ``{item, ...}``."""
        return "{" + ", ".join(map(str, self)) + "}"

    # Mutator methods
    def clear(self):
        """Make the set empty, keeping its configured number of buckets."""
        self._size = 0
        # Rebuild with the instance's own capacity; using DEFAULT_CAPACITY
        # here would silently discard a capacity given to the constructor.
        self._items = Array(self._capacity)
        # Drop references into the discarded chains.
        self._foundNode = self._priorNode = None
        self._index = -1

    def add(self, item):
        """Add item to the set if it is not already in the set."""
        if item not in self:
            # The failed lookup left self._index on the item's bucket;
            # link the new node in at the head of that chain.
            self._items[self._index] = Node(item, self._items[self._index])
            self._size += 1

    def remove(self, item):
        """Remove item from the set and return it.

        Raises KeyError if item is not in the set.
        """
        if item not in self:
            raise KeyError("Missing: " + str(item))
        if self._priorNode is None:
            # The match is the first node of its chain.
            self._items[self._index] = self._foundNode.next
        else:
            self._priorNode.next = self._foundNode.next
        self._size -= 1
        return self._foundNode.data
- 字典的哈希实现
- 采用桶/链策略来处理冲突
- __contains__方法同样将一些实例变量的值设置为可以在插入、访问和删除过程中使用的信息
- 代码实现
-
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Lijunjie
"""
File: hashdict.py
"""
from abstractdict import AbstractDict, Item
from node import Node
from arrays import Array
class HashDict(AbstractDict):
    """A hash-based dictionary using bucket chaining.

    Each cell of self._array holds the head node of a chain of Item
    entries (or None). __contains__ performs the search and records the
    bucket index, the matching node and its predecessor; the other
    methods call it first and then reuse that information.
    """

    DEFAULT_CAPACITY = 9

    def __init__(self, sourceDictionary=None):
        """Create the dictionary, passing sourceDictionary on to the
        AbstractDict initialiser."""
        self._array = Array(HashDict.DEFAULT_CAPACITY)
        self._foundNode = None
        self._priorNode = None
        self._index = -1
        AbstractDict.__init__(self, sourceDictionary)

    # Accessor method
    def __contains__(self, key):
        """Return True if key is in the dictionary, else False.

        Side effect: self._index is set to the key's bucket,
        self._foundNode to the matching node (None when absent) and
        self._priorNode to the node visited just before it.
        """
        self._index = hash(key) % len(self._array)
        previous, current = None, self._array[self._index]
        while current is not None and not (current.data.key == key):
            previous, current = current, current.next
        self._priorNode, self._foundNode = previous, current
        return current is not None

    def __iter__(self):
        """Yield the keys bucket by bucket, following each chain."""
        for head in self._array:
            node = head
            while node is not None:
                yield node.data.key
                node = node.next

    def __getitem__(self, key):
        """Return the value stored under key.

        Raises KeyError if key is not in the dictionary.
        """
        if key in self:
            return self._foundNode.data.value
        raise KeyError("Missing: " + str(key))

    # Mutator method
    def __setitem__(self, key, value):
        """Store value under key, replacing the old value if key exists.

        A new entry is linked in at the head of its bucket's chain.
        """
        if key not in self:
            bucket = self._index
            self._array[bucket] = Node(Item(key, value), self._array[bucket])
            self._size += 1
            return
        self._foundNode.data.value = value

    def pop(self, key):
        """Remove key and return its value.

        Raises KeyError if key is not in the dictionary.
        """
        if key not in self:
            raise KeyError("Missing: " + str(key))
        target = self._foundNode
        if self._priorNode is None:
            # The match is the first node of its chain.
            self._array[self._index] = target.next
        else:
            self._priorNode.next = target.next
        self._size -= 1
        return target.data.value
- 有序的集和字典
- 集中的项和字典中的键必须是可比较的,才可能创建有序的集。有序的集必须要放弃哈希策略
- 可以用二叉搜索树来实现有序的集和字典
- 有序的集
- 代码
-
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Lijunjie
"""
File: treesortedset.py
"""
from linkedbst import LinkedBST
from abstractcollection import AbstractCollection
from abstractset import AbstractSet
class TreeSortedSet(AbstractCollection, AbstractSet):
    """A sorted set that stores its items in a LinkedBST."""

    def __init__(self, sourceCollection=None):
        """Create the set, passing sourceCollection on to the
        AbstractCollection initialiser."""
        self._items = LinkedBST()
        AbstractCollection.__init__(self, sourceCollection)

    def __contains__(self, item):
        """Return True if item is in the set or False otherwise."""
        return item in self._items

    def __iter__(self):
        """Return the tree's inorder iterator over the items."""
        return self._items.inorder()

    def __str__(self):
        """Return the items as ``{item, ...}`` in iteration order."""
        return "{" + ", ".join(map(str, self)) + "}"

    # Mutator method
    def add(self, item):
        """Add item to the set if it is not already in the set."""
        if item not in self:
            self._items.add(item)
            self._size += 1

    def clear(self):
        """Make the set empty."""
        self._size = 0
        self._items = LinkedBST()

    def remove(self, item):
        """Remove item from the set and return the removed item.

        Raises KeyError if item is not in the set.
        """
        if item not in self:
            raise KeyError("Missing: " + str(item))
        # Hand back what the tree's remove returns, so the method does
        # what its contract says and matches HashSet.remove.
        removedItem = self._items.remove(item)
        self._size -= 1
        return removedItem
- 有序的字典
- 代码
-
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Lijunjie
"""
File: treesorteddict.py
"""
from linkedbst import LinkedBST
from abstractdict import AbstractDict, Item
class TreeSortedDict(AbstractDict):
    """A sorted dictionary that stores its Item entries in a LinkedBST.

    __contains__ performs the search and records the matching entry in
    self._foundItem; __getitem__ and __setitem__ call it first and then
    reuse that entry.
    """

    def __init__(self, sourceCollection=None):
        """Create the dictionary, copying the (key, value) pairs of
        sourceCollection if it is present."""
        self._items = LinkedBST()
        self._foundItem = None
        AbstractDict.__init__(self, sourceCollection)

    # Accessor
    def __iter__(self):
        """Yield the keys in the tree's inorder sequence."""
        for entry in self._items.inorder():
            yield entry.key

    def __getitem__(self, key):
        """Return the value stored under key.

        Raises KeyError if key is not in the dictionary.
        """
        if key in self:
            return self._foundItem.value
        raise KeyError("Missing: " + str(key))

    def __contains__(self, key):
        """Return True if key is in the dictionary, else False.

        Side effect: self._foundItem is set to the result of looking up
        a probe Item with this key in the tree (None when absent).
        """
        self._foundItem = self._items.find(Item(key, None))
        return self._foundItem is not None

    # Mutator
    def __setitem__(self, key, value):
        """Store value under key, replacing the old value if key exists."""
        if key in self:
            self._foundItem.value = value
            return
        self._items.add(Item(key, value))
        self._size += 1

    def pop(self, key):
        """Remove key and return its value.

        Raises KeyError if key is not in the dictionary.
        """
        if key not in self:
            raise KeyError("Missing: " + str(key))
        self._size -= 1
        # Items compare by key only, so a probe Item with a None value
        # identifies the entry to remove.
        return self._items.remove(Item(key, None)).value