[2021 spring] CS61A Project 2: CATS (CS 61A Autocorrected Typing Software)
项目说明: https://inst.eecs.berkeley.edu/~cs61a/sp21/proj/cats/#introduction
自动更正打字软件
Phase 1:Typing
choose:从多个段落中选出供用户打字的段落(返回第k个满足select条件的段落,不足k个时返回空字符串)
about:传入一个topic单词列表,返回一个函数,用于判断段落是否符合主题。
accuracy:根据已打出的段落和参考段落计算准确率
wpm:计算打字速度 words per minute
Phase 2:Autocorrect
autocorrect
自动更正函数
参数:
typed_word:可能包含错误的单词字符串
valid_words:有效单词列表
diff_function:用于量化两个单词之间差异的函数
limit:对量化后差异的限制
功能:
如果typed_word在valid_words中,直接返回typed_word;
否则找到有效列表中与typed_word差异最小的单词,
如果这个最小差异不超过限制limit(小于或等于limit),则返回该单词;
如果超出限制,则返回typed_word本身。
sphinx_switches
一种diff_function
参数:
start:初始单词
goal:目标单词
limit:从start到goal,需要修改的字符个数上限
功能:
返回将start转换为goal需要修改(逐个替换)的最小字符数count,如果单词长度不同,则将长度差添加到总数中。
如果count大于limit,则直接返回limit+1以减小计算量。
pawssible_patches
一种diff_function
与sphinx_switches区别在于,可以对start进行添加、删除或替换的编辑操作。
定义add、remove和substitute操作后递归解决。
Phase 3:Multiplayer 多人游戏
report_progress 报告玩家进度
time_per_word 根据时间戳数据返回打出每个单词所需要的时间
fastest_words 返回各个玩家打字最快的单词列表
完整代码
"""Typing test implementation"""
from utils import lower, split, remove_punctuation, lines_from_file
from ucb import main, interact, trace
from datetime import datetime
###########
# Phase 1 #
###########
def choose(paragraphs, select, k):
    """Pick the K-th (zero-based) paragraph among those accepted by SELECT.

    Arguments:
        paragraphs: a list of strings
        select: a one-argument predicate; a true result marks a paragraph
            as selectable
        k: an integer index into the accepted paragraphs

    Returns the K-th accepted paragraph, or the empty string when K is at or
    beyond the number of accepted paragraphs.

    >>> ps = ['hi', 'how are you', 'fine']
    >>> s = lambda p: len(p) <= 4
    >>> choose(ps, s, 0)
    'hi'
    >>> choose(ps, s, 1)
    'fine'
    >>> choose(ps, s, 2)
    ''
    """
    # BEGIN PROBLEM 1
    accepted = [candidate for candidate in paragraphs if select(candidate)]
    if k < len(accepted):
        return accepted[k]
    return ''
    # END PROBLEM 1
def about(topic):
    """Build a paragraph predicate for the words in TOPIC.

    Arguments:
        topic: a list of lowercase words related to a subject

    Returns a one-argument function. Given a paragraph, it strips the
    punctuation, lowercases the text, splits it into words, and reports
    whether any of those words is in TOPIC.

    >>> about_dogs = about(['dog', 'dogs', 'pup', 'puppy'])
    >>> choose(['Cute Dog!', 'That is a cat.', 'Nice pup!'], about_dogs, 0)
    'Cute Dog!'
    >>> choose(['Cute Dog!', 'That is a cat.', 'Nice pup.'], about_dogs, 1)
    'Nice pup.'
    """
    assert all([lower(x) == x for x in topic]), 'topics should be lowercase.'
    # BEGIN PROBLEM 2
    def mentions_topic(paragraph):
        # Normalize first so 'Dog!' can match the topic word 'dog'.
        normalized = lower(remove_punctuation(paragraph))
        return any(word in topic for word in split(normalized))
    return mentions_topic
    # END PROBLEM 2
def accuracy(typed, reference):
    """Compute the percentage of words in TYPED that match REFERENCE.

    Words are compared position by position, and the result is the share of
    typed words that equal the reference word at the same position. Typed
    words beyond the end of REFERENCE count as wrong.

    Arguments:
        typed: a string that may contain typos
        reference: a string without errors

    Returns a float between 0.0 and 100.0. If both strings have no words the
    result is 100.0; if only TYPED has no words the result is 0.0.

    >>> accuracy('Cute Dog!', 'Cute Dog.')
    50.0
    >>> accuracy('A Cute Dog!', 'Cute Dog.')
    0.0
    >>> accuracy('cute Dog.', 'Cute Dog.')
    50.0
    >>> accuracy('Cute Dog. I say!', 'Cute Dog.')
    50.0
    >>> accuracy('Cute', 'Cute Dog.')
    100.0
    >>> accuracy('', 'Cute Dog.')
    0.0
    >>> accuracy('', '')
    100.0
    """
    typed_words = split(typed)
    reference_words = split(reference)
    # BEGIN PROBLEM 3
    if not typed_words:
        # Nothing typed: perfect only when there was nothing to type.
        return 0.0 if reference_words else 100.0
    matched = sum(
        1 for typed_word, expected in zip(typed_words, reference_words)
        if typed_word == expected
    )
    return matched / len(typed_words) * 100.0
    # END PROBLEM 3
def wpm(typed, elapsed):
    """Compute the typing speed of TYPED in words per minute.

    A "word" is counted as five characters of TYPED, whatever its content.

    Arguments:
        typed: an entered string
        elapsed: an amount of time in seconds; must be positive

    >>> wpm('hello friend hello buddy hello', 15)
    24.0
    >>> wpm('0123456789',60)
    2.0
    """
    assert elapsed > 0, 'Elapsed time must be positive'
    # BEGIN PROBLEM 4
    standard_words = len(typed) / 5
    # Scale from "per ELAPSED seconds" to "per 60 seconds".
    return standard_words * 60 / elapsed
    # END PROBLEM 4
###########
# Phase 2 #
###########
def autocorrect(typed_word, valid_words, diff_function, limit):
    """Return the element of VALID_WORDS closest to TYPED_WORD.

    If TYPED_WORD is itself in VALID_WORDS it is returned unchanged.
    Otherwise the valid word with the smallest difference is returned, with
    ties going to the earliest such word in VALID_WORDS. TYPED_WORD is
    returned instead when that smallest difference is greater than LIMIT, or
    when VALID_WORDS is empty.

    Arguments:
        typed_word: a string representing a word that may contain typos
        valid_words: a list of strings representing valid words
        diff_function: a function quantifying the difference between two
            words; called as diff_function(typed_word, valid_word, limit)
        limit: a number

    >>> ten_diff = lambda w1, w2, limit: 10 # Always returns 10
    >>> autocorrect("hwllo", ["butter", "hello", "potato"], ten_diff, 20)
    'butter'
    >>> first_diff = lambda w1, w2, limit: (1 if w1[0] != w2[0] else 0) # Checks for matching first char
    >>> autocorrect("tosting", ["testing", "asking", "fasting"], first_diff, 10)
    'testing'
    >>> autocorrect("hwllo", [], ten_diff, 20) # Nothing to correct to
    'hwllo'
    """
    # BEGIN PROBLEM 5
    if typed_word in valid_words:
        return typed_word
    if not valid_words:
        # min() of an empty list raises ValueError; with no candidates there
        # is nothing to correct to.
        return typed_word
    diffs = [diff_function(typed_word, word, limit) for word in valid_words]
    smallest = min(diffs)
    if smallest > limit:
        return typed_word
    # list.index returns the first position, so ties go to the earliest word.
    return valid_words[diffs.index(smallest)]
    # END PROBLEM 5
def sphinx_switches(start, goal, limit):
    """A diff function for autocorrect that counts substitutions.

    Compares START and GOAL position by position, counts the positions where
    the characters differ, and adds the difference in the two lengths.

    Arguments:
        start: a starting word
        goal: a string representing a desired goal word
        limit: a number representing an upper bound on the number of chars that must change

    Returns the count, or LIMIT + 1 as soon as the running count exceeds
    LIMIT (the rest of the words is then not examined).

    >>> big_limit = 10
    >>> sphinx_switches("nice", "rice", big_limit) # Substitute: n -> r
    1
    >>> sphinx_switches("range", "rungs", big_limit) # Substitute: a -> u, e -> s
    2
    >>> sphinx_switches("pill", "pillage", big_limit) # Don't substitute anything, length difference of 3.
    3
    >>> sphinx_switches("roses", "arose", big_limit) # Substitute: r -> a, o -> r, s -> o, e -> s, s -> e
    5
    >>> sphinx_switches("rose", "hello", big_limit) # Substitute: r->h, o->e, s->l, e->l, length difference of 1.
    5
    """
    # BEGIN PROBLEM 6
    def walk(rest_start, rest_goal, mismatches):
        if mismatches > limit:
            return limit + 1
        if not (rest_start or rest_goal):
            return mismatches
        if rest_start and rest_goal and rest_start[0] == rest_goal[0]:
            return walk(rest_start[1:], rest_goal[1:], mismatches)
        # Either the leading characters differ or one word has run out; both
        # cost one change. Slicing an empty string just yields ''.
        return walk(rest_start[1:], rest_goal[1:], mismatches + 1)

    return walk(start, goal, 0)
    # END PROBLEM 6
def pawssible_patches(start, goal, limit):
    """A diff function that computes the edit distance from START to GOAL.

    An edit adds, removes, or substitutes one character of START. Each edit
    spends one unit of LIMIT. A branch whose budget drops below zero stops
    and contributes 0 for its remainder, so its total is the edits already
    spent on it.

    Arguments:
        start: a starting word
        goal: a goal word
        limit: a number representing an upper bound on the number of edits

    >>> big_limit = 10
    >>> pawssible_patches("cats", "scat", big_limit) # cats -> scats -> scat
    2
    >>> pawssible_patches("purng", "purring", big_limit) # purng -> purrng -> purring
    2
    >>> pawssible_patches("ckiteus", "kittens", big_limit) # ckiteus -> kiteus -> kitteus -> kittens
    3
    """
    if limit < 0:
        # Budget exhausted on this branch; stop exploring it.
        return 0
    if not start or not goal:
        # Only additions or removals remain (0 when both words are empty).
        return abs(len(start) - len(goal))
    if start[0] == goal[0]:
        # Matching leading characters cost nothing.
        return pawssible_patches(start[1:], goal[1:], limit)
    budget = limit - 1
    candidates = (
        pawssible_patches(start, goal[1:], budget),      # add goal[0] to start
        pawssible_patches(start[1:], goal, budget),      # remove start[0]
        pawssible_patches(start[1:], goal[1:], budget),  # substitute start[0]
    )
    return 1 + min(candidates)
def final_diff(start, goal, limit):
    """A diff function that takes in a string START, a string GOAL, and a number LIMIT.

    Placeholder for an optional custom diff function. As written it is
    unimplemented: the assert below fails on every call, so calling it raises
    AssertionError (unless assertions are disabled).
    If you implement this function, it will be used.
    """
    assert False, 'Remove this line to use your final_diff function.'


# Limit paired with final_diff. This file only defines it; presumably it is
# read by the code that calls final_diff (verify against the caller).
FINAL_DIFF_LIMIT = 6 # REPLACE THIS WITH YOUR LIMIT
###########
# Phase 3 #
###########
def report_progress(typed, prompt, user_id, send):
    """Send a report of your id and progress so far to the multiplayer server.

    Progress is the number of leading words of TYPED that match PROMPT
    word-for-word, divided by the total number of words in PROMPT. Counting
    stops at the first mismatch. Typed words beyond the end of PROMPT are
    ignored.

    Arguments:
        typed: a list of the words typed so far
        prompt: a list of the words in the typing prompt; must be non-empty
            (an empty prompt raises ZeroDivisionError)
        user_id: a number representing the id of the current user
        send: a function used to send progress to the multiplayer server;
            called once with {'id': user_id, 'progress': progress}

    Returns the progress so far.

    >>> print_progress = lambda d: print('ID:', d['id'], 'Progress:', d['progress'])
    >>> # The above function displays progress in the format ID: __, Progress: __
    >>> print_progress({'id': 1, 'progress': 0.6})
    ID: 1 Progress: 0.6
    >>> typed = ['how', 'are', 'you']
    >>> prompt = ['how', 'are', 'you', 'doing', 'today']
    >>> report_progress(typed, prompt, 2, print_progress)
    ID: 2 Progress: 0.6
    0.6
    >>> report_progress(['how', 'aree'], prompt, 3, print_progress)
    ID: 3 Progress: 0.2
    0.2
    """
    # BEGIN PROBLEM 8
    correct = 0
    # zip stops at the shorter list, so typing more words than the prompt
    # holds no longer indexes past the end of PROMPT.
    for typed_word, prompt_word in zip(typed, prompt):
        if typed_word != prompt_word:
            break
        correct += 1
    progress = correct / len(prompt)
    send({'id': user_id, 'progress': progress})
    return progress
    # END PROBLEM 8
def fastest_words_report(times_per_player, words):
    """Return a text description of the fastest words typed by each player.

    Builds a game from the timing data with time_per_word and asks
    fastest_words which words each player typed fastest. The result has one
    newline-terminated line per player, with players numbered from 1 and
    that player's words joined by commas.
    """
    per_player = fastest_words(time_per_word(times_per_player, words))
    lines = [
        'Player {} typed these fastest: {}\n'.format(number, ','.join(player_words))
        for number, player_words in enumerate(per_player, start=1)
    ]
    return ''.join(lines)
def time_per_word(times_per_player, words):
    """Turn per-player timestamps into a game data abstraction.

    Each player's time for a word is the difference between consecutive
    timestamps, so a list of N + 1 timestamps yields N word times.

    Arguments:
        times_per_player: A list of lists of timestamps including the time
                          the player started typing, followed by the time
                          the player finished typing each word.
        words: a list of words, in the order they are typed.

    Returns game(words, times), where times holds one list of per-word
    durations for each player.

    >>> p = [[75, 81, 84, 90, 92], [19, 29, 35, 36, 38]]
    >>> game = time_per_word(p, ['collar', 'plush', 'blush', 'repute'])
    >>> all_words(game)
    ['collar', 'plush', 'blush', 'repute']
    >>> all_times(game)
    [[6, 3, 6, 2], [10, 6, 1, 2]]
    """
    # BEGIN PROBLEM 9
    durations = [
        # Pair each timestamp with its successor and subtract.
        [finished - began for began, finished in zip(stamps, stamps[1:])]
        for stamps in times_per_player
    ]
    return game(words, durations)
    # END PROBLEM 9
def fastest_words(game):
    """Return a list of lists of which words each player typed fastest.

    The result has one list per player, in player order. Each word goes to
    the player with the strictly smallest time for it; on a tie the player
    with the lowest index keeps the word.

    Arguments:
        game: a game data abstraction as returned by time_per_word.

    >>> p0 = [5, 1, 3]
    >>> p1 = [4, 1, 6]
    >>> fastest_words(game(['Just', 'have', 'fun'], [p0, p1]))
    [['have', 'fun'], ['Just']]
    """
    player_indices = range(len(all_times(game)))  # contains an *index* for each player
    word_indices = range(len(all_words(game)))    # contains an *index* for each word
    # BEGIN PROBLEM 10
    winners = [[] for _ in player_indices]
    for w in word_indices:
        best_player, best_time = 0, float('inf')
        for p in player_indices:
            elapsed = time(game, p, w)
            # Strict comparison: an equal time does not displace an earlier player.
            if elapsed < best_time:
                best_player, best_time = p, elapsed
        winners[best_player].append(word_at(game, w))
    return winners
    # END PROBLEM 10
def game(words, times):
    """Construct the game data abstraction: all words typed and their times.

    Arguments:
        words: a list of strings
        times: a list of lists of numbers, one inner list per player, each
            with exactly one time per word

    Returns the two-element list [words, times]. Each requirement above is
    checked with an assert.
    """
    assert all(type(w) is str for w in words), 'words should be a list of strings'
    assert all(type(t) is list for t in times), 'times should be a list of lists'
    assert all(
        isinstance(i, (int, float)) for t in times for i in t
    ), 'times lists should contain numbers'
    assert all(
        len(t) == len(words) for t in times
    ), 'There should be one word per time.'
    return [words, times]
def word_at(game, word_index):
    """Selector: return the word of GAME at position WORD_INDEX.

    Asserts that WORD_INDEX is at least 0 and less than the number of words.
    """
    words = game[0]
    assert 0 <= word_index < len(words), "word_index out of range of words"
    return words[word_index]
def all_words(game):
    """Selector: return the list of all words in GAME (its first element)."""
    words = game[0]
    return words
def all_times(game):
    """Selector: return the typing times of all players in GAME (its second element)."""
    times = game[1]
    return times
def time(game, player_num, word_index):
    """Selector: return how long player PLAYER_NUM took on the word at WORD_INDEX.

    Asserts that WORD_INDEX is below the number of words and PLAYER_NUM is
    below the number of players. Only the upper bounds are checked.
    """
    words = game[0]
    assert word_index < len(words), "word_index out of range of words"
    times = game[1]
    assert player_num < len(times), "player_num out of range of players"
    return times[player_num][word_index]
def game_string(game):
    """Return a string representation of GAME in the form game(<words>, <times>)."""
    words = game[0]
    times = game[1]
    return "game(%s, %s)" % (words, times)
# Multiplayer switch. This file only defines the flag; presumably it is read
# by the GUI/server code outside this file (verify against the caller).
enable_multiplayer = False # Change to True when you're ready to race.
##########################
# Command Line Interface #
##########################
def run_typing_test(topics):
    """Measure typing speed and accuracy on the command line.

    Loads paragraphs from 'data/sample_paragraphs.txt' and repeatedly shows
    the next one matching TOPICS (every paragraph when TOPICS is empty). It
    reads one line of input, then prints words per minute and accuracy.

    The loop ends when no matching paragraph is left, when the user submits
    an empty line, or when the user answers 'q' at the continue prompt.

    Arguments:
        topics: a list of topic words; passed to about() when non-empty
    """
    paragraphs = lines_from_file('data/sample_paragraphs.txt')
    # Default selector accepts every paragraph.
    select = lambda p: True
    if topics:
        select = about(topics)
    i = 0  # index of the next matching paragraph to show
    while True:
        reference = choose(paragraphs, select, i)
        # choose returns '' once fewer than i + 1 paragraphs match.
        if not reference:
            print('No more paragraphs about', topics, 'are available.')
            return
        print('Type the following paragraph and then press enter/return.')
        print('If you only type part of it, you will be scored only on that part.\n')
        print(reference)
        print()

        # Timing covers the whole input() call, from prompt to enter/return.
        start = datetime.now()
        typed = input()
        if not typed:
            print('Goodbye.')
            return
        print()

        elapsed = (datetime.now() - start).total_seconds()
        print("Nice work!")
        print('Words per minute:', wpm(typed, elapsed))
        print('Accuracy: ', accuracy(typed, reference))

        print('\nPress enter/return for the next paragraph or type q to quit.')
        if input().strip() == 'q':
            return
        i += 1
@main
def run(*args):
    """Parse the command-line arguments and start the typing test if requested.

    Accepts zero or more positional topic words and a -t flag. The typing
    test runs with the given topics only when -t is present. The *args
    passed to this function are not used; argparse reads the command line.
    """
    import argparse
    cli = argparse.ArgumentParser(description="Typing Test")
    cli.add_argument('topic', help="Topic word", nargs='*')
    cli.add_argument('-t', help="Run typing test", action='store_true')
    options = cli.parse_args()
    if options.t:
        run_typing_test(options.topic)
p6非递归版本:
def sphinx_switches(start, goal, limit):
    """A diff function for autocorrect that counts substitutions (iterative).

    Counts the positions, over the length of the shorter word, where START
    and GOAL differ, then adds the difference in the two lengths.

    Arguments:
        start: a starting word
        goal: a string representing a desired goal word
        limit: a number representing an upper bound on the number of chars that must change

    Returns the count when it is at most LIMIT, otherwise LIMIT + 1.

    >>> big_limit = 10
    >>> sphinx_switches("nice", "rice", big_limit) # Substitute: n -> r
    1
    >>> sphinx_switches("range", "rungs", big_limit) # Substitute: a -> u, e -> s
    2
    >>> sphinx_switches("pill", "pillage", big_limit) # Don't substitute anything, length difference of 3.
    3
    >>> sphinx_switches("roses", "arose", big_limit) # Substitute: r -> a, o -> r, s -> o, e -> s, s -> e
    5
    >>> sphinx_switches("rose", "hello", big_limit) # Substitute: r->h, o->e, s->l, e->l, length difference of 1.
    5
    """
    # BEGIN PROBLEM 6
    substitutions = sum(1 for from_char, to_char in zip(start, goal) if from_char != to_char)
    total = substitutions + abs(len(start) - len(goal))
    if total > limit:
        return limit + 1
    return total
    # END PROBLEM 6
运行结果: