爬虫小案例:扇贝单词评估
使用python模拟扇贝官网单词测试的过程
数据来源:扇贝网站 https://www.shanbay.com/vocabtest/
import requests,random # 题库 topics_data = [] # 单词列表 words_data = [] # 选择的单词列表 word_indexs = [] # 答案序号 answer_orders = ['A','B','C','D','E'] # 选择正确的单词 answer_right_indexs = [] # 选择题库 def select_topics(): global topics_data url = 'https://www.shanbay.com/api/v1/vocabtest/category/' params = { '_': str(random.randint(10**13, 9 * 10**13)) } res = requests.get(url, params=params) if res.status_code == 200: topics_data = res.json()['data'] # print(topics_data) print('请选择题库序号:{} ~ {}'.format(0, len(topics_data))) for i in range(len(topics_data)): print('{}.{}'.format(i, topics_data[i][1]), end=' ') print("") while True: try: index = int(input(":")) if index not in range(len(topics_data)): print('范围只能是{} ~ {},请重新选择!'.format(0, len(topics_data))) continue except BaseException: print('输入有误,请重新选择!') else: break return index else: print('出题范围请求失败!') return False # 选择单词 def select_words(category): global words_data,word_indexs url = 'https://www.shanbay.com/api/v1/vocabtest/vocabularies/' headers ={ 'referer':'https://www.shanbay.com/vocabtest/', 'user-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36 OPR/65.0.3467.78 (Edition Baidu)' } params = { 'category':category, '_':str(random.randint(10**13, 9 * 10**13)) } # print(params) print('正获取相关单词...') res = requests.get(url, headers=headers, params=params) if res.status_code == 200: # print(res.json()) words_data = res.json()['data'] # print(words_data) # 显示单词 print('请选择你认识的单词(选择对应的序号即可,范围:{} ~ {})'.format(0, len(words_data))) rownum = 0 for index in range(len(words_data)): print('{}.{}'.format(index, words_data[index]['content'].ljust(15, ' ')), end='') if (index > 0 and index % 8 == 0) or (index == len(words_data)-1): print('') while True: try: index = int(input(':')) if index not in range(len(words_data)): print('您输入的范围有误,请输入{} ~ {}的数值:'.format(0, len(words_data))) continue elif index in word_indexs: continue else: word_indexs.append(index) except BaseException: print('您输入的数值有误,请重新输入!') else: tip = input('是否要继续选择你认识的单词?(输入:n/N 则退出;其他任意字符则继续):') if tip in ['n', 'N']: break else: print('请选择你认识的单词(选择对应的序号即可,范围:{} ~ {})'.format(0, len(words_data))) # print('已选择:{}'.format(word_indexs)) return True else: print('单词获取失败!') return False # 作答 def select_answer(): global answer_right_indexs num = 0 for index in word_indexs: if num > 0: print('') print('————————————————————————————————————') print('') print('{}/{}){}的正确词义是:'.format(num+1, len(word_indexs), words_data[index]['content'])) num2 = 0 for choice in words_data[index]['definition_choices']: print('{}:{}'.format(answer_orders[num2], choice['definition'])) num2 += 1 print('{}:{}'.format(answer_orders[num2], '不认识')) while True: choice = input(':').strip().upper() if choice not in answer_orders: print('请重新选择序号:{}'.format('、'.join(answer_orders))) continue else: break print('你选择了:{}'.format(choice)) # 判断答案是否正确 if choice != 'E' and int(words_data[index]['definition_choices'][answer_orders.index(choice)]['pk']) == int(words_data[index]['pk']): print('答对了!') answer_right_indexs.append(index) else: for right_choice in words_data[index]['definition_choices']: if int(right_choice['pk']) == int(words_data[index]['pk']): break # print('答错了!:') # print(right_choice) print('答错了,正确答案是:{}:{}'.format(answer_orders[words_data[index]['definition_choices'].index(right_choice)], right_choice['definition'])) num += 1 # 报告 def show_report(category): url = 'https://www.shanbay.com/api/v1/vocabtest/vocabularies/' headers = { 'referer':'https://www.shanbay.com/vocabtest/', 'user-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36 OPR/65.0.3467.78 (Edition Baidu)' } params = { 'category':category, 'phase':'primary', 'right_ranks':'', 'word_ranks':'' } word_ranks = [] for word in words_data: word_ranks.append(str(word['rank'])) params['word_ranks'] = ','.join(word_ranks) # print(params) res = requests.post(url, headers=headers, params=params) if res.status_code == 200: result = res.json()['data'] print('') print('您的词汇量大约是:{}'.format(result['vocab'])) # 此处返回结果是 0 print(result['comment']) print('详细报告:') print('{}个单词,不认识{},认识{},掌握了{},答错了{}'.format(len(words_data), len(words_data) - len(word_indexs), len(word_indexs), len(answer_right_indexs), len( word_indexs) - len(answer_right_indexs))) else: print('评分请求失败!') def main(): # 题库序号 topic_index = select_topics() if topic_index: # 题库代号 category = topics_data[topic_index] print('选择的题库是:{}.{}'.format(topic_index, category[1])) # 选择单词 result = select_words(category[0]) if result: print('单词测试,请选择正确的词义:') # 作答 select_answer() # 报告 show_report(category[0]) if __name__ == '__main__': main()