爬虫小案例:扇贝单词评估

使用 Python 模拟扇贝官网单词测试的过程

数据来源:扇贝网站 https://www.shanbay.com/vocabtest/

import requests,random

# Shared module-level state used by the functions in this script.

# Question banks returned by the category request (rebound by select_topics).
topics_data = []
# Word entries returned by the vocabulary request (rebound by select_words).
words_data = []
# Indexes into words_data of the words the user says they know.
word_indexs = []
# Indexes into words_data of the words the user answered correctly.
answer_right_indexs = []
# Labels shown in front of each answer option.
answer_orders = list('ABCDE')

# Choose a question bank
def select_topics():
    """Fetch the available question banks and let the user pick one.

    The fetched list is stored in the module-level ``topics_data``; each
    entry is indexed as ``entry[1]`` for its display name.

    Returns:
        The chosen index into ``topics_data`` (an int, and ``0`` is a valid
        choice), or ``False`` when the request fails or no question bank is
        returned. Because ``0`` is valid, callers should test the result
        with ``is False`` rather than by truthiness.
    """
    global topics_data
    url = 'https://www.shanbay.com/api/v1/vocabtest/category/'
    params = {
        # Random 14-digit value; presumably a cache-busting token - TODO confirm.
        '_': str(random.randint(10**13, 9 * 10**13))
    }
    res = requests.get(url, params=params)
    if res.status_code != 200:
        print('出题范围请求失败!')
        return False

    topics_data = res.json()['data']
    if not topics_data:
        # Nothing to choose from: without this guard the prompt below
        # could never be satisfied.
        print('出题范围请求失败!')
        return False

    # Highest valid index; the prompt shows an inclusive range.
    last_index = len(topics_data) - 1
    print('请选择题库序号:{} ~ {}'.format(0, last_index))
    for i, topic in enumerate(topics_data):
        print('{}.{}'.format(i, topic[1]), end='  ')
    print("")

    while True:
        try:
            index = int(input(""))
        except ValueError:
            # Only a non-numeric entry is retried; Ctrl-C / EOF propagate
            # so the user can still abort the program.
            print('输入有误,请重新选择!')
            continue
        if 0 <= index <= last_index:
            return index
        print('范围只能是{} ~ {},请重新选择!'.format(0, last_index))

# Choose words
def select_words(category):
    """Fetch the candidate words of a question bank and record the known ones.

    The fetched entries are stored in the module-level ``words_data``. The
    user then enters, one at a time, the indexes of the words they know;
    each new index is appended to the module-level ``word_indexs``.

    Args:
        category: Value sent as the ``category`` request parameter.

    Returns:
        ``True`` once the user has finished selecting, ``False`` when the
        request fails or returns no words.
    """
    global words_data
    url = 'https://www.shanbay.com/api/v1/vocabtest/vocabularies/'
    headers ={
        'referer':'https://www.shanbay.com/vocabtest/',
        'user-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36 OPR/65.0.3467.78 (Edition Baidu)'
    }
    params = {
        'category':category,
        # Random 14-digit value; presumably a cache-busting token - TODO confirm.
        '_':str(random.randint(10**13, 9 * 10**13))
    }
    print('正获取相关单词...')
    res = requests.get(url, headers=headers, params=params)
    if res.status_code != 200:
        print('单词获取失败!')
        return False

    words_data = res.json()['data']
    if not words_data:
        # No words means no index could ever be accepted below.
        print('单词获取失败!')
        return False

    # Highest valid index; the prompts show an inclusive range.
    last_index = len(words_data) - 1
    prompt = '请选择你认识的单词(选择对应的序号即可,范围:{} ~ {})'.format(0, last_index)
    print(prompt)

    # Show the words eight per row, ending the final row as well.
    for index, word in enumerate(words_data):
        print('{}.{}'.format(index, word['content'].ljust(15, ' ')), end='')
        if (index + 1) % 8 == 0 or index == last_index:
            print('')

    while True:
        try:
            index = int(input(':'))
        except ValueError:
            # Only a non-numeric entry is retried; Ctrl-C / EOF propagate.
            print('您输入的数值有误,请重新输入!')
            continue
        if not 0 <= index <= last_index:
            print('您输入的范围有误,请输入{} ~ {}的数值:'.format(0, last_index))
            continue
        if index in word_indexs:
            # Already selected: ask for another index without recording it twice.
            continue
        word_indexs.append(index)

        tip = input('是否要继续选择你认识的单词?(输入:n/N 则退出;其他任意字符则继续):')
        if tip in ['n', 'N']:
            break
        print(prompt)

    return True

# Answer the questions
def select_answer():
    """Quiz the user on every word they claimed to know.

    For each index in the module-level ``word_indexs`` the word's
    ``definition_choices`` are listed with labels from ``answer_orders``,
    followed by one extra "don't know" option. A choice is correct when its
    ``pk`` equals the word's ``pk``; indexes of correctly answered words are
    appended to the module-level ``answer_right_indexs``.

    Assumes each word has fewer choices than there are labels in
    ``answer_orders`` so that one label is left for the "don't know" option.
    """
    total = len(word_indexs)
    for position, index in enumerate(word_indexs, start=1):
        if position > 1:
            print('')
            print('————————————————————————————————————')
            print('')

        word = words_data[index]
        choices = word['definition_choices']
        print('{}/{}){}的正确词义是:'.format(position, total, word['content']))
        for label, option in zip(answer_orders, choices):
            print('{}:{}'.format(label, option['definition']))

        # The label right after the last real choice means "don't know".
        unknown_label = answer_orders[len(choices)]
        print('{}:{}'.format(unknown_label, '不认识'))

        # Accept only the labels that were actually displayed, so a label
        # with no matching choice can never be used as an index below.
        valid_labels = answer_orders[:len(choices) + 1]
        while True:
            choice = input(':').strip().upper()
            if choice in valid_labels:
                break
            print('请重新选择序号:{}'.format(''.join(valid_labels)))
        print('你选择了:{}'.format(choice))

        # Check whether the answer is correct
        target_pk = int(word['pk'])
        if choice != unknown_label and int(choices[answer_orders.index(choice)]['pk']) == target_pk:
            print('答对了!')
            answer_right_indexs.append(index)
            continue

        # Locate the correct option; None when no choice carries the word's pk.
        right_pos = next(
            (i for i, option in enumerate(choices) if int(option['pk']) == target_pk),
            None,
        )
        if right_pos is None:
            # Do not present an arbitrary choice as the correct answer.
            print('答错了!')
        else:
            print('答错了,正确答案是:{}:{}'.format(answer_orders[right_pos], choices[right_pos]['definition']))

# Report
def show_report(category):
    """Submit the test result and print the vocabulary estimate.

    Sends the ranks of all presented words (``word_ranks``) and the ranks of
    the words answered correctly (``right_ranks``), both taken from the
    module-level ``words_data`` / ``answer_right_indexs``, then prints the
    returned estimate, the returned comment and a local summary.

    Args:
        category: Value sent as the ``category`` request parameter.
    """
    url = 'https://www.shanbay.com/api/v1/vocabtest/vocabularies/'
    headers = {
        'referer':'https://www.shanbay.com/vocabtest/',
        'user-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36 OPR/65.0.3467.78 (Edition Baidu)'
    }
    params = {
        'category':category,
        'phase':'primary',
        # Ranks of the words answered correctly. Leaving this empty reports
        # zero correct answers regardless of how the user actually did.
        'right_ranks':','.join(str(words_data[i]['rank']) for i in answer_right_indexs),
        # Ranks of every word that was presented.
        'word_ranks':','.join(str(word['rank']) for word in words_data)
    }
    # NOTE(review): the values are sent in the query string (params=) of a
    # POST; confirm whether the endpoint expects a form body (data=) instead.
    res = requests.post(url, headers=headers, params=params)
    if res.status_code == 200:
        result = res.json()['data']
        print('')
        print('您的词汇量大约是:{}'.format(result['vocab']))
        print(result['comment'])
        print('详细报告:')
        total = len(words_data)
        known = len(word_indexs)
        mastered = len(answer_right_indexs)
        print('{}个单词,不认识{},认识{},掌握了{},答错了{}'.format(total, total - known, known, mastered, known - mastered))
    else:
        print('评分请求失败!')

def main():
    """Run the whole flow: pick a bank, pick words, answer, show the report."""
    # Index of the chosen question bank
    topic_index = select_topics()
    # select_topics() signals failure with False. Index 0 is a valid choice,
    # so compare by identity instead of truthiness (0 is falsy but not False).
    if topic_index is False:
        return

    # Entry of the chosen bank: [0] is sent as the category, [1] is displayed.
    category = topics_data[topic_index]
    print('选择的题库是:{}.{}'.format(topic_index, category[1]))

    # Choose the known words; stop when the word list could not be fetched.
    if not select_words(category[0]):
        return

    print('单词测试,请选择正确的词义:')
    # Answer the questions
    select_answer()
    # Report
    show_report(category[0])

# Start the interactive test only when this file is run as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

 

 

posted @ 2020-01-14 14:45  KeenLeung  阅读(475)  评论(0编辑  收藏  举报