慕容昊天

导航

Python实例之抓取网易云课堂搜索数据(post方式json型数据)并保存到数据库

  本实例实现了抓取网易云课堂中以‘java’为关键字的搜索结果。经详细查看,请求的方式为POST,返回的结果为JSON数据。

具体实现代码如下:

 

import requests
import json
import pymysql
# Search endpoint, and the header declaring that the POST body is JSON.
url = 'http://study.163.com/p/search/studycourse.json'
headers = {'content-type': 'application/json'}

# Running totals, updated once search results have been fetched.
totlePage = 0   # total number of result pages
test = 0        # number of records handled so far

# Connection settings for the local MySQL server.
_db_settings = {
    'host': 'localhost',
    'port': 3306,
    'user': 'root',
    'passwd': '123456',
    'db': 'test',
    'charset': 'utf8',
}
conn = pymysql.connect(**_db_settings)   # connect to the database
cur = conn.cursor()

def getData(count, timeout=10):
    """Request one page of the 'java' course search and return the decoded JSON.

    Args:
        count: Page number, sent to the server as ``pageIndex``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.exceptions.Timeout: The server did not answer within
            ``timeout`` seconds.
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status.
        ValueError: The response body is not valid JSON.
    """
    payload = {
        'pageIndex': count,        # the page number is the only variable field
        'pageSize': '50',
        'keyword': 'java',
        'searchTimeType': '-1',
        'orderType': '5',
        'priceType': '-1',
    }
    response = requests.post(
        url,
        data=json.dumps(payload),
        headers=headers,
        timeout=timeout,
    )
    # Fail here with a clear HTTP error rather than later with a confusing
    # JSON or key error while parsing an error page.
    response.raise_for_status()
    return response.json()

# Start from a clean slate: remove the table if a previous run left one behind.
cur.execute("DROP TABLE IF EXISTS neteasy")

# Table definition, one column per line; the adjacent literals are joined
# into a single SQL statement.
sqlc = (
    "create table neteasy("
    "id int(5),"
    "title varchar(50) CHARACTER SET utf8 COLLATE utf8_general_ci,"
    "provider varchar(30) CHARACTER SET utf8 COLLATE utf8_general_ci,"
    "price float(10),"
    "learnercount int(5)"
    ") CHARACTER SET utf8 COLLATE utf8_general_ci"
)
cur.execute(sqlc)           # create the table

# Walk every result page and store one row per course.
#
# Page 1 is requested once: its response says whether the search matched
# anything, how many pages exist, and supplies the first page's rows, so it
# is reused rather than fetched again.
# The try/finally releases the cursor and connection on every path: success,
# no results, or an error part-way through.
try:
    first_page = getData(1)['result']
    final = first_page['list']
    if final is not None:
        totlePage = first_page['query']['totlePageCount']   # total page count
        sqli = 'insert into neteasy values(%s,%s,%s,%s,%s)'
        for j in range(1, totlePage + 1):                   # loop over pages
            if j > 1:
                final = getData(j)['result']['list']
            # NOTE(review): a later page might come back with a null list;
            # it is treated as an empty page instead of crashing.
            for i, course in enumerate(final or [], start=1):
                rt = course['productName']
                rp = course['provider']
                # Use the discounted price when there is one, otherwise the
                # original price.
                rn = course['originalPrice']
                if course['discountPrice'] is not None:
                    rn = course['discountPrice']
                rd = course['learnerCount']
                if rd is None:
                    rd = 0
                print('当前正在读取第'+str(j)+'页的第'+str(i)+'条数据...')
                test += 1
                cur.execute(sqli, (test, rt, rp, rn, rd))   # insert the row
        # Commit before reporting, so the message is only printed once the
        # rows have actually been saved.
        conn.commit()
        print('保存完毕!共'+str(test)+'条数据')
    else:
        print('没有查询结果,请换个关键词试试!')
finally:
    cur.close()
    conn.close()



 

posted on 2017-05-30 16:35  慕容昊天  阅读(424)  评论(0)  编辑  收藏  举报