爬取崔庆才大神的爬虫教程目录(标题和链接),最后存储到 MySQL

 1 # -*- coding: utf-8 -*-
 2 #coding:utf8
 3 import requests,time,unittest
 4 from lxml import etree
 5 import pymysql
 6 url ='http://cuiqingcai.com/1052.html'
 7 
 8 head = {"User-Agent":"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
 9         "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
10         "Accept-Encoding":"gzip, deflate",
11         "Accept-Language":"zh-CN,zh;q=0.8"
12 
13         }
14 html = requests.get(url)
15 
16 selector = etree.HTML(str(html.text))
17 
18 r = selector.xpath("/html/body/section/div[3]/div/article/p/a/text()")
19 t = selector.xpath("/html/body/section/div[3]/div/article/p/a/@href")
20 #print(r)
21 #print(t)
22 now = str(time.strftime('%Y-%m-%d-%H-%M',time.localtime(time.time())))
23 
class datas(object):
    """Store the scraped tutorial links in the MySQL table ``xxb``.

    Relies on the module-level names ``r`` (link texts), ``t`` (link
    targets) and ``now`` (timestamp string) produced by the scraping code.
    """

    def Data_storage(self, dt):
        """Insert every ``(id, name, link)`` entry of *dt* into ``xxb``.

        Args:
            dt: iterable of 3-item sequences ``(id, name, link)``.

        Returns:
            None. Nothing is done (no connection is opened) when *dt* is
            empty.

        Raises:
            ValueError: if an entry does not have exactly three items.
            Exception: any database error is re-raised after the
                transaction has been rolled back.

        All rows are written in one transaction, so a failure leaves the
        table unchanged instead of partially filled.
        """
        # Unpack the fields directly; joining them with "," and splitting
        # again would cut any title or link that itself contains a comma.
        entries = [(str(uid), str(uname), str(ulink)) for uid, uname, ulink in dt]
        if not entries:
            return

        # Kept for backward compatibility: the instance exposes the fields
        # of the last stored row.
        self.uid, self.uname, self.ulink = entries[-1]

        stamp = str(now)
        rows = [(uid, uname, 'ggg', ulink, stamp) for uid, uname, ulink in entries]

        # NOTE(review): host and credentials are hard-coded; consider moving
        # them to configuration.
        conn = pymysql.connect(host='192.168.191.1', user='root', passwd='123456789',
                               db='data', port=3306, charset='utf8')
        try:
            with conn.cursor() as cur:
                # Placeholders let the driver escape the scraped text, so a
                # quote in a title can neither break nor inject SQL.
                cur.executemany(
                    'INSERT INTO xxb(id,name,remark,link,time) VALUES (%s,%s,%s,%s,%s)',
                    rows,
                )
            conn.commit()
        except Exception:
            conn.rollback()
            raise
        finally:
            # Always release the connection, even when the insert failed.
            conn.close()

    def Data_processing(self, limit=30):
        """Pair link texts with link targets, number them and store them.

        Args:
            limit: maximum number of links to store (default 30, the cap
                the script has always used); ``None`` stores every link.
        """
        pairs = list(zip(r, t))
        if limit is not None:
            pairs = pairs[:limit]
        # Ids are the 1-based position of the link, stored as strings.
        dt = [(str(pos), name, link) for pos, (name, link) in enumerate(pairs, start=1)]
        self.Data_storage(dt)
57 
if __name__ == "__main__":
    # Script entry point: number the scraped links and write them to MySQL.
    datas().Data_processing()

 

posted @ 2017-07-01 17:59  双鱼男-huangsh  阅读(1766)  评论(0)  编辑  收藏  举报