python爬虫笔记

 1 import urllib2
 2 response = urllib2.urlopen("http://www.baidu.com")
 3 html = response.read()
 4 
 5 #eg2
 6 import urllib2
 7 req = urllib2.Request("http://www.baidu.com")
 8 response = urllib2.urllib2(req)
 9 the_page = response.read()
10 
# eg3: POST -- urlencode a form dict and pass it as the Request body
# (giving Request a data argument is what turns the request into a POST)
import urllib
import urllib2

url = "http://www.msdn.com"
form = {
    'name': 'Xu',
    'location': 'YJ',
    'language': 'Python',
}

encoded = urllib.urlencode(form)          # dict -> "name=Xu&location=YJ&..."
post_req = urllib2.Request(url, encoded)  # request carrying the form body
response = urllib2.urlopen(post_req)      # send it and collect the reply
the_page = response.read()
 25 #eg4 GET: append the urlencoded form to the URL itself, e.g.
 26 #    full_url = url + '?' + urllib.urlencode(values); response = urllib2.urlopen(full_url)
27 
# eg5: send a custom User-Agent header along with the POST body.
# Request(url, data, headers) takes the form body and the headers in one call.
import urllib
import urllib2

url = "http://www.msdn.com"
user_agent = 'Mozilla/4.0(compatible;MSIE 5.5;Windows NT)'
values = {'name': 'Xu', 'location': 'YJ', 'language': 'Python'}

headers = {'User-Agent': user_agent}      # headers dict for the request
data = urllib.urlencode(values)           # urlencoded form body
req = urllib2.Request(url, data, headers)
response = urllib2.urlopen(req)           # send request, receive response
the_page = response.read()
44 #eg6捕获异常
45 try:
46     response = urllib2.urlopen(req) #接收数据
47 except urllib2.URLError,e:
48     print e.reason
49     print e.code    #404 or 500...
50 #way2
51 try:
52     response = urllib2.urlopen(req) #接收数据
53 except urllib2.HTTPError,e:
54     print e.code    #404 or 500...
55 except urllib2.URLError,e:
56     print e.reason
57 
58 #way3. we command to handle exception in this way
59 try:
60     response = urllib2.urlopen(req) #接收数据
61 except urllib2.URLError,e:
62     if hasattr(e,'code'):
63         print 'Error code:',e.code
64     elif hasattr(e,'reason'):
65         print 'Reason:',e.reason
66 
#eg7 response metadata: the URL actually reached (after redirects) and headers
from urllib2 import Request, urlopen, URLError, HTTPError

old_url = "http://www.baidu.com"
req = Request(old_url)
response = urlopen(req)
rel_url = response.geturl()   # final URL, may differ from old_url on redirect
info = response.info()        # the response headers object
74 
75 #eg8 cookie
76 import urllib2
77 import cookielib
78 cookie = cookielib.CookieJar()
79 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
80 response = opener.open("http://www.baidu.com")
81 for item in cookie:
82     print item.name,item.
83 
84 #eg9 正则表达式
85 import re
86 pattern = re.compile(r"hello")
87 match1 = pattern.match("hello world")
88 if match1:
89     print match1.group()
90 else:
91     print "match失败"

 

posted @ 2015-06-02 10:48  _level_  阅读(147)  评论(0编辑  收藏  举报