爬虫之一:爬补天厂商数据(爬虫)

 

#coding:utf-8
import re,urllib

def gethtml(url):
    """Fetch *url* and return the raw response body.

    The response object is closed explicitly once it has been read, so the
    underlying connection is released even if ``read()`` raises.
    """
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # The object returned by urlopen here is not used as a context
        # manager, so close it by hand.
        page.close()

def getlink(html):
    """Return the inner text of every matching left-aligned table cell.

    A cell matches only if its opening tag is exactly
    ``<td  align="left" style="padding-left:20px;">`` (note the two spaces
    after ``td``). The captured text is whatever lies between that tag and
    the nearest following ``</td>`` on the same line.
    """
    cell_pattern = re.compile(
        r'<td  align="left" style="padding-left:20px;">(.*?)</td>'
    )
    return cell_pattern.findall(html)

def save(links, path='360.txt'):
    """Append each entry of *links* to *path*, one entry per line.

    links -- iterable of strings to write.
    path  -- output file; defaults to ``360.txt`` in the current directory.

    The file is opened in append mode, so repeated calls accumulate lines.
    The ``with`` block makes sure the handle is flushed and closed before
    the function returns.
    """
    with open(path, 'a') as f:
        f.writelines(i + "\n" for i in links)

# Walk listing pages 11 through 199: download each page, pull out the table
# cells, echo progress to stdout and append the results to the output file.
for page in range(11, 200):
    url = "https://butian.360.cn/company/lists/page/%d" % page
    html = gethtml(url)
    print("%dye" % page)
    links = getlink(html)
    print(links)
    save(links)
  

  

posted @ 2016-01-29 16:02  anything good  阅读(514)  评论(0)  编辑  收藏  举报
孤 's 博客