Python学习笔记_爬虫数据存储为xlsx格式的方法

from urllib.parse import urljoin

import openpyxl
import requests
from bs4 import BeautifulSoup

# Crawl the shop's product listing pages, follow every product link found in
# a `div.prod-image` element, and store the markup of each
# `div.swiper-wrapper` element on the detail page in an .xlsx workbook.
wb = openpyxl.Workbook()
sheet = wb.active
sheet.title = 'product1'

# Browser-like User-Agent sent with every request (listing AND detail pages).
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'}

# One timeout (seconds) for every request; without a timeout `requests`
# waits indefinitely on a stalled connection.
REQUEST_TIMEOUT = 120

try:
    for x in range(20):
        # Listing page URL.
        list_url = "https://promiseedental.en.made-in-china.com/product-list-" + str(x) + ".html"
        try:
            res = requests.get(list_url, headers=headers, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as e:
            # A failed listing page must not abort the whole crawl.
            print('---->', e)
            continue

        soup = BeautifulSoup(res.text, "html.parser")
        # Each `div.prod-image` holds the link to one product's detail page.
        item_all = soup.find_all('div', class_="prod-image")

        for item1 in item_all:
            # Best-effort per product: any failure (missing <a>, network
            # error, a value the worksheet rejects) is reported and skipped.
            try:
                # Resolve relative / protocol-relative hrefs against the
                # listing URL so `requests` always gets an absolute URL.
                url_p = urljoin(list_url, item1.find("a")["href"])
                res1 = requests.get(url_p, headers=headers, timeout=REQUEST_TIMEOUT)
                soup1 = BeautifulSoup(res1.text, "html.parser")

                for item3 in soup1.find_all('div', class_="swiper-wrapper"):
                    # Split the element's markup on "<"; every fragment
                    # becomes one cell of a new worksheet row.
                    sheet.append(str(item3).split("<"))
                    print("ok")

            except Exception as e:
                print('---->', e)
finally:
    # Always persist what was collected, even if the crawl is interrupted.
    wb.save('product.xlsx')

 

posted @ 2020-11-23 00:05  止一  阅读(414)  评论(0)  编辑  收藏  举报