4/23 Study Summary
Today I made progress on web scraping: yesterday I could only get a single row and column, but today I can pull the content from every paginated result page by stepping the start offset in the URL.
Difficulty encountered: I also wanted to build an image carousel today, but I haven't figured it out yet.
import bs4
import pymysql
import requests
from bs4 import BeautifulSoup

def download_all_htmls():
    # Fetch both result pages; each page holds 20 schools,
    # so page idx starts at offset idx * 20.
    htmls = []
    for idx in range(2):
        url = f"https://yz.chsi.com.cn/sch/?start={idx*20}"
        print("crawl html", url)
        r = requests.get(url)
        if r.status_code != 200:
            raise Exception("error")
        htmls.append(r.text)
    return htmls

htmls = download_all_htmls()

def parse_single_html(html):
    # Pull the school name out of the first <td> of every table row.
    names = []
    soup = BeautifulSoup(html, 'html.parser')
    tbody = soup.find('tbody')
    if isinstance(tbody, bs4.element.Tag):
        for tr in tbody.find_all("tr"):
            a = tr.find("td").find("a")
            names.append(a.getText().replace(' ', ''))
    print(names)
    return names

conn = pymysql.connect(
    host='localhost',  # my machine's address
    port=3306,         # an int, so no quotes needed
    user='root',
    password='******',
    db='yanhu',
    charset='utf8'
)
cursor = conn.cursor()  # get a cursor

sql = 'insert into school (name,fenshu,bili) values (%s,%s,%s);'
for idx in range(2):
    # Iterate over the parsed names directly instead of indexing 0..19,
    # which would crash if a page returned fewer than 20 rows.
    for name in parse_single_html(htmls[idx]):
        print(name)
        fenshu = 420  # placeholder score, the same for every school for now
        bili = 0.2    # placeholder ratio, the same for every school for now
        cursor.execute(sql, [name, fenshu, bili])

conn.commit()
cursor.close()
conn.close()
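The script assumes the school table already exists in the yanhu database, but its definition never appears above. A minimal one-time setup sketch; the column types here are my guesses inferred from the insert statement, not the real schema:

import pymysql

# Hypothetical one-time setup: create the `school` table the insert
# statement expects. Column types are assumptions, not taken from the script.
conn = pymysql.connect(host='localhost', port=3306, user='root',
                       password='******', db='yanhu', charset='utf8')
cursor = conn.cursor()
cursor.execute("""
    create table if not exists school (
        id     int primary key auto_increment,
        name   varchar(100),  -- school name scraped from the page
        fenshu int,           -- score column (hardcoded to 420 above)
        bili   float          -- ratio column (hardcoded to 0.2 above)
    ) default charset=utf8
""")
conn.commit()
cursor.close()
conn.close()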
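A small design note: since every row currently gets the same placeholder fenshu and bili, the row-by-row execute loop could be batched into a single executemany call, which sends all rows in one go. A sketch reusing htmls, parse_single_html, cursor, and conn from the script above:

# Batch variant of the insert loop: collect all rows first,
# then hand them to executemany in a single call.
sql = 'insert into school (name,fenshu,bili) values (%s,%s,%s);'
rows = []
for idx in range(2):
    for name in parse_single_html(htmls[idx]):
        rows.append((name, 420, 0.2))  # fenshu/bili are still placeholders
cursor.executemany(sql, rows)
conn.commit()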