# 初练pandas实现数据处理 (first practice: data processing with pandas)
"""Convert an HTML table of stock records into a pipe-delimited text file.

The script reads a locally saved HTML page, treats the ``<th>`` cells of the
first ``<tr>`` as column names and the ``<td>`` cells of every row as data,
builds one output string per row, drops rows that repeat a value in the
``股票代码`` column (keeping the last occurrence) and writes the remaining
strings to ``data.txt``, one per line.
"""
import urllib.request

from pandas import DataFrame
from pandas import Series
from bs4 import BeautifulSoup

# Input page and output file used by this script.
SOURCE_URL = 'file:///F:/python/untitled1/core/do_data/2month.html'
OUTPUT_PATH = 'data.txt'

# Read the raw bytes and release the handle as soon as they are in memory.
with urllib.request.urlopen(SOURCE_URL) as response:
    html = response.read()

soup = BeautifulSoup(html, "html.parser")
trs = soup.find_all('tr')

# The header cells of the first row provide the column names.
ths = trs[0].find_all('th')
index_d = [th.getText() for th in ths]
print(index_d)

# Collect every data row first and build the frame once: growing a DataFrame
# row by row is quadratic, and DataFrame.append no longer exists in pandas 2.
rows = []
for row_number, tr in enumerate(trs):
    td_datas = [td.getText() for td in tr.find_all('td')]
    if not td_datas:
        # Rows without <td> cells (such as the header row) carry no data.
        continue
    if len(td_datas) != len(index_d):
        # Fail loudly instead of letting pandas pad or misalign the row.
        raise ValueError(
            f"row {row_number} has {len(td_datas)} cells, "
            f"expected {len(index_d)}"
        )
    rows.append(td_datas)

data = DataFrame(rows, columns=index_d)
print(len(data))

# Output line layout: "<code>|<time> <historical reason> <selected reason>".
# "SZ"/"SH" in the code are rewritten to "0|"/"1|" -- presumably exchange
# prefixes expected by whatever consumes the file; TODO confirm.
data["new"] = [
    str(full_code).replace("SZ", "0|").replace("SH", "1|")
    + "|" + limit_time + " " + history_reason + " " + selected_reason
    for full_code, limit_time, history_reason, selected_reason in zip(
        data["股票全码"],
        data["涨停时间"],
        data["历史涨停原因"],
        data["涨停选原因"],
    )
]

# Keep only the last row for each value of the 股票代码 column.
data = data.drop_duplicates(subset=['股票代码'], keep='last')
print(len(data))

df2 = data["new"].values

# NOTE(review): the file is written with the platform default encoding, as
# before; pass an explicit encoding here once the consumer's expectation is
# known.
with open(OUTPUT_PATH, 'w') as out_file:
    out_file.write("\n".join(df2))