# get_data_use_notbom: 自定义外部数据自动写入 (automatically write custom external data)
"""Merge rows scraped from a saved HTML table into ``extern_user.txt``.

Steps, in execution order:

1. Read the HTML document at ``file_path + file_name`` and load every
   ``<tr>`` that contains ``<td>`` cells into ``data``; the ``<th>`` cells of
   the first ``<tr>`` supply the column names.
2. Join three text columns into a ``new`` column and keep only the last row
   for each value of the ``股票代码`` column.
3. Convert ``extern_user.txt`` from GB18030 to UTF-8, load its first four
   ``|``-separated fields as strings, and add one row per scraped record
   whose third field is ``"31"``.
4. De-duplicate on fields two and three (last one wins), sort, add a
   constant fifth field ``"0.00"`` and write ``save/extern_user.txt``, which
   is finally re-encoded as GB18030.
"""
import os
import urllib.request

import chardet  # noqa: F401 -- not used by the active code path
import pandas as pd
from bs4 import BeautifulSoup
from pandas import DataFrame
from pandas import Series  # noqa: F401 -- not used by the active code path

file_name = "2222-11.txt"
# file_name = "2222.txt"  # alternative input file
file_path = 'file:///F:/python/untitled1/core/do_data/save2/'
ofile = "extern_user.txt"


def gb_trans_utf8(file_path):
    """Copy a GB18030 text file to a UTF-8 file named ``"utf" + file_path``.

    The ``"utf"`` prefix is put in front of the whole argument, so a path
    such as ``dir/x.txt`` maps to ``utfdir/x.txt``; pass a bare file name to
    get the copy next to the original.

    Args:
        file_path: Path of the GB18030-encoded input file.

    Returns:
        The path of the UTF-8 copy that was written.
    """
    with open(file_path, 'r', encoding='gb18030') as src:
        content = src.read()
    utf8_path = "utf" + file_path
    with open(utf8_path, 'w', encoding='utf-8') as dst:
        dst.write(content)
    return utf8_path


def utf8_trans_gb(file_path):
    """Re-encode a UTF-8 text file as GB18030, overwriting it in place.

    Args:
        file_path: Path of the UTF-8 file to convert.
    """
    with open(file_path, 'r', encoding='utf-8') as src:
        content = src.read()
    with open(file_path, 'w', encoding='gb18030') as dst:
        dst.write(content)


# --- 1. Load the HTML table -------------------------------------------------
with urllib.request.urlopen(file_path + file_name) as response:
    html = response.read()

soup = BeautifulSoup(html, "html.parser")
trs = soup.find_all('tr')
if not trs:
    raise ValueError("no <tr> rows found in " + file_path + file_name)

# Column names come from the <th> cells of the first row.
index_d = [th.getText() for th in trs[0].find_all('th')]
print(index_d)

# Collect all rows first and build the frame once; DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every call.
table_rows = []
for tr in trs:
    td_datas = [td.getText() for td in tr.find_all('td')]
    if not td_datas:
        # Rows without <td> cells (e.g. the header row) carry no data.
        continue
    if len(td_datas) != len(index_d):
        raise ValueError(
            "row has %d cells but the header has %d columns"
            % (len(td_datas), len(index_d))
        )
    table_rows.append(td_datas)
data = DataFrame(table_rows, columns=index_d)
print(len(data))

# --- 2. Build the note text and keep one row per code -----------------------
data["new"] = (
    data["涨停时间"] + " " + data["历史涨停原因"] + " " + data["涨停选原因"]
)
data = data.drop_duplicates(subset=['股票代码'], keep='last')
print(len(data))

# --- 3. Load the existing file and add the scraped rows ---------------------
new_data = pd.read_table(
    gb_trans_utf8(ofile), header=None, sep="|", encoding="utf-8", dtype=str
)
new_data = new_data.iloc[:, 0:4].dropna()
new_data.columns = ["a", "b", "c", "d"]

data = data.reset_index(drop=True)
extra_rows = []
for code, note in zip(data["股票代码"], data["new"]):
    d_code = str(code)
    # Codes starting with "6" get "1", all others "0".
    # NOTE(review): presumably a market flag -- confirm against the consumer
    # of extern_user.txt.
    flag = "1" if d_code.startswith("6") else "0"
    row = [flag, d_code, "31", note]
    print(row)
    extra_rows.append(row)

if extra_rows:
    new_data = pd.concat(
        [new_data, DataFrame(extra_rows, columns=new_data.columns)],
        ignore_index=True,
    )

# --- 4. De-duplicate, sort and write the result -----------------------------
# The scraped rows were added last, so keep='last' lets them replace any
# existing row with the same ("b", "c") pair.
new_data = new_data.drop_duplicates(subset=["b", "c"], keep='last')
new_data["c"] = new_data["c"].astype(int)
new_data = new_data.sort_values(by=["c", "b"], ascending=[True, True])
print(new_data.columns)
new_data = new_data.reset_index(drop=True)
new_data["e"] = "0.00"

os.makedirs('save', exist_ok=True)
new_data.to_csv(
    'save/extern_user.txt', sep='|', index=False, header=False,
    encoding='utf-8',
)
utf8_trans_gb('save/extern_user.txt')