药->excel
1. 无日志版本
# @author: zhc
# @Time: 2023/5/18
# @FileName: demo
"""Scrape DrugBank bio-entity search results and export gene names to Excel.

Pipeline (each stage is a generator feeding the next):

1. ``infos1`` walks the paginated search results and yields
   ``(entity_id, entity_name)`` pairs such as ``('BE0000001', ...)``.
2. ``infos2`` opens every entity page and yields one
   ``(entity_id, uniprot_id, entity_name)`` triple per UniProt link found.
3. ``infos3`` opens every polypeptide page, extracts the "Gene Name" field
   and writes all rows to ``result.xlsx``.

``infosaa`` is an optional stand-alone step that only dumps the entity names
to ``aa.xlsx``.
"""
import re

import pandas as pd
import requests

session = requests.Session()
session.trust_env = False  # ignore proxy/auth settings from the environment

BASE_URL = 'https://go.drugbank.com'
# Seconds before a stalled request is aborted; without a timeout a single
# hung connection would block the whole scrape forever.
REQUEST_TIMEOUT = 30

_SEARCH_REFERER = 'https://go.drugbank.com/unearth/q?query=*&button=&searcher=bio_entities'
_DETAIL_REFERER = (
    'https://go.drugbank.com/unearth/q?button=&page=1&query=%2A&searcher=bio_entities'
    '&__cf_chl_tk=OY4j_WDIGBrKuUCRiJCzMsHBDfKvpEbJksY5y4_sxO0-1684400898-0-gaNycGzNDdA'
)

# SECURITY NOTE(review): the cookie values below are browser session tokens
# captured in May 2023 and committed in plain text.  They identify a real
# session and will expire; consider loading them from a local, untracked file.

# Cookies sent with the paginated search requests (infosaa / infos1).
_SEARCH_COOKIES = {
    'cf_clearance': 'z8jXLY4NjL4.KVOUbgZNPWj6NPBlT_u.x4xmS19uZZE-1682037487-0-250',
    '_gcl_au': '1.1.154779136.1682037508',
    'hubspotutk': '15b5c265b1847afab42a7def948ef734',
    '_gid': 'GA1.2.695314151.1684390995',
    '_clck': 'qmfr9z|2|fbp|0|1233',
    'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
    'cf_chl_2': '1ea719f0d331036',
    'cf_chl_rc_i': '1',
    '_omx_drug_bank_session': 'kk16uujsTygoHwxfGEhrQEyjxT2CzYy2PCRfatX70%2BtKAQzuRCwO6fY%2FjHPVOsS2SLetNSvbEMvJD868pOaTuZ8EKhJqcNoxDZG68MLXcygqnx5g6cerWxPObUdqnBPQPWgcAJM7f%2FCOtvA%2BaHVLah3%2Fwcfl%2FfbkpfjV%2BsNNubQF1D9LB4e4xsUZpcntjSlLNAh6JykRisrdIlvIqN6%2B56vBklZtnRELonZY9yvyY%2B01bKCoflxsvNu8NS6ouAHWgBChb7%2BuoEBd4c6X4MttHMtlcKKhryxzE9mGZ6nfckvpGWqaGMptW7n2TsWUzFyBIaEwCgMTgHASS7W432%2FzrIgArwjxDV6hkQccMFE1EbT2%2BVMjfBz3NFHyrDkyQhZxLmnzE0jQzSpMa1lzob6Hw30cQKtnBBVbTbsbBNk6SX3fM4FMhHcaCvAZZE2mPQ%3D%3D--LWxvxvusC6OofJHn--tZPo%2BodDD6mfp%2FsRb%2BDc4w%3D%3D',
    '_gat': '1',
    '_ga': 'GA1.1.1772772602.1682037506',
    '_ga_DDLJ7EEV9M': 'GS1.1.1684397556.12.0.1684397556.0.0.0',
    '_clsk': '15jlqzx|1684397557641|1|1|z.clarity.ms/collect',
    '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684390996375.1684397557784.11',
    '__hssrc': '1',
    '__hssc': '49600953.1.1684397557784',
}

# Cookies sent with the bio-entity detail requests (infos2).
_ENTITY_COOKIES = {
    '_gcl_au': '1.1.154779136.1682037508',
    'hubspotutk': '15b5c265b1847afab42a7def948ef734',
    '_gid': 'GA1.2.695314151.1684390995',
    '_clck': 'qmfr9z|2|fbp|0|1233',
    'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
    '__hssrc': '1',
    'cf_clearance': 'P9Q0Ev.37r_S0U4yEDK1BRLlzotAwwmQcvWTwIecE2k-1684400898-0-250',
    '_ga': 'GA1.1.1772772602.1682037506',
    '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684397557784.1684400914210.12',
    '_omx_drug_bank_session': 'co5hwBY2ElsyR%2B9IpqGzP4A8QGVq%2BO3GeKR0U4zOn5RSgEklXYO2Osneon2e%2B0LzUJS7ZO6ts%2BTpcNj2c9z3Fi%2BldeNXWPYu0VheauFSoK7eLCwPzgdxP6YrpTVYgwi0aawcjgb00AbRgeiw78%2FfroSEmiQpiSWia%2BiQOOq6CGNnXw%2Fx1MqLf%2BzFxMrONecI6FPPYi8Be9rTgSx%2BNYuLZhE4HkAHRshHRyKGHqjOFkTKqmr4p83xoMxC8AYJ5e6M9utzp3OM8GV%2B5im%2FEfjSm3OaxkzXvLyep3QYVmixhYTy5DlzxnCoW0BezJbTlwjp3QPeNzwLk7oblnXRlg47CzlIhmb551RvSN1f6W10KYOduwbbD%2F5KXSazWIT5ekfSQY8%3D--ixR1euY%2F8niWD2GG--pdAc5Q10cDVjU8h3CSP33Q%3D%3D',
    '_ga_DDLJ7EEV9M': 'GS1.1.1684411279.14.1.1684412424.0.0.0',
}

# Cookies sent with the polypeptide detail requests (infos3).
_POLYPEPTIDE_COOKIES = {
    '_gcl_au': '1.1.154779136.1682037508',
    'hubspotutk': '15b5c265b1847afab42a7def948ef734',
    '_gid': 'GA1.2.695314151.1684390995',
    '_clck': 'qmfr9z|2|fbp|0|1233',
    'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
    '__hssrc': '1',
    'cf_clearance': 'P9Q0Ev.37r_S0U4yEDK1BRLlzotAwwmQcvWTwIecE2k-1684400898-0-250',
    '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684400914210.1684412518322.13',
    '_gat': '1',
    '_omx_drug_bank_session': 'Uq6izZN1HKl9qcTZGaXuYnOWtmEGQ276oYZznAVEqQkRMOC71A6R6VsEC4GzJZyw24Yr%2BWw8JBnw1yPLcOx0vuUZ%2Fwa1qOIXvZMlHr8%2Bg5o8dZ9U7jB0%2F6ZeSbBcFJfbDjnCPn0yyzFttMghTXxu0rZdeace5Bwkt5lRAaeAg4aDZRTiYpCZOEe29rGF95l38iU6rRrd85j7RFv%2FuV6ZMCP2ZP7DVTVcQLtIvU9iAItyl86nJiF4pYqaBNXyDffBHkVPQH5WD56EsbabWWPufe0oH4%2Fx7Ku4n%2Fy8pEWYITTaSvZuA8yW3R2UiKG9PgjpeNpEa6%2Bkgs46ewYq%2Fseaaye3R7bfpvvGb0Qu7XAFyHQmOvarbUJqHqaDwnOGfQ%3D%3D--LMKa9eGaiid6tqeE--tfFpItICck9LRYsmjYpR1Q%3D%3D',
    '_ga_DDLJ7EEV9M': 'GS1.1.1684411279.14.1.1684412956.0.0.0',
    '_ga': 'GA1.1.1772772602.1682037506',
    '_clsk': '13a49nl|1684412957117|3|1|z.clarity.ms/collect',
    '__hssc': '49600953.2.1684412518322',
}

# Patterns are compiled once instead of on every page.
_NAME_RE = re.compile(r'class="hit-link"><a href=".*?">(.*?)</a>')
_ENTITY_RE = re.compile(r'class="hit-link"><a href="/bio_entities/(.*?)">(.*?)</a>')
_UNIPROT_RE = re.compile(r'uniprot/(.*?)">')
_GENE_NAME_RE = re.compile(r'Gene Name</dt><dd class="col-xl-10 col-md-9 col-sm-8">(.*?)</dd>')


def _build_headers(referer):
    """Return the browser-like request headers, varying only in *referer*."""
    return {
        'authority': 'go.drugbank.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'referer': referer,
        'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42',
    }


def _fetch_html(path, cookies, referer, params=None):
    """GET ``BASE_URL + path`` and return the response body as text.

    A non-2xx status is reported but not raised, so the callers' "no matches
    means stop" logic keeps working while a blocked request stays visible.
    """
    response = session.get(
        f'{BASE_URL}{path}',
        cookies=cookies,
        headers=_build_headers(referer),
        params=params,
        timeout=REQUEST_TIMEOUT,
    )
    if not response.ok:
        print(f"HTTP {response.status_code}: {response.url}")
    return response.text


def _fetch_search_page(page):
    """Return the HTML of search-result page number *page* (1-based)."""
    params = {
        'button': '',
        'page': str(page),
        'query': '*',
        'searcher': 'bio_entities',
    }
    return _fetch_html('/unearth/q', _SEARCH_COOKIES, _SEARCH_REFERER, params)


def _save_excel(rows, filename):
    """Write *rows* (a list of dicts) to *filename* as an Excel sheet.

    Uses the path form of ``to_excel``: the ``encoding`` argument and
    ``ExcelWriter.save()`` used previously no longer exist in pandas 2.x.
    """
    pd.DataFrame(rows).to_excel(filename, index=False)


def infosaa(pages):
    """Collect the entity names of the first *pages* search pages into aa.xlsx.

    Stops early at the first page without any hit.  The workbook is written
    once when the loop ends, including when it ends because of an error, so
    the names gathered so far are not lost.

    :param pages: number of result pages to visit (int or numeric string).
    """
    rows = []
    try:
        for page in range(1, int(pages) + 1):
            print(f"第{page}页")
            names = _NAME_RE.findall(_fetch_search_page(page))
            if not names:
                print("最大页数了", page)
                break
            for name in names:
                row = {"名字": name}
                print(row)
                rows.append(row)
    finally:
        _save_excel(rows, 'aa.xlsx')


def infos1(pages):
    """Yield ``(entity_id, entity_name)`` for every hit on the search pages.

    Iteration stops at the first page without any hit.

    :param pages: number of result pages to visit (int or numeric string).
    """
    for page in range(1, int(pages) + 1):
        print(f"第{page}页")
        hits = _ENTITY_RE.findall(_fetch_search_page(page))
        if not hits:
            print("最大页数了", page)
            break
        yield from hits


def infos2(pages):
    """Yield ``(entity_id, uniprot_id, entity_name)`` for every UniProt link.

    Each entity produced by ``infos1`` (e.g. ``BE0000001``) is opened and every
    ``uniprot/<id>">`` occurrence on its page (e.g. ``P45059``) is reported.

    :param pages: number of search-result pages to visit.
    """
    for url_num, url_name in infos1(pages):
        html = _fetch_html(f'/bio_entities/{url_num}', _ENTITY_COOKIES, _DETAIL_REFERER)
        for url_arm in _UNIPROT_RE.findall(html):
            yield url_num, url_arm, url_name


def infos3(pages):
    """Resolve the gene name of every polypeptide and save result.xlsx.

    The output columns are ``url_name`` (entity name), ``url_m`` (entity id),
    ``num`` (UniProt id) and ``resu`` (gene name).  The workbook is written
    once at the end instead of after every protein; the ``finally`` clause
    still saves the partial result if the scrape is interrupted by an error.

    :param pages: number of search-result pages to visit.
    """
    rows = []
    try:
        for url_m, num, url_name in infos2(pages):
            html = _fetch_html(f'/polypeptides/{num}', _POLYPEPTIDE_COOKIES, _DETAIL_REFERER)
            for resu in _GENE_NAME_RE.findall(html):
                print(f"最终结果:{url_name}----->{url_m}------>{num}----->{resu}")
                rows.append({
                    "url_name": url_name,
                    "url_m": url_m,
                    "num": num,
                    "resu": resu,
                })
    finally:
        _save_excel(rows, 'result.xlsx')


if __name__ == '__main__':
    page = input("页数:")
    # infosaa(page) can be run instead to export only the entity names.
    infos3(page)
2. 日志版本
# -*- coding: utf-8 -*-
# @Author : zhc
# @File : juck_yao.pyo
"""Scrape DrugBank bio-entity search results and export gene names to Excel.

Logging variant: progress and results are written through loguru to the
console and to ``ces.log``.

Pipeline (each stage is a generator feeding the next):

1. ``infos1`` walks the paginated search results and yields
   ``(entity_id, entity_name)`` pairs such as ``('BE0000001', ...)``.
2. ``infos2`` opens every entity page and yields one
   ``(entity_id, uniprot_id, entity_name)`` triple per UniProt link found.
3. ``infos3`` opens every polypeptide page, extracts the "Gene Name" field
   and writes all rows to ``result.xlsx``.

``infosaa`` is an optional stand-alone step that only dumps the entity names
to ``aa.xlsx``.
"""
import os.path
import re
import time

import pandas as pd
import requests
from loguru import logger

logger.add(sink='ces.log', encoding='utf-8', level='DEBUG')

session = requests.Session()
session.trust_env = False  # ignore proxy/auth settings from the environment

BASE_URL = 'https://go.drugbank.com'
# Seconds before a stalled request is aborted; without a timeout a single
# hung connection would block the whole scrape forever.
REQUEST_TIMEOUT = 30

_SEARCH_REFERER = 'https://go.drugbank.com/unearth/q?query=*&button=&searcher=bio_entities'
_DETAIL_REFERER = (
    'https://go.drugbank.com/unearth/q?button=&page=1&query=%2A&searcher=bio_entities'
    '&__cf_chl_tk=OY4j_WDIGBrKuUCRiJCzMsHBDfKvpEbJksY5y4_sxO0-1684400898-0-gaNycGzNDdA'
)

# SECURITY NOTE(review): the cookie values below are browser session tokens
# captured in May 2023 and committed in plain text.  They identify a real
# session and will expire; consider loading them from a local, untracked file.

# Cookies sent with the paginated search requests (infosaa / infos1).
_SEARCH_COOKIES = {
    'cf_clearance': 'z8jXLY4NjL4.KVOUbgZNPWj6NPBlT_u.x4xmS19uZZE-1682037487-0-250',
    '_gcl_au': '1.1.154779136.1682037508',
    'hubspotutk': '15b5c265b1847afab42a7def948ef734',
    '_gid': 'GA1.2.695314151.1684390995',
    '_clck': 'qmfr9z|2|fbp|0|1233',
    'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
    'cf_chl_2': '1ea719f0d331036',
    'cf_chl_rc_i': '1',
    '_omx_drug_bank_session': 'kk16uujsTygoHwxfGEhrQEyjxT2CzYy2PCRfatX70%2BtKAQzuRCwO6fY%2FjHPVOsS2SLetNSvbEMvJD868pOaTuZ8EKhJqcNoxDZG68MLXcygqnx5g6cerWxPObUdqnBPQPWgcAJM7f%2FCOtvA%2BaHVLah3%2Fwcfl%2FfbkpfjV%2BsNNubQF1D9LB4e4xsUZpcntjSlLNAh6JykRisrdIlvIqN6%2B56vBklZtnRELonZY9yvyY%2B01bKCoflxsvNu8NS6ouAHWgBChb7%2BuoEBd4c6X4MttHMtlcKKhryxzE9mGZ6nfckvpGWqaGMptW7n2TsWUzFyBIaEwCgMTgHASS7W432%2FzrIgArwjxDV6hkQccMFE1EbT2%2BVMjfBz3NFHyrDkyQhZxLmnzE0jQzSpMa1lzob6Hw30cQKtnBBVbTbsbBNk6SX3fM4FMhHcaCvAZZE2mPQ%3D%3D--LWxvxvusC6OofJHn--tZPo%2BodDD6mfp%2FsRb%2BDc4w%3D%3D',
    '_gat': '1',
    '_ga': 'GA1.1.1772772602.1682037506',
    '_ga_DDLJ7EEV9M': 'GS1.1.1684397556.12.0.1684397556.0.0.0',
    '_clsk': '15jlqzx|1684397557641|1|1|z.clarity.ms/collect',
    '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684390996375.1684397557784.11',
    '__hssrc': '1',
    '__hssc': '49600953.1.1684397557784',
}

# Cookies sent with the bio-entity detail requests (infos2).
_ENTITY_COOKIES = {
    '_gcl_au': '1.1.154779136.1682037508',
    'hubspotutk': '15b5c265b1847afab42a7def948ef734',
    '_gid': 'GA1.2.695314151.1684390995',
    '_clck': 'qmfr9z|2|fbp|0|1233',
    'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
    '__hssrc': '1',
    'cf_clearance': 'P9Q0Ev.37r_S0U4yEDK1BRLlzotAwwmQcvWTwIecE2k-1684400898-0-250',
    '_ga': 'GA1.1.1772772602.1682037506',
    '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684397557784.1684400914210.12',
    '_omx_drug_bank_session': 'co5hwBY2ElsyR%2B9IpqGzP4A8QGVq%2BO3GeKR0U4zOn5RSgEklXYO2Osneon2e%2B0LzUJS7ZO6ts%2BTpcNj2c9z3Fi%2BldeNXWPYu0VheauFSoK7eLCwPzgdxP6YrpTVYgwi0aawcjgb00AbRgeiw78%2FfroSEmiQpiSWia%2BiQOOq6CGNnXw%2Fx1MqLf%2BzFxMrONecI6FPPYi8Be9rTgSx%2BNYuLZhE4HkAHRshHRyKGHqjOFkTKqmr4p83xoMxC8AYJ5e6M9utzp3OM8GV%2B5im%2FEfjSm3OaxkzXvLyep3QYVmixhYTy5DlzxnCoW0BezJbTlwjp3QPeNzwLk7oblnXRlg47CzlIhmb551RvSN1f6W10KYOduwbbD%2F5KXSazWIT5ekfSQY8%3D--ixR1euY%2F8niWD2GG--pdAc5Q10cDVjU8h3CSP33Q%3D%3D',
    '_ga_DDLJ7EEV9M': 'GS1.1.1684411279.14.1.1684412424.0.0.0',
}

# Cookies sent with the polypeptide detail requests (infos3).
_POLYPEPTIDE_COOKIES = {
    '_gcl_au': '1.1.154779136.1682037508',
    'hubspotutk': '15b5c265b1847afab42a7def948ef734',
    '_gid': 'GA1.2.695314151.1684390995',
    '_clck': 'qmfr9z|2|fbp|0|1233',
    'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
    '__hssrc': '1',
    'cf_clearance': 'P9Q0Ev.37r_S0U4yEDK1BRLlzotAwwmQcvWTwIecE2k-1684400898-0-250',
    '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684400914210.1684412518322.13',
    '_gat': '1',
    '_omx_drug_bank_session': 'Uq6izZN1HKl9qcTZGaXuYnOWtmEGQ276oYZznAVEqQkRMOC71A6R6VsEC4GzJZyw24Yr%2BWw8JBnw1yPLcOx0vuUZ%2Fwa1qOIXvZMlHr8%2Bg5o8dZ9U7jB0%2F6ZeSbBcFJfbDjnCPn0yyzFttMghTXxu0rZdeace5Bwkt5lRAaeAg4aDZRTiYpCZOEe29rGF95l38iU6rRrd85j7RFv%2FuV6ZMCP2ZP7DVTVcQLtIvU9iAItyl86nJiF4pYqaBNXyDffBHkVPQH5WD56EsbabWWPufe0oH4%2Fx7Ku4n%2Fy8pEWYITTaSvZuA8yW3R2UiKG9PgjpeNpEa6%2Bkgs46ewYq%2Fseaaye3R7bfpvvGb0Qu7XAFyHQmOvarbUJqHqaDwnOGfQ%3D%3D--LMKa9eGaiid6tqeE--tfFpItICck9LRYsmjYpR1Q%3D%3D',
    '_ga_DDLJ7EEV9M': 'GS1.1.1684411279.14.1.1684412956.0.0.0',
    '_ga': 'GA1.1.1772772602.1682037506',
    '_clsk': '13a49nl|1684412957117|3|1|z.clarity.ms/collect',
    '__hssc': '49600953.2.1684412518322',
}

# Patterns are compiled once instead of on every page.
_NAME_RE = re.compile(r'class="hit-link"><a href=".*?">(.*?)</a>')
_ENTITY_RE = re.compile(r'class="hit-link"><a href="/bio_entities/(.*?)">(.*?)</a>')
_UNIPROT_RE = re.compile(r'uniprot/(.*?)">')
_GENE_NAME_RE = re.compile(r'Gene Name</dt><dd class="col-xl-10 col-md-9 col-sm-8">(.*?)</dd>')


def _build_headers(referer):
    """Return the browser-like request headers, varying only in *referer*."""
    return {
        'authority': 'go.drugbank.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'referer': referer,
        'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42',
    }


def _fetch_html(path, cookies, referer, params=None):
    """GET ``BASE_URL + path`` and return the response body as text.

    A non-2xx status is logged but not raised, so the callers' "no matches
    means stop" logic keeps working while a blocked request stays visible.
    """
    response = session.get(
        f'{BASE_URL}{path}',
        cookies=cookies,
        headers=_build_headers(referer),
        params=params,
        timeout=REQUEST_TIMEOUT,
    )
    if not response.ok:
        logger.warning(f"HTTP {response.status_code}: {response.url}")
    return response.text


def _fetch_search_page(page):
    """Return the HTML of search-result page number *page* (1-based)."""
    params = {
        'button': '',
        'page': str(page),
        'query': '*',
        'searcher': 'bio_entities',
    }
    return _fetch_html('/unearth/q', _SEARCH_COOKIES, _SEARCH_REFERER, params)


def _save_excel(rows, filename):
    """Write *rows* (a list of dicts) to *filename* as an Excel sheet.

    Uses the path form of ``to_excel``: the ``encoding`` argument and
    ``ExcelWriter.save()`` used previously no longer exist in pandas 2.x.
    """
    pd.DataFrame(rows).to_excel(filename, index=False)


def infosaa(pages):
    """Collect the entity names of the first *pages* search pages into aa.xlsx.

    Stops early at the first page without any hit.  The workbook is written
    once when the loop ends, including when it ends because of an error, so
    the names gathered so far are not lost.

    :param pages: number of result pages to visit (int or numeric string).
    """
    rows = []
    try:
        for page in range(1, int(pages) + 1):
            print(f"第{page}页")
            names = _NAME_RE.findall(_fetch_search_page(page))
            if not names:
                print("最大页数了", page)
                break
            for name in names:
                row = {"名字": name}
                print(row)
                rows.append(row)
    finally:
        _save_excel(rows, 'aa.xlsx')


def infos1(pages):
    """Yield ``(entity_id, entity_name)`` for every hit on the search pages.

    Iteration stops at the first page without any hit.

    :param pages: number of result pages to visit (int or numeric string).
    """
    for page in range(1, int(pages) + 1):
        logger.debug(f"第{page}页")
        hits = _ENTITY_RE.findall(_fetch_search_page(page))
        if not hits:
            print("最大页数了", page)
            break
        yield from hits


def infos2(pages):
    """Yield ``(entity_id, uniprot_id, entity_name)`` for every UniProt link.

    Each entity produced by ``infos1`` (e.g. ``BE0000001``) is opened and every
    ``uniprot/<id>">`` occurrence on its page (e.g. ``P45059``) is reported.

    :param pages: number of search-result pages to visit.
    """
    for url_num, url_name in infos1(pages):
        html = _fetch_html(f'/bio_entities/{url_num}', _ENTITY_COOKIES, _DETAIL_REFERER)
        for url_arm in _UNIPROT_RE.findall(html):
            yield url_num, url_arm, url_name


def infos3(pages):
    """Resolve the gene name of every polypeptide and save result.xlsx.

    The output columns are ``url_name`` (entity name), ``url_m`` (entity id),
    ``num`` (UniProt id) and ``resu`` (gene name).  The workbook is written
    once at the end instead of after every protein; the ``finally`` clause
    still saves the partial result if the scrape is interrupted by an error.

    :param pages: number of search-result pages to visit.
    """
    rows = []
    try:
        for url_m, num, url_name in infos2(pages):
            html = _fetch_html(f'/polypeptides/{num}', _POLYPEPTIDE_COOKIES, _DETAIL_REFERER)
            for resu in _GENE_NAME_RE.findall(html):
                row = {
                    "url_name": url_name,
                    "url_m": url_m,
                    "num": num,
                    "resu": resu,
                }
                logger.info(row)
                rows.append(row)
    finally:
        _save_excel(rows, 'result.xlsx')


def run():
    """Print the start-up banner (author and version)."""
    # Raw string: the art is full of backslashes that would otherwise be
    # parsed as (invalid) escape sequences.
    # NOTE(review): the banner's line breaks and spacing were lost in the
    # pasted source and have been reconstructed; confirm against the original.
    print(r"""
_____ _ Author: 十架bgm __
_________   ___ ___    _____________________________________________
\_   ___ \ /   |   \  /  _  \__    ___/  _____/\______   \__    ___/
/    \  \//    ~    \/  /_\  \|    | /   \  ___ |     ___/ |    |
\     \___\    Y    /    |    \    | \    \_\  \|    |     |    |
 \______  /\___|_  /\____|__  /____|  \______  /|____|     |____|
        \/       \/         \/               \/
version=1.1
""")


if __name__ == '__main__':
    run()
    page = input("页数:")
    # infosaa(page) can be run instead to export only the entity names.
    logger.debug("采集开始行动,开始计时间")
    start_time = time.time()
    logger.debug(f'启动模块{os.path.basename(os.path.abspath(__file__))}')
    infos3(page)
    end_time = time.time()
    logger.warning(f"计时结束,运行{end_time - start_time}秒")
    logger.warning("采集结束,已保存")
部分结果
本文来自博客园,作者:__username,转载请注明原文链接:https://www.cnblogs.com/code3/p/17413499.html
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步