日日行,不怕千万里;常常做,不怕千万事|

__username

园龄:2年5个月粉丝:12关注:2

📂python
🔖Python
2023-05-18 22:26阅读: 63评论: 0推荐: 0

药->excel

1. 无日志版本

# @author: zhc
# @Time: 2023/5/18
# @FileName: demo
import re
import pandas as pd
import requests
# One HTTP session shared by every request made in this script.
session = requests.Session()
# Do not pick up settings from the environment (requests documents this flag
# as controlling environment-provided configuration such as proxy variables).
session.trust_env = False
# Fetch the names from the listing pages and write them to Excel (abridged).
def infosaa(pages):
    """Collect the hit names from the DrugBank listing and save them to ``aa.xlsx``.

    Requests listing pages ``1..int(pages)`` of the ``bio_entities`` search,
    extracts the link text of every ``hit-link`` entry and stores one row per
    name under the column ``名字``.  Paging stops early at the first page that
    contains no hits.

    The workbook is written exactly once, in a ``finally`` clause, so the rows
    collected so far are kept even when a request fails part-way through.

    :param pages: number of listing pages to request (anything ``int()`` accepts).
    :raises ValueError: if ``pages`` cannot be converted to an integer.
    """
    # Validate up front so a bad page count does not create an empty workbook.
    last_page = int(pages)

    # NOTE(review): these are hard-coded browser session cookies; presumably
    # they expire and must be refreshed by hand - confirm before relying on them.
    cookies = {
        'cf_clearance': 'z8jXLY4NjL4.KVOUbgZNPWj6NPBlT_u.x4xmS19uZZE-1682037487-0-250',
        '_gcl_au': '1.1.154779136.1682037508',
        'hubspotutk': '15b5c265b1847afab42a7def948ef734',
        '_gid': 'GA1.2.695314151.1684390995',
        '_clck': 'qmfr9z|2|fbp|0|1233',
        'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
        'cf_chl_2': '1ea719f0d331036',
        'cf_chl_rc_i': '1',
        '_omx_drug_bank_session': 'kk16uujsTygoHwxfGEhrQEyjxT2CzYy2PCRfatX70%2BtKAQzuRCwO6fY%2FjHPVOsS2SLetNSvbEMvJD868pOaTuZ8EKhJqcNoxDZG68MLXcygqnx5g6cerWxPObUdqnBPQPWgcAJM7f%2FCOtvA%2BaHVLah3%2Fwcfl%2FfbkpfjV%2BsNNubQF1D9LB4e4xsUZpcntjSlLNAh6JykRisrdIlvIqN6%2B56vBklZtnRELonZY9yvyY%2B01bKCoflxsvNu8NS6ouAHWgBChb7%2BuoEBd4c6X4MttHMtlcKKhryxzE9mGZ6nfckvpGWqaGMptW7n2TsWUzFyBIaEwCgMTgHASS7W432%2FzrIgArwjxDV6hkQccMFE1EbT2%2BVMjfBz3NFHyrDkyQhZxLmnzE0jQzSpMa1lzob6Hw30cQKtnBBVbTbsbBNk6SX3fM4FMhHcaCvAZZE2mPQ%3D%3D--LWxvxvusC6OofJHn--tZPo%2BodDD6mfp%2FsRb%2BDc4w%3D%3D',
        '_gat': '1',
        '_ga': 'GA1.1.1772772602.1682037506',
        '_ga_DDLJ7EEV9M': 'GS1.1.1684397556.12.0.1684397556.0.0.0',
        '_clsk': '15jlqzx|1684397557641|1|1|z.clarity.ms/collect',
        '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684390996375.1684397557784.11',
        '__hssrc': '1',
        '__hssc': '49600953.1.1684397557784',
    }
    headers = {
        'authority': 'go.drugbank.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'referer': 'https://go.drugbank.com/unearth/q?query=*&button=&searcher=bio_entities',
        'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42',
    }

    Ls = []
    try:
        for page in range(1, last_page + 1):
            print(f"第{page}页")
            parms = {
                'button': '',
                'page': str(page),
                'query': '*',
                'searcher': 'bio_entities',
            }
            response = session.get(
                'https://go.drugbank.com/unearth/q?',
                cookies=cookies,
                headers=headers,
                params=parms,
            )
            try:
                nameLs = re.findall(r'class="hit-link"><a href=".*?">(.*?)</a>', response.text)
                if not nameLs:
                    # A page without hits is treated as the end of the listing.
                    print("最大页数了", page)
                    break
                for name in nameLs:
                    dic = {"名字": name}
                    print(dic)
                    Ls.append(dic)
            except Exception as exc:
                # Still best-effort (stop paging), but show what actually failed.
                print("最大限度", repr(exc))
                break
    finally:
        # DataFrame.to_excel(path) works on old and new pandas alike; the
        # former ``encoding=`` argument and ``ExcelWriter.save()`` no longer exist.
        pd.DataFrame(Ls).to_excel('aa.xlsx', index=False)
def infos1(pages):
    """Generate ``(entity id, display name)`` pairs from the search listing.

    Walks listing pages ``1..int(pages)`` and yields, for every ``hit-link``
    anchor pointing at ``/bio_entities/<id>``, the id (for example
    ``'BE0000001'``) together with the link text.  The walk ends at the first
    page that has no such anchors.
    """
    listing_url = 'https://go.drugbank.com/unearth/q?'
    hit_pattern = 'class="hit-link"><a href="/bio_entities/(.*?)">(.*?)</a>'

    # Browser cookies and headers replayed unchanged on every listing request.
    jar = {
        'cf_clearance': 'z8jXLY4NjL4.KVOUbgZNPWj6NPBlT_u.x4xmS19uZZE-1682037487-0-250',
        '_gcl_au': '1.1.154779136.1682037508',
        'hubspotutk': '15b5c265b1847afab42a7def948ef734',
        '_gid': 'GA1.2.695314151.1684390995',
        '_clck': 'qmfr9z|2|fbp|0|1233',
        'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
        'cf_chl_2': '1ea719f0d331036',
        'cf_chl_rc_i': '1',
        '_omx_drug_bank_session': 'kk16uujsTygoHwxfGEhrQEyjxT2CzYy2PCRfatX70%2BtKAQzuRCwO6fY%2FjHPVOsS2SLetNSvbEMvJD868pOaTuZ8EKhJqcNoxDZG68MLXcygqnx5g6cerWxPObUdqnBPQPWgcAJM7f%2FCOtvA%2BaHVLah3%2Fwcfl%2FfbkpfjV%2BsNNubQF1D9LB4e4xsUZpcntjSlLNAh6JykRisrdIlvIqN6%2B56vBklZtnRELonZY9yvyY%2B01bKCoflxsvNu8NS6ouAHWgBChb7%2BuoEBd4c6X4MttHMtlcKKhryxzE9mGZ6nfckvpGWqaGMptW7n2TsWUzFyBIaEwCgMTgHASS7W432%2FzrIgArwjxDV6hkQccMFE1EbT2%2BVMjfBz3NFHyrDkyQhZxLmnzE0jQzSpMa1lzob6Hw30cQKtnBBVbTbsbBNk6SX3fM4FMhHcaCvAZZE2mPQ%3D%3D--LWxvxvusC6OofJHn--tZPo%2BodDD6mfp%2FsRb%2BDc4w%3D%3D',
        '_gat': '1',
        '_ga': 'GA1.1.1772772602.1682037506',
        '_ga_DDLJ7EEV9M': 'GS1.1.1684397556.12.0.1684397556.0.0.0',
        '_clsk': '15jlqzx|1684397557641|1|1|z.clarity.ms/collect',
        '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684390996375.1684397557784.11',
        '__hssrc': '1',
        '__hssc': '49600953.1.1684397557784',
    }
    request_headers = {
        'authority': 'go.drugbank.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'referer': 'https://go.drugbank.com/unearth/q?query=*&button=&searcher=bio_entities',
        'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42',
    }

    page_total = int(pages)
    for page_no in range(1, page_total + 1):
        print(f"第{page_no}页")
        query = {
            'button': '',
            'page': str(page_no),
            'query': '*',
            'searcher': 'bio_entities',
        }
        html = session.get(
            listing_url,
            cookies=jar,
            headers=request_headers,
            params=query,
        ).text
        try:
            hits = re.findall(hit_pattern, html)
            if len(hits) == 0:
                # No hits on this page: treat it as the end of the listing.
                print("最大页数了",page_no)
                break
            # Each hit is already an (id, name) tuple.
            yield from hits
        except Exception:
            print("最大限度")
            break
def infos2(pages):
    """Generate ``(entity id, uniprot code, entity name)`` triples.

    For every pair produced by ``infos1(pages)`` the entity page
    ``/bio_entities/<id>`` is downloaded and each ``uniprot/<code>">``
    occurrence in it (for example ``P45059`` for ``BE0000001``) is yielded
    together with the id and name it belongs to.
    """
    uniprot_pattern = r'uniprot/(.*?)">'

    # Browser cookies and headers replayed unchanged on every entity request.
    jar = {
        '_gcl_au': '1.1.154779136.1682037508',
        'hubspotutk': '15b5c265b1847afab42a7def948ef734',
        '_gid': 'GA1.2.695314151.1684390995',
        '_clck': 'qmfr9z|2|fbp|0|1233',
        'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
        '__hssrc': '1',
        'cf_clearance': 'P9Q0Ev.37r_S0U4yEDK1BRLlzotAwwmQcvWTwIecE2k-1684400898-0-250',
        '_ga': 'GA1.1.1772772602.1682037506',
        '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684397557784.1684400914210.12',
        '_omx_drug_bank_session': 'co5hwBY2ElsyR%2B9IpqGzP4A8QGVq%2BO3GeKR0U4zOn5RSgEklXYO2Osneon2e%2B0LzUJS7ZO6ts%2BTpcNj2c9z3Fi%2BldeNXWPYu0VheauFSoK7eLCwPzgdxP6YrpTVYgwi0aawcjgb00AbRgeiw78%2FfroSEmiQpiSWia%2BiQOOq6CGNnXw%2Fx1MqLf%2BzFxMrONecI6FPPYi8Be9rTgSx%2BNYuLZhE4HkAHRshHRyKGHqjOFkTKqmr4p83xoMxC8AYJ5e6M9utzp3OM8GV%2B5im%2FEfjSm3OaxkzXvLyep3QYVmixhYTy5DlzxnCoW0BezJbTlwjp3QPeNzwLk7oblnXRlg47CzlIhmb551RvSN1f6W10KYOduwbbD%2F5KXSazWIT5ekfSQY8%3D--ixR1euY%2F8niWD2GG--pdAc5Q10cDVjU8h3CSP33Q%3D%3D',
        '_ga_DDLJ7EEV9M': 'GS1.1.1684411279.14.1.1684412424.0.0.0',
    }
    request_headers = {
        'authority': 'go.drugbank.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'referer': 'https://go.drugbank.com/unearth/q?button=&page=1&query=%2A&searcher=bio_entities&__cf_chl_tk=OY4j_WDIGBrKuUCRiJCzMsHBDfKvpEbJksY5y4_sxO0-1684400898-0-gaNycGzNDdA',
        'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42',
    }

    for entity_id, entity_name in infos1(pages):
        entity_page = session.get(
            f'https://go.drugbank.com/bio_entities/{entity_id}',
            cookies=jar,
            headers=request_headers,
        )
        # One triple per uniprot reference found on the entity page.
        yield from (
            (entity_id, uniprot_code, entity_name)
            for uniprot_code in re.findall(uniprot_pattern, entity_page.text)
        )
def infos3(pages):
    """Resolve gene names for every listed entity and save them to ``result.xlsx``.

    Consumes the ``(entity id, uniprot code, entity name)`` triples produced by
    ``infos2(pages)``, downloads ``/polypeptides/<code>`` for each one and
    records every ``Gene Name`` value found there as a row with the columns
    ``url_name``, ``url_m``, ``num`` and ``resu``.

    The workbook is written exactly once, in a ``finally`` clause, instead of
    being rewritten after every record; rows collected before a failure are
    therefore still saved.

    :param pages: number of listing pages to walk (anything ``int()`` accepts).
    :raises ValueError: if ``pages`` cannot be converted to an integer.
    """
    # Fail fast: the generators only convert ``pages`` lazily, which would
    # otherwise surface the error after an empty workbook had been written.
    int(pages)

    # NOTE(review): these are hard-coded browser session cookies; presumably
    # they expire and must be refreshed by hand - confirm before relying on them.
    cookies = {
        '_gcl_au': '1.1.154779136.1682037508',
        'hubspotutk': '15b5c265b1847afab42a7def948ef734',
        '_gid': 'GA1.2.695314151.1684390995',
        '_clck': 'qmfr9z|2|fbp|0|1233',
        'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
        '__hssrc': '1',
        'cf_clearance': 'P9Q0Ev.37r_S0U4yEDK1BRLlzotAwwmQcvWTwIecE2k-1684400898-0-250',
        '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684400914210.1684412518322.13',
        '_gat': '1',
        '_omx_drug_bank_session': 'Uq6izZN1HKl9qcTZGaXuYnOWtmEGQ276oYZznAVEqQkRMOC71A6R6VsEC4GzJZyw24Yr%2BWw8JBnw1yPLcOx0vuUZ%2Fwa1qOIXvZMlHr8%2Bg5o8dZ9U7jB0%2F6ZeSbBcFJfbDjnCPn0yyzFttMghTXxu0rZdeace5Bwkt5lRAaeAg4aDZRTiYpCZOEe29rGF95l38iU6rRrd85j7RFv%2FuV6ZMCP2ZP7DVTVcQLtIvU9iAItyl86nJiF4pYqaBNXyDffBHkVPQH5WD56EsbabWWPufe0oH4%2Fx7Ku4n%2Fy8pEWYITTaSvZuA8yW3R2UiKG9PgjpeNpEa6%2Bkgs46ewYq%2Fseaaye3R7bfpvvGb0Qu7XAFyHQmOvarbUJqHqaDwnOGfQ%3D%3D--LMKa9eGaiid6tqeE--tfFpItICck9LRYsmjYpR1Q%3D%3D',
        '_ga_DDLJ7EEV9M': 'GS1.1.1684411279.14.1.1684412956.0.0.0',
        '_ga': 'GA1.1.1772772602.1682037506',
        '_clsk': '13a49nl|1684412957117|3|1|z.clarity.ms/collect',
        '__hssc': '49600953.2.1684412518322',
    }
    headers = {
        'authority': 'go.drugbank.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'referer': 'https://go.drugbank.com/unearth/q?button=&page=1&query=%2A&searcher=bio_entities&__cf_chl_tk=OY4j_WDIGBrKuUCRiJCzMsHBDfKvpEbJksY5y4_sxO0-1684400898-0-gaNycGzNDdA',
        'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42',
    }

    Ls = []
    try:
        for url_m, num, url_name in infos2(pages):
            response = session.get(
                f'https://go.drugbank.com/polypeptides/{num}',
                cookies=cookies,
                headers=headers,
            ).text
            res_infosLs = re.findall(
                r'Gene Name</dt><dd class="col-xl-10 col-md-9 col-sm-8">(.*?)</dd>',
                response,
            )
            try:
                for resu in res_infosLs:
                    print(f"最终结果:{url_name}----->{url_m}------>{num}----->{resu}")
                    dic = {
                        "url_name": url_name,
                        "url_m": url_m,
                        "num": num,
                        "resu": resu
                    }
                    Ls.append(dic)
            except Exception as exc:
                # Still best-effort (skip to the next entity), but no longer
                # silent and no longer swallowing KeyboardInterrupt/SystemExit.
                print("记录处理失败", repr(exc))
    finally:
        # DataFrame.to_excel(path) works on old and new pandas alike; the
        # former ``encoding=`` argument and ``ExcelWriter.save()`` no longer exist.
        pd.DataFrame(Ls).to_excel('result.xlsx', index=False)
if __name__ == '__main__':
    # Stages available in this script:
    #   infosaa(page) - names only, written to aa.xlsx
    #   infos1(page)  - yields entity ids such as 'BE0000001'
    #   infos2(page)  - yields uniprot codes such as P45059
    # Only the complete pipeline is run; it is fed the page count typed in.
    infos3(input("页数:"))

2. 日志版本

# -*- coding: utf-8 -*-
# @Author : zhc
# @File : juck_yao.pyo
import os.path
import re
import time
import pandas as pd
import requests
from loguru import logger
# Additionally send log records of level DEBUG and above to ces.log (UTF-8).
logger.add(sink='ces.log', encoding='utf-8', level='DEBUG')
# One HTTP session shared by every request made in this script.
session = requests.Session()
# Do not pick up settings from the environment (requests documents this flag
# as controlling environment-provided configuration such as proxy variables).
session.trust_env = False
# Fetch the names from the listing pages and write them to Excel (abridged).
def infosaa(pages):
    """Collect the hit names from the DrugBank listing and save them to ``aa.xlsx``.

    Requests listing pages ``1..int(pages)`` of the ``bio_entities`` search,
    extracts the link text of every ``hit-link`` entry and stores one row per
    name under the column ``名字``.  Paging stops early at the first page that
    contains no hits.

    The workbook is written exactly once, in a ``finally`` clause, so the rows
    collected so far are kept even when a request fails part-way through.

    :param pages: number of listing pages to request (anything ``int()`` accepts).
    :raises ValueError: if ``pages`` cannot be converted to an integer.
    """
    # Validate up front so a bad page count does not create an empty workbook.
    last_page = int(pages)

    # NOTE(review): these are hard-coded browser session cookies; presumably
    # they expire and must be refreshed by hand - confirm before relying on them.
    cookies = {
        'cf_clearance': 'z8jXLY4NjL4.KVOUbgZNPWj6NPBlT_u.x4xmS19uZZE-1682037487-0-250',
        '_gcl_au': '1.1.154779136.1682037508',
        'hubspotutk': '15b5c265b1847afab42a7def948ef734',
        '_gid': 'GA1.2.695314151.1684390995',
        '_clck': 'qmfr9z|2|fbp|0|1233',
        'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
        'cf_chl_2': '1ea719f0d331036',
        'cf_chl_rc_i': '1',
        '_omx_drug_bank_session': 'kk16uujsTygoHwxfGEhrQEyjxT2CzYy2PCRfatX70%2BtKAQzuRCwO6fY%2FjHPVOsS2SLetNSvbEMvJD868pOaTuZ8EKhJqcNoxDZG68MLXcygqnx5g6cerWxPObUdqnBPQPWgcAJM7f%2FCOtvA%2BaHVLah3%2Fwcfl%2FfbkpfjV%2BsNNubQF1D9LB4e4xsUZpcntjSlLNAh6JykRisrdIlvIqN6%2B56vBklZtnRELonZY9yvyY%2B01bKCoflxsvNu8NS6ouAHWgBChb7%2BuoEBd4c6X4MttHMtlcKKhryxzE9mGZ6nfckvpGWqaGMptW7n2TsWUzFyBIaEwCgMTgHASS7W432%2FzrIgArwjxDV6hkQccMFE1EbT2%2BVMjfBz3NFHyrDkyQhZxLmnzE0jQzSpMa1lzob6Hw30cQKtnBBVbTbsbBNk6SX3fM4FMhHcaCvAZZE2mPQ%3D%3D--LWxvxvusC6OofJHn--tZPo%2BodDD6mfp%2FsRb%2BDc4w%3D%3D',
        '_gat': '1',
        '_ga': 'GA1.1.1772772602.1682037506',
        '_ga_DDLJ7EEV9M': 'GS1.1.1684397556.12.0.1684397556.0.0.0',
        '_clsk': '15jlqzx|1684397557641|1|1|z.clarity.ms/collect',
        '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684390996375.1684397557784.11',
        '__hssrc': '1',
        '__hssc': '49600953.1.1684397557784',
    }
    headers = {
        'authority': 'go.drugbank.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'referer': 'https://go.drugbank.com/unearth/q?query=*&button=&searcher=bio_entities',
        'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42',
    }

    Ls = []
    try:
        for page in range(1, last_page + 1):
            print(f"第{page}页")
            parms = {
                'button': '',
                'page': str(page),
                'query': '*',
                'searcher': 'bio_entities',
            }
            response = session.get(
                'https://go.drugbank.com/unearth/q?',
                cookies=cookies,
                headers=headers,
                params=parms,
            )
            try:
                nameLs = re.findall(r'class="hit-link"><a href=".*?">(.*?)</a>', response.text)
                if not nameLs:
                    # A page without hits is treated as the end of the listing.
                    print("最大页数了", page)
                    break
                for name in nameLs:
                    dic = {"名字": name}
                    print(dic)
                    Ls.append(dic)
            except Exception as exc:
                # Still best-effort (stop paging), but show what actually failed.
                print("最大限度", repr(exc))
                break
    finally:
        # DataFrame.to_excel(path) works on old and new pandas alike; the
        # former ``encoding=`` argument and ``ExcelWriter.save()`` no longer exist.
        pd.DataFrame(Ls).to_excel('aa.xlsx', index=False)
def infos1(pages):
    """Generate ``(entity id, display name)`` pairs from the search listing.

    Walks listing pages ``1..int(pages)`` and yields, for every ``hit-link``
    anchor pointing at ``/bio_entities/<id>``, the id (for example
    ``'BE0000001'``) together with the link text.  Progress and errors go to
    the logger; the walk ends at the first page that has no such anchors.
    """
    listing_url = 'https://go.drugbank.com/unearth/q?'
    hit_pattern = 'class="hit-link"><a href="/bio_entities/(.*?)">(.*?)</a>'

    # Browser cookies and headers replayed unchanged on every listing request.
    jar = {
        'cf_clearance': 'z8jXLY4NjL4.KVOUbgZNPWj6NPBlT_u.x4xmS19uZZE-1682037487-0-250',
        '_gcl_au': '1.1.154779136.1682037508',
        'hubspotutk': '15b5c265b1847afab42a7def948ef734',
        '_gid': 'GA1.2.695314151.1684390995',
        '_clck': 'qmfr9z|2|fbp|0|1233',
        'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
        'cf_chl_2': '1ea719f0d331036',
        'cf_chl_rc_i': '1',
        '_omx_drug_bank_session': 'kk16uujsTygoHwxfGEhrQEyjxT2CzYy2PCRfatX70%2BtKAQzuRCwO6fY%2FjHPVOsS2SLetNSvbEMvJD868pOaTuZ8EKhJqcNoxDZG68MLXcygqnx5g6cerWxPObUdqnBPQPWgcAJM7f%2FCOtvA%2BaHVLah3%2Fwcfl%2FfbkpfjV%2BsNNubQF1D9LB4e4xsUZpcntjSlLNAh6JykRisrdIlvIqN6%2B56vBklZtnRELonZY9yvyY%2B01bKCoflxsvNu8NS6ouAHWgBChb7%2BuoEBd4c6X4MttHMtlcKKhryxzE9mGZ6nfckvpGWqaGMptW7n2TsWUzFyBIaEwCgMTgHASS7W432%2FzrIgArwjxDV6hkQccMFE1EbT2%2BVMjfBz3NFHyrDkyQhZxLmnzE0jQzSpMa1lzob6Hw30cQKtnBBVbTbsbBNk6SX3fM4FMhHcaCvAZZE2mPQ%3D%3D--LWxvxvusC6OofJHn--tZPo%2BodDD6mfp%2FsRb%2BDc4w%3D%3D',
        '_gat': '1',
        '_ga': 'GA1.1.1772772602.1682037506',
        '_ga_DDLJ7EEV9M': 'GS1.1.1684397556.12.0.1684397556.0.0.0',
        '_clsk': '15jlqzx|1684397557641|1|1|z.clarity.ms/collect',
        '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684390996375.1684397557784.11',
        '__hssrc': '1',
        '__hssc': '49600953.1.1684397557784',
    }
    request_headers = {
        'authority': 'go.drugbank.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'referer': 'https://go.drugbank.com/unearth/q?query=*&button=&searcher=bio_entities',
        'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42',
    }

    page_total = int(pages)
    for page_no in range(1, page_total + 1):
        logger.debug(f"第{page_no}页")
        query = {
            'button': '',
            'page': str(page_no),
            'query': '*',
            'searcher': 'bio_entities',
        }
        html = session.get(
            listing_url,
            cookies=jar,
            headers=request_headers,
            params=query,
        ).text
        try:
            hits = re.findall(hit_pattern, html)
            if len(hits) == 0:
                # No hits on this page: treat it as the end of the listing.
                print("最大页数了", page_no)
                break
            # Each hit is already an (id, name) tuple.
            yield from hits
        except Exception:
            logger.error("最大限度")
            break
def infos2(pages):
    """Generate ``(entity id, uniprot code, entity name)`` triples.

    For every pair produced by ``infos1(pages)`` the entity page
    ``/bio_entities/<id>`` is downloaded and each ``uniprot/<code>">``
    occurrence in it (for example ``P45059`` for ``BE0000001``) is yielded
    together with the id and name it belongs to.
    """
    uniprot_pattern = r'uniprot/(.*?)">'

    # Browser cookies and headers replayed unchanged on every entity request.
    jar = {
        '_gcl_au': '1.1.154779136.1682037508',
        'hubspotutk': '15b5c265b1847afab42a7def948ef734',
        '_gid': 'GA1.2.695314151.1684390995',
        '_clck': 'qmfr9z|2|fbp|0|1233',
        'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
        '__hssrc': '1',
        'cf_clearance': 'P9Q0Ev.37r_S0U4yEDK1BRLlzotAwwmQcvWTwIecE2k-1684400898-0-250',
        '_ga': 'GA1.1.1772772602.1682037506',
        '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684397557784.1684400914210.12',
        '_omx_drug_bank_session': 'co5hwBY2ElsyR%2B9IpqGzP4A8QGVq%2BO3GeKR0U4zOn5RSgEklXYO2Osneon2e%2B0LzUJS7ZO6ts%2BTpcNj2c9z3Fi%2BldeNXWPYu0VheauFSoK7eLCwPzgdxP6YrpTVYgwi0aawcjgb00AbRgeiw78%2FfroSEmiQpiSWia%2BiQOOq6CGNnXw%2Fx1MqLf%2BzFxMrONecI6FPPYi8Be9rTgSx%2BNYuLZhE4HkAHRshHRyKGHqjOFkTKqmr4p83xoMxC8AYJ5e6M9utzp3OM8GV%2B5im%2FEfjSm3OaxkzXvLyep3QYVmixhYTy5DlzxnCoW0BezJbTlwjp3QPeNzwLk7oblnXRlg47CzlIhmb551RvSN1f6W10KYOduwbbD%2F5KXSazWIT5ekfSQY8%3D--ixR1euY%2F8niWD2GG--pdAc5Q10cDVjU8h3CSP33Q%3D%3D',
        '_ga_DDLJ7EEV9M': 'GS1.1.1684411279.14.1.1684412424.0.0.0',
    }
    request_headers = {
        'authority': 'go.drugbank.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'referer': 'https://go.drugbank.com/unearth/q?button=&page=1&query=%2A&searcher=bio_entities&__cf_chl_tk=OY4j_WDIGBrKuUCRiJCzMsHBDfKvpEbJksY5y4_sxO0-1684400898-0-gaNycGzNDdA',
        'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42',
    }

    for entity_id, entity_name in infos1(pages):
        entity_page = session.get(
            f'https://go.drugbank.com/bio_entities/{entity_id}',
            cookies=jar,
            headers=request_headers,
        )
        # One triple per uniprot reference found on the entity page.
        yield from (
            (entity_id, uniprot_code, entity_name)
            for uniprot_code in re.findall(uniprot_pattern, entity_page.text)
        )
def infos3(pages):
    """Resolve gene names for every listed entity and save them to ``result.xlsx``.

    Consumes the ``(entity id, uniprot code, entity name)`` triples produced by
    ``infos2(pages)``, downloads ``/polypeptides/<code>`` for each one and
    records every ``Gene Name`` value found there as a row with the columns
    ``url_name``, ``url_m``, ``num`` and ``resu``.  Each row is also logged.

    The workbook is written exactly once, in a ``finally`` clause, instead of
    being rewritten after every record; rows collected before a failure are
    therefore still saved.

    :param pages: number of listing pages to walk (anything ``int()`` accepts).
    :raises ValueError: if ``pages`` cannot be converted to an integer.
    """
    # Fail fast: the generators only convert ``pages`` lazily, which would
    # otherwise surface the error after an empty workbook had been written.
    int(pages)

    # NOTE(review): these are hard-coded browser session cookies; presumably
    # they expire and must be refreshed by hand - confirm before relying on them.
    cookies = {
        '_gcl_au': '1.1.154779136.1682037508',
        'hubspotutk': '15b5c265b1847afab42a7def948ef734',
        '_gid': 'GA1.2.695314151.1684390995',
        '_clck': 'qmfr9z|2|fbp|0|1233',
        'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
        '__hssrc': '1',
        'cf_clearance': 'P9Q0Ev.37r_S0U4yEDK1BRLlzotAwwmQcvWTwIecE2k-1684400898-0-250',
        '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684400914210.1684412518322.13',
        '_gat': '1',
        '_omx_drug_bank_session': 'Uq6izZN1HKl9qcTZGaXuYnOWtmEGQ276oYZznAVEqQkRMOC71A6R6VsEC4GzJZyw24Yr%2BWw8JBnw1yPLcOx0vuUZ%2Fwa1qOIXvZMlHr8%2Bg5o8dZ9U7jB0%2F6ZeSbBcFJfbDjnCPn0yyzFttMghTXxu0rZdeace5Bwkt5lRAaeAg4aDZRTiYpCZOEe29rGF95l38iU6rRrd85j7RFv%2FuV6ZMCP2ZP7DVTVcQLtIvU9iAItyl86nJiF4pYqaBNXyDffBHkVPQH5WD56EsbabWWPufe0oH4%2Fx7Ku4n%2Fy8pEWYITTaSvZuA8yW3R2UiKG9PgjpeNpEa6%2Bkgs46ewYq%2Fseaaye3R7bfpvvGb0Qu7XAFyHQmOvarbUJqHqaDwnOGfQ%3D%3D--LMKa9eGaiid6tqeE--tfFpItICck9LRYsmjYpR1Q%3D%3D',
        '_ga_DDLJ7EEV9M': 'GS1.1.1684411279.14.1.1684412956.0.0.0',
        '_ga': 'GA1.1.1772772602.1682037506',
        '_clsk': '13a49nl|1684412957117|3|1|z.clarity.ms/collect',
        '__hssc': '49600953.2.1684412518322',
    }
    headers = {
        'authority': 'go.drugbank.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'referer': 'https://go.drugbank.com/unearth/q?button=&page=1&query=%2A&searcher=bio_entities&__cf_chl_tk=OY4j_WDIGBrKuUCRiJCzMsHBDfKvpEbJksY5y4_sxO0-1684400898-0-gaNycGzNDdA',
        'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42',
    }

    Ls = []
    try:
        for url_m, num, url_name in infos2(pages):
            response = session.get(
                f'https://go.drugbank.com/polypeptides/{num}',
                cookies=cookies,
                headers=headers,
            ).text
            res_infosLs = re.findall(
                r'Gene Name</dt><dd class="col-xl-10 col-md-9 col-sm-8">(.*?)</dd>',
                response,
            )
            try:
                for resu in res_infosLs:
                    dic = {
                        "url_name": url_name,
                        "url_m": url_m,
                        "num": num,
                        "resu": resu
                    }
                    logger.info(dic)
                    Ls.append(dic)
            except Exception as exc:
                # Still best-effort (skip to the next entity), but no longer
                # silent and no longer swallowing KeyboardInterrupt/SystemExit.
                logger.error(f"记录处理失败: {exc!r}")
    finally:
        # DataFrame.to_excel(path) works on old and new pandas alike; the
        # former ``encoding=`` argument and ``ExcelWriter.save()`` no longer exist.
        pd.DataFrame(Ls).to_excel('result.xlsx', index=False)
def run():
    """Print the ASCII-art start-up banner (author and version) to stdout."""
    # Raw string: the art is full of backslashes that are not valid escape
    # sequences, which a normal string literal only tolerates with a warning.
    print(r"""
_____ _ Author: 十架bgm __
_________ ___ ___ _____________________________________________
\_ ___ \ / | \ / _ \__ ___/ _____/\______ \__ ___/
/ \ \// ~ \/ /_\ \| | / \ ___ | ___/ | |
\ \___\ Y / | \ | \ \_\ \| | | |
\______ /\___|_ /\____|__ /____| \______ /|____| |____|
\/ \/ \/ \/ version=1.1
""")
if __name__ == '__main__':
    run()
    requested_pages = input("页数:")
    # Stages available in this script:
    #   infosaa(page) - names only, written to aa.xlsx
    #   infos1(page)  - yields entity ids such as 'BE0000001'
    #   infos2(page)  - yields uniprot codes such as P45059
    logger.debug("采集开始行动,开始计时间")
    started_at = time.time()
    script_name = os.path.basename(os.path.abspath(__file__))
    logger.debug(f'启动模块{script_name}')
    # Run the complete pipeline and report the elapsed wall-clock time.
    infos3(requested_pages)
    elapsed = time.time() - started_at
    logger.warning(f"计时结束,运行{elapsed}秒")
    logger.warning("采集结束,已保存")

部分结果

posted @   __username  阅读(63)  评论(0)  编辑  收藏  举报

本文作者:DIVMonster

本文链接:https://www.cnblogs.com/guangzan/p/12886111.html

版权声明:本作品采用知识共享署名-非商业性使用-禁止演绎 2.5 中国大陆许可协议进行许可。

点击右上角即可分享
微信分享提示
评论
收藏
关注
推荐
深色
回顶
收起