下载pubmed数据
# Bulk-download PubMed records through the NCBI E-utilities history server.
#
# Step 1: ESearch is called with usehistory=y; the JSON reply carries a
#         "webenv" token and the total hit "count", both read below.
# Step 2: EFetch is called once per batch against that stored result set
#         (query_key=1 + webenv), and each raw response body is written to
#         its own file in the current working directory.
import requests
import json

# Number of records requested per EFetch call. It is used both as retmax and
# as the step of the retstart loop so the two can never drift apart. The
# previous hard-coded retmax=9999 combined with a step of 10000 skipped the
# last record of every batch.
batch_size = 10000

search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&mindate=1800/01/01&maxdate=2016/12/31&usehistory=y&retmode=json"
search_r = requests.post(search_url)
# Fail loudly on an HTTP error instead of trying to parse an error page.
search_r.raise_for_status()
search_data = search_r.json()
webenv = search_data["esearchresult"]['webenv']
total_records = int(search_data["esearchresult"]['count'])
fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmax="+str(batch_size)+"&query_key=1&webenv="+webenv

for i in range(0, total_records, batch_size):
    this_fetch = fetch_url+"&retstart="+str(i)
    print("Getting this URL: "+this_fetch)
    fetch_r = requests.post(this_fetch)
    # Without this check a failed request would be saved to disk as if it
    # were a batch of records.
    fetch_r.raise_for_status()
    # NOTE(review): no retmode/rettype is passed to EFetch, so the payload is
    # in the server's default format and is probably not JSON despite the
    # ".json" suffix. The file name is kept unchanged so existing downstream
    # tooling still finds the files -- confirm the intended format.
    batch_path = 'pubmed_batch_'+str(i)+'_to_'+str(i+batch_size-1)+".json"
    # The context manager closes the file even if the write fails; an
    # explicit encoding keeps the output independent of the platform default.
    with open(batch_path, 'w', encoding='utf-8') as f:
        f.write(fetch_r.text)

print("Number of records found :"+str(total_records))
目前有 2641273 个记录，大约 134 GB。 文献摘要 摘要处理查看