下载pubmed数据

 1 import requests
 2 import json
 3 
 4 search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&mindate=1800/01/01&maxdate=2016/12/31&usehistory=y&retmode=json"
 5 search_r = requests.post(search_url)
 6 search_data = search_r.json()
 7 webenv = search_data["esearchresult"]['webenv']
 8 total_records = int(search_data["esearchresult"]['count'])
 9 fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmax=9999&query_key=1&webenv="+webenv
10 
11 for i in range(0, total_records, 10000):
12     this_fetch = fetch_url+"&retstart="+str(i)
13     print("Getting this URL: "+this_fetch)
14     fetch_r = requests.post(this_fetch)
15     f = open('pubmed_batch_'+str(i)+'_to_'+str(i+9999)+".json", 'w')
16     f.write(fetch_r.text)
17     f.close()
18 
19 print("Number of records found :"+str(total_records))

目前共有 2641273 条记录,大约 134 GB。 文献摘要 摘要处理查看

posted @ 2017-11-30 13:14  WangLC  阅读(460)  评论(0)  编辑  收藏  举报