关于 Alzheimer's disease(阿尔茨海默病)论文的统计

1.获取2016年的所有关键字,保存到keyword_2016.json中

import pymysql
import json

# Fetch the keyword string and PMC id of every 2016 record whose keyword
# string is non-empty.  Connection settings are the local values used by the
# original script.
conn = pymysql.connect(
    host='localhost',
    port=3306,
    user='root',
    passwd='',
    db='python',
)
cursor = conn.cursor()

# Standard `AND` replaces the MySQL-specific `&&`, which is deprecated.
sql = "SELECT union_kwd_str,pmc_id FROM alzheimer WHERE pub_year = '2016' AND union_kwd_str != ''"
try:
    # `a` is used below as the number of selected rows ('count' in the output).
    a = cursor.execute(sql)
    print(a)  # parenthesised form prints the same on Python 2 and Python 3
    b = cursor.fetchall()  # every selected row as (union_kwd_str, pmc_id)
finally:
    # Release the cursor and the connection even if the query fails; nothing
    # after this point touches the database.
    cursor.close()
    conn.close()

# Keyword strings, in row order (the name is kept because later code uses it).
abstract_list = [row[0] for row in b]
# Row index -> pmc_id, so an entry of abstract_list can be traced to its paper.
pmc_id_dict = {j: row[1] for j, row in enumerate(b)}



def output_to_json(data, filename):
    """Write *data* to *filename* as JSON and return the JSON text.

    The file is opened in write mode, so any existing content is replaced.
    """
    with open(filename, 'w') as out:
        # Serialize after the file is open, matching the original ordering.
        payload = json.dumps(data)
        out.write(payload)
    return payload

# Bundle the query results and store them for the keyword-counting step.
output_data = {}
output_data['pub_year'] = "2016"
output_data['count'] = a                # number of rows the query selected
output_data['keyword'] = abstract_list  # one keyword string per row
output_to_json(output_data, 'keyword_2016.json')

从keyword_2016.json中读取关键词,并统计选出前25的关键词

import re  
import collections  
import json

def input_from_json(filename):
    """Open *filename* for reading and return its parsed JSON content."""
    with open(filename, 'r') as source:
        return json.load(source)

def count_word(path):
    """Count keyword occurrences in the JSON file at *path*.

    The file's 'keyword' entry is read as a list of strings; each string is
    split on ',' and every resulting piece is counted exactly as it appears
    (no trimming or case folding).

    Returns a dict mapping each piece to the number of times it occurred.
    """
    counts = {}
    for keyword_str in input_from_json(path)['keyword']:
        for token in keyword_str.split(','):
            counts[token] = counts.get(token, 0) + 1
    return counts
  
      

 
def sort_by_count(d):
    """Return an OrderedDict of *d*'s items ordered from largest to smallest value."""
    def negated_count(item):
        # Negating the count makes the ascending sort yield descending counts.
        return -item[1]

    ranked = list(d.items())
    ranked.sort(key=negated_count)
    return collections.OrderedDict(ranked)

 
if __name__ == '__main__':
    # Rank the keywords stored in keyword_2016.json and write the 25 most
    # frequent ones to sort_keyword_2016.json as {"name", "value"} objects
    # separated by commas.  The output has no surrounding brackets, so it is
    # a fragment rather than a standalone JSON document.
    file_name = "keyword_2016.json"

    dword = count_word(file_name)
    dword = sort_by_count(dword)

    # Slicing stops after 25 entries instead of walking every keyword.
    top_keywords = list(dword.items())[:25]

    # Underscores in a keyword are shown as spaces in the output.
    json_items = [
        json.dumps({'name': key.replace("_", " "), 'value': value})
        for key, value in top_keywords
    ]

    # join() puts commas only between items, so there is no trailing comma
    # when fewer than 25 keywords exist; the `with` block closes the file.
    with open('sort_keyword_2016.json', 'w') as fobj2:
        fobj2.write(','.join(json_items))

  

2.获取发表论文量排名前十的国家

1)把所有第一作者的信息保存到authorinfor.json中

import pymysql
import json

# Fetch the author-information string and PMC id of every record whose
# author information is non-empty.  Connection settings are the local values
# used by the original script.
conn = pymysql.connect(
    host='localhost',
    port=3306,
    user='root',
    passwd='',
    db='python',
)
cursor = conn.cursor()

sql = "SELECT authorinfor,pmc_id FROM alzheimer WHERE authorinfor != ''"
try:
    # `a` is used below as the number of selected rows ('count' in the output).
    a = cursor.execute(sql)
    print(a)  # parenthesised form prints the same on Python 2 and Python 3
    b = cursor.fetchall()  # every selected row as (authorinfor, pmc_id)
finally:
    # Release the cursor and the connection even if the query fails; nothing
    # after this point touches the database.
    cursor.close()
    conn.close()

# Author-information strings, in row order.
authorinfor_list = [row[0] for row in b]
# Row index -> pmc_id, so an entry of authorinfor_list can be traced to its paper.
pmc_id_dict = {j: row[1] for j, row in enumerate(b)}

def output_to_json(data, filename):
    """Dump *data* as JSON into *filename* (overwriting it) and return the JSON string."""
    out = open(filename, 'w')
    try:
        out.write(json.dumps(data))
    finally:
        # Always release the file handle, even if serialization fails.
        out.close()
    return json.dumps(data)

# Bundle the query results and store them for the country-ranking step.
# NOTE(review): 'pub_year' is hard-coded to "2016" although the query that
# fills authorinfor_list has no pub_year filter -- confirm this is intended.
output_data = {}
output_data['pub_year'] = "2016"
output_data['count'] = a                       # number of rows the query selected
output_data['authorinfor'] = authorinfor_list  # one author-info string per row
output_data['pmc_id'] = pmc_id_dict            # row index -> pmc_id
output_to_json(output_data, 'authorinfor.json')

2)选出排名前十的国家

import re  
import collections  
import json

def input_from_json(filename):
    """Return the object decoded from the JSON text stored in *filename*."""
    with open(filename, 'r') as handle:
        text = handle.read()
    return json.loads(text)

def count_word(path):
    """Count records per country from the author-information JSON at *path*.

    The file's 'authorinfor' entry is read as a list of strings.  For each
    string, the text after the last comma is taken as the country, and every
    '.' and newline character is removed from it.

    Returns a dict mapping each country string to its number of occurrences.
    """
    result = {}
    authorinfor_list = input_from_json(path)['authorinfor']
    for all_the_text in authorinfor_list:
        country = all_the_text.split(',')[-1]
        # Plain replace() avoids the invalid "\." escape the regex version
        # needed in a non-raw string literal.
        country = country.replace(".", "").replace("\n", "")
        # Python 2 yields unicode here and still gets UTF-8 encoded as before;
        # on Python 3 the value is already str and must stay str, because
        # bytes cannot be serialized by json.dumps later on.
        if not isinstance(country, str):
            country = country.encode('utf-8')
        # NOTE(review): surrounding whitespace is kept, so " USA" and "USA"
        # are counted separately -- confirm whether strip() is wanted.
        result[country] = result.get(country, 0) + 1
    return result
      
 
def sort_by_count(d):
    """Build an OrderedDict from *d* with entries arranged by descending value."""
    ordered = collections.OrderedDict()
    # Sorting on the negated value puts the largest counts first.
    for name, count in sorted(d.items(), key=lambda pair: -pair[1]):
        ordered[name] = count
    return ordered

 
if __name__ == '__main__':
    # Rank countries by record count, write the leading entries to
    # sort_country.json (one JSON object per line), and print the head of the
    # ranking.
    file_name = "authorinfor.json"

    dword = count_word(file_name)
    dword = sort_by_count(dword)

    # Materialize the ranking once so it can be sliced on Python 2 and 3.
    ranked = list(dword.items())

    # The original wrote entries while its 1-based counter was < 50, i.e. the
    # first 49 entries; the `with` block closes the file afterwards.
    with open('sort_country.json', 'w') as fobj2:
        for country, value in ranked[:49]:
            fobj2.write(json.dumps({'name': country, 'value': value}))
            fobj2.write('\n')

    countrylist = [name for name, _ in ranked]
    valuelist = [count for _, count in ranked]

    # Prints the first 11 entries, as the original did.
    print(countrylist[:11])
    print(valuelist[:11])

 

posted @ 2016-12-14 12:31  zdmlcmepl  阅读(311)  评论(0)  编辑  收藏  举报