Python 对Twitter tweet的元素 (Word, Screen Name, Hash Tag)的词汇多样性分析

CODE:

#!/usr/bin/python 
# -*- coding: utf-8 -*-

'''
Created on 2014-7-3
@author: guaguastd
@name: tweet_lexical_diversity.py
'''
    
if __name__ == '__main__':

    # import login, see http://blog.csdn.net/guaguastd/article/details/31706155 
    from login import twitter_login

    # get the twitter access api
    twitter_api = twitter_login()
    
    # import tweet
    from tweet import extract_tweet_entities
    
    # import search
    from search import search_for_tweet
    
    # import lexical_diversity
    from lexical_diversity import lexical_diversity,average_words

    while 1:
        query = raw_input('\nInput the query (eg. #MentionSomeoneImportantForYou, exit to quit): ')
        
        if query == 'exit':
            print 'Successfully exit!'
            break
        
        statuses = search_for_tweet(twitter_api, query)
        status_texts,screen_names,hashtags,words = extract_tweet_entities(statuses)  
        
        for token in (words, screen_names, hashtags):
            print '\rLexical diversity of %s: ' % token
            print lexical_diversity(token)

        for status in (status_texts,):
            print '\rAverage words of %s: ' % status
            print average_words(status)

RESULT:

Input the query (eg. #MentionSomeoneImportantForYou, exit to quit): #MentionSomeoneImportantForYou
Length of statuses 30

Lexical diversity of [u'RT', u'@xmlovex:', u'#MentionSomeoneImportantForYou', u'@purpledrauhl_23', u'RT', u'@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@MissRosaa_', u'#MentionSomeoneImportantForYou', u'@justinbieber', u'"@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@_K_L_O_"', u'RT', u'@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@_K_L_O_', u'\u201c@0hDearPriscii:', u'"@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@0hDearPriscii"', u'aww', u'ily\U0001f618\U0001f46f\u201dily2\u2764\ufe0f', u'RT', u'@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@0hDearPriscii', u'"@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@0hDearPriscii"', u'aww', u'ily\U0001f618\U0001f46f', u'#MentionSomeoneImportantForYou', u'@', u'my', u'brotherrrr', u'http://t.co/LprqvaLvyu', u'RT', u'@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@BeyonceTapia', u'\U0001f498', u'RT', u'@thuggie_salma:', u'"@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@thuggie_salma"', u'baeee', u'\U0001f618\U0001f60f\U0001f62d', u'#MentionSomeoneImportantForYou', u'@BeyonceTapia', u'\U0001f498', u'"@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@thuggie_salma"', u'baeee', u'\U0001f618\U0001f60f\U0001f62d', u'RT', u'@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@thuggie_salma', u'RT', u'@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@NotNormal_Javi', u'#MentionSomeoneImportantForYou', u'@NotNormal_Javi', u'#MentionSomeoneImportantForYou', u'@thuggie_salma', u'RT', u'@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@EbbsContreras', u'RT', u'@sashaalexxa_:', u'#MentionSomeoneImportantForYou', u'@', u'#MentionSomeoneImportantForYou', u'@EbbsContreras', u'RT', u'@NotNormal_Javi:', u'#MentionSomeoneImportantForYou', u'cheeseburgers', u'\U0001f354\U0001f354', u'#MentionSomeoneImportantForYou', u'@TaeTae2Beast', u'#MentionSomeoneImportantForYou', u'@', u'#MentionSomeoneImportantForYou', u'@Brendaaa23', u'#MentionSomeoneImportantForYou', 
u'cheeseburgers', u'\U0001f354\U0001f354', u'#MentionSomeoneImportantForYou', u'@_K_L_O_', u'#MentionSomeoneImportantForYou', u'@MissRosaa_', u'#MentionSomeoneImportantForYou', u'@0hDearPriscii', u'@LoveASharie', u'@DJZeeti', u'Speechless', u'beauty', u'and', u'Pretty', u'smile', u'.#WomanCrushWednesday', u'#MentionSomeoneImportantForYou', u'#TeamSharie', u'@louiswonderwall', u'my', u'babeeeee\U0001f60d\U0001f60d\U0001f60d\U0001f60d\U0001f60d', u'#MentionSomeoneImportantForYou']: 
0.407079646018

Lexical diversity of [u'xmlovex', u'KillahPimpp', u'MissRosaa_', u'justinbieber', u'KillahPimpp', u'_K_L_O_', u'KillahPimpp', u'_K_L_O_', u'0hDearPriscii', u'KillahPimpp', u'0hDearPriscii', u'KillahPimpp', u'0hDearPriscii', u'KillahPimpp', u'0hDearPriscii', u'KillahPimpp', u'BeyonceTapia', u'thuggie_salma', u'KillahPimpp', u'thuggie_salma', u'BeyonceTapia', u'KillahPimpp', u'thuggie_salma', u'KillahPimpp', u'thuggie_salma', u'KillahPimpp', u'NotNormal_Javi', u'NotNormal_Javi', u'thuggie_salma', u'KillahPimpp', u'EbbsContreras', u'sashaalexxa_', u'EbbsContreras', u'NotNormal_Javi', u'TaeTae2Beast', u'Brendaaa23', u'_K_L_O_', u'MissRosaa_', u'0hDearPriscii', u'LoveASharie', u'DJZeeti', u'louiswonderwall']: 
0.380952380952

Lexical diversity of [u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'WomanCrushWednesday', u'MentionSomeoneImportantForYou', u'TeamSharie', u'MentionSomeoneImportantForYou']: 
0.09375

Average words of [u'RT @xmlovex: #MentionSomeoneImportantForYou @purpledrauhl_23', u'RT @KillahPimpp: #MentionSomeoneImportantForYou @MissRosaa_', u'#MentionSomeoneImportantForYou @justinbieber', u'"@KillahPimpp: #MentionSomeoneImportantForYou @_K_L_O_"', u'RT @KillahPimpp: #MentionSomeoneImportantForYou @_K_L_O_', u'\u201c@0hDearPriscii: "@KillahPimpp: #MentionSomeoneImportantForYou @0hDearPriscii" aww ily\U0001f618\U0001f46f\u201dily2\u2764\ufe0f', u'RT @KillahPimpp: #MentionSomeoneImportantForYou @0hDearPriscii', u'"@KillahPimpp: #MentionSomeoneImportantForYou @0hDearPriscii" aww ily\U0001f618\U0001f46f', u'#MentionSomeoneImportantForYou @ my brotherrrr http://t.co/LprqvaLvyu', u'RT @KillahPimpp: #MentionSomeoneImportantForYou @BeyonceTapia \U0001f498', u'RT @thuggie_salma: "@KillahPimpp: #MentionSomeoneImportantForYou @thuggie_salma" baeee \U0001f618\U0001f60f\U0001f62d', u'#MentionSomeoneImportantForYou @BeyonceTapia \U0001f498', u'"@KillahPimpp: #MentionSomeoneImportantForYou @thuggie_salma" baeee \U0001f618\U0001f60f\U0001f62d', u'RT @KillahPimpp: #MentionSomeoneImportantForYou @thuggie_salma', u'RT @KillahPimpp: #MentionSomeoneImportantForYou @NotNormal_Javi', u'#MentionSomeoneImportantForYou @NotNormal_Javi', u'#MentionSomeoneImportantForYou @thuggie_salma', u'RT @KillahPimpp: #MentionSomeoneImportantForYou @EbbsContreras', u'RT @sashaalexxa_: #MentionSomeoneImportantForYou @', u'#MentionSomeoneImportantForYou @EbbsContreras', u'RT @NotNormal_Javi: #MentionSomeoneImportantForYou cheeseburgers \U0001f354\U0001f354', u'#MentionSomeoneImportantForYou @TaeTae2Beast', u'#MentionSomeoneImportantForYou @', u'#MentionSomeoneImportantForYou @Brendaaa23', u'#MentionSomeoneImportantForYou cheeseburgers \U0001f354\U0001f354', u'#MentionSomeoneImportantForYou @_K_L_O_', u'#MentionSomeoneImportantForYou @MissRosaa_', u'#MentionSomeoneImportantForYou @0hDearPriscii', u'@LoveASharie @DJZeeti Speechless beauty  and Pretty smile .#WomanCrushWednesday  
#MentionSomeoneImportantForYou  #TeamSharie', u'@louiswonderwall my babeeeee\U0001f60d\U0001f60d\U0001f60d\U0001f60d\U0001f60d #MentionSomeoneImportantForYou']: 
3.76666666667

Input the query (eg. #MentionSomeoneImportantForYou, exit to quit): 

posted on 2014-11-03 13:35  gcczhongduan  阅读(477)  评论(0)  编辑  收藏  举报