【449】backup get weekly tweets
import pandas as pd
from datetime import datetime

fn = r"D:\OneDrive - UNSW\tweets_flu.csv"
df = pd.read_csv(fn)

# extract the week number ("%W") of each tweet's creation time
ws = []
for i in range(len(df)):
    t = df.iloc[i]['created_at']
    w = datetime.strptime(t, "%Y-%m-%d %H:%M:%S").strftime("%W")
    ws.append(w)

df['ws'] = ws
df['ws'].value_counts()
import pandas as pd
from datetime import datetime

fn = r"D:\OneDrive - UNSW\tweets_flu.csv"
df = pd.read_csv(fn)

# week number ("%W") of each tweet's creation time
ws = []
for i in range(len(df)):
    t = df.iloc[i]['created_at']
    w = datetime.strptime(t, "%Y-%m-%d %H:%M:%S").strftime("%W")
    ws.append(w)

df['ws'] = ws
a = df['ws'].value_counts()

# turn the value counts into (week, count) pairs and sort them by week
wss = []
for i in a.index:
    wss.append((i, a[i]))
sorted(wss, key=lambda x: x[0])

Output:
[('12', 56), ('13', 22), ('14', 41), ('15', 52), ('16', 25), ('17', 45), ('18', 63), ('19', 54), ('20', 51), ('21', 143), ('22', 77), ('23', 53), ('24', 133), ('25', 93), ('26', 77), ('27', 125), ('28', 63), ('29', 67), ('30', 56), ('31', 67), ('32', 62), ('33', 67), ('34', 54), ('35', 41), ('36', 43), ('37', 24), ('38', 29), ('39', 33), ('40', 14)]
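The same weekly counts can be produced without the explicit loop. A minimal sketch of a vectorised alternative, assuming the created_at strings are parseable by pd.to_datetime; file and column names follow the block above:

import pandas as pd

df = pd.read_csv(r"D:\OneDrive - UNSW\tweets_flu.csv")

# parse all timestamps at once, then format the week number for every row
df['ws'] = pd.to_datetime(df['created_at']).dt.strftime("%W")

# counts per week, already sorted by week number
weekly = df['ws'].value_counts().sort_index()
print(list(weekly.items()))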
Save the data to a CSV file.
fn = r"D:\OneDrive - UNSW\01-UNSW\02-Papers\20190514-Prediction Location of Twitter\Data\Paper\weekly_tweets.csv"

# the sorted (week, count) pairs built in the block above
a = sorted(wss, key=lambda x: x[0])

fo = open(fn, "w+")
for e in a:
    fo.write(e[0] + ", " + str(e[1]) + "\n")
fo.close()
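An equivalent write using the csv module with a context manager; a sketch that reuses fn and wss from the blocks above, and adds a header row that is not in the original output file:

import csv

with open(fn, "w", newline="") as fo:
    writer = csv.writer(fo)
    writer.writerow(["week", "count"])               # header row (an addition for readability)
    writer.writerows(sorted(wss, key=lambda x: x[0]))  # one "week, count" line per pair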
>>> import re
>>> def word_extraction(sentence):
        ignore = ['a', "the", "is"]
        words = re.sub(r"[^\w]", " ", sentence).split()
        cleaned_text = [w.lower() for w in words if w not in ignore]
        return cleaned_text

>>> a = "alex is. good guy."
>>> word_extraction(a)
['alex', 'good', 'guy']
>>> a = ["fluence", 'good']
>>> b = 'flu'
>>> b in a
False
>>> 'go' in a
False
>>> 'good' in a
True
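Note that `in` against a list tests exact element membership, which is why 'flu' does not match 'fluence' above. If substring matching is what is wanted, a small sketch:

a = ["fluence", "good"]
b = "flu"

# True if any element contains the keyword as a substring
print(any(b in w for w in a))   # True, because 'flu' is inside 'fluence'

# exact membership, as in the transcript above
print(b in a)                   # False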
>>> import nltk
>>> nltk.download('stopwords')
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\z5194293\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.
True
>>> from nltk.corpus import stopwords
>>> stopwords.words('english')
['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn', "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't", 'wouldn', "wouldn't"]
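The downloaded stopword list can replace the hard-coded ignore list in word_extraction above. A sketch under that assumption (the function body is otherwise the same):

import re
from nltk.corpus import stopwords

STOP = set(stopwords.words('english'))

def word_extraction(sentence):
    # keep word characters only, lower-case, and drop NLTK English stopwords
    words = re.sub(r"[^\w]", " ", sentence).split()
    return [w.lower() for w in words if w.lower() not in STOP]

print(word_extraction("alex is. good guy."))   # ['alex', 'good', 'guy']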
Python 3.7.0 (v3.7.0:1bf9cc5093, Jun 27 2018, 04:59:51) [MSC v.1914 64 bit (AMD64)] on win32
Type "copyright", "credits" or "license()" for more information.
>>> fn = r"D:\Data\CSV\AUS_AVG_tweets_Centroid_Lon_lat.csv"
>>> import pandas as pd
>>> df = pd.read_csv(fn)
>>> df.head()
   OBJECTID_1  OBJECTID  ...       d_y   distance
0           1         1  ...  0.009560   1.149847
1           2         2  ...  0.204213  36.363808
2           3         3  ... -0.003238   0.394919
3           4         4  ...  0.000109   1.063002
4           5         5  ... -0.004560   0.549273

[5 rows x 14 columns]
>>> df.columns
Index(['OBJECTID_1', 'OBJECTID', 'SA2_NAME16', 'CENTROID_X', 'CENTROID_Y',
       'State', 'Count_', 'Avg_co_lon', 'Avg_co_lat', 'Shape_Length',
       'Shape_Area', 'd_x', 'd_y', 'distance'],
      dtype='object')
>>> dff = df[['SA2_NAME16']]
>>> dff.head()
                      SA2_NAME16
0                         Albany
1                  Albany Region
2  Alexander Heights - Koondoola
3             Alkimos - Eglinton
4           Applecross - Ardross
>>> dff = df[['SA2_NAME16', 'CENTROID_X']]
>>> dff.head()
                      SA2_NAME16  CENTROID_X
0                         Albany  117.899601
1                  Albany Region  118.207172
2  Alexander Heights - Koondoola  115.865812
3             Alkimos - Eglinton  115.677976
4           Applecross - Ardross  115.836085
>>> dff = df[['SA2_NAME16', 'CENTROID_X', 'CENTROID_Y', 'State', 'Avg_co_lon', 'Avg_co_lat', 'Shape_Area']]
>>> dff.head()
                      SA2_NAME16  CENTROID_X  ...  Avg_co_lat  Shape_Area
0                         Albany  117.899601  ...  -35.017921    0.003012
1                  Albany Region  118.207172  ...  -34.923186    0.394533
2  Alexander Heights - Koondoola  115.865812  ...  -31.831628    0.000638
3             Alkimos - Eglinton  115.677976  ...  -31.600350    0.003104
4           Applecross - Ardross  115.836085  ...  -32.014606    0.000518

[5 rows x 7 columns]
>>> dff.columns
Index(['SA2_NAME16', 'CENTROID_X', 'CENTROID_Y', 'State', 'Avg_co_lon',
       'Avg_co_lat', 'Shape_Area'],
      dtype='object')
>>> dff.to_csv(r"D:\Data\CSV\AUS_AVG_tweets_Centroid_Lon_lat_lite.csv", index=False")
SyntaxError: EOL while scanning string literal
>>> dff.to_csv(r"D:\Data\CSV\AUS_AVG_tweets_Centroid_Lon_lat_lite.csv", index=False)
>>> dff = pd.read_csv(r"D:\Data\CSV\AUS_AVG_tweets_Centroid_Lon_lat_lite.csv")
>>> dff.head()
                            NAME       CEN_X  ...      AVG_Y      AREA
0                         Albany  117.899601  ... -35.017921  0.003012
1                  Albany Region  118.207172  ... -34.923186  0.394533
2  Alexander Heights - Koondoola  115.865812  ... -31.831628  0.000638
3             Alkimos - Eglinton  115.677976  ... -31.600350  0.003104
4           Applecross - Ardross  115.836085  ... -32.014606  0.000518

[5 rows x 7 columns]
>>> dff.columns
Index(['NAME', 'CEN_X', 'CEN_Y', 'STATE', 'AVG_X', 'AVG_Y', 'AREA'], dtype='object')
>>>
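In the transcript the reloaded file already carries the short header names (NAME, CEN_X, ...), so the header row was presumably edited outside Python between the save and the re-read. The same renaming could be done in pandas before saving; a sketch, assuming the column-by-column mapping below is what was intended:

# hypothetical mapping from the selected columns to the short names seen after reload
rename_map = {
    'SA2_NAME16': 'NAME',
    'CENTROID_X': 'CEN_X',
    'CENTROID_Y': 'CEN_Y',
    'State':      'STATE',
    'Avg_co_lon': 'AVG_X',
    'Avg_co_lat': 'AVG_Y',
    'Shape_Area': 'AREA',
}
dff = dff.rename(columns=rename_map)
dff.to_csv(r"D:\Data\CSV\AUS_AVG_tweets_Centroid_Lon_lat_lite.csv", index=False)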