import pandas as pd
# Read the training CSV into a DataFrame and preview its first rows.
train_csv = "./facebook/train.csv"
data = pd.read_csv(train_csv)
data.head()
| row_id | x | y | accuracy | time | place_id |
---|
0 | 0 | 0.7941 | 9.0809 | 54 | 470702 | 8523065625 |
---|
1 | 1 | 5.9567 | 4.7968 | 13 | 186555 | 1757726713 |
---|
2 | 2 | 8.3078 | 7.0407 | 74 | 322648 | 1137537235 |
---|
3 | 3 | 7.3665 | 2.5165 | 65 | 704587 | 6567393236 |
---|
4 | 4 | 4.0961 | 1.1307 | 31 | 472130 | 7440663949 |
---|
# Display the rows with 2 < x < 2.5 and 1.0 < y < 1.5.
# The result is not assigned, so `data` itself stays unfiltered.
data.query("x < 2.5 & x > 2 & y < 1.5 & y > 1.0")
| row_id | x | y | accuracy | time | place_id |
---|
112 | 112 | 2.2360 | 1.3655 | 66 | 623174 | 7663031065 |
---|
180 | 180 | 2.2003 | 1.2541 | 65 | 610195 | 2358558474 |
---|
367 | 367 | 2.4108 | 1.3213 | 74 | 579667 | 6644108708 |
---|
874 | 874 | 2.0822 | 1.1973 | 320 | 143566 | 3229876087 |
---|
1022 | 1022 | 2.0160 | 1.1659 | 65 | 207993 | 3244363975 |
---|
... | ... | ... | ... | ... | ... | ... |
---|
29115112 | 29115112 | 2.1889 | 1.2914 | 168 | 721885 | 4606837364 |
---|
29115204 | 29115204 | 2.1193 | 1.4692 | 58 | 563389 | 2074133146 |
---|
29115338 | 29115338 | 2.0007 | 1.4852 | 25 | 765986 | 6691588909 |
---|
29115464 | 29115464 | 2.4132 | 1.4237 | 61 | 151918 | 7396159924 |
---|
29117493 | 29117493 | 2.2948 | 1.0504 | 81 | 79569 | 1168869217 |
---|
83197 rows × 6 columns
# Preview the first rows of `data` again (still the full, unfiltered frame).
data.head()
| row_id | x | y | accuracy | time | place_id |
---|
0 | 0 | 0.7941 | 9.0809 | 54 | 470702 | 8523065625 |
---|
1 | 1 | 5.9567 | 4.7968 | 13 | 186555 | 1757726713 |
---|
2 | 2 | 8.3078 | 7.0407 | 74 | 322648 | 1137537235 |
---|
3 | 3 | 7.3665 | 2.5165 | 65 | 704587 | 6567393236 |
---|
4 | 4 | 4.0961 | 1.1307 | 31 | 472130 | 7440663949 |
---|
# Inspect the raw "time" column before deriving date features from it.
data["time"]
0 470702
1 186555
2 322648
3 704587
4 472130
...
29118016 399740
29118017 125480
29118018 737758
29118019 764975
29118020 102842
Name: time, Length: 29118021, dtype: int64
# Interpret the integer "time" column as a number of seconds (unit="s")
# and convert it to timestamps.
time_value = pd.to_datetime(data["time"], unit="s")
date = pd.DatetimeIndex(time_value)

# Add one column per calendar component of the timestamp.
for part in ("day", "weekday", "hour"):
    data[part] = getattr(date, part)
data.head()
| row_id | x | y | accuracy | time | place_id | day | weekday | hour |
---|
0 | 0 | 0.7941 | 9.0809 | 54 | 470702 | 8523065625 | 6 | 1 | 10 |
---|
1 | 1 | 5.9567 | 4.7968 | 13 | 186555 | 1757726713 | 3 | 5 | 3 |
---|
2 | 2 | 8.3078 | 7.0407 | 74 | 322648 | 1137537235 | 4 | 6 | 17 |
---|
3 | 3 | 7.3665 | 2.5165 | 65 | 704587 | 6567393236 | 9 | 4 | 3 |
---|
4 | 4 | 4.0961 | 1.1307 | 31 | 472130 | 7440663949 | 6 | 1 | 11 |
---|
# Show the first entries of place_count whose value is greater than 3.
# NOTE(review): place_count is not defined in the visible source. The output
# (index "place_id", "Name: row_id") suggests a per-place_id row count,
# e.g. data.groupby("place_id").count()["row_id"] -- confirm.
place_count[place_count > 3].head()
place_id
1000015801     78
1000017288     95
1000025138    563
1000052096    961
1000063498     60
Name: row_id, dtype: int64
# Keep only the rows whose place_id is among the index labels of the
# place_count entries that are greater than 3.
frequent_places = place_count[place_count > 3].index.values
keep_mask = data["place_id"].isin(frequent_places)
data_final = data[keep_mask]
data_final.head()
| row_id | x | y | accuracy | time | place_id | day | weekday | hour |
---|
0 | 0 | 0.7941 | 9.0809 | 54 | 470702 | 8523065625 | 6 | 1 | 10 |
---|
1 | 1 | 5.9567 | 4.7968 | 13 | 186555 | 1757726713 | 3 | 5 | 3 |
---|
2 | 2 | 8.3078 | 7.0407 | 74 | 322648 | 1137537235 | 4 | 6 | 17 |
---|
3 | 3 | 7.3665 | 2.5165 | 65 | 704587 | 6567393236 | 9 | 4 | 3 |
---|
4 | 4 | 4.0961 | 1.1307 | 31 | 472130 | 7440663949 | 6 | 1 | 11 |
---|
# Preview the feature table.
# NOTE(review): x is not defined in the visible source. The output shows the
# columns x, y, accuracy, day, weekday, hour -- presumably selected from
# data_final; confirm.
x.head()
| x | y | accuracy | day | weekday | hour |
---|
0 | 0.7941 | 9.0809 | 54 | 6 | 1 | 10 |
---|
1 | 5.9567 | 4.7968 | 13 | 3 | 5 | 3 |
---|
2 | 8.3078 | 7.0407 | 74 | 4 | 6 | 17 |
---|
3 | 7.3665 | 2.5165 | 65 | 9 | 4 | 3 |
---|
4 | 4.0961 | 1.1307 | 31 | 6 | 1 | 11 |
---|
# Preview the target values.
# NOTE(review): y is not defined in the visible source. The output is a
# Series named "place_id" -- presumably data_final["place_id"]; confirm.
y.head()
0    8523065625
1    1757726713
2    1137537235
3    6567393236
4    7440663949
Name: place_id, dtype: int64
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
y_predict: [3152373328 1911687695 3376568433 ... 5132208889 6142130159 6434055790]
比对真实值与预测值:
2824787     False
25584762    False
28945483    False
13511540    False
28128585    False
            ...
6067146     False
25971193    False
4086338     False
24043598    False
23926688    False
Name: place_id, Length: 7279238, dtype: bool
准确率: 0.11855224406730484
最佳参数: {'n_neighbors': 3}
最佳结果: 0.09723119370461579
最佳估计器: KNeighborsClassifier(n_neighbors=3)
最佳验证结果: {'mean_fit_time': array([113.03914762, 128.09856272]), 'std_fit_time': array([2.48930113, 6.34220784]), 'mean_score_time': array([ 788.99727678, 1027.41904926]), 'std_score_time': array([ 9.12906035, 73.60085472]), 'param_n_neighbors': masked_array(data=[3, 5], mask=[False, False], fill_value='?', dtype=object), 'params': [{'n_neighbors': 3}, {'n_neighbors': 5}], 'split0_test_score': array([0.09731527, 0.09202337]), 'split1_test_score': array([0.09721594, 0.09201059]), 'split2_test_score': array([0.09716237, 0.09203766]), 'mean_test_score': array([0.09723119, 0.09202387]), 'std_test_score': array([6.33458518e-05, 1.10542744e-05]), 'rank_test_score': array([1, 2])}
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 单线程的Redis速度为什么快?
· 展开说说关于C#中ORM框架的用法!
· Pantheons:用 TypeScript 打造主流大模型对话的一站式集成库
· SQL Server 2025 AI相关能力初探
· 为什么 退出登录 或 修改密码 无法使 token 失效