pandas(一):选取部分(行、列)写入到另一个文件

一、选取列

import pandas as pd
# Load the tab-separated source table.
df = pd.read_csv('zhihutest.csv', sep="\t")

# Categorical features (16 columns).
fixlen_category_columns = [
    'm_sex',
    'm_access_frequencies',
    'm_twoA',
    'm_twoB',
    'm_twoC',
    'm_twoD',
    'm_twoE',
    'm_categoryA',
    'm_categoryB',
    'm_categoryC',
    'm_categoryD',
    'm_categoryE',
    'm_num_interest_topic',
    'num_topic_attention_intersection',
    'q_num_topic_words',
    'num_topic_interest_intersection',
]

# Numeric features (7 columns).
fixlen_number_columns = [
    'm_salt_score',
    'm_num_atten_topic',
    'q_num_title_chars_words',
    'q_num_desc_chars_words',
    'q_num_desc_words',
    'q_num_title_words',
    'days_to_invite',
]

target = ['label']
text = ["q_title_words"]

# Column bookkeeping:
#   total columns         = 25  (1 target + 7 numeric + 16 categorical + 1 text)
#   numeric columns       = 7
#   numeric + categorical = 23
# Output column order: target, numeric, categorical, text.
cols = [*target, *fixlen_number_columns, *fixlen_category_columns, *text]

# Keep only the selected columns, in the order given by `cols`.
fout = df[cols]
print(fout)

# mode='a' appends to zhihu.txt instead of overwriting it; no header row and
# no index column are written, and fields are tab-separated.
fout.to_csv("zhihu.txt", sep='\t', index=False, header=False, mode='a')

二、选取行

import pandas as pd

# Read at most the first 20000 data rows of the comma-separated input.
df = pd.read_csv('criteo_sampled_data.csv', sep=",", nrows=20000)

# frac=1.0 samples every row, i.e. returns all rows in shuffled order.
# NOTE(review): no random_state is passed, so the split differs between runs.
df = df.sample(frac=1.0)

# The first 20% of the shuffled rows become the test set, the rest the
# training set.
cut_idx = int(round(0.2 * df.shape[0]))
df_test, df_train = df.iloc[:cut_idx], df.iloc[cut_idx:]

# Write each split to the file that matches its name. (Previously the two
# frames were swapped: the test rows ended up in criteo_train.txt and the
# training rows in criteo_test.txt.)
df_train.to_csv("criteo_train.txt", index=False, sep='\t')
df_test.to_csv("criteo_test.txt", index=False, sep='\t')

 

posted @ 2021-04-09 15:10  jasonzhangxianrong  阅读(1940)  评论(0)  编辑  收藏  举报