pandas

1、读写csv文件

import csv
import sys

input_file = sys.argv[1]
output_file = sys.argv[2]
with open(input_file, 'r') as csv_in_file:
    with open(output_file, 'w') as csv_out_file:
         filereader = csv.reader(csv_in_file, delimiter=',')
         filewriter = csv.writer(csv_out_file, delimiter=',')
         for row_list in filereader:
             print(row_list)
             filewriter.writerow(row_list)

2、筛选特定行

2.1 行中的值满足某个条件

基础python版:

import csv
import sys

input_file = sys.argv[1]
output_file = sys.argv[2]

with open(input_file, 'rU') as csv_in_file:
    with open(output_file, 'w') as csv_out_file:
        filereader = csv.reader(csv_in_file)
        filewriter = csv.writer(csv_out_file)
        header = next(filereader)

        #print header

        filewriter.writerow(header)
        for row_list in filereader:
            print row_list
            supplier = str(row_list[0]).strip()
            cost = str(row_list[3]).strip("$").replace(',', '')
            if supplier == 'Supplier X' or float(cost)>600.0:
                filewriter.writerow(row_list)

 pandas版(主要用到了loc()函数)

import pandas as pd 
import sys

input_file = sys.argv[1]
output_file = sys.argv[2]

data_frame = pd.read_csv(input_file)
data_frame['Cost'] = data_frame['Cost'].str.strip('$').astype(float)
data_frame_value_meets_condition = data_frame.loc[(data_frame['Supplier Name'].str.contains('X')) & (data_frame['Cost']>600.0), :]
data_frame_value_meets_condition.to_csv(output_file, index=False)

 2.2 行中的值属于某个集合

基础python版:

pandas版:

2.3 行中的值匹配于某个正则表达式/模式

基础python版:

import csv
import sys
import re

input_file = sys.argv[1]
output_file = sys.argv[2]

pattern = re.compile(r'(?P<my_pattern_group>^001-.*)', re.I)

with open(input_file, 'rU') as csv_in_file:
    with open(output_file, 'w') as csv_out_file:
        filereader = csv.reader(csv_in_file)
        filewriter = csv.writer(csv_out_file)
        header = next(filereader)
        filewriter.writerow(header)

        for row_list in filereader:
            invoice_number = row_list[1]
            if pattern.search(invoice_number):
                filewriter.writerow(row_list)

pandas版:

import pandas as pd 
import sys

input_file = sys.argv[1]
output_file = sys.argv[2]

data_frame = pd.read_csv(input_file)
data_frame_value_matches_pattern = data_frame.loc[data_frame['Invoice Number'].str.startswith("001-"), :]

data_frame_value_matches_pattern.to_csv(output_file, index=False)

 3、筛选特定的列

3.1 列索引值

基础python版

import csv
import sys

input_file = sys.argv[1]
output_file = sys.argv[2]

my_columns = [0,3]

with open(input_file, 'rU') as csv_in_file:
    with open(output_file, 'w') as csv_out_file:
        filereader = csv.reader(csv_in_file)
        filewriter = csv.writer(csv_out_file)

        for row_list in filereader:
            row_list_ouput = []
            for index_value in my_columns:
                row_list_ouput.append(row_list[index_value])
            filewriter.writerow(row_list_ouput)

pandas版

import sys
import pandas as pd 

input_file = sys.argv[1]
output_file = sys.argv[2]

data_frame = pd.read_csv(input_file)
data_frame_column_by_index = data_frame.iloc[:,[0,3]]
data_frame_column_by_index.to_csv(output_file, index=False)

 3.2 列标题

基础python版

pandas版

4、选定特定的行

 

posted @ 2018-09-13 16:16  四季万花筒  阅读(227)  评论(0编辑  收藏  举报