用 Python 读取 Stata 文件的结构很方便
# Script: print the metadata attributes that pyreadstat exposes for a Stata
# .dta file. The dict literals shown after some print() calls are the sample
# outputs recorded in the original notes.
import pyreadstat

# read_dta returns the data and a metadata object; only `meta` is used below.
dataframe, meta = pyreadstat.read_dta("1.dta")

# Variable labels.
print(meta.column_labels)

# Variable names.
print(meta.column_names)

# Variable names mapped to their labels.
print(meta.column_names_to_labels)

# File format.
print(meta.file_format)

# Number of variables (columns).
print(meta.number_columns)

# Number of observations (rows).
print(meta.number_rows)

# Original Stata variable types; in the sample output these are the Stata
# display formats.
print(meta.original_variable_types)
# {'make': '%-18s', 'price': '%8.0gc', 'mpg': '%8.0g', 'rep78': '%8.0g',
#  'headroom': '%6.1f', 'trunk': '%8.0g', 'weight': '%8.0gc',
#  'length': '%8.0g', 'turn': '%8.0g', 'displacement': '%8.0g',
#  'gear_ratio': '%6.2f', 'foreign': '%8.0g'}

# Variable types as reported by readstat (string / int8 / int16 / float in
# the sample output).
print(meta.readstat_variable_types)
# {'make': 'string', 'price': 'int16', 'mpg': 'int16', 'rep78': 'int16',
#  'headroom': 'float', 'trunk': 'int16', 'weight': 'int16',
#  'length': 'int16', 'turn': 'int16', 'displacement': 'int16',
#  'gear_ratio': 'float', 'foreign': 'int8'}

# Value labels, keyed by label name.
print(meta.value_labels)
# {'origin': {0: 'Domestic', 1: 'Foreign'}}

# Variable alignment.
print(meta.variable_alignment)
# {'make': 'left', 'price': 'right', 'mpg': 'right', 'rep78': 'right',
#  'headroom': 'right', 'trunk': 'right', 'weight': 'right',
#  'length': 'right', 'turn': 'right', 'displacement': 'right',
#  'gear_ratio': 'right', 'foreign': 'right'}

# Variable display width.
print(meta.variable_display_width)
# {'make': -18, 'price': 8, 'mpg': 8, 'rep78': 8, 'headroom': 6, 'trunk': 8,
#  'weight': 8, 'length': 8, 'turn': 8, 'displacement': 8, 'gear_ratio': 6,
#  'foreign': 8}

# Variable storage width.
print(meta.variable_storage_width)
# {'make': 19, 'price': 2, 'mpg': 2, 'rep78': 2, 'headroom': 4, 'trunk': 2,
#  'weight': 2, 'length': 2, 'turn': 2, 'displacement': 2, 'gear_ratio': 4,
#  'foreign': 1}

# Label attached to each variable (key: variable name, value: label name).
print(meta.variable_to_label)
# {'foreign': 'origin'}

# Value labels, keyed by variable name.
print(meta.variable_value_labels)
# {'foreign': {0: 'Domestic', 1: 'Foreign'}}
# Script: list the string variables of a Stata .dta file as
# "<name> varchar <width>" lines, deriving the width from each variable's
# Stata display format.
import json  # not used by this snippet; kept from the original import list

import pyreadstat

# read_dta returns the data and a metadata object; only `meta` is used below.
dataframe, meta = pyreadstat.read_dta("1.dta")

# `meta` also exposes (each can simply be printed): column_labels,
# column_names, column_names_to_labels, file_format, number_columns,
# number_rows, readstat_variable_types, value_labels, variable_alignment,
# variable_display_width, variable_storage_width, variable_to_label and
# variable_value_labels.

# original_variable_types maps each variable name to its Stata display
# format. Sample recorded in the original notes:
# {'make': '%-18s', 'price': '%8.0gc', 'mpg': '%8.0g', 'rep78': '%8.0g',
#  'headroom': '%6.1f', 'trunk': '%8.0g', 'weight': '%8.0gc',
#  'length': '%8.0g', 'turn': '%8.0g', 'displacement': '%8.0g',
#  'gear_ratio': '%6.2f', 'foreign': '%8.0g'}
variable_formats = meta.original_variable_types

for name, fmt in variable_formats.items():
    # Only formats ending in "s" (e.g. "%-18s") are handled. Other variables
    # are skipped explicitly so that no type/width from a previous iteration
    # is ever printed for them.
    if not fmt.endswith("s"):
        continue
    # Drop the trailing "s" and the leading "%" / "%-": "%-18s" -> "18".
    width = fmt[:-1].lstrip("%-")
    print(name, "varchar", width)