json2mysql
import json import psycopg2 import sys conn = psycopg2.connect(dbname='dev', host='127.0.0.1', port='5439', user='master', password='123456') cur = conn.cursor() column_names = None def execute(sql): cur.execute(sql) conn.commit() def build_value(item): global column_names column_names = [] fields = [] if tablename.startswith('sold_'): keylist = ['soldUrl', 'country', 'category0', 'itemNumber', 'price', 'quantity', 'Date_of_Purchase', 'ts', 'ts_string'] else: keylist = None for key in sorted(item.keys()) if not keylist else keylist: field = item.get(key, None) if isinstance(field, str): field = field.replace("'", "''").replace("\\", "") column_names.append(key) if key in ['page', 'index']: fields.append(str(field)) else: fields.append("'{}'".format(field)) return '({})'.format(', '.join(fields)) if __name__ == '__main__': data_file = sys.argv[1] tablename = sys.argv[2] values = [] for line in open(data_file): item = json.loads(line) values.append(build_value(item)) if len(values) == 10000: sql = 'insert into {}({}) values {};'.format(tablename, ', '.join(column_names), ', '.join(values)) execute(sql) values.clear() execute('insert into {}({}) values {};'.format(tablename, ', '.join(column_names), ', '.join(values)))