CSDN数据库转换程序

由于泄露出来的CSDN数据库文件不是标准的sql语句,所以产生了用python进行转换的想法,顺便熟悉一下python对字符串和列表的操作效率。

原文件解码用的是decode('gb18030'),但仍出现了2个解码错误。不知哪位大侠能告诉我解决办法。

我用的是str.split()函数对file.readline()进行简单的截断。但是感觉很不优雅,却想不出简单的办法,坐等指点。

下面把处理函数贴出来

# -*- coding: utf-8 -*-

def d2(self,select,output):
    """Convert a GB18030 text dump into one multi-row SQL INSERT statement.

    Each non-blank line of ``select`` is split on whitespace; tokens 0, 2
    and 4 are written as the ``username``, ``userpass`` and ``useremail``
    columns (tokens 1 and 3 are presumably the separators of the dump
    format -- confirm against the actual file). Single quotes are escaped
    by doubling them. The statement is appended to ``output`` as UTF-8.

    Args:
        self: Object exposing ``label_3.setText``; it is only touched once
            per 1,000,000 rows written, to report progress.
        select: Path of the source dump file.
        output: Path of the SQL file to append to.

    Lines that cannot be decoded, or that have fewer than five tokens, are
    replaced by a fixed placeholder row so the row count stays stable.
    Nothing but an empty append happens when the source has no rows.
    """
    header = u'INSERT INTO `csdn_csdnuser` (`username`, `userpass`, `useremail`) VALUES\n'
    row_format = u"('%s', '%s', '%s')"
    placeholder = (u'qtsharp', u' ', u'qtsharp@qq.com')
    written = 0

    # Binary mode on both sides: the data is encoded/decoded explicitly, so
    # no platform newline or text-mode translation should get in the way.
    with open(select, 'rb') as all_temp, open(output, 'ab') as outsql:
        for raw in all_temp:
            try:
                temp = raw.replace(b"'", b"''").decode('gb18030').split()
                if not temp:
                    # A blank line is not the end of the file; skip it.
                    continue
                row = (temp[0], temp[2], temp[4])
            except (UnicodeDecodeError, IndexError):
                row = placeholder

            # The header precedes the first row, a comma every later one, so
            # the statement never contains a doubled or trailing separator.
            prefix = header if written == 0 else u",\n"
            outsql.write((prefix + row_format % row).encode('utf8'))
            written += 1

            if written % 1000000 == 0:
                # NOTE(review): the GUI calls this function from a worker
                # thread; updating a Qt widget from there is not thread-safe.
                self.label_3.setText(u'已经转换%s个' % written)

        if written:
            outsql.write(b';')

 

下面是用PySide建的程序界面

#!/usr/bin/env python

# -*- coding: utf-8 -*-



import sys

from PySide.QtCore import *

from PySide.QtGui import *

import thread

from form import Ui_Form
from output_func import d2


class MainUi(QWidget, Ui_Form):
    """Main window: choose a source file and an output file, then convert.

    The widgets (``select_button``, ``output_button``, ``start_button``,
    ``label``, ``label_2``, ``label_3``) are expected to be created by
    ``Ui_Form.setupUi``. The conversion itself is delegated to ``d2`` and
    runs on a separate thread.
    """

    # Path of the source file picked by the user; empty until one is chosen.
    _select = ''
    # Path of the output file picked by the user; empty until one is chosen.
    _output = ''

    def __init__(self, parent=None):
        """Build the form and wire the buttons to their handlers."""
        super(MainUi, self).__init__(parent)
        self.setupUi(self)
        self._connect_slot()

    def _connect_slot(self):
        """Connect each button's ``clicked`` signal to its slot."""
        self.select_button.clicked.connect(self.change_select)
        self.output_button.clicked.connect(self.change_output)
        self.start_button.clicked.connect(self.start_out)

    def change_select(self):
        """Ask for the source file and show the chosen path in ``label``."""
        select_name = QFileDialog.getOpenFileName(self, u'选择源文件', QDir.currentPath())
        # Only the first element of the dialog's result (the path) is kept.
        self._select = select_name[0]
        self.label.setText(self._select)

    def change_output(self):
        """Ask for the output file and show the chosen path in ``label_2``."""
        output_name = QFileDialog.getSaveFileName(self, u'保存输出文件', QDir.currentPath())
        self._output = output_name[0]
        self.label_2.setText(self._output)

    def start_out(self):
        """Start the conversion on a new thread."""
        # Set the status before starting the worker so it cannot overwrite a
        # message the worker has already posted.
        self.label_3.setText(u'转换进行中')
        thread.start_new_thread(self._out_thread, ())

    def _out_thread(self):
        """Worker body: validate the two paths, run ``d2``, report the result.

        NOTE(review): this runs off the GUI thread yet updates ``label_3``
        directly; Qt widgets are not thread-safe, so routing these updates
        through a signal would be the robust fix.
        """
        if not self._select:
            self.label_3.setText(u'你还没有设置源文件')
            return
        if not self._output:
            self.label_3.setText(u'你还没有设置保存路径')
            return

        try:
            d2(self, self._select, self._output)
        except Exception:
            self.label_3.setText(u'出错了')
        else:
            self.label_3.setText(u'完成')



def main():
    """Create the Qt application, show the main window and run the event loop.

    The process exits with the status returned by the event loop.
    """
    application = QApplication(sys.argv)
    window = MainUi()
    window.show()
    exit_status = application.exec_()
    sys.exit(exit_status)


# Launch the GUI only when this file is executed as a script.
if __name__ == '__main__':
    main()

 

form.py是用Qt Creator创建的form.ui文件经pyside-uic.exe转换得来,这里就不贴出来了。

 

下面是最终效果

未命名

posted @ 2012-01-19 19:54  catmelo  阅读(365)  评论(0)  编辑  收藏  举报