# coding: utf-8
import sqlite3
import mysql.connector
import mysql
import pymysql
import time
import json
import re
replacement_patterns = [
(r'won\'t', 'will not'),
(r'can\'t', 'cannot'),
(r'i\'m', 'i am'),
(r'ain\'t', 'is not'),
(r'(\w+)\'ll', '\g<1> will'),
(r'(\w+)n\'t', '\g<1> not'),
(r'(\w+)\'ve', '\g<1> have'),
(r'(\w+)\'s', '\g<1> is'),
(r'(\w+)\'re', '\g<1> are'),
(r'(\w+)\'d', '\g<1> would'),
(r"s\'",'s'),
(r"\'", ''),
(r"O\'", ''),
(r"o\'", '')]
class RegexpReplacer(object):
def __init__(self, patterns=replacement_patterns):
self.patterns = [(re.compile(regex), repl) for (regex, repl) in patterns]
def replace(self, text):
s = text
for (pattern, repl) in self.patterns:
(s, count) = re.subn(pattern, repl, s)
return s
replacer = RegexpReplacer()
conn_from = pymysql.connect("localhost", "root", "root", "suibian", charset='utf8')
print("ok")
_from = conn_from.cursor()
conn_to = sqlite3.connect('D:/桌面/VOA常速/bin/Debug/Ted.sqlite')
print("连接sqlite3成功")
_to = conn_to.cursor()
# 处理detail表格
count3 = 0
sql_4 = 'SELECT DISTINCT NewsID FROM newstext'
_from.execute(sql_4)
idlist = _from.fetchall()
for id in idlist:
sql_5 = 'select Id from bbcdetail where Id = ?'
params_5 = (id)
_to.execute(sql_5, params_5)
if not _to.fetchall():
itemid = id[0] # 所有在newstitle 里面的newsId
sql_3 = 'SELECT EndTiming, ParaId,IdIndex, Sentence_cn,Timing,Sentence,NewsID,AudioSrc FROM newstext WHERE NewsID = %s'
params_3 = (itemid)
_from.execute(sql_3, params_3)
d = _from.fetchall() #d此时是每一整篇新闻分成的句子及其后面数据为一个元组构成的大元组
d = list(d)
paranum = 0
list1 = list(range(len(d)))
for p in d:
# 条件表达式
a = ' ' if p[0] is None else p[0]
b = ' ' if p[4] is None else p[4]
list1[paranum] = {"EndTiming":a,"ParaId": str(p[1]), "IdIndex":str(p[2]), "Sentence_cn": p[3],
"Timing":b,"Sentence": p[5],}
paranum += 1
data = [json.dumps(w, ensure_ascii=False) for w in list1]
print("id is the ",itemid)
sql_33 = 'SELECT Category,CreatTime,Title,Sound,Pic,Flag,DescCn,Title_cn,ReadCount FROM newstitle WHERE NewsID = %s'
params_33 = (itemid)
try:
_from.execute(sql_33, params_33)
dd = _from.fetchall() # d此时是每一整篇新闻分成的句子及其后面数据为一个元组构成的大元组
# print("Flag",dd[0][5],",DescCn",dd[0][6],",Title_cn",dd[0][7],",ReadCount",dd[0][8])
except Exception as e:
print("错误是",e)
aha = ' ' if dd[0][7] is None else dd[0][7]
descontent = dd[0][6]
descontent = str(descontent).replace('"',"'")
# result = '{"result": 1,"data": [' + ','.join(data) + '],"id":' + str(itemid)+',"title":{'+ '"Category":'+dd[0][0]+',"CreateTime":'+dd[0][1]+',"Title":'+dd[0][2]+',"Sound":'+ dd[0][3]+'"Pic":'+dd[0][4]+',"Type":'+dd[0][0]+',"DescCn":'+(dd[0][5])+'"Title_cn:"'+dd[0][6]+',"ReadCount":'+str(dd[0][7])+'}}'
result = '{"result": 1,"data": [' + ','.join(data) + '],"id":"' + str(itemid)+'","title":{'+ '"Category":"'+dd[0][0]+'","CreateTime":"'+dd[0][1]+'","Title":"'+dd[0][2]+'","Sound":"'+ dd[0][3] +'","Pic":"'+dd[0][4]+'","Flag":"'+str(dd[0][5])+'","Type":"'+dd[0][0]+'","DescCn":"'+descontent+'","Title_cn":"'+aha+'","series":"0","CategoryName":"'+dd[0][0]+'","id":"'+str(itemid)+'","ReadCount":"'+str(dd[0][8])+'"},"message": "success"}'
# print("result is the ", result)
pic = replacer.replace(result)
sql_6 = "insert into bbcdetail (Id, detail) values ('%s', '%s')" % (id[0], pic)
try:
_to.execute(sql_6)
conn_to.commit()
except Exception as e:
print("这又是一次错误",e)
count3 += 1
print("插入detail成功")
print("今天插入detail内容{}条".format(count3))