python3.6 解析svg保存到mysql
1 import json 2 from collections import Counter 3 from json import JSONDecodeError 4 5 import mysql 6 import requests 7 from lxml import etree 8 9 # 定义远程 SVG 文件的 URL 10 file = r'D:\tmp_files\jmx\0919_3568.txt' 11 data_to_insert=[] 12 with open(file, 'r', encoding='utf-8') as file: 13 # 逐行读取文件 14 for line in file: 15 try: 16 # print(line.strip()) # 使用 strip() 去掉每行末尾的换行符 17 url = line.strip() 18 response = requests.get(url) 19 start_index = url.rfind('/') + 1 # 从最后一个 '/' 开始 20 end_index = url.find('_', start_index) # 找到第一个 '_' 21 # 提取子字符串 22 pos_id = url[start_index:end_index] 23 print("pos_id===", pos_id) 24 # 发送 HTTP GET 请求获取 SVG 文件内容 25 response = requests.get(url) 26 # 打印响应内容(XML 格式) 27 28 svg_content = response.content # 使用 .content 获取字节类型的内容 29 # print(svg_content) 30 # print(svg_content.decode('utf-8')) # 打印内容 31 # 将字节类型的 SVG 内容解析为 XML 解析树 32 root = etree.fromstring(svg_content) 33 # print(svg_content) 34 # print(root.xpath('//@class')) 35 class_items = root.xpath('//@class') 36 dataTitle_items = root.xpath('//@data-title') 37 dictx = {} 38 counter = Counter(class_items) 39 store_btn_info = {} 40 for obj, count in counter.items(): 41 # print(f"{obj}: {count}; ") 42 store_btn_info[obj] = count 43 btn_table = store_btn_info.get("btn_table") 44 btn_poster = store_btn_info.get("btn_poster") 45 btn_mainEntry = store_btn_info.get("btn_mainEntry") 46 # print(btn_table,btn_poster,btn_mainEntry) 47 btn_poster_elements = root.xpath("//*[@class='btn_poster']") 48 btn_table_elements = root.xpath("//*[@class='btn_table']") 49 btn_mainEntry_elements = root.xpath("//*[@class='btn_mainEntry']") 50 51 dictx1 = {} 52 btn_poster_title_list = [] 53 for element in btn_poster_elements: 54 class_attr = str(element.get('class')) 55 data_title = str(element.get('data-title')) 56 # btn_poster_title_list.append(data_title) 57 data_pictures = json.loads(str(element.get('data-pictures')).replace("'", '"')) 58 # 打印提取到的属性 59 # print(f"class_attr: {class_attr}",f"data-title: {data_title}",f"data-pictures: {data_pictures}") 60 picture = data_pictures.get("picture") 61 backPicture = data_pictures.get("backPicture") 62 if picture != '' and backPicture != '': 63 dictx1[data_title] = [picture, backPicture] 64 btn_poster_title_list.append(str(data_title + '-E')) 65 btn_poster_title_list.append(str(data_title + '-I')) 66 67 elif picture != '': 68 dictx1[data_title] = [picture] 69 btn_poster_title_list.append(str(data_title + '-E')) 70 elif backPicture != '': 71 dictx1[data_title] = [backPicture] 72 btn_poster_title_list.append(str(data_title + '-I')) 73 # print("-"*100) 74 # print(dictx1) 75 # print("-"*100) 76 btn_poster = sum([len(v) for v in dictx1.values()]) 77 # print(sum([ len(v) for v in dictx1.values()])) 78 btn_poster_title_list.sort() 79 80 dictx2 = {} 81 btn_table_dataId_list = [] 82 for element in btn_table_elements: 83 class_attr = element.get('class') 84 data_table_id = str(element.get('data-table-id')).replace(' ', '') 85 # print(data_table_id) 86 btn_table_dataId_list.append(data_table_id) 87 data_title = str(element.get('data-title')) 88 data_sku = str(element.get('data-sku')) 89 # 打印提取到的属性 90 # print(f"class_attr: {class_attr}",f"data-title: {data_table_id}",f"data-pictures: {data_title}",f"data-sku:{data_sku}") 91 dictx2[data_table_id] = (data_sku, data_title) 92 # print(dictx2) 93 btn_table_dataId_list.sort() 94 # print(btn_table_dataId_list) 95 96 dictx3 = {} 97 btn_poster_title_list_v1 = [] 98 for element in btn_mainEntry_elements: 99 class_attr = element.get('class') 100 data_items = json.loads(str(element.get('data-items')).replace("'", '"')) 101 data_title = element.get('data-title') 102 data_posters = json.loads(str(element.get('data-posters')).replace("'", '"')) 103 # 打印提取到的属性 104 dictx3[ 105 "btn_mainEntryx"] = f"class_attr: {class_attr}\n", f"data_items: {data_items}\n", f"data_title: {data_title}\n", f"data_posters:{data_posters}" 106 # print(f"class_attr: {class_attr}\n",f"data_items: {data_items}\n",f"data_title: {data_title}\n",f"data_posters:{data_posters}") 107 data_items_titles = [item['title'] for item in data_items] 108 data_posters_titles = [item['title'] for item in data_posters] 109 data_posters_tit = [item['title'] for item in data_posters][0] if len( 110 [item['title'] for item in data_posters]) > 0 else '' 111 picture = [item['picture'] for item in data_posters][0] if len( 112 [item['picture'] for item in data_posters]) else '' 113 backPicture = [item['backPicture'] for item in data_posters][0] if len( 114 [item['backPicture'] for item in data_posters]) else '' 115 if picture != '' and backPicture != '': 116 data_posters_titles[0] = str(data_posters_tit) + "-E" 117 data_posters_titles.append(str(data_posters_tit) + "-I") 118 elif picture != '': 119 data_posters_titles[0] = str(data_posters_tit) + "-E" 120 121 elif backPicture != '': 122 data_posters_titles[0] = str(data_posters_tit) + "-I" 123 124 import itertools 125 126 btn_poster_title_list_v1.extend(data_items_titles) 127 btn_poster_title_list_v1.extend(data_posters_titles) 128 129 btn_poster_title_list_v1 = btn_poster_title_list + btn_poster_title_list_v1 130 btn_poster_title_list_v1.sort() 131 132 # print(dictx3) 133 134 dictx["btn_table"] = dictx2 135 dictx["btn_poster"] = dictx1 136 dictx["btn_mainEntry"] = dictx3 137 # print(dictx) 138 139 from mysql.connector import Error 140 141 try: 142 # 连接到 MySQL 数据库 143 connection = mysql.connector.connect( 144 host='', # 数据库主机地址 145 user='', # 数据库用户名 146 password=', # 数据库密码 147 database='' # 数据库名称 148 ) 149 if connection.is_connected(): 150 # 创建游标对象 151 cursor = connection.cursor() 152 # cursor.execute(create_table_query) 153 # 插入数据 154 insert_query = ''' 155 INSERT INTO tmp_store_svg_info_1 ( 156 url, 157 store_id, 158 btn_table, 159 btn_poster, 160 btn_mainEntry, 161 btn_table_dataId_list, 162 btn_poster_title_list, 163 svg_btn_info, 164 svg_xml, 165 create_time 166 ) VALUES ( 167 %s, %s, %s, %s, %s,%s,%s, %s, %s, NOW() 168 ) 169 ''' 170 171 # data_to_insert = [( 172 # url 173 # , pos_id 174 # , str(btn_table) 175 # , str(len(btn_poster_title_list_v1)) 176 # , str(btn_mainEntry) 177 # , str(btn_table_dataId_list).replace("'", '"') 178 # , str(btn_poster_title_list_v1).replace("'", '"') 179 # , json.dumps(dictx) 180 # , str(svg_content) 181 # )] 182 183 184 print("data_to_insert=",len(data_to_insert)) 185 if len(data_to_insert)==100: 186 cursor.executemany(insert_query, data_to_insert) 187 connection.commit() 188 data_to_insert.clear() 189 cursor.close() 190 connection.close() 191 print("数据插入成功") 192 else: 193 data_to_insert.append( 194 ( 195 url 196 , pos_id 197 , str(btn_table) 198 , str(len(btn_poster_title_list_v1)) 199 , str(btn_mainEntry) 200 , str(btn_table_dataId_list).replace("'", '"') 201 , str(btn_poster_title_list_v1).replace("'", '"') 202 , json.dumps(dictx) 203 , str(svg_content) 204 ) 205 206 ) 207 except Error as e: 208 print(f"Error: {e}") 209 if connection.is_connected(): 210 connection.rollback() # 回滚事务 211 finally: 212 # 关闭游标和连接 213 if connection.is_connected(): 214 cursor.close() 215 connection.close() 216 print("数据库连接已关闭") 217 except JSONDecodeError as e: 218 print(url) 219 continue 220 221 222 def save(connection,li): 223 try: 224 insert_query = ''' 225 INSERT INTO tmp_store_svg_info_1 ( 226 url, 227 store_id, 228 btn_table, 229 btn_poster, 230 btn_mainEntry, 231 btn_table_dataId_list, 232 btn_poster_title_list, 233 svg_btn_info, 234 svg_xml, 235 create_time 236 ) VALUES ( 237 %s, %s, %s, %s, %s,%s,%s, %s, %s, NOW() 238 ) 239 ''' 240 if connection.is_connected(): 241 # 创建游标对象 242 cursor = connection.cursor() 243 if len(data_to_insert) == 100: 244 cursor.executemany(insert_query, data_to_insert) 245 connection.commit() 246 data_to_insert.clear() 247 cursor.close() 248 connection.close() 249 print("数据插入成功") 250 251 except Error as e: 252 print(f"Error: {e}") 253 if connection.is_connected(): 254 connection.rollback() # 回滚事务 255 finally: 256 # 关闭游标和连接 257 if connection.is_connected(): 258 cursor.close() 259 connection.close() 260 print("数据库连接已关闭")