python3.6 解析svg保存到mysql

 

  1 import json
  2 from collections import Counter
  3 from json import JSONDecodeError
  4 
  5 import mysql
  6 import requests
  7 from lxml import etree
  8 
  9 # 定义远程 SVG 文件的 URL
 10 file = r'D:\tmp_files\jmx\0919_3568.txt'
 11 data_to_insert=[]
 12 with open(file, 'r', encoding='utf-8') as file:
 13     # 逐行读取文件
 14     for line in file:
 15         try:
 16             # print(line.strip())  # 使用 strip() 去掉每行末尾的换行符
 17             url = line.strip()
 18             response = requests.get(url)
 19             start_index = url.rfind('/') + 1  # 从最后一个 '/' 开始
 20             end_index = url.find('_', start_index)  # 找到第一个 '_'
 21             # 提取子字符串
 22             pos_id = url[start_index:end_index]
 23             print("pos_id===", pos_id)
 24             # 发送 HTTP GET 请求获取 SVG 文件内容
 25             response = requests.get(url)
 26             # 打印响应内容(XML 格式)
 27 
 28             svg_content = response.content  # 使用 .content 获取字节类型的内容
 29             # print(svg_content)
 30             # print(svg_content.decode('utf-8'))  # 打印内容
 31             # 将字节类型的 SVG 内容解析为 XML 解析树
 32             root = etree.fromstring(svg_content)
 33             # print(svg_content)
 34             # print(root.xpath('//@class'))
 35             class_items = root.xpath('//@class')
 36             dataTitle_items = root.xpath('//@data-title')
 37             dictx = {}
 38             counter = Counter(class_items)
 39             store_btn_info = {}
 40             for obj, count in counter.items():
 41                 # print(f"{obj}: {count}; ")
 42                 store_btn_info[obj] = count
 43             btn_table = store_btn_info.get("btn_table")
 44             btn_poster = store_btn_info.get("btn_poster")
 45             btn_mainEntry = store_btn_info.get("btn_mainEntry")
 46             # print(btn_table,btn_poster,btn_mainEntry)
 47             btn_poster_elements = root.xpath("//*[@class='btn_poster']")
 48             btn_table_elements = root.xpath("//*[@class='btn_table']")
 49             btn_mainEntry_elements = root.xpath("//*[@class='btn_mainEntry']")
 50 
 51             dictx1 = {}
 52             btn_poster_title_list = []
 53             for element in btn_poster_elements:
 54                 class_attr = str(element.get('class'))
 55                 data_title = str(element.get('data-title'))
 56                 # btn_poster_title_list.append(data_title)
 57                 data_pictures = json.loads(str(element.get('data-pictures')).replace("'", '"'))
 58                 # 打印提取到的属性
 59                 # print(f"class_attr: {class_attr}",f"data-title: {data_title}",f"data-pictures: {data_pictures}")
 60                 picture = data_pictures.get("picture")
 61                 backPicture = data_pictures.get("backPicture")
 62                 if picture != '' and backPicture != '':
 63                     dictx1[data_title] = [picture, backPicture]
 64                     btn_poster_title_list.append(str(data_title + '-E'))
 65                     btn_poster_title_list.append(str(data_title + '-I'))
 66 
 67                 elif picture != '':
 68                     dictx1[data_title] = [picture]
 69                     btn_poster_title_list.append(str(data_title + '-E'))
 70                 elif backPicture != '':
 71                     dictx1[data_title] = [backPicture]
 72                     btn_poster_title_list.append(str(data_title + '-I'))
 73             # print("-"*100)
 74             # print(dictx1)
 75             # print("-"*100)
 76             btn_poster = sum([len(v) for v in dictx1.values()])
 77             # print(sum([ len(v) for v in dictx1.values()]))
 78             btn_poster_title_list.sort()
 79 
 80             dictx2 = {}
 81             btn_table_dataId_list = []
 82             for element in btn_table_elements:
 83                 class_attr = element.get('class')
 84                 data_table_id = str(element.get('data-table-id')).replace(' ', '')
 85                 # print(data_table_id)
 86                 btn_table_dataId_list.append(data_table_id)
 87                 data_title = str(element.get('data-title'))
 88                 data_sku = str(element.get('data-sku'))
 89                 # 打印提取到的属性
 90                 # print(f"class_attr: {class_attr}",f"data-title: {data_table_id}",f"data-pictures: {data_title}",f"data-sku:{data_sku}")
 91                 dictx2[data_table_id] = (data_sku, data_title)
 92             # print(dictx2)
 93             btn_table_dataId_list.sort()
 94             # print(btn_table_dataId_list)
 95 
 96             dictx3 = {}
 97             btn_poster_title_list_v1 = []
 98             for element in btn_mainEntry_elements:
 99                 class_attr = element.get('class')
100                 data_items = json.loads(str(element.get('data-items')).replace("'", '"'))
101                 data_title = element.get('data-title')
102                 data_posters = json.loads(str(element.get('data-posters')).replace("'", '"'))
103                 # 打印提取到的属性
104                 dictx3[
105                     "btn_mainEntryx"] = f"class_attr: {class_attr}\n", f"data_items: {data_items}\n", f"data_title: {data_title}\n", f"data_posters:{data_posters}"
106                 # print(f"class_attr: {class_attr}\n",f"data_items: {data_items}\n",f"data_title: {data_title}\n",f"data_posters:{data_posters}")
107                 data_items_titles = [item['title'] for item in data_items]
108                 data_posters_titles = [item['title'] for item in data_posters]
109                 data_posters_tit = [item['title'] for item in data_posters][0] if len(
110                     [item['title'] for item in data_posters]) > 0 else ''
111                 picture = [item['picture'] for item in data_posters][0] if len(
112                     [item['picture'] for item in data_posters]) else ''
113                 backPicture = [item['backPicture'] for item in data_posters][0] if len(
114                     [item['backPicture'] for item in data_posters]) else ''
115                 if picture != '' and backPicture != '':
116                     data_posters_titles[0] = str(data_posters_tit) + "-E"
117                     data_posters_titles.append(str(data_posters_tit) + "-I")
118                 elif picture != '':
119                     data_posters_titles[0] = str(data_posters_tit) + "-E"
120 
121                 elif backPicture != '':
122                     data_posters_titles[0] = str(data_posters_tit) + "-I"
123 
124                 import itertools
125 
126                 btn_poster_title_list_v1.extend(data_items_titles)
127                 btn_poster_title_list_v1.extend(data_posters_titles)
128 
129             btn_poster_title_list_v1 = btn_poster_title_list + btn_poster_title_list_v1
130             btn_poster_title_list_v1.sort()
131 
132             # print(dictx3)
133 
134             dictx["btn_table"] = dictx2
135             dictx["btn_poster"] = dictx1
136             dictx["btn_mainEntry"] = dictx3
137             # print(dictx)
138 
139             from mysql.connector import Error
140 
141             try:
142                 # 连接到 MySQL 数据库
143                 connection = mysql.connector.connect(
144                     host='',  # 数据库主机地址
145                     user='',  # 数据库用户名
146                     password=',  # 数据库密码
147                     database=''  # 数据库名称
148                 )
149                 if connection.is_connected():
150                     # 创建游标对象
151                     cursor = connection.cursor()
152                     # cursor.execute(create_table_query)
153                     # 插入数据
154                     insert_query = '''
155                           INSERT INTO tmp_store_svg_info_1 (
156                             url,
157                             store_id,
158                             btn_table,
159                             btn_poster,
160                             btn_mainEntry,
161                             btn_table_dataId_list,
162                             btn_poster_title_list,
163                             svg_btn_info,
164                             svg_xml,
165                             create_time
166                           ) VALUES (
167                             %s, %s, %s, %s, %s,%s,%s, %s, %s, NOW()
168                           )
169                           '''
170 
171                     # data_to_insert = [(
172                     #     url
173                     #     , pos_id
174                     #     , str(btn_table)
175                     #     , str(len(btn_poster_title_list_v1))
176                     #     , str(btn_mainEntry)
177                     #     , str(btn_table_dataId_list).replace("'", '"')
178                     #     , str(btn_poster_title_list_v1).replace("'", '"')
179                     #     , json.dumps(dictx)
180                     #     , str(svg_content)
181                     # )]
182 
183 
184                     print("data_to_insert=",len(data_to_insert))
185                     if len(data_to_insert)==100:
186                         cursor.executemany(insert_query, data_to_insert)
187                         connection.commit()
188                         data_to_insert.clear()
189                         cursor.close()
190                         connection.close()
191                         print("数据插入成功")
192                     else:
193                         data_to_insert.append(
194                             (
195                                 url
196                                 , pos_id
197                                 , str(btn_table)
198                                 , str(len(btn_poster_title_list_v1))
199                                 , str(btn_mainEntry)
200                                 , str(btn_table_dataId_list).replace("'", '"')
201                                 , str(btn_poster_title_list_v1).replace("'", '"')
202                                 , json.dumps(dictx)
203                                 , str(svg_content)
204                             )
205 
206                         )
207             except Error as e:
208                 print(f"Error: {e}")
209                 if connection.is_connected():
210                     connection.rollback()  # 回滚事务
211             finally:
212                 # 关闭游标和连接
213                 if connection.is_connected():
214                     cursor.close()
215                     connection.close()
216                     print("数据库连接已关闭")
217         except JSONDecodeError as e:
218             print(url)
219             continue
220 
221 
222 def save(connection,li):
223     try:
224         insert_query = '''
225               INSERT INTO tmp_store_svg_info_1 (
226                 url,
227                 store_id,
228                 btn_table,
229                 btn_poster,
230                 btn_mainEntry,
231                 btn_table_dataId_list,
232                 btn_poster_title_list,
233                 svg_btn_info,
234                 svg_xml,
235                 create_time
236               ) VALUES (
237                 %s, %s, %s, %s, %s,%s,%s, %s, %s, NOW()
238               )
239               '''
240         if connection.is_connected():
241             # 创建游标对象
242             cursor = connection.cursor()
243             if len(data_to_insert) == 100:
244                 cursor.executemany(insert_query, data_to_insert)
245                 connection.commit()
246                 data_to_insert.clear()
247                 cursor.close()
248                 connection.close()
249                 print("数据插入成功")
250 
251     except Error as e:
252         print(f"Error: {e}")
253         if connection.is_connected():
254             connection.rollback()  # 回滚事务
255     finally:
256         # 关闭游标和连接
257         if connection.is_connected():
258             cursor.close()
259             connection.close()
260             print("数据库连接已关闭")
View Code

 

posted @ 2024-10-14 16:00  linbo.yang  阅读(2)  评论(0编辑  收藏  举报