# Python: simple spreadsheet wrapper + path helpers + XML utilities


import xlsxwriter
import openpyxl
import os
import operator
import xml.etree.ElementTree as ET

import requests
# import json
from bs4 import BeautifulSoup


class MyXlsx:
    """Thin spreadsheet wrapper with two modes.

    * new-file mode -- the path does not exist yet: a workbook is created
      with xlsxwriter and rows are written sequentially at a cursor.
    * append mode   -- the path already exists: the workbook is opened with
      openpyxl so rows can be appended or individual cells edited.
    """

    def __init__(self):
        self._book1 = None      # xlsxwriter workbook (new-file mode)
        self._sheet1 = None     # xlsxwriter worksheet (new-file mode)
        self._book2 = None      # openpyxl workbook (append mode)
        self._sheet2 = None     # openpyxl worksheet (append mode)
        self._type_new = False  # True -> append mode (openpyxl), False -> new-file mode
        self._path_name = ""    # workbook path; needed by save() in append mode
        self._currow = 0        # next 0-based row index for xlsxwriter writes

    def open_xlsx(self, str_path="", str_book_name=""):
        """Open (or create) the workbook at *str_path* and select the sheet
        named *str_book_name*.

        Returns self so calls can be chained.
        BUGFIX: previously returned the class object ``MyXlsx`` instead of
        the instance, so the return value was useless to callers.
        """
        if not os.path.exists(str_path):
            # File does not exist yet: create it with xlsxwriter.
            self._book1 = xlsxwriter.Workbook(str_path)
            self._sheet1 = self._book1.add_worksheet(str_book_name)
        else:
            # File exists: open with openpyxl so data can be appended.
            self._book2 = openpyxl.load_workbook(str_path)
            self._sheet2 = self._book2[str_book_name]
            self._type_new = True
        self._path_name = str_path
        return self

    def write(self, str_data=None, row=1, low=1):
        """Write the sequence *str_data* as one row.

        Append mode: when (row, low) points past (1, 1) the cells are set one
        at a time at that position (slow); otherwise the whole row is appended
        after the current last row.  New-file mode: the row is written at the
        internal cursor, which then advances.

        BUGFIX: the old default argument was the *type* ``list``; use
        ``None`` -> empty list instead.
        """
        if str_data is None:
            str_data = []
        if self._type_new:
            if row > 1 or low > 1:
                # Explicit cell placement: one cell at a time (slow path).
                for offset, value in enumerate(str_data):
                    self._sheet2.cell(row, low + offset, value)
            else:
                self._sheet2.append(str_data)
        else:
            self._sheet1.write_row(self._currow, low - 1, str_data)
            self._currow += 1

    def read(self, row=1, low=1, isbyrow=True):
        """Read cell values from the openpyxl sheet (append mode only).

        With isbyrow=True, read row *row* starting at column *low*;
        otherwise read column *low* starting at row *row*.

        BUGFIXES: openpyxl worksheets have no ``max_col`` attribute (it is
        ``max_column``); the old ranges dropped the last row/column because
        openpyxl indices are 1-based and inclusive; the old defaults of 0
        always raised since indices start at 1; return the cells' *values*
        rather than Cell objects, matching the "read data" intent.
        """
        result = []
        if isbyrow:
            for col in range(low, self._sheet2.max_column + 1):
                result.append(self._sheet2.cell(row, col).value)
        else:
            for r in range(row, self._sheet2.max_row + 1):
                result.append(self._sheet2.cell(r, low).value)
        return result

    def save(self):
        """Persist the workbook: save it (append mode) or close it, which
        writes the file (new-file mode)."""
        if self._type_new:
            self._book2.save(self._path_name)
        else:
            self._book1.close()

class Mypath:
    """Filesystem helpers: recursive delete, directory creation, file listing."""

    def __init__(self):
        ...

    @staticmethod
    def remove_all_file(input_path):
        """Recursively delete everything under *input_path*, then the
        directory itself.  Silently returns if the path does not exist.

        BUGFIXES: the old code called ``os.rmdir(child)`` again after the
        recursive call had already removed that directory, raising
        FileNotFoundError for any nested directory; it also hard-coded the
        Windows ``"\\\\"`` separator instead of using os.path.join.
        """
        if not os.path.exists(input_path):
            return
        for name in os.listdir(input_path):
            child = os.path.join(input_path, name)
            if os.path.isfile(child):
                os.remove(child)
            else:
                # The recursive call removes the child's contents AND the
                # child directory itself -- no extra rmdir needed here.
                Mypath.remove_all_file(child)
        os.rmdir(input_path)

    @staticmethod
    def make_dirs(input_path):
        """Create the directory *input_path*, including parents."""
        return os.makedirs(input_path)

    @staticmethod
    def get_file_name(input_path, houzhui=""):
        """Walk *input_path* and return {full_path: extension}.

        If *houzhui* (an extension such as ".jpg") is non-empty, only
        matching files are included and mapped to that extension; otherwise
        every file is included and mapped to "".
        """
        all_path = dict()
        for root, _dirs, files in os.walk(input_path):
            for file_name in files:
                if houzhui:
                    if os.path.splitext(file_name)[1] == houzhui:
                        all_path[os.path.join(root, file_name)] = houzhui
                else:
                    all_path[os.path.join(root, file_name)] = ""
        return all_path

class Myxml:
    """Minimal xml.etree.ElementTree wrapper for reading an XML file."""

    def __init__(self, path=""):
        self._path = path  # path of the XML file to parse
        self._root = None  # root Element, set by open_xml()
        self._xml = None   # parsed ElementTree, set by open_xml()

    def open_xml(self):
        """Parse the file at self._path and cache its root element.
        Must be called before getall_tag() / get_tag_value()."""
        self._xml = ET.parse(self._path)
        self._root = self._xml.getroot()

    def getall_tag(self):
        """Return a dict whose keys are the distinct tag names in the
        document (a dict used as an ordered set; values are "").

        BUGFIXES: the result was built but never returned; keys were the
        Element objects themselves (identity-hashed), so the documented
        de-duplication never happened -- key by tag name instead.
        """
        dt = dict()
        for node in self._root.iter():
            dt[node.tag] = ""
        return dt

    def get_tag_value(self, user_tag=None):
        """Return {(tag, attrs, text): ""} for every element whose tag is
        *user_tag* (all elements when user_tag is None).

        *attrs* is a sorted tuple of (name, value) pairs so that the key is
        hashable.  BUGFIX: the old code used a list containing the attrib
        dict as the dict key, which raised TypeError (unhashable).
        """
        dt = dict()
        for node in self._root.iter(user_tag):
            key = (node.tag, tuple(sorted(node.attrib.items())), node.text)
            dt[key] = ""
        return dt


# --- Web scraping demo ---

class PC:
    """Tiny web-scraping demo using requests + BeautifulSoup."""

    def __init__(self):
        ...

    def testpc(self):
        """Fetch a sample page and print each child of the footer block,
        preceded by its index."""
        url = 'https://www.sojson.com/ascii.html'
        # Fetch the page with a plain GET and parse it with the lxml backend.
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'lxml')

        # CSS selector obtained via browser devtools: select the element,
        # then "Copy" -> "Copy Selector" (drop :nth-child parts as needed).
        nodes = soup.select('body > div.layout > footer > div > div.layui-row.footer-site-desc')
        for index, child in enumerate(nodes[0].children):
            print(index)
            print(child)
# posted @ 2021-07-28 17:21 (blog footer from the original paste; commented out -- not code)