工具类common
#### # 工具类common
import requests
import time
import json
import random
import os
from lxml import etree
import concurrent.futures
from urllib.parse import unquote, quote
from PIL import Image
def strClear_v1(str):
    """Remove line breaks, tabs and special spaces from a string, then trim it.

    Deletes every carriage return, line feed, ideographic space (U+3000),
    non-breaking space (U+00A0) and tab, then strips surrounding whitespace.
    If any step raises (for example the argument is not a string), the
    marker text "strClear_v1报错()" is returned instead.

    The parameter name shadows the builtin ``str``; it is kept so existing
    keyword callers continue to work.
    """
    unwanted = ("\r", "\n", "\u3000", "\xa0", "\t")
    try:
        cleaned = str
        for token in unwanted:
            cleaned = cleaned.replace(token, "")
        return cleaned.strip()
    except Exception:
        return "strClear_v1报错()"
def urlEncodeDecode(data: str) -> str:
    """Decode a percent-encoded (URL-encoded) string.

    Args:
        data: Text containing ``%XX`` escapes.

    Returns:
        The decoded text, as produced by ``urllib.parse.unquote``.
    """
    return unquote(data)
def urlEncodeEncrypy(data: str) -> str:
    """Percent-encode a string for use in a URL.

    The characters ``?``, ``:``, ``=`` and ``/`` are left unescaped so that a
    complete URL keeps its scheme, path and query delimiters.

    Args:
        data: Text to encode.

    Returns:
        The percent-encoded text, as produced by ``urllib.parse.quote``.
    """
    return quote(data, safe="?:=/")
def getJsonData(path:str):
    """Read a UTF-8 JSON file and return the parsed Python object.

    Args:
        path: Location of the JSON file.

    Returns:
        Whatever ``json`` produces for the file content (dict, list, ...).
    """
    with open(path, encoding='utf-8') as source:
        return json.load(source)
def saveJsonData(path:str, data:str):
    """Append ``data`` as JSON text followed by a comma to a UTF-8 file.

    Non-ASCII characters are written as-is (``ensure_ascii=False``). Because
    every record is followed by ``,`` and nothing wraps the records, the
    resulting file is a comma-separated stream rather than one JSON document.

    Args:
        path: File to append to; it is created when missing.
        data: Any object ``json.dumps`` can serialise.
    """
    with open(path, mode='a+', encoding='utf-8') as sink:
        sink.write(json.dumps(data, ensure_ascii=False) + ",")
def imgByteToImageFile(divPath: str, fileName: str, data: bytes):
    """Save raw image bytes to ``divPath/fileName``.

    The target directory is created when it does not exist. The path is built
    with ``os.path.join`` so ``divPath`` works with or without a trailing
    separator (plain concatenation would write next to the directory
    instead of inside it when the separator is missing).

    Args:
        divPath: Directory to store the file in, e.g. ``'./ico/'``.
        fileName: Name of the file to create inside ``divPath``.
        data: Binary content to write.
    """
    os.makedirs(divPath, exist_ok=True)
    outPath = os.path.join(divPath, fileName)
    with open(outPath, 'wb') as f:
        f.write(data)
    print(f'图片已保存到 {outPath}')
def interceptImagePathGetName(img_url: str) -> str:
    """Return the part of an image URL after its last ``/`` (e.g. ``img.jpg``).

    When the URL contains no ``/`` the whole string is returned; when it ends
    with ``/`` the result is an empty string.
    """
    return img_url.rsplit("/", 1)[-1]
def interceptImagePathGetFormat(img_url: str) -> str:
    """Return the file extension of an image URL, including the dot.

    Examples: ``.jpg``, ``.png``. An empty string is returned when the last
    path component has no extension.
    """
    # splitext yields the extension; basename would return the whole file name.
    return os.path.splitext(img_url)[1]
def getAbsPath(currPage: str) -> str:
    """Return the absolute, normalised form of a path.

    Relative paths are resolved against the current working directory, e.g.
    ``file/img.jpg`` -> ``E:\\code\\Model\\PythonComplete\\file\\img.jpg``.
    """
    return os.path.abspath(currPage)
def getDivAbsPath(currPage: str) -> str:
    """Return the directory portion of a path (everything before the last component).

    The path is not made absolute; a bare file name yields an empty string.
    """
    return os.path.dirname(currPage)
def readImageTemplate(path:str) -> None:
    """Placeholder that points the reader at the template in the comments below.

    The function only prints a hint; ``path`` is not used. The commented-out
    lines are a usage template for PIL's ``Image``.
    """
    hint = "阅读源码->def readImageTemplate(path:str) -> None:"
    print(hint)
    # # Open the image
    # image = Image.open(path)
    # # Show the image
    # image.show()
    #
    # # Process the image (for example, rotate it)
    # image = image.rotate(90)
    #
    # # Save the image to a new file
    # image.save(interceptImagePathGetName(path))
def getRandHeader() -> dict:
    """Return a request-header dict with a randomly chosen mobile/tablet User-Agent.

    Returns:
        A new dict of the form ``{'user-agent': <UA string>}``. Some UA
        strings appear more than once in the pool, which makes them
        proportionally more likely to be picked.
    """
    user_agents = (
        'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1',
        'Mozilla/5.0 (Linux; Android 8.0.0; SM-G955U Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android 10; SM-G981B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.162 Mobile Safari/537.36',
        'Mozilla/5.0 (iPad; CPU OS 13_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/87.0.4280.77 Mobile/15E148 Safari/604.1',
        'Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.109 Safari/537.36 CrKey/1.54.248666',
        'Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.188 Safari/537.36 CrKey/1.54.250320',
        'Mozilla/5.0 (BB10; Touch) AppleWebKit/537.10+ (KHTML, like Gecko) Version/10.0.9.2372 Mobile Safari/537.10+',
        'Mozilla/5.0 (PlayBook; U; RIM Tablet OS 2.1.0; en-US) AppleWebKit/536.2+ (KHTML like Gecko) Version/7.2.1.0 Safari/536.2+',
        'Mozilla/5.0 (Linux; U; Android 4.3; en-us; SM-N900T Build/JSS15J) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30',
        'Mozilla/5.0 (Linux; U; Android 4.1; en-us; GT-N7100 Build/JRO03C) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30',
        'Mozilla/5.0 (Linux; U; Android 4.0; en-us; GT-I9300 Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30',
        'Mozilla/5.0 (Linux; Android 7.0; SM-G950U Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android 8.0.0; SM-G965U Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.111 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android 8.1.0; SM-T837A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.80 Safari/537.36',
        'Mozilla/5.0 (Linux; U; en-us; KFAPWI Build/JDQ39) AppleWebKit/535.19 (KHTML, like Gecko) Silk/3.13 Safari/535.19 Silk-Accelerated=true',
        'Mozilla/5.0 (Linux; U; Android 4.4.2; en-us; LGMS323 Build/KOT49I.MS32310c) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/102.0.0.0 Mobile Safari/537.36',
        'Mozilla/5.0 (Windows Phone 10.0; Android 4.2.1; Microsoft; Lumia 550) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Mobile Safari/537.36 Edge/14.14263',
        'Mozilla/5.0 (Linux; Android 6.0.1; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 10 Build/MOB31T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android 8.0.0; Nexus 5X Build/OPR4.170623.006) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android 7.1.1; Nexus 6 Build/N6F26U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android 8.0.0; Nexus 6P Build/OPP3.170518.006) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 7 Build/MOB30X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36',
        'Mozilla/5.0 (compatible; MSIE 10.0; Windows Phone 8.0; Trident/6.0; IEMobile/10.0; ARM; Touch; NOKIA; Lumia 520)',
        'Mozilla/5.0 (MeeGo; NokiaN9) AppleWebKit/534.13 (KHTML, like Gecko) NokiaBrowser/8.5.0 Mobile Safari/534.13',
        'Mozilla/5.0 (Linux; Android 9; Pixel 3 Build/PQ1A.181105.017.A1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.158 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android 10; Pixel 4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android 11; Pixel 3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.181 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android 8.0.0; Pixel 2 XL Build/OPD1.170816.004) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
        'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
        'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1',
        'Mozilla/5.0 (iPad; CPU OS 11_0 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) Version/11.0 Mobile/15A5341f Safari/604.1',
    )
    # Build a fresh dict per call so callers may mutate the result freely.
    return {'user-agent': random.choice(user_agents)}
def getOneNewProxies() -> dict:
    """Fetch one proxy from the zdopen short-proxy API.

    Returns:
        A mapping of the form ``{'http': 'ip:port'}`` built from the first
        entry of ``data.proxy_list`` in the API's JSON response; it can be
        passed as the ``proxies`` argument of ``requests``.

    Raises:
        requests.RequestException: The HTTP request failed or timed out.
        ValueError, KeyError, IndexError: The response is not the expected
            JSON structure.
    """
    # NOTE(review): the API id/key are hard-coded in the URL; consider moving
    # them to configuration or environment variables.
    # The query string previously contained "×pan", an HTML-entity mangling of
    # "&timespan"; the parameter separator is restored here.
    response = requests.get(
        "http://www.zdopen.com/ShortProxy/GetIP/?api=202312072137512511&akey=185c5_14x6dy_ae051f0d5df&count=1&fitter=2&timespan=5&type=3",
        timeout=10,  # requests has no default timeout; avoid hanging forever
    )
    data = json.loads(response.text)
    first = data["data"]["proxy_list"][0]
    proxies = {
        'http': '{}:{}'.format(first['ip'], first['port'])
    }
    # The mapping was built but never returned before, so callers got None.
    return proxies
# Hard-coded proxy mapping; the commented-out call is the dynamic alternative.
# proxies = getOneNewProxies()
proxies = dict(http='42.7.29.241:14545')
# 1. 定义函数
a. `->` 后面写的是函数返回值的类型注解; `-> None` 表示该函数不返回值(即返回 None), 而不是"返回类型不受限制"。类型注解在运行时不会被强制检查, 所以即使实际返回了其他类型也不会报错; 但有返回值的函数应标注真实的返回类型(例如 `-> str`)。
b. 参数的注解格式为"参数名: 数据类型"
def cli(name:str) -> None:
    """Print an empty line; ``name`` is accepted but not used."""
    print()
# 2. 截取图片直链中图片的名称
def interceptImagePathGetName(img_url: str) -> str:
    """Return the part of an image URL after its last ``/`` (e.g. ``img.jpg``).

    When the URL contains no ``/`` the whole string is returned; when it ends
    with ``/`` the result is an empty string.
    """
    return img_url.rsplit("/", 1)[-1]
def interceptImagePathGetFormat(img_url: str) -> str:
    """Return the file extension of an image URL, including the dot.

    Examples: ``.jpg``, ``.png``. An empty string is returned when the last
    path component has no extension.
    """
    return os.path.splitext(img_url)[1]
# 3. 解析str格式的json数据
# Parse JSON text held in a str.
import json
# JSON requires double-quoted keys and strings; the single-quoted form
# (Python repr style) makes json.loads raise JSONDecodeError.
data_str = '[{"name": "Jack"},{"age":30}]'
data = json.loads(data_str)
print(data)
# 4.实现url编码和解码.
# URL decoding / encoding demo using urllib.parse.
from urllib.parse import unquote, quote

# Inputs: a percent-encoded string to decode and a plain string to encode.
url21 = "%7B%22ailx10%22:%22Network%20security%0ACISSP%20Information%20Security%20Expert%22%7D"
url11 = '{"ailx10":"Network security\nCISSP Information Security Expert"}'

# Decode: turn %XX escapes back into characters.
url22 = unquote(url21)
decoded_message = "URL解码:{}".format(url22)
print(decoded_message)

# Encode: escape everything except the URL delimiters ? : = /
url12 = quote(url11, safe="?:=/")
encoded_message = "URL编码:{}".format(url12)
print(encoded_message)
# 5. 将字典元素dict转为json
import json
def index():
    """Collect the results of three readers and return them as a JSON string.

    Calls ``read_file()``, ``read_db()`` and ``read_api()`` in that order
    (they are not defined in this snippet) and serialises their results under
    the keys ``result_file``, ``result_db`` and ``result_api``.
    """
    payload = {}
    payload["result_file"] = read_file()
    payload["result_db"] = read_db()
    payload["result_api"] = read_api()
    return json.dumps(payload)
# 6. 读取json文件转换为dict或list对象
import json
def getJsonData(path:str):
    """Read a UTF-8 JSON file and return the parsed Python object (dict, list, ...)."""
    with open(path, encoding='utf-8') as source:
        return json.load(source)
def saveJsonData(path:str, data:str):
    """Append ``data`` as JSON text followed by a comma to a UTF-8 file.

    Non-ASCII characters are written unescaped. The file is created when it
    does not exist; existing content is kept and the new record is appended.
    """
    with open(path, mode='a+', encoding='utf-8') as sink:
        sink.write(json.dumps(data, ensure_ascii=False) + ",")
# 7.读取json文件
def readJson(jsonPath: str = r'E:\kaifa-file\爬虫\Project\colab\rmUtil\files.json'):
    """Read a UTF-8 JSON file and return the parsed object.

    Args:
        jsonPath: File to read. Defaults to the path this note originally
            hard-coded, so calling ``readJson()`` with no argument behaves
            as before.

    Returns:
        The Python object decoded from the file's JSON content.
    """
    with open(jsonPath, 'r', encoding='utf-8') as f:
        return json.load(f)
# Demo call: executes as soon as this statement runs, loading the JSON file
# through readJson() and printing the parsed result.
print(readJson())
# 8. python获取文件夹名称并删除
import os
import shutil
# Delete every sub-folder found directly inside folder_path.
folder_path = "文件夹路径"
folder_names = os.listdir(folder_path)
for folder_name in folder_names:
    # Use a separate variable: reassigning folder_path here would make each
    # later iteration join onto the previously deleted sub-folder's path.
    sub_path = os.path.join(folder_path, folder_name)
    # os.listdir also returns plain files, on which shutil.rmtree raises.
    if os.path.isdir(sub_path):
        shutil.rmtree(sub_path)
# 9.venv与conda两种虚拟环境的创建方式
# 1). venv虚拟环境(创建环境、进入环境、退出环境)
python -m venv myenv
myenv\Scripts\activate
deactivate
# 2). conda虚拟环境(创建环境、进入环境、退出环境)
conda create --name myenv python=3.8
conda activate myenv 或 source activate myenv
conda deactivate 或 source deactivate
# 10.列表的深拷贝和浅拷贝
# Shallow copy: a new outer list is created, but its elements are the same
# objects as in list_1 (the nested list and dict are shared).
list_1 = [1, [22, 33, 44], (5, 6, 7), {"name": "Sarah"}]
list_2 = list_1.copy() # form 1: list.copy()
list_3 = list(list_1) # form 2: list() constructor
# Deep copy: nested objects are copied recursively, so list_2 shares nothing
# mutable with list_1.
import copy
list_1 = [1, [22, 33, 44], (5, 6, 7), {"name": "Sarah"}]
list_2 = copy.deepcopy(list_1) # deep copy