python cheat sheet (external modules)
- python parsing XML / HTML
# 1 from lxml import etree
# 2 import xml.dom.minidom
# 3 from bs4 import BeautifulSoup
# 4 import xmltodict
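A quick sketch using option #1, lxml (the XML string here is made up for illustration):
from lxml import etree

root = etree.fromstring('<root><item id="1">hello</item></root>')
print(root.find('item').text)       # hello
print(root.find('item').get('id'))  # 1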
- python reading a file share (SMB)
'''
https://pypi.org/project/smbprotocol/
# pip install smbprotocol
'''
import smbclient
smbclient.register_session("10.101.x.x", username="xxx", password="xxx")
img = smbclient.listdir(r"\\10.101.x.x\img")  # file names in the share
print(img[0])
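Reading a file from the same share works through smbclient.open_file (a sketch; the file name example.jpg is made up):
with smbclient.open_file(r"\\10.101.x.x\img\example.jpg", mode="rb") as fd:
    data = fd.read()  # bytes of the remote file
print(len(data))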
- ddt.py (data-driven testing)
A complete runnable example:
import unittest
from ddt import ddt, data, unpack

@ddt
class MyTesting(unittest.TestCase):
    '''prints once:
    test_a : [1, 2, 3]
    '''
    @data([1, 2, 3])
    def test_a(self, value):
        print('test_a :', value)

    '''prints 3 times:
    test_b : 1
    test_b : 2
    test_b : 3
    '''
    @data(1, 2, 3)
    def test_b(self, value):
        print('test_b :', value)

    # The following raises at load time and the suite cannot run:
    # @unpack cannot unpack a plain int
    # add_test() argument after ** must be a mapping, not int
    # @data(1, 2, 3)
    # @unpack
    # def test_b_unpacked(self, value):
    #     print('test_b :', value)

    '''prints 2 times:
    test_c : [2, 3]
    test_c : [4, 5]
    '''
    @data([2, 3], [4, 5])
    def test_c(self, a):
        print('test_c :', a)

    # This test errors but the rest of the suite still runs:
    # test_compare() missing 1 required positional argument: 'b'
    @data([2, 3], [4, 5])
    def test_compare(self, a, b):
        print('test_compare :', a, b)
        self.assertEqual(a, b)

    '''prints 2 times:
    test_d : 2 3
    test_d : 4 5
    '''
    @data([2, 3], [4, 5])
    @unpack
    def test_d(self, a, b):
        print('test_d :', a, b)
        self.assertEqual(a, b)

    '''prints 3 times:
    test_minus : 3 2 1
    test_minus : 5 3 2
    test_minus : 10 4 6
    '''
    @data([3, 2, 1], [5, 3, 2], [10, 4, 6])
    @unpack
    def test_minus(self, a, b, expected):
        print('test_minus :', a, b, expected)
        actual = int(a) - int(b)
        expected = int(expected)
        self.assertEqual(actual, expected)

if __name__ == '__main__':
    unittest.main(verbosity=2)
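ddt can also load cases from a JSON/YAML file via @file_data; a minimal sketch (cases.json and its contents are made up):
import unittest
from ddt import ddt, file_data

@ddt
class FileDriven(unittest.TestCase):
    # cases.json might contain: {"case1": 1, "case2": 2}
    # each top-level key becomes one generated test
    @file_data('cases.json')
    def test_from_file(self, value):
        print('test_from_file :', value)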
- apscheduler (scheduled-task framework)
Framework for scheduling interval, cron, and one-off jobs.
https://blog.csdn.net/somezz/article/details/83104368
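A minimal interval-job sketch for APScheduler 3.x (the 5-second interval and job body are made up for illustration):
from apscheduler.schedulers.blocking import BlockingScheduler

def job():
    print('tick')

sched = BlockingScheduler()
sched.add_job(job, 'interval', seconds=5)  # 'cron' and 'date' triggers also exist
sched.start()                              # blocks the current thread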
- asyncio module
Standard-library framework for coroutine-based asynchronous I/O.
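A minimal sketch of running coroutines concurrently (Python 3.7+; fetch is a made-up stand-in for real I/O):
import asyncio

async def fetch(n):
    await asyncio.sleep(1)  # stand-in for real non-blocking I/O
    return n * 2

async def main():
    results = await asyncio.gather(*(fetch(i) for i in range(3)))
    print(results)  # [0, 2, 4], after ~1 second total

asyncio.run(main())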
- gevent module
Greenlet-based coroutine library for cooperative concurrency.
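A minimal sketch of cooperative greenlets (the task body is made up for illustration):
from gevent import monkey
monkey.patch_all()  # patch the stdlib so blocking calls yield cooperatively
import gevent

def task(n):
    gevent.sleep(1)  # yields to other greenlets
    print('task', n)

jobs = [gevent.spawn(task, i) for i in range(3)]
gevent.joinall(jobs)  # all three finish in about 1 second total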
- Twisted module
Event-driven networking engine.
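A minimal reactor sketch (the 1-second loop and 5-second shutdown are made up for illustration):
from twisted.internet import task, reactor

def tick():
    print('tick')

loop = task.LoopingCall(tick)
loop.start(1.0)                     # call tick() every second
reactor.callLater(5, reactor.stop)  # shut the reactor down after 5 seconds
reactor.run()                       # blocks until reactor.stop()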
- BeautifulSoup module
#1 Basic usage
import requests
from bs4 import BeautifulSoup

response = requests.get("https://www.autohome.com.cn/news/")
response.encoding = 'gbk'
soup = BeautifulSoup(response.text, 'html.parser')
div = soup.find(name='div', attrs={'id': 'auto-channel-lazyload-article'})
li_list = div.find_all(name='li')
for li in li_list:
    title = li.find(name='h3')
    if not title:
        continue
    p = li.find(name='p')
    a = li.find(name='a')
    print(title.text)
    print(a.attrs.get('href'))
    print(p.text)
    img = li.find(name='img')
    src = img.get('src')
    src = "https:" + src
    print(src)
    # make a second request to download the image
    file_name = src.rsplit('/', maxsplit=1)[1]
    ret = requests.get(src)
    # with open(file_name, 'wb') as f:
    #     f.write(ret.content)
#2 Real-world examples
#a Stories
# url and headers are defined elsewhere in the original script
session = requests.session()
resp = session.get(url=url, headers=headers)
soup = BeautifulSoup(resp.text, 'html.parser')
# find_all returns a list of Tag objects: [Tag, Tag, ...]
item_list = soup.find_all(name='div', attrs={'class': 'down16x16'})
len_of_story = len(item_list)
for idx, item in enumerate(item_list):
    a = item.find(name='a')  # the first a tag under the div, <class 'bs4.element.Tag'>
    a_href = item.a['href']  # href attribute of that a tag, <class 'str'>
    download_href(a_href)    # download; download_href is a helper defined elsewhere
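download_href is not shown above; a minimal sketch of what such a helper might do (this implementation is assumed, not from the original):
def download_href(href, headers=None):
    # hypothetical helper: save the file under the last path segment of its URL
    file_name = href.rsplit('/', maxsplit=1)[-1]
    resp = requests.get(href, headers=headers)
    with open(file_name, 'wb') as f:
        f.write(resp.content)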
#b Movies
User_Agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'
headers = {
    'User-Agent': User_Agent,
}
r = requests.get(url="xxx", headers=headers)
html = r.text
name_list = []      # collected movie names
download_list = []  # collected movie download links
bs = BeautifulSoup(html, "html.parser")
b = bs.find_all(class_="co_content8")
b = b[0].find_all(class_="ulink")  # the movie list on this page
for i in range(0, len(b)):
    name = b[i].get_text()  # each movie's name; same pattern as item.a['href'] in the stories example
    href = "xxx" + b[i].get("href")  # URL of each movie's detail page
    r1 = requests.get(url=href, headers=headers)  # fetch the detail page
    html1 = r1.text
    bs1 = BeautifulSoup(html1, "html.parser")
    b1 = bs1.find("tbody").find_next("td").find_next("a")  # chained find_next traversal
    download_url = b1.get("href")  # the download link
    name_list.append(name)
    download_list.append(download_url)
Using enumerate together with list:
soup = BeautifulSoup(html, 'lxml')
print('Next Sibling', soup.a.next_sibling)
print('Next Siblings', list(enumerate(soup.a.next_siblings)))
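A self-contained version of the same idea (the html string here is made up for illustration):
from bs4 import BeautifulSoup

html = '<p><a href="#">first</a><b>bold</b><i>italic</i></p>'
soup = BeautifulSoup(html, 'lxml')
print('Next Sibling', soup.a.next_sibling)                     # <b>bold</b>
print('Next Siblings', list(enumerate(soup.a.next_siblings)))  # [(0, <b>bold</b>), (1, <i>italic</i>)]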