python cheat sheet (external modules)
- python parsing XML / HTML
# 1 from lxml import etree
# 2 import xml.dom.minidom
# 3 from bs4 import BeautifulSoup
# 4 import xmltodict
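A quick sketch using option #1, lxml (the XML string here is made up for illustration):
from lxml import etree

root = etree.fromstring('<root><item id="1">hello</item></root>')
print(root.find('item').text)       # hello
print(root.find('item').get('id'))  # 1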
- python reading a file share (SMB)
'''
https://pypi.org/project/smbprotocol/
# pip install smbprotocol
'''
import smbclient
smbclient.register_session("10.101.x.x", username="xxx", password="xxx")
img = smbclient.listdir(r"\\10.101.x.x\img")  # file names in the share
print(img[0])
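Reading a file from the same share works through smbclient.open_file (a sketch; the file name example.jpg is made up):
with smbclient.open_file(r"\\10.101.x.x\img\example.jpg", mode="rb") as fd:
    data = fd.read()  # bytes of the remote file
print(len(data))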
- ddt.py (data-driven testing)
A complete runnable example:
import unittest
from ddt import ddt, data, unpack

@ddt
class MyTesting(unittest.TestCase):
    '''prints once:
    test_a : [1, 2, 3]
    '''
    @data([1, 2, 3])
    def test_a(self, value):
        print('test_a :', value)

    '''prints 3 times:
    test_b : 1
    test_b : 2
    test_b : 3
    '''
    @data(1, 2, 3)
    def test_b(self, value):
        print('test_b :', value)

    # The following raises at load time and the suite cannot run:
    # @unpack cannot unpack a plain int
    # add_test() argument after ** must be a mapping, not int
    # @data(1, 2, 3)
    # @unpack
    # def test_b_unpacked(self, value):
    #     print('test_b :', value)

    '''prints 2 times:
    test_c : [2, 3]
    test_c : [4, 5]
    '''
    @data([2, 3], [4, 5])
    def test_c(self, a):
        print('test_c :', a)

    # This test errors but the rest of the suite still runs:
    # test_compare() missing 1 required positional argument: 'b'
    @data([2, 3], [4, 5])
    def test_compare(self, a, b):
        print('test_compare :', a, b)
        self.assertEqual(a, b)

    '''prints 2 times:
    test_d : 2 3
    test_d : 4 5
    '''
    @data([2, 3], [4, 5])
    @unpack
    def test_d(self, a, b):
        print('test_d :', a, b)
        self.assertEqual(a, b)

    '''prints 3 times:
    test_minus : 3 2 1
    test_minus : 5 3 2
    test_minus : 10 4 6
    '''
    @data([3, 2, 1], [5, 3, 2], [10, 4, 6])
    @unpack
    def test_minus(self, a, b, expected):
        print('test_minus :', a, b, expected)
        actual = int(a) - int(b)
        expected = int(expected)
        self.assertEqual(actual, expected)

if __name__ == '__main__':
    unittest.main(verbosity=2)
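ddt can also load cases from a JSON/YAML file via @file_data; a minimal sketch (cases.json and its contents are made up):
import unittest
from ddt import ddt, file_data

@ddt
class FileDriven(unittest.TestCase):
    # cases.json might contain: {"case1": 1, "case2": 2}
    # each top-level key becomes one generated test
    @file_data('cases.json')
    def test_from_file(self, value):
        print('test_from_file :', value)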
- apscheduler (scheduled-task framework)
Framework for scheduling interval, cron, and one-off jobs.
https://blog.csdn.net/somezz/article/details/83104368
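A minimal interval-job sketch for APScheduler 3.x (the 5-second interval and job body are made up for illustration):
from apscheduler.schedulers.blocking import BlockingScheduler

def job():
    print('tick')

sched = BlockingScheduler()
sched.add_job(job, 'interval', seconds=5)  # 'cron' and 'date' triggers also exist
sched.start()                              # blocks the current thread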
- asyncio module
Standard-library framework for coroutine-based asynchronous I/O.
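A minimal sketch of running coroutines concurrently (Python 3.7+; fetch is a made-up stand-in for real I/O):
import asyncio

async def fetch(n):
    await asyncio.sleep(1)  # stand-in for real non-blocking I/O
    return n * 2

async def main():
    results = await asyncio.gather(*(fetch(i) for i in range(3)))
    print(results)  # [0, 2, 4], after ~1 second total

asyncio.run(main())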
- gevent module
Greenlet-based coroutine library for cooperative concurrency.
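A minimal sketch of cooperative greenlets (the task body is made up for illustration):
from gevent import monkey
monkey.patch_all()  # patch the stdlib so blocking calls yield cooperatively
import gevent

def task(n):
    gevent.sleep(1)  # yields to other greenlets
    print('task', n)

jobs = [gevent.spawn(task, i) for i in range(3)]
gevent.joinall(jobs)  # all three finish in about 1 second total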
- Twisted module
Event-driven networking engine.
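A minimal reactor sketch (the 1-second loop and 5-second shutdown are made up for illustration):
from twisted.internet import task, reactor

def tick():
    print('tick')

loop = task.LoopingCall(tick)
loop.start(1.0)                     # call tick() every second
reactor.callLater(5, reactor.stop)  # shut the reactor down after 5 seconds
reactor.run()                       # blocks until reactor.stop()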
- BeautifulSoup module
#1 Basic usage
import requests
from bs4 import BeautifulSoup

response = requests.get("https://www.autohome.com.cn/news/")
response.encoding = 'gbk'
soup = BeautifulSoup(response.text, 'html.parser')
div = soup.find(name='div', attrs={'id': 'auto-channel-lazyload-article'})
li_list = div.find_all(name='li')
for li in li_list:
    title = li.find(name='h3')
    if not title:
        continue
    p = li.find(name='p')
    a = li.find(name='a')
    print(title.text)
    print(a.attrs.get('href'))
    print(p.text)
    img = li.find(name='img')
    src = img.get('src')
    src = "https:" + src
    print(src)
    # make a second request to download the image
    file_name = src.rsplit('/', maxsplit=1)[1]
    ret = requests.get(src)
    # with open(file_name, 'wb') as f:
    #     f.write(ret.content)
#2 Real-world examples
#a Stories
# url and headers are defined elsewhere in the original script
session = requests.session()
resp = session.get(url=url, headers=headers)
soup = BeautifulSoup(resp.text, 'html.parser')
# find_all returns a list of Tag objects: [Tag, Tag, ...]
item_list = soup.find_all(name='div', attrs={'class': 'down16x16'})
len_of_story = len(item_list)
for idx, item in enumerate(item_list):
    a = item.find(name='a')  # the first a tag under the div, <class 'bs4.element.Tag'>
    a_href = item.a['href']  # href attribute of that a tag, <class 'str'>
    download_href(a_href)    # download; download_href is a helper defined elsewhere
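download_href is not shown above; a minimal sketch of what such a helper might do (this implementation is assumed, not from the original):
def download_href(href, headers=None):
    # hypothetical helper: save the file under the last path segment of its URL
    file_name = href.rsplit('/', maxsplit=1)[-1]
    resp = requests.get(href, headers=headers)
    with open(file_name, 'wb') as f:
        f.write(resp.content)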
#b Movies
User_Agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'
headers = {
    'User-Agent': User_Agent,
}
r = requests.get(url="xxx", headers=headers)
html = r.text
name_list = []      # collected movie names
download_list = []  # collected movie download links
bs = BeautifulSoup(html, "html.parser")
b = bs.find_all(class_="co_content8")
b = b[0].find_all(class_="ulink")  # the movie list on this page
for i in range(0, len(b)):
    name = b[i].get_text()  # each movie's name; same pattern as item.a['href'] in the stories example
    href = "xxx" + b[i].get("href")  # URL of each movie's detail page
    r1 = requests.get(url=href, headers=headers)  # fetch the detail page
    html1 = r1.text
    bs1 = BeautifulSoup(html1, "html.parser")
    b1 = bs1.find("tbody").find_next("td").find_next("a")  # chained find_next traversal
    download_url = b1.get("href")  # the download link
    name_list.append(name)
    download_list.append(download_url)
Using enumerate together with list:
soup = BeautifulSoup(html, 'lxml')
print('Next Sibling', soup.a.next_sibling)
print('Next Siblings', list(enumerate(soup.a.next_siblings)))
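A self-contained version of the same idea (the html string here is made up for illustration):
from bs4 import BeautifulSoup

html = '<p><a href="#">first</a><b>bold</b><i>italic</i></p>'
soup = BeautifulSoup(html, 'lxml')
print('Next Sibling', soup.a.next_sibling)                     # <b>bold</b>
print('Next Siblings', list(enumerate(soup.a.next_siblings)))  # [(0, <b>bold</b>), (1, <i>italic</i>)]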