自动化测试爬取抖音的用户信息

# -*- coding: utf-8 -*-
# TODO 抖音爬取
"""
url:"https://www.douyin.com/aweme/v1/web/discover/search/?device_platform=webapp&aid=6383&channel=channel_pc_web&search_channel=aweme_user_web&keyword=%E5%AE%9C%E6%98%8C&search_source=switch_tab&query_correct_type=1&is_filter_search=0&offset=12&count=6&search_id=202203241001190101510740730D33498B&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1536&screen_height=864&browser_language=zh-CN&browser_platform=Win32&browser_name=Chrome&browser_version=99.0.4844.51&browser_online=true&engine_name=Blink&engine_version=99.0.4844.51&os_name=Windows&os_version=10&cpu_core_num=4&device_memory=8&platform=PC&downlink=5.75&effective_type=4g&round_trip_time=50&webid=7077846684190148127&msToken=yaBuTTR4uGFpgn39Hv0eT5g_nVQL8oERuLSsXvA6K0ntSj1DIbYncFLlqxo-xZhWUq8KOADdRcIPtN8fMvJDtOhCt6GFPD9U-ixbFpZK5OGzZxvXz44aB7f87gET290=&X-Bogus=DFSzswVOoeJANxo9SRS6El9WX7Jx&_signature=_02B4Z6wo000010FpbQgAAIDDwWuXS4AnaD9BaWmAALJs8TKBDtkXEkGcyTo5VcXbXPFT9YqmbKQ2nb-HKwrkB.uk4dIExcCraLfmDfpppToMLv3cJwMvL7fkFVsnpyZq.P7hhWfx1QPi4O2-12"
例如:
"nickname":"xxx"
"enterprise_verify_reason":"xxx办公室"
"unique_id":"xxx"
"follower_count":"492710"
"""
# @Date    : 2022/3/24 9:59
# @Author  : layman
import logging
import time

import pymysql
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
)
from selenium.webdriver.common.by import By

# --- Configuration -----------------------------------------------------------

# Pause after the search page is opened, before the first scan.
# NOTE(review): presumably this gives the operator time to log in / solve the
# captcha by hand -- confirm before shortening it.
STARTUP_WAIT_SECONDS = 200
# Pause between two consecutive scans of the result list.
POLL_INTERVAL_SECONDS = 2

SEARCH_URL = "https://www.douyin.com/search/xxx?source=switch_tab&type=user"

# One <li> per user in the search result list.
RESULT_ITEMS_XPATH = '//*[@id="dark"]/div[2]/div/div[3]/div[3]/ul/li[*]'
# XPaths below are relative to a single result <li>.
NAME_XPATH = './div/a/div[1]/div[2]/p/span/span/span/span/span'
VERIFY_XPATH = './div/a/div[1]/div[2]/div/p'
UID_XPATH = './div/a/div[2]/span[1]/span'
FANS_XPATH = './div/a/div[2]/span[5]'

INSERT_SQL = "INSERT INTO douyin(uname, uverify, uid,ufans) VALUES (%s,%s,%s,%s)"

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _text_or_default(parent, xpath, default):
    """Return the text of the element at *xpath* under *parent*.

    Returns *default* when no such element exists; any other WebDriver error
    propagates to the caller.
    """
    try:
        return parent.find_element(By.XPATH, xpath).text
    except NoSuchElementException:
        return default


def _read_user_row(li):
    """Extract ``(uname, uverify, uid, ufans)`` from one result ``<li>``.

    The verification text and the fan count are optional and fall back to
    '无认证' and '0' respectively.

    Raises:
        NoSuchElementException: the name or id element is missing.
        StaleElementReferenceException: the ``<li>`` was detached from the
            DOM while it was being read.
    """
    uname = li.find_element(By.XPATH, NAME_XPATH).text
    uverify = _text_or_default(li, VERIFY_XPATH, '无认证')
    uid = li.find_element(By.XPATH, UID_XPATH).text
    ufans = _text_or_default(li, FANS_XPATH, '0')
    return (uname, uverify, uid, ufans)


# --- Script ------------------------------------------------------------------

# Open the database connection.
# utf8mb4 instead of MySQL's 3-byte "utf8": user names containing emoji or
# other 4-byte characters cannot be stored through a "utf8" connection.
# NOTE(review): credentials are hard-coded; move them to configuration.
db = pymysql.connect(host='localhost', port=3306,
                     user='root', passwd='root', db='xxx', charset='utf8mb4')
try:
    cursor = db.cursor()
    driver = webdriver.Chrome()
    try:
        driver.get(SEARCH_URL)
        time.sleep(STARTUP_WAIT_SECONDS)

        # Rows already handed to the database in this run. The loop rescans
        # the whole list on every pass, so without this every visible user
        # would be inserted again every POLL_INTERVAL_SECONDS.
        seen_rows = set()

        # NOTE: nothing in this script scrolls the page (the original
        # automatic scroll call was left disabled), so additional results
        # are only picked up once they have been loaded into the list by
        # other means. The loop runs until interrupted (Ctrl-C).
        while True:
            time.sleep(POLL_INTERVAL_SECONDS)
            for li in driver.find_elements(By.XPATH, RESULT_ITEMS_XPATH):
                try:
                    values = _read_user_row(li)
                except (NoSuchElementException, StaleElementReferenceException):
                    # Row is incomplete or was re-rendered mid-read; it will
                    # be picked up again on a later scan.
                    continue

                if values in seen_rows:
                    continue
                seen_rows.add(values)

                try:
                    cursor.execute(INSERT_SQL, values)
                    db.commit()
                except pymysql.MySQLError:
                    # Keep scraping, but leave the connection in a clean
                    # state and make the failure visible instead of hiding it.
                    db.rollback()
                    logger.exception("Failed to insert row %r", values)
    except KeyboardInterrupt:
        logger.info("Interrupted by user; shutting down.")
    finally:
        driver.quit()
finally:
    db.close()

posted @   biglayman  阅读(1863)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 单线程的Redis速度为什么快?
· SQL Server 2025 AI相关能力初探
· AI编程工具终极对决:字节Trae VS Cursor,谁才是开发者新宠?
· 展开说说关于C#中ORM框架的用法!
点击右上角即可分享
微信分享提示