python 下载图片、音频、视频文件

def catch_data(url,FileName):
    """Scrape a product-list page and save the extracted rows to a workbook.

    The page at ``url`` is opened in a browser driver obtained from
    ``commonMethod``, scrolled down in 11 steps, and the rendered HTML is
    parsed with ``pq``.  For every ``li`` whose class starts with
    ``gl-item`` the image link, price, product name, comment count and shop
    name are collected, printed, and written (below a header row) to an
    ``xlwt`` workbook saved as ``FileName``.

    Args:
        url: Address of the listing page to load.
        FileName: Path handed to ``Workbook.save``.

    Returns:
        None.  Any exception raised while scraping or saving is printed and
        swallowed (best-effort behaviour kept from the original).
    """
    ip = commonMethod.getIP()
    userAgent = commonMethod.get_userAgent()
    driver = commonMethod.get_driver(ip, userAgent, False)
    try:
        driver.get(url)
        time.sleep(5)

        # Scroll to 1000px, 1500px, ... 6000px with a pause after each step;
        # presumably this gives lazily loaded items time to render.
        for step in range(1, 12):
            offset = 500 * step + 500
            js = "var q=document.documentElement.scrollTop=" + str(offset)
            driver.execute_script(js)
            time.sleep(2)

        selenium_html = driver.execute_script("return document.documentElement.outerHTML")
        doc = pq(selenium_html)
        goods = (
            doc("div[class='ml-wrap']")
            .find("div[id='J_goodsList']")
            .find("ul[class='gl-warp clearfix']")
            .find("li[class^='gl-item']")
        )

        headList = ['大图链接', '价格', '商品名称', '评价数', '店铺名称']
        data_list = []
        for good in goods.items():
            # Every field lives under the same wrapper; look it up once.
            wrap = good.find("div[class='gl-i-wrap']")

            # attr() yields None when the <img> has no src; concatenating
            # that to 'https:' would raise and abort the whole scrape, so a
            # missing link is recorded as an empty string instead.
            src = wrap.find("div[class='p-img']").find("a").find("img").attr('src')
            picture_url = ('https:' + src) if src else ''
            price = wrap.find("div[class='p-price']").text()
            name = wrap.find("div[class='p-name p-name-type-3']").find("a").find("em").text()
            comment_count = wrap.find("div[class='p-commit']").find("strong").find("a").text()
            store = wrap.find("div[class='p-shop']").find("span[class='J_im_icon']").find("a").attr('title')

            print(picture_url,price,name,comment_count,store)
            data_list.append([picture_url, price, name, comment_count, store])

        wbk = xlwt.Workbook()
        sheet1 = wbk.add_sheet('sheet', cell_overwrite_ok=True)

        # Row 0 is the header; data rows follow from row 1.
        commonMethod.WriteSheetRow(sheet1, headList, 0, True)
        for rowIndex, row in enumerate(data_list, start=1):
            commonMethod.WriteSheetRow(sheet1, row, rowIndex, False)
        wbk.save(FileName)

        time.sleep(1)

    except Exception as ex:
        print(ex)
    finally:
        # Always release the browser, even when scraping failed part-way;
        # the original never closed the driver.
        driver.quit()
def catch_category1(FileName,url):
    """Scrape the three-level category menu of a page into a workbook.

    The page at ``url`` is opened in a browser driver obtained from
    ``commonMethod``.  Each first-level menu item is hovered with
    ``ActionChains`` and its text recorded; the rendered HTML is then parsed
    with ``pq``.  Each ``cate_part clearfix`` panel is paired, in order, with
    a first-level name, and every third-level link inside it becomes one row:
    running number, first-, second- and third-level category, link.  Rows are
    printed and written (below a header row) to an ``xlwt`` workbook saved
    as ``FileName``.

    Args:
        FileName: Path handed to ``Workbook.save``.
        url: Address of the page containing the category menu.

    Returns:
        None.  Any exception raised while scraping or saving is printed and
        swallowed (best-effort behaviour kept from the original).
    """
    ip = commonMethod.getIP()
    userAgent = commonMethod.get_userAgent()
    driver = commonMethod.get_driver(ip,userAgent, False)
    try:
        driver.get(url)
        time.sleep(10)

        # NOTE(review): find_elements_by_xpath is the legacy Selenium helper;
        # confirm the installed Selenium version still provides it.
        elements = driver.find_elements_by_xpath('//div[@class="fs_col1"]/div[@id="J_cate"]/ul[@class="JS_navCtn cate_menu"]/li[@class="cate_menu_item"]')
        category_one_list = []
        for element in elements:
            print(element.text)
            category_one_list.append(str(element.text).replace(' / ','/'))
            # Hover over the menu item; presumably this makes the page
            # render the matching pop-up panel that is parsed below.
            ActionChains(driver).move_to_element(element).perform()
            time.sleep(1)

        selenium_html = driver.execute_script("return document.documentElement.outerHTML")
        doc = pq(selenium_html)
        panels = (
            doc("div[class='fs_col1']")
            .find("div[id='J_cate']")
            .find("div[id='J_popCtn']")
            .find("div[class='cate_part clearfix']")
        )

        headList = ['序号', '一级分类', '二级分类', '三级分类', '三级分类链接']
        data_list = []
        count = 1
        # Pair names and panels positionally.  zip() stops at the shorter
        # sequence, so a surplus panel no longer raises IndexError and
        # discards every row collected so far.
        for category_one, panel in zip(category_one_list, panels.items()):
            groups = (
                panel.find("div[class='cate_part_col1']")
                .find("div[class='cate_detail']")
                .find("dl[class^='cate_detail_item cate_detail_item']")
            )
            for group in groups.items():
                category_two = (
                    group.find("dt[class='cate_detail_tit']")
                    .find("a[class='cate_detail_tit_lk']")
                    .text()
                )
                links = group.find("dd[class='cate_detail_con']").find("a[class='cate_detail_con_lk']")
                for link in links.items():
                    category_three = link.text()
                    # attr() yields None for an anchor without href; record
                    # an empty link instead of failing on 'https:' + None.
                    href = link.attr('href')
                    category_three_link = ('https:' + href) if href else ''
                    print(category_one,category_two, category_three, category_three_link)
                    data_list.append(
                        [count, category_one, category_two, category_three, category_three_link]
                    )
                    count += 1

        wbk = xlwt.Workbook()
        sheet1 = wbk.add_sheet('sheet', cell_overwrite_ok=True)

        # Row 0 is the header; data rows follow from row 1.
        commonMethod.WriteSheetRow(sheet1, headList, 0, True)
        for rowIndex, row in enumerate(data_list, start=1):
            commonMethod.WriteSheetRow(sheet1, row, rowIndex, False)
        wbk.save(FileName)

        time.sleep(1)

    except Exception as ex:
        print(ex)
    finally:
        # Always release the browser, even when scraping failed part-way;
        # the original never closed the driver.
        driver.quit()
View Code
posted on 2022-04-13 17:43 shaomine 阅读(30) 评论(0) 编辑收藏举报