京东
# -*- coding: utf-8 -*-
import requests
import urllib.request
import json
import xlwt
import time
import datetime

# Fetch and unwrap the JSONP floor data, then prepare the workbook:
# list_url = []
# list_title = []
# list_img = []
# wb = xlwt.Workbook()
# ws = wb.add_sheet('电脑数码')
# url = 'http://f.3.cn/index-floor?argv=basic_3'
# url_data = urllib.request.urlopen(url).read().decode("utf-8")
# # url_data = requests.get(url)
# # url_data = url_data.text
# url_data = url_data.replace('jsonCallBackbasic_6(', '')
# url_data = url_data.replace(')', '')
# jsDict = json.loads(url_data)
# jsdata = jsDict['data']


def one_floor(jsdata, wb, ws):
    """Parse one JD.com PC homepage floor and write its links, images and titles to an Excel sheet."""
    list_url = []
    list_title = []
    list_img = []

    jscols = jsdata['cols'][0]
    jscols_brand = jscols['brand']
    jscols_content = jscols['content']
    jscols_tag = jscols['tag']
    print(jscols_tag)
    print(jscols_content)
    print(len(jscols_content))

    jscols_content1 = jscols_content[0]
    jscols_content2 = jscols_content[1]
    print(jscols_content1)
    print(jscols_content2)

    jscols_content1_cover = jscols_content1['cover']
    jscols_content2_cover = jscols_content2['cover']
    jscols_content1_bottom = jscols_content1['bottom']
    jscols_content2_bottom = jscols_content2['bottom']
    print(jscols_content1_cover)
    print(jscols_content2_cover)
    print(jscols_content1_bottom)
    print(jscols_content2_bottom)

    # Cover banners of the left (SL) and right (SR) columns
    for each in range(len(jscols_content1_cover)):
        list_url.append(jscols_content1_cover[each]['url'])
        list_url.append(jscols_content2_cover[each]['url'])
        list_img.append('http:' + jscols_content1_cover[each]['imgUrl'])
        list_img.append('http:' + jscols_content2_cover[each]['imgUrl'])
        list_title.append('SL21' + jscols_content1_cover[each]['title'])
        list_title.append('SR21' + jscols_content2_cover[each]['title'])

    # Brand slots
    for each in range(len(jscols_brand)):
        list_url.append(jscols_brand[each]['href'])
        list_img.append('http:' + jscols_brand[each]['src'])
        list_title.append('S4' + str(each + 1))

    # Bottom slots of the left column
    for each in jscols_content1_bottom:
        for every in range(len(each)):
            list_url.append(each[every]['url'])
            list_img.append('http:' + each[every]['imgUrl'])
            list_title.append('SL3' + str(every + 1))
            # print(each[every])

    # Bottom slots of the right column
    for each in jscols_content2_bottom:
        for every in range(len(each)):
            list_url.append(each[every]['url'])
            list_img.append('http:' + each[every]['imgUrl'])
            list_title.append('SR3' + str(every + 1))

    # Text tags (no image)
    for each in range(len(jscols_tag)):
        list_url.append(jscols_tag[each]['url'])
        list_img.append('no img')
        list_title.append('S1' + str(each + 1) + jscols_tag[each]['title'])

    # Header row
    ws.write(0, 0, '地点')
    ws.write(0, 1, '位置')
    ws.write(0, 2, 'URL')
    ws.write(0, 3, '照片')
    ws.write(0, 4, '主题')
    ws.write(0, 5, '时间')

    # One data row per collected slot
    for each in range(len(list_title)):
        Add_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        ws.write(each + 1, 0, '广州')
        ws.write(each + 1, 1, '京东pc楼层')
        ws.write(each + 1, 2, list_url[each])
        ws.write(each + 1, 3, list_img[each])
        ws.write(each + 1, 4, list_title[each])
        ws.write(each + 1, 5, Add_time)
    wb.save('D:\\jd\\jd_pc.xls')

    print(list_title)
    print(list_img)
    print(list_url)

    # Download every slot image that actually has a URL
    x = 0
    for j in range(len(list_img)):
        if list_img[j] == 'no img':
            print(list_img[j])
        else:
            print(list_img[j])
            urllib.request.urlretrieve(list_img[j], 'D:\\jd_pc_pic\\' + list_title[j] + '.jpg')
            # urllib.request.urlretrieve(j, 'D:\\jd_pc_pic\\' + '\\%s.jpg' % x)
            x = x + 1

    # print(jscols_brand)
    # print(len(jscols))
    # print(jscols)
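
# A minimal driver sketch, assuming the commented-out setup at the top of the file reflects
# the intended usage. Assumptions: the endpoint http://f.3.cn/index-floor?argv=basic_3 still
# returns JSONP; the callback name in the original comment ("jsonCallBackbasic_6") does not
# match argv=basic_3, so the wrapper is stripped generically here instead of by name; the
# directories D:\jd and D:\jd_pc_pic already exist.
# import json
# import re
# import urllib.request
# import xlwt
#
# if __name__ == '__main__':
#     url = 'http://f.3.cn/index-floor?argv=basic_3'
#     raw = urllib.request.urlopen(url).read().decode('utf-8')
#     # Strip the JSONP wrapper "jsonCallBackbasic_N( ... )" whatever the callback name is
#     body = re.sub(r'^[^(]*\(', '', raw.strip(), count=1).rstrip(');')
#     jsDict = json.loads(body)
#     jsdata = jsDict['data']
#
#     wb = xlwt.Workbook()
#     ws = wb.add_sheet('电脑数码')
#     one_floor(jsdata, wb, ws)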