# 获取微博广告博文数据 (Fetch promoted/ad post data from the Weibo feed)

import requests
import json
import pandas as pd
import time
import re 

# HTTP request headers passed to requests.get() by get_ad().
# NOTE(review): both values look like placeholders -- presumably they must be
# replaced with a real browser User-Agent and a logged-in session Cookie.
headers = {
    'User-Agent': 'XXXX',
    'Cookie': 'XXX',
}

# Matches any HTML tag; compiled once instead of once per matching card.
_TAG_PATTERN = re.compile(r'<.*?>')

# `from_cateid` values that this script treats as promoted (ad) posts.
_AD_CATEIDS = ('Brand', 'Sfst', 'FanstopExtend', 'Wax')

# Seconds to wait for the server before giving up on a request.
_REQUEST_TIMEOUT = 10


def _extract_ads(cards, page):
    """Build one record per promoted post found in *cards*.

    Cards without an ``mblog`` entry are skipped. The rank is the 1-based
    position of the card within *cards*, counting skipped cards too.
    """
    ads = []
    for rank, card in enumerate(cards, start=1):
        mblog = card.get('mblog')
        if not mblog:
            # Not every card carries a post; indexing 'mblog' directly
            # would raise KeyError and abort the whole crawl.
            continue
        if mblog.get('from_cateid') not in _AD_CATEIDS:
            continue
        user = mblog['user']
        # Key order is kept stable because it determines the column order
        # of the DataFrame built from these records.
        ads.append({
            'uid': user['id'],
            '昵称': user['screen_name'],
            '排名': rank,
            '出现页数': page + 1,
            '博文': _TAG_PATTERN.sub('', mblog['text']),
            '转发数': mblog['reposts_count'],
            '评论数': mblog['comments_count'],
            '点赞数': mblog['attitudes_count'],
        })
    return ads


def get_ad(page):
    """Fetch one page of the Weibo feed and return its promoted posts.

    Args:
        page: Zero-based page number; it is sent as the ``since_id`` query
            parameter and reported as ``page + 1`` in each record.

    Returns:
        A list of dicts, one per card whose ``from_cateid`` is one of the
        ad categories. Each dict holds the user id and screen name, the
        card's rank on the page, the page number, the post text with HTML
        tags removed, and the repost / comment / like counts. The list is
        empty when the page contains no such cards.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the payload lacks ``data`` / ``cards`` or a promoted
            post lacks one of the expected fields.
    """
    url = 'https://m.weibo.cn/api/container/getIndex?containerid=102803&openApp=0&since_id={}'.format(page)

    # Without a timeout a stalled connection would block the script forever.
    res = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    # Fail with a clear HTTP error rather than a confusing parse error later.
    res.raise_for_status()
    data = json.loads(res.text)
    return _extract_ads(data['data']['cards'], page)

# Accumulates the ad records collected from every page.
all_data = []

# Crawl pages 0..49, pausing one second before each request.
for page_index in range(50):
    time.sleep(1)
    # Progress indicator: number of ads collected before this page is fetched.
    print(len(all_data))
    page_ads = get_ad(page_index)
    all_data.extend(page_ads or [])

# Write everything to a timestamped spreadsheet in the working directory.
df1 = pd.DataFrame(all_data)
output_name = 'result{}.xlsx'.format(time.strftime("%Y%m%d%H%M%S"))
df1.to_excel(output_name, index=False)
print('done')

 

# posted @ 2018-05-18 18:12  Erick-LONG  阅读(507)  评论(1)  编辑  收藏  举报