微博爬虫 ----- 微博发布时间清洗

 

 

from datetime import datetime
from datetime import timedelta

# Normalize the raw publish-time text of a Weibo post (held in the
# externally supplied ``publish_time`` variable) and print the result.
#
# Branches, checked in this order:
#   contains "刚刚" (just now)      -> current time as "YYYY-MM-DD HH:MM"
#   contains "分钟" (N minutes ago) -> current time minus N minutes
#   contains "今天" (today HH:MM)   -> today's date followed by the clock part
#   contains "月"   (MM月DD日 ...)  -> current year prepended, "月" -> "-",
#                                      "日" removed
#   anything else                   -> first 16 characters, presumably an
#                                      already complete "YYYY-MM-DD HH:MM..."
#                                      value -- TODO confirm against real data
#
# NOTE(review): the month/day markers in the fourth branch were empty strings
# in the previous revision, which made that branch match every input (an empty
# string is a substring of any string) and made ``replace('', '-')`` insert a
# dash between every character. They are restored as "月"/"日" based on the
# surrounding replace pattern -- confirm against the scraped data.

# Read the clock once so every branch is computed from the same instant.
now = datetime.now()

if "刚刚" in publish_time:
    publish_time = now.strftime("%Y-%m-%d %H:%M")

elif "分钟" in publish_time:
    # Everything before "分钟" is the number of minutes elapsed.
    minutes_ago = int(publish_time[:publish_time.find("分钟")])
    publish_time = (now - timedelta(minutes=minutes_ago)).strftime(
        "%Y-%m-%d %H:%M")

elif "今天" in publish_time:
    # Strip the remainder so a separator left behind after removing "今天"
    # does not produce a double space in the result.
    clock_part = publish_time.replace("今天", "").strip()
    publish_time = now.strftime("%Y-%m-%d") + " " + clock_part

elif "月" in publish_time:
    publish_time = str(publish_time)
    # Debug output of the raw value, retained from the original script.
    print(publish_time)

    # The text carries no year, so the current year is assumed.
    publish_time = (
        now.strftime("%Y") + "-"
        + publish_time.replace("月", "-").replace("日", ""))

else:
    publish_time = publish_time[:16]

print("微博发布时间: " + publish_time)

 

posted @ 2018-10-19 09:28  淋哥  阅读(1193)  评论(0)  编辑  收藏  举报