微博爬虫 ----- 微博发布时间清洗
from datetime import datetime
from datetime import timedelta

# Normalize the Weibo "published at" label held in `publish_time` into a
# "YYYY-MM-DD HH:MM"-style string, rebind `publish_time` to the result and
# print it.
#
# `publish_time` must already be bound to the raw label text by the
# surrounding code. Branches are tested in this order:
#   contains "刚刚" ("just now")     -> the current time
#   contains "分钟" ("minutes")      -> now minus the integer that precedes it
#   contains "今天" ("today")        -> today's date + the remaining text
#   contains "月"   ("month")        -> current year + "MM-DD ..." text
#   anything else                    -> the first 16 characters, unchanged
#
# Raises ValueError when the text before "分钟" is not an integer.
# NOTE(review): on Python 2 the literals below are byte strings, so this
# presumably expects `publish_time` to be a UTF-8 byte string too - confirm
# against the caller.

# Read the clock once so every branch works from the same instant.
now = datetime.now()

if "刚刚" in publish_time:
    publish_time = now.strftime("%Y-%m-%d %H:%M")
elif "分钟" in publish_time:
    # Everything before "分钟" is the number of minutes elapsed.
    minutes_ago = int(publish_time[:publish_time.find("分钟")])
    publish_time = (now - timedelta(minutes=minutes_ago)).strftime(
        "%Y-%m-%d %H:%M")
elif "今天" in publish_time:
    # Strip the remainder so a label such as "今天 12:30" does not leave a
    # double space between the date and the time.
    clock = publish_time.replace("今天", "").strip()
    publish_time = now.strftime("%Y-%m-%d") + " " + clock
elif "月" in publish_time:
    # The label carries no year, so the current year is used.
    # NOTE(review): a December label read in January would get the wrong
    # year - confirm whether the source can show such labels.
    month_day = publish_time.replace("月", "-").replace("日", "")
    publish_time = now.strftime("%Y") + "-" + month_day
else:
    # Keep only the leading "YYYY-MM-DD HH:MM" width, dropping any seconds.
    publish_time = publish_time[:16]

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("微博发布时间: " + publish_time)
如果觉得对您有帮助，麻烦您点一下推荐，谢谢！
好记忆不如烂笔头
好记忆不如烂笔头