获取微博内容的文字以及表情

表情通过读取 alt 属性的值来获取

传入内容所在的 div 区域即可,node 就是内容最外层的节点。

    @decorator
    def getinfo_emoji(self, node):
        """Return the text of a Weibo content node with emoji inlined.

        Emoji are represented by the ``alt`` attribute of descendant
        elements; ordinary text comes from each element's ``text`` and
        ``tail``. Regular spaces and zero-width spaces (U+200B) are removed
        from every fragment.

        Args:
            node: Outermost element of the content area. It must provide
                ``xpath`` and ``text``; every descendant must provide
                ``tag``, ``attrib``, ``text`` and ``tail``.

        Returns:
            str: The node's own leading text followed by the fragments of
            every descendant, in the order ``xpath(".//*")`` yields them.
        """
        fragments = []
        for element in node.xpath(".//*"):
            alt = element.attrib.get("alt")
            if element.tag == "a":
                fragments.extend((alt, element.text, element.tail))
            else:
                # Non-link elements contribute their tail before their text;
                # this ordering is kept from the original implementation.
                fragments.extend((alt, element.tail, element.text))

        def clean(fragment):
            # Drop regular spaces and zero-width spaces.
            return fragment.replace(" ", "").replace("\u200b", "")

        # node.text can be None (e.g. when the content starts directly with a
        # child element), so fall back to an empty string instead of failing.
        leading = clean((node.text or "").strip())
        return leading + "".join(clean(piece) for piece in fragments if piece)

    @decorator
    def listlisttostr(self, x, y):
        """Concatenate the cleaned fragments of ``x`` followed by those of ``y``.

        Each argument is iterated; items equal to ``''``, ``' '`` or ``None``
        are skipped, and every remaining item has its regular spaces and
        zero-width spaces (U+200B) removed before joining.

        Args:
            x: Iterable of string fragments (a plain string is iterated
                character by character).
            y: Iterable of string fragments.

        Returns:
            str: The cleaned content of ``x`` immediately followed by the
            cleaned content of ``y``.
        """
        skipped = ('', ' ', None)

        def squash(fragments):
            kept = []
            for piece in fragments:
                if piece in skipped:
                    continue
                kept.append(piece.replace(" ", "").replace("\u200b", ""))
            return "".join(kept)

        return squash(x) + squash(y)

  

posted @ 2018-03-28 15:45  公众号python学习开发  阅读(320)  评论(0)  编辑  收藏  举报