获取微博内容中的文字和表情。
表情取其 alt 属性的值。
传入内容所在的 div 区域即可,node 即内容最外层的节点。
@decorator
def getinfo_emoji(self, node):
    """Return the text and emoji of a Weibo post as one compact string.

    Emoji contribute the value of their ``alt`` attribute.  Pass the element
    that wraps the post content (the content ``div``); ``node`` is treated
    as the outermost node of that content.

    Args:
        node: Element exposing ``xpath()`` and ``text``; every descendant
            must expose ``tag``, ``attrib``, ``text`` and ``tail``
            (presumably an lxml element -- confirm against the caller).

    Returns:
        The stripped ``node.text`` followed by the fragments collected from
        every descendant element, with all spaces and zero-width spaces
        (U+200B) removed.
    """
    fragments = []
    for element in node.xpath(".//*"):
        alt = element.attrib.get("alt")
        if element.tag == "a":
            fragments += [alt, element.text, element.tail]
        else:
            # NOTE(review): non-anchor elements emit their tail before their
            # text, and fragments are grouped per element rather than in
            # strict document order.  Kept as-is because callers may rely on
            # the current output -- confirm whether this is intended.
            fragments += [alt, element.tail, element.text]

    # node.text can be None (e.g. when the content starts with a child
    # element such as an emoji image), so fall back to an empty string
    # instead of crashing on None.strip().
    leading = (node.text or "").strip().replace(" ", "").replace("\u200b", "")

    # Clean all fragments in one pass; this yields the same string as
    # folding the per-element lists pairwise, without re-scanning the
    # accumulated result on every step.
    trailing = self.listlisttostr(fragments, []) if fragments else ""
    return leading + trailing


@decorator
def listlisttostr(self, x, y):
    """Concatenate the cleaned text fragments of ``x`` followed by ``y``.

    Args:
        x: Iterable of fragments (``str`` or ``None``).  A plain string is
            also accepted and is processed character by character.
        y: Iterable of fragments, same conventions as ``x``.

    Returns:
        A single string made of every non-``None`` fragment with spaces and
        zero-width spaces (U+200B) removed.
    """
    cleaned = []
    for group in (x, y):
        for piece in group:
            # None marks a missing alt/text/tail; empty or blank pieces
            # simply clean down to "" and contribute nothing.
            if piece is not None:
                cleaned.append(piece.replace(" ", "").replace("\u200b", ""))
    return "".join(cleaned)