# 汽车之家字体反爬
# 通过修改字体编码的转换方式进行破解
# 仅用于练习字体反爬
#!/usr/bin/env python
# encoding: utf-8
from requests_html import HTMLSession
import re
import os
from fontTools.ttLib import TTFont
class QiCheZhiJia():
    """Fetch one Autohome forum thread and undo its custom-font obfuscation.

    The page renders some characters through a downloadable font. This class
    downloads (and caches under ``./fonts``) the font referenced by the page,
    derives one character per glyph name, and replaces those characters in the
    extracted post text with the entries of ``self.hanzi``.
    """

    # Captures the argument of a ``url('...')`` that directly follows ``'),``
    # in the page source. Presumably this is the TTF entry of the page's
    # @font-face rule -- confirm against the live page.
    _FONT_URL_RE = re.compile(r"'\),url\('(.*?)'\)")

    # Directory used as the on-disk cache for downloaded font files.
    _FONT_DIR = "./fonts"

    def __init__(self):
        """Set up the target URL, the replacement table and empty caches."""
        self.url="https://club.autohome.com.cn/bbs/thread/bb8c36ced93ce182/74203500-1.html"
        # Replacement characters, paired positionally with the font's glyph
        # order (first glyph skipped).
        # NOTE(review): this assumes every downloaded font lists its glyphs in
        # exactly this order -- confirm.
        self.hanzi=['不','了','呢','更','是','四','小','七','三','多','得','一','着','下','十','少','长','二','六','远','左','地','短','九','五','上','坏','很','右','低','高','矮','八','近','大','好','的','和']
        self.session=HTMLSession()
        # Maps '&#x....' entity-style keys to replacement characters.
        self.f_dict={}
        # Full glyph order of the most recently loaded font.
        self.uniWordList=[]
        # One character per glyph (first glyph skipped), aligned with hanzi.
        self.utf8WordList=[]

    @staticmethod
    def _glyph_name_to_char(glyph_name: str) -> str:
        """Return the character encoded by a ``uniXXXX`` glyph name.

        Names that are not ``uni`` followed by 4-6 hex digits, or whose value
        is outside the Unicode range, are returned unchanged so the result
        stays positionally aligned with the glyph list.
        """
        match = re.fullmatch(r"uni([0-9A-Fa-f]{4,6})", glyph_name)
        if match is None:
            return glyph_name
        code_point = int(match.group(1), 16)
        if code_point > 0x10FFFF:
            return glyph_name
        return chr(code_point)

    def _restore_text(self, text: str) -> str:
        """Replace every font-specific character in *text* with its hanzi.

        ``zip`` stops at the shorter of the two lists, so a font that has more
        glyphs than there are entries in ``self.hanzi`` leaves the surplus
        characters untouched instead of raising ``IndexError``.
        """
        for obfuscated, plain in zip(self.utf8WordList, self.hanzi):
            # An empty needle would insert ``plain`` between all characters.
            if obfuscated:
                text = text.replace(obfuscated, plain)
        return text

    def create_font(self, font_url: str) -> None:
        """Load the font at *font_url* and rebuild the glyph lookup tables.

        The font is downloaded into ``./fonts`` only when a file with the same
        name is not already there. Afterwards ``self.f_dict``,
        ``self.uniWordList`` and ``self.utf8WordList`` are updated and the
        latter is printed.

        Args:
            font_url: Absolute URL of the font file; its last path segment is
                used as the local file name.
        """
        font_file = font_url.split('/')[-1]
        font_path = os.path.join(self._FONT_DIR, font_file)
        os.makedirs(self._FONT_DIR, exist_ok=True)

        if not os.path.isfile(font_path):
            # Not cached yet: download it.
            print('不在字体库中, 下载:', font_file)
            response = self.session.get(font_url)
            # Fail here rather than caching an error page as a font file.
            response.raise_for_status()
            with open(font_path, 'wb') as f:
                f.write(response.content)

        font = TTFont(font_path)
        glyph_order = font.getGlyphOrder()
        # The first glyph is skipped (typically '.notdef').
        glyph_names = glyph_order[1:]

        for glyph_name, plain in zip(glyph_names, self.hanzi):
            # setdefault keeps an existing mapping if the key was seen before.
            self.f_dict.setdefault(glyph_name.lower().replace('uni', '&#x'), plain)

        self.uniWordList = glyph_order
        self.utf8WordList = [self._glyph_name_to_char(name) for name in glyph_names]
        print(self.utf8WordList)

    def run(self) -> None:
        """Fetch the thread, load its font, and print raw and decoded text."""
        req = self.session.get(self.url)
        font_url = self.parse(req.text)
        self.create_font(font_url)

        info = req.html.xpath("//div[@class='tz-paragraph' and string-length(text())>1]//text()")
        print(info)

        # Swap the font-specific characters for their standard counterparts.
        elem = self._restore_text("".join(info))
        print(elem)

    def parse(self, source: str) -> str:
        """Extract the font URL from the page *source*.

        Args:
            source: Text of the thread page.

        Returns:
            The first URL captured by ``_FONT_URL_RE``. ``"http:"`` is
            prepended unless the URL already starts with ``http://`` or
            ``https://``.

        Raises:
            IndexError: If *source* contains no matching ``url('...')`` entry.
        """
        match = self._FONT_URL_RE.search(source)
        if match is None:
            raise IndexError("no font url('...') entry found in page source")
        font_url = match.group(1)
        if font_url.startswith(("http://", "https://")):
            return font_url
        return "http:" + font_url
if __name__ == '__main__':
    # Script entry point: build the scraper and run it once.
    spider = QiCheZhiJia()
    spider.run()