1 Star 0 Fork 15

yaccbison/AntiCrawlers

forked from ayuliao/AntiCrawlers 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
案例14-字体反爬.py 1.78 KB
一键复制 编辑 原始数据 按行查看 历史
二两的分身 提交于 2021-06-28 15:02 . enjoy code
import re
from urllib.parse import urljoin
import requests
from bs4 import BeautifulSoup
from fontTools.ttLib import TTFont
from tff2png import convert_ttf
# Request configuration shared by every HTTP call in this script.

# Session cookie sent along with each request.
# NOTE(review): presumably copied from a logged-in browser session and will
# expire -- confirm before relying on it.
cookies = dict(
    session=".eJyrViotTi1SsqpWyiyOT0zJzcxTsjLQUcrJTwexSopKU3WUcvOTMnNSlayUDM3gQEkHrDE-M0XJyhjCzkvMBSmKKTVNMjMDkiamFkq1tQDfeR3n.YLOC4w.Xbnx1QbrvUh8OUPb5jauC_Aau9U",
)

# Browser-like User-Agent header; adjacent literals are concatenated into the
# same single string the request sends.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_16_0) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/81.0.4044.92 Safari/537.36"
    ),
}

# Page whose digits are rendered through a custom web font.
url = "http://47.103.13.124:8001/font"
# Hand-compiled font mapping: glyph code (the text that follows "&#" in the
# page source) -> the decimal digit that glyph is meant to display.
fonts = dict(
    xe339=6,
    xe624=9,
    xe7df=2,
    xe9c7=7,
    xea16=5,
    xee76=0,
    xefd4=8,
    xf19a=3,
    xf57b=1,
    xf593=4,
)
def get_ttfont():
    """Download the WOFF font referenced by the page's CSS and load it.

    Fetches ``url``, extracts the first ``src:url('...') format('woff');``
    declaration from the response text, resolves that path against ``url``,
    saves the downloaded bytes to ``target.woff`` in the current directory
    and opens the file with fontTools.

    Returns:
        The ``TTFont`` object loaded from ``target.woff``.

    Raises:
        ValueError: if the page contains no matching woff ``src:url``
            declaration.
        requests.HTTPError: if the font download returns an error status.
    """
    r = requests.get(url, cookies=cookies, headers=headers)

    # Raw string so the regex escapes are not interpreted by Python first;
    # [^']* stops at the closing quote instead of greedily running to the
    # last quote on the line.
    pattern = re.compile(r"src:url\('([^']*)'\) format\('woff'\);")
    match = pattern.search(r.text)
    if match is None:
        raise ValueError("no woff 'src:url(...)' declaration found in page")

    # urljoin needs a single string; the previous code passed the whole
    # findall() list, which raises TypeError for any non-empty result.
    woff_url = urljoin(url, match.group(1))

    # Download the font file. Fail loudly on an HTTP error rather than
    # writing an error page to disk and handing it to the font parser.
    woff_r = requests.get(woff_url, cookies=cookies, headers=headers)
    woff_r.raise_for_status()

    filename = 'target.woff'
    with open(filename, 'wb') as f:
        f.write(woff_r.content)

    return TTFont(filename)
def _decode_glyphs(html):
    """Translate the first run of ``&#x....`` entities in *html* to digits.

    Only the first contiguous run of hexadecimal character references is
    decoded. Each glyph code is looked up in the module-level ``fonts``
    table; a code that is not in the table is rendered as ``"?"`` so the
    output keeps one character per glyph.

    Returns:
        The decoded string.

    Raises:
        IndexError: if *html* contains no ``&#x....`` entity at all.
    """
    run = re.search(r"(?:&#x[0-9a-fA-F]+;?)+", html)
    if run is None:
        raise IndexError("no '&#x....' glyph entities found in the page")
    # Capture only the code itself, so a trailing ';' or markup after the
    # last entity can never leak into the lookup key.
    codes = re.findall(r"&#(x[0-9a-fA-F]+)", run.group(0))
    return "".join(str(fonts.get(code.lower(), "?")) for code in codes)


def crawler():
    """Fetch the page at ``url`` and print the digits hidden by the font.

    The page obfuscates a number (the original code calls it a phone
    number) as custom-font glyph entities; this prints the decoded digits.

    Raises:
        IndexError: if the response contains no glyph entities.
    """
    r = requests.get(url, cookies=cookies, headers=headers)
    # Work on the raw response text with a regex: per the original author's
    # note, BeautifulSoup re-encodes the HTML and the glyph codes are lost.
    print(_decode_glyphs(r.text))
if __name__ == "__main__":
    # Run the crawl only when this file is executed as a script, so that
    # importing the module does not trigger a network request.
    crawler()
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/yaccbison/anti-crawlers.git
[email protected]:yaccbison/anti-crawlers.git
yaccbison
anti-crawlers
AntiCrawlers
master

搜索帮助