1 Star 0 Fork 15

yaccbison/AntiCrawlers

forked from ayuliao/AntiCrawlers 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
案例8-WebDriver 特征检测反爬.py 2.47 KB
一键复制 编辑 原始数据 按行查看 历史
二两的分身 提交于 2021-06-28 15:02 . enjoy code
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.chrome.options import Options
# Target page of the WebDriver-detection exercise.
url = "http://47.103.13.124:8001/webdriver"

# Request headers the script wants the browser to present.
# Key order is significant: add_header() iterates this dict in order.
headers = {
    'Connection': 'keep-alive',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
    'Upgrade-Insecure-Requests': '1',
    # Search-engine-spider User-Agent currently in use.
    'User-Agent': (
        'Mozilla/5.0 (compatible; Baiduspider/2.0; '
        '+http://www.baidu.com/search/spider.html)'
    ),
    # Alternative desktop-Chrome User-Agent, kept for quick switching:
    # 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_16_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36',
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,image/apng,*/*;q=0.8,'
        'application/signed-exchange;v=b3;q=0.9'
    ),
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
}
def add_header():
    """Create Chrome ``Options`` that apply the configured User-Agent.

    Reads the module-level ``headers`` dict. Chrome is configured through
    command-line switches, not raw HTTP header lines, so only the
    ``User-Agent`` entry can be expressed here: it is translated to the
    ``--user-agent=<value>`` switch. The previous implementation passed
    every header as a ``Name=value`` argument; none of those is a Chrome
    switch, so the spoofed User-Agent was never actually applied.

    The remaining headers have no launch-switch equivalent and are not
    added as arguments. If they are required they have to be sent another
    way once the driver exists (e.g. the CDP command
    ``Network.setExtraHTTPHeaders``) -- NOTE(review): confirm whether the
    target site needs them.

    Returns:
        Options: a fresh options object ready to be passed to
        ``webdriver.Chrome``.
    """
    options = Options()
    for name, value in headers.items():
        # Header names are case-insensitive; the Chrome switch is lowercase.
        if name.lower() == 'user-agent':
            options.add_argument(f'--user-agent={value}')
    return options
def disable_img_css(options):
    """Register browser preferences that block image and stylesheet loading.

    Mutates ``options`` in place by setting its ``"prefs"`` experimental
    option; nothing is returned.

    Args:
        options: an options object exposing
            ``add_experimental_option(name, value)``.
    """
    prefs = {
        # 2 == "block" for this managed content setting.
        "profile.managed_default_content_settings.images": 2,
        # NOTE(review): this key looks like a Firefox preference name;
        # confirm that Chrome actually honours it.
        'permissions.default.stylesheet': 2,
    }
    options.add_experimental_option("prefs", prefs)
# Wait for an element to finish loading.
def wait_element(brower, element_id, wait_time=10):
    """Block until an element with the given DOM id is present on the page.

    This is an explicit wait: the condition is polled once per second
    until it succeeds or ``wait_time`` seconds have elapsed.

    Args:
        brower: the WebDriver instance to poll.
        element_id: value of the element's ``id`` attribute.
        wait_time: maximum number of seconds to wait (default 10).

    Returns:
        The value produced by ``WebDriverWait.until`` for the presence
        condition, i.e. the located element.

    Raises:
        Whatever ``WebDriverWait.until`` raises when the element does not
        appear in time. The exception now propagates unchanged instead of
        being re-wrapped in a bare ``Exception``, which used to discard
        its type and traceback; callers using ``except Exception`` still
        catch it.
    """
    waiter = WebDriverWait(brower, wait_time, poll_frequency=1)
    return waiter.until(
        EC.presence_of_element_located((By.ID, element_id))
    )
# Build the Chrome options: header spoofing plus image/CSS blocking.
options = add_header()
disable_img_css(options)

# Read the stealth script before launching Chrome, so a missing or
# unreadable file fails fast without leaving a browser process behind.
with open('stealth.min.js', encoding='utf-8') as f:
    js = f.read()

# `options=` replaces the deprecated `chrome_options=` keyword. The driver
# binary is resolved from PATH, which is what executable_path='chromedriver'
# asked for.
brower = webdriver.Chrome(options=options)
try:
    # Before any page is loaded, register the JavaScript that hides the
    # browser's automation fingerprints; it runs on every new document.
    brower.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": js
    })
    # A cookie can only be added for the currently loaded domain, so the
    # page is visited once first.
    brower.get(url)
    # NOTE(review): hard-coded session cookie -- it presumably expires;
    # consider loading it from configuration.
    brower.add_cookie({"name": "session", "value": ".eJyrViotTi1SsqpWyiyOT0zJzcxTsjLQUcrJTwexSopKU3WUcvOTMnNSlayUDM3gQEkHrDE-M0XJyhjCzkvMBSmKKTVNMjMDkiamFkq1tQDfeR3n.YLOC4w.Xbnx1QbrvUh8OUPb5jauC_Aau9U"})
    brower.get(url)  # Visit again so the request carries the cookie.
    print('debugger')
finally:
    # Keep the window visible briefly for manual inspection.
    time.sleep(5)
    # quit() ends the whole driver session; close() only closed the
    # current window and left the driver process running.
    brower.quit()
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/yaccbison/anti-crawlers.git
git@gitee.com:yaccbison/anti-crawlers.git
yaccbison
anti-crawlers
AntiCrawlers
master

搜索帮助