代码拉取完成,页面将自动刷新
同步操作将从 二毛/抖音爬虫 强制同步,此操作会覆盖自 Fork 仓库以来所做的任何修改,且无法恢复!!!
确定后同步将在后台操作,完成时将刷新页面,请耐心等待。
import os
from concurrent.futures import ThreadPoolExecutor
from playwright.sync_api import BrowserContext, sync_playwright
class Browser(object):
    """Wrapper that owns a Playwright driver, a browser and one browser context.

    Attributes available after construction:
        self.context
        self.browser
        self.playwright

    Note:
        Playwright is not thread-safe. Do not create more than one playwright
        instance in the same thread, and do not call one shared playwright
        object from different threads. When playwright is needed inside a
        thread, create a playwright instance in that thread (see ``do_login``
        for the pattern used here).

    The object can also be used as a context manager; ``stop()`` is called on
    exit.
    """

    # Desktop user agent applied on top of the 'Desktop Edge' device descriptor.
    _PC_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.50'

    def __init__(self,
                 channel: str = 'msedge',
                 need_login: bool = True,
                 headless: bool = True,
                 ua: str = 'pc',
                 image: bool = False):
        """Start the browser immediately.

        Args:
            channel: Browser channel passed to ``chromium.launch``.
            need_login: When true, build the context through ``do_login``.
            headless: Passed to ``chromium.launch``.
            ua: ``'pc'`` selects the 'Desktop Edge' descriptor; any other
                value selects 'iPhone 12'.
            image: When false, image loading is disabled via a blink setting.
        """
        self.start(channel, need_login, headless, ua, image)

    def __enter__(self):
        """Return this instance so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Release all browser resources; exceptions are not suppressed."""
        self.stop()

    def anti_js(self):
        """Inject an anti-detection init script (noted by the author as ineffective)."""
        # Alternative script that was tried: "./js/anti.js"
        js = "./js/stealth.min.js"
        self.context.add_init_script(path=js)

    def _new_context(self, storage_state=None):
        """Create a browser context with the options shared by every code path."""
        return self.browser.new_context(
            **self._ua,
            storage_state=storage_state,
            permissions=['notifications'],
            ignore_https_errors=True,
        )

    def do_login(self):
        """Create ``self.context`` and make sure it carries a logged-in session.

        A saved ``./auth.json`` storage state is reused when the file exists.
        If the login check fails, a fresh login is performed and the returned
        cookies replace the cookies of the context.
        """
        # Imported here, as in the original code, so the module is only
        # required when login is actually requested.
        from login import Login

        storage_state = "./auth.json" if os.path.exists("./auth.json") else None
        self.context = self._new_context(storage_state)

        _login = Login(self.context)
        if not _login.check_login():
            # Run the interactive login in a separate worker thread; see the
            # thread-safety note in the class docstring.
            with ThreadPoolExecutor(max_workers=1) as executor:
                cookies = executor.submit(_login.new_login).result()
            self.context.clear_cookies()
            self.context.add_cookies(cookies)

    def start(self, channel, need_login, headless, ua, image) -> "BrowserContext":
        """Start playwright, launch the browser and create the context.

        Args:
            channel: Browser channel passed to ``chromium.launch``.
            need_login: When true, reuse/establish a login via ``do_login``.
            headless: Passed to ``chromium.launch``.
            ua: ``'pc'`` for the desktop descriptor, anything else for
                'iPhone 12'.
            image: When false, image loading is disabled.

        Returns:
            The created browser context (also stored in ``self.context``).

        Raises:
            Whatever playwright or the login step raises. Resources that were
            already started are released before the exception propagates.
        """
        _args = [
            '--disable-blink-features=AutomationControlled',
        ]
        if not image:  # do not load images
            _args.append("--blink-settings=imagesEnabled=false")

        self.playwright = sync_playwright().start()
        try:
            self.browser = self.playwright.chromium.launch(
                channel=channel,
                headless=headless,
                ignore_default_args=['--enable-automation'],
                args=_args,
            )
            # Copy the descriptor so the mapping handed out by playwright is
            # never modified when the user agent is overridden below.
            if ua == 'pc':
                self._ua: dict = dict(self.playwright.devices['Desktop Edge'])
                self._ua['user_agent'] = self._PC_USER_AGENT
            else:
                self._ua = dict(self.playwright.devices['iPhone 12'])

            if need_login:  # reuse the login state
                self.do_login()
            else:
                self.context = self._new_context()
            # self.anti_js()
        except BaseException:
            # Do not leave a driver/browser process behind on a failed start.
            self.stop()
            raise
        return self.context

    def stop(self):
        """Close the context, the browser and the playwright driver.

        Every step is attempted even if an earlier one raises, and resources
        that were never created (partially started instance) are skipped.
        """
        try:
            context = getattr(self, 'context', None)
            if context is not None:
                context.close()
        finally:
            try:
                browser = getattr(self, 'browser', None)
                if browser is not None:
                    browser.close()
            finally:
                playwright = getattr(self, 'playwright', None)
                if playwright is not None:
                    playwright.stop()
if __name__ == "__main__":
    # Manual smoke test: open a page with the default (headless, logged-in)
    # browser and always release the browser afterwards.
    edge = Browser()
    # edge = Browser(headless=False)
    try:
        p = edge.context.new_page()
        # Other targets tried during development:
        # p.goto('https://antispider1.scrape.center/')
        # p.goto('https://antoinevastel.com/bots/')
        # p.keyboard.press('End')
        p.goto('https://antoinevastel.com/bots/datadome')  # detection is not bypassed here
        # p.goto('https://www.douyin.com/search/xinhuashe?&type=user')
        # p.screenshot(path="end.png")
    finally:
        # Stop even when navigation fails so no browser process is left behind.
        edge.stop()
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。