1 Star 0 Fork 2

gearlam/pdd_crawler

forked from seven/pdd_crawler 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
pdd_crawler.py 3.37 KB
一键复制 编辑 原始数据 按行查看 历史
seven 提交于 2020-07-13 16:55 . anti_content 破解
#-*-encoding:utf-8-*-
import requests
import execjs
from copyheaders import headers_raw_to_dict
import re
from urllib.parse import urlencode
# Module-level requests session shared by get_params() and get_index().
session = requests.session()
def get_params(url=None):
    """Fetch the search-result page and extract the ``list_id`` and ``flip`` tokens.

    Args:
        url: Page to request. When omitted, the module-level ``href`` is
            used, which is what a zero-argument call has always relied on.

    Returns:
        A ``(list_id, flip)`` tuple of strings captured from the page source.

    Raises:
        RuntimeError: If the response status is not 200, or if either token
            cannot be found in the returned page.
    """
    if url is None:
        # Backward-compatible fallback: read the module-level global.
        url = href

    # NOTE(review): the Cookie header below hard-codes what looks like an
    # access token and session id; consider loading it from configuration.
    headers = headers_raw_to_dict(
b'''
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,zh;q=0.9
Cache-Control: no-cache
Connection: keep-alive
Cookie: api_uid=rBUECV3bmWIWMBSFxJsVAg==; _nano_fp=Xpd8npP8XqdJnqEoXC_cAsjNmv0_PfL_IbsNUlws; ua=Mozilla%2F5.0%20(Windows%20NT%2010.0%3B%20WOW64)%20AppleWebKit%2F537.36%20(KHTML%2C%20like%20Gecko)%20Chrome%2F81.0.4044.113%20Safari%2F537.36; webp=1; PDDAccessToken=CYFFW7TDAAAATBSMO3T3JGDI2WY3RT5E7LLYECN5NOBZ4ZWFN5MA113ac8e; pdd_user_id=4318316460; pdd_user_uin=XAEIKBCPSFC6Q2Q45WP4NLFNII_GEXDA; JSESSIONID=1123038327A631257B25F7AC4D220B27
Host: m.yangkeduo.com
Pragma: no-cache
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.113 Safari/537.36
'''
    )
    response = session.get(url=url, headers=headers)
    if response.status_code != 200:
        # Previously this path fell through and returned None, which made the
        # caller fail later with an opaque tuple-unpacking TypeError.
        raise RuntimeError(
            "search page request failed with HTTP status %d" % response.status_code
        )

    # Drop every backslash before matching (presumably so escaped quotes in
    # JSON embedded in the page match the plain patterns below).
    html = response.text.replace("\\", "")
    list_id_match = re.search(r'list_id":"(.*?)"', html)
    flip_match = re.search(r'flip":"(.*?)"', html)
    if list_id_match is None or flip_match is None:
        raise RuntimeError("list_id/flip tokens not found in search page")
    return list_id_match.group(1), flip_match.group(1)
def gen_signature( href ):
    """Compute the ``anti_content`` signature for *href*.

    Reads ``./get_anti.js`` (relative to the current working directory),
    compiles it with execjs and calls its ``get_anti_content`` function
    with *href*.

    Args:
        href: Value passed straight through to the JavaScript function.

    Returns:
        Whatever ``get_anti_content`` returns from the JavaScript runtime.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open("./get_anti.js", mode="r") as js_file:
        js = js_file.read()
    cxt = execjs.compile(js)
    return cxt.call("get_anti_content", href)
def get_index( href ):
    """Query the search API once and print the raw response body.

    The ``list_id`` and ``flip`` tokens come from ``get_params()``; the
    ``anti_content`` value is produced by ``gen_signature(href)``. The
    response text is printed only when the status code is 200.

    Args:
        href: Passed to ``gen_signature`` to build the ``anti_content`` value.

    Returns:
        None.
    """
    endpoint = "http://m.yangkeduo.com/proxy/api/search?"
    tokens = get_params()
    list_id, flip = tokens
    request_headers = headers_raw_to_dict(
b'''
Accept: application/json, text/plain, */*
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,zh;q=0.9
AccessToken: *******************************
Connection: keep-alive
Cookie: api_uid=rBUECV3bmWIWMBSFxJsVAg==; _nano_fp=Xpd8npP8XqdJnqEoXC_cAsjNmv0_PfL_IbsNUlws; ua=Mozilla%2F5.0%20(Windows%20NT%2010.0%3B%20WOW64)%20AppleWebKit%2F537.36%20(KHTML%2C%20like%20Gecko)%20Chrome%2F81.0.4044.113%20Safari%2F537.36; webp=1; PDDAccessToken=CYFFW7TDAAAATBSMO3T3JGDI2WY3RT5E7LLYECN5NOBZ4ZWFN5MA113ac8e; pdd_user_id=4318316460; pdd_user_uin=XAEIKBCPSFC6Q2Q45WP4NLFNII_GEXDA; JSESSIONID=1123038327A631257B25F7AC4D220B27
Host: m.yangkeduo.com
Referer: http://m.yangkeduo.com/search_result.html?search_key=%E8%93%9D%E7%89%99%E8%80%B3%E6%9C%BA
User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.113 Safari/537.36
VerifyAuthToken:****************************
'''
    )
    # The signature is computed after the headers are built and before the
    # request is sent, matching the original evaluation order.
    signature = gen_signature(href)
    query = {
        "pdduid": "4318316460",
        "item_ver": "lzqq",
        "source": "search",
        "search_met": "",
        "list_id": list_id,
        "sort": "default",
        "filter": "",
        "q": "蓝牙耳机",
        "page": "2",
        "size": "50",
        "flip": flip,
        "anti_content": signature,
    }
    full_url = endpoint + urlencode(query)
    response = session.get(url=full_url, headers=request_headers)
    if response.status_code != 200:
        return
    print(response.text)
# Search-result page URL (the search_key query value is percent-encoded).
# get_params() reads this module-level name when it is called without arguments.
href = "http://m.yangkeduo.com/search_result.html?search_key=%E8%93%9D%E7%89%99%E8%80%B3%E6%9C%BA"
# Script entry point: this call runs as soon as the module is executed or imported.
get_index(href )
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/gearlam/pdd_crawler.git
[email protected]:gearlam/pdd_crawler.git
gearlam
pdd_crawler
pdd_crawler
master

搜索帮助