代码拉取完成,页面将自动刷新
import requests
import re
import asyncio
from concurrent.futures import ThreadPoolExecutor
import os
import time
def get_sec_uid(keyword):
    """Look up a user's ``sec_uid`` through the Douyin web search endpoint.

    Args:
        keyword: Douyin ID or user name to search for.

    Returns:
        The value found at ``data[0].user_list[0].items[0].author.sec_uid``
        in the JSON response, or ``None`` when the response is not JSON or
        does not have that structure.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    from urllib.parse import quote

    # NOTE(review): cookie, msToken, X-Bogus and _signature are hard-coded;
    # presumably captured from a browser session -- confirm they are still
    # accepted by the server.
    headers = {
        'authority': 'www.douyin.com',
        'accept': 'application/json, text/plain, */*',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cookie': 'ttwid=1%7CM9FkU8n5hTTvOiLZPjo8aU35Tr5BTMRIPed4BEtyDJA%7C1648208749%7Cdd2ade07781195bfd1fa21643fd59b3903008085a0f298cfb1c76b79bc3f78c7; passport_csrf_token=279410d140ba25b4e91cde4f6cd05aef; passport_csrf_token_default=279410d140ba25b4e91cde4f6cd05aef; MONITOR_WEB_ID=ecaa2897-8a68-4ade-b947-6cc9d931598e; odin_tt=417a9f77f6d2cd774da7807b0c714862d355b4a5ace66a03a9c00ac68dacbdec5ceca709555e30fff9adf1ecea84239111079643bd474943fe97d254cb0152ec; pwa_guide_count=3; _tea_utm_cache_6383=undefined; _tea_utm_cache_1300=undefined; _tea_utm_cache_2285=undefined; IS_HIDE_THEME_CHANGE=1; THEME_STAY_TIME=299720; __ac_signature=_02B4Z6wo00f01CI8ePQAAIDBQTa4nMTMMjwiHHxAAGrN5U0QgDC54gfGRL89zHJwk8zFNN8QWzhqZVluxIfOzDSckoBA5oFsW2dSnbl5l.IgboZ92n3goGNLjAJZBQlWId9Caa2Bweusdfcy29; douyin.com; strategyABtestKey=1648993217.224; s_v_web_id=verify_l1jbzhx9_1kxeNOEN_L1cd_42w9_Azm7_hVO7VNdDI4ld; msToken=4FsOZZEB0ReRvlBQJdmO-7GUER2EMYUSGfa6IEqs4AsvTxsTSD5ejPxg7yskHiuyLp51Av3hhmUSIWVcxC3P-pRxfCT00w1FQvpephON2euuQWyUOof7XWSxW2cg5gQ=; tt_scid=t7EPzZDDnRhuqBcv0W2umIeppJ3eoHUIusBuW.FwQfGu.AosRQ1Cb6EM9cCAQVLW56cc; msToken=mN3AP6NNssl_SxKLG6Tl30-ns7vuVjuVfu4gcQZbYYb87W3uohReUh31Zl1kKBFHNYlm-mqguVrVKCvCxqRJ1FtZMquGY9ihliRXZwTn-1R3zX4m9IOk0r7dvV5CpgA=; home_can_add_dy_2_desktop=0',
        'referer': 'https://www.douyin.com/search/%E5%88%98%E5%B8%88%E5%B8%88?source=search_history&aid=89c0b0ee-9905-422a-adad-6262771a99c1&enter_from=recommend&gid=7075376490342305038',
        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Microsoft Edge";v="100"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36 Edg/100.0.1185.29',
    }
    # Percent-encode the keyword: it is spliced into the middle of the query
    # string, so a raw '&', '#', '+' or '%' would otherwise change the query.
    encoded_keyword = quote(str(keyword), safe='')
    url = 'https://www.douyin.com/aweme/v1/web/general/search/single/?device_platform=webapp&aid=6383&channel=channel_pc_web&search_channel=aweme_general&sort_type=0&publish_time=0&keyword={}&search_source=search_history&query_correct_type=1&is_filter_search=0&from_group_id=7075376490342305038&offset=0&count=10&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=Win32&browser_name=Edge&browser_version=100.0.1185.29&browser_online=true&engine_name=Blink&engine_version=100.0.4896.60&os_name=Windows&os_version=10&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50&webid=7079002627996157440&msToken=mN3AP6NNssl_SxKLG6Tl30-ns7vuVjuVfu4gcQZbYYb87W3uohReUh31Zl1kKBFHNYlm-mqguVrVKCvCxqRJ1FtZMquGY9ihliRXZwTn-1R3zX4m9IOk0r7dvV5CpgA=&X-Bogus=DFSzswSOsdUAN9szSlxj3RXAIQR6&_signature=_02B4Z6wo00001ZcY6sgAAIDA9BIqoSsiGW2XGO5AAAeQPG8dden-CrmangUDQSgXK2be8jgqDjM2ZmSYTwyJHeRV-BeCkHWI4.T.x5yaCoZJn4Q3zXQxR6Nmh.IrhUHvmsIoURG7l7qpp9IJ61'.format(encoded_keyword)
    # A timeout keeps an unresponsive server from blocking the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    try:
        return response.json()['data'][0]['user_list'][0]['items'][0]['author']['sec_uid']
    except (ValueError, KeyError, IndexError, TypeError):
        # ValueError: body is not JSON; the others: JSON without the expected
        # nesting (missing key, empty list, or null where a container is expected).
        return None
def get_normal_string():
    """Fetch the user's profile page and return its embedded RENDER_DATA text.

    The profile URL is built from the module-level ``sec_uid``, which must be
    set before this function is called.

    Returns:
        The text found between the
        ``<script id="RENDER_DATA" type="application/json">`` tag and the
        last ``]</script>`` on the same line (the closing ``]`` is included).

    Raises:
        ValueError: If the page contains no such RENDER_DATA script.
        requests.RequestException: If the HTTP request fails or times out.
    """
    # NOTE(review): the cookie is hard-coded; presumably captured from a
    # browser session -- confirm it is still accepted by the server.
    headers = {
        'authority': 'www.douyin.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'max-age=0',
        'cookie': 'douyin.com; ttwid=1%7CM9FkU8n5hTTvOiLZPjo8aU35Tr5BTMRIPed4BEtyDJA%7C1648208749%7Cdd2ade07781195bfd1fa21643fd59b3903008085a0f298cfb1c76b79bc3f78c7; passport_csrf_token=279410d140ba25b4e91cde4f6cd05aef; passport_csrf_token_default=279410d140ba25b4e91cde4f6cd05aef; MONITOR_WEB_ID=ecaa2897-8a68-4ade-b947-6cc9d931598e; odin_tt=417a9f77f6d2cd774da7807b0c714862d355b4a5ace66a03a9c00ac68dacbdec5ceca709555e30fff9adf1ecea84239111079643bd474943fe97d254cb0152ec; pwa_guide_count=3; _tea_utm_cache_6383=undefined; _tea_utm_cache_1300=undefined; _tea_utm_cache_2285=undefined; IS_HIDE_THEME_CHANGE=1; THEME_STAY_TIME=299720; __ac_nonce=06249a3c30083ec55465e; __ac_signature=_02B4Z6wo00f01CI8ePQAAIDBQTa4nMTMMjwiHHxAAGrN5U0QgDC54gfGRL89zHJwk8zFNN8QWzhqZVluxIfOzDSckoBA5oFsW2dSnbl5l.IgboZ92n3goGNLjAJZBQlWId9Caa2Bweusdfcy29; douyin.com; strategyABtestKey=1648993217.224; s_v_web_id=verify_l1jbzhx9_1kxeNOEN_L1cd_42w9_Azm7_hVO7VNdDI4ld; msToken=4FsOZZEB0ReRvlBQJdmO-7GUER2EMYUSGfa6IEqs4AsvTxsTSD5ejPxg7yskHiuyLp51Av3hhmUSIWVcxC3P-pRxfCT00w1FQvpephON2euuQWyUOof7XWSxW2cg5gQ=; home_can_add_dy_2_desktop=1; tt_scid=EMfspRGfZ7FLIBzlrEuWZDSIy6yiAuMKgzHJwsIcfNRe3fI7urXTAv.99v5Gz7BHd00e; msToken=AQEOChwSeREaGw8BejMHFLHlxYWo0cZQI0h7CbfF8NCXeXT4lJIFwjeheooB35Lp8P97Sdbfny1adTAUUtHdhhdtCcGgt2kXtARBl9XtdY_oxnljZvdpJuDEKwI7dWM=',
        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Microsoft Edge";v="100"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36 Edg/100.0.1185.29',
    }
    # A timeout keeps an unresponsive server from blocking the script forever.
    response = requests.get('https://www.douyin.com/user/{}'.format(sec_uid),
                            headers=headers, timeout=10)
    pattern = re.compile(r'''(</script><script id="RENDER_DATA" type="application/json">)(.*])(</script>)''')
    match = pattern.search(response.text)
    if match is None:
        # Fail with an explicit message instead of an AttributeError on None.
        raise ValueError('RENDER_DATA script not found in profile page for sec_uid {!r}'.format(sec_uid))
    # Group 2 is the payload between the opening tag and the closing tag.
    return match.group(2)
def get_normal_dict():
    """Extract the play-URL query parameters found in ``normal_string``.

    Scans the module-level ``normal_string`` for percent-encoded query
    fragments of the form
    ``video_id=..&line=..&file_id=..&sign=..&is_play_url=..&source=..&aid=..``
    (``=`` appears as ``%3D`` and ``&`` as ``%26``).

    Returns:
        A list with one dict per match, in match order. Each dict maps the
        seven field names to the matched strings.
    """
    pattern = re.compile(r"video_id%3D(\w+)%26line%3D(\d)%26file_id%3D(\w+)%26sign%3D(\w+)%26is_play_url%3D(\d)%26source%3D(\w+)%26aid%3D(\d+)")
    # Field names in the same order as the capture groups above.
    field_names = ('video_id', 'line', 'file_id', 'sign', 'is_play_url', 'source', 'aid')
    return [dict(zip(field_names, groups)) for groups in pattern.findall(normal_string)]
def get_video(params, name, up_name):
    """Download one video and save it as ``<up_name>/<name>.mp4``.

    Args:
        params: Dict of query parameters sent to the play endpoint.
        name: File name without extension (the caller passes the video_id).
        up_name: Directory to save into (Douyin ID or user name); created
            if it does not exist.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the directory or file cannot be written.
    """
    # NOTE(review): the cookie is hard-coded; presumably captured from a
    # browser session -- confirm it is still accepted by the server.
    headers = {
        'authority': 'www.douyin.com',
        'accept': '*/*',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cookie': 'ttwid=1%7CM9FkU8n5hTTvOiLZPjo8aU35Tr5BTMRIPed4BEtyDJA%7C1648208749%7Cdd2ade07781195bfd1fa21643fd59b3903008085a0f298cfb1c76b79bc3f78c7; passport_csrf_token=279410d140ba25b4e91cde4f6cd05aef; passport_csrf_token_default=279410d140ba25b4e91cde4f6cd05aef; MONITOR_WEB_ID=ecaa2897-8a68-4ade-b947-6cc9d931598e; odin_tt=417a9f77f6d2cd774da7807b0c714862d355b4a5ace66a03a9c00ac68dacbdec5ceca709555e30fff9adf1ecea84239111079643bd474943fe97d254cb0152ec; pwa_guide_count=3; _tea_utm_cache_6383=undefined; _tea_utm_cache_1300=undefined; _tea_utm_cache_2285=undefined; IS_HIDE_THEME_CHANGE=1; THEME_STAY_TIME=299720; __ac_signature=_02B4Z6wo00f01CI8ePQAAIDBQTa4nMTMMjwiHHxAAGrN5U0QgDC54gfGRL89zHJwk8zFNN8QWzhqZVluxIfOzDSckoBA5oFsW2dSnbl5l.IgboZ92n3goGNLjAJZBQlWId9Caa2Bweusdfcy29; douyin.com; strategyABtestKey=1648993217.224; s_v_web_id=verify_l1jbzhx9_1kxeNOEN_L1cd_42w9_Azm7_hVO7VNdDI4ld; home_can_add_dy_2_desktop=1; tt_scid=wYQ9CV08zLOj9gljfBRWRrDMs6WFOLT1Bq1uwJfy2EGIxnfe2AJYvI5WthiOSi8h99a5; msToken=xjE-gyJdOM8AWBVvQcuypDx2bELRkMXgOyDk4rpDy0tnrpOQ-PSEyam3V3guVyCP_CTbv15cgWTzT6Nb8lbbPffdOP9ygjQ2VMp8wtuW4Cymmr1dqV8HmlQ8TnPjm5M=; msToken=aQZA1P-018EJlZPqPqrWxV_BlQJarmKJM-R5mWsonPmVEGio2n7cvtZS-CTqEZXclkSIwpD3_af5zjN6ORWdn9bBupt1mM2ev6niJK2UZ5SaNgatPZLsqQ==',
        'range': 'bytes=0-',
        'referer': 'https://www.douyin.com/discover?modal_id=7074783619138522368',
        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Microsoft Edge";v="100"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'video',
        'sec-fetch-mode': 'no-cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36 Edg/100.0.1185.29',
    }
    # A timeout keeps a stalled connection from blocking a worker forever.
    response = requests.get('https://www.douyin.com/aweme/v1/play/', headers=headers,
                            params=params, timeout=30)
    # Do not save an HTTP error body to disk as if it were a video.
    response.raise_for_status()
    # exist_ok avoids a FileExistsError when several threads downloading for
    # the same user try to create the directory at the same moment.
    os.makedirs(up_name, exist_ok=True)
    with open('{}/{}.mp4'.format(up_name, name), 'wb') as fp:
        fp.write(response.content)
def run(ls, func, up_name=None):
    """Call ``func`` once per item of ``ls`` on a pool of 25 worker threads.

    Each call is ``func(item, item['video_id'], up_name)``. The function
    returns once every call has finished.

    Args:
        ls: Iterable of dicts, each with a ``'video_id'`` key.
        func: Callable taking ``(item, video_id, up_name)``.
        up_name: Third argument passed to ``func``. When omitted, the
            module-level ``keyword`` is used.

    Returns:
        None. An exception raised by ``func`` is logged and does not stop
        the remaining calls.
    """
    import logging

    items = list(ls)
    if not items:
        # Nothing to do; also avoids needing ``keyword`` for an empty batch.
        return
    if up_name is None:
        up_name = keyword
    # Leaving the ``with`` block waits for all submitted calls and releases
    # the worker threads.
    with ThreadPoolExecutor(25) as executor:
        futures = [executor.submit(func, item, item['video_id'], up_name)
                   for item in items]
    # One failed call must not abort the batch, but it should not vanish
    # silently either, so each failure is reported through logging.
    logger = logging.getLogger(__name__)
    for item, future in zip(items, futures):
        error = future.exception()
        if error is not None:
            logger.error('task failed for video_id=%s', item['video_id'], exc_info=error)
if __name__ == "__main__":
    # Script entry point: ask for a user, resolve the sec_uid, then download
    # every video found on the profile page and print the elapsed time.
    # keyword, sec_uid and normal_string stay at module level on purpose:
    # run, get_normal_string and get_normal_dict read them as globals.
    keyword = input("请输入抖音号或用户名:")
    started_at = time.perf_counter()
    sec_uid = get_sec_uid(keyword)
    if not sec_uid:
        # The search returned no usable user.
        print('没有查找到该用户')
    else:
        print('开始抓取 %s' % keyword)
        normal_string = get_normal_string()
        run(get_normal_dict(), get_video)
        print('用时: %f' % (time.perf_counter() - started_at))
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。