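# NOTE: web-page artifact from the code-hosting site, not part of the program: "Code pull complete; the page will refresh automatically."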
# -*- coding:UTF-8 -*-
from bs4 import BeautifulSoup
from retrying import retry
import requests
from configparser import ConfigParser
import time
import random
# def _result(result):
# return result is None
class downloader(object):
    """Scrape dealer listings from www.qqddc.com and persist them to disk.

    ``doit`` fetches one listing page and returns it as tab-separated text;
    ``writer`` appends that text to an output file and records the current
    position in ``ddc-config.conf``.
    """

    # Browser-like User-Agent. The header *value* must not repeat the
    # "User-Agent:" header name (the previous literal did).
    _USER_AGENT = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/76.0.3809.100 Safari/537.36')
    # File holding a raw "name=value; name2=value2" cookie string.
    _COOKIE_FILE = 'ddc-cookie.txt'
    # INI file whose [curr] section stores the last written position.
    _CONFIG_FILE = 'ddc-config.conf'
    # Seconds to wait for the server before giving up instead of hanging.
    _TIMEOUT = 30

    @staticmethod
    def _parse_cookies(raw):
        """Turn a ``name=value; name2=value2`` string into a dict.

        Blank segments (e.g. caused by a trailing ``;`` or newline) are
        ignored. Only the first ``=`` in a segment separates name from value.

        Raises:
            ValueError: if a non-blank segment contains no ``=``.
        """
        cookies = {}
        for segment in raw.split(';'):
            segment = segment.strip()
            if not segment:
                continue
            name, sep, value = segment.partition('=')
            if not sep:
                raise ValueError('malformed cookie segment: %r' % segment)
            cookies[name] = value
        return cookies

    # NOTE: a retry wrapper and a non-200 status check were drafted for this
    # method but are currently disabled:
    # @retry(stop_max_attempt_number=10, wait_random_min=1000, wait_random_max=2000)
    def doit(self, city, pagenum):
        """Fetch one listing page and return its entries as text.

        Args:
            city: value sent as the ``pp`` query parameter.
            pagenum: value sent as the ``pn`` query parameter.

        Returns:
            A string with one newline-terminated, tab-separated line per
            ``div.item-txt`` element: title, brand, the second span's text
            and the address. Empty string when the page has no such element.

        Raises:
            ValueError: if the cookie file contains a malformed segment.
            requests.exceptions.RequestException: on network failure or
                timeout.
            AttributeError, IndexError: if an entry lacks the expected
                ``h1 > a`` / ``span`` structure.
        """
        target = ('http://www.qqddc.com/jxs.do?method=list&pn=' + str(pagenum)
                  + '&pp=' + str(city))
        # Alternative: query by city id directly, which needs no province id,
        # e.g. 'http://www.qqddc.com/jxs.do?method=list&ct=355'.

        # Context manager so the cookie file is always closed.
        with open(self._COOKIE_FILE, 'r') as cookie_file:
            cookies = self._parse_cookies(cookie_file.read())
        headers = {'User-Agent': self._USER_AGENT}

        print('请求数据:', target)
        res = requests.get(url=target, cookies=cookies, headers=headers,
                           timeout=self._TIMEOUT)
        # Name the parser explicitly so results do not depend on which
        # optional parser libraries happen to be installed.
        bf = BeautifulSoup(res.text, 'html.parser')
        print('解析成功:', target)

        lines = []
        for item in bf.find_all('div', class_='item-txt'):
            spans = item.find_all('span')
            # The third span looks like "<label>:<address>"; keep everything
            # after the first colon, or the whole text if there is no colon.
            label, sep, address = spans[2].text.partition(':')
            if not sep:
                address = label
            lines.append(
                item.h1.a.text + '\t' + '品牌:' + spans[0].a.text + '\t'
                + spans[1].text + '\t' + '地址:' + address
            )
        return ''.join(line + '\n' for line in lines)

    def writer(self, path, text, city, pagenum):
        """Append ``text`` to ``path`` and record the current position.

        Args:
            path: output file, opened in append mode as UTF-8.
            text: string (or iterable of strings) to write after a leading
                newline.
            city: stored as ``city`` in the ``[curr]`` config section.
            pagenum: stored as ``pagenum`` in the ``[curr]`` config section.
        """
        # Write the content.
        with open(path, 'a', encoding='utf-8') as f:
            f.write('\n')
            f.writelines(text)

        # Update the progress config.
        cfg = ConfigParser()
        cfg.read(self._CONFIG_FILE)
        # On a first run the file or its section may not exist yet.
        if not cfg.has_section('curr'):
            cfg.add_section('curr')
        cfg.set('curr', 'city', str(city))
        cfg.set('curr', 'pagenum', str(pagenum))
        with open(self._CONFIG_FILE, 'w') as configfile:
            cfg.write(configfile)
if __name__ == "__main__":
    # Probe script: request one city listing and report whether it is empty.
    # Site behaviour noted by the author:
    #   - a page number past the last page is answered with a 302 to an
    #     error page;
    #   - a city id past the last city returns empty content;
    #   - a full crawl would loop starting from ct=1, pn=1.
    # Paged form of the URL:
    #   'http://www.qqddc.com/jxs.do?method=list&pn=<pagenum>&pp=<city>'
    # Here the city id is used directly, so no province id is needed.
    target = 'http://www.qqddc.com/jxs.do?method=list&ct=455'

    # Context manager so the cookie file is always closed.
    with open('ddc-cookie.txt', 'r') as cookie_file:
        raw_cookies = cookie_file.read()
    cookies = {}
    for segment in raw_cookies.split(';'):
        segment = segment.strip()
        if not segment:
            # A trailing ';' or newline yields a blank segment; skip it
            # instead of crashing on the unpack below.
            continue
        name, value = segment.split('=', 1)
        cookies[name] = value

    # The header value must not repeat the "User-Agent:" header name.
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36'}
    print('请求数据:', target)
    # Timeout so an unresponsive server cannot hang the script forever.
    res = requests.get(url=target, cookies=cookies, headers=headers, timeout=30)
    # Explicit parser: the result does not depend on installed parser libs.
    bf = BeautifulSoup(res.text, 'html.parser')
    divs = bf.find_all('div', class_='item-txt')
    # Prints 'true' when the listing is empty, 'false' when it has entries.
    print('false' if divs else 'true')
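# NOTE: web-page artifact from the code-hosting site, not part of the program: "This section may contain content unsuitable for display, so the page does not show it. You can review and modify it using the editing features."
# "If you confirm the content involves no inappropriate language / pure advertising / violence / vulgar or pornographic material / infringement / piracy / falsehood / worthless content, and nothing that violates applicable national laws and regulations, you may click submit to appeal and we will handle it as soon as possible."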