2 Star 0 Fork 0

isiyu/spider

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
ddc-test.py 3.49 KB
一键复制 编辑 原始数据 按行查看 历史
isiyu 提交于 2019-09-05 23:53 . modify ddc.py and add ddc-test.py
# -*- coding:UTF-8 -*-
from bs4 import BeautifulSoup
from retrying import retry
import requests
from configparser import ConfigParser
import time
import random
# def _result(result):
# return result is None
class downloader(object):
    """Fetch dealer listings from www.qqddc.com and persist them with progress.

    ``doit`` downloads and parses one listing page; ``writer`` appends the
    parsed text to an output file and records the current city/page in
    ``ddc-config.conf`` so that a run can be resumed.
    """

    # Seconds to wait for the server before giving up; without a timeout
    # ``requests.get`` can block forever on a stalled connection.
    _REQUEST_TIMEOUT = 30

    def _load_cookies(self, cookie_path='ddc-cookie.txt'):
        """Read a ``name=value; name=value`` cookie string into a dict.

        Segments without an ``=`` (for example the empty segment left by a
        trailing ``;``) are skipped instead of raising ``ValueError``.
        """
        cookies = {}
        with open(cookie_path, 'r') as f:
            raw = f.read()
        for segment in raw.split(';'):
            name, sep, value = segment.strip().partition('=')
            if not sep:
                continue
            cookies[name] = value
        return cookies

    def doit(self, city, pagenum):
        """Download one listing page and return its entries as text.

        Args:
            city: value sent as the ``pp`` query parameter.
            pagenum: value sent as the ``pn`` query parameter.

        Returns:
            One line per ``div.item-txt`` element found in the page, each
            terminated by a newline and made of tab-separated fields: the
            item's ``h1 > a`` text, the brand (first span's link text), the
            second span's text, and the address taken from the third span.
            An empty string when the page has no such elements.

        Raises:
            OSError: if ``ddc-cookie.txt`` cannot be read.
            requests.RequestException: on network failure or timeout.
            AttributeError / IndexError: if an item does not have the
                expected ``h1 > a`` / three-span layout.
        """
        target = ('http://www.qqddc.com/jxs.do?method=list&pn=' + str(pagenum)
                  + '&pp=' + str(city))
        cookies = self._load_cookies()
        # NOTE(review): the header value itself begins with a literal
        # "User-Agent:" prefix, which looks accidental. It is kept unchanged
        # because the server's reaction to a different value is unverified.
        headers = {'User-Agent': 'User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36'}
        print('请求数据:', target)
        res = requests.get(url=target, cookies=cookies, headers=headers,
                           timeout=self._REQUEST_TIMEOUT)
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        bf = BeautifulSoup(res.text, 'html.parser')
        print('解析成功:', target)
        lines = []
        for item in bf.find_all('div', class_='item-txt'):
            spans = item.find_all('span')
            # The third span carries a label, a separator, then the address;
            # keep only the part right after the first separator.
            address = spans[2].text.split(":")[1]
            lines.append(item.h1.a.text + '\t' + '品牌:' + spans[0].a.text
                         + '\t' + spans[1].text + '\t' + '地址:' + address)
        return ''.join(line + '\n' for line in lines)

    def writer(self, path, text, city, pagenum):
        """Append ``text`` to ``path`` and record progress in the config file.

        Args:
            path: output file, opened in append mode as UTF-8.
            text: a string (or iterable of strings) written after a leading
                newline.
            city: stored as ``city`` in the ``[curr]`` section.
            pagenum: stored as ``pagenum`` in the ``[curr]`` section.
        """
        # Write the scraped content.
        with open(path, 'a', encoding='utf-8') as f:
            f.write('\n')
            f.writelines(text)
        # Update the resume position.
        cfg = ConfigParser()
        cfg.read('ddc-config.conf')
        # A missing config file or section would otherwise make ``set`` raise
        # NoSectionError after the content has already been written.
        if not cfg.has_section('curr'):
            cfg.add_section('curr')
        cfg.set('curr', 'city', str(city))
        cfg.set('curr', 'pagenum', str(pagenum))
        with open('ddc-config.conf', 'w') as configfile:
            cfg.write(configfile)
if __name__ == "__main__":
    # Manual probe: request one listing page by city id and report whether
    # it contains any 'item-txt' entries.
    #
    # Author's notes (translated):
    # - A page number past the end is answered with a 302 to an error page.
    # - A city ID past the end returns empty content.
    # - Loop starting from ct=1, pn=1.
    # target = 'http://www.qqddc.com/jxs.do?method=list&pn='+str(pagenum)+'&pp='+str(city)
    target = 'http://www.qqddc.com/jxs.do?method=list&ct=455'  # city id used directly; no province id needed

    # Parse the "name=value; name=value" cookie string. Segments without
    # '=' (e.g. after a trailing ';') are skipped rather than crashing.
    cookies = {}
    with open('ddc-cookie.txt', 'r') as f:
        for segment in f.read().split(';'):
            name, sep, value = segment.strip().partition('=')
            if not sep:
                continue
            cookies[name] = value

    # NOTE(review): the header value starts with a literal "User-Agent:"
    # prefix, which looks accidental; kept unchanged pending verification.
    headers = {'User-Agent': 'User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36'}
    print('请求数据:', target)
    # A timeout keeps the probe from hanging on a stalled connection.
    res = requests.get(url=target, cookies=cookies, headers=headers, timeout=30)
    # Explicit parser: the result no longer depends on installed parsers.
    bf = BeautifulSoup(res.text, 'html.parser')
    divs = bf.find_all('div', class_='item-txt')
    # Prints 'true' when the page has no entries, 'false' otherwise.
    # NOTE(review): presumably 'true' means "city id out of range" - confirm.
    if divs:
        print('false')
    else:
        print('true')
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/isiyu/spider.git
[email protected]:isiyu/spider.git
isiyu
spider
spider
master

搜索帮助