1 Star 0 Fork 1

kylinpoet/Python_spider

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
HttpClient.py 2.33 KB
一键复制 编辑 原始数据 按行查看 历史
我的代码会飞 提交于 2016-01-20 21:04 . Python_spider demo
# -*- coding: utf-8 -*-
import cookielib, urllib, urllib2, socket
import zlib,StringIO
class HttpClient:
__cookie = cookielib.CookieJar()
#代理设置,需要时添加(后续设置为多代理切换)
#__proxy_handler = urllib2.ProxyHandler({"http" : '42.121.6.80:8080'})
__req = urllib2.build_opener(urllib2.HTTPCookieProcessor(__cookie))#,__proxy_handler)
__req.addheaders = [
('Accept', 'application/javascript, */*;q=0.8'),
('User-Agent', 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)')
]
urllib2.install_opener(__req)
def Get(self, url, refer=None,retries=3):
try:
req = urllib2.Request(url)
req.add_header('Accept-encoding', 'gzip')
if not (refer is None):
req.add_header('Referer', refer)
response = urllib2.urlopen(req, timeout=120)
html = response.read()
gzipped = response.headers.get('Content-Encoding')
if gzipped:
html = zlib.decompress(html, 16+zlib.MAX_WBITS)
return html
except Exception,what:
print what
if retries>0:
return self.Get(url,refer,retries-1)
else:
print "Get Failed",url
return ''
#except urllib2.HTTPError, e:
# return e.read()
#except socket.timeout, e:
# return ''
#except socket.error, e:
# return ''
def Post(self, url, data, refer=None):
try:
req = urllib2.Request(url, urllib.urlencode(data))
#req = urllib2.Request(url,data)
if not (refer is None):
req.add_header('Referer', refer)
return urllib2.urlopen(req, timeout=120).read()
except urllib2.HTTPError, e:
return e.read()
except socket.timeout, e:
return ''
except socket.error, e:
return ''
def Download(self, url, file):
output = open(file, 'wb')
output.write(urllib2.urlopen(url).read())
output.close()
def getCookie(self, key):
for c in self.__cookie:
if c.name == key:
return c.value
return ''
def setCookie(self, key, val, domain):
ck = cookielib.Cookie(version=0, name=key, value=val, port=None, port_specified=False, domain=domain, domain_specified=False, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False)
self.__cookie.set_cookie(ck)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/kylinpoet/Python_spider.git
[email protected]:kylinpoet/Python_spider.git
kylinpoet
Python_spider
Python_spider
master

搜索帮助