# -*- coding: utf-8 -*-
__author__ = 'changchang.cc'
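
# ProxySpider: scrape free HTTP proxy lists from www.xicidaili.com into proxy.txt,
# then verify each entry against http://www.baidu.com/ using 30 worker threads,
# writing every proxy that responds to verified.txt.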
from bs4 import BeautifulSoup
import urllib2
import httplib
import threading
import sys
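
# Python 2 workaround: switch the default string encoding to utf-8 so the
# non-ASCII fields scraped below can be written to plain files without
# implicit-encoding errors.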
reload(sys)
sys.setdefaultencoding('utf-8')
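
# File handles and lock shared by the verification worker threads.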
inFile = open('proxy.txt')
outFile = open('verified.txt', 'w')
lock = threading.Lock()


def getProxyList(targeturl="http://www.xicidaili.com/nn/"):
    '''
    Scrape proxy entries from the first 9 pages of the given xicidaili.com
    list and append them to proxy.txt. Returns the number of proxies found.
    '''
    countNum = 0
    proxyFile = open('proxy.txt', 'a')
    requestHeader = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36"}

    for page in range(1, 10):
        url = targeturl + str(page)
        #print url
        request = urllib2.Request(url, headers=requestHeader)
        html_doc = urllib2.urlopen(request).read()
        soup = BeautifulSoup(html_doc, "html.parser")
        #print soup
        trs = soup.find('table', id='ip_list').find_all('tr')
        for tr in trs[1:]:  # skip the table header row
            tds = tr.find_all('td')
            # Country flag column; some rows carry no flag image.
            if tds[1].find('img') is None:
                nation = 'unknown'
                locate = 'unknown'
            else:
                nation = tds[1].find('img')['alt'].strip()
                locate = tds[4].text.strip()
            ip = tds[2].text.strip()
            port = tds[3].text.strip()
            anony = tds[5].text.strip()
            protocol = tds[6].text.strip()
            speed = tds[7].find('div')['title'].strip()
            time = tds[9].text.strip()
            proxyFile.write('%s|%s|%s|%s|%s|%s|%s|%s\n' % (nation, ip, port, locate, anony, protocol, speed, time))
            #print '%s=%s:%s' % (protocol, ip, port)
            countNum += 1
    proxyFile.close()
    return countNum


def verifyProxyList():
    '''
    Verify the validity of the proxies in proxy.txt: fetch a test URL through
    each one and record the responsive proxies in verified.txt.
    '''
    requestHeader = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36"}
    myurl = 'http://www.baidu.com/'

    while True:
        # Each worker pulls the next line from the shared input file.
        lock.acquire()
        ll = inFile.readline().strip()
        lock.release()
        if len(ll) == 0:
            break

        line = ll.split('|')
        protocol = line[5]
        ip = line[1]
        port = line[2]
        try:
            # The port is read as a string; any response received within the
            # timeout counts as a working proxy.
            conn = httplib.HTTPConnection(ip, int(port), timeout=5.0)
            conn.request(method='GET', url=myurl, headers=requestHeader)
            res = conn.getresponse()
            lock.acquire()
            print "+++Success:" + ip + ":" + port
            outFile.write(ll + "\n")
            lock.release()
        except:
            print "---Failure:" + ip + ":" + port


if __name__ == '__main__':
    # Start from an empty proxy.txt, then scrape the four xicidaili lists.
    tmp = open('proxy.txt', 'w')
    tmp.write("")
    tmp.close()

    proxynum = getProxyList("http://www.xicidaili.com/nn/")
    print u"Domestic high-anonymity proxies: " + str(proxynum)
    proxynum = getProxyList("http://www.xicidaili.com/nt/")
    print u"Domestic transparent proxies: " + str(proxynum)
    proxynum = getProxyList("http://www.xicidaili.com/wn/")
    print u"Foreign high-anonymity proxies: " + str(proxynum)
    proxynum = getProxyList("http://www.xicidaili.com/wt/")
    print u"Foreign transparent proxies: " + str(proxynum)

    print u"\nVerifying the proxies:"
    all_thread = []
    for i in range(30):
        t = threading.Thread(target=verifyProxyList)
        all_thread.append(t)
        t.start()

    for t in all_thread:
        t.join()

    inFile.close()
    outFile.close()
    print "All Done."