1 Star 0 Fork 0

sip/spider

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
main.py 1.96 KB
一键复制 编辑 原始数据 按行查看 历史
sip 提交于 2019-09-21 15:15 . spider commit
import threading
import queue
from collections import deque
from sqlhand import DbHandle
from toolhand import *
class MultiGetUrl(threading.Thread):
    """Worker thread that consumes tasks from a shared queue.

    Each worker pulls one item at a time from ``task_queue``. If the shared
    ``lock`` is free, the worker takes it, opens the database through
    ``DbHandle`` and prints the item; if the lock is busy, the item is
    buffered in ``self.tmp`` instead of blocking the worker.
    """

    def __init__(self, task_queue, thread_name, logger, lock):
        """Store the shared collaborators.

        Args:
            task_queue: queue-like object providing ``get()`` and
                ``task_done()``.
            thread_name: name given to the underlying ``threading.Thread``.
            logger: logger object handed to ``DbHandle``.
            lock: ``threading.Lock`` shared by all workers.
        """
        super().__init__(name=thread_name)
        self.queue = task_queue
        self.lock = lock
        self.logger = logger
        # Items received while another worker held the lock.
        self.tmp = []

    def run(self):
        """Process queue items forever (intended to run as a daemon thread)."""
        while True:
            info = self.queue.get()
            try:
                # Atomic try-acquire: a separate locked() check followed by
                # acquire() can race with another worker and then block.
                if not self.lock.acquire(blocking=False):
                    self.tmp.append(info)
                    continue
                try:
                    db = DbHandle(self.logger)
                    db.db_connect("todayb.db")
                    db.init_database()
                    print(info, self.name)
                    # NOTE(review): buffered items in self.tmp were meant to
                    # be handled here via allstar(tt, db); that call is
                    # disabled in the original, so the buffer is only
                    # cleared.
                    self.tmp = []
                finally:
                    # Never leave the shared lock held if the DB work raises.
                    self.lock.release()
            finally:
                # Always acknowledge the item so Queue.join() in the
                # producer cannot hang on a failed task.
                self.queue.task_done()
def main():
    """Set up logging and the database, start the workers and feed them URLs.

    Side effects: sets the module-level ``global_logger`` and ``global_db``,
    inserts the start URL into the ``urls`` table, and starts three daemon
    worker threads. The function then loops forever, pushing every row
    returned by ``global_db.select_urls()`` onto the task queue.
    """
    global global_logger
    global global_db

    start = {'url': "https://www.nsfw.xxx"}
    logop = {'logFile': "log.txt", 'logLevel': "INFO"}

    global_logger = getlog(logop)
    global_db = DbHandle(global_logger)
    global_db.db_connect("todayb.db")
    global_db.init_database()
    global_db.insert_one(start, 'urls')

    # `queue` is the module; the FIFO class is queue.Queue. Calling the
    # module itself raises TypeError.
    task_queue = queue.Queue()
    thread_num = 3
    lock = threading.Lock()
    for i in range(thread_num):
        print("thread:", i)
        # Hand the workers the queue instance, not the `queue` module.
        worker = MultiGetUrl(task_queue, 'urlt_' + str(i), global_logger, lock)
        # Daemon threads do not keep the process alive on exit.
        worker.daemon = True
        worker.start()

    allurl = global_db.select_urls()
    while True:
        print("in main while")
        while allurl:
            t = allurl.pop()
            print(t)
            task_queue.put(t)
        allurl = global_db.select_urls()
        # Wait until the workers have acknowledged every queued item.
        task_queue.join()
if __name__ == "__main__":
    # Script entry point: print the greeting, then hand control to main().
    print("hello world\n")
    main()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/si_peng/spider.git
[email protected]:si_peng/spider.git
si_peng
spider
spider
master

搜索帮助