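# NOTE: stray web-page banner, not part of the program: "Code pull complete; the page will refresh automatically."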
import requests
import time
import pymysql
import random
from datetime import datetime
'''此为原始版本已废弃'''
class Bookslist():
    """Crawl book listings from the api.yousuu.com book-store endpoint into MySQL.

    Pages are fetched one at a time through an HTTP proxy. Every book whose
    title is not yet present in the ``booklist`` table is inserted there, and
    its tags are inserted into ``book_tags`` keyed by the new row id.
    """

    # Layout of the ``updateAt`` field. ``%f`` accepts any fractional-second
    # digits instead of only the literal ``.000``, so books whose timestamp
    # carries non-zero milliseconds are no longer rejected.
    UPDATE_AT_FORMAT = "%Y-%m-%dT%H:%M:%S.%f%z"

    def __init__(self, db_host='localhost', db_user='root', db_password='5480TLQ.',
                 db_name='books_plus', proxies=None, timeout=10):
        """Store the configuration and open the database connection.

        Args:
            db_host: MySQL host name.
            db_user: MySQL user name.
            db_password: MySQL password.
            db_name: Name of the database to use.
            proxies: ``requests``-style proxy mapping; ``None`` selects the
                local proxy on port 10809.
            timeout: Seconds to wait for the HTTP request before giving up.
        """
        # NOTE(review): the default credentials are hard-coded in source; prefer
        # passing them in from configuration or the environment.
        self.db_host = db_host
        self.db_user = db_user
        self.db_password = db_password
        self.db_name = db_name
        self.user_agents = [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36 Edg/83.0.478.37",
            # Add more user agents here...
        ]
        # Earlier runs requested pages too quickly and got the IP banned, so the
        # crawl is single-threaded and throttled to one request every one to
        # two seconds; there are only a few thousand pages anyway.
        if proxies is None:
            proxies = {
                'http': 'http://127.0.0.1:10809',
                'https': 'http://127.0.0.1:10809'
            }
        self.proxies = proxies
        self.timeout = timeout
        self.db = pymysql.connect(host=self.db_host, user=self.db_user,
                                  password=self.db_password, database=self.db_name)
        self.cursor = self.db.cursor()

    def get_simplepage_lk(self, level, page_num):
        """Fetch one page of books for a word-count level.

        Args:
            level: Value sent as the ``countWord`` query parameter.
            page_num: Value sent as the ``page`` query parameter.

        Returns:
            The value found at ``['data']['books']`` in the JSON response, or
            ``None`` when the request fails or the payload has another shape.
        """
        url = f'https://api.yousuu.com/api/bookStore/books?countWord={level}&sort=score&page={page_num}'
        try:
            headers = {
                "User-Agent": random.choice(self.user_agents)
            }
            # A timeout keeps a stalled connection from hanging the whole crawl.
            response = requests.get(url=url, headers=headers,
                                    proxies=self.proxies, timeout=self.timeout)
            response.raise_for_status()
            # Extract inside the try so an unexpected payload is reported like
            # any other fetch failure instead of raising out of the crawl.
            books = response.json()['data']['books']
        except Exception as e:
            print(e)
            print('龙空小说获取有问题,直接退出,请检查程序')
            return None
        # Throttle successful requests to avoid being banned again.
        time.sleep(random.uniform(1, 2))
        return books

    @classmethod
    def _parse_update_at(cls, raw):
        """Parse an ``updateAt`` string into a timezone-aware ``datetime``."""
        return datetime.strptime(raw, cls.UPDATE_AT_FORMAT)

    def insert_book_info(self, book):
        """Insert one book and its tags unless the title is already stored.

        Args:
            book: Mapping with the keys ``title``, ``author``, ``score``,
                ``scorerCount``, ``tags``, ``status``, ``countWord`` and
                ``updateAt``.

        A book with a missing or malformed field is reported and skipped.
        Database errors are printed and do not propagate.
        """
        sql_insert_booklist = r'INSERT INTO booklist (title, author, countWord, scorebyLK, scorerCountLK, updateAt, status) VALUES (%s, %s, %s, %s, %s, %s, %s)'
        # The tag insert must name the owning book_id explicitly.
        sql_insert_book_tags = r'INSERT INTO book_tags (book_id, tag) VALUES (%s, %s)'
        try:
            title = book['title']
            author = book['author']
            score_lk = book['score']
            scorer_count_lk = book['scorerCount']
            tags = book['tags']
            status = book['status']
            count_word = book['countWord']
            update_at = self._parse_update_at(book['updateAt'])
        except (KeyError, TypeError, ValueError):
            print(f'出问题的书籍名字叫做:{book.get("title")}')
            return
        try:
            self.cursor.execute('SELECT * FROM booklist WHERE title=%s', (title,))
            if self.cursor.fetchone():
                print(f'书库已经有了该书籍:{title}')
                return
            self.cursor.execute(
                sql_insert_booklist,
                (title, author, count_word, score_lk, scorer_count_lk, update_at, status))
            if tags:  # nothing to insert when the book has no tags
                # Auto-increment primary key of the book row just inserted.
                book_id = self.cursor.lastrowid
                self.cursor.executemany(sql_insert_book_tags,
                                        [(book_id, tag) for tag in tags])
        except Exception as e:
            # Best effort: report the failure and carry on with the next book.
            print(e)

    def store(self):
        """Crawl levels 2-5, up to 500 pages each, committing after every page."""
        for level in range(2, 6):
            for page_num in range(1, 501):
                booksinfo = self.get_simplepage_lk(level, page_num)
                if not booksinfo:
                    # A failed or empty page ends this level.
                    break
                for book in booksinfo:
                    self.insert_book_info(book)
                self.db.commit()

    def run(self):
        """Run the full crawl, release the database handles, and print the elapsed time."""
        start = time.time()
        try:
            self.store()
        finally:
            # Close the handles even when the crawl aborts with an exception.
            self.cursor.close()
            self.db.close()
        end = time.time()
        print("执行时间:%.2f" % (end - start))
# Start the crawl only when this file is executed as a script, so that merely
# importing the module does not open a database connection or hit the network.
if __name__ == "__main__":
    b = Bookslist()
    b.run()
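# NOTE: stray web-page moderation notice, not part of the program: "This location may contain content unsuitable for display, so the page does not show it. You can review and modify it using the relevant editing features."
# "If you confirm the content does not involve inappropriate language / pure advertising / violence / vulgar or pornographic material / infringement / piracy / false or worthless content, or content that violates national laws and regulations, you may click submit to appeal and we will handle it as soon as possible."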