"""
对数据库做定时(死循环)的增删改查
"""
import json
import time
import pymysql
import requests
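
# Lookup tables mapping the backend's Chinese labels to the numeric codes
# stored in spider_mon. The site-type keys must match the strings returned by
# the spider_data_config API, so they are kept in Chinese.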
SiteTypeDict = {
"政府部委": 1,
"拟在建项目": 2,
"新闻媒体": 3,
"企业网站": 4,
}
IsImportDict = {
"特殊类型": 1,
"非常重要": 2,
"比较重要": 3,
"一般重要": 4,
"不太重要": 5,
"未设置": 6,
}
UpdateIntervalDict = {
"3小时": 3,
"6小时": 6,
"12小时": 12,
"24小时": 24,
}
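
# Request headers shared by the POSTs in Keep.create() and Keep.insert()
HEADERS = {
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Pragma': 'no-cache',
    'Referer': 'http://localhost:8080/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.53',
    'sec-ch-ua': '"Microsoft Edge";v="105", "Not)A;Brand";v="8", "Chromium";v="105"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'x-token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJVVUlEIjoiNjZkOTE3OGEtMzQyYS00M2JlLWFmYWMtZGExMGZmOGUwY2FjIiwiSUQiOjEsIlVzZXJuYW1lIjoiYWRtaW4iLCJOaWNrTmFtZSI6InNuYWtlIiwiQXV0aG9yaXR5SWQiOjk1MjgsIkJ1ZmZlclRpbWUiOjg2NDAwLCJleHAiOjE2NjU4MDI4NjUsImlzcyI6InFtUGx1cyIsIm5iZiI6MTY2NTE5NzA2NX0.oE8U8mqwN72B1jmN2E5GqBCgUGPZ38wlsyL50whEJos',
    'x-user-id': '1',
}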
def get_run_computer(mess: str):
    """Map the computer name at the start of a run message to its numeric id."""
    RunComputerDict = {
        "KYLS-PYTHON": 1,
        "KYLS-ZYC": 2,
        "KYLS-YR": 3,
        "KYLS-LZC": 4,
        "KYLS-LCH": 5,
        "KYLS-YTT": 6,
        "其他": 7,
    }
    if mess.find("(") == -1:
        return RunComputerDict["其他"]
    res = mess[:mess.find("(")].strip()
    try:
        return RunComputerDict[res]
    except KeyError:
        return RunComputerDict["其他"]
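

# A sketch of the run_message format this parser assumes (the real format is
# not shown in this file, so the sample strings below are hypothetical):
#   get_run_computer("KYLS-ZYC (spider)\nD:/spiders/news")  -> 2
#   get_run_computer("no parenthesis here")                 -> 7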
def get_run_dir(mess):
    """Return the run directory, expected on the second line of the run message."""
    ss = mess.split("\n")
    if len(ss) < 2:
        return "-"
    return ss[1]
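

# Under the same assumed message format:
#   get_run_dir("KYLS-ZYC (spider)\nD:/spiders/news")  -> "D:/spiders/news"
#   get_run_dir("single line, no directory")           -> "-"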
class Keep:
    """Mirrors spider-config records from the backend API into spider_mon."""
def __init__(self):
self.source = []
self.db, self.cursor = None, None
    # Fetch the current spider configuration list from the backend
    def reload(self):
        # url = "http://yuri:[email protected]:8000/api/spider_data_config/?offset=0&limit=10000&search=&ordering=-run_time&site_admin=&run_status=&script_status=&province=&site_type="
        url = "http://192.168.2.107:8000/api/spider_data_config/?offset=0&limit=10000&search=&ordering=-run_time&site_admin=&run_status=&script_status=&province=&site_type="
        response = requests.get(url)
        response.raise_for_status()  # fail fast on an HTTP error status
        self.source = response.json()["results"]
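        # Each result is assumed to look roughly like this (a sketch inferred
        # from the fields used below, not a verified schema):
        #   {"site_id": "...", "site_name": "...", "site_path_url": "...",
        #    "site_path_name": "...", "site_type": "新闻媒体",
        #    "script_status": "成功", "run_time": 1665197065,
        #    "temp_count": 0, "temp_total": 0, "valid_total": 0,
        #    "run_message": "KYLS-ZYC (spider)\nD:/spiders/news"}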
    # Connect to the local MySQL database
def connect(self):
db_config = {
"host": "localhost",
"port": 3306,
"user": "root",
"password": "root",
"db": "gva"
}
self.db = pymysql.connect(**db_config)
        self.cursor = self.db.cursor(pymysql.cursors.DictCursor)  # rows come back as dicts
    # Seed the table: POST every record whose script succeeded to the backend
def create(self):
for item in self.source:
try:
                if item["script_status"] != "成功":  # "成功" = success; skip failed scripts
                    continue
data = {
"siteId": item["site_id"],
"siteName": item["site_name"],
"sitePathUrl": item["site_path_url"],
"siteType": SiteTypeDict[item["site_type"]],
"isImport": IsImportDict["未设置"],
"twoDayNum": item["temp_count"],
"nowNum": item["temp_total"],
"invalidNum": item["valid_total"],
"sitePathName": item["site_path_name"],
"sinceLastUpdate": int(time.time() - item["run_time"]) / 3600,
"updateInterval": UpdateIntervalDict["24小时"],
"runComputer": get_run_computer(item["run_message"]),
"runDirectory": get_run_dir(item["run_message"]),
}
                # The backing column holds at most 100 characters
                if len(data["sitePathUrl"]) > 100:
                    data["sitePathUrl"] = data["sitePathUrl"][:100]
                response = requests.post('http://192.168.2.106:8080/api/SM/createSpiderMon',
                                         headers=HEADERS, json=data, verify=False)
                print(response.text)
except Exception as e:
print(e)
continue
    # Refresh the counters of rows that already exist in spider_mon
    def update(self):
for item in self.source:
try:
data = {
"siteId": item["site_id"],
"siteName": item["site_name"],
"sitePathUrl": item["site_path_url"],
"siteType": SiteTypeDict[item["site_type"]],
"isImport": IsImportDict["未设置"],
"twoDayNum": item["temp_count"],
"nowNum": item["temp_total"],
"invalidNum": item["valid_total"],
"sitePathName": item["site_path_name"],
"sinceLastUpdate": int((time.time() - item["run_time"])/3600),
"updateInterval": UpdateIntervalDict["24小时"],
"runComputer": get_run_computer(item["run_message"]),
"runDirectory": get_run_dir(item["run_message"]),
}
sql = "UPDATE spider_mon SET two_day_num={}, now_num={}, invalid_num={}, run_computer='{}', run_directory='{}', since_last_update={} WHERE site_id='{}'"\
.format(data['twoDayNum'], data['nowNum'], data['invalidNum'], data['runComputer'], data['runDirectory'], data['sinceLastUpdate'], data['siteId'])
try:
self.cursor.execute(sql)
self.db.commit()
except Exception as e:
print(e)
self.db.rollback()
            except Exception:
                # Skip records with missing or unmapped fields
                continue
    # Delete rows for sites whose latest script run did not succeed
    def delete(self):
for item in self.source:
try:
if item["script_status"] == "成功":
continue
                # Remove this site's monitoring row via a parameterized DELETE
                sql = "DELETE FROM spider_mon WHERE site_id=%s"
                try:
                    self.cursor.execute(sql, (item["site_id"],))
self.db.commit()
except Exception as e:
print(e)
self.db.rollback()
            except Exception:
                # Skip malformed records
                continue
    # Insert rows for successful sites that spider_mon does not have yet
    def insert(self):
for item in self.source:
try:
if item["script_status"] != "成功":
continue
data = {
"siteId": item["site_id"],
"siteName": item["site_name"],
"sitePathUrl": item["site_path_url"],
"siteType": SiteTypeDict[item["site_type"]],
"isImport": IsImportDict["未设置"],
"twoDayNum": item["temp_count"],
"nowNum": item["temp_total"],
"invalidNum": item["valid_total"],
"sitePathName": item["site_path_name"],
"sinceLastUpdate": int(time.time() - item["run_time"]) / 3600,
"updateInterval": UpdateIntervalDict["24小时"],
"runComputer": get_run_computer(item["run_message"]),
"runDirectory": get_run_dir(item["run_message"]),
}
                # Check whether the record already exists
                sql = "SELECT 1 FROM `spider_mon` WHERE `site_id`=%s LIMIT 1"
                self.cursor.execute(sql, (data["siteId"],))
                res = self.cursor.fetchall()
                # Skip it if it does
                if len(res) >= 1:
                    continue
                # Otherwise insert it; the backing column holds at most 100 characters
                if len(data["sitePathUrl"]) > 100:
                    data["sitePathUrl"] = data["sitePathUrl"][:100]
                response = requests.post('http://192.168.2.106:8080/api/SM/createSpiderMon',
                                         headers=HEADERS, json=data, verify=False)
                print(response.text)
except Exception as e:
print(e)
continue
    # Release the database connection and drop the cached records
    def clear(self):
self.cursor.close()
self.db.close()
self.source = []
if __name__ == '__main__':
    keep = Keep()
    while True:
        start = time.time()
        keep.connect()
        keep.reload()
        print("connect + reload elapsed", time.time() - start)
        keep.insert()
        print("insert elapsed", time.time() - start)
        keep.delete()
        print("delete elapsed", time.time() - start)
        keep.update()
        print("update elapsed", time.time() - start)
        keep.clear()
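        # The loop restarts immediately; to make each cycle truly periodic, a
        # pause could go here. The interval below is an assumption, not part of
        # the original script:
        # time.sleep(UpdateIntervalDict["3小时"] * 3600)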