代码拉取完成,页面将自动刷新
# _*_ coding: utf-8 _*_
"""
Time: 2023/3/2 20:31
Author: Yi Zhan
Version: V 0.1
File: user_data_spider.py
"""
import time
import xlwt
from selenium import webdriver
def get_page_source(username):
    """Open a user's LeetCode page in Chrome and return the live driver.

    Args:
        username: String appended verbatim to ``https://leetcode.cn``.
            No separator is inserted, so it presumably already carries the
            leading path of the profile URL -- TODO confirm against the
            contents of ``username.xls``.

    Returns:
        The ``webdriver.Chrome`` instance with the page loaded. The caller
        owns it and is responsible for calling ``quit()``.

    Raises:
        Whatever the URL concatenation or the navigation raises; the browser
        is closed before the exception propagates.
    """
    path = 'chromedriver.exe'
    browser = webdriver.Chrome(path)
    try:
        url = 'https://leetcode.cn' + username
        browser.get(url)
        # Fixed 2-second pause after navigation (presumably to let the page
        # finish rendering before it is read).
        time.sleep(2)
    except BaseException:
        # The caller never receives the driver on failure, so close it here
        # rather than leaking a Chrome process, then let the error surface.
        browser.quit()
        raise
    return browser
from lxml import etree
def get_value(html, username, wb, ws, usernumber):
    """Write one user's scraped profile data into the sheet and save it.

    The user's row is ``usernumber + 1`` (row 0 holds the header). Column 0
    receives ``username``; columns 1-15 receive the ``data-title`` attribute
    of the first 15 ``<div data-title=...>`` elements in document order.
    ``get_slovenumber`` and ``get_beatuser`` then fill the remaining columns
    and the workbook is saved to ``user_data.xls``.

    Args:
        html: Page source as a string.
        username: Value written to the first column of the row.
        wb: Workbook that is saved at the end.
        ws: Worksheet to write into.
        usernumber: Zero-based index of the user.
    """
    # Columns 1-15 are the title columns; column 16 onwards is written by
    # the two helpers called below.
    max_titles = 15
    row = usernumber + 1

    tree = etree.HTML(html)
    titled_divs = tree.xpath("//div[@data-title]")
    ws.write(row, 0, username)
    # Cap the number of titles: a 16th title would land in column 16, which
    # get_slovenumber writes again, and xlwt rejects overwriting a cell
    # unless the sheet was created with cell_overwrite_ok=True.
    for col, div in enumerate(titled_divs[:max_titles], start=1):
        # Write the attribute string itself rather than the one-element list
        # that xpath('@data-title') returns.
        title = div.get('data-title')
        print(col - 1, title)
        ws.write(row, col, title)
    get_slovenumber(tree, username, wb, ws, row)
    get_beatuser(tree, username, wb, ws, row)
    wb.save('user_data.xls')
def create_excel():
    """Create the output workbook, write its header row and save it.

    Row 0 of sheet ``userdata_sheet`` receives 22 headings: the user name,
    fifteen numbered problem columns, three difficulty columns and three
    beat-rate columns. The workbook is saved to ``user_data.xls`` before
    returning.

    Returns:
        A ``(workbook, worksheet)`` tuple.
    """
    book = xlwt.Workbook()
    sheet = book.add_sheet('userdata_sheet')

    # Assemble the whole header row first, then write it left to right.
    headings = ['用户名']
    headings.extend("题目" + str(number) for number in range(1, 16))
    headings.extend(["easy", "mid", "diff"])
    headings.extend(["简单击败用户率", "中等击败用户率", "困难击败用户率"])
    for column, heading in enumerate(headings):
        sheet.write(0, column, heading)

    book.save('user_data.xls')
    return book, sheet
import pandas as pd
import xlrd
def created_username():
    """Load the table of users to scrape from ``username.xls``.

    Returns:
        The ``pandas.DataFrame`` read from the file. The default NaN
        markers are disabled via ``keep_default_na=False``.
    """
    return pd.read_excel('username.xls', keep_default_na=False)
def get_slovenumber(html, username, wb, ws, usernumber):
    """Write the text of the first three matching count spans to columns 16-18.

    If the page yields fewer than three matching spans, nothing is written
    and a notice is printed instead of failing with ``IndexError``. This
    mirrors the length guard in ``get_beatuser``.

    Args:
        html: Parsed page; only its ``xpath`` method is used.
        username: Used only in the notice printed when the spans are missing.
        wb: Unused here.
        ws: Worksheet to write into.
        usernumber: Row index to write to (used as-is).
    """
    count_spans = html.xpath("//span[@class='mr-[5px] text-base font-medium leading-[20px] text-label-1 dark:text-dark-label-1']")
    if len(count_spans) < 3:
        print(username, "solved counts not found, skipped")
        return
    # zip stops after the three target columns, so extra spans are ignored.
    for col, span in zip((16, 17, 18), count_spans):
        ws.write(usernumber, col, span.text)
        print(span.text)
def get_beatuser(html, username, wb, ws, usernumber):
    """Write the text of the first three matching rate spans to columns 19-21.

    Nothing is written when the page has fewer than three matching spans.

    Args:
        html: Parsed page; only its ``xpath`` method is used.
        username: Unused here.
        wb: Unused here.
        ws: Worksheet to write into.
        usernumber: Row index to write to (used as-is).
    """
    rate_spans = html.xpath("//span[@class='font-medium text-label-2 dark:text-dark-label-2']")
    if len(rate_spans) < 3:
        return
    # The first three spans go to columns 19, 20 and 21, in that order.
    for offset in range(3):
        ws.write(usernumber, 19 + offset, rate_spans[offset].text)
import math
if __name__ == '__main__':
    # Scrape every user listed in username.xls into user_data.xls.
    username = created_username()
    wb, ws = create_excel()
    for i in range(0, len(username)):
        name = username.loc[i, 'username']
        if name == '':
            # Blank cell: nothing to scrape for this row.
            continue
        print(name, type(name))
        # Despite the variable name, this is the live webdriver, not markup.
        html = get_page_source(name)
        try:
            # Scroll far down the page and pause before reading the source
            # (presumably so lazily rendered content is present).
            js = "var q=document.documentElement.scrollTop=10000"
            html.execute_script(js)
            time.sleep(2)
            get_value(html.page_source, name, wb, ws, i)
        finally:
            # Always close the browser, even if scraping this user failed,
            # so a failure does not leave a Chrome process behind.
            html.quit()
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。