1 Star 0 Fork 0

大学失业人员/leetcode爬虫

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
user_data_spider.py 2.70 KB
一键复制 编辑 原始数据 按行查看 历史
大学失业人员 提交于 2023-03-09 16:40 . Initial commit
# _*_ coding: utf-8 _*_
"""
Time: 2023/3/2 20:31
Author: Yi Zhan
Version: V 0.1
File: user_data_spider.py
"""
import time
import xlwt
from selenium import webdriver
def get_page_source(username):
    """Open a Chrome session on a LeetCode CN page and return the live driver.

    ``username`` is appended verbatim to ``https://leetcode.cn`` (no
    separator is inserted), so it presumably already carries its leading
    path such as ``/u/<name>/`` -- TODO confirm against ``username.xls``.

    Despite the function name, the returned object is the WebDriver itself,
    not the page source; the caller reads ``.page_source`` from it and is
    responsible for calling ``.quit()``.
    """
    # NOTE(review): the driver path is passed positionally; newer Selenium
    # releases expect a Service object instead -- confirm the pinned version.
    driver = webdriver.Chrome('chromedriver.exe')
    driver.get('https://leetcode.cn' + username)
    # Fixed pause to give the page time to render before it is inspected.
    time.sleep(2)
    return driver
from lxml import etree
def get_value(html, username, wb, ws, usernumber):
    """Parse one rendered profile page and store its data as one sheet row.

    Args:
        html: Page source (markup string) of the profile page.
        username: Value written into column 0 of the row.
        wb: xlwt workbook; saved to ``user_data.xls`` after the row is filled.
        ws: xlwt worksheet that receives the row.
        usernumber: Zero-based index of the user; the data lands in sheet
            row ``usernumber + 1`` because row 0 holds the headers.

    Columns 1-15 receive the ``data-title`` attribute of the matching
    ``<div>`` elements, in document order. Columns 16-21 of the same row are
    filled by ``get_slovenumber`` and ``get_beatuser``.
    """
    # Only 15 title columns exist in the header row, and columns 16+ of this
    # row are written again below. xlwt refuses to overwrite a cell on a
    # sheet created without cell_overwrite_ok=True, so extra titles must not
    # spill past column 15.
    max_title_columns = 15
    row = usernumber + 1

    tree = etree.HTML(html)
    title_nodes = tree.xpath("//div[@data-title]")

    ws.write(row, 0, username)
    for i, node in enumerate(title_nodes[:max_title_columns]):
        # Write the attribute string itself rather than the one-element list
        # that xpath('@data-title') would return.
        value = node.get('data-title')
        print(i, value)
        ws.write(row, i + 1, value)

    get_slovenumber(tree, username, wb, ws, row)
    get_beatuser(tree, username, wb, ws, row)
    # Persist after every user so earlier rows survive a later failure.
    wb.save('user_data.xls')
def create_excel():
    """Create the output workbook, write its header row and save it.

    The header row (row 0) is laid out as:
      * column 0      -- user name
      * columns 1-15  -- "题目1" .. "题目15" (problem/title slots)
      * columns 16-18 -- "easy", "mid", "diff"
      * columns 19-21 -- beat-rate headers for easy / medium / hard

    Returns:
        A ``(workbook, worksheet)`` tuple; the workbook has already been
        saved once to ``user_data.xls``.
    """
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet('userdata_sheet')

    # Build the header labels in column order, then write them in one pass.
    headers = ['用户名']
    headers += ["题目" + str(n) for n in range(1, 16)]
    headers += ["easy", "mid", "diff"]
    headers += ["简单击败用户率", "中等击败用户率", "困难击败用户率"]
    for column, label in enumerate(headers):
        sheet.write(0, column, label)

    workbook.save('user_data.xls')
    return workbook, sheet
import pandas as pd
import xlrd
def created_username():
    """Load the accounts to scrape from ``username.xls``.

    Returns:
        The pandas DataFrame produced by ``read_excel``. The file is read
        with ``keep_default_na=False``; the script's main loop relies on
        blank cells comparing equal to ``''``.
    """
    return pd.read_excel('username.xls', keep_default_na=False)
def get_slovenumber(html, username, wb, ws, usernumber):
    """Write the first three counter spans of the page into columns 16-18.

    Args:
        html: Parsed document exposing ``.xpath()`` (an lxml tree in this
            script).
        username: Only used in the diagnostic message when data is missing.
        wb: Unused; kept for signature parity with the sibling helpers.
        ws: Worksheet that receives the values.
        usernumber: Sheet row to write into (used as-is, no offset applied).

    The three spans are presumably the solved counts for easy / medium /
    hard, matching the "easy", "mid", "diff" headers -- TODO confirm the
    order against the live page.

    If fewer than three matching spans are present (page not fully rendered,
    markup changed, profile missing) nothing is written, mirroring the guard
    in ``get_beatuser`` instead of raising ``IndexError``.
    """
    count_spans = html.xpath("//span[@class='mr-[5px] text-base font-medium leading-[20px] text-label-1 dark:text-dark-label-1']")
    if len(count_spans) < 3:
        print(username, 'solved-count spans not found, skipping')
        return
    for column, span in zip((16, 17, 18), count_spans):
        ws.write(usernumber, column, span.text)
        print(span.text)
def get_beatuser(html, username, wb, ws, usernumber):
    """Write the first three beat-rate spans of the page into columns 19-21.

    Args:
        html: Parsed document exposing ``.xpath()``.
        username: Unused here.
        wb: Unused here.
        ws: Worksheet that receives the values.
        usernumber: Sheet row to write into (used as-is, no offset applied).

    Nothing is written when the page holds fewer than three matching spans.
    """
    rate_spans = html.xpath("//span[@class='font-medium text-label-2 dark:text-dark-label-2']")
    if len(rate_spans) < 3:
        return
    # zip stops after the three target columns, so extra spans are ignored.
    for column, span in zip((19, 20, 21), rate_spans):
        ws.write(usernumber, column, span.text)
import math
if __name__ == '__main__':
    # Script entry point: read the account list, create the output workbook,
    # then scrape each non-blank account into its own sheet row.
    username = created_username()
    wb, ws = create_excel()
    for i in range(len(username)):
        name = username.loc[i, 'username']
        if name == '':
            # Blank cell in the input sheet: nothing to scrape for this row.
            continue
        print(name, type(name))
        browser = get_page_source(name)
        try:
            # Scroll far down before the source is read; presumably this
            # triggers lazily rendered sections -- TODO confirm.
            browser.execute_script("var q=document.documentElement.scrollTop=10000")
            time.sleep(2)
            get_value(browser.page_source, name, wb, ws, i)
        finally:
            # Always shut the browser down, even when scraping this user
            # fails, so Chrome/chromedriver processes are not leaked.
            browser.quit()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/Oliveira123/Users_data_spider.git
[email protected]:Oliveira123/Users_data_spider.git
Oliveira123
Users_data_spider
leetcode爬虫
master

搜索帮助