bus.py
# Build the first-level URL from the city name
# Fetch the first-level URL
# Parse the city's bus categories, pick the wanted category type, and build the second-level URLs
# Fetch each second-level URL
# Parse every bus route in the category and build the third-level URLs
# Fetch each third-level URL
# Parse the upper and lower data sections of the route page
# Repeat the second-level -> third-level stage until all data has been crawled
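# Note (added for maintainability): the CSS classes targeted below
# ('bus-layer depth w120', 'pl10', 'kt', 'list clearfix', 'info') reflect
# 8684.cn's markup at the time this was written and may need updating if
# the site's HTML changes.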
import csv

import requests
from bs4 import BeautifulSoup

# Shared request headers: a desktop browser User-Agent so the site serves the normal HTML pages.
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 SLBrowser/8.0.1.4031 SLBChan/30'}

url = 'https://nanchang.8684.cn/'
response = requests.get(url=url, headers=HEADERS, timeout=10)
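# Defensive check (not in the original script): stop early on a non-2xx
# response, since all of the parsing below assumes a valid HTML body.
response.raise_for_status()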
"""获取数据并解析"""
soup = BeautifulSoup(response.text, 'lxml')
soup_buslayer = soup.find('div', class_='bus-layer depth w120')
# 解析分类数据
dic_result = {}
soup_buslist = soup_buslayer.find_all('div', class_='pl10')
for soup_bus in soup_buslist:
    name = soup_bus.find('span', class_='kt').get_text()
    # Keep only the group whose heading contains '以数字开头' ("starts with a
    # digit"), i.e. the numbered bus lines.
    if '以数字开头' in name:
        soup_a_list = soup_bus.find('div', class_='list')
        for soup_a in soup_a_list.find_all('a'):
            text = soup_a.get_text()
            href = soup_a.get('href')
            dic_result[text] = "https://nanchang.8684.cn" + href
print(dic_result)
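# dic_result now maps each numeric category label to the absolute URL of its
# listing page; both values come straight from the <a> tags parsed above.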
bus_arr = []
for key, value in dic_result.items():
    print('key:', key, 'value:', value)
    response = requests.get(url=value, headers=HEADERS, timeout=10)
    # Fetch and parse the category listing page
    soup = BeautifulSoup(response.content, 'lxml')
    # Collect every route link in this category
    soup_buslist = soup.find('div', class_='list clearfix')
    for soup_a in soup_buslist.find_all('a'):
        text = soup_a.get_text()
        href = soup_a.get('href')
        title = soup_a.get('title')
        bus_arr.append([title, text, "https://nanchang.8684.cn" + href])
print(bus_arr)
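# Each bus_arr entry is [title, link text, absolute detail URL], taken
# directly from the <a> tag attributes on the category page.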
bus_des = []
for value in bus_arr:
    bus_name = value[0]
    # print('route name:', value[0], 'url:', value[2])
    response = requests.get(url=value[2], headers=HEADERS, timeout=10)
    # Fetch and parse the route detail page
    soup = BeautifulSoup(response.text, 'lxml')
    # Route details: operating hours, fare, and operating company
    soup_buslist = soup.find('div', class_='info')
    for soup_ul in soup_buslist.find_all('ul'):
        date = soup_ul.select('li')[0].get_text()
        money = soup_ul.select('li')[1].get_text()
        company = soup_ul.select('li')[2].select('a')[0].get_text()
        bus_des.append({"线路名称": bus_name, "运行时间": date, "票价": money, "公司": company})
print(bus_des)
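# Each bus_des entry carries the four keys written to the CSV below:
# 线路名称 (route name), 运行时间 (operating hours), 票价 (fare), 公司 (operator).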
# Create the CSV file
fo = open("news.csv", "w", newline='', encoding='utf-8')
# Column names: these must match the dict keys used in bus_des above
header = ["线路名称", "运行时间", "票价", "公司"]
writer = csv.DictWriter(fo, header)
# Write the header row
writer.writeheader()
# Write the per-route dicts collected above
writer.writerows(bus_des)
fo.close()
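# Optional read-back check (a sketch, not part of the original script):
# reopen the CSV with csv.DictReader to confirm the rows round-trip.
with open("news.csv", encoding='utf-8') as f:
    for row in csv.DictReader(f):
        print(row["线路名称"], row["票价"])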