house_sale
/
安居客正则.py

import requests,csv
from bs4 import BeautifulSoup
from selenium import webdriver # 从selenium库中调用webdriver模块
import time # 调用time模块
from selenium.webdriver.chrome.options import Options # 从options模块中调用Options类
import re

# Scrape the Anjuke (Shanghai) new-development listing pages with headless
# Chrome and print the location text of every listing found on each page.

# Desktop Chrome user-agent string.
# NOTE(review): nothing in this script passes `headers` to the browser or to
# `requests`; presumably left over from an earlier requests-based draft --
# confirm before removing.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'}

# TODO: CSV export is not wired up. The earlier BeautifulSoup draft wrote to
# '安居客.csv' (utf-8, newline='') with the header row
#   ['楼盘名称','区域位置','户型及建筑面积','售卖状态','住宅类型','楼盘优势']
# (development name, location, layout and floor area, sale status,
# property type, selling points), taken from these elements:
#   span.items-name, span.list-map, a.huxing,
#   i."status-icon onsale", i."status-icon wuyetp",
#   div.tag-panel > span.tag (first four tags concatenated)

chrome_options = Options()  # Chrome launch options
chrome_options.add_argument('--headless')  # run Chrome without a visible window
driver = webdriver.Chrome(options=chrome_options)  # start the browser session

# Patterns are compiled once, outside the page loop.
_NAME_RE = re.compile(r'<span class="items-name">(.*?)</span>')
_LOCATION_RE = re.compile(r'<span class="list-map" target="_blank">(.*?)</span>')
# Matches the non-breaking-space entity with or without its trailing ';' so
# that no stray semicolon is left in the cleaned text.
_NBSP_RE = re.compile(r'&nbsp;?')

try:
    for n in range(1, 2):  # page numbers to fetch; currently only page 1
        url = 'https://sh.fang.anjuke.com/loupan/all/p{}/'.format(n)
        driver.get(url)
        # Fixed pause after navigation; presumably gives the page time to
        # finish rendering before the source is read.
        time.sleep(2)
        pagesourse = driver.page_source

        # Development names; extracted but not consumed yet (see TODO above).
        name1 = _NAME_RE.findall(pagesourse)

        # findall() returns a list, so the entity has to be stripped from each
        # match individually. Calling .replace() on the list itself raised an
        # AttributeError that was silently swallowed, so nothing was printed.
        location = [_NBSP_RE.sub('', item) for item in _LOCATION_RE.findall(pagesourse)]
        print(location)
finally:
    # quit() ends the whole WebDriver session, not just the current window,
    # and runs even if a page load or extraction step raises.
    driver.quit()