爬取北京的二手房数据
作者:admin | 分类:企业名录 | 浏览:37 | 时间:2023-09-13 08:32:47网址示图
结果示图:
代码如下:
import requests
from bs4 import BeautifulSoup
import csv
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
# 使用某一城市的链家二手房URL作为示例
url = 'https://bj.lianjia.com/ershoufang/' # 请替换为实际的URL
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.content, 'html.parser')
# 提取数据
houses = []
# 根据实际网页结构提取信息,这里的选择器是示例,请根据实际HTML结构进行调整
for item in soup.select('.info'):
title = item.select_one('.title a').text.strip()
price = item.select_one('.priceInfo .totalPrice span').text.strip()
per_meter = item.select_one('.priceInfo .unitPrice span').text.strip()
houses.append([title, price, per_meter])
# 保存到CSV
with open('lianjia_houses.csv', 'w', newline='', encoding='utf-8') as file:
writer = csv.writer(file)
writer.writerow(['Title', 'Total Price', 'Price per meter'])
writer.writerows(houses)