Python爬虫实战：爬取贝壳网二手房成交数据，将数据存入Excel。

import requestsimport xlrd as xlrdfrom xlutils.copy import copyfrom lxml import etreedef getData():b = '枫丹丽城','锦泉源','金色阳光家园','奥林园','美域盛景','富士庄园','润泽园','骏腾名苑','泉水友好园','泉水人家幸福里','泉水家年华','龙畔金泉三期','龙畔金泉二期

马虎的程序猿

4801人浏览 · 2022-01-13 23:38:18

马虎的程序猿 · 2022-01-13 23:38:18 发布

import requests
import xlrd as xlrd
from xlutils.copy import copy
from lxml import etree
# Communities queried by default; each name is used as the `rs` search keyword in the URL.
_DEFAULT_COMMUNITIES = (
    '枫丹丽城', '锦泉源', '金色阳光家园', '奥林园', '美域盛景', '富士庄园', '润泽园',
    '骏腾名苑', '泉水友好园', '泉水人家幸福里', '泉水家年华', '龙畔金泉三期',
    '龙畔金泉二期', '龙畔金泉一期', '龙畔金泉四期', '龙畔金泉五期K1区', '泉水N3区',
    '泉水N1区', '泉水N2区', '龙畔金泉五期',
)

_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/96.0.4664.9 Safari/537.36'
)

# Seconds to wait for the server before giving up on a request.
_REQUEST_TIMEOUT = 10


def _first_text(node, path, index=0):
    """Return the ``index``-th result of ``node.xpath(path)``, or '' if absent."""
    found = node.xpath(path)
    return found[index] if len(found) > index else ''


def _with_unit(value, unit):
    """Append ``unit`` to ``value``; keep the cell empty when the value is missing."""
    return value + unit if value else ''


def _parse_listings(html):
    """Extract one row per deal from a listing page.

    Each row is ``[title, houseInfo, dealDate, totalPrice, positionInfo,
    unitPrice, dealCycleTxt]``. A field that is missing from the page is
    stored as an empty string instead of raising ``IndexError``.
    """
    tree = etree.HTML(html)
    if tree is None:  # nothing parseable in the response body
        return []

    rows = []
    for item in tree.xpath('//ul[@class="listContent"]/li'):
        title = _first_text(item, './/div[@class="title"]/a/text()')
        # houseInfo / positionInfo: the original selectors take the SECOND
        # text node of the div, so index 1 is kept here.
        house_info = _first_text(item, './/div[@class="houseInfo"]/text()', 1).strip()
        deal_date = _first_text(item, './/div[@class="dealDate"]/text()').strip()
        total_price = _with_unit(
            _first_text(item, './/div[@class="totalPrice"]/span/text()'), '万')
        position_info = _first_text(item, './/div[@class="positionInfo"]/text()', 1).strip()
        unit_price = _with_unit(
            _first_text(item, './/div[@class="unitPrice"]/span/text()'), '元/平')
        deal_cycle = _first_text(item, './/span[@class="dealCycleTxt"]/span/text()')
        rows.append([title, house_info, deal_date, total_price,
                     position_info, unit_price, deal_cycle])
    return rows


def _append_rows(xls_path, rows):
    """Append ``rows`` below the existing data of the first sheet of ``xls_path``."""
    workbook = xlrd.open_workbook(xls_path)
    first_free_row = workbook.sheet_by_index(0).nrows  # rows already present
    writable = copy(workbook)  # xlrd books are read-only; copy to a writable book
    sheet = writable.get_sheet(0)
    for row_no, row in enumerate(rows, start=first_free_row):
        for col_no, value in enumerate(row):
            sheet.write(row_no, col_no, value)
    writable.save(xls_path)


def getData(communities=None, max_pages=20, cookie='', xls_path='贝壳成交.xls'):
    """Scrape closed-deal listings from dl.ke.com and append them to an .xls file.

    For every community name, result pages 1..``max_pages`` of
    ``https://dl.ke.com/chengjiao/`` are fetched, parsed, and the extracted
    rows are appended to the first sheet of ``xls_path``. The workbook is
    saved after every page, so rows already written survive a later failure.

    Args:
        communities: Iterable of community names used as the search keyword.
            Defaults to the built-in list.
        max_pages: Number of result pages requested per community.
        cookie: Value for the ``Cookie`` request header. The original script
            notes that you need to supply your own cookie; the header is
            omitted when this is empty.
        xls_path: Path of the workbook to append to. The file must already
            exist, because it is opened with ``xlrd.open_workbook``.

    Raises:
        requests.RequestException: on connection errors or when the server
            does not answer within the timeout.
    """
    if communities is None:
        communities = _DEFAULT_COMMUNITIES

    headers = {'User-Agent': _USER_AGENT}
    if cookie:
        headers['Cookie'] = cookie

    for community in communities:
        for page_no in range(1, max_pages + 1):
            url = f'https://dl.ke.com/chengjiao/pg{page_no}rs{community}/'
            # Without a timeout a stalled connection would block forever.
            res = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)

            rows = _parse_listings(res.text)
            if not rows:
                # Nothing to write; skip the pointless workbook rewrite.
                continue
            _append_rows(xls_path, rows)
        print("xls格式表格写入数据成功！")



# Start the scrape only when this file is executed directly, not when imported.
if __name__ == "__main__":
    getData()