我正在从 JSON 响应中抓取数据,需要帮助抓取其中的位置(location)。位置有多个,我想知道有没有办法把所有位置都抓取下来。使用 Scrapy 时,我们可以调用 .getall() 来获取所有值,但我不确定对 JSON 应该怎么处理。另外,如果能把每个位置分别存入表头为 loc_1、loc_2、loc_3 等的新单元格中,那就更好了。
# -*- coding: utf-8 -*-
import scrapy
import json
# Seed URLs for the spider, read from test.txt (one URL per line).
# The file is opened in a `with` block so the handle is closed right away, and
# blank lines (e.g. the empty string left by a trailing newline) are dropped so
# that no empty URL ends up in the spider's start_urls.
with open('test.txt', encoding='utf-8') as links_file:
    links_list = [line.strip() for line in links_file if line.strip()]
class MainSpider(scrapy.Spider):
    """Follow the links found in JSON result pages and yield one item per school.

    Every URL in ``links_list`` is expected to return a JSON object with a
    ``results`` list. Each result's ``url`` is requested on www.cbr.nl, and the
    JSON detail document that comes back is flattened into one output item.
    """

    name = 'main'
    start_urls = links_list

    # Placeholder emitted when a value is missing or empty.
    _NOT_AVAILABLE = "N/A"

    # (output column, key in the top-level detail document)
    _SCHOOL_FIELDS = (
        ("Name", 'name'),
        ("Vehicle Type", 'vehicleCategory'),
        ("Latitude", 'lat'),
        ("Longitude", 'lon'),
    )

    # (output column, key inside the 'contactInformation' object)
    # NOTE: "Hourse Number Extension" is misspelt in the original output; it is
    # kept as-is so existing exports and their consumers keep the same header.
    _CONTACT_FIELDS = (
        ("Street Name", 'streetName'),
        ("House Number", 'houseNumber'),
        ("Hourse Number Extension", 'houseNumberExtension'),
        ("Zip Code", 'zipCode'),
        ("City", 'city'),
        ("Website", 'website'),
        ("Email", 'email'),
        ("Phone 1", 'phone1'),
        ("Phone 2", 'phone2'),
        ("KVK", 'kvk'),
        ("Driving School Number", 'drivingSchoolNumber'),
        ("Trade Associations", 'tradeAssociations'),
    )

    # (output column, key inside the 'examInformationAllLocations' object)
    _EXAM_FIELDS = (
        ("Exams Taken", 'allAttempts'),
        ("Pass Percentage", 'successfulAllAttemptsPercentage'),
    )

    @classmethod
    def _or_na(cls, value):
        """Return *value*, or the "N/A" placeholder when it is falsy.

        Falsy covers None, empty strings/containers and also 0, which matches
        the ``x if x else "N/A"`` expressions this helper replaces.
        """
        return value if value else cls._NOT_AVAILABLE

    def parse(self, response):
        """Yield a follow-up request for every result that carries a URL.

        Args:
            response: response whose body is a JSON object with a ``results``
                list.

        Yields:
            Requests for ``https://www.cbr.nl<url>``, handled by
            ``parse_lists``.
        """
        resp = json.loads(response.body)
        for each in resp['results']:
            # Guard only the lookup. A bare ``except`` around the ``yield``
            # would also swallow GeneratorExit/KeyboardInterrupt and hide
            # every real error.
            try:
                link = each['url']
            except (KeyError, TypeError):
                self.logger.warning("Skipping result without a usable 'url': %r", each)
                continue
            if link:
                yield response.follow(url=f'https://www.cbr.nl{link}', callback=self.parse_lists)

    def parse_lists(self, response):
        """Flatten one school's JSON detail document into an output item.

        Missing keys, and a missing or null ``contactInformation`` /
        ``examInformationAllLocations`` object, produce "N/A" for the affected
        columns instead of raising and losing the whole item.

        Args:
            response: response whose body is the school's JSON document.

        Yields:
            A dict with one entry per output column, in a fixed order.
        """
        schools = json.loads(response.body)
        # ``or {}`` also covers an explicit JSON null for the nested objects.
        contact = schools.get('contactInformation') or {}
        exam_totals = schools.get('examInformationAllLocations') or {}

        item = {}
        # Column order: school fields, contact fields, then exam totals.
        for source, fields in (
            (schools, self._SCHOOL_FIELDS),
            (contact, self._CONTACT_FIELDS),
            (exam_totals, self._EXAM_FIELDS),
        ):
            for column, key in fields:
                item[column] = self._or_na(source.get(key))
        yield item
先声明一个空列表,然后遍历所有位置,把每个位置依次追加到该列表中:
# Collect the 'cbrLocation' of every entry under the school's 'examInformation'.
all_locations = []
all_locations.extend(
    exam_entry['cbrLocation'] for exam_entry in schools['examInformation']
)
本文收集自互联网,转载请注明来源。
如有侵权,请联系[email protected] 删除。
我来说两句