Skip to content

Commit 7b4ee7d

Browse files
Add URL to listing schema and normalise price for 'per week' listings
1 parent 4e8d4d2 commit 7b4ee7d

File tree

5 files changed

+18
-6
lines changed

5 files changed

+18
-6
lines changed

daft_scraper/cli/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
@app.command()
1919
def search(
2020
search_type: SearchType,
21-
headers: List[str] = ['id', 'price', 'title', 'propertyType'],
21+
headers: List[str] = ['id', 'price', 'title', 'propertyType', 'url'],
2222
location: List[str] = [Location.ALL.value],
2323
max_pages: int = sys.maxsize,
2424
min_price: int = 0,

daft_scraper/listing.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import re
2-
from marshmallow import Schema, fields, INCLUDE
2+
from marshmallow import Schema, fields, INCLUDE, post_load
33
from marshmallow.utils import missing
44

55

@@ -52,6 +52,7 @@ class ListingPRS(Schema):
5252

5353

5454
class Listing(Schema):
55+
URL_BASE = "https://daft.ie"
5556
PRICE_RE = re.compile(r'[0-9,]+')
5657

5758
class Meta:
@@ -61,7 +62,10 @@ class Meta:
6162
def convert_price(self, value):
6263
matches = self.PRICE_RE.findall(value)
6364
if matches:
64-
return int(matches[0].replace(',', ''))
65+
price_int = int(matches[0].replace(',', ''))
66+
if "week" in value:
67+
price_int *= 4.34
68+
return price_int
6569
return missing
6670

6771
def convert_bed_and_bath(self, value):
@@ -70,6 +74,14 @@ def convert_bed_and_bath(self, value):
7074
return int(matches[0])
7175
return missing
7276

77+
def get_url(self, seo_friendly_path):
78+
return "".join([self.URL_BASE, seo_friendly_path])
79+
80+
@post_load
81+
def post_load(self, data, **kwargs):
82+
data['url'] = self.get_url(data['seoFriendlyPath'])
83+
return data
84+
7385
id = fields.Int()
7486
title = fields.Str()
7587

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "daft-scraper"
3-
version = "1.0.1"
3+
version = "1.0.2"
44
description = "A webscraper for Daft.ie"
55
authors = ["Evan Smith <[email protected]>"]
66
license = "MIT"

tests/fixtures/listing.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
],
1414
"featuredLevel": "FEATURED",
1515
"publishDate": 1607001238000,
16-
"price": "From \u20ac3,328 per month",
16+
"price": "From \u20ac3,328 per week",
1717
"abbreviatedPrice": "\u20ac3,330+",
1818
"numBedrooms": "3 bed",
1919
"propertyType": "Apartments",

tests/test_listing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def test_confirm_next_json_parseable(self):
2020
got = Listing().load(json.load(fixture)['listing'])
2121

2222
self.assertEqual(got.get('id'), 2315059)
23-
self.assertEqual(got.get('price'), 3328)
23+
self.assertEqual(got.get('price'), 14443.52)
2424
self.assertEqual(got.get('numBedrooms'), 3)
2525
self.assertEqual(got.get('numBathrooms'), None)
2626

0 commit comments

Comments
 (0)