Skip to content

Commit 02e09e8

Browse files
DAFT-10: Change listing to object and get ad page info (county, description, etc.) (TheJokersThief#10)
1 parent d4d3d9e commit 02e09e8

File tree

6 files changed

+660
-10
lines changed

6 files changed

+660
-10
lines changed

daft_scraper/listing.py

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1+
import json
12
import re
23
from marshmallow import Schema, fields, INCLUDE, post_load
34
from marshmallow.utils import missing
45

6+
from daft_scraper import Daft
7+
58

69
class Seller(Schema):
710
class Meta:
@@ -64,14 +67,14 @@ class Meta:
6467
unknown = INCLUDE
6568

6669
totalUnitTypes = fields.Int()
67-
subUnits = fields.List(fields.Nested(lambda: Listing()))
70+
subUnits = fields.List(fields.Nested(lambda: ListingSchema()))
6871
tagLine = fields.Str()
6972
location = fields.Str()
7073
aboutDevelopment = fields.Str()
7174
brochure = fields.Str()
7275

7376

74-
class Listing(Schema):
77+
class ListingSchema(Schema):
7578
URL_BASE = "https://daft.ie"
7679
PRICE_RE = re.compile(r'[0-9,]+')
7780

@@ -127,3 +130,34 @@ def post_load(self, data, **kwargs):
127130
image = fields.Dict(keys=fields.Str(), values=fields.Str())
128131
ber = fields.Nested(ListingBER, default=ListingBER())
129132
prs = fields.Nested(ListingPRS, default=ListingPRS())
133+
134+
135+
class Listing(dict):
    """A single listing, usable both as a mapping and via attributes.

    The marshalled listing data passed to the constructor is available as
    dictionary items (``listing['url']``) and as attributes
    (``listing.url``).  Extra details that only exist on the ad's own page
    (description, county, area, view count) are exposed as properties and
    are loaded lazily the first time one of them is read.
    """

    # Cache for the parsed ``__NEXT_DATA__`` payload of the ad page.
    # ``None`` means "not fetched yet".
    _ad_page_info = None

    def __init__(self, data: dict):
        """Populate the listing from ``data``.

        Args:
            data: Mapping of listing field names to values.
        """
        # Fill the underlying dict so item access, iteration, len() and
        # serialisation see the listing fields instead of an empty mapping.
        super().__init__(data)
        # Mirror the fields as attributes.  Copying (rather than aliasing
        # ``data`` as ``__dict__``) keeps the page cache out of the caller's
        # dictionary.
        self.__dict__.update(data)

    @property
    def ad_page_info(self):
        """Return the JSON payload embedded in the listing's ad page.

        The page is retrieved with ``Daft().get(self.url)`` on first access
        and the decoded contents of its ``<script id="__NEXT_DATA__">``
        element are cached on the instance.

        Raises:
            ValueError: If the page has no ``__NEXT_DATA__`` script content,
                or (as ``json.JSONDecodeError``) if it is not valid JSON.
        """
        # Compare against None so an empty-but-valid payload is not re-fetched.
        if self._ad_page_info is None:
            parsed_page = Daft().get(self.url)
            script_tag = parsed_page.find('script', {'id': '__NEXT_DATA__'})
            raw_json = script_tag.string if script_tag is not None else None
            if raw_json is None:
                raise ValueError(
                    "No __NEXT_DATA__ script found on ad page: %s" % self.url
                )
            self._ad_page_info = json.loads(raw_json)
        return self._ad_page_info

    @property
    def _page_props(self) -> dict:
        """Return the ``props.pageProps`` section of the ad page payload."""
        return self.ad_page_info['props']['pageProps']

    @property
    def description(self) -> str:
        """Return the listing description, or ``None`` if the page has none."""
        return self._page_props['listing'].get('description', None)

    @property
    def county(self) -> list:
        """Return ``countyName`` from the page's targeting values ([] if absent)."""
        return self._page_props['dfpTargetingValues'].get('countyName', [])

    @property
    def area(self) -> list:
        """Return ``areaName`` from the page's targeting values ([] if absent)."""
        return self._page_props['dfpTargetingValues'].get('areaName', [])

    @property
    def views(self) -> int:
        """Return ``listingViews`` from the page, or ``None`` if absent."""
        return self._page_props.get('listingViews', None)

daft_scraper/search/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from typing import List
66

77
from daft_scraper import Daft
8-
from daft_scraper.listing import Listing
8+
from daft_scraper.listing import Listing, ListingSchema
99
from daft_scraper.search.options import Option, PriceOption, SalePriceOption
1010

1111

@@ -39,7 +39,6 @@ def search(self, query: List[Option], max_pages: int = sys.maxsize):
3939
path = path.replace('ireland', locations[0])
4040
del options['location']
4141

42-
print(path, options)
4342
# Init pagination params
4443
options['pageSize'] = self.PAGE_SIZE
4544
options['from'] = 0
@@ -87,7 +86,7 @@ def _get_page_data(self, path, params):
8786
def _get_listings(self, listings: dict):
8887
"""Convert a dict of listings into marshalled objects"""
8988
return [
90-
Listing().load(listing['listing'])
89+
Listing(ListingSchema().load(listing['listing']))
9190
for listing in listings
9291
]
9392

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "daft-scraper"
3-
version = "1.1.0"
3+
version = "1.2.0"
44
description = "A webscraper for Daft.ie"
55
authors = ["Evan Smith <[email protected]>"]
66
license = "MIT"

0 commit comments

Comments
 (0)