@@ -2,13 +2,15 @@
     Base class inherited by every search engine
 """
 
-from abc import ABCMeta, abstractmethod
-import random
 import asyncio
+import random
+from abc import ABCMeta, abstractmethod
 from enum import Enum, unique
-import aiohttp
+from urllib.parse import urlencode, urlparse
+
 from bs4 import BeautifulSoup
 
+import aiohttp
 from search_engine_parser.core.exceptions import NoResultsOrTrafficError
 
 
@@ -34,9 +36,11 @@ class BaseSearch:
     name = None
     # Search Engine unformatted URL
     search_url = None
+    # The url after all query params have been set
+    _parsed_url = None
 
     @abstractmethod
-    def parse_soup(self, soup, **kwargs):
+    def parse_soup(self, soup):
         """
         Defines the results contained in a soup
         """
@@ -75,16 +79,9 @@ def parse_result(self, results, **kwargs):
                 pass
         return search_results
 
-    @staticmethod
-    def parse_query(query):
-        """
-        Replace spaces in query
-
-        :param query: query to be processed
-        :type query: str
-        :rtype: str
-        """
-        return query.replace(" ", "%20").replace(":", "%3A")
+    def get_params(self, query=None, page=None, offset=None, **kwargs):
+        """This method should be overridden to return a dictionary of query params"""
+        return {'q': query, 'page': page}
 
     @staticmethod
     async def get_source(url):
@@ -134,20 +131,19 @@ async def get_soup(self, url):
     def get_search_url(self, query=None, page=None, **kwargs):
         """
         Return a formatted search url
-        """
-        # Some URLs use offsets
-        offset = (page * 10) - 9
-
-        return self.search_url.format(
-            query=query,
-            page=page,
-            offset=offset,
-        )
+        """
+        if not self._parsed_url:
+            # Some URLs use offsets
+            offset = (page * 10) - 9
+            params = self.get_params(query=query, page=page, offset=offset, **kwargs)
+            url = self.search_url + urlencode(params)
+            self._parsed_url = urlparse(url)
+        return self._parsed_url.geturl()
 
     def get_results(self, soup, **kwargs):
         """ Get results from soup"""
 
-        results = self.parse_soup(soup, **kwargs)
+        results = self.parse_soup(soup)
         # TODO Check if empty results is caused by traffic or answers to query
         # were not found
         if not results:
@@ -167,7 +163,6 @@ def search(self, query=None, page=None, **kwargs):
         :type page: int
         :return: dictionary. Containing titles, links, netlocs and descriptions.
         """
-        parsed_query = self.parse_query(query)
         # Get search Page Results
         loop = asyncio.get_event_loop()
         soup = loop.run_until_complete(
@@ -191,6 +186,5 @@ async def async_search(self, query=None, page=None, callback=None, **kwargs):
         # TODO callback should be called
         if callback:
             pass
-        parsed_query = self.parse_query(query)
-        soup = await self.get_soup(self.get_search_url(parsed_query, page))
+        soup = await self.get_soup(self.get_search_url(query, page, **kwargs))
         return self.get_results(soup, **kwargs)
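
Below is a minimal sketch of how a concrete engine would plug into the new get_params hook. The ExampleSearch class, its search_url value, the "start" parameter, and the parse_single_result stub are hypothetical illustrations, not part of this commit; it assumes parse_soup (plus any other abstract hooks on BaseSearch) are the only members a subclass must supply.

    from search_engine_parser.core.base import BaseSearch

    class ExampleSearch(BaseSearch):
        # Hypothetical engine, for illustration only
        name = "Example"
        # search_url must now end where the query string begins,
        # because get_search_url appends urlencode(params) directly
        search_url = "https://www.example.com/search?"

        def get_params(self, query=None, page=None, offset=None, **kwargs):
            # urlencode() percent-escapes the values, replacing the old
            # parse_query() space/colon substitutions
            return {"q": query, "start": offset}

        def parse_soup(self, soup):
            # Engine-specific extraction; note the signature no longer
            # accepts **kwargs after this commit
            return soup.find_all("div", class_="result")

        def parse_single_result(self, single_result):
            # Hypothetical stub for the per-result parser used by
            # parse_result; shape of the returned dict is illustrative
            return {"titles": single_result.text}

    engine = ExampleSearch()
    print(engine.get_search_url(query="hello world", page=2))
    # https://www.example.com/search?q=hello+world&start=11

One caveat of the new flow as written: _parsed_url is cached on the instance, so a second call to get_search_url with a different query returns the URL from the first call unless _parsed_url is reset.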