Skip to content

Commit 7bbaf54

Browse files
committed
Performance Improvement: Return based on request for other engines
1 parent 3bab38d commit 7bbaf54

File tree

11 files changed

+188
-176
lines changed

11 files changed

+188
-176
lines changed
Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""@desc
22
Parser for AOL search results
33
"""
4-
from search_engine_parser.core.base import BaseSearch
4+
from search_engine_parser.core.base import BaseSearch, ReturnType
55

66

77
class AolSearch(BaseSearch):
@@ -22,7 +22,7 @@ def parse_soup(self, soup):
2222
# find all divs
2323
return soup.find_all('div', class_='algo-sr')
2424

25-
def parse_single_result(self, single_result, return_type=ReturnType.FULL):
    """
    Parse a single AOL result into a dict of the requested fields.

    :param single_result: single result found in a div with class 'algo-sr'
    :type single_result: `bs4.element.Tag`
    :param return_type: which fields to extract; FULL returns all of them
    :type return_type: ReturnType
    :return: dict with any of "titles", "links", "descriptions"
    :rtype: dict
    """
    rdict = {}
    # Title and link both live under the result's h3 anchor.
    h3_tag = single_result.find('h3')
    link_tag = h3_tag.find('a')

    # NOTE: guards use the ReturnType class, not the passed-in member —
    # accessing one enum member through another (return_type.TITLE) is
    # deprecated enum behavior and reads as a bug.
    if return_type in (ReturnType.FULL, ReturnType.TITLE):
        rdict["titles"] = link_tag.text

    if return_type in (ReturnType.FULL, ReturnType.LINK):
        rdict["links"] = link_tag.get("href")

    if return_type in (ReturnType.FULL, ReturnType.DESCRIPTION):
        # Description is only looked up when actually requested.
        caption = single_result.find('div', class_='compText aAbs')
        desc = caption.find('p', class_='lh-16')
        rdict["descriptions"] = desc.text

    return rdict

search_engine_parser/core/engines/ask.py

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""@desc
22
Parser for ask search results
33
"""
4-
from search_engine_parser.core.base import BaseSearch
4+
from search_engine_parser.core.base import BaseSearch, ReturnType
55

66

77
class AskSearch(BaseSearch):
@@ -24,7 +24,7 @@ def parse_soup(self, soup):
2424
# find all class_='PartialSearchResults-item' => each result
2525
return soup.find_all('div', class_="PartialSearchResults-item")
2626

27-
def parse_single_result(self, single_result, return_type=ReturnType.FULL):
    """
    Parse a single Ask result into a dict of the requested fields.

    :param single_result: single result found in a div with class
        'PartialSearchResults-item'
    :type single_result: `bs4.element.Tag`
    :param return_type: which fields to extract; FULL returns all of them
    :type return_type: ReturnType
    :return: dict with any of "titles", "links", "descriptions"
    :rtype: dict
    """
    rdict = {}
    # BUGFIX: the original tested return_type.TITLE in all three guards,
    # so LINK/DESCRIPTION requests silently returned nothing. Each guard
    # now checks its own member.
    if return_type in (ReturnType.FULL, ReturnType.TITLE):
        rdict["titles"] = single_result.find('a').text

    if return_type in (ReturnType.FULL, ReturnType.LINK):
        rdict["links"] = single_result.a["href"]

    if return_type in (ReturnType.FULL, ReturnType.DESCRIPTION):
        rdict["descriptions"] = single_result.find(
            'p', class_="PartialSearchResults-item-abstract").text

    return rdict

search_engine_parser/core/engines/baidu.py

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44

55
import re
6-
from search_engine_parser.core.base import BaseSearch
6+
from search_engine_parser.core.base import BaseSearch, ReturnType
77

88

99
class BaiduSearch(BaseSearch):
@@ -39,27 +39,25 @@ def parse_soup(self, soup):
3939

4040
return soup.find_all('div', {'id': re.compile(r"^\d{1,2}")})
4141

42-
def parse_single_result(self, single_result, return_type=ReturnType.FULL):
    """
    Parse a single Baidu result into a dict of the requested fields.

    :param single_result: single result found in div with a numeric id
    :type single_result: `bs4.element.Tag`
    :param return_type: which fields to extract; FULL returns all of them
    :type return_type: ReturnType
    :return: dict with any of "titles", "links", "descriptions"
    :rtype: dict
    """
    rdict = {}
    if return_type in (ReturnType.FULL, ReturnType.TITLE):
        h3_tag = single_result.find('h3')
        # BUGFIX: key was "title", inconsistent with every other engine's
        # "titles" key, which would break aggregated results.
        rdict["titles"] = h3_tag.text

    if return_type in (ReturnType.FULL, ReturnType.LINK):
        link_tag = single_result.find('a')
        rdict["links"] = link_tag.get('href')

    if return_type in (ReturnType.FULL, ReturnType.DESCRIPTION):
        rdict["descriptions"] = single_result.find(
            'div', class_='c-abstract').text

    return rdict
Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""@desc
22
Parser for Bing search results
33
"""
4-
from search_engine_parser.core.base import BaseSearch
4+
from search_engine_parser.core.base import BaseSearch, ReturnType
55

66

77
class BingSearch(BaseSearch):
@@ -22,7 +22,7 @@ def parse_soup(self, soup):
2222
# find all li tags
2323
return soup.find_all('li', class_='b_algo')
2424

25-
def parse_single_result(self, single_result, return_type=ReturnType.FULL):
    """
    Parse a single Bing result into a dict of the requested fields.

    :param single_result: single result found in an li with class 'b_algo'
    :type single_result: `bs4.element.Tag`
    :param return_type: which fields to extract; FULL returns all of them
    :type return_type: ReturnType
    :return: dict with any of "titles", "links", "descriptions"
    :rtype: dict
    """
    rdict = {}
    # Title and link both live under the result's h2 anchor.
    h2_tag = single_result.find('h2')
    link_tag = h2_tag.find('a')

    if return_type in (ReturnType.FULL, ReturnType.TITLE):
        rdict["titles"] = link_tag.text

    if return_type in (ReturnType.FULL, ReturnType.LINK):
        rdict["links"] = link_tag.get('href')

    # BUGFIX: was ReturnType.DESCRIPTIONS (no such member) — the
    # description branch raised AttributeError for every request.
    if return_type in (ReturnType.FULL, ReturnType.DESCRIPTION):
        caption = single_result.find('div', class_='b_caption')
        desc = caption.find('p')
        rdict["descriptions"] = desc.text

    return rdict

search_engine_parser/core/engines/duckduckgo.py

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Parser for DuckDuckGo search results
33
"""
44
import re
5-
from search_engine_parser.core.base import BaseSearch
5+
from search_engine_parser.core.base import BaseSearch, ReturnType
66

77

88
class DuckDuckGoSearch(BaseSearch):
@@ -25,7 +25,7 @@ def parse_soup(self, soup):
2525
# find all div tags
2626
return soup.find_all('div', class_='result')
2727

28-
def parse_single_result(self, single_result, return_type=ReturnType.FULL):
    """
    Parse a single DuckDuckGo result into a dict of the requested fields.

    :param single_result: single result found in a div with class 'result'
    :type single_result: `bs4.element.Tag`
    :param return_type: which fields to extract; FULL returns all of them
    :type return_type: ReturnType
    :return: dict with any of "titles", "links", "descriptions"
    :rtype: dict
    """
    rdict = {}

    # Consistency fix: use ReturnType.TITLE (class access) to match the
    # LINK/DESCRIPTION guards; member-via-member access is deprecated.
    if return_type in (ReturnType.FULL, ReturnType.TITLE):
        h2 = single_result.find('h2', class_="result__title")  # pylint: disable=invalid-name
        rdict["titles"] = h2.text.strip()

    if return_type in (ReturnType.FULL, ReturnType.LINK):
        link_tag = single_result.find('a', class_="result__url")
        # raw link is of format "/url?q=REAL-LINK&sa=..."; the real URL is
        # percent-encoded inside the uddg= parameter.
        raw_link = self.base_url + link_tag.get('href')
        re_str = re.findall("uddg=(.+)", raw_link)[0]
        re_str = re_str.replace("%3A", ":")
        link = re_str.replace("%2F", "/")
        link = link.replace("%2D", "-")
        rdict["links"] = link

    if return_type in (ReturnType.FULL, ReturnType.DESCRIPTION):
        desc = single_result.find(class_='result__snippet')
        rdict["descriptions"] = desc.text

    return rdict
5960

6061
def get_search_url(self, query=None, page=None, **kwargs):
@@ -64,7 +65,7 @@ def get_search_url(self, query=None, page=None, **kwargs):
6465
# Start value for the page
6566
start = 0 if (page < 2) else (((page-1) * 50) - 20)
6667

67-
type_ = self.keywords.get("type", None)
68+
type_ = kwargs.get("type", None)
6869

6970
return self.search_url.format(
7071
query=query,

search_engine_parser/core/engines/github.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""@desc
22
Parser for GitHub search results
33
"""
4-
from search_engine_parser.core.base import BaseSearch
4+
from search_engine_parser.core.base import BaseSearch, ReturnType
55
from search_engine_parser.core.exceptions import IncorrectKeyWord
66

77

@@ -19,7 +19,8 @@ class GitHubSearch(BaseSearch):
1919
"\n\tAs of May 2019, GitHub reports having over 37 million users and more than 100 million"\
2020
" repositories (including at least 28 million public repositories), making it the largest "\
2121
"host of source code in the world."
22-
def parse_soup(self, soup):
22+
23+
def parse_soup(self, soup, **kwargs):
2324
"""
2425
Parses GitHub for a search query.
2526
"""
@@ -34,7 +35,7 @@ def parse_soup(self, soup):
3435
"Issues",
3536
"Commits",
3637
"Code")
37-
self.type = self.keywords.get("type", None)
38+
self.type = kwargs.get("type", None)
3839
if self.type not in allowed_types:
3940
raise IncorrectKeyWord("No type <{type_}> exists".format(type_=self.type))
4041
# find all li tags
@@ -52,7 +53,7 @@ def parse_soup(self, soup):
5253
elif self.type == "Commits":
5354
return soup.find_all('div', class_='commits-list-item')
5455

55-
def parse_single_result(self, single_result):
56+
def parse_single_result(self, single_result, return_type=ReturnType.FULL):
5657
"""
5758
Parses the source code to return
5859

search_engine_parser/core/engines/myanimelist.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,14 @@ class MyAnimeListSearch(BaseSearch):
1313
name = "MyAnimeList"
1414

1515
search_url = "https://myanimelist.net/anime.php?q={query}&show={offset}"
16-
summary = "\tMyAnimeList, often abbreviated as MAL, is an anime and manga social"\ "networking and social cataloging application website."\
16+
summary = "\tMyAnimeList, often abbreviated as MAL, is an anime and manga social"\
17+
"networking and social cataloging application website."\
1718
"\n\tThe site provides its users with a list-like system to organize"\
1819
"and score anime and manga.\n\tIt facilitates finding users who share"\
1920
"similar tastes and provides a large database on anime and manga.\n\tThe"\
2021
"site claims to have 4.4 million anime and 775,000 manga entries."\
2122
"\n\tIn 2015, the site received over 120 million visitors a month."
2223

23-
"""Override get_search_url"""
2424

2525
def get_search_url(self, query=None, page=None):
2626
"""

search_engine_parser/core/engines/stackoverflow.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""@desc
22
Parser for AOL search results
33
"""
4-
from search_engine_parser.core.base import BaseSearch
4+
from search_engine_parser.core.base import BaseSearch, ReturnType
55

66

77
class StackOverflowSearch(BaseSearch):
@@ -25,7 +25,7 @@ def parse_soup(self, soup):
2525
# find all divs
2626
return soup.find_all('div', class_='summary')
2727

28-
def parse_single_result(self, single_result):
28+
def parse_single_result(self, single_result, return_type=ReturnType.FULL):
2929
"""
3030
Parses the source code to return
3131
@@ -34,19 +34,19 @@ def parse_single_result(self, single_result):
3434
:return: parsed title, link and description of single result
3535
:rtype: dict
3636
"""
37+
rdict = {}
3738
h3 = single_result.find('h3') #pylint: disable=invalid-name
3839
link_tag = h3.find('a')
39-
caption = single_result.find('div', class_='excerpt')
40-
# Get the text and link
41-
title = link_tag.text
40+
if return_type in (ReturnType.FULL, return_type.TITLE):
41+
# Get the text and link
42+
rdict["titles"] = link_tag.text
4243

43-
ref_link = link_tag.get('href')
44-
link = self.base_url + ref_link
44+
if return_type in (ReturnType.FULL, return_type.LINK):
45+
ref_link = link_tag.get('href')
46+
link = self.base_url + ref_link
47+
rdict["links"] = link
4548

46-
desc = caption.text
47-
rdict = {
48-
"titles": title,
49-
"links": link,
50-
"descriptions": desc,
51-
}
49+
if return_type in (ReturnType.FULL, return_type.DESCRIPTIONS):
50+
caption = single_result.find('div', class_='excerpt')
51+
rdict["descriptions"] = caption.text
5252
return rdict

0 commit comments

Comments
 (0)