22 Parser for DuckDuckGo search results
33"""
44import re
5- from search_engine_parser .core .base import BaseSearch
5+ from search_engine_parser .core .base import BaseSearch , ReturnType
66
77
88class DuckDuckGoSearch (BaseSearch ):
@@ -25,7 +25,7 @@ def parse_soup(self, soup):
2525 # find all div tags
2626 return soup .find_all ('div' , class_ = 'result' )
2727
def parse_single_result(self, single_result, return_type=ReturnType.FULL):
    """
    Parse one search result element into a dictionary.

    Only the fields selected by ``return_type`` are looked up in the
    markup; the other keys are simply absent from the returned dictionary.

    :param single_result: a single result element; it must offer a
        BeautifulSoup-style ``find`` method (presumably one of the
        ``div.result`` tags returned by ``parse_soup`` -- confirm with caller)
    :param return_type: which fields to extract; ``ReturnType.FULL``
        (the default) extracts title, link and description
    :return: dictionary holding any of the keys ``titles``, ``links`` and
        ``descriptions``, depending on ``return_type``
    :rtype: dict
    :raises IndexError: if a link is requested but the anchor's ``href``
        carries no ``uddg`` parameter
    """
    # Local import keeps this method self-contained; the module itself only
    # imports ``re``.
    from urllib.parse import unquote

    rdict = {}

    if return_type in (ReturnType.FULL, ReturnType.TITLE):
        title_tag = single_result.find('h2', class_="result__title")
        rdict["titles"] = title_tag.text.strip()

    if return_type in (ReturnType.FULL, ReturnType.LINK):
        link_tag = single_result.find('a', class_="result__url")
        # The href is a redirect whose real destination is stored,
        # percent-encoded, in the ``uddg`` query parameter.
        raw_link = self.base_url + link_tag.get('href')
        # Stop at the next '&': a literal ampersand starts another query
        # parameter, it cannot belong to the (encoded) destination itself.
        encoded_target = re.findall(r"uddg=([^&]+)", raw_link)[0]
        # Decode every percent-escape, not just ':', '/' and '-'.
        rdict["links"] = unquote(encoded_target)

    if return_type in (ReturnType.FULL, ReturnType.DESCRIPTION):
        snippet = single_result.find(class_='result__snippet')
        rdict["descriptions"] = snippet.text

    return rdict
5960
6061 def get_search_url (self , query = None , page = None , ** kwargs ):
@@ -64,7 +65,7 @@ def get_search_url(self, query=None, page=None, **kwargs):
6465 # Start value for the page
6566 start = 0 if (page < 2 ) else (((page - 1 ) * 50 ) - 20 )
6667
67- type_ = self . keywords .get ("type" , None )
68+ type_ = kwargs .get ("type" , None )
6869
6970 return self .search_url .format (
7071 query = query ,
0 commit comments