
Commit 0339541

Docs: Updated Usage instructions and added module docs

1 parent ef1e8d5

4 files changed (+106, -55 lines)

README.md

Lines changed: 54 additions & 27 deletions
@@ -83,33 +83,40 @@ Found on [Read the Docs](https://search-engine-parser.readthedocs.io/en/latest)
 Query Results can be scraped from popular search engines as shown in the example snippet below
 
 ```python
-from search_engine_parser.engines.yahoo import Search as YahooSearch
-from search_engine_parser.engines.google import Search as GoogleSearch
-from search_engine_parser.engines.bing import Search as BingSearch
-import pprint
-
-search_args = ('preaching to the choir', 1)
-gsearch = GoogleSearch()
-ysearch = YahooSearch()
-bsearch = BingSearch()
-gresults = gsearch.search(*search_args)
-yresults = ysearch.search(*search_args)
-bresults = bsearch.search(*search_args)
-a = {
-    "Google": gresults,
-    "Yahoo": yresults,
-    "Bing": bresults}
-# pretty print the result from each engine
-for k, v in a.items():
-    print(f"-------------{k}------------")
-    pprint.pprint(v)
-
-# print first title from google search
-print(gresults["titles"][0])
-# print 10th link from yahoo search
-print(yresults["links"][9])
-# print 6th description from bing search
-print(bresults["descriptions"][5])
+import pprint
+
+from search_engine_parser.core.engines.bing import Search as BingSearch
+from search_engine_parser.core.engines.google import Search as GoogleSearch
+from search_engine_parser.core.engines.yahoo import Search as YahooSearch
+
+search_args = ('preaching to the choir', 1)
+gsearch = GoogleSearch()
+ysearch = YahooSearch()
+bsearch = BingSearch()
+gresults = gsearch.search(*search_args)
+yresults = ysearch.search(*search_args)
+bresults = bsearch.search(*search_args)
+a = {
+    "Google": gresults,
+    "Yahoo": yresults,
+    "Bing": bresults
+}
+
+# pretty print the result from each engine
+for k, v in a.items():
+    print(f"-------------{k}------------")
+    for result in v:
+        pprint.pprint(result)
+
+# print first title from google search
+print(gresults["titles"][0])
+# print 10th link from yahoo search
+print(yresults["links"][9])
+# print 6th description from bing search
+print(bresults["descriptions"][5])
+
+# print first result containing links, descriptions and title
+print(gresults[0])
 ```
 
 For localization, you can pass the `url` keyword and a localized url. This would use the url to query and parse using the same engine's parser
@@ -118,6 +125,26 @@ For localization, you can pass the `url` keyword and a localized url
 results = gsearch.search(*search_args, url="google.de")
 ```
 
+#### Async
+search-engine-parser supports `async`, so you can write code like
+```python
+results = await gsearch.async_search(*search_args)
+```
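As a minimal end-to-end sketch of the async path, assuming the same `GoogleSearch` import as the usage snippet above (`asyncio` is from the standard library):

```python
import asyncio

from search_engine_parser.core.engines.google import Search as GoogleSearch

async def main():
    gsearch = GoogleSearch()
    # async_search is a coroutine, so it must be awaited inside an event loop
    results = await gsearch.async_search("preaching to the choir", 1)
    print(results["titles"][0])

asyncio.run(main())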
+
+#### Results
+The `SearchResult` object returned after searching
+```python
+>>> results = gsearch.search("preaching the choir", 1)
+>>> results
+<search_engine_parser.core.base.SearchResult object at 0x7f907426a280>
+# The object supports retrieving individual results by iteration or just by type (links, descriptions, titles)
+>>> results[0] # Returns the first <SearchItem>
+>>> results[0]["description"] # Get the description of the first item
+>>> results[0]["link"] # Get the link of the first item
+>>> results["descriptions"] # Returns a list of all descriptions from all results
+```
+It can be iterated like a normal list to yield individual SearchItem objects
+
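The iteration just described can be sketched like this, reusing `gresults` from the usage snippet; the singular `title`/`link` keys follow the `results[0]["link"]` examples above:

```python
# A SearchResult iterates like a list; each element is a SearchItem dict
for item in gresults:
    print(item["title"], item["link"])
```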
 ### Command line
 
 Search engine parser comes with a CLI tool known as `pysearch` e.g

docs/engines.md

Lines changed: 11 additions & 23 deletions
@@ -38,19 +38,26 @@
     # name of the engine to be displayed on the CLI, preferably PascalCase
     name = "FakeEngine"
     # engine url to be searched, with parameters to be formatted, e.g. query, page
-    search_url = "https://search.fake.com/fake/search?q={query}&page={page}"
+    search_url = "https://search.fake.com/fake/search"
     # a short 2 or 3 line summary of the engine with some statistics, preferably obtained from Wikipedia
     summary = "\t According to netmarketshare, this site is balderdash among "\
         "search engines with a market share that is close to 100%. "\
         "The fake engine includes many popular features but was solely created to show you an example."
 
 
+    # this function should return the dict of params to be passed to the search_url
+    def get_params(self, query=None, page=None, offset=None, **kwargs):
+        params = {}
+        params["q"] = query
+        params["page"] = page
+        return params
+
     # This function should use BeautifulSoup (combined with regex if necessary)
     # to return all the divs containing results
     def parse_soup(self, soup):
         return soup.find_all('div', class_='fake-result-div')
 
-    # This function should parse each div to return title, link, and description
+    # This function should parse each result soup to return title, link, and description
     # NOTE: The implementation may not be as straightforward as shown below
     def parse_single_result(self, single_result):
         title_div = single_result.find('div', class_='fake-title')
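Putting these pieces together, a hedged sketch of a complete minimal engine module; it assumes the `BaseSearch` class from `search_engine_parser/core/base.py` shown later in this commit, and the URL and CSS class names are illustrative placeholders:

```python
from search_engine_parser.core.base import BaseSearch


class Search(BaseSearch):
    name = "FakeEngine"
    search_url = "https://search.fake.com/fake/search"
    summary = "An illustrative engine used only for this example."

    def get_params(self, query=None, page=None, offset=None, **kwargs):
        # dict of query-string params to be appended to search_url
        return {"q": query, "page": page}

    def parse_soup(self, soup):
        # every div that wraps a single search result
        return soup.find_all('div', class_='fake-result-div')

    def parse_single_result(self, single_result):
        # pull the three standard fields out of one result div
        title_div = single_result.find('div', class_='fake-title')
        link_tag = single_result.find('a', class_='fake-link')
        desc_div = single_result.find('div', class_='fake-description')
        return {
            "title": title_div.text.strip() if title_div else None,
            "link": link_tag.get('href') if link_tag else None,
            "description": desc_div.text.strip() if desc_div else None,
        }
```

The plain-dict return from `parse_single_result` is an assumption for illustration; the real engines may build a `SearchItem` or follow a different convention.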
@@ -69,32 +76,13 @@
 
 * Import the engine by adding to the following files
 
-[search_engine_parser/core/engines/__init__.py](https://github.com/bisoncorps/search-engine-parser/blob/master/search_engine_parser/core/engines/__init__.py)
+[search_engine_parser/__init__.py](https://github.com/bisoncorps/search-engine-parser/blob/master/search_engine_parser/__init__.py)
 
 ```python
 ...
-from .fake import FakeEngineSearch
-```
-
-[search_engine_parser/core/__init__.py](https://github.com/bisoncorps/search-engine-parser/blob/master/search_engine_parser/core/__init__.py)
-
-```python
-from search_engine_parser.core.engines import (
-    ...
-    FakeEngineSearch
-)
+from search_engine_parser.core.engines.fake import Search as FakeEngineSearch
 ```
 
-* Write Tests for the Engine to the [search_engine_parser/test/](https://github.com/bisoncorps/search-engine-parser/blob/master/search_engine_parser/test) directory
-
-* Include into the CLI at [search_engine_parser/core/cli.py](https://github.com/bisoncorps/search-engine-parser/blob/master/search_engine_parser/core/cli.py)
-
-```python
-def main(args):
-    ...
-    elif engine == 'fake':
-        engine_class = FakeEngineSearch
-```
 
 * Make sure to write code documentation by following the [documentation guide](https://github.com/bisoncorps/search-engine-parser/blob/master/docs/documentation.md#documenting-an-engine)

search_engine_parser/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -21,6 +21,8 @@
 
 """
 
+# Allow import using `search_engine_parser.engines`
+from search_engine_parser.core import engines
 # Support for older versions of imports
 # DEPRECATION_WARNING: These imports will be removed in later versions
 from search_engine_parser.core.engines.aol import Search as AolSearch
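A quick sketch of what the new re-export enables, based only on the imports visible above:

```python
# both now work at package level
from search_engine_parser import engines     # subpackage re-export added in this commit
from search_engine_parser import AolSearch   # older-style alias kept for backwards compatibility

print(engines)    # the search_engine_parser.core.engines module
print(AolSearch)  # the aol engine's Search class
```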

search_engine_parser/core/base.py

Lines changed: 39 additions & 5 deletions
@@ -26,10 +26,41 @@ class ReturnType(Enum):
 
 # All results returned are items of a search
 class SearchItem(dict):
-    pass
+    """
+    SearchItem is a dict for a single result, containing keys (titles, descriptions,
+    links and other additional keys depending on the engine)
+    >>> result
+    <search_engine_parser.core.base.SearchItem object at 0x7f907426a280>
+    >>> result["description"]
+    Some description
+    >>> result["descriptions"]
+    Same description
+    """
+    def __getitem__(self, value):
+        """ Allow getting by index and by type ('descriptions', 'links'...) """
+        try:
+            return super().__getitem__(value)
+        except KeyError:
+            pass
+        if not value.endswith('s'):
+            value += 's'
+        return super().__getitem__(value)
 
 
 class SearchResult():
+    """
+    The SearchResult object returned after searching
+
+    >>> results = gsearch.search("preaching the choir", 1)
+    >>> results
+    <search_engine_parser.core.base.SearchResult object at 0x7f907426a280>
+
+    The object supports retrieving individual results by iteration or just by type
+    >>> results[0] # Returns the first result <SearchItem>
+    >>> results["descriptions"] # Returns a list of all descriptions from all results
+
+    It can be iterated like a normal list to yield individual SearchItem objects
+    """
     # Hold the results
     results = []
     # This method is inefficient; it will be deprecated soon
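The singular/plural fallback in the new `__getitem__` is easiest to see in isolation; a small sketch with a made-up key:

```python
from search_engine_parser.core.base import SearchItem

item = SearchItem(titles="Example title")
print(item["titles"])  # direct hit on the stored key
print(item["title"])   # KeyError is swallowed, 's' is appended, and "titles" is returned
```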
@@ -54,8 +85,11 @@ def keys(self):
             keys = x.keys()
         return keys
 
-	def __len__(self):
-		return len(self.results)
+    def __len__(self):
+        return len(self.results)
+
+    def __repr__(self):
+        return "<SearchResult: {} results>".format(len(self.results))
 
 
 class BaseSearch:
@@ -188,7 +222,7 @@ def get_results(self, soup, **kwargs):
 
         return search_results
 
-    def search(self, query=None, page=None, **kwargs):
+    def search(self, query=None, page=1, **kwargs):
         """
         Query the search engine
 
@@ -206,7 +240,7 @@ def search(self, query=None, page=None, **kwargs):
                 query, page, **kwargs)))
         return self.get_results(soup, **kwargs)
 
-    async def async_search(self, query=None, page=None, callback=None, **kwargs):
+    async def async_search(self, query=None, page=1, callback=None, **kwargs):
         """
         Query the search engine but in async mode
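With the new default, the page argument can be omitted; a small sketch of the now-equivalent calls, reusing `gsearch` from the README example:

```python
# page defaults to 1, so these two calls are equivalent
results = gsearch.search("preaching to the choir")
results = gsearch.search("preaching to the choir", 1)
```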
