diff --git a/README.md b/README.md index 3f7bfd1..c0c14c4 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,18 @@ # MediaWiki API -This MIT Licensed library provides a very simple convenience wrapper -around the [MediaWiki API](http://www.mediawiki.org/wiki/API). and -includes support for authenticated sessions. It requires Python 3 +This MIT-licensed library provides a very simple convenience wrapper +around the [MediaWiki API](https://www.mediawiki.org/wiki/API), +including support for authenticated sessions. It requires Python 3 and that your wiki is using MediaWiki 1.15.3 or greater. * **Installation:** ``pip install mwapi`` * **Documentation:** https://pythonhosted.org/mwapi -* **Repositiory:** https://github.com/mediawiki-utilities/python-mwapi +* **Repository:** https://github.com/mediawiki-utilities/python-mwapi * **License:** MIT -## Example +## Examples + +### Single query >>> import mwapi >>> @@ -27,6 +29,102 @@ and that your wiki is using MediaWiki 1.15.3 or greater. 'timestamp': '2005-12-23T00:07:17Z'}], 'title': 'Grigol Ordzhonikidze', 'pageid': 1429626}}}, 'batchcomplete': ''} +### Query with continuation + +```python +import mwapi +from mwapi.errors import APIError + +session = mwapi.Session('https://en.wikipedia.org/') + +# If passed a `continuation` parameter, returns an iterable over a continued query. +# On each iteration, a new request is made for the next portion of the results. 
+continued = session.get( + formatversion=2, + action='query', + generator='categorymembers', + gcmtitle='Category:17th-century classical composers', + gcmlimit=100, # 100 results per request + continuation=True) + +pages = [] +try: + for portion in continued: + if 'query' in portion: + for page in portion['query']['pages']: + pages.append(page['title']) + else: + print("MediaWiki returned empty result batch.") +except APIError as error: + raise ValueError( + "MediaWiki returned an error:", str(error) + ) + +print("Fetched {} pages".format(len(pages))) +``` + +### Asynchronous single query + +```python +import asyncio +import aiohttp +import mwapi + +async def query(): + async with aiohttp.ClientSession() as s: + session = mwapi.AsyncSession( + 'https://en.wikipedia.org', + user_agent='mwapi async demo', + session=s) + response = await asyncio.create_task( + session.get(action='query', prop='revisions', revids=32423425) + ) + print(response) + +asyncio.run(query()) +``` + +### Asynchronous query with continuation + +```python +import asyncio +import aiohttp + +import mwapi +from mwapi.errors import APIError + +async def query(): + async with aiohttp.ClientSession() as s: + session = mwapi.AsyncSession( + 'https://en.wikipedia.org', + user_agent='mwapi async demo', + session=s) + + continued = await asyncio.create_task( + session.get( + formatversion=2, + action='query', + generator='categorymembers', + gcmtitle='Category:17th-century classical composers', + gcmlimit=100, # 100 results per request + continuation=True) + ) + pages = [] + try: + async for portion in continued: + if 'query' in portion: + for page in portion['query']['pages']: + pages.append(page['title']) + else: + print("MediaWiki returned empty result batch.") + except APIError as error: + raise ValueError( + "MediaWiki returned an error:", str(error) + ) + print("Fetched {} pages".format(len(pages))) + +asyncio.run(query()) +``` ## Authors * YuviPanda -- https://github.com/yuvipanda diff --git 
a/demo_queries.py b/demo_queries.py index 040a997..1197d12 100644 --- a/demo_queries.py +++ b/demo_queries.py @@ -13,11 +13,11 @@ 5. Cause the API to throw an error and catch it. """ -import getpass import sys from itertools import islice import mwapi +import mwapi.cli import mwapi.errors my_agent = 'mwapi demo script ' @@ -25,8 +25,7 @@ formatversion=2, user_agent=my_agent) -print("Logging into English Wikipedia") -session.login(input("Username: "), getpass.getpass("Password: ")) +mwapi.cli.do_login(session, 'https://en.wikipedia.org') print("whoami?") print("\t", session.get(action='query', meta='userinfo'), "\n") @@ -82,6 +81,7 @@ def query_revisions(title=None, pageid=None, batch=50, limit=50, if yielded >= limit: break + print("Querying by title") rev_ids = [] sys.stdout.write("\t ") diff --git a/doc/cli.rst b/doc/cli.rst new file mode 100644 index 0000000..958baf4 --- /dev/null +++ b/doc/cli.rst @@ -0,0 +1 @@ +.. automodule:: mwapi.cli diff --git a/doc/index.rst b/doc/index.rst index 5460620..9faa772 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -21,6 +21,7 @@ Contents session errors + cli Authors ------- diff --git a/mwapi/__init__.py b/mwapi/__init__.py index b796d2e..8a945c3 100644 --- a/mwapi/__init__.py +++ b/mwapi/__init__.py @@ -13,10 +13,13 @@ :License: MIT """ from .session import Session +from .async_session import AsyncSession +from .about import (__name__, __version__, __author__, __author_email__, + __description__, __license__, __url__) MWApi = Session -__all__ = [MWApi, Session] - -__version__ = "0.4.0" +__all__ = [MWApi, Session, AsyncSession, + __name__, __version__, __author__, __author_email__, + __description__, __license__, __url__] diff --git a/mwapi/about.py b/mwapi/about.py new file mode 100644 index 0000000..89d2bfb --- /dev/null +++ b/mwapi/about.py @@ -0,0 +1,10 @@ +__name__ = "mwapi" +__version__ = "0.6.1" +__author__ = "Aaron Halfaker" +__author_email__ = "aaron.halfaker@gmail.com" +__description__ = "Simple wrapper for the 
Mediawiki API" +__license__ = "MIT" +__url__ = "https://github.com/mediawiki-utilities/python-mwapi" + +all = [__name__, __version__, __author__, __author_email__, __description__, + __license__, __url__] diff --git a/mwapi/async_session.py b/mwapi/async_session.py new file mode 100644 index 0000000..c02f0ee --- /dev/null +++ b/mwapi/async_session.py @@ -0,0 +1,213 @@ +import logging + +import asyncio +import aiohttp + +from .errors import (APIError, ConnectionError, RequestError, TimeoutError, + TooManyRedirectsError) +from .util import _normalize_params + +DEFAULT_USERAGENT = "mwapi (python) -- default user-agent" + +logger = logging.getLogger(__name__) + + +class AsyncSession: + """ + Constructs a new API asynchronous session. + + :Parameters: + host : `str` + Host to which to connect. Must include http:// or https:// and + no trailing "/". + user_agent : `str` + The User-Agent header to include with all requests. Use this field + to identify your script/bot/application to system admins of the + MediaWiki API you are using. + formatversion : int + The formatversion to supply to the API for all requests. + api_path : `str` + The path to "api.php" on the server -- must begin with "/". + timeout : `float` + How long to wait for the server to send data before giving up + and raising an error ( + :class:`aiohttp.client_exceptions.ServerTimeoutError` or + :class:`asyncio.TimeoutError`). + By default, aiohttp uses a total timeout of 300 seconds (5 min).
+ session : `aiohttp.ClientSession` + (optional) an `aiohttp` session object to use + """ + + def __init__(self, host, user_agent=None, formatversion=None, + api_path=None, + timeout=None, session=None, **session_params): + self.host = str(host) + self.formatversion = int(formatversion) \ + if formatversion is not None else None + self.api_path = str(api_path or "/w/api.php") + self.api_url = self.host + self.api_path + self.timeout = float(timeout) \ + if timeout is not None else aiohttp.ClientTimeout(total=300) + self.session = session or aiohttp.ClientSession() + for key, value in session_params.items(): + setattr(self.session, key, value) + + self.headers = {} + + if user_agent is None: + logger.warning("Sending requests with default User-Agent. " + + "Set 'user_agent' on mwapi.Session to quiet this " + + "message.") + self.headers['User-Agent'] = DEFAULT_USERAGENT + else: + self.headers['User-Agent'] = user_agent + + async def _request(self, method, params=None, auth=None): + params = params or {} + if self.formatversion is not None: + params['formatversion'] = self.formatversion + + if method.lower() == "post": + data = params + data['format'] = "json" + params = None + + else: + data = None + params = params or {} + params['format'] = "json" + + try: + async with self.session.request(method=method, url=self.api_url, + params=params, data=data, + timeout=self.timeout, + headers=self.headers, + verify_ssl=True, + auth=auth) as resp: + + doc = await resp.json() + + if 'error' in doc: + raise APIError.from_doc(doc['error']) + + if 'warnings' in doc: + logger.warning("The following query raised warnings: {0}" + .format(params or data)) + for module, warning in doc['warnings'].items(): + logger.warning("\t- {0} -- {1}" + .format(module, warning)) + return doc + + except (ValueError, aiohttp.ContentTypeError): + if resp is None: + prefix = "No response data" + else: + prefix = (await resp.text())[:350] + raise ValueError("Could not decode as JSON:\n{0}" + 
.format(prefix)) + except (aiohttp.ServerTimeoutError, + asyncio.TimeoutError) as e: + raise TimeoutError(str(e)) from e + except aiohttp.ClientConnectionError as e: + raise ConnectionError(str(e)) from e + except aiohttp.TooManyRedirects as e: + raise TooManyRedirectsError(str(e)) from e + except Exception as e: + raise RequestError(str(e)) from e + + + async def request(self, method, params=None, query_continue=None, + auth=None, continuation=False): + """ + Sends an HTTP request to the API. + + :Parameters: + method : `str` + Which HTTP method to use for the request? + (Usually "POST" or "GET") + params : `dict` + A set of parameters to send with the request. These parameters + will be included in the POST body for post requests or a query + string otherwise. + query_continue : `dict` + A 'continue' field from a past request. This field represents + the point from which a query should be continued. + auth : mixed + Auth tuple or callable to enable Basic/Digest/Custom HTTP Auth. + continuation : `bool` + If true, a continuation will be attempted and a generator of + JSON response documents will be returned. 
+ + :Returns: + A response JSON document (or a generator of documents if + `continuation == True`) + """ + normal_params = _normalize_params(params, query_continue) + if continuation: + return self._continuation(method, params=normal_params, auth=auth) + else: + return await self._request(method, params=normal_params, auth=auth) + + async def _continuation(self, method, params=None, auth=None): + if "continue" not in params: + params["continue"] = "" + + while True: + doc = await self._request(method, params=params, auth=auth) + yield doc + if "continue" not in doc: + break + # re-send all continue values in the next call + params.update(doc["continue"]) + + async def get(self, query_continue=None, auth=None, continuation=False, + **params): + """Makes an API request with the GET method + + :Parameters: + query_continue : `dict` + Optionally, the value of a query continuation 'continue' field. + auth : mixed + Auth tuple or callable to enable Basic/Digest/Custom HTTP Auth. + continuation : `bool` + If true, a continuation will be attempted and a generator of + JSON response documents will be returned. + params : + Keyword parameters to be sent in the query string. + + :Returns: + A response JSON document (or a generator of documents if + `continuation == True`) + + :Raises: + :class:`mwapi.errors.APIError` : if the API responds with an error + """ + return await self.request("GET", params=params, auth=auth, + query_continue=query_continue, + continuation=continuation) + + async def post(self, query_continue=None, auth=None, continuation=False, + **params): + """Makes an API request with the POST method + + :Parameters: + query_continue : `dict` + Optionally, the value of a query continuation 'continue' field. + auth : mixed + Auth tuple or callable to enable Basic/Digest/Custom HTTP Auth. + continuation : `bool` + If true, a continuation will be attempted and a generator of + JSON response documents will be returned.
+ params : + Keyword parameters to be sent in the POST message body. + + :Returns: + A response JSON document (or a generator of documents if + `continuation == True`) + + :Raises: + :class:`mwapi.errors.APIError` : if the API responds with an error + """ + return await self.request("POST", params=params, auth=auth, + query_continue=query_continue, + continuation=continuation) diff --git a/mwapi/cli.py b/mwapi/cli.py new file mode 100644 index 0000000..61ecdd0 --- /dev/null +++ b/mwapi/cli.py @@ -0,0 +1,69 @@ +""" +Command-line Interface (cli) +============================ + +This module provides utilities for interacting with a user from the +command-line. + +.. autofunction:: mwapi.cli.do_login +""" +import getpass +import sys + +from .errors import ClientInteractionRequest + + +def do_login(session, for_what): + """ + Performs a login handshake with a user on the command-line. This method + will handle all of the follow-up requests (e.g. captcha or two-factor). A + login that requires two-factor looks like this:: + + >>> import mwapi.cli + >>> import mwapi + >>> mwapi.cli.do_login(mwapi.Session("https://en.wikipedia.org"), "English Wikipedia") + Log into English Wikipedia + Username: Halfak (WMF) + Password: + Please enter verification code from your mobile app + Token(OATHToken): 234567 + + :Parameters: + session : :class:`mwapi.Session` + A session object to use for login + for_what : `str` + A name to display to the user (for what they are logging into) + """ # noqa + username, password = request_username_password(for_what) + try: + session.login(username, password) + except ClientInteractionRequest as cir: + params = request_interaction(cir) + session.continue_login(cir.login_token, **params) + + +def request_interaction(cir): + sys.stderr.write("{0}\n".format(cir.message)) + + params = {} + for req_doc in cir.requests: + # sys.stderr.write("id: {0}\n".format(req_doc['id'])) + for name, field in req_doc['fields'].items(): + prefix = "{0}({1}): 
".format(field['label'], name) + if field.get('sensitive', False): + value = getpass.getpass(prefix) + else: + sys.stderr.write(prefix) + sys.stderr.flush() + value = open('/dev/tty').readline().strip() + + params[name] = value + + return params + + +def request_username_password(for_what): + sys.stderr.write("Log into " + for_what + "\n") + sys.stderr.write("Username: ") + sys.stderr.flush() + return open('/dev/tty').readline().strip(), getpass.getpass("Password: ") diff --git a/mwapi/errors.py b/mwapi/errors.py index abea912..5e8a24f 100644 --- a/mwapi/errors.py +++ b/mwapi/errors.py @@ -17,6 +17,8 @@ .. autoclass:: TimeoutError """ import requests.exceptions +import aiohttp +import asyncio class APIError(RuntimeError): @@ -47,19 +49,38 @@ class LoginError(RuntimeError): @classmethod def from_doc(cls, doc): - return cls(doc.get('result')) + return cls(doc.get('status') + " -- " + doc.get('message')) -class RequestError(requests.exceptions.RequestException): +class ClientInteractionRequest(RuntimeError): """ - A generic error thrown by :mod:`requests`. + Thrown when user input is needed to log in. + """ + + def __init__(self, login_token, message, requests): + super().__init__((login_token, message, requests)) + self.login_token = login_token + self.message = message + self.requests = requests + + @classmethod + def from_doc(cls, login_token, doc): + return cls(login_token, doc.get('message'), doc.get('requests', [])) + + +class RequestError(requests.exceptions.RequestException, + aiohttp.ClientError): + """ + A generic error thrown by :mod:`requests` or `aiohttp`. """ pass -class ConnectionError(requests.exceptions.ConnectionError): +class ConnectionError(requests.exceptions.ConnectionError, + aiohttp.ClientConnectionError): """ - Handles a :class:`requests.exceptions.ConnectionError` + Handles a :class:`requests.exceptions.ConnectionError` or + :class:`aiohttp.ClientConnectionError`. 
""" pass @@ -71,15 +92,21 @@ class HTTPError(requests.exceptions.HTTPError): pass -class TooManyRedirectsError(requests.exceptions.TooManyRedirects): +class TooManyRedirectsError(requests.exceptions.TooManyRedirects, + aiohttp.TooManyRedirects): """ - Handles a :class:`requests.exceptions.TooManyRedirects` + Handles a :class:`requests.exceptions.TooManyRedirects` or + :class:`aiohttp.TooManyRedirects`. """ pass -class TimeoutError(requests.exceptions.Timeout): +class TimeoutError(requests.exceptions.Timeout, + aiohttp.ServerTimeoutError, + asyncio.TimeoutError): """ - Handles a :class:`requests.exceptions.TimeoutError` + Handles a :class:`requests.exceptions.TimeoutError` or + :class:`aiohttp.ServerTimeoutError` or + :class:`asyncio.TimeoutError`. """ pass diff --git a/mwapi/session.py b/mwapi/session.py index 30684a9..8b5707b 100644 --- a/mwapi/session.py +++ b/mwapi/session.py @@ -19,8 +19,10 @@ import requests import requests.exceptions -from .errors import (APIError, ConnectionError, HTTPError, LoginError, - RequestError, TimeoutError, TooManyRedirectsError) +from .errors import (APIError, ClientInteractionRequest, ConnectionError, + HTTPError, LoginError, RequestError, TimeoutError, + TooManyRedirectsError) +from .util import _normalize_params DEFAULT_USERAGENT = "mwapi (python) -- default user-agent" @@ -57,8 +59,8 @@ def __init__(self, host, user_agent=None, formatversion=None, api_path=None, timeout=None, session=None, **session_params): self.host = str(host) - self.formatversion = int(formatversion) if formatversion is not None \ - else None + self.formatversion = int(formatversion) \ + if formatversion is not None else None self.api_path = str(api_path or "/w/api.php") self.api_url = self.host + self.api_path self.timeout = float(timeout) if timeout is not None else None @@ -209,7 +211,7 @@ def _continuation(self, method, params=None, files=None, auth=None): params.update(doc['continue']) files = None # Don't send files again - def login(self, username, 
password): + def login(self, username, password, login_token=None): """ Authenticate with the given credentials. If authentication is successful, all further requests sent will be signed the authenticated @@ -226,19 +228,50 @@ def login(self, username, password): :Raises: :class:`mwapi.errors.LoginError` : if authentication fails + :class:`mwapi.errors.ClientInteractionRequest` : if authentication requires a continue_login() call :class:`mwapi.errors.APIError` : if the API responds with an error """ - token_doc = self.post(action="login", lgname=username, - lgpassword=password) + if login_token is None: + token_doc = self.post(action='query', meta='tokens', type='login') + login_token = token_doc['query']['tokens']['logintoken'] + + login_doc = self.post( + action="clientlogin", username=username, password=password, + logintoken=login_token, loginreturnurl="http://example.org/") + + if login_doc['clientlogin']['status'] == "UI": + raise ClientInteractionRequest.from_doc( + login_token, login_doc['clientlogin']) + elif login_doc['clientlogin']['status'] != 'PASS': + raise LoginError.from_doc(login_doc['clientlogin']) + return login_doc['clientlogin'] + + def continue_login(self, login_token, **params): + """ + Continues a login that requires an additional step. This is common + for when login requires completing a captcha or supplying a two-factor + authentication token. - login_doc = self.post(action="login", lgname=username, - lgpassword=password, - lgtoken=token_doc['login']['token']) + :Parameters: + login_token : `str` + A login token generated by the MediaWiki API (and used in a + previous call to login()) + params : `mixed` + A set of parameters to include with the request. This depends + on what "requests" for additional information were made by the + MediaWiki API. 
+ """ - result = login_doc['login']['result'] - if result != 'Success': - raise LoginError.from_doc(login_doc['login']) - return result + login_params = { + 'action': "clientlogin", + 'logintoken': login_token, + 'logincontinue': 1 + } + login_params.update(params) + login_doc = self.post(**login_params) + if login_doc['clientlogin']['status'] != 'PASS': + raise LoginError.from_doc(login_doc['clientlogin']) + return login_doc['clientlogin'] def logout(self): """ @@ -308,21 +341,3 @@ def post(self, query_continue=None, upload_file=None, auth=None, return self.request('POST', params=params, auth=auth, query_continue=query_continue, files=files, continuation=continuation) - - -def _normalize_value(value): - if isinstance(value, str): - return value - elif hasattr(value, "__iter__"): - return "|".join(str(v) for v in value) - else: - return value - - -def _normalize_params(params, query_continue=None): - normal_params = {k: _normalize_value(v) for k, v in params.items()} - - if query_continue is not None: - normal_params.update(query_continue) - - return normal_params diff --git a/mwapi/util.py b/mwapi/util.py new file mode 100644 index 0000000..48321ca --- /dev/null +++ b/mwapi/util.py @@ -0,0 +1,19 @@ +def _normalize_value(value): + if isinstance(value, str): + return value + elif isinstance(value, bool): + return "" if value else None + elif hasattr(value, "__iter__"): + return "|".join(str(v) for v in value) + else: + return value + + +def _normalize_params(params, query_continue=None): + normal_params = {k: _normalize_value(v) for k, v in params.items()} + normal_params = {k: v for k, v in normal_params.items() if v is not None} + + if query_continue is not None: + normal_params.update(query_continue) + + return normal_params diff --git a/setup.py b/setup.py index d279580..3e37fb2 100644 --- a/setup.py +++ b/setup.py @@ -1,14 +1,20 @@ +import os.path + from setuptools import setup +about_path = os.path.join(os.path.dirname(__file__), "mwapi/about.py") 
+exec(compile(open(about_path).read(), about_path, "exec")) + setup( - name="mwapi", - version="0.4.0", # Change in mwapi/__init__.py - author="Yuvi Panda", - author_email="yuvipanda@gmail.com", - url="http://github.com/yuvipanda/python-mwapi", + name=__name__, # noqa + version=__version__, # noqa + author=__author__, # noqa + author_email=__author_email__, # noqa + description=__description__, # noqa + url=__url__, # noqa + license=__license__, # noqa packages=["mwapi"], - license=open("LICENSE").read(), - description="Simple wrapper for the Mediawiki API", long_description=open("README.md").read(), - install_requires=["requests"] + long_description_content_type="text/markdown", + install_requires=["requests", "aiohttp"] )