From e62825fa24480c1ec89ba88ffb4846eb0e71e596 Mon Sep 17 00:00:00 2001 From: yijxie Date: Mon, 15 Jul 2019 15:23:21 -0700 Subject: [PATCH 01/42] Shared connection (sync) draft --- .../azure-eventhubs/azure/eventhub/client.py | 28 +- .../azure/eventhub/connection_manager.py | 50 +++ .../azure/eventhub/consumer.py | 341 +++++------------- .../azure/eventhub/producer.py | 275 ++++++-------- 4 files changed, 286 insertions(+), 408 deletions(-) create mode 100644 sdk/eventhub/azure-eventhubs/azure/eventhub/connection_manager.py diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py index 308aa2000a6d..9770f30dc114 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py @@ -27,6 +27,7 @@ from azure.eventhub.error import ConnectError from .client_abstract import EventHubClientAbstract from .common import EventHubSASTokenCredential, EventHubSharedKeyCredential +from .connection_manager import _ConnectionManager log = logging.getLogger(__name__) @@ -47,6 +48,23 @@ class EventHubClient(EventHubClientAbstract): """ + def __init__(self, host, event_hub_path, credential, **kwargs): + super(EventHubClient, self).__init__(host, event_hub_path, credential, **kwargs) + alt_creds = { + "username": self._auth_config.get("iot_username"), + "password": self._auth_config.get("iot_password") + } + self._conn_manager = _ConnectionManager() + + def __del__(self): + self._conn_manager.close_connection() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + def _create_auth(self, username=None, password=None): """ Create an ~uamqp.authentication.SASTokenAuth instance to authenticate @@ -100,15 +118,18 @@ def _management_request(self, mgmt_msg, op_type): while True: connect_count += 1 mgmt_auth = self._create_auth(**alt_creds) - mgmt_client = uamqp.AMQPClient(self.mgmt_target, auth=mgmt_auth, debug=self.config.network_tracing) + mgmt_client = uamqp.AMQPClient(self.mgmt_target) try: - mgmt_client.open() + conn = self._conn_manager.get_connection() + mgmt_client.open(connection=self._conn_manager.get_connection()) + print(conn._state) response = mgmt_client.mgmt_request( mgmt_msg, constants.READ_OPERATION, op_type=op_type, status_code_field=b'status-code', description_fields=b'status-description') + print(conn._state) return response except (errors.AMQPConnectionError, errors.TokenAuthFailure, compat.TimeoutException) as failure: if connect_count >= self.config.max_retries: @@ -268,3 +289,6 @@ def create_producer(self, partition_id=None, operation=None, send_timeout=None): handler = EventHubProducer( self, target, partition=partition_id, send_timeout=send_timeout) return handler + + def close(self): + self._conn_manager.close_connection() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/connection_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/connection_manager.py new file mode 100644 index 000000000000..026348c3a215 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/connection_manager.py @@ -0,0 +1,50 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. 
+# -------------------------------------------------------------------------------------------- + +import threading +from uamqp import Connection, TransportType + + +class _ConnectionManager(object): + def __init__(self, **kwargs): + self._lock = threading.Lock() + self._conn = None + + self._container_id = kwargs.get("container_id") + self._debug = kwargs.get("debug") + self._error_policy = kwargs.get("error_policy") + self._properties = kwargs.get("properties") + self._encoding = kwargs.get("encoding") or "UTF-8" + self._transport_type = kwargs.get('transport_type') or TransportType.Amqp + self._http_proxy = kwargs.get('http_proxy') + self._max_frame_size = kwargs.get("max_frame_size") + self._channel_max = kwargs.get("channel_max") + self._idle_timeout = kwargs.get("idle_timeout") + self._remote_idle_timeout_empty_frame_send_ratio = kwargs.get("remote_idle_timeout_empty_frame_send_ratio") + + def get_connection(self, host, auth): + # type: (...) -> Connection + with self._lock: + if self._conn is None: + self._conn = Connection( + host, + auth, + container_id=self._container_id, + max_frame_size=self._max_frame_size, + channel_max=self._channel_max, + idle_timeout=self._idle_timeout, + properties=self._properties, + remote_idle_timeout_empty_frame_send_ratio=self._remote_idle_timeout_empty_frame_send_ratio, + error_policy=self._error_policy, + debug=self._debug, + encoding=self._encoding) + return self._conn + + def close_connection(self): + with self._lock: + if self._conn: + self._conn.destroy() + self._conn = None + diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py index 856c77d6fb65..2909d0fe5f0c 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py @@ -75,17 +75,7 @@ def __init__(self, client, source, event_position=None, prefetch=300, owner_leve source.set_filter(self.offset._selector()) # pylint: disable=protected-access if owner_level: self.properties = {types.AMQPSymbol(self._epoch): types.AMQPLong(int(owner_level))} - self._handler = ReceiveClient( - source, - auth=self.client.get_auth(), - debug=self.client.config.network_tracing, - prefetch=self.prefetch, - link_properties=self.properties, - timeout=self.timeout, - error_policy=self.retry_policy, - keep_alive_interval=self.keep_alive, - client_name=self.name, - properties=self.client._create_properties(self.client.config.user_agent)) # pylint: disable=protected-access + self._handler = None def __enter__(self): return self @@ -97,84 +87,52 @@ def __iter__(self): return self def __next__(self): - self._open() max_retries = self.client.config.max_retries - connecting_count = 0 + retry_count = 0 while True: - connecting_count += 1 try: + self._open() if not self.messages_iter: self.messages_iter = self._handler.receive_messages_iter() message = next(self.messages_iter) event_data = EventData(message=message) self.offset = EventPosition(event_data.offset, inclusive=False) return event_data - except errors.AuthenticationException as auth_error: - if connecting_count < max_retries: - log.info("EventHubConsumer disconnected due to token error. Attempting reconnect.") - self._reconnect() - else: - log.info("EventHubConsumer authentication failed. 
Shutting down.") - error = AuthenticationError(str(auth_error), auth_error) - self.close(auth_error) - raise error - except (errors.LinkDetach, errors.ConnectionClose) as shutdown: - if shutdown.action.retry and self.auto_reconnect: - log.info("EventHubConsumer detached. Attempting reconnect.") - self._reconnect() - else: - log.info("EventHubConsumer detached. Shutting down.") - error = ConnectionLostError(str(shutdown), shutdown) - self.close(exception=error) - raise error - except errors.MessageHandlerError as shutdown: - if connecting_count < max_retries: - log.info("EventHubConsumer detached. Attempting reconnect.") - self._reconnect() - else: - log.info("EventHubConsumer detached. Shutting down.") - error = ConnectionLostError(str(shutdown), shutdown) - self.close(error) - raise error - except errors.AMQPConnectionError as shutdown: - if connecting_count < max_retries: - log.info("EventHubConsumer connection lost. Attempting reconnect.") - self._reconnect() - else: - log.info("EventHubConsumer connection lost. Shutting down.") - error = ConnectionLostError(str(shutdown), shutdown) - self.close(error) - raise error - except compat.TimeoutException as shutdown: - if connecting_count < max_retries: - log.info("EventHubConsumer timed out receiving event data. Attempting reconnect.") - self._reconnect() - else: - log.info("EventHubConsumer timed out. Shutting down.") - self.close(shutdown) - raise ConnectionLostError(str(shutdown), shutdown) - except StopIteration: - raise - except KeyboardInterrupt: - log.info("EventHubConsumer stops due to keyboard interrupt") - self.close() - raise - except Exception as e: - log.error("Unexpected error occurred (%r). Shutting down.", e) - error = EventHubError("Receive failed: {}".format(e), e) - self.close(exception=error) - raise error + except Exception as exception: + self._handle_exception(exception, retry_count, max_retries) + retry_count += 1 def _check_closed(self): if self.error: raise EventHubError("This consumer has been closed. 
Please create a new consumer to receive event data.", self.error) + def _create_handler(self): + alt_creds = { + "username": self.client._auth_config.get("iot_username"), + "password": self.client._auth_config.get("iot_password")} + source = Source(self.source) + if self.offset is not None: + source.set_filter(self.offset._selector()) + self._handler = ReceiveClient( + source, + auth=self.client.get_auth(**alt_creds), + debug=self.client.config.network_tracing, + prefetch=self.prefetch, + link_properties=self.properties, + timeout=self.timeout, + error_policy=self.retry_policy, + keep_alive_interval=self.keep_alive, + client_name=self.name, + properties=self.client._create_properties( + self.client.config.user_agent)) # pylint: disable=protected-access + self.messages_iter = None + def _redirect(self, redirect): self.redirected = redirect self.running = False self.messages_iter = None - self._open() + self._close_connection() def _open(self): """ @@ -184,129 +142,79 @@ def _open(self): """ # pylint: disable=protected-access - self._check_closed() - if self.redirected: - self.client._process_redirect_uri(self.redirected) - self.source = self.redirected.address - source = Source(self.source) - if self.offset is not None: - source.set_filter(self.offset._selector()) - - alt_creds = { - "username": self.client._auth_config.get("iot_username"), - "password":self.client._auth_config.get("iot_password")} - self._handler = ReceiveClient( - source, - auth=self.client.get_auth(**alt_creds), - debug=self.client.config.network_tracing, - prefetch=self.prefetch, - link_properties=self.properties, - timeout=self.timeout, - error_policy=self.retry_policy, - keep_alive_interval=self.keep_alive, - client_name=self.name, - properties=self.client._create_properties(self.client.config.user_agent)) # pylint: disable=protected-access if not self.running: - self._connect() + if self.redirected: + self.client._process_redirect_uri(self.redirected) + self.source = self.redirected.address + self._create_handler() + self._handler.open(connection=self.client._conn_manager.get_connection( + self.client.address.hostname, + self.client.get_auth() + )) + while not self._handler.client_ready(): + time.sleep(0.05) self.running = True - def _connect(self): - connected = self._build_connection() - if not connected: - time.sleep(self.reconnect_backoff) - while not self._build_connection(is_reconnect=True): - time.sleep(self.reconnect_backoff) - - def _build_connection(self, is_reconnect=False): - """ + def _close_handler(self): + self._handler.close() # close the link (sharing connection) or connection (not sharing) + self.running = False - :param is_reconnect: True - trying to reconnect after fail to connect or a connection is lost. - False - the 1st time to connect - :return: True - connected. 
False - not connected - """ - # pylint: disable=protected-access - if is_reconnect: - alt_creds = { - "username": self.client._auth_config.get("iot_username"), - "password": self.client._auth_config.get("iot_password")} - self._handler.close() - source = Source(self.source) - if self.offset is not None: - source.set_filter(self.offset._selector()) - self._handler = ReceiveClient( - source, - auth=self.client.get_auth(**alt_creds), - debug=self.client.config.network_tracing, - prefetch=self.prefetch, - link_properties=self.properties, - timeout=self.timeout, - error_policy=self.retry_policy, - keep_alive_interval=self.keep_alive, - client_name=self.name, - properties=self.client._create_properties( - self.client.config.user_agent)) # pylint: disable=protected-access - self.messages_iter = None - try: - self._handler.open() - while not self._handler.client_ready(): - time.sleep(0.05) - return True - except errors.AuthenticationException as shutdown: - if is_reconnect: - log.info("EventHubConsumer couldn't authenticate. Shutting down. (%r)", shutdown) - error = AuthenticationError(str(shutdown), shutdown) - self.close(exception=error) - raise error - else: - log.info("EventHubConsumer couldn't authenticate. Attempting reconnect.") - return False - except errors.LinkRedirect as redirect: - self._redirect(redirect) - return True - except (errors.LinkDetach, errors.ConnectionClose) as shutdown: - if shutdown.action.retry: - log.info("EventHubConsumer detached. Attempting reconnect.") - return False - else: - log.info("EventHubConsumer detached. Shutting down.") - error = ConnectError(str(shutdown), shutdown) - self.close(exception=error) - raise error - except errors.MessageHandlerError as shutdown: - if is_reconnect: + def _close_connection(self): + self._close_handler() + self.client._conn_manager.close_connection() # close the shared connection. + + def _handle_exception(self, exception, retry_count, max_retries): + if isinstance(exception, KeyboardInterrupt): + log.info("EventHubConsumer stops due to keyboard interrupt") + self.close() + raise + elif retry_count >= max_retries: + log.info("EventHubConsumer has an error and has exhausted retrying. (%r)", exception) + if isinstance(exception, errors.AuthenticationException): + log.info("EventHubConsumer authentication failed. Shutting down.") + error = AuthenticationError(str(exception), exception) + elif isinstance(exception, errors.LinkDetach): + log.info("EventHubConsumer link detached. Shutting down.") + error = ConnectionLostError(str(exception), exception) + elif isinstance(exception, errors.ConnectionClose): + log.info("EventHubConsumer connection closed. Shutting down.") + error = ConnectionLostError(str(exception), exception) + elif isinstance(exception, errors.MessageHandlerError): log.info("EventHubConsumer detached. Shutting down.") - error = ConnectError(str(shutdown), shutdown) - self.close(exception=error) - raise error + error = ConnectionLostError(str(exception), exception) + elif isinstance(exception, errors.AMQPConnectionError): + log.info("EventHubConsumer connection lost. Shutting down.") + error_type = AuthenticationError if str(exception).startswith("Unable to open authentication session") \ + else ConnectError + error = error_type(str(exception), exception) + elif isinstance(exception, compat.TimeoutException): + log.info("EventHubConsumer timed out. Shutting down.") + error = ConnectionLostError(str(exception), exception) else: - log.info("EventHubConsumer detached. 
Attempting reconnect.") - return False - except errors.AMQPConnectionError as shutdown: - if is_reconnect: - log.info("EventHubConsumer connection error (%r). Shutting down.", shutdown) - error = AuthenticationError(str(shutdown), shutdown) - self.close(exception=error) - raise error - else: - log.info("EventHubConsumer couldn't authenticate. Attempting reconnect.") - return False - except compat.TimeoutException as shutdown: - if is_reconnect: - log.info("EventHubConsumer authentication timed out. Shutting down.") - error = AuthenticationError(str(shutdown), shutdown) - self.close(exception=error) - raise error - else: - log.info("EventHubConsumer authentication timed out. Attempting reconnect.") - return False - except Exception as e: - log.error("Unexpected error occurred when building connection (%r). Shutting down.", e) - error = EventHubError("Unexpected error occurred when building connection", e) + log.error("Unexpected error occurred (%r). Shutting down.", exception) + error = EventHubError("Receive failed: {}".format(exception), exception) self.close(exception=error) raise error - - def _reconnect(self): - return self._build_connection(is_reconnect=True) + else: + log.info("EventHubConsumer has an exception (%r). Retrying...", exception) + if isinstance(exception, errors.AuthenticationException): + self._close_connection() + elif isinstance(exception, errors.LinkRedirect): + log.info("EventHubConsumer link redirected. Redirecting...") + redirect = exception + self._redirect(redirect) + elif isinstance(exception, errors.LinkDetach): + self._close_handler() + elif isinstance(exception, errors.ConnectionClose): + self._close_connection() + elif isinstance(exception, errors.MessageHandlerError): + self._close_handler() + elif isinstance(exception, errors.AMQPConnectionError): + self._close_connection() + elif isinstance(exception, compat.TimeoutException): + pass # Timeout doesn't need to recreate link or exception + else: + self._close_connection() @property def queue_size(self): @@ -348,17 +256,16 @@ def receive(self, max_batch_size=None, timeout=None): """ self._check_closed() - self._open() max_batch_size = min(self.client.config.max_batch_size, self.prefetch) if max_batch_size is None else max_batch_size timeout = self.client.config.receive_timeout if timeout is None else timeout data_batch = [] # type: List[EventData] max_retries = self.client.config.max_retries - connecting_count = 0 + retry_count = 0 while True: - connecting_count += 1 try: + self._open() timeout_ms = 1000 * timeout if timeout else 0 message_batch = self._handler.receive_message_batch( max_batch_size=max_batch_size - (len(data_batch) if data_batch else 0), @@ -368,59 +275,9 @@ def receive(self, max_batch_size=None, timeout=None): self.offset = EventPosition(event_data.offset) data_batch.append(event_data) return data_batch - except errors.AuthenticationException as auth_error: - if connecting_count < max_retries: - log.info("EventHubConsumer disconnected due to token error. Attempting reconnect.") - self._reconnect() - else: - log.info("EventHubConsumer authentication failed. Shutting down.") - error = AuthenticationError(str(auth_error), auth_error) - self.close(auth_error) - raise error - except (errors.LinkDetach, errors.ConnectionClose) as shutdown: - if shutdown.action.retry and self.auto_reconnect: - log.info("EventHubConsumer detached. Attempting reconnect.") - self._reconnect() - else: - log.info("EventHubConsumer detached. 
Shutting down.") - error = ConnectionLostError(str(shutdown), shutdown) - self.close(exception=error) - raise error - except errors.MessageHandlerError as shutdown: - if connecting_count < max_retries: - log.info("EventHubConsumer detached. Attempting reconnect.") - self._reconnect() - else: - log.info("EventHubConsumer detached. Shutting down.") - error = ConnectionLostError(str(shutdown), shutdown) - self.close(error) - raise error - except errors.AMQPConnectionError as shutdown: - if connecting_count < max_retries: - log.info("EventHubConsumer connection lost. Attempting reconnect.") - self._reconnect() - else: - log.info("EventHubConsumer connection lost. Shutting down.") - error = ConnectionLostError(str(shutdown), shutdown) - self.close(error) - raise error - except compat.TimeoutException as shutdown: - if connecting_count < max_retries: - log.info("EventHubConsumer timed out receiving event data. Attempting reconnect.") - self._reconnect() - else: - log.info("EventHubConsumer timed out. Shutting down.") - self.close(shutdown) - raise ConnectionLostError(str(shutdown), shutdown) - except KeyboardInterrupt: - log.info("EventHubConsumer stops due to keyboard interrupt") - self.close() - raise - except Exception as e: - log.error("Unexpected error occurred (%r). Shutting down.", e) - error = EventHubError("Receive failed: {}".format(e), e) - self.close(exception=error) - raise error + except Exception as exception: + self._handle_exception(exception, retry_count, max_retries) + retry_count += 1 def close(self, exception=None): # type:(Exception) -> None @@ -456,6 +313,6 @@ def close(self, exception=None): self.error = EventHubError(str(exception)) else: self.error = EventHubError("This receive handler is now closed.") - self._handler.close() + self._handler.close() # this will close link if sharing connection. Otherwise close connection next = __next__ # for python2.7 diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py index 3f95b7be08c3..dcfec2b22ee6 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py @@ -7,7 +7,7 @@ import uuid import logging import time -from typing import Iterator, Generator, List, Union +from typing import Iterable, Union from uamqp import constants, errors from uamqp import compat @@ -85,6 +85,23 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): self.close(exc_val) + def _create_handler(self): + self._handler = SendClient( + self.target, + auth=self.client.get_auth(), + debug=self.client.config.network_tracing, + msg_timeout=self.timeout, + error_policy=self.retry_policy, + keep_alive_interval=self.keep_alive, + client_name=self.name, + properties=self.client._create_properties(self.client.config.user_agent)) # pylint: disable=protected-access + + def _redirect(self, redirect): + self.redirected = redirect + self.running = False + self.messages_iter = None + self._close_connection() + def _open(self): """ Open the EventHubProducer using the supplied connection. 
@@ -93,182 +110,112 @@ def _open(self): """ # pylint: disable=protected-access - self._check_closed() - if self.redirected: - self.target = self.redirected.address - self._handler = SendClient( - self.target, - auth=self.client.get_auth(), - debug=self.client.config.network_tracing, - msg_timeout=self.timeout, - error_policy=self.retry_policy, - keep_alive_interval=self.keep_alive, - client_name=self.name, - properties=self.client._create_properties(self.client.config.user_agent)) if not self.running: - self._connect() + if self.redirected: + self.target = self.redirected.address + self._create_handler() + self._handler.open(connection=self.client._conn_manager.get_connection( + self.client.address.hostname, + self.client.get_auth() + )) + while not self._handler.client_ready(): + time.sleep(0.05) self.running = True - def _connect(self): - connected = self._build_connection() - if not connected: - time.sleep(self.reconnect_backoff) - while not self._build_connection(is_reconnect=True): - time.sleep(self.reconnect_backoff) - - def _build_connection(self, is_reconnect=False): - """ + def _close_handler(self): + self._handler.close() # close the link (sharing connection) or connection (not sharing) + self.running = False - :param is_reconnect: True - trying to reconnect after fail to connect or a connection is lost. - False - the 1st time to connect - :return: True - connected. False - not connected - """ - # pylint: disable=protected-access - if is_reconnect: - self._handler.close() - self._handler = SendClient( - self.target, - auth=self.client.get_auth(), - debug=self.client.config.network_tracing, - msg_timeout=self.timeout, - error_policy=self.retry_policy, - keep_alive_interval=self.keep_alive, - client_name=self.name, - properties=self.client._create_properties(self.client.config.user_agent)) - try: - self._handler.open() - while not self._handler.client_ready(): - time.sleep(0.05) - return True - except errors.AuthenticationException as shutdown: - if is_reconnect: - log.info("EventHubProducer couldn't authenticate. Shutting down. (%r)", shutdown) - error = AuthenticationError(str(shutdown), shutdown) - self.close(exception=error) - raise error - else: - log.info("EventHubProducer couldn't authenticate. Attempting reconnect.") - return False - except (errors.LinkDetach, errors.ConnectionClose) as shutdown: - if shutdown.action.retry: - log.info("EventHubProducer detached. Attempting reconnect.") - return False - else: - log.info("EventHubProducer detached. Shutting down.") - error = ConnectError(str(shutdown), shutdown) - self.close(exception=error) - raise error - except errors.MessageHandlerError as shutdown: - if is_reconnect: - log.info("EventHubProducer detached. Shutting down.") - error = ConnectError(str(shutdown), shutdown) - self.close(exception=error) - raise error - else: - log.info("EventHubProducer detached. Attempting reconnect.") - return False - except errors.AMQPConnectionError as shutdown: - if is_reconnect: - log.info("EventHubProducer connection error (%r). Shutting down.", shutdown) - error = AuthenticationError(str(shutdown), shutdown) - self.close(exception=error) - raise error - else: - log.info("EventHubProducer couldn't authenticate. Attempting reconnect.") - return False - except compat.TimeoutException as shutdown: - if is_reconnect: - log.info("EventHubProducer authentication timed out. 
Shutting down.") - error = AuthenticationError(str(shutdown), shutdown) - self.close(exception=error) - raise error + def _close_connection(self): + self._close_handler() + self.client._conn_manager.close_connection() # close the shared connection. + + def _handle_exception(self, exception, retry_count, max_retries): + if isinstance(exception, KeyboardInterrupt): + log.info("EventHubConsumer stops due to keyboard interrupt") + self.close() + raise + elif isinstance(exception, ( + errors.MessageAccepted, + errors.MessageAlreadySettled, + errors.MessageModified, + errors.MessageRejected, + errors.MessageReleased, + errors.MessageContentTooLarge) + ): + log.error("Event data error (%r)", exception) + error = EventDataError(str(exception), exception) + self.close(exception) + raise error + elif isinstance(exception, errors.MessageException): + log.error("Event data send error (%r)", exception) + error = EventDataSendError(str(exception), exception) + self.close(exception) + raise error + elif retry_count >= max_retries: + log.info("EventHubConsumer has an error and has exhausted retrying. (%r)", exception) + if isinstance(exception, errors.AuthenticationException): + log.info("EventHubConsumer authentication failed. Shutting down.") + error = AuthenticationError(str(exception), exception) + elif isinstance(exception, errors.LinkDetach): + log.info("EventHubConsumer link detached. Shutting down.") + error = ConnectionLostError(str(exception), exception) + elif isinstance(exception, errors.ConnectionClose): + log.info("EventHubConsumer connection closed. Shutting down.") + error = ConnectionLostError(str(exception), exception) + elif isinstance(exception, errors.MessageHandlerError): + log.info("EventHubConsumer detached. Shutting down.") + error = ConnectionLostError(str(exception), exception) + elif isinstance(exception, errors.AMQPConnectionError): + log.info("EventHubConsumer connection lost. Shutting down.") + error_type = AuthenticationError if str(exception).startswith("Unable to open authentication session") \ + else ConnectError + error = error_type(str(exception), exception) + elif isinstance(exception, compat.TimeoutException): + log.info("EventHubConsumer timed out. Shutting down.") + error = ConnectionLostError(str(exception), exception) else: - log.info("EventHubProducer authentication timed out. Attempting reconnect.") - return False - except Exception as e: - log.info("Unexpected error occurred when building connection (%r). Shutting down.", e) - error = EventHubError("Unexpected error occurred when building connection", e) + log.error("Unexpected error occurred (%r). Shutting down.", exception) + error = EventHubError("Receive failed: {}".format(exception), exception) self.close(exception=error) raise error - - def _reconnect(self): - return self._build_connection(is_reconnect=True) + else: + log.info("EventHubConsumer has an exception (%r). Retrying...", exception) + if isinstance(exception, errors.AuthenticationException): + self._close_connection() + elif isinstance(exception, errors.LinkRedirect): + log.info("EventHubConsumer link redirected. 
Redirecting...") + redirect = exception + self._redirect(redirect) + elif isinstance(exception, errors.LinkDetach): + self._close_handler() + elif isinstance(exception, errors.ConnectionClose): + self._close_connection() + elif isinstance(exception, errors.MessageHandlerError): + self._close_handler() + elif isinstance(exception, errors.AMQPConnectionError): + self._close_connection() + elif isinstance(exception, compat.TimeoutException): + pass # Timeout doesn't need to recreate link or exception + else: + self._close_connection() def _send_event_data(self): self._open() max_retries = self.client.config.max_retries - connecting_count = 0 + retry_count = 0 while True: - connecting_count += 1 try: if self.unsent_events: self._handler.queue_message(*self.unsent_events) self._handler.wait() self.unsent_events = self._handler.pending_messages if self._outcome != constants.MessageSendResult.Ok: - EventHubProducer._error(self._outcome, self._condition) + _error(self._outcome, self._condition) return - except (errors.MessageAccepted, - errors.MessageAlreadySettled, - errors.MessageModified, - errors.MessageRejected, - errors.MessageReleased, - errors.MessageContentTooLarge) as msg_error: - raise EventDataError(str(msg_error), msg_error) - except errors.MessageException as failed: - log.error("Send event data error (%r)", failed) - error = EventDataSendError(str(failed), failed) - self.close(exception=error) - raise error - except errors.AuthenticationException as auth_error: - if connecting_count < max_retries: - log.info("EventHubProducer disconnected due to token error. Attempting reconnect.") - self._reconnect() - else: - log.info("EventHubProducer authentication failed. Shutting down.") - error = AuthenticationError(str(auth_error), auth_error) - self.close(auth_error) - raise error - except (errors.LinkDetach, errors.ConnectionClose) as shutdown: - if shutdown.action.retry: - log.info("EventHubProducer detached. Attempting reconnect.") - self._reconnect() - else: - log.info("EventHubProducer detached. Shutting down.") - error = ConnectionLostError(str(shutdown), shutdown) - self.close(exception=error) - raise error - except errors.MessageHandlerError as shutdown: - if connecting_count < max_retries: - log.info("EventHubProducer detached. Attempting reconnect.") - self._reconnect() - else: - log.info("EventHubProducer detached. Shutting down.") - error = ConnectionLostError(str(shutdown), shutdown) - self.close(error) - raise error - except errors.AMQPConnectionError as shutdown: - if connecting_count < max_retries: - log.info("EventHubProducer connection lost. Attempting reconnect.") - self._reconnect() - else: - log.info("EventHubProducer connection lost. Shutting down.") - error = ConnectionLostError(str(shutdown), shutdown) - self.close(error) - raise error - except compat.TimeoutException as shutdown: - if connecting_count < max_retries: - log.info("EventHubProducer timed out sending event data. Attempting reconnect.") - self._reconnect() - else: - log.info("EventHubProducer timed out. Shutting down.") - self.close(shutdown) - raise ConnectionLostError(str(shutdown), shutdown) - except Exception as e: - log.info("Unexpected error occurred (%r). 
Shutting down.", e) - error = EventHubError("Send failed: {}".format(e), e) - self.close(exception=error) - raise error + except Exception as exception: + self._handle_exception(exception, retry_count, max_retries) + retry_count += 1 def _check_closed(self): if self.error: @@ -293,13 +240,8 @@ def _on_outcome(self, outcome, condition): self._outcome = outcome self._condition = condition - @staticmethod - def _error(outcome, condition): - if outcome != constants.MessageSendResult.Ok: - raise condition - def send(self, event_data, partition_key=None): - # type:(Union[EventData, Union[List[EventData], Iterator[EventData], Generator[EventData]]], Union[str, bytes]) -> None + # type:(Union[EventData, Iterable[EventData]], Union[str, bytes]) -> None """ Sends an event data and blocks until acknowledgement is received or operation times out. @@ -370,3 +312,8 @@ def close(self, exception=None): else: self.error = EventHubError("This send handler is now closed.") self._handler.close() + + +def _error(outcome, condition): + if outcome != constants.MessageSendResult.Ok: + raise condition From 84ae397327a0f0cbb9b56fe7dd2e4a61555eb96e Mon Sep 17 00:00:00 2001 From: yijxie Date: Tue, 16 Jul 2019 11:52:43 -0700 Subject: [PATCH 02/42] Shared connection (sync) draft 2 --- ...tion_manager.py => _connection_manager.py} | 18 ++++- .../azure-eventhubs/azure/eventhub/client.py | 29 ++++--- .../azure/eventhub/consumer.py | 65 +++------------- .../azure-eventhubs/azure/eventhub/error.py | 76 ++++++++++++++++++- .../azure/eventhub/producer.py | 73 +----------------- 5 files changed, 117 insertions(+), 144 deletions(-) rename sdk/eventhub/azure-eventhubs/azure/eventhub/{connection_manager.py => _connection_manager.py} (86%) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/connection_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_connection_manager.py similarity index 86% rename from sdk/eventhub/azure-eventhubs/azure/eventhub/connection_manager.py rename to sdk/eventhub/azure-eventhubs/azure/eventhub/_connection_manager.py index 026348c3a215..3e1a17a1051d 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/connection_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_connection_manager.py @@ -7,7 +7,7 @@ from uamqp import Connection, TransportType -class _ConnectionManager(object): +class _SharedConnectionManager(object): def __init__(self, **kwargs): self._lock = threading.Lock() self._conn = None @@ -42,9 +42,23 @@ def get_connection(self, host, auth): encoding=self._encoding) return self._conn - def close_connection(self): + def close_connection(self, conn=None): with self._lock: if self._conn: self._conn.destroy() self._conn = None + +class _SeparateConnectionManager(object): + def __init__(self, **kwargs): + pass + + def get_connection(self, host, auth): + return None + + def close_connection(self): + pass + + +def get_connection_manager(**kwargs): + return _SeparateConnectionManager(**kwargs) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py index 9770f30dc114..a88beebdc275 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py @@ -24,10 +24,10 @@ from azure.eventhub.producer import EventHubProducer from azure.eventhub.consumer import EventHubConsumer from azure.eventhub.common import parse_sas_token, EventPosition -from azure.eventhub.error import ConnectError +from azure.eventhub.error import ConnectError, EventHubError from 
.client_abstract import EventHubClientAbstract from .common import EventHubSASTokenCredential, EventHubSharedKeyCredential -from .connection_manager import _ConnectionManager +from ._connection_manager import get_connection_manager log = logging.getLogger(__name__) @@ -54,7 +54,7 @@ def __init__(self, host, event_hub_path, credential, **kwargs): "username": self._auth_config.get("iot_username"), "password": self._auth_config.get("iot_password") } - self._conn_manager = _ConnectionManager() + self._conn_manager = get_connection_manager(**kwargs) def __del__(self): self._conn_manager.close_connection() @@ -111,28 +111,23 @@ def _create_auth(self, username=None, password=None): transport_type=transport_type) def _management_request(self, mgmt_msg, op_type): - alt_creds = { - "username": self._auth_config.get("iot_username"), - "password": self._auth_config.get("iot_password")} - connect_count = 0 - while True: - connect_count += 1 - mgmt_auth = self._create_auth(**alt_creds) + retry_count = 0 + while retry_count <= self.config.max_retries: + retry_count += 1 + mgmt_auth = self._create_auth() mgmt_client = uamqp.AMQPClient(self.mgmt_target) try: - conn = self._conn_manager.get_connection() - mgmt_client.open(connection=self._conn_manager.get_connection()) - print(conn._state) + conn = self._conn_manager.get_connection(self.host, mgmt_auth) + mgmt_client.open(connection=conn) response = mgmt_client.mgmt_request( mgmt_msg, constants.READ_OPERATION, op_type=op_type, status_code_field=b'status-code', description_fields=b'status-description') - print(conn._state) return response except (errors.AMQPConnectionError, errors.TokenAuthFailure, compat.TimeoutException) as failure: - if connect_count >= self.config.max_retries: + if retry_count >= self.config.max_retries: err = ConnectError( "Can not connect to EventHubs or get management info from the service. " "Please make sure the connection string or token is correct and retry. 
" @@ -140,6 +135,10 @@ def _management_request(self, mgmt_msg, op_type): failure ) raise err + except Exception as failure: + if retry_count >= self.config.max_retries: + err = EventHubError("Unexpected error happened during management request", failure) + raise err finally: mgmt_client.close() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py index 2909d0fe5f0c..b8fc6ff4a2ea 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py @@ -14,7 +14,8 @@ from uamqp import ReceiveClient, Source from azure.eventhub.common import EventData, EventPosition -from azure.eventhub.error import EventHubError, AuthenticationError, ConnectError, ConnectionLostError, _error_handler +from azure.eventhub.error import EventHubError, AuthenticationError, ConnectError, ConnectionLostError, \ + _error_handler, _handle_exception log = logging.getLogger(__name__) @@ -70,9 +71,6 @@ def __init__(self, client, source, event_position=None, prefetch=300, owner_leve self.error = None partition = self.source.split('/')[-1] self.name = "EHReceiver-{}-partition{}".format(uuid.uuid4(), partition) - source = Source(self.source) - if self.offset is not None: - source.set_filter(self.offset._selector()) # pylint: disable=protected-access if owner_level: self.properties = {types.AMQPSymbol(self._epoch): types.AMQPLong(int(owner_level))} self._handler = None @@ -146,10 +144,15 @@ def _open(self): if self.redirected: self.client._process_redirect_uri(self.redirected) self.source = self.redirected.address + alt_creds = { + "username": self.client._auth_config.get("iot_username"), + "password": self.client._auth_config.get("iot_password")} + else: + alt_creds = {} self._create_handler() self._handler.open(connection=self.client._conn_manager.get_connection( self.client.address.hostname, - self.client.get_auth() + self.client.get_auth(**alt_creds) )) while not self._handler.client_ready(): time.sleep(0.05) @@ -164,57 +167,7 @@ def _close_connection(self): self.client._conn_manager.close_connection() # close the shared connection. def _handle_exception(self, exception, retry_count, max_retries): - if isinstance(exception, KeyboardInterrupt): - log.info("EventHubConsumer stops due to keyboard interrupt") - self.close() - raise - elif retry_count >= max_retries: - log.info("EventHubConsumer has an error and has exhausted retrying. (%r)", exception) - if isinstance(exception, errors.AuthenticationException): - log.info("EventHubConsumer authentication failed. Shutting down.") - error = AuthenticationError(str(exception), exception) - elif isinstance(exception, errors.LinkDetach): - log.info("EventHubConsumer link detached. Shutting down.") - error = ConnectionLostError(str(exception), exception) - elif isinstance(exception, errors.ConnectionClose): - log.info("EventHubConsumer connection closed. Shutting down.") - error = ConnectionLostError(str(exception), exception) - elif isinstance(exception, errors.MessageHandlerError): - log.info("EventHubConsumer detached. Shutting down.") - error = ConnectionLostError(str(exception), exception) - elif isinstance(exception, errors.AMQPConnectionError): - log.info("EventHubConsumer connection lost. 
Shutting down.") - error_type = AuthenticationError if str(exception).startswith("Unable to open authentication session") \ - else ConnectError - error = error_type(str(exception), exception) - elif isinstance(exception, compat.TimeoutException): - log.info("EventHubConsumer timed out. Shutting down.") - error = ConnectionLostError(str(exception), exception) - else: - log.error("Unexpected error occurred (%r). Shutting down.", exception) - error = EventHubError("Receive failed: {}".format(exception), exception) - self.close(exception=error) - raise error - else: - log.info("EventHubConsumer has an exception (%r). Retrying...", exception) - if isinstance(exception, errors.AuthenticationException): - self._close_connection() - elif isinstance(exception, errors.LinkRedirect): - log.info("EventHubConsumer link redirected. Redirecting...") - redirect = exception - self._redirect(redirect) - elif isinstance(exception, errors.LinkDetach): - self._close_handler() - elif isinstance(exception, errors.ConnectionClose): - self._close_connection() - elif isinstance(exception, errors.MessageHandlerError): - self._close_handler() - elif isinstance(exception, errors.AMQPConnectionError): - self._close_connection() - elif isinstance(exception, compat.TimeoutException): - pass # Timeout doesn't need to recreate link or exception - else: - self._close_connection() + _handle_exception(exception, retry_count, max_retries, self, log) @property def queue_size(self): diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/error.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/error.py index 6932daa7cc0f..d15dabf0c051 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/error.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/error.py @@ -4,7 +4,7 @@ # -------------------------------------------------------------------------------------------- import six -from uamqp import constants, errors +from uamqp import constants, errors, compat _NO_RETRY_ERRORS = ( @@ -128,3 +128,77 @@ class EventDataSendError(EventHubError): """ pass + + +def _handle_exception(exception, retry_count, max_retries, closable, log): + if isinstance(exception, KeyboardInterrupt): + log.info("EventHubConsumer stops due to keyboard interrupt") + closable.close() + raise + elif isinstance(exception, ( + errors.MessageAccepted, + errors.MessageAlreadySettled, + errors.MessageModified, + errors.MessageRejected, + errors.MessageReleased, + errors.MessageContentTooLarge) + ): + log.error("Event data error (%r)", exception) + error = EventDataError(str(exception), exception) + closable.close(exception) + raise error + elif isinstance(exception, errors.MessageException): + log.error("Event data send error (%r)", exception) + error = EventDataSendError(str(exception), exception) + closable.close(exception) + raise error + elif retry_count >= max_retries: + log.info("EventHubConsumer has an error and has exhausted retrying. (%r)", exception) + if isinstance(exception, errors.AuthenticationException): + log.info("EventHubConsumer authentication failed. Shutting down.") + error = AuthenticationError(str(exception), exception) + elif isinstance(exception, errors.VendorLinkDetach): + log.info("EventHubConsumer link detached. Shutting down.") + error = ConnectError(str(exception), exception) + elif isinstance(exception, errors.LinkDetach): + log.info("EventHubConsumer link detached. 
Shutting down.") + error = ConnectionLostError(str(exception), exception) + elif isinstance(exception, errors.ConnectionClose): + log.info("EventHubConsumer connection closed. Shutting down.") + error = ConnectionLostError(str(exception), exception) + elif isinstance(exception, errors.MessageHandlerError): + log.info("EventHubConsumer detached. Shutting down.") + error = ConnectionLostError(str(exception), exception) + elif isinstance(exception, errors.AMQPConnectionError): + log.info("EventHubConsumer connection lost. Shutting down.") + error_type = AuthenticationError if str(exception).startswith("Unable to open authentication session") \ + else ConnectError + error = error_type(str(exception), exception) + elif isinstance(exception, compat.TimeoutException): + log.info("EventHubConsumer timed out. Shutting down.") + error = ConnectionLostError(str(exception), exception) + else: + log.error("Unexpected error occurred (%r). Shutting down.", exception) + error = EventHubError("Receive failed: {}".format(exception), exception) + closable.close(exception=error) + raise error + else: + log.info("EventHubConsumer has an exception (%r). Retrying...", exception) + if isinstance(exception, errors.AuthenticationException): + closable._close_connection() + elif isinstance(exception, errors.LinkRedirect): + log.info("EventHubConsumer link redirected. Redirecting...") + redirect = exception + closable._redirect(redirect) + elif isinstance(exception, errors.LinkDetach): + closable._close_handler() + elif isinstance(exception, errors.ConnectionClose): + closable._close_connection() + elif isinstance(exception, errors.MessageHandlerError): + closable._close_handler() + elif isinstance(exception, errors.AMQPConnectionError): + closable._close_connection() + elif isinstance(exception, compat.TimeoutException): + pass # Timeout doesn't need to recreate link or exception + else: + closable._close_connection() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py index dcfec2b22ee6..cfc32c7157cd 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py @@ -15,7 +15,7 @@ from azure.eventhub.common import EventData, _BatchSendEventData from azure.eventhub.error import EventHubError, ConnectError, \ - AuthenticationError, EventDataError, EventDataSendError, ConnectionLostError, _error_handler + AuthenticationError, EventDataError, EventDataSendError, ConnectionLostError, _error_handler, _handle_exception log = logging.getLogger(__name__) @@ -131,82 +131,15 @@ def _close_connection(self): self.client._conn_manager.close_connection() # close the shared connection. 
def _handle_exception(self, exception, retry_count, max_retries): - if isinstance(exception, KeyboardInterrupt): - log.info("EventHubConsumer stops due to keyboard interrupt") - self.close() - raise - elif isinstance(exception, ( - errors.MessageAccepted, - errors.MessageAlreadySettled, - errors.MessageModified, - errors.MessageRejected, - errors.MessageReleased, - errors.MessageContentTooLarge) - ): - log.error("Event data error (%r)", exception) - error = EventDataError(str(exception), exception) - self.close(exception) - raise error - elif isinstance(exception, errors.MessageException): - log.error("Event data send error (%r)", exception) - error = EventDataSendError(str(exception), exception) - self.close(exception) - raise error - elif retry_count >= max_retries: - log.info("EventHubConsumer has an error and has exhausted retrying. (%r)", exception) - if isinstance(exception, errors.AuthenticationException): - log.info("EventHubConsumer authentication failed. Shutting down.") - error = AuthenticationError(str(exception), exception) - elif isinstance(exception, errors.LinkDetach): - log.info("EventHubConsumer link detached. Shutting down.") - error = ConnectionLostError(str(exception), exception) - elif isinstance(exception, errors.ConnectionClose): - log.info("EventHubConsumer connection closed. Shutting down.") - error = ConnectionLostError(str(exception), exception) - elif isinstance(exception, errors.MessageHandlerError): - log.info("EventHubConsumer detached. Shutting down.") - error = ConnectionLostError(str(exception), exception) - elif isinstance(exception, errors.AMQPConnectionError): - log.info("EventHubConsumer connection lost. Shutting down.") - error_type = AuthenticationError if str(exception).startswith("Unable to open authentication session") \ - else ConnectError - error = error_type(str(exception), exception) - elif isinstance(exception, compat.TimeoutException): - log.info("EventHubConsumer timed out. Shutting down.") - error = ConnectionLostError(str(exception), exception) - else: - log.error("Unexpected error occurred (%r). Shutting down.", exception) - error = EventHubError("Receive failed: {}".format(exception), exception) - self.close(exception=error) - raise error - else: - log.info("EventHubConsumer has an exception (%r). Retrying...", exception) - if isinstance(exception, errors.AuthenticationException): - self._close_connection() - elif isinstance(exception, errors.LinkRedirect): - log.info("EventHubConsumer link redirected. 
Redirecting...") - redirect = exception - self._redirect(redirect) - elif isinstance(exception, errors.LinkDetach): - self._close_handler() - elif isinstance(exception, errors.ConnectionClose): - self._close_connection() - elif isinstance(exception, errors.MessageHandlerError): - self._close_handler() - elif isinstance(exception, errors.AMQPConnectionError): - self._close_connection() - elif isinstance(exception, compat.TimeoutException): - pass # Timeout doesn't need to recreate link or exception - else: - self._close_connection() + _handle_exception(exception, retry_count, max_retries, self, log) def _send_event_data(self): - self._open() max_retries = self.client.config.max_retries retry_count = 0 while True: try: if self.unsent_events: + self._open() self._handler.queue_message(*self.unsent_events) self._handler.wait() self.unsent_events = self._handler.pending_messages From 6e6c82798bf6c3032a0b149a28714fdd85b4aa90 Mon Sep 17 00:00:00 2001 From: yijxie Date: Tue, 16 Jul 2019 11:53:06 -0700 Subject: [PATCH 03/42] Shared connection (sync) test update --- .../azure-eventhubs/tests/test_negative.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/tests/test_negative.py b/sdk/eventhub/azure-eventhubs/tests/test_negative.py index 1bf9855c80eb..ac19a01f76c9 100644 --- a/sdk/eventhub/azure-eventhubs/tests/test_negative.py +++ b/sdk/eventhub/azure-eventhubs/tests/test_negative.py @@ -26,7 +26,7 @@ def test_send_with_invalid_hostname(invalid_hostname, connstr_receivers): client = EventHubClient.from_connection_string(invalid_hostname, network_tracing=False) sender = client.create_producer() with pytest.raises(AuthenticationError): - sender._open() + sender.send(EventData("test data")) @pytest.mark.liveTest @@ -34,7 +34,7 @@ def test_receive_with_invalid_hostname_sync(invalid_hostname): client = EventHubClient.from_connection_string(invalid_hostname, network_tracing=False) receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) with pytest.raises(AuthenticationError): - receiver._open() + receiver.receive(timeout=3) @pytest.mark.liveTest @@ -43,7 +43,7 @@ def test_send_with_invalid_key(invalid_key, connstr_receivers): client = EventHubClient.from_connection_string(invalid_key, network_tracing=False) sender = client.create_producer() with pytest.raises(AuthenticationError): - sender._open() + sender.send(EventData("test data")) @pytest.mark.liveTest @@ -51,7 +51,7 @@ def test_receive_with_invalid_key_sync(invalid_key): client = EventHubClient.from_connection_string(invalid_key, network_tracing=False) receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) with pytest.raises(AuthenticationError): - receiver._open() + receiver.receive(timeout=3) @pytest.mark.liveTest @@ -60,7 +60,7 @@ def test_send_with_invalid_policy(invalid_policy, connstr_receivers): client = EventHubClient.from_connection_string(invalid_policy, network_tracing=False) sender = client.create_producer() with pytest.raises(AuthenticationError): - sender._open() + sender.send(EventData("test data")) @pytest.mark.liveTest @@ -68,7 +68,7 @@ def test_receive_with_invalid_policy_sync(invalid_policy): client = EventHubClient.from_connection_string(invalid_policy, network_tracing=False) receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) with pytest.raises(AuthenticationError): - receiver._open() + 
receiver.receive(timeout=3) @pytest.mark.liveTest @@ -90,7 +90,7 @@ def test_non_existing_entity_sender(connection_str): client = EventHubClient.from_connection_string(connection_str, event_hub_path="nemo", network_tracing=False) sender = client.create_producer(partition_id="1") with pytest.raises(AuthenticationError): - sender._open() + sender.send(EventData("test data")) @pytest.mark.liveTest @@ -98,7 +98,7 @@ def test_non_existing_entity_receiver(connection_str): client = EventHubClient.from_connection_string(connection_str, event_hub_path="nemo", network_tracing=False) receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) with pytest.raises(AuthenticationError): - receiver._open() + receiver.receive(timeout=3) @pytest.mark.liveTest @@ -122,7 +122,7 @@ def test_send_to_invalid_partitions(connection_str): sender = client.create_producer(partition_id=p) try: with pytest.raises(ConnectError): - sender._open() + sender.send(EventData("test data")) finally: sender.close() From a86146e776d455795f53fe9f577188d1380e9a6b Mon Sep 17 00:00:00 2001 From: yijxie Date: Wed, 17 Jul 2019 12:14:56 -0700 Subject: [PATCH 04/42] Shared connection --- .../azure/eventhub/_connection_manager.py | 25 +- .../eventhub/aio/_connection_manager_async.py | 77 +++++ .../azure/eventhub/aio/client_async.py | 45 ++- .../azure/eventhub/aio/consumer_async.py | 300 ++++-------------- .../azure/eventhub/aio/error_async.py | 79 +++++ .../azure/eventhub/aio/producer_async.py | 232 +++----------- .../azure-eventhubs/azure/eventhub/client.py | 30 +- .../azure/eventhub/consumer.py | 5 +- .../azure-eventhubs/azure/eventhub/error.py | 45 +-- .../azure/eventhub/producer.py | 31 +- 10 files changed, 365 insertions(+), 504 deletions(-) create mode 100644 sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_connection_manager_async.py create mode 100644 sdk/eventhub/azure-eventhubs/azure/eventhub/aio/error_async.py diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_connection_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_connection_manager.py index 3e1a17a1051d..9600226df2fb 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/_connection_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_connection_manager.py @@ -3,14 +3,14 @@ # Licensed under the MIT License. See License.txt in the project root for license information. 
# -------------------------------------------------------------------------------------------- -import threading -from uamqp import Connection, TransportType +from threading import RLock +from uamqp import Connection, TransportType, c_uamqp class _SharedConnectionManager(object): def __init__(self, **kwargs): - self._lock = threading.Lock() - self._conn = None + self._lock = RLock() + self._conn = None # type: Connection self._container_id = kwargs.get("container_id") self._debug = kwargs.get("debug") @@ -42,12 +42,22 @@ def get_connection(self, host, auth): encoding=self._encoding) return self._conn - def close_connection(self, conn=None): + def close_connection(self): with self._lock: if self._conn: self._conn.destroy() self._conn = None + def reset_connection_if_broken(self): + with self._lock: + if self._conn and self._conn._state in ( + c_uamqp.ConnectionState.CLOSE_RCVD, + c_uamqp.ConnectionState.CLOSE_SENT, + c_uamqp.ConnectionState.DISCARDING, + c_uamqp.ConnectionState.END, + ): + self._conn = None + class _SeparateConnectionManager(object): def __init__(self, **kwargs): @@ -59,6 +69,9 @@ def get_connection(self, host, auth): def close_connection(self): pass + def reset_connection_if_broken(self): + pass + def get_connection_manager(**kwargs): - return _SeparateConnectionManager(**kwargs) + return _SharedConnectionManager(**kwargs) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_connection_manager_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_connection_manager_async.py new file mode 100644 index 000000000000..bd54a2bc4e3a --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_connection_manager_async.py @@ -0,0 +1,77 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +from asyncio import Lock +from uamqp import TransportType, c_uamqp +from uamqp.async_ops import ConnectionAsync + + +class _SharedConnectionManager(object): + def __init__(self, **kwargs): + self._lock = Lock() + self._conn = None + + self._container_id = kwargs.get("container_id") + self._debug = kwargs.get("debug") + self._error_policy = kwargs.get("error_policy") + self._properties = kwargs.get("properties") + self._encoding = kwargs.get("encoding") or "UTF-8" + self._transport_type = kwargs.get('transport_type') or TransportType.Amqp + self._http_proxy = kwargs.get('http_proxy') + self._max_frame_size = kwargs.get("max_frame_size") + self._channel_max = kwargs.get("channel_max") + self._idle_timeout = kwargs.get("idle_timeout") + self._remote_idle_timeout_empty_frame_send_ratio = kwargs.get("remote_idle_timeout_empty_frame_send_ratio") + + async def get_connection(self, host, auth): + # type: (...) 
-> ConnectionAsync + async with self._lock: + if self._conn is None: + self._conn = ConnectionAsync( + host, + auth, + container_id=self._container_id, + max_frame_size=self._max_frame_size, + channel_max=self._channel_max, + idle_timeout=self._idle_timeout, + properties=self._properties, + remote_idle_timeout_empty_frame_send_ratio=self._remote_idle_timeout_empty_frame_send_ratio, + error_policy=self._error_policy, + debug=self._debug, + encoding=self._encoding) + return self._conn + + async def close_connection(self): + async with self._lock: + if self._conn: + await self._conn.destroy_async() + self._conn = None + + def reset_connection_if_broken(self): + with self._lock: + if self._conn and self._conn._state in ( + c_uamqp.ConnectionState.CLOSE_RCVD, + c_uamqp.ConnectionState.CLOSE_SENT, + c_uamqp.ConnectionState.DISCARDING, + c_uamqp.ConnectionState.END, + ): + self._conn = None + +class _SeparateConnectionManager(object): + def __init__(self, **kwargs): + pass + + async def get_connection(self, host, auth): + pass # return None + + async def close_connection(self): + pass + + def reset_connection_if_broken(self): + pass + + +def get_connection_manager(**kwargs): + return _SharedConnectionManager(**kwargs) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/client_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/client_async.py index f552cb0a167b..0141923f9fe3 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/client_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/client_async.py @@ -22,6 +22,8 @@ from .producer_async import EventHubProducer from .consumer_async import EventHubConsumer +from ._connection_manager_async import get_connection_manager +from .error_async import _handle_exception log = logging.getLogger(__name__) @@ -42,6 +44,16 @@ class EventHubClient(EventHubClientAbstract): """ + def __init__(self, host, event_hub_path, credential, **kwargs): + super(EventHubClient, self).__init__(host, event_hub_path, credential, **kwargs) + self._conn_manager = get_connection_manager(**kwargs) + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + await self.close() + def _create_auth(self, username=None, password=None): """ Create an ~uamqp.authentication.cbs_auth_async.SASTokenAuthAsync instance to authenticate @@ -85,17 +97,21 @@ def _create_auth(self, username=None, password=None): get_jwt_token, http_proxy=http_proxy, transport_type=transport_type) + async def _handle_exception(self, exception, retry_count, max_retries): + await _handle_exception(exception, retry_count, max_retries, self, log) + + async def _close_connection(self): + self._conn_manager.reset_connection_if_broken() + async def _management_request(self, mgmt_msg, op_type): - alt_creds = { - "username": self._auth_config.get("iot_username"), - "password": self._auth_config.get("iot_password")} - connect_count = 0 + max_retries = self.config.max_retries + retry_count = 0 while True: - connect_count += 1 - mgmt_auth = self._create_auth(**alt_creds) + mgmt_auth = self._create_auth() mgmt_client = AMQPClientAsync(self.mgmt_target, auth=mgmt_auth, debug=self.config.network_tracing) try: - await mgmt_client.open_async() + conn = await self._conn_manager.get_connection(self.host, mgmt_auth) + await mgmt_client.open_async(connection=conn) response = await mgmt_client.mgmt_request_async( mgmt_msg, constants.READ_OPERATION, @@ -103,15 +119,9 @@ async def _management_request(self, mgmt_msg, op_type): status_code_field=b'status-code', 
description_fields=b'status-description') return response - except (errors.AMQPConnectionError, errors.TokenAuthFailure, compat.TimeoutException) as failure: - if connect_count >= self.config.max_retries: - err = ConnectError( - "Can not connect to EventHubs or get management info from the service. " - "Please make sure the connection string or token is correct and retry. " - "Besides, this method doesn't work if you use an IoT connection string.", - failure - ) - raise err + except Exception as exception: + await self._handle_exception(exception, retry_count, max_retries) + retry_count += 1 finally: await mgmt_client.close_async() @@ -263,3 +273,6 @@ def create_producer( handler = EventHubProducer( self, target, partition=partition_id, send_timeout=send_timeout, loop=loop) return handler + + async def close(self): + await self._conn_manager.close_connection() \ No newline at end of file diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py index 6cf020176d96..203cdd9882bd 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py @@ -12,6 +12,7 @@ from azure.eventhub import EventData, EventPosition from azure.eventhub.error import EventHubError, AuthenticationError, ConnectError, ConnectionLostError, _error_handler +from ..aio.error_async import _handle_exception log = logging.getLogger(__name__) @@ -71,23 +72,9 @@ def __init__( # pylint: disable=super-init-not-called self.properties = None partition = self.source.split('/')[-1] self.name = "EHReceiver-{}-partition{}".format(uuid.uuid4(), partition) - source = Source(self.source) - if self.offset is not None: - source.set_filter(self.offset._selector()) # pylint: disable=protected-access if owner_level: self.properties = {types.AMQPSymbol(self._epoch): types.AMQPLong(int(owner_level))} - self._handler = ReceiveClientAsync( - source, - auth=self.client.get_auth(), - debug=self.client.config.network_tracing, - prefetch=self.prefetch, - link_properties=self.properties, - timeout=self.timeout, - error_policy=self.retry_policy, - keep_alive_interval=self.keep_alive, - client_name=self.name, - properties=self.client._create_properties(self.client.config.user_agent), # pylint: disable=protected-access - loop=self.loop) + self._handler = None async def __aenter__(self): return self @@ -99,74 +86,52 @@ def __aiter__(self): return self async def __anext__(self): - await self._open() max_retries = self.client.config.max_retries - connecting_count = 0 + retry_count = 0 while True: - connecting_count += 1 try: + await self._open() if not self.messages_iter: self.messages_iter = self._handler.receive_messages_iter_async() message = await self.messages_iter.__anext__() event_data = EventData(message=message) self.offset = EventPosition(event_data.offset, inclusive=False) return event_data - except errors.AuthenticationException as auth_error: - if connecting_count < max_retries: - log.info("EventHubConsumer disconnected due to token error. Attempting reconnect.") - await self._reconnect() - else: - log.info("EventHubConsumer authentication failed. Shutting down.") - error = AuthenticationError(str(auth_error), auth_error) - await self.close(auth_error) - raise error - except (errors.LinkDetach, errors.ConnectionClose) as shutdown: - if shutdown.action.retry and self.auto_reconnect: - log.info("EventHubConsumer detached. 
Attempting reconnect.") - await self._reconnect() - else: - log.info("EventHubConsumer detached. Shutting down.") - error = ConnectionLostError(str(shutdown), shutdown) - await self.close(exception=error) - raise error - except errors.MessageHandlerError as shutdown: - if connecting_count < max_retries: - log.info("EventHubConsumer detached. Attempting reconnect.") - await self._reconnect() - else: - log.info("EventHubConsumer detached. Shutting down.") - error = ConnectionLostError(str(shutdown), shutdown) - await self.close(error) - raise error - except errors.AMQPConnectionError as shutdown: - if connecting_count < max_retries: - log.info("EventHubConsumer connection lost. Attempting reconnect.") - await self._reconnect() - else: - log.info("EventHubConsumer connection lost. Shutting down.") - error = ConnectionLostError(str(shutdown), shutdown) - await self.close(error) - raise error - except compat.TimeoutException as shutdown: - if connecting_count < max_retries: - log.info("EventHubConsumer timed out receiving event data. Attempting reconnect.") - await self._reconnect() - else: - log.info("EventHubConsumer timed out. Shutting down.") - await self.close(shutdown) - raise ConnectionLostError(str(shutdown), shutdown) - except StopAsyncIteration: - raise - except Exception as e: - log.error("Unexpected error occurred (%r). Shutting down.", e) - error = EventHubError("Receive failed: {}".format(e), e) - await self.close(exception=error) - raise error + except Exception as exception: + await self._handle_exception(exception, retry_count, max_retries) + retry_count += 1 def _check_closed(self): if self.error: - raise EventHubError("This consumer has been closed. Please create a new consumer to receive event data.", - self.error) + raise EventHubError("This consumer has been closed. 
Please create a new consumer to receive event data.") + + def _create_handler(self): + alt_creds = { + "username": self.client._auth_config.get("iot_username"), + "password": self.client._auth_config.get("iot_password")} + source = Source(self.source) + if self.offset is not None: + source.set_filter(self.offset._selector()) + self._handler = ReceiveClientAsync( + source, + auth=self.client.get_auth(**alt_creds), + debug=self.client.config.network_tracing, + prefetch=self.prefetch, + link_properties=self.properties, + timeout=self.timeout, + error_policy=self.retry_policy, + keep_alive_interval=self.keep_alive, + client_name=self.name, + properties=self.client._create_properties( + self.client.config.user_agent), # pylint: disable=protected-access + loop=self.loop) + self.messages_iter = None + + async def _redirect(self, redirect): + self.redirected = redirect + self.running = False + self.messages_iter = None + await self._close_connection() async def _open(self): """ @@ -176,121 +141,34 @@ async def _open(self): """ # pylint: disable=protected-access - self._check_closed() - if self.redirected: - self.source = self.redirected.address - source = Source(self.source) - if self.offset is not None: - source.set_filter(self.offset._selector()) # pylint: disable=protected-access - alt_creds = { - "username": self.client._auth_config.get("iot_username"), - "password":self.client._auth_config.get("iot_password")} - self._handler = ReceiveClientAsync( - source, - auth=self.client.get_auth(**alt_creds), - debug=self.client.config.network_tracing, - prefetch=self.prefetch, - link_properties=self.properties, - timeout=self.timeout, - error_policy=self.retry_policy, - keep_alive_interval=self.keep_alive, - client_name=self.name, - properties=self.client._create_properties(self.client.config.user_agent), # pylint: disable=protected-access - loop=self.loop) if not self.running: - await self._connect() + if self.redirected: + self.client._process_redirect_uri(self.redirected) + self.source = self.redirected.address + alt_creds = { + "username": self.client._auth_config.get("iot_username"), + "password": self.client._auth_config.get("iot_password")} + else: + alt_creds = {} + self._create_handler() + await self._handler.open_async(connection=await self.client._conn_manager.get_connection( + self.client.address.hostname, + self.client.get_auth(**alt_creds) + )) + while not await self._handler.client_ready_async(): + await asyncio.sleep(0.05) self.running = True - async def _connect(self): - connected = await self._build_connection() - if not connected: - await asyncio.sleep(self.reconnect_backoff) - while not await self._build_connection(is_reconnect=True): - await asyncio.sleep(self.reconnect_backoff) + async def _close_handler(self): + await self._handler.close_async() # close the link (sharing connection) or connection (not sharing) + self.running = False - async def _build_connection(self, is_reconnect=False): # pylint: disable=too-many-statements - # pylint: disable=protected-access - if is_reconnect: - alt_creds = { - "username": self.client._auth_config.get("iot_username"), - "password":self.client._auth_config.get("iot_password")} - await self._handler.close_async() - source = Source(self.source) - if self.offset is not None: - source.set_filter(self.offset._selector()) # pylint: disable=protected-access - self._handler = ReceiveClientAsync( - source, - auth=self.client.get_auth(**alt_creds), - debug=self.client.config.network_tracing, - prefetch=self.prefetch, - link_properties=self.properties, - 
timeout=self.timeout, - error_policy=self.retry_policy, - keep_alive_interval=self.keep_alive, - client_name=self.name, - properties=self.client._create_properties(self.client.config.user_agent), # pylint: disable=protected-access - loop=self.loop) - self.messages_iter = None - try: - await self._handler.open_async() - while not await self._handler.client_ready_async(): - await asyncio.sleep(0.05) - return True - except errors.AuthenticationException as shutdown: - if is_reconnect: - log.info("EventHubConsumer couldn't authenticate. Shutting down. (%r)", shutdown) - error = AuthenticationError(str(shutdown), shutdown) - await self.close(exception=error) - raise error - else: - log.info("EventHubConsumer couldn't authenticate. Attempting reconnect.") - return False - except (errors.LinkDetach, errors.ConnectionClose) as shutdown: - if shutdown.action.retry: - log.info("EventHubConsumer detached. Attempting reconnect.") - return False - else: - log.info("EventHubConsumer detached. Shutting down.") - error = ConnectError(str(shutdown), shutdown) - await self.close(exception=error) - raise error - except errors.MessageHandlerError as shutdown: - if is_reconnect: - log.info("EventHubConsumer detached. Shutting down.") - error = ConnectError(str(shutdown), shutdown) - await self.close(exception=error) - raise error - else: - log.info("EventHubConsumer detached. Attempting reconnect.") - return False - except errors.AMQPConnectionError as shutdown: - if is_reconnect: - log.info("EventHubConsumer connection error (%r). Shutting down.", shutdown) - error = AuthenticationError(str(shutdown), shutdown) - await self.close(exception=error) - raise error - else: - log.info("EventHubConsumer couldn't authenticate. Attempting reconnect.") - return False - except compat.TimeoutException as shutdown: - if is_reconnect: - log.info("EventHubConsumer authentication timed out. Shutting down.") - error = AuthenticationError(str(shutdown), shutdown) - await self.close(exception=error) - raise error - else: - log.info("EventHubConsumer authentication timed out. Attempting reconnect.") - return False - except Exception as e: - log.error("Unexpected error occurred when building connection (%r). 
Shutting down.", e) - error = EventHubError("Unexpected error occurred when building connection", e) - await self.close(exception=error) - raise error - - async def _reconnect(self): - """If the EventHubConsumer was disconnected from the service with - a retryable error - attempt to reconnect.""" - return await self._build_connection(is_reconnect=True) + async def _close_connection(self): + await self._close_handler() + self.client._conn_manager.reset_connection_if_broken() + + async def _handle_exception(self, exception, retry_count, max_retries): + await _handle_exception(exception, retry_count, max_retries, self, log) @property def queue_size(self): @@ -333,17 +211,15 @@ async def receive(self, max_batch_size=None, timeout=None): """ self._check_closed() - await self._open() - max_batch_size = min(self.client.config.max_batch_size, self.prefetch) if max_batch_size is None else max_batch_size timeout = self.client.config.receive_timeout if timeout is None else timeout data_batch = [] max_retries = self.client.config.max_retries - connecting_count = 0 + retry_count = 0 while True: - connecting_count += 1 try: + await self._open() timeout_ms = 1000 * timeout if timeout else 0 message_batch = await self._handler.receive_message_batch_async( max_batch_size=max_batch_size, @@ -353,55 +229,9 @@ async def receive(self, max_batch_size=None, timeout=None): self.offset = EventPosition(event_data.offset) data_batch.append(event_data) return data_batch - except errors.AuthenticationException as auth_error: - if connecting_count < max_retries: - log.info("EventHubConsumer disconnected due to token error. Attempting reconnect.") - await self._reconnect() - else: - log.info("EventHubConsumer authentication failed. Shutting down.") - error = AuthenticationError(str(auth_error), auth_error) - await self.close(auth_error) - raise error - except (errors.LinkDetach, errors.ConnectionClose) as shutdown: - if shutdown.action.retry and self.auto_reconnect: - log.info("EventHubConsumer detached. Attempting reconnect.") - await self._reconnect() - else: - log.info("EventHubConsumer detached. Shutting down.") - error = ConnectionLostError(str(shutdown), shutdown) - await self.close(exception=error) - raise error - except errors.MessageHandlerError as shutdown: - if connecting_count < max_retries: - log.info("EventHubConsumer detached. Attempting reconnect.") - await self._reconnect() - else: - log.info("EventHubConsumer detached. Shutting down.") - error = ConnectionLostError(str(shutdown), shutdown) - await self.close(error) - raise error - except errors.AMQPConnectionError as shutdown: - if connecting_count < max_retries: - log.info("EventHubConsumer connection lost. Attempting reconnect.") - await self._reconnect() - else: - log.info("EventHubConsumer connection lost. Shutting down.") - error = ConnectionLostError(str(shutdown), shutdown) - await self.close(error) - raise error - except compat.TimeoutException as shutdown: - if connecting_count < max_retries: - log.info("EventHubConsumer timed out receiving event data. Attempting reconnect.") - await self._reconnect() - else: - log.info("EventHubConsumer timed out. Shutting down.") - await self.close(shutdown) - raise ConnectionLostError(str(shutdown), shutdown) - except Exception as e: - log.info("Unexpected error occurred (%r). 
Shutting down.", e) - error = EventHubError("Receive failed: {}".format(e), e) - await self.close(exception=error) - raise error + except Exception as exception: + await self._handle_exception(exception, retry_count, max_retries) + retry_count += 1 async def close(self, exception=None): # type: (Exception) -> None diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/error_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/error_async.py new file mode 100644 index 000000000000..500dde08e8a7 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/error_async.py @@ -0,0 +1,79 @@ +from uamqp import errors, compat +from ..error import EventHubError, EventDataSendError, \ + EventDataError, ConnectError, ConnectionLostError, AuthenticationError + + +async def _handle_exception(exception, retry_count, max_retries, closable, log): + type_name = type(closable).__name__ + if isinstance(exception, KeyboardInterrupt): + log.info("{} stops due to keyboard interrupt".format(type_name)) + await closable.close() + raise + + elif isinstance(exception, ( + errors.MessageAccepted, + errors.MessageAlreadySettled, + errors.MessageModified, + errors.MessageRejected, + errors.MessageReleased, + errors.MessageContentTooLarge) + ): + log.error("Event data error (%r)", exception) + error = EventDataError(str(exception), exception) + await closable.close(exception) + raise error + elif isinstance(exception, errors.MessageException): + log.error("Event data send error (%r)", exception) + error = EventDataSendError(str(exception), exception) + await closable.close(exception) + raise error + elif retry_count >= max_retries: + log.info("{} has an error and has exhausted retrying. (%r)".format(type_name), exception) + if isinstance(exception, errors.AuthenticationException): + log.info("{} authentication failed. Shutting down.".format(type_name)) + error = AuthenticationError(str(exception), exception) + elif isinstance(exception, errors.VendorLinkDetach): + log.info("{} link detached. Shutting down.".format(type_name)) + error = ConnectError(str(exception), exception) + elif isinstance(exception, errors.LinkDetach): + log.info("{} link detached. Shutting down.".format(type_name)) + error = ConnectionLostError(str(exception), exception) + elif isinstance(exception, errors.ConnectionClose): + log.info("{} connection closed. Shutting down.".format(type_name)) + error = ConnectionLostError(str(exception), exception) + elif isinstance(exception, errors.MessageHandlerError): + log.info("{} detached. Shutting down.".format(type_name)) + error = ConnectionLostError(str(exception), exception) + elif isinstance(exception, errors.AMQPConnectionError): + log.info("{} connection lost. Shutting down.".format(type_name)) + error_type = AuthenticationError if str(exception).startswith("Unable to open authentication session") \ + else ConnectError + error = error_type(str(exception), exception) + elif isinstance(exception, compat.TimeoutException): + log.info("{} timed out. Shutting down.".format(type_name)) + error = ConnectionLostError(str(exception), exception) + else: + log.error("Unexpected error occurred (%r). Shutting down.", exception) + error = EventHubError("Receive failed: {}".format(exception), exception) + await closable.close() + raise error + else: + log.info("{} has an exception (%r). 
Retrying...".format(type_name), exception) + if isinstance(exception, errors.AuthenticationException): + await closable._close_connection() + elif isinstance(exception, errors.LinkRedirect): + log.info("{} link redirected. Redirecting...".format(type_name)) + redirect = exception + await closable._redirect(redirect) + elif isinstance(exception, errors.LinkDetach): + await closable._close_handler() + elif isinstance(exception, errors.ConnectionClose): + await closable._close_connection() + elif isinstance(exception, errors.MessageHandlerError): + await closable._close_handler() + elif isinstance(exception, errors.AMQPConnectionError): + await closable._close_connection() + elif isinstance(exception, compat.TimeoutException): + pass # Timeout doesn't need to recreate link or connection to retry + else: + await closable._close_connection() \ No newline at end of file diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py index aef8dc50ff02..07237efe5936 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py @@ -13,6 +13,8 @@ from azure.eventhub.common import EventData, _BatchSendEventData from azure.eventhub.error import EventHubError, ConnectError, \ AuthenticationError, EventDataError, EventDataSendError, ConnectionLostError, _error_handler +from .error_async import _handle_exception +from ..producer import _error, _set_partition_key log = logging.getLogger(__name__) @@ -68,6 +70,17 @@ def __init__( # pylint: disable=super-init-not-called if partition: self.target += "/Partitions/" + partition self.name += "-partition{}".format(partition) + self._handler = None + self._outcome = None + self._condition = None + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + await self.close(exc_val) + + def _create_handler(self): self._handler = SendClientAsync( self.target, auth=self.client.get_auth(), @@ -76,16 +89,14 @@ def __init__( # pylint: disable=super-init-not-called error_policy=self.retry_policy, keep_alive_interval=self.keep_alive, client_name=self.name, - properties=self.client._create_properties(self.client.config.user_agent), # pylint: disable=protected-access + properties=self.client._create_properties( + self.client.config.user_agent), # pylint: disable=protected-access loop=self.loop) - self._outcome = None - self._condition = None - - async def __aenter__(self): - return self - async def __aexit__(self, exc_type, exc_val, exc_tb): - await self.close(exc_val) + async def _redirect(self, redirect): + self.redirected = redirect + self.running = False + await self._close_connection() async def _open(self): """ @@ -94,188 +105,49 @@ async def _open(self): context will be used to create a new handler before opening it. 
""" - if self.redirected: - self.target = self.redirected.address - self._handler = SendClientAsync( - self.target, - auth=self.client.get_auth(), - debug=self.client.config.network_tracing, - msg_timeout=self.timeout, - error_policy=self.retry_policy, - keep_alive_interval=self.keep_alive, - client_name=self.name, - properties=self.client._create_properties(self.client.config.user_agent), # pylint: disable=protected-access - loop=self.loop) if not self.running: - await self._connect() + if self.redirected: + self.target = self.redirected.address + self._create_handler() + await self._handler.open_async(connection=await self.client._conn_manager.get_connection( + self.client.address.hostname, + self.client.get_auth() + )) + while not await self._handler.client_ready_async(): + await asyncio.sleep(0.05) self.running = True - async def _connect(self): - connected = await self._build_connection() - if not connected: - await asyncio.sleep(self.reconnect_backoff) - while not await self._build_connection(is_reconnect=True): - await asyncio.sleep(self.reconnect_backoff) - - async def _build_connection(self, is_reconnect=False): - """ + async def _close_handler(self): + await self._handler.close_async() # close the link (sharing connection) or connection (not sharing) + self.running = False - :param is_reconnect: True - trying to reconnect after fail to connect or a connection is lost. - False - the 1st time to connect - :return: True - connected. False - not connected - """ - # pylint: disable=protected-access - if is_reconnect: - await self._handler.close_async() - self._handler = SendClientAsync( - self.target, - auth=self.client.get_auth(), - debug=self.client.config.network_tracing, - msg_timeout=self.timeout, - error_policy=self.retry_policy, - keep_alive_interval=self.keep_alive, - client_name=self.name, - properties=self.client._create_properties(self.client.config.user_agent), - loop=self.loop) - try: - await self._handler.open_async() - while not await self._handler.client_ready_async(): - await asyncio.sleep(0.05) - return True - except errors.AuthenticationException as shutdown: - if is_reconnect: - log.info("EventHubProducer couldn't authenticate. Shutting down. (%r)", shutdown) - error = AuthenticationError(str(shutdown), shutdown) - await self.close(exception=error) - raise error - else: - log.info("EventHubProducer couldn't authenticate. Attempting reconnect.") - return False - except (errors.LinkDetach, errors.ConnectionClose) as shutdown: - if shutdown.action.retry: - log.info("EventHubProducer detached. Attempting reconnect.") - return False - else: - log.info("EventHubProducer detached. Shutting down.") - error = ConnectError(str(shutdown), shutdown) - await self.close(exception=error) - raise error - except errors.MessageHandlerError as shutdown: - if is_reconnect: - log.info("EventHubProducer detached. Shutting down.") - error = ConnectError(str(shutdown), shutdown) - await self.close(exception=error) - raise error - else: - log.info("EventHubProducer detached. Attempting reconnect.") - return False - except errors.AMQPConnectionError as shutdown: - if is_reconnect: - log.info("EventHubProducer connection error (%r). Shutting down.", shutdown) - error = AuthenticationError(str(shutdown), shutdown) - await self.close(exception=error) - raise error - else: - log.info("EventHubProducer couldn't authenticate. Attempting reconnect.") - return False - except compat.TimeoutException as shutdown: - if is_reconnect: - log.info("EventHubProducer authentication timed out. 
Shutting down.") - error = AuthenticationError(str(shutdown), shutdown) - await self.close(exception=error) - raise error - else: - log.info("EventHubProducer authentication timed out. Attempting reconnect.") - return False - except Exception as e: - log.info("Unexpected error occurred when building connection (%r). Shutting down.", e) - error = EventHubError("Unexpected error occurred when building connection", e) - await self.close(exception=error) - raise error + async def _close_connection(self): + await self._close_handler() + await self.client._conn_manager.close_connection() # close the shared connection. - async def _reconnect(self): - return await self._build_connection(is_reconnect=True) + async def _handle_exception(self, exception, retry_count, max_retries): + await _handle_exception(exception, retry_count, max_retries, self, log) async def _send_event_data(self): - await self._open() max_retries = self.client.config.max_retries - connecting_count = 0 + retry_count = 0 while True: - connecting_count += 1 try: if self.unsent_events: + await self._open() self._handler.queue_message(*self.unsent_events) await self._handler.wait_async() self.unsent_events = self._handler.pending_messages if self._outcome != constants.MessageSendResult.Ok: - EventHubProducer._error(self._outcome, self._condition) + _error(self._outcome, self._condition) return - except (errors.MessageAccepted, - errors.MessageAlreadySettled, - errors.MessageModified, - errors.MessageRejected, - errors.MessageReleased, - errors.MessageContentTooLarge) as msg_error: - raise EventDataError(str(msg_error), msg_error) - except errors.MessageException as failed: - log.error("Send event data error (%r)", failed) - error = EventDataSendError(str(failed), failed) - await self.close(exception=error) - raise error - except errors.AuthenticationException as auth_error: - if connecting_count < max_retries: - log.info("EventHubProducer disconnected due to token error. Attempting reconnect.") - await self._reconnect() - else: - log.info("EventHubProducer authentication failed. Shutting down.") - error = AuthenticationError(str(auth_error), auth_error) - await self.close(auth_error) - raise error - except (errors.LinkDetach, errors.ConnectionClose) as shutdown: - if shutdown.action.retry: - log.info("EventHubProducer detached. Attempting reconnect.") - await self._reconnect() - else: - log.info("EventHubProducer detached. Shutting down.") - error = ConnectionLostError(str(shutdown), shutdown) - await self.close(exception=error) - raise error - except errors.MessageHandlerError as shutdown: - if connecting_count < max_retries: - log.info("EventHubProducer detached. Attempting reconnect.") - await self._reconnect() - else: - log.info("EventHubProducer detached. Shutting down.") - error = ConnectionLostError(str(shutdown), shutdown) - await self.close(error) - raise error - except errors.AMQPConnectionError as shutdown: - if connecting_count < max_retries: - log.info("EventHubProducer connection lost. Attempting reconnect.") - await self._reconnect() - else: - log.info("EventHubProducer connection lost. Shutting down.") - error = ConnectionLostError(str(shutdown), shutdown) - await self.close(error) - raise error - except compat.TimeoutException as shutdown: - if connecting_count < max_retries: - log.info("EventHubProducer timed out sending event data. Attempting reconnect.") - await self._reconnect() - else: - log.info("EventHubProducer timed out. 
Shutting down.") - await self.close(shutdown) - raise ConnectionLostError(str(shutdown), shutdown) - except Exception as e: - log.info("Unexpected error occurred (%r). Shutting down.", e) - error = EventHubError("Send failed: {}".format(e), e) - await self.close(exception=error) - raise error + except Exception as exception: + await self._handle_exception(exception, retry_count, max_retries) + retry_count += 1 def _check_closed(self): if self.error: - raise EventHubError("This producer has been closed. Please create a new producer to send event data.", - self.error) + raise EventHubError("This producer has been closed. Please create a new producer to send event data.") def _on_outcome(self, outcome, condition): """ @@ -289,20 +161,8 @@ def _on_outcome(self, outcome, condition): self._outcome = outcome self._condition = condition - @staticmethod - def _error(outcome, condition): - if outcome != constants.MessageSendResult.Ok: - raise condition - - @staticmethod - def _set_partition_key(event_datas, partition_key): - ed_iter = iter(event_datas) - for ed in ed_iter: - ed._set_partition_key(partition_key) - yield ed - async def send(self, event_data, partition_key=None): - # type:(Union[EventData, Union[List[EventData], Iterator[EventData], Generator[EventData]]], Union[str, bytes]) -> None + # type:(Union[EventData, Iterable[EventData]], Union[str, bytes]) -> None """ Sends an event data and blocks until acknowledgement is received or operation times out. @@ -332,7 +192,7 @@ async def send(self, event_data, partition_key=None): event_data._set_partition_key(partition_key) wrapper_event_data = event_data else: - event_data_with_pk = self._set_partition_key(event_data, partition_key) + event_data_with_pk = _set_partition_key(event_data, partition_key) wrapper_event_data = _BatchSendEventData( event_data_with_pk, partition_key=partition_key) if partition_key else _BatchSendEventData(event_data) @@ -373,4 +233,4 @@ async def close(self, exception=None): self.error = EventHubError(str(exception)) else: self.error = EventHubError("This send handler is now closed.") - await self._handler.close_async() \ No newline at end of file + await self._handler.close_async() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py index a88beebdc275..65f22b19662a 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py @@ -28,7 +28,7 @@ from .client_abstract import EventHubClientAbstract from .common import EventHubSASTokenCredential, EventHubSharedKeyCredential from ._connection_manager import get_connection_manager - +from .error import _handle_exception log = logging.getLogger(__name__) @@ -50,10 +50,6 @@ class EventHubClient(EventHubClientAbstract): def __init__(self, host, event_hub_path, credential, **kwargs): super(EventHubClient, self).__init__(host, event_hub_path, credential, **kwargs) - alt_creds = { - "username": self._auth_config.get("iot_username"), - "password": self._auth_config.get("iot_password") - } self._conn_manager = get_connection_manager(**kwargs) def __del__(self): @@ -110,10 +106,16 @@ def _create_auth(self, username=None, password=None): get_jwt_token, http_proxy=http_proxy, transport_type=transport_type) + def _handle_exception(self, exception, retry_count, max_retries): + _handle_exception(exception, retry_count, max_retries, self, log) + + def _close_connection(self): + self._conn_manager.reset_connection_if_broken() + def 
_management_request(self, mgmt_msg, op_type): + max_retries = self.config.max_retries retry_count = 0 while retry_count <= self.config.max_retries: - retry_count += 1 mgmt_auth = self._create_auth() mgmt_client = uamqp.AMQPClient(self.mgmt_target) try: @@ -126,19 +128,9 @@ def _management_request(self, mgmt_msg, op_type): status_code_field=b'status-code', description_fields=b'status-description') return response - except (errors.AMQPConnectionError, errors.TokenAuthFailure, compat.TimeoutException) as failure: - if retry_count >= self.config.max_retries: - err = ConnectError( - "Can not connect to EventHubs or get management info from the service. " - "Please make sure the connection string or token is correct and retry. " - "Besides, this method doesn't work if you use an IoT connection string.", - failure - ) - raise err - except Exception as failure: - if retry_count >= self.config.max_retries: - err = EventHubError("Unexpected error happened during management request", failure) - raise err + except Exception as exception: + self._handle_exception(exception, retry_count, max_retries) + retry_count += 1 finally: mgmt_client.close() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py index b8fc6ff4a2ea..dde3a456311b 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py @@ -102,8 +102,7 @@ def __next__(self): def _check_closed(self): if self.error: - raise EventHubError("This consumer has been closed. Please create a new consumer to receive event data.", - self.error) + raise EventHubError("This consumer has been closed. Please create a new consumer to receive event data.") def _create_handler(self): alt_creds = { @@ -164,7 +163,7 @@ def _close_handler(self): def _close_connection(self): self._close_handler() - self.client._conn_manager.close_connection() # close the shared connection. + self.client._conn_manager.reset_connection_if_broken() def _handle_exception(self, exception, retry_count, max_retries): _handle_exception(exception, retry_count, max_retries, self, log) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/error.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/error.py index d15dabf0c051..db8ae4794e1e 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/error.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/error.py @@ -131,8 +131,9 @@ class EventDataSendError(EventHubError): def _handle_exception(exception, retry_count, max_retries, closable, log): + type_name = type(closable).__name__ if isinstance(exception, KeyboardInterrupt): - log.info("EventHubConsumer stops due to keyboard interrupt") + log.info("{} stops due to keyboard interrupt".format(type_name)) closable.close() raise elif isinstance(exception, ( @@ -153,52 +154,58 @@ def _handle_exception(exception, retry_count, max_retries, closable, log): closable.close(exception) raise error elif retry_count >= max_retries: - log.info("EventHubConsumer has an error and has exhausted retrying. (%r)", exception) + log.info("{} has an error and has exhausted retrying. (%r)".format(type_name), exception) if isinstance(exception, errors.AuthenticationException): - log.info("EventHubConsumer authentication failed. Shutting down.") + log.info("{} authentication failed. Shutting down.".format(type_name)) error = AuthenticationError(str(exception), exception) elif isinstance(exception, errors.VendorLinkDetach): - log.info("EventHubConsumer link detached. 
Shutting down.") + log.info("{} link detached. Shutting down.".format(type_name)) error = ConnectError(str(exception), exception) elif isinstance(exception, errors.LinkDetach): - log.info("EventHubConsumer link detached. Shutting down.") + log.info("{} link detached. Shutting down.".format(type_name)) error = ConnectionLostError(str(exception), exception) elif isinstance(exception, errors.ConnectionClose): - log.info("EventHubConsumer connection closed. Shutting down.") + log.info("{} connection closed. Shutting down.".format(type_name)) error = ConnectionLostError(str(exception), exception) elif isinstance(exception, errors.MessageHandlerError): - log.info("EventHubConsumer detached. Shutting down.") + log.info("{} detached. Shutting down.".format(type_name)) error = ConnectionLostError(str(exception), exception) elif isinstance(exception, errors.AMQPConnectionError): - log.info("EventHubConsumer connection lost. Shutting down.") + log.info("{} connection lost. Shutting down.".format(type_name)) error_type = AuthenticationError if str(exception).startswith("Unable to open authentication session") \ else ConnectError error = error_type(str(exception), exception) elif isinstance(exception, compat.TimeoutException): - log.info("EventHubConsumer timed out. Shutting down.") + log.info("{} timed out. Shutting down.".format(type_name)) error = ConnectionLostError(str(exception), exception) else: log.error("Unexpected error occurred (%r). Shutting down.", exception) error = EventHubError("Receive failed: {}".format(exception), exception) - closable.close(exception=error) + closable.close() raise error else: - log.info("EventHubConsumer has an exception (%r). Retrying...", exception) + log.info("{} has an exception (%r). Retrying...".format(type_name), exception) if isinstance(exception, errors.AuthenticationException): closable._close_connection() elif isinstance(exception, errors.LinkRedirect): - log.info("EventHubConsumer link redirected. Redirecting...") + log.info("{} link redirected. 
Redirecting...".format(type_name)) redirect = exception - closable._redirect(redirect) + if hasattr(closable, "_redirect"): + closable._redirect(redirect) elif isinstance(exception, errors.LinkDetach): - closable._close_handler() + if hasattr(closable, "_close_handler"): + closable._close_handler() elif isinstance(exception, errors.ConnectionClose): - closable._close_connection() + if hasattr(closable, "_close_connection"): + closable._close_connection() elif isinstance(exception, errors.MessageHandlerError): - closable._close_handler() + if hasattr(closable, "_close_handler"): + closable._close_handler() elif isinstance(exception, errors.AMQPConnectionError): - closable._close_connection() + if hasattr(closable, "_close_connection"): + closable._close_connection() elif isinstance(exception, compat.TimeoutException): - pass # Timeout doesn't need to recreate link or exception + pass # Timeout doesn't need to recreate link or connection to retry else: - closable._close_connection() + if hasattr(closable, "_close_connection"): + closable._close_connection() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py index cfc32c7157cd..3f3ff4c9492e 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py @@ -67,15 +67,7 @@ def __init__(self, client, target, partition=None, send_timeout=60, keep_alive=N if partition: self.target += "/Partitions/" + partition self.name += "-partition{}".format(partition) - self._handler = SendClient( - self.target, - auth=self.client.get_auth(), - debug=self.client.config.network_tracing, - msg_timeout=self.timeout, - error_policy=self.retry_policy, - keep_alive_interval=self.keep_alive, - client_name=self.name, - properties=self.client._create_properties(self.client.config.user_agent)) # pylint: disable=protected-access + self._handler = None self._outcome = None self._condition = None @@ -99,7 +91,6 @@ def _create_handler(self): def _redirect(self, redirect): self.redirected = redirect self.running = False - self.messages_iter = None self._close_connection() def _open(self): @@ -128,7 +119,7 @@ def _close_handler(self): def _close_connection(self): self._close_handler() - self.client._conn_manager.close_connection() # close the shared connection. + self.client._conn_manager.reset_connection_if_broken() def _handle_exception(self, exception, retry_count, max_retries): _handle_exception(exception, retry_count, max_retries, self, log) @@ -152,14 +143,7 @@ def _send_event_data(self): def _check_closed(self): if self.error: - raise EventHubError("This producer has been closed. Please create a new producer to send event data.", self.error) - - @staticmethod - def _set_partition_key(event_datas, partition_key): - ed_iter = iter(event_datas) - for ed in ed_iter: - ed._set_partition_key(partition_key) - yield ed + raise EventHubError("This producer has been closed. 
Please create a new producer to send event data.") def _on_outcome(self, outcome, condition): """ @@ -205,7 +189,7 @@ def send(self, event_data, partition_key=None): event_data._set_partition_key(partition_key) wrapper_event_data = event_data else: - event_data_with_pk = self._set_partition_key(event_data, partition_key) + event_data_with_pk = _set_partition_key(event_data, partition_key) wrapper_event_data = _BatchSendEventData( event_data_with_pk, partition_key=partition_key) if partition_key else _BatchSendEventData(event_data) @@ -250,3 +234,10 @@ def close(self, exception=None): def _error(outcome, condition): if outcome != constants.MessageSendResult.Ok: raise condition + + +def _set_partition_key(event_datas, partition_key): + ed_iter = iter(event_datas) + for ed in ed_iter: + ed._set_partition_key(partition_key) + yield ed From c5a23c8953644ec7f50d083cd7ccfa4510b71dce Mon Sep 17 00:00:00 2001 From: yijxie Date: Wed, 17 Jul 2019 12:24:29 -0700 Subject: [PATCH 05/42] Fix an issue --- .../azure/eventhub/aio/error_async.py | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/error_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/error_async.py index 500dde08e8a7..3afea84c3904 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/error_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/error_async.py @@ -60,20 +60,27 @@ async def _handle_exception(exception, retry_count, max_retries, closable, log): else: log.info("{} has an exception (%r). Retrying...".format(type_name), exception) if isinstance(exception, errors.AuthenticationException): - await closable._close_connection() + if hasattr(closable, "_close_connection"): + await closable._close_connection() elif isinstance(exception, errors.LinkRedirect): log.info("{} link redirected. 
Redirecting...".format(type_name)) redirect = exception - await closable._redirect(redirect) + if hasattr(closable, "_redirect"): + await closable._redirect(redirect) elif isinstance(exception, errors.LinkDetach): - await closable._close_handler() + if hasattr(closable, "_close_handler"): + await closable._close_handler() elif isinstance(exception, errors.ConnectionClose): - await closable._close_connection() + if hasattr(closable, "_close_connection"): + await closable._close_connection() elif isinstance(exception, errors.MessageHandlerError): - await closable._close_handler() + if hasattr(closable, "_close_handler"): + await closable._close_handler() elif isinstance(exception, errors.AMQPConnectionError): - await closable._close_connection() + if hasattr(closable, "_close_connection"): + await closable._close_connection() elif isinstance(exception, compat.TimeoutException): pass # Timeout doesn't need to recreate link or connection to retry else: - await closable._close_connection() \ No newline at end of file + if hasattr(closable, "_close_connection"): + await closable._close_connection() From b83264d16408c3bdaed215a5f4ae2c307705d3bf Mon Sep 17 00:00:00 2001 From: yijxie Date: Sun, 21 Jul 2019 23:25:19 -0700 Subject: [PATCH 06/42] add retry exponential delay and timeout to exception handling --- .../eventhub/_consumer_producer_mixin.py | 109 +++++++++++++++++ .../eventhub/aio/_connection_manager_async.py | 5 +- .../aio/_consumer_producer_mixin_async.py | 112 ++++++++++++++++++ .../azure/eventhub/aio/client_async.py | 2 +- .../azure/eventhub/aio/consumer_async.py | 75 +++++------- .../azure/eventhub/aio/error_async.py | 100 ++++++++++------ .../azure/eventhub/aio/producer_async.py | 98 ++++++--------- .../azure-eventhubs/azure/eventhub/client.py | 3 - .../azure-eventhubs/azure/eventhub/common.py | 1 + .../azure/eventhub/configuration.py | 6 +- .../azure/eventhub/consumer.py | 93 +++++---------- .../azure-eventhubs/azure/eventhub/error.py | 103 ++++++++++------ .../azure/eventhub/producer.py | 95 ++++++--------- 13 files changed, 481 insertions(+), 321 deletions(-) create mode 100644 sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py create mode 100644 sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py new file mode 100644 index 000000000000..9ac6fb468945 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py @@ -0,0 +1,109 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- +from __future__ import unicode_literals + +import logging +import time + +from uamqp import errors +from azure.eventhub.error import EventHubError, _handle_exception + +log = logging.getLogger(__name__) + + +class ConsumerProducerMixin(object): + def __init__(self): + self.client = None + self._handler = None + self.name = None + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close(exc_val) + + def _check_closed(self): + if self.error: + raise EventHubError("{} has been closed. 
Please create a new consumer to receive event data.".format(self.name)) + + def _create_handler(self): + pass + + def _redirect(self, redirect): + self.redirected = redirect + self.running = False + self._close_connection() + + def _open(self, timeout_time=None): + """ + Open the EventHubConsumer using the supplied connection. + If the handler has previously been redirected, the redirect + context will be used to create a new handler before opening it. + + """ + # pylint: disable=protected-access + if not self.running: + if self.redirected: + alt_creds = { + "username": self.client._auth_config.get("iot_username"), + "password": self.client._auth_config.get("iot_password")} + else: + alt_creds = {} + self._create_handler() + self._handler.open(connection=self.client._conn_manager.get_connection( + self.client.address.hostname, + self.client.get_auth(**alt_creds) + )) + while not self._handler.client_ready(): + if timeout_time and time.time() >= timeout_time: + return + time.sleep(0.05) + self.running = True + + def _close_handler(self): + self._handler.close() # close the link (sharing connection) or connection (not sharing) + self.running = False + + def _close_connection(self): + self._close_handler() + self.client._conn_manager.reset_connection_if_broken() + + def _handle_exception(self, exception, retry_count, max_retries, timeout_time): + _handle_exception(exception, retry_count, max_retries, self, timeout_time) + + def close(self, exception=None): + # type:(Exception) -> None + """ + Close down the handler. If the handler has already closed, + this will be a no op. An optional exception can be passed in to + indicate that the handler was shutdown due to error. + + :param exception: An optional exception if the handler is closing + due to an error. + :type exception: Exception + + Example: + .. literalinclude:: ../examples/test_examples_eventhub.py + :start-after: [START eventhub_client_receiver_close] + :end-before: [END eventhub_client_receiver_close] + :language: python + :dedent: 4 + :caption: Close down the handler. + + """ + self.running = False + if self.error: + return + if isinstance(exception, errors.LinkRedirect): + self.redirected = exception + elif isinstance(exception, EventHubError): + self.error = exception + elif exception: + self.error = EventHubError(str(exception)) + else: + self.error = EventHubError("{} handler is closed.".format(self.name)) + if self._handler: + self._handler.close() # this will close link if sharing connection. 
Otherwise close connection diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_connection_manager_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_connection_manager_async.py index bd54a2bc4e3a..3178e1fb72a7 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_connection_manager_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_connection_manager_async.py @@ -49,8 +49,8 @@ async def close_connection(self): await self._conn.destroy_async() self._conn = None - def reset_connection_if_broken(self): - with self._lock: + async def reset_connection_if_broken(self): + async with self._lock: if self._conn and self._conn._state in ( c_uamqp.ConnectionState.CLOSE_RCVD, c_uamqp.ConnectionState.CLOSE_SENT, @@ -59,6 +59,7 @@ def reset_connection_if_broken(self): ): self._conn = None + class _SeparateConnectionManager(object): def __init__(self, **kwargs): pass diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py new file mode 100644 index 000000000000..5a0f0d9eaa4d --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py @@ -0,0 +1,112 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- +import asyncio +import logging +import time + +from uamqp import errors +from azure.eventhub.error import EventHubError, ConnectError +from ..aio.error_async import _handle_exception + +log = logging.getLogger(__name__) + + +class ConsumerProducerMixin(object): + + def __init__(self): + self.client = None + self._handler = None + self.name = None + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + await self.close(exc_val) + + def _check_closed(self): + if self.error: + raise EventHubError("{} has been closed. Please create a new consumer to receive event data.".format(self.name)) + + def _create_handler(self): + pass + + async def _redirect(self, redirect): + self.redirected = redirect + self.running = False + await self._close_connection() + + async def _open(self, timeout_time=None): + """ + Open the EventHubConsumer using the supplied connection. + If the handler has previously been redirected, the redirect + context will be used to create a new handler before opening it. 
+ + """ + # pylint: disable=protected-access + if not self.running: + if self.redirected: + alt_creds = { + "username": self.client._auth_config.get("iot_username"), + "password": self.client._auth_config.get("iot_password")} + else: + alt_creds = {} + self._create_handler() + await self._handler.open_async(connection=await self.client._conn_manager.get_connection( + self.client.address.hostname, + self.client.get_auth(**alt_creds) + )) + while not await self._handler.client_ready_async(): + if timeout_time and time.time() >= timeout_time: + return + await asyncio.sleep(0.05) + self.running = True + + async def _close_handler(self): + await self._handler.close_async() # close the link (sharing connection) or connection (not sharing) + self.running = False + + async def _close_connection(self): + await self._close_handler() + await self.client._conn_manager.reset_connection_if_broken() + + async def _handle_exception(self, exception, retry_count, max_retries, timeout_time): + await _handle_exception(exception, retry_count, max_retries, self, timeout_time) + + async def close(self, exception=None): + # type: (Exception) -> None + """ + Close down the handler. If the handler has already closed, + this will be a no op. An optional exception can be passed in to + indicate that the handler was shutdown due to error. + + :param exception: An optional exception if the handler is closing + due to an error. + :type exception: Exception + + Example: + .. literalinclude:: ../examples/async_examples/test_examples_eventhub_async.py + :start-after: [START eventhub_client_async_receiver_close] + :end-before: [END eventhub_client_async_receiver_close] + :language: python + :dedent: 4 + :caption: Close down the handler. + + """ + self.running = False + if self.error: + return + if isinstance(exception, errors.LinkRedirect): + self.redirected = exception + elif isinstance(exception, EventHubError): + self.error = exception + elif isinstance(exception, (errors.LinkDetach, errors.ConnectionClose)): + self.error = ConnectError(str(exception), exception) + elif exception: + self.error = EventHubError(str(exception)) + else: + self.error = EventHubError("This receive handler is now closed.") + if self._handler: + await self._handler.close_async() \ No newline at end of file diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/client_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/client_async.py index 0141923f9fe3..513e13e4f75e 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/client_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/client_async.py @@ -101,7 +101,7 @@ async def _handle_exception(self, exception, retry_count, max_retries): await _handle_exception(exception, retry_count, max_retries, self, log) async def _close_connection(self): - self._conn_manager.reset_connection_if_broken() + await self._conn_manager.reset_connection_if_broken() async def _management_request(self, mgmt_msg, op_type): max_retries = self.config.max_retries diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py index 203cdd9882bd..acd20181845f 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py @@ -6,6 +6,7 @@ import uuid import logging from typing import List +import time from uamqp import errors, types, compat from uamqp import ReceiveClientAsync, Source @@ -13,11 +14,12 @@ from azure.eventhub 
import EventData, EventPosition from azure.eventhub.error import EventHubError, AuthenticationError, ConnectError, ConnectionLostError, _error_handler from ..aio.error_async import _handle_exception +from ._consumer_producer_mixin_async import ConsumerProducerMixin log = logging.getLogger(__name__) -class EventHubConsumer(object): +class EventHubConsumer(ConsumerProducerMixin): """ A consumer responsible for reading EventData from a specific Event Hub partition and as a member of a specific consumer group. @@ -55,6 +57,7 @@ def __init__( # pylint: disable=super-init-not-called :type owner_level: int :param loop: An event loop. """ + super(EventHubConsumer, self).__init__() self.loop = loop or asyncio.get_event_loop() self.running = False self.client = client @@ -76,12 +79,6 @@ def __init__( # pylint: disable=super-init-not-called self.properties = {types.AMQPSymbol(self._epoch): types.AMQPLong(int(owner_level))} self._handler = None - async def __aenter__(self): - return self - - async def __aexit__(self, exc_type, exc_val, exc_tb): - await self.close(exc_val) - def __aiter__(self): return self @@ -101,10 +98,6 @@ async def __anext__(self): await self._handle_exception(exception, retry_count, max_retries) retry_count += 1 - def _check_closed(self): - if self.error: - raise EventHubError("This consumer has been closed. Please create a new consumer to receive event data.") - def _create_handler(self): alt_creds = { "username": self.client._auth_config.get("iot_username"), @@ -128,12 +121,10 @@ def _create_handler(self): self.messages_iter = None async def _redirect(self, redirect): - self.redirected = redirect - self.running = False self.messages_iter = None - await self._close_connection() + await super(EventHubConsumer, self)._redirect(redirect) - async def _open(self): + async def _open(self, timeout_time=None): """ Open the EventHubConsumer using the supplied connection. 
If the handler has previously been redirected, the redirect @@ -141,34 +132,10 @@ async def _open(self): """ # pylint: disable=protected-access - if not self.running: - if self.redirected: - self.client._process_redirect_uri(self.redirected) - self.source = self.redirected.address - alt_creds = { - "username": self.client._auth_config.get("iot_username"), - "password": self.client._auth_config.get("iot_password")} - else: - alt_creds = {} - self._create_handler() - await self._handler.open_async(connection=await self.client._conn_manager.get_connection( - self.client.address.hostname, - self.client.get_auth(**alt_creds) - )) - while not await self._handler.client_ready_async(): - await asyncio.sleep(0.05) - self.running = True - - async def _close_handler(self): - await self._handler.close_async() # close the link (sharing connection) or connection (not sharing) - self.running = False - - async def _close_connection(self): - await self._close_handler() - self.client._conn_manager.reset_connection_if_broken() - - async def _handle_exception(self, exception, retry_count, max_retries): - await _handle_exception(exception, retry_count, max_retries, self, log) + if not self.running and self.redirected: + self.client._process_redirect_uri(self.redirected) + self.source = self.redirected.address + await super(EventHubConsumer, self)._open(timeout_time) @property def queue_size(self): @@ -213,24 +180,38 @@ async def receive(self, max_batch_size=None, timeout=None): self._check_closed() max_batch_size = min(self.client.config.max_batch_size, self.prefetch) if max_batch_size is None else max_batch_size timeout = self.client.config.receive_timeout if timeout is None else timeout + if not timeout: + timeout = 100_000 # timeout None or 0 mean no timeout. 100000 seconds is equivalent to no timeout data_batch = [] + start_time = time.time() + timeout_time = start_time + timeout max_retries = self.client.config.max_retries retry_count = 0 + last_exception = None while True: try: - await self._open() - timeout_ms = 1000 * timeout if timeout else 0 + await self._open(timeout_time) + remaining_time = timeout_time - time.time() + if remaining_time <= 0.0: + if last_exception: + log.info("%r receive operation timed out. 
(%r)", self.name, last_exception) + raise last_exception + return data_batch + + remaining_time_ms = 1000 * remaining_time message_batch = await self._handler.receive_message_batch_async( max_batch_size=max_batch_size, - timeout=timeout_ms) + timeout=remaining_time_ms) for message in message_batch: event_data = EventData(message=message) self.offset = EventPosition(event_data.offset) data_batch.append(event_data) return data_batch + except EventHubError: + raise except Exception as exception: - await self._handle_exception(exception, retry_count, max_retries) + last_exception = await self._handle_exception(exception, retry_count, max_retries, timeout_time) retry_count += 1 async def close(self, exception=None): diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/error_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/error_async.py index 3afea84c3904..d78233c1f896 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/error_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/error_async.py @@ -1,15 +1,49 @@ +import asyncio +import time +import logging + from uamqp import errors, compat from ..error import EventHubError, EventDataSendError, \ EventDataError, ConnectError, ConnectionLostError, AuthenticationError -async def _handle_exception(exception, retry_count, max_retries, closable, log): - type_name = type(closable).__name__ +log = logging.getLogger(__name__) + + +def _create_eventhub_exception(exception): + if isinstance(exception, errors.AuthenticationException): + error = AuthenticationError(str(exception), exception) + elif isinstance(exception, errors.VendorLinkDetach): + error = ConnectError(str(exception), exception) + elif isinstance(exception, errors.LinkDetach): + error = ConnectionLostError(str(exception), exception) + elif isinstance(exception, errors.ConnectionClose): + error = ConnectionLostError(str(exception), exception) + elif isinstance(exception, errors.MessageHandlerError): + error = ConnectionLostError(str(exception), exception) + elif isinstance(exception, errors.AMQPConnectionError): + error_type = AuthenticationError if str(exception).startswith("Unable to open authentication session") \ + else ConnectError + error = error_type(str(exception), exception) + elif isinstance(exception, compat.TimeoutException): + error = ConnectionLostError(str(exception), exception) + else: + error = EventHubError(str(exception), exception) + return error + + +async def _handle_exception(exception, retry_count, max_retries, closable, timeout_time): + try: + name = closable.name + except AttributeError: + name = closable.container_id if isinstance(exception, KeyboardInterrupt): - log.info("{} stops due to keyboard interrupt".format(type_name)) - await closable.close() + log.info("%r stops due to keyboard interrupt", name) + closable.close() + raise + elif isinstance(exception, EventHubError): + closable.close() raise - elif isinstance(exception, ( errors.MessageAccepted, errors.MessageAlreadySettled, @@ -18,52 +52,23 @@ async def _handle_exception(exception, retry_count, max_retries, closable, log): errors.MessageReleased, errors.MessageContentTooLarge) ): - log.error("Event data error (%r)", exception) + log.info("%r Event data error (%r)", name, exception) error = EventDataError(str(exception), exception) - await closable.close(exception) raise error elif isinstance(exception, errors.MessageException): - log.error("Event data send error (%r)", exception) + log.info("%r Event data send error (%r)", name, exception) error = 
EventDataSendError(str(exception), exception) - await closable.close(exception) raise error elif retry_count >= max_retries: - log.info("{} has an error and has exhausted retrying. (%r)".format(type_name), exception) - if isinstance(exception, errors.AuthenticationException): - log.info("{} authentication failed. Shutting down.".format(type_name)) - error = AuthenticationError(str(exception), exception) - elif isinstance(exception, errors.VendorLinkDetach): - log.info("{} link detached. Shutting down.".format(type_name)) - error = ConnectError(str(exception), exception) - elif isinstance(exception, errors.LinkDetach): - log.info("{} link detached. Shutting down.".format(type_name)) - error = ConnectionLostError(str(exception), exception) - elif isinstance(exception, errors.ConnectionClose): - log.info("{} connection closed. Shutting down.".format(type_name)) - error = ConnectionLostError(str(exception), exception) - elif isinstance(exception, errors.MessageHandlerError): - log.info("{} detached. Shutting down.".format(type_name)) - error = ConnectionLostError(str(exception), exception) - elif isinstance(exception, errors.AMQPConnectionError): - log.info("{} connection lost. Shutting down.".format(type_name)) - error_type = AuthenticationError if str(exception).startswith("Unable to open authentication session") \ - else ConnectError - error = error_type(str(exception), exception) - elif isinstance(exception, compat.TimeoutException): - log.info("{} timed out. Shutting down.".format(type_name)) - error = ConnectionLostError(str(exception), exception) - else: - log.error("Unexpected error occurred (%r). Shutting down.", exception) - error = EventHubError("Receive failed: {}".format(exception), exception) - await closable.close() + error = _create_eventhub_exception(exception) + log.info("%r has exhausted retry. Exception still occurs (%r)", name, exception) raise error else: - log.info("{} has an exception (%r). Retrying...".format(type_name), exception) if isinstance(exception, errors.AuthenticationException): if hasattr(closable, "_close_connection"): await closable._close_connection() elif isinstance(exception, errors.LinkRedirect): - log.info("{} link redirected. Redirecting...".format(type_name)) + log.info("%r link redirect received. Redirecting...", name) redirect = exception if hasattr(closable, "_redirect"): await closable._redirect(redirect) @@ -84,3 +89,20 @@ async def _handle_exception(exception, retry_count, max_retries, closable, log): else: if hasattr(closable, "_close_connection"): await closable._close_connection() + # start processing retry delay + try: + backoff_factor = closable.client.config.backoff_factor + backoff_max = closable.client.config.backoff_max + except AttributeError: + backoff_factor = closable.config.backoff_factor + backoff_max = closable.config.backoff_max + backoff = backoff_factor * 2 ** retry_count + if backoff <= backoff_max and time.time() + backoff <= timeout_time: + await asyncio.sleep(backoff) + log.info("%r has an exception (%r). Retrying...", format(name), exception) + return _create_eventhub_exception(exception) + else: + error = _create_eventhub_exception(exception) + log.info("%r operation has timed out. 
Last exception before timeout is (%r)", name, error) + raise error + # end of processing retry delay \ No newline at end of file diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py index 07237efe5936..96d60ffc8943 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py @@ -5,21 +5,22 @@ import uuid import asyncio import logging -from typing import Iterator, Generator, List, Union +from typing import Iterable, Union +import time -from uamqp import constants, errors, compat +from uamqp import constants, errors from uamqp import SendClientAsync from azure.eventhub.common import EventData, _BatchSendEventData -from azure.eventhub.error import EventHubError, ConnectError, \ - AuthenticationError, EventDataError, EventDataSendError, ConnectionLostError, _error_handler -from .error_async import _handle_exception +from azure.eventhub.error import _error_handler, OperationTimeoutError from ..producer import _error, _set_partition_key +from ._consumer_producer_mixin_async import ConsumerProducerMixin + log = logging.getLogger(__name__) -class EventHubProducer(object): +class EventHubProducer(ConsumerProducerMixin): """ A producer responsible for transmitting EventData to a specific Event Hub, grouped together in batches. Depending on the options specified at creation, the producer may @@ -53,6 +54,7 @@ def __init__( # pylint: disable=super-init-not-called :type auto_reconnect: bool :param loop: An event loop. If not specified the default event loop will be used. """ + super(EventHubProducer, self).__init__() self.loop = loop or asyncio.get_event_loop() self.running = False self.client = client @@ -74,12 +76,6 @@ def __init__( # pylint: disable=super-init-not-called self._outcome = None self._condition = None - async def __aenter__(self): - return self - - async def __aexit__(self, exc_type, exc_val, exc_tb): - await self.close(exc_val) - def _create_handler(self): self._handler = SendClientAsync( self.target, @@ -93,48 +89,39 @@ def _create_handler(self): self.client.config.user_agent), # pylint: disable=protected-access loop=self.loop) - async def _redirect(self, redirect): - self.redirected = redirect - self.running = False - await self._close_connection() - - async def _open(self): + async def _open(self, timeout_time=None): """ Open the EventHubProducer using the supplied connection. If the handler has previously been redirected, the redirect context will be used to create a new handler before opening it. """ - if not self.running: - if self.redirected: - self.target = self.redirected.address - self._create_handler() - await self._handler.open_async(connection=await self.client._conn_manager.get_connection( - self.client.address.hostname, - self.client.get_auth() - )) - while not await self._handler.client_ready_async(): - await asyncio.sleep(0.05) - self.running = True - - async def _close_handler(self): - await self._handler.close_async() # close the link (sharing connection) or connection (not sharing) - self.running = False - - async def _close_connection(self): - await self._close_handler() - await self.client._conn_manager.close_connection() # close the shared connection. 
- - async def _handle_exception(self, exception, retry_count, max_retries): - await _handle_exception(exception, retry_count, max_retries, self, log) - - async def _send_event_data(self): + if not self.running and self.redirected: + self.client._process_redirect_uri(self.redirected) + self.target = self.redirected.address + await super(EventHubProducer, self)._open(timeout_time) + + async def _send_event_data(self, timeout=None): + timeout = self.client.config.send_timeout if timeout is None else timeout + if not timeout: + timeout = 100_000 # timeout None or 0 mean no timeout. 100000 seconds is equivalent to no timeout + start_time = time.time() + timeout_time = start_time + timeout max_retries = self.client.config.max_retries retry_count = 0 + last_exception = None while True: try: if self.unsent_events: - await self._open() + await self._open(timeout_time) + remaining_time = timeout_time - time.time() + if remaining_time < 0.0: + if last_exception: + error = last_exception + else: + error = OperationTimeoutError("send operation timed out") + log.info("%r send operation timed out. (%r)", self.name, error) + raise error self._handler.queue_message(*self.unsent_events) await self._handler.wait_async() self.unsent_events = self._handler.pending_messages @@ -142,13 +129,9 @@ async def _send_event_data(self): _error(self._outcome, self._condition) return except Exception as exception: - await self._handle_exception(exception, retry_count, max_retries) + last_exception = await self._handle_exception(exception, retry_count, max_retries, timeout_time) retry_count += 1 - def _check_closed(self): - if self.error: - raise EventHubError("This producer has been closed. Please create a new producer to send event data.") - def _on_outcome(self, outcome, condition): """ Called when the outcome is received for a delivery. @@ -161,7 +144,7 @@ def _on_outcome(self, outcome, condition): self._outcome = outcome self._condition = condition - async def send(self, event_data, partition_key=None): + async def send(self, event_data, partition_key=None, timeout=None): # type:(Union[EventData, Iterable[EventData]], Union[str, bytes]) -> None """ Sends an event data and blocks until acknowledgement is @@ -198,7 +181,7 @@ async def send(self, event_data, partition_key=None): partition_key=partition_key) if partition_key else _BatchSendEventData(event_data) wrapper_event_data.message.on_send_complete = self._on_outcome self.unsent_events = [wrapper_event_data.message] - await self._send_event_data() + await self._send_event_data(timeout) async def close(self, exception=None): # type: (Exception) -> None @@ -220,17 +203,4 @@ async def close(self, exception=None): :caption: Close down the handler. 
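Both the async producer and consumer now follow the same timing scheme: the per-call timeout is turned into an absolute deadline once, the remaining budget is recomputed before every attempt, and when it runs out the last retryable error (or an OperationTimeoutError if none was recorded) is raised. A compressed, self-contained version of that loop, with a plain function standing in for the uamqp client:

import time

class OperationTimeoutError(Exception):     # stands in for the new error type added to error.py
    pass

def run_with_deadline(operation, timeout=None):
    timeout = timeout or 100_000             # None or 0 means "no timeout", as in the patch
    timeout_time = time.time() + timeout
    last_exception = None
    while True:
        remaining_time = timeout_time - time.time()
        if remaining_time <= 0.0:
            raise last_exception or OperationTimeoutError("send operation timed out")
        try:
            return operation(remaining_time)
        except Exception as exception:        # the real client translates, backs off and retries here
            last_exception = exception

def quick_send(remaining_time):
    return "sent with {:.1f}s of the budget left".format(remaining_time)

print(run_with_deadline(quick_send, timeout=10))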
""" - self.running = False - if self.error: - return - if isinstance(exception, errors.LinkRedirect): - self.redirected = exception - elif isinstance(exception, EventHubError): - self.error = exception - elif isinstance(exception, (errors.LinkDetach, errors.ConnectionClose)): - self.error = ConnectError(str(exception), exception) - elif exception: - self.error = EventHubError(str(exception)) - else: - self.error = EventHubError("This send handler is now closed.") - await self._handler.close_async() + await super(EventHubProducer, self).close(exception) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py index 65f22b19662a..5de8f8531093 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py @@ -52,9 +52,6 @@ def __init__(self, host, event_hub_path, credential, **kwargs): super(EventHubClient, self).__init__(host, event_hub_path, credential, **kwargs) self._conn_manager = get_connection_manager(**kwargs) - def __del__(self): - self._conn_manager.close_connection() - def __enter__(self): return self diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/common.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/common.py index 5a6702a60324..5ac6258eeb0a 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/common.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/common.py @@ -8,6 +8,7 @@ import calendar import json import six +from enum import Enum from uamqp import BatchMessage, Message, types from uamqp.message import MessageHeader, MessageProperties diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/configuration.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/configuration.py index 27eb649628ec..58563bdba0e0 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/configuration.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/configuration.py @@ -8,7 +8,11 @@ class _Configuration(object): def __init__(self, **kwargs): self.user_agent = kwargs.get("user_agent") - self.max_retries = kwargs.get("max_retries", 3) + self.retry_total = kwargs.pop('retry_total', 3) + self.max_retries = self.retry_total or kwargs.get("max_retries", 3) + self.backoff_factor = kwargs.pop('retry_backoff_factor', 0.8) + self.backoff_max = kwargs.pop('retry_backoff_max', 120) + self.network_tracing = kwargs.get("network_tracing", False) self.http_proxy = kwargs.get("http_proxy") self.transport_type = TransportType.AmqpOverWebsocket if self.http_proxy \ diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py index dde3a456311b..95cdeedd43aa 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py @@ -10,18 +10,16 @@ from typing import List from uamqp import types, errors -from uamqp import compat from uamqp import ReceiveClient, Source from azure.eventhub.common import EventData, EventPosition -from azure.eventhub.error import EventHubError, AuthenticationError, ConnectError, ConnectionLostError, \ - _error_handler, _handle_exception - +from azure.eventhub.error import _error_handler, EventHubError +from ._consumer_producer_mixin import ConsumerProducerMixin log = logging.getLogger(__name__) -class EventHubConsumer(object): +class EventHubConsumer(ConsumerProducerMixin): """ A consumer responsible for reading EventData from a specific Event Hub partition and as a member of a specific consumer group. 
@@ -55,6 +53,7 @@ def __init__(self, client, source, event_position=None, prefetch=300, owner_leve consumer if owner_level is set. :type owner_level: int """ + super(EventHubConsumer, self).__init__() self.running = False self.client = client self.source = source @@ -70,17 +69,11 @@ def __init__(self, client, source, event_position=None, prefetch=300, owner_leve self.redirected = None self.error = None partition = self.source.split('/')[-1] - self.name = "EHReceiver-{}-partition{}".format(uuid.uuid4(), partition) + self.name = "EHConsumer-{}-partition{}".format(uuid.uuid4(), partition) if owner_level: self.properties = {types.AMQPSymbol(self._epoch): types.AMQPLong(int(owner_level))} self._handler = None - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.close(exc_val) - def __iter__(self): return self @@ -100,10 +93,6 @@ def __next__(self): self._handle_exception(exception, retry_count, max_retries) retry_count += 1 - def _check_closed(self): - if self.error: - raise EventHubError("This consumer has been closed. Please create a new consumer to receive event data.") - def _create_handler(self): alt_creds = { "username": self.client._auth_config.get("iot_username"), @@ -126,12 +115,10 @@ def _create_handler(self): self.messages_iter = None def _redirect(self, redirect): - self.redirected = redirect - self.running = False self.messages_iter = None - self._close_connection() + super(EventHubConsumer, self)._redirect(redirect) - def _open(self): + def _open(self, timeout_time=None): """ Open the EventHubConsumer using the supplied connection. If the handler has previously been redirected, the redirect @@ -139,34 +126,10 @@ def _open(self): """ # pylint: disable=protected-access - if not self.running: - if self.redirected: - self.client._process_redirect_uri(self.redirected) - self.source = self.redirected.address - alt_creds = { - "username": self.client._auth_config.get("iot_username"), - "password": self.client._auth_config.get("iot_password")} - else: - alt_creds = {} - self._create_handler() - self._handler.open(connection=self.client._conn_manager.get_connection( - self.client.address.hostname, - self.client.get_auth(**alt_creds) - )) - while not self._handler.client_ready(): - time.sleep(0.05) - self.running = True - - def _close_handler(self): - self._handler.close() # close the link (sharing connection) or connection (not sharing) - self.running = False - - def _close_connection(self): - self._close_handler() - self.client._conn_manager.reset_connection_if_broken() - - def _handle_exception(self, exception, retry_count, max_retries): - _handle_exception(exception, retry_count, max_retries, self, log) + if not self.running and self.redirected: + self.client._process_redirect_uri(self.redirected) + self.source = self.redirected.address + super(EventHubConsumer, self)._open(timeout_time) @property def queue_size(self): @@ -211,24 +174,37 @@ def receive(self, max_batch_size=None, timeout=None): max_batch_size = min(self.client.config.max_batch_size, self.prefetch) if max_batch_size is None else max_batch_size timeout = self.client.config.receive_timeout if timeout is None else timeout + if not timeout: + timeout = 100_000 # timeout None or 0 mean no timeout. 
100000 seconds is equivalent to no timeout data_batch = [] # type: List[EventData] + start_time = time.time() + timeout_time = start_time + timeout max_retries = self.client.config.max_retries retry_count = 0 + last_exception = None while True: try: - self._open() - timeout_ms = 1000 * timeout if timeout else 0 + self._open(timeout_time) + remaining_time = timeout_time - time.time() + if remaining_time <= 0.0: + if last_exception: + log.info("%r receive operation timed out. (%r)", self.name, last_exception) + raise last_exception + return data_batch + remaining_time_ms = 1000 * remaining_time message_batch = self._handler.receive_message_batch( max_batch_size=max_batch_size - (len(data_batch) if data_batch else 0), - timeout=timeout_ms) + timeout=remaining_time_ms) for message in message_batch: event_data = EventData(message=message) self.offset = EventPosition(event_data.offset) data_batch.append(event_data) return data_batch + except EventHubError: + raise except Exception as exception: - self._handle_exception(exception, retry_count, max_retries) + last_exception = self._handle_exception(exception, retry_count, max_retries, timeout_time) retry_count += 1 def close(self, exception=None): @@ -254,17 +230,6 @@ def close(self, exception=None): if self.messages_iter: self.messages_iter.close() self.messages_iter = None - self.running = False - if self.error: - return - if isinstance(exception, errors.LinkRedirect): - self.redirected = exception - elif isinstance(exception, EventHubError): - self.error = exception - elif exception: - self.error = EventHubError(str(exception)) - else: - self.error = EventHubError("This receive handler is now closed.") - self._handler.close() # this will close link if sharing connection. Otherwise close connection + super(EventHubConsumer, self).close(exception) next = __next__ # for python2.7 diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/error.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/error.py index db8ae4794e1e..650f6bacfec6 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/error.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/error.py @@ -3,6 +3,8 @@ # Licensed under the MIT License. See License.txt in the project root for license information. 
# -------------------------------------------------------------------------------------------- import six +import time +import logging from uamqp import constants, errors, compat @@ -15,6 +17,7 @@ b"com.microsoft:argument-error" ) +log = logging.getLogger(__name__) def _error_handler(error): """ @@ -130,10 +133,45 @@ class EventDataSendError(EventHubError): pass -def _handle_exception(exception, retry_count, max_retries, closable, log): - type_name = type(closable).__name__ +class OperationTimeoutError(EventHubError): + """Operation times out + + """ + pass + + +def _create_eventhub_exception(exception): + if isinstance(exception, errors.AuthenticationException): + error = AuthenticationError(str(exception), exception) + elif isinstance(exception, errors.VendorLinkDetach): + error = ConnectError(str(exception), exception) + elif isinstance(exception, errors.LinkDetach): + error = ConnectionLostError(str(exception), exception) + elif isinstance(exception, errors.ConnectionClose): + error = ConnectionLostError(str(exception), exception) + elif isinstance(exception, errors.MessageHandlerError): + error = ConnectionLostError(str(exception), exception) + elif isinstance(exception, errors.AMQPConnectionError): + error_type = AuthenticationError if str(exception).startswith("Unable to open authentication session") \ + else ConnectError + error = error_type(str(exception), exception) + elif isinstance(exception, compat.TimeoutException): + error = ConnectionLostError(str(exception), exception) + else: + error = EventHubError(str(exception), exception) + return error + + +def _handle_exception(exception, retry_count, max_retries, closable, timeout_time): + try: + name = closable.name + except AttributeError: + name = closable.container_id if isinstance(exception, KeyboardInterrupt): - log.info("{} stops due to keyboard interrupt".format(type_name)) + log.info("%r stops due to keyboard interrupt", name) + closable.close() + raise + elif isinstance(exception, EventHubError): closable.close() raise elif isinstance(exception, ( @@ -144,51 +182,23 @@ def _handle_exception(exception, retry_count, max_retries, closable, log): errors.MessageReleased, errors.MessageContentTooLarge) ): - log.error("Event data error (%r)", exception) + log.info("%r Event data error (%r)", name, exception) error = EventDataError(str(exception), exception) - closable.close(exception) raise error elif isinstance(exception, errors.MessageException): - log.error("Event data send error (%r)", exception) + log.info("%r Event data send error (%r)", name, exception) error = EventDataSendError(str(exception), exception) - closable.close(exception) raise error elif retry_count >= max_retries: - log.info("{} has an error and has exhausted retrying. (%r)".format(type_name), exception) - if isinstance(exception, errors.AuthenticationException): - log.info("{} authentication failed. Shutting down.".format(type_name)) - error = AuthenticationError(str(exception), exception) - elif isinstance(exception, errors.VendorLinkDetach): - log.info("{} link detached. Shutting down.".format(type_name)) - error = ConnectError(str(exception), exception) - elif isinstance(exception, errors.LinkDetach): - log.info("{} link detached. Shutting down.".format(type_name)) - error = ConnectionLostError(str(exception), exception) - elif isinstance(exception, errors.ConnectionClose): - log.info("{} connection closed. 
Shutting down.".format(type_name)) - error = ConnectionLostError(str(exception), exception) - elif isinstance(exception, errors.MessageHandlerError): - log.info("{} detached. Shutting down.".format(type_name)) - error = ConnectionLostError(str(exception), exception) - elif isinstance(exception, errors.AMQPConnectionError): - log.info("{} connection lost. Shutting down.".format(type_name)) - error_type = AuthenticationError if str(exception).startswith("Unable to open authentication session") \ - else ConnectError - error = error_type(str(exception), exception) - elif isinstance(exception, compat.TimeoutException): - log.info("{} timed out. Shutting down.".format(type_name)) - error = ConnectionLostError(str(exception), exception) - else: - log.error("Unexpected error occurred (%r). Shutting down.", exception) - error = EventHubError("Receive failed: {}".format(exception), exception) - closable.close() + error = _create_eventhub_exception(exception) + log.info("%r has exhausted retry. Exception still occurs (%r)", name, exception) raise error else: - log.info("{} has an exception (%r). Retrying...".format(type_name), exception) if isinstance(exception, errors.AuthenticationException): - closable._close_connection() + if hasattr(closable, "_close_connection"): + closable._close_connection() elif isinstance(exception, errors.LinkRedirect): - log.info("{} link redirected. Redirecting...".format(type_name)) + log.info("%r link redirect received. Redirecting...", name) redirect = exception if hasattr(closable, "_redirect"): closable._redirect(redirect) @@ -209,3 +219,20 @@ def _handle_exception(exception, retry_count, max_retries, closable, log): else: if hasattr(closable, "_close_connection"): closable._close_connection() + # start processing retry delay + try: + backoff_factor = closable.client.config.backoff_factor + backoff_max = closable.client.config.backoff_max + except AttributeError: + backoff_factor = closable.config.backoff_factor + backoff_max = closable.config.backoff_max + backoff = backoff_factor * 2 ** retry_count + if backoff <= backoff_max and time.time() + backoff <= timeout_time: + time.sleep(backoff) + log.info("%r has an exception (%r). Retrying...", format(name), exception) + return _create_eventhub_exception(exception) + else: + error = _create_eventhub_exception(exception) + log.info("%r operation has timed out. Last exception before timeout is (%r)", name, error) + raise error + # end of processing retry delay diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py index 3f3ff4c9492e..956da690801c 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py @@ -14,13 +14,13 @@ from uamqp import SendClient from azure.eventhub.common import EventData, _BatchSendEventData -from azure.eventhub.error import EventHubError, ConnectError, \ - AuthenticationError, EventDataError, EventDataSendError, ConnectionLostError, _error_handler, _handle_exception +from azure.eventhub.error import OperationTimeoutError, _error_handler +from ._consumer_producer_mixin import ConsumerProducerMixin log = logging.getLogger(__name__) -class EventHubProducer(object): +class EventHubProducer(ConsumerProducerMixin): """ A producer responsible for transmitting EventData to a specific Event Hub, grouped together in batches. 
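With the retry settings added to _Configuration above (retry_backoff_factor defaulting to 0.8 and retry_backoff_max to 120), the delay before the next attempt is backoff_factor * 2 ** retry_count, and the retry is abandoned once that delay would exceed the cap or overrun the call's overall deadline. A few lines to make the schedule concrete:

import time

def next_backoff(retry_count, deadline, backoff_factor=0.8, backoff_max=120):
    backoff = backoff_factor * 2 ** retry_count
    if backoff <= backoff_max and time.time() + backoff <= deadline:
        return backoff        # sleep this long, then retry
    return None               # give up: raise the translated error instead

deadline = time.time() + 60   # the operation-wide timeout of the receive/send call
print([next_backoff(n, deadline) for n in range(9)])
# [0.8, 1.6, 3.2, 6.4, 12.8, 25.6, 51.2, None, None] for a fresh 60-second budget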
Depending on the options specified at creation, the producer may @@ -51,6 +51,7 @@ def __init__(self, client, target, partition=None, send_timeout=60, keep_alive=N Default value is `True`. :type auto_reconnect: bool """ + super(EventHubProducer, self).__init__() self.running = False self.client = client self.target = target @@ -71,12 +72,6 @@ def __init__(self, client, target, partition=None, send_timeout=60, keep_alive=N self._outcome = None self._condition = None - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.close(exc_val) - def _create_handler(self): self._handler = SendClient( self.target, @@ -88,12 +83,7 @@ def _create_handler(self): client_name=self.name, properties=self.client._create_properties(self.client.config.user_agent)) # pylint: disable=protected-access - def _redirect(self, redirect): - self.redirected = redirect - self.running = False - self._close_connection() - - def _open(self): + def _open(self, timeout_time=None): """ Open the EventHubProducer using the supplied connection. If the handler has previously been redirected, the redirect @@ -101,50 +91,42 @@ def _open(self): """ # pylint: disable=protected-access - if not self.running: - if self.redirected: - self.target = self.redirected.address - self._create_handler() - self._handler.open(connection=self.client._conn_manager.get_connection( - self.client.address.hostname, - self.client.get_auth() - )) - while not self._handler.client_ready(): - time.sleep(0.05) - self.running = True - - def _close_handler(self): - self._handler.close() # close the link (sharing connection) or connection (not sharing) - self.running = False - - def _close_connection(self): - self._close_handler() - self.client._conn_manager.reset_connection_if_broken() - - def _handle_exception(self, exception, retry_count, max_retries): - _handle_exception(exception, retry_count, max_retries, self, log) - - def _send_event_data(self): + if not self.running and self.redirected: + self.client._process_redirect_uri(self.redirected) + self.target = self.redirected.address + super(EventHubProducer, self)._open() + + def _send_event_data(self, timeout=None): + timeout = self.client.config.send_timeout if timeout is None else timeout + if not timeout: + timeout = 100_000 # timeout None or 0 mean no timeout. 100000 seconds is equivalent to no timeout + start_time = time.time() + timeout_time = start_time + timeout max_retries = self.client.config.max_retries retry_count = 0 + last_exception = None while True: try: if self.unsent_events: - self._open() + self._open(timeout_time) + remaining_time = timeout_time - time.time() + if remaining_time <= 0.0: + if last_exception: + error = last_exception + else: + error = OperationTimeoutError("send operation timed out") + log.info("%r send operation timed out. (%r)", self.name, error) + raise error self._handler.queue_message(*self.unsent_events) self._handler.wait() self.unsent_events = self._handler.pending_messages - if self._outcome != constants.MessageSendResult.Ok: - _error(self._outcome, self._condition) + if self._outcome != constants.MessageSendResult.Ok: + _error(self._outcome, self._condition) return except Exception as exception: - self._handle_exception(exception, retry_count, max_retries) + last_exception = self._handle_exception(exception, retry_count, max_retries, timeout_time) retry_count += 1 - def _check_closed(self): - if self.error: - raise EventHubError("This producer has been closed. 
Please create a new producer to send event data.") - def _on_outcome(self, outcome, condition): """ Called when the outcome is received for a delivery. @@ -157,8 +139,8 @@ def _on_outcome(self, outcome, condition): self._outcome = outcome self._condition = condition - def send(self, event_data, partition_key=None): - # type:(Union[EventData, Iterable[EventData]], Union[str, bytes]) -> None + def send(self, event_data, partition_key=None, timeout=None): + # type:(Union[EventData, Iterable[EventData]], Union[str, bytes], float) -> None """ Sends an event data and blocks until acknowledgement is received or operation times out. @@ -195,7 +177,7 @@ def send(self, event_data, partition_key=None): partition_key=partition_key) if partition_key else _BatchSendEventData(event_data) wrapper_event_data.message.on_send_complete = self._on_outcome self.unsent_events = [wrapper_event_data.message] - self._send_event_data() + self._send_event_data(timeout=timeout) def close(self, exception=None): # type:(Exception) -> None @@ -217,18 +199,7 @@ def close(self, exception=None): :caption: Close down the handler. """ - self.running = False - if self.error: - return - if isinstance(exception, errors.LinkRedirect): - self.redirected = exception - elif isinstance(exception, EventHubError): - self.error = exception - elif exception: - self.error = EventHubError(str(exception)) - else: - self.error = EventHubError("This send handler is now closed.") - self._handler.close() + super(EventHubProducer, self).close(exception) def _error(outcome, condition): From 4f1778105765a1603a3b5bae006f7537389d5113 Mon Sep 17 00:00:00 2001 From: yijxie Date: Sun, 21 Jul 2019 23:32:07 -0700 Subject: [PATCH 07/42] put module method before class def --- .../azure/eventhub/producer.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py index 956da690801c..a6746ee1aff5 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py @@ -20,6 +20,18 @@ log = logging.getLogger(__name__) +def _error(outcome, condition): + if outcome != constants.MessageSendResult.Ok: + raise condition + + +def _set_partition_key(event_datas, partition_key): + ed_iter = iter(event_datas) + for ed in ed_iter: + ed._set_partition_key(partition_key) + yield ed + + class EventHubProducer(ConsumerProducerMixin): """ A producer responsible for transmitting EventData to a specific Event Hub, @@ -200,15 +212,3 @@ def close(self, exception=None): """ super(EventHubProducer, self).close(exception) - - -def _error(outcome, condition): - if outcome != constants.MessageSendResult.Ok: - raise condition - - -def _set_partition_key(event_datas, partition_key): - ed_iter = iter(event_datas) - for ed in ed_iter: - ed._set_partition_key(partition_key) - yield ed From f0d98d1eb5f9b6580a0c4f48f471bb87eddfc3d1 Mon Sep 17 00:00:00 2001 From: yijxie Date: Mon, 22 Jul 2019 11:38:49 -0700 Subject: [PATCH 08/42] fixed Client.get_properties error --- .../azure-eventhubs/azure/eventhub/aio/client_async.py | 2 +- .../azure-eventhubs/azure/eventhub/aio/error_async.py | 4 ++-- sdk/eventhub/azure-eventhubs/azure/eventhub/client.py | 2 +- sdk/eventhub/azure-eventhubs/azure/eventhub/error.py | 4 ++-- sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git 
a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/client_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/client_async.py index 513e13e4f75e..20462a7753f4 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/client_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/client_async.py @@ -98,7 +98,7 @@ def _create_auth(self, username=None, password=None): transport_type=transport_type) async def _handle_exception(self, exception, retry_count, max_retries): - await _handle_exception(exception, retry_count, max_retries, self, log) + await _handle_exception(exception, retry_count, max_retries, self) async def _close_connection(self): await self._conn_manager.reset_connection_if_broken() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/error_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/error_async.py index d78233c1f896..957a3005662e 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/error_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/error_async.py @@ -32,7 +32,7 @@ def _create_eventhub_exception(exception): return error -async def _handle_exception(exception, retry_count, max_retries, closable, timeout_time): +async def _handle_exception(exception, retry_count, max_retries, closable, timeout_time=None): try: name = closable.name except AttributeError: @@ -97,7 +97,7 @@ async def _handle_exception(exception, retry_count, max_retries, closable, timeo backoff_factor = closable.config.backoff_factor backoff_max = closable.config.backoff_max backoff = backoff_factor * 2 ** retry_count - if backoff <= backoff_max and time.time() + backoff <= timeout_time: + if backoff <= backoff_max and (timeout_time is None or time.time() + backoff <= timeout_time): await asyncio.sleep(backoff) log.info("%r has an exception (%r). 
Retrying...", format(name), exception) return _create_eventhub_exception(exception) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py index 5de8f8531093..b7fad40779c9 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py @@ -104,7 +104,7 @@ def _create_auth(self, username=None, password=None): transport_type=transport_type) def _handle_exception(self, exception, retry_count, max_retries): - _handle_exception(exception, retry_count, max_retries, self, log) + _handle_exception(exception, retry_count, max_retries, self) def _close_connection(self): self._conn_manager.reset_connection_if_broken() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/error.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/error.py index 650f6bacfec6..31f456f84eb8 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/error.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/error.py @@ -162,7 +162,7 @@ def _create_eventhub_exception(exception): return error -def _handle_exception(exception, retry_count, max_retries, closable, timeout_time): +def _handle_exception(exception, retry_count, max_retries, closable, timeout_time=None): try: name = closable.name except AttributeError: @@ -227,7 +227,7 @@ def _handle_exception(exception, retry_count, max_retries, closable, timeout_tim backoff_factor = closable.config.backoff_factor backoff_max = closable.config.backoff_max backoff = backoff_factor * 2 ** retry_count - if backoff <= backoff_max and time.time() + backoff <= timeout_time: + if backoff <= backoff_max and (timeout_time is None or time.time() + backoff <= timeout_time): time.sleep(backoff) log.info("%r has an exception (%r). 
Retrying...", format(name), exception) return _create_eventhub_exception(exception) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py index a6746ee1aff5..0a839463e4fb 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py @@ -106,7 +106,7 @@ def _open(self, timeout_time=None): if not self.running and self.redirected: self.client._process_redirect_uri(self.redirected) self.target = self.redirected.address - super(EventHubProducer, self)._open() + super(EventHubProducer, self)._open(timeout_time) def _send_event_data(self, timeout=None): timeout = self.client.config.send_timeout if timeout is None else timeout From ed7d41404500c4ee94dbcbfc90c260f7ffd0c715 Mon Sep 17 00:00:00 2001 From: yijxie Date: Fri, 26 Jul 2019 19:09:21 -0700 Subject: [PATCH 09/42] new eph (draft) --- .../azure-eventhubs/azure/eph/__init__.py | 11 ++ .../azure/eph/_cancellation_token.py | 22 ++++ .../azure/eph/_consumer_worker.py | 2 + .../azure/eph/checkpoint_manager.py | 19 +++ .../azure/eph/event_processor.py | 112 ++++++++++++++++++ .../azure/eph/in_memory_checkpoint_store.py | 37 ++++++ .../azure/eph/partition_manager.py | 41 +++++++ .../azure/eph/partition_processor.py | 32 +++++ .../azure/eph/sqlite3_partition_manager.py | 105 ++++++++++++++++ 9 files changed, 381 insertions(+) create mode 100644 sdk/eventhub/azure-eventhubs/azure/eph/__init__.py create mode 100644 sdk/eventhub/azure-eventhubs/azure/eph/_cancellation_token.py create mode 100644 sdk/eventhub/azure-eventhubs/azure/eph/_consumer_worker.py create mode 100644 sdk/eventhub/azure-eventhubs/azure/eph/checkpoint_manager.py create mode 100644 sdk/eventhub/azure-eventhubs/azure/eph/event_processor.py create mode 100644 sdk/eventhub/azure-eventhubs/azure/eph/in_memory_checkpoint_store.py create mode 100644 sdk/eventhub/azure-eventhubs/azure/eph/partition_manager.py create mode 100644 sdk/eventhub/azure-eventhubs/azure/eph/partition_processor.py create mode 100644 sdk/eventhub/azure-eventhubs/azure/eph/sqlite3_partition_manager.py diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/__init__.py b/sdk/eventhub/azure-eventhubs/azure/eph/__init__.py new file mode 100644 index 000000000000..4d8fbaf5b5d9 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eph/__init__.py @@ -0,0 +1,11 @@ +from .event_processor import EventProcessor +from .partition_processor import PartitionProcessor +from .partition_manager import PartitionManager +from .sqlite3_partition_manager import Sqlite3PartitionManager + +__all__ = [ + 'EventProcessor', + 'PartitionProcessor', + 'PartitionManager', + 'Sqlite3PartitionManager', +] \ No newline at end of file diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/_cancellation_token.py b/sdk/eventhub/azure-eventhubs/azure/eph/_cancellation_token.py new file mode 100644 index 000000000000..475e44337731 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eph/_cancellation_token.py @@ -0,0 +1,22 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. 
+# ----------------------------------------------------------------------------------- + +""" +Based on https://stackoverflow.com/questions/43229939/how-to-pass-a-boolean-by-reference-across-threads-and-modules +""" + + +class CancellationToken: + """ + Thread Safe Mutable Cancellation Token. + """ + def __init__(self): + self.is_cancelled = False + + def cancel(self): + """ + Cancel the token. + """ + self.is_cancelled = True diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/_consumer_worker.py b/sdk/eventhub/azure-eventhubs/azure/eph/_consumer_worker.py new file mode 100644 index 000000000000..64c65eb6d692 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eph/_consumer_worker.py @@ -0,0 +1,2 @@ +class ConsumerWorker(object): + pass \ No newline at end of file diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/checkpoint_manager.py b/sdk/eventhub/azure-eventhubs/azure/eph/checkpoint_manager.py new file mode 100644 index 000000000000..000e0b4bf11e --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eph/checkpoint_manager.py @@ -0,0 +1,19 @@ +from .partition_manager import PartitionManager + + +class CheckpointManager(object): + """Users use checkpoint manager to update checkpoint。 + + """ + def __init__(self, partition_id, eventhub_name, consumer_group_name, instance_id, partition_manager: PartitionManager): + self.partition_id = partition_id + self.eventhub_name = eventhub_name + self.consumer_group_name = consumer_group_name + self.instance_id = instance_id + self.partition_manager = partition_manager + + async def update_checkpoint(self, + offset, sequence_number): + await self.partition_manager.\ + update_checkpoint(self.eventhub_name, self.consumer_group_name, self.partition_id, self.instance_id, + offset, sequence_number) diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eph/event_processor.py new file mode 100644 index 000000000000..d270678d6575 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eph/event_processor.py @@ -0,0 +1,112 @@ +from typing import Callable +import uuid +import asyncio +from azure.eventhub import EventPosition, EventHubError +from azure.eventhub.aio import EventHubClient +from ._cancellation_token import CancellationToken +from .checkpoint_manager import CheckpointManager +from .partition_manager import PartitionManager +from .partition_processor import PartitionProcessor + + +class EventProcessor(object): + def __init__(self, consumer_group_name: str, eventhub_client: EventHubClient, + partition_processor_callable: Callable[[str, str, str, CheckpointManager], PartitionProcessor], + partition_manager: PartitionManager, **kwargs): + """ + + :param consumer_group_name: + :param eventhub_client: + :param partition_processor_callable: + :param partition_manager: + :param initial_event_position: + :param max_batch_size: + """ + self.consumer_group_name = consumer_group_name + self.eventhub_client = eventhub_client + self.eventhub_name = eventhub_client.eh_name + self.partition_processor_callable = partition_processor_callable + self.partition_manager = partition_manager + self.initial_event_position = kwargs.get("initial_event_position", "-1") + self.max_batch_size = kwargs.get("max_batch_size", 300) + self.max_wait_time = kwargs.get("max_wait_time") + self.tasks = [] + self.cancellation_token = CancellationToken() + self.instance_id = str(uuid.uuid4()) + self.partition_ids = None + + async def start(self): + client = self.eventhub_client + partition_ids = await client.get_partition_ids() + 
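EventProcessor.stop() relies on this token being polled cooperatively: each per-partition receive loop checks is_cancelled between batches and exits on its own once the flag is set. A minimal, self-contained illustration of that shape (asyncio.sleep stands in for consumer.receive, and the token class is repeated here so the snippet runs by itself):

import asyncio

class CancellationToken:                    # same two-method token as in the patch
    def __init__(self):
        self.is_cancelled = False
    def cancel(self):
        self.is_cancelled = True

async def receive_loop(token):
    batches = 0
    while not token.is_cancelled:
        batches += 1
        await asyncio.sleep(0.01)           # stands in for consumer.receive(timeout=max_wait_time)
    return batches

async def demo():
    token = CancellationToken()
    task = asyncio.ensure_future(receive_loop(token))
    await asyncio.sleep(0.05)
    token.cancel()                          # the essence of EventProcessor.stop()
    print(await task)                       # a handful of batches, then a clean exit

asyncio.run(demo())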
self.partition_ids = partition_ids + + claimed_list = await self._claim_partitions() + await self._start_claimed_partitions(claimed_list) + + async def stop(self): + self.cancellation_token.cancel() + await self.partition_manager.close() + + async def _claim_partitions(self): + partitions_ownership = await self.partition_manager.list_ownership(self.eventhub_name, self.consumer_group_name) + partitions_ownership_dict = dict() + for ownership in partitions_ownership: + partitions_ownership_dict[ownership["partition_id"]] = ownership + + to_claim_list = [] + for pid in self.partition_ids: + p_ownership = partitions_ownership_dict.get(pid) + if p_ownership: + to_claim_list.append(p_ownership) + else: + new_ownership = dict() + new_ownership["eventhub_name"] = self.eventhub_name + new_ownership["consumer_group_name"] = self.consumer_group_name + new_ownership["instance_id"] = self.instance_id + new_ownership["partition_id"] = pid + new_ownership["owner_level"] = 1 # will increment in preview 3 + to_claim_list.append(new_ownership) + claimed_list = await self.partition_manager.claim_ownership(to_claim_list) + return claimed_list + + async def _start_claimed_partitions(self, claimed_partitions): + consumers = [] + for partition in claimed_partitions: + partition_id = partition["partition_id"] + offset = partition.get("offset") + offset = offset or self.initial_event_position + consumer = self.eventhub_client.create_consumer(self.consumer_group_name, partition_id, + EventPosition(str(offset))) + consumers.append(consumer) + + partition_processor = self.partition_processor_callable( + eventhub_name=self.eventhub_name, + consumer_group_name=self.consumer_group_name, + partition_id=partition_id, + checkpoint_manager=CheckpointManager(partition_id, self.eventhub_name, self.consumer_group_name, + self.instance_id, self.partition_manager) + ) + + loop = asyncio.get_running_loop() + task = loop.create_task( + _receive(consumer, partition_processor, self.max_wait_time, self.cancellation_token)) + self.tasks.append(task) + + await asyncio.gather(*self.tasks) + await asyncio.gather(*[consumer.close() for consumer in consumers]) + + +async def _receive(partition_consumer, partition_processor, max_wait_time, cancellation_token): + try: + async with partition_consumer: + while not cancellation_token.is_cancelled: + events = await partition_consumer.receive(timeout=max_wait_time) + await partition_processor.process_events(events) + else: + await partition_processor.close(reason="Cancelled") + await partition_consumer.close() + except EventHubError as eh_err: + await partition_consumer.close() + await partition_processor.close(reason=eh_err) + except Exception as err: + await partition_processor.process_error(err) diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/in_memory_checkpoint_store.py b/sdk/eventhub/azure-eventhubs/azure/eph/in_memory_checkpoint_store.py new file mode 100644 index 000000000000..5bfcf8166b84 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eph/in_memory_checkpoint_store.py @@ -0,0 +1,37 @@ +import asyncio +import time +import uuid +from .partition_manager import PartitionManager + + +class InMemoryPartitionManager(PartitionManager): + def __init__(self): + self.lock = asyncio.Lock() + self.store = dict() + + async def list_ownership(self, eventhub_name, consumer_group_name): + return self.store.values() + + async def claim_ownership(self, partitions): + for partition in partitions: + partition_id = partition["partition_id"] + if partition_id not in self.store: + 
self.store[partition_id] = partition + partition["last_modified_time"] = time.time() + partition["ETag"] = uuid.uuid4() + return partitions + + async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_id, instance_id, + offset, sequence_number): + checkpoint = self.store.get(partition_id) + if not checkpoint: + checkpoint = dict() + self.store[partition_id] = checkpoint + checkpoint["eventhub_name"] = eventhub_name + checkpoint["consumer_group_name"] = consumer_group_name + checkpoint["instance_id"] = instance_id + checkpoint["partition_id"] = partition_id + checkpoint["offset"] = offset + checkpoint["sequence_number"] = sequence_number + + print("checkpoint saved: ", checkpoint) diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eph/partition_manager.py new file mode 100644 index 000000000000..329cf0d3d4ca --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eph/partition_manager.py @@ -0,0 +1,41 @@ +from typing import Iterable, Dict, Any +from abc import ABC, abstractmethod + + +class PartitionManager(ABC): + """Subclass this class to implement the read/write access to storage service. + + Users may do their own subclass for checkpoint storage. + """ + + @abstractmethod + async def list_ownership(self, eventhub_name: str, consumer_group_name: str) -> Iterable[Dict[str, Any]]: + """ + + :param eventhub_name: + :param consumer_group_name: + :return: Iterable of dictionaries containing the following partition ownership information: + eventhub_name + consumer_group_name + instance_id + partition_id + owner_level + offset + sequence_number + last_modified_time + etag + """ + pass + + @abstractmethod + async def claim_ownership(self, partitions: Iterable[Dict[str, Any]]) -> Iterable[Dict[str, Any]]: + pass + + @abstractmethod + async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_id, instance_id, + offset, sequence_number) -> None: + pass + + @abstractmethod + async def close(self): + pass diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/partition_processor.py b/sdk/eventhub/azure-eventhubs/azure/eph/partition_processor.py new file mode 100644 index 000000000000..e76b450412e1 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eph/partition_processor.py @@ -0,0 +1,32 @@ +from typing import List +from abc import ABC, abstractmethod +from .checkpoint_manager import CheckpointManager + +from azure.eventhub import EventData + + +class PartitionProcessor(ABC): + def __init__(self, eventhub_name, consumer_group_name, partition_id, checkpoint_manager: CheckpointManager): + self.partition_id = partition_id + self.eventhub_name = eventhub_name + self.consumer_group_name = consumer_group_name + self.checkpoint_manager = checkpoint_manager + + async def close(self, reason): + """Called when EventProcessor stops processing this PartitionProcessor. + + """ + pass + + @abstractmethod + async def process_events(self, events: List[EventData]): + """Called when a batch of events have been received. + + """ + pass + + async def process_error(self, error): + """Called when the underlying event hub partition consumer experiences an non-retriable error during receiving. 
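Taken together, these base classes suggest a fairly small user surface: subclass PartitionProcessor, implement process_events, and checkpoint through the supplied CheckpointManager. A possible subclass is sketched below; it assumes the azure.eph package from this patch is importable, and that EventData exposes body_as_str, offset and sequence_number as it does elsewhere in the SDK.

from azure.eph import PartitionProcessor

class MyPartitionProcessor(PartitionProcessor):
    async def process_events(self, events):
        for event in events:
            print(self.partition_id, event.body_as_str())
        if events:
            last = events[-1]   # offset/sequence_number attribute names assumed from EventData
            await self.checkpoint_manager.update_checkpoint(last.offset, last.sequence_number)

    async def process_error(self, error):
        print("partition {} hit a non-retryable error: {!r}".format(self.partition_id, error))

The class itself, not an instance, is what gets handed to EventProcessor as partition_processor_callable; the processor calls it with eventhub_name, consumer_group_name, partition_id and checkpoint_manager keyword arguments for each partition it claims.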
+ + """ + pass diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/sqlite3_partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eph/sqlite3_partition_manager.py new file mode 100644 index 000000000000..7d7c5a7ff16a --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eph/sqlite3_partition_manager.py @@ -0,0 +1,105 @@ +import asyncio +import time +import uuid +import sqlite3 +from .partition_manager import PartitionManager + + +class Sqlite3PartitionManager(PartitionManager): + def __init__(self, db_filename, ownership_table="ownership"): + super(Sqlite3PartitionManager, self).__init__() + self.ownership_table = ownership_table + conn = sqlite3.connect(db_filename) + c = conn.cursor() + try: + c.execute("create table " + ownership_table + + "(eventhub_name text," + "consumer_group_name text," + "instance_id text," + "partition_id text," + "owner_level integer," + "sequence_number integer," + "offset integer," + "last_modified_time integer," + "etag text)") + except sqlite3.OperationalError: + pass + finally: + c.close() + self.conn = conn + + def __del__(self): + self.conn.close() + + async def list_ownership(self, eventhub_name, consumer_group_name): + cursor = self.conn.cursor() + try: + cursor.execute("select " + "eventhub_name, " + "consumer_group_name," + "instance_id," + "partition_id," + "owner_level," + "sequence_number," + "offset," + "last_modified_time," + "etag " + "from "+self.ownership_table+" where eventhub_name=? " + "and consumer_group_name=?", + (eventhub_name, consumer_group_name)) + result_list = [] + for row in cursor.fetchall(): + d = dict() + d["eventhub_name"] = row[0] + d["consumer_group_name"] = row[1] + d["instance_id"] = row[2] + d["partition_id"] = row[3] + d["owner_level"] = row[4] + d["sequence_number"] = row[5] + d["offset"] = row[6] + d["last_modified_time"] = row[7] + d["etag"] = row[8] + result_list.append(d) + return result_list + finally: + cursor.close() + + async def claim_ownership(self, partitions): + cursor = self.conn.cursor() + try: + for p in partitions: + cursor.execute("select * from " + self.ownership_table + + " where eventhub_name=? " + "and consumer_group_name=? " + "and partition_id =?", + (p["eventhub_name"], p["consumer_group_name"], + p["partition_id"])) + if not cursor.fetchall(): + cursor.execute("insert into " + self.ownership_table + + " (eventhub_name,consumer_group_name,partition_id,instance_id,owner_level,last_modified_time,etag) " + "values (?,?,?,?,?,?,?)", + (p["eventhub_name"], p["consumer_group_name"], p["partition_id"], p["instance_id"], p["owner_level"], + time.time(), str(uuid.uuid4()) + )) + else: + cursor.execute("update "+self.ownership_table+" set instance_id=?, owner_level=?, last_modified_time=?, etag=? " + "where eventhub_name=? and consumer_group_name=? and partition_id=?", + (p["instance_id"], p["owner_level"], time.time(), str(uuid.uuid4()), + p["eventhub_name"], p["consumer_group_name"], p["partition_id"])) + self.conn.commit() + return partitions + finally: + cursor.close() + + async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_id, instance_id, + offset, sequence_number): + cursor = self.conn.cursor() + try: + cursor.execute("update "+self.ownership_table+" set offset=?, sequence_number=? where eventhub_name=? and consumer_group_name=? 
and partition_id=?", + (offset, sequence_number, eventhub_name, consumer_group_name, partition_id)) + self.conn.commit() + finally: + cursor.close() + + async def close(self): + self.conn.close() From ee228b474a8bf5d7f643659da7b5f153981974de Mon Sep 17 00:00:00 2001 From: yijxie Date: Sun, 28 Jul 2019 22:15:27 -0700 Subject: [PATCH 10/42] new eph (draft2) --- .../azure-eventhubs/azure/eph/__init__.py | 2 ++ .../azure-eventhubs/azure/eph/close_reason.py | 8 +++++ .../azure/eph/event_processor.py | 31 ++++++++++++++----- .../azure/eph/sqlite3_partition_manager.py | 4 --- 4 files changed, 33 insertions(+), 12 deletions(-) create mode 100644 sdk/eventhub/azure-eventhubs/azure/eph/close_reason.py diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/__init__.py b/sdk/eventhub/azure-eventhubs/azure/eph/__init__.py index 4d8fbaf5b5d9..7a74bdf3724b 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eph/__init__.py +++ b/sdk/eventhub/azure-eventhubs/azure/eph/__init__.py @@ -2,8 +2,10 @@ from .partition_processor import PartitionProcessor from .partition_manager import PartitionManager from .sqlite3_partition_manager import Sqlite3PartitionManager +from .close_reason import CloseReason __all__ = [ + 'CloseReason', 'EventProcessor', 'PartitionProcessor', 'PartitionManager', diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/close_reason.py b/sdk/eventhub/azure-eventhubs/azure/eph/close_reason.py new file mode 100644 index 000000000000..177ccc81a4f3 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eph/close_reason.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class CloseReason(Enum): + SHUTDOWN = 0 + LEASE_LOST = 1 + EXCEPTION = 2 + diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eph/event_processor.py index d270678d6575..aea0bc530bf7 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eph/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eph/event_processor.py @@ -1,17 +1,22 @@ from typing import Callable import uuid import asyncio +import logging + from azure.eventhub import EventPosition, EventHubError from azure.eventhub.aio import EventHubClient from ._cancellation_token import CancellationToken from .checkpoint_manager import CheckpointManager from .partition_manager import PartitionManager from .partition_processor import PartitionProcessor +from .close_reason import CloseReason + +logger = logging.getLogger(__name__) class EventProcessor(object): def __init__(self, consumer_group_name: str, eventhub_client: EventHubClient, - partition_processor_callable: Callable[[str, str, str, CheckpointManager], PartitionProcessor], + partition_processor_callable: Callable[..., PartitionProcessor], partition_manager: PartitionManager, **kwargs): """ @@ -36,14 +41,25 @@ def __init__(self, consumer_group_name: str, eventhub_client: EventHubClient, self.partition_ids = None async def start(self): + """Start the EventProcessor. 
+ + :param timeout: + """ + logger.info("EventProcessor %r is being started", self.instance_id) client = self.eventhub_client partition_ids = await client.get_partition_ids() self.partition_ids = partition_ids claimed_list = await self._claim_partitions() await self._start_claimed_partitions(claimed_list) + logger.info("EventProcessor %r is started", self.instance_id) async def stop(self): + """Stop all the partition consumer + + :param kwargs: + :return: + """ self.cancellation_token.cancel() await self.partition_manager.close() @@ -86,7 +102,6 @@ async def _start_claimed_partitions(self, claimed_partitions): checkpoint_manager=CheckpointManager(partition_id, self.eventhub_name, self.consumer_group_name, self.instance_id, self.partition_manager) ) - loop = asyncio.get_running_loop() task = loop.create_task( _receive(consumer, partition_processor, self.max_wait_time, self.cancellation_token)) @@ -103,10 +118,10 @@ async def _receive(partition_consumer, partition_processor, max_wait_time, cance events = await partition_consumer.receive(timeout=max_wait_time) await partition_processor.process_events(events) else: - await partition_processor.close(reason="Cancelled") - await partition_consumer.close() - except EventHubError as eh_err: + await partition_processor.close(reason=CloseReason.SHUTDOWN) + except Exception as exception: + # TODO: separate exception lease stolen + await partition_processor.close(reason=CloseReason.EXCEPTION) + finally: await partition_consumer.close() - await partition_processor.close(reason=eh_err) - except Exception as err: - await partition_processor.process_error(err) + # TODO: try to inform other EventProcessors to take the partition? diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/sqlite3_partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eph/sqlite3_partition_manager.py index 7d7c5a7ff16a..598508a39ba6 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eph/sqlite3_partition_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eph/sqlite3_partition_manager.py @@ -1,4 +1,3 @@ -import asyncio import time import uuid import sqlite3 @@ -28,9 +27,6 @@ def __init__(self, db_filename, ownership_table="ownership"): c.close() self.conn = conn - def __del__(self): - self.conn.close() - async def list_ownership(self, eventhub_name, consumer_group_name): cursor = self.conn.cursor() try: From 1d6571954aacfd8829f395964dc6d5ec71be7ed6 Mon Sep 17 00:00:00 2001 From: yijxie Date: Sun, 28 Jul 2019 22:15:54 -0700 Subject: [PATCH 11/42] remove in memory partition manager --- .../azure/eph/in_memory_checkpoint_store.py | 37 ------------------- 1 file changed, 37 deletions(-) delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eph/in_memory_checkpoint_store.py diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/in_memory_checkpoint_store.py b/sdk/eventhub/azure-eventhubs/azure/eph/in_memory_checkpoint_store.py deleted file mode 100644 index 5bfcf8166b84..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eph/in_memory_checkpoint_store.py +++ /dev/null @@ -1,37 +0,0 @@ -import asyncio -import time -import uuid -from .partition_manager import PartitionManager - - -class InMemoryPartitionManager(PartitionManager): - def __init__(self): - self.lock = asyncio.Lock() - self.store = dict() - - async def list_ownership(self, eventhub_name, consumer_group_name): - return self.store.values() - - async def claim_ownership(self, partitions): - for partition in partitions: - partition_id = partition["partition_id"] - if partition_id not in self.store: - self.store[partition_id] = 
partition - partition["last_modified_time"] = time.time() - partition["ETag"] = uuid.uuid4() - return partitions - - async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_id, instance_id, - offset, sequence_number): - checkpoint = self.store.get(partition_id) - if not checkpoint: - checkpoint = dict() - self.store[partition_id] = checkpoint - checkpoint["eventhub_name"] = eventhub_name - checkpoint["consumer_group_name"] = consumer_group_name - checkpoint["instance_id"] = instance_id - checkpoint["partition_id"] = partition_id - checkpoint["offset"] = offset - checkpoint["sequence_number"] = sequence_number - - print("checkpoint saved: ", checkpoint) From 4895385f68fa6c34fbfeddca20cf89011ce4985b Mon Sep 17 00:00:00 2001 From: yijxie Date: Mon, 29 Jul 2019 18:57:26 -0700 Subject: [PATCH 12/42] EventProcessor draft 3 --- .../azure/eph/_consumer_worker.py | 2 - .../azure/eph/checkpoint_manager.py | 19 -- .../azure-eventhubs/azure/eph/close_reason.py | 8 - .../azure/eph/event_processor.py | 127 ------------- .../azure/eph/partition_processor.py | 32 ---- .../eventprocessor}/__init__.py | 5 + .../eventprocessor}/_cancellation_token.py | 0 .../eventprocessor/checkpoint_manager.py | 31 ++++ .../eventhub/eventprocessor/close_reason.py | 13 ++ .../eventprocessor/event_processor.py | 170 ++++++++++++++++++ .../eventprocessor}/partition_manager.py | 9 +- .../eventprocessor/partition_processor.py | 34 ++++ .../sqlite3_partition_manager.py | 16 ++ 13 files changed, 275 insertions(+), 191 deletions(-) delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eph/_consumer_worker.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eph/checkpoint_manager.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eph/close_reason.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eph/event_processor.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eph/partition_processor.py rename sdk/eventhub/azure-eventhubs/azure/{eph => eventhub/eventprocessor}/__init__.py (53%) rename sdk/eventhub/azure-eventhubs/azure/{eph => eventhub/eventprocessor}/_cancellation_token.py (100%) create mode 100644 sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py create mode 100644 sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/close_reason.py create mode 100644 sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py rename sdk/eventhub/azure-eventhubs/azure/{eph => eventhub/eventprocessor}/partition_manager.py (69%) create mode 100644 sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py rename sdk/eventhub/azure-eventhubs/azure/{eph => eventhub/eventprocessor}/sqlite3_partition_manager.py (86%) diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/_consumer_worker.py b/sdk/eventhub/azure-eventhubs/azure/eph/_consumer_worker.py deleted file mode 100644 index 64c65eb6d692..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eph/_consumer_worker.py +++ /dev/null @@ -1,2 +0,0 @@ -class ConsumerWorker(object): - pass \ No newline at end of file diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/checkpoint_manager.py b/sdk/eventhub/azure-eventhubs/azure/eph/checkpoint_manager.py deleted file mode 100644 index 000e0b4bf11e..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eph/checkpoint_manager.py +++ /dev/null @@ -1,19 +0,0 @@ -from .partition_manager import PartitionManager - - -class CheckpointManager(object): - """Users use checkpoint manager to update checkpoint。 - - """ - def __init__(self, 
partition_id, eventhub_name, consumer_group_name, instance_id, partition_manager: PartitionManager): - self.partition_id = partition_id - self.eventhub_name = eventhub_name - self.consumer_group_name = consumer_group_name - self.instance_id = instance_id - self.partition_manager = partition_manager - - async def update_checkpoint(self, - offset, sequence_number): - await self.partition_manager.\ - update_checkpoint(self.eventhub_name, self.consumer_group_name, self.partition_id, self.instance_id, - offset, sequence_number) diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/close_reason.py b/sdk/eventhub/azure-eventhubs/azure/eph/close_reason.py deleted file mode 100644 index 177ccc81a4f3..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eph/close_reason.py +++ /dev/null @@ -1,8 +0,0 @@ -from enum import Enum - - -class CloseReason(Enum): - SHUTDOWN = 0 - LEASE_LOST = 1 - EXCEPTION = 2 - diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eph/event_processor.py deleted file mode 100644 index aea0bc530bf7..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eph/event_processor.py +++ /dev/null @@ -1,127 +0,0 @@ -from typing import Callable -import uuid -import asyncio -import logging - -from azure.eventhub import EventPosition, EventHubError -from azure.eventhub.aio import EventHubClient -from ._cancellation_token import CancellationToken -from .checkpoint_manager import CheckpointManager -from .partition_manager import PartitionManager -from .partition_processor import PartitionProcessor -from .close_reason import CloseReason - -logger = logging.getLogger(__name__) - - -class EventProcessor(object): - def __init__(self, consumer_group_name: str, eventhub_client: EventHubClient, - partition_processor_callable: Callable[..., PartitionProcessor], - partition_manager: PartitionManager, **kwargs): - """ - - :param consumer_group_name: - :param eventhub_client: - :param partition_processor_callable: - :param partition_manager: - :param initial_event_position: - :param max_batch_size: - """ - self.consumer_group_name = consumer_group_name - self.eventhub_client = eventhub_client - self.eventhub_name = eventhub_client.eh_name - self.partition_processor_callable = partition_processor_callable - self.partition_manager = partition_manager - self.initial_event_position = kwargs.get("initial_event_position", "-1") - self.max_batch_size = kwargs.get("max_batch_size", 300) - self.max_wait_time = kwargs.get("max_wait_time") - self.tasks = [] - self.cancellation_token = CancellationToken() - self.instance_id = str(uuid.uuid4()) - self.partition_ids = None - - async def start(self): - """Start the EventProcessor. 
- - :param timeout: - """ - logger.info("EventProcessor %r is being started", self.instance_id) - client = self.eventhub_client - partition_ids = await client.get_partition_ids() - self.partition_ids = partition_ids - - claimed_list = await self._claim_partitions() - await self._start_claimed_partitions(claimed_list) - logger.info("EventProcessor %r is started", self.instance_id) - - async def stop(self): - """Stop all the partition consumer - - :param kwargs: - :return: - """ - self.cancellation_token.cancel() - await self.partition_manager.close() - - async def _claim_partitions(self): - partitions_ownership = await self.partition_manager.list_ownership(self.eventhub_name, self.consumer_group_name) - partitions_ownership_dict = dict() - for ownership in partitions_ownership: - partitions_ownership_dict[ownership["partition_id"]] = ownership - - to_claim_list = [] - for pid in self.partition_ids: - p_ownership = partitions_ownership_dict.get(pid) - if p_ownership: - to_claim_list.append(p_ownership) - else: - new_ownership = dict() - new_ownership["eventhub_name"] = self.eventhub_name - new_ownership["consumer_group_name"] = self.consumer_group_name - new_ownership["instance_id"] = self.instance_id - new_ownership["partition_id"] = pid - new_ownership["owner_level"] = 1 # will increment in preview 3 - to_claim_list.append(new_ownership) - claimed_list = await self.partition_manager.claim_ownership(to_claim_list) - return claimed_list - - async def _start_claimed_partitions(self, claimed_partitions): - consumers = [] - for partition in claimed_partitions: - partition_id = partition["partition_id"] - offset = partition.get("offset") - offset = offset or self.initial_event_position - consumer = self.eventhub_client.create_consumer(self.consumer_group_name, partition_id, - EventPosition(str(offset))) - consumers.append(consumer) - - partition_processor = self.partition_processor_callable( - eventhub_name=self.eventhub_name, - consumer_group_name=self.consumer_group_name, - partition_id=partition_id, - checkpoint_manager=CheckpointManager(partition_id, self.eventhub_name, self.consumer_group_name, - self.instance_id, self.partition_manager) - ) - loop = asyncio.get_running_loop() - task = loop.create_task( - _receive(consumer, partition_processor, self.max_wait_time, self.cancellation_token)) - self.tasks.append(task) - - await asyncio.gather(*self.tasks) - await asyncio.gather(*[consumer.close() for consumer in consumers]) - - -async def _receive(partition_consumer, partition_processor, max_wait_time, cancellation_token): - try: - async with partition_consumer: - while not cancellation_token.is_cancelled: - events = await partition_consumer.receive(timeout=max_wait_time) - await partition_processor.process_events(events) - else: - await partition_processor.close(reason=CloseReason.SHUTDOWN) - except Exception as exception: - # TODO: separate exception lease stolen - await partition_processor.close(reason=CloseReason.EXCEPTION) - finally: - await partition_consumer.close() - # TODO: try to inform other EventProcessors to take the partition? 
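The `_cancellation_token` module imported above is carried across this commit unchanged (the rename reports 100% similarity), so its body never appears in any hunk. Judging only from the `cancel()` call in `stop()` and the `is_cancelled` check in the `_receive` loop, a minimal sketch of the flag it is assumed to expose could look like this (hypothetical; not code from this patch):

```python
class CancellationToken(object):
    """Sketch of the cooperative-cancellation flag assumed by EventProcessor.

    The real _cancellation_token module is not shown in this patch series.
    """

    def __init__(self):
        self.is_cancelled = False

    def cancel(self):
        # Receive tasks poll is_cancelled between receive() calls,
        # so a plain attribute is assumed to be sufficient here.
        self.is_cancelled = True
```

Because each partition task only checks the flag between `receive()` calls, `stop()` takes effect after the in-flight receive (up to `max_wait_time`) returns rather than immediately.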
diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/partition_processor.py b/sdk/eventhub/azure-eventhubs/azure/eph/partition_processor.py deleted file mode 100644 index e76b450412e1..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eph/partition_processor.py +++ /dev/null @@ -1,32 +0,0 @@ -from typing import List -from abc import ABC, abstractmethod -from .checkpoint_manager import CheckpointManager - -from azure.eventhub import EventData - - -class PartitionProcessor(ABC): - def __init__(self, eventhub_name, consumer_group_name, partition_id, checkpoint_manager: CheckpointManager): - self.partition_id = partition_id - self.eventhub_name = eventhub_name - self.consumer_group_name = consumer_group_name - self.checkpoint_manager = checkpoint_manager - - async def close(self, reason): - """Called when EventProcessor stops processing this PartitionProcessor. - - """ - pass - - @abstractmethod - async def process_events(self, events: List[EventData]): - """Called when a batch of events have been received. - - """ - pass - - async def process_error(self, error): - """Called when the underlying event hub partition consumer experiences an non-retriable error during receiving. - - """ - pass diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/__init__.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/__init__.py similarity index 53% rename from sdk/eventhub/azure-eventhubs/azure/eph/__init__.py rename to sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/__init__.py index 7a74bdf3724b..ae82c6d3c09e 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eph/__init__.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/__init__.py @@ -1,3 +1,8 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# ----------------------------------------------------------------------------------- + from .event_processor import EventProcessor from .partition_processor import PartitionProcessor from .partition_manager import PartitionManager diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/_cancellation_token.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_cancellation_token.py similarity index 100% rename from sdk/eventhub/azure-eventhubs/azure/eph/_cancellation_token.py rename to sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_cancellation_token.py diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py new file mode 100644 index 000000000000..58342c303b76 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py @@ -0,0 +1,31 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# ----------------------------------------------------------------------------------- + + +from .partition_manager import PartitionManager + + +class CheckpointManager(object): + """Every PartitionProcessor has a CheckpointManager to save the partition's checkpoint. 
+ + """ + def __init__(self, partition_id, eventhub_name, consumer_group_name, instance_id, partition_manager: PartitionManager): + self._partition_id = partition_id + self._eventhub_name = eventhub_name + self._consumer_group_name = consumer_group_name + self._instance_id = instance_id + self._partition_manager = partition_manager + + async def update_checkpoint(self, + offset, sequence_number): + """Users call this method in PartitionProcessor.process_events() to save checkpoints + + :param offset: offset of the processed EventData + :param sequence_number: sequence_number of the processed EventData + :return: None + """ + await self._partition_manager.\ + update_checkpoint(self._eventhub_name, self._consumer_group_name, self._partition_id, self._instance_id, + offset, sequence_number) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/close_reason.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/close_reason.py new file mode 100644 index 000000000000..dc308141a023 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/close_reason.py @@ -0,0 +1,13 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# ----------------------------------------------------------------------------------- + +from enum import Enum + + +class CloseReason(Enum): + SHUTDOWN = 0 # user call EventProcessor.stop() + LEASE_LOST = 1 # lose the ownership of a partition. + EVENTHUB_EXCEPTION = 2 # Exception happens during receiving events + USER_EXCEPTION = 3 # user's code in EventProcessor.process_events() raises an exception diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py new file mode 100644 index 000000000000..b4776e6ad00b --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -0,0 +1,170 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# ----------------------------------------------------------------------------------- + +from typing import Callable +import uuid +import asyncio +import logging + +from azure.eventhub import EventPosition, EventHubError +from azure.eventhub.aio import EventHubClient +from ._cancellation_token import CancellationToken +from .checkpoint_manager import CheckpointManager +from .partition_manager import PartitionManager +from .partition_processor import PartitionProcessor +from .close_reason import CloseReason + +log = logging.getLogger(__name__) + + +class EventProcessor(object): + def __init__(self, consumer_group_name: str, eventhub_client: EventHubClient, + partition_processor_callable: Callable[..., PartitionProcessor], + partition_manager: PartitionManager, **kwargs): + """An EventProcessor automatically creates and runs consumers for all partitions of the eventhub. + + It provides the user a convenient way to receive events from multiple partitions and save checkpoints. + If multiple EventProcessors are running for an event hub, they will automatically balance loading. This feature + won't be availabe until preview 3. 
+ + :param consumer_group_name: the consumer group that is used to receive events + from the event hub that the eventhub_client is going to receive events from + :param eventhub_client: an instance of azure.eventhub.aio.EventClient object + :param partition_processor_callable: a callable that is called to return a PartitionProcessor + :param partition_manager: an instance of a PartitionManager implementation + :param initial_event_position: the offset to start a partition consumer if the partition has no checkpoint yet + """ + self._consumer_group_name = consumer_group_name + self._eventhub_client = eventhub_client + self._eventhub_name = eventhub_client.eh_name + self._partition_processor_callable = partition_processor_callable + self._partition_manager = partition_manager + self._initial_event_position = kwargs.get("initial_event_position", "-1") + self._max_batch_size = eventhub_client.config.max_batch_size + self._receive_timeout = eventhub_client.config.receive_timeout + self._tasks = [] + self._cancellation_token = CancellationToken() + self._instance_id = str(uuid.uuid4()) + self._partition_ids = None + + async def start(self): + """Start the EventProcessor. + 1. retrieve the partition ids from eventhubs + 2. claim partition ownership of these partitions. + 3. repeatedly call EvenHubConsumer.receive() to retrieve events and + call user defined PartitionProcessor.process_events() + """ + log.info("EventProcessor %r is being started", self._instance_id) + client = self._eventhub_client + partition_ids = await client.get_partition_ids() + self.partition_ids = partition_ids + + claimed_list = await self._claim_partitions() + await self._start_claimed_partitions(claimed_list) + log.info("EventProcessor %r is started", self._instance_id) + + async def stop(self): + """Stop all the partition consumer + + It sends out a cancellation token to stop all partitions' EventHubConsumer will stop receiving events. + + """ + self._cancellation_token.cancel() + # It's not agreed whether a partition manager has method close(). 
+ await self._partition_manager.close() + + async def _claim_partitions(self): + partitions_ownership = await self._partition_manager.list_ownership(self._eventhub_name, self._consumer_group_name) + partitions_ownership_dict = dict() + for ownership in partitions_ownership: + partitions_ownership_dict[ownership["partition_id"]] = ownership + + to_claim_list = [] + for pid in self.partition_ids: + p_ownership = partitions_ownership_dict.get(pid) + if p_ownership: + to_claim_list.append(p_ownership) + else: + new_ownership = dict() + new_ownership["eventhub_name"] = self._eventhub_name + new_ownership["consumer_group_name"] = self._consumer_group_name + new_ownership["instance_id"] = self._instance_id + new_ownership["partition_id"] = pid + new_ownership["owner_level"] = 1 # will increment in preview 3 + to_claim_list.append(new_ownership) + claimed_list = await self._partition_manager.claim_ownership(to_claim_list) + return claimed_list + + async def _start_claimed_partitions(self, claimed_partitions): + consumers = [] + for partition in claimed_partitions: + partition_id = partition["partition_id"] + offset = partition.get("offset") + offset = offset or self._initial_event_position + consumer = self._eventhub_client.create_consumer(self._consumer_group_name, partition_id, + EventPosition(str(offset))) + consumers.append(consumer) + + partition_processor = self._partition_processor_callable( + eventhub_name=self._eventhub_name, + consumer_group_name=self._consumer_group_name, + partition_id=partition_id, + checkpoint_manager=CheckpointManager(partition_id, self._eventhub_name, self._consumer_group_name, + self._instance_id, self._partition_manager) + ) + loop = asyncio.get_running_loop() + task = loop.create_task( + _receive(consumer, partition_processor, self._receive_timeout, self._cancellation_token)) + self._tasks.append(task) + + await asyncio.gather(*self._tasks) + + +async def _receive(partition_consumer, partition_processor, receive_timeout, cancellation_token): + async with partition_consumer: + while not cancellation_token.is_cancelled: + try: + events = await partition_consumer.receive(timeout=receive_timeout) + except EventHubError as eh_err: + if eh_err.error == "link:stolen": + reason = CloseReason.LEASE_LOST + else: + reason = CloseReason.EVENTHUB_EXCEPTION + log.info( + "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " + "has met an exception receiving events. It's being closed. The exception is %r.", + partition_processor._checkpoint_manager._instance_id, + partition_processor._eventhub_name, + partition_processor._partition_id, + partition_processor._consumer_group_name, + eh_err + ) + await partition_processor.close(reason=reason) + break + try: + await partition_processor.process_events(events) + except Exception as exp: # user code has caused an error + log.info( + "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " + "has met an exception from user code process_events. It's being closed. 
The exception is %r.", + partition_processor.checkpoint_manager.instance_id, + partition_processor.eventhub_name, + partition_processor.partition_id, + partition_processor.consumer_group_name, + exp + ) + await partition_processor.close(reason=CloseReason.USER_EXCEPTION) + break + else: + log.info( + "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " + "has been shutdown", + partition_processor.checkpoint_manager.instance_id, + partition_processor.eventhub_name, + partition_processor.partition_id, + partition_processor.consumer_group_name + ) + await partition_processor.close(reason=CloseReason.SHUTDOWN) + # TODO: try to inform other EventProcessors to take the partition when this partition is closed in preview 3? diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_manager.py similarity index 69% rename from sdk/eventhub/azure-eventhubs/azure/eph/partition_manager.py rename to sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_manager.py index 329cf0d3d4ca..b60f5e716869 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eph/partition_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_manager.py @@ -1,11 +1,15 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# ----------------------------------------------------------------------------------- + from typing import Iterable, Dict, Any from abc import ABC, abstractmethod class PartitionManager(ABC): - """Subclass this class to implement the read/write access to storage service. + """Subclass PartitionManager to implement the read/write access to storage service to list/claim ownership and save checkpoint. - Users may do their own subclass for checkpoint storage. """ @abstractmethod @@ -36,6 +40,5 @@ async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_ offset, sequence_number) -> None: pass - @abstractmethod async def close(self): pass diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py new file mode 100644 index 000000000000..ba017d36751b --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py @@ -0,0 +1,34 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# ----------------------------------------------------------------------------------- + +from typing import List +from abc import ABC, abstractmethod +from .checkpoint_manager import CheckpointManager + +from azure.eventhub import EventData + + +class PartitionProcessor(ABC): + def __init__(self, eventhub_name, consumer_group_name, partition_id, checkpoint_manager: CheckpointManager): + self._partition_id = partition_id + self._eventhub_name = eventhub_name + self._consumer_group_name = consumer_group_name + self._checkpoint_manager = checkpoint_manager + + async def close(self, reason): + """Called when EventProcessor stops processing this PartitionProcessor. 
+ + There are four different reasons to trigger the PartitionProcessor to close. + Refer to enum class CloseReason of close_reason.py + + """ + pass + + @abstractmethod + async def process_events(self, events: List[EventData]): + """Called when a batch of events have been received. + + """ + pass diff --git a/sdk/eventhub/azure-eventhubs/azure/eph/sqlite3_partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py similarity index 86% rename from sdk/eventhub/azure-eventhubs/azure/eph/sqlite3_partition_manager.py rename to sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py index 598508a39ba6..3030f972f6c8 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eph/sqlite3_partition_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py @@ -1,3 +1,8 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# ----------------------------------------------------------------------------------- + import time import uuid import sqlite3 @@ -5,7 +10,18 @@ class Sqlite3PartitionManager(PartitionManager): + """An implementation of PartitionManager by using the sqlite3 in Python standard library. + Sqlite3 is a mini sql database that runs in memory or files. + + + """ def __init__(self, db_filename, ownership_table="ownership"): + """ + + :param db_filename: name of file that saves the sql data. + Sqlite3 will run in memory without a file when db_filename is ":memory:". + :param ownership_table: The table name of the sqlite3 database. + """ super(Sqlite3PartitionManager, self).__init__() self.ownership_table = ownership_table conn = sqlite3.connect(db_filename) From 6415ac2f1e9aafd20950b0e5cbd07ff3b39ed644 Mon Sep 17 00:00:00 2001 From: yijxie Date: Mon, 29 Jul 2019 19:02:31 -0700 Subject: [PATCH 13/42] small format change --- .../azure/eventhub/eventprocessor/checkpoint_manager.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py index 58342c303b76..a381400074a8 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py @@ -18,8 +18,7 @@ def __init__(self, partition_id, eventhub_name, consumer_group_name, instance_id self._instance_id = instance_id self._partition_manager = partition_manager - async def update_checkpoint(self, - offset, sequence_number): + async def update_checkpoint(self, offset, sequence_number): """Users call this method in PartitionProcessor.process_events() to save checkpoints :param offset: offset of the processed EventData From a685f87b158c64f49fada73571611809dbbbcfed Mon Sep 17 00:00:00 2001 From: yijxie Date: Tue, 30 Jul 2019 10:40:03 -0700 Subject: [PATCH 14/42] Fix logging --- .../eventprocessor/event_processor.py | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py index b4776e6ad00b..334def230a3f 100644 --- 
a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -60,10 +60,8 @@ async def start(self): client = self._eventhub_client partition_ids = await client.get_partition_ids() self.partition_ids = partition_ids - claimed_list = await self._claim_partitions() await self._start_claimed_partitions(claimed_list) - log.info("EventProcessor %r is started", self._instance_id) async def stop(self): """Stop all the partition consumer @@ -73,7 +71,7 @@ async def stop(self): """ self._cancellation_token.cancel() # It's not agreed whether a partition manager has method close(). - await self._partition_manager.close() + log.info("EventProcessor %r cancellation token has been sent", self._instance_id) async def _claim_partitions(self): partitions_ownership = await self._partition_manager.list_ownership(self._eventhub_name, self._consumer_group_name) @@ -120,6 +118,9 @@ async def _start_claimed_partitions(self, claimed_partitions): self._tasks.append(task) await asyncio.gather(*self._tasks) + await self._partition_manager.close() + log.info("EventProcessor %r partition manager is closed", self._instance_id) + log.info("EventProcessor %r partition has stopped", self._instance_id) async def _receive(partition_consumer, partition_processor, receive_timeout, cancellation_token): @@ -149,22 +150,22 @@ async def _receive(partition_consumer, partition_processor, receive_timeout, can log.info( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " "has met an exception from user code process_events. It's being closed. The exception is %r.", - partition_processor.checkpoint_manager.instance_id, - partition_processor.eventhub_name, - partition_processor.partition_id, - partition_processor.consumer_group_name, + partition_processor._checkpoint_manager._instance_id, + partition_processor._eventhub_name, + partition_processor._partition_id, + partition_processor._consumer_group_name, exp ) await partition_processor.close(reason=CloseReason.USER_EXCEPTION) break else: + await partition_processor.close(reason=CloseReason.SHUTDOWN) log.info( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " "has been shutdown", - partition_processor.checkpoint_manager.instance_id, - partition_processor.eventhub_name, - partition_processor.partition_id, - partition_processor.consumer_group_name + partition_processor._checkpoint_manager._instance_id, + partition_processor._eventhub_name, + partition_processor._partition_id, + partition_processor._consumer_group_name ) - await partition_processor.close(reason=CloseReason.SHUTDOWN) # TODO: try to inform other EventProcessors to take the partition when this partition is closed in preview 3? 
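Since `PartitionManager` is an abstract base class, the sqlite3 store introduced above can be swapped for any backend that implements `list_ownership`, `claim_ownership` and `update_checkpoint` (an in-memory variant was removed from the tree in an earlier commit of this series). As an illustration of that contract only, and not code from this patch, a minimal dict-backed manager might look like the following sketch; the class name and storage layout are assumptions:

```python
import time
import uuid

from azure.eventhub.eventprocessor import PartitionManager


class DictPartitionManager(PartitionManager):
    """Illustrative single-process store; hypothetical, not part of this patch."""

    def __init__(self):
        self._store = {}  # partition_id -> ownership/checkpoint dict

    async def list_ownership(self, eventhub_name, consumer_group_name):
        # Return every ownership record in the store; a real backend would
        # filter by eventhub_name and consumer_group_name.
        return list(self._store.values())

    async def claim_ownership(self, partitions):
        for partition in partitions:
            partition["last_modified_time"] = time.time()
            partition["etag"] = str(uuid.uuid4())
            self._store[partition["partition_id"]] = partition
        return partitions

    async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_id,
                                instance_id, offset, sequence_number):
        checkpoint = self._store.setdefault(partition_id, {"partition_id": partition_id})
        checkpoint.update(
            eventhub_name=eventhub_name,
            consumer_group_name=consumer_group_name,
            instance_id=instance_id,
            offset=offset,
            sequence_number=sequence_number,
        )

    async def close(self):
        self._store.clear()
```

Passing such a manager to `EventProcessor` behaves like the `Sqlite3PartitionManager(db_filename=":memory:")` case: ownership and checkpoints are tracked only for the lifetime of the process.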
From 6388f4a13c3b0ebdb1697e2a81a3a4e0f7582bda Mon Sep 17 00:00:00 2001 From: yijxie Date: Tue, 30 Jul 2019 10:40:22 -0700 Subject: [PATCH 15/42] Add EventProcessor example --- .../eventprocessor/event_processor_example.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py diff --git a/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py b/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py new file mode 100644 index 000000000000..ae65515ea56e --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py @@ -0,0 +1,37 @@ +import asyncio +import logging +import os +from azure.eventhub.aio import EventHubClient +from azure.eventhub.eventprocessor import EventProcessor +from azure.eventhub.eventprocessor import PartitionProcessor +from azure.eventhub.eventprocessor import Sqlite3PartitionManager + +logging.basicConfig(level=logging.INFO) + + +# Create you own PartitionProcessor +class MyPartitionProcessor(PartitionProcessor): + async def process_events(self, events): + print("PartitionProcessor for eventhub:{}, consumer group:{}, partition id:{}, number of events processed:{}". + format(self._eventhub_name, self._consumer_group_name, self._partition_id, len(events))) + if events: + await self._checkpoint_manager.update_checkpoint(events[-1].offset, events[-1].sequence_number) + + +CONNECTION_STR = os.environ["EVENT_HUB_CONN_STR"] + + +async def stop_after_awhile(event_processor, duration): + await asyncio.sleep(duration) + await event_processor.stop() + + +async def main(): + client = EventHubClient.from_connection_string(CONNECTION_STR) + partition_manager = Sqlite3PartitionManager(db_filename=":memory:") + event_processor = EventProcessor("$default", client, MyPartitionProcessor, partition_manager) + await asyncio.gather(event_processor.start(), stop_after_awhile(event_processor, 100)) + +if __name__ == '__main__': + loop = asyncio.get_event_loop() + loop.run_until_complete(main()) From 6394f19f042f3add1165081421f02e1badec6c0e Mon Sep 17 00:00:00 2001 From: Yunhao Ling <47871814+yunhaoling@users.noreply.github.com> Date: Mon, 29 Jul 2019 21:21:06 -0700 Subject: [PATCH 16/42] use decorator to implement retry logic and update some tests (#6544) --- .../eventhub/_consumer_producer_mixin.py | 22 +++++- .../aio/_consumer_producer_mixin_async.py | 22 +++++- .../azure/eventhub/aio/consumer_async.py | 63 +++++++++++++++- .../azure/eventhub/aio/producer_async.py | 40 +++++++++-- .../azure/eventhub/consumer.py | 72 +++++++++++++++++-- .../azure/eventhub/producer.py | 43 +++++++++-- .../tests/asynctests/test_negative_async.py | 12 ++++ .../tests/asynctests/test_receive_async.py | 6 +- .../tests/asynctests/test_send_async.py | 18 +++++ .../azure-eventhubs/tests/test_negative.py | 9 +++ .../azure-eventhubs/tests/test_send.py | 17 +++++ 11 files changed, 299 insertions(+), 25 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py index 95f6e908c404..f53736feeb03 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py @@ -7,12 +7,30 @@ import logging import time -from uamqp import errors +from uamqp import errors, constants from azure.eventhub.error import EventHubError, 
_handle_exception log = logging.getLogger(__name__) +def _retry_decorator(to_be_wrapped_func): + def wrapped_func(*args, **kwargs): + timeout = kwargs.get("timeout", None) + if not timeout: + timeout = 100000 # timeout None or 0 mean no timeout. 100000 seconds is equivalent to no timeout + timeout_time = time.time() + timeout + max_retries = args[0].client.config.max_retries + retry_count = 0 + last_exception = None + while True: + try: + return to_be_wrapped_func(args[0], timeout_time=timeout_time, last_exception=last_exception, **kwargs) + except Exception as exception: + last_exception = args[0]._handle_exception(exception, retry_count, max_retries, timeout_time) + retry_count += 1 + return wrapped_func + + class ConsumerProducerMixin(object): def __init__(self): self.client = None @@ -61,6 +79,8 @@ def _open(self, timeout_time=None): if timeout_time and time.time() >= timeout_time: return time.sleep(0.05) + self._max_message_size_on_link = self._handler.message_handler._link.peer_max_message_size \ + or constants.MAX_MESSAGE_LENGTH_BYTES # pylint: disable=protected-access self.running = True def _close_handler(self): diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py index e6b35ad41ae4..648138c93d73 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py @@ -6,13 +6,31 @@ import logging import time -from uamqp import errors +from uamqp import errors, constants from azure.eventhub.error import EventHubError, ConnectError from ..aio.error_async import _handle_exception log = logging.getLogger(__name__) +def _retry_decorator(to_be_wrapped_func): + async def wrapped_func(*args, **kwargs): + timeout = kwargs.get("timeout", None) + if not timeout: + timeout = 100000 # timeout None or 0 mean no timeout. 
100000 seconds is equivalent to no timeout + timeout_time = time.time() + timeout + max_retries = args[0].client.config.max_retries + retry_count = 0 + last_exception = None + while True: + try: + return await to_be_wrapped_func(args[0], timeout_time=timeout_time, last_exception=last_exception, **kwargs) + except Exception as exception: + last_exception = await args[0]._handle_exception(exception, retry_count, max_retries, timeout_time) + retry_count += 1 + return wrapped_func + + class ConsumerProducerMixin(object): def __init__(self): @@ -62,6 +80,8 @@ async def _open(self, timeout_time=None): if timeout_time and time.time() >= timeout_time: return await asyncio.sleep(0.05) + self._max_message_size_on_link = self._handler.message_handler._link.peer_max_message_size \ + or constants.MAX_MESSAGE_LENGTH_BYTES # pylint: disable=protected-access self.running = True async def _close_handler(self): diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py index dac2d0c0fa61..fdf029e37764 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py @@ -14,7 +14,7 @@ from azure.eventhub import EventData, EventPosition from azure.eventhub.error import EventHubError, AuthenticationError, ConnectError, ConnectionLostError, _error_handler from ..aio.error_async import _handle_exception -from ._consumer_producer_mixin_async import ConsumerProducerMixin +from ._consumer_producer_mixin_async import ConsumerProducerMixin, _retry_decorator log = logging.getLogger(__name__) @@ -159,11 +159,72 @@ def queue_size(self): return self._handler._received_messages.qsize() return 0 + @_retry_decorator + async def _receive(self, **kwargs): + timeout_time = kwargs.get("timeout_time") + last_exception = kwargs.get("last_exception") + max_batch_size = kwargs.get("max_batch_size") + data_batch = kwargs.get("data_batch") + + await self._open(timeout_time) + remaining_time = timeout_time - time.time() + if remaining_time <= 0.0: + if last_exception: + log.info("%r receive operation timed out. (%r)", self.name, last_exception) + raise last_exception + return data_batch + + remaining_time_ms = 1000 * remaining_time + message_batch = await self._handler.receive_message_batch_async( + max_batch_size=max_batch_size, + timeout=remaining_time_ms) + for message in message_batch: + event_data = EventData(message=message) + self.offset = EventPosition(event_data.offset) + data_batch.append(event_data) + return data_batch + async def receive(self, **kwargs): # type: (int, float) -> List[EventData] """ Receive events asynchronously from the EventHub. + :param max_batch_size: Receive a batch of events. Batch size will + be up to the maximum specified, but will return as soon as service + returns no new events. If combined with a timeout and no events are + retrieve before the time, the result will be empty. If no batch + size is supplied, the prefetch size will be the maximum. + :type max_batch_size: int + :param timeout: The maximum wait time to build up the requested message count for the batch. + If not specified, the default wait time specified when the consumer was created will be used. + :type timeout: float + :rtype: list[~azure.eventhub.common.EventData] + :raises: ~azure.eventhub.AuthenticationError, ~azure.eventhub.ConnectError, ~azure.eventhub.ConnectionLostError, + ~azure.eventhub.EventHubError + + Example: + .. 
literalinclude:: ../examples/async_examples/test_examples_eventhub_async.py + :start-after: [START eventhub_client_async_receive] + :end-before: [END eventhub_client_async_receive] + :language: python + :dedent: 4 + :caption: Receives events asynchronously + + """ + self._check_closed() + + max_batch_size = kwargs.get("max_batch_size", None) + timeout = kwargs.get("timeout", None) or self.client.config.receive_timeout + max_batch_size = min(self.client.config.max_batch_size, self.prefetch) if max_batch_size is None else max_batch_size + data_batch = [] # type: List[EventData] + + return await self._receive(timeout=timeout, max_batch_size=max_batch_size, data_batch=data_batch) + + async def _legacy_receive(self, **kwargs): + # type: (int, float) -> List[EventData] + """ + Receive events asynchronously from the EventHub. + :param max_batch_size: Receive a batch of events. Batch size will be up to the maximum specified, but will return as soon as service returns no new events. If combined with a timeout and no events are diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py index e326aef0a115..0dae1734419a 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py @@ -14,7 +14,7 @@ from azure.eventhub.common import EventData, EventDataBatch from azure.eventhub.error import _error_handler, OperationTimeoutError, EventDataError from ..producer import _error, _set_partition_key -from ._consumer_producer_mixin_async import ConsumerProducerMixin +from ._consumer_producer_mixin_async import ConsumerProducerMixin, _retry_decorator log = logging.getLogger(__name__) @@ -98,7 +98,7 @@ def _create_handler(self): self.client.config.user_agent), # pylint: disable=protected-access loop=self.loop) - async def _open(self, timeout_time=None): + async def _open(self, timeout_time=None, **kwargs): """ Open the EventHubProducer using the supplied connection. If the handler has previously been redirected, the redirect @@ -110,7 +110,32 @@ async def _open(self, timeout_time=None): self.target = self.redirected.address await super(EventHubProducer, self)._open(timeout_time) - async def _send_event_data(self, timeout=None): + @_retry_decorator + async def _send_event_data(self, **kwargs): + timeout_time = kwargs.get("timeout_time") + last_exception = kwargs.get("last_exception") + + if self.unsent_events: + await self._open(timeout_time) + remaining_time = timeout_time - time.time() + if remaining_time <= 0.0: + if last_exception: + error = last_exception + else: + error = OperationTimeoutError("send operation timed out") + log.info("%r send operation timed out. (%r)", self.name, error) + raise error + self._handler._msg_timeout = remaining_time # pylint: disable=protected-access + self._handler.queue_message(*self.unsent_events) + await self._handler.wait_async() + self.unsent_events = self._handler.pending_messages + if self._outcome != constants.MessageSendResult.Ok: + if self._outcome == constants.MessageSendResult.Timeout: + self._condition = OperationTimeoutError("send operation timed out") + _error(self._outcome, self._condition) + return + + async def _legacy_send_event_data(self, timeout=None): timeout = timeout or self.client.config.send_timeout if not timeout: timeout = 100000 # timeout None or 0 mean no timeout. 
100000 seconds is equivalent to no timeout @@ -170,14 +195,19 @@ async def create_batch(self, **kwargs): """ max_size = kwargs.get("max_size", None) partition_key = kwargs.get("partition_key", None) + + @_retry_decorator + async def wrapped_open(*args, **kwargs): + await self._open(**kwargs) + if not self._max_message_size_on_link: - await self._open() + await wrapped_open(self, timeout=self.client.config.send_timeout) if max_size and max_size > self._max_message_size_on_link: raise ValueError('Max message size: {} is too large, acceptable max batch size is: {} bytes.' .format(max_size, self._max_message_size_on_link)) - return EventDataBatch(max_size or self._max_message_size_on_link, partition_key) + return EventDataBatch(max_size=(max_size or self._max_message_size_on_link), partition_key=partition_key) async def send(self, event_data, **kwargs): # type:(Union[EventData, EventDataBatch, Iterable[EventData]], Union[str, bytes]) -> None diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py index e59c440c7c88..1139a8b725d4 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py @@ -14,7 +14,7 @@ from azure.eventhub.common import EventData, EventPosition from azure.eventhub.error import _error_handler, EventHubError -from ._consumer_producer_mixin import ConsumerProducerMixin +from ._consumer_producer_mixin import ConsumerProducerMixin, _retry_decorator log = logging.getLogger(__name__) @@ -152,7 +152,65 @@ def queue_size(self): return self._handler._received_messages.qsize() return 0 + @_retry_decorator + def _receive(self, **kwargs): + timeout_time = kwargs.get("timeout_time") + last_exception = kwargs.get("last_exception") + max_batch_size = kwargs.get("max_batch_size") + data_batch = kwargs.get("data_batch") + + self._open(timeout_time) + remaining_time = timeout_time - time.time() + if remaining_time <= 0.0: + if last_exception: + log.info("%r receive operation timed out. (%r)", self.name, last_exception) + raise last_exception + return data_batch + remaining_time_ms = 1000 * remaining_time + message_batch = self._handler.receive_message_batch( + max_batch_size=max_batch_size - (len(data_batch) if data_batch else 0), + timeout=remaining_time_ms) + for message in message_batch: + event_data = EventData(message=message) + self.offset = EventPosition(event_data.offset) + data_batch.append(event_data) + return data_batch + def receive(self, **kwargs): + """ + Receive events from the EventHub. + + :param max_batch_size: Receive a batch of events. Batch size will + be up to the maximum specified, but will return as soon as service + returns no new events. If combined with a timeout and no events are + retrieve before the time, the result will be empty. If no batch + size is supplied, the prefetch size will be the maximum. + :type max_batch_size: int + :param timeout: The maximum wait time to build up the requested message count for the batch. + If not specified, the default wait time specified when the consumer was created will be used. + :type timeout: float + :rtype: list[~azure.eventhub.common.EventData] + :raises: ~azure.eventhub.AuthenticationError, ~azure.eventhub.ConnectError, ~azure.eventhub.ConnectionLostError, + ~azure.eventhub.EventHubError + Example: + .. 
literalinclude:: ../examples/test_examples_eventhub.py + :start-after: [START eventhub_client_sync_receive] + :end-before: [END eventhub_client_sync_receive] + :language: python + :dedent: 4 + :caption: Receive events from the EventHub. + + """ + self._check_closed() + + max_batch_size = kwargs.get("max_batch_size", None) + timeout = kwargs.get("timeout", None) or self.client.config.receive_timeout + max_batch_size = min(self.client.config.max_batch_size, self.prefetch) if max_batch_size is None else max_batch_size + data_batch = [] # type: List[EventData] + + return self._receive(timeout=timeout, max_batch_size=max_batch_size, data_batch=data_batch) + + def _legacy_receive(self, **kwargs): # type:(int, float) -> List[EventData] """ Receive events from the EventHub. @@ -182,17 +240,19 @@ def receive(self, **kwargs): timeout = kwargs.get("timeout", None) self._check_closed() + max_batch_size = min(self.client.config.max_batch_size, self.prefetch) if max_batch_size is None else max_batch_size + data_batch = [] # type: List[EventData] + timeout = self.client.config.receive_timeout if timeout is None else timeout if not timeout: timeout = 100000 # timeout None or 0 mean no timeout. 100000 seconds is equivalent to no timeout - - data_batch = [] # type: List[EventData] - start_time = time.time() - timeout_time = start_time + timeout + timeout_time = time.time() + timeout max_retries = self.client.config.max_retries retry_count = 0 last_exception = None + + self._receive() while True: try: self._open(timeout_time) @@ -211,8 +271,6 @@ def receive(self, **kwargs): self.offset = EventPosition(event_data.offset) data_batch.append(event_data) return data_batch - except EventHubError: - raise except Exception as exception: last_exception = self._handle_exception(exception, retry_count, max_retries, timeout_time) retry_count += 1 diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py index da2a9ee95368..01c3c28fe374 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py @@ -15,7 +15,7 @@ from azure.eventhub.common import EventData, EventDataBatch from azure.eventhub.error import _error_handler, OperationTimeoutError, EventDataError -from ._consumer_producer_mixin import ConsumerProducerMixin +from ._consumer_producer_mixin import ConsumerProducerMixin, _retry_decorator log = logging.getLogger(__name__) @@ -105,7 +105,7 @@ def _create_handler(self): link_properties=self._link_properties, properties=self.client._create_properties(self.client.config.user_agent)) # pylint: disable=protected-access - def _open(self, timeout_time=None): + def _open(self, timeout_time=None, **kwargs): """ Open the EventHubProducer using the supplied connection. If the handler has previously been redirected, the redirect @@ -118,12 +118,36 @@ def _open(self, timeout_time=None): self.target = self.redirected.address super(EventHubProducer, self)._open(timeout_time) - def _send_event_data(self, timeout=None): + @_retry_decorator + def _send_event_data(self, **kwargs): + timeout_time = kwargs.get("timeout_time") + last_exception = kwargs.get("last_exception") + + if self.unsent_events: + self._open(timeout_time) + remaining_time = timeout_time - time.time() + if remaining_time <= 0.0: + if last_exception: + error = last_exception + else: + error = OperationTimeoutError("send operation timed out") + log.info("%r send operation timed out. 
(%r)", self.name, error) + raise error + self._handler._msg_timeout = remaining_time # pylint: disable=protected-access + self._handler.queue_message(*self.unsent_events) + self._handler.wait() + self.unsent_events = self._handler.pending_messages + if self._outcome != constants.MessageSendResult.Ok: + if self._outcome == constants.MessageSendResult.Timeout: + self._condition = OperationTimeoutError("send operation timed out") + _error(self._outcome, self._condition) + return + + def _legacy_send_event_data(self, timeout=None): timeout = timeout or self.client.config.send_timeout if not timeout: timeout = 100000 # timeout None or 0 mean no timeout. 100000 seconds is equivalent to no timeout - start_time = time.time() - timeout_time = start_time + timeout + timeout_time = time.time() + timeout max_retries = self.client.config.max_retries retry_count = 0 last_exception = None @@ -178,14 +202,19 @@ def create_batch(self, **kwargs): """ max_size = kwargs.get("max_size", None) partition_key = kwargs.get("partition_key", None) + + @_retry_decorator + def wrapped_open(*args, **kwargs): + self._open(**kwargs) + if not self._max_message_size_on_link: - self._open() + wrapped_open(self, timeout=self.client.config.send_timeout) if max_size and max_size > self._max_message_size_on_link: raise ValueError('Max message size: {} is too large, acceptable max batch size is: {} bytes.' .format(max_size, self._max_message_size_on_link)) - return EventDataBatch(max_size or self._max_message_size_on_link, partition_key) + return EventDataBatch(max_size=(max_size or self._max_message_size_on_link), partition_key=partition_key) def send(self, event_data, **kwargs): # type:(Union[EventData, EventDataBatch, Iterable[EventData]], Union[str, bytes], float) -> None diff --git a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py index 3d43942fe6c8..4d17002eb9fd 100644 --- a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py +++ b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py @@ -100,6 +100,7 @@ async def test_non_existing_entity_sender_async(connection_str): sender = client.create_producer(partition_id="1") with pytest.raises(AuthenticationError): await sender.send(EventData("test data")) + await sender.close() @pytest.mark.liveTest @@ -109,6 +110,7 @@ async def test_non_existing_entity_receiver_async(connection_str): receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) with pytest.raises(AuthenticationError): await receiver.receive(timeout=5) + await receiver.close() @pytest.mark.liveTest @@ -196,3 +198,13 @@ async def test_max_receivers_async(connstr_senders): failed = [o for o in outputs if isinstance(o, EventHubError)] assert len(failed) == 1 print(failed[0].message) + + +@pytest.mark.liveTest +@pytest.mark.asyncio +async def test_create_batch_with_invalid_hostname(invalid_hostname): + client = EventHubClient.from_connection_string(invalid_hostname, network_tracing=False) + sender = client.create_producer() + with pytest.raises(AuthenticationError): + batch_event_data = await sender.create_batch(max_size=300, partition_key="key") + await sender.close() diff --git a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_receive_async.py b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_receive_async.py index ae696bf469b5..2a2e4836c2d5 100644 --- a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_receive_async.py +++ 
b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_receive_async.py @@ -185,7 +185,7 @@ async def test_exclusive_receiver_async(connstr_senders): await pump(receiver1) output2 = await pump(receiver2) with pytest.raises(ConnectionLostError): - await receiver1.receive(timeout=1) + await receiver1.receive(timeout=3) assert output2 == 1 finally: await receiver1.close() @@ -230,7 +230,7 @@ async def test_exclusive_receiver_after_non_exclusive_receiver_async(connstr_sen await pump(receiver1) output2 = await pump(receiver2) with pytest.raises(ConnectionLostError): - await receiver1.receive(timeout=1) + await receiver1.receive(timeout=3) assert output2 == 1 finally: await receiver1.close() @@ -248,7 +248,7 @@ async def test_non_exclusive_receiver_after_exclusive_receiver_async(connstr_sen receiver2 = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1"), prefetch=10) try: output1 = await pump(receiver1) - with pytest.raises(ConnectError): + with pytest.raises(ConnectionLostError): await pump(receiver2) assert output1 == 1 finally: diff --git a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_send_async.py b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_send_async.py index 3d5fb70601ea..c84268d15f21 100644 --- a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_send_async.py +++ b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_send_async.py @@ -246,3 +246,21 @@ async def test_send_over_websocket_async(connstr_receivers): for r in receivers: r.close() + + +@pytest.mark.liveTest +@pytest.mark.asyncio +async def test_send_with_create_event_batch_async(connstr_receivers): + connection_str, receivers = connstr_receivers + client = EventHubClient.from_connection_string(connection_str, transport_type=TransportType.AmqpOverWebsocket, network_tracing=False) + sender = client.create_producer() + + event_data_batch = await sender.create_batch(max_size=100 * 1024) + while True: + try: + event_data_batch.try_add(EventData('A single event data')) + except ValueError: + break + + await sender.send(event_data_batch) + await sender.close() diff --git a/sdk/eventhub/azure-eventhubs/tests/test_negative.py b/sdk/eventhub/azure-eventhubs/tests/test_negative.py index 4749df940d9c..01707f4256e8 100644 --- a/sdk/eventhub/azure-eventhubs/tests/test_negative.py +++ b/sdk/eventhub/azure-eventhubs/tests/test_negative.py @@ -214,3 +214,12 @@ def test_message_body_types(connstr_senders): raise finally: receiver.close() + + +@pytest.mark.liveTest +def test_create_batch_with_invalid_hostname(invalid_hostname): + client = EventHubClient.from_connection_string(invalid_hostname, network_tracing=False) + sender = client.create_producer() + with pytest.raises(AuthenticationError): + batch_event_data = sender.create_batch(max_size=300, partition_key="key") + sender.close() diff --git a/sdk/eventhub/azure-eventhubs/tests/test_send.py b/sdk/eventhub/azure-eventhubs/tests/test_send.py index f50ac702fb52..3d7bc3815c22 100644 --- a/sdk/eventhub/azure-eventhubs/tests/test_send.py +++ b/sdk/eventhub/azure-eventhubs/tests/test_send.py @@ -249,3 +249,20 @@ def test_send_over_websocket_sync(connstr_receivers): received.extend(r.receive(timeout=3)) assert len(received) == 20 + + +@pytest.mark.liveTest +def test_send_with_create_event_batch_sync(connstr_receivers): + connection_str, receivers = connstr_receivers + client = EventHubClient.from_connection_string(connection_str, transport_type=TransportType.AmqpOverWebsocket, network_tracing=False) + sender = client.create_producer() + + 
event_data_batch = sender.create_batch(max_size=100 * 1024) + while True: + try: + event_data_batch.try_add(EventData('A single event data')) + except ValueError: + break + + sender.send(event_data_batch) + sender.close() From 56fdd1e6d4d9b2469d0092c77173ae68c879092a Mon Sep 17 00:00:00 2001 From: Yunhao Ling <47871814+yunhaoling@users.noreply.github.com> Date: Tue, 30 Jul 2019 01:08:17 -0700 Subject: [PATCH 17/42] Update livetest (#6547) --- .../azure-eventhubs/tests/asynctests/test_negative_async.py | 2 +- .../azure-eventhubs/tests/asynctests/test_reconnect_async.py | 2 +- sdk/eventhub/azure-eventhubs/tests/test_negative.py | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py index 4d17002eb9fd..7be6974954aa 100644 --- a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py +++ b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py @@ -121,7 +121,7 @@ async def test_receive_from_invalid_partitions_async(connection_str): client = EventHubClient.from_connection_string(connection_str, network_tracing=False) receiver = client.create_consumer(consumer_group="$default", partition_id=p, event_position=EventPosition("-1")) with pytest.raises(ConnectError): - await receiver.receive(timeout=10) + await receiver.receive(timeout=5) await receiver.close() diff --git a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_reconnect_async.py b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_reconnect_async.py index 91357e2553b9..56c57924edde 100644 --- a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_reconnect_async.py +++ b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_reconnect_async.py @@ -37,7 +37,7 @@ async def test_send_with_long_interval_async(connstr_receivers, sleep): for r in receivers: if not sleep: # if sender sleeps, the receivers will be disconnected. 
destroy connection to simulate r._handler._connection._conn.destroy() - received.extend(r.receive(timeout=1)) + received.extend(r.receive(timeout=3)) assert len(received) == 2 assert list(received[0].body)[0] == b"A single event" diff --git a/sdk/eventhub/azure-eventhubs/tests/test_negative.py b/sdk/eventhub/azure-eventhubs/tests/test_negative.py index 01707f4256e8..22743096b109 100644 --- a/sdk/eventhub/azure-eventhubs/tests/test_negative.py +++ b/sdk/eventhub/azure-eventhubs/tests/test_negative.py @@ -108,7 +108,6 @@ def test_non_existing_entity_receiver(connection_str): receiver.close() - @pytest.mark.liveTest def test_receive_from_invalid_partitions_sync(connection_str): partitions = ["XYZ", "-1", "1000", "-"] @@ -117,7 +116,7 @@ def test_receive_from_invalid_partitions_sync(connection_str): receiver = client.create_consumer(consumer_group="$default", partition_id=p, event_position=EventPosition("-1")) try: with pytest.raises(ConnectError): - receiver.receive(timeout=10) + receiver.receive(timeout=5) finally: receiver.close() From 10f0be665ecc394cdfdd76ec6ca25324664ca3ef Mon Sep 17 00:00:00 2001 From: Yunhao Ling <47871814+yunhaoling@users.noreply.github.com> Date: Tue, 30 Jul 2019 11:26:46 -0700 Subject: [PATCH 18/42] Remove legacy code and update livetest (#6549) * Update livetest * Remove legacy code and update livetest --- .../azure/eventhub/aio/consumer_async.py | 67 ------------------- .../azure/eventhub/aio/producer_async.py | 38 +---------- .../azure/eventhub/consumer.py | 65 ------------------ .../azure/eventhub/producer.py | 37 +--------- .../tests/asynctests/test_negative_async.py | 35 ++++++++-- .../azure-eventhubs/tests/test_negative.py | 21 +++++- 6 files changed, 55 insertions(+), 208 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py index fdf029e37764..aefdebfb2a5a 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py @@ -220,73 +220,6 @@ async def receive(self, **kwargs): return await self._receive(timeout=timeout, max_batch_size=max_batch_size, data_batch=data_batch) - async def _legacy_receive(self, **kwargs): - # type: (int, float) -> List[EventData] - """ - Receive events asynchronously from the EventHub. - - :param max_batch_size: Receive a batch of events. Batch size will - be up to the maximum specified, but will return as soon as service - returns no new events. If combined with a timeout and no events are - retrieve before the time, the result will be empty. If no batch - size is supplied, the prefetch size will be the maximum. - :type max_batch_size: int - :param timeout: The maximum wait time to build up the requested message count for the batch. - If not specified, the default wait time specified when the consumer was created will be used. - :type timeout: float - :rtype: list[~azure.eventhub.common.EventData] - :raises: ~azure.eventhub.AuthenticationError, ~azure.eventhub.ConnectError, ~azure.eventhub.ConnectionLostError, - ~azure.eventhub.EventHubError - - Example: - .. 
literalinclude:: ../examples/async_examples/test_examples_eventhub_async.py - :start-after: [START eventhub_client_async_receive] - :end-before: [END eventhub_client_async_receive] - :language: python - :dedent: 4 - :caption: Receives events asynchronously - - """ - max_batch_size = kwargs.get("max_batch_size", None) - timeout = kwargs.get("timeout", None) - - self._check_closed() - max_batch_size = min(self.client.config.max_batch_size, self.prefetch) if max_batch_size is None else max_batch_size - timeout = self.client.config.receive_timeout if timeout is None else timeout - if not timeout: - timeout = 100000 # timeout None or 0 mean no timeout. 100000 seconds is equivalent to no timeout - - data_batch = [] - start_time = time.time() - timeout_time = start_time + timeout - max_retries = self.client.config.max_retries - retry_count = 0 - last_exception = None - while True: - try: - await self._open(timeout_time) - remaining_time = timeout_time - time.time() - if remaining_time <= 0.0: - if last_exception: - log.info("%r receive operation timed out. (%r)", self.name, last_exception) - raise last_exception - return data_batch - - remaining_time_ms = 1000 * remaining_time - message_batch = await self._handler.receive_message_batch_async( - max_batch_size=max_batch_size, - timeout=remaining_time_ms) - for message in message_batch: - event_data = EventData(message=message) - self.offset = EventPosition(event_data.offset) - data_batch.append(event_data) - return data_batch - except EventHubError: - raise - except Exception as exception: - last_exception = await self._handle_exception(exception, retry_count, max_retries, timeout_time) - retry_count += 1 - async def close(self, exception=None): # type: (Exception) -> None """ diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py index 0dae1734419a..b62651ae6a5a 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py @@ -135,40 +135,6 @@ async def _send_event_data(self, **kwargs): _error(self._outcome, self._condition) return - async def _legacy_send_event_data(self, timeout=None): - timeout = timeout or self.client.config.send_timeout - if not timeout: - timeout = 100000 # timeout None or 0 mean no timeout. 100000 seconds is equivalent to no timeout - start_time = time.time() - timeout_time = start_time + timeout - max_retries = self.client.config.max_retries - retry_count = 0 - last_exception = None - while True: - try: - if self.unsent_events: - await self._open(timeout_time) - remaining_time = timeout_time - time.time() - if remaining_time < 0.0: - if last_exception: - error = last_exception - else: - error = OperationTimeoutError("send operation timed out") - log.info("%r send operation timed out. 
(%r)", self.name, error) - raise error - self._handler._msg_timeout = remaining_time # pylint: disable=protected-access - self._handler.queue_message(*self.unsent_events) - await self._handler.wait_async() - self.unsent_events = self._handler.pending_messages - if self._outcome != constants.MessageSendResult.Ok: - if self._outcome == constants.MessageSendResult.Timeout: - self._condition = OperationTimeoutError("send operation timed out") - _error(self._outcome, self._condition) - return - except Exception as exception: - last_exception = await self._handle_exception(exception, retry_count, max_retries, timeout_time) - retry_count += 1 - def _on_outcome(self, outcome, condition): """ Called when the outcome is received for a delivery. @@ -197,11 +163,11 @@ async def create_batch(self, **kwargs): partition_key = kwargs.get("partition_key", None) @_retry_decorator - async def wrapped_open(*args, **kwargs): + async def _wrapped_open(*args, **kwargs): await self._open(**kwargs) if not self._max_message_size_on_link: - await wrapped_open(self, timeout=self.client.config.send_timeout) + await _wrapped_open(self, timeout=self.client.config.send_timeout) if max_size and max_size > self._max_message_size_on_link: raise ValueError('Max message size: {} is too large, acceptable max batch size is: {} bytes.' diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py index 1139a8b725d4..a1edbb90b767 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py @@ -210,71 +210,6 @@ def receive(self, **kwargs): return self._receive(timeout=timeout, max_batch_size=max_batch_size, data_batch=data_batch) - def _legacy_receive(self, **kwargs): - # type:(int, float) -> List[EventData] - """ - Receive events from the EventHub. - - :param max_batch_size: Receive a batch of events. Batch size will - be up to the maximum specified, but will return as soon as service - returns no new events. If combined with a timeout and no events are - retrieve before the time, the result will be empty. If no batch - size is supplied, the prefetch size will be the maximum. - :type max_batch_size: int - :param timeout: The maximum wait time to build up the requested message count for the batch. - If not specified, the default wait time specified when the consumer was created will be used. - :type timeout: float - :rtype: list[~azure.eventhub.common.EventData] - :raises: ~azure.eventhub.AuthenticationError, ~azure.eventhub.ConnectError, ~azure.eventhub.ConnectionLostError, - ~azure.eventhub.EventHubError - Example: - .. literalinclude:: ../examples/test_examples_eventhub.py - :start-after: [START eventhub_client_sync_receive] - :end-before: [END eventhub_client_sync_receive] - :language: python - :dedent: 4 - :caption: Receive events from the EventHub. - - """ - max_batch_size = kwargs.get("max_batch_size", None) - timeout = kwargs.get("timeout", None) - - self._check_closed() - - max_batch_size = min(self.client.config.max_batch_size, self.prefetch) if max_batch_size is None else max_batch_size - data_batch = [] # type: List[EventData] - - timeout = self.client.config.receive_timeout if timeout is None else timeout - if not timeout: - timeout = 100000 # timeout None or 0 mean no timeout. 
100000 seconds is equivalent to no timeout - timeout_time = time.time() + timeout - max_retries = self.client.config.max_retries - retry_count = 0 - last_exception = None - - self._receive() - while True: - try: - self._open(timeout_time) - remaining_time = timeout_time - time.time() - if remaining_time <= 0.0: - if last_exception: - log.info("%r receive operation timed out. (%r)", self.name, last_exception) - raise last_exception - return data_batch - remaining_time_ms = 1000 * remaining_time - message_batch = self._handler.receive_message_batch( - max_batch_size=max_batch_size - (len(data_batch) if data_batch else 0), - timeout=remaining_time_ms) - for message in message_batch: - event_data = EventData(message=message) - self.offset = EventPosition(event_data.offset) - data_batch.append(event_data) - return data_batch - except Exception as exception: - last_exception = self._handle_exception(exception, retry_count, max_retries, timeout_time) - retry_count += 1 - def close(self, exception=None): # type:(Exception) -> None """ diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py index 01c3c28fe374..373e6ba1f28e 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py @@ -143,39 +143,6 @@ def _send_event_data(self, **kwargs): _error(self._outcome, self._condition) return - def _legacy_send_event_data(self, timeout=None): - timeout = timeout or self.client.config.send_timeout - if not timeout: - timeout = 100000 # timeout None or 0 mean no timeout. 100000 seconds is equivalent to no timeout - timeout_time = time.time() + timeout - max_retries = self.client.config.max_retries - retry_count = 0 - last_exception = None - while True: - try: - if self.unsent_events: - self._open(timeout_time) - remaining_time = timeout_time - time.time() - if remaining_time <= 0.0: - if last_exception: - error = last_exception - else: - error = OperationTimeoutError("send operation timed out") - log.info("%r send operation timed out. (%r)", self.name, error) - raise error - self._handler._msg_timeout = remaining_time # pylint: disable=protected-access - self._handler.queue_message(*self.unsent_events) - self._handler.wait() - self.unsent_events = self._handler.pending_messages - if self._outcome != constants.MessageSendResult.Ok: - if self._outcome == constants.MessageSendResult.Timeout: - self._condition = OperationTimeoutError("send operation timed out") - _error(self._outcome, self._condition) - return - except Exception as exception: - last_exception = self._handle_exception(exception, retry_count, max_retries, timeout_time) - retry_count += 1 - def _on_outcome(self, outcome, condition): """ Called when the outcome is received for a delivery. @@ -204,11 +171,11 @@ def create_batch(self, **kwargs): partition_key = kwargs.get("partition_key", None) @_retry_decorator - def wrapped_open(*args, **kwargs): + def _wrapped_open(*args, **kwargs): self._open(**kwargs) if not self._max_message_size_on_link: - wrapped_open(self, timeout=self.client.config.send_timeout) + _wrapped_open(self, timeout=self.client.config.send_timeout) if max_size and max_size > self._max_message_size_on_link: raise ValueError('Max message size: {} is too large, acceptable max batch size is: {} bytes.' 
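The _retry_decorator applied in the producer and consumer hunks above is imported from _consumer_producer_mixin.py, which this part of the patch does not show. A minimal sketch of one possible shape, inferred from the call sites (callers pass a timeout keyword argument, and the wrapped functions read back timeout_time and last_exception) and from the inline retry loops of the removed _legacy_* methods; the exact signature and the behaviour of _handle_exception are assumptions rather than the actual implementation:

import functools
import time


def _retry_decorator(to_be_wrapped_func):
    # Hypothetical sketch only; the real decorator is not part of these hunks.
    @functools.wraps(to_be_wrapped_func)
    def wrapped(self, *args, **kwargs):
        timeout = kwargs.pop("timeout", None)
        if not timeout:
            timeout = 100000  # treat None or 0 as "no timeout"
        timeout_time = time.time() + timeout
        max_retries = self.client.config.max_retries
        retry_count = 0
        last_exception = None
        while True:
            try:
                return to_be_wrapped_func(
                    self, *args,
                    timeout_time=timeout_time,
                    last_exception=last_exception,
                    **kwargs)
            except Exception as exception:  # pylint: disable=broad-except
                # _handle_exception is assumed to re-raise terminal errors and to
                # return the exception when another attempt is still allowed.
                last_exception = self._handle_exception(
                    exception, retry_count, max_retries, timeout_time)
                retry_count += 1
    return wrapped

An async twin would presumably await both the wrapped coroutine and _handle_exception, mirroring the removed _legacy_send_event_data loop.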
diff --git a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py index 7be6974954aa..0ab4fe53f006 100644 --- a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py +++ b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py @@ -202,9 +202,36 @@ async def test_max_receivers_async(connstr_senders): @pytest.mark.liveTest @pytest.mark.asyncio -async def test_create_batch_with_invalid_hostname(invalid_hostname): +async def test_create_batch_with_invalid_hostname_async(invalid_hostname): client = EventHubClient.from_connection_string(invalid_hostname, network_tracing=False) sender = client.create_producer() - with pytest.raises(AuthenticationError): - batch_event_data = await sender.create_batch(max_size=300, partition_key="key") - await sender.close() + try: + with pytest.raises(AuthenticationError): + batch_event_data = await sender.create_batch(max_size=300, partition_key="key") + finally: + await sender.close() + + +@pytest.mark.liveTest +@pytest.mark.asyncio +async def test_create_batch_with_none_async(connection_str): + client = EventHubClient.from_connection_string(connection_str, network_tracing=False) + sender = client.create_producer() + batch_event_data = await sender.create_batch(max_size=300, partition_key="key") + try: + with pytest.raises(ValueError): + batch_event_data.try_add(EventData(None)) + finally: + await sender.close() + + +@pytest.mark.liveTest +@pytest.mark.asyncio +async def test_create_batch_with_too_large_size_async(connection_str): + client = EventHubClient.from_connection_string(connection_str, network_tracing=False) + sender = client.create_producer() + try: + with pytest.raises(ValueError): + batch_event_data = await sender.create_batch(max_size=5 * 1024 * 1024, partition_key="key") + finally: + await sender.close() diff --git a/sdk/eventhub/azure-eventhubs/tests/test_negative.py b/sdk/eventhub/azure-eventhubs/tests/test_negative.py index 22743096b109..3682461f9db2 100644 --- a/sdk/eventhub/azure-eventhubs/tests/test_negative.py +++ b/sdk/eventhub/azure-eventhubs/tests/test_negative.py @@ -216,9 +216,28 @@ def test_message_body_types(connstr_senders): @pytest.mark.liveTest -def test_create_batch_with_invalid_hostname(invalid_hostname): +def test_create_batch_with_invalid_hostname_sync(invalid_hostname): client = EventHubClient.from_connection_string(invalid_hostname, network_tracing=False) sender = client.create_producer() with pytest.raises(AuthenticationError): batch_event_data = sender.create_batch(max_size=300, partition_key="key") sender.close() + + +@pytest.mark.liveTest +def test_create_batch_with_none_sync(connection_str): + client = EventHubClient.from_connection_string(connection_str, network_tracing=False) + sender = client.create_producer() + batch_event_data = sender.create_batch(max_size=300, partition_key="key") + with pytest.raises(ValueError): + batch_event_data.try_add(EventData(None)) + sender.close() + + +@pytest.mark.liveTest +def test_create_batch_with_too_large_size_sync(connection_str): + client = EventHubClient.from_connection_string(connection_str, network_tracing=False) + sender = client.create_producer() + with pytest.raises(ValueError): + batch_event_data = sender.create_batch(max_size=5 * 1024 * 1024, partition_key="key") + sender.close() From fbb66bd95e3fdda1574f7b294e351a97ab3d1772 Mon Sep 17 00:00:00 2001 From: yijxie Date: Tue, 30 Jul 2019 18:42:17 -0700 Subject: [PATCH 19/42] make sync longrunning multi-threaded 
--- .../tests/test_longrunning_receive.py | 62 +++++++++---------- .../tests/test_longrunning_send.py | 44 +++++++------ 2 files changed, 54 insertions(+), 52 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/tests/test_longrunning_receive.py b/sdk/eventhub/azure-eventhubs/tests/test_longrunning_receive.py index bbd945d09e9b..47559b778af3 100644 --- a/sdk/eventhub/azure-eventhubs/tests/test_longrunning_receive.py +++ b/sdk/eventhub/azure-eventhubs/tests/test_longrunning_receive.py @@ -14,6 +14,7 @@ import time import os import sys +import threading import pytest from logging.handlers import RotatingFileHandler @@ -22,6 +23,7 @@ from azure.eventhub import EventHubClient from azure.eventhub import EventHubSharedKeyCredential + def get_logger(filename, level=logging.INFO): azure_logger = logging.getLogger("azure.eventhub") azure_logger.setLevel(level) @@ -47,38 +49,33 @@ def get_logger(filename, level=logging.INFO): logger = get_logger("recv_test.log", logging.INFO) -def get_partitions(args): - eh_data = args.get_properties() - return eh_data["partition_ids"] - - -def pump(receivers, duration): +def pump(receiver, duration): total = 0 iteration = 0 deadline = time.time() + duration - try: - while time.time() < deadline: - for pid, receiver in receivers.items(): + with receiver: + try: + while time.time() < deadline: batch = receiver.receive(timeout=5) size = len(batch) total += size iteration += 1 if size == 0: print("{}: No events received, queue size {}, delivered {}".format( - pid, + receiver.partition, receiver.queue_size, total)) - elif iteration >= 50: + elif iteration >= 5: iteration = 0 print("{}: total received {}, last sn={}, last offset={}".format( - pid, + receiver.partition, total, batch[-1].sequence_number, batch[-1].offset)) - print("Total received {}".format(total)) - except Exception as e: - print("EventHubConsumer failed: {}".format(e)) - raise + print("{}: Total received {}".format(receiver.partition, total)) + except Exception as e: + print("EventHubConsumer failed: {}".format(e)) + raise @pytest.mark.liveTest @@ -112,22 +109,23 @@ def test_long_running_receive(connection_str): except ImportError: raise ValueError("Must specify either '--conn-str' or '--address'") - try: - if not args.partitions: - partitions = get_partitions(client) - else: - partitions = args.partitions.split(",") - pumps = {} - for pid in partitions: - pumps[pid] = client.create_consumer(consumer_group="$default", - partition_id=pid, - event_position=EventPosition(args.offset), - prefetch=50) - pump(pumps, args.duration) - finally: - for pid in partitions: - pumps[pid].close() + if args.partitions: + partitions = args.partitions.split(",") + else: + partitions = client.get_partition_ids() + + threads = [] + for pid in partitions: + consumer = client.create_consumer(consumer_group="$default", + partition_id=pid, + event_position=EventPosition(args.offset), + prefetch=300) + thread = threading.Thread(target=pump, args=(consumer, args.duration)) + thread.start() + threads.append(thread) + for thread in threads: + thread.join() if __name__ == '__main__': - test_long_running_receive(os.environ.get('EVENT_HUB_CONNECTION_STR')) + test_long_running_receive(os.environ.get('EVENT_HUB_PERF_CONN_STR')) diff --git a/sdk/eventhub/azure-eventhubs/tests/test_longrunning_send.py b/sdk/eventhub/azure-eventhubs/tests/test_longrunning_send.py index e4826d05e3fa..e737ee6889d7 100644 --- a/sdk/eventhub/azure-eventhubs/tests/test_longrunning_send.py +++ b/sdk/eventhub/azure-eventhubs/tests/test_longrunning_send.py @@ 
-13,11 +13,12 @@ import time import os import sys +import threading import logging import pytest from logging.handlers import RotatingFileHandler -from azure.eventhub import EventHubClient, EventHubProducer, EventData, EventHubSharedKeyCredential +from azure.eventhub import EventHubClient, EventDataBatch, EventData, EventHubSharedKeyCredential def get_logger(filename, level=logging.INFO): @@ -42,34 +43,30 @@ def get_logger(filename, level=logging.INFO): return azure_logger -logger = get_logger("send_test.log", logging.INFO) - -def check_send_successful(outcome, condition): - if outcome.value != 0: - print("Send failed {}".format(condition)) +logger = get_logger("send_test.log", logging.INFO) -def main(client, args): - sender = client.create_producer() +def send(sender, args): + # sender = client.create_producer() deadline = time.time() + args.duration total = 0 - try: with sender: - event_list = [] + batch = sender.create_batch() while time.time() < deadline: data = EventData(body=b"D" * args.payload) - event_list.append(data) - total += 1 - if total % 100 == 0: - sender.send(event_list) - event_list = [] - print("Send total {}".format(total)) + try: + batch.try_add(data) + total += 1 + except ValueError: + sender.send(batch, timeout=0) + print("Sent total {} of partition {}".format(total, sender.partition)) + batch = sender.create_batch() except Exception as err: - print("Send failed {}".format(err)) + print("Partition {} send failed {}".format(sender.partition, err)) raise - print("Sent total {}".format(total)) + print("Sent total {} of partition {}".format(total, sender.partition)) @pytest.mark.liveTest @@ -105,10 +102,17 @@ def test_long_running_send(connection_str): raise ValueError("Must specify either '--conn-str' or '--address'") try: - main(client, args) + partition_ids = client.get_partition_ids() + threads = [] + for pid in partition_ids: + sender = client.create_producer(partition_id=pid) + thread = threading.Thread(target=send, args=(sender, args)) + thread.start() + threads.append(thread) + thread.join() except KeyboardInterrupt: pass if __name__ == '__main__': - test_long_running_send(os.environ.get('EVENT_HUB_CONNECTION_STR')) + test_long_running_send(os.environ.get('EVENT_HUB_PERF_CONN_STR')) From 00ff72345050331d2db400b8e648b5f14db324d5 Mon Sep 17 00:00:00 2001 From: yijxie Date: Tue, 30 Jul 2019 18:43:38 -0700 Subject: [PATCH 20/42] small changes on async long running test --- .../asynctests/test_longrunning_receive_async.py | 14 ++++---------- .../asynctests/test_longrunning_send_async.py | 2 +- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_longrunning_receive_async.py b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_longrunning_receive_async.py index 74c05d174e47..900612684001 100644 --- a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_longrunning_receive_async.py +++ b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_longrunning_receive_async.py @@ -44,12 +44,8 @@ def get_logger(filename, level=logging.INFO): return azure_logger -logger = get_logger("recv_test_async.log", logging.INFO) - -async def get_partitions(client): - eh_data = await client.get_properties() - return eh_data["partition_ids"] +logger = get_logger("recv_test_async.log", logging.INFO) async def pump(_pid, receiver, _args, _dl): @@ -76,9 +72,7 @@ async def pump(_pid, receiver, _args, _dl): total, batch[-1].sequence_number, batch[-1].offset)) - print("{}: total received {}".format( - _pid, - total)) + print("{}: Total 
received {}".format(receiver.partition, total)) except Exception as e: print("Partition {} receiver failed: {}".format(_pid, e)) raise @@ -127,11 +121,11 @@ async def test_long_running_receive_async(connection_str): receiver = client.create_consumer(consumer_group="$default", partition_id=pid, event_position=EventPosition(args.offset), - prefetch=50, + prefetch=300, loop=loop) pumps.append(pump(pid, receiver, args, args.duration)) await asyncio.gather(*pumps) if __name__ == '__main__': - asyncio.run(test_long_running_receive_async(os.environ.get('EVENT_HUB_CONNECTION_STR'))) + asyncio.run(test_long_running_receive_async(os.environ.get('EVENT_HUB_PERF_CONN_STR'))) diff --git a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_longrunning_send_async.py b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_longrunning_send_async.py index 809fa3430b59..00279d168d70 100644 --- a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_longrunning_send_async.py +++ b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_longrunning_send_async.py @@ -122,4 +122,4 @@ async def test_long_running_partition_send_async(connection_str): if __name__ == '__main__': - asyncio.run(test_long_running_partition_send_async(os.environ.get('EVENT_HUB_CONNECTION_STR'))) + asyncio.run(test_long_running_partition_send_async(os.environ.get('EVENT_HUB_PERF_CONN_STR'))) From 0f5180c43371a7842f6c920ceccc8cb5d1e05ae0 Mon Sep 17 00:00:00 2001 From: yijxie Date: Tue, 30 Jul 2019 18:47:55 -0700 Subject: [PATCH 21/42] reset retry_count for iterator --- .../azure-eventhubs/azure/eventhub/aio/consumer_async.py | 4 +++- sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py index aefdebfb2a5a..4a508029ebbd 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py @@ -81,6 +81,7 @@ def __init__( # pylint: disable=super-init-not-called self.error = None self._link_properties = {} partition = self.source.split('/')[-1] + self.partition = partition self.name = "EHReceiver-{}-partition{}".format(uuid.uuid4(), partition) if owner_level: self._link_properties[types.AMQPSymbol(self._epoch)] = types.AMQPLong(int(owner_level)) @@ -102,6 +103,7 @@ async def __anext__(self): message = await self.messages_iter.__anext__() event_data = EventData(message=message) self.offset = EventPosition(event_data.offset, inclusive=False) + retry_count = 0 return event_data except Exception as exception: await self._handle_exception(exception, retry_count, max_retries) @@ -185,7 +187,7 @@ async def _receive(self, **kwargs): return data_batch async def receive(self, **kwargs): - # type: (int, float) -> List[EventData] + # type: (...) -> List[EventData] """ Receive events asynchronously from the EventHub. 
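For reference, a short usage sketch of the async consumer as it stands at this point: a single keyword-argument batch receive that prints the partition attribute added in the hunk above. The connection string is a placeholder, and the azure.eventhub.aio import path for the async client is assumed rather than shown in this patch:

import asyncio

from azure.eventhub import EventPosition
from azure.eventhub.aio import EventHubClient  # assumed import path for the async client


async def read_one_batch(conn_str):
    # conn_str is a placeholder Event Hubs connection string.
    client = EventHubClient.from_connection_string(conn_str, network_tracing=False)
    consumer = client.create_consumer(consumer_group="$default",
                                      partition_id="0",
                                      event_position=EventPosition("-1"))
    try:
        # max_batch_size and timeout fall back to the client configuration when omitted.
        events = await consumer.receive(max_batch_size=50, timeout=5)
        for event in events:
            print(consumer.partition, event.offset, event.body_as_str())
    finally:
        await consumer.close()
        await client.close()

if __name__ == '__main__':
    asyncio.run(read_one_batch("<EVENT HUB CONNECTION STRING>"))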
diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py index a1edbb90b767..67499ebf35f4 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py @@ -75,6 +75,7 @@ def __init__(self, client, source, **kwargs): self.redirected = None self.error = None partition = self.source.split('/')[-1] + self.partition = partition self.name = "EHConsumer-{}-partition{}".format(uuid.uuid4(), partition) if owner_level: self._link_properties[types.AMQPSymbol(self._epoch)] = types.AMQPLong(int(owner_level)) @@ -96,6 +97,7 @@ def __next__(self): message = next(self.messages_iter) event_data = EventData(message=message) self.offset = EventPosition(event_data.offset, inclusive=False) + retry_count = 0 return event_data except Exception as exception: self._handle_exception(exception, retry_count, max_retries) @@ -177,6 +179,7 @@ def _receive(self, **kwargs): return data_batch def receive(self, **kwargs): + # type: (...) -> List[EventData] """ Receive events from the EventHub. From 6e0c238566af425f63fdd7fcdd8d76263bd9caf4 Mon Sep 17 00:00:00 2001 From: yijxie Date: Tue, 30 Jul 2019 18:49:17 -0700 Subject: [PATCH 22/42] Don't return early when open a ReceiveClient or SendClient --- .../azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py | 2 -- .../azure/eventhub/aio/_consumer_producer_mixin_async.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py index f53736feeb03..ff1a921d9220 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py @@ -76,8 +76,6 @@ def _open(self, timeout_time=None): self.client.get_auth(**alt_creds) )) while not self._handler.client_ready(): - if timeout_time and time.time() >= timeout_time: - return time.sleep(0.05) self._max_message_size_on_link = self._handler.message_handler._link.peer_max_message_size \ or constants.MAX_MESSAGE_LENGTH_BYTES # pylint: disable=protected-access diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py index 648138c93d73..68587637f1c3 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py @@ -77,8 +77,6 @@ async def _open(self, timeout_time=None): self.client.get_auth(**alt_creds) )) while not await self._handler.client_ready_async(): - if timeout_time and time.time() >= timeout_time: - return await asyncio.sleep(0.05) self._max_message_size_on_link = self._handler.message_handler._link.peer_max_message_size \ or constants.MAX_MESSAGE_LENGTH_BYTES # pylint: disable=protected-access From 0efc95f205570ed0590f506575be68e0dec86e04 Mon Sep 17 00:00:00 2001 From: yijxie Date: Tue, 30 Jul 2019 18:49:36 -0700 Subject: [PATCH 23/42] type annotation change --- sdk/eventhub/azure-eventhubs/azure/eventhub/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py index deda0ddc01fb..ea0be685dd66 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py +++ 
b/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py @@ -202,7 +202,7 @@ def get_partition_properties(self, partition): def create_consumer( self, consumer_group, partition_id, event_position, **kwargs ): - # type: (str, str, EventPosition, int, str, int) -> EventHubConsumer + # type: (str, str, EventPosition, ...) -> EventHubConsumer """ Create a consumer to the client for a particular consumer group and partition. @@ -246,7 +246,7 @@ def create_consumer( return handler def create_producer(self, **kwargs): - # type: (str, str, float) -> EventHubProducer + # type: (...) -> EventHubProducer """ Create an producer to send EventData object to an EventHub. From 90fbafb4e464a520191603e045f32374dd47174a Mon Sep 17 00:00:00 2001 From: Yunhao Ling Date: Wed, 31 Jul 2019 13:28:43 -0700 Subject: [PATCH 24/42] Update kwargs and remove unused import --- .../azure/eventhub/aio/client_async.py | 15 ++----- .../azure/eventhub/aio/consumer_async.py | 40 +++++++++---------- .../azure/eventhub/aio/producer_async.py | 11 ++--- .../azure-eventhubs/azure/eventhub/client.py | 15 ++----- .../azure-eventhubs/azure/eventhub/common.py | 16 +++----- .../azure/eventhub/consumer.py | 38 +++++++++--------- .../azure/eventhub/producer.py | 10 ++--- 7 files changed, 57 insertions(+), 88 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/client_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/client_async.py index 17479119ccad..57748525fdc5 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/client_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/client_async.py @@ -12,11 +12,8 @@ from uamqp import ( Message, AMQPClientAsync, - errors, ) -from uamqp import compat -from azure.eventhub.error import ConnectError from azure.eventhub.common import parse_sas_token, EventPosition, EventHubSharedKeyCredential, EventHubSASTokenCredential from ..client_abstract import EventHubClientAbstract @@ -193,8 +190,7 @@ async def get_partition_properties(self, partition): output['is_empty'] = partition_info[b'is_partition_empty'] return output - def create_consumer( - self, consumer_group, partition_id, event_position, **kwargs): + def create_consumer(self, consumer_group, partition_id, event_position, **kwargs): # type: (str, str, EventPosition, int, str, int, asyncio.AbstractEventLoop) -> EventHubConsumer """ Create an async consumer to the client for a particular consumer group and partition. @@ -240,8 +236,7 @@ def create_consumer( prefetch=prefetch, loop=loop) return handler - def create_producer( - self, **kwargs): + def create_producer(self, partition_id=None, operation=None, send_timeout=None, loop=None): # type: (str, str, float, asyncio.AbstractEventLoop) -> EventHubProducer """ Create an async producer to send EventData object to an EventHub. @@ -268,10 +263,6 @@ def create_producer( :caption: Add an async producer to the client to send EventData. 
""" - partition_id = kwargs.get("partition_id", None) - operation = kwargs.get("operation", None) - send_timeout = kwargs.get("send_timeout", None) - loop = kwargs.get("loop", None) target = "amqps://{}{}".format(self.address.hostname, self.address.path) if operation: @@ -283,4 +274,4 @@ def create_producer( return handler async def close(self): - await self._conn_manager.close_connection() \ No newline at end of file + await self._conn_manager.close_connection() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py index 4a508029ebbd..40324827b2f4 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py @@ -8,12 +8,11 @@ from typing import List import time -from uamqp import errors, types, compat +from uamqp import errors, types from uamqp import ReceiveClientAsync, Source from azure.eventhub import EventData, EventPosition -from azure.eventhub.error import EventHubError, AuthenticationError, ConnectError, ConnectionLostError, _error_handler -from ..aio.error_async import _handle_exception +from azure.eventhub.error import EventHubError, ConnectError, _error_handler from ._consumer_producer_mixin_async import ConsumerProducerMixin, _retry_decorator log = logging.getLogger(__name__) @@ -148,19 +147,6 @@ async def _open(self, timeout_time=None): self.source = self.redirected.address await super(EventHubConsumer, self)._open(timeout_time) - @property - def queue_size(self): - # type: () -> int - """ - The current size of the unprocessed Event queue. - - :rtype: int - """ - # pylint: disable=protected-access - if self._handler._received_messages: - return self._handler._received_messages.qsize() - return 0 - @_retry_decorator async def _receive(self, **kwargs): timeout_time = kwargs.get("timeout_time") @@ -186,8 +172,21 @@ async def _receive(self, **kwargs): data_batch.append(event_data) return data_batch - async def receive(self, **kwargs): - # type: (...) -> List[EventData] + @property + def queue_size(self): + # type: () -> int + """ + The current size of the unprocessed Event queue. + + :rtype: int + """ + # pylint: disable=protected-access + if self._handler._received_messages: + return self._handler._received_messages.qsize() + return 0 + + async def receive(self, max_batch_size=None, timeout=None): + # type: (int, float) -> List[EventData] """ Receive events asynchronously from the EventHub. 
@@ -215,9 +214,8 @@ async def receive(self, **kwargs): """ self._check_closed() - max_batch_size = kwargs.get("max_batch_size", None) - timeout = kwargs.get("timeout", None) or self.client.config.receive_timeout - max_batch_size = min(self.client.config.max_batch_size, self.prefetch) if max_batch_size is None else max_batch_size + timeout = timeout or self.client.config.receive_timeout + max_batch_size = max_batch_size or min(self.client.config.max_batch_size, self.prefetch) data_batch = [] # type: List[EventData] return await self._receive(timeout=timeout, max_batch_size=max_batch_size, data_batch=data_batch) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py index b62651ae6a5a..bc132e0a46cc 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py @@ -147,7 +147,8 @@ def _on_outcome(self, outcome, condition): self._outcome = outcome self._condition = condition - async def create_batch(self, **kwargs): + async def create_batch(self, max_size=None, partition_key=None): + # type:(int, str) -> EventDataBatch """ Create an EventDataBatch object with max size being max_size. The max_size should be no greater than the max allowed message size defined by the service side. @@ -159,8 +160,6 @@ async def create_batch(self, **kwargs): :return: an EventDataBatch instance :rtype: ~azure.eventhub.EventDataBatch """ - max_size = kwargs.get("max_size", None) - partition_key = kwargs.get("partition_key", None) @_retry_decorator async def _wrapped_open(*args, **kwargs): @@ -175,8 +174,8 @@ async def _wrapped_open(*args, **kwargs): return EventDataBatch(max_size=(max_size or self._max_message_size_on_link), partition_key=partition_key) - async def send(self, event_data, **kwargs): - # type:(Union[EventData, EventDataBatch, Iterable[EventData]], Union[str, bytes]) -> None + async def send(self, event_data, partition_key=None, timeout=None): + # type:(Union[EventData, EventDataBatch, Iterable[EventData]], Union[str, bytes], float) -> None """ Sends an event data and blocks until acknowledgement is received or operation times out. @@ -204,8 +203,6 @@ async def send(self, event_data, **kwargs): :caption: Sends an event data and blocks until acknowledgement is received or operation times out. 
""" - partition_key = kwargs.get("partition_key", None) - timeout = kwargs.get("timeout", None) self._check_closed() if isinstance(event_data, EventData): diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py index ea0be685dd66..52ce44cf23a9 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py @@ -18,18 +18,16 @@ from uamqp import Message from uamqp import authentication from uamqp import constants -from uamqp import errors -from uamqp import compat from azure.eventhub.producer import EventHubProducer from azure.eventhub.consumer import EventHubConsumer from azure.eventhub.common import parse_sas_token, EventPosition -from azure.eventhub.error import ConnectError, EventHubError from .client_abstract import EventHubClientAbstract from .common import EventHubSASTokenCredential, EventHubSharedKeyCredential from ._connection_manager import get_connection_manager from .error import _handle_exception + log = logging.getLogger(__name__) @@ -199,9 +197,7 @@ def get_partition_properties(self, partition): output['is_empty'] = partition_info[b'is_partition_empty'] return output - def create_consumer( - self, consumer_group, partition_id, event_position, **kwargs - ): + def create_consumer(self, consumer_group, partition_id, event_position, **kwargs): # type: (str, str, EventPosition, ...) -> EventHubConsumer """ Create a consumer to the client for a particular consumer group and partition. @@ -245,8 +241,8 @@ def create_consumer( prefetch=prefetch) return handler - def create_producer(self, **kwargs): - # type: (...) -> EventHubProducer + def create_producer(self, partition_id=None, operation=None, send_timeout=None): + # type: (str, str, float) -> EventHubProducer """ Create an producer to send EventData object to an EventHub. @@ -271,9 +267,6 @@ def create_producer(self, **kwargs): :caption: Add a producer to the client to send EventData. """ - partition_id = kwargs.get("partition_id", None) - operation = kwargs.get("operation", None) - send_timeout = kwargs.get("send_timeout", None) target = "amqps://{}{}".format(self.address.hostname, self.address.path) if operation: diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/common.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/common.py index ea00d0aef5ff..0ac743bd91d2 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/common.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/common.py @@ -11,7 +11,7 @@ import logging from azure.eventhub.error import EventDataError -from uamqp import BatchMessage, Message, types, constants, errors +from uamqp import BatchMessage, Message, types, constants from uamqp.message import MessageHeader, MessageProperties log = logging.getLogger(__name__) @@ -57,7 +57,7 @@ class EventData(object): PROP_TIMESTAMP = b"x-opt-enqueued-time" PROP_DEVICE_ID = b"iothub-connection-device-id" - def __init__(self, body=None, **kwargs): + def __init__(self, body=None, to_device=None, message=None): """ Initialize EventData. @@ -70,8 +70,6 @@ def __init__(self, body=None, **kwargs): :param message: The received message. 
:type message: ~uamqp.message.Message """ - to_device = kwargs.get("to_device", None) - message = kwargs.get("message", None) self._partition_key = types.AMQPSymbol(EventData.PROP_PARTITION_KEY) self._annotations = {} @@ -215,7 +213,7 @@ def body(self): except TypeError: raise ValueError("Message data empty.") - def body_as_str(self, **kwargs): + def body_as_str(self, encoding='UTF-8'): """ The body of the event data as a string if the data is of a compatible type. @@ -224,7 +222,6 @@ def body_as_str(self, **kwargs): Default is 'UTF-8' :rtype: str or unicode """ - encoding = kwargs.get("encoding", 'UTF-8') data = self.body try: return "".join(b.decode(encoding) for b in data) @@ -237,7 +234,7 @@ def body_as_str(self, **kwargs): except Exception as e: raise TypeError("Message data is not compatible with string type: {}".format(e)) - def body_as_json(self, **kwargs): + def body_as_json(self, encoding='UTF-8'): """ The body of the event loaded as a JSON object is the data is compatible. @@ -245,7 +242,6 @@ def body_as_json(self, **kwargs): Default is 'UTF-8' :rtype: dict """ - encoding = kwargs.get("encoding", 'UTF-8') data_str = self.body_as_str(encoding=encoding) try: return json.loads(data_str) @@ -263,9 +259,7 @@ class EventDataBatch(object): Do not instantiate an EventDataBatch object directly. """ - def __init__(self, **kwargs): - max_size = kwargs.get("max_size", None) - partition_key = kwargs.get("partition_key", None) + def __init__(self, max_size=None, partition_key=None): self.max_size = max_size or constants.MAX_MESSAGE_LENGTH_BYTES self._partition_key = partition_key self.message = BatchMessage(data=[], multi_messages=False, properties=None) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py index 67499ebf35f4..03827f546c0f 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py @@ -13,9 +13,10 @@ from uamqp import ReceiveClient, Source from azure.eventhub.common import EventData, EventPosition -from azure.eventhub.error import _error_handler, EventHubError +from azure.eventhub.error import _error_handler from ._consumer_producer_mixin import ConsumerProducerMixin, _retry_decorator + log = logging.getLogger(__name__) @@ -141,19 +142,6 @@ def _open(self, timeout_time=None): self.source = self.redirected.address super(EventHubConsumer, self)._open(timeout_time) - @property - def queue_size(self): - # type:() -> int - """ - The current size of the unprocessed Event queue. - - :rtype: int - """ - # pylint: disable=protected-access - if self._handler._received_messages: - return self._handler._received_messages.qsize() - return 0 - @_retry_decorator def _receive(self, **kwargs): timeout_time = kwargs.get("timeout_time") @@ -178,8 +166,21 @@ def _receive(self, **kwargs): data_batch.append(event_data) return data_batch - def receive(self, **kwargs): - # type: (...) -> List[EventData] + @property + def queue_size(self): + # type:() -> int + """ + The current size of the unprocessed Event queue. + + :rtype: int + """ + # pylint: disable=protected-access + if self._handler._received_messages: + return self._handler._received_messages.qsize() + return 0 + + def receive(self, max_batch_size=None, timeout=None): + # type: (int, float) -> List[EventData] """ Receive events from the EventHub. 
@@ -206,9 +207,8 @@ def receive(self, **kwargs): """ self._check_closed() - max_batch_size = kwargs.get("max_batch_size", None) - timeout = kwargs.get("timeout", None) or self.client.config.receive_timeout - max_batch_size = min(self.client.config.max_batch_size, self.prefetch) if max_batch_size is None else max_batch_size + timeout = timeout or self.client.config.receive_timeout + max_batch_size = max_batch_size or min(self.client.config.max_batch_size, self.prefetch) data_batch = [] # type: List[EventData] return self._receive(timeout=timeout, max_batch_size=max_batch_size, data_batch=data_batch) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py index 373e6ba1f28e..c4a30d81b189 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py @@ -10,7 +10,6 @@ from typing import Iterable, Union from uamqp import types, constants, errors -from uamqp import compat from uamqp import SendClient from azure.eventhub.common import EventData, EventDataBatch @@ -155,7 +154,8 @@ def _on_outcome(self, outcome, condition): self._outcome = outcome self._condition = condition - def create_batch(self, **kwargs): + def create_batch(self, max_size=None, partition_key=None): + # type:(int, str) -> EventDataBatch """ Create an EventDataBatch object with max size being max_size. The max_size should be no greater than the max allowed message size defined by the service side. @@ -167,8 +167,6 @@ def create_batch(self, **kwargs): :return: an EventDataBatch instance :rtype: ~azure.eventhub.EventDataBatch """ - max_size = kwargs.get("max_size", None) - partition_key = kwargs.get("partition_key", None) @_retry_decorator def _wrapped_open(*args, **kwargs): @@ -183,7 +181,7 @@ def _wrapped_open(*args, **kwargs): return EventDataBatch(max_size=(max_size or self._max_message_size_on_link), partition_key=partition_key) - def send(self, event_data, **kwargs): + def send(self, event_data, partition_key=None, timeout=None): # type:(Union[EventData, EventDataBatch, Iterable[EventData]], Union[str, bytes], float) -> None """ Sends an event data and blocks until acknowledgement is @@ -212,8 +210,6 @@ def send(self, event_data, **kwargs): :caption: Sends an event data and blocks until acknowledgement is received or operation times out. 
""" - partition_key = kwargs.get("partition_key", None) - timeout = kwargs.get("timeout", None) self._check_closed() if isinstance(event_data, EventData): From e06bad853e630e9834170726a19a99e25faa4702 Mon Sep 17 00:00:00 2001 From: yijxie Date: Wed, 31 Jul 2019 14:49:59 -0700 Subject: [PATCH 25/42] Misc changes from EventProcessor PR review --- .../azure/eventhub/eventprocessor/__init__.py | 3 +- .../eventprocessor/_cancellation_token.py | 22 ------ .../eventprocessor/event_processor.py | 73 +++++++++---------- .../eventprocessor/partition_processor.py | 17 ++++- .../{close_reason.py => utils.py} | 15 ++-- 5 files changed, 61 insertions(+), 69 deletions(-) delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_cancellation_token.py rename sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/{close_reason.py => utils.py} (51%) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/__init__.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/__init__.py index ae82c6d3c09e..f4b48afac6f3 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/__init__.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/__init__.py @@ -4,10 +4,9 @@ # ----------------------------------------------------------------------------------- from .event_processor import EventProcessor -from .partition_processor import PartitionProcessor +from .partition_processor import PartitionProcessor, CloseReason from .partition_manager import PartitionManager from .sqlite3_partition_manager import Sqlite3PartitionManager -from .close_reason import CloseReason __all__ = [ 'CloseReason', diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_cancellation_token.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_cancellation_token.py deleted file mode 100644 index 475e44337731..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/_cancellation_token.py +++ /dev/null @@ -1,22 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ----------------------------------------------------------------------------------- - -""" -Based on https://stackoverflow.com/questions/43229939/how-to-pass-a-boolean-by-reference-across-threads-and-modules -""" - - -class CancellationToken: - """ - Thread Safe Mutable Cancellation Token. - """ - def __init__(self): - self.is_cancelled = False - - def cancel(self): - """ - Cancel the token. - """ - self.is_cancelled = True diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py index 334def230a3f..6bd0c49170a2 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -3,25 +3,24 @@ # Licensed under the MIT License. See License.txt in the project root for license information. 
# ----------------------------------------------------------------------------------- -from typing import Callable +from typing import Callable, List import uuid import asyncio import logging from azure.eventhub import EventPosition, EventHubError from azure.eventhub.aio import EventHubClient -from ._cancellation_token import CancellationToken from .checkpoint_manager import CheckpointManager from .partition_manager import PartitionManager -from .partition_processor import PartitionProcessor -from .close_reason import CloseReason +from .partition_processor import PartitionProcessor, CloseReason +from .utils import get_running_loop log = logging.getLogger(__name__) class EventProcessor(object): - def __init__(self, consumer_group_name: str, eventhub_client: EventHubClient, - partition_processor_callable: Callable[..., PartitionProcessor], + def __init__(self, eventhub_client: EventHubClient, consumer_group_name: str, + partition_processor_factory: Callable[..., PartitionProcessor], partition_manager: PartitionManager, **kwargs): """An EventProcessor automatically creates and runs consumers for all partitions of the eventhub. @@ -39,13 +38,12 @@ def __init__(self, consumer_group_name: str, eventhub_client: EventHubClient, self._consumer_group_name = consumer_group_name self._eventhub_client = eventhub_client self._eventhub_name = eventhub_client.eh_name - self._partition_processor_callable = partition_processor_callable + self._partition_processor_factory = partition_processor_factory self._partition_manager = partition_manager self._initial_event_position = kwargs.get("initial_event_position", "-1") self._max_batch_size = eventhub_client.config.max_batch_size self._receive_timeout = eventhub_client.config.receive_timeout - self._tasks = [] - self._cancellation_token = CancellationToken() + self._tasks: List[asyncio.Task] = [] self._instance_id = str(uuid.uuid4()) self._partition_ids = None @@ -57,8 +55,7 @@ async def start(self): call user defined PartitionProcessor.process_events() """ log.info("EventProcessor %r is being started", self._instance_id) - client = self._eventhub_client - partition_ids = await client.get_partition_ids() + partition_ids = await self._eventhub_client.get_partition_ids() self.partition_ids = partition_ids claimed_list = await self._claim_partitions() await self._start_claimed_partitions(claimed_list) @@ -69,9 +66,10 @@ async def stop(self): It sends out a cancellation token to stop all partitions' EventHubConsumer will stop receiving events. """ - self._cancellation_token.cancel() - # It's not agreed whether a partition manager has method close(). - log.info("EventProcessor %r cancellation token has been sent", self._instance_id) + for task in self._tasks: + task.cancel() + # It's not agreed whether a partition manager has method close(). 
+ log.info("EventProcessor %r has been cancelled", self._instance_id) async def _claim_partitions(self): partitions_ownership = await self._partition_manager.list_ownership(self._eventhub_name, self._consumer_group_name) @@ -105,34 +103,42 @@ async def _start_claimed_partitions(self, claimed_partitions): EventPosition(str(offset))) consumers.append(consumer) - partition_processor = self._partition_processor_callable( + partition_processor = self._partition_processor_factory( eventhub_name=self._eventhub_name, consumer_group_name=self._consumer_group_name, partition_id=partition_id, checkpoint_manager=CheckpointManager(partition_id, self._eventhub_name, self._consumer_group_name, self._instance_id, self._partition_manager) ) - loop = asyncio.get_running_loop() + loop = get_running_loop() task = loop.create_task( - _receive(consumer, partition_processor, self._receive_timeout, self._cancellation_token)) + _receive(consumer, partition_processor, self._receive_timeout)) self._tasks.append(task) await asyncio.gather(*self._tasks) await self._partition_manager.close() log.info("EventProcessor %r partition manager is closed", self._instance_id) - log.info("EventProcessor %r partition has stopped", self._instance_id) + log.info("EventProcessor %r has stopped", self._instance_id) -async def _receive(partition_consumer, partition_processor, receive_timeout, cancellation_token): - async with partition_consumer: - while not cancellation_token.is_cancelled: +async def _receive(partition_consumer, partition_processor, receive_timeout): + try: + while True: try: events = await partition_consumer.receive(timeout=receive_timeout) + except asyncio.CancelledError: + await partition_processor.close(reason=CloseReason.SHUTDOWN) + log.info( + "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " + "has been shutdown", + partition_processor._checkpoint_manager._instance_id, + partition_processor._eventhub_name, + partition_processor._partition_id, + partition_processor._consumer_group_name + ) + break except EventHubError as eh_err: - if eh_err.error == "link:stolen": - reason = CloseReason.LEASE_LOST - else: - reason = CloseReason.EVENTHUB_EXCEPTION + reason = CloseReason.LEASE_LOST if eh_err.error == "link:stolen" else CloseReason.EVENTHUB_EXCEPTION log.info( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " "has met an exception receiving events. It's being closed. 
The exception is %r.", @@ -142,6 +148,7 @@ async def _receive(partition_consumer, partition_processor, receive_timeout, can partition_processor._consumer_group_name, eh_err ) + await partition_processor.process_error(eh_err) await partition_processor.close(reason=reason) break try: @@ -156,16 +163,8 @@ async def _receive(partition_consumer, partition_processor, receive_timeout, can partition_processor._consumer_group_name, exp ) - await partition_processor.close(reason=CloseReason.USER_EXCEPTION) - break - else: - await partition_processor.close(reason=CloseReason.SHUTDOWN) - log.info( - "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " - "has been shutdown", - partition_processor._checkpoint_manager._instance_id, - partition_processor._eventhub_name, - partition_processor._partition_id, - partition_processor._consumer_group_name - ) + await partition_processor.process_error(exp) + # TODO: will review whether to break and close partition processor after user's code has an exception # TODO: try to inform other EventProcessors to take the partition when this partition is closed in preview 3? + finally: + await partition_consumer.close() \ No newline at end of file diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py index ba017d36751b..8818a2b89f0f 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py @@ -5,11 +5,18 @@ from typing import List from abc import ABC, abstractmethod +from enum import Enum from .checkpoint_manager import CheckpointManager from azure.eventhub import EventData +class CloseReason(Enum): + SHUTDOWN = 0 # user call EventProcessor.stop() + OWNERSHIP_LOST = 1 # lose the ownership of a partition. + EVENTHUB_EXCEPTION = 2 # Exception happens during receiving events + + class PartitionProcessor(ABC): def __init__(self, eventhub_name, consumer_group_name, partition_id, checkpoint_manager: CheckpointManager): self._partition_id = partition_id @@ -20,8 +27,8 @@ def __init__(self, eventhub_name, consumer_group_name, partition_id, checkpoint_ async def close(self, reason): """Called when EventProcessor stops processing this PartitionProcessor. - There are four different reasons to trigger the PartitionProcessor to close. - Refer to enum class CloseReason of close_reason.py + There are different reasons to trigger the PartitionProcessor to close. + Refer to enum class CloseReason """ pass @@ -32,3 +39,9 @@ async def process_events(self, events: List[EventData]): """ pass + + async def process_error(self, error): + """Called when an error happens + + """ + pass diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/close_reason.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/utils.py similarity index 51% rename from sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/close_reason.py rename to sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/utils.py index dc308141a023..368cd8469f10 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/close_reason.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/utils.py @@ -3,11 +3,14 @@ # Licensed under the MIT License. See License.txt in the project root for license information. 
# ----------------------------------------------------------------------------------- -from enum import Enum +import asyncio -class CloseReason(Enum): - SHUTDOWN = 0 # user call EventProcessor.stop() - LEASE_LOST = 1 # lose the ownership of a partition. - EVENTHUB_EXCEPTION = 2 # Exception happens during receiving events - USER_EXCEPTION = 3 # user's code in EventProcessor.process_events() raises an exception +def get_running_loop(): + try: + return asyncio.get_running_loop() + except AttributeError: # 3.5 / 3.6 + loop = asyncio._get_running_loop() # pylint: disable=protected-access + if loop is None: + raise RuntimeError('No running event loop') + return loop From dd1d7ae67f7c4cfc0d34cb8e18555204a349cbb9 Mon Sep 17 00:00:00 2001 From: yijxie Date: Wed, 31 Jul 2019 15:25:58 -0700 Subject: [PATCH 26/42] raise asyncio.CancelledError out instead of supressing it. --- sdk/eventhub/azure-eventhubs/azure/eventhub/aio/error_async.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/error_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/error_async.py index 957a3005662e..b44f8cb54a33 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/error_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/error_async.py @@ -33,6 +33,8 @@ def _create_eventhub_exception(exception): async def _handle_exception(exception, retry_count, max_retries, closable, timeout_time=None): + if isinstance(exception, asyncio.CancelledError): + raise try: name = closable.name except AttributeError: From a31ee6755ed1c3963579463c3eb281cdba98711f Mon Sep 17 00:00:00 2001 From: Yunhao Ling <47871814+yunhaoling@users.noreply.github.com> Date: Wed, 31 Jul 2019 15:57:17 -0700 Subject: [PATCH 27/42] Update livetest and small fixed (#6594) * Add missing close in livetest * Update livetest to wait longer * Close handler each time before retry --- .../azure/eventhub/_consumer_producer_mixin.py | 2 ++ .../azure/eventhub/aio/_consumer_producer_mixin_async.py | 2 ++ sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py | 2 +- .../azure-eventhubs/tests/asynctests/test_negative_async.py | 6 ++++++ sdk/eventhub/azure-eventhubs/tests/test_negative.py | 4 +++- sdk/eventhub/azure-eventhubs/tests/test_receive.py | 4 ++-- 6 files changed, 16 insertions(+), 4 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py index ff1a921d9220..341639213569 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py @@ -64,6 +64,8 @@ def _open(self, timeout_time=None): """ # pylint: disable=protected-access if not self.running: + if self._handler: + self._handler.close() if self.redirected: alt_creds = { "username": self.client._auth_config.get("iot_username"), diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py index 68587637f1c3..23b8bd6a8fa6 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py @@ -65,6 +65,8 @@ async def _open(self, timeout_time=None): """ # pylint: disable=protected-access if not self.running: + if self._handler: + await self._handler.close_async() if self.redirected: alt_creds = { 
"username": self.client._auth_config.get("iot_username"), diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py index c4a30d81b189..570bd8609964 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py @@ -223,7 +223,7 @@ def send(self, event_data, partition_key=None, timeout=None): wrapper_event_data = event_data else: if partition_key: - event_data = self._set_partition_key(event_data, partition_key) + event_data = _set_partition_key(event_data, partition_key) wrapper_event_data = EventDataBatch._from_batch(event_data, partition_key) # pylint: disable=protected-access wrapper_event_data.message.on_send_complete = self._on_outcome self.unsent_events = [wrapper_event_data.message] diff --git a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py index 0ab4fe53f006..4406da855f59 100644 --- a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py +++ b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py @@ -30,6 +30,7 @@ async def test_send_with_invalid_hostname_async(invalid_hostname, connstr_receiv sender = client.create_producer() with pytest.raises(AuthenticationError): await sender.send(EventData("test data")) + await sender.close() @pytest.mark.liveTest @@ -39,6 +40,7 @@ async def test_receive_with_invalid_hostname_async(invalid_hostname): receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) with pytest.raises(AuthenticationError): await receiver.receive(timeout=3) + await receiver.close() @pytest.mark.liveTest @@ -49,6 +51,7 @@ async def test_send_with_invalid_key_async(invalid_key, connstr_receivers): sender = client.create_producer() with pytest.raises(AuthenticationError): await sender.send(EventData("test data")) + await sender.close() @pytest.mark.liveTest @@ -58,6 +61,7 @@ async def test_receive_with_invalid_key_async(invalid_key): receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) with pytest.raises(AuthenticationError): await receiver.receive(timeout=3) + await receiver.close() @pytest.mark.liveTest @@ -68,6 +72,7 @@ async def test_send_with_invalid_policy_async(invalid_policy, connstr_receivers) sender = client.create_producer() with pytest.raises(AuthenticationError): await sender.send(EventData("test data")) + await sender.close() @pytest.mark.liveTest @@ -77,6 +82,7 @@ async def test_receive_with_invalid_policy_async(invalid_policy): receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) with pytest.raises(AuthenticationError): await receiver.receive(timeout=3) + await receiver.close() @pytest.mark.liveTest diff --git a/sdk/eventhub/azure-eventhubs/tests/test_negative.py b/sdk/eventhub/azure-eventhubs/tests/test_negative.py index 3682461f9db2..a1fee7605818 100644 --- a/sdk/eventhub/azure-eventhubs/tests/test_negative.py +++ b/sdk/eventhub/azure-eventhubs/tests/test_negative.py @@ -27,6 +27,7 @@ def test_send_with_invalid_hostname(invalid_hostname, connstr_receivers): sender = client.create_producer() with pytest.raises(AuthenticationError): sender.send(EventData("test data")) + sender.close() @pytest.mark.liveTest @@ -47,6 +48,7 @@ def test_send_with_invalid_key(invalid_key, connstr_receivers): sender.send(EventData("test data")) 
sender.close() + @pytest.mark.liveTest def test_receive_with_invalid_key_sync(invalid_key): client = EventHubClient.from_connection_string(invalid_key, network_tracing=False) @@ -96,13 +98,13 @@ def test_non_existing_entity_sender(connection_str): sender = client.create_producer(partition_id="1") with pytest.raises(AuthenticationError): sender.send(EventData("test data")) + sender.close() @pytest.mark.liveTest def test_non_existing_entity_receiver(connection_str): client = EventHubClient.from_connection_string(connection_str, event_hub_path="nemo", network_tracing=False) receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) - with pytest.raises(AuthenticationError): receiver.receive(timeout=5) receiver.close() diff --git a/sdk/eventhub/azure-eventhubs/tests/test_receive.py b/sdk/eventhub/azure-eventhubs/tests/test_receive.py index 35c5e39c992b..d241a8e6e585 100644 --- a/sdk/eventhub/azure-eventhubs/tests/test_receive.py +++ b/sdk/eventhub/azure-eventhubs/tests/test_receive.py @@ -148,10 +148,10 @@ def test_receive_with_custom_datetime_sync(connstr_senders): receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition(offset)) with receiver: all_received = [] - received = receiver.receive(timeout=1) + received = receiver.receive(timeout=5) while received: all_received.extend(received) - received = receiver.receive(timeout=1) + received = receiver.receive(timeout=5) assert len(all_received) == 5 for received_event in all_received: From 997dacf9480a40ac028ebda646bdd8b6222e8d38 Mon Sep 17 00:00:00 2001 From: yijxie Date: Thu, 1 Aug 2019 17:51:55 -0700 Subject: [PATCH 28/42] Fix feedback from PR (1) --- .../eventprocessor/event_processor.py | 53 ++++++++++++------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py index 6bd0c49170a2..1218cc572308 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -20,7 +20,7 @@ class EventProcessor(object): def __init__(self, eventhub_client: EventHubClient, consumer_group_name: str, - partition_processor_factory: Callable[..., PartitionProcessor], + partition_processor_factory: Callable[[str, str, str, CheckpointManager], PartitionProcessor], partition_manager: PartitionManager, **kwargs): """An EventProcessor automatically creates and runs consumers for all partitions of the eventhub. @@ -28,12 +28,13 @@ def __init__(self, eventhub_client: EventHubClient, consumer_group_name: str, If multiple EventProcessors are running for an event hub, they will automatically balance loading. This feature won't be availabe until preview 3. 
+ :param eventhub_client: an instance of azure.eventhub.aio.EventClient object :param consumer_group_name: the consumer group that is used to receive events from the event hub that the eventhub_client is going to receive events from - :param eventhub_client: an instance of azure.eventhub.aio.EventClient object - :param partition_processor_callable: a callable that is called to return a PartitionProcessor + :param partition_processor_factory: a callable (constructor, function, etc) that is called to return a PartitionProcessor :param partition_manager: an instance of a PartitionManager implementation :param initial_event_position: the offset to start a partition consumer if the partition has no checkpoint yet + :type initial_event_position: int or str """ self._consumer_group_name = consumer_group_name self._eventhub_client = eventhub_client @@ -56,20 +57,20 @@ async def start(self): """ log.info("EventProcessor %r is being started", self._instance_id) partition_ids = await self._eventhub_client.get_partition_ids() - self.partition_ids = partition_ids + self._partition_ids = partition_ids claimed_list = await self._claim_partitions() await self._start_claimed_partitions(claimed_list) async def stop(self): """Stop all the partition consumer - It sends out a cancellation token to stop all partitions' EventHubConsumer will stop receiving events. - + This method cancels tasks that are running EventHubConsumer.receive() for the owned partitions of this EventProcessor. """ for task in self._tasks: task.cancel() # It's not agreed whether a partition manager has method close(). log.info("EventProcessor %r has been cancelled", self._instance_id) + await asyncio.sleep(2) # give some time to finish after cancelled async def _claim_partitions(self): partitions_ownership = await self._partition_manager.list_ownership(self._eventhub_name, self._consumer_group_name) @@ -78,7 +79,7 @@ async def _claim_partitions(self): partitions_ownership_dict[ownership["partition_id"]] = ownership to_claim_list = [] - for pid in self.partition_ids: + for pid in self._partition_ids: p_ownership = partitions_ownership_dict.get(pid) if p_ownership: to_claim_list.append(p_ownership) @@ -114,11 +115,12 @@ async def _start_claimed_partitions(self, claimed_partitions): task = loop.create_task( _receive(consumer, partition_processor, self._receive_timeout)) self._tasks.append(task) - - await asyncio.gather(*self._tasks) - await self._partition_manager.close() - log.info("EventProcessor %r partition manager is closed", self._instance_id) - log.info("EventProcessor %r has stopped", self._instance_id) + try: + await asyncio.gather(*self._tasks, return_exceptions=True) + finally: + await self._partition_manager.close() + log.info("EventProcessor %r partition manager is closed", self._instance_id) + log.info("EventProcessor %r has stopped", self._instance_id) async def _receive(partition_consumer, partition_processor, receive_timeout): @@ -126,20 +128,21 @@ async def _receive(partition_consumer, partition_processor, receive_timeout): while True: try: events = await partition_consumer.receive(timeout=receive_timeout) - except asyncio.CancelledError: - await partition_processor.close(reason=CloseReason.SHUTDOWN) + except asyncio.CancelledError as cancelled_error: log.info( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " - "has been shutdown", + "is cancelled", partition_processor._checkpoint_manager._instance_id, partition_processor._eventhub_name, partition_processor._partition_id, 
partition_processor._consumer_group_name ) + await partition_processor.process_error(cancelled_error) + await partition_processor.close(reason=CloseReason.SHUTDOWN) break except EventHubError as eh_err: reason = CloseReason.LEASE_LOST if eh_err.error == "link:stolen" else CloseReason.EVENTHUB_EXCEPTION - log.info( + log.warning( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " "has met an exception receiving events. It's being closed. The exception is %r.", partition_processor._checkpoint_manager._instance_id, @@ -153,8 +156,20 @@ async def _receive(partition_consumer, partition_processor, receive_timeout): break try: await partition_processor.process_events(events) - except Exception as exp: # user code has caused an error + except asyncio.CancelledError as cancelled_error: log.info( + "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " + "is cancelled.", + partition_processor._checkpoint_manager._instance_id, + partition_processor._eventhub_name, + partition_processor._partition_id, + partition_processor._consumer_group_name + ) + await partition_processor.process_error(cancelled_error) + await partition_processor.close(reason=CloseReason.SHUTDOWN) + break + except Exception as exp: # user code has caused an error + log.warning( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " "has met an exception from user code process_events. It's being closed. The exception is %r.", partition_processor._checkpoint_manager._instance_id, @@ -164,7 +179,9 @@ async def _receive(partition_consumer, partition_processor, receive_timeout): exp ) await partition_processor.process_error(exp) + await partition_processor.close(reason=CloseReason.EVENTHUB_EXCEPTION) + break # TODO: will review whether to break and close partition processor after user's code has an exception # TODO: try to inform other EventProcessors to take the partition when this partition is closed in preview 3? finally: - await partition_consumer.close() \ No newline at end of file + await partition_consumer.close() From d688090f53e7d320b1d156b04b53d5e9502f87a0 Mon Sep 17 00:00:00 2001 From: yijxie Date: Thu, 1 Aug 2019 18:23:54 -0700 Subject: [PATCH 29/42] Revert "Merge branch 'eventhubs_dev' into eventhubs_eph" This reverts commit 19a55392b9a4ea9afa97e29b601a4bc67e61c779, reversing changes made to 9d18dd9dc6b9055915f1761895daf359672d0a87. 
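Taken together, the eventprocessor patches above describe the surface a user is expected to implement: a PartitionProcessor subclass driven by an EventProcessor that persists state through a PartitionManager. A minimal usage sketch, assuming only the names introduced in these patches (EventProcessor, PartitionProcessor, Sqlite3PartitionManager, CheckpointManager.update_checkpoint) and a placeholder connection string, might look like this:

import asyncio

from azure.eventhub.aio import EventHubClient
from azure.eventhub.eventprocessor import EventProcessor, PartitionProcessor, Sqlite3PartitionManager


class MyPartitionProcessor(PartitionProcessor):
    async def process_events(self, events):
        for event in events:
            print(self._partition_id, event.sequence_number)
        if events:
            # Persist progress via the CheckpointManager this processor was constructed with.
            await self._checkpoint_manager.update_checkpoint(events[-1].offset, events[-1].sequence_number)

    async def process_error(self, error):
        print("Partition", self._partition_id, "hit an error:", error)

    async def close(self, reason):
        print("Partition", self._partition_id, "closed, reason:", reason)


async def main():
    client = EventHubClient.from_connection_string("<connection-string>", event_hub_path="<eventhub-name>")
    partition_manager = Sqlite3PartitionManager()  # defaults to an in-memory sqlite database
    event_processor = EventProcessor(client, "$default", MyPartitionProcessor, partition_manager)
    run = asyncio.ensure_future(event_processor.start())
    await asyncio.sleep(60)        # let the processor receive for a while
    await event_processor.stop()   # cancels the per-partition receive tasks
    await run


if __name__ == "__main__":
    asyncio.get_event_loop().run_until_complete(main())
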
--- .../azure/eventhub/_consumer_producer_mixin.py | 2 -- .../azure/eventhub/aio/_consumer_producer_mixin_async.py | 2 -- sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py | 2 +- .../azure-eventhubs/tests/asynctests/test_negative_async.py | 6 ------ sdk/eventhub/azure-eventhubs/tests/test_negative.py | 4 +--- sdk/eventhub/azure-eventhubs/tests/test_receive.py | 4 ++-- 6 files changed, 4 insertions(+), 16 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py index 341639213569..ff1a921d9220 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py @@ -64,8 +64,6 @@ def _open(self, timeout_time=None): """ # pylint: disable=protected-access if not self.running: - if self._handler: - self._handler.close() if self.redirected: alt_creds = { "username": self.client._auth_config.get("iot_username"), diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py index 23b8bd6a8fa6..68587637f1c3 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py @@ -65,8 +65,6 @@ async def _open(self, timeout_time=None): """ # pylint: disable=protected-access if not self.running: - if self._handler: - await self._handler.close_async() if self.redirected: alt_creds = { "username": self.client._auth_config.get("iot_username"), diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py index 570bd8609964..c4a30d81b189 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py @@ -223,7 +223,7 @@ def send(self, event_data, partition_key=None, timeout=None): wrapper_event_data = event_data else: if partition_key: - event_data = _set_partition_key(event_data, partition_key) + event_data = self._set_partition_key(event_data, partition_key) wrapper_event_data = EventDataBatch._from_batch(event_data, partition_key) # pylint: disable=protected-access wrapper_event_data.message.on_send_complete = self._on_outcome self.unsent_events = [wrapper_event_data.message] diff --git a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py index 4406da855f59..0ab4fe53f006 100644 --- a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py +++ b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py @@ -30,7 +30,6 @@ async def test_send_with_invalid_hostname_async(invalid_hostname, connstr_receiv sender = client.create_producer() with pytest.raises(AuthenticationError): await sender.send(EventData("test data")) - await sender.close() @pytest.mark.liveTest @@ -40,7 +39,6 @@ async def test_receive_with_invalid_hostname_async(invalid_hostname): receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) with pytest.raises(AuthenticationError): await receiver.receive(timeout=3) - await receiver.close() @pytest.mark.liveTest @@ -51,7 +49,6 @@ async def test_send_with_invalid_key_async(invalid_key, connstr_receivers): sender = client.create_producer() with 
pytest.raises(AuthenticationError): await sender.send(EventData("test data")) - await sender.close() @pytest.mark.liveTest @@ -61,7 +58,6 @@ async def test_receive_with_invalid_key_async(invalid_key): receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) with pytest.raises(AuthenticationError): await receiver.receive(timeout=3) - await receiver.close() @pytest.mark.liveTest @@ -72,7 +68,6 @@ async def test_send_with_invalid_policy_async(invalid_policy, connstr_receivers) sender = client.create_producer() with pytest.raises(AuthenticationError): await sender.send(EventData("test data")) - await sender.close() @pytest.mark.liveTest @@ -82,7 +77,6 @@ async def test_receive_with_invalid_policy_async(invalid_policy): receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) with pytest.raises(AuthenticationError): await receiver.receive(timeout=3) - await receiver.close() @pytest.mark.liveTest diff --git a/sdk/eventhub/azure-eventhubs/tests/test_negative.py b/sdk/eventhub/azure-eventhubs/tests/test_negative.py index a1fee7605818..3682461f9db2 100644 --- a/sdk/eventhub/azure-eventhubs/tests/test_negative.py +++ b/sdk/eventhub/azure-eventhubs/tests/test_negative.py @@ -27,7 +27,6 @@ def test_send_with_invalid_hostname(invalid_hostname, connstr_receivers): sender = client.create_producer() with pytest.raises(AuthenticationError): sender.send(EventData("test data")) - sender.close() @pytest.mark.liveTest @@ -48,7 +47,6 @@ def test_send_with_invalid_key(invalid_key, connstr_receivers): sender.send(EventData("test data")) sender.close() - @pytest.mark.liveTest def test_receive_with_invalid_key_sync(invalid_key): client = EventHubClient.from_connection_string(invalid_key, network_tracing=False) @@ -98,13 +96,13 @@ def test_non_existing_entity_sender(connection_str): sender = client.create_producer(partition_id="1") with pytest.raises(AuthenticationError): sender.send(EventData("test data")) - sender.close() @pytest.mark.liveTest def test_non_existing_entity_receiver(connection_str): client = EventHubClient.from_connection_string(connection_str, event_hub_path="nemo", network_tracing=False) receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) + with pytest.raises(AuthenticationError): receiver.receive(timeout=5) receiver.close() diff --git a/sdk/eventhub/azure-eventhubs/tests/test_receive.py b/sdk/eventhub/azure-eventhubs/tests/test_receive.py index d241a8e6e585..35c5e39c992b 100644 --- a/sdk/eventhub/azure-eventhubs/tests/test_receive.py +++ b/sdk/eventhub/azure-eventhubs/tests/test_receive.py @@ -148,10 +148,10 @@ def test_receive_with_custom_datetime_sync(connstr_senders): receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition(offset)) with receiver: all_received = [] - received = receiver.receive(timeout=5) + received = receiver.receive(timeout=1) while received: all_received.extend(received) - received = receiver.receive(timeout=5) + received = receiver.receive(timeout=1) assert len(all_received) == 5 for received_event in all_received: From 2399dcb8c4475c52bfeb71a698142a43f4b582de Mon Sep 17 00:00:00 2001 From: yijxie Date: Thu, 1 Aug 2019 18:51:04 -0700 Subject: [PATCH 30/42] Fix feedback from PR (2) --- .../eventprocessor/checkpoint_manager.py | 7 +++-- .../eventprocessor/event_processor.py | 12 ++++---- .../sqlite3_partition_manager.py | 30 +++++-------------- 3 
files changed, 17 insertions(+), 32 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py index a381400074a8..7062fe3aad3b 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py @@ -25,6 +25,7 @@ async def update_checkpoint(self, offset, sequence_number): :param sequence_number: sequence_number of the processed EventData :return: None """ - await self._partition_manager.\ - update_checkpoint(self._eventhub_name, self._consumer_group_name, self._partition_id, self._instance_id, - offset, sequence_number) + await self._partition_manager.update_checkpoint( + self._eventhub_name, self._consumer_group_name, self._partition_id, self._instance_id, offset, + sequence_number + ) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py index 1218cc572308..2ce25ed44245 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -17,6 +17,8 @@ log = logging.getLogger(__name__) +OWNER_LEVEL = 0 + class EventProcessor(object): def __init__(self, eventhub_client: EventHubClient, consumer_group_name: str, @@ -64,7 +66,7 @@ async def start(self): async def stop(self): """Stop all the partition consumer - This method cancels tasks that are running EventHubConsumer.receive() for the owned partitions of this EventProcessor. + This method cancels tasks that are running EventHubConsumer.receive() for the partitions owned by this EventProcessor. """ for task in self._tasks: task.cancel() @@ -84,12 +86,8 @@ async def _claim_partitions(self): if p_ownership: to_claim_list.append(p_ownership) else: - new_ownership = dict() - new_ownership["eventhub_name"] = self._eventhub_name - new_ownership["consumer_group_name"] = self._consumer_group_name - new_ownership["instance_id"] = self._instance_id - new_ownership["partition_id"] = pid - new_ownership["owner_level"] = 1 # will increment in preview 3 + new_ownership = {"eventhub_name": self._eventhub_name, "consumer_group_name": self._consumer_group_name, + "instance_id": self._instance_id, "partition_id": pid, "owner_level": OWNER_LEVEL} to_claim_list.append(new_ownership) claimed_list = await self._partition_manager.claim_ownership(to_claim_list) return claimed_list diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py index 3030f972f6c8..8339a237cf79 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py @@ -15,7 +15,7 @@ class Sqlite3PartitionManager(PartitionManager): """ - def __init__(self, db_filename, ownership_table="ownership"): + def __init__(self, db_filename=":memory:", ownership_table="ownership"): """ :param db_filename: name of file that saves the sql data. 
@@ -46,31 +46,17 @@ def __init__(self, db_filename, ownership_table="ownership"): async def list_ownership(self, eventhub_name, consumer_group_name): cursor = self.conn.cursor() try: - cursor.execute("select " - "eventhub_name, " - "consumer_group_name," - "instance_id," - "partition_id," - "owner_level," - "sequence_number," - "offset," - "last_modified_time," - "etag " - "from "+self.ownership_table+" where eventhub_name=? " + fields = ["eventhub_name", "consumer_group_name", "instance_id", "partition_id", "owner_level", + "sequence_number", + "offset", "last_modified_time", "etag"] + cursor.execute("select " + ",".join(fields) + + " from "+self.ownership_table+" where eventhub_name=? " "and consumer_group_name=?", (eventhub_name, consumer_group_name)) result_list = [] + for row in cursor.fetchall(): - d = dict() - d["eventhub_name"] = row[0] - d["consumer_group_name"] = row[1] - d["instance_id"] = row[2] - d["partition_id"] = row[3] - d["owner_level"] = row[4] - d["sequence_number"] = row[5] - d["offset"] = row[6] - d["last_modified_time"] = row[7] - d["etag"] = row[8] + d = dict(zip(fields, row)) result_list.append(d) return result_list finally: From 5ad02559779ab23b8d9f94cba7fe569dbad2aac8 Mon Sep 17 00:00:00 2001 From: Yunhao Ling <47871814+yunhaoling@users.noreply.github.com> Date: Thu, 1 Aug 2019 18:56:22 -0700 Subject: [PATCH 31/42] Update code according to the review (#6623) * Wait longer for reconnect op * Raise authentication error when open timeout * Optimize retry decorator * Update code according to review * Small fix --- .../eventhub/_consumer_producer_mixin.py | 15 ++++++--- .../aio/_consumer_producer_mixin_async.py | 15 ++++++--- .../azure/eventhub/aio/client_async.py | 4 +-- .../azure/eventhub/aio/consumer_async.py | 10 +++--- .../azure/eventhub/aio/producer_async.py | 16 +++------ .../azure-eventhubs/azure/eventhub/client.py | 2 +- .../azure/eventhub/client_abstract.py | 6 ++-- .../azure-eventhubs/azure/eventhub/common.py | 33 ++++++++++--------- .../azure/eventhub/consumer.py | 7 ++-- .../azure/eventhub/producer.py | 14 ++------ .../tests/asynctests/test_reconnect_async.py | 2 +- .../tests/asynctests/test_send_async.py | 2 +- .../azure-eventhubs/tests/test_reconnect.py | 2 +- .../azure-eventhubs/tests/test_send.py | 2 +- 14 files changed, 61 insertions(+), 69 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py index 341639213569..bebef7a51982 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py @@ -7,26 +7,27 @@ import logging import time -from uamqp import errors, constants +from uamqp import errors, constants, compat from azure.eventhub.error import EventHubError, _handle_exception log = logging.getLogger(__name__) def _retry_decorator(to_be_wrapped_func): - def wrapped_func(*args, **kwargs): + def wrapped_func(self, *args, **kwargs): timeout = kwargs.get("timeout", None) if not timeout: timeout = 100000 # timeout None or 0 mean no timeout. 
100000 seconds is equivalent to no timeout timeout_time = time.time() + timeout - max_retries = args[0].client.config.max_retries + max_retries = self.client.config.max_retries retry_count = 0 last_exception = None + kwargs.pop("timeout", None) while True: try: - return to_be_wrapped_func(args[0], timeout_time=timeout_time, last_exception=last_exception, **kwargs) + return to_be_wrapped_func(timeout_time=timeout_time, last_exception=last_exception, **kwargs) except Exception as exception: - last_exception = args[0]._handle_exception(exception, retry_count, max_retries, timeout_time) + last_exception = self._handle_exception(exception, retry_count, max_retries, timeout_time) retry_count += 1 return wrapped_func @@ -92,6 +93,10 @@ def _close_connection(self): self.client._conn_manager.reset_connection_if_broken() def _handle_exception(self, exception, retry_count, max_retries, timeout_time): + if not self.running and isinstance(exception, compat.TimeoutException): + exception = errors.AuthenticationException("Authorization timeout.") + return _handle_exception(exception, retry_count, max_retries, self, timeout_time) + return _handle_exception(exception, retry_count, max_retries, self, timeout_time) def close(self, exception=None): diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py index 23b8bd6a8fa6..aa539110e50a 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py @@ -6,7 +6,7 @@ import logging import time -from uamqp import errors, constants +from uamqp import errors, constants, compat from azure.eventhub.error import EventHubError, ConnectError from ..aio.error_async import _handle_exception @@ -14,19 +14,20 @@ def _retry_decorator(to_be_wrapped_func): - async def wrapped_func(*args, **kwargs): + async def wrapped_func(self, *args, **kwargs): timeout = kwargs.get("timeout", None) if not timeout: timeout = 100000 # timeout None or 0 mean no timeout. 
100000 seconds is equivalent to no timeout timeout_time = time.time() + timeout - max_retries = args[0].client.config.max_retries + max_retries = self.client.config.max_retries retry_count = 0 last_exception = None + kwargs.pop("timeout", None) while True: try: - return await to_be_wrapped_func(args[0], timeout_time=timeout_time, last_exception=last_exception, **kwargs) + return await to_be_wrapped_func(timeout_time=timeout_time, last_exception=last_exception, **kwargs) except Exception as exception: - last_exception = await args[0]._handle_exception(exception, retry_count, max_retries, timeout_time) + last_exception = await self._handle_exception(exception, retry_count, max_retries, timeout_time) retry_count += 1 return wrapped_func @@ -93,6 +94,10 @@ async def _close_connection(self): await self.client._conn_manager.reset_connection_if_broken() async def _handle_exception(self, exception, retry_count, max_retries, timeout_time): + if not self.running and isinstance(exception, compat.TimeoutException): + exception = errors.AuthenticationException("Authorization timeout.") + return await _handle_exception(exception, retry_count, max_retries, self, timeout_time) + return await _handle_exception(exception, retry_count, max_retries, self, timeout_time) async def close(self, exception=None): diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/client_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/client_async.py index 57748525fdc5..381ed9cb5dd6 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/client_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/client_async.py @@ -191,7 +191,7 @@ async def get_partition_properties(self, partition): return output def create_consumer(self, consumer_group, partition_id, event_position, **kwargs): - # type: (str, str, EventPosition, int, str, int, asyncio.AbstractEventLoop) -> EventHubConsumer + # type: (str, str, EventPosition) -> EventHubConsumer """ Create an async consumer to the client for a particular consumer group and partition. @@ -236,7 +236,7 @@ def create_consumer(self, consumer_group, partition_id, event_position, **kwargs prefetch=prefetch, loop=loop) return handler - def create_producer(self, partition_id=None, operation=None, send_timeout=None, loop=None): + def create_producer(self, *, partition_id=None, operation=None, send_timeout=None, loop=None): # type: (str, str, float, asyncio.AbstractEventLoop) -> EventHubProducer """ Create an async producer to send EventData object to an EventHub. 
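As a rough sketch of call sites consistent with the keyword-only async client signatures above (placeholder connection string, names taken from this patch series):

import asyncio

from azure.eventhub import EventData, EventPosition
from azure.eventhub.aio import EventHubClient


async def demo():
    client = EventHubClient.from_connection_string("<connection-string>", event_hub_path="<eventhub-name>")
    producer = client.create_producer(send_timeout=60)  # optional producer settings are keyword-only
    consumer = client.create_consumer(consumer_group="$default", partition_id="0",
                                      event_position=EventPosition("-1"))
    try:
        await producer.send(EventData("hello"), partition_key="key", timeout=30)
        events = await consumer.receive(max_batch_size=10, timeout=5)
        print(len(events))
    finally:
        await producer.close()
        await consumer.close()


if __name__ == "__main__":
    asyncio.get_event_loop().run_until_complete(demo())
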
diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py index 40324827b2f4..8457913abcf0 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py @@ -100,7 +100,7 @@ async def __anext__(self): if not self.messages_iter: self.messages_iter = self._handler.receive_messages_iter_async() message = await self.messages_iter.__anext__() - event_data = EventData(message=message) + event_data = EventData._from_message(message) self.offset = EventPosition(event_data.offset, inclusive=False) retry_count = 0 return event_data @@ -147,7 +147,6 @@ async def _open(self, timeout_time=None): self.source = self.redirected.address await super(EventHubConsumer, self)._open(timeout_time) - @_retry_decorator async def _receive(self, **kwargs): timeout_time = kwargs.get("timeout_time") last_exception = kwargs.get("last_exception") @@ -167,7 +166,7 @@ async def _receive(self, **kwargs): max_batch_size=max_batch_size, timeout=remaining_time_ms) for message in message_batch: - event_data = EventData(message=message) + event_data = EventData._from_message(message) self.offset = EventPosition(event_data.offset) data_batch.append(event_data) return data_batch @@ -185,7 +184,7 @@ def queue_size(self): return self._handler._received_messages.qsize() return 0 - async def receive(self, max_batch_size=None, timeout=None): + async def receive(self, *, max_batch_size=None, timeout=None): # type: (int, float) -> List[EventData] """ Receive events asynchronously from the EventHub. @@ -218,7 +217,8 @@ async def receive(self, max_batch_size=None, timeout=None): max_batch_size = max_batch_size or min(self.client.config.max_batch_size, self.prefetch) data_batch = [] # type: List[EventData] - return await self._receive(timeout=timeout, max_batch_size=max_batch_size, data_batch=data_batch) + return await _retry_decorator(self._receive)(self, timeout=timeout, + max_batch_size=max_batch_size, data_batch=data_batch) async def close(self, exception=None): # type: (Exception) -> None diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py index bc132e0a46cc..e3cd1d9fcb09 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py @@ -110,11 +110,7 @@ async def _open(self, timeout_time=None, **kwargs): self.target = self.redirected.address await super(EventHubProducer, self)._open(timeout_time) - @_retry_decorator - async def _send_event_data(self, **kwargs): - timeout_time = kwargs.get("timeout_time") - last_exception = kwargs.get("last_exception") - + async def _send_event_data(self, timeout_time=None, last_exception=None): if self.unsent_events: await self._open(timeout_time) remaining_time = timeout_time - time.time() @@ -161,12 +157,8 @@ async def create_batch(self, max_size=None, partition_key=None): :rtype: ~azure.eventhub.EventDataBatch """ - @_retry_decorator - async def _wrapped_open(*args, **kwargs): - await self._open(**kwargs) - if not self._max_message_size_on_link: - await _wrapped_open(self, timeout=self.client.config.send_timeout) + await _retry_decorator(self._open)(self, timeout=self.client.config.send_timeout) if max_size and max_size > self._max_message_size_on_link: raise ValueError('Max message size: {} is too large, acceptable max batch size is: {} 
bytes.' @@ -174,7 +166,7 @@ async def _wrapped_open(*args, **kwargs): return EventDataBatch(max_size=(max_size or self._max_message_size_on_link), partition_key=partition_key) - async def send(self, event_data, partition_key=None, timeout=None): + async def send(self, event_data, *, partition_key=None, timeout=None): # type:(Union[EventData, EventDataBatch, Iterable[EventData]], Union[str, bytes], float) -> None """ Sends an event data and blocks until acknowledgement is @@ -220,7 +212,7 @@ async def send(self, event_data, partition_key=None, timeout=None): wrapper_event_data = EventDataBatch._from_batch(event_data, partition_key) # pylint: disable=protected-access wrapper_event_data.message.on_send_complete = self._on_outcome self.unsent_events = [wrapper_event_data.message] - await self._send_event_data(timeout) + await _retry_decorator(self._send_event_data)(self, timeout=timeout) async def close(self, exception=None): # type: (Exception) -> None diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py index 52ce44cf23a9..706db5498fe3 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py @@ -242,7 +242,7 @@ def create_consumer(self, consumer_group, partition_id, event_position, **kwargs return handler def create_producer(self, partition_id=None, operation=None, send_timeout=None): - # type: (str, str, float) -> EventHubProducer + # type: (str, str, float, ...) -> EventHubProducer """ Create an producer to send EventData object to an EventHub. diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/client_abstract.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/client_abstract.py index 8c97797c01ff..d908d4702ac5 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/client_abstract.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/client_abstract.py @@ -282,11 +282,9 @@ def from_connection_string(cls, conn_str, **kwargs): return cls._from_iothub_connection_string(conn_str, **kwargs) @abstractmethod - def create_consumer( - self, consumer_group, partition_id, event_position, **kwargs - ): + def create_consumer(self, consumer_group, partition_id, event_position, **kwargs): pass @abstractmethod - def create_producer(self, **kwargs): + def create_producer(self, partition_id=None, operation=None, send_timeout=None): pass diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/common.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/common.py index 0ac743bd91d2..701b45484d75 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/common.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/common.py @@ -57,7 +57,7 @@ class EventData(object): PROP_TIMESTAMP = b"x-opt-enqueued-time" PROP_DEVICE_ID = b"iothub-connection-device-id" - def __init__(self, body=None, to_device=None, message=None): + def __init__(self, body=None, to_device=None): """ Initialize EventData. @@ -67,8 +67,6 @@ def __init__(self, body=None, to_device=None, message=None): :type batch: Generator :param to_device: An IoT device to route to. :type to_device: str - :param message: The received message. 
- :type message: ~uamqp.message.Message """ self._partition_key = types.AMQPSymbol(EventData.PROP_PARTITION_KEY) @@ -77,20 +75,14 @@ def __init__(self, body=None, to_device=None, message=None): self.msg_properties = MessageProperties() if to_device: self.msg_properties.to = '/devices/{}/messages/devicebound'.format(to_device) - if message: - self.message = message - self.msg_properties = message.properties - self._annotations = message.annotations - self._app_properties = message.application_properties + if body and isinstance(body, list): + self.message = Message(body[0], properties=self.msg_properties) + for more in body[1:]: + self.message._body.append(more) # pylint: disable=protected-access + elif body is None: + raise ValueError("EventData cannot be None.") else: - if body and isinstance(body, list): - self.message = Message(body[0], properties=self.msg_properties) - for more in body[1:]: - self.message._body.append(more) # pylint: disable=protected-access - elif body is None: - raise ValueError("EventData cannot be None.") - else: - self.message = Message(body, properties=self.msg_properties) + self.message = Message(body, properties=self.msg_properties) def __str__(self): dic = { @@ -125,6 +117,15 @@ def _set_partition_key(self, value): self.message.header = header self._annotations = annotations + @staticmethod + def _from_message(message): + event_data = EventData(body='') + event_data.message = message + event_data.msg_properties = message.properties + event_data._annotations = message.annotations + event_data._app_properties = message.application_properties + return event_data + @property def sequence_number(self): """ diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py index 03827f546c0f..33a0e8ed187e 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py @@ -96,7 +96,7 @@ def __next__(self): if not self.messages_iter: self.messages_iter = self._handler.receive_messages_iter() message = next(self.messages_iter) - event_data = EventData(message=message) + event_data = EventData._from_message(message) self.offset = EventPosition(event_data.offset, inclusive=False) retry_count = 0 return event_data @@ -142,7 +142,6 @@ def _open(self, timeout_time=None): self.source = self.redirected.address super(EventHubConsumer, self)._open(timeout_time) - @_retry_decorator def _receive(self, **kwargs): timeout_time = kwargs.get("timeout_time") last_exception = kwargs.get("last_exception") @@ -161,7 +160,7 @@ def _receive(self, **kwargs): max_batch_size=max_batch_size - (len(data_batch) if data_batch else 0), timeout=remaining_time_ms) for message in message_batch: - event_data = EventData(message=message) + event_data = EventData._from_message(message) self.offset = EventPosition(event_data.offset) data_batch.append(event_data) return data_batch @@ -211,7 +210,7 @@ def receive(self, max_batch_size=None, timeout=None): max_batch_size = max_batch_size or min(self.client.config.max_batch_size, self.prefetch) data_batch = [] # type: List[EventData] - return self._receive(timeout=timeout, max_batch_size=max_batch_size, data_batch=data_batch) + return _retry_decorator(self._receive)(self, timeout=timeout, max_batch_size=max_batch_size, data_batch=data_batch) def close(self, exception=None): # type:(Exception) -> None diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py index 
570bd8609964..dd4b7aa2396a 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py @@ -117,11 +117,7 @@ def _open(self, timeout_time=None, **kwargs): self.target = self.redirected.address super(EventHubProducer, self)._open(timeout_time) - @_retry_decorator - def _send_event_data(self, **kwargs): - timeout_time = kwargs.get("timeout_time") - last_exception = kwargs.get("last_exception") - + def _send_event_data(self, timeout_time=None, last_exception=None): if self.unsent_events: self._open(timeout_time) remaining_time = timeout_time - time.time() @@ -168,12 +164,8 @@ def create_batch(self, max_size=None, partition_key=None): :rtype: ~azure.eventhub.EventDataBatch """ - @_retry_decorator - def _wrapped_open(*args, **kwargs): - self._open(**kwargs) - if not self._max_message_size_on_link: - _wrapped_open(self, timeout=self.client.config.send_timeout) + _retry_decorator(self._open)(self, timeout=self.client.config.send_timeout) if max_size and max_size > self._max_message_size_on_link: raise ValueError('Max message size: {} is too large, acceptable max batch size is: {} bytes.' @@ -227,7 +219,7 @@ def send(self, event_data, partition_key=None, timeout=None): wrapper_event_data = EventDataBatch._from_batch(event_data, partition_key) # pylint: disable=protected-access wrapper_event_data.message.on_send_complete = self._on_outcome self.unsent_events = [wrapper_event_data.message] - self._send_event_data(timeout=timeout) + _retry_decorator(self._send_event_data)(self, timeout=timeout) def close(self, exception=None): # type:(Exception) -> None diff --git a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_reconnect_async.py b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_reconnect_async.py index 56c57924edde..05be713e2d8c 100644 --- a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_reconnect_async.py +++ b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_reconnect_async.py @@ -37,7 +37,7 @@ async def test_send_with_long_interval_async(connstr_receivers, sleep): for r in receivers: if not sleep: # if sender sleeps, the receivers will be disconnected. 
destroy connection to simulate r._handler._connection._conn.destroy() - received.extend(r.receive(timeout=3)) + received.extend(r.receive(timeout=5)) assert len(received) == 2 assert list(received[0].body)[0] == b"A single event" diff --git a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_send_async.py b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_send_async.py index c84268d15f21..aa301bad3119 100644 --- a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_send_async.py +++ b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_send_async.py @@ -255,7 +255,7 @@ async def test_send_with_create_event_batch_async(connstr_receivers): client = EventHubClient.from_connection_string(connection_str, transport_type=TransportType.AmqpOverWebsocket, network_tracing=False) sender = client.create_producer() - event_data_batch = await sender.create_batch(max_size=100 * 1024) + event_data_batch = await sender.create_batch(max_size=100000) while True: try: event_data_batch.try_add(EventData('A single event data')) diff --git a/sdk/eventhub/azure-eventhubs/tests/test_reconnect.py b/sdk/eventhub/azure-eventhubs/tests/test_reconnect.py index 223a759ea9c5..0796cee2178d 100644 --- a/sdk/eventhub/azure-eventhubs/tests/test_reconnect.py +++ b/sdk/eventhub/azure-eventhubs/tests/test_reconnect.py @@ -32,7 +32,7 @@ def test_send_with_long_interval_sync(connstr_receivers, sleep): for r in receivers: if not sleep: r._handler._connection._conn.destroy() - received.extend(r.receive(timeout=3)) + received.extend(r.receive(timeout=5)) assert len(received) == 2 assert list(received[0].body)[0] == b"A single event" diff --git a/sdk/eventhub/azure-eventhubs/tests/test_send.py b/sdk/eventhub/azure-eventhubs/tests/test_send.py index 3d7bc3815c22..8499ff93b36d 100644 --- a/sdk/eventhub/azure-eventhubs/tests/test_send.py +++ b/sdk/eventhub/azure-eventhubs/tests/test_send.py @@ -257,7 +257,7 @@ def test_send_with_create_event_batch_sync(connstr_receivers): client = EventHubClient.from_connection_string(connection_str, transport_type=TransportType.AmqpOverWebsocket, network_tracing=False) sender = client.create_producer() - event_data_batch = sender.create_batch(max_size=100 * 1024) + event_data_batch = sender.create_batch(max_size=100000) while True: try: event_data_batch.try_add(EventData('A single event data')) From 5679065e4f1575721747e33c1f74a632e3736ff6 Mon Sep 17 00:00:00 2001 From: yijxie Date: Thu, 1 Aug 2019 20:49:28 -0700 Subject: [PATCH 32/42] Fix feedback from PR (3) --- .../eventprocessor/checkpoint_manager.py | 6 ++--- .../eventprocessor/event_processor.py | 26 +++++++++++-------- .../eventprocessor/partition_manager.py | 4 +-- .../sqlite3_partition_manager.py | 14 +++++----- 4 files changed, 27 insertions(+), 23 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py index 7062fe3aad3b..653c058cb20c 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py @@ -11,11 +11,11 @@ class CheckpointManager(object): """Every PartitionProcessor has a CheckpointManager to save the partition's checkpoint. 
""" - def __init__(self, partition_id, eventhub_name, consumer_group_name, instance_id, partition_manager: PartitionManager): + def __init__(self, partition_id, eventhub_name, consumer_group_name, owner_id, partition_manager: PartitionManager): self._partition_id = partition_id self._eventhub_name = eventhub_name self._consumer_group_name = consumer_group_name - self._instance_id = instance_id + self._owner_id = owner_id self._partition_manager = partition_manager async def update_checkpoint(self, offset, sequence_number): @@ -26,6 +26,6 @@ async def update_checkpoint(self, offset, sequence_number): :return: None """ await self._partition_manager.update_checkpoint( - self._eventhub_name, self._consumer_group_name, self._partition_id, self._instance_id, offset, + self._eventhub_name, self._consumer_group_name, self._partition_id, self._owner_id, offset, sequence_number ) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py index 2ce25ed44245..0ee78af84b8a 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -47,9 +47,13 @@ def __init__(self, eventhub_client: EventHubClient, consumer_group_name: str, self._max_batch_size = eventhub_client.config.max_batch_size self._receive_timeout = eventhub_client.config.receive_timeout self._tasks: List[asyncio.Task] = [] - self._instance_id = str(uuid.uuid4()) + self._id = str(uuid.uuid4()) self._partition_ids = None + @property + def id(self): + return self._id + async def start(self): """Start the EventProcessor. 1. retrieve the partition ids from eventhubs @@ -57,7 +61,7 @@ async def start(self): 3. repeatedly call EvenHubConsumer.receive() to retrieve events and call user defined PartitionProcessor.process_events() """ - log.info("EventProcessor %r is being started", self._instance_id) + log.info("EventProcessor %r is being started", self._id) partition_ids = await self._eventhub_client.get_partition_ids() self._partition_ids = partition_ids claimed_list = await self._claim_partitions() @@ -71,7 +75,7 @@ async def stop(self): for task in self._tasks: task.cancel() # It's not agreed whether a partition manager has method close(). 
- log.info("EventProcessor %r has been cancelled", self._instance_id) + log.info("EventProcessor %r has been cancelled", self._id) await asyncio.sleep(2) # give some time to finish after cancelled async def _claim_partitions(self): @@ -87,7 +91,7 @@ async def _claim_partitions(self): to_claim_list.append(p_ownership) else: new_ownership = {"eventhub_name": self._eventhub_name, "consumer_group_name": self._consumer_group_name, - "instance_id": self._instance_id, "partition_id": pid, "owner_level": OWNER_LEVEL} + "owner_id": self._id, "partition_id": pid, "owner_level": OWNER_LEVEL} to_claim_list.append(new_ownership) claimed_list = await self._partition_manager.claim_ownership(to_claim_list) return claimed_list @@ -107,7 +111,7 @@ async def _start_claimed_partitions(self, claimed_partitions): consumer_group_name=self._consumer_group_name, partition_id=partition_id, checkpoint_manager=CheckpointManager(partition_id, self._eventhub_name, self._consumer_group_name, - self._instance_id, self._partition_manager) + self._id, self._partition_manager) ) loop = get_running_loop() task = loop.create_task( @@ -117,8 +121,8 @@ async def _start_claimed_partitions(self, claimed_partitions): await asyncio.gather(*self._tasks, return_exceptions=True) finally: await self._partition_manager.close() - log.info("EventProcessor %r partition manager is closed", self._instance_id) - log.info("EventProcessor %r has stopped", self._instance_id) + log.info("EventProcessor %r partition manager is closed", self._id) + log.info("EventProcessor %r has stopped", self._id) async def _receive(partition_consumer, partition_processor, receive_timeout): @@ -130,7 +134,7 @@ async def _receive(partition_consumer, partition_processor, receive_timeout): log.info( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " "is cancelled", - partition_processor._checkpoint_manager._instance_id, + partition_processor._checkpoint_manager._id, partition_processor._eventhub_name, partition_processor._partition_id, partition_processor._consumer_group_name @@ -143,7 +147,7 @@ async def _receive(partition_consumer, partition_processor, receive_timeout): log.warning( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " "has met an exception receiving events. It's being closed. The exception is %r.", - partition_processor._checkpoint_manager._instance_id, + partition_processor._checkpoint_manager._id, partition_processor._eventhub_name, partition_processor._partition_id, partition_processor._consumer_group_name, @@ -158,7 +162,7 @@ async def _receive(partition_consumer, partition_processor, receive_timeout): log.info( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " "is cancelled.", - partition_processor._checkpoint_manager._instance_id, + partition_processor._checkpoint_manager.owner_id, partition_processor._eventhub_name, partition_processor._partition_id, partition_processor._consumer_group_name @@ -170,7 +174,7 @@ async def _receive(partition_consumer, partition_processor, receive_timeout): log.warning( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " "has met an exception from user code process_events. It's being closed. 
The exception is %r.", - partition_processor._checkpoint_manager._instance_id, + partition_processor._checkpoint_manager._owner_id, partition_processor._eventhub_name, partition_processor._partition_id, partition_processor._consumer_group_name, diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_manager.py index b60f5e716869..99222e264029 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_manager.py @@ -21,7 +21,7 @@ async def list_ownership(self, eventhub_name: str, consumer_group_name: str) -> :return: Iterable of dictionaries containing the following partition ownership information: eventhub_name consumer_group_name - instance_id + owner_id partition_id owner_level offset @@ -36,7 +36,7 @@ async def claim_ownership(self, partitions: Iterable[Dict[str, Any]]) -> Iterabl pass @abstractmethod - async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_id, instance_id, + async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_id, owner_id, offset, sequence_number) -> None: pass diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py index 8339a237cf79..4ba7b489e487 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py @@ -30,7 +30,7 @@ def __init__(self, db_filename=":memory:", ownership_table="ownership"): c.execute("create table " + ownership_table + "(eventhub_name text," "consumer_group_name text," - "instance_id text," + "owner_id text," "partition_id text," "owner_level integer," "sequence_number integer," @@ -46,7 +46,7 @@ def __init__(self, db_filename=":memory:", ownership_table="ownership"): async def list_ownership(self, eventhub_name, consumer_group_name): cursor = self.conn.cursor() try: - fields = ["eventhub_name", "consumer_group_name", "instance_id", "partition_id", "owner_level", + fields = ["eventhub_name", "consumer_group_name", "owner_id", "partition_id", "owner_level", "sequence_number", "offset", "last_modified_time", "etag"] cursor.execute("select " + ",".join(fields) + @@ -74,22 +74,22 @@ async def claim_ownership(self, partitions): p["partition_id"])) if not cursor.fetchall(): cursor.execute("insert into " + self.ownership_table + - " (eventhub_name,consumer_group_name,partition_id,instance_id,owner_level,last_modified_time,etag) " + " (eventhub_name,consumer_group_name,partition_id,owner_id,owner_level,last_modified_time,etag) " "values (?,?,?,?,?,?,?)", - (p["eventhub_name"], p["consumer_group_name"], p["partition_id"], p["instance_id"], p["owner_level"], + (p["eventhub_name"], p["consumer_group_name"], p["partition_id"], p["owner_id"], p["owner_level"], time.time(), str(uuid.uuid4()) )) else: - cursor.execute("update "+self.ownership_table+" set instance_id=?, owner_level=?, last_modified_time=?, etag=? " + cursor.execute("update "+self.ownership_table+" set owner_id=?, owner_level=?, last_modified_time=?, etag=? " "where eventhub_name=? and consumer_group_name=? 
and partition_id=?", - (p["instance_id"], p["owner_level"], time.time(), str(uuid.uuid4()), + (p["owner_id"], p["owner_level"], time.time(), str(uuid.uuid4()), p["eventhub_name"], p["consumer_group_name"], p["partition_id"])) self.conn.commit() return partitions finally: cursor.close() - async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_id, instance_id, + async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_id, owner_id, offset, sequence_number): cursor = self.conn.cursor() try: From 83d0ec2acf7bb05b8e4e12d6984d8900006c5349 Mon Sep 17 00:00:00 2001 From: yijxie Date: Thu, 1 Aug 2019 23:54:29 -0700 Subject: [PATCH 33/42] small bug fixing --- .../eventprocessor/event_processor.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py index 0ee78af84b8a..9ec4d5fdf5ce 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -72,9 +72,9 @@ async def stop(self): This method cancels tasks that are running EventHubConsumer.receive() for the partitions owned by this EventProcessor. """ - for task in self._tasks: + for i in range(len(self._tasks)): + task = self._tasks.pop() task.cancel() - # It's not agreed whether a partition manager has method close(). log.info("EventProcessor %r has been cancelled", self._id) await asyncio.sleep(2) # give some time to finish after cancelled @@ -97,15 +97,11 @@ async def _claim_partitions(self): return claimed_list async def _start_claimed_partitions(self, claimed_partitions): - consumers = [] for partition in claimed_partitions: partition_id = partition["partition_id"] - offset = partition.get("offset") - offset = offset or self._initial_event_position + offset = partition.get("offset", self._initial_event_position) consumer = self._eventhub_client.create_consumer(self._consumer_group_name, partition_id, EventPosition(str(offset))) - consumers.append(consumer) - partition_processor = self._partition_processor_factory( eventhub_name=self._eventhub_name, consumer_group_name=self._consumer_group_name, @@ -118,8 +114,9 @@ async def _start_claimed_partitions(self, claimed_partitions): _receive(consumer, partition_processor, self._receive_timeout)) self._tasks.append(task) try: - await asyncio.gather(*self._tasks, return_exceptions=True) + await asyncio.gather(*self._tasks) finally: + # TODO: It's not agreed whether a partition manager has method close(). await self._partition_manager.close() log.info("EventProcessor %r partition manager is closed", self._id) log.info("EventProcessor %r has stopped", self._id) @@ -134,7 +131,7 @@ async def _receive(partition_consumer, partition_processor, receive_timeout): log.info( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " "is cancelled", - partition_processor._checkpoint_manager._id, + partition_processor._checkpoint_manager._owner_id, partition_processor._eventhub_name, partition_processor._partition_id, partition_processor._consumer_group_name @@ -147,7 +144,7 @@ async def _receive(partition_consumer, partition_processor, receive_timeout): log.warning( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " "has met an exception receiving events. It's being closed. 
The exception is %r.", - partition_processor._checkpoint_manager._id, + partition_processor._checkpoint_manager._owner_id, partition_processor._eventhub_name, partition_processor._partition_id, partition_processor._consumer_group_name, @@ -162,7 +159,7 @@ async def _receive(partition_consumer, partition_processor, receive_timeout): log.info( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " "is cancelled.", - partition_processor._checkpoint_manager.owner_id, + partition_processor._checkpoint_manager._owner_id, partition_processor._eventhub_name, partition_processor._partition_id, partition_processor._consumer_group_name From a2195baf463ee4040b5795cb53ace30e29d41920 Mon Sep 17 00:00:00 2001 From: yijxie Date: Fri, 2 Aug 2019 11:07:10 -0700 Subject: [PATCH 34/42] Remove old EPH --- .../azure/eventprocessorhost/__init__.py | 21 - .../abstract_checkpoint_manager.py | 72 - .../abstract_event_processor.py | 58 - .../abstract_lease_manager.py | 134 - .../eventprocessorhost/azure_blob_lease.py | 72 - .../azure_storage_checkpoint_manager.py | 487 --- .../eventprocessorhost/cancellation_token.py | 20 - .../azure/eventprocessorhost/checkpoint.py | 34 - .../azure/eventprocessorhost/eh_config.py | 71 - .../eventprocessorhost/eh_partition_pump.py | 170 - .../azure/eventprocessorhost/eph.py | 110 - .../azure/eventprocessorhost/lease.py | 60 - .../eventprocessorhost/partition_context.py | 155 - .../eventprocessorhost/partition_manager.py | 364 -- .../eventprocessorhost/partition_pump.py | 159 - .../eventprocessorhost/vendor/__init__.py | 5 - .../vendor/storage/__init__.py | 5 - .../vendor/storage/blob/__init__.py | 31 - .../vendor/storage/blob/_constants.py | 14 - .../vendor/storage/blob/_deserialization.py | 556 --- .../vendor/storage/blob/_download_chunking.py | 178 - .../vendor/storage/blob/_encryption.py | 187 - .../vendor/storage/blob/_error.py | 29 - .../vendor/storage/blob/_serialization.py | 153 - .../vendor/storage/blob/_upload_chunking.py | 496 --- .../vendor/storage/blob/appendblobservice.py | 781 ---- .../vendor/storage/blob/baseblobservice.py | 3397 ----------------- .../vendor/storage/blob/blockblobservice.py | 1199 ------ .../vendor/storage/blob/models.py | 825 ---- .../vendor/storage/blob/pageblobservice.py | 1522 -------- .../storage/blob/sharedaccesssignature.py | 275 -- .../vendor/storage/common/__init__.py | 39 - .../vendor/storage/common/_auth.py | 129 - .../storage/common/_common_conversion.py | 126 - .../vendor/storage/common/_connection.py | 161 - .../vendor/storage/common/_constants.py | 51 - .../vendor/storage/common/_deserialization.py | 384 -- .../vendor/storage/common/_encryption.py | 233 -- .../vendor/storage/common/_error.py | 218 -- .../vendor/storage/common/_http/__init__.py | 74 - .../vendor/storage/common/_http/httpclient.py | 107 - .../vendor/storage/common/_serialization.py | 371 -- .../storage/common/cloudstorageaccount.py | 198 - .../vendor/storage/common/models.py | 672 ---- .../vendor/storage/common/retry.py | 306 -- .../storage/common/sharedaccesssignature.py | 180 - .../vendor/storage/common/storageclient.py | 440 --- .../vendor/storage/common/tokencredential.py | 48 - .../vendor/vendor_azure_storage_version.md | 2 - sdk/eventhub/azure-eventhubs/setup.py | 3 - 50 files changed, 15382 deletions(-) delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/__init__.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/abstract_checkpoint_manager.py delete mode 100644 
sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/abstract_event_processor.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/abstract_lease_manager.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/azure_blob_lease.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/azure_storage_checkpoint_manager.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/cancellation_token.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/checkpoint.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/eh_config.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/eh_partition_pump.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/eph.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/lease.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/partition_context.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/partition_manager.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/partition_pump.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/__init__.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/__init__.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/__init__.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_constants.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_deserialization.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_download_chunking.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_encryption.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_error.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_serialization.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_upload_chunking.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/appendblobservice.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/baseblobservice.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/blockblobservice.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/models.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/pageblobservice.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/sharedaccesssignature.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/__init__.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_auth.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_common_conversion.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_connection.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_constants.py delete mode 100644 
sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_deserialization.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_encryption.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_error.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_http/__init__.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_http/httpclient.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_serialization.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/cloudstorageaccount.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/models.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/retry.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/sharedaccesssignature.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/storageclient.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/tokencredential.py delete mode 100644 sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/vendor_azure_storage_version.md diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/__init__.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/__init__.py deleted file mode 100644 index c1905da23d12..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# -------------------------------------------------------------------------------------------- - -""" -The module provides a means to process Azure Event Hubs events at scale. 
-""" -try: - from azure.eventprocessorhost.abstract_event_processor import AbstractEventProcessor - from azure.eventprocessorhost.azure_storage_checkpoint_manager import AzureStorageCheckpointLeaseManager - from azure.eventprocessorhost.azure_blob_lease import AzureBlobLease - from azure.eventprocessorhost.checkpoint import Checkpoint - from azure.eventprocessorhost.eh_config import EventHubConfig - from azure.eventprocessorhost.eh_partition_pump import EventHubPartitionPump, PartitionReceiver - from azure.eventprocessorhost.eph import EventProcessorHost, EPHOptions - from azure.eventprocessorhost.partition_manager import PartitionManager - from azure.eventprocessorhost.partition_context import PartitionContext - from azure.eventprocessorhost.partition_pump import PartitionPump -except (SyntaxError, ImportError): - raise ImportError("EventProcessHost is only compatible with Python 3.5 and above.") diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/abstract_checkpoint_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/abstract_checkpoint_manager.py deleted file mode 100644 index b4828596542a..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/abstract_checkpoint_manager.py +++ /dev/null @@ -1,72 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ----------------------------------------------------------------------------------- - -""" -Author: Aaron (Ari) Bornstien -""" -from abc import ABC, abstractmethod - -class AbstractCheckpointManager(ABC): - """ - If you wish to have EventProcessorHost store checkpoints somewhere other than Azure Storage, - you can write your own checkpoint manager using this abstract class. - """ - def __init__(self): - pass - - @abstractmethod - async def create_checkpoint_store_if_not_exists_async(self): - """ - Create the checkpoint store if it doesn't exist. Do nothing if it does exist. - - :return: `True` if the checkpoint store already exists or was created OK, `False` - if there was a failure. - :rtype: bool - """ - - @abstractmethod - async def get_checkpoint_async(self, partition_id): - """ - Get the checkpoint data associated with the given partition. - Could return null if no checkpoint has been created for that partition. - - :param partition_id: The ID of a given parition. - :type partition_id: str - :return: Given partition checkpoint info, or `None` if none has been previously stored. - :rtype: ~azure.eventprocessorhost.checkpoint.Checkpoint - """ - - @abstractmethod - async def create_checkpoint_if_not_exists_async(self, partition_id): - """ - Create the given partition checkpoint if it doesn't exist.Do nothing if it does exist. - The offset/sequenceNumber for a freshly-created checkpoint should be set to StartOfStream/0. - - :param partition_id: The ID of a given parition. - :type partition_id: str - :return: The checkpoint for the given partition, whether newly created or already existing. - :rtype: ~azure.eventprocessorhost.checkpoint.Checkpoint - """ - - @abstractmethod - async def update_checkpoint_async(self, lease, checkpoint): - """ - Update the checkpoint in the store with the offset/sequenceNumber in the provided checkpoint. - - :param lease: The lease to be updated. 
- :type lease: ~azure.eventprocessorhost.lease.Lease - :param checkpoint: offset/sequeceNumber to update the store with. - :type checkpoint: ~azure.eventprocessorhost.checkpoint.Checkpoint - """ - - @abstractmethod - async def delete_checkpoint_async(self, partition_id): - """ - Delete the stored checkpoint for the given partition. If there is no stored checkpoint - for the given partition, that is treated as success. - - :param partition_id: The ID of a given parition. - :type partition_id: str - """ diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/abstract_event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/abstract_event_processor.py deleted file mode 100644 index 4fbd7fb20463..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/abstract_event_processor.py +++ /dev/null @@ -1,58 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ----------------------------------------------------------------------------------- - -from abc import ABC, abstractmethod - - -class AbstractEventProcessor(ABC): - """ - Abstract that must be extended by event processor classes. - """ - def __init__(self, params=None): - pass - - @abstractmethod - async def open_async(self, context): - """ - Called by processor host to initialize the event processor. - - :param context: Information about the partition - :type context: ~azure.eventprocessorhost.partition_context.PartitionContext - """ - - @abstractmethod - async def close_async(self, context, reason): - """ - Called by processor host to indicate that the event processor is being stopped. - - :param context: Information about the partition - :type context: ~azure.eventprocessorhost.partition_context.PartitionContext - :param reason: The reason for closing. - :type reason: str - """ - - @abstractmethod - async def process_events_async(self, context, messages): - """ - Called by the processor host when a batch of events has arrived. - This is where the real work of the event processor is done. - - :param context: Information about the partition - :type context: ~azure.eventprocessorhost.partition_context.PartitionContext - :param messages: The events to be processed. - :type messages: list[~azure.eventhub.common.EventData] - """ - - @abstractmethod - async def process_error_async(self, context, error): - """ - Called when the underlying client experiences an error while receiving. - EventProcessorHost will take care of recovering from the error and - continuing to pump messages. - - :param context: Information about the partition - :type context: ~azure.eventprocessorhost.partition_context.PartitionContext - :param error: The error that occured. - """ diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/abstract_lease_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/abstract_lease_manager.py deleted file mode 100644 index 1577a3b58969..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/abstract_lease_manager.py +++ /dev/null @@ -1,134 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
-# ----------------------------------------------------------------------------------- - -""" -Author: Aaron (Ari) Bornstien -""" -from abc import ABC, abstractmethod - -class AbstractLeaseManager(ABC): - """ - If you wish to have EventProcessorHost store leases somewhere other than Azure Storage, - you can write your own lease manager using this abstract class. The Azure Storage managers - use the same storage for both lease and checkpoints, so both interfaces are implemented by - the same class.You are free to do the same thing if you have a unified store for both - types of data. - """ - - def __init__(self, lease_renew_interval, lease_duration): - self.lease_renew_interval = lease_renew_interval - self.lease_duration = lease_duration - - @abstractmethod - async def create_lease_store_if_not_exists_async(self): - """ - Create the lease store if it does not exist, do nothing if it does exist. - - :return: `True` if the lease store already exists or was created successfully, `False` if not. - :rtype: bool - """ - - @abstractmethod - async def delete_lease_store_async(self): - """ - Not used by EventProcessorHost, but a convenient function to have for testing. - - :return: `True` if the lease store was deleted successfully, `False` if not. - :rtype: bool - """ - - async def get_lease_async(self, partition_id): - """ - Return the lease info for the specified partition. - Can return null if no lease has been created in the store for the specified partition. - - :param partition_id: The ID of a given partition. - :type parition_id: str - :return: Lease info for the partition, or `None`. - :rtype: - """ - - @abstractmethod - def get_all_leases(self): - """ - Return the lease info for all partitions. - A typical implementation could just call get_lease_async() on all partitions. - - :return: A list of lease info. - :rtype: - """ - - @abstractmethod - async def create_lease_if_not_exists_async(self, partition_id): - """ - Create in the store the lease info for the given partition, if it does not exist. - Do nothing if it does exist in the store already. - - :param partition_id: The ID of a given partition. - :type parition_id: str - :return: The existing or newly-created lease info for the partition. - """ - - @abstractmethod - async def delete_lease_async(self, lease): - """ - Delete the lease info for the given partition from the store. - If there is no stored lease for the given partition, that is treated as success. - - :param lease: The lease to be deleted. - :type lease: ~azure.eventprocessorhost.lease.Lease - """ - - @abstractmethod - async def acquire_lease_async(self, lease): - """ - Acquire the lease on the desired partition for this EventProcessorHost. - Note that it is legal to acquire a lease that is already owned by another host. - Lease-stealing is how partitions are redistributed when additional hosts are started. - - :param lease: The lease to be acquired. - :type lease: ~azure.eventprocessorhost.lease.Lease - :return: `True` if the lease was acquired successfully, `False` if not. - :rtype: bool - """ - - @abstractmethod - async def renew_lease_async(self, lease): - """ - Renew a lease currently held by this host. - If the lease has been stolen, or expired, or released, it is not possible to renew it. - You will have to call get_lease_async() and then acquire_lease_async() again. - - :param lease: The lease to be renewed. - :type lease: ~azure.eventprocessorhost.lease.Lease - :return: `True` if the lease was renewed successfully, `False` if not. 
- :rtype: bool - """ - - @abstractmethod - async def release_lease_async(self, lease): - """ - Give up a lease currently held by this host. If the lease has been stolen, or expired, - releasing it is unnecessary, and will fail if attempted. - - :param lease: The lease to be released. - :type lease: ~azure.eventprocessorhost.lease.Lease - :return: `True` if the lease was released successfully, `False` if not. - :rtype: bool - """ - - @abstractmethod - async def update_lease_async(self, lease): - """ - Update the store with the information in the provided lease. It is necessary to currently - hold a lease in order to update it. If the lease has been stolen, or expired, or released, - it cannot be updated. Updating should renew the lease before performing the update to - avoid lease expiration during the process. - - :param lease: The lease to be updated. - :type lease: ~azure.eventprocessorhost.lease.Lease - :return: `True` if the updated was performed successfully, `False` if not. - :rtype: bool - """ diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/azure_blob_lease.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/azure_blob_lease.py deleted file mode 100644 index 3ffb32961662..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/azure_blob_lease.py +++ /dev/null @@ -1,72 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ----------------------------------------------------------------------------------- - -import asyncio -import json - -from azure.eventprocessorhost.lease import Lease - - -class AzureBlobLease(Lease): - """ - Azure Blob Lease - """ - - def __init__(self): - """ - Init Azure Blob Lease. - """ - super() - Lease.__init__(self) - self.offset = None - self.state = lambda: None - - def serializable(self): - """ - Returns Serialiazble instance of `__dict__`. - """ - serial = self.__dict__.copy() - del serial['state'] - return serial - - def with_lease(self, lease): - """ - Init with exisiting lease. - """ - super().with_source(lease) - - def with_blob(self, blob): - """ - Init Azure Blob Lease with existing blob. - """ - content = json.loads(blob.content) - self.partition_id = content["partition_id"] - self.owner = content["owner"] - self.token = content["token"] - self.epoch = content["epoch"] - self.offset = content["offset"] - self.sequence_number = content["sequence_number"] - self.event_processor_context = content.get("event_processor_context") - - def with_source(self, lease): - """ - Init Azure Blob Lease from existing. - """ - super().with_source(lease) - self.offset = lease.offset - self.sequence_number = lease.sequence_number - - async def is_expired(self): - """ - Check and return Azure Blob Lease state using Storage API. 
- """ - if asyncio.iscoroutinefunction(self.state): - current_state = await self.state() - else: - current_state = self.state() - if current_state: - return current_state != "leased" - return False - \ No newline at end of file diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/azure_storage_checkpoint_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/azure_storage_checkpoint_manager.py deleted file mode 100644 index 18acb52db82a..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/azure_storage_checkpoint_manager.py +++ /dev/null @@ -1,487 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ----------------------------------------------------------------------------------- - -import re -import json -import uuid -import logging -import concurrent.futures -import functools -import asyncio -import requests - -from .vendor.storage.blob import BlockBlobService -from azure.eventprocessorhost.azure_blob_lease import AzureBlobLease -from azure.eventprocessorhost.checkpoint import Checkpoint -from azure.eventprocessorhost.abstract_lease_manager import AbstractLeaseManager -from azure.eventprocessorhost.abstract_checkpoint_manager import AbstractCheckpointManager - - -_logger = logging.getLogger(__name__) - - -class AzureStorageCheckpointLeaseManager(AbstractCheckpointManager, AbstractLeaseManager): - """ - Manages checkpoints and lease with azure storage blobs. In this implementation, - checkpoints are data that's actually in the lease blob, so checkpoint operations - turn into lease operations under the covers. - - :param str storage_account_name: The storage account name. This is used to - authenticate requests signed with an account key and to construct the storage - endpoint. It is required unless a connection string is given. - :param str storage_account_key: The storage account key. This is used for shared key - authentication. If neither account key or sas token is specified, anonymous access - will be used. - :param str lease_container_name: The name of the container that will be used to store - leases. If it does not already exist it will be created. Default value is 'eph-leases'. - :param int lease_renew_interval: The interval in seconds at which EPH will attempt to - renew the lease of a particular partition. Default value is 10. - :param int lease_duration: The duration in seconds of a lease on a partition. - Default value is 30. - :param str sas_token: A shared access signature token to use to authenticate requests - instead of the account key. If account key and sas token are both specified, - account key will be used to sign. If neither are specified, anonymous access will be used. - :param str endpoint_suffix: The host base component of the url, minus the account name. - Defaults to Azure (core.windows.net). Override this to use a National Cloud. - :param str connection_string: If specified, this will override all other endpoint parameters. - See http://azure.microsoft.com/en-us/documentation/articles/storage-configure-connection-string/ - for the connection string format. 
- """ - - def __init__(self, storage_account_name=None, storage_account_key=None, lease_container_name="eph-leases", - storage_blob_prefix=None, lease_renew_interval=10, lease_duration=30, - sas_token=None, endpoint_suffix="core.windows.net", connection_string=None): - AbstractCheckpointManager.__init__(self) - AbstractLeaseManager.__init__(self, lease_renew_interval, lease_duration) - self.storage_account_name = storage_account_name - self.storage_account_key = storage_account_key - self.storage_sas_token = sas_token - self.endpoint_suffix = endpoint_suffix - self.connection_string = connection_string - self.lease_container_name = lease_container_name - self.storage_blob_prefix = storage_blob_prefix - self.storage_client = None - self.consumer_group_directory = None - self.host = None - self.storage_max_execution_time = 120 - self.request_session = requests.Session() - self.request_session.mount('https://', requests.adapters.HTTPAdapter(pool_connections=100, pool_maxsize=100)) - self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=32) - - # Validate storage inputs - if not self.storage_account_name and not self.connection_string: - raise ValueError("Need a valid storage account name or connection string.") - if not re.compile(r"^[a-z0-9](([a-z0-9\-[^\-])){1,61}[a-z0-9]$").match(self.lease_container_name): - raise ValueError("Azure Storage lease container name is invalid.\ - Please check naming conventions at\ - https:# msdn.microsoft.com/en-us/library/azure/dd135715.aspx") - - if self.storage_blob_prefix: - self.storage_blob_prefix.replace(" ", "") # Convert all-whitespace to empty string. - else: - self.storage_blob_prefix = "" # Convert null prefix to empty string. - - def initialize(self, host): - """ - The EventProcessorHost can't pass itself to the AzureStorageCheckpointLeaseManager - constructor because it is still being constructed. Do other initialization here - also because it might throw and hence we don't want it in the constructor. - """ - self.host = host - self.storage_client = BlockBlobService(account_name=self.storage_account_name, - account_key=self.storage_account_key, - sas_token=self.storage_sas_token, - endpoint_suffix=self.endpoint_suffix, - connection_string=self.connection_string, - request_session=self.request_session) - self.consumer_group_directory = self.storage_blob_prefix + self.host.eh_config.consumer_group - - # Checkpoint Management Methods - - async def create_checkpoint_store_if_not_exists_async(self): - """ - Create the checkpoint store if it doesn't exist. Do nothing if it does exist. - - :return: `True` if the checkpoint store already exists or was created OK, `False` - if there was a failure. - :rtype: bool - """ - await self.create_lease_store_if_not_exists_async() - - async def get_checkpoint_async(self, partition_id): - """ - Get the checkpoint data associated with the given partition. - Could return null if no checkpoint has been created for that partition. - - :param partition_id: The partition ID. - :type partition_id: str - :return: Given partition checkpoint info, or `None` if none has been previously stored. 
- :rtype: ~azure.eventprocessorhost.checkpoint.Checkpoint - """ - - lease = await self.get_lease_async(partition_id) - checkpoint = None - if lease: - if lease.offset: - checkpoint = Checkpoint(partition_id, lease.offset, - lease.sequence_number) - return checkpoint - - async def create_checkpoint_if_not_exists_async(self, partition_id): - """ - Create the given partition checkpoint if it doesn't exist.Do nothing if it does exist. - The offset/sequenceNumber for a freshly-created checkpoint should be set to StartOfStream/0. - - :param partition_id: The partition ID. - :type partition_id: str - :return: The checkpoint for the given partition, whether newly created or already existing. - :rtype: ~azure.eventprocessorhost.checkpoint.Checkpoint - """ - checkpoint = await self.get_checkpoint_async(partition_id) - if not checkpoint: - await self.create_lease_if_not_exists_async(partition_id) - checkpoint = Checkpoint(partition_id) - return checkpoint - - async def update_checkpoint_async(self, lease, checkpoint): - """ - Update the checkpoint in the store with the offset/sequenceNumber in the provided checkpoint - checkpoint:offset/sequeceNumber to update the store with. - - :param lease: The stored lease to be updated. - :type lease: ~azure.eventprocessorhost.lease.Lease - :param checkpoint: The checkpoint to update the lease with. - :type checkpoint: ~azure.eventprocessorhost.checkpoint.Checkpoint - """ - new_lease = AzureBlobLease() - new_lease.with_source(lease) - new_lease.offset = checkpoint.offset - new_lease.sequence_number = checkpoint.sequence_number - return await self.update_lease_async(new_lease) - - async def delete_checkpoint_async(self, partition_id): - """ - Delete the stored checkpoint for the given partition. If there is no stored checkpoint - for the given partition, that is treated as success. - - :param partition_id: The partition ID. - :type partition_id: str - """ - return # Make this a no-op to avoid deleting leases by accident. - - # Lease Management Methods - - async def create_lease_store_if_not_exists_async(self): - """ - Create the lease store if it does not exist, do nothing if it does exist. - - :return: `True` if the lease store already exists or was created successfully, `False` if not. - :rtype: bool - """ - try: - await self.host.loop.run_in_executor( - self.executor, - functools.partial( - self.storage_client.create_container, - self.lease_container_name)) - - except Exception as err: # pylint: disable=broad-except - _logger.error("%r", err) - raise err - - return True - - async def delete_lease_store_async(self): - """ - Not used by EventProcessorHost, but a convenient function to have for testing. - - :return: `True` if the lease store was deleted successfully, `False` if not. - :rtype: bool - """ - return "Not Supported in Python" - - async def get_lease_async(self, partition_id): - """ - Return the lease info for the specified partition. - Can return null if no lease has been created in the store for the specified partition. - - :param partition_id: The partition ID. - :type partition_id: str - :return: lease info for the partition, or `None`. 
- :rtype: ~azure.eventprocessorhost.lease.Lease - """ - blob_name = "{}/{}".format(self.consumer_group_directory, partition_id) - try: - blob = await self.host.loop.run_in_executor( - self.executor, - functools.partial( - self.storage_client.get_blob_to_text, - self.lease_container_name, blob_name)) - lease = AzureBlobLease() - lease.with_blob(blob) - - async def state(): - """ - Allow lease to curry storage_client to get state - """ - try: - loop = asyncio.get_event_loop() - res = await loop.run_in_executor( - self.executor, - functools.partial( - self.storage_client.get_blob_properties, - self.lease_container_name, - blob_name)) - return res.properties.lease.state - except Exception as err: # pylint: disable=broad-except - _logger.error("Failed to get lease state %r %r", err, partition_id) - - lease.state = state - return lease - except Exception as err: # pylint: disable=broad-except - _logger.error("Failed to get lease %r %r", err, partition_id) - - async def get_all_leases(self): - """ - Return the lease info for all partitions. - A typical implementation could just call get_lease_async() on all partitions. - - :return: A list of lease info. - :rtype: list[~azure.eventprocessorhost.lease.Lease] - """ - lease_futures = [] - partition_ids = await self.host.partition_manager.get_partition_ids_async() - for partition_id in partition_ids: - lease_futures.append(self.get_lease_async(partition_id)) - return lease_futures - - async def create_lease_if_not_exists_async(self, partition_id): - """ - Create in the store the lease info for the given partition, if it does not exist. - Do nothing if it does exist in the store already. - - :param partition_id: The ID of a given parition. - :type partition_id: str - :return: the existing or newly-created lease info for the partition. - :rtype: ~azure.eventprocessorhost.lease.Lease - """ - return_lease = None - blob_name = "{}/{}".format(self.consumer_group_directory, partition_id) - try: - return_lease = AzureBlobLease() - return_lease.partition_id = partition_id - serializable_lease = return_lease.serializable() - json_lease = json.dumps(serializable_lease) - _logger.info("Creating Lease %r %r %r", - self.lease_container_name, - partition_id, - json.dumps({k:v for k, v in serializable_lease.items() if k != 'event_processor_context'})) - - await self.host.loop.run_in_executor( - self.executor, - functools.partial( - self.storage_client.create_blob_from_text, - self.lease_container_name, - blob_name, - json_lease)) - except Exception: # pylint: disable=broad-except - try: - return_lease = await self.get_lease_async(partition_id) - except Exception as err: # pylint: disable=broad-except - _logger.error("Failed to create lease %r", err) - raise err - return return_lease - - async def delete_lease_async(self, lease): - """ - Delete the lease info for the given partition from the store. - If there is no stored lease for the given partition, that is treated as success. - - :param lease: The stored lease to be deleted. - :type lease: ~azure.eventprocessorhost.lease.Lease - """ - - blob_name = "{}/{}".format(self.consumer_group_directory, lease.partition_id) - await self.host.loop.run_in_executor( - self.executor, - functools.partial( - self.storage_client.delete_blob, - self.lease_container_name, - blob_name, - lease_id=lease.token)) - - async def acquire_lease_async(self, lease): - """ - Acquire the lease on the desired partition for this EventProcessorHost. - Note that it is legal to acquire a lease that is already owned by another host. 
- Lease-stealing is how partitions are redistributed when additional hosts are started. - - :param lease: The stored lease to be acquired. - :type lease: ~azure.eventprocessorhost.lease.Lease - :return: `True` if the lease was acquired successfully, `False` if not. - :rtype: bool - """ - retval = True - new_lease_id = str(uuid.uuid4()) - partition_id = lease.partition_id - blob_name = "{}/{}".format(self.consumer_group_directory, lease.partition_id) - try: - if asyncio.iscoroutinefunction(lease.state): - state = await lease.state() - else: - state = lease.state() - if state == "leased": - if not lease.token: - # We reach here in a race condition: when this instance of EventProcessorHost - # scanned the lease blobs, this partition was unowned (token is empty) but - # between then and now, another instance of EPH has established a lease - # (getLeaseState() is LEASED). We normally enforcethat we only steal the lease - # if it is still owned by the instance which owned it when we scanned, but we - # can't do that when we don't know who owns it. The safest thing to do is just - # fail the acquisition. If that means that one EPH instance gets more partitions - # than it should, rebalancing will take care of that quickly enough. - retval = False - else: - _logger.info("ChangingLease %r %r", self.host.guid, lease.partition_id) - await self.host.loop.run_in_executor( - self.executor, - functools.partial( - self.storage_client.change_blob_lease, - self.lease_container_name, - blob_name, - lease.token, - new_lease_id)) - lease.token = new_lease_id - else: - _logger.info("AcquiringLease %r %r", self.host.guid, lease.partition_id) - lease.token = await self.host.loop.run_in_executor( - self.executor, - functools.partial( - self.storage_client.acquire_blob_lease, - self.lease_container_name, - blob_name, - self.lease_duration, - new_lease_id)) - lease.owner = self.host.host_name - lease.increment_epoch() - # check if this solves the issue - retval = await self.update_lease_async(lease) - except Exception as err: # pylint: disable=broad-except - _logger.error("Failed to acquire lease %r %r %r", err, partition_id, lease.token) - return False - - return retval - - async def renew_lease_async(self, lease): - """ - Renew a lease currently held by this host. - If the lease has been stolen, or expired, or released, it is not possible to renew it. - You will have to call getLease() and then acquireLease() again. - - :param lease: The stored lease to be renewed. - :type lease: ~azure.eventprocessorhost.lease.Lease - :return: `True` if the lease was renewed successfully, `False` if not. - :rtype: bool - """ - blob_name = "{}/{}".format(self.consumer_group_directory, lease.partition_id) - try: - await self.host.loop.run_in_executor( - self.executor, - functools.partial( - self.storage_client.renew_blob_lease, - self.lease_container_name, - blob_name, - lease_id=lease.token, - timeout=self.lease_duration)) - except Exception as err: # pylint: disable=broad-except - if "LeaseIdMismatchWithLeaseOperation" in str(err): - _logger.info("LeaseLost on partition %r", lease.partition_id) - else: - _logger.error("Failed to renew lease on partition %r with token %r %r", - lease.partition_id, lease.token, err) - return False - return True - - async def release_lease_async(self, lease): - """ - Give up a lease currently held by this host. If the lease has been stolen, or expired, - releasing it is unnecessary, and will fail if attempted. - - :param lease: The stored lease to be released. 
- :type lease: ~azure.eventprocessorhost.lease.Lease - :return: `True` if the lease was released successfully, `False` if not. - :rtype: bool - """ - lease_id = None - blob_name = "{}/{}".format(self.consumer_group_directory, lease.partition_id) - try: - _logger.info("Releasing lease %r %r", self.host.guid, lease.partition_id) - lease_id = lease.token - released_copy = AzureBlobLease() - released_copy.with_lease(lease) - released_copy.token = None - released_copy.owner = None - released_copy.state = None - await self.host.loop.run_in_executor( - self.executor, - functools.partial( - self.storage_client.create_blob_from_text, - self.lease_container_name, - blob_name, - json.dumps(released_copy.serializable()), - lease_id=lease_id)) - await self.host.loop.run_in_executor( - self.executor, - functools.partial( - self.storage_client.release_blob_lease, - self.lease_container_name, - blob_name, - lease_id)) - except Exception as err: # pylint: disable=broad-except - _logger.error("Failed to release lease %r %r %r", - err, lease.partition_id, lease_id) - return False - return True - - async def update_lease_async(self, lease): - """ - Update the store with the information in the provided lease. It is necessary to currently - hold a lease in order to update it. If the lease has been stolen, or expired, or released, - it cannot be updated. Updating should renew the lease before performing the update to - avoid lease expiration during the process. - - :param lease: The stored lease to be updated. - :type lease: ~azure.eventprocessorhost.lease.Lease - :return: `True` if the updated was performed successfully, `False` if not. - :rtype: bool - """ - if lease is None: - return False - - if not lease.token: - return False - - _logger.debug("Updating lease %r %r", self.host.guid, lease.partition_id) - - blob_name = "{}/{}".format(self.consumer_group_directory, lease.partition_id) - # First, renew the lease to make sure the update will go through. - if await self.renew_lease_async(lease): - try: - await self.host.loop.run_in_executor( - self.executor, - functools.partial( - self.storage_client.create_blob_from_text, - self.lease_container_name, - blob_name, - json.dumps(lease.serializable()), - lease_id=lease.token)) - - except Exception as err: # pylint: disable=broad-except - _logger.error("Failed to update lease %r %r %r", - self.host.guid, lease.partition_id, err) - raise err - else: - return False - return True diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/cancellation_token.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/cancellation_token.py deleted file mode 100644 index ae1aeaebdffc..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/cancellation_token.py +++ /dev/null @@ -1,20 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ----------------------------------------------------------------------------------- - -""" -Based on https://stackoverflow.com/questions/43229939/how-to-pass-a-boolean-by-reference-across-threads-and-modules -""" -class CancellationToken: - """ - Thread Safe Mutable Cancellation Token. - """ - def __init__(self): - self.is_cancelled = False - - def cancel(self): - """ - Cancel the token. 
- """ - self.is_cancelled = True diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/checkpoint.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/checkpoint.py deleted file mode 100644 index ff09052336f0..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/checkpoint.py +++ /dev/null @@ -1,34 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ----------------------------------------------------------------------------------- - -class Checkpoint: - """ - Contains checkpoint metadata. - """ - - def __init__(self, partition_id, offset="-1", sequence_number="0"): - """Initialize Checkpoint. - - :param partition_id: The parition ID of the checkpoint. - :type partition_id: str - :param offset: The receive offset of the checkpoint. - :type offset: str - :param sequence_number: The sequence number of the checkpoint. - :type sequence_number: str - """ - self.partition_id = partition_id - self.offset = offset - self.sequence_number = sequence_number - - def from_source(self, checkpoint): - """ - Creates a new Checkpoint from an existing checkpoint. - - :param checkpoint: Existing checkpoint. - :type checkpoint: ~azure.eventprocessorhost.checkpoint.Checkpoint - """ - self.partition_id = checkpoint.partition_id - self.offset = checkpoint.offset - self.sequence_number = checkpoint.sequence_number diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/eh_config.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/eh_config.py deleted file mode 100644 index 73f89a8306e8..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/eh_config.py +++ /dev/null @@ -1,71 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ----------------------------------------------------------------------------------- - -import time -import urllib -import hmac -import hashlib -import base64 - -class EventHubConfig: - """ - A container class for Event Hub properties. - - :param sb_name: The EventHub (ServiceBus) namespace. - :type sb_name: str - :param eh_name: The EventHub name. - :type eh_name: str - :param policy: The SAS policy name. - :type policy: str - :param sas_key: The SAS access key. - :type sas_key: str - :param consumer_group: The EventHub consumer group to receive from. The - default value is '$default'. - :type consumer_group: str - :param namespace_suffix: The ServiceBus namespace URL suffix. - The default value is 'servicebus.windows.net'. - :type namespace_suffix: str - """ - def __init__(self, sb_name, eh_name, policy, sas_key, - consumer_group="$default", - namespace_suffix="servicebus.windows.net"): - self.sb_name = sb_name - self.eh_name = eh_name - self.policy = policy - self.sas_key = sas_key - self.namespace_suffix = namespace_suffix - self.consumer_group = consumer_group - self.client_address = self.get_client_address() - self.rest_token = self.get_rest_token() - - def get_client_address(self): - """ - Returns an auth token dictionary for making calls to eventhub - REST API. 
- - :rtype: str - """ - return "amqps://{}:{}@{}.{}:5671/{}".format( - urllib.parse.quote_plus(self.policy), - urllib.parse.quote_plus(self.sas_key), - self.sb_name, - self.namespace_suffix, - self.eh_name) - - def get_rest_token(self): - """ - Returns an auth token for making calls to eventhub REST API. - - :rtype: str - """ - uri = urllib.parse.quote_plus( - "https://{}.{}/{}".format(self.sb_name, self.namespace_suffix, self.eh_name)) - sas = self.sas_key.encode('utf-8') - expiry = str(int(time.time() + 10000)) - string_to_sign = ('{}\n{}'.format(uri, expiry)).encode('utf-8') - signed_hmac_sha256 = hmac.HMAC(sas, string_to_sign, hashlib.sha256) - signature = urllib.parse.quote(base64.b64encode(signed_hmac_sha256.digest())) - return 'SharedAccessSignature sr={}&sig={}&se={}&skn={}' \ - .format(uri, signature, expiry, self.policy) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/eh_partition_pump.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/eh_partition_pump.py deleted file mode 100644 index 598a93c639c0..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/eh_partition_pump.py +++ /dev/null @@ -1,170 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ----------------------------------------------------------------------------------- - -import logging -import asyncio -from azure.eventhub import EventPosition, EventHubSharedKeyCredential -from azure.eventhub.aio import EventHubClient -from azure.eventprocessorhost.partition_pump import PartitionPump - - -_logger = logging.getLogger(__name__) - - -class EventHubPartitionPump(PartitionPump): - """ - Pulls and messages from lease partition from eventhub and sends them to processor. - """ - - def __init__(self, host, lease): - PartitionPump.__init__(self, host, lease) - self.eh_client = None - self.partition_receiver = None - self.partition_receive_handler = None - self.running = None - - async def on_open_async(self): - """ - Eventhub Override for on_open_async. - """ - _opened_ok = False - _retry_count = 0 - while (not _opened_ok) and (_retry_count < 5): - try: - await self.open_clients_async() - _opened_ok = True - except Exception as err: # pylint: disable=broad-except - _logger.warning( - "%r,%r PartitionPumpWarning: Failure creating client or receiver, retrying: %r", - self.host.guid, self.partition_context.partition_id, err) - last_exception = err - _retry_count += 1 - - if not _opened_ok: - await self.processor.process_error_async(self.partition_context, last_exception) - self.set_pump_status("OpenFailed") - - if self.pump_status == "Opening": - loop = asyncio.get_event_loop() - self.set_pump_status("Running") - self.running = loop.create_task(self.partition_receiver.run()) - - if self.pump_status in ["OpenFailed", "Errored"]: - self.set_pump_status("Closing") - await self.clean_up_clients_async() - self.set_pump_status("Closed") - - - async def open_clients_async(self): - """ - Responsible for establishing connection to event hub client - throws EventHubsException, IOException, InterruptedException, ExecutionException. 
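# Illustrative use of the EventHubConfig defined in the removed eh_config.py
# above; the namespace, hub, policy and key values are placeholders only.
from azure.eventprocessorhost.eh_config import EventHubConfig

config = EventHubConfig(
    sb_name="my-namespace",
    eh_name="my-eventhub",
    policy="RootManageSharedAccessKey",
    sas_key="<sas-key>",
    consumer_group="$default")

# AMQP address embedding the URL-encoded policy name and key.
print(config.client_address)
# Short-lived SharedAccessSignature token for the Event Hubs REST API.
print(config.get_rest_token())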
-        """
-        await self.partition_context.get_initial_offset_async()
-        # Create event hub client and receive handler and set options
-        hostname = "{}.{}".format(self.host.eh_config.sb_name, self.host.eh_config.namespace_suffix)
-        event_hub_path = self.host.eh_config.eh_name
-        shared_key_cred = EventHubSharedKeyCredential(self.host.eh_config.policy, self.host.eh_config.sas_key)
-
-        self.eh_client = EventHubClient(
-            hostname, event_hub_path, shared_key_cred,
-            network_tracing=self.host.eph_options.debug_trace,
-            http_proxy=self.host.eph_options.http_proxy)
-        self.partition_receive_handler = self.eh_client.create_consumer(
-            partition_id=self.partition_context.partition_id,
-            consumer_group=self.partition_context.consumer_group_name,
-            event_position=EventPosition(self.partition_context.offset),
-            prefetch=self.host.eph_options.prefetch_count,
-            loop=self.loop)
-        self.partition_receiver = PartitionReceiver(self)
-
-    async def clean_up_clients_async(self):
-        """
-        Resets the pump and swallows all exceptions.
-        """
-        if self.partition_receiver:
-            if self.eh_client:
-                await self.partition_receive_handler.close()
-            self.partition_receiver = None
-            self.partition_receive_handler = None
-            self.eh_client = None
-
-    async def on_closing_async(self, reason):
-        """
-        Overrides partition pump on closing.
-
-        :param reason: The reason for the shutdown.
-        :type reason: str
-        """
-        self.partition_receiver.eh_partition_pump.set_pump_status("Errored")
-        try:
-            await self.running
-        except TypeError:
-            _logger.debug("No partition pump running.")
-        except Exception as err:  # pylint: disable=broad-except
-            _logger.info("Error on closing partition pump: %r", err)
-        await self.clean_up_clients_async()
-
-
-class PartitionReceiver:
-    """
-    Receives events asynchronously until the lease is lost.
-    """
-
-    def __init__(self, eh_partition_pump):
-        self.eh_partition_pump = eh_partition_pump
-        self.max_batch_size = self.eh_partition_pump.host.eph_options.max_batch_size
-        self.recieve_timeout = self.eh_partition_pump.host.eph_options.receive_timeout
-
-    async def run(self):
-        """
-        Runs the async partition receiver event loop to retrieve messages from the event queue.
-        """
-        # Implement pull max batch from queue instead of one message at a time
-        while self.eh_partition_pump.pump_status != "Errored" and not self.eh_partition_pump.is_closing():
-            if self.eh_partition_pump.partition_receive_handler:
-                try:
-                    msgs = await self.eh_partition_pump.partition_receive_handler.receive(
-                        max_batch_size=self.max_batch_size,
-                        timeout=self.recieve_timeout)
-                except Exception as e:  # pylint: disable=broad-except
-                    _logger.info("Error raised while attempting to receive messages: %r", e)
-                    await self.process_error_async(e)
-                else:
-                    if not msgs:
-                        _logger.info("No events received, queue size %r, release %r",
-                                     self.eh_partition_pump.partition_receive_handler.queue_size,
-                                     self.eh_partition_pump.host.eph_options.release_pump_on_timeout)
-                        if self.eh_partition_pump.host.eph_options.release_pump_on_timeout:
-                            await self.process_error_async(TimeoutError("No events received"))
-                    else:
-                        await self.process_events_async(msgs)
-
-    async def process_events_async(self, events):
-        """
-        This method is called on the thread that the EH client uses to run the pump.
-        There is one pump per EventHubClient. Since each PartitionPump creates a
-        new EventHubClient, using that thread to call OnEvents does no harm.
Even if OnEvents - is slow, the pump will get control back each time OnEvents returns, and be able to receive - a new batch of messages with which to make the next OnEvents call.The pump gains nothing - by running faster than OnEvents. - - :param events: List of events to be processed. - :type events: list of ~azure.eventhub.common.EventData - """ - await self.eh_partition_pump.process_events_async(events) - - async def process_error_async(self, error): - """ - Handles processing errors this is never called since python recieve client doesn't - have error handling implemented (TBD add fault pump handling). - - :param error: An error the occurred. - :type error: Exception - """ - try: - await self.eh_partition_pump.process_error_async(error) - finally: - self.eh_partition_pump.set_pump_status("Errored") diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/eph.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/eph.py deleted file mode 100644 index 90200d4698fd..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/eph.py +++ /dev/null @@ -1,110 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ----------------------------------------------------------------------------------- - -import uuid -import asyncio -from azure.eventprocessorhost.partition_manager import PartitionManager - - -class EventProcessorHost: - """ - Represents a host for processing Event Hubs event data at scale. - Takes in an event hub, a event processor class definition, a config object, - as well as a storage manager and optional event processor params (ep_params). - """ - - def __init__(self, event_processor, eh_config, storage_manager, ep_params=None, eph_options=None, loop=None): - """ - Initialize EventProcessorHost. - - :param event_processor: The event processing handler. - :type event_processor: ~azure.eventprocessorhost.abstract_event_processor.AbstractEventProcessor - :param eh_config: The EPH connection configuration. - :type eh_config: ~azure.eventprocessorhost.eh_config.EventHubConfig - :param storage_manager: The Azure storage manager for persisting lease and - checkpoint information. - :type storage_manager: - ~azure.eventprocessorhost.azure_storage_checkpoint_manager.AzureStorageCheckpointLeaseManager - :param ep_params: Optional arbitrary parameters to be passed into the event_processor - on initialization. - :type ep_params: list - :param eph_options: EPH configuration options. - :type eph_options: ~azure.eventprocessorhost.eph.EPHOptions - :param loop: An eventloop. If not provided the default asyncio event loop will be used. - """ - self.event_processor = event_processor - self.event_processor_params = ep_params - self.eh_config = eh_config - self.guid = str(uuid.uuid4()) - self.host_name = "host" + str(self.guid) - self.loop = loop or asyncio.get_event_loop() - self.eph_options = eph_options or EPHOptions() - self.partition_manager = PartitionManager(self) - self.storage_manager = storage_manager - if self.storage_manager: - self.storage_manager.initialize(self) - - async def open_async(self): - """ - Starts the host. - """ - if not self.loop: - self.loop = asyncio.get_event_loop() - await self.partition_manager.start_async() - - async def close_async(self): - """ - Stops the host. 
- """ - await self.partition_manager.stop_async() - - -class EPHOptions: - """ - Class that contains default and overidable EPH option. - - :ivar max_batch_size: The maximum number of events retrieved for processing - at a time. This value must be less than or equal to the prefetch count. The actual - number of events returned for processing may be any number up to the maximum. - The default value is 10. - :vartype max_batch_size: int - :ivar prefetch_count: The number of events to fetch from the service in advance of - processing. The default value is 300. - :vartype prefetch_count: int - :ivar receive_timeout: The length of time a single partition receiver will wait in - order to receive a batch of events. Default is 60 seconds. - :vartype receive_timeout: float - :ivar release_pump_on_timeout: Whether to shutdown an individual partition receiver if - no events were received in the specified timeout. Shutting down the pump will release - the lease to allow it to be picked up by another host. Default is False. - :vartype release_pump_on_timeout: bool - :ivar initial_offset_provider: The initial event offset to receive from if no persisted - offset is found. Default is "-1" (i.e. from the first event available). - :vartype initial_offset_provider: str - :ivar debug_trace: Whether to emit the network traffic in the logs. In order to view - these events the logger must be configured to track "uamqp". Default is False. - :vartype debug_trace: bool - :ivar http_proxy: HTTP proxy configuration. This should be a dictionary with - the following keys present: 'proxy_hostname' and 'proxy_port'. Additional optional - keys are 'username' and 'password'. - :vartype http_proxy: dict - :ivar keep_alive_interval: The time in seconds between asynchronously pinging a receiver - connection to keep it alive during inactivity. Default is None - i.e. no connection pinging. - :vartype keep_alive_interval: int - :ivar auto_reconnect_on_error: Whether to automatically attempt to reconnect a receiver - connection if it is detach from the service with a retryable error. Default is True. - :vartype auto_reconnect_on_error: bool - """ - - def __init__(self): - self.max_batch_size = 10 - self.prefetch_count = 300 - self.receive_timeout = 60 - self.release_pump_on_timeout = False - self.initial_offset_provider = "-1" - self.debug_trace = False - self.http_proxy = None - self.keep_alive_interval = None - self.auto_reconnect_on_error = True diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/lease.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/lease.py deleted file mode 100644 index 02e863e2c5e5..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/lease.py +++ /dev/null @@ -1,60 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ----------------------------------------------------------------------------------- - - -class Lease: - """ - Lease contains partition processing state metadata used to manage partition state. - """ - - def __init__(self): - self.partition_id = None - self.sequence_number = None - self.owner = None - self.token = None - self.epoch = 0 - self.event_processor_context = None - - def with_partition_id(self, partition_id): - """ - Init with partition Id. - - :param partition_id: ID of a given partition. 
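# A sketch of how the removed pieces fit together: an event processor, an
# EventHubConfig, a checkpoint/lease store and EPHOptions overrides wired into
# an EventProcessorHost. The AzureStorageCheckpointLeaseManager arguments and
# the AbstractEventProcessor callback signatures are assumptions inferred from
# how they are referenced above; all credentials shown are placeholders.
import asyncio

from azure.eventprocessorhost.abstract_event_processor import AbstractEventProcessor
from azure.eventprocessorhost.azure_storage_checkpoint_manager import AzureStorageCheckpointLeaseManager
from azure.eventprocessorhost.eh_config import EventHubConfig
from azure.eventprocessorhost.eph import EventProcessorHost, EPHOptions


class PrintingProcessor(AbstractEventProcessor):
    async def open_async(self, context):
        print("Pump opened for partition", context.partition_id)

    async def process_events_async(self, context, messages):
        print("Received {} events on partition {}".format(len(messages), context.partition_id))
        await context.checkpoint_async()

    async def process_error_async(self, context, error):
        print("Error on partition", context.partition_id, error)

    async def close_async(self, context, reason):
        print("Pump closed for partition", context.partition_id, reason)


eh_config = EventHubConfig("my-namespace", "my-eventhub", "RootManageSharedAccessKey", "<sas-key>")
storage_manager = AzureStorageCheckpointLeaseManager(
    "<storage-account>", "<storage-key>", "lease-container")

options = EPHOptions()
options.receive_timeout = 30            # default is 60 seconds
options.release_pump_on_timeout = True  # give up the lease on idle partitions

host = EventProcessorHost(
    PrintingProcessor, eh_config, storage_manager, eph_options=options)

loop = asyncio.get_event_loop()
loop.run_until_complete(host.open_async())
loop.run_until_complete(asyncio.sleep(60))
loop.run_until_complete(host.close_async())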
- :type partition_id: str - """ - self.partition_id = partition_id - self.owner = None - self.token = None - self.epoch = 0 - self.event_processor_context = None - - def with_source(self, lease): - """ - Init with existing lease. - - :param lease: An existing Lease. - :type lease: ~azure.eventprocessorhost.lease.Lease - """ - self.partition_id = lease.partition_id - self.epoch = lease.epoch - self.owner = lease.owner - self.token = lease.token - self.event_processor_context = lease.event_processor_context - - async def is_expired(self): - """ - Determines whether the lease is expired. By default lease never expires. - Deriving class implements the lease expiry logic. - - :rtype: bool - """ - return False - - def increment_epoch(self): - """ - Increment lease epoch. - """ - self.epoch += 1 - return self.epoch diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/partition_context.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/partition_context.py deleted file mode 100644 index 51266ae13702..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/partition_context.py +++ /dev/null @@ -1,155 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ----------------------------------------------------------------------------------- -import asyncio -import logging -from azure.eventprocessorhost.checkpoint import Checkpoint - - -_logger = logging.getLogger(__name__) - - -class PartitionContext: - """ - Encapsulates information related to an Event Hubs partition used by AbstractEventProcessor. - """ - - def __init__(self, host, partition_id, eh_path, consumer_group_name, pump_loop=None): - self.host = host - self.partition_id = partition_id - self.eh_path = eh_path - self.consumer_group_name = consumer_group_name - self.offset = "-1" - self.sequence_number = 0 - self.lease = None - self.event_processor_context = None - self.pump_loop = pump_loop or asyncio.get_event_loop() - - def set_offset_and_sequence_number(self, event_data): - """ - Updates offset based on event. - - :param event_data: A received EventData with valid offset and sequenceNumber. - :type event_data: ~azure.eventhub.common.EventData - """ - if not event_data: - raise Exception(event_data) - self.offset = event_data.offset - self.sequence_number = event_data.sequence_number - - async def get_initial_offset_async(self): # throws InterruptedException, ExecutionException - """ - Gets the initial offset for processing the partition. - - :rtype: str - """ - _logger.info("Calling user-provided initial offset provider %r %r", - self.host.guid, self.partition_id) - starting_checkpoint = await self.host.storage_manager.get_checkpoint_async(self.partition_id) - if not starting_checkpoint: - # No checkpoint was ever stored. 
Use the initialOffsetProvider instead - # defaults to "-1" - self.offset = self.host.eph_options.initial_offset_provider - self.sequence_number = -1 - else: - self.offset = starting_checkpoint.offset - self.sequence_number = starting_checkpoint.sequence_number - - _logger.info("%r %r Initial offset/sequenceNumber provided %r/%r", - self.host.guid, self.partition_id, self.offset, self.sequence_number) - return self.offset - - async def checkpoint_async(self, event_processor_context=None): - """ - Generates a checkpoint for the partition using the curren offset and sequenceNumber for - and persists to the checkpoint manager. - - :param event_processor_context An optional custom state value for the Event Processor. - This data must be in a JSON serializable format. - :type event_processor_context: str or dict - """ - captured_checkpoint = Checkpoint(self.partition_id, self.offset, self.sequence_number) - await self.persist_checkpoint_async(captured_checkpoint, event_processor_context) - self.event_processor_context = event_processor_context - - async def checkpoint_async_event_data(self, event_data, event_processor_context=None): - """ - Stores the offset and sequenceNumber from the provided received EventData instance, - then writes those values to the checkpoint store via the checkpoint manager. - Optionally stores the state of the Event Processor along the checkpoint. - - :param event_data: A received EventData with valid offset and sequenceNumber. - :type event_data: ~azure.eventhub.common.EventData - :param event_processor_context An optional custom state value for the Event Processor. - This data must be in a JSON serializable format. - :type event_processor_context: str or dict - :raises: ValueError if suplied event_data is None. - :raises: ValueError if the sequenceNumber is less than the last checkpointed value. - """ - if not event_data: - raise ValueError("event_data") - if event_data.sequence_number > self.sequence_number: - #We have never seen this sequence number yet - raise ValueError("Argument Out Of Range event_data x-opt-sequence-number") - - await self.persist_checkpoint_async(Checkpoint(self.partition_id, - event_data.offset, - event_data.sequence_number), - event_processor_context) - self.event_processor_context = event_processor_context - - def to_string(self): - """ - Returns the parition context in the following format: - "PartitionContext({EventHubPath}{ConsumerGroupName}{PartitionId}{SequenceNumber})" - - :rtype: str - """ - return "PartitionContext({}{}{}{})".format(self.eh_path, - self.consumer_group_name, - self.partition_id, - self.sequence_number) - - async def persist_checkpoint_async(self, checkpoint, event_processor_context=None): - """ - Persists the checkpoint, and - optionally - the state of the Event Processor. - - :param checkpoint: The checkpoint to persist. - :type checkpoint: ~azure.eventprocessorhost.checkpoint.Checkpoint - :param event_processor_context An optional custom state value for the Event Processor. - This data must be in a JSON serializable format. 
- :type event_processor_context: str or dict - """ - _logger.debug("PartitionPumpCheckpointStart %r %r %r %r", - self.host.guid, checkpoint.partition_id, checkpoint.offset, checkpoint.sequence_number) - try: - in_store_checkpoint = await self.host.storage_manager.get_checkpoint_async(checkpoint.partition_id) - if not in_store_checkpoint or checkpoint.sequence_number >= in_store_checkpoint.sequence_number: - if not in_store_checkpoint: - _logger.info("persisting checkpoint %r", checkpoint.__dict__) - await self.host.storage_manager.create_checkpoint_if_not_exists_async(checkpoint.partition_id) - - self.lease.event_processor_context = event_processor_context - if not await self.host.storage_manager.update_checkpoint_async(self.lease, checkpoint): - _logger.error("Failed to persist checkpoint for partition: %r", self.partition_id) - raise Exception("failed to persist checkpoint") - self.lease.offset = checkpoint.offset - self.lease.sequence_number = checkpoint.sequence_number - else: - _logger.error( # pylint: disable=logging-not-lazy - "Ignoring out of date checkpoint with offset %r/sequence number %r because " + - "current persisted checkpoint has higher offset %r/sequence number %r", - checkpoint.offset, - checkpoint.sequence_number, - in_store_checkpoint.offset, - in_store_checkpoint.sequence_number) - raise Exception("offset/sequenceNumber invalid") - - except Exception as err: - _logger.error("PartitionPumpCheckpointError %r %r %r", - self.host.guid, checkpoint.partition_id, err) - raise - finally: - _logger.debug("PartitionPumpCheckpointStop %r %r", - self.host.guid, checkpoint.partition_id) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/partition_manager.py deleted file mode 100644 index 41ffe9d043bd..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/partition_manager.py +++ /dev/null @@ -1,364 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ----------------------------------------------------------------------------------- - -import logging -import asyncio -from queue import Queue -from collections import Counter - -from azure.eventhub.aio import EventHubClient -from azure.eventhub import EventHubSharedKeyCredential - -from azure.eventprocessorhost.eh_partition_pump import EventHubPartitionPump -from azure.eventprocessorhost.cancellation_token import CancellationToken - - -_logger = logging.getLogger(__name__) - - -class PartitionManager: - """ - Manages the partition event pump execution. - """ - - def __init__(self, host): - self.host = host - self.partition_pumps = {} - self.partition_ids = None - self.run_task = None - self.cancellation_token = CancellationToken() - - async def get_partition_ids_async(self): - """ - Returns a list of all the event hub partition IDs. 
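# Distilled form of the ordering rule enforced by persist_checkpoint_async
# above: a checkpoint may only overwrite the stored one when its sequence
# number is at least as high, so stale checkpoints are rejected. Checkpoint is
# the class from the removed checkpoint.py; the values below are made up.
from azure.eventprocessorhost.checkpoint import Checkpoint


def may_overwrite(new, stored):
    """Return True when `new` is allowed to replace `stored`."""
    return stored is None or new.sequence_number >= stored.sequence_number


stored = Checkpoint("0", offset="1024", sequence_number=17)
assert may_overwrite(Checkpoint("0", offset="2048", sequence_number=18), stored)
assert not may_overwrite(Checkpoint("0", offset="512", sequence_number=9), stored)
assert may_overwrite(Checkpoint("0", offset="64", sequence_number=3), None)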
-
-        :rtype: list[str]
-        """
-        if not self.partition_ids:
-            hostname = "{}.{}".format(self.host.eh_config.sb_name, self.host.eh_config.namespace_suffix)
-            event_hub_path = self.host.eh_config.eh_name
-            shared_key_cred = EventHubSharedKeyCredential(self.host.eh_config.policy, self.host.eh_config.sas_key)
-
-            eh_client = EventHubClient(
-                hostname, event_hub_path, shared_key_cred,
-                network_tracing=self.host.eph_options.debug_trace,
-                # http_proxy=self.host.eph_options.http_proxy,
-            )
-            try:
-                eh_info = await eh_client.get_properties()
-                self.partition_ids = eh_info['partition_ids']
-            except Exception as err:  # pylint: disable=broad-except
-                raise Exception("Failed to get partition ids", repr(err))
-        return self.partition_ids
-
-    async def start_async(self):
-        """
-        Initializes the partition checkpoint and lease store and then calls run async.
-        """
-        if self.run_task:
-            raise Exception("A PartitionManager cannot be started multiple times.")
-
-        partition_count = await self.initialize_stores_async()
-        _logger.info("%r PartitionCount: %r", self.host.guid, partition_count)
-        self.run_task = asyncio.ensure_future(self.run_async())
-
-    async def stop_async(self):
-        """
-        Terminates the partition manager.
-        """
-        self.cancellation_token.cancel()
-        if self.run_task and not self.run_task.done():
-            await self.run_task
-
-    async def run_async(self):
-        """
-        Starts the run loop and manages exceptions and cleanup.
-        """
-        try:
-            await self.run_loop_async()
-        except Exception as err:  # pylint: disable=broad-except
-            _logger.error("Run loop failed %r", err)
-
-        try:
-            _logger.info("Shutting down all pumps %r", self.host.guid)
-            await self.remove_all_pumps_async("Shutdown")
-        except Exception as err:  # pylint: disable=broad-except
-            raise Exception("Failed to remove all pumps {!r}".format(err))
-
-    async def initialize_stores_async(self):
-        """
-        Initializes the partition checkpoint and lease store and ensures that a checkpoint
-        exists for all partitions. Note in this case checkpoint and lease stores are
-        the same storage manager construct.
-
-        :return: Returns the number of partitions.
-        :rtype: int
-        """
-        await self.host.storage_manager.create_checkpoint_store_if_not_exists_async()
-        partition_ids = await self.get_partition_ids_async()
-        retry_tasks = []
-        for partition_id in partition_ids:
-            retry_tasks.append(
-                self.retry_async(
-                    self.host.storage_manager.create_checkpoint_if_not_exists_async,
-                    partition_id=partition_id,
-                    retry_message="Failure creating checkpoint for partition, retrying",
-                    final_failure_message="Out of retries creating checkpoint blob for partition",
-                    max_retries=5,
-                    host_id=self.host.host_name))
-
-        await asyncio.gather(*retry_tasks)
-        return len(partition_ids)
-
-    def retry(self, func, partition_id, retry_message, final_failure_message, max_retries, host_id):
-        """
-        Make the retry_async call synchronous.
-        """
-        loop = asyncio.new_event_loop()
-        loop.run_until_complete(self.retry_async(func, partition_id, retry_message,
-                                                 final_failure_message, max_retries, host_id))
-
-    async def retry_async(self, func, partition_id, retry_message,
-                          final_failure_message, max_retries, host_id):
-        """
-        Throws if it runs out of retries. If it returns, action succeeded.
- """ - created_okay = False - retry_count = 0 - while not created_okay and retry_count <= max_retries: - try: - await func(partition_id) - created_okay = True - except Exception as err: # pylint: disable=broad-except - _logger.error("%r %r %r %r", retry_message, host_id, partition_id, err) - retry_count += 1 - if not created_okay: - raise Exception(host_id, final_failure_message) - - async def run_loop_async(self): - """ - This is the main execution loop for allocating and manging pumps. - """ - while not self.cancellation_token.is_cancelled: - lease_manager = self.host.storage_manager - # Inspect all leases. - # Acquire any expired leases. - # Renew any leases that currently belong to us. - getting_all_leases = await lease_manager.get_all_leases() - leases_owned_by_others_q = Queue() - renew_tasks = [ - self.attempt_renew_lease_async( - get_lease_task, - owned_by_others_q=leases_owned_by_others_q, - lease_manager=lease_manager) - for get_lease_task in getting_all_leases] - await asyncio.gather(*renew_tasks) - - # Extract all leasees leases_owned_by_others and our_lease_count from the - all_leases = {} - leases_owned_by_others = [] - our_lease_count = 0 - while not leases_owned_by_others_q.empty(): - lease_owned_by_other = leases_owned_by_others_q.get() - # Check if lease is owned by other and append - if lease_owned_by_other[0]: - leases_owned_by_others.append(lease_owned_by_other[1]) - else: - our_lease_count += 1 - all_leases[lease_owned_by_other[1].partition_id] = lease_owned_by_other[1] - - # Grab more leases if available and needed for load balancing - leases_owned_by_others_count = len(leases_owned_by_others) - if leases_owned_by_others_count > 0: - steal_this_lease = self.which_lease_to_steal( - leases_owned_by_others, our_lease_count) - if steal_this_lease: - try: - _logger.info("Lease to steal %r", steal_this_lease.serializable()) - if await lease_manager.acquire_lease_async(steal_this_lease): - _logger.info("Stole lease sucessfully %r %r", - self.host.guid, steal_this_lease.partition_id) - else: - _logger.info("Failed to steal lease for partition %r %r", - self.host.guid, steal_this_lease.partition_id) - except Exception as err: # pylint: disable=broad-except - _logger.error("Failed to steal lease %r", err) - - for partition_id in all_leases: - try: - updated_lease = all_leases[partition_id] - if updated_lease.owner == self.host.host_name: - _logger.debug("Attempting to renew lease %r %r", - self.host.guid, partition_id) - await self.check_and_add_pump_async(partition_id, updated_lease) - else: - _logger.debug("Removing pump due to lost lease.") - await self.remove_pump_async(partition_id, "LeaseLost") - except Exception as err: # pylint: disable=broad-except - _logger.error("Failed to update lease %r", err) - await asyncio.sleep(lease_manager.lease_renew_interval) - - async def check_and_add_pump_async(self, partition_id, lease): - """ - Updates the lease on an exisiting pump. - - :param partition_id: The partition ID. - :type partition_id: str - :param lease: The lease to be used. - :type lease: ~azure.eventprocessorhost.lease.Lease - """ - if partition_id in self.partition_pumps: - # There already is a pump. Make sure the pump is working and replace the lease. - captured_pump = self.partition_pumps[partition_id] - if captured_pump.pump_status == "Errored" or captured_pump.is_closing(): - # The existing pump is bad. Remove it. - await self.remove_pump_async(partition_id, "Shutdown") - else: - # Pump is working, should just replace the lease. 
- # This is causing a race condition since if the checkpoint is being updated - # when the lease changes then the pump will error and shut down - captured_pump.set_lease(lease) - else: - _logger.info("Starting pump %r %r", self.host.guid, partition_id) - await self.create_new_pump_async(partition_id, lease) - - async def create_new_pump_async(self, partition_id, lease): - """ - Create a new pump thread with a given lease. - - :param partition_id: The partition ID. - :type partition_id: str - :param lease: The lease to be used. - :type lease: ~azure.eventprocessorhost.lease.Lease - """ - loop = asyncio.get_event_loop() - partition_pump = EventHubPartitionPump(self.host, lease) - # Do the put after start, if the start fails then put doesn't happen - loop.create_task(partition_pump.open_async()) - self.partition_pumps[partition_id] = partition_pump - _logger.info("Created new partition pump %r %r", self.host.guid, partition_id) - - async def remove_pump_async(self, partition_id, reason): - """ - Stops a single partiton pump. - - :param partition_id: The partition ID. - :type partition_id: str - :param reason: A reason for closing. - :type reason: str - """ - if partition_id in self.partition_pumps: - captured_pump = self.partition_pumps[partition_id] - if not captured_pump.is_closing(): - await captured_pump.close_async(reason) - # else, pump is already closing/closed, don't need to try to shut it down again - del self.partition_pumps[partition_id] # remove pump - _logger.debug("Removed pump %r %r", self.host.guid, partition_id) - _logger.debug("%r pumps still running", len(self.partition_pumps)) - else: - # PartitionManager main loop tries to remove pump for every partition that the - # host does not own, just to be sure. Not finding a pump for a partition is normal - # and expected most of the time. - _logger.debug("No pump found to remove for this partition %r %r", - self.host.guid, partition_id) - - async def remove_all_pumps_async(self, reason): - """ - Stops all partition pumps - (Note this might be wrong and need to await all tasks before returning done). - - :param reason: A reason for closing. - :type reason: str - :rtype: bool - """ - pump_tasks = [self.remove_pump_async(p_id, reason) for p_id in self.partition_pumps] - await asyncio.gather(*pump_tasks) - return True - - def which_lease_to_steal(self, stealable_leases, have_lease_count): - """ - Determines and return which lease to steal - If the number of leases is a multiple of the number of hosts, then the desired - configuration is that all hosts own the name number of leases, and the - difference between the "biggest" owner and any other is 0. - - If the number of leases is not a multiple of the number of hosts, then the most - even configurationpossible is for some hosts to have (self, leases/hosts) leases - and others to have (self, (self, leases/hosts) + 1). For example, for 16 partitions - distributed over five hosts, the distribution would be 4, 3, 3, 3, 3, or any of the - possible reorderings. - - In either case, if the difference between this host and the biggest owner is 2 or more, - then thesystem is not in the most evenly-distributed configuration, so steal one lease - from the biggest. If there is a tie for biggest, we pick whichever appears first in the - list because it doesn't really matter which "biggest" is trimmed down. - - Stealing one at a time prevents flapping because it reduces the difference between the - biggest and this host by two at a time. 
If the starting difference is two or greater, - then the difference cannot end up below 0. This host may become tied for biggest, but it - cannot become larger than the host that it is stealing from. - - :param stealable_leases: List of leases to determine which can be stolen. - :type stealable_leases: list[~azure.eventprocessorhost.lease.Lease] - :param have_lease_count: Lease count. - :type have_lease_count: int - :rtype: ~azure.eventprocessorhost.lease.Lease - """ - counts_by_owner = self.count_leases_by_owner(stealable_leases) - biggest_owner = (sorted(counts_by_owner.items(), key=lambda kv: kv[1])).pop() - steal_this_lease = None - if (biggest_owner[1] - have_lease_count) >= 2: - steal_this_lease = [l for l in stealable_leases if l.owner == biggest_owner[0]][0] - - return steal_this_lease - - def count_leases_by_owner(self, leases): # pylint: disable=no-self-use - """ - Returns a dictionary of leases by current owner. - """ - owners = [l.owner for l in leases] - return dict(Counter(owners)) - - def attempt_renew_lease(self, lease_task, owned_by_others_q, lease_manager): - """ - Make attempt_renew_lease async call sync. - """ - loop = asyncio.new_event_loop() - loop.run_until_complete(self.attempt_renew_lease_async(lease_task, owned_by_others_q, lease_manager)) - - async def attempt_renew_lease_async(self, lease_task, owned_by_others_q, lease_manager): - """ - Attempts to renew a potential lease if possible and - marks in the queue as none adds to adds to the queue. - """ - try: - possible_lease = await lease_task - if await possible_lease.is_expired(): - _logger.info("Trying to aquire lease %r %r", - self.host.guid, possible_lease.partition_id) - if await lease_manager.acquire_lease_async(possible_lease): - owned_by_others_q.put((False, possible_lease)) - else: - owned_by_others_q.put((True, possible_lease)) - - elif possible_lease.owner == self.host.host_name: - try: - _logger.debug("Trying to renew lease %r %r", - self.host.guid, possible_lease.partition_id) - if await lease_manager.renew_lease_async(possible_lease): - owned_by_others_q.put((False, possible_lease)) - else: - owned_by_others_q.put((True, possible_lease)) - except Exception as err: # pylint: disable=broad-except - # Update to 'Lease Lost' exception. - _logger.error("Lease lost exception %r %r %r", - err, self.host.guid, possible_lease.partition_id) - owned_by_others_q.put((True, possible_lease)) - else: - owned_by_others_q.put((True, possible_lease)) - - except Exception as err: # pylint: disable=broad-except - _logger.error( - "Failure during getting/acquiring/renewing lease, skipping %r", err) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/partition_pump.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/partition_pump.py deleted file mode 100644 index 9dbc55dfd221..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/partition_pump.py +++ /dev/null @@ -1,159 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# --------------------- - -from abc import abstractmethod -import logging -import asyncio -from azure.eventprocessorhost.partition_context import PartitionContext - - -_logger = logging.getLogger(__name__) - - -class PartitionPump(): - """ - Manages individual connection to a given partition. 
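# A worked example of the balancing rule described in which_lease_to_steal
# above: with 16 partitions across five hosts the steady state is 4/3/3/3/3,
# and a host only steals when the biggest owner holds at least two more leases
# than it does, which lets ownership converge without flapping. The host names
# and counts here are made up for illustration.
from collections import Counter


def owner_to_steal_from(lease_owners, my_lease_count):
    """Return the owner to steal one lease from, or None if already balanced."""
    counts = Counter(lease_owners)
    owner, biggest = max(counts.items(), key=lambda kv: kv[1])
    return owner if biggest - my_lease_count >= 2 else None


# host-a briefly owns every partition while the other hosts are starting up.
print(owner_to_steal_from(["host-a"] * 16, 0))            # host-a: steal one lease
print(owner_to_steal_from(
    ["host-a"] * 4 + ["host-b"] * 3 + ["host-c"] * 3 + ["host-d"] * 3, 3))  # None: difference < 2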
- """ - - def __init__(self, host, lease): - self.host = host - self.lease = lease - self.pump_status = "Uninitialized" - self.partition_context = None - self.processor = None - self.loop = None - - def run(self): - """ - Makes pump sync so that it can be run in a thread. - """ - self.loop = asyncio.new_event_loop() - self.loop.run_until_complete(self.open_async()) - - def set_pump_status(self, status): - """ - Updates pump status and logs update to console. - """ - self.pump_status = status - _logger.info("%r partition %r", status, self.lease.partition_id) - - def set_lease(self, new_lease): - """ - Sets a new partition lease to be processed by the pump. - - :param lease: The lease to set. - :type lease: ~azure.eventprocessorhost.lease.Lease - """ - if self.partition_context: - self.partition_context.lease = new_lease - self.partition_context.event_processor_context = new_lease.event_processor_context - - async def open_async(self): - """ - Opens partition pump. - """ - self.set_pump_status("Opening") - self.partition_context = PartitionContext(self.host, self.lease.partition_id, - self.host.eh_config.client_address, - self.host.eh_config.consumer_group, - self.loop) - self.partition_context.lease = self.lease - self.partition_context.event_processor_context = self.lease.event_processor_context - self.processor = self.host.event_processor(self.host.event_processor_params) - try: - await self.processor.open_async(self.partition_context) - except Exception as err: # pylint: disable=broad-except - # If the processor won't create or open, only thing we can do here is pass the buck. - # Null it out so we don't try to operate on it further. - await self.process_error_async(err) - self.processor = None - self.set_pump_status("OpenFailed") - - # If Open Async Didn't Fail call OnOpenAsync - if self.pump_status == "Opening": - await self.on_open_async() - - @abstractmethod - async def on_open_async(self): - """ - Event handler for on open event. - """ - - def is_closing(self): - """ - Returns whether pump is closing. - - :rtype: bool - """ - return self.pump_status == "Closing" or self.pump_status == "Closed" - - async def close_async(self, reason): - """ - Safely closes the pump. - - :param reason: The reason for the shutdown. - :type reason: str - """ - self.set_pump_status("Closing") - try: - await self.on_closing_async(reason) - if self.processor: - _logger.info("PartitionPumpInvokeProcessorCloseStart %r %r %r", - self.host.guid, self.partition_context.partition_id, reason) - await self.processor.close_async(self.partition_context, reason) - _logger.info("PartitionPumpInvokeProcessorCloseStart %r %r", - self.host.guid, self.partition_context.partition_id) - except Exception as err: # pylint: disable=broad-except - await self.process_error_async(err) - _logger.error("%r %r %r", self.host.guid, self.partition_context.partition_id, err) - raise err - - if reason == "LeaseLost": - try: - _logger.info("Lease Lost releasing ownership") - await self.host.storage_manager.release_lease_async(self.partition_context.lease) - except Exception as err: # pylint: disable=broad-except - _logger.error("%r %r %r", self.host.guid, self.partition_context.partition_id, err) - raise err - - self.set_pump_status("Closed") - - @abstractmethod - async def on_closing_async(self, reason): - """ - Event handler for on closing event. - - :param reason: The reason for the shutdown. - :type reason: str - """ - - async def process_events_async(self, events): - """ - Process pump events. 
- - :param events: List of events to be processed. - :type events: list[~azure.eventhub.common.EventData] - """ - if events: - # Synchronize to serialize calls to the processor. The handler is not installed until - # after OpenAsync returns, so ProcessEventsAsync cannot conflict with OpenAsync. There - # could be a conflict between ProcessEventsAsync and CloseAsync, however. All calls to - # CloseAsync are protected by synchronizing too. - try: - last = events[-1] - if last is not None: - self.partition_context.set_offset_and_sequence_number(last) - await self.processor.process_events_async(self.partition_context, events) - except Exception as err: # pylint: disable=broad-except - await self.process_error_async(err) - - async def process_error_async(self, error): - """ - Passes error to the event processor for processing. - - :param error: An error the occurred. - :type error: Exception - """ - await self.processor.process_error_async(self.partition_context, error) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/__init__.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/__init__.py deleted file mode 100644 index 9ff70295da3a..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# -------------------------------------------------------------------------------------------- -__path__ = __import__('pkgutil').extend_path(__path__, __name__) \ No newline at end of file diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/__init__.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/__init__.py deleted file mode 100644 index 9ff70295da3a..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# -------------------------------------------------------------------------------------------- -__path__ = __import__('pkgutil').extend_path(__path__, __name__) \ No newline at end of file diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/__init__.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/__init__.py deleted file mode 100644 index eb3e5d0fde33..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. 
-# -------------------------------------------------------------------------- -from .appendblobservice import AppendBlobService -from .blockblobservice import BlockBlobService -from .models import ( - Container, - ContainerProperties, - Blob, - BlobProperties, - BlobBlock, - BlobBlockList, - PageRange, - ContentSettings, - CopyProperties, - ContainerPermissions, - BlobPermissions, - _LeaseActions, - AppendBlockProperties, - PageBlobProperties, - ResourceProperties, - Include, - SequenceNumberAction, - BlockListType, - PublicAccess, - BlobPrefix, - DeleteSnapshot, -) -from .pageblobservice import PageBlobService diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_constants.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_constants.py deleted file mode 100644 index 062a035662e3..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_constants.py +++ /dev/null @@ -1,14 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- - -__author__ = 'Microsoft Corp. ' -__version__ = '2.0.1' - -# x-ms-version for storage service. -X_MS_VERSION = '2018-11-09' - -# internal configurations, should not be changed -_LARGE_BLOB_UPLOAD_MAX_READ_BUFFER_SIZE = 4 * 1024 * 1024 diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_deserialization.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_deserialization.py deleted file mode 100644 index 969f256b4a76..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_deserialization.py +++ /dev/null @@ -1,556 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- -from azure.common import AzureException -from dateutil import parser - -try: - from xml.etree import cElementTree as ETree -except ImportError: - from xml.etree import ElementTree as ETree -from ..common._common_conversion import ( - _decode_base64_to_text, - _to_str, - _get_content_md5 -) -from ..common._deserialization import ( - _parse_properties, - _to_int, - _parse_metadata, - _convert_xml_to_signed_identifiers, - _bool, -) -from .models import ( - Container, - Blob, - BlobBlock, - BlobBlockList, - BlobBlockState, - BlobProperties, - PageRange, - ContainerProperties, - AppendBlockProperties, - PageBlobProperties, - ResourceProperties, - BlobPrefix, - AccountInformation, - UserDelegationKey, -) -from ._encryption import _decrypt_blob -from ..common.models import _list -from ..common._error import ( - _validate_content_match, - _ERROR_DECRYPTION_FAILURE, -) - - -def _parse_base_properties(response): - ''' - Extracts basic response headers. - ''' - resource_properties = ResourceProperties() - resource_properties.last_modified = parser.parse(response.headers.get('last-modified')) - resource_properties.etag = response.headers.get('etag') - - return resource_properties - - -def _parse_page_properties(response): - ''' - Extracts page response headers. 
- ''' - put_page = PageBlobProperties() - put_page.last_modified = parser.parse(response.headers.get('last-modified')) - put_page.etag = response.headers.get('etag') - put_page.sequence_number = _to_int(response.headers.get('x-ms-blob-sequence-number')) - - return put_page - - -def _parse_append_block(response): - ''' - Extracts append block response headers. - ''' - append_block = AppendBlockProperties() - append_block.last_modified = parser.parse(response.headers.get('last-modified')) - append_block.etag = response.headers.get('etag') - append_block.append_offset = _to_int(response.headers.get('x-ms-blob-append-offset')) - append_block.committed_block_count = _to_int(response.headers.get('x-ms-blob-committed-block-count')) - - return append_block - - -def _parse_snapshot_blob(response, name): - ''' - Extracts snapshot return header. - ''' - snapshot = response.headers.get('x-ms-snapshot') - - return _parse_blob(response, name, snapshot) - - -def _parse_lease(response): - ''' - Extracts lease time and ID return headers. - ''' - lease = {'time': response.headers.get('x-ms-lease-time')} - if lease['time']: - lease['time'] = _to_int(lease['time']) - - lease['id'] = response.headers.get('x-ms-lease-id') - - return lease - - -def _parse_blob(response, name, snapshot, validate_content=False, require_encryption=False, - key_encryption_key=None, key_resolver_function=None, start_offset=None, end_offset=None): - if response is None: - return None - - metadata = _parse_metadata(response) - props = _parse_properties(response, BlobProperties) - - # For range gets, only look at 'x-ms-blob-content-md5' for overall MD5 - content_settings = getattr(props, 'content_settings') - if 'content-range' in response.headers: - if 'x-ms-blob-content-md5' in response.headers: - setattr(content_settings, 'content_md5', _to_str(response.headers['x-ms-blob-content-md5'])) - else: - delattr(content_settings, 'content_md5') - - if validate_content: - computed_md5 = _get_content_md5(response.body) - _validate_content_match(response.headers['content-md5'], computed_md5) - - if key_encryption_key is not None or key_resolver_function is not None: - try: - response.body = _decrypt_blob(require_encryption, key_encryption_key, key_resolver_function, - response, start_offset, end_offset) - except: - raise AzureException(_ERROR_DECRYPTION_FAILURE) - - return Blob(name, snapshot, response.body, props, metadata) - - -def _parse_container(response, name): - if response is None: - return None - - metadata = _parse_metadata(response) - props = _parse_properties(response, ContainerProperties) - return Container(name, props, metadata) - - -def _convert_xml_to_signed_identifiers_and_access(response): - acl = _convert_xml_to_signed_identifiers(response) - acl.public_access = response.headers.get('x-ms-blob-public-access') - - return acl - - -def _convert_xml_to_containers(response): - ''' - - - string-value - string-value - int-value - - - container-name - - date/time-value - etag - locked | unlocked - available | leased | expired | breaking | broken - infinite | fixed - blob | container - true | false - true | false - - - value - - - - marker-value - - ''' - if response is None or response.body is None: - return None - - containers = _list() - list_element = ETree.fromstring(response.body) - - # Set next marker - setattr(containers, 'next_marker', list_element.findtext('NextMarker')) - - containers_element = list_element.find('Containers') - - for container_element in containers_element.findall('Container'): - # Name element - container 
= Container() - container.name = container_element.findtext('Name') - - # Metadata - metadata_root_element = container_element.find('Metadata') - if metadata_root_element is not None: - container.metadata = dict() - for metadata_element in metadata_root_element: - container.metadata[metadata_element.tag] = metadata_element.text - - # Properties - properties_element = container_element.find('Properties') - container.properties.etag = properties_element.findtext('Etag') - container.properties.last_modified = parser.parse(properties_element.findtext('Last-Modified')) - container.properties.lease_status = properties_element.findtext('LeaseStatus') - container.properties.lease_state = properties_element.findtext('LeaseState') - container.properties.lease_duration = properties_element.findtext('LeaseDuration') - container.properties.public_access = properties_element.findtext('PublicAccess') - container.properties.has_immutability_policy = properties_element.findtext('HasImmutabilityPolicy') - container.properties.has_legal_hold = properties_element.findtext('HasLegalHold') - - # Add container to list - containers.append(container) - - return containers - - -LIST_BLOBS_ATTRIBUTE_MAP = { - 'Last-Modified': (None, 'last_modified', parser.parse), - 'Etag': (None, 'etag', _to_str), - 'x-ms-blob-sequence-number': (None, 'sequence_number', _to_int), - 'BlobType': (None, 'blob_type', _to_str), - 'Content-Length': (None, 'content_length', _to_int), - 'ServerEncrypted': (None, 'server_encrypted', _bool), - 'Content-Type': ('content_settings', 'content_type', _to_str), - 'Content-Encoding': ('content_settings', 'content_encoding', _to_str), - 'Content-Disposition': ('content_settings', 'content_disposition', _to_str), - 'Content-Language': ('content_settings', 'content_language', _to_str), - 'Content-MD5': ('content_settings', 'content_md5', _to_str), - 'Cache-Control': ('content_settings', 'cache_control', _to_str), - 'LeaseStatus': ('lease', 'status', _to_str), - 'LeaseState': ('lease', 'state', _to_str), - 'LeaseDuration': ('lease', 'duration', _to_str), - 'CopyId': ('copy', 'id', _to_str), - 'CopySource': ('copy', 'source', _to_str), - 'CopyStatus': ('copy', 'status', _to_str), - 'CopyProgress': ('copy', 'progress', _to_str), - 'CopyCompletionTime': ('copy', 'completion_time', _to_str), - 'CopyStatusDescription': ('copy', 'status_description', _to_str), - 'AccessTier': (None, 'blob_tier', _to_str), - 'AccessTierChangeTime': (None, 'blob_tier_change_time', parser.parse), - 'AccessTierInferred': (None, 'blob_tier_inferred', _bool), - 'ArchiveStatus': (None, 'rehydration_status', _to_str), - 'DeletedTime': (None, 'deleted_time', parser.parse), - 'RemainingRetentionDays': (None, 'remaining_retention_days', _to_int), - 'Creation-Time': (None, 'creation_time', parser.parse), -} - - -def _convert_xml_to_blob_list(response): - ''' - - - string-value - string-value - int-value - string-value - - - blob-name - true - date-time-value - - date-time-value - etag - size-in-bytes - blob-content-type - - - - - sequence-number - BlockBlob|PageBlob|AppendBlob - locked|unlocked - available | leased | expired | breaking | broken - infinite | fixed - id - pending | success | aborted | failed - source url - bytes copied/bytes total - datetime - error string - P4 | P6 | P10 | P20 | P30 | P40 | P50 | P60 | Archive | Cool | Hot - date-time-value - true - datetime - int - date-time-value - - - value - - - - blob-prefix - - - - - ''' - if response is None or response.body is None: - return None - - blob_list = _list() - 
list_element = ETree.fromstring(response.body) - - setattr(blob_list, 'next_marker', list_element.findtext('NextMarker')) - - blobs_element = list_element.find('Blobs') - blob_prefix_elements = blobs_element.findall('BlobPrefix') - if blob_prefix_elements is not None: - for blob_prefix_element in blob_prefix_elements: - prefix = BlobPrefix() - prefix.name = blob_prefix_element.findtext('Name') - blob_list.append(prefix) - - for blob_element in blobs_element.findall('Blob'): - blob = Blob() - blob.name = blob_element.findtext('Name') - blob.snapshot = blob_element.findtext('Snapshot') - - deleted = blob_element.findtext('Deleted') - if deleted: - blob.deleted = _bool(deleted) - - # Properties - properties_element = blob_element.find('Properties') - if properties_element is not None: - for property_element in properties_element: - info = LIST_BLOBS_ATTRIBUTE_MAP.get(property_element.tag) - if info is None: - setattr(blob.properties, property_element.tag, _to_str(property_element.text)) - elif info[0] is None: - setattr(blob.properties, info[1], info[2](property_element.text)) - else: - attr = getattr(blob.properties, info[0]) - setattr(attr, info[1], info[2](property_element.text)) - - # Metadata - metadata_root_element = blob_element.find('Metadata') - if metadata_root_element is not None: - blob.metadata = dict() - for metadata_element in metadata_root_element: - blob.metadata[metadata_element.tag] = metadata_element.text - - # Add blob to list - blob_list.append(blob) - - return blob_list - - -def _convert_xml_to_blob_name_list(response): - ''' - - - string-value - string-value - int-value - string-value - - - blob-name - true - date-time-value - - date-time-value - etag - size-in-bytes - blob-content-type - - - - - sequence-number - BlockBlob|PageBlob|AppendBlob - locked|unlocked - available | leased | expired | breaking | broken - infinite | fixed - id - pending | success | aborted | failed - source url - bytes copied/bytes total - datetime - error string - P4 | P6 | P10 | P20 | P30 | P40 | P50 | P60 | Archive | Cool | Hot - date-time-value - true - datetime - int - date-time-value - - - value - - - - blob-prefix - - - - - ''' - if response is None or response.body is None: - return None - - blob_list = _list() - list_element = ETree.fromstring(response.body) - - setattr(blob_list, 'next_marker', list_element.findtext('NextMarker')) - - blobs_element = list_element.find('Blobs') - blob_prefix_elements = blobs_element.findall('BlobPrefix') - if blob_prefix_elements is not None: - for blob_prefix_element in blob_prefix_elements: - blob_list.append(blob_prefix_element.findtext('Name')) - - for blob_element in blobs_element.findall('Blob'): - blob_list.append(blob_element.findtext('Name')) - - return blob_list - - -def _convert_xml_to_block_list(response): - ''' - - - - - base64-encoded-block-id - size-in-bytes - - - - - base64-encoded-block-id - size-in-bytes - - - - - Converts xml response to block list class. 
- ''' - if response is None or response.body is None: - return None - - block_list = BlobBlockList() - - list_element = ETree.fromstring(response.body) - - committed_blocks_element = list_element.find('CommittedBlocks') - if committed_blocks_element is not None: - for block_element in committed_blocks_element.findall('Block'): - block_id = _decode_base64_to_text(block_element.findtext('Name', '')) - block_size = int(block_element.findtext('Size')) - block = BlobBlock(id=block_id, state=BlobBlockState.Committed) - block._set_size(block_size) - block_list.committed_blocks.append(block) - - uncommitted_blocks_element = list_element.find('UncommittedBlocks') - if uncommitted_blocks_element is not None: - for block_element in uncommitted_blocks_element.findall('Block'): - block_id = _decode_base64_to_text(block_element.findtext('Name', '')) - block_size = int(block_element.findtext('Size')) - block = BlobBlock(id=block_id, state=BlobBlockState.Uncommitted) - block._set_size(block_size) - block_list.uncommitted_blocks.append(block) - - return block_list - - -def _convert_xml_to_page_ranges(response): - ''' - - - - Start Byte - End Byte - - - Start Byte - End Byte - - - Start Byte - End Byte - - - ''' - if response is None or response.body is None: - return None - - page_list = list() - - list_element = ETree.fromstring(response.body) - - for page_range_element in list_element: - if page_range_element.tag == 'PageRange': - is_cleared = False - elif page_range_element.tag == 'ClearRange': - is_cleared = True - else: - pass # ignore any unrecognized Page Range types - - page_list.append( - PageRange( - int(page_range_element.findtext('Start')), - int(page_range_element.findtext('End')), - is_cleared - ) - ) - - return page_list - - -def _parse_account_information(response): - account_info = AccountInformation() - account_info.sku_name = response.headers['x-ms-sku-name'] - account_info.account_kind = response.headers['x-ms-account-kind'] - - return account_info - - -def _convert_xml_to_user_delegation_key(response): - """ - - - Guid - Guid - String, formatted ISO Date - String, formatted ISO Date - b - String, rest api version used to create delegation key - Ovg+o0K/0/2V8upg7AwlyAPCriEcOSXKuBu2Gv/PU70Y7aWDW3C2ZRmw6kYWqPWBaM1GosLkcSZkgsobAlT+Sw== - - - Converts xml response to UserDelegationKey class. - """ - - if response is None or response.body is None: - return None - - delegation_key = UserDelegationKey() - - key_element = ETree.fromstring(response.body) - delegation_key.signed_oid = key_element.findtext('SignedOid') - delegation_key.signed_tid = key_element.findtext('SignedTid') - delegation_key.signed_start = key_element.findtext('SignedStart') - delegation_key.signed_expiry = key_element.findtext('SignedExpiry') - delegation_key.signed_service = key_element.findtext('SignedService') - delegation_key.signed_version = key_element.findtext('SignedVersion') - delegation_key.value = key_element.findtext('Value') - - return delegation_key diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_download_chunking.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_download_chunking.py deleted file mode 100644 index e68a0e5dee42..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_download_chunking.py +++ /dev/null @@ -1,178 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. 
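For reference before the download-chunking module below, the _deserialization.py helpers removed above follow one recurring pattern: a lookup table maps XML element names to a target attribute and a converter, and a single loop populates the model object (see LIST_BLOBS_ATTRIBUTE_MAP). The sketch that follows is a minimal, self-contained illustration of that pattern only; the class, map entries, and XML are made up for the example and are not the library's API.

# Minimal sketch of attribute-map-driven XML deserialization (illustrative names).
try:
    from xml.etree import cElementTree as ETree
except ImportError:
    from xml.etree import ElementTree as ETree


class _Props(object):
    pass


# element tag -> (target attribute, converter)
ATTRIBUTE_MAP = {
    'Etag': ('etag', str),
    'Content-Length': ('content_length', int),
}


def parse_properties(xml_text):
    props = _Props()
    for element in ETree.fromstring(xml_text):
        target = ATTRIBUTE_MAP.get(element.tag)
        if target is None:
            # unknown tags are kept as plain strings
            setattr(props, element.tag, element.text)
        else:
            setattr(props, target[0], target[1](element.text))
    return props


props = parse_properties(
    '<Properties><Etag>0x8D</Etag><Content-Length>42</Content-Length></Properties>')
print(props.etag, props.content_length)

Keeping the mapping in data rather than in code is what lets one loop handle blob, lease, and copy properties without writing a parser per element.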
-# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- -import threading - - -def _download_blob_chunks(blob_service, container_name, blob_name, snapshot, - download_size, block_size, progress, start_range, end_range, - stream, max_connections, progress_callback, validate_content, - lease_id, if_modified_since, if_unmodified_since, if_match, - if_none_match, timeout, operation_context): - - downloader_class = _ParallelBlobChunkDownloader if max_connections > 1 else _SequentialBlobChunkDownloader - - downloader = downloader_class( - blob_service, - container_name, - blob_name, - snapshot, - download_size, - block_size, - progress, - start_range, - end_range, - stream, - progress_callback, - validate_content, - lease_id, - if_modified_since, - if_unmodified_since, - if_match, - if_none_match, - timeout, - operation_context, - ) - - if max_connections > 1: - import concurrent.futures - executor = concurrent.futures.ThreadPoolExecutor(max_connections) - list(executor.map(downloader.process_chunk, downloader.get_chunk_offsets())) - else: - for chunk in downloader.get_chunk_offsets(): - downloader.process_chunk(chunk) - - -class _BlobChunkDownloader(object): - def __init__(self, blob_service, container_name, blob_name, snapshot, download_size, - chunk_size, progress, start_range, end_range, stream, - progress_callback, validate_content, lease_id, if_modified_since, - if_unmodified_since, if_match, if_none_match, timeout, operation_context): - # identifiers for the blob - self.blob_service = blob_service - self.container_name = container_name - self.blob_name = blob_name - self.snapshot = snapshot - - # information on the download range/chunk size - self.chunk_size = chunk_size - self.download_size = download_size - self.start_index = start_range - self.blob_end = end_range - - # the destination that we will write to - self.stream = stream - - # progress related - self.progress_callback = progress_callback - self.progress_total = progress - - # parameters for each get blob operation - self.timeout = timeout - self.operation_context = operation_context - self.validate_content = validate_content - self.lease_id = lease_id - self.if_modified_since = if_modified_since - self.if_unmodified_since = if_unmodified_since - self.if_match = if_match - self.if_none_match = if_none_match - - def get_chunk_offsets(self): - index = self.start_index - while index < self.blob_end: - yield index - index += self.chunk_size - - def process_chunk(self, chunk_start): - if chunk_start + self.chunk_size > self.blob_end: - chunk_end = self.blob_end - else: - chunk_end = chunk_start + self.chunk_size - - chunk_data = self._download_chunk(chunk_start, chunk_end).content - length = chunk_end - chunk_start - if length > 0: - self._write_to_stream(chunk_data, chunk_start) - self._update_progress(length) - - # should be provided by the subclass - def _update_progress(self, length): - pass - - # should be provided by the subclass - def _write_to_stream(self, chunk_data, chunk_start): - pass - - def _download_chunk(self, chunk_start, chunk_end): - response = self.blob_service._get_blob( - self.container_name, - self.blob_name, - snapshot=self.snapshot, - start_range=chunk_start, - end_range=chunk_end - 1, - validate_content=self.validate_content, - lease_id=self.lease_id, - if_modified_since=self.if_modified_since, - if_unmodified_since=self.if_unmodified_since, - if_match=self.if_match, - 
if_none_match=self.if_none_match, - timeout=self.timeout, - _context=self.operation_context - ) - - # This makes sure that if_match is set so that we can validate - # that subsequent downloads are to an unmodified blob - self.if_match = response.properties.etag - return response - - -class _ParallelBlobChunkDownloader(_BlobChunkDownloader): - def __init__(self, blob_service, container_name, blob_name, snapshot, download_size, - chunk_size, progress, start_range, end_range, stream, - progress_callback, validate_content, lease_id, if_modified_since, - if_unmodified_since, if_match, if_none_match, timeout, operation_context): - - super(_ParallelBlobChunkDownloader, self).__init__(blob_service, container_name, blob_name, snapshot, - download_size, - chunk_size, progress, start_range, end_range, stream, - progress_callback, validate_content, lease_id, - if_modified_since, - if_unmodified_since, if_match, if_none_match, timeout, - operation_context) - - # for a parallel download, the stream is always seekable, so we note down the current position - # in order to seek to the right place when out-of-order chunks come in - self.stream_start = stream.tell() - - # since parallel operations are going on - # it is essential to protect the writing and progress reporting operations - self.stream_lock = threading.Lock() - self.progress_lock = threading.Lock() - - def _update_progress(self, length): - if self.progress_callback is not None: - with self.progress_lock: - self.progress_total += length - total_so_far = self.progress_total - self.progress_callback(total_so_far, self.download_size) - - def _write_to_stream(self, chunk_data, chunk_start): - with self.stream_lock: - self.stream.seek(self.stream_start + (chunk_start - self.start_index)) - self.stream.write(chunk_data) - - -class _SequentialBlobChunkDownloader(_BlobChunkDownloader): - def __init__(self, *args): - super(_SequentialBlobChunkDownloader, self).__init__(*args) - - def _update_progress(self, length): - if self.progress_callback is not None: - self.progress_total += length - self.progress_callback(self.progress_total, self.download_size) - - def _write_to_stream(self, chunk_data, chunk_start): - # chunk_start is ignored in the case of sequential download since we cannot seek the destination stream - self.stream.write(chunk_data) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_encryption.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_encryption.py deleted file mode 100644 index 757b49067475..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_encryption.py +++ /dev/null @@ -1,187 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. 
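Before the encryption module below, the _download_chunking.py code removed above reduces to one pattern: generate chunk offsets, fan the ranged downloads out over a thread pool, and serialize seek-plus-write on the destination stream with a lock so out-of-order chunks land in the right slot. The following is a rough, self-contained sketch of that flow, not the vendored implementation; fetch_range and the other names are placeholders.

# Simplified parallel chunk download: thread pool + lock-protected writes.
import concurrent.futures
import io
import threading


def download_chunks(fetch_range, total_size, chunk_size, stream, max_connections=4):
    # fetch_range(start, end) is assumed to return the bytes for [start, end).
    stream_start = stream.tell()
    lock = threading.Lock()

    def process(offset):
        end = min(offset + chunk_size, total_size)
        data = fetch_range(offset, end)
        with lock:  # out-of-order chunks seek to their own slot before writing
            stream.seek(stream_start + offset)
            stream.write(data)

    offsets = range(0, total_size, chunk_size)
    if max_connections > 1:
        with concurrent.futures.ThreadPoolExecutor(max_connections) as executor:
            list(executor.map(process, offsets))
    else:
        for offset in offsets:
            process(offset)


source = b'0123456789' * 100
buffer = io.BytesIO()
download_chunks(lambda s, e: source[s:e], len(source), 256, buffer)
assert buffer.getvalue() == source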
-# -------------------------------------------------------------------------- - -from json import ( - dumps, - loads, -) -from os import urandom - -from cryptography.hazmat.primitives.padding import PKCS7 - -from ..common._encryption import ( - _generate_encryption_data_dict, - _generate_AES_CBC_cipher, - _dict_to_encryption_data, - _validate_and_unwrap_cek, - _EncryptionAlgorithm, -) -from ..common._error import ( - _validate_not_none, - _validate_key_encryption_key_wrap, - _ERROR_DATA_NOT_ENCRYPTED, - _ERROR_UNSUPPORTED_ENCRYPTION_ALGORITHM, -) - - -def _encrypt_blob(blob, key_encryption_key): - ''' - Encrypts the given blob using AES256 in CBC mode with 128 bit padding. - Wraps the generated content-encryption-key using the user-provided key-encryption-key (kek). - Returns a json-formatted string containing the encryption metadata. This method should - only be used when a blob is small enough for single shot upload. Encrypting larger blobs - is done as a part of the _upload_blob_chunks method. - - :param bytes blob: - The blob to be encrypted. - :param object key_encryption_key: - The user-provided key-encryption-key. Must implement the following methods: - wrap_key(key)--wraps the specified key using an algorithm of the user's choice. - get_key_wrap_algorithm()--returns the algorithm used to wrap the specified symmetric key. - get_kid()--returns a string key id for this key-encryption-key. - :return: A tuple of json-formatted string containing the encryption metadata and the encrypted blob data. - :rtype: (str, bytes) - ''' - - _validate_not_none('blob', blob) - _validate_not_none('key_encryption_key', key_encryption_key) - _validate_key_encryption_key_wrap(key_encryption_key) - - # AES256 uses 256 bit (32 byte) keys and always with 16 byte blocks - content_encryption_key = urandom(32) - initialization_vector = urandom(16) - - cipher = _generate_AES_CBC_cipher(content_encryption_key, initialization_vector) - - # PKCS7 with 16 byte blocks ensures compatibility with AES. - padder = PKCS7(128).padder() - padded_data = padder.update(blob) + padder.finalize() - - # Encrypt the data. - encryptor = cipher.encryptor() - encrypted_data = encryptor.update(padded_data) + encryptor.finalize() - encryption_data = _generate_encryption_data_dict(key_encryption_key, content_encryption_key, - initialization_vector) - encryption_data['EncryptionMode'] = 'FullBlob' - - return dumps(encryption_data), encrypted_data - - -def _generate_blob_encryption_data(key_encryption_key): - ''' - Generates the encryption_metadata for the blob. - - :param bytes key_encryption_key: - The key-encryption-key used to wrap the cek associate with this blob. - :return: A tuple containing the cek and iv for this blob as well as the - serialized encryption metadata for the blob. - :rtype: (bytes, bytes, str) - ''' - encryption_data = None - content_encryption_key = None - initialization_vector = None - if key_encryption_key: - _validate_key_encryption_key_wrap(key_encryption_key) - content_encryption_key = urandom(32) - initialization_vector = urandom(16) - encryption_data = _generate_encryption_data_dict(key_encryption_key, - content_encryption_key, - initialization_vector) - encryption_data['EncryptionMode'] = 'FullBlob' - encryption_data = dumps(encryption_data) - - return content_encryption_key, initialization_vector, encryption_data - - -def _decrypt_blob(require_encryption, key_encryption_key, key_resolver, - response, start_offset, end_offset): - ''' - Decrypts the given blob contents and returns only the requested range. 
- - :param bool require_encryption: - Whether or not the calling blob service requires objects to be decrypted. - :param object key_encryption_key: - The user-provided key-encryption-key. Must implement the following methods: - wrap_key(key)--wraps the specified key using an algorithm of the user's choice. - get_key_wrap_algorithm()--returns the algorithm used to wrap the specified symmetric key. - get_kid()--returns a string key id for this key-encryption-key. - :param key_resolver(kid): - The user-provided key resolver. Uses the kid string to return a key-encryption-key - implementing the interface defined above. - :return: The decrypted blob content. - :rtype: bytes - ''' - _validate_not_none('response', response) - content = response.body - _validate_not_none('content', content) - - try: - encryption_data = _dict_to_encryption_data(loads(response.headers['x-ms-meta-encryptiondata'])) - except: - if require_encryption: - raise ValueError(_ERROR_DATA_NOT_ENCRYPTED) - - return content - - if not (encryption_data.encryption_agent.encryption_algorithm == _EncryptionAlgorithm.AES_CBC_256): - raise ValueError(_ERROR_UNSUPPORTED_ENCRYPTION_ALGORITHM) - - blob_type = response.headers['x-ms-blob-type'] - - iv = None - unpad = False - start_range, end_range = 0, len(content) - if 'content-range' in response.headers: - content_range = response.headers['content-range'] - # Format: 'bytes x-y/size' - - # Ignore the word 'bytes' - content_range = content_range.split(' ') - - content_range = content_range[1].split('-') - start_range = int(content_range[0]) - content_range = content_range[1].split('/') - end_range = int(content_range[0]) - blob_size = int(content_range[1]) - - if start_offset >= 16: - iv = content[:16] - content = content[16:] - start_offset -= 16 - else: - iv = encryption_data.content_encryption_IV - - if end_range == blob_size - 1: - unpad = True - else: - unpad = True - iv = encryption_data.content_encryption_IV - - if blob_type == 'PageBlob': - unpad = False - - content_encryption_key = _validate_and_unwrap_cek(encryption_data, key_encryption_key, key_resolver) - cipher = _generate_AES_CBC_cipher(content_encryption_key, iv) - decryptor = cipher.decryptor() - - content = decryptor.update(content) + decryptor.finalize() - if unpad: - unpadder = PKCS7(128).unpadder() - content = unpadder.update(content) + unpadder.finalize() - - return content[start_offset: len(content) - end_offset] - - -def _get_blob_encryptor_and_padder(cek, iv, should_pad): - encryptor = None - padder = None - - if cek is not None and iv is not None: - cipher = _generate_AES_CBC_cipher(cek, iv) - encryptor = cipher.encryptor() - padder = PKCS7(128).padder() if should_pad else None - - return encryptor, padder diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_error.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_error.py deleted file mode 100644 index f24edc81377e..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_error.py +++ /dev/null @@ -1,29 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- - -_ERROR_PAGE_BLOB_SIZE_ALIGNMENT = \ - 'Invalid page blob size: {0}. ' + \ - 'The size must be aligned to a 512-byte boundary.' 
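Stepping back to the _encryption.py module deleted just above: the scheme is classic envelope encryption. A fresh 256-bit content-encryption-key encrypts the blob with AES-CBC and PKCS7 padding, and only the wrapped key, a key id, and the IV travel next to the ciphertext. A condensed sketch follows; DummyKek and the metadata field names are illustrative stand-ins, not the service's wire format.

# Condensed envelope-encryption sketch (assumes the 'cryptography' package).
import os
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.primitives.padding import PKCS7


class DummyKek(object):
    # Illustrative only; a real key-encryption-key wraps the CEK with RSA or AES key wrap.
    def wrap_key(self, key):
        return key[::-1]

    def get_kid(self):
        return 'local:dummy-kek'


def encrypt_blob(data, kek):
    cek = os.urandom(32)   # content-encryption-key (AES-256)
    iv = os.urandom(16)
    padder = PKCS7(128).padder()
    padded = padder.update(data) + padder.finalize()
    encryptor = Cipher(algorithms.AES(cek), modes.CBC(iv), default_backend()).encryptor()
    ciphertext = encryptor.update(padded) + encryptor.finalize()
    # Only the wrapped CEK and the IV are stored; the plaintext CEK never leaves memory.
    metadata = {'WrappedContentKey': kek.wrap_key(cek), 'KeyId': kek.get_kid(), 'IV': iv}
    return metadata, ciphertext


metadata, ciphertext = encrypt_blob(b'hello append blobs', DummyKek())
print(len(ciphertext), metadata['KeyId'])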
- -_ERROR_PAGE_BLOB_START_ALIGNMENT = \ - 'start_range must align with 512 page size' - -_ERROR_PAGE_BLOB_END_ALIGNMENT = \ - 'end_range must align with 512 page size' - -_ERROR_INVALID_BLOCK_ID = \ - 'All blocks in block list need to have valid block ids.' - -_ERROR_INVALID_LEASE_DURATION = \ - "lease_duration param needs to be between 15 and 60 or -1." - -_ERROR_INVALID_LEASE_BREAK_PERIOD = \ - "lease_break_period param needs to be between 0 and 60." - -_ERROR_NO_SINGLE_THREAD_CHUNKING = \ - 'To use blob chunk downloader more than 1 thread must be ' + \ - 'used since get_blob_to_bytes should be called for single threaded ' + \ - 'blob downloads.' diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_serialization.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_serialization.py deleted file mode 100644 index 611d73db5093..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_serialization.py +++ /dev/null @@ -1,153 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- -from xml.sax.saxutils import escape as xml_escape -from datetime import date -try: - from xml.etree import cElementTree as ETree -except ImportError: - from xml.etree import ElementTree as ETree -from ..common._common_conversion import ( - _encode_base64, - _str, -) -from ..common._serialization import ( - _to_utc_datetime, -) -from ..common._error import ( - _validate_not_none, - _ERROR_START_END_NEEDED_FOR_MD5, - _ERROR_RANGE_TOO_LARGE_FOR_MD5, -) -from ._error import ( - _ERROR_PAGE_BLOB_START_ALIGNMENT, - _ERROR_PAGE_BLOB_END_ALIGNMENT, - _ERROR_INVALID_BLOCK_ID, -) -from io import BytesIO - - -def _get_path(container_name=None, blob_name=None): - ''' - Creates the path to access a blob resource. - - container_name: - Name of container. - blob_name: - The path to the blob. 
- ''' - if container_name and blob_name: - return '/{0}/{1}'.format( - _str(container_name), - _str(blob_name)) - elif container_name: - return '/{0}'.format(_str(container_name)) - else: - return '/' - - -def _validate_and_format_range_headers(request, start_range, end_range, start_range_required=True, - end_range_required=True, check_content_md5=False, align_to_page=False, - range_header_name='x-ms-range'): - # If end range is provided, start range must be provided - if start_range_required or end_range is not None: - _validate_not_none('start_range', start_range) - if end_range_required: - _validate_not_none('end_range', end_range) - - # Page ranges must be 512 aligned - if align_to_page: - if start_range is not None and start_range % 512 != 0: - raise ValueError(_ERROR_PAGE_BLOB_START_ALIGNMENT) - if end_range is not None and end_range % 512 != 511: - raise ValueError(_ERROR_PAGE_BLOB_END_ALIGNMENT) - - # Format based on whether end_range is present - request.headers = request.headers or {} - if end_range is not None: - request.headers[range_header_name] = 'bytes={0}-{1}'.format(start_range, end_range) - elif start_range is not None: - request.headers[range_header_name] = "bytes={0}-".format(start_range) - - # Content MD5 can only be provided for a complete range less than 4MB in size - if check_content_md5: - if start_range is None or end_range is None: - raise ValueError(_ERROR_START_END_NEEDED_FOR_MD5) - if end_range - start_range > 4 * 1024 * 1024: - raise ValueError(_ERROR_RANGE_TOO_LARGE_FOR_MD5) - - request.headers['x-ms-range-get-content-md5'] = 'true' - - -def _convert_block_list_to_xml(block_id_list): - ''' - - - first-base64-encoded-block-id - second-base64-encoded-block-id - third-base64-encoded-block-id - - - Convert a block list to xml to send. - - block_id_list: - A list of BlobBlock containing the block ids and block state that are used in put_block_list. - Only get block from latest blocks. - ''' - if block_id_list is None: - return '' - - block_list_element = ETree.Element('BlockList') - - # Enabled - for block in block_id_list: - if block.id is None: - raise ValueError(_ERROR_INVALID_BLOCK_ID) - id = xml_escape(_str(format(_encode_base64(block.id)))) - ETree.SubElement(block_list_element, block.state).text = id - - # Add xml declaration and serialize - try: - stream = BytesIO() - ETree.ElementTree(block_list_element).write(stream, xml_declaration=True, encoding='utf-8', method='xml') - except: - raise - finally: - output = stream.getvalue() - stream.close() - - # return xml value - return output - - -def _convert_delegation_key_info_to_xml(start_time, expiry_time): - """ - - - String, formatted ISO Date - String, formatted ISO Date - - - Convert key info to xml to send. 
- """ - if start_time is None or expiry_time is None: - raise ValueError("delegation key start/end times are required") - - key_info_element = ETree.Element('KeyInfo') - ETree.SubElement(key_info_element, 'Start').text = \ - _to_utc_datetime(start_time) if isinstance(start_time, date) else start_time - ETree.SubElement(key_info_element, 'Expiry').text = \ - _to_utc_datetime(expiry_time) if isinstance(expiry_time, date) else expiry_time - - # Add xml declaration and serialize - try: - stream = BytesIO() - ETree.ElementTree(key_info_element).write(stream, xml_declaration=True, encoding='utf-8', method='xml') - finally: - output = stream.getvalue() - stream.close() - - # return xml value - return output diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_upload_chunking.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_upload_chunking.py deleted file mode 100644 index b94f05811be7..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/_upload_chunking.py +++ /dev/null @@ -1,496 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- -from io import (BytesIO, IOBase, SEEK_CUR, SEEK_END, SEEK_SET, UnsupportedOperation) -from threading import Lock - -from math import ceil - -from ..common._common_conversion import _encode_base64 -from ..common._error import _ERROR_VALUE_SHOULD_BE_SEEKABLE_STREAM -from ..common._serialization import ( - url_quote, - _get_data_bytes_only, - _len_plus -) -from ._constants import ( - _LARGE_BLOB_UPLOAD_MAX_READ_BUFFER_SIZE -) -from ._encryption import ( - _get_blob_encryptor_and_padder, -) -from .models import BlobBlock - - -def _upload_blob_chunks(blob_service, container_name, blob_name, - blob_size, block_size, stream, max_connections, - progress_callback, validate_content, lease_id, uploader_class, - maxsize_condition=None, if_modified_since=None, if_unmodified_since=None, if_match=None, - if_none_match=None, timeout=None, - content_encryption_key=None, initialization_vector=None, resource_properties=None): - encryptor, padder = _get_blob_encryptor_and_padder(content_encryption_key, initialization_vector, - uploader_class is not _PageBlobChunkUploader) - - uploader = uploader_class( - blob_service, - container_name, - blob_name, - blob_size, - block_size, - stream, - max_connections > 1, - progress_callback, - validate_content, - lease_id, - timeout, - encryptor, - padder - ) - - uploader.maxsize_condition = maxsize_condition - - # Access conditions do not work with parallelism - if max_connections > 1: - uploader.if_match = uploader.if_none_match = uploader.if_modified_since = uploader.if_unmodified_since = None - else: - uploader.if_match = if_match - uploader.if_none_match = if_none_match - uploader.if_modified_since = if_modified_since - uploader.if_unmodified_since = if_unmodified_since - - if progress_callback is not None: - progress_callback(0, blob_size) - - if max_connections > 1: - import concurrent.futures - from threading import BoundedSemaphore - - ''' - Ensures we bound the chunking so we only buffer and submit 'max_connections' amount of work items to the executor. 
- This is necessary as the executor queue will keep accepting submitted work items, which results in buffering all the blocks if - the max_connections + 1 ensures the next chunk is already buffered and ready for when the worker thread is available. - ''' - chunk_throttler = BoundedSemaphore(max_connections + 1) - - executor = concurrent.futures.ThreadPoolExecutor(max_connections) - futures = [] - running_futures = [] - - # Check for exceptions and fail fast. - for chunk in uploader.get_chunk_streams(): - for f in running_futures: - if f.done(): - if f.exception(): - raise f.exception() - else: - running_futures.remove(f) - - chunk_throttler.acquire() - future = executor.submit(uploader.process_chunk, chunk) - - # Calls callback upon completion (even if the callback was added after the Future task is done). - future.add_done_callback(lambda x: chunk_throttler.release()) - futures.append(future) - running_futures.append(future) - - # result() will wait until completion and also raise any exceptions that may have been set. - range_ids = [f.result() for f in futures] - else: - range_ids = [uploader.process_chunk(result) for result in uploader.get_chunk_streams()] - - if resource_properties: - resource_properties.last_modified = uploader.last_modified - resource_properties.etag = uploader.etag - - return range_ids - - -def _upload_blob_substream_blocks(blob_service, container_name, blob_name, - blob_size, block_size, stream, max_connections, - progress_callback, validate_content, lease_id, uploader_class, - maxsize_condition=None, if_match=None, timeout=None): - uploader = uploader_class( - blob_service, - container_name, - blob_name, - blob_size, - block_size, - stream, - max_connections > 1, - progress_callback, - validate_content, - lease_id, - timeout, - None, - None - ) - - uploader.maxsize_condition = maxsize_condition - - # ETag matching does not work with parallelism as a ranged upload may start - # before the previous finishes and provides an etag - uploader.if_match = if_match if not max_connections > 1 else None - - if progress_callback is not None: - progress_callback(0, blob_size) - - if max_connections > 1: - import concurrent.futures - executor = concurrent.futures.ThreadPoolExecutor(max_connections) - range_ids = list(executor.map(uploader.process_substream_block, uploader.get_substream_blocks())) - else: - range_ids = [uploader.process_substream_block(result) for result in uploader.get_substream_blocks()] - - return range_ids - - -class _BlobChunkUploader(object): - def __init__(self, blob_service, container_name, blob_name, blob_size, - chunk_size, stream, parallel, progress_callback, - validate_content, lease_id, timeout, encryptor, padder): - self.blob_service = blob_service - self.container_name = container_name - self.blob_name = blob_name - self.blob_size = blob_size - self.chunk_size = chunk_size - self.stream = stream - self.parallel = parallel - self.stream_start = stream.tell() if parallel else None - self.stream_lock = Lock() if parallel else None - self.progress_callback = progress_callback - self.progress_total = 0 - self.progress_lock = Lock() if parallel else None - self.validate_content = validate_content - self.lease_id = lease_id - self.timeout = timeout - self.encryptor = encryptor - self.padder = padder - self.last_modified = None - self.etag = None - - def get_chunk_streams(self): - index = 0 - while True: - data = b'' - read_size = self.chunk_size - - # Buffer until we either reach the end of the stream or get a whole chunk. 
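The BoundedSemaphore dance in _upload_blob_chunks above is worth calling out: acquiring before each submit caps how many chunks are buffered ahead of the uploader threads, and releasing from the future's done-callback frees the next slot. A stripped-down sketch of that throttling, with placeholder chunks and upload_one arguments, might look like this:

# Throttled parallel upload: at most max_connections + 1 chunks in flight.
import concurrent.futures
from threading import BoundedSemaphore


def upload_chunks(chunks, upload_one, max_connections=4):
    # chunks is any iterable of byte strings; upload_one(chunk) performs one upload.
    throttler = BoundedSemaphore(max_connections + 1)
    futures = []
    with concurrent.futures.ThreadPoolExecutor(max_connections) as executor:
        for chunk in chunks:
            throttler.acquire()  # blocks instead of buffering the whole stream
            future = executor.submit(upload_one, chunk)
            future.add_done_callback(lambda _: throttler.release())
            futures.append(future)
        # result() waits for completion and re-raises any upload exception.
        return [f.result() for f in futures]


ids = upload_chunks((('block-%d' % i).encode() for i in range(10)), lambda c: len(c))
print(ids)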
- while True: - if self.blob_size: - read_size = min(self.chunk_size - len(data), self.blob_size - (index + len(data))) - temp = self.stream.read(read_size) - temp = _get_data_bytes_only('temp', temp) - data += temp - - # We have read an empty string and so are at the end - # of the buffer or we have read a full chunk. - if temp == b'' or len(data) == self.chunk_size: - break - - if len(data) == self.chunk_size: - if self.padder: - data = self.padder.update(data) - if self.encryptor: - data = self.encryptor.update(data) - yield index, data - else: - if self.padder: - data = self.padder.update(data) + self.padder.finalize() - if self.encryptor: - data = self.encryptor.update(data) + self.encryptor.finalize() - if len(data) > 0: - yield index, data - break - index += len(data) - - def process_chunk(self, chunk_data): - chunk_bytes = chunk_data[1] - chunk_offset = chunk_data[0] - return self._upload_chunk_with_progress(chunk_offset, chunk_bytes) - - def _update_progress(self, length): - if self.progress_callback is not None: - if self.progress_lock is not None: - with self.progress_lock: - self.progress_total += length - total = self.progress_total - else: - self.progress_total += length - total = self.progress_total - self.progress_callback(total, self.blob_size) - - def _upload_chunk_with_progress(self, chunk_offset, chunk_data): - range_id = self._upload_chunk(chunk_offset, chunk_data) - self._update_progress(len(chunk_data)) - return range_id - - def get_substream_blocks(self): - assert self.chunk_size is not None - lock = self.stream_lock - blob_length = self.blob_size - - if blob_length is None: - blob_length = _len_plus(self.stream) - if blob_length is None: - raise ValueError(_ERROR_VALUE_SHOULD_BE_SEEKABLE_STREAM.format('stream')) - - blocks = int(ceil(blob_length / (self.chunk_size * 1.0))) - last_block_size = self.chunk_size if blob_length % self.chunk_size == 0 else blob_length % self.chunk_size - - for i in range(blocks): - yield ('BlockId{}'.format("%05d" % i), - _SubStream(self.stream, i * self.chunk_size, last_block_size if i == blocks - 1 else self.chunk_size, - lock)) - - def process_substream_block(self, block_data): - return self._upload_substream_block_with_progress(block_data[0], block_data[1]) - - def _upload_substream_block_with_progress(self, block_id, block_stream): - range_id = self._upload_substream_block(block_id, block_stream) - self._update_progress(len(block_stream)) - return range_id - - def set_response_properties(self, resp): - self.etag = resp.etag - self.last_modified = resp.last_modified - - -class _BlockBlobChunkUploader(_BlobChunkUploader): - def _upload_chunk(self, chunk_offset, chunk_data): - block_id = url_quote(_encode_base64('{0:032d}'.format(chunk_offset))) - self.blob_service._put_block( - self.container_name, - self.blob_name, - chunk_data, - block_id, - validate_content=self.validate_content, - lease_id=self.lease_id, - timeout=self.timeout, - ) - return BlobBlock(block_id) - - def _upload_substream_block(self, block_id, block_stream): - try: - self.blob_service._put_block( - self.container_name, - self.blob_name, - block_stream, - block_id, - validate_content=self.validate_content, - lease_id=self.lease_id, - timeout=self.timeout, - ) - finally: - block_stream.close() - return BlobBlock(block_id) - - -class _PageBlobChunkUploader(_BlobChunkUploader): - def _is_chunk_empty(self, chunk_data): - # read until non-zero byte is encountered - # if reached the end without returning, then chunk_data is all 0's - for each_byte in chunk_data: - if 
each_byte != 0 and each_byte != b'\x00': - return False - return True - - def _upload_chunk(self, chunk_start, chunk_data): - # avoid uploading the empty pages - if not self._is_chunk_empty(chunk_data): - chunk_end = chunk_start + len(chunk_data) - 1 - resp = self.blob_service._update_page( - self.container_name, - self.blob_name, - chunk_data, - chunk_start, - chunk_end, - validate_content=self.validate_content, - lease_id=self.lease_id, - if_match=self.if_match, - timeout=self.timeout, - ) - - if not self.parallel: - self.if_match = resp.etag - - self.set_response_properties(resp) - - -class _AppendBlobChunkUploader(_BlobChunkUploader): - def _upload_chunk(self, chunk_offset, chunk_data): - if not hasattr(self, 'current_length'): - resp = self.blob_service.append_block( - self.container_name, - self.blob_name, - chunk_data, - validate_content=self.validate_content, - lease_id=self.lease_id, - maxsize_condition=self.maxsize_condition, - timeout=self.timeout, - if_modified_since=self.if_modified_since, - if_unmodified_since=self.if_unmodified_since, - if_match=self.if_match, - if_none_match=self.if_none_match - ) - - self.current_length = resp.append_offset - else: - resp = self.blob_service.append_block( - self.container_name, - self.blob_name, - chunk_data, - validate_content=self.validate_content, - lease_id=self.lease_id, - maxsize_condition=self.maxsize_condition, - appendpos_condition=self.current_length + chunk_offset, - timeout=self.timeout, - ) - - self.set_response_properties(resp) - - -class _SubStream(IOBase): - def __init__(self, wrapped_stream, stream_begin_index, length, lockObj): - # Python 2.7: file-like objects created with open() typically support seek(), but are not - # derivations of io.IOBase and thus do not implement seekable(). - # Python > 3.0: file-like objects created with open() are derived from io.IOBase. 
- try: - # only the main thread runs this, so there's no need grabbing the lock - wrapped_stream.seek(0, SEEK_CUR) - except: - raise ValueError("Wrapped stream must support seek().") - - self._lock = lockObj - self._wrapped_stream = wrapped_stream - self._position = 0 - self._stream_begin_index = stream_begin_index - self._length = length - self._buffer = BytesIO() - - # we must avoid buffering more than necessary, and also not use up too much memory - # so the max buffer size is capped at 4MB - self._max_buffer_size = length if length < _LARGE_BLOB_UPLOAD_MAX_READ_BUFFER_SIZE \ - else _LARGE_BLOB_UPLOAD_MAX_READ_BUFFER_SIZE - self._current_buffer_start = 0 - self._current_buffer_size = 0 - - def __len__(self): - return self._length - - def close(self): - if self._buffer: - self._buffer.close() - self._wrapped_stream = None - IOBase.close(self) - - def fileno(self): - return self._wrapped_stream.fileno() - - def flush(self): - pass - - def read(self, n): - if self.closed: - raise ValueError("Stream is closed.") - - # adjust if out of bounds - if n + self._position >= self._length: - n = self._length - self._position - - # return fast - if n is 0 or self._buffer.closed: - return b'' - - # attempt first read from the read buffer and update position - read_buffer = self._buffer.read(n) - bytes_read = len(read_buffer) - bytes_remaining = n - bytes_read - self._position += bytes_read - - # repopulate the read buffer from the underlying stream to fulfill the request - # ensure the seek and read operations are done atomically (only if a lock is provided) - if bytes_remaining > 0: - with self._buffer: - # either read in the max buffer size specified on the class - # or read in just enough data for the current block/sub stream - current_max_buffer_size = min(self._max_buffer_size, self._length - self._position) - - # lock is only defined if max_connections > 1 (parallel uploads) - if self._lock: - with self._lock: - # reposition the underlying stream to match the start of the data to read - absolute_position = self._stream_begin_index + self._position - self._wrapped_stream.seek(absolute_position, SEEK_SET) - # If we can't seek to the right location, our read will be corrupted so fail fast. 
- if self._wrapped_stream.tell() != absolute_position: - raise IOError("Stream failed to seek to the desired location.") - buffer_from_stream = self._wrapped_stream.read(current_max_buffer_size) - else: - buffer_from_stream = self._wrapped_stream.read(current_max_buffer_size) - - if buffer_from_stream: - # update the buffer with new data from the wrapped stream - # we need to note down the start position and size of the buffer, in case seek is performed later - self._buffer = BytesIO(buffer_from_stream) - self._current_buffer_start = self._position - self._current_buffer_size = len(buffer_from_stream) - - # read the remaining bytes from the new buffer and update position - second_read_buffer = self._buffer.read(bytes_remaining) - read_buffer += second_read_buffer - self._position += len(second_read_buffer) - - return read_buffer - - def readable(self): - return True - - def readinto(self, b): - raise UnsupportedOperation - - def seek(self, offset, whence=0): - if whence is SEEK_SET: - start_index = 0 - elif whence is SEEK_CUR: - start_index = self._position - elif whence is SEEK_END: - start_index = self._length - offset = - offset - else: - raise ValueError("Invalid argument for the 'whence' parameter.") - - pos = start_index + offset - - if pos > self._length: - pos = self._length - elif pos < 0: - pos = 0 - - # check if buffer is still valid - # if not, drop buffer - if pos < self._current_buffer_start or pos >= self._current_buffer_start + self._current_buffer_size: - self._buffer.close() - self._buffer = BytesIO() - else: # if yes seek to correct position - delta = pos - self._current_buffer_start - self._buffer.seek(delta, SEEK_SET) - - self._position = pos - return pos - - def seekable(self): - return True - - def tell(self): - return self._position - - def write(self): - raise UnsupportedOperation - - def writelines(self): - raise UnsupportedOperation - - def writeable(self): - return False diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/appendblobservice.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/appendblobservice.py deleted file mode 100644 index 266852c21468..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/appendblobservice.py +++ /dev/null @@ -1,781 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. 
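The _SubStream class removed above exists so several uploader threads can each read their own block from one shared seekable stream without trampling each other: every view remembers its window into the file and takes a lock around seek plus read. A much-simplified sketch of that idea follows; WindowedStream is illustrative and drops the internal read buffer the real class keeps.

# Simplified bounded view over a shared seekable stream, safe for parallel readers.
import io
import threading


class WindowedStream(object):
    def __init__(self, wrapped, begin, length, lock):
        self._wrapped = wrapped
        self._begin = begin
        self._length = length
        self._lock = lock
        self._pos = 0

    def __len__(self):
        return self._length

    def read(self, n=-1):
        if n < 0 or n > self._length - self._pos:
            n = self._length - self._pos
        if n == 0:
            return b''
        with self._lock:  # seek and read on the shared stream must be atomic
            self._wrapped.seek(self._begin + self._pos)
            data = self._wrapped.read(n)
        self._pos += len(data)
        return data


shared = io.BytesIO(b'abcdefghij')
lock = threading.Lock()
print(WindowedStream(shared, 0, 5, lock).read(), WindowedStream(shared, 5, 5, lock).read())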
-# -------------------------------------------------------------------------- -import sys -from os import path - -from ..common._common_conversion import ( - _to_str, - _int_to_str, - _datetime_to_utc_string, - _get_content_md5, -) -from ..common._constants import ( - SERVICE_HOST_BASE, - DEFAULT_PROTOCOL, -) -from ..common._error import ( - _validate_not_none, - _validate_type_bytes, - _validate_encryption_unsupported, - _ERROR_VALUE_NEGATIVE, -) -from ..common._http import HTTPRequest -from ..common._serialization import ( - _get_data_bytes_only, - _add_metadata_headers, -) -from ._deserialization import ( - _parse_append_block, - _parse_base_properties, -) -from ._serialization import ( - _get_path, - _validate_and_format_range_headers, -) -from ._upload_chunking import ( - _AppendBlobChunkUploader, - _upload_blob_chunks, -) -from .baseblobservice import BaseBlobService -from .models import ( - _BlobTypes, - ResourceProperties -) - -if sys.version_info >= (3,): - from io import BytesIO -else: - from cStringIO import StringIO as BytesIO - - -class AppendBlobService(BaseBlobService): - ''' - An append blob is comprised of blocks and is optimized for append operations. - When you modify an append blob, blocks are added to the end of the blob only, - via the append_block operation. Updating or deleting of existing blocks is not - supported. Unlike a block blob, an append blob does not expose its block IDs. - - Each block in an append blob can be a different size, up to a maximum of 4 MB, - and an append blob can include up to 50,000 blocks. The maximum size of an - append blob is therefore slightly more than 195 GB (4 MB X 50,000 blocks). - - :ivar int MAX_BLOCK_SIZE: - The size of the blocks put by append_blob_from_* methods. Smaller blocks - may be put if there is less data provided. The maximum block size the service - supports is 4MB. - ''' - MAX_BLOCK_SIZE = 4 * 1024 * 1024 - - def __init__(self, account_name=None, account_key=None, sas_token=None, is_emulated=False, - protocol=DEFAULT_PROTOCOL, endpoint_suffix=SERVICE_HOST_BASE, custom_domain=None, request_session=None, - connection_string=None, socket_timeout=None, token_credential=None): - ''' - :param str account_name: - The storage account name. This is used to authenticate requests - signed with an account key and to construct the storage endpoint. It - is required unless a connection string is given, or if a custom - domain is used with anonymous authentication. - :param str account_key: - The storage account key. This is used for shared key authentication. - If neither account key or sas token is specified, anonymous access - will be used. - :param str sas_token: - A shared access signature token to use to authenticate requests - instead of the account key. If account key and sas token are both - specified, account key will be used to sign. If neither are - specified, anonymous access will be used. - :param bool is_emulated: - Whether to use the emulator. Defaults to False. If specified, will - override all other parameters besides connection string and request - session. - :param str protocol: - The protocol to use for requests. Defaults to https. - :param str endpoint_suffix: - The host base component of the url, minus the account name. Defaults - to Azure (core.windows.net). Override this to use the China cloud - (core.chinacloudapi.cn). - :param str custom_domain: - The custom domain to use. This can be set in the Azure Portal. For - example, 'www.mydomain.com'. 
- :param requests.Session request_session: - The session object to use for http requests. - :param str connection_string: - If specified, this will override all other parameters besides - request session. See - http://azure.microsoft.com/en-us/documentation/articles/storage-configure-connection-string/ - for the connection string format. - :param int socket_timeout: - If specified, this will override the default socket timeout. The timeout specified is in seconds. - See DEFAULT_SOCKET_TIMEOUT in _constants.py for the default value. - :param token_credential: - A token credential used to authenticate HTTPS requests. The token value - should be updated before its expiration. - :type `~azure.storage.common.TokenCredential` - ''' - self.blob_type = _BlobTypes.AppendBlob - super(AppendBlobService, self).__init__( - account_name, account_key, sas_token, is_emulated, protocol, endpoint_suffix, - custom_domain, request_session, connection_string, socket_timeout, token_credential) - - def create_blob(self, container_name, blob_name, content_settings=None, - metadata=None, lease_id=None, - if_modified_since=None, if_unmodified_since=None, - if_match=None, if_none_match=None, timeout=None): - ''' - Creates a blob or overrides an existing blob. Use if_none_match=* to - prevent overriding an existing blob. - - See create_blob_from_* for high level - functions that handle the creation and upload of large blobs with - automatic chunking and progress notifications. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of blob to create or update. - :param ~azure.storage.blob.models.ContentSettings content_settings: - ContentSettings object used to set blob properties. - :param metadata: - Name-value pairs associated with the blob as metadata. - :type metadata: dict(str, str) - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to - perform the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. 
- :return: ETag and last modified properties for the updated Append Blob - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key) - - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.query = {'timeout': _int_to_str(timeout)} - request.headers = { - 'x-ms-blob-type': _to_str(self.blob_type), - 'x-ms-lease-id': _to_str(lease_id), - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'If-Match': _to_str(if_match), - 'If-None-Match': _to_str(if_none_match) - } - _add_metadata_headers(metadata, request) - if content_settings is not None: - request.headers.update(content_settings._to_headers()) - - return self._perform_request(request, _parse_base_properties) - - def append_block(self, container_name, blob_name, block, - validate_content=False, maxsize_condition=None, - appendpos_condition=None, - lease_id=None, if_modified_since=None, - if_unmodified_since=None, if_match=None, - if_none_match=None, timeout=None): - ''' - Commits a new block of data to the end of an existing append blob. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param bytes block: - Content of the block in bytes. - :param bool validate_content: - If true, calculates an MD5 hash of the block content. The storage - service checks the hash of the content that has arrived - with the hash that was sent. This is primarily valuable for detecting - bitflips on the wire if using http instead of https as https (the default) - will already validate. Note that this MD5 hash is not stored with the - blob. - :param int maxsize_condition: - Optional conditional header. The max length in bytes permitted for - the append blob. If the Append Block operation would cause the blob - to exceed that limit or if the blob size is already greater than the - value specified in this header, the request will fail with - MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed). - :param int appendpos_condition: - Optional conditional header, used only for the Append Block operation. - A number indicating the byte offset to compare. Append Block will - succeed only if the append position is equal to this number. If it - is not, the request will fail with the - AppendPositionConditionNotMet error - (HTTP status code 412 - Precondition Failed). - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. 
- :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: - ETag, last modified, append offset, and committed block count - properties for the updated Append Blob - :rtype: :class:`~azure.storage.blob.models.AppendBlockProperties` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('block', block) - _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key) - - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.query = { - 'comp': 'appendblock', - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-blob-condition-maxsize': _to_str(maxsize_condition), - 'x-ms-blob-condition-appendpos': _to_str(appendpos_condition), - 'x-ms-lease-id': _to_str(lease_id), - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'If-Match': _to_str(if_match), - 'If-None-Match': _to_str(if_none_match) - } - request.body = _get_data_bytes_only('block', block) - - if validate_content: - computed_md5 = _get_content_md5(request.body) - request.headers['Content-MD5'] = _to_str(computed_md5) - - return self._perform_request(request, _parse_append_block) - - def append_block_from_url(self, container_name, blob_name, copy_source_url, source_range_start=None, - source_range_end=None, source_content_md5=None, source_if_modified_since=None, - source_if_unmodified_since=None, source_if_match=None, - source_if_none_match=None, maxsize_condition=None, - appendpos_condition=None, lease_id=None, if_modified_since=None, - if_unmodified_since=None, if_match=None, - if_none_match=None, timeout=None): - """ - Creates a new block to be committed as part of a blob, where the contents are read from a source url. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of blob. - :param str copy_source_url: - The URL of the source data. It can point to any Azure Blob or File, that is either public or has a - shared access signature attached. - :param int source_range_start: - This indicates the start of the range of bytes(inclusive) that has to be taken from the copy source. - :param int source_range_end: - This indicates the end of the range of bytes(inclusive) that has to be taken from the copy source. - :param str source_content_md5: - If given, the service will calculate the MD5 hash of the block content and compare against this value. - :param datetime source_if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the source resource has been modified since the specified time. 
- :param datetime source_if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the source resource has not been modified since the specified date/time. - :param str source_if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the source resource's ETag matches the value specified. - :param str source_if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the source resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the source resource does not exist, and fail the - operation if it does exist. - :param int maxsize_condition: - Optional conditional header. The max length in bytes permitted for - the append blob. If the Append Block operation would cause the blob - to exceed that limit or if the blob size is already greater than the - value specified in this header, the request will fail with - MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed). - :param int appendpos_condition: - Optional conditional header, used only for the Append Block operation. - A number indicating the byte offset to compare. Append Block will - succeed only if the append position is equal to this number. If it - is not, the request will fail with the - AppendPositionConditionNotMet error - (HTTP status code 412 - Precondition Failed). - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. 
- """ - _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key) - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('copy_source_url', copy_source_url) - - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.query = { - 'comp': 'appendblock', - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-copy-source': copy_source_url, - 'x-ms-source-content-md5': source_content_md5, - 'x-ms-source-if-Modified-Since': _datetime_to_utc_string(source_if_modified_since), - 'x-ms-source-if-Unmodified-Since': _datetime_to_utc_string(source_if_unmodified_since), - 'x-ms-source-if-Match': _to_str(source_if_match), - 'x-ms-source-if-None-Match': _to_str(source_if_none_match), - 'x-ms-blob-condition-maxsize': _to_str(maxsize_condition), - 'x-ms-blob-condition-appendpos': _to_str(appendpos_condition), - 'x-ms-lease-id': _to_str(lease_id), - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'If-Match': _to_str(if_match), - 'If-None-Match': _to_str(if_none_match) - } - - _validate_and_format_range_headers(request, source_range_start, source_range_end, - start_range_required=False, - end_range_required=False, - range_header_name="x-ms-source-range") - - return self._perform_request(request, _parse_append_block) - - # ----Convenience APIs---------------------------------------------- - - def append_blob_from_path( - self, container_name, blob_name, file_path, validate_content=False, - maxsize_condition=None, progress_callback=None, lease_id=None, timeout=None, - if_modified_since=None, if_unmodified_since=None, if_match=None, - if_none_match=None): - ''' - Appends to the content of an existing blob from a file path, with automatic - chunking and progress notifications. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of blob to create or update. - :param str file_path: - Path of the file to upload as the blob content. - :param bool validate_content: - If true, calculates an MD5 hash for each chunk of the blob. The storage - service checks the hash of the content that has arrived with the hash - that was sent. This is primarily valuable for detecting bitflips on - the wire if using http instead of https as https (the default) will - already validate. Note that this MD5 hash is not stored with the - blob. - :param int maxsize_condition: - Optional conditional header. The max length in bytes permitted for - the append blob. If the Append Block operation would cause the blob - to exceed that limit or if the blob size is already greater than the - value specified in this header, the request will fail with - MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed). - :param progress_callback: - Callback for progress with signature function(current, total) where - current is the number of bytes transfered so far, and total is the - size of the blob, or None if the total size is unknown. - :type progress_callback: func(current, total) - :param str lease_id: - Required if the blob has an active lease. - :param int timeout: - The timeout parameter is expressed in seconds. This method may make - multiple calls to the Azure service and the timeout will apply to - each call individually. - :param datetime if_modified_since: - A DateTime value. 
Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetime will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetime will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :return: ETag and last modified properties for the Append Blob - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('file_path', file_path) - _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key) - - count = path.getsize(file_path) - with open(file_path, 'rb') as stream: - return self.append_blob_from_stream( - container_name, - blob_name, - stream, - count=count, - validate_content=validate_content, - maxsize_condition=maxsize_condition, - progress_callback=progress_callback, - lease_id=lease_id, - timeout=timeout, - if_modified_since=if_modified_since, - if_unmodified_since=if_unmodified_since, - if_match=if_match, - if_none_match=if_none_match) - - def append_blob_from_bytes( - self, container_name, blob_name, blob, index=0, count=None, - validate_content=False, maxsize_condition=None, progress_callback=None, - lease_id=None, timeout=None, if_modified_since=None, if_unmodified_since=None, if_match=None, - if_none_match=None): - ''' - Appends to the content of an existing blob from an array of bytes, with - automatic chunking and progress notifications. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of blob to create or update. - :param bytes blob: - Content of blob as an array of bytes. - :param int index: - Start index in the array of bytes. - :param int count: - Number of bytes to upload. Set to None or negative value to upload - all bytes starting from index. - :param bool validate_content: - If true, calculates an MD5 hash for each chunk of the blob. The storage - service checks the hash of the content that has arrived with the hash - that was sent. This is primarily valuable for detecting bitflips on - the wire if using http instead of https as https (the default) will - already validate. Note that this MD5 hash is not stored with the - blob. - :param int maxsize_condition: - Optional conditional header. The max length in bytes permitted for - the append blob. 
If the Append Block operation would cause the blob - to exceed that limit or if the blob size is already greater than the - value specified in this header, the request will fail with - MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed). - :param progress_callback: - Callback for progress with signature function(current, total) where - current is the number of bytes transfered so far, and total is the - size of the blob, or None if the total size is unknown. - :type progress_callback: func(current, total) - :param str lease_id: - Required if the blob has an active lease. - :param int timeout: - The timeout parameter is expressed in seconds. This method may make - multiple calls to the Azure service and the timeout will apply to - each call individually. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetime will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetime will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :return: ETag and last modified properties for the Append Blob - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('blob', blob) - _validate_not_none('index', index) - _validate_type_bytes('blob', blob) - _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key) - - if index < 0: - raise IndexError(_ERROR_VALUE_NEGATIVE.format('index')) - - if count is None or count < 0: - count = len(blob) - index - - stream = BytesIO(blob) - stream.seek(index) - - return self.append_blob_from_stream( - container_name, - blob_name, - stream, - count=count, - validate_content=validate_content, - maxsize_condition=maxsize_condition, - lease_id=lease_id, - progress_callback=progress_callback, - timeout=timeout, - if_modified_since=if_modified_since, - if_unmodified_since=if_unmodified_since, - if_match=if_match, - if_none_match=if_none_match) - - def append_blob_from_text( - self, container_name, blob_name, text, encoding='utf-8', - validate_content=False, maxsize_condition=None, progress_callback=None, - lease_id=None, timeout=None, if_modified_since=None, if_unmodified_since=None, if_match=None, - if_none_match=None): - ''' - Appends to the content of an existing blob from str/unicode, with - automatic chunking and progress notifications. - - :param str container_name: - Name of existing container. 
- :param str blob_name: - Name of blob to create or update. - :param str text: - Text to upload to the blob. - :param str encoding: - Python encoding to use to convert the text to bytes. - :param bool validate_content: - If true, calculates an MD5 hash for each chunk of the blob. The storage - service checks the hash of the content that has arrived with the hash - that was sent. This is primarily valuable for detecting bitflips on - the wire if using http instead of https as https (the default) will - already validate. Note that this MD5 hash is not stored with the - blob. - :param int maxsize_condition: - Optional conditional header. The max length in bytes permitted for - the append blob. If the Append Block operation would cause the blob - to exceed that limit or if the blob size is already greater than the - value specified in this header, the request will fail with - MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed). - :param progress_callback: - Callback for progress with signature function(current, total) where - current is the number of bytes transfered so far, and total is the - size of the blob, or None if the total size is unknown. - :type progress_callback: func(current, total) - :param str lease_id: - Required if the blob has an active lease. - :param int timeout: - The timeout parameter is expressed in seconds. This method may make - multiple calls to the Azure service and the timeout will apply to - each call individually. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetime will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetime will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. 
- :return: ETag and last modified properties for the Append Blob - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('text', text) - _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key) - - if not isinstance(text, bytes): - _validate_not_none('encoding', encoding) - text = text.encode(encoding) - - return self.append_blob_from_bytes( - container_name, - blob_name, - text, - index=0, - count=len(text), - validate_content=validate_content, - maxsize_condition=maxsize_condition, - lease_id=lease_id, - progress_callback=progress_callback, - timeout=timeout, - if_modified_since=if_modified_since, - if_unmodified_since=if_unmodified_since, - if_match=if_match, - if_none_match=if_none_match) - - def append_blob_from_stream( - self, container_name, blob_name, stream, count=None, - validate_content=False, maxsize_condition=None, progress_callback=None, - lease_id=None, timeout=None, if_modified_since=None, if_unmodified_since=None, if_match=None, - if_none_match=None): - ''' - Appends to the content of an existing blob from a file/stream, with - automatic chunking and progress notifications. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of blob to create or update. - :param io.IOBase stream: - Opened stream to upload as the blob content. - :param int count: - Number of bytes to read from the stream. This is optional, but - should be supplied for optimal performance. - :param bool validate_content: - If true, calculates an MD5 hash for each chunk of the blob. The storage - service checks the hash of the content that has arrived with the hash - that was sent. This is primarily valuable for detecting bitflips on - the wire if using http instead of https as https (the default) will - already validate. Note that this MD5 hash is not stored with the - blob. - :param int maxsize_condition: - Conditional header. The max length in bytes permitted for - the append blob. If the Append Block operation would cause the blob - to exceed that limit or if the blob size is already greater than the - value specified in this header, the request will fail with - MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed). - :param progress_callback: - Callback for progress with signature function(current, total) where - current is the number of bytes transfered so far, and total is the - size of the blob, or None if the total size is unknown. - :type progress_callback: func(current, total) - :param str lease_id: - Required if the blob has an active lease. - :param int timeout: - The timeout parameter is expressed in seconds. This method may make - multiple calls to the Azure service and the timeout will apply to - each call individually. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetime will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetime will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. 
- Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :return: ETag and last modified properties for the Append Blob - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('stream', stream) - _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key) - - # _upload_blob_chunks returns the block ids for block blobs so resource_properties - # is passed as a parameter to get the last_modified and etag for page and append blobs. - # this info is not needed for block_blobs since _put_block_list is called after which gets this info - resource_properties = ResourceProperties() - _upload_blob_chunks( - blob_service=self, - container_name=container_name, - blob_name=blob_name, - blob_size=count, - block_size=self.MAX_BLOCK_SIZE, - stream=stream, - max_connections=1, # upload not easily parallelizable - progress_callback=progress_callback, - validate_content=validate_content, - lease_id=lease_id, - uploader_class=_AppendBlobChunkUploader, - maxsize_condition=maxsize_condition, - timeout=timeout, - resource_properties=resource_properties, - if_modified_since=if_modified_since, - if_unmodified_since=if_unmodified_since, - if_match=if_match, - if_none_match=if_none_match - ) - - return resource_properties diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/baseblobservice.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/baseblobservice.py deleted file mode 100644 index e7ea8c7e6c73..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/baseblobservice.py +++ /dev/null @@ -1,3397 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. 
-# -------------------------------------------------------------------------- -import sys -from abc import ABCMeta - -from azure.common import AzureHttpError - -from ..common._auth import ( - _StorageSASAuthentication, - _StorageSharedKeyAuthentication, - _StorageNoAuthentication, -) -from ..common._common_conversion import ( - _int_to_str, - _to_str, - _datetime_to_utc_string, -) -from ..common._connection import _ServiceParameters -from ..common._constants import ( - SERVICE_HOST_BASE, - DEFAULT_PROTOCOL, -) -from ..common._deserialization import ( - _convert_xml_to_service_properties, - _parse_metadata, - _parse_properties, - _convert_xml_to_service_stats, - _parse_length_from_content_range, -) -from ..common._error import ( - _dont_fail_not_exist, - _dont_fail_on_exist, - _validate_not_none, - _validate_decryption_required, - _validate_access_policies, - _ERROR_PARALLEL_NOT_SEEKABLE, - _validate_user_delegation_key, -) -from ..common._http import HTTPRequest -from ..common._serialization import ( - _get_request_body, - _convert_signed_identifiers_to_xml, - _convert_service_properties_to_xml, - _add_metadata_headers, -) -from ..common.models import ( - Services, - ListGenerator, - _OperationContext, -) -from .sharedaccesssignature import ( - BlobSharedAccessSignature, -) -from ..common.storageclient import StorageClient -from ._deserialization import ( - _convert_xml_to_containers, - _parse_blob, - _convert_xml_to_blob_list, - _convert_xml_to_blob_name_list, - _parse_container, - _parse_snapshot_blob, - _parse_lease, - _convert_xml_to_signed_identifiers_and_access, - _parse_base_properties, - _parse_account_information, - _convert_xml_to_user_delegation_key, -) -from ._download_chunking import _download_blob_chunks -from ._error import ( - _ERROR_INVALID_LEASE_DURATION, - _ERROR_INVALID_LEASE_BREAK_PERIOD, -) -from ._serialization import ( - _get_path, - _validate_and_format_range_headers, - _convert_delegation_key_info_to_xml, -) -from .models import ( - BlobProperties, - _LeaseActions, - ContainerPermissions, - BlobPermissions, -) - -from ._constants import ( - X_MS_VERSION, - __version__ as package_version, -) - -_CONTAINER_ALREADY_EXISTS_ERROR_CODE = 'ContainerAlreadyExists' -_BLOB_NOT_FOUND_ERROR_CODE = 'BlobNotFound' -_CONTAINER_NOT_FOUND_ERROR_CODE = 'ContainerNotFound' - -if sys.version_info >= (3,): - from io import BytesIO -else: - from cStringIO import StringIO as BytesIO - - -class BaseBlobService(StorageClient): - ''' - This is the main class managing Blob resources. - - The Blob service stores text and binary data as blobs in the cloud. - The Blob service offers the following three resources: the storage account, - containers, and blobs. Within your storage account, containers provide a - way to organize sets of blobs. For more information please see: - https://msdn.microsoft.com/en-us/library/azure/ee691964.aspx - - :ivar int MAX_SINGLE_GET_SIZE: - The size of the first range get performed by get_blob_to_* methods if - max_connections is greater than 1. Less data will be returned if the - blob is smaller than this. - :ivar int MAX_CHUNK_GET_SIZE: - The size of subsequent range gets performed by get_blob_to_* methods if - max_connections is greater than 1 and the blob is larger than MAX_SINGLE_GET_SIZE. - Less data will be returned if the remainder of the blob is smaller than - this. If this is set to larger than 4MB, content_validation will throw an - error if enabled. However, if content_validation is not desired a size - greater than 4MB may be optimal. 
Setting this below 4MB is not recommended. - :ivar object key_encryption_key: - The key-encryption-key optionally provided by the user. If provided, will be used to - encrypt/decrypt in supported methods. - For methods requiring decryption, either the key_encryption_key OR the resolver must be provided. - If both are provided, the resolver will take precedence. - Must implement the following methods for APIs requiring encryption: - wrap_key(key)--wraps the specified key (bytes) using an algorithm of the user's choice. Returns the encrypted key as bytes. - get_key_wrap_algorithm()--returns the algorithm used to wrap the specified symmetric key. - get_kid()--returns a string key id for this key-encryption-key. - Must implement the following methods for APIs requiring decryption: - unwrap_key(key, algorithm)--returns the unwrapped form of the specified symmetric key using the string-specified algorithm. - get_kid()--returns a string key id for this key-encryption-key. - :ivar function key_resolver_function(kid): - A function to resolve keys optionally provided by the user. If provided, will be used to decrypt in supported methods. - For methods requiring decryption, either the key_encryption_key OR - the resolver must be provided. If both are provided, the resolver will take precedence. - It uses the kid string to return a key-encryption-key implementing the interface defined above. - :ivar bool require_encryption: - A flag that may be set to ensure that all messages successfully uploaded to the queue and all those downloaded and - successfully read from the queue are/were encrypted while on the server. If this flag is set, all required - parameters for encryption/decryption must be provided. See the above comments on the key_encryption_key and resolver. - ''' - - __metaclass__ = ABCMeta - MAX_SINGLE_GET_SIZE = 32 * 1024 * 1024 - MAX_CHUNK_GET_SIZE = 4 * 1024 * 1024 - - def __init__(self, account_name=None, account_key=None, sas_token=None, is_emulated=False, - protocol=DEFAULT_PROTOCOL, endpoint_suffix=SERVICE_HOST_BASE, custom_domain=None, request_session=None, - connection_string=None, socket_timeout=None, token_credential=None): - ''' - :param str account_name: - The storage account name. This is used to authenticate requests - signed with an account key and to construct the storage endpoint. It - is required unless a connection string is given, or if a custom - domain is used with anonymous authentication. - :param str account_key: - The storage account key. This is used for shared key authentication. - If neither account key or sas token is specified, anonymous access - will be used. - :param str sas_token: - A shared access signature token to use to authenticate requests - instead of the account key. If account key and sas token are both - specified, account key will be used to sign. If neither are - specified, anonymous access will be used. - :param bool is_emulated: - Whether to use the emulator. Defaults to False. If specified, will - override all other parameters besides connection string and request - session. - :param str protocol: - The protocol to use for requests. Defaults to https. - :param str endpoint_suffix: - The host base component of the url, minus the account name. Defaults - to Azure (core.windows.net). Override this to use the China cloud - (core.chinacloudapi.cn). - :param str custom_domain: - The custom domain to use. This can be set in the Azure Portal. For - example, 'www.mydomain.com'. 
- :param requests.Session request_session: - The session object to use for http requests. - :param str connection_string: - If specified, this will override all other parameters besides - request session. See - http://azure.microsoft.com/en-us/documentation/articles/storage-configure-connection-string/ - for the connection string format - :param int socket_timeout: - If specified, this will override the default socket timeout. The timeout specified is in seconds. - See DEFAULT_SOCKET_TIMEOUT in _constants.py for the default value. - :param token_credential: - A token credential used to authenticate HTTPS requests. The token value - should be updated before its expiration. - :type `~azure.storage.common.TokenCredential` - ''' - service_params = _ServiceParameters.get_service_parameters( - 'blob', - account_name=account_name, - account_key=account_key, - sas_token=sas_token, - token_credential=token_credential, - is_emulated=is_emulated, - protocol=protocol, - endpoint_suffix=endpoint_suffix, - custom_domain=custom_domain, - request_session=request_session, - connection_string=connection_string, - socket_timeout=socket_timeout) - - super(BaseBlobService, self).__init__(service_params) - - if self.account_key: - self.authentication = _StorageSharedKeyAuthentication( - self.account_name, - self.account_key, - self.is_emulated - ) - elif self.sas_token: - self.authentication = _StorageSASAuthentication(self.sas_token) - elif self.token_credential: - self.authentication = self.token_credential - else: - self.authentication = _StorageNoAuthentication() - - self.require_encryption = False - self.key_encryption_key = None - self.key_resolver_function = None - self._X_MS_VERSION = X_MS_VERSION - self._update_user_agent_string(package_version) - - def make_blob_url(self, container_name, blob_name, protocol=None, sas_token=None, snapshot=None): - ''' - Creates the url to access a blob. - - :param str container_name: - Name of container. - :param str blob_name: - Name of blob. - :param str protocol: - Protocol to use: 'http' or 'https'. If not specified, uses the - protocol specified when BaseBlobService was initialized. - :param str sas_token: - Shared access signature token created with - generate_shared_access_signature. - :param str snapshot: - An string value that uniquely identifies the snapshot. The value of - this query parameter indicates the snapshot version. - :return: blob access URL. - :rtype: str - ''' - - url = '{}://{}/{}/{}'.format( - protocol or self.protocol, - self.primary_endpoint, - container_name, - blob_name, - ) - - if snapshot and sas_token: - url = '{}?snapshot={}&{}'.format(url, snapshot, sas_token) - elif snapshot: - url = '{}?snapshot={}'.format(url, snapshot) - elif sas_token: - url = '{}?{}'.format(url, sas_token) - - return url - - def make_container_url(self, container_name, protocol=None, sas_token=None): - ''' - Creates the url to access a container. - - :param str container_name: - Name of container. - :param str protocol: - Protocol to use: 'http' or 'https'. If not specified, uses the - protocol specified when BaseBlobService was initialized. - :param str sas_token: - Shared access signature token created with - generate_shared_access_signature. - :return: container access URL. 
- :rtype: str - ''' - - url = '{}://{}/{}?restype=container'.format( - protocol or self.protocol, - self.primary_endpoint, - container_name, - ) - - if sas_token: - url = '{}&{}'.format(url, sas_token) - - return url - - def generate_account_shared_access_signature(self, resource_types, permission, - expiry, start=None, ip=None, protocol=None): - ''' - Generates a shared access signature for the blob service. - Use the returned signature with the sas_token parameter of any BlobService. - - :param ResourceTypes resource_types: - Specifies the resource types that are accessible with the account SAS. - :param AccountPermissions permission: - The permissions associated with the shared access signature. The - user is restricted to operations allowed by the permissions. - Required unless an id is given referencing a stored access policy - which contains this field. This field must be omitted if it has been - specified in an associated stored access policy. - :param expiry: - The time at which the shared access signature becomes invalid. - Required unless an id is given referencing a stored access policy - which contains this field. This field must be omitted if it has - been specified in an associated stored access policy. Azure will always - convert values to UTC. If a date is passed in without timezone info, it - is assumed to be UTC. - :type expiry: datetime or str - :param start: - The time at which the shared access signature becomes valid. If - omitted, start time for this call is assumed to be the time when the - storage service receives the request. Azure will always convert values - to UTC. If a date is passed in without timezone info, it is assumed to - be UTC. - :type start: datetime or str - :param str ip: - Specifies an IP address or a range of IP addresses from which to accept requests. - If the IP address from which the request originates does not match the IP address - or address range specified on the SAS token, the request is not authenticated. - For example, specifying sip=168.1.5.65 or sip=168.1.5.60-168.1.5.70 on the SAS - restricts the request to those IP addresses. - :param str protocol: - Specifies the protocol permitted for a request made. The default value - is https,http. See :class:`~azure.storage.common.models.Protocol` for possible values. - :return: A Shared Access Signature (sas) token. - :rtype: str - ''' - _validate_not_none('self.account_name', self.account_name) - _validate_not_none('self.account_key', self.account_key) - - sas = BlobSharedAccessSignature(self.account_name, self.account_key) - return sas.generate_account(Services.BLOB, resource_types, permission, - expiry, start=start, ip=ip, protocol=protocol) - - def generate_container_shared_access_signature(self, container_name, - permission=None, expiry=None, - start=None, id=None, ip=None, protocol=None, - cache_control=None, content_disposition=None, - content_encoding=None, content_language=None, - content_type=None, user_delegation_key=None): - ''' - Generates a shared access signature for the container. - Use the returned signature with the sas_token parameter of any BlobService. - - :param str container_name: - Name of container. - :param ContainerPermissions permission: - The permissions associated with the shared access signature. The - user is restricted to operations allowed by the permissions. - Permissions must be ordered read, write, delete, list. - Required unless an id is given referencing a stored access policy - which contains this field. 
This field must be omitted if it has been - specified in an associated stored access policy. - :param expiry: - The time at which the shared access signature becomes invalid. - Required unless an id is given referencing a stored access policy - which contains this field. This field must be omitted if it has - been specified in an associated stored access policy. Azure will always - convert values to UTC. If a date is passed in without timezone info, it - is assumed to be UTC. - :type expiry: datetime or str - :param start: - The time at which the shared access signature becomes valid. If - omitted, start time for this call is assumed to be the time when the - storage service receives the request. Azure will always convert values - to UTC. If a date is passed in without timezone info, it is assumed to - be UTC. - :type start: datetime or str - :param str id: - A unique value up to 64 characters in length that correlates to a - stored access policy. To create a stored access policy, use - set_blob_service_properties. - :param str ip: - Specifies an IP address or a range of IP addresses from which to accept requests. - If the IP address from which the request originates does not match the IP address - or address range specified on the SAS token, the request is not authenticated. - For example, specifying sip=168.1.5.65 or sip=168.1.5.60-168.1.5.70 on the SAS - restricts the request to those IP addresses. - :param str protocol: - Specifies the protocol permitted for a request made. The default value - is https,http. See :class:`~azure.storage.common.models.Protocol` for possible values. - :param str cache_control: - Response header value for Cache-Control when resource is accessed - using this shared access signature. - :param str content_disposition: - Response header value for Content-Disposition when resource is accessed - using this shared access signature. - :param str content_encoding: - Response header value for Content-Encoding when resource is accessed - using this shared access signature. - :param str content_language: - Response header value for Content-Language when resource is accessed - using this shared access signature. - :param str content_type: - Response header value for Content-Type when resource is accessed - using this shared access signature. - :param ~azure.storage.blob.models.UserDelegationKey user_delegation_key: - Instead of an account key, the user could pass in a user delegation key. - A user delegation key can be obtained from the service by authenticating with an AAD identity; - this can be accomplished by calling get_user_delegation_key. - When present, the SAS is signed with the user delegation key instead. - :return: A Shared Access Signature (sas) token. 
- :rtype: str - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('self.account_name', self.account_name) - - if user_delegation_key is not None: - _validate_user_delegation_key(user_delegation_key) - sas = BlobSharedAccessSignature(self.account_name, user_delegation_key=user_delegation_key) - else: - _validate_not_none('self.account_key', self.account_key) - sas = BlobSharedAccessSignature(self.account_name, account_key=self.account_key) - - return sas.generate_container( - container_name, - permission, - expiry, - start=start, - id=id, - ip=ip, - protocol=protocol, - cache_control=cache_control, - content_disposition=content_disposition, - content_encoding=content_encoding, - content_language=content_language, - content_type=content_type, - ) - - def generate_blob_shared_access_signature( - self, container_name, blob_name, snapshot=None, permission=None, - expiry=None, start=None, id=None, ip=None, protocol=None, - cache_control=None, content_disposition=None, - content_encoding=None, content_language=None, - content_type=None, user_delegation_key=None): - ''' - Generates a shared access signature for the blob or one of its snapshots. - Use the returned signature with the sas_token parameter of any BlobService. - - :param str container_name: - Name of container. - :param str blob_name: - Name of blob. - :param str snapshot: - The snapshot parameter is an opaque DateTime value that, - when present, specifies the blob snapshot to grant permission. - :param BlobPermissions permission: - The permissions associated with the shared access signature. The - user is restricted to operations allowed by the permissions. - Permissions must be ordered read, write, delete, list. - Required unless an id is given referencing a stored access policy - which contains this field. This field must be omitted if it has been - specified in an associated stored access policy. - :param expiry: - The time at which the shared access signature becomes invalid. - Required unless an id is given referencing a stored access policy - which contains this field. This field must be omitted if it has - been specified in an associated stored access policy. Azure will always - convert values to UTC. If a date is passed in without timezone info, it - is assumed to be UTC. - :type expiry: datetime or str - :param start: - The time at which the shared access signature becomes valid. If - omitted, start time for this call is assumed to be the time when the - storage service receives the request. Azure will always convert values - to UTC. If a date is passed in without timezone info, it is assumed to - be UTC. - :type start: datetime or str - :param str id: - A unique value up to 64 characters in length that correlates to a - stored access policy. To create a stored access policy, use :func:`~set_container_acl`. - :param str ip: - Specifies an IP address or a range of IP addresses from which to accept requests. - If the IP address from which the request originates does not match the IP address - or address range specified on the SAS token, the request is not authenticated. - For example, specifying sip=168.1.5.65 or sip=168.1.5.60-168.1.5.70 on the SAS - restricts the request to those IP addresses. - :param str protocol: - Specifies the protocol permitted for a request made. The default value - is https,http. See :class:`~azure.storage.common.models.Protocol` for possible values. 
- :param str cache_control: - Response header value for Cache-Control when resource is accessed - using this shared access signature. - :param str content_disposition: - Response header value for Content-Disposition when resource is accessed - using this shared access signature. - :param str content_encoding: - Response header value for Content-Encoding when resource is accessed - using this shared access signature. - :param str content_language: - Response header value for Content-Language when resource is accessed - using this shared access signature. - :param str content_type: - Response header value for Content-Type when resource is accessed - using this shared access signature. - :param ~azure.storage.blob.models.UserDelegationKey user_delegation_key: - Instead of an account key, the user could pass in a user delegation key. - A user delegation key can be obtained from the service by authenticating with an AAD identity; - this can be accomplished by calling get_user_delegation_key. - When present, the SAS is signed with the user delegation key instead. - :return: A Shared Access Signature (sas) token. - :rtype: str - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('self.account_name', self.account_name) - - if user_delegation_key is not None: - _validate_user_delegation_key(user_delegation_key) - sas = BlobSharedAccessSignature(self.account_name, user_delegation_key=user_delegation_key) - else: - _validate_not_none('self.account_key', self.account_key) - sas = BlobSharedAccessSignature(self.account_name, account_key=self.account_key) - - return sas.generate_blob( - container_name=container_name, - blob_name=blob_name, - snapshot=snapshot, - permission=permission, - expiry=expiry, - start=start, - id=id, - ip=ip, - protocol=protocol, - cache_control=cache_control, - content_disposition=content_disposition, - content_encoding=content_encoding, - content_language=content_language, - content_type=content_type, - ) - - def get_user_delegation_key(self, key_start_time, key_expiry_time, timeout=None): - """ - Obtain a user delegation key for the purpose of signing SAS tokens. - A token credential must be present on the service object for this request to succeed. - - :param datetime key_start_time: - A DateTime value. Indicates when the key becomes valid. - :param datetime key_expiry_time: - A DateTime value. Indicates when the key stops being valid. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: - """ - _validate_not_none('key_start_time', key_start_time) - _validate_not_none('key_end_time', key_expiry_time) - - request = HTTPRequest() - request.method = 'POST' - request.host_locations = self._get_host_locations(secondary=True) - request.query = { - 'restype': 'service', - 'comp': 'userdelegationkey', - 'timeout': _int_to_str(timeout), - } - request.body = _get_request_body(_convert_delegation_key_info_to_xml(key_start_time, key_expiry_time)) - return self._perform_request(request, _convert_xml_to_user_delegation_key) - - def list_containers(self, prefix=None, num_results=None, include_metadata=False, - marker=None, timeout=None): - ''' - Returns a generator to list the containers under the specified account. - The generator will lazily follow the continuation tokens returned by - the service and stop when all containers have been returned or num_results is reached. 
-
-        If num_results is specified and the account has more than that number of
-        containers, the generator will have a populated next_marker field once it
-        finishes. This marker can be used to create a new generator if more
-        results are desired.
-
-        :param str prefix:
-            Filters the results to return only containers whose names
-            begin with the specified prefix.
-        :param int num_results:
-            Specifies the maximum number of containers to return. A single list
-            request may return up to 1000 containers and potentially a continuation
-            token which should be followed to get additional results.
-        :param bool include_metadata:
-            Specifies that container metadata be returned in the response.
-        :param str marker:
-            An opaque continuation token. This value can be retrieved from the
-            next_marker field of a previous generator object if num_results was
-            specified and that generator has finished enumerating results. If
-            specified, this generator will begin returning results from the point
-            where the previous generator stopped.
-        :param int timeout:
-            The timeout parameter is expressed in seconds.
-        '''
-        include = 'metadata' if include_metadata else None
-        operation_context = _OperationContext(location_lock=True)
-        kwargs = {'prefix': prefix, 'marker': marker, 'max_results': num_results,
-                  'include': include, 'timeout': timeout, '_context': operation_context}
-        resp = self._list_containers(**kwargs)
-
-        return ListGenerator(resp, self._list_containers, (), kwargs)
-
-    def _list_containers(self, prefix=None, marker=None, max_results=None,
-                         include=None, timeout=None, _context=None):
-        '''
-        Returns a list of the containers under the specified account.
-
-        :param str prefix:
-            Filters the results to return only containers whose names
-            begin with the specified prefix.
-        :param str marker:
-            A string value that identifies the portion of the list
-            to be returned with the next list operation. The operation returns
-            a next_marker value within the response body if the list returned was
-            not complete. The marker value may then be used in a subsequent
-            call to request the next set of list items. The marker value is
-            opaque to the client.
-        :param int max_results:
-            Specifies the maximum number of containers to return. A single list
-            request may return up to 1000 containers and potentially a continuation
-            token which should be followed to get additional results.
-        :param str include:
-            Include this parameter to specify that the container's
-            metadata be returned as part of the response body. Set this
-            parameter to the string 'metadata' to get the container's metadata.
-        :param int timeout:
-            The timeout parameter is expressed in seconds.
-        '''
-        request = HTTPRequest()
-        request.method = 'GET'
-        request.host_locations = self._get_host_locations(secondary=True)
-        request.path = _get_path()
-        request.query = {
-            'comp': 'list',
-            'prefix': _to_str(prefix),
-            'marker': _to_str(marker),
-            'maxresults': _int_to_str(max_results),
-            'include': _to_str(include),
-            'timeout': _int_to_str(timeout)
-        }
-
-        return self._perform_request(request, _convert_xml_to_containers, operation_context=_context)
-
-    def create_container(self, container_name, metadata=None,
-                         public_access=None, fail_on_exist=False, timeout=None):
-        '''
-        Creates a new container under the specified account. If the container
-        with the same name already exists, the operation fails if
-        fail_on_exist is True.
-
-        :param str container_name:
-            Name of container to create.
- :param metadata: - A dict with name_value pairs to associate with the - container as metadata. Example:{'Category':'test'} - :type metadata: dict(str, str) - :param ~azure.storage.blob.models.PublicAccess public_access: - Possible values include: container, blob. - :param bool fail_on_exist: - Specify whether to throw an exception when the container exists. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: True if container is created, False if container already exists. - :rtype: bool - ''' - _validate_not_none('container_name', container_name) - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name) - request.query = { - 'restype': 'container', - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-blob-public-access': _to_str(public_access) - } - _add_metadata_headers(metadata, request) - - if not fail_on_exist: - try: - self._perform_request(request, expected_errors=[_CONTAINER_ALREADY_EXISTS_ERROR_CODE]) - return True - except AzureHttpError as ex: - _dont_fail_on_exist(ex) - return False - else: - self._perform_request(request) - return True - - def get_container_properties(self, container_name, lease_id=None, timeout=None): - ''' - Returns all user-defined metadata and system properties for the specified - container. The data returned does not include the container's list of blobs. - - :param str container_name: - Name of existing container. - :param str lease_id: - If specified, get_container_properties only succeeds if the - container's lease is active and matches this ID. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: properties for the specified container within a container object. - :rtype: :class:`~azure.storage.blob.models.Container` - ''' - _validate_not_none('container_name', container_name) - request = HTTPRequest() - request.method = 'GET' - request.host_locations = self._get_host_locations(secondary=True) - request.path = _get_path(container_name) - request.query = { - 'restype': 'container', - 'timeout': _int_to_str(timeout), - } - request.headers = {'x-ms-lease-id': _to_str(lease_id)} - - return self._perform_request(request, _parse_container, [container_name]) - - def get_container_metadata(self, container_name, lease_id=None, timeout=None): - ''' - Returns all user-defined metadata for the specified container. - - :param str container_name: - Name of existing container. - :param str lease_id: - If specified, get_container_metadata only succeeds if the - container's lease is active and matches this ID. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: - A dictionary representing the container metadata name, value pairs. - :rtype: dict(str, str) - ''' - _validate_not_none('container_name', container_name) - request = HTTPRequest() - request.method = 'GET' - request.host_locations = self._get_host_locations(secondary=True) - request.path = _get_path(container_name) - request.query = { - 'restype': 'container', - 'comp': 'metadata', - 'timeout': _int_to_str(timeout), - } - request.headers = {'x-ms-lease-id': _to_str(lease_id)} - - return self._perform_request(request, _parse_metadata) - - def set_container_metadata(self, container_name, metadata=None, - lease_id=None, if_modified_since=None, timeout=None): - ''' - Sets one or more user-defined name-value pairs for the specified - container. 
Each call to this operation replaces all existing metadata - attached to the container. To remove all metadata from the container, - call this operation with no metadata dict. - - :param str container_name: - Name of existing container. - :param metadata: - A dict containing name-value pairs to associate with the container as - metadata. Example: {'category':'test'} - :type metadata: dict(str, str) - :param str lease_id: - If specified, set_container_metadata only succeeds if the - container's lease is active and matches this ID. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: ETag and last modified properties for the updated Container - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - _validate_not_none('container_name', container_name) - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name) - request.query = { - 'restype': 'container', - 'comp': 'metadata', - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'x-ms-lease-id': _to_str(lease_id), - } - _add_metadata_headers(metadata, request) - - return self._perform_request(request, _parse_base_properties) - - def get_container_acl(self, container_name, lease_id=None, timeout=None): - ''' - Gets the permissions for the specified container. - The permissions indicate whether container data may be accessed publicly. - - :param str container_name: - Name of existing container. - :param lease_id: - If specified, get_container_acl only succeeds if the - container's lease is active and matches this ID. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: A dictionary of access policies associated with the container. dict of str to - :class:`..common.models.AccessPolicy` and a public_access property - if public access is turned on - ''' - _validate_not_none('container_name', container_name) - request = HTTPRequest() - request.method = 'GET' - request.host_locations = self._get_host_locations(secondary=True) - request.path = _get_path(container_name) - request.query = { - 'restype': 'container', - 'comp': 'acl', - 'timeout': _int_to_str(timeout), - } - request.headers = {'x-ms-lease-id': _to_str(lease_id)} - - return self._perform_request(request, _convert_xml_to_signed_identifiers_and_access) - - def set_container_acl(self, container_name, signed_identifiers=None, - public_access=None, lease_id=None, - if_modified_since=None, if_unmodified_since=None, timeout=None): - ''' - Sets the permissions for the specified container or stored access - policies that may be used with Shared Access Signatures. The permissions - indicate whether blobs in a container may be accessed publicly. - - :param str container_name: - Name of existing container. - :param signed_identifiers: - A dictionary of access policies to associate with the container. The - dictionary may contain up to 5 elements. An empty dictionary - will clear the access policies set on the service. 
- :type signed_identifiers: dict(str, :class:`~azure.storage.common.models.AccessPolicy`) - :param ~azure.storage.blob.models.PublicAccess public_access: - Possible values include: container, blob. - :param str lease_id: - If specified, set_container_acl only succeeds if the - container's lease is active and matches this ID. - :param datetime if_modified_since: - A datetime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified date/time. - :param datetime if_unmodified_since: - A datetime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: ETag and last modified properties for the updated Container - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - _validate_not_none('container_name', container_name) - _validate_access_policies(signed_identifiers) - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name) - request.query = { - 'restype': 'container', - 'comp': 'acl', - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-blob-public-access': _to_str(public_access), - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'x-ms-lease-id': _to_str(lease_id), - } - request.body = _get_request_body( - _convert_signed_identifiers_to_xml(signed_identifiers)) - - return self._perform_request(request, _parse_base_properties) - - def delete_container(self, container_name, fail_not_exist=False, - lease_id=None, if_modified_since=None, - if_unmodified_since=None, timeout=None): - ''' - Marks the specified container for deletion. The container and any blobs - contained within it are later deleted during garbage collection. - - :param str container_name: - Name of container to delete. - :param bool fail_not_exist: - Specify whether to throw an exception when the container doesn't - exist. - :param str lease_id: - If specified, delete_container only succeeds if the - container's lease is active and matches this ID. - Required if the container has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param int timeout: - The timeout parameter is expressed in seconds. 
- :return: True if container is deleted, False container doesn't exist. - :rtype: bool - ''' - _validate_not_none('container_name', container_name) - request = HTTPRequest() - request.method = 'DELETE' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name) - request.query = { - 'restype': 'container', - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-lease-id': _to_str(lease_id), - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - } - - if not fail_not_exist: - try: - self._perform_request(request, expected_errors=[_CONTAINER_NOT_FOUND_ERROR_CODE]) - return True - except AzureHttpError as ex: - _dont_fail_not_exist(ex) - return False - else: - self._perform_request(request) - return True - - def _lease_container_impl( - self, container_name, lease_action, lease_id, lease_duration, - lease_break_period, proposed_lease_id, if_modified_since, - if_unmodified_since, timeout): - ''' - Establishes and manages a lease on a container. - The Lease Container operation can be called in one of five modes - Acquire, to request a new lease - Renew, to renew an existing lease - Change, to change the ID of an existing lease - Release, to free the lease if it is no longer needed so that another - client may immediately acquire a lease against the container - Break, to end the lease but ensure that another client cannot acquire - a new lease until the current lease period has expired - - :param str container_name: - Name of existing container. - :param str lease_action: - Possible _LeaseActions values: acquire|renew|release|break|change - :param str lease_id: - Required if the container has an active lease. - :param int lease_duration: - Specifies the duration of the lease, in seconds, or negative one - (-1) for a lease that never expires. A non-infinite lease can be - between 15 and 60 seconds. A lease duration cannot be changed - using renew or change. For backwards compatibility, the default is - 60, and the value is only used on an acquire operation. - :param int lease_break_period: - For a break operation, this is the proposed duration of - seconds that the lease should continue before it is broken, between - 0 and 60 seconds. This break period is only used if it is shorter - than the time remaining on the lease. If longer, the time remaining - on the lease is used. A new lease will not be available before the - break period has expired, but the lease may be held for longer than - the break period. If this header does not appear with a break - operation, a fixed-duration lease breaks after the remaining lease - period elapses, and an infinite lease breaks immediately. - :param str proposed_lease_id: - Optional for Acquire, required for Change. Proposed lease ID, in a - GUID string format. The Blob service returns 400 (Invalid request) - if the proposed lease ID is not in the correct format. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. 
-            If a date is passed in without timezone info, it is assumed to be UTC.
-            Specify this header to perform the operation only if
-            the resource has not been modified since the specified date/time.
-        :param int timeout:
-            The timeout parameter is expressed in seconds.
-        :return:
-            Response headers returned from the service call.
-        :rtype: dict(str, str)
-        '''
-        _validate_not_none('container_name', container_name)
-        _validate_not_none('lease_action', lease_action)
-        request = HTTPRequest()
-        request.method = 'PUT'
-        request.host_locations = self._get_host_locations()
-        request.path = _get_path(container_name)
-        request.query = {
-            'restype': 'container',
-            'comp': 'lease',
-            'timeout': _int_to_str(timeout),
-        }
-        request.headers = {
-            'x-ms-lease-id': _to_str(lease_id),
-            'x-ms-lease-action': _to_str(lease_action),
-            'x-ms-lease-duration': _to_str(lease_duration),
-            'x-ms-lease-break-period': _to_str(lease_break_period),
-            'x-ms-proposed-lease-id': _to_str(proposed_lease_id),
-            'If-Modified-Since': _datetime_to_utc_string(if_modified_since),
-            'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since),
-        }
-
-        return self._perform_request(request, _parse_lease)
-
-    def acquire_container_lease(
-            self, container_name, lease_duration=-1, proposed_lease_id=None,
-            if_modified_since=None, if_unmodified_since=None, timeout=None):
-        '''
-        Requests a new lease. If the container does not have an active lease,
-        the Blob service creates a lease on the container and returns a new
-        lease ID.
-
-        :param str container_name:
-            Name of existing container.
-        :param int lease_duration:
-            Specifies the duration of the lease, in seconds, or negative one
-            (-1) for a lease that never expires. A non-infinite lease can be
-            between 15 and 60 seconds. A lease duration cannot be changed
-            using renew or change. Default is -1 (infinite lease).
-        :param str proposed_lease_id:
-            Proposed lease ID, in a GUID string format. The Blob service returns
-            400 (Invalid request) if the proposed lease ID is not in the correct format.
-        :param datetime if_modified_since:
-            A DateTime value. Azure expects the date value passed in to be UTC.
-            If timezone is included, any non-UTC datetimes will be converted to UTC.
-            If a date is passed in without timezone info, it is assumed to be UTC.
-            Specify this header to perform the operation only
-            if the resource has been modified since the specified time.
-        :param datetime if_unmodified_since:
-            A DateTime value. Azure expects the date value passed in to be UTC.
-            If timezone is included, any non-UTC datetimes will be converted to UTC.
-            If a date is passed in without timezone info, it is assumed to be UTC.
-            Specify this header to perform the operation only if
-            the resource has not been modified since the specified date/time.
-        :param int timeout:
-            The timeout parameter is expressed in seconds.
-        :return: the lease ID of the newly created lease.
-        :rtype: str
-        '''
-        _validate_not_none('lease_duration', lease_duration)
-        if lease_duration != -1 and \
-                (lease_duration < 15 or lease_duration > 60):
-            raise ValueError(_ERROR_INVALID_LEASE_DURATION)
-
-        lease = self._lease_container_impl(container_name,
-                                           _LeaseActions.Acquire,
-                                           None,  # lease_id
-                                           lease_duration,
-                                           None,  # lease_break_period
-                                           proposed_lease_id,
-                                           if_modified_since,
-                                           if_unmodified_since,
-                                           timeout)
-        return lease['id']
-
-    def renew_container_lease(
-            self, container_name, lease_id, if_modified_since=None,
-            if_unmodified_since=None, timeout=None):
-        '''
-        Renews the lease.
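For illustration, a minimal lease lifecycle might look like the sketch below (assuming a BlockBlobService instance named service and a placeholder container name):

# Acquire a 30-second lease; the returned value is the lease ID (a GUID string).
lease_id = service.acquire_container_lease('mycontainer', lease_duration=30)
try:
    # Renewing resets the 30-second clock; the same lease ID stays valid.
    service.renew_container_lease('mycontainer', lease_id)
finally:
    # Releasing lets another client acquire the lease immediately.
    service.release_container_lease('mycontainer', lease_id)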
The lease can be renewed if the lease ID specified - matches that associated with the container. Note that - the lease may be renewed even if it has expired as long as the container - has not been leased again since the expiration of that lease. When you - renew a lease, the lease duration clock resets. - - :param str container_name: - Name of existing container. - :param str lease_id: - Lease ID for active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: the lease ID of the renewed lease. - :return: str - ''' - _validate_not_none('lease_id', lease_id) - - lease = self._lease_container_impl(container_name, - _LeaseActions.Renew, - lease_id, - None, # lease_duration - None, # lease_break_period - None, # proposed_lease_id - if_modified_since, - if_unmodified_since, - timeout) - return lease['id'] - - def release_container_lease( - self, container_name, lease_id, if_modified_since=None, - if_unmodified_since=None, timeout=None): - ''' - Release the lease. The lease may be released if the lease_id specified matches - that associated with the container. Releasing the lease allows another client - to immediately acquire the lease for the container as soon as the release is complete. - - :param str container_name: - Name of existing container. - :param str lease_id: - Lease ID for active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param int timeout: - The timeout parameter is expressed in seconds. - ''' - _validate_not_none('lease_id', lease_id) - - self._lease_container_impl(container_name, - _LeaseActions.Release, - lease_id, - None, # lease_duration - None, # lease_break_period - None, # proposed_lease_id - if_modified_since, - if_unmodified_since, - timeout) - - def break_container_lease( - self, container_name, lease_break_period=None, - if_modified_since=None, if_unmodified_since=None, timeout=None): - ''' - Break the lease, if the container has an active lease. Once a lease is - broken, it cannot be renewed. Any authorized request can break the lease; - the request is not required to specify a matching lease ID. 
When a lease - is broken, the lease break period is allowed to elapse, during which time - no lease operation except break and release can be performed on the container. - When a lease is successfully broken, the response indicates the interval - in seconds until a new lease can be acquired. - - :param str container_name: - Name of existing container. - :param int lease_break_period: - This is the proposed duration of seconds that the lease - should continue before it is broken, between 0 and 60 seconds. This - break period is only used if it is shorter than the time remaining - on the lease. If longer, the time remaining on the lease is used. - A new lease will not be available before the break period has - expired, but the lease may be held for longer than the break - period. If this header does not appear with a break - operation, a fixed-duration lease breaks after the remaining lease - period elapses, and an infinite lease breaks immediately. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: Approximate time remaining in the lease period, in seconds. - :return: int - ''' - if (lease_break_period is not None) and (lease_break_period < 0 or lease_break_period > 60): - raise ValueError(_ERROR_INVALID_LEASE_BREAK_PERIOD) - - lease = self._lease_container_impl(container_name, - _LeaseActions.Break, - None, # lease_id - None, # lease_duration - lease_break_period, - None, # proposed_lease_id - if_modified_since, - if_unmodified_since, - timeout) - return lease['time'] - - def change_container_lease( - self, container_name, lease_id, proposed_lease_id, - if_modified_since=None, if_unmodified_since=None, timeout=None): - ''' - Change the lease ID of an active lease. A change must include the current - lease ID and a new lease ID. - - :param str container_name: - Name of existing container. - :param str lease_id: - Lease ID for active lease. - :param str proposed_lease_id: - Proposed lease ID, in a GUID string format. The Blob service returns 400 - (Invalid request) if the proposed lease ID is not in the correct format. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. 
- Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param int timeout: - The timeout parameter is expressed in seconds. - ''' - _validate_not_none('lease_id', lease_id) - - self._lease_container_impl(container_name, - _LeaseActions.Change, - lease_id, - None, # lease_duration - None, # lease_break_period - proposed_lease_id, - if_modified_since, - if_unmodified_since, - timeout) - - def list_blobs(self, container_name, prefix=None, num_results=None, include=None, - delimiter=None, marker=None, timeout=None): - ''' - Returns a generator to list the blobs under the specified container. - The generator will lazily follow the continuation tokens returned by - the service and stop when all blobs have been returned or num_results is reached. - - If num_results is specified and the account has more than that number of - blobs, the generator will have a populated next_marker field once it - finishes. This marker can be used to create a new generator if more - results are desired. - - :param str container_name: - Name of existing container. - :param str prefix: - Filters the results to return only blobs whose names - begin with the specified prefix. - :param int num_results: - Specifies the maximum number of blobs to return, - including all :class:`BlobPrefix` elements. If the request does not specify - num_results or specifies a value greater than 5,000, the server will - return up to 5,000 items. Setting num_results to a value less than - or equal to zero results in error response code 400 (Bad Request). - :param ~azure.storage.blob.models.Include include: - Specifies one or more additional datasets to include in the response. - :param str delimiter: - When the request includes this parameter, the operation - returns a :class:`~azure.storage.blob.models.BlobPrefix` element in the - result list that acts as a placeholder for all blobs whose names begin - with the same substring up to the appearance of the delimiter character. - The delimiter may be a single character or a string. - :param str marker: - An opaque continuation token. This value can be retrieved from the - next_marker field of a previous generator object if num_results was - specified and that generator has finished enumerating results. If - specified, this generator will begin returning results from the point - where the previous generator stopped. - :param int timeout: - The timeout parameter is expressed in seconds. - ''' - operation_context = _OperationContext(location_lock=True) - args = (container_name,) - kwargs = {'prefix': prefix, 'marker': marker, 'max_results': num_results, - 'include': include, 'delimiter': delimiter, 'timeout': timeout, - '_context': operation_context, - '_converter': _convert_xml_to_blob_list} - resp = self._list_blobs(*args, **kwargs) - - return ListGenerator(resp, self._list_blobs, args, kwargs) - - def list_blob_names(self, container_name, prefix=None, num_results=None, - include=None, delimiter=None, marker=None, - timeout=None): - ''' - Returns a generator to list the blob names under the specified container. - The generator will lazily follow the continuation tokens returned by - the service and stop when all blobs have been returned or num_results is reached. - - If num_results is specified and the account has more than that number of - blobs, the generator will have a populated next_marker field once it - finishes. This marker can be used to create a new generator if more - results are desired. 
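A paging sketch (assuming the same placeholder service and container; the prefix and page size are arbitrary) could look like:

# The generator lazily follows continuation tokens; next_marker is only populated
# when num_results capped the enumeration and more blobs remain.
page = service.list_blobs('mycontainer', prefix='logs/', num_results=100)
for blob in page:
    print(blob.name)

if page.next_marker:
    next_page = service.list_blobs('mycontainer', prefix='logs/',
                                   num_results=100, marker=page.next_marker)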
- - :param str container_name: - Name of existing container. - :param str prefix: - Filters the results to return only blobs whose names - begin with the specified prefix. - :param int num_results: - Specifies the maximum number of blobs to return, - including all :class:`BlobPrefix` elements. If the request does not specify - num_results or specifies a value greater than 5,000, the server will - return up to 5,000 items. Setting num_results to a value less than - or equal to zero results in error response code 400 (Bad Request). - :param ~azure.storage.blob.models.Include include: - Specifies one or more additional datasets to include in the response. - :param str delimiter: - When the request includes this parameter, the operation - returns a :class:`~azure.storage.blob.models.BlobPrefix` element in the - result list that acts as a placeholder for all blobs whose names begin - with the same substring up to the appearance of the delimiter character. - The delimiter may be a single character or a string. - :param str marker: - An opaque continuation token. This value can be retrieved from the - next_marker field of a previous generator object if num_results was - specified and that generator has finished enumerating results. If - specified, this generator will begin returning results from the point - where the previous generator stopped. - :param int timeout: - The timeout parameter is expressed in seconds. - ''' - operation_context = _OperationContext(location_lock=True) - args = (container_name,) - kwargs = {'prefix': prefix, 'marker': marker, 'max_results': num_results, - 'include': include, 'delimiter': delimiter, 'timeout': timeout, - '_context': operation_context, - '_converter': _convert_xml_to_blob_name_list} - resp = self._list_blobs(*args, **kwargs) - - return ListGenerator(resp, self._list_blobs, args, kwargs) - - def _list_blobs(self, container_name, prefix=None, marker=None, - max_results=None, include=None, delimiter=None, timeout=None, - _context=None, _converter=None): - ''' - Returns the list of blobs under the specified container. - - :param str container_name: - Name of existing container. - :parm str prefix: - Filters the results to return only blobs whose names - begin with the specified prefix. - :param str marker: - A string value that identifies the portion of the list - to be returned with the next list operation. The operation returns - a next_marker value within the response body if the list returned was - not complete. The marker value may then be used in a subsequent - call to request the next set of list items. The marker value is - opaque to the client. - :param int max_results: - Specifies the maximum number of blobs to return, - including all :class:`~azure.storage.blob.models.BlobPrefix` elements. If the request does not specify - max_results or specifies a value greater than 5,000, the server will - return up to 5,000 items. Setting max_results to a value less than - or equal to zero results in error response code 400 (Bad Request). - :param str include: - Specifies one or more datasets to include in the - response. To specify more than one of these options on the URI, - you must separate each option with a comma. Valid values are: - snapshots: - Specifies that snapshots should be included in the - enumeration. Snapshots are listed from oldest to newest in - the response. - metadata: - Specifies that blob metadata be returned in the response. 
- uncommittedblobs: - Specifies that blobs for which blocks have been uploaded, - but which have not been committed using Put Block List - (REST API), be included in the response. - copy: - Version 2012-02-12 and newer. Specifies that metadata - related to any current or previous Copy Blob operation - should be included in the response. - deleted: - Version 2017-07-29 and newer. Specifies that soft deleted blobs - which are retained by the service should be included - in the response. - :param str delimiter: - When the request includes this parameter, the operation - returns a :class:`~azure.storage.blob.models.BlobPrefix` element in the response body that acts as a - placeholder for all blobs whose names begin with the same - substring up to the appearance of the delimiter character. The - delimiter may be a single character or a string. - :param int timeout: - The timeout parameter is expressed in seconds. - ''' - _validate_not_none('container_name', container_name) - request = HTTPRequest() - request.method = 'GET' - request.host_locations = self._get_host_locations(secondary=True) - request.path = _get_path(container_name) - request.query = { - 'restype': 'container', - 'comp': 'list', - 'prefix': _to_str(prefix), - 'delimiter': _to_str(delimiter), - 'marker': _to_str(marker), - 'maxresults': _int_to_str(max_results), - 'include': _to_str(include), - 'timeout': _int_to_str(timeout), - } - - return self._perform_request(request, _converter, operation_context=_context) - - def get_blob_account_information(self, container_name=None, blob_name=None, timeout=None): - """ - Gets information related to the storage account. - The information can also be retrieved if the user has a SAS to a container or blob. - - :param str container_name: - Name of existing container. - Optional, unless using a SAS token to a specific container or blob, in which case it's required. - :param str blob_name: - Name of existing blob. - Optional, unless using a SAS token to a specific blob, in which case it's required. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: The :class:`~azure.storage.blob.models.AccountInformation`. - """ - request = HTTPRequest() - request.method = 'HEAD' - request.host_locations = self._get_host_locations(secondary=True) - request.path = _get_path(container_name, blob_name) - request.query = { - 'restype': 'account', - 'comp': 'properties', - 'timeout': _int_to_str(timeout), - } - - return self._perform_request(request, _parse_account_information) - - def get_blob_service_stats(self, timeout=None): - ''' - Retrieves statistics related to replication for the Blob service. It is - only available when read-access geo-redundant replication is enabled for - the storage account. - - With geo-redundant replication, Azure Storage maintains your data durable - in two locations. In both locations, Azure Storage constantly maintains - multiple healthy replicas of your data. The location where you read, - create, update, or delete data is the primary storage account location. - The primary location exists in the region you choose at the time you - create an account via the Azure Management Azure classic portal, for - example, North Central US. The location to which your data is replicated - is the secondary location. The secondary location is automatically - determined based on the location of the primary; it is in a second data - center that resides in the same region as the primary location. 
Read-only - access is available from the secondary location, if read-access geo-redundant - replication is enabled for your storage account. - - :param int timeout: - The timeout parameter is expressed in seconds. - :return: The blob service stats. - :rtype: :class:`~azure.storage.common.models.ServiceStats` - ''' - request = HTTPRequest() - request.method = 'GET' - request.host_locations = self._get_host_locations(primary=False, secondary=True) - request.path = _get_path() - request.query = { - 'restype': 'service', - 'comp': 'stats', - 'timeout': _int_to_str(timeout), - } - - return self._perform_request(request, _convert_xml_to_service_stats) - - def set_blob_service_properties( - self, logging=None, hour_metrics=None, minute_metrics=None, - cors=None, target_version=None, timeout=None, delete_retention_policy=None, static_website=None): - ''' - Sets the properties of a storage account's Blob service, including - Azure Storage Analytics. If an element (ex Logging) is left as None, the - existing settings on the service for that functionality are preserved. - - :param logging: - Groups the Azure Analytics Logging settings. - :type logging: - :class:`~azure.storage.common.models.Logging` - :param hour_metrics: - The hour metrics settings provide a summary of request - statistics grouped by API in hourly aggregates for blobs. - :type hour_metrics: - :class:`~azure.storage.common.models.Metrics` - :param minute_metrics: - The minute metrics settings provide request statistics - for each minute for blobs. - :type minute_metrics: - :class:`~azure.storage.common.models.Metrics` - :param cors: - You can include up to five CorsRule elements in the - list. If an empty list is specified, all CORS rules will be deleted, - and CORS will be disabled for the service. - :type cors: list(:class:`~azure.storage.common.models.CorsRule`) - :param str target_version: - Indicates the default version to use for requests if an incoming - request's version is not specified. - :param int timeout: - The timeout parameter is expressed in seconds. - :param delete_retention_policy: - The delete retention policy specifies whether to retain deleted blobs. - It also specifies the number of days and versions of blob to keep. - :type delete_retention_policy: - :class:`~azure.storage.common.models.DeleteRetentionPolicy` - :param static_website: - Specifies whether the static website feature is enabled, - and if yes, indicates the index document and 404 error document to use. - :type static_website: - :class:`~azure.storage.common.models.StaticWebsite` - ''' - if all(parameter is None for parameter in [logging, hour_metrics, minute_metrics, cors, target_version, - delete_retention_policy, static_website]): - - raise ValueError("set_blob_service_properties should be called with at least one parameter") - - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path() - request.query = { - 'restype': 'service', - 'comp': 'properties', - 'timeout': _int_to_str(timeout), - } - request.body = _get_request_body( - _convert_service_properties_to_xml(logging, hour_metrics, minute_metrics, - cors, target_version, delete_retention_policy, static_website)) - - self._perform_request(request) - - def get_blob_service_properties(self, timeout=None): - ''' - Gets the properties of a storage account's Blob service, including - Azure Storage Analytics. - - :param int timeout: - The timeout parameter is expressed in seconds. 
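As a sketch of the None-preserves-existing behaviour described above (DeleteRetentionPolicy comes from azure.storage.common.models; the seven-day window is arbitrary):

from azure.storage.common.models import DeleteRetentionPolicy

# Only soft delete is changed; logging, metrics, CORS and static website settings
# are passed as None and therefore left untouched on the service.
service.set_blob_service_properties(
    delete_retention_policy=DeleteRetentionPolicy(enabled=True, days=7))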
- :return: The blob :class:`~azure.storage.common.models.ServiceProperties` with an attached - target_version property. - ''' - request = HTTPRequest() - request.method = 'GET' - request.host_locations = self._get_host_locations(secondary=True) - request.path = _get_path() - request.query = { - 'restype': 'service', - 'comp': 'properties', - 'timeout': _int_to_str(timeout), - } - - return self._perform_request(request, _convert_xml_to_service_properties) - - def get_blob_properties( - self, container_name, blob_name, snapshot=None, lease_id=None, - if_modified_since=None, if_unmodified_since=None, if_match=None, - if_none_match=None, timeout=None): - ''' - Returns all user-defined metadata, standard HTTP properties, and - system properties for the blob. It does not return the content of the blob. - Returns :class:`~azure.storage.blob.models.Blob` - with :class:`~azure.storage.blob.models.BlobProperties` and a metadata dict. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param str snapshot: - The snapshot parameter is an opaque DateTime value that, - when present, specifies the blob snapshot to retrieve. - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: a blob object including properties and metadata. 
- :rtype: :class:`~azure.storage.blob.models.Blob` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - request = HTTPRequest() - request.method = 'HEAD' - request.host_locations = self._get_host_locations(secondary=True) - request.path = _get_path(container_name, blob_name) - request.query = { - 'snapshot': _to_str(snapshot), - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-lease-id': _to_str(lease_id), - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'If-Match': _to_str(if_match), - 'If-None-Match': _to_str(if_none_match), - } - - return self._perform_request(request, _parse_blob, [blob_name, snapshot]) - - def set_blob_properties( - self, container_name, blob_name, content_settings=None, lease_id=None, - if_modified_since=None, if_unmodified_since=None, if_match=None, - if_none_match=None, timeout=None): - ''' - Sets system properties on the blob. If one property is set for the - content_settings, all properties will be overriden. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param ~azure.storage.blob.models.ContentSettings content_settings: - ContentSettings object used to set blob properties. - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. 
- :return: ETag and last modified properties for the updated Blob - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.query = { - 'comp': 'properties', - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'If-Match': _to_str(if_match), - 'If-None-Match': _to_str(if_none_match), - 'x-ms-lease-id': _to_str(lease_id) - } - if content_settings is not None: - request.headers.update(content_settings._to_headers()) - - return self._perform_request(request, _parse_base_properties) - - def exists(self, container_name, blob_name=None, snapshot=None, timeout=None): - ''' - Returns a boolean indicating whether the container exists (if blob_name - is None), or otherwise a boolean indicating whether the blob exists. - - :param str container_name: - Name of a container. - :param str blob_name: - Name of a blob. If None, the container will be checked for existence. - :param str snapshot: - The snapshot parameter is an opaque DateTime value that, - when present, specifies the snapshot. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: A boolean indicating whether the resource exists. - :rtype: bool - ''' - _validate_not_none('container_name', container_name) - try: - # make head request to see if container/blob/snapshot exists - request = HTTPRequest() - request.method = 'GET' if blob_name is None else 'HEAD' - request.host_locations = self._get_host_locations(secondary=True) - request.path = _get_path(container_name, blob_name) - request.query = { - 'snapshot': _to_str(snapshot), - 'timeout': _int_to_str(timeout), - 'restype': 'container' if blob_name is None else None, - } - - expected_errors = [_CONTAINER_NOT_FOUND_ERROR_CODE] if blob_name is None \ - else [_CONTAINER_NOT_FOUND_ERROR_CODE, _BLOB_NOT_FOUND_ERROR_CODE] - self._perform_request(request, expected_errors=expected_errors) - - return True - except AzureHttpError as ex: - _dont_fail_not_exist(ex) - return False - - def _get_blob( - self, container_name, blob_name, snapshot=None, start_range=None, - end_range=None, validate_content=False, lease_id=None, if_modified_since=None, - if_unmodified_since=None, if_match=None, if_none_match=None, timeout=None, - _context=None): - ''' - Downloads a blob's content, metadata, and properties. You can also - call this API to read a snapshot. You can specify a range if you don't - need to download the blob in its entirety. If no range is specified, - the full blob will be downloaded. - - See get_blob_to_* for high level functions that handle the download - of large blobs with automatic chunking and progress notifications. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param str snapshot: - The snapshot parameter is an opaque DateTime value that, - when present, specifies the blob snapshot to retrieve. - :param int start_range: - Start of byte range to use for downloading a section of the blob. - If no end_range is given, all bytes after the start_range will be downloaded. - The start_range and end_range params are inclusive. 
- Ex: start_range=0, end_range=511 will download first 512 bytes of blob. - :param int end_range: - End of byte range to use for downloading a section of the blob. - If end_range is given, start_range must be provided. - The start_range and end_range params are inclusive. - Ex: start_range=0, end_range=511 will download first 512 bytes of blob. - :param bool validate_content: - When this is set to True and specified together with the Range header, - the service returns the MD5 hash for the range, as long as the range - is less than or equal to 4 MB in size. - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: A Blob with content, properties, and metadata. - :rtype: :class:`~azure.storage.blob.models.Blob` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_decryption_required(self.require_encryption, - self.key_encryption_key, - self.key_resolver_function) - - start_offset, end_offset = 0, 0 - if self.key_encryption_key is not None or self.key_resolver_function is not None: - if start_range is not None: - # Align the start of the range along a 16 byte block - start_offset = start_range % 16 - start_range -= start_offset - - # Include an extra 16 bytes for the IV if necessary - # Because of the previous offsetting, start_range will always - # be a multiple of 16. 
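# Worked example of the alignment above (illustrative values, encrypted blob assumed):
#   requested start_range = 70  ->  start_offset = 70 % 16 = 6, start_range = 64
#   start_range > 0 (branch below)  ->  start_offset = 6 + 16 = 22, start_range = 48
# The request starts one 16-byte block early so the preceding ciphertext block can
# serve as the IV; the 22 surplus bytes are dropped after decryption.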
- if start_range > 0: - start_offset += 16 - start_range -= 16 - - if end_range is not None: - # Align the end of the range along a 16 byte block - end_offset = 15 - (end_range % 16) - end_range += end_offset - - request = HTTPRequest() - request.method = 'GET' - request.host_locations = self._get_host_locations(secondary=True) - request.path = _get_path(container_name, blob_name) - request.query = { - 'snapshot': _to_str(snapshot), - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-lease-id': _to_str(lease_id), - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'If-Match': _to_str(if_match), - 'If-None-Match': _to_str(if_none_match), - } - _validate_and_format_range_headers( - request, - start_range, - end_range, - start_range_required=False, - end_range_required=False, - check_content_md5=validate_content) - - return self._perform_request(request, _parse_blob, - [blob_name, snapshot, validate_content, self.require_encryption, - self.key_encryption_key, self.key_resolver_function, - start_offset, end_offset], - operation_context=_context) - - def get_blob_to_path( - self, container_name, blob_name, file_path, open_mode='wb', - snapshot=None, start_range=None, end_range=None, - validate_content=False, progress_callback=None, - max_connections=2, lease_id=None, if_modified_since=None, - if_unmodified_since=None, if_match=None, if_none_match=None, - timeout=None): - ''' - Downloads a blob to a file path, with automatic chunking and progress - notifications. Returns an instance of :class:`~azure.storage.blob.models.Blob` with - properties and metadata. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param str file_path: - Path of file to write out to. - :param str open_mode: - Mode to use when opening the file. Note that specifying append only - open_mode prevents parallel download. So, max_connections must be set - to 1 if this open_mode is used. - :param str snapshot: - The snapshot parameter is an opaque DateTime value that, - when present, specifies the blob snapshot to retrieve. - :param int start_range: - Start of byte range to use for downloading a section of the blob. - If no end_range is given, all bytes after the start_range will be downloaded. - The start_range and end_range params are inclusive. - Ex: start_range=0, end_range=511 will download first 512 bytes of blob. - :param int end_range: - End of byte range to use for downloading a section of the blob. - If end_range is given, start_range must be provided. - The start_range and end_range params are inclusive. - Ex: start_range=0, end_range=511 will download first 512 bytes of blob. - :param bool validate_content: - If set to true, validates an MD5 hash for each retrieved portion of - the blob. This is primarily valuable for detecting bitflips on the wire - if using http instead of https as https (the default) will already - validate. Note that the service will only return transactional MD5s - for chunks 4MB or less so the first get request will be of size - self.MAX_CHUNK_GET_SIZE instead of self.MAX_SINGLE_GET_SIZE. If - self.MAX_CHUNK_GET_SIZE was set to greater than 4MB an error will be - thrown. As computing the MD5 takes processing time and more requests - will need to be done due to the reduced chunk size there may be some - increase in latency. 
-        :param progress_callback:
-            Callback for progress with signature function(current, total)
-            where current is the number of bytes transferred so far, and total is
-            the size of the blob if known.
-        :type progress_callback: func(current, total)
-        :param int max_connections:
-            If set to 2 or greater, an initial get will be done for the first
-            self.MAX_SINGLE_GET_SIZE bytes of the blob. If this is the entire blob,
-            the method returns at this point. If it is not, it will download the
-            remaining data in parallel using the number of threads equal to
-            max_connections. Each chunk will be of size self.MAX_CHUNK_GET_SIZE.
-            If set to 1, a single large get request will be done. This is not
-            generally recommended but available if very few threads should be
-            used, network requests are very expensive, or a non-seekable stream
-            prevents parallel download. This may also be useful if many blobs are
-            expected to be empty as an extra request is required for empty blobs
-            if max_connections is greater than 1.
-        :param str lease_id:
-            Required if the blob has an active lease.
-        :param datetime if_modified_since:
-            A DateTime value. Azure expects the date value passed in to be UTC.
-            If timezone is included, any non-UTC datetimes will be converted to UTC.
-            If a date is passed in without timezone info, it is assumed to be UTC.
-            Specify this header to perform the operation only
-            if the resource has been modified since the specified time.
-        :param datetime if_unmodified_since:
-            A DateTime value. Azure expects the date value passed in to be UTC.
-            If timezone is included, any non-UTC datetimes will be converted to UTC.
-            If a date is passed in without timezone info, it is assumed to be UTC.
-            Specify this header to perform the operation only if
-            the resource has not been modified since the specified date/time.
-        :param str if_match:
-            An ETag value, or the wildcard character (*). Specify this header to perform
-            the operation only if the resource's ETag matches the value specified.
-        :param str if_none_match:
-            An ETag value, or the wildcard character (*). Specify this header
-            to perform the operation only if the resource's ETag does not match
-            the value specified. Specify the wildcard character (*) to perform
-            the operation only if the resource does not exist, and fail the
-            operation if it does exist.
-        :param int timeout:
-            The timeout parameter is expressed in seconds. This method may make
-            multiple calls to the Azure service and the timeout will apply to
-            each call individually.
-        :return: A Blob with properties and metadata. If max_connections is greater
-            than 1, the content_md5 (if set on the blob) will not be returned. If you
-            require this value, either use get_blob_properties or set max_connections
-            to 1.
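A download sketch for this method (placeholder names throughout; the callback signature matches the progress_callback description above):

def report_progress(current, total):
    print('{0} of {1} bytes downloaded'.format(current, total))

blob = service.get_blob_to_path('mycontainer', 'big-dataset.bin', '/tmp/big-dataset.bin',
                                max_connections=4,
                                progress_callback=report_progress)
print(blob.properties.content_length)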
- :rtype: :class:`~azure.storage.blob.models.Blob` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('file_path', file_path) - _validate_not_none('open_mode', open_mode) - - if max_connections > 1 and 'a' in open_mode: - raise ValueError(_ERROR_PARALLEL_NOT_SEEKABLE) - - with open(file_path, open_mode) as stream: - blob = self.get_blob_to_stream( - container_name, - blob_name, - stream, - snapshot, - start_range, - end_range, - validate_content, - progress_callback, - max_connections, - lease_id, - if_modified_since, - if_unmodified_since, - if_match, - if_none_match, - timeout) - - return blob - - def get_blob_to_stream( - self, container_name, blob_name, stream, snapshot=None, - start_range=None, end_range=None, validate_content=False, - progress_callback=None, max_connections=2, lease_id=None, - if_modified_since=None, if_unmodified_since=None, if_match=None, - if_none_match=None, timeout=None): - - ''' - Downloads a blob to a stream, with automatic chunking and progress - notifications. Returns an instance of :class:`~azure.storage.blob.models.Blob` with - properties and metadata. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param io.IOBase stream: - Opened stream to write to. - :param str snapshot: - The snapshot parameter is an opaque DateTime value that, - when present, specifies the blob snapshot to retrieve. - :param int start_range: - Start of byte range to use for downloading a section of the blob. - If no end_range is given, all bytes after the start_range will be downloaded. - The start_range and end_range params are inclusive. - Ex: start_range=0, end_range=511 will download first 512 bytes of blob. - :param int end_range: - End of byte range to use for downloading a section of the blob. - If end_range is given, start_range must be provided. - The start_range and end_range params are inclusive. - Ex: start_range=0, end_range=511 will download first 512 bytes of blob. - :param bool validate_content: - If set to true, validates an MD5 hash for each retrieved portion of - the blob. This is primarily valuable for detecting bitflips on the wire - if using http instead of https as https (the default) will already - validate. Note that the service will only return transactional MD5s - for chunks 4MB or less so the first get request will be of size - self.MAX_CHUNK_GET_SIZE instead of self.MAX_SINGLE_GET_SIZE. If - self.MAX_CHUNK_GET_SIZE was set to greater than 4MB an error will be - thrown. As computing the MD5 takes processing time and more requests - will need to be done due to the reduced chunk size there may be some - increase in latency. - :param progress_callback: - Callback for progress with signature function(current, total) - where current is the number of bytes transfered so far, and total is - the size of the blob if known. - :type progress_callback: func(current, total) - :param int max_connections: - If set to 2 or greater, an initial get will be done for the first - self.MAX_SINGLE_GET_SIZE bytes of the blob. If this is the entire blob, - the method returns at this point. If it is not, it will download the - remaining data parallel using the number of threads equal to - max_connections. Each chunk will be of size self.MAX_CHUNK_GET_SIZE. - If set to 1, a single large get request will be done. 
This is not - generally recommended but available if very few threads should be - used, network requests are very expensive, or a non-seekable stream - prevents parallel download. This may also be useful if many blobs are - expected to be empty as an extra request is required for empty blobs - if max_connections is greater than 1. - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. This method may make - multiple calls to the Azure service and the timeout will apply to - each call individually. - :return: A Blob with properties and metadata. If max_connections is greater - than 1, the content_md5 (if set on the blob) will not be returned. If you - require this value, either use get_blob_properties or set max_connections - to 1. - :rtype: :class:`~azure.storage.blob.models.Blob` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('stream', stream) - - if end_range is not None: - _validate_not_none("start_range", start_range) - - # the stream must be seekable if parallel download is required - if max_connections > 1: - if sys.version_info >= (3,) and not stream.seekable(): - raise ValueError(_ERROR_PARALLEL_NOT_SEEKABLE) - - try: - stream.seek(stream.tell()) - except (NotImplementedError, AttributeError): - raise ValueError(_ERROR_PARALLEL_NOT_SEEKABLE) - - # The service only provides transactional MD5s for chunks under 4MB. - # If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first - # chunk so a transactional MD5 can be retrieved. 
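# With the usual client defaults this means roughly a 32 MB first request
# (MAX_SINGLE_GET_SIZE) in the normal case, but only 4 MB (MAX_CHUNK_GET_SIZE) when
# validate_content=True, because the service returns a transactional MD5 only for
# ranges of 4 MB or less.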
- first_get_size = self.MAX_SINGLE_GET_SIZE if not validate_content else self.MAX_CHUNK_GET_SIZE - - initial_request_start = start_range if start_range is not None else 0 - - if end_range is not None and end_range - start_range < first_get_size: - initial_request_end = end_range - else: - initial_request_end = initial_request_start + first_get_size - 1 - - # Send a context object to make sure we always retry to the initial location - operation_context = _OperationContext(location_lock=True) - try: - blob = self._get_blob(container_name, - blob_name, - snapshot, - start_range=initial_request_start, - end_range=initial_request_end, - validate_content=validate_content, - lease_id=lease_id, - if_modified_since=if_modified_since, - if_unmodified_since=if_unmodified_since, - if_match=if_match, - if_none_match=if_none_match, - timeout=timeout, - _context=operation_context) - - # Parse the total blob size and adjust the download size if ranges - # were specified - blob_size = _parse_length_from_content_range(blob.properties.content_range) - if end_range is not None: - # Use the end_range unless it is over the end of the blob - download_size = min(blob_size, end_range - start_range + 1) - elif start_range is not None: - download_size = blob_size - start_range - else: - download_size = blob_size - except AzureHttpError as ex: - if start_range is None and ex.status_code == 416: - # Get range will fail on an empty blob. If the user did not - # request a range, do a regular get request in order to get - # any properties. - blob = self._get_blob(container_name, - blob_name, - snapshot, - validate_content=validate_content, - lease_id=lease_id, - if_modified_since=if_modified_since, - if_unmodified_since=if_unmodified_since, - if_match=if_match, - if_none_match=if_none_match, - timeout=timeout, - _context=operation_context) - - # Set the download size to empty - download_size = 0 - else: - raise ex - - # Mark the first progress chunk. If the blob is small or this is a single - # shot download, this is the only call - if progress_callback: - progress_callback(blob.properties.content_length, download_size) - - # Write the content to the user stream - # Clear blob content since output has been written to user stream - if blob.content is not None: - stream.write(blob.content) - blob.content = None - - # If the blob is small, the download is complete at this point. - # If blob size is large, download the rest of the blob in chunks. - if blob.properties.content_length != download_size: - # Lock on the etag. 
This can be overriden by the user by specifying '*' - if_match = if_match if if_match is not None else blob.properties.etag - - end_blob = blob_size - if end_range is not None: - # Use the end_range unless it is over the end of the blob - end_blob = min(blob_size, end_range + 1) - - _download_blob_chunks( - self, - container_name, - blob_name, - snapshot, - download_size, - self.MAX_CHUNK_GET_SIZE, - first_get_size, - initial_request_end + 1, # start where the first download ended - end_blob, - stream, - max_connections, - progress_callback, - validate_content, - lease_id, - if_modified_since, - if_unmodified_since, - if_match, - if_none_match, - timeout, - operation_context - ) - - # Set the content length to the download size instead of the size of - # the last range - blob.properties.content_length = download_size - - # Overwrite the content range to the user requested range - blob.properties.content_range = 'bytes {0}-{1}/{2}'.format(start_range, end_range, blob_size) - - # Overwrite the content MD5 as it is the MD5 for the last range instead - # of the stored MD5 - # TODO: Set to the stored MD5 when the service returns this - blob.properties.content_md5 = None - - return blob - - def get_blob_to_bytes( - self, container_name, blob_name, snapshot=None, - start_range=None, end_range=None, validate_content=False, - progress_callback=None, max_connections=2, lease_id=None, - if_modified_since=None, if_unmodified_since=None, if_match=None, - if_none_match=None, timeout=None): - ''' - Downloads a blob as an array of bytes, with automatic chunking and - progress notifications. Returns an instance of :class:`~azure.storage.blob.models.Blob` with - properties, metadata, and content. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param str snapshot: - The snapshot parameter is an opaque DateTime value that, - when present, specifies the blob snapshot to retrieve. - :param int start_range: - Start of byte range to use for downloading a section of the blob. - If no end_range is given, all bytes after the start_range will be downloaded. - The start_range and end_range params are inclusive. - Ex: start_range=0, end_range=511 will download first 512 bytes of blob. - :param int end_range: - End of byte range to use for downloading a section of the blob. - If end_range is given, start_range must be provided. - The start_range and end_range params are inclusive. - Ex: start_range=0, end_range=511 will download first 512 bytes of blob. - :param bool validate_content: - If set to true, validates an MD5 hash for each retrieved portion of - the blob. This is primarily valuable for detecting bitflips on the wire - if using http instead of https as https (the default) will already - validate. Note that the service will only return transactional MD5s - for chunks 4MB or less so the first get request will be of size - self.MAX_CHUNK_GET_SIZE instead of self.MAX_SINGLE_GET_SIZE. If - self.MAX_CHUNK_GET_SIZE was set to greater than 4MB an error will be - thrown. As computing the MD5 takes processing time and more requests - will need to be done due to the reduced chunk size there may be some - increase in latency. - :param progress_callback: - Callback for progress with signature function(current, total) - where current is the number of bytes transfered so far, and total is - the size of the blob if known. 
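As a sketch of the callback contract just described (service, container and blob names are placeholders):

def on_progress(current, total):
    print('downloaded {0} of {1} bytes'.format(current, total))

blob = service.get_blob_to_bytes('mycontainer', 'report.csv',
                                 progress_callback=on_progress)
data = blob.content  # bytes object with the full payload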
- :type progress_callback: func(current, total) - :param int max_connections: - If set to 2 or greater, an initial get will be done for the first - self.MAX_SINGLE_GET_SIZE bytes of the blob. If this is the entire blob, - the method returns at this point. If it is not, it will download the - remaining data parallel using the number of threads equal to - max_connections. Each chunk will be of size self.MAX_CHUNK_GET_SIZE. - If set to 1, a single large get request will be done. This is not - generally recommended but available if very few threads should be - used, network requests are very expensive, or a non-seekable stream - prevents parallel download. This may also be useful if many blobs are - expected to be empty as an extra request is required for empty blobs - if max_connections is greater than 1. - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. This method may make - multiple calls to the Azure service and the timeout will apply to - each call individually. - :return: A Blob with properties and metadata. If max_connections is greater - than 1, the content_md5 (if set on the blob) will not be returned. If you - require this value, either use get_blob_properties or set max_connections - to 1. - :rtype: :class:`~azure.storage.blob.models.Blob` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - - stream = BytesIO() - blob = self.get_blob_to_stream( - container_name, - blob_name, - stream, - snapshot, - start_range, - end_range, - validate_content, - progress_callback, - max_connections, - lease_id, - if_modified_since, - if_unmodified_since, - if_match, - if_none_match, - timeout) - - blob.content = stream.getvalue() - return blob - - def get_blob_to_text( - self, container_name, blob_name, encoding='utf-8', snapshot=None, - start_range=None, end_range=None, validate_content=False, - progress_callback=None, max_connections=2, lease_id=None, - if_modified_since=None, if_unmodified_since=None, if_match=None, - if_none_match=None, timeout=None): - ''' - Downloads a blob as unicode text, with automatic chunking and progress - notifications. 
Returns an instance of :class:`~azure.storage.blob.models.Blob` with - properties, metadata, and content. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param str encoding: - Python encoding to use when decoding the blob data. - :param str snapshot: - The snapshot parameter is an opaque DateTime value that, - when present, specifies the blob snapshot to retrieve. - :param int start_range: - Start of byte range to use for downloading a section of the blob. - If no end_range is given, all bytes after the start_range will be downloaded. - The start_range and end_range params are inclusive. - Ex: start_range=0, end_range=511 will download first 512 bytes of blob. - :param int end_range: - End of byte range to use for downloading a section of the blob. - If end_range is given, start_range must be provided. - The start_range and end_range params are inclusive. - Ex: start_range=0, end_range=511 will download first 512 bytes of blob. - :param bool validate_content: - If set to true, validates an MD5 hash for each retrieved portion of - the blob. This is primarily valuable for detecting bitflips on the wire - if using http instead of https as https (the default) will already - validate. Note that the service will only return transactional MD5s - for chunks 4MB or less so the first get request will be of size - self.MAX_CHUNK_GET_SIZE instead of self.MAX_SINGLE_GET_SIZE. If - self.MAX_CHUNK_GET_SIZE was set to greater than 4MB an error will be - thrown. As computing the MD5 takes processing time and more requests - will need to be done due to the reduced chunk size there may be some - increase in latency. - :param progress_callback: - Callback for progress with signature function(current, total) - where current is the number of bytes transfered so far, and total is - the size of the blob if known. - :type progress_callback: func(current, total) - :param int max_connections: - If set to 2 or greater, an initial get will be done for the first - self.MAX_SINGLE_GET_SIZE bytes of the blob. If this is the entire blob, - the method returns at this point. If it is not, it will download the - remaining data parallel using the number of threads equal to - max_connections. Each chunk will be of size self.MAX_CHUNK_GET_SIZE. - If set to 1, a single large get request will be done. This is not - generally recommended but available if very few threads should be - used, network requests are very expensive, or a non-seekable stream - prevents parallel download. This may also be useful if many blobs are - expected to be empty as an extra request is required for empty blobs - if max_connections is greater than 1. - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. 
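As a usage sketch of the download helpers documented here (account, container, and blob names are placeholders; assumes the legacy azure-storage-blob package or this vendored copy is importable), a ranged byte download and a text download look roughly like this:

    from azure.storage.blob import BlockBlobService  # or the vendored copy of this module

    service = BlockBlobService(account_name='myaccount', account_key='<key>')

    def report(current, total):
        # Progress callback: `current` bytes downloaded so far out of `total`.
        print('{0}/{1} bytes'.format(current, total))

    # First 512 bytes only; start_range and end_range are inclusive.
    head = service.get_blob_to_bytes('mycontainer', 'myblob',
                                     start_range=0, end_range=511,
                                     progress_callback=report)
    print(len(head.content))

    # Whole blob decoded as UTF-8 text.
    text = service.get_blob_to_text('mycontainer', 'myblob', encoding='utf-8')
    print(text.content[:80])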
- :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. This method may make - multiple calls to the Azure service and the timeout will apply to - each call individually. - :return: A Blob with properties and metadata. If max_connections is greater - than 1, the content_md5 (if set on the blob) will not be returned. If you - require this value, either use get_blob_properties or set max_connections - to 1. - :rtype: :class:`~azure.storage.blob.models.Blob` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('encoding', encoding) - - blob = self.get_blob_to_bytes(container_name, - blob_name, - snapshot, - start_range, - end_range, - validate_content, - progress_callback, - max_connections, - lease_id, - if_modified_since, - if_unmodified_since, - if_match, - if_none_match, - timeout) - blob.content = blob.content.decode(encoding) - return blob - - def get_blob_metadata( - self, container_name, blob_name, snapshot=None, lease_id=None, - if_modified_since=None, if_unmodified_since=None, if_match=None, - if_none_match=None, timeout=None): - ''' - Returns all user-defined metadata for the specified blob or snapshot. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param str snapshot: - The snapshot parameter is an opaque value that, - when present, specifies the blob snapshot to retrieve. - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: - A dictionary representing the blob metadata name, value pairs. 
- :rtype: dict(str, str) - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - request = HTTPRequest() - request.method = 'GET' - request.host_locations = self._get_host_locations(secondary=True) - request.path = _get_path(container_name, blob_name) - request.query = { - 'snapshot': _to_str(snapshot), - 'comp': 'metadata', - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-lease-id': _to_str(lease_id), - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'If-Match': _to_str(if_match), - 'If-None-Match': _to_str(if_none_match), - } - - return self._perform_request(request, _parse_metadata) - - def set_blob_metadata(self, container_name, blob_name, - metadata=None, lease_id=None, - if_modified_since=None, if_unmodified_since=None, - if_match=None, if_none_match=None, timeout=None): - ''' - Sets user-defined metadata for the specified blob as one or more - name-value pairs. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param metadata: - Dict containing name and value pairs. Each call to this operation - replaces all existing metadata attached to the blob. To remove all - metadata from the blob, call this operation with no metadata headers. - :type metadata: dict(str, str) - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. 
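A brief usage sketch of the metadata operations above (placeholders throughout); note the documented behaviour that every set call replaces all existing metadata:

    from azure.storage.blob import BlockBlobService  # or the vendored copy of this module

    service = BlockBlobService(account_name='myaccount', account_key='<key>')

    service.set_blob_metadata('mycontainer', 'myblob',
                              metadata={'category': 'reports', 'owner': 'data-team'})
    print(service.get_blob_metadata('mycontainer', 'myblob'))

    # A second call with no metadata argument clears everything previously set.
    service.set_blob_metadata('mycontainer', 'myblob')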
- :return: ETag and last modified properties for the updated Blob - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.query = { - 'comp': 'metadata', - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'If-Match': _to_str(if_match), - 'If-None-Match': _to_str(if_none_match), - 'x-ms-lease-id': _to_str(lease_id), - } - _add_metadata_headers(metadata, request) - - return self._perform_request(request, _parse_base_properties) - - def _lease_blob_impl(self, container_name, blob_name, - lease_action, lease_id, - lease_duration, lease_break_period, - proposed_lease_id, if_modified_since, - if_unmodified_since, if_match, if_none_match, timeout=None): - ''' - Establishes and manages a lease on a blob for write and delete operations. - The Lease Blob operation can be called in one of five modes: - Acquire, to request a new lease. - Renew, to renew an existing lease. - Change, to change the ID of an existing lease. - Release, to free the lease if it is no longer needed so that another - client may immediately acquire a lease against the blob. - Break, to end the lease but ensure that another client cannot acquire - a new lease until the current lease period has expired. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param str lease_action: - Possible _LeaseActions acquire|renew|release|break|change - :param str lease_id: - Required if the blob has an active lease. - :param int lease_duration: - Specifies the duration of the lease, in seconds, or negative one - (-1) for a lease that never expires. A non-infinite lease can be - between 15 and 60 seconds. A lease duration cannot be changed - using renew or change. - :param int lease_break_period: - For a break operation, this is the proposed duration of - seconds that the lease should continue before it is broken, between - 0 and 60 seconds. This break period is only used if it is shorter - than the time remaining on the lease. If longer, the time remaining - on the lease is used. A new lease will not be available before the - break period has expired, but the lease may be held for longer than - the break period. If this header does not appear with a break - operation, a fixed-duration lease breaks after the remaining lease - period elapses, and an infinite lease breaks immediately. - :param str proposed_lease_id: - Optional for acquire, required for change. Proposed lease ID, in a - GUID string format. The Blob service returns 400 (Invalid request) - if the proposed lease ID is not in the correct format. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. 
- If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: - Response headers returned from the service call. - :rtype: dict(str, str) - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('lease_action', lease_action) - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.query = { - 'comp': 'lease', - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-lease-id': _to_str(lease_id), - 'x-ms-lease-action': _to_str(lease_action), - 'x-ms-lease-duration': _to_str(lease_duration), - 'x-ms-lease-break-period': _to_str(lease_break_period), - 'x-ms-proposed-lease-id': _to_str(proposed_lease_id), - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'If-Match': _to_str(if_match), - 'If-None-Match': _to_str(if_none_match), - } - - return self._perform_request(request, _parse_lease) - - def acquire_blob_lease(self, container_name, blob_name, - lease_duration=-1, - proposed_lease_id=None, - if_modified_since=None, - if_unmodified_since=None, - if_match=None, - if_none_match=None, timeout=None): - ''' - Requests a new lease. If the blob does not have an active lease, the Blob - service creates a lease on the blob and returns a new lease ID. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param int lease_duration: - Specifies the duration of the lease, in seconds, or negative one - (-1) for a lease that never expires. A non-infinite lease can be - between 15 and 60 seconds. A lease duration cannot be changed - using renew or change. Default is -1 (infinite lease). - :param str proposed_lease_id: - Proposed lease ID, in a GUID string format. The Blob service - returns 400 (Invalid request) if the proposed lease ID is not - in the correct format. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. 
- :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: the lease ID of the newly created lease. - :return: str - ''' - _validate_not_none('lease_duration', lease_duration) - - if lease_duration is not -1 and \ - (lease_duration < 15 or lease_duration > 60): - raise ValueError(_ERROR_INVALID_LEASE_DURATION) - lease = self._lease_blob_impl(container_name, - blob_name, - _LeaseActions.Acquire, - None, # lease_id - lease_duration, - None, # lease_break_period - proposed_lease_id, - if_modified_since, - if_unmodified_since, - if_match, - if_none_match, - timeout) - return lease['id'] - - def renew_blob_lease(self, container_name, blob_name, - lease_id, if_modified_since=None, - if_unmodified_since=None, if_match=None, - if_none_match=None, timeout=None): - ''' - Renews the lease. The lease can be renewed if the lease ID specified on - the request matches that associated with the blob. Note that the lease may - be renewed even if it has expired as long as the blob has not been modified - or leased again since the expiration of that lease. When you renew a lease, - the lease duration clock resets. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param str lease_id: - Lease ID for active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: the lease ID of the renewed lease. 
- :return: str - ''' - _validate_not_none('lease_id', lease_id) - - lease = self._lease_blob_impl(container_name, - blob_name, - _LeaseActions.Renew, - lease_id, - None, # lease_duration - None, # lease_break_period - None, # proposed_lease_id - if_modified_since, - if_unmodified_since, - if_match, - if_none_match, - timeout) - return lease['id'] - - def release_blob_lease(self, container_name, blob_name, - lease_id, if_modified_since=None, - if_unmodified_since=None, if_match=None, - if_none_match=None, timeout=None): - ''' - Releases the lease. The lease may be released if the lease ID specified on the - request matches that associated with the blob. Releasing the lease allows another - client to immediately acquire the lease for the blob as soon as the release is complete. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param str lease_id: - Lease ID for active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. - ''' - _validate_not_none('lease_id', lease_id) - - self._lease_blob_impl(container_name, - blob_name, - _LeaseActions.Release, - lease_id, - None, # lease_duration - None, # lease_break_period - None, # proposed_lease_id - if_modified_since, - if_unmodified_since, - if_match, - if_none_match, - timeout) - - def break_blob_lease(self, container_name, blob_name, - lease_break_period=None, - if_modified_since=None, - if_unmodified_since=None, - if_match=None, - if_none_match=None, timeout=None): - ''' - Breaks the lease, if the blob has an active lease. Once a lease is broken, - it cannot be renewed. Any authorized request can break the lease; the request - is not required to specify a matching lease ID. When a lease is broken, - the lease break period is allowed to elapse, during which time no lease operation - except break and release can be performed on the blob. When a lease is successfully - broken, the response indicates the interval in seconds until a new lease can be acquired. - - A lease that has been broken can also be released, in which case another client may - immediately acquire the lease on the blob. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. 
- :param int lease_break_period: - For a break operation, this is the proposed duration of - seconds that the lease should continue before it is broken, between - 0 and 60 seconds. This break period is only used if it is shorter - than the time remaining on the lease. If longer, the time remaining - on the lease is used. A new lease will not be available before the - break period has expired, but the lease may be held for longer than - the break period. If this header does not appear with a break - operation, a fixed-duration lease breaks after the remaining lease - period elapses, and an infinite lease breaks immediately. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: Approximate time remaining in the lease period, in seconds. - :return: int - ''' - if (lease_break_period is not None) and (lease_break_period < 0 or lease_break_period > 60): - raise ValueError(_ERROR_INVALID_LEASE_BREAK_PERIOD) - - lease = self._lease_blob_impl(container_name, - blob_name, - _LeaseActions.Break, - None, # lease_id - None, # lease_duration - lease_break_period, - None, # proposed_lease_id - if_modified_since, - if_unmodified_since, - if_match, - if_none_match, - timeout) - return lease['time'] - - def change_blob_lease(self, container_name, blob_name, - lease_id, - proposed_lease_id, - if_modified_since=None, - if_unmodified_since=None, - if_match=None, - if_none_match=None, timeout=None): - ''' - Changes the lease ID of an active lease. A change must include the current - lease ID and a new lease ID. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param str lease_id: - Required if the blob has an active lease. - :param str proposed_lease_id: - Proposed lease ID, in a GUID string format. The Blob service returns - 400 (Invalid request) if the proposed lease ID is not in the correct format. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. 
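Tying the lease operations above together, a hedged end-to-end sketch (names are placeholders; durations follow the documented 15-60 second or infinite rules):

    from azure.storage.blob import BlockBlobService  # or the vendored copy of this module

    service = BlockBlobService(account_name='myaccount', account_key='<key>')

    # Acquire a 30-second lease; the return value is the lease ID.
    lease_id = service.acquire_blob_lease('mycontainer', 'myblob', lease_duration=30)

    # Writes against the blob now require the lease ID.
    service.set_blob_metadata('mycontainer', 'myblob',
                              metadata={'stage': 'processing'}, lease_id=lease_id)

    # Renewing resets the duration clock; releasing frees the blob for other clients.
    service.renew_blob_lease('mycontainer', 'myblob', lease_id)
    service.release_blob_lease('mycontainer', 'myblob', lease_id)

    # break_blob_lease ends a lease without knowing its ID and returns the time,
    # in seconds, before a new lease can be acquired:
    # remaining = service.break_blob_lease('mycontainer', 'myblob', lease_break_period=0)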
- :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. - ''' - self._lease_blob_impl(container_name, - blob_name, - _LeaseActions.Change, - lease_id, - None, # lease_duration - None, # lease_break_period - proposed_lease_id, - if_modified_since, - if_unmodified_since, - if_match, - if_none_match, - timeout) - - def snapshot_blob(self, container_name, blob_name, - metadata=None, if_modified_since=None, - if_unmodified_since=None, if_match=None, - if_none_match=None, lease_id=None, timeout=None): - ''' - Creates a read-only snapshot of a blob. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param metadata: - Specifies a user-defined name-value pair associated with the blob. - If no name-value pairs are specified, the operation will copy the - base blob metadata to the snapshot. If one or more name-value pairs - are specified, the snapshot is created with the specified metadata, - and metadata is not copied from the base blob. - :type metadata: dict(str, str) - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param str lease_id: - Required if the blob has an active lease. - :param int timeout: - The timeout parameter is expressed in seconds. 
- :return: snapshot properties - :rtype: :class:`~azure.storage.blob.models.Blob` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.query = { - 'comp': 'snapshot', - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'If-Match': _to_str(if_match), - 'If-None-Match': _to_str(if_none_match), - 'x-ms-lease-id': _to_str(lease_id) - } - _add_metadata_headers(metadata, request) - - return self._perform_request(request, _parse_snapshot_blob, [blob_name]) - - def copy_blob(self, container_name, blob_name, copy_source, - metadata=None, - source_if_modified_since=None, - source_if_unmodified_since=None, - source_if_match=None, source_if_none_match=None, - destination_if_modified_since=None, - destination_if_unmodified_since=None, - destination_if_match=None, - destination_if_none_match=None, - destination_lease_id=None, - source_lease_id=None, timeout=None): - ''' - Copies a blob asynchronously. This operation returns a copy operation - properties object, including a copy ID you can use to check or abort the - copy operation. The Blob service copies blobs on a best-effort basis. - - The source blob for a copy operation may be a block blob, an append blob, - or a page blob. If the destination blob already exists, it must be of the - same blob type as the source blob. Any existing destination blob will be - overwritten. The destination blob cannot be modified while a copy operation - is in progress. - - When copying from a page blob, the Blob service creates a destination page - blob of the source blob's length, initially containing all zeroes. Then - the source page ranges are enumerated, and non-empty ranges are copied. - - For a block blob or an append blob, the Blob service creates a committed - blob of zero length before returning from this operation. When copying - from a block blob, all committed blocks and their block IDs are copied. - Uncommitted blocks are not copied. At the end of the copy operation, the - destination blob will have the same committed block count as the source. - - When copying from an append blob, all committed blocks are copied. At the - end of the copy operation, the destination blob will have the same committed - block count as the source. - - For all blob types, you can call get_blob_properties on the destination - blob to check the status of the copy operation. The final blob will be - committed when the copy completes. - - :param str container_name: - Name of the destination container. The container must exist. - :param str blob_name: - Name of the destination blob. If the destination blob exists, it will - be overwritten. Otherwise, it will be created. - :param str copy_source: - A URL of up to 2 KB in length that specifies an Azure file or blob. - The value should be URL-encoded as it would appear in a request URI. - If the source is in another account, the source must either be public - or must be authenticated via a shared access signature. If the source - is public, no authentication is required. 
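A short usage sketch of snapshot_blob as implemented above (placeholders throughout; assumes the returned Blob exposes the snapshot timestamp on its snapshot attribute, which can then be fed back into the download helpers):

    from azure.storage.blob import BlockBlobService  # or the vendored copy of this module

    service = BlockBlobService(account_name='myaccount', account_key='<key>')

    # Create a read-only snapshot; base-blob metadata is copied because none is supplied.
    snap = service.snapshot_blob('mycontainer', 'myblob')

    # Read the blob as it existed at snapshot time.
    old = service.get_blob_to_bytes('mycontainer', 'myblob', snapshot=snap.snapshot)
    print(len(old.content))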
- Examples: - https://myaccount.blob.core.windows.net/mycontainer/myblob - https://myaccount.blob.core.windows.net/mycontainer/myblob?snapshot= - https://otheraccount.blob.core.windows.net/mycontainer/myblob?sastoken - :param metadata: - Name-value pairs associated with the blob as metadata. If no name-value - pairs are specified, the operation will copy the metadata from the - source blob or file to the destination blob. If one or more name-value - pairs are specified, the destination blob is created with the specified - metadata, and metadata is not copied from the source blob or file. - :type metadata: dict(str, str) - :param datetime source_if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this conditional header to copy the blob only if the source - blob has been modified since the specified date/time. - :param datetime source_if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this conditional header to copy the blob only if the source blob - has not been modified since the specified date/time. - :param ETag source_if_match: - An ETag value, or the wildcard character (*). Specify this conditional - header to copy the source blob only if its ETag matches the value - specified. If the ETag values do not match, the Blob service returns - status code 412 (Precondition Failed). This header cannot be specified - if the source is an Azure File. - :param ETag source_if_none_match: - An ETag value, or the wildcard character (*). Specify this conditional - header to copy the blob only if its ETag does not match the value - specified. If the values are identical, the Blob service returns status - code 412 (Precondition Failed). This header cannot be specified if the - source is an Azure File. - :param datetime destination_if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this conditional header to copy the blob only - if the destination blob has been modified since the specified date/time. - If the destination blob has not been modified, the Blob service returns - status code 412 (Precondition Failed). - :param datetime destination_if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this conditional header to copy the blob only - if the destination blob has not been modified since the specified - date/time. If the destination blob has been modified, the Blob service - returns status code 412 (Precondition Failed). - :param ETag destination_if_match: - An ETag value, or the wildcard character (*). Specify an ETag value for - this conditional header to copy the blob only if the specified ETag value - matches the ETag value for an existing destination blob. If the ETag for - the destination blob does not match the ETag specified for If-Match, the - Blob service returns status code 412 (Precondition Failed). 
- :param ETag destination_if_none_match: - An ETag value, or the wildcard character (*). Specify an ETag value for - this conditional header to copy the blob only if the specified ETag value - does not match the ETag value for the destination blob. Specify the wildcard - character (*) to perform the operation only if the destination blob does not - exist. If the specified condition isn't met, the Blob service returns status - code 412 (Precondition Failed). - :param str destination_lease_id: - The lease ID specified for this header must match the lease ID of the - destination blob. If the request does not include the lease ID or it is not - valid, the operation fails with status code 412 (Precondition Failed). - :param str source_lease_id: - Specify this to perform the Copy Blob operation only if - the lease ID given matches the active lease ID of the source blob. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: Copy operation properties such as status, source, and ID. - :rtype: :class:`~azure.storage.blob.models.CopyProperties` - ''' - return self._copy_blob(container_name, blob_name, copy_source, - metadata, - None, - source_if_modified_since, source_if_unmodified_since, - source_if_match, source_if_none_match, - destination_if_modified_since, - destination_if_unmodified_since, - destination_if_match, - destination_if_none_match, - destination_lease_id, - source_lease_id, timeout, - False, False) - - def _copy_blob(self, container_name, blob_name, copy_source, - metadata=None, - premium_page_blob_tier=None, - source_if_modified_since=None, - source_if_unmodified_since=None, - source_if_match=None, source_if_none_match=None, - destination_if_modified_since=None, - destination_if_unmodified_since=None, - destination_if_match=None, - destination_if_none_match=None, - destination_lease_id=None, - source_lease_id=None, timeout=None, - incremental_copy=False, - requires_sync=None): - ''' - See copy_blob for more details. This helper method - allows for standard copies as well as incremental copies which are only supported for page blobs and sync - copies which are only supported for block blobs. - :param bool incremental_copy: - Performs an incremental copy operation on a page blob instead of a standard copy operation. - :param bool requires_sync: - Enforces that the service will not return a response until the copy is complete. 
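The asynchronous copy workflow described above, as a hedged sketch (names and URLs are placeholders; copy status is polled through get_blob_properties, as the docstring suggests, and abort_copy_blob is documented further below):

    import time
    from azure.storage.blob import BlockBlobService  # or the vendored copy of this module

    service = BlockBlobService(account_name='myaccount', account_key='<key>')
    source_url = 'https://myaccount.blob.core.windows.net/mycontainer/source-blob'

    copy = service.copy_blob('mycontainer', 'destination-blob', source_url)

    # The service copies on a best-effort basis; poll the destination until it settles.
    while True:
        props = service.get_blob_properties('mycontainer', 'destination-blob').properties
        if props.copy.status != 'pending':
            break
        time.sleep(2)

    if props.copy.status != 'success':
        # Aborting leaves a zero-length destination blob with full metadata.
        service.abort_copy_blob('mycontainer', 'destination-blob', copy.id)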
- ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('copy_source', copy_source) - - if copy_source.startswith('/'): - # Backwards compatibility for earlier versions of the SDK where - # the copy source can be in the following formats: - # - Blob in named container: - # /accountName/containerName/blobName - # - Snapshot in named container: - # /accountName/containerName/blobName?snapshot= - # - Blob in root container: - # /accountName/blobName - # - Snapshot in root container: - # /accountName/blobName?snapshot= - account, _, source = \ - copy_source.partition('/')[2].partition('/') - copy_source = self.protocol + '://' + \ - self.primary_endpoint + '/' + source - - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - - if incremental_copy: - request.query = { - 'comp': 'incrementalcopy', - 'timeout': _int_to_str(timeout), - } - else: - request.query = {'timeout': _int_to_str(timeout)} - - request.headers = { - 'x-ms-copy-source': _to_str(copy_source), - 'x-ms-source-if-modified-since': _to_str(source_if_modified_since), - 'x-ms-source-if-unmodified-since': _to_str(source_if_unmodified_since), - 'x-ms-source-if-match': _to_str(source_if_match), - 'x-ms-source-if-none-match': _to_str(source_if_none_match), - 'If-Modified-Since': _datetime_to_utc_string(destination_if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(destination_if_unmodified_since), - 'If-Match': _to_str(destination_if_match), - 'If-None-Match': _to_str(destination_if_none_match), - 'x-ms-lease-id': _to_str(destination_lease_id), - 'x-ms-source-lease-id': _to_str(source_lease_id), - 'x-ms-access-tier': _to_str(premium_page_blob_tier), - 'x-ms-requires-sync': _to_str(requires_sync) - } - - _add_metadata_headers(metadata, request) - - return self._perform_request(request, _parse_properties, [BlobProperties]).copy - - def abort_copy_blob(self, container_name, blob_name, copy_id, - lease_id=None, timeout=None): - ''' - Aborts a pending copy_blob operation, and leaves a destination blob - with zero length and full metadata. - - :param str container_name: - Name of destination container. - :param str blob_name: - Name of destination blob. - :param str copy_id: - Copy identifier provided in the copy.id of the original - copy_blob operation. - :param str lease_id: - Required if the destination blob has an active infinite lease. - :param int timeout: - The timeout parameter is expressed in seconds. - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('copy_id', copy_id) - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.query = { - 'comp': 'copy', - 'copyid': _to_str(copy_id), - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-lease-id': _to_str(lease_id), - 'x-ms-copy-action': 'abort', - } - - self._perform_request(request) - - def delete_blob(self, container_name, blob_name, snapshot=None, - lease_id=None, delete_snapshots=None, - if_modified_since=None, if_unmodified_since=None, - if_match=None, if_none_match=None, timeout=None): - ''' - Marks the specified blob or snapshot for deletion. - The blob is later deleted during garbage collection. - - Note that in order to delete a blob, you must delete all of its - snapshots. 
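The backwards-compatibility branch above rewrites a path-style copy source into a full URL; a minimal standalone sketch of that string handling (protocol and endpoint are placeholders standing in for the client's own settings):

    def expand_copy_source(copy_source, protocol='https',
                           primary_endpoint='myaccount.blob.core.windows.net'):
        # '/accountName/containerName/blobName' -> '<protocol>://<endpoint>/containerName/blobName'
        account, _, source = copy_source.partition('/')[2].partition('/')
        return protocol + '://' + primary_endpoint + '/' + source

    assert (expand_copy_source('/myaccount/mycontainer/myblob') ==
            'https://myaccount.blob.core.windows.net/mycontainer/myblob')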
You can delete both at the same time with the Delete - Blob operation. - - If a delete retention policy is enabled for the service, then this operation soft deletes the blob or snapshot - and retains the blob or snapshot for specified number of days. - After specified number of days, blob's data is removed from the service during garbage collection. - Soft deleted blob or snapshot is accessible through List Blobs API specifying include=Include.Deleted option. - Soft-deleted blob or snapshot can be restored using Undelete API. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param str snapshot: - The snapshot parameter is an opaque DateTime value that, - when present, specifies the blob snapshot to delete. - :param str lease_id: - Required if the blob has an active lease. - :param ~azure.storage.blob.models.DeleteSnapshot delete_snapshots: - Required if the blob has associated snapshots. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - request = HTTPRequest() - request.method = 'DELETE' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.headers = { - 'x-ms-lease-id': _to_str(lease_id), - 'x-ms-delete-snapshots': _to_str(delete_snapshots), - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'If-Match': _to_str(if_match), - 'If-None-Match': _to_str(if_none_match), - } - request.query = { - 'snapshot': _to_str(snapshot), - 'timeout': _int_to_str(timeout) - } - - self._perform_request(request) - - def undelete_blob(self, container_name, blob_name, timeout=None): - ''' - The undelete Blob operation restores the contents and metadata of soft deleted blob or snapshot. - Attempting to undelete a blob or snapshot that is not soft deleted will succeed without any changes. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param int timeout: - The timeout parameter is expressed in seconds. 
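A hedged sketch of the soft-delete round trip described above (placeholders throughout; assumes a delete retention policy is enabled on the account):

    from azure.storage.blob import BlockBlobService  # or the vendored copy of this module

    service = BlockBlobService(account_name='myaccount', account_key='<key>')

    # Soft-delete the blob together with its snapshots.
    service.delete_blob('mycontainer', 'myblob', delete_snapshots='include')

    # While soft deleted, the blob only appears in listings that include deleted blobs.
    # Undelete restores it; this is a no-op if the blob was not soft deleted.
    service.undelete_blob('mycontainer', 'myblob')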
- ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.query = { - 'comp': 'undelete', - 'timeout': _int_to_str(timeout) - } - - self._perform_request(request) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/blockblobservice.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/blockblobservice.py deleted file mode 100644 index 26900d3c6149..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/blockblobservice.py +++ /dev/null @@ -1,1199 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- -from io import ( - BytesIO -) -from os import ( - path, -) - -from ..common._common_conversion import ( - _encode_base64, - _to_str, - _int_to_str, - _datetime_to_utc_string, - _get_content_md5, -) -from ..common._constants import ( - SERVICE_HOST_BASE, - DEFAULT_PROTOCOL, -) -from ..common._error import ( - _validate_not_none, - _validate_type_bytes, - _validate_encryption_required, - _validate_encryption_unsupported, - _ERROR_VALUE_NEGATIVE, - _ERROR_VALUE_SHOULD_BE_STREAM -) -from ..common._http import HTTPRequest -from ..common._serialization import ( - _get_request_body, - _get_data_bytes_only, - _get_data_bytes_or_stream_only, - _add_metadata_headers, -) -from ..common._serialization import ( - _len_plus -) -from ._deserialization import ( - _convert_xml_to_block_list, - _parse_base_properties, -) -from ._encryption import ( - _encrypt_blob, - _generate_blob_encryption_data, -) -from ._serialization import ( - _convert_block_list_to_xml, - _get_path, - _validate_and_format_range_headers, -) -from ._upload_chunking import ( - _BlockBlobChunkUploader, - _upload_blob_chunks, - _upload_blob_substream_blocks, -) -from .baseblobservice import BaseBlobService -from .models import ( - _BlobTypes, -) - - -class BlockBlobService(BaseBlobService): - ''' - Block blobs let you upload large blobs efficiently. Block blobs are comprised - of blocks, each of which is identified by a block ID. You create or modify a - block blob by writing a set of blocks and committing them by their block IDs. - Each block can be a different size, up to a maximum of 100 MB, and a block blob - can include up to 50,000 blocks. The maximum size of a block blob is therefore - approximately 4.75 TB (100 MB X 50,000 blocks). If you are writing a block - blob that is no more than 64 MB in size, you can upload it in its entirety with - a single write operation; see create_blob_from_bytes. - - :ivar int MAX_SINGLE_PUT_SIZE: - The largest size upload supported in a single put call. This is used by - the create_blob_from_* methods if the content length is known and is less - than this value. - :ivar int MAX_BLOCK_SIZE: - The size of the blocks put by create_blob_from_* methods if the content - length is unknown or is larger than MAX_SINGLE_PUT_SIZE. Smaller blocks - may be put. The maximum block size the service supports is 100MB. 
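The size limits quoted above can be checked with a line of arithmetic; a quick sketch of where the "approximately 4.75 TB" figure comes from:

    max_block_size = 100 * 1024 * 1024      # 100 MB per block (service maximum)
    max_block_count = 50000                 # blocks per block blob

    max_blob_bytes = max_block_size * max_block_count
    print(max_blob_bytes / float(1024 ** 4))  # ~4.77 TiB, i.e. the "approximately 4.75 TB" above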
- :ivar int MIN_LARGE_BLOCK_UPLOAD_THRESHOLD: - The minimum block size at which the the memory-optimized, block upload - algorithm is considered. This algorithm is only applicable to the create_blob_from_file and - create_blob_from_stream methods and will prevent the full buffering of blocks. - In addition to the block size, ContentMD5 validation and Encryption must be disabled as - these options require the blocks to be buffered. - ''' - - MAX_SINGLE_PUT_SIZE = 64 * 1024 * 1024 - MAX_BLOCK_SIZE = 4 * 1024 * 1024 - MIN_LARGE_BLOCK_UPLOAD_THRESHOLD = 4 * 1024 * 1024 + 1 - - def __init__(self, account_name=None, account_key=None, sas_token=None, is_emulated=False, - protocol=DEFAULT_PROTOCOL, endpoint_suffix=SERVICE_HOST_BASE, custom_domain=None, - request_session=None, connection_string=None, socket_timeout=None, token_credential=None): - ''' - :param str account_name: - The storage account name. This is used to authenticate requests - signed with an account key and to construct the storage endpoint. It - is required unless a connection string is given, or if a custom - domain is used with anonymous authentication. - :param str account_key: - The storage account key. This is used for shared key authentication. - If neither account key or sas token is specified, anonymous access - will be used. - :param str sas_token: - A shared access signature token to use to authenticate requests - instead of the account key. If account key and sas token are both - specified, account key will be used to sign. If neither are - specified, anonymous access will be used. - :param bool is_emulated: - Whether to use the emulator. Defaults to False. If specified, will - override all other parameters besides connection string and request - session. - :param str protocol: - The protocol to use for requests. Defaults to https. - :param str endpoint_suffix: - The host base component of the url, minus the account name. Defaults - to Azure (core.windows.net). Override this to use the China cloud - (core.chinacloudapi.cn). - :param str custom_domain: - The custom domain to use. This can be set in the Azure Portal. For - example, 'www.mydomain.com'. - :param requests.Session request_session: - The session object to use for http requests. - :param str connection_string: - If specified, this will override all other parameters besides - request session. See - http://azure.microsoft.com/en-us/documentation/articles/storage-configure-connection-string/ - for the connection string format. - :param int socket_timeout: - If specified, this will override the default socket timeout. The timeout specified is in seconds. - See DEFAULT_SOCKET_TIMEOUT in _constants.py for the default value. - :param token_credential: - A token credential used to authenticate HTTPS requests. The token value - should be updated before its expiration. - :type `~azure.storage.common.TokenCredential` - ''' - self.blob_type = _BlobTypes.BlockBlob - super(BlockBlobService, self).__init__( - account_name, account_key, sas_token, is_emulated, protocol, endpoint_suffix, - custom_domain, request_session, connection_string, socket_timeout, token_credential) - - def put_block(self, container_name, blob_name, block, block_id, - validate_content=False, lease_id=None, timeout=None): - ''' - Creates a new block to be committed as part of a blob. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of blob. - :param block: Content of the block. - :type block: io.IOBase or bytes - Content of the block. 
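A hedged sketch of the authentication options described in the constructor docstring above (all values are placeholders):

    from azure.storage.blob import BlockBlobService  # or the vendored copy of this module

    # Shared-key authentication.
    key_client = BlockBlobService(account_name='myaccount', account_key='<key>')

    # SAS-token authentication (used instead of the account key).
    sas_client = BlockBlobService(account_name='myaccount', sas_token='<sas-token>')

    # A connection string overrides the other parameters.
    cs_client = BlockBlobService(
        connection_string='DefaultEndpointsProtocol=https;AccountName=myaccount;AccountKey=<key>')

    # Local storage emulator.
    local_client = BlockBlobService(is_emulated=True)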
- :param str block_id: - A valid Base64 string value that identifies the block. Prior to - encoding, the string must be less than or equal to 64 bytes in size. - For a given blob, the length of the value specified for the blockid - parameter must be the same size for each block. Note that the Base64 - string must be URL-encoded. - :param bool validate_content: - If true, calculates an MD5 hash of the block content. The storage - service checks the hash of the content that has arrived - with the hash that was sent. This is primarily valuable for detecting - bitflips on the wire if using http instead of https as https (the default) - will already validate. Note that this MD5 hash is not stored with the - blob. - :param str lease_id: - Required if the blob has an active lease. - :param int timeout: - The timeout parameter is expressed in seconds. - ''' - _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key) - - self._put_block( - container_name, - blob_name, - block, - block_id, - validate_content=validate_content, - lease_id=lease_id, - timeout=timeout - ) - - def put_block_list( - self, container_name, blob_name, block_list, content_settings=None, - metadata=None, validate_content=False, lease_id=None, if_modified_since=None, - if_unmodified_since=None, if_match=None, if_none_match=None, - timeout=None): - ''' - Writes a blob by specifying the list of block IDs that make up the blob. - In order to be written as part of a blob, a block must have been - successfully written to the server in a prior Put Block operation. - - You can call Put Block List to update a blob by uploading only those - blocks that have changed, then committing the new and existing blocks - together. You can do this by specifying whether to commit a block from - the committed block list or from the uncommitted block list, or to commit - the most recently uploaded version of the block, whichever list it may - belong to. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param block_list: - A list of :class:`~azure.storeage.blob.models.BlobBlock` containing the block ids and block state. - :type block_list: list(:class:`~azure.storage.blob.models.BlobBlock`) - :param ~azure.storage.blob.models.ContentSettings content_settings: - ContentSettings object used to set properties on the blob. - :param metadata: - Name-value pairs associated with the blob as metadata. - :type metadata: dict(str, str) - :param bool validate_content: - If true, calculates an MD5 hash of the block list content. The storage - service checks the hash of the block list content that has arrived - with the hash that was sent. This is primarily valuable for detecting - bitflips on the wire if using http instead of https as https (the default) - will already validate. Note that this check is associated with - the block list content, and not with the content of the blob itself. - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. 
- If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: ETag and last modified properties for the updated Block Blob - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - - _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key) - - return self._put_block_list( - container_name, - blob_name, - block_list, - content_settings=content_settings, - metadata=metadata, - validate_content=validate_content, - lease_id=lease_id, - if_modified_since=if_modified_since, - if_unmodified_since=if_unmodified_since, - if_match=if_match, - if_none_match=if_none_match, - timeout=timeout - ) - - def get_block_list(self, container_name, blob_name, snapshot=None, - block_list_type=None, lease_id=None, timeout=None): - ''' - Retrieves the list of blocks that have been uploaded as part of a - block blob. There are two block lists maintained for a blob: - Committed Block List: - The list of blocks that have been successfully committed to a - given blob with Put Block List. - Uncommitted Block List: - The list of blocks that have been uploaded for a blob using - Put Block, but that have not yet been committed. These blocks - are stored in Azure in association with a blob, but do not yet - form part of the blob. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param str snapshot: - Datetime to determine the time to retrieve the blocks. - :param str block_list_type: - Specifies whether to return the list of committed blocks, the list - of uncommitted blocks, or both lists together. Valid values are: - committed, uncommitted, or all. - :param str lease_id: - Required if the blob has an active lease. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: list committed and/or uncommitted blocks for Block Blob - :rtype: :class:`~azure.storage.blob.models.BlobBlockList` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - request = HTTPRequest() - request.method = 'GET' - request.host_locations = self._get_host_locations(secondary=True) - request.path = _get_path(container_name, blob_name) - request.query = { - 'comp': 'blocklist', - 'snapshot': _to_str(snapshot), - 'blocklisttype': _to_str(block_list_type), - 'timeout': _int_to_str(timeout), - } - request.headers = {'x-ms-lease-id': _to_str(lease_id)} - - return self._perform_request(request, _convert_xml_to_block_list) - - def put_block_from_url(self, container_name, blob_name, copy_source_url, block_id, - source_range_start=None, source_range_end=None, - source_content_md5=None, lease_id=None, timeout=None): - """ - Creates a new block to be committed as part of a blob. 
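The block APIs above (put_block, put_block_list, get_block_list) implement the staged-upload flow described in the class docstring being removed. A minimal sketch of that flow, assuming the legacy azure-storage-blob package layout that this vendored copy mirrors; the account, container, and blob names and the key placeholder are illustrative only:

from base64 import b64encode
from azure.storage.blob import BlockBlobService, BlobBlock  # legacy SDK layout; the vendored copy mirrors it

service = BlockBlobService(account_name='myaccount', account_key='<key>')

chunks = [b'first chunk ', b'second chunk']
staged = []
for index, chunk in enumerate(chunks):
    # Block IDs must be Base64 strings of equal length within one blob.
    block_id = b64encode('block-{:06d}'.format(index).encode('utf-8')).decode('utf-8')
    service.put_block('mycontainer', 'staged-blob', chunk, block_id)
    staged.append(BlobBlock(id=block_id))

# Nothing is readable until the staged blocks are committed.
service.put_block_list('mycontainer', 'staged-blob', staged)

# Inspect committed vs. uncommitted blocks for the blob.
block_list = service.get_block_list('mycontainer', 'staged-blob', block_list_type='all')
print([b.id for b in block_list.committed_blocks])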
- - :param str container_name: - Name of existing container. - :param str blob_name: - Name of blob. - :param str copy_source_url: - The URL of the source data. It can point to any Azure Blob or File, that is either public or has a - shared access signature attached. - :param int source_range_start: - This indicates the start of the range of bytes(inclusive) that has to be taken from the copy source. - :param int source_range_end: - This indicates the end of the range of bytes(inclusive) that has to be taken from the copy source. - :param str block_id: - A valid Base64 string value that identifies the block. Prior to - encoding, the string must be less than or equal to 64 bytes in size. - For a given blob, the length of the value specified for the blockid - parameter must be the same size for each block. Note that the Base64 - string must be URL-encoded. - :param str source_content_md5: - If given, the service will calculate the MD5 hash of the block content and compare against this value. - :param str lease_id: - Required if the blob has an active lease. - :param int timeout: - The timeout parameter is expressed in seconds. - """ - _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key) - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('copy_source_url', copy_source_url) - _validate_not_none('block_id', block_id) - - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.query = { - 'comp': 'block', - 'blockid': _encode_base64(_to_str(block_id)), - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-lease-id': _to_str(lease_id), - 'x-ms-copy-source': copy_source_url, - 'x-ms-source-content-md5': source_content_md5, - } - _validate_and_format_range_headers( - request, - source_range_start, - source_range_end, - start_range_required=False, - end_range_required=False, - range_header_name="x-ms-source-range" - ) - - self._perform_request(request) - - # ----Convenience APIs----------------------------------------------------- - - def create_blob_from_path( - self, container_name, blob_name, file_path, content_settings=None, - metadata=None, validate_content=False, progress_callback=None, - max_connections=2, lease_id=None, if_modified_since=None, - if_unmodified_since=None, if_match=None, if_none_match=None, timeout=None): - ''' - Creates a new blob from a file path, or updates the content of an - existing blob, with automatic chunking and progress notifications. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of blob to create or update. - :param str file_path: - Path of the file to upload as the blob content. - :param ~azure.storage.blob.models.ContentSettings content_settings: - ContentSettings object used to set blob properties. - :param metadata: - Name-value pairs associated with the blob as metadata. - :type metadata: dict(str, str) - :param bool validate_content: - If true, calculates an MD5 hash for each chunk of the blob. The storage - service checks the hash of the content that has arrived with the hash - that was sent. This is primarily valuable for detecting bitflips on - the wire if using http instead of https as https (the default) will - already validate. Note that this MD5 hash is not stored with the - blob. 
Also note that if enabled, the memory-efficient upload algorithm - will not be used, because computing the MD5 hash requires buffering - entire blocks, and doing so defeats the purpose of the memory-efficient algorithm. - :param progress_callback: - Callback for progress with signature function(current, total) where - current is the number of bytes transfered so far, and total is the - size of the blob, or None if the total size is unknown. - :type progress_callback: func(current, total) - :param int max_connections: - Maximum number of parallel connections to use when the blob size exceeds - 64MB. - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. This method may make - multiple calls to the Azure service and the timeout will apply to - each call individually. - :return: ETag and last modified properties for the Block Blob - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('file_path', file_path) - - count = path.getsize(file_path) - with open(file_path, 'rb') as stream: - return self.create_blob_from_stream( - container_name=container_name, - blob_name=blob_name, - stream=stream, - count=count, - content_settings=content_settings, - metadata=metadata, - validate_content=validate_content, - lease_id=lease_id, - progress_callback=progress_callback, - max_connections=max_connections, - if_modified_since=if_modified_since, - if_unmodified_since=if_unmodified_since, - if_match=if_match, - if_none_match=if_none_match, - timeout=timeout) - - def create_blob_from_stream( - self, container_name, blob_name, stream, count=None, - content_settings=None, metadata=None, validate_content=False, - progress_callback=None, max_connections=2, lease_id=None, - if_modified_since=None, if_unmodified_since=None, if_match=None, - if_none_match=None, timeout=None, use_byte_buffer=False): - ''' - Creates a new blob from a file/stream, or updates the content of - an existing blob, with automatic chunking and progress - notifications. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of blob to create or update. 
- :param io.IOBase stream: - Opened file/stream to upload as the blob content. - :param int count: - Number of bytes to read from the stream. This is optional, but - should be supplied for optimal performance. - :param ~azure.storage.blob.models.ContentSettings content_settings: - ContentSettings object used to set blob properties. - :param metadata: - Name-value pairs associated with the blob as metadata. - :type metadata: dict(str, str) - :param bool validate_content: - If true, calculates an MD5 hash for each chunk of the blob. The storage - service checks the hash of the content that has arrived with the hash - that was sent. This is primarily valuable for detecting bitflips on - the wire if using http instead of https as https (the default) will - already validate. Note that this MD5 hash is not stored with the - blob. Also note that if enabled, the memory-efficient upload algorithm - will not be used, because computing the MD5 hash requires buffering - entire blocks, and doing so defeats the purpose of the memory-efficient algorithm. - :param progress_callback: - Callback for progress with signature function(current, total) where - current is the number of bytes transfered so far, and total is the - size of the blob, or None if the total size is unknown. - :type progress_callback: func(current, total) - :param int max_connections: - Maximum number of parallel connections to use when the blob size exceeds - 64MB. Note that parallel upload requires the stream to be seekable. - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. This method may make - multiple calls to the Azure service and the timeout will apply to - each call individually. - :param bool use_byte_buffer: - If True, this will force usage of the original full block buffering upload path. - By default, this value is False and will employ a memory-efficient, - streaming upload algorithm under the following conditions: - The provided stream is seekable, 'require_encryption' is False, and - MAX_BLOCK_SIZE >= MIN_LARGE_BLOCK_UPLOAD_THRESHOLD. - One should consider the drawbacks of using this approach. 
In order to achieve - memory-efficiency, a IOBase stream or file-like object is segmented into logical blocks - using a SubStream wrapper. In order to read the correct data, each SubStream must acquire - a lock so that it can safely seek to the right position on the shared, underlying stream. - If max_connections > 1, the concurrency will result in a considerable amount of seeking on - the underlying stream. For the most common inputs such as a file-like stream object, seeking - is an inexpensive operation and this is not much of a concern. However, for other variants of streams - this may not be the case. The trade-off for memory-efficiency must be weighed against the cost of seeking - with your input stream. - The SubStream class will attempt to buffer up to 4 MB internally to reduce the amount of - seek and read calls to the underlying stream. This is particularly beneficial when uploading larger blocks. - :return: ETag and last modified properties for the Block Blob - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('stream', stream) - _validate_encryption_required(self.require_encryption, self.key_encryption_key) - - # Adjust count to include padding if we are expected to encrypt. - adjusted_count = count - if (self.key_encryption_key is not None) and (adjusted_count is not None): - adjusted_count += (16 - (count % 16)) - - # Do single put if the size is smaller than MAX_SINGLE_PUT_SIZE - if adjusted_count is not None and (adjusted_count < self.MAX_SINGLE_PUT_SIZE): - if progress_callback: - progress_callback(0, count) - - data = stream.read(count) - resp = self._put_blob( - container_name=container_name, - blob_name=blob_name, - blob=data, - content_settings=content_settings, - metadata=metadata, - validate_content=validate_content, - lease_id=lease_id, - if_modified_since=if_modified_since, - if_unmodified_since=if_unmodified_since, - if_match=if_match, - if_none_match=if_none_match, - timeout=timeout) - - if progress_callback: - progress_callback(count, count) - - return resp - else: # Size is larger than MAX_SINGLE_PUT_SIZE, must upload with multiple put_block calls - cek, iv, encryption_data = None, None, None - - use_original_upload_path = use_byte_buffer or validate_content or self.require_encryption or \ - self.MAX_BLOCK_SIZE < self.MIN_LARGE_BLOCK_UPLOAD_THRESHOLD or \ - hasattr(stream, 'seekable') and not stream.seekable() or \ - not hasattr(stream, 'seek') or not hasattr(stream, 'tell') - - if use_original_upload_path: - if self.key_encryption_key: - cek, iv, encryption_data = _generate_blob_encryption_data(self.key_encryption_key) - - block_ids = _upload_blob_chunks( - blob_service=self, - container_name=container_name, - blob_name=blob_name, - blob_size=count, - block_size=self.MAX_BLOCK_SIZE, - stream=stream, - max_connections=max_connections, - progress_callback=progress_callback, - validate_content=validate_content, - lease_id=lease_id, - uploader_class=_BlockBlobChunkUploader, - timeout=timeout, - content_encryption_key=cek, - initialization_vector=iv - ) - else: - block_ids = _upload_blob_substream_blocks( - blob_service=self, - container_name=container_name, - blob_name=blob_name, - blob_size=count, - block_size=self.MAX_BLOCK_SIZE, - stream=stream, - max_connections=max_connections, - progress_callback=progress_callback, - validate_content=validate_content, - lease_id=lease_id, - uploader_class=_BlockBlobChunkUploader, - 
timeout=timeout, - ) - - return self._put_block_list( - container_name=container_name, - blob_name=blob_name, - block_list=block_ids, - content_settings=content_settings, - metadata=metadata, - validate_content=validate_content, - lease_id=lease_id, - if_modified_since=if_modified_since, - if_unmodified_since=if_unmodified_since, - if_match=if_match, - if_none_match=if_none_match, - timeout=timeout, - encryption_data=encryption_data - ) - - def create_blob_from_bytes( - self, container_name, blob_name, blob, index=0, count=None, - content_settings=None, metadata=None, validate_content=False, - progress_callback=None, max_connections=2, lease_id=None, - if_modified_since=None, if_unmodified_since=None, if_match=None, - if_none_match=None, timeout=None): - ''' - Creates a new blob from an array of bytes, or updates the content - of an existing blob, with automatic chunking and progress - notifications. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of blob to create or update. - :param bytes blob: - Content of blob as an array of bytes. - :param int index: - Start index in the array of bytes. - :param int count: - Number of bytes to upload. Set to None or negative value to upload - all bytes starting from index. - :param ~azure.storage.blob.models.ContentSettings content_settings: - ContentSettings object used to set blob properties. - :param metadata: - Name-value pairs associated with the blob as metadata. - :type metadata: dict(str, str) - :param bool validate_content: - If true, calculates an MD5 hash for each chunk of the blob. The storage - service checks the hash of the content that has arrived with the hash - that was sent. This is primarily valuable for detecting bitflips on - the wire if using http instead of https as https (the default) will - already validate. Note that this MD5 hash is not stored with the - blob. - :param progress_callback: - Callback for progress with signature function(current, total) where - current is the number of bytes transfered so far, and total is the - size of the blob, or None if the total size is unknown. - :type progress_callback: func(current, total) - :param int max_connections: - Maximum number of parallel connections to use when the blob size exceeds - 64MB. - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. 
Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. This method may make - multiple calls to the Azure service and the timeout will apply to - each call individually. - :return: ETag and last modified properties for the Block Blob - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('blob', blob) - _validate_not_none('index', index) - _validate_type_bytes('blob', blob) - - if index < 0: - raise IndexError(_ERROR_VALUE_NEGATIVE.format('index')) - - if count is None or count < 0: - count = len(blob) - index - - stream = BytesIO(blob) - stream.seek(index) - - return self.create_blob_from_stream( - container_name=container_name, - blob_name=blob_name, - stream=stream, - count=count, - content_settings=content_settings, - metadata=metadata, - validate_content=validate_content, - progress_callback=progress_callback, - max_connections=max_connections, - lease_id=lease_id, - if_modified_since=if_modified_since, - if_unmodified_since=if_unmodified_since, - if_match=if_match, - if_none_match=if_none_match, - timeout=timeout, - use_byte_buffer=True - ) - - def create_blob_from_text( - self, container_name, blob_name, text, encoding='utf-8', - content_settings=None, metadata=None, validate_content=False, - progress_callback=None, max_connections=2, lease_id=None, - if_modified_since=None, if_unmodified_since=None, if_match=None, - if_none_match=None, timeout=None): - ''' - Creates a new blob from str/unicode, or updates the content of an - existing blob, with automatic chunking and progress notifications. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of blob to create or update. - :param str text: - Text to upload to the blob. - :param str encoding: - Python encoding to use to convert the text to bytes. - :param ~azure.storage.blob.models.ContentSettings content_settings: - ContentSettings object used to set blob properties. - :param metadata: - Name-value pairs associated with the blob as metadata. - :type metadata: dict(str, str) - :param bool validate_content: - If true, calculates an MD5 hash for each chunk of the blob. The storage - service checks the hash of the content that has arrived with the hash - that was sent. This is primarily valuable for detecting bitflips on - the wire if using http instead of https as https (the default) will - already validate. Note that this MD5 hash is not stored with the - blob. - :param progress_callback: - Callback for progress with signature function(current, total) where - current is the number of bytes transfered so far, and total is the - size of the blob, or None if the total size is unknown. - :type progress_callback: func(current, total) - :param int max_connections: - Maximum number of parallel connections to use when the blob size exceeds - 64MB. - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. 
- :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. This method may make - multiple calls to the Azure service and the timeout will apply to - each call individually. - :return: ETag and last modified properties for the Block Blob - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('text', text) - - if not isinstance(text, bytes): - _validate_not_none('encoding', encoding) - text = text.encode(encoding) - - return self.create_blob_from_bytes( - container_name=container_name, - blob_name=blob_name, - blob=text, - index=0, - count=len(text), - content_settings=content_settings, - metadata=metadata, - validate_content=validate_content, - lease_id=lease_id, - progress_callback=progress_callback, - max_connections=max_connections, - if_modified_since=if_modified_since, - if_unmodified_since=if_unmodified_since, - if_match=if_match, - if_none_match=if_none_match, - timeout=timeout) - - def set_standard_blob_tier( - self, container_name, blob_name, standard_blob_tier, timeout=None): - ''' - Sets the block blob tiers on the blob. This API is only supported for block blobs on standard storage accounts. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of blob to update. - :param StandardBlobTier standard_blob_tier: - A standard blob tier value to set the blob to. For this version of the library, - this is only applicable to block blobs on standard storage accounts. - :param int timeout: - The timeout parameter is expressed in seconds. This method may make - multiple calls to the Azure service and the timeout will apply to - each call individually. 
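The create_blob_from_* convenience methods and set_standard_blob_tier documented above cover the common single-call upload path. A hedged sketch, again assuming the legacy azure-storage-blob import layout and placeholder names; 'Cool' is passed as a plain string here, although the docstring describes a StandardBlobTier value:

from azure.storage.blob import BlockBlobService, ContentSettings  # legacy SDK layout; the vendored copy mirrors it

service = BlockBlobService(account_name='myaccount', account_key='<key>')

# Small payloads go up in a single Put Blob call; larger ones are chunked automatically.
resource = service.create_blob_from_text(
    'mycontainer', 'notes.txt', u'hello from the convenience API',
    content_settings=ContentSettings(content_type='text/plain; charset=utf-8'))
print(resource.etag, resource.last_modified)

# Move the committed block blob to the cool tier (standard storage accounts only).
service.set_standard_blob_tier('mycontainer', 'notes.txt', 'Cool')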
- ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('standard_blob_tier', standard_blob_tier) - - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.query = { - 'comp': 'tier', - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-access-tier': _to_str(standard_blob_tier) - } - - self._perform_request(request) - - def copy_blob(self, container_name, blob_name, copy_source, - metadata=None, source_if_modified_since=None, - source_if_unmodified_since=None, source_if_match=None, - source_if_none_match=None, destination_if_modified_since=None, - destination_if_unmodified_since=None, destination_if_match=None, - destination_if_none_match=None, destination_lease_id=None, - source_lease_id=None, timeout=None, requires_sync=None): - - ''' - Copies a blob. This operation returns a copy operation - properties object. The copy operation may be configured to either be an - asynchronous, best-effort operation, or a synchronous operation. - - The source must be a block blob if requires_sync is true. Any existing - destination blob will be overwritten. The destination blob cannot be - modified while a copy operation is in progress. - - When copying from a block blob, all committed blocks and their block IDs are - copied. Uncommitted blocks are not copied. At the end of the copy operation, - the destination blob will have the same committed block count as the source. - - You can call get_blob_properties on the destination blob to check the status - of the copy operation. The final blob will be committed when the copy completes. - - :param str container_name: - Name of the destination container. The container must exist. - :param str blob_name: - Name of the destination blob. If the destination blob exists, it will - be overwritten. Otherwise, it will be created. - :param str copy_source: - A URL of up to 2 KB in length that specifies an Azure file or blob. - The value should be URL-encoded as it would appear in a request URI. - If the source is in another account, the source must either be public - or must be authenticated via a shared access signature. If the source - is public, no authentication is required. - Examples: - https://myaccount.blob.core.windows.net/mycontainer/myblob - https://myaccount.blob.core.windows.net/mycontainer/myblob?snapshot= - https://otheraccount.blob.core.windows.net/mycontainer/myblob?sastoken - :param metadata: - Name-value pairs associated with the blob as metadata. If no name-value - pairs are specified, the operation will copy the metadata from the - source blob or file to the destination blob. If one or more name-value - pairs are specified, the destination blob is created with the specified - metadata, and metadata is not copied from the source blob or file. - :type metadata: dict(str, str) - :param datetime source_if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this conditional header to copy the blob only if the source - blob has been modified since the specified date/time. - :param datetime source_if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. 
- If a date is passed in without timezone info, it is assumed to be UTC. - Specify this conditional header to copy the blob only if the source blob - has not been modified since the specified date/time. - :param ETag source_if_match: - An ETag value, or the wildcard character (*). Specify this conditional - header to copy the source blob only if its ETag matches the value - specified. If the ETag values do not match, the Blob service returns - status code 412 (Precondition Failed). This header cannot be specified - if the source is an Azure File. - :param ETag source_if_none_match: - An ETag value, or the wildcard character (*). Specify this conditional - header to copy the blob only if its ETag does not match the value - specified. If the values are identical, the Blob service returns status - code 412 (Precondition Failed). This header cannot be specified if the - source is an Azure File. - :param datetime destination_if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this conditional header to copy the blob only - if the destination blob has been modified since the specified date/time. - If the destination blob has not been modified, the Blob service returns - status code 412 (Precondition Failed). - :param datetime destination_if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this conditional header to copy the blob only - if the destination blob has not been modified since the specified - date/time. If the destination blob has been modified, the Blob service - returns status code 412 (Precondition Failed). - :param ETag destination_if_match: - An ETag value, or the wildcard character (*). Specify an ETag value for - this conditional header to copy the blob only if the specified ETag value - matches the ETag value for an existing destination blob. If the ETag for - the destination blob does not match the ETag specified for If-Match, the - Blob service returns status code 412 (Precondition Failed). - :param ETag destination_if_none_match: - An ETag value, or the wildcard character (*). Specify an ETag value for - this conditional header to copy the blob only if the specified ETag value - does not match the ETag value for the destination blob. Specify the wildcard - character (*) to perform the operation only if the destination blob does not - exist. If the specified condition isn't met, the Blob service returns status - code 412 (Precondition Failed). - :param str destination_lease_id: - The lease ID specified for this header must match the lease ID of the - destination blob. If the request does not include the lease ID or it is not - valid, the operation fails with status code 412 (Precondition Failed). - :param str source_lease_id: - Specify this to perform the Copy Blob operation only if - the lease ID given matches the active lease ID of the source blob. - :param int timeout: - The timeout parameter is expressed in seconds. - :param bool requires_sync: - Enforces that the service will not return a response until the copy is complete. - :return: Copy operation properties such as status, source, and ID. 
- :rtype: :class:`~azure.storage.blob.models.CopyProperties` - ''' - - return self._copy_blob(container_name, blob_name, copy_source, - metadata, - premium_page_blob_tier=None, - source_if_modified_since=source_if_modified_since, - source_if_unmodified_since=source_if_unmodified_since, - source_if_match=source_if_match, - source_if_none_match=source_if_none_match, - destination_if_modified_since=destination_if_modified_since, - destination_if_unmodified_since=destination_if_unmodified_since, - destination_if_match=destination_if_match, - destination_if_none_match=destination_if_none_match, - destination_lease_id=destination_lease_id, - source_lease_id=source_lease_id, timeout=timeout, - incremental_copy=False, - requires_sync=requires_sync) - - # -----Helper methods------------------------------------ - def _put_blob(self, container_name, blob_name, blob, content_settings=None, - metadata=None, validate_content=False, lease_id=None, if_modified_since=None, - if_unmodified_since=None, if_match=None, if_none_match=None, - timeout=None): - ''' - Creates a blob or updates an existing blob. - - See create_blob_from_* for high level - functions that handle the creation and upload of large blobs with - automatic chunking and progress notifications. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of blob to create or update. - :param bytes blob: - Content of blob as bytes (size < 64MB). For larger size, you - must call put_block and put_block_list to set content of blob. - :param ~azure.storage.blob.models.ContentSettings content_settings: - ContentSettings object used to set properties on the blob. - :param metadata: - Name-value pairs associated with the blob as metadata. - :param bool validate_content: - If true, calculates an MD5 hash of the blob content. The storage - service checks the hash of the content that has arrived - with the hash that was sent. This is primarily valuable for detecting - bitflips on the wire if using http instead of https as https (the default) - will already validate. Note that this MD5 hash is not stored with the - blob. - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. 
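copy_blob, shown above, starts a server-side copy and returns CopyProperties; its docstring suggests polling get_blob_properties on the destination blob to track progress. A sketch under those assumptions, with placeholder names; the get_blob_properties signature belongs to the base service and is assumed here, not shown in this file:

from azure.storage.blob import BlockBlobService  # legacy SDK layout; the vendored copy mirrors it

service = BlockBlobService(account_name='myaccount', account_key='<key>')

source_url = 'https://myaccount.blob.core.windows.net/mycontainer/myblob'
copy = service.copy_blob('backup-container', 'myblob-copy', source_url)
print(copy.id, copy.status)  # CopyProperties: typically 'pending' or 'success'

# Poll the destination blob until the asynchronous copy finishes (signature assumed).
props = service.get_blob_properties('backup-container', 'myblob-copy')
print(props.properties.copy.status, props.properties.copy.progress)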
- :return: ETag and last modified properties for the new Block Blob - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_encryption_required(self.require_encryption, self.key_encryption_key) - - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.query = {'timeout': _int_to_str(timeout)} - request.headers = { - 'x-ms-blob-type': _to_str(self.blob_type), - 'x-ms-lease-id': _to_str(lease_id), - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'If-Match': _to_str(if_match), - 'If-None-Match': _to_str(if_none_match) - } - _add_metadata_headers(metadata, request) - if content_settings is not None: - request.headers.update(content_settings._to_headers()) - blob = _get_data_bytes_only('blob', blob) - if self.key_encryption_key: - encryption_data, blob = _encrypt_blob(blob, self.key_encryption_key) - request.headers['x-ms-meta-encryptiondata'] = encryption_data - request.body = blob - - if validate_content: - computed_md5 = _get_content_md5(request.body) - request.headers['Content-MD5'] = _to_str(computed_md5) - - return self._perform_request(request, _parse_base_properties) - - def _put_block(self, container_name, blob_name, block, block_id, - validate_content=False, lease_id=None, timeout=None): - ''' - See put_block for more details. This helper method - allows for encryption or other such special behavior because - it is safely handled by the library. These behaviors are - prohibited in the public version of this function. - ''' - - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('block', block) - _validate_not_none('block_id', block_id) - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.query = { - 'comp': 'block', - 'blockid': _encode_base64(_to_str(block_id)), - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-lease-id': _to_str(lease_id) - } - request.body = _get_data_bytes_or_stream_only('block', block) - if hasattr(request.body, 'read'): - if _len_plus(request.body) is None: - try: - data = b'' - for chunk in iter(lambda: request.body.read(4096), b""): - data += chunk - request.body = data - except AttributeError: - raise ValueError(_ERROR_VALUE_SHOULD_BE_STREAM.format('request.body')) - - if validate_content: - computed_md5 = _get_content_md5(request.body) - request.headers['Content-MD5'] = _to_str(computed_md5) - - self._perform_request(request) - - def _put_block_list( - self, container_name, blob_name, block_list, content_settings=None, - metadata=None, validate_content=False, lease_id=None, if_modified_since=None, - if_unmodified_since=None, if_match=None, if_none_match=None, - timeout=None, encryption_data=None): - ''' - See put_block_list for more details. This helper method - allows for encryption or other such special behavior because - it is safely handled by the library. These behaviors are - prohibited in the public version of this function. - :param str encryption_data: - A JSON formatted string containing the encryption metadata generated for this - blob if it was encrypted all at once upon upload. 
This should only be passed - in by internal methods. - ''' - - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('block_list', block_list) - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.query = { - 'comp': 'blocklist', - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-lease-id': _to_str(lease_id), - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'If-Match': _to_str(if_match), - 'If-None-Match': _to_str(if_none_match), - } - _add_metadata_headers(metadata, request) - if content_settings is not None: - request.headers.update(content_settings._to_headers()) - request.body = _get_request_body( - _convert_block_list_to_xml(block_list)) - - if validate_content: - computed_md5 = _get_content_md5(request.body) - request.headers['Content-MD5'] = _to_str(computed_md5) - - if encryption_data is not None: - request.headers['x-ms-meta-encryptiondata'] = encryption_data - - return self._perform_request(request, _parse_base_properties) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/models.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/models.py deleted file mode 100644 index 225d0f9370e8..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/models.py +++ /dev/null @@ -1,825 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- -from ..common._common_conversion import _to_str - - -class Container(object): - ''' - Blob container class. - - :ivar str name: - The name of the container. - :ivar metadata: - A dict containing name-value pairs associated with the container as metadata. - This var is set to None unless the include=metadata param was included - for the list containers operation. If this parameter was specified but the - container has no metadata, metadata will be set to an empty dictionary. - :vartype metadata: dict(str, str) - :ivar ContainerProperties properties: - System properties for the container. - ''' - - def __init__(self, name=None, props=None, metadata=None): - self.name = name - self.properties = props or ContainerProperties() - self.metadata = metadata - - -class ContainerProperties(object): - ''' - Blob container's properties class. - - :ivar datetime last_modified: - A datetime object representing the last time the container was modified. - :ivar str etag: - The ETag contains a value that you can use to perform operations - conditionally. - :ivar LeaseProperties lease: - Stores all the lease information for the container. - :ivar bool has_immutability_policy: - Represents whether the container has an immutability policy. - :ivar bool has_legal_hold: - Represents whether the container has a legal hold. - ''' - - def __init__(self): - self.last_modified = None - self.etag = None - self.lease = LeaseProperties() - self.public_access = None - self.has_immutability_policy = None - self.has_legal_hold = None - - -class Blob(object): - ''' - Blob class. - - :ivar str name: - Name of blob. 
- :ivar str snapshot: - A DateTime value that uniquely identifies the snapshot. The value of - this header indicates the snapshot version, and may be used in - subsequent requests to access the snapshot. - :ivar content: - Blob content. - :vartype content: str or bytes - :ivar BlobProperties properties: - Stores all the system properties for the blob. - :ivar metadata: - Name-value pairs associated with the blob as metadata. - :ivar bool deleted: - Specify whether the blob was soft deleted. - In other words, if the blob is being retained by the delete retention policy, - this field would be True. The blob could be undeleted or it will be garbage collected after the specified - time period. - ''' - - def __init__(self, name=None, snapshot=None, content=None, props=None, metadata=None, deleted=False): - self.name = name - self.snapshot = snapshot - self.content = content - self.properties = props or BlobProperties() - self.metadata = metadata - self.deleted = deleted - - -class BlobProperties(object): - ''' - Blob Properties - - :ivar str blob_type: - String indicating this blob's type. - :ivar datetime last_modified: - A datetime object representing the last time the blob was modified. - :ivar str etag: - The ETag contains a value that you can use to perform operations - conditionally. - :ivar int content_length: - The length of the content returned. If the entire blob was requested, - the length of blob in bytes. If a subset of the blob was requested, the - length of the returned subset. - :ivar str content_range: - Indicates the range of bytes returned in the event that the client - requested a subset of the blob. - :ivar int append_blob_committed_block_count: - (For Append Blobs) Number of committed blocks in the blob. - :ivar int page_blob_sequence_number: - (For Page Blobs) Sequence number for page blob used for coordinating - concurrent writes. - :ivar bool server_encrypted: - Set to true if the blob is encrypted on the server. - :ivar ~azure.storage.blob.models.CopyProperties copy: - Stores all the copy properties for the blob. - :ivar ~azure.storage.blob.models.ContentSettings content_settings: - Stores all the content settings for the blob. - :ivar ~azure.storage.blob.models.LeaseProperties lease: - Stores all the lease information for the blob. - :ivar StandardBlobTier blob_tier: - Indicates the access tier of the blob. The hot tier is optimized - for storing data that is accessed frequently. The cool storage tier - is optimized for storing data that is infrequently accessed and stored - for at least a month. The archive tier is optimized for storing - data that is rarely accessed and stored for at least six months - with flexible latency requirements. - :ivar datetime blob_tier_change_time: - Indicates when the access tier was last changed. - :ivar bool blob_tier_inferred: - Indicates whether the access tier was inferred by the service. - If false, it indicates that the tier was set explicitly. - :ivar datetime deleted_time: - A datetime object representing the time at which the blob was deleted. - :ivar int remaining_retention_days: - The number of days that the blob will be retained before being permanently deleted by the service. - :ivar datetime creation_time: - Indicates when the blob was created, in UTC. 
- ''' - - def __init__(self): - self.blob_type = None - self.last_modified = None - self.etag = None - self.content_length = None - self.content_range = None - self.append_blob_committed_block_count = None - self.page_blob_sequence_number = None - self.server_encrypted = None - self.copy = CopyProperties() - self.content_settings = ContentSettings() - self.lease = LeaseProperties() - self.blob_tier = None - self.blob_tier_change_time = None - self.blob_tier_inferred = False - self.deleted_time = None - self.remaining_retention_days = None - self.creation_time = None - - -class ContentSettings(object): - ''' - Used to store the content settings of a blob. - - :ivar str content_type: - The content type specified for the blob. If no content type was - specified, the default content type is application/octet-stream. - :ivar str content_encoding: - If the content_encoding has previously been set - for the blob, that value is stored. - :ivar str content_language: - If the content_language has previously been set - for the blob, that value is stored. - :ivar str content_disposition: - content_disposition conveys additional information about how to - process the response payload, and also can be used to attach - additional metadata. If content_disposition has previously been set - for the blob, that value is stored. - :ivar str cache_control: - If the cache_control has previously been set for - the blob, that value is stored. - :ivar str content_md5: - If the content_md5 has been set for the blob, this response - header is stored so that the client can check for message content - integrity. - ''' - - def __init__( - self, content_type=None, content_encoding=None, - content_language=None, content_disposition=None, - cache_control=None, content_md5=None): - self.content_type = content_type - self.content_encoding = content_encoding - self.content_language = content_language - self.content_disposition = content_disposition - self.cache_control = cache_control - self.content_md5 = content_md5 - - def _to_headers(self): - return { - 'x-ms-blob-cache-control': _to_str(self.cache_control), - 'x-ms-blob-content-type': _to_str(self.content_type), - 'x-ms-blob-content-disposition': _to_str(self.content_disposition), - 'x-ms-blob-content-md5': _to_str(self.content_md5), - 'x-ms-blob-content-encoding': _to_str(self.content_encoding), - 'x-ms-blob-content-language': _to_str(self.content_language), - } - - -class CopyProperties(object): - ''' - Blob Copy Properties. - - :ivar str id: - String identifier for the last attempted Copy Blob operation where this blob - was the destination blob. This header does not appear if this blob has never - been the destination in a Copy Blob operation, or if this blob has been - modified after a concluded Copy Blob operation using Set Blob Properties, - Put Blob, or Put Block List. - :ivar str source: - URL up to 2 KB in length that specifies the source blob used in the last attempted - Copy Blob operation where this blob was the destination blob. This header does not - appear if this blob has never been the destination in a Copy Blob operation, or if - this blob has been modified after a concluded Copy Blob operation using - Set Blob Properties, Put Blob, or Put Block List. - :ivar str status: - State of the copy operation identified by Copy ID, with these values: - success: - Copy completed successfully. - pending: - Copy is in progress. Check copy_status_description if intermittent, - non-fatal errors impede copy progress but don't cause failure. 
- aborted: - Copy was ended by Abort Copy Blob. - failed: - Copy failed. See copy_status_description for failure details. - :ivar str progress: - Contains the number of bytes copied and the total bytes in the source in the last - attempted Copy Blob operation where this blob was the destination blob. Can show - between 0 and Content-Length bytes copied. - :ivar datetime completion_time: - Conclusion time of the last attempted Copy Blob operation where this blob was the - destination blob. This value can specify the time of a completed, aborted, or - failed copy attempt. - :ivar str status_description: - only appears when x-ms-copy-status is failed or pending. Describes cause of fatal - or non-fatal copy operation failure. - ''' - - def __init__(self): - self.id = None - self.source = None - self.status = None - self.progress = None - self.completion_time = None - self.status_description = None - - -class LeaseProperties(object): - ''' - Blob Lease Properties. - - :ivar str status: - The lease status of the blob. - Possible values: locked|unlocked - :ivar str state: - Lease state of the blob. - Possible values: available|leased|expired|breaking|broken - :ivar str duration: - When a blob is leased, specifies whether the lease is of infinite or fixed duration. - ''' - - def __init__(self): - self.status = None - self.state = None - self.duration = None - - -class BlobPrefix(object): - ''' - BlobPrefix objects may potentially returned in the blob list when - :func:`~azure.storage.blob.baseblobservice.BaseBlobService.list_blobs` is - used with a delimiter. Prefixes can be thought of as virtual blob directories. - - :ivar str name: The name of the blob prefix. - ''' - - def __init__(self): - self.name = None - - -class BlobBlockState(object): - '''Block blob block types.''' - - Committed = 'Committed' - '''Committed blocks.''' - - Latest = 'Latest' - '''Latest blocks.''' - - Uncommitted = 'Uncommitted' - '''Uncommitted blocks.''' - - -class BlobBlock(object): - ''' - BlockBlob Block class. - - :ivar str id: - Block id. - :ivar str state: - Block state. - Possible valuse: committed|uncommitted - :ivar int size: - Block size in bytes. - ''' - - def __init__(self, id=None, state=BlobBlockState.Latest): - self.id = id - self.state = state - - def _set_size(self, size): - self.size = size - - -class BlobBlockList(object): - ''' - Blob Block List class. - - :ivar committed_blocks: - List of committed blocks. - :vartype committed_blocks: list(:class:`~azure.storage.blob.models.BlobBlock`) - :ivar uncommitted_blocks: - List of uncommitted blocks. - :vartype uncommitted_blocks: list(:class:`~azure.storage.blob.models.BlobBlock`) - ''' - - def __init__(self): - self.committed_blocks = list() - self.uncommitted_blocks = list() - - -class PageRange(object): - ''' - Page Range for page blob. - - :ivar int start: - Start of page range in bytes. - :ivar int end: - End of page range in bytes. - :ivar bool is_cleared: - Indicates if a page range is cleared or not. Only applicable - for get_page_range_diff API. - ''' - - def __init__(self, start=None, end=None, is_cleared=False): - self.start = start - self.end = end - self.is_cleared = is_cleared - - -class ResourceProperties(object): - ''' - Base response for a resource request. - - :ivar str etag: - Opaque etag value that can be used to check if resource - has been modified. - :ivar datetime last_modified: - Datetime for last time resource was modified. 
- ''' - - def __init__(self): - self.last_modified = None - self.etag = None - - -class AppendBlockProperties(ResourceProperties): - ''' - Response for an append block request. - - :ivar int append_offset: - Position to start next append. - :ivar int committed_block_count: - Number of committed append blocks. - ''' - - def __init__(self): - super(ResourceProperties, self).__init__() - self.append_offset = None - self.committed_block_count = None - - -class PageBlobProperties(ResourceProperties): - ''' - Response for a page request. - - :ivar int sequence_number: - Identifer for page blobs to help handle concurrent writes. - ''' - - def __init__(self): - super(ResourceProperties, self).__init__() - self.sequence_number = None - - -class PublicAccess(object): - ''' - Specifies whether data in the container may be accessed publicly and the level of access. - ''' - - OFF = 'off' - ''' - Specifies that there is no public read access for both the container and blobs within the container. - Clients cannot enumerate the containers within the storage account as well as the blobs within the container. - ''' - - Blob = 'blob' - ''' - Specifies public read access for blobs. Blob data within this container can be read - via anonymous request, but container data is not available. Clients cannot enumerate - blobs within the container via anonymous request. - ''' - - Container = 'container' - ''' - Specifies full public read access for container and blob data. Clients can enumerate - blobs within the container via anonymous request, but cannot enumerate containers - within the storage account. - ''' - - -class DeleteSnapshot(object): - ''' - Required if the blob has associated snapshots. Specifies how to handle the snapshots. - ''' - - Include = 'include' - ''' - Delete the base blob and all of its snapshots. - ''' - - Only = 'only' - ''' - Delete only the blob's snapshots and not the blob itself. - ''' - - -class BlockListType(object): - ''' - Specifies whether to return the list of committed blocks, the list of uncommitted - blocks, or both lists together. - ''' - - All = 'all' - '''Both committed and uncommitted blocks.''' - - Committed = 'committed' - '''Committed blocks.''' - - Uncommitted = 'uncommitted' - '''Uncommitted blocks.''' - - -class SequenceNumberAction(object): - '''Sequence number actions.''' - - Increment = 'increment' - ''' - Increments the value of the sequence number by 1. If specifying this option, - do not include the x-ms-blob-sequence-number header. - ''' - - Max = 'max' - ''' - Sets the sequence number to be the higher of the value included with the - request and the value currently stored for the blob. - ''' - - Update = 'update' - '''Sets the sequence number to the value included with the request.''' - - -class _LeaseActions(object): - '''Actions for a lease.''' - - Acquire = 'acquire' - '''Acquire the lease.''' - - Break = 'break' - '''Break the lease.''' - - Change = 'change' - '''Change the lease ID.''' - - Release = 'release' - '''Release the lease.''' - - Renew = 'renew' - '''Renew the lease.''' - - -class _BlobTypes(object): - '''Blob type options.''' - - AppendBlob = 'AppendBlob' - '''Append blob type.''' - - BlockBlob = 'BlockBlob' - '''Block blob type.''' - - PageBlob = 'PageBlob' - '''Page blob type.''' - - -class Include(object): - ''' - Specifies the datasets to include in the blob list response. 
- - :ivar ~azure.storage.blob.models.Include Include.COPY: - Specifies that metadata related to any current or previous Copy Blob operation - should be included in the response. - :ivar ~azure.storage.blob.models.Include Include.METADATA: - Specifies that metadata be returned in the response. - :ivar ~azure.storage.blob.models.Include Include.SNAPSHOTS: - Specifies that snapshots should be included in the enumeration. - :ivar ~azure.storage.blob.models.Include Include.UNCOMMITTED_BLOBS: - Specifies that blobs for which blocks have been uploaded, but which have not - been committed using Put Block List, be included in the response. - :ivar ~azure.storage.blob.models.Include Include.DELETED: - Specifies that deleted blobs should be returned in the response. - ''' - - def __init__(self, snapshots=False, metadata=False, uncommitted_blobs=False, - copy=False, deleted=False, _str=None): - ''' - :param bool snapshots: - Specifies that snapshots should be included in the enumeration. - :param bool metadata: - Specifies that metadata be returned in the response. - :param bool uncommitted_blobs: - Specifies that blobs for which blocks have been uploaded, but which have - not been committed using Put Block List, be included in the response. - :param bool copy: - Specifies that metadata related to any current or previous Copy Blob - operation should be included in the response. - :param bool deleted: - Specifies that deleted blobs should be returned in the response. - :param str _str: - A string representing the includes. - ''' - if not _str: - _str = '' - components = _str.split(',') - self.snapshots = snapshots or ('snapshots' in components) - self.metadata = metadata or ('metadata' in components) - self.uncommitted_blobs = uncommitted_blobs or ('uncommittedblobs' in components) - self.copy = copy or ('copy' in components) - self.deleted = deleted or ('deleted' in components) - - def __or__(self, other): - return Include(_str=str(self) + str(other)) - - def __add__(self, other): - return Include(_str=str(self) + str(other)) - - def __str__(self): - include = (('snapshots,' if self.snapshots else '') + - ('metadata,' if self.metadata else '') + - ('uncommittedblobs,' if self.uncommitted_blobs else '') + - ('copy,' if self.copy else '') + - ('deleted,' if self.deleted else '')) - return include.rstrip(',') - - -Include.COPY = Include(copy=True) -Include.METADATA = Include(metadata=True) -Include.SNAPSHOTS = Include(snapshots=True) -Include.UNCOMMITTED_BLOBS = Include(uncommitted_blobs=True) -Include.DELETED = Include(deleted=True) - - -class BlobPermissions(object): - ''' - BlobPermissions class to be used with - :func:`~azure.storage.blob.baseblobservice.BaseBlobService.generate_blob_shared_access_signature` API. - - :ivar BlobPermissions BlobPermissions.ADD: - Add a block to an append blob. - :ivar BlobPermissions BlobPermissions.CREATE: - Write a new blob, snapshot a blob, or copy a blob to a new blob. - :ivar BlobPermissions BlobPermissions.DELETE: - Delete the blob. - :ivar BlobPermissions BlobPermissions.READ: - Read the content, properties, metadata and block list. Use the blob as the source of a copy operation. - :ivar BlobPermissions BlobPermissions.WRITE: - Create or write content, properties, metadata, or block list. Snapshot or lease - the blob. Resize the blob (page blob only). Use the blob as the destination of a - copy operation within the same account. 
- ''' - - def __init__(self, read=False, add=False, create=False, write=False, - delete=False, _str=None): - ''' - :param bool read: - Read the content, properties, metadata and block list. Use the blob as - the source of a copy operation. - :param bool add: - Add a block to an append blob. - :param bool create: - Write a new blob, snapshot a blob, or copy a blob to a new blob. - :param bool write: - Create or write content, properties, metadata, or block list. Snapshot - or lease the blob. Resize the blob (page blob only). Use the blob as the - destination of a copy operation within the same account. - :param bool delete: - Delete the blob. - :param str _str: - A string representing the permissions. - ''' - if not _str: - _str = '' - self.read = read or ('r' in _str) - self.add = add or ('a' in _str) - self.create = create or ('c' in _str) - self.write = write or ('w' in _str) - self.delete = delete or ('d' in _str) - - def __or__(self, other): - return BlobPermissions(_str=str(self) + str(other)) - - def __add__(self, other): - return BlobPermissions(_str=str(self) + str(other)) - - def __str__(self): - return (('r' if self.read else '') + - ('a' if self.add else '') + - ('c' if self.create else '') + - ('w' if self.write else '') + - ('d' if self.delete else '')) - - -BlobPermissions.ADD = BlobPermissions(add=True) -BlobPermissions.CREATE = BlobPermissions(create=True) -BlobPermissions.DELETE = BlobPermissions(delete=True) -BlobPermissions.READ = BlobPermissions(read=True) -BlobPermissions.WRITE = BlobPermissions(write=True) - - -class ContainerPermissions(object): - ''' - ContainerPermissions class to be used with :func:`~azure.storage.blob.baseblobservice.BaseBlobService.generate_container_shared_access_signature` - API and for the AccessPolicies used with :func:`~azure.storage.blob.baseblobservice.BaseBlobService.set_container_acl`. - - :ivar ContainerPermissions ContainerPermissions.DELETE: - Delete any blob in the container. Note: You cannot grant permissions to - delete a container with a container SAS. Use an account SAS instead. - :ivar ContainerPermissions ContainerPermissions.LIST: - List blobs in the container. - :ivar ContainerPermissions ContainerPermissions.READ: - Read the content, properties, metadata or block list of any blob in the - container. Use any blob in the container as the source of a copy operation. - :ivar ContainerPermissions ContainerPermissions.WRITE: - For any blob in the container, create or write content, properties, - metadata, or block list. Snapshot or lease the blob. Resize the blob - (page blob only). Use the blob as the destination of a copy operation - within the same account. Note: You cannot grant permissions to read or - write container properties or metadata, nor to lease a container, with - a container SAS. Use an account SAS instead. - ''' - - def __init__(self, read=False, add=False, create=False, write=False, delete=False, list=False, - _str=None): - ''' - :param bool read: - Read the content, properties, metadata or block list of any blob in the - container. Use any blob in the container as the source of a copy operation. - :param bool add: - Add a block to any append blob in the container. - :param bool create: - Write a new blob to the container, snapshot any blob in the container, or copy a blob to - a new blob in the container. Note: You cannot grant permissions to create a container - with a container SAS. Use an account SAS to create a container instead. 
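For reference, a minimal sketch of how the permission and include flags defined above compose; it assumes the upstream azure-storage-blob 1.x package (which the vendored copy removed here mirrors) and the names are illustrative only:

from azure.storage.blob.models import BlobPermissions, Include  # upstream 1.x path, for illustration

# BlobPermissions composes by concatenating single-letter codes via __or__.
sas_permissions = BlobPermissions.READ | BlobPermissions.WRITE
print(str(sas_permissions))            # -> "rw"

# Include flags are combined through the constructor; __str__ joins the names.
listing_filter = Include(snapshots=True, metadata=True)
print(str(listing_filter))             # -> "snapshots,metadata"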
- :param bool write: - For any blob in the container, create or write content, properties, - metadata, or block list. Snapshot or lease the blob. Resize the blob - (page blob only). Use the blob as the destination of a copy operation - within the same account. Note: You cannot grant permissions to read or - write container properties or metadata, nor to lease a container, with - a container SAS. Use an account SAS instead. - :param bool delete: - Delete any blob in the container. Note: You cannot grant permissions to - delete a container with a container SAS. Use an account SAS instead. - :param bool list: - List blobs in the container. - :param str _str: - A string representing the permissions. - ''' - if not _str: - _str = '' - self.read = read or ('r' in _str) - self.add = add or ('a' in _str) - self.create = create or ('c' in _str) - self.write = write or ('w' in _str) - self.delete = delete or ('d' in _str) - self.list = list or ('l' in _str) - - def __or__(self, other): - return ContainerPermissions(_str=str(self) + str(other)) - - def __add__(self, other): - return ContainerPermissions(_str=str(self) + str(other)) - - def __str__(self): - return (('r' if self.read else '') + - ('a' if self.add else '') + - ('c' if self.create else '') + - ('w' if self.write else '') + - ('d' if self.delete else '') + - ('l' if self.list else '')) - - -ContainerPermissions.DELETE = ContainerPermissions(delete=True) -ContainerPermissions.LIST = ContainerPermissions(list=True) -ContainerPermissions.READ = ContainerPermissions(read=True) -ContainerPermissions.WRITE = ContainerPermissions(write=True) -ContainerPermissions.ADD = ContainerPermissions(add=True) -ContainerPermissions.CREATE = ContainerPermissions(create=True) - - -class PremiumPageBlobTier(object): - ''' - Specifies the page blob tier to set the blob to. This is only applicable to page - blobs on premium storage accounts. - Please take a look at https://docs.microsoft.com/en-us/azure/storage/storage-premium-storage#scalability-and-performance-targets - for detailed information on the corresponding IOPS and throughtput per PageBlobTier. - ''' - - P4 = 'P4' - ''' P4 Tier ''' - - P6 = 'P6' - ''' P6 Tier ''' - - P10 = 'P10' - ''' P10 Tier ''' - - P20 = 'P20' - ''' P20 Tier ''' - - P30 = 'P30' - ''' P30 Tier ''' - - P40 = 'P40' - ''' P40 Tier ''' - - P50 = 'P50' - ''' P50 Tier ''' - - P60 = 'P60' - ''' P60 Tier ''' - - -class StandardBlobTier(object): - ''' - Specifies the blob tier to set the blob to. This is only applicable for block blobs on standard storage accounts. - ''' - - Archive = 'Archive' - ''' Archive ''' - - Cool = 'Cool' - ''' Cool ''' - - Hot = 'Hot' - ''' Hot ''' - - -class AccountInformation(object): - """ - Holds information related to the storage account. - - :ivar str sku_name: - Name of the storage SKU, also known as account type. - Example: Standard_LRS, Standard_ZRS, Standard_GRS, Standard_RAGRS, Premium_LRS, Premium_ZRS - :ivar str account_kind: - Describes the flavour of the storage account, also known as account kind. - Example: Storage, StorageV2, BlobStorage - """ - def __init__(self): - self.sku_name = None - self.account_kind = None - - -class UserDelegationKey(object): - """ - Represents a user delegation key, provided to the user by Azure Storage - based on their Azure Active Directory access token. - - The fields are saved as simple strings since the user does not have to interact with this object; - to generate an identify SAS, the user can simply pass it to the right API. 
- - :ivar str signed_oid: - Object ID of this token. - :ivar str signed_tid: - Tenant ID of the tenant that issued this token. - :ivar str signed_start: - The datetime this token becomes valid. - :ivar str signed_expiry: - The datetime this token expires. - :ivar str signed_service: - What service this key is valid for. - :ivar str signed_version: - The version identifier of the REST service that created this token. - :ivar str value: - The user delegation key. - """ - def __init__(self): - self.signed_oid = None - self.signed_tid = None - self.signed_start = None - self.signed_expiry = None - self.signed_service = None - self.signed_version = None - self.value = None diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/pageblobservice.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/pageblobservice.py deleted file mode 100644 index 3c3217b285f3..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/pageblobservice.py +++ /dev/null @@ -1,1522 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- -import sys -from os import path - -from ..common._common_conversion import ( - _int_to_str, - _to_str, - _datetime_to_utc_string, - _get_content_md5, -) -from ..common._constants import ( - SERVICE_HOST_BASE, - DEFAULT_PROTOCOL, -) -from ..common._error import ( - _validate_not_none, - _validate_type_bytes, - _validate_encryption_required, - _validate_encryption_unsupported, - _ERROR_VALUE_NEGATIVE, -) -from ..common._http import HTTPRequest -from ..common._serialization import ( - _get_data_bytes_only, - _add_metadata_headers, -) -from ._deserialization import ( - _convert_xml_to_page_ranges, - _parse_page_properties, - _parse_base_properties, -) -from ._encryption import _generate_blob_encryption_data -from ._error import ( - _ERROR_PAGE_BLOB_SIZE_ALIGNMENT, -) -from ._serialization import ( - _get_path, - _validate_and_format_range_headers, -) -from ._upload_chunking import ( - _PageBlobChunkUploader, - _upload_blob_chunks, -) -from .baseblobservice import BaseBlobService -from .models import ( - _BlobTypes, - ResourceProperties) - -if sys.version_info >= (3,): - from io import BytesIO -else: - from cStringIO import StringIO as BytesIO - -# Keep this value sync with _ERROR_PAGE_BLOB_SIZE_ALIGNMENT -_PAGE_ALIGNMENT = 512 - - -class PageBlobService(BaseBlobService): - ''' - Page blobs are a collection of 512-byte pages optimized for random read and - write operations. To create a page blob, you initialize the page blob and - specify the maximum size the page blob will grow. To add or update the - contents of a page blob, you write a page or pages by specifying an offset - and a range that align to 512-byte page boundaries. A write to a page blob - can overwrite just one page, some pages, or up to 4 MB of the page blob. - Writes to page blobs happen in-place and are immediately committed to the - blob. The maximum size for a page blob is 8 TB. - - :ivar int MAX_PAGE_SIZE: - The size of the pages put by create_blob_from_* methods. Smaller pages - may be put if there is less data provided. The maximum page size the service - supports is 4MB. When using the create_blob_from_* methods, empty pages are skipped. 
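The 512-byte page alignment described above applies both to individual writes and to the blob's overall length; a small standalone sketch of the rounding involved (the helper below is not part of the SDK, just an illustration):

PAGE_SIZE = 512                      # page blobs are collections of 512-byte pages
MAX_PAGE_BLOB_SIZE = 8 * 1024 ** 4   # documented 8 TB ceiling

def aligned_page_blob_size(requested_bytes):
    # Round up to the next 512-byte boundary so create_blob/update_page accept it.
    if requested_bytes < 0 or requested_bytes > MAX_PAGE_BLOB_SIZE:
        raise ValueError("page blob size out of range")
    return (requested_bytes + PAGE_SIZE - 1) // PAGE_SIZE * PAGE_SIZE

assert aligned_page_blob_size(1000) == 1024
assert aligned_page_blob_size(512) == 512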
- ''' - - MAX_PAGE_SIZE = 4 * 1024 * 1024 - - def __init__(self, account_name=None, account_key=None, sas_token=None, is_emulated=False, - protocol=DEFAULT_PROTOCOL, endpoint_suffix=SERVICE_HOST_BASE, custom_domain=None, - request_session=None, connection_string=None, socket_timeout=None, token_credential=None): - ''' - :param str account_name: - The storage account name. This is used to authenticate requests - signed with an account key and to construct the storage endpoint. It - is required unless a connection string is given, or if a custom - domain is used with anonymous authentication. - :param str account_key: - The storage account key. This is used for shared key authentication. - If neither account key or sas token is specified, anonymous access - will be used. - :param str sas_token: - A shared access signature token to use to authenticate requests - instead of the account key. If account key and sas token are both - specified, account key will be used to sign. If neither are - specified, anonymous access will be used. - :param bool is_emulated: - Whether to use the emulator. Defaults to False. If specified, will - override all other parameters besides connection string and request - session. - :param str protocol: - The protocol to use for requests. Defaults to https. - :param str endpoint_suffix: - The host base component of the url, minus the account name. Defaults - to Azure (core.windows.net). Override this to use the China cloud - (core.chinacloudapi.cn). - :param str custom_domain: - The custom domain to use. This can be set in the Azure Portal. For - example, 'www.mydomain.com'. - :param requests.Session request_session: - The session object to use for http requests. - :param str connection_string: - If specified, this will override all other parameters besides - request session. See - http://azure.microsoft.com/en-us/documentation/articles/storage-configure-connection-string/ - for the connection string format. - :param int socket_timeout: - If specified, this will override the default socket timeout. The timeout specified is in seconds. - See DEFAULT_SOCKET_TIMEOUT in _constants.py for the default value. - :param token_credential: - A token credential used to authenticate HTTPS requests. The token value - should be updated before its expiration. - :type `~azure.storage.common.TokenCredential` - ''' - self.blob_type = _BlobTypes.PageBlob - super(PageBlobService, self).__init__( - account_name, account_key, sas_token, is_emulated, protocol, endpoint_suffix, - custom_domain, request_session, connection_string, socket_timeout, token_credential) - - def create_blob( - self, container_name, blob_name, content_length, content_settings=None, - sequence_number=None, metadata=None, lease_id=None, if_modified_since=None, - if_unmodified_since=None, if_match=None, if_none_match=None, timeout=None, premium_page_blob_tier=None): - ''' - Creates a new Page Blob. - - See create_blob_from_* for high level functions that handle the - creation and upload of large blobs with automatic chunking and - progress notifications. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of blob to create or update. - :param int content_length: - Required. This header specifies the maximum size - for the page blob, up to 1 TB. The page blob size must be aligned - to a 512-byte boundary. - :param ~azure.storage.blob.models.ContentSettings content_settings: - ContentSettings object used to set properties on the blob. 
- :param int sequence_number: - The sequence number is a user-controlled value that you can use to - track requests. The value of the sequence number must be between 0 - and 2^63 - 1.The default value is 0. - :param metadata: - Name-value pairs associated with the blob as metadata. - :type metadata: dict(str, str) - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. - :param PremiumPageBlobTier premium_page_blob_tier: - A page blob tier value to set the blob to. The tier correlates to the size of the - blob and number of allowed IOPS. This is only applicable to page blobs on - premium storage accounts. - :return: ETag and last modified properties for the new Page Blob - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key) - - return self._create_blob( - container_name, - blob_name, - content_length, - content_settings=content_settings, - sequence_number=sequence_number, - metadata=metadata, - lease_id=lease_id, - premium_page_blob_tier=premium_page_blob_tier, - if_modified_since=if_modified_since, - if_unmodified_since=if_unmodified_since, - if_match=if_match, - if_none_match=if_none_match, - timeout=timeout - ) - - def incremental_copy_blob(self, container_name, blob_name, copy_source, - metadata=None, destination_if_modified_since=None, destination_if_unmodified_since=None, - destination_if_match=None, destination_if_none_match=None, destination_lease_id=None, - source_lease_id=None, timeout=None): - ''' - Copies an incremental copy of a blob asynchronously. This operation returns a copy operation - properties object, including a copy ID you can use to check or abort the - copy operation. The Blob service copies blobs on a best-effort basis. - - The source blob for an incremental copy operation must be a page blob. - Call get_blob_properties on the destination blob to check the status of the copy operation. - The final blob will be committed when the copy completes. - - :param str container_name: - Name of the destination container. The container must exist. - :param str blob_name: - Name of the destination blob. If the destination blob exists, it will - be overwritten. 
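A minimal sketch of initializing the service client and creating an empty page blob with the create_blob call above; the account, key, container, and blob names are placeholders, and the import assumes the upstream azure-storage-blob 1.x package rather than the vendored module being deleted:

from azure.storage.blob import PageBlobService  # upstream 1.x equivalent of the vendored module

# Placeholder credentials; in practice these come from configuration.
service = PageBlobService(account_name="myaccount", account_key="<account-key>")

# content_length fixes the maximum size of the page blob and must be 512-byte aligned.
props = service.create_blob("mycontainer", "disk.vhd", content_length=1024 * 1024)
print(props.etag, props.last_modified)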
Otherwise, it will be created. - :param str copy_source: - A URL of up to 2 KB in length that specifies an Azure page blob. - The value should be URL-encoded as it would appear in a request URI. - The copy source must be a snapshot and include a valid SAS token or be public. - Example: - https://myaccount.blob.core.windows.net/mycontainer/myblob?snapshot=&sastoken - :param metadata: - Name-value pairs associated with the blob as metadata. If no name-value - pairs are specified, the operation will copy the metadata from the - source blob or file to the destination blob. If one or more name-value - pairs are specified, the destination blob is created with the specified - metadata, and metadata is not copied from the source blob or file. - :type metadata: dict(str, str). - :param datetime destination_if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this conditional header to copy the blob only - if the destination blob has been modified since the specified date/time. - If the destination blob has not been modified, the Blob service returns - status code 412 (Precondition Failed). - :param datetime destination_if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this conditional header to copy the blob only if the destination blob - has not been modified since the specified ate/time. If the destination blob - has been modified, the Blob service returns status code 412 (Precondition Failed). - :param ETag destination_if_match: - An ETag value, or the wildcard character (*). Specify an ETag value for - this conditional header to copy the blob only if the specified ETag value - matches the ETag value for an existing destination blob. If the ETag for - the destination blob does not match the ETag specified for If-Match, the - Blob service returns status code 412 (Precondition Failed). - :param ETag destination_if_none_match: - An ETag value, or the wildcard character (*). Specify an ETag value for - this conditional header to copy the blob only if the specified ETag value - does not match the ETag value for the destination blob. Specify the wildcard - character (*) to perform the operation only if the destination blob does not - exist. If the specified condition isn't met, the Blob service returns status - code 412 (Precondition Failed). - :param str destination_lease_id: - The lease ID specified for this header must match the lease ID of the - destination blob. If the request does not include the lease ID or it is not - valid, the operation fails with status code 412 (Precondition Failed). - :param str source_lease_id: - Specify this to perform the Copy Blob operation only if - the lease ID given matches the active lease ID of the source blob. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: Copy operation properties such as status, source, and ID. 
- :rtype: :class:`~azure.storage.blob.models.CopyProperties` - ''' - return self._copy_blob(container_name, blob_name, copy_source, - metadata, - source_if_modified_since=None, source_if_unmodified_since=None, - source_if_match=None, source_if_none_match=None, - destination_if_modified_since=destination_if_modified_since, - destination_if_unmodified_since=destination_if_unmodified_since, - destination_if_match=destination_if_match, - destination_if_none_match=destination_if_none_match, - destination_lease_id=destination_lease_id, - source_lease_id=source_lease_id, timeout=timeout, - incremental_copy=True) - - def update_page( - self, container_name, blob_name, page, start_range, end_range, - validate_content=False, lease_id=None, if_sequence_number_lte=None, - if_sequence_number_lt=None, if_sequence_number_eq=None, - if_modified_since=None, if_unmodified_since=None, - if_match=None, if_none_match=None, timeout=None): - ''' - Updates a range of pages. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param bytes page: - Content of the page. - :param int start_range: - Start of byte range to use for writing to a section of the blob. - Pages must be aligned with 512-byte boundaries, the start offset - must be a modulus of 512 and the end offset must be a modulus of - 512-1. Examples of valid byte ranges are 0-511, 512-1023, etc. - :param int end_range: - End of byte range to use for writing to a section of the blob. - Pages must be aligned with 512-byte boundaries, the start offset - must be a modulus of 512 and the end offset must be a modulus of - 512-1. Examples of valid byte ranges are 0-511, 512-1023, etc. - :param bool validate_content: - If true, calculates an MD5 hash of the page content. The storage - service checks the hash of the content that has arrived - with the hash that was sent. This is primarily valuable for detecting - bitflips on the wire if using http instead of https as https (the default) - will already validate. Note that this MD5 hash is not stored with the - blob. - :param str lease_id: - Required if the blob has an active lease. - :param int if_sequence_number_lte: - If the blob's sequence number is less than or equal to - the specified value, the request proceeds; otherwise it fails. - :param int if_sequence_number_lt: - If the blob's sequence number is less than the specified - value, the request proceeds; otherwise it fails. - :param int if_sequence_number_eq: - If the blob's sequence number is equal to the specified - value, the request proceeds; otherwise it fails. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify an ETag value for this conditional - header to write the page only if the blob's ETag value matches the - value specified. 
If the values do not match, the Blob service fails. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify an ETag value for this conditional - header to write the page only if the blob's ETag value does not - match the value specified. If the values are identical, the Blob - service fails. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: ETag and last modified properties for the updated Page Blob - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - - _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key) - - return self._update_page( - container_name, - blob_name, - page, - start_range, - end_range, - validate_content=validate_content, - lease_id=lease_id, - if_sequence_number_lte=if_sequence_number_lte, - if_sequence_number_lt=if_sequence_number_lt, - if_sequence_number_eq=if_sequence_number_eq, - if_modified_since=if_modified_since, - if_unmodified_since=if_unmodified_since, - if_match=if_match, - if_none_match=if_none_match, - timeout=timeout - ) - - def update_page_from_url(self, container_name, blob_name, start_range, end_range, copy_source_url, - source_range_start, source_content_md5=None, source_if_modified_since=None, - source_if_unmodified_since=None, source_if_match=None, source_if_none_match=None, - lease_id=None, if_sequence_number_lte=None, if_sequence_number_lt=None, - if_sequence_number_eq=None, if_modified_since=None, if_unmodified_since=None, - if_match=None, if_none_match=None, timeout=None): - """ - Updates a range of pages to a page blob where the contents are read from a URL. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of blob. - :param int start_range: - Start of byte range to use for writing to a section of the blob. - Pages must be aligned with 512-byte boundaries, the start offset - must be a modulus of 512 and the end offset must be a modulus of - 512-1. Examples of valid byte ranges are 0-511, 512-1023, etc. - :param int end_range: - End of byte range to use for writing to a section of the blob. - Pages must be aligned with 512-byte boundaries, the start offset - must be a modulus of 512 and the end offset must be a modulus of - 512-1. Examples of valid byte ranges are 0-511, 512-1023, etc. - :param str copy_source_url: - The URL of the source data. It can point to any Azure Blob or File, that is either public or has a - shared access signature attached. - :param int source_range_start: - This indicates the start of the range of bytes(inclusive) that has to be taken from the copy source. - The service will read the same number of bytes as the destination range (end_range-start_range). - :param str source_content_md5: - If given, the service will calculate the MD5 hash of the block content and compare against this value. - :param datetime source_if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the source resource has been modified since the specified time. - :param datetime source_if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. 
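Continuing that sketch, a short example of writing a single 512-byte page with update_page as documented above; the payload is padded to the page boundary:

# Pages must cover an exact 512-byte range, e.g. bytes 0-511.
page = b"hello page blob".ljust(512, b"\0")

service.update_page("mycontainer", "disk.vhd", page,
                    start_range=0, end_range=511,
                    validate_content=True)   # adds an MD5 transport check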
- Specify this header to perform the operation only if - the source resource has not been modified since the specified date/time. - :param str source_if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the source resource's ETag matches the value specified. - :param str source_if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the source resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the source resource does not exist, and fail the - operation if it does exist. - :param str lease_id: - Required if the blob has an active lease. - :param int if_sequence_number_lte: - If the blob's sequence number is less than or equal to - the specified value, the request proceeds; otherwise it fails. - :param int if_sequence_number_lt: - If the blob's sequence number is less than the specified - value, the request proceeds; otherwise it fails. - :param int if_sequence_number_eq: - If the blob's sequence number is equal to the specified - value, the request proceeds; otherwise it fails. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. 
- """ - _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key) - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('copy_source_url', copy_source_url) - - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.query = { - 'comp': 'page', - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-page-write': 'update', - 'x-ms-copy-source': copy_source_url, - 'x-ms-source-content-md5': source_content_md5, - 'x-ms-source-if-Modified-Since': _datetime_to_utc_string(source_if_modified_since), - 'x-ms-source-if-Unmodified-Since': _datetime_to_utc_string(source_if_unmodified_since), - 'x-ms-source-if-Match': _to_str(source_if_match), - 'x-ms-source-if-None-Match': _to_str(source_if_none_match), - 'x-ms-lease-id': _to_str(lease_id), - 'x-ms-if-sequence-number-le': _to_str(if_sequence_number_lte), - 'x-ms-if-sequence-number-lt': _to_str(if_sequence_number_lt), - 'x-ms-if-sequence-number-eq': _to_str(if_sequence_number_eq), - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'If-Match': _to_str(if_match), - 'If-None-Match': _to_str(if_none_match) - } - _validate_and_format_range_headers( - request, - start_range, - end_range, - align_to_page=True) - _validate_and_format_range_headers( - request, - source_range_start, - source_range_start+(end_range-start_range), - range_header_name="x-ms-source-range") - - return self._perform_request(request, _parse_page_properties) - - def clear_page( - self, container_name, blob_name, start_range, end_range, - lease_id=None, if_sequence_number_lte=None, - if_sequence_number_lt=None, if_sequence_number_eq=None, - if_modified_since=None, if_unmodified_since=None, - if_match=None, if_none_match=None, timeout=None): - ''' - Clears a range of pages. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param int start_range: - Start of byte range to use for writing to a section of the blob. - Pages must be aligned with 512-byte boundaries, the start offset - must be a modulus of 512 and the end offset must be a modulus of - 512-1. Examples of valid byte ranges are 0-511, 512-1023, etc. - :param int end_range: - End of byte range to use for writing to a section of the blob. - Pages must be aligned with 512-byte boundaries, the start offset - must be a modulus of 512 and the end offset must be a modulus of - 512-1. Examples of valid byte ranges are 0-511, 512-1023, etc. - :param str lease_id: - Required if the blob has an active lease. - :param int if_sequence_number_lte: - If the blob's sequence number is less than or equal to - the specified value, the request proceeds; otherwise it fails. - :param int if_sequence_number_lt: - If the blob's sequence number is less than the specified - value, the request proceeds; otherwise it fails. - :param int if_sequence_number_eq: - If the blob's sequence number is equal to the specified - value, the request proceeds; otherwise it fails. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. 
- Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify an ETag value for this conditional - header to write the page only if the blob's ETag value matches the - value specified. If the values do not match, the Blob service fails. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify an ETag value for this conditional - header to write the page only if the blob's ETag value does not - match the value specified. If the values are identical, the Blob - service fails. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: ETag and last modified properties for the updated Page Blob - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.query = { - 'comp': 'page', - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-page-write': 'clear', - 'x-ms-lease-id': _to_str(lease_id), - 'x-ms-if-sequence-number-le': _to_str(if_sequence_number_lte), - 'x-ms-if-sequence-number-lt': _to_str(if_sequence_number_lt), - 'x-ms-if-sequence-number-eq': _to_str(if_sequence_number_eq), - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'If-Match': _to_str(if_match), - 'If-None-Match': _to_str(if_none_match) - } - _validate_and_format_range_headers( - request, - start_range, - end_range, - align_to_page=True) - - return self._perform_request(request, _parse_page_properties) - - def get_page_ranges( - self, container_name, blob_name, snapshot=None, start_range=None, - end_range=None, lease_id=None, if_modified_since=None, - if_unmodified_since=None, if_match=None, if_none_match=None, timeout=None): - ''' - Returns the list of valid page ranges for a Page Blob or snapshot - of a page blob. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param str snapshot: - The snapshot parameter is an opaque DateTime value that, - when present, specifies the blob snapshot to retrieve information - from. - :param int start_range: - Start of byte range to use for getting valid page ranges. - If no end_range is given, all bytes after the start_range will be searched. - Pages must be aligned with 512-byte boundaries, the start offset - must be a modulus of 512 and the end offset must be a modulus of - 512-1. Examples of valid byte ranges are 0-511, 512-, etc. - :param int end_range: - End of byte range to use for getting valid page ranges. - If end_range is given, start_range must be provided. - This range will return valid page ranges for from the offset start up to - offset end. - Pages must be aligned with 512-byte boundaries, the start offset - must be a modulus of 512 and the end offset must be a modulus of - 512-1. 
Examples of valid byte ranges are 0-511, 512-, etc. - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: A list of valid Page Ranges for the Page Blob. - :rtype: list(:class:`~azure.storage.blob.models.PageRange`) - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - request = HTTPRequest() - request.method = 'GET' - request.host_locations = self._get_host_locations(secondary=True) - request.path = _get_path(container_name, blob_name) - request.query = { - 'comp': 'pagelist', - 'snapshot': _to_str(snapshot), - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-lease-id': _to_str(lease_id), - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'If-Match': _to_str(if_match), - 'If-None-Match': _to_str(if_none_match), - } - if start_range is not None: - _validate_and_format_range_headers( - request, - start_range, - end_range, - start_range_required=False, - end_range_required=False, - align_to_page=True) - - return self._perform_request(request, _convert_xml_to_page_ranges) - - def get_page_ranges_diff( - self, container_name, blob_name, previous_snapshot, snapshot=None, - start_range=None, end_range=None, lease_id=None, if_modified_since=None, - if_unmodified_since=None, if_match=None, if_none_match=None, timeout=None): - ''' - The response will include only the pages that are different between either a - recent snapshot or the current blob and a previous snapshot, including pages - that were cleared. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param str previous_snapshot: - The snapshot parameter is an opaque DateTime value that - specifies a previous blob snapshot to be compared - against a more recent snapshot or the current blob. - :param str snapshot: - The snapshot parameter is an opaque DateTime value that - specifies a more recent blob snapshot to be compared - against a previous snapshot (previous_snapshot). - :param int start_range: - Start of byte range to use for getting different page ranges. 
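Still assuming the client from the earlier sketch, get_page_ranges returns PageRange objects describing which 512-byte ranges currently hold data:

ranges = service.get_page_ranges("mycontainer", "disk.vhd")
for page_range in ranges:
    # start/end are byte offsets; is_cleared is only meaningful for the diff variant.
    print(page_range.start, page_range.end, page_range.is_cleared)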
- If no end_range is given, all bytes after the start_range will be searched. - Pages must be aligned with 512-byte boundaries, the start offset - must be a modulus of 512 and the end offset must be a modulus of - 512-1. Examples of valid byte ranges are 0-511, 512-, etc. - :param int end_range: - End of byte range to use for getting different page ranges. - If end_range is given, start_range must be provided. - This range will return valid page ranges for from the offset start up to - offset end. - Pages must be aligned with 512-byte boundaries, the start offset - must be a modulus of 512 and the end offset must be a modulus of - 512-1. Examples of valid byte ranges are 0-511, 512-, etc. - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: A list of different Page Ranges for the Page Blob. - :rtype: list(:class:`~azure.storage.blob.models.PageRange`) - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('previous_snapshot', previous_snapshot) - request = HTTPRequest() - request.method = 'GET' - request.host_locations = self._get_host_locations(secondary=True) - request.path = _get_path(container_name, blob_name) - request.query = { - 'comp': 'pagelist', - 'snapshot': _to_str(snapshot), - 'prevsnapshot': _to_str(previous_snapshot), - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-lease-id': _to_str(lease_id), - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'If-Match': _to_str(if_match), - 'If-None-Match': _to_str(if_none_match), - } - if start_range is not None: - _validate_and_format_range_headers( - request, - start_range, - end_range, - start_range_required=False, - end_range_required=False, - align_to_page=True) - - return self._perform_request(request, _convert_xml_to_page_ranges) - - def set_sequence_number( - self, container_name, blob_name, sequence_number_action, sequence_number=None, - lease_id=None, if_modified_since=None, if_unmodified_since=None, - if_match=None, if_none_match=None, timeout=None): - - ''' - Sets the blob sequence number. 
- - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param str sequence_number_action: - This property indicates how the service should modify the blob's sequence - number. See :class:`~azure.storage.blob.models.SequenceNumberAction` for more information. - :param str sequence_number: - This property sets the blob's sequence number. The sequence number is a - user-controlled property that you can use to track requests and manage - concurrency issues. - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: ETag and last modified properties for the updated Page Blob - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('sequence_number_action', sequence_number_action) - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.query = { - 'comp': 'properties', - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-blob-sequence-number': _to_str(sequence_number), - 'x-ms-sequence-number-action': _to_str(sequence_number_action), - 'x-ms-lease-id': _to_str(lease_id), - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'If-Match': _to_str(if_match), - 'If-None-Match': _to_str(if_none_match), - } - - return self._perform_request(request, _parse_page_properties) - - def resize_blob( - self, container_name, blob_name, content_length, - lease_id=None, if_modified_since=None, if_unmodified_since=None, - if_match=None, if_none_match=None, timeout=None): - - ''' - Resizes a page blob to the specified size. If the specified value is less - than the current size of the blob, then all pages above the specified value - are cleared. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of existing blob. - :param int content_length: - Size to resize blob to. - :param str lease_id: - Required if the blob has an active lease. 
- :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. - :return: ETag and last modified properties for the updated Page Blob - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('content_length', content_length) - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.query = { - 'comp': 'properties', - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-blob-content-length': _to_str(content_length), - 'x-ms-lease-id': _to_str(lease_id), - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'If-Match': _to_str(if_match), - 'If-None-Match': _to_str(if_none_match), - } - - return self._perform_request(request, _parse_page_properties) - - # ----Convenience APIs----------------------------------------------------- - - def create_blob_from_path( - self, container_name, blob_name, file_path, content_settings=None, - metadata=None, validate_content=False, progress_callback=None, max_connections=2, - lease_id=None, if_modified_since=None, if_unmodified_since=None, - if_match=None, if_none_match=None, timeout=None, premium_page_blob_tier=None): - ''' - Creates a new blob from a file path, or updates the content of an - existing blob, with automatic chunking and progress notifications. - Empty chunks are skipped, while non-emtpy ones(even if only partly filled) are uploaded. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of blob to create or update. - :param str file_path: - Path of the file to upload as the blob content. - :param ~azure.storage.blob.models.ContentSettings content_settings: - ContentSettings object used to set blob properties. - :param metadata: - Name-value pairs associated with the blob as metadata. - :type metadata: dict(str, str) - :param bool validate_content: - If true, calculates an MD5 hash for each page of the blob. The storage - service checks the hash of the content that has arrived with the hash - that was sent. 
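Continuing the same illustrative sketch, resize_blob and set_sequence_number shown above can be combined to shrink a blob and stamp a sequence number for concurrency checks:

from azure.storage.blob.models import SequenceNumberAction  # defined in the models module above

# Shrink to 512 KB; pages beyond the new length are cleared by the service.
service.resize_blob("mycontainer", "disk.vhd", content_length=512 * 1024)

# Explicitly set the sequence number used by the if_sequence_number_* preconditions.
service.set_sequence_number("mycontainer", "disk.vhd",
                            SequenceNumberAction.Update, sequence_number=7)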
This is primarily valuable for detecting bitflips on
-            the wire if using http instead of https as https (the default) will
-            already validate. Note that this MD5 hash is not stored with the
-            blob.
-        :param progress_callback:
-            Callback for progress with signature function(current, total) where
-            current is the number of bytes transferred so far, and total is the
-            size of the blob, or None if the total size is unknown.
-        :type progress_callback: func(current, total)
-        :param int max_connections:
-            Maximum number of parallel connections to use.
-        :param str lease_id:
-            Required if the blob has an active lease.
-        :param datetime if_modified_since:
-            A DateTime value. Azure expects the date value passed in to be UTC.
-            If timezone is included, any non-UTC datetimes will be converted to UTC.
-            If a date is passed in without timezone info, it is assumed to be UTC.
-            Specify this header to perform the operation only
-            if the resource has been modified since the specified time.
-        :param datetime if_unmodified_since:
-            A DateTime value. Azure expects the date value passed in to be UTC.
-            If timezone is included, any non-UTC datetimes will be converted to UTC.
-            If a date is passed in without timezone info, it is assumed to be UTC.
-            Specify this header to perform the operation only if
-            the resource has not been modified since the specified date/time.
-        :param str if_match:
-            An ETag value, or the wildcard character (*). Specify this header to perform
-            the operation only if the resource's ETag matches the value specified.
-        :param str if_none_match:
-            An ETag value, or the wildcard character (*). Specify this header
-            to perform the operation only if the resource's ETag does not match
-            the value specified. Specify the wildcard character (*) to perform
-            the operation only if the resource does not exist, and fail the
-            operation if it does exist.
-        :param int timeout:
-            The timeout parameter is expressed in seconds. This method may make
-            multiple calls to the Azure service and the timeout will apply to
-            each call individually.
-        :param premium_page_blob_tier:
-            A page blob tier value to set the blob to. The tier correlates to the size of the
-            blob and number of allowed IOPS. This is only applicable to page blobs on
-            premium storage accounts.
-
-        :return: ETag and last modified properties for the Page Blob
-        :rtype: :class:`~azure.storage.blob.models.ResourceProperties`
-        '''
-        _validate_not_none('container_name', container_name)
-        _validate_not_none('blob_name', blob_name)
-        _validate_not_none('file_path', file_path)
-
-        count = path.getsize(file_path)
-        with open(file_path, 'rb') as stream:
-            return self.create_blob_from_stream(
-                container_name=container_name,
-                blob_name=blob_name,
-                stream=stream,
-                count=count,
-                content_settings=content_settings,
-                metadata=metadata,
-                validate_content=validate_content,
-                progress_callback=progress_callback,
-                max_connections=max_connections,
-                lease_id=lease_id,
-                if_modified_since=if_modified_since,
-                if_unmodified_since=if_unmodified_since,
-                if_match=if_match,
-                if_none_match=if_none_match,
-                timeout=timeout,
-                premium_page_blob_tier=premium_page_blob_tier)
-
-    def create_blob_from_stream(
-            self, container_name, blob_name, stream, count, content_settings=None,
-            metadata=None, validate_content=False, progress_callback=None,
-            max_connections=2, lease_id=None, if_modified_since=None,
-            if_unmodified_since=None, if_match=None, if_none_match=None, timeout=None,
-            premium_page_blob_tier=None):
-        '''
-        Creates a new blob from a file/stream, or updates the content of an
-        existing blob, with automatic chunking and progress notifications.
-        Empty chunks are skipped, while non-empty ones (even if only partly filled) are uploaded.
-
-        :param str container_name:
-            Name of existing container.
-        :param str blob_name:
-            Name of blob to create or update.
-        :param io.IOBase stream:
-            Opened file/stream to upload as the blob content.
-        :param int count:
-            Number of bytes to read from the stream. This is required; a page
-            blob cannot be created if the count is unknown.
-        :param ~azure.storage.blob.models.ContentSettings content_settings:
-            ContentSettings object used to set the blob properties.
-        :param metadata:
-            Name-value pairs associated with the blob as metadata.
-        :type metadata: dict(str, str)
-        :param bool validate_content:
-            If true, calculates an MD5 hash for each page of the blob. The storage
-            service checks the hash of the content that has arrived with the hash
-            that was sent. This is primarily valuable for detecting bitflips on
-            the wire if using http instead of https as https (the default) will
-            already validate. Note that this MD5 hash is not stored with the
-            blob.
-        :param progress_callback:
-            Callback for progress with signature function(current, total) where
-            current is the number of bytes transferred so far, and total is the
-            size of the blob, or None if the total size is unknown.
-        :type progress_callback: func(current, total)
-        :param int max_connections:
-            Maximum number of parallel connections to use. Note that parallel upload
-            requires the stream to be seekable.
-        :param str lease_id:
-            Required if the blob has an active lease.
-        :param datetime if_modified_since:
-            A DateTime value. Azure expects the date value passed in to be UTC.
-            If timezone is included, any non-UTC datetimes will be converted to UTC.
-            If a date is passed in without timezone info, it is assumed to be UTC.
-            Specify this header to perform the operation only
-            if the resource has been modified since the specified time.
-        :param datetime if_unmodified_since:
-            A DateTime value. Azure expects the date value passed in to be UTC.
-            If timezone is included, any non-UTC datetimes will be converted to UTC.
-            If a date is passed in without timezone info, it is assumed to be UTC.
- Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. This method may make - multiple calls to the Azure service and the timeout will apply to - each call individually. - :param premium_page_blob_tier: - A page blob tier value to set the blob to. The tier correlates to the size of the - blob and number of allowed IOPS. This is only applicable to page blobs on - premium storage accounts. - :return: ETag and last modified properties for the Page Blob - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('stream', stream) - _validate_not_none('count', count) - _validate_encryption_required(self.require_encryption, self.key_encryption_key) - - if count < 0: - raise ValueError(_ERROR_VALUE_NEGATIVE.format('count')) - - if count % _PAGE_ALIGNMENT != 0: - raise ValueError(_ERROR_PAGE_BLOB_SIZE_ALIGNMENT.format(count)) - - cek, iv, encryption_data = None, None, None - if self.key_encryption_key is not None: - cek, iv, encryption_data = _generate_blob_encryption_data(self.key_encryption_key) - - response = self._create_blob( - container_name=container_name, - blob_name=blob_name, - content_length=count, - content_settings=content_settings, - metadata=metadata, - lease_id=lease_id, - premium_page_blob_tier=premium_page_blob_tier, - if_modified_since=if_modified_since, - if_unmodified_since=if_unmodified_since, - if_match=if_match, - if_none_match=if_none_match, - timeout=timeout, - encryption_data=encryption_data - ) - - if count == 0: - return response - - # _upload_blob_chunks returns the block ids for block blobs so resource_properties - # is passed as a parameter to get the last_modified and etag for page and append blobs. 
- # this info is not needed for block_blobs since _put_block_list is called after which gets this info - resource_properties = ResourceProperties() - _upload_blob_chunks( - blob_service=self, - container_name=container_name, - blob_name=blob_name, - blob_size=count, - block_size=self.MAX_PAGE_SIZE, - stream=stream, - max_connections=max_connections, - progress_callback=progress_callback, - validate_content=validate_content, - lease_id=lease_id, - uploader_class=_PageBlobChunkUploader, - if_match=response.etag, - timeout=timeout, - content_encryption_key=cek, - initialization_vector=iv, - resource_properties=resource_properties - ) - - return resource_properties - - def create_blob_from_bytes( - self, container_name, blob_name, blob, index=0, count=None, - content_settings=None, metadata=None, validate_content=False, - progress_callback=None, max_connections=2, lease_id=None, - if_modified_since=None, if_unmodified_since=None, if_match=None, - if_none_match=None, timeout=None, premium_page_blob_tier=None): - ''' - Creates a new blob from an array of bytes, or updates the content - of an existing blob, with automatic chunking and progress - notifications. Empty chunks are skipped, while non-emtpy ones(even if only partly filled) are uploaded. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of blob to create or update. - :param bytes blob: - Content of blob as an array of bytes. - :param int index: - Start index in the byte array. - :param int count: - Number of bytes to upload. Set to None or negative value to upload - all bytes starting from index. - :param ~azure.storage.blob.models.ContentSettings content_settings: - ContentSettings object used to set blob properties. - :param metadata: - Name-value pairs associated with the blob as metadata. - :type metadata: dict(str, str) - :param bool validate_content: - If true, calculates an MD5 hash for each page of the blob. The storage - service checks the hash of the content that has arrived with the hash - that was sent. This is primarily valuable for detecting bitflips on - the wire if using http instead of https as https (the default) will - already validate. Note that this MD5 hash is not stored with the - blob. - :param progress_callback: - Callback for progress with signature function(current, total) where - current is the number of bytes transfered so far, and total is the - size of the blob, or None if the total size is unknown. - :type progress_callback: func(current, total) - :param int max_connections: - Maximum number of parallel connections to use. - :param str lease_id: - Required if the blob has an active lease. - :param datetime if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. - :param datetime if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. - :param str if_match: - An ETag value, or the wildcard character (*). 
Specify this header to perform - the operation only if the resource's ETag matches the value specified. - :param str if_none_match: - An ETag value, or the wildcard character (*). Specify this header - to perform the operation only if the resource's ETag does not match - the value specified. Specify the wildcard character (*) to perform - the operation only if the resource does not exist, and fail the - operation if it does exist. - :param int timeout: - The timeout parameter is expressed in seconds. This method may make - multiple calls to the Azure service and the timeout will apply to - each call individually. - :param premium_page_blob_tier: - A page blob tier value to set the blob to. The tier correlates to the size of the - blob and number of allowed IOPS. This is only applicable to page blobs on - premium storage accounts. - :return: ETag and last modified properties for the Page Blob - :rtype: :class:`~azure.storage.blob.models.ResourceProperties` - ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('blob', blob) - _validate_type_bytes('blob', blob) - - if index < 0: - raise IndexError(_ERROR_VALUE_NEGATIVE.format('index')) - - if count is None or count < 0: - count = len(blob) - index - - stream = BytesIO(blob) - stream.seek(index) - - return self.create_blob_from_stream( - container_name=container_name, - blob_name=blob_name, - stream=stream, - count=count, - content_settings=content_settings, - metadata=metadata, - validate_content=validate_content, - lease_id=lease_id, - progress_callback=progress_callback, - max_connections=max_connections, - if_modified_since=if_modified_since, - if_unmodified_since=if_unmodified_since, - if_match=if_match, - if_none_match=if_none_match, - timeout=timeout, - premium_page_blob_tier=premium_page_blob_tier) - - def set_premium_page_blob_tier( - self, container_name, blob_name, premium_page_blob_tier, - timeout=None): - ''' - Sets the page blob tiers on the blob. This API is only supported for page blobs on premium accounts. - - :param str container_name: - Name of existing container. - :param str blob_name: - Name of blob to update. - :param PremiumPageBlobTier premium_page_blob_tier: - A page blob tier value to set the blob to. The tier correlates to the size of the - blob and number of allowed IOPS. This is only applicable to page blobs on - premium storage accounts. - :param int timeout: - The timeout parameter is expressed in seconds. This method may make - multiple calls to the Azure service and the timeout will apply to - each call individually. 
- ''' - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('premium_page_blob_tier', premium_page_blob_tier) - - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.query = { - 'comp': 'tier', - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-access-tier': _to_str(premium_page_blob_tier) - } - - self._perform_request(request) - - def copy_blob(self, container_name, blob_name, copy_source, - metadata=None, - source_if_modified_since=None, - source_if_unmodified_since=None, - source_if_match=None, source_if_none_match=None, - destination_if_modified_since=None, - destination_if_unmodified_since=None, - destination_if_match=None, - destination_if_none_match=None, - destination_lease_id=None, - source_lease_id=None, timeout=None, - premium_page_blob_tier=None): - ''' - Copies a blob asynchronously. This operation returns a copy operation - properties object, including a copy ID you can use to check or abort the - copy operation. The Blob service copies blobs on a best-effort basis. - - The source blob for a copy operation must be a page blob. If the destination - blob already exists, it must be of the same blob type as the source blob. - Any existing destination blob will be overwritten. - The destination blob cannot be modified while a copy operation is in progress. - - When copying from a page blob, the Blob service creates a destination page - blob of the source blob's length, initially containing all zeroes. Then - the source page ranges are enumerated, and non-empty ranges are copied. - - If the tier on the source blob is larger than the tier being passed to this - copy operation or if the size of the blob exceeds the tier being passed to - this copy operation then the operation will fail. - - You can call get_blob_properties on the destination - blob to check the status of the copy operation. The final blob will be - committed when the copy completes. - - :param str container_name: - Name of the destination container. The container must exist. - :param str blob_name: - Name of the destination blob. If the destination blob exists, it will - be overwritten. Otherwise, it will be created. - :param str copy_source: - A URL of up to 2 KB in length that specifies an Azure file or blob. - The value should be URL-encoded as it would appear in a request URI. - If the source is in another account, the source must either be public - or must be authenticated via a shared access signature. If the source - is public, no authentication is required. - Examples: - https://myaccount.blob.core.windows.net/mycontainer/myblob - https://myaccount.blob.core.windows.net/mycontainer/myblob?snapshot= - https://otheraccount.blob.core.windows.net/mycontainer/myblob?sastoken - :param metadata: - Name-value pairs associated with the blob as metadata. If no name-value - pairs are specified, the operation will copy the metadata from the - source blob or file to the destination blob. If one or more name-value - pairs are specified, the destination blob is created with the specified - metadata, and metadata is not copied from the source blob or file. - :type metadata: dict(str, str). - :param datetime source_if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. 
- If a date is passed in without timezone info, it is assumed to be UTC. - Specify this conditional header to copy the blob only if the source - blob has been modified since the specified date/time. - :param datetime source_if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this conditional header to copy the blob only if the source blob - has not been modified since the specified date/time. - :param ETag source_if_match: - An ETag value, or the wildcard character (*). Specify this conditional - header to copy the source blob only if its ETag matches the value - specified. If the ETag values do not match, the Blob service returns - status code 412 (Precondition Failed). This header cannot be specified - if the source is an Azure File. - :param ETag source_if_none_match: - An ETag value, or the wildcard character (*). Specify this conditional - header to copy the blob only if its ETag does not match the value - specified. If the values are identical, the Blob service returns status - code 412 (Precondition Failed). This header cannot be specified if the - source is an Azure File. - :param datetime destination_if_modified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this conditional header to copy the blob only - if the destination blob has been modified since the specified date/time. - If the destination blob has not been modified, the Blob service returns - status code 412 (Precondition Failed). - :param datetime destination_if_unmodified_since: - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this conditional header to copy the blob only - if the destination blob has not been modified since the specified - date/time. If the destination blob has been modified, the Blob service - returns status code 412 (Precondition Failed). - :param ETag destination_if_match: - An ETag value, or the wildcard character (*). Specify an ETag value for - this conditional header to copy the blob only if the specified ETag value - matches the ETag value for an existing destination blob. If the ETag for - the destination blob does not match the ETag specified for If-Match, the - Blob service returns status code 412 (Precondition Failed). - :param ETag destination_if_none_match: - An ETag value, or the wildcard character (*). Specify an ETag value for - this conditional header to copy the blob only if the specified ETag value - does not match the ETag value for the destination blob. Specify the wildcard - character (*) to perform the operation only if the destination blob does not - exist. If the specified condition isn't met, the Blob service returns status - code 412 (Precondition Failed). - :param str destination_lease_id: - The lease ID specified for this header must match the lease ID of the - destination blob. If the request does not include the lease ID or it is not - valid, the operation fails with status code 412 (Precondition Failed). 
- :param str source_lease_id: - Specify this to perform the Copy Blob operation only if - the lease ID given matches the active lease ID of the source blob. - :param int timeout: - The timeout parameter is expressed in seconds. - :param PageBlobTier premium_page_blob_tier: - A page blob tier value to set on the destination blob. The tier correlates to - the size of the blob and number of allowed IOPS. This is only applicable to - page blobs on premium storage accounts. - If the tier on the source blob is larger than the tier being passed to this - copy operation or if the size of the blob exceeds the tier being passed to - this copy operation then the operation will fail. - :return: Copy operation properties such as status, source, and ID. - :rtype: :class:`~azure.storage.blob.models.CopyProperties` - ''' - return self._copy_blob(container_name, blob_name, copy_source, - metadata, premium_page_blob_tier, - source_if_modified_since, source_if_unmodified_since, - source_if_match, source_if_none_match, - destination_if_modified_since, - destination_if_unmodified_since, - destination_if_match, - destination_if_none_match, - destination_lease_id, - source_lease_id, timeout, - False) - - # -----Helper methods----------------------------------------------------- - - def _create_blob( - self, container_name, blob_name, content_length, content_settings=None, - sequence_number=None, metadata=None, lease_id=None, premium_page_blob_tier=None, if_modified_since=None, - if_unmodified_since=None, if_match=None, if_none_match=None, timeout=None, - encryption_data=None): - ''' - See create_blob for more details. This helper method - allows for encryption or other such special behavior because - it is safely handled by the library. These behaviors are - prohibited in the public version of this function. - :param str encryption_data: - The JSON formatted encryption metadata to upload as a part of the blob. - This should only be passed internally from other methods and only applied - when uploading entire blob contents immediately follows creation of the blob. 
- ''' - - _validate_not_none('container_name', container_name) - _validate_not_none('blob_name', blob_name) - _validate_not_none('content_length', content_length) - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.query = {'timeout': _int_to_str(timeout)} - request.headers = { - 'x-ms-blob-type': _to_str(self.blob_type), - 'x-ms-blob-content-length': _to_str(content_length), - 'x-ms-lease-id': _to_str(lease_id), - 'x-ms-blob-sequence-number': _to_str(sequence_number), - 'x-ms-access-tier': _to_str(premium_page_blob_tier), - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'If-Match': _to_str(if_match), - 'If-None-Match': _to_str(if_none_match) - } - _add_metadata_headers(metadata, request) - if content_settings is not None: - request.headers.update(content_settings._to_headers()) - - if encryption_data is not None: - request.headers['x-ms-meta-encryptiondata'] = encryption_data - - return self._perform_request(request, _parse_base_properties) - - def _update_page( - self, container_name, blob_name, page, start_range, end_range, - validate_content=False, lease_id=None, if_sequence_number_lte=None, - if_sequence_number_lt=None, if_sequence_number_eq=None, - if_modified_since=None, if_unmodified_since=None, - if_match=None, if_none_match=None, timeout=None): - ''' - See update_page for more details. This helper method - allows for encryption or other such special behavior because - it is safely handled by the library. These behaviors are - prohibited in the public version of this function. - ''' - - request = HTTPRequest() - request.method = 'PUT' - request.host_locations = self._get_host_locations() - request.path = _get_path(container_name, blob_name) - request.query = { - 'comp': 'page', - 'timeout': _int_to_str(timeout), - } - request.headers = { - 'x-ms-page-write': 'update', - 'x-ms-lease-id': _to_str(lease_id), - 'x-ms-if-sequence-number-le': _to_str(if_sequence_number_lte), - 'x-ms-if-sequence-number-lt': _to_str(if_sequence_number_lt), - 'x-ms-if-sequence-number-eq': _to_str(if_sequence_number_eq), - 'If-Modified-Since': _datetime_to_utc_string(if_modified_since), - 'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since), - 'If-Match': _to_str(if_match), - 'If-None-Match': _to_str(if_none_match) - } - _validate_and_format_range_headers( - request, - start_range, - end_range, - align_to_page=True) - request.body = _get_data_bytes_only('page', page) - - if validate_content: - computed_md5 = _get_content_md5(request.body) - request.headers['Content-MD5'] = _to_str(computed_md5) - - return self._perform_request(request, _parse_page_properties) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/sharedaccesssignature.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/sharedaccesssignature.py deleted file mode 100644 index a9538c9d65ba..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/blob/sharedaccesssignature.py +++ /dev/null @@ -1,275 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. 
-# -------------------------------------------------------------------------- - -from ..common.sharedaccesssignature import ( - SharedAccessSignature, - _SharedAccessHelper, - _QueryStringConstants, - _sign_string, -) -from ._constants import X_MS_VERSION -from ..common._serialization import ( - url_quote, -) - - -class BlobSharedAccessSignature(SharedAccessSignature): - ''' - Provides a factory for creating blob and container access - signature tokens with a common account name and account key. Users can either - use the factory or can construct the appropriate service and use the - generate_*_shared_access_signature method directly. - ''' - - def __init__(self, account_name, account_key=None, user_delegation_key=None): - ''' - :param str account_name: - The storage account name used to generate the shared access signatures. - :param str account_key: - The access key to generate the shares access signatures. - :param ~azure.storage.blob.models.UserDelegationKey user_delegation_key: - Instead of an account key, the user could pass in a user delegation key. - A user delegation key can be obtained from the service by authenticating with an AAD identity; - this can be accomplished by calling get_user_delegation_key on any Blob service object. - ''' - super(BlobSharedAccessSignature, self).__init__(account_name, account_key, x_ms_version=X_MS_VERSION) - self.user_delegation_key = user_delegation_key - - def generate_blob(self, container_name, blob_name, snapshot=None, permission=None, - expiry=None, start=None, id=None, ip=None, protocol=None, - cache_control=None, content_disposition=None, - content_encoding=None, content_language=None, - content_type=None): - ''' - Generates a shared access signature for the blob or one of its snapshots. - Use the returned signature with the sas_token parameter of any BlobService. - - :param str container_name: - Name of container. - :param str blob_name: - Name of blob. - :param str snapshot: - The snapshot parameter is an opaque DateTime value that, - when present, specifies the blob snapshot to grant permission. - :param BlobPermissions permission: - The permissions associated with the shared access signature. The - user is restricted to operations allowed by the permissions. - Permissions must be ordered read, write, delete, list. - Required unless an id is given referencing a stored access policy - which contains this field. This field must be omitted if it has been - specified in an associated stored access policy. - :param expiry: - The time at which the shared access signature becomes invalid. - Required unless an id is given referencing a stored access policy - which contains this field. This field must be omitted if it has - been specified in an associated stored access policy. Azure will always - convert values to UTC. If a date is passed in without timezone info, it - is assumed to be UTC. - :type expiry: datetime or str - :param start: - The time at which the shared access signature becomes valid. If - omitted, start time for this call is assumed to be the time when the - storage service receives the request. Azure will always convert values - to UTC. If a date is passed in without timezone info, it is assumed to - be UTC. - :type start: datetime or str - :param str id: - A unique value up to 64 characters in length that correlates to a - stored access policy. To create a stored access policy, use - set_blob_service_properties. - :param str ip: - Specifies an IP address or a range of IP addresses from which to accept requests. 
- If the IP address from which the request originates does not match the IP address - or address range specified on the SAS token, the request is not authenticated. - For example, specifying sip=168.1.5.65 or sip=168.1.5.60-168.1.5.70 on the SAS - restricts the request to those IP addresses. - :param str protocol: - Specifies the protocol permitted for a request made. The default value - is https,http. See :class:`~azure.storage.common.models.Protocol` for possible values. - :param str cache_control: - Response header value for Cache-Control when resource is accessed - using this shared access signature. - :param str content_disposition: - Response header value for Content-Disposition when resource is accessed - using this shared access signature. - :param str content_encoding: - Response header value for Content-Encoding when resource is accessed - using this shared access signature. - :param str content_language: - Response header value for Content-Language when resource is accessed - using this shared access signature. - :param str content_type: - Response header value for Content-Type when resource is accessed - using this shared access signature. - ''' - resource_path = container_name + '/' + blob_name - - sas = _BlobSharedAccessHelper() - sas.add_base(permission, expiry, start, ip, protocol, self.x_ms_version) - sas.add_id(id) - sas.add_resource('b' if snapshot is None else 'bs') - sas.add_timestamp(snapshot) - sas.add_override_response_headers(cache_control, content_disposition, - content_encoding, content_language, - content_type) - sas.add_resource_signature(self.account_name, self.account_key, resource_path, - user_delegation_key=self.user_delegation_key) - - return sas.get_token() - - def generate_container(self, container_name, permission=None, expiry=None, - start=None, id=None, ip=None, protocol=None, - cache_control=None, content_disposition=None, - content_encoding=None, content_language=None, - content_type=None): - ''' - Generates a shared access signature for the container. - Use the returned signature with the sas_token parameter of any BlobService. - - :param str container_name: - Name of container. - :param ContainerPermissions permission: - The permissions associated with the shared access signature. The - user is restricted to operations allowed by the permissions. - Permissions must be ordered read, write, delete, list. - Required unless an id is given referencing a stored access policy - which contains this field. This field must be omitted if it has been - specified in an associated stored access policy. - :param expiry: - The time at which the shared access signature becomes invalid. - Required unless an id is given referencing a stored access policy - which contains this field. This field must be omitted if it has - been specified in an associated stored access policy. Azure will always - convert values to UTC. If a date is passed in without timezone info, it - is assumed to be UTC. - :type expiry: datetime or str - :param start: - The time at which the shared access signature becomes valid. If - omitted, start time for this call is assumed to be the time when the - storage service receives the request. Azure will always convert values - to UTC. If a date is passed in without timezone info, it is assumed to - be UTC. - :type start: datetime or str - :param str id: - A unique value up to 64 characters in length that correlates to a - stored access policy. To create a stored access policy, use - set_blob_service_properties. 
- :param str ip: - Specifies an IP address or a range of IP addresses from which to accept requests. - If the IP address from which the request originates does not match the IP address - or address range specified on the SAS token, the request is not authenticated. - For example, specifying sip=168.1.5.65 or sip=168.1.5.60-168.1.5.70 on the SAS - restricts the request to those IP addresses. - :param str protocol: - Specifies the protocol permitted for a request made. The default value - is https,http. See :class:`~azure.storage.common.models.Protocol` for possible values. - :param str cache_control: - Response header value for Cache-Control when resource is accessed - using this shared access signature. - :param str content_disposition: - Response header value for Content-Disposition when resource is accessed - using this shared access signature. - :param str content_encoding: - Response header value for Content-Encoding when resource is accessed - using this shared access signature. - :param str content_language: - Response header value for Content-Language when resource is accessed - using this shared access signature. - :param str content_type: - Response header value for Content-Type when resource is accessed - using this shared access signature. - ''' - sas = _BlobSharedAccessHelper() - sas.add_base(permission, expiry, start, ip, protocol, self.x_ms_version) - sas.add_id(id) - sas.add_resource('c') - sas.add_override_response_headers(cache_control, content_disposition, - content_encoding, content_language, - content_type) - sas.add_resource_signature(self.account_name, self.account_key, container_name, - user_delegation_key=self.user_delegation_key) - return sas.get_token() - - -class _BlobQueryStringConstants(_QueryStringConstants): - SIGNED_TIMESTAMP = 'snapshot' - SIGNED_OID = 'skoid' - SIGNED_TID = 'sktid' - SIGNED_KEY_START = 'skt' - SIGNED_KEY_EXPIRY = 'ske' - SIGNED_KEY_SERVICE = 'sks' - SIGNED_KEY_VERSION = 'skv' - - -class _BlobSharedAccessHelper(_SharedAccessHelper): - def __init__(self): - super(_BlobSharedAccessHelper, self).__init__() - - def add_timestamp(self, timestamp): - self._add_query(_BlobQueryStringConstants.SIGNED_TIMESTAMP, timestamp) - - def get_value_to_append(self, query): - return_value = self.query_dict.get(query) or '' - return return_value + '\n' - - def add_resource_signature(self, account_name, account_key, path, user_delegation_key=None): - if path[0] != '/': - path = '/' + path - - canonicalized_resource = '/blob/' + account_name + path + '\n' - - # Form the string to sign from shared_access_policy and canonicalized - # resource. The order of values is important. 
- string_to_sign = \ - (self.get_value_to_append(_BlobQueryStringConstants.SIGNED_PERMISSION) + - self.get_value_to_append(_BlobQueryStringConstants.SIGNED_START) + - self.get_value_to_append(_BlobQueryStringConstants.SIGNED_EXPIRY) + - canonicalized_resource) - - if user_delegation_key is not None: - self._add_query(_BlobQueryStringConstants.SIGNED_OID, user_delegation_key.signed_oid) - self._add_query(_BlobQueryStringConstants.SIGNED_TID, user_delegation_key.signed_tid) - self._add_query(_BlobQueryStringConstants.SIGNED_KEY_START, user_delegation_key.signed_start) - self._add_query(_BlobQueryStringConstants.SIGNED_KEY_EXPIRY, user_delegation_key.signed_expiry) - self._add_query(_BlobQueryStringConstants.SIGNED_KEY_SERVICE, user_delegation_key.signed_service) - self._add_query(_BlobQueryStringConstants.SIGNED_KEY_VERSION, user_delegation_key.signed_version) - - string_to_sign += \ - (self.get_value_to_append(_BlobQueryStringConstants.SIGNED_OID) + - self.get_value_to_append(_BlobQueryStringConstants.SIGNED_TID) + - self.get_value_to_append(_BlobQueryStringConstants.SIGNED_KEY_START) + - self.get_value_to_append(_BlobQueryStringConstants.SIGNED_KEY_EXPIRY) + - self.get_value_to_append(_BlobQueryStringConstants.SIGNED_KEY_SERVICE) + - self.get_value_to_append(_BlobQueryStringConstants.SIGNED_KEY_VERSION)) - else: - string_to_sign += self.get_value_to_append(_BlobQueryStringConstants.SIGNED_IDENTIFIER) - - string_to_sign += \ - (self.get_value_to_append(_BlobQueryStringConstants.SIGNED_IP) + - self.get_value_to_append(_BlobQueryStringConstants.SIGNED_PROTOCOL) + - self.get_value_to_append(_BlobQueryStringConstants.SIGNED_VERSION) + - self.get_value_to_append(_BlobQueryStringConstants.SIGNED_RESOURCE) + - self.get_value_to_append(_BlobQueryStringConstants.SIGNED_TIMESTAMP) + - self.get_value_to_append(_BlobQueryStringConstants.SIGNED_CACHE_CONTROL) + - self.get_value_to_append(_BlobQueryStringConstants.SIGNED_CONTENT_DISPOSITION) + - self.get_value_to_append(_BlobQueryStringConstants.SIGNED_CONTENT_ENCODING) + - self.get_value_to_append(_BlobQueryStringConstants.SIGNED_CONTENT_LANGUAGE) + - self.get_value_to_append(_BlobQueryStringConstants.SIGNED_CONTENT_TYPE)) - - # remove the trailing newline - if string_to_sign[-1] == '\n': - string_to_sign = string_to_sign[:-1] - - self._add_query(_BlobQueryStringConstants.SIGNED_SIGNATURE, - _sign_string(account_key if user_delegation_key is None else user_delegation_key.value, - string_to_sign)) - - def get_token(self): - # a conscious decision was made to exclude the timestamp in the generated token - # this is to avoid having two snapshot ids in the query parameters when the user appends the snapshot timestamp - exclude = [_BlobQueryStringConstants.SIGNED_TIMESTAMP] - return '&'.join(['{0}={1}'.format(n, url_quote(v)) - for n, v in self.query_dict.items() if v is not None and n not in exclude]) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/__init__.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/__init__.py deleted file mode 100644 index a646e3811588..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/__init__.py +++ /dev/null @@ -1,39 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. 
-# -------------------------------------------------------------------------- -from ._constants import ( - __author__, - __version__, - DEFAULT_X_MS_VERSION, -) -from .cloudstorageaccount import CloudStorageAccount -from .models import ( - RetentionPolicy, - Logging, - Metrics, - CorsRule, - DeleteRetentionPolicy, - StaticWebsite, - ServiceProperties, - AccessPolicy, - ResourceTypes, - Services, - AccountPermissions, - Protocol, - ServiceStats, - GeoReplication, - LocationMode, - RetryContext, -) -from .retry import ( - ExponentialRetry, - LinearRetry, - no_retry, -) -from .sharedaccesssignature import ( - SharedAccessSignature, -) -from .tokencredential import TokenCredential -from ._error import AzureSigningError diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_auth.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_auth.py deleted file mode 100644 index 13940f97b6f7..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_auth.py +++ /dev/null @@ -1,129 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- -from ._common_conversion import ( - _sign_string, -) -from ._constants import ( - DEV_ACCOUNT_NAME, - DEV_ACCOUNT_SECONDARY_NAME -) -import sys -if sys.version_info >= (3,): - from urllib.parse import parse_qsl -else: - from urlparse import parse_qsl - - -import logging -logger = logging.getLogger(__name__) - -from ._error import ( - AzureSigningError, - _wrap_exception, -) - - -class _StorageSharedKeyAuthentication(object): - def __init__(self, account_name, account_key, is_emulated=False): - self.account_name = account_name - self.account_key = account_key - self.is_emulated = is_emulated - - def _get_headers(self, request, headers_to_sign): - headers = dict((name.lower(), value) for name, value in request.headers.items() if value) - if 'content-length' in headers and headers['content-length'] == '0': - del headers['content-length'] - return '\n'.join(headers.get(x, '') for x in headers_to_sign) + '\n' - - def _get_verb(self, request): - return request.method + '\n' - - def _get_canonicalized_resource(self, request): - uri_path = request.path.split('?')[0] - - # for emulator, use the DEV_ACCOUNT_NAME instead of DEV_ACCOUNT_SECONDARY_NAME - # as this is how the emulator works - if self.is_emulated and uri_path.find(DEV_ACCOUNT_SECONDARY_NAME) == 1: - # only replace the first instance - uri_path = uri_path.replace(DEV_ACCOUNT_SECONDARY_NAME, DEV_ACCOUNT_NAME, 1) - - return '/' + self.account_name + uri_path - - def _get_canonicalized_headers(self, request): - string_to_sign = '' - x_ms_headers = [] - for name, value in request.headers.items(): - if name.startswith('x-ms-'): - x_ms_headers.append((name.lower(), value)) - x_ms_headers.sort() - for name, value in x_ms_headers: - if value is not None: - string_to_sign += ''.join([name, ':', value, '\n']) - return string_to_sign - - def _add_authorization_header(self, request, string_to_sign): - try: - signature = _sign_string(self.account_key, string_to_sign) - auth_string = 'SharedKey ' + self.account_name + ':' + signature - request.headers['Authorization'] = auth_string - except Exception as ex: - # Wrap any error that occurred as signing error - # Doing so 
will clarify/locate the source of problem - raise _wrap_exception(ex, AzureSigningError) - - -class _StorageSharedKeyAuthentication(_StorageSharedKeyAuthentication): - def sign_request(self, request): - string_to_sign = \ - self._get_verb(request) + \ - self._get_headers( - request, - [ - 'content-encoding', 'content-language', 'content-length', - 'content-md5', 'content-type', 'date', 'if-modified-since', - 'if-match', 'if-none-match', 'if-unmodified-since', 'byte_range' - ] - ) + \ - self._get_canonicalized_headers(request) + \ - self._get_canonicalized_resource(request) + \ - self._get_canonicalized_resource_query(request) - - self._add_authorization_header(request, string_to_sign) - logger.debug("String_to_sign=%s", string_to_sign) - - def _get_canonicalized_resource_query(self, request): - sorted_queries = [(name, value) for name, value in request.query.items()] - sorted_queries.sort() - - string_to_sign = '' - for name, value in sorted_queries: - if value is not None: - string_to_sign += '\n' + name.lower() + ':' + value - - return string_to_sign - - -class _StorageNoAuthentication(object): - def sign_request(self, request): - pass - - -class _StorageSASAuthentication(object): - def __init__(self, sas_token): - # ignore ?-prefix (added by tools such as Azure Portal) on sas tokens - # doing so avoids double question marks when signing - if sas_token[0] == '?': - sas_token = sas_token[1:] - - self.sas_qs = parse_qsl(sas_token) - - def sign_request(self, request): - # if 'sig' is present, then the request has already been signed - # as is the case when performing retries - if 'sig' in request.query: - return - - request.query.update(self.sas_qs) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_common_conversion.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_common_conversion.py deleted file mode 100644 index 8b50afbe1afb..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_common_conversion.py +++ /dev/null @@ -1,126 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- - -import base64 -import hashlib -import hmac -import sys -from io import (SEEK_SET) - -from dateutil.tz import tzutc - -from ._error import ( - _ERROR_VALUE_SHOULD_BE_BYTES_OR_STREAM, - _ERROR_VALUE_SHOULD_BE_SEEKABLE_STREAM, -) -from .models import ( - _unicode_type, -) - -if sys.version_info < (3,): - def _str(value): - if isinstance(value, unicode): - return value.encode('utf-8') - - return str(value) -else: - _str = str - - -def _to_str(value): - return _str(value) if value is not None else None - - -def _int_to_str(value): - return str(int(value)) if value is not None else None - - -def _bool_to_str(value): - if value is None: - return None - - if isinstance(value, bool): - if value: - return 'true' - else: - return 'false' - - return str(value) - - -def _to_utc_datetime(value): - return value.strftime('%Y-%m-%dT%H:%M:%SZ') - - -def _datetime_to_utc_string(value): - # Azure expects the date value passed in to be UTC. - # Azure will always return values as UTC. - # If a date is passed in without timezone info, it is assumed to be UTC. 
- if value is None: - return None - - if value.tzinfo: - value = value.astimezone(tzutc()) - - return value.strftime('%a, %d %b %Y %H:%M:%S GMT') - - -def _encode_base64(data): - if isinstance(data, _unicode_type): - data = data.encode('utf-8') - encoded = base64.b64encode(data) - return encoded.decode('utf-8') - - -def _decode_base64_to_bytes(data): - if isinstance(data, _unicode_type): - data = data.encode('utf-8') - return base64.b64decode(data) - - -def _decode_base64_to_text(data): - decoded_bytes = _decode_base64_to_bytes(data) - return decoded_bytes.decode('utf-8') - - -def _sign_string(key, string_to_sign, key_is_base64=True): - if key_is_base64: - key = _decode_base64_to_bytes(key) - else: - if isinstance(key, _unicode_type): - key = key.encode('utf-8') - if isinstance(string_to_sign, _unicode_type): - string_to_sign = string_to_sign.encode('utf-8') - signed_hmac_sha256 = hmac.HMAC(key, string_to_sign, hashlib.sha256) - digest = signed_hmac_sha256.digest() - encoded_digest = _encode_base64(digest) - return encoded_digest - - -def _get_content_md5(data): - md5 = hashlib.md5() - if isinstance(data, bytes): - md5.update(data) - elif hasattr(data, 'read'): - pos = 0 - try: - pos = data.tell() - except: - pass - for chunk in iter(lambda: data.read(4096), b""): - md5.update(chunk) - try: - data.seek(pos, SEEK_SET) - except (AttributeError, IOError): - raise ValueError(_ERROR_VALUE_SHOULD_BE_SEEKABLE_STREAM.format('data')) - else: - raise ValueError(_ERROR_VALUE_SHOULD_BE_BYTES_OR_STREAM.format('data')) - - return base64.b64encode(md5.digest()).decode('utf-8') - - -def _lower(text): - return text.lower() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_connection.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_connection.py deleted file mode 100644 index 6836cf91fad6..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_connection.py +++ /dev/null @@ -1,161 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. 
-# -------------------------------------------------------------------------- -import sys - -if sys.version_info >= (3,): - from urllib.parse import urlparse -else: - from urlparse import urlparse - -from ._constants import ( - SERVICE_HOST_BASE, - DEFAULT_PROTOCOL, - DEV_ACCOUNT_NAME, - DEV_ACCOUNT_SECONDARY_NAME, - DEV_ACCOUNT_KEY, - DEV_BLOB_HOST, - DEV_QUEUE_HOST, -) -from ._error import ( - _ERROR_STORAGE_MISSING_INFO, -) - -_EMULATOR_ENDPOINTS = { - 'blob': DEV_BLOB_HOST, - 'queue': DEV_QUEUE_HOST, - 'file': '', -} - -_CONNECTION_ENDPOINTS = { - 'blob': 'BlobEndpoint', - 'queue': 'QueueEndpoint', - 'file': 'FileEndpoint', -} - -_CONNECTION_ENDPOINTS_SECONDARY = { - 'blob': 'BlobSecondaryEndpoint', - 'queue': 'QueueSecondaryEndpoint', - 'file': 'FileSecondaryEndpoint', -} - - -class _ServiceParameters(object): - def __init__(self, service, account_name=None, account_key=None, sas_token=None, token_credential=None, - is_emulated=False, protocol=DEFAULT_PROTOCOL, endpoint_suffix=SERVICE_HOST_BASE, - custom_domain=None, custom_domain_secondary=None): - - self.account_name = account_name - self.account_key = account_key - self.sas_token = sas_token - self.token_credential = token_credential - self.protocol = protocol or DEFAULT_PROTOCOL - self.is_emulated = is_emulated - - if is_emulated: - self.account_name = DEV_ACCOUNT_NAME - self.protocol = 'http' - - # Only set the account key if a sas_token is not present to allow sas to be used with the emulator - self.account_key = DEV_ACCOUNT_KEY if not self.sas_token else None - emulator_endpoint = _EMULATOR_ENDPOINTS[service] if custom_domain is None else custom_domain - - self.primary_endpoint = '{}/{}'.format(emulator_endpoint, DEV_ACCOUNT_NAME) - self.secondary_endpoint = '{}/{}'.format(emulator_endpoint, DEV_ACCOUNT_SECONDARY_NAME) - else: - # Strip whitespace from the key - if self.account_key: - self.account_key = self.account_key.strip() - - endpoint_suffix = endpoint_suffix or SERVICE_HOST_BASE - - # Setup the primary endpoint - if custom_domain: - parsed_url = urlparse(custom_domain) - - # Trim any trailing slashes from the path - path = parsed_url.path.rstrip('/') - - self.primary_endpoint = parsed_url.netloc + path - self.protocol = self.protocol if parsed_url.scheme is '' else parsed_url.scheme - else: - if not self.account_name: - raise ValueError(_ERROR_STORAGE_MISSING_INFO) - self.primary_endpoint = '{}.{}.{}'.format(self.account_name, service, endpoint_suffix) - - # Setup the secondary endpoint - if custom_domain_secondary: - if not custom_domain: - raise ValueError(_ERROR_STORAGE_MISSING_INFO) - - parsed_url = urlparse(custom_domain_secondary) - - # Trim any trailing slashes from the path - path = parsed_url.path.rstrip('/') - - self.secondary_endpoint = parsed_url.netloc + path - else: - if self.account_name: - self.secondary_endpoint = '{}-secondary.{}.{}'.format(self.account_name, service, endpoint_suffix) - else: - self.secondary_endpoint = None - - @staticmethod - def get_service_parameters(service, account_name=None, account_key=None, sas_token=None, token_credential= None, - is_emulated=None, protocol=None, endpoint_suffix=None, custom_domain=None, - request_session=None, connection_string=None, socket_timeout=None): - if connection_string: - params = _ServiceParameters._from_connection_string(connection_string, service) - elif is_emulated: - params = _ServiceParameters(service, is_emulated=True, custom_domain=custom_domain) - elif account_name: - if protocol.lower() != 'https' and token_credential is not None: - raise 
ValueError("Token credential is only supported with HTTPS.") - params = _ServiceParameters(service, - account_name=account_name, - account_key=account_key, - sas_token=sas_token, - token_credential=token_credential, - is_emulated=is_emulated, - protocol=protocol, - endpoint_suffix=endpoint_suffix, - custom_domain=custom_domain) - else: - raise ValueError(_ERROR_STORAGE_MISSING_INFO) - - params.request_session = request_session - params.socket_timeout = socket_timeout - return params - - @staticmethod - def _from_connection_string(connection_string, service): - # Split into key=value pairs removing empties, then split the pairs into a dict - config = dict(s.split('=', 1) for s in connection_string.split(';') if s) - - # Authentication - account_name = config.get('AccountName') - account_key = config.get('AccountKey') - sas_token = config.get('SharedAccessSignature') - - # Emulator - is_emulated = config.get('UseDevelopmentStorage') - - # Basic URL Configuration - protocol = config.get('DefaultEndpointsProtocol') - endpoint_suffix = config.get('EndpointSuffix') - - # Custom URLs - endpoint = config.get(_CONNECTION_ENDPOINTS[service]) - endpoint_secondary = config.get(_CONNECTION_ENDPOINTS_SECONDARY[service]) - - return _ServiceParameters(service, - account_name=account_name, - account_key=account_key, - sas_token=sas_token, - is_emulated=is_emulated, - protocol=protocol, - endpoint_suffix=endpoint_suffix, - custom_domain=endpoint, - custom_domain_secondary=endpoint_secondary) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_constants.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_constants.py deleted file mode 100644 index 22d93b3a2cd6..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_constants.py +++ /dev/null @@ -1,51 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- -import platform -import sys - -__author__ = 'Microsoft Corp. 
' -__version__ = '2.0.0' - -# UserAgent string sample: 'Azure-Storage/0.37.0-0.38.0 (Python CPython 3.4.2; Windows 8)' -# First version(0.37.0) is the common package, and the second version(0.38.0) is the service package -USER_AGENT_STRING_PREFIX = 'Azure-Storage/{}-'.format(__version__) -USER_AGENT_STRING_SUFFIX = '(Python {} {}; {} {})'.format(platform.python_implementation(), - platform.python_version(), platform.system(), - platform.release()) - -# default values for common package, in case it is used directly -DEFAULT_X_MS_VERSION = '2018-03-28' -DEFAULT_USER_AGENT_STRING = '{}None {}'.format(USER_AGENT_STRING_PREFIX, USER_AGENT_STRING_SUFFIX) - -# Live ServiceClient URLs -SERVICE_HOST_BASE = 'core.windows.net' -DEFAULT_PROTOCOL = 'https' - -# Development ServiceClient URLs -DEV_BLOB_HOST = '127.0.0.1:10000' -DEV_QUEUE_HOST = '127.0.0.1:10001' - -# Default credentials for Development Storage Service -DEV_ACCOUNT_NAME = 'devstoreaccount1' -DEV_ACCOUNT_SECONDARY_NAME = 'devstoreaccount1-secondary' -DEV_ACCOUNT_KEY = 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==' - -# Socket timeout in seconds -DEFAULT_SOCKET_TIMEOUT = 20 - -# for python 3.5+, there was a change to the definition of the socket timeout (as far as socket.sendall is concerned) -# The socket timeout is now the maximum total duration to send all data. -if sys.version_info >= (3, 5): - # the timeout to connect is 20 seconds, and the read timeout is 2000 seconds - # the 2000 seconds was calculated with: 100MB (max block size)/ 50KB/s (an arbitrarily chosen minimum upload speed) - DEFAULT_SOCKET_TIMEOUT = (20, 2000) - -# Encryption constants -_ENCRYPTION_PROTOCOL_V1 = '1.0' - -_AUTHORIZATION_HEADER_NAME = 'Authorization' -_COPY_SOURCE_HEADER_NAME = 'x-ms-copy-source' -_REDACTED_VALUE = 'REDACTED' diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_deserialization.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_deserialization.py deleted file mode 100644 index 80803da3e438..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_deserialization.py +++ /dev/null @@ -1,384 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. 
-# -------------------------------------------------------------------------- -from dateutil import parser - -from ._common_conversion import _to_str - -try: - from xml.etree import cElementTree as ETree -except ImportError: - from xml.etree import ElementTree as ETree - -from .models import ( - ServiceProperties, - Logging, - Metrics, - CorsRule, - AccessPolicy, - _dict, - GeoReplication, - ServiceStats, - DeleteRetentionPolicy, - StaticWebsite, -) - - -def _to_int(value): - return value if value is None else int(value) - - -def _bool(value): - return value.lower() == 'true' - - -def _to_upper_str(value): - return _to_str(value).upper() if value is not None else None - - -def _get_download_size(start_range, end_range, resource_size): - if start_range is not None: - end_range = end_range if end_range else (resource_size if resource_size else None) - if end_range is not None: - return end_range - start_range - else: - return None - else: - return resource_size - - -GET_PROPERTIES_ATTRIBUTE_MAP = { - 'last-modified': (None, 'last_modified', parser.parse), - 'etag': (None, 'etag', _to_str), - 'x-ms-blob-type': (None, 'blob_type', _to_str), - 'content-length': (None, 'content_length', _to_int), - 'content-range': (None, 'content_range', _to_str), - 'x-ms-blob-sequence-number': (None, 'page_blob_sequence_number', _to_int), - 'x-ms-blob-committed-block-count': (None, 'append_blob_committed_block_count', _to_int), - 'x-ms-blob-public-access': (None, 'public_access', _to_str), - 'x-ms-access-tier': (None, 'blob_tier', _to_str), - 'x-ms-access-tier-change-time': (None, 'blob_tier_change_time', parser.parse), - 'x-ms-access-tier-inferred': (None, 'blob_tier_inferred', _bool), - 'x-ms-archive-status': (None, 'rehydration_status', _to_str), - 'x-ms-share-quota': (None, 'quota', _to_int), - 'x-ms-server-encrypted': (None, 'server_encrypted', _bool), - 'x-ms-creation-time': (None, 'creation_time', parser.parse), - 'content-type': ('content_settings', 'content_type', _to_str), - 'cache-control': ('content_settings', 'cache_control', _to_str), - 'content-encoding': ('content_settings', 'content_encoding', _to_str), - 'content-disposition': ('content_settings', 'content_disposition', _to_str), - 'content-language': ('content_settings', 'content_language', _to_str), - 'content-md5': ('content_settings', 'content_md5', _to_str), - 'x-ms-lease-status': ('lease', 'status', _to_str), - 'x-ms-lease-state': ('lease', 'state', _to_str), - 'x-ms-lease-duration': ('lease', 'duration', _to_str), - 'x-ms-copy-id': ('copy', 'id', _to_str), - 'x-ms-copy-source': ('copy', 'source', _to_str), - 'x-ms-copy-status': ('copy', 'status', _to_str), - 'x-ms-copy-progress': ('copy', 'progress', _to_str), - 'x-ms-copy-completion-time': ('copy', 'completion_time', parser.parse), - 'x-ms-copy-destination-snapshot': ('copy', 'destination_snapshot_time', _to_str), - 'x-ms-copy-status-description': ('copy', 'status_description', _to_str), - 'x-ms-has-immutability-policy': (None, 'has_immutability_policy', _bool), - 'x-ms-has-legal-hold': (None, 'has_legal_hold', _bool), -} - - -def _parse_metadata(response): - ''' - Extracts out resource metadata information. - ''' - - if response is None or response.headers is None: - return None - - metadata = _dict() - for key, value in response.headers.items(): - if key.lower().startswith('x-ms-meta-'): - metadata[key[10:]] = _to_str(value) - - return metadata - - -def _parse_properties(response, result_class): - ''' - Extracts out resource properties and metadata information. 
- Ignores the standard http headers. - ''' - - if response is None or response.headers is None: - return None - - props = result_class() - for key, value in response.headers.items(): - info = GET_PROPERTIES_ATTRIBUTE_MAP.get(key) - if info: - if info[0] is None: - setattr(props, info[1], info[2](value)) - else: - attr = getattr(props, info[0]) - setattr(attr, info[1], info[2](value)) - - if hasattr(props, 'blob_type') and props.blob_type == 'PageBlob' and hasattr(props, 'blob_tier') and props.blob_tier is not None: - props.blob_tier = _to_upper_str(props.blob_tier) - return props - - -def _parse_length_from_content_range(content_range): - ''' - Parses the blob length from the content range header: bytes 1-3/65537 - ''' - if content_range is None: - return None - - # First, split in space and take the second half: '1-3/65537' - # Next, split on slash and take the second half: '65537' - # Finally, convert to an int: 65537 - return int(content_range.split(' ', 1)[1].split('/', 1)[1]) - - -def _convert_xml_to_signed_identifiers(response): - ''' - - - - unique-value - - start-time - expiry-time - abbreviated-permission-list - - - - ''' - if response is None or response.body is None: - return None - - list_element = ETree.fromstring(response.body) - signed_identifiers = _dict() - - for signed_identifier_element in list_element.findall('SignedIdentifier'): - # Id element - id = signed_identifier_element.find('Id').text - - # Access policy element - access_policy = AccessPolicy() - access_policy_element = signed_identifier_element.find('AccessPolicy') - if access_policy_element is not None: - start_element = access_policy_element.find('Start') - if start_element is not None: - access_policy.start = parser.parse(start_element.text) - - expiry_element = access_policy_element.find('Expiry') - if expiry_element is not None: - access_policy.expiry = parser.parse(expiry_element.text) - - access_policy.permission = access_policy_element.findtext('Permission') - - signed_identifiers[id] = access_policy - - return signed_identifiers - - -def _convert_xml_to_service_stats(response): - ''' - - - - live|bootstrap|unavailable - sync-time| - - - ''' - if response is None or response.body is None: - return None - - service_stats_element = ETree.fromstring(response.body) - - geo_replication_element = service_stats_element.find('GeoReplication') - - geo_replication = GeoReplication() - geo_replication.status = geo_replication_element.find('Status').text - last_sync_time = geo_replication_element.find('LastSyncTime').text - geo_replication.last_sync_time = parser.parse(last_sync_time) if last_sync_time else None - - service_stats = ServiceStats() - service_stats.geo_replication = geo_replication - return service_stats - - -def _convert_xml_to_service_properties(response): - ''' - - - - version-number - true|false - true|false - true|false - - true|false - number-of-days - - - - version-number - true|false - true|false - - true|false - number-of-days - - - - version-number - true|false - true|false - - true|false - number-of-days - - - - - comma-separated-list-of-allowed-origins - comma-separated-list-of-HTTP-verb - max-caching-age-in-seconds - comma-seperated-list-of-response-headers - comma-seperated-list-of-request-headers - - - - true|false - number-of-days - - - true|false - - - - - ''' - if response is None or response.body is None: - return None - - service_properties_element = ETree.fromstring(response.body) - service_properties = ServiceProperties() - - # Logging - logging = 
service_properties_element.find('Logging') - if logging is not None: - service_properties.logging = Logging() - service_properties.logging.version = logging.find('Version').text - service_properties.logging.delete = _bool(logging.find('Delete').text) - service_properties.logging.read = _bool(logging.find('Read').text) - service_properties.logging.write = _bool(logging.find('Write').text) - - _convert_xml_to_retention_policy(logging.find('RetentionPolicy'), - service_properties.logging.retention_policy) - # HourMetrics - hour_metrics_element = service_properties_element.find('HourMetrics') - if hour_metrics_element is not None: - service_properties.hour_metrics = Metrics() - _convert_xml_to_metrics(hour_metrics_element, service_properties.hour_metrics) - - # MinuteMetrics - minute_metrics_element = service_properties_element.find('MinuteMetrics') - if minute_metrics_element is not None: - service_properties.minute_metrics = Metrics() - _convert_xml_to_metrics(minute_metrics_element, service_properties.minute_metrics) - - # CORS - cors = service_properties_element.find('Cors') - if cors is not None: - service_properties.cors = list() - for rule in cors.findall('CorsRule'): - allowed_origins = rule.find('AllowedOrigins').text.split(',') - - allowed_methods = rule.find('AllowedMethods').text.split(',') - - max_age_in_seconds = int(rule.find('MaxAgeInSeconds').text) - - cors_rule = CorsRule(allowed_origins, allowed_methods, max_age_in_seconds) - - exposed_headers = rule.find('ExposedHeaders').text - if exposed_headers is not None: - cors_rule.exposed_headers = exposed_headers.split(',') - - allowed_headers = rule.find('AllowedHeaders').text - if allowed_headers is not None: - cors_rule.allowed_headers = allowed_headers.split(',') - - service_properties.cors.append(cors_rule) - - # Target version - target_version = service_properties_element.find('DefaultServiceVersion') - if target_version is not None: - service_properties.target_version = target_version.text - - # DeleteRetentionPolicy - delete_retention_policy_element = service_properties_element.find('DeleteRetentionPolicy') - if delete_retention_policy_element is not None: - service_properties.delete_retention_policy = DeleteRetentionPolicy() - policy_enabled = _bool(delete_retention_policy_element.find('Enabled').text) - service_properties.delete_retention_policy.enabled = policy_enabled - - if policy_enabled: - service_properties.delete_retention_policy.days = int(delete_retention_policy_element.find('Days').text) - - # StaticWebsite - static_website_element = service_properties_element.find('StaticWebsite') - if static_website_element is not None: - service_properties.static_website = StaticWebsite() - service_properties.static_website.enabled = _bool(static_website_element.find('Enabled').text) - - index_document_element = static_website_element.find('IndexDocument') - if index_document_element is not None: - service_properties.static_website.index_document = index_document_element.text - - error_document_element = static_website_element.find('ErrorDocument404Path') - if error_document_element is not None: - service_properties.static_website.error_document_404_path = error_document_element.text - - return service_properties - - -def _convert_xml_to_metrics(xml, metrics): - ''' - version-number - true|false - true|false - - true|false - number-of-days - - ''' - # Version - metrics.version = xml.find('Version').text - - # Enabled - metrics.enabled = _bool(xml.find('Enabled').text) - - # IncludeAPIs - include_apis_element = 
xml.find('IncludeAPIs') - if include_apis_element is not None: - metrics.include_apis = _bool(include_apis_element.text) - - # RetentionPolicy - _convert_xml_to_retention_policy(xml.find('RetentionPolicy'), metrics.retention_policy) - - -def _convert_xml_to_retention_policy(xml, retention_policy): - ''' - true|false - number-of-days - ''' - # Enabled - retention_policy.enabled = _bool(xml.find('Enabled').text) - - # Days - days_element = xml.find('Days') - if days_element is not None: - retention_policy.days = int(days_element.text) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_encryption.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_encryption.py deleted file mode 100644 index cd7d92e66e0e..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_encryption.py +++ /dev/null @@ -1,233 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- -from collections import OrderedDict - -from cryptography.hazmat.backends import default_backend -from cryptography.hazmat.primitives.ciphers import Cipher -from cryptography.hazmat.primitives.ciphers.algorithms import AES -from cryptography.hazmat.primitives.ciphers.modes import CBC - -from ._common_conversion import ( - _encode_base64, - _decode_base64_to_bytes, -) -from ._constants import ( - _ENCRYPTION_PROTOCOL_V1, - __version__, -) -from ._error import ( - _ERROR_UNSUPPORTED_ENCRYPTION_VERSION, - _validate_not_none, - _validate_encryption_protocol_version, - _validate_key_encryption_key_unwrap, - _validate_kek_id, -) - - -class _EncryptionAlgorithm(object): - ''' - Specifies which client encryption algorithm is used. - ''' - AES_CBC_256 = 'AES_CBC_256' - - -class _WrappedContentKey: - ''' - Represents the envelope key details stored on the service. - ''' - - def __init__(self, algorithm, encrypted_key, key_id): - ''' - :param str algorithm: - The algorithm used for wrapping. - :param bytes encrypted_key: - The encrypted content-encryption-key. - :param str key_id: - The key-encryption-key identifier string. - ''' - - _validate_not_none('algorithm', algorithm) - _validate_not_none('encrypted_key', encrypted_key) - _validate_not_none('key_id', key_id) - - self.algorithm = algorithm - self.encrypted_key = encrypted_key - self.key_id = key_id - - -class _EncryptionAgent: - ''' - Represents the encryption agent stored on the service. - It consists of the encryption protocol version and encryption algorithm used. - ''' - - def __init__(self, encryption_algorithm, protocol): - ''' - :param _EncryptionAlgorithm encryption_algorithm: - The algorithm used for encrypting the message contents. - :param str protocol: - The protocol version used for encryption. - ''' - - _validate_not_none('encryption_algorithm', encryption_algorithm) - _validate_not_none('protocol', protocol) - - self.encryption_algorithm = str(encryption_algorithm) - self.protocol = protocol - - -class _EncryptionData: - ''' - Represents the encryption data that is stored on the service. - ''' - - def __init__(self, content_encryption_IV, encryption_agent, wrapped_content_key, - key_wrapping_metadata): - ''' - :param bytes content_encryption_IV: - The content encryption initialization vector. 
- :param _EncryptionAgent encryption_agent: - The encryption agent. - :param _WrappedContentKey wrapped_content_key: - An object that stores the wrapping algorithm, the key identifier, - and the encrypted key bytes. - :param dict key_wrapping_metadata: - A dict containing metadata related to the key wrapping. - ''' - - _validate_not_none('content_encryption_IV', content_encryption_IV) - _validate_not_none('encryption_agent', encryption_agent) - _validate_not_none('wrapped_content_key', wrapped_content_key) - - self.content_encryption_IV = content_encryption_IV - self.encryption_agent = encryption_agent - self.wrapped_content_key = wrapped_content_key - self.key_wrapping_metadata = key_wrapping_metadata - - -def _generate_encryption_data_dict(kek, cek, iv): - ''' - Generates and returns the encryption metadata as a dict. - - :param object kek: The key encryption key. See calling functions for more information. - :param bytes cek: The content encryption key. - :param bytes iv: The initialization vector. - :return: A dict containing all the encryption metadata. - :rtype: dict - ''' - # Encrypt the cek. - wrapped_cek = kek.wrap_key(cek) - - # Build the encryption_data dict. - # Use OrderedDict to comply with Java's ordering requirement. - wrapped_content_key = OrderedDict() - wrapped_content_key['KeyId'] = kek.get_kid() - wrapped_content_key['EncryptedKey'] = _encode_base64(wrapped_cek) - wrapped_content_key['Algorithm'] = kek.get_key_wrap_algorithm() - - encryption_agent = OrderedDict() - encryption_agent['Protocol'] = _ENCRYPTION_PROTOCOL_V1 - encryption_agent['EncryptionAlgorithm'] = _EncryptionAlgorithm.AES_CBC_256 - - encryption_data_dict = OrderedDict() - encryption_data_dict['WrappedContentKey'] = wrapped_content_key - encryption_data_dict['EncryptionAgent'] = encryption_agent - encryption_data_dict['ContentEncryptionIV'] = _encode_base64(iv) - encryption_data_dict['KeyWrappingMetadata'] = {'EncryptionLibrary': 'Python ' + __version__} - - return encryption_data_dict - - -def _dict_to_encryption_data(encryption_data_dict): - ''' - Converts the specified dictionary to an EncryptionData object for - eventual use in decryption. - - :param dict encryption_data_dict: - The dictionary containing the encryption data. - :return: an _EncryptionData object built from the dictionary. - :rtype: _EncryptionData - ''' - try: - if encryption_data_dict['EncryptionAgent']['Protocol'] != _ENCRYPTION_PROTOCOL_V1: - raise ValueError(_ERROR_UNSUPPORTED_ENCRYPTION_VERSION) - except KeyError: - raise ValueError(_ERROR_UNSUPPORTED_ENCRYPTION_VERSION) - wrapped_content_key = encryption_data_dict['WrappedContentKey'] - wrapped_content_key = _WrappedContentKey(wrapped_content_key['Algorithm'], - _decode_base64_to_bytes(wrapped_content_key['EncryptedKey']), - wrapped_content_key['KeyId']) - - encryption_agent = encryption_data_dict['EncryptionAgent'] - encryption_agent = _EncryptionAgent(encryption_agent['EncryptionAlgorithm'], - encryption_agent['Protocol']) - - if 'KeyWrappingMetadata' in encryption_data_dict: - key_wrapping_metadata = encryption_data_dict['KeyWrappingMetadata'] - else: - key_wrapping_metadata = None - - encryption_data = _EncryptionData(_decode_base64_to_bytes(encryption_data_dict['ContentEncryptionIV']), - encryption_agent, - wrapped_content_key, - key_wrapping_metadata) - - return encryption_data - - -def _generate_AES_CBC_cipher(cek, iv): - ''' - Generates and returns an encryption cipher for AES CBC using the given cek and iv. 
- - :param bytes[] cek: The content encryption key for the cipher. - :param bytes[] iv: The initialization vector for the cipher. - :return: A cipher for encrypting in AES256 CBC. - :rtype: ~cryptography.hazmat.primitives.ciphers.Cipher - ''' - - backend = default_backend() - algorithm = AES(cek) - mode = CBC(iv) - return Cipher(algorithm, mode, backend) - - -def _validate_and_unwrap_cek(encryption_data, key_encryption_key=None, key_resolver=None): - ''' - Extracts and returns the content_encryption_key stored in the encryption_data object - and performs necessary validation on all parameters. - :param _EncryptionData encryption_data: - The encryption metadata of the retrieved value. - :param obj key_encryption_key: - The key_encryption_key used to unwrap the cek. Please refer to high-level service object - instance variables for more details. - :param func key_resolver: - A function used that, given a key_id, will return a key_encryption_key. Please refer - to high-level service object instance variables for more details. - :return: the content_encryption_key stored in the encryption_data object. - :rtype: bytes[] - ''' - - _validate_not_none('content_encryption_IV', encryption_data.content_encryption_IV) - _validate_not_none('encrypted_key', encryption_data.wrapped_content_key.encrypted_key) - - _validate_encryption_protocol_version(encryption_data.encryption_agent.protocol) - - content_encryption_key = None - - # If the resolver exists, give priority to the key it finds. - if key_resolver is not None: - key_encryption_key = key_resolver(encryption_data.wrapped_content_key.key_id) - - _validate_not_none('key_encryption_key', key_encryption_key) - _validate_key_encryption_key_unwrap(key_encryption_key) - _validate_kek_id(encryption_data.wrapped_content_key.key_id, key_encryption_key.get_kid()) - - # Will throw an exception if the specified algorithm is not supported. - content_encryption_key = key_encryption_key.unwrap_key(encryption_data.wrapped_content_key.encrypted_key, - encryption_data.wrapped_content_key.algorithm) - _validate_not_none('content_encryption_key', content_encryption_key) - - return content_encryption_key diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_error.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_error.py deleted file mode 100644 index 5c8e393197c9..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_error.py +++ /dev/null @@ -1,218 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- -from sys import version_info - -if version_info < (3,): - def _str(value): - if isinstance(value, unicode): - return value.encode('utf-8') - - return str(value) -else: - _str = str - - -def _to_str(value): - return _str(value) if value is not None else None - - -from azure.common import ( - AzureHttpError, - AzureConflictHttpError, - AzureMissingResourceHttpError, - AzureException, -) -from ._constants import ( - _ENCRYPTION_PROTOCOL_V1, -) - -_ERROR_CONFLICT = 'Conflict ({0})' -_ERROR_NOT_FOUND = 'Not found ({0})' -_ERROR_UNKNOWN = 'Unknown error ({0})' -_ERROR_STORAGE_MISSING_INFO = \ - 'You need to provide an account name and either an account_key or sas_token when creating a storage service.' 
-_ERROR_EMULATOR_DOES_NOT_SUPPORT_FILES = \ - 'The emulator does not support the file service.' -_ERROR_ACCESS_POLICY = \ - 'share_access_policy must be either SignedIdentifier or AccessPolicy ' + \ - 'instance' -_ERROR_PARALLEL_NOT_SEEKABLE = 'Parallel operations require a seekable stream.' -_ERROR_VALUE_SHOULD_BE_BYTES = '{0} should be of type bytes.' -_ERROR_VALUE_SHOULD_BE_BYTES_OR_STREAM = '{0} should be of type bytes or a readable file-like/io.IOBase stream object.' -_ERROR_VALUE_SHOULD_BE_SEEKABLE_STREAM = '{0} should be a seekable file-like/io.IOBase type stream object.' -_ERROR_VALUE_SHOULD_BE_STREAM = '{0} should be a file-like/io.IOBase type stream object with a read method.' -_ERROR_VALUE_NONE = '{0} should not be None.' -_ERROR_VALUE_NONE_OR_EMPTY = '{0} should not be None or empty.' -_ERROR_VALUE_NEGATIVE = '{0} should not be negative.' -_ERROR_START_END_NEEDED_FOR_MD5 = \ - 'Both end_range and start_range need to be specified ' + \ - 'for getting content MD5.' -_ERROR_RANGE_TOO_LARGE_FOR_MD5 = \ - 'Getting content MD5 for a range greater than 4MB ' + \ - 'is not supported.' -_ERROR_MD5_MISMATCH = \ - 'MD5 mismatch. Expected value is \'{0}\', computed value is \'{1}\'.' -_ERROR_TOO_MANY_ACCESS_POLICIES = \ - 'Too many access policies provided. The server does not support setting more than 5 access policies on a single resource.' -_ERROR_OBJECT_INVALID = \ - '{0} does not define a complete interface. Value of {1} is either missing or invalid.' -_ERROR_UNSUPPORTED_ENCRYPTION_VERSION = \ - 'Encryption version is not supported.' -_ERROR_DECRYPTION_FAILURE = \ - 'Decryption failed' -_ERROR_ENCRYPTION_REQUIRED = \ - 'Encryption required but no key was provided.' -_ERROR_DECRYPTION_REQUIRED = \ - 'Decryption required but neither key nor resolver was provided.' + \ - ' If you do not want to decypt, please do not set the require encryption flag.' -_ERROR_INVALID_KID = \ - 'Provided or resolved key-encryption-key does not match the id of key used to encrypt.' -_ERROR_UNSUPPORTED_ENCRYPTION_ALGORITHM = \ - 'Specified encryption algorithm is not supported.' -_ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION = 'The require_encryption flag is set, but encryption is not supported' + \ - ' for this method.' -_ERROR_UNKNOWN_KEY_WRAP_ALGORITHM = 'Unknown key wrap algorithm.' -_ERROR_DATA_NOT_ENCRYPTED = 'Encryption required, but received data does not contain appropriate metatadata.' + \ - 'Data was either not encrypted or metadata has been lost.' - - -def _dont_fail_on_exist(error): - ''' don't throw exception if the resource exists. - This is called by create_* APIs with fail_on_exist=False''' - if isinstance(error, AzureConflictHttpError): - return False - else: - raise error - - -def _dont_fail_not_exist(error): - ''' don't throw exception if the resource doesn't exist. 
- This is called by create_* APIs with fail_on_exist=False''' - if isinstance(error, AzureMissingResourceHttpError): - return False - else: - raise error - - -def _http_error_handler(http_error): - ''' Simple error handler for azure.''' - message = str(http_error) - error_code = None - - if 'x-ms-error-code' in http_error.respheader: - error_code = http_error.respheader['x-ms-error-code'] - message += ' ErrorCode: ' + error_code - - if http_error.respbody is not None: - message += '\n' + http_error.respbody.decode('utf-8-sig') - - ex = AzureHttpError(message, http_error.status) - ex.error_code = error_code - - raise ex - - -def _validate_type_bytes(param_name, param): - if not isinstance(param, bytes): - raise TypeError(_ERROR_VALUE_SHOULD_BE_BYTES.format(param_name)) - - -def _validate_type_bytes_or_stream(param_name, param): - if not (isinstance(param, bytes) or hasattr(param, 'read')): - raise TypeError(_ERROR_VALUE_SHOULD_BE_BYTES_OR_STREAM.format(param_name)) - - -def _validate_not_none(param_name, param): - if param is None: - raise ValueError(_ERROR_VALUE_NONE.format(param_name)) - - -def _validate_content_match(server_md5, computed_md5): - if server_md5 != computed_md5: - raise AzureException(_ERROR_MD5_MISMATCH.format(server_md5, computed_md5)) - - -def _validate_access_policies(identifiers): - if identifiers and len(identifiers) > 5: - raise AzureException(_ERROR_TOO_MANY_ACCESS_POLICIES) - - -def _validate_key_encryption_key_wrap(kek): - # Note that None is not callable and so will fail the second clause of each check. - if not hasattr(kek, 'wrap_key') or not callable(kek.wrap_key): - raise AttributeError(_ERROR_OBJECT_INVALID.format('key encryption key', 'wrap_key')) - if not hasattr(kek, 'get_kid') or not callable(kek.get_kid): - raise AttributeError(_ERROR_OBJECT_INVALID.format('key encryption key', 'get_kid')) - if not hasattr(kek, 'get_key_wrap_algorithm') or not callable(kek.get_key_wrap_algorithm): - raise AttributeError(_ERROR_OBJECT_INVALID.format('key encryption key', 'get_key_wrap_algorithm')) - - -def _validate_key_encryption_key_unwrap(kek): - if not hasattr(kek, 'get_kid') or not callable(kek.get_kid): - raise AttributeError(_ERROR_OBJECT_INVALID.format('key encryption key', 'get_kid')) - if not hasattr(kek, 'unwrap_key') or not callable(kek.unwrap_key): - raise AttributeError(_ERROR_OBJECT_INVALID.format('key encryption key', 'unwrap_key')) - - -def _validate_encryption_required(require_encryption, kek): - if require_encryption and (kek is None): - raise ValueError(_ERROR_ENCRYPTION_REQUIRED) - - -def _validate_decryption_required(require_encryption, kek, resolver): - if (require_encryption and (kek is None) and - (resolver is None)): - raise ValueError(_ERROR_DECRYPTION_REQUIRED) - - -def _validate_encryption_protocol_version(encryption_protocol): - if not (_ENCRYPTION_PROTOCOL_V1 == encryption_protocol): - raise ValueError(_ERROR_UNSUPPORTED_ENCRYPTION_VERSION) - - -def _validate_kek_id(kid, resolved_id): - if not (kid == resolved_id): - raise ValueError(_ERROR_INVALID_KID) - - -def _validate_encryption_unsupported(require_encryption, key_encryption_key): - if require_encryption or (key_encryption_key is not None): - raise ValueError(_ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION) - - -def _validate_user_delegation_key(user_delegation_key): - _validate_not_none('user_delegation_key.signed_oid', user_delegation_key.signed_oid) - _validate_not_none('user_delegation_key.signed_tid', user_delegation_key.signed_tid) - _validate_not_none('user_delegation_key.signed_start', 
user_delegation_key.signed_start) - _validate_not_none('user_delegation_key.signed_expiry', user_delegation_key.signed_expiry) - _validate_not_none('user_delegation_key.signed_version', user_delegation_key.signed_version) - _validate_not_none('user_delegation_key.signed_service', user_delegation_key.signed_service) - _validate_not_none('user_delegation_key.value', user_delegation_key.value) - - -# wraps a given exception with the desired exception type -def _wrap_exception(ex, desired_type): - msg = "" - if len(ex.args) > 0: - msg = ex.args[0] - if version_info >= (3,): - # Automatic chaining in Python 3 means we keep the trace - return desired_type(msg) - else: - # There isn't a good solution in 2 for keeping the stack trace - # in general, or that will not result in an error in 3 - # However, we can keep the previous error type and message - # TODO: In the future we will log the trace - return desired_type('{}: {}'.format(ex.__class__.__name__, msg)) - - -class AzureSigningError(AzureException): - """ - Represents a fatal error when attempting to sign a request. - In general, the cause of this exception is user error. For example, the given account key is not valid. - Please visit https://docs.microsoft.com/en-us/azure/storage/common/storage-create-storage-account for more info. - """ - pass diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_http/__init__.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_http/__init__.py deleted file mode 100644 index 2990ec80abe0..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_http/__init__.py +++ /dev/null @@ -1,74 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- - - -class HTTPError(Exception): - ''' - Represents an HTTP Exception when response status code >= 300. - - :ivar int status: - the status code of the response - :ivar str message: - the message - :ivar list headers: - the returned headers, as a list of (name, value) pairs - :ivar bytes body: - the body of the response - ''' - - def __init__(self, status, message, respheader, respbody): - self.status = status - self.respheader = respheader - self.respbody = respbody - Exception.__init__(self, message) - - -class HTTPResponse(object): - ''' - Represents a response from an HTTP request. - - :ivar int status: - the status code of the response - :ivar str message: - the message - :ivar dict headers: - the returned headers - :ivar bytes body: - the body of the response - ''' - - def __init__(self, status, message, headers, body): - self.status = status - self.message = message - self.headers = headers - self.body = body - - -class HTTPRequest(object): - ''' - Represents an HTTP Request. - - :ivar str host: - the host name to connect to - :ivar str method: - the method to use to connect (string such as GET, POST, PUT, etc.) - :ivar str path: - the uri fragment - :ivar dict query: - query parameters - :ivar dict headers: - header values - :ivar bytes body: - the body of the request. 
- ''' - - def __init__(self): - self.host = '' - self.method = '' - self.path = '' - self.query = {} # list of (name, value) - self.headers = {} # list of (header name, header value) - self.body = '' diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_http/httpclient.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_http/httpclient.py deleted file mode 100644 index b5847660e296..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_http/httpclient.py +++ /dev/null @@ -1,107 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- - -import logging -from . import HTTPResponse -from .._serialization import _get_data_bytes_or_stream_only -logger = logging.getLogger(__name__) - - -class _HTTPClient(object): - ''' - Takes the request and sends it to cloud service and returns the response. - ''' - - def __init__(self, protocol=None, session=None, timeout=None): - ''' - :param str protocol: - http or https. - :param requests.Session session: - session object created with requests library (or compatible). - :param int timeout: - timeout for the http request, in seconds. - ''' - self.protocol = protocol - self.session = session - self.timeout = timeout - - # By default, requests adds an Accept:*/* and Accept-Encoding to the session, - # which causes issues with some Azure REST APIs. Removing these here gives us - # the flexibility to add it back on a case by case basis. - if 'Accept' in self.session.headers: - del self.session.headers['Accept'] - - if 'Accept-Encoding' in self.session.headers: - del self.session.headers['Accept-Encoding'] - - self.proxies = None - - def set_proxy(self, host, port, user, password): - ''' - Sets the proxy server host and port for the HTTP CONNECT Tunnelling. - - Note that we set the proxies directly on the request later on rather than - using the session object as requests has a bug where session proxy is ignored - in favor of environment proxy. So, auth will not work unless it is passed - directly when making the request as this overrides both. - - :param str host: - Address of the proxy. Ex: '192.168.0.100' - :param int port: - Port of the proxy. Ex: 6000 - :param str user: - User for proxy authorization. - :param str password: - Password for proxy authorization. - ''' - if user and password: - proxy_string = '{}:{}@{}:{}'.format(user, password, host, port) - else: - proxy_string = '{}:{}'.format(host, port) - - self.proxies = {'http': 'http://{}'.format(proxy_string), - 'https': 'https://{}'.format(proxy_string)} - - def perform_request(self, request): - ''' - Sends an HTTPRequest to Azure Storage and returns an HTTPResponse. If - the response code indicates an error, raise an HTTPError. - - :param HTTPRequest request: - The request to serialize and send. - :return: An HTTPResponse containing the parsed HTTP response. 
- :rtype: :class:`~azure.storage.common._http.HTTPResponse` - ''' - # Verify the body is in bytes or either a file-like/stream object - if request.body: - request.body = _get_data_bytes_or_stream_only('request.body', request.body) - - # Construct the URI - uri = self.protocol.lower() + '://' + request.host + request.path - - # Send the request - response = self.session.request(request.method, - uri, - params=request.query, - headers=request.headers, - data=request.body or None, - timeout=self.timeout, - proxies=self.proxies) - - # Parse the response - status = int(response.status_code) - response_headers = {} - for key, name in response.headers.items(): - # Preserve the case of metadata - if key.lower().startswith('x-ms-meta-'): - response_headers[key] = name - else: - response_headers[key.lower()] = name - - wrap = HTTPResponse(status, response.reason, response_headers, response.content) - response.close() - - return wrap diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_serialization.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_serialization.py deleted file mode 100644 index af27ce5b0089..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/_serialization.py +++ /dev/null @@ -1,371 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- -import sys -import uuid -from datetime import date -from io import (BytesIO, IOBase, SEEK_SET, SEEK_END, UnsupportedOperation) -from os import fstat -from time import time -from wsgiref.handlers import format_date_time - -from dateutil.tz import tzutc - -if sys.version_info >= (3,): - from urllib.parse import quote as url_quote -else: - from urllib2 import quote as url_quote - -try: - from xml.etree import cElementTree as ETree -except ImportError: - from xml.etree import ElementTree as ETree - -from ._error import ( - _ERROR_VALUE_SHOULD_BE_BYTES, - _ERROR_VALUE_SHOULD_BE_BYTES_OR_STREAM, - _ERROR_VALUE_SHOULD_BE_SEEKABLE_STREAM -) -from .models import ( - _unicode_type, -) -from ._common_conversion import ( - _str, -) - - -def _to_utc_datetime(value): - # Azure expects the date value passed in to be UTC. - # Azure will always return values as UTC. - # If a date is passed in without timezone info, it is assumed to be UTC. - if value.tzinfo: - value = value.astimezone(tzutc()) - return value.strftime('%Y-%m-%dT%H:%M:%SZ') - - -def _update_request(request, x_ms_version, user_agent_string): - # Verify body - if request.body: - request.body = _get_data_bytes_or_stream_only('request.body', request.body) - length = _len_plus(request.body) - - # only scenario where this case is plausible is if the stream object is not seekable. - if length is None: - raise ValueError(_ERROR_VALUE_SHOULD_BE_SEEKABLE_STREAM) - - # if it is PUT, POST, MERGE, DELETE, need to add content-length to header. 
- if request.method in ['PUT', 'POST', 'MERGE', 'DELETE']: - request.headers['Content-Length'] = str(length) - - # append addtional headers based on the service - request.headers['x-ms-version'] = x_ms_version - request.headers['User-Agent'] = user_agent_string - request.headers['x-ms-client-request-id'] = str(uuid.uuid1()) - - # If the host has a path component (ex local storage), move it - path = request.host.split('/', 1) - if len(path) == 2: - request.host = path[0] - request.path = '/{}{}'.format(path[1], request.path) - - # Encode and optionally add local storage prefix to path - request.path = url_quote(request.path, '/()$=\',~') - - -def _add_metadata_headers(metadata, request): - if metadata: - if not request.headers: - request.headers = {} - for name, value in metadata.items(): - request.headers['x-ms-meta-' + name] = value - - -def _add_date_header(request): - current_time = format_date_time(time()) - request.headers['x-ms-date'] = current_time - - -def _get_data_bytes_only(param_name, param_value): - '''Validates the request body passed in and converts it to bytes - if our policy allows it.''' - if param_value is None: - return b'' - - if isinstance(param_value, bytes): - return param_value - - raise TypeError(_ERROR_VALUE_SHOULD_BE_BYTES.format(param_name)) - - -def _get_data_bytes_or_stream_only(param_name, param_value): - '''Validates the request body passed in is a stream/file-like or bytes - object.''' - if param_value is None: - return b'' - - if isinstance(param_value, bytes) or hasattr(param_value, 'read'): - return param_value - - raise TypeError(_ERROR_VALUE_SHOULD_BE_BYTES_OR_STREAM.format(param_name)) - - -def _get_request_body(request_body): - '''Converts an object into a request body. If it's None - we'll return an empty string, if it's one of our objects it'll - convert it to XML and return it. 
Otherwise we just use the object - directly''' - if request_body is None: - return b'' - - if isinstance(request_body, bytes) or isinstance(request_body, IOBase): - return request_body - - if isinstance(request_body, _unicode_type): - return request_body.encode('utf-8') - - request_body = str(request_body) - if isinstance(request_body, _unicode_type): - return request_body.encode('utf-8') - - return request_body - - -def _convert_signed_identifiers_to_xml(signed_identifiers): - if signed_identifiers is None: - return '' - - sis = ETree.Element('SignedIdentifiers') - for id, access_policy in signed_identifiers.items(): - # Root signed identifers element - si = ETree.SubElement(sis, 'SignedIdentifier') - - # Id element - ETree.SubElement(si, 'Id').text = id - - # Access policy element - policy = ETree.SubElement(si, 'AccessPolicy') - - if access_policy.start: - start = access_policy.start - if isinstance(access_policy.start, date): - start = _to_utc_datetime(start) - ETree.SubElement(policy, 'Start').text = start - - if access_policy.expiry: - expiry = access_policy.expiry - if isinstance(access_policy.expiry, date): - expiry = _to_utc_datetime(expiry) - ETree.SubElement(policy, 'Expiry').text = expiry - - if access_policy.permission: - ETree.SubElement(policy, 'Permission').text = _str(access_policy.permission) - - # Add xml declaration and serialize - try: - stream = BytesIO() - ETree.ElementTree(sis).write(stream, xml_declaration=True, encoding='utf-8', method='xml') - except: - raise - finally: - output = stream.getvalue() - stream.close() - - return output - - -def _convert_service_properties_to_xml(logging, hour_metrics, minute_metrics, - cors, target_version=None, delete_retention_policy=None, static_website=None): - ''' - - - - version-number - true|false - true|false - true|false - - true|false - number-of-days - - - - version-number - true|false - true|false - - true|false - number-of-days - - - - version-number - true|false - true|false - - true|false - number-of-days - - - - - comma-separated-list-of-allowed-origins - comma-separated-list-of-HTTP-verb - max-caching-age-in-seconds - comma-seperated-list-of-response-headers - comma-seperated-list-of-request-headers - - - - true|false - number-of-days - - - true|false - - - - - ''' - service_properties_element = ETree.Element('StorageServiceProperties') - - # Logging - if logging: - logging_element = ETree.SubElement(service_properties_element, 'Logging') - ETree.SubElement(logging_element, 'Version').text = logging.version - ETree.SubElement(logging_element, 'Delete').text = str(logging.delete) - ETree.SubElement(logging_element, 'Read').text = str(logging.read) - ETree.SubElement(logging_element, 'Write').text = str(logging.write) - - retention_element = ETree.SubElement(logging_element, 'RetentionPolicy') - _convert_retention_policy_to_xml(logging.retention_policy, retention_element) - - # HourMetrics - if hour_metrics: - hour_metrics_element = ETree.SubElement(service_properties_element, 'HourMetrics') - _convert_metrics_to_xml(hour_metrics, hour_metrics_element) - - # MinuteMetrics - if minute_metrics: - minute_metrics_element = ETree.SubElement(service_properties_element, 'MinuteMetrics') - _convert_metrics_to_xml(minute_metrics, minute_metrics_element) - - # CORS - # Make sure to still serialize empty list - if cors is not None: - cors_element = ETree.SubElement(service_properties_element, 'Cors') - for rule in cors: - cors_rule = ETree.SubElement(cors_element, 'CorsRule') - ETree.SubElement(cors_rule, 'AllowedOrigins').text 
= ",".join(rule.allowed_origins) - ETree.SubElement(cors_rule, 'AllowedMethods').text = ",".join(rule.allowed_methods) - ETree.SubElement(cors_rule, 'MaxAgeInSeconds').text = str(rule.max_age_in_seconds) - ETree.SubElement(cors_rule, 'ExposedHeaders').text = ",".join(rule.exposed_headers) - ETree.SubElement(cors_rule, 'AllowedHeaders').text = ",".join(rule.allowed_headers) - - # Target version - if target_version: - ETree.SubElement(service_properties_element, 'DefaultServiceVersion').text = target_version - - # DeleteRetentionPolicy - if delete_retention_policy: - policy_element = ETree.SubElement(service_properties_element, 'DeleteRetentionPolicy') - ETree.SubElement(policy_element, 'Enabled').text = str(delete_retention_policy.enabled) - - if delete_retention_policy.enabled: - ETree.SubElement(policy_element, 'Days').text = str(delete_retention_policy.days) - - # StaticWebsite - if static_website: - static_website_element = ETree.SubElement(service_properties_element, 'StaticWebsite') - ETree.SubElement(static_website_element, 'Enabled').text = str(static_website.enabled) - - if static_website.enabled: - - if static_website.index_document is not None: - ETree.SubElement(static_website_element, 'IndexDocument').text = str(static_website.index_document) - - if static_website.error_document_404_path is not None: - ETree.SubElement(static_website_element, 'ErrorDocument404Path').text = \ - str(static_website.error_document_404_path) - - # Add xml declaration and serialize - try: - stream = BytesIO() - ETree.ElementTree(service_properties_element).write(stream, xml_declaration=True, encoding='utf-8', - method='xml') - except: - raise - finally: - output = stream.getvalue() - stream.close() - - return output - - -def _convert_metrics_to_xml(metrics, root): - ''' - version-number - true|false - true|false - - true|false - number-of-days - - ''' - # Version - ETree.SubElement(root, 'Version').text = metrics.version - - # Enabled - ETree.SubElement(root, 'Enabled').text = str(metrics.enabled) - - # IncludeAPIs - if metrics.enabled and metrics.include_apis is not None: - ETree.SubElement(root, 'IncludeAPIs').text = str(metrics.include_apis) - - # RetentionPolicy - retention_element = ETree.SubElement(root, 'RetentionPolicy') - _convert_retention_policy_to_xml(metrics.retention_policy, retention_element) - - -def _convert_retention_policy_to_xml(retention_policy, root): - ''' - true|false - number-of-days - ''' - # Enabled - ETree.SubElement(root, 'Enabled').text = str(retention_policy.enabled) - - # Days - if retention_policy.enabled and retention_policy.days: - ETree.SubElement(root, 'Days').text = str(retention_policy.days) - - -def _len_plus(data): - length = None - # Check if object implements the __len__ method, covers most input cases such as bytearray. - try: - length = len(data) - except: - pass - - if not length: - # Check if the stream is a file-like stream object. - # If so, calculate the size using the file descriptor. - try: - fileno = data.fileno() - except (AttributeError, UnsupportedOperation): - pass - else: - return fstat(fileno).st_size - - # If the stream is seekable and tell() is implemented, calculate the stream size. 
- try: - current_position = data.tell() - data.seek(0, SEEK_END) - length = data.tell() - current_position - data.seek(current_position, SEEK_SET) - except (AttributeError, UnsupportedOperation): - pass - - return length diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/cloudstorageaccount.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/cloudstorageaccount.py deleted file mode 100644 index 459146849163..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/cloudstorageaccount.py +++ /dev/null @@ -1,198 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- - -# Note that we import BlobService/QueueService/FileService on demand -# because this module is imported by azure/storage/__init__ -# ie. we don't want 'import azure.storage' to trigger an automatic import -# of blob/queue/file packages. - -from ._error import _validate_not_none -from .models import ( - ResourceTypes, - Services, - AccountPermissions, -) -from .sharedaccesssignature import ( - SharedAccessSignature, -) - - -class CloudStorageAccount(object): - """ - Provides a factory for creating the blob, queue, and file services - with a common account name and account key or sas token. Users can either - use the factory or can construct the appropriate service directly. - """ - - def __init__(self, account_name=None, account_key=None, sas_token=None, - is_emulated=None, endpoint_suffix=None): - ''' - :param str account_name: - The storage account name. This is used to authenticate requests - signed with an account key and to construct the storage endpoint. It - is required unless is_emulated is used. - :param str account_key: - The storage account key. This is used for shared key authentication. - :param str sas_token: - A shared access signature token to use to authenticate requests - instead of the account key. If account key and sas token are both - specified, account key will be used to sign. - :param bool is_emulated: - Whether to use the emulator. Defaults to False. If specified, will - override all other parameters. - :param str endpoint_suffix: - The host base component of the url, minus the account name. Defaults - to Azure (core.windows.net). Override this to use a sovereign cloud. - ''' - self.account_name = account_name - self.account_key = account_key - self.sas_token = sas_token - self.is_emulated = is_emulated - self.endpoint_suffix = endpoint_suffix - - def create_block_blob_service(self): - ''' - Creates a BlockBlobService object with the settings specified in the - CloudStorageAccount. - - :return: A service object. - :rtype: :class:`~azure.storage.blob.blockblobservice.BlockBlobService` - ''' - try: - from azure.storage.blob.blockblobservice import BlockBlobService - return BlockBlobService(self.account_name, self.account_key, - sas_token=self.sas_token, - is_emulated=self.is_emulated, - endpoint_suffix=self.endpoint_suffix) - except ImportError: - raise Exception('The package azure-storage-blob is required. ' - + 'Please install it using "pip install azure-storage-blob"') - - def create_page_blob_service(self): - ''' - Creates a PageBlobService object with the settings specified in the - CloudStorageAccount. - - :return: A service object. 
- :rtype: :class:`~azure.storage.blob.pageblobservice.PageBlobService` - ''' - try: - from azure.storage.blob.pageblobservice import PageBlobService - return PageBlobService(self.account_name, self.account_key, - sas_token=self.sas_token, - is_emulated=self.is_emulated, - endpoint_suffix=self.endpoint_suffix) - except ImportError: - raise Exception('The package azure-storage-blob is required. ' - + 'Please install it using "pip install azure-storage-blob"') - - def create_append_blob_service(self): - ''' - Creates a AppendBlobService object with the settings specified in the - CloudStorageAccount. - - :return: A service object. - :rtype: :class:`~azure.storage.blob.appendblobservice.AppendBlobService` - ''' - try: - from azure.storage.blob.appendblobservice import AppendBlobService - return AppendBlobService(self.account_name, self.account_key, - sas_token=self.sas_token, - is_emulated=self.is_emulated, - endpoint_suffix=self.endpoint_suffix) - except ImportError: - raise Exception('The package azure-storage-blob is required. ' - + 'Please install it using "pip install azure-storage-blob"') - - def create_queue_service(self): - ''' - Creates a QueueService object with the settings specified in the - CloudStorageAccount. - - :return: A service object. - :rtype: :class:`~azure.storage.queue.queueservice.QueueService` - ''' - try: - from azure.storage.queue.queueservice import QueueService - return QueueService(self.account_name, self.account_key, - sas_token=self.sas_token, - is_emulated=self.is_emulated, - endpoint_suffix=self.endpoint_suffix) - except ImportError: - raise Exception('The package azure-storage-queue is required. ' - + 'Please install it using "pip install azure-storage-queue"') - - def create_file_service(self): - ''' - Creates a FileService object with the settings specified in the - CloudStorageAccount. - - :return: A service object. - :rtype: :class:`~azure.storage.file.fileservice.FileService` - ''' - try: - from azure.storage.file.fileservice import FileService - return FileService(self.account_name, self.account_key, - sas_token=self.sas_token, - endpoint_suffix=self.endpoint_suffix) - except ImportError: - raise Exception('The package azure-storage-file is required. ' - + 'Please install it using "pip install azure-storage-file"') - - def generate_shared_access_signature(self, services, resource_types, - permission, expiry, start=None, - ip=None, protocol=None): - ''' - Generates a shared access signature for the account. - Use the returned signature with the sas_token parameter of the service - or to create a new account object. - - :param Services services: - Specifies the services accessible with the account SAS. You can - combine values to provide access to more than one service. - :param ResourceTypes resource_types: - Specifies the resource types that are accessible with the account - SAS. You can combine values to provide access to more than one - resource type. - :param AccountPermissions permission: - The permissions associated with the shared access signature. The - user is restricted to operations allowed by the permissions. - Required unless an id is given referencing a stored access policy - which contains this field. This field must be omitted if it has been - specified in an associated stored access policy. You can combine - values to provide more than one permission. - :param expiry: - The time at which the shared access signature becomes invalid. - Required unless an id is given referencing a stored access policy - which contains this field. 
This field must be omitted if it has - been specified in an associated stored access policy. Azure will always - convert values to UTC. If a date is passed in without timezone info, it - is assumed to be UTC. - :type expiry: datetime or str - :param start: - The time at which the shared access signature becomes valid. If - omitted, start time for this call is assumed to be the time when the - storage service receives the request. Azure will always convert values - to UTC. If a date is passed in without timezone info, it is assumed to - be UTC. - :type start: datetime or str - :param str ip: - Specifies an IP address or a range of IP addresses from which to accept requests. - If the IP address from which the request originates does not match the IP address - or address range specified on the SAS token, the request is not authenticated. - For example, specifying sip=168.1.5.65 or sip=168.1.5.60-168.1.5.70 on the SAS - restricts the request to those IP addresses. - :param str protocol: - Specifies the protocol permitted for a request made. Possible values are - both HTTPS and HTTP (https,http) or HTTPS only (https). The default value - is https,http. Note that HTTP only is not a permitted value. - ''' - _validate_not_none('self.account_name', self.account_name) - _validate_not_none('self.account_key', self.account_key) - - sas = SharedAccessSignature(self.account_name, self.account_key) - return sas.generate_account(services, resource_types, permission, - expiry, start=start, ip=ip, protocol=protocol) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/models.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/models.py deleted file mode 100644 index 5ada54ce29dd..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/models.py +++ /dev/null @@ -1,672 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- -import sys - -if sys.version_info < (3,): - from collections import Iterable - - _unicode_type = unicode -else: - from collections.abc import Iterable - - _unicode_type = str - -from ._error import ( - _validate_not_none -) - - -class _HeaderDict(dict): - def __getitem__(self, index): - return super(_HeaderDict, self).__getitem__(index.lower()) - - -class _list(list): - '''Used so that additional properties can be set on the return list''' - pass - - -class _dict(dict): - '''Used so that additional properties can be set on the return dictionary''' - pass - - -class _OperationContext(object): - ''' - Contains information that lasts the lifetime of an operation. This operation - may span multiple calls to the Azure service. - - :ivar bool location_lock: - Whether the location should be locked for this operation. - :ivar str location: - The location to lock to. - ''' - - def __init__(self, location_lock=False): - self.location_lock = location_lock - self.host_location = None - - -class ListGenerator(Iterable): - ''' - A generator object used to list storage resources. The generator will lazily - follow the continuation tokens returned by the service and stop when all - resources have been returned or max_results is reached. 
- - If max_results is specified and the account has more than that number of - resources, the generator will have a populated next_marker field once it - finishes. This marker can be used to create a new generator if more - results are desired. - ''' - - def __init__(self, resources, list_method, list_args, list_kwargs): - self.items = resources - self.next_marker = resources.next_marker - - self._list_method = list_method - self._list_args = list_args - self._list_kwargs = list_kwargs - - def __iter__(self): - # return results - for i in self.items: - yield i - - while True: - # if no more results on the service, return - if not self.next_marker: - break - - # update the marker args - self._list_kwargs['marker'] = self.next_marker - - # handle max results, if present - max_results = self._list_kwargs.get('max_results') - if max_results is not None: - max_results = max_results - len(self.items) - - # if we've reached max_results, return - # else, update the max_results arg - if max_results <= 0: - break - else: - self._list_kwargs['max_results'] = max_results - - # get the next segment - resources = self._list_method(*self._list_args, **self._list_kwargs) - self.items = resources - self.next_marker = resources.next_marker - - # return results - for i in self.items: - yield i - - -class RetryContext(object): - ''' - Contains the request and response information that can be used to determine - whether and how to retry. This context is stored across retries and may be - used to store other information relevant to the retry strategy. - - :ivar ~azure.storage.common._http.HTTPRequest request: - The request sent to the storage service. - :ivar ~azure.storage.common._http.HTTPResponse response: - The response returned by the storage service. - :ivar LocationMode location_mode: - The location the request was sent to. - :ivar Exception exception: - The exception that just occurred. The type could either be AzureException (for HTTP errors), - or other Exception types from lower layers, which are kept unwrapped for easier processing. - :ivar bool is_emulated: - Whether retry is targeting the emulator. The default value is False. - :ivar int body_position: - The initial position of the body stream. It is useful when retries happen and we need to rewind the stream. - ''' - - def __init__(self): - self.request = None - self.response = None - self.location_mode = None - self.exception = None - self.is_emulated = False - self.body_position = None - - -class LocationMode(object): - ''' - Specifies the location the request should be sent to. This mode only applies - for RA-GRS accounts which allow secondary read access. All other account types - must use PRIMARY. - ''' - - PRIMARY = 'primary' - ''' Requests should be sent to the primary location. ''' - - SECONDARY = 'secondary' - ''' Requests should be sent to the secondary location, if possible. ''' - - -class RetentionPolicy(object): - ''' - By default, Storage Analytics will not delete any logging or metrics data. Blobs - will continue to be written until the shared 20TB limit is - reached. Once the 20TB limit is reached, Storage Analytics will stop writing - new data and will not resume until free space is available. This 20TB limit - is independent of the total limit for your storage account. - - There are two ways to delete Storage Analytics data: by manually making deletion - requests or by setting a data retention policy. 
Manual requests to delete Storage - Analytics data are billable, but delete requests resulting from a retention policy - are not billable. - ''' - - def __init__(self, enabled=False, days=None): - ''' - :param bool enabled: - Indicates whether a retention policy is enabled for the - storage service. If disabled, logging and metrics data will be retained - infinitely by the service unless explicitly deleted. - :param int days: - Required if enabled is true. Indicates the number of - days that metrics or logging data should be retained. All data older - than this value will be deleted. The minimum value you can specify is 1; - the largest value is 365 (one year). - ''' - _validate_not_none("enabled", enabled) - if enabled: - _validate_not_none("days", days) - - self.enabled = enabled - self.days = days - - -class Logging(object): - ''' - Storage Analytics logs detailed information about successful and failed requests - to a storage service. This information can be used to monitor individual requests - and to diagnose issues with a storage service. Requests are logged on a best-effort - basis. - - All logs are stored in block blobs in a container named $logs, which is - automatically created when Storage Analytics is enabled for a storage account. - The $logs container is located in the blob namespace of the storage account. - This container cannot be deleted once Storage Analytics has been enabled, though - its contents can be deleted. - - For more information, see https://msdn.microsoft.com/en-us/library/azure/hh343262.aspx - ''' - - def __init__(self, delete=False, read=False, write=False, - retention_policy=None): - ''' - :param bool delete: - Indicates whether all delete requests should be logged. - :param bool read: - Indicates whether all read requests should be logged. - :param bool write: - Indicates whether all write requests should be logged. - :param RetentionPolicy retention_policy: - The retention policy for the metrics. - ''' - _validate_not_none("read", read) - _validate_not_none("write", write) - _validate_not_none("delete", delete) - - self.version = u'1.0' - self.delete = delete - self.read = read - self.write = write - self.retention_policy = retention_policy if retention_policy else RetentionPolicy() - - -class Metrics(object): - ''' - Metrics include aggregated transaction statistics and capacity data about requests - to a storage service. Transactions are reported at both the API operation level - as well as at the storage service level, and capacity is reported at the storage - service level. Metrics data can be used to analyze storage service usage, diagnose - issues with requests made against the storage service, and to improve the - performance of applications that use a service. - - For more information, see https://msdn.microsoft.com/en-us/library/azure/hh343258.aspx - ''' - - def __init__(self, enabled=False, include_apis=None, - retention_policy=None): - ''' - :param bool enabled: - Indicates whether metrics are enabled for - the service. - :param bool include_apis: - Required if enabled is True. Indicates whether metrics - should generate summary statistics for called API operations. - :param RetentionPolicy retention_policy: - The retention policy for the metrics. 
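A short configuration sketch (the seven-day window is an illustrative assumption; these objects are typically handed to the owning service client's set_*_service_properties call):

    retention = RetentionPolicy(enabled=True, days=7)   # keep analytics data for a week
    logging = Logging(read=True, write=True, delete=True, retention_policy=retention)
    hour_metrics = Metrics(enabled=True, include_apis=True, retention_policy=retention)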
- ''' - _validate_not_none("enabled", enabled) - if enabled: - _validate_not_none("include_apis", include_apis) - - self.version = u'1.0' - self.enabled = enabled - self.include_apis = include_apis - self.retention_policy = retention_policy if retention_policy else RetentionPolicy() - - -class CorsRule(object): - ''' - CORS is an HTTP feature that enables a web application running under one domain - to access resources in another domain. Web browsers implement a security - restriction known as same-origin policy that prevents a web page from calling - APIs in a different domain; CORS provides a secure way to allow one domain - (the origin domain) to call APIs in another domain. - - For more information, see https://msdn.microsoft.com/en-us/library/azure/dn535601.aspx - ''' - - def __init__(self, allowed_origins, allowed_methods, max_age_in_seconds=0, - exposed_headers=None, allowed_headers=None): - ''' - :param allowed_origins: - A list of origin domains that will be allowed via CORS, or "*" to allow - all domains. The list of must contain at least one entry. Limited to 64 - origin domains. Each allowed origin can have up to 256 characters. - :type allowed_origins: list(str) - :param allowed_methods: - A list of HTTP methods that are allowed to be executed by the origin. - The list of must contain at least one entry. For Azure Storage, - permitted methods are DELETE, GET, HEAD, MERGE, POST, OPTIONS or PUT. - :type allowed_methods: list(str) - :param int max_age_in_seconds: - The number of seconds that the client/browser should cache a - preflight response. - :param exposed_headers: - Defaults to an empty list. A list of response headers to expose to CORS - clients. Limited to 64 defined headers and two prefixed headers. Each - header can be up to 256 characters. - :type exposed_headers: list(str) - :param allowed_headers: - Defaults to an empty list. A list of headers allowed to be part of - the cross-origin request. Limited to 64 defined headers and 2 prefixed - headers. Each header can be up to 256 characters. - :type allowed_headers: list(str) - ''' - _validate_not_none("allowed_origins", allowed_origins) - _validate_not_none("allowed_methods", allowed_methods) - _validate_not_none("max_age_in_seconds", max_age_in_seconds) - - self.allowed_origins = allowed_origins if allowed_origins else list() - self.allowed_methods = allowed_methods if allowed_methods else list() - self.max_age_in_seconds = max_age_in_seconds - self.exposed_headers = exposed_headers if exposed_headers else list() - self.allowed_headers = allowed_headers if allowed_headers else list() - - -class DeleteRetentionPolicy(object): - ''' - To set DeleteRetentionPolicy, you must call Set Blob Service Properties using version 2017-07-29 or later. - This class groups the settings related to delete retention policy. - ''' - - def __init__(self, enabled=False, days=None): - ''' - :param bool enabled: - Required. Indicates whether a deleted blob or snapshot is retained or immediately removed by delete operation. - :param int days: - Required only if Enabled is true. Indicates the number of days that deleted blob be retained. - All data older than this value will be permanently deleted. - The minimum value you can specify is 1; the largest value is 365. - ''' - _validate_not_none("enabled", enabled) - if enabled: - _validate_not_none("days", days) - - self.enabled = enabled - self.days = days - - -class StaticWebsite(object): - ''' - Class representing the service properties pertaining to static websites. 
- To set StaticWebsite, you must call Set Blob Service Properties using version 2018-03-28 or later. - ''' - - def __init__(self, enabled=False, index_document=None, error_document_404_path=None): - ''' - :param bool enabled: - Required. True if static websites should be enabled on the blob service for the corresponding Storage Account. - :param str index_document: - Represents the name of the index document. This is commonly "index.html". - :param str error_document_404_path: - Represents the path to the error document that should be shown when an error 404 is issued, - in other words, when a browser requests a page that does not exist. - ''' - _validate_not_none("enabled", enabled) - - self.enabled = enabled - self.index_document = index_document - self.error_document_404_path = error_document_404_path - - -class ServiceProperties(object): - ''' - Returned by get_*_service_properties functions. Contains the properties of a - storage service, including Analytics and CORS rules. - - Azure Storage Analytics performs logging and provides metrics data for a storage - account. You can use this data to trace requests, analyze usage trends, and - diagnose issues with your storage account. To use Storage Analytics, you must - enable it individually for each service you want to monitor. - - The aggregated data is stored in a well-known blob (for logging) and in well-known - tables (for metrics), which may be accessed using the Blob service and Table - service APIs. - - For an in-depth guide on using Storage Analytics and other tools to identify, - diagnose, and troubleshoot Azure Storage-related issues, see - http://azure.microsoft.com/documentation/articles/storage-monitoring-diagnosing-troubleshooting/ - - For more information on CORS, see https://msdn.microsoft.com/en-us/library/azure/dn535601.aspx - ''' - - pass - - -class ServiceStats(object): - ''' - Returned by get_*_service_stats functions. Contains statistics related to - replication for the given service. It is only available when read-access - geo-redundant replication is enabled for the storage account. - - :ivar GeoReplication geo_replication: - An object containing statistics related to replication for the given service. - ''' - pass - - -class GeoReplication(object): - ''' - Contains statistics related to replication for the given service. - - :ivar str status: - The status of the secondary location. Possible values are: - live: Indicates that the secondary location is active and operational. - bootstrap: Indicates initial synchronization from the primary location - to the secondary location is in progress. This typically occurs - when replication is first enabled. - unavailable: Indicates that the secondary location is temporarily - unavailable. - :ivar date last_sync_time: - A GMT date value, to the second. All primary writes preceding this value - are guaranteed to be available for read operations at the secondary. - Primary writes after this point in time may or may not be available for - reads. The value may be empty if LastSyncTime is not available. This can - happen if the replication status is bootstrap or unavailable. Although - geo-replication is continuously enabled, the LastSyncTime result may - reflect a cached value from the service that is refreshed every few minutes. - ''' - pass - - -class AccessPolicy(object): - ''' - Access Policy class used by the set and get acl methods in each service. 
- - A stored access policy can specify the start time, expiry time, and - permissions for the Shared Access Signatures with which it's associated. - Depending on how you want to control access to your resource, you can - specify all of these parameters within the stored access policy, and omit - them from the URL for the Shared Access Signature. Doing so permits you to - modify the associated signature's behavior at any time, as well as to revoke - it. Or you can specify one or more of the access policy parameters within - the stored access policy, and the others on the URL. Finally, you can - specify all of the parameters on the URL. In this case, you can use the - stored access policy to revoke the signature, but not to modify its behavior. - - Together the Shared Access Signature and the stored access policy must - include all fields required to authenticate the signature. If any required - fields are missing, the request will fail. Likewise, if a field is specified - both in the Shared Access Signature URL and in the stored access policy, the - request will fail with status code 400 (Bad Request). - ''' - - def __init__(self, permission=None, expiry=None, start=None): - ''' - :param str permission: - The permissions associated with the shared access signature. The - user is restricted to operations allowed by the permissions. - Required unless an id is given referencing a stored access policy - which contains this field. This field must be omitted if it has been - specified in an associated stored access policy. - :param expiry: - The time at which the shared access signature becomes invalid. - Required unless an id is given referencing a stored access policy - which contains this field. This field must be omitted if it has - been specified in an associated stored access policy. Azure will always - convert values to UTC. If a date is passed in without timezone info, it - is assumed to be UTC. - :type expiry: datetime or str - :param start: - The time at which the shared access signature becomes valid. If - omitted, start time for this call is assumed to be the time when the - storage service receives the request. Azure will always convert values - to UTC. If a date is passed in without timezone info, it is assumed to - be UTC. - :type start: datetime or str - ''' - self.start = start - self.expiry = expiry - self.permission = permission - - -class Protocol(object): - ''' - Specifies the protocol permitted for a SAS token. Note that HTTP only is - not allowed. - ''' - - HTTPS = 'https' - ''' Allow HTTPS requests only. ''' - - HTTPS_HTTP = 'https,http' - ''' Allow HTTP and HTTPS requests. ''' - - -class ResourceTypes(object): - ''' - Specifies the resource types that are accessible with the account SAS. - - :ivar ResourceTypes ResourceTypes.CONTAINER: - Access to container-level APIs (e.g., Create/Delete Container, - Create/Delete Queue, Create/Delete Share, - List Blobs/Files and Directories) - :ivar ResourceTypes ResourceTypes.OBJECT: - Access to object-level APIs for blobs, queue messages, and - files(e.g. Put Blob, Query Entity, Get Messages, Create File, etc.) 
- :ivar ResourceTypes ResourceTypes.SERVICE: - Access to service-level APIs (e.g., Get/Set Service Properties, - Get Service Stats, List Containers/Queues/Shares) - ''' - - def __init__(self, service=False, container=False, object=False, _str=None): - ''' - :param bool service: - Access to service-level APIs (e.g., Get/Set Service Properties, - Get Service Stats, List Containers/Queues/Shares) - :param bool container: - Access to container-level APIs (e.g., Create/Delete Container, - Create/Delete Queue, Create/Delete Share, - List Blobs/Files and Directories) - :param bool object: - Access to object-level APIs for blobs, queue messages, and - files(e.g. Put Blob, Query Entity, Get Messages, Create File, etc.) - :param str _str: - A string representing the resource types. - ''' - if not _str: - _str = '' - self.service = service or ('s' in _str) - self.container = container or ('c' in _str) - self.object = object or ('o' in _str) - - def __or__(self, other): - return ResourceTypes(_str=str(self) + str(other)) - - def __add__(self, other): - return ResourceTypes(_str=str(self) + str(other)) - - def __str__(self): - return (('s' if self.service else '') + - ('c' if self.container else '') + - ('o' if self.object else '')) - - -ResourceTypes.SERVICE = ResourceTypes(service=True) -ResourceTypes.CONTAINER = ResourceTypes(container=True) -ResourceTypes.OBJECT = ResourceTypes(object=True) - - -class Services(object): - ''' - Specifies the services accessible with the account SAS. - - :ivar Services Services.BLOB: The blob service. - :ivar Services Services.FILE: The file service - :ivar Services Services.QUEUE: The queue service. - :ivar Services Services.TABLE: The table service. - ''' - - def __init__(self, blob=False, queue=False, file=False, table=False, _str=None): - ''' - :param bool blob: - Access to any blob service, for example, the `.BlockBlobService` - :param bool queue: - Access to the `.QueueService` - :param bool file: - Access to the `.FileService` - :param bool table: - Access to the TableService - :param str _str: - A string representing the services. - ''' - if not _str: - _str = '' - self.blob = blob or ('b' in _str) - self.queue = queue or ('q' in _str) - self.file = file or ('f' in _str) - self.table = table or ('t' in _str) - - def __or__(self, other): - return Services(_str=str(self) + str(other)) - - def __add__(self, other): - return Services(_str=str(self) + str(other)) - - def __str__(self): - return (('b' if self.blob else '') + - ('q' if self.queue else '') + - ('t' if self.table else '') + - ('f' if self.file else '')) - - -Services.BLOB = Services(blob=True) -Services.QUEUE = Services(queue=True) -Services.TABLE = Services(table=True) -Services.FILE = Services(file=True) - - -class AccountPermissions(object): - ''' - :class:`~ResourceTypes` class to be used with generate_shared_access_signature - method and for the AccessPolicies used with set_*_acl. There are two types of - SAS which may be used to grant resource access. One is to grant access to a - specific resource (resource-specific). Another is to grant access to the - entire service for a specific account and allow certain operations based on - perms found here. - - :ivar AccountPermissions AccountPermissions.ADD: - Valid for the following Object resource types only: queue messages and append blobs. - :ivar AccountPermissions AccountPermissions.CREATE: - Valid for the following Object resource types only: blobs and files. Users - can create new blobs or files, but may not overwrite existing blobs or files. 
- :ivar AccountPermissions AccountPermissions.DELETE: - Valid for Container and Object resource types, except for queue messages. - :ivar AccountPermissions AccountPermissions.LIST: - Valid for Service and Container resource types only. - :ivar AccountPermissions AccountPermissions.PROCESS: - Valid for the following Object resource type only: queue messages. - :ivar AccountPermissions AccountPermissions.READ: - Valid for all signed resources types (Service, Container, and Object). - Permits read permissions to the specified resource type. - :ivar AccountPermissions AccountPermissions.UPDATE: - Valid for the following Object resource types only: queue messages. - :ivar AccountPermissions AccountPermissions.WRITE: - Valid for all signed resources types (Service, Container, and Object). - Permits write permissions to the specified resource type. - ''' - - def __init__(self, read=False, write=False, delete=False, list=False, - add=False, create=False, update=False, process=False, _str=None): - ''' - :param bool read: - Valid for all signed resources types (Service, Container, and Object). - Permits read permissions to the specified resource type. - :param bool write: - Valid for all signed resources types (Service, Container, and Object). - Permits write permissions to the specified resource type. - :param bool delete: - Valid for Container and Object resource types, except for queue messages. - :param bool list: - Valid for Service and Container resource types only. - :param bool add: - Valid for the following Object resource types only: queue messages, and append blobs. - :param bool create: - Valid for the following Object resource types only: blobs and files. - Users can create new blobs or files, but may not overwrite existing - blobs or files. - :param bool update: - Valid for the following Object resource types only: queue messages. - :param bool process: - Valid for the following Object resource type only: queue messages. - :param str _str: - A string representing the permissions. 
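A minimal sketch of the flag/string round-trip (the commented values follow from the __str__, __or__ and _str parsing logic defined for this class):

    perms = AccountPermissions(read=True) | AccountPermissions(write=True)
    str(perms)                           # 'rw' -- each enabled flag contributes its letter code
    AccountPermissions(_str='rl').list   # True -- the shorthand string parses back into flags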
- ''' - if not _str: - _str = '' - self.read = read or ('r' in _str) - self.write = write or ('w' in _str) - self.delete = delete or ('d' in _str) - self.list = list or ('l' in _str) - self.add = add or ('a' in _str) - self.create = create or ('c' in _str) - self.update = update or ('u' in _str) - self.process = process or ('p' in _str) - - def __or__(self, other): - return AccountPermissions(_str=str(self) + str(other)) - - def __add__(self, other): - return AccountPermissions(_str=str(self) + str(other)) - - def __str__(self): - return (('r' if self.read else '') + - ('w' if self.write else '') + - ('d' if self.delete else '') + - ('l' if self.list else '') + - ('a' if self.add else '') + - ('c' if self.create else '') + - ('u' if self.update else '') + - ('p' if self.process else '')) - - -AccountPermissions.READ = AccountPermissions(read=True) -AccountPermissions.WRITE = AccountPermissions(write=True) -AccountPermissions.DELETE = AccountPermissions(delete=True) -AccountPermissions.LIST = AccountPermissions(list=True) -AccountPermissions.ADD = AccountPermissions(add=True) -AccountPermissions.CREATE = AccountPermissions(create=True) -AccountPermissions.UPDATE = AccountPermissions(update=True) -AccountPermissions.PROCESS = AccountPermissions(process=True) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/retry.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/retry.py deleted file mode 100644 index d18c84d80b55..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/retry.py +++ /dev/null @@ -1,306 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- -from abc import ABCMeta -from math import pow -import random -from io import (SEEK_SET, UnsupportedOperation) - -from .models import LocationMode -from ._constants import ( - DEV_ACCOUNT_NAME, - DEV_ACCOUNT_SECONDARY_NAME -) - - -class _Retry(object): - ''' - The base class for Exponential and Linear retries containing shared code. - ''' - __metaclass__ = ABCMeta - - def __init__(self, max_attempts, retry_to_secondary): - ''' - Constructs a base retry object. - - :param int max_attempts: - The maximum number of retry attempts. - :param bool retry_to_secondary: - Whether the request should be retried to secondary, if able. This should - only be enabled of RA-GRS accounts are used and potentially stale data - can be handled. - ''' - self.max_attempts = max_attempts - self.retry_to_secondary = retry_to_secondary - - def _should_retry(self, context): - ''' - A function which determines whether or not to retry. - - :param ~azure.storage.models.RetryContext context: - The retry context. This contains the request, response, and other data - which can be used to determine whether or not to retry. - :return: - A boolean indicating whether or not to retry the request. - :rtype: bool - ''' - # If max attempts are reached, do not retry. - if context.count >= self.max_attempts: - return False - - status = None - if context.response and context.response.status: - status = context.response.status - - if status is None: - ''' - If status is None, retry as this request triggered an exception. For - example, network issues would trigger this. 
- ''' - return True - elif 200 <= status < 300: - ''' - This method is called after a successful response, meaning we failed - during the response body download or parsing. So, success codes should - be retried. - ''' - return True - elif 300 <= status < 500: - ''' - An exception occured, but in most cases it was expected. Examples could - include a 309 Conflict or 412 Precondition Failed. - ''' - if status == 404 and context.location_mode == LocationMode.SECONDARY: - # Response code 404 should be retried if secondary was used. - return True - if status == 408: - # Response code 408 is a timeout and should be retried. - return True - return False - elif status >= 500: - ''' - Response codes above 500 with the exception of 501 Not Implemented and - 505 Version Not Supported indicate a server issue and should be retried. - ''' - if status == 501 or status == 505: - return False - return True - else: - # If something else happened, it's unexpected. Retry. - return True - - def _set_next_host_location(self, context): - ''' - A function which sets the next host location on the request, if applicable. - - :param ~azure.storage.models.RetryContext context: - The retry context containing the previous host location and the request - to evaluate and possibly modify. - ''' - if len(context.request.host_locations) > 1: - # If there's more than one possible location, retry to the alternative - if context.location_mode == LocationMode.PRIMARY: - context.location_mode = LocationMode.SECONDARY - - # if targeting the emulator (with path style), change path instead of host - if context.is_emulated: - # replace the first instance of primary account name with the secondary account name - context.request.path = context.request.path.replace(DEV_ACCOUNT_NAME, DEV_ACCOUNT_SECONDARY_NAME, 1) - else: - context.request.host = context.request.host_locations.get(context.location_mode) - else: - context.location_mode = LocationMode.PRIMARY - - # if targeting the emulator (with path style), change path instead of host - if context.is_emulated: - # replace the first instance of secondary account name with the primary account name - context.request.path = context.request.path.replace(DEV_ACCOUNT_SECONDARY_NAME, DEV_ACCOUNT_NAME, 1) - else: - context.request.host = context.request.host_locations.get(context.location_mode) - - def _retry(self, context, backoff): - ''' - A function which determines whether and how to retry. - - :param ~azure.storage.models.RetryContext context: - The retry context. This contains the request, response, and other data - which can be used to determine whether or not to retry. - :param function() backoff: - A function which returns the backoff time if a retry is to be performed. - :return: - An integer indicating how long to wait before retrying the request, - or None to indicate no retry should be performed. - :rtype: int or None - ''' - # If the context does not contain a count parameter, this request has not - # been retried yet. Add the count parameter to track the number of retries. - if not hasattr(context, 'count'): - context.count = 0 - - # Determine whether to retry, and if so increment the count, modify the - # request as desired, and return the backoff. 
- if self._should_retry(context): - backoff_interval = backoff(context) - context.count += 1 - - # If retry to secondary is enabled, attempt to change the host if the - # request allows it - if self.retry_to_secondary: - self._set_next_host_location(context) - - # rewind the request body if it is a stream - if hasattr(context.request, 'body') and hasattr(context.request.body, 'read'): - # no position was saved, then retry would not work - if context.body_position is None: - return None - else: - try: - # attempt to rewind the body to the initial position - context.request.body.seek(context.body_position, SEEK_SET) - except UnsupportedOperation: - # if body is not seekable, then retry would not work - return None - - return backoff_interval - - return None - - -class ExponentialRetry(_Retry): - ''' - Exponential retry. - ''' - - def __init__(self, initial_backoff=15, increment_base=3, max_attempts=3, - retry_to_secondary=False, random_jitter_range=3): - ''' - Constructs an Exponential retry object. The initial_backoff is used for - the first retry. Subsequent retries are retried after initial_backoff + - increment_power^retry_count seconds. For example, by default the first retry - occurs after 15 seconds, the second after (15+3^1) = 18 seconds, and the - third after (15+3^2) = 24 seconds. - - :param int initial_backoff: - The initial backoff interval, in seconds, for the first retry. - :param int increment_base: - The base, in seconds, to increment the initial_backoff by after the - first retry. - :param int max_attempts: - The maximum number of retry attempts. - :param bool retry_to_secondary: - Whether the request should be retried to secondary, if able. This should - only be enabled of RA-GRS accounts are used and potentially stale data - can be handled. - :param int random_jitter_range: - A number in seconds which indicates a range to jitter/randomize for the back-off interval. - For example, a random_jitter_range of 3 results in the back-off interval x to vary between x+3 and x-3. - ''' - self.initial_backoff = initial_backoff - self.increment_base = increment_base - self.random_jitter_range = random_jitter_range - super(ExponentialRetry, self).__init__(max_attempts, retry_to_secondary) - - ''' - A function which determines whether and how to retry. - - :param ~azure.storage.models.RetryContext context: - The retry context. This contains the request, response, and other data - which can be used to determine whether or not to retry. - :return: - An integer indicating how long to wait before retrying the request, - or None to indicate no retry should be performed. - :rtype: int or None - ''' - - def retry(self, context): - return self._retry(context, self._backoff) - - ''' - Calculates how long to sleep before retrying. - - :return: - An integer indicating how long to wait before retrying the request, - or None to indicate no retry should be performed. - :rtype: int or None - ''' - - def _backoff(self, context): - random_generator = random.Random() - backoff = self.initial_backoff + (0 if context.count == 0 else pow(self.increment_base, context.count)) - random_range_start = backoff - self.random_jitter_range if backoff > self.random_jitter_range else 0 - random_range_end = backoff + self.random_jitter_range - return random_generator.uniform(random_range_start, random_range_end) - - -class LinearRetry(_Retry): - ''' - Linear retry. - ''' - - def __init__(self, backoff=15, max_attempts=3, retry_to_secondary=False, random_jitter_range=3): - ''' - Constructs a Linear retry object. 
- - :param int backoff: - The backoff interval, in seconds, between retries. - :param int max_attempts: - The maximum number of retry attempts. - :param bool retry_to_secondary: - Whether the request should be retried to secondary, if able. This should - only be enabled of RA-GRS accounts are used and potentially stale data - can be handled. - :param int random_jitter_range: - A number in seconds which indicates a range to jitter/randomize for the back-off interval. - For example, a random_jitter_range of 3 results in the back-off interval x to vary between x+3 and x-3. - ''' - self.backoff = backoff - self.max_attempts = max_attempts - self.random_jitter_range = random_jitter_range - super(LinearRetry, self).__init__(max_attempts, retry_to_secondary) - - ''' - A function which determines whether and how to retry. - - :param ~azure.storage.models.RetryContext context: - The retry context. This contains the request, response, and other data - which can be used to determine whether or not to retry. - :return: - An integer indicating how long to wait before retrying the request, - or None to indicate no retry should be performed. - :rtype: int or None - ''' - - def retry(self, context): - return self._retry(context, self._backoff) - - ''' - Calculates how long to sleep before retrying. - - :return: - An integer indicating how long to wait before retrying the request, - or None to indicate no retry should be performed. - :rtype: int or None - ''' - - def _backoff(self, context): - random_generator = random.Random() - # the backoff interval normally does not change, however there is the possibility - # that it was modified by accessing the property directly after initializing the object - self.random_range_start = self.backoff - self.random_jitter_range if self.backoff > self.random_jitter_range else 0 - self.random_range_end = self.backoff + self.random_jitter_range - return random_generator.uniform(self.random_range_start, self.random_range_end) - - -def no_retry(context): - ''' - Specifies never to retry. - - :param ~azure.storage.models.RetryContext context: - The retry context. - :return: - Always returns None to indicate never to retry. - :rtype: None - ''' - return None diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/sharedaccesssignature.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/sharedaccesssignature.py deleted file mode 100644 index ae55b2a6aaed..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/sharedaccesssignature.py +++ /dev/null @@ -1,180 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- -from datetime import date - -from ._common_conversion import ( - _sign_string, - _to_str, -) -from ._constants import DEFAULT_X_MS_VERSION -from ._serialization import ( - url_quote, - _to_utc_datetime, -) - - -class SharedAccessSignature(object): - ''' - Provides a factory for creating account access - signature tokens with an account name and account key. Users can either - use the factory or can construct the appropriate service and use the - generate_*_shared_access_signature method directly. 
- ''' - - def __init__(self, account_name, account_key, x_ms_version=DEFAULT_X_MS_VERSION): - ''' - :param str account_name: - The storage account name used to generate the shared access signatures. - :param str account_key: - The access key to generate the shares access signatures. - :param str x_ms_version: - The service version used to generate the shared access signatures. - ''' - self.account_name = account_name - self.account_key = account_key - self.x_ms_version = x_ms_version - - def generate_account(self, services, resource_types, permission, expiry, start=None, - ip=None, protocol=None): - ''' - Generates a shared access signature for the account. - Use the returned signature with the sas_token parameter of the service - or to create a new account object. - - :param Services services: - Specifies the services accessible with the account SAS. You can - combine values to provide access to more than one service. - :param ResourceTypes resource_types: - Specifies the resource types that are accessible with the account - SAS. You can combine values to provide access to more than one - resource type. - :param AccountPermissions permission: - The permissions associated with the shared access signature. The - user is restricted to operations allowed by the permissions. - Required unless an id is given referencing a stored access policy - which contains this field. This field must be omitted if it has been - specified in an associated stored access policy. You can combine - values to provide more than one permission. - :param expiry: - The time at which the shared access signature becomes invalid. - Required unless an id is given referencing a stored access policy - which contains this field. This field must be omitted if it has - been specified in an associated stored access policy. Azure will always - convert values to UTC. If a date is passed in without timezone info, it - is assumed to be UTC. - :type expiry: datetime or str - :param start: - The time at which the shared access signature becomes valid. If - omitted, start time for this call is assumed to be the time when the - storage service receives the request. Azure will always convert values - to UTC. If a date is passed in without timezone info, it is assumed to - be UTC. - :type start: datetime or str - :param str ip: - Specifies an IP address or a range of IP addresses from which to accept requests. - If the IP address from which the request originates does not match the IP address - or address range specified on the SAS token, the request is not authenticated. - For example, specifying sip=168.1.5.65 or sip=168.1.5.60-168.1.5.70 on the SAS - restricts the request to those IP addresses. - :param str protocol: - Specifies the protocol permitted for a request made. The default value - is https,http. See :class:`~azure.storage.common.models.Protocol` for possible values. 
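A minimal end-to-end sketch (the account name, key placeholder and one-hour expiry are assumptions for illustration):

    from datetime import datetime, timedelta

    sas = SharedAccessSignature('mystorageaccount', '<account-key>')
    token = sas.generate_account(
        services=Services(blob=True),
        resource_types=ResourceTypes(container=True, object=True),
        permission=AccountPermissions(read=True, list=True),
        expiry=datetime.utcnow() + timedelta(hours=1),
        protocol=Protocol.HTTPS)
    # token is a URL query string (sv, ss, srt, sp, se, spr and sig parameters) ready to append to a request URL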
- ''' - sas = _SharedAccessHelper() - sas.add_base(permission, expiry, start, ip, protocol, self.x_ms_version) - sas.add_account(services, resource_types) - sas.add_account_signature(self.account_name, self.account_key) - - return sas.get_token() - - -class _QueryStringConstants(object): - SIGNED_SIGNATURE = 'sig' - SIGNED_PERMISSION = 'sp' - SIGNED_START = 'st' - SIGNED_EXPIRY = 'se' - SIGNED_RESOURCE = 'sr' - SIGNED_IDENTIFIER = 'si' - SIGNED_IP = 'sip' - SIGNED_PROTOCOL = 'spr' - SIGNED_VERSION = 'sv' - SIGNED_CACHE_CONTROL = 'rscc' - SIGNED_CONTENT_DISPOSITION = 'rscd' - SIGNED_CONTENT_ENCODING = 'rsce' - SIGNED_CONTENT_LANGUAGE = 'rscl' - SIGNED_CONTENT_TYPE = 'rsct' - START_PK = 'spk' - START_RK = 'srk' - END_PK = 'epk' - END_RK = 'erk' - SIGNED_RESOURCE_TYPES = 'srt' - SIGNED_SERVICES = 'ss' - - -class _SharedAccessHelper(object): - def __init__(self): - self.query_dict = {} - - def _add_query(self, name, val): - if val: - self.query_dict[name] = _to_str(val) - - def add_base(self, permission, expiry, start, ip, protocol, x_ms_version): - if isinstance(start, date): - start = _to_utc_datetime(start) - - if isinstance(expiry, date): - expiry = _to_utc_datetime(expiry) - - self._add_query(_QueryStringConstants.SIGNED_START, start) - self._add_query(_QueryStringConstants.SIGNED_EXPIRY, expiry) - self._add_query(_QueryStringConstants.SIGNED_PERMISSION, permission) - self._add_query(_QueryStringConstants.SIGNED_IP, ip) - self._add_query(_QueryStringConstants.SIGNED_PROTOCOL, protocol) - self._add_query(_QueryStringConstants.SIGNED_VERSION, x_ms_version) - - def add_resource(self, resource): - self._add_query(_QueryStringConstants.SIGNED_RESOURCE, resource) - - def add_id(self, id): - self._add_query(_QueryStringConstants.SIGNED_IDENTIFIER, id) - - def add_account(self, services, resource_types): - self._add_query(_QueryStringConstants.SIGNED_SERVICES, services) - self._add_query(_QueryStringConstants.SIGNED_RESOURCE_TYPES, resource_types) - - def add_override_response_headers(self, cache_control, - content_disposition, - content_encoding, - content_language, - content_type): - self._add_query(_QueryStringConstants.SIGNED_CACHE_CONTROL, cache_control) - self._add_query(_QueryStringConstants.SIGNED_CONTENT_DISPOSITION, content_disposition) - self._add_query(_QueryStringConstants.SIGNED_CONTENT_ENCODING, content_encoding) - self._add_query(_QueryStringConstants.SIGNED_CONTENT_LANGUAGE, content_language) - self._add_query(_QueryStringConstants.SIGNED_CONTENT_TYPE, content_type) - - def add_account_signature(self, account_name, account_key): - def get_value_to_append(query): - return_value = self.query_dict.get(query) or '' - return return_value + '\n' - - string_to_sign = \ - (account_name + '\n' + - get_value_to_append(_QueryStringConstants.SIGNED_PERMISSION) + - get_value_to_append(_QueryStringConstants.SIGNED_SERVICES) + - get_value_to_append(_QueryStringConstants.SIGNED_RESOURCE_TYPES) + - get_value_to_append(_QueryStringConstants.SIGNED_START) + - get_value_to_append(_QueryStringConstants.SIGNED_EXPIRY) + - get_value_to_append(_QueryStringConstants.SIGNED_IP) + - get_value_to_append(_QueryStringConstants.SIGNED_PROTOCOL) + - get_value_to_append(_QueryStringConstants.SIGNED_VERSION)) - - self._add_query(_QueryStringConstants.SIGNED_SIGNATURE, - _sign_string(account_key, string_to_sign)) - - def get_token(self): - return '&'.join(['{0}={1}'.format(n, url_quote(v)) for n, v in self.query_dict.items() if v is not None]) diff --git 
a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/storageclient.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/storageclient.py deleted file mode 100644 index 41ae2c627805..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/storageclient.py +++ /dev/null @@ -1,440 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- - -import requests -from abc import ABCMeta -import logging -from time import sleep -import sys - -from azure.common import ( - AzureException, - AzureHttpError, -) - -from ._constants import ( - DEFAULT_SOCKET_TIMEOUT, - DEFAULT_X_MS_VERSION, - DEFAULT_USER_AGENT_STRING, - USER_AGENT_STRING_PREFIX, - USER_AGENT_STRING_SUFFIX, - _AUTHORIZATION_HEADER_NAME, - _REDACTED_VALUE, - _COPY_SOURCE_HEADER_NAME, -) -from ._error import ( - _ERROR_DECRYPTION_FAILURE, - _http_error_handler, - _wrap_exception, - AzureSigningError, -) -from ._http import HTTPError -from ._http.httpclient import _HTTPClient -from ._serialization import ( - _update_request, - _add_date_header, -) -from .models import ( - RetryContext, - LocationMode, - _OperationContext, -) -from .retry import ExponentialRetry -from io import UnsupportedOperation -from .sharedaccesssignature import _QueryStringConstants - -if sys.version_info >= (3,): - from urllib.parse import ( - urlparse, - parse_qsl, - urlunparse, - urlencode, - ) -else: - from urlparse import ( - urlparse, - parse_qsl, - urlunparse, - ) - from urllib import urlencode -logger = logging.getLogger(__name__) - - -class StorageClient(object): - ''' - This is the base class for service objects. Service objects are used to do - all requests to Storage. This class cannot be instantiated directly. - - :ivar str account_name: - The storage account name. This is used to authenticate requests - signed with an account key and to construct the storage endpoint. It - is required unless a connection string is given, or if a custom - domain is used with anonymous authentication. - :ivar str account_key: - The storage account key. This is used for shared key authentication. - If neither account key or sas token is specified, anonymous access - will be used. - :ivar str sas_token: - A shared access signature token to use to authenticate requests - instead of the account key. If account key and sas token are both - specified, account key will be used to sign. If neither are - specified, anonymous access will be used. - :ivar str primary_endpoint: - The endpoint to send storage requests to. - :ivar str secondary_endpoint: - The secondary endpoint to read storage data from. This will only be a - valid endpoint if the storage account used is RA-GRS and thus allows - reading from secondary. - :ivar function(context) retry: - A function which determines whether to retry. Takes as a parameter a - :class:`~azure.storage.common.models.RetryContext` object. Returns the number - of seconds to wait before retrying the request, or None to indicate not - to retry. - :ivar ~azure.storage.common.models.LocationMode location_mode: - The host location to use to make requests. Defaults to LocationMode.PRIMARY. 
- Note that this setting only applies to RA-GRS accounts as other account - types do not allow reading from secondary. If the location_mode is set to - LocationMode.SECONDARY, read requests will be sent to the secondary endpoint. - Write requests will continue to be sent to primary. - :ivar str protocol: - The protocol to use for requests. Defaults to https. - :ivar requests.Session request_session: - The session object to use for http requests. - :ivar function(request) request_callback: - A function called immediately before each request is sent. This function - takes as a parameter the request object and returns nothing. It may be - used to added custom headers or log request data. - :ivar function() response_callback: - A function called immediately after each response is received. This - function takes as a parameter the response object and returns nothing. - It may be used to log response data. - :ivar function() retry_callback: - A function called immediately after retry evaluation is performed. This - function takes as a parameter the retry context object and returns nothing. - It may be used to detect retries and log context information. - ''' - - __metaclass__ = ABCMeta - - def __init__(self, connection_params): - ''' - :param obj connection_params: The parameters to use to construct the client. - ''' - self.account_name = connection_params.account_name - self.account_key = connection_params.account_key - self.sas_token = connection_params.sas_token - self.token_credential = connection_params.token_credential - self.is_emulated = connection_params.is_emulated - - self.primary_endpoint = connection_params.primary_endpoint - self.secondary_endpoint = connection_params.secondary_endpoint - - protocol = connection_params.protocol - request_session = connection_params.request_session or requests.Session() - socket_timeout = connection_params.socket_timeout or DEFAULT_SOCKET_TIMEOUT - self._httpclient = _HTTPClient( - protocol=protocol, - session=request_session, - timeout=socket_timeout, - ) - - self.retry = ExponentialRetry().retry - self.location_mode = LocationMode.PRIMARY - - self.request_callback = None - self.response_callback = None - self.retry_callback = None - self._X_MS_VERSION = DEFAULT_X_MS_VERSION - self._USER_AGENT_STRING = DEFAULT_USER_AGENT_STRING - - def _update_user_agent_string(self, service_package_version): - self._USER_AGENT_STRING = '{}{} {}'.format(USER_AGENT_STRING_PREFIX, - service_package_version, - USER_AGENT_STRING_SUFFIX) - - @property - def socket_timeout(self): - return self._httpclient.timeout - - @socket_timeout.setter - def socket_timeout(self, value): - self._httpclient.timeout = value - - @property - def protocol(self): - return self._httpclient.protocol - - @protocol.setter - def protocol(self, value): - self._httpclient.protocol = value - - @property - def request_session(self): - return self._httpclient.session - - @request_session.setter - def request_session(self, value): - self._httpclient.session = value - - def set_proxy(self, host, port, user=None, password=None): - ''' - Sets the proxy server host and port for the HTTP CONNECT Tunnelling. - - :param str host: Address of the proxy. Ex: '192.168.0.100' - :param int port: Port of the proxy. Ex: 6000 - :param str user: User for proxy authorization. - :param str password: Password for proxy authorization. 
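A small wiring sketch on a concrete service client (client stands in for any subclass, since this base class is not instantiated directly):

    client.set_proxy('192.168.0.100', 6000, user='proxyuser', password='secret')
    # the hooks receive the raw request / retry context; here they only trace activity
    client.request_callback = lambda req: print('outgoing', req.method, req.path)
    client.retry_callback = lambda ctx: print('retry attempt', ctx.count, ctx.exception)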
- ''' - self._httpclient.set_proxy(host, port, user, password) - - def _get_host_locations(self, primary=True, secondary=False): - locations = {} - if primary: - locations[LocationMode.PRIMARY] = self.primary_endpoint - if secondary: - locations[LocationMode.SECONDARY] = self.secondary_endpoint - return locations - - def _apply_host(self, request, operation_context, retry_context): - if operation_context.location_lock and operation_context.host_location: - # If this is a location locked operation and the location is set, - # override the request location and host_location. - request.host_locations = operation_context.host_location - request.host = list(operation_context.host_location.values())[0] - retry_context.location_mode = list(operation_context.host_location.keys())[0] - elif len(request.host_locations) == 1: - # If only one location is allowed, use that location. - request.host = list(request.host_locations.values())[0] - retry_context.location_mode = list(request.host_locations.keys())[0] - else: - # If multiple locations are possible, choose based on the location mode. - request.host = request.host_locations.get(self.location_mode) - retry_context.location_mode = self.location_mode - - @staticmethod - def extract_date_and_request_id(retry_context): - if getattr(retry_context, 'response', None) is None: - return "" - resp = retry_context.response - - if 'date' in resp.headers and 'x-ms-request-id' in resp.headers: - return str.format("Server-Timestamp={0}, Server-Request-ID={1}", - resp.headers['date'], resp.headers['x-ms-request-id']) - elif 'date' in resp.headers: - return str.format("Server-Timestamp={0}", resp.headers['date']) - elif 'x-ms-request-id' in resp.headers: - return str.format("Server-Request-ID={0}", resp.headers['x-ms-request-id']) - else: - return "" - - @staticmethod - def _scrub_headers(headers): - # make a copy to avoid contaminating the request - clean_headers = headers.copy() - - if _AUTHORIZATION_HEADER_NAME in clean_headers: - clean_headers[_AUTHORIZATION_HEADER_NAME] = _REDACTED_VALUE - - # in case of copy operations, there could be a SAS signature present in the header value - if _COPY_SOURCE_HEADER_NAME in clean_headers \ - and _QueryStringConstants.SIGNED_SIGNATURE + "=" in clean_headers[_COPY_SOURCE_HEADER_NAME]: - # take the url apart and scrub away the signed signature - scheme, netloc, path, params, query, fragment = urlparse(clean_headers[_COPY_SOURCE_HEADER_NAME]) - parsed_qs = dict(parse_qsl(query)) - parsed_qs[_QueryStringConstants.SIGNED_SIGNATURE] = _REDACTED_VALUE - - # the SAS needs to be put back together - clean_headers[_COPY_SOURCE_HEADER_NAME] = urlunparse( - (scheme, netloc, path, params, urlencode(parsed_qs), fragment)) - return clean_headers - - @staticmethod - def _scrub_query_parameters(query): - # make a copy to avoid contaminating the request - clean_queries = query.copy() - - if _QueryStringConstants.SIGNED_SIGNATURE in clean_queries: - clean_queries[_QueryStringConstants.SIGNED_SIGNATURE] = _REDACTED_VALUE - return clean_queries - - def _perform_request(self, request, parser=None, parser_args=None, operation_context=None, expected_errors=None): - ''' - Sends the request and return response. 
Catches HTTPError and hands it - to error handler - ''' - operation_context = operation_context or _OperationContext() - retry_context = RetryContext() - retry_context.is_emulated = self.is_emulated - - # if request body is a stream, we need to remember its current position in case retries happen - if hasattr(request.body, 'read'): - try: - retry_context.body_position = request.body.tell() - except (AttributeError, UnsupportedOperation): - # if body position cannot be obtained, then retries will not work - pass - - # Apply the appropriate host based on the location mode - self._apply_host(request, operation_context, retry_context) - - # Apply common settings to the request - _update_request(request, self._X_MS_VERSION, self._USER_AGENT_STRING) - client_request_id_prefix = str.format("Client-Request-ID={0}", request.headers['x-ms-client-request-id']) - - while True: - try: - try: - # Execute the request callback - if self.request_callback: - self.request_callback(request) - - # Add date and auth after the callback so date doesn't get too old and - # authentication is still correct if signed headers are added in the request - # callback. This also ensures retry policies with long back offs - # will work as it resets the time sensitive headers. - _add_date_header(request) - - try: - # request can be signed individually - self.authentication.sign_request(request) - except AttributeError: - # session can also be signed - self.request_session = self.authentication.signed_session(self.request_session) - - # Set the request context - retry_context.request = request - - # Log the request before it goes out - # Avoid unnecessary scrubbing if the logger is not on - if logger.isEnabledFor(logging.INFO): - logger.info("%s Outgoing request: Method=%s, Path=%s, Query=%s, Headers=%s.", - client_request_id_prefix, - request.method, - request.path, - self._scrub_query_parameters(request.query), - str(self._scrub_headers(request.headers)).replace('\n', '')) - - # Perform the request - response = self._httpclient.perform_request(request) - - # Execute the response callback - if self.response_callback: - self.response_callback(response) - - # Set the response context - retry_context.response = response - - # Log the response when it comes back - logger.info("%s Receiving Response: " - "%s, HTTP Status Code=%s, Message=%s, Headers=%s.", - client_request_id_prefix, - self.extract_date_and_request_id(retry_context), - response.status, - response.message, - str(response.headers).replace('\n', '')) - - # Parse and wrap HTTP errors in AzureHttpError which inherits from AzureException - if response.status >= 300: - # This exception will be caught by the general error handler - # and raised as an azure http exception - _http_error_handler( - HTTPError(response.status, response.message, response.headers, response.body)) - - # Parse the response - if parser: - if parser_args: - args = [response] - args.extend(parser_args) - return parser(*args) - else: - return parser(response) - else: - return - except AzureException as ex: - retry_context.exception = ex - raise ex - except Exception as ex: - retry_context.exception = ex - raise _wrap_exception(ex, AzureException) - - except AzureException as ex: - # only parse the strings used for logging if logging is at least enabled for CRITICAL - exception_str_in_one_line = '' - status_code = '' - timestamp_and_request_id = '' - if logger.isEnabledFor(logging.CRITICAL): - exception_str_in_one_line = str(ex).replace('\n', '') - status_code = retry_context.response.status if 
retry_context.response is not None else 'Unknown' - timestamp_and_request_id = self.extract_date_and_request_id(retry_context) - - # if the http error was expected, we should short-circuit - if isinstance(ex, AzureHttpError) and expected_errors is not None and ex.error_code in expected_errors: - logger.info("%s Received expected http error: " - "%s, HTTP status code=%s, Exception=%s.", - client_request_id_prefix, - timestamp_and_request_id, - status_code, - exception_str_in_one_line) - raise ex - elif isinstance(ex, AzureSigningError): - logger.info("%s Unable to sign the request: Exception=%s.", - client_request_id_prefix, - exception_str_in_one_line) - raise ex - - logger.info("%s Operation failed: checking if the operation should be retried. " - "Current retry count=%s, %s, HTTP status code=%s, Exception=%s.", - client_request_id_prefix, - retry_context.count if hasattr(retry_context, 'count') else 0, - timestamp_and_request_id, - status_code, - exception_str_in_one_line) - - # Decryption failures (invalid objects, invalid algorithms, data unencrypted in strict mode, etc) - # will not be resolved with retries. - if str(ex) == _ERROR_DECRYPTION_FAILURE: - logger.error("%s Encountered decryption failure: this cannot be retried. " - "%s, HTTP status code=%s, Exception=%s.", - client_request_id_prefix, - timestamp_and_request_id, - status_code, - exception_str_in_one_line) - raise ex - - # Determine whether a retry should be performed and if so, how - # long to wait before performing retry. - retry_interval = self.retry(retry_context) - if retry_interval is not None: - # Execute the callback - if self.retry_callback: - self.retry_callback(retry_context) - - logger.info( - "%s Retry policy is allowing a retry: Retry count=%s, Interval=%s.", - client_request_id_prefix, - retry_context.count, - retry_interval) - - # Sleep for the desired retry interval - sleep(retry_interval) - else: - logger.error("%s Retry policy did not allow for a retry: " - "%s, HTTP status code=%s, Exception=%s.", - client_request_id_prefix, - timestamp_and_request_id, - status_code, - exception_str_in_one_line) - raise ex - finally: - # If this is a location locked operation and the location is not set, - # this is the first request of that operation. Set the location to - # be used for subsequent requests in the operation. - if operation_context.location_lock and not operation_context.host_location: - # note: to cover the emulator scenario, the host_location is grabbed - # from request.host_locations(which includes the dev account name) - # instead of request.host(which at this point no longer includes the dev account name) - operation_context.host_location = { - retry_context.location_mode: request.host_locations[retry_context.location_mode]} diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/tokencredential.py b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/tokencredential.py deleted file mode 100644 index 4d724ef06ad1..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/storage/common/tokencredential.py +++ /dev/null @@ -1,48 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. 
-# -------------------------------------------------------------------------- - -import requests - - -class TokenCredential(object): - """ - Represents a token credential that is used to authorize HTTPS requests. - The token can be updated by the user. - - :ivar str token: - The authorization token. It can be set by the user at any point in a thread-safe way. - """ - - def __init__(self, initial_value=None): - """ - :param initial_value: initial value for the token. - """ - self.token = initial_value - - def signed_session(self, session=None): - """ - Sign requests session with the token. This method is called every time a request is going on the wire. - The user is responsible for updating the token with the preferred tool/SDK. - In general there are two options: - - override this method to update the token in a preferred way and set Authorization header on session - - not override this method, and have a timer that triggers periodically to update the token on this class - - The second option is recommended as it tends to be more performance-friendly. - - :param session: The session to configure for authentication - :type session: requests.Session - :rtype: requests.Session - """ - session = session or requests.Session() - session.headers['Authorization'] = "Bearer {}".format(self.token) - - return session - - def token(self, new_value): - """ - :param new_value: new value to be set as the token. - """ - self.token = new_value \ No newline at end of file diff --git a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/vendor_azure_storage_version.md b/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/vendor_azure_storage_version.md deleted file mode 100644 index caabbbca7d46..000000000000 --- a/sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/vendor/vendor_azure_storage_version.md +++ /dev/null @@ -1,2 +0,0 @@ -# azure-storage-blob 2.0.1 -# azure-storage-common 2.0.0 diff --git a/sdk/eventhub/azure-eventhubs/setup.py b/sdk/eventhub/azure-eventhubs/setup.py index 5f70522d0f6b..640b69a6e519 100644 --- a/sdk/eventhub/azure-eventhubs/setup.py +++ b/sdk/eventhub/azure-eventhubs/setup.py @@ -77,9 +77,6 @@ install_requires=[ 'uamqp~=1.2.0', 'azure-common~=1.1', - 'python-dateutil>=2.8.0', - 'cryptography>=2.1.4', - 'requests>=2.18.4', ], extras_require={ ":python_version<'3.0'": ['azure-nspkg'], From f8acc8b92ac4495f7778bb13e3d91310a5dfcdce Mon Sep 17 00:00:00 2001 From: Adam Ling <47871814+yunhaoling@users.noreply.github.com> Date: Fri, 2 Aug 2019 13:15:25 -0700 Subject: [PATCH 35/42] Update decorator implementation (#6642) * Update decorator implementation --- .../azure/eventhub/_consumer_producer_mixin.py | 2 +- .../eventhub/aio/_consumer_producer_mixin_async.py | 2 +- .../azure/eventhub/aio/consumer_async.py | 11 ++++++----- .../azure/eventhub/aio/producer_async.py | 12 ++++++++++-- .../azure-eventhubs/azure/eventhub/consumer.py | 10 ++++++---- .../azure-eventhubs/azure/eventhub/producer.py | 12 ++++++++++-- 6 files changed, 34 insertions(+), 15 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py index bebef7a51982..3659b7c680c1 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py @@ -25,7 +25,7 @@ def wrapped_func(self, *args, **kwargs): kwargs.pop("timeout", None) while True: try: - return 
to_be_wrapped_func(timeout_time=timeout_time, last_exception=last_exception, **kwargs) + return to_be_wrapped_func(self, timeout_time=timeout_time, last_exception=last_exception, **kwargs) except Exception as exception: last_exception = self._handle_exception(exception, retry_count, max_retries, timeout_time) retry_count += 1 diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py index aa539110e50a..21dc6bb8a012 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py @@ -25,7 +25,7 @@ async def wrapped_func(self, *args, **kwargs): kwargs.pop("timeout", None) while True: try: - return await to_be_wrapped_func(timeout_time=timeout_time, last_exception=last_exception, **kwargs) + return await to_be_wrapped_func(self, timeout_time=timeout_time, last_exception=last_exception, **kwargs) except Exception as exception: last_exception = await self._handle_exception(exception, retry_count, max_retries, timeout_time) retry_count += 1 diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py index 8457913abcf0..404fc23312f0 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/consumer_async.py @@ -147,10 +147,8 @@ async def _open(self, timeout_time=None): self.source = self.redirected.address await super(EventHubConsumer, self)._open(timeout_time) - async def _receive(self, **kwargs): - timeout_time = kwargs.get("timeout_time") + async def _receive(self, timeout_time=None, max_batch_size=None, **kwargs): last_exception = kwargs.get("last_exception") - max_batch_size = kwargs.get("max_batch_size") data_batch = kwargs.get("data_batch") await self._open(timeout_time) @@ -171,6 +169,10 @@ async def _receive(self, **kwargs): data_batch.append(event_data) return data_batch + @_retry_decorator + async def _receive_with_try(self, timeout_time=None, max_batch_size=None, **kwargs): + return await self._receive(timeout_time=timeout_time, max_batch_size=max_batch_size, **kwargs) + @property def queue_size(self): # type: () -> int @@ -217,8 +219,7 @@ async def receive(self, *, max_batch_size=None, timeout=None): max_batch_size = max_batch_size or min(self.client.config.max_batch_size, self.prefetch) data_batch = [] # type: List[EventData] - return await _retry_decorator(self._receive)(self, timeout=timeout, - max_batch_size=max_batch_size, data_batch=data_batch) + return await self._receive_with_try(timeout=timeout, max_batch_size=max_batch_size, data_batch=data_batch) async def close(self, exception=None): # type: (Exception) -> None diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py index e3cd1d9fcb09..10de6017026b 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py @@ -110,6 +110,10 @@ async def _open(self, timeout_time=None, **kwargs): self.target = self.redirected.address await super(EventHubProducer, self)._open(timeout_time) + @_retry_decorator + async def _open_with_retry(self, timeout_time=None, **kwargs): + return await self._open(timeout_time=timeout_time, **kwargs) + async def _send_event_data(self, 
timeout_time=None, last_exception=None): if self.unsent_events: await self._open(timeout_time) @@ -131,6 +135,10 @@ async def _send_event_data(self, timeout_time=None, last_exception=None): _error(self._outcome, self._condition) return + @_retry_decorator + async def _send_event_data_with_retry(self, timeout_time=None, last_exception=None): + return await self._send_event_data(timeout_time=timeout_time, last_exception=last_exception) + def _on_outcome(self, outcome, condition): """ Called when the outcome is received for a delivery. @@ -158,7 +166,7 @@ async def create_batch(self, max_size=None, partition_key=None): """ if not self._max_message_size_on_link: - await _retry_decorator(self._open)(self, timeout=self.client.config.send_timeout) + await self._open_with_retry(timeout=self.client.config.send_timeout) if max_size and max_size > self._max_message_size_on_link: raise ValueError('Max message size: {} is too large, acceptable max batch size is: {} bytes.' @@ -212,7 +220,7 @@ async def send(self, event_data, *, partition_key=None, timeout=None): wrapper_event_data = EventDataBatch._from_batch(event_data, partition_key) # pylint: disable=protected-access wrapper_event_data.message.on_send_complete = self._on_outcome self.unsent_events = [wrapper_event_data.message] - await _retry_decorator(self._send_event_data)(self, timeout=timeout) + await self._send_event_data_with_retry(timeout=timeout) async def close(self, exception=None): # type: (Exception) -> None diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py index 33a0e8ed187e..85c09bf1a308 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py @@ -142,10 +142,8 @@ def _open(self, timeout_time=None): self.source = self.redirected.address super(EventHubConsumer, self)._open(timeout_time) - def _receive(self, **kwargs): - timeout_time = kwargs.get("timeout_time") + def _receive(self, timeout_time=None, max_batch_size=None, **kwargs): last_exception = kwargs.get("last_exception") - max_batch_size = kwargs.get("max_batch_size") data_batch = kwargs.get("data_batch") self._open(timeout_time) @@ -165,6 +163,10 @@ def _receive(self, **kwargs): data_batch.append(event_data) return data_batch + @_retry_decorator + def _receive_with_try(self, timeout_time=None, max_batch_size=None, **kwargs): + return self._receive(timeout_time=timeout_time, max_batch_size=max_batch_size, **kwargs) + @property def queue_size(self): # type:() -> int @@ -210,7 +212,7 @@ def receive(self, max_batch_size=None, timeout=None): max_batch_size = max_batch_size or min(self.client.config.max_batch_size, self.prefetch) data_batch = [] # type: List[EventData] - return _retry_decorator(self._receive)(self, timeout=timeout, max_batch_size=max_batch_size, data_batch=data_batch) + return self._receive_with_try(timeout=timeout, max_batch_size=max_batch_size, data_batch=data_batch) def close(self, exception=None): # type:(Exception) -> None diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py index dd4b7aa2396a..f49fc280814e 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py @@ -117,6 +117,10 @@ def _open(self, timeout_time=None, **kwargs): self.target = self.redirected.address super(EventHubProducer, self)._open(timeout_time) + @_retry_decorator + def _open_with_retry(self, 
timeout_time=None, **kwargs): + return self._open(timeout_time=timeout_time, **kwargs) + def _send_event_data(self, timeout_time=None, last_exception=None): if self.unsent_events: self._open(timeout_time) @@ -138,6 +142,10 @@ def _send_event_data(self, timeout_time=None, last_exception=None): _error(self._outcome, self._condition) return + @_retry_decorator + def _send_event_data_with_retry(self, timeout_time=None, last_exception=None): + return self._send_event_data(timeout_time=timeout_time, last_exception=last_exception) + def _on_outcome(self, outcome, condition): """ Called when the outcome is received for a delivery. @@ -165,7 +173,7 @@ def create_batch(self, max_size=None, partition_key=None): """ if not self._max_message_size_on_link: - _retry_decorator(self._open)(self, timeout=self.client.config.send_timeout) + self._open_with_retry(timeout=self.client.config.send_timeout) if max_size and max_size > self._max_message_size_on_link: raise ValueError('Max message size: {} is too large, acceptable max batch size is: {} bytes.' @@ -219,7 +227,7 @@ def send(self, event_data, partition_key=None, timeout=None): wrapper_event_data = EventDataBatch._from_batch(event_data, partition_key) # pylint: disable=protected-access wrapper_event_data.message.on_send_complete = self._on_outcome self.unsent_events = [wrapper_event_data.message] - _retry_decorator(self._send_event_data)(self, timeout=timeout) + self._send_event_data_with_retry(timeout=timeout) def close(self, exception=None): # type:(Exception) -> None From 6fe4533fa9fdc2d27902b44953626ca55431632e Mon Sep 17 00:00:00 2001 From: yijxie Date: Fri, 2 Aug 2019 13:28:33 -0700 Subject: [PATCH 36/42] Remove old EPH pytest --- sdk/eventhub/azure-eventhubs/conftest.py | 18 -- .../tests/asynctests/conftest.py | 82 ------- .../asynctests/test_checkpoint_manager.py | 128 ---------- .../asynctests/test_eh_partition_pump.py | 30 --- .../tests/asynctests/test_longrunning_eph.py | 227 ----------------- .../test_longrunning_eph_with_context.py | 231 ------------------ .../asynctests/test_partition_manager.py | 17 -- .../tests/asynctests/test_partition_pump.py | 40 --- 8 files changed, 773 deletions(-) delete mode 100644 sdk/eventhub/azure-eventhubs/tests/asynctests/conftest.py delete mode 100644 sdk/eventhub/azure-eventhubs/tests/asynctests/test_checkpoint_manager.py delete mode 100644 sdk/eventhub/azure-eventhubs/tests/asynctests/test_eh_partition_pump.py delete mode 100644 sdk/eventhub/azure-eventhubs/tests/asynctests/test_longrunning_eph.py delete mode 100644 sdk/eventhub/azure-eventhubs/tests/asynctests/test_longrunning_eph_with_context.py delete mode 100644 sdk/eventhub/azure-eventhubs/tests/asynctests/test_partition_manager.py delete mode 100644 sdk/eventhub/azure-eventhubs/tests/asynctests/test_partition_pump.py diff --git a/sdk/eventhub/azure-eventhubs/conftest.py b/sdk/eventhub/azure-eventhubs/conftest.py index 578017cd60e4..c424357a77c4 100644 --- a/sdk/eventhub/azure-eventhubs/conftest.py +++ b/sdk/eventhub/azure-eventhubs/conftest.py @@ -18,24 +18,6 @@ collect_ignore.append("tests/asynctests") collect_ignore.append("features") collect_ignore.append("examples/async_examples") -else: - try: - from azure.eventprocessorhost import EventProcessorHost - from azure.eventprocessorhost import EventHubPartitionPump - from azure.eventprocessorhost import AzureStorageCheckpointLeaseManager - from azure.eventprocessorhost import AzureBlobLease - from azure.eventprocessorhost import EventHubConfig - from azure.eventprocessorhost.lease import Lease - from 
azure.eventprocessorhost.partition_pump import PartitionPump - from azure.eventprocessorhost.partition_manager import PartitionManager - except ImportError: - # Due to storage SDK conflict, temporarily skipping EPH tests - collect_ignore.append("tests/asynctests/test_checkpoint_manager.py") - collect_ignore.append("tests/asynctests/test_eh_partition_pump.py") - collect_ignore.append("tests/asynctests/test_longrunning_eph.py") - collect_ignore.append("tests/asynctests/test_longrunning_eph_with_context.py") - collect_ignore.append("tests/asynctests/test_partition_manager.py") - collect_ignore.append("tests/asynctests/test_partition_pump.py") from azure.eventhub import EventHubClient, EventHubConsumer, EventPosition diff --git a/sdk/eventhub/azure-eventhubs/tests/asynctests/conftest.py b/sdk/eventhub/azure-eventhubs/tests/asynctests/conftest.py deleted file mode 100644 index 7af6b48315e7..000000000000 --- a/sdk/eventhub/azure-eventhubs/tests/asynctests/conftest.py +++ /dev/null @@ -1,82 +0,0 @@ -import sys -import pytest -import os - -if sys.version_info >= (3, 5): - import asyncio - import logging - - from azure.eventprocessorhost.abstract_event_processor import AbstractEventProcessor - from azure.eventprocessorhost import EventProcessorHost - from azure.eventprocessorhost import EventHubPartitionPump - from azure.eventprocessorhost import AzureStorageCheckpointLeaseManager - from azure.eventprocessorhost import AzureBlobLease - from azure.eventprocessorhost import EventHubConfig - from azure.eventprocessorhost.lease import Lease - from azure.eventprocessorhost.partition_pump import PartitionPump - from azure.eventprocessorhost.partition_manager import PartitionManager - - class MockEventProcessor(AbstractEventProcessor): - """ - Mock Implmentation of AbstractEventProcessor for testing - """ - - def __init__(self, params=None): - """ - Init Event processor - """ - self.params = params - self._msg_counter = 0 - - async def open_async(self, context): - """ - Called by processor host to initialize the event processor. - """ - logging.info("Connection established {}".format(context.partition_id)) - - async def close_async(self, context, reason): - """ - Called by processor host to indicate that the event processor is being stopped. - (Params) Context:Information about the partition - """ - logging.info("Connection closed (reason {}, id {}, offset {}, sq_number {})".format( - reason, context.partition_id, context.offset, context.sequence_number)) - - async def process_events_async(self, context, messages): - """ - Called by the processor host when a batch of events has arrived. - This is where the real work of the event processor is done. - (Params) Context: Information about the partition, Messages: The events to be processed. - """ - logging.info("Events processed {} {}".format(context.partition_id, messages)) - await context.checkpoint_async() - - async def process_error_async(self, context, error): - """ - Called when the underlying client experiences an error while receiving. - EventProcessorHost will take care of recovering from the error and - continuing to pump messages,so no action is required from - (Params) Context: Information about the partition, Error: The error that occured. 
- """ - logging.error("Event Processor Error {!r}".format(error)) - -@pytest.fixture() -def eph(): - try: - storage_clm = AzureStorageCheckpointLeaseManager( - os.environ['AZURE_STORAGE_ACCOUNT'], - os.environ['AZURE_STORAGE_ACCESS_KEY'], - "lease") - NAMESPACE = os.environ.get('EVENT_HUB_NAMESPACE') - EVENTHUB = os.environ.get('EVENT_HUB_NAME') - USER = os.environ.get('EVENT_HUB_SAS_POLICY') - KEY = os.environ.get('EVENT_HUB_SAS_KEY') - - eh_config = EventHubConfig(NAMESPACE, EVENTHUB, USER, KEY, consumer_group="$default") - host = EventProcessorHost( - MockEventProcessor, - eh_config, - storage_clm) - except KeyError: - pytest.skip("Live EventHub configuration not found.") - return host \ No newline at end of file diff --git a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_checkpoint_manager.py b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_checkpoint_manager.py deleted file mode 100644 index c54faacf312b..000000000000 --- a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_checkpoint_manager.py +++ /dev/null @@ -1,128 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ----------------------------------------------------------------------------------- - -import asyncio -import base64 -import pytest -import time -import json -from azure.common import AzureException - - -@pytest.mark.liveTest -def test_create_store(storage_clm): - """ - Test the store is created correctly if not exists - """ - loop = asyncio.get_event_loop() - loop.run_until_complete(storage_clm.create_checkpoint_store_if_not_exists_async()) - - -@pytest.mark.liveTest -def test_create_lease(storage_clm): - """ - Test lease creation - """ - - loop = asyncio.get_event_loop() - loop.run_until_complete(storage_clm.create_checkpoint_store_if_not_exists_async()) - loop.run_until_complete(storage_clm.create_lease_if_not_exists_async("1")) - - -@pytest.mark.liveTest -def test_get_lease(storage_clm): - """ - Test get lease - """ - loop = asyncio.get_event_loop() - loop.run_until_complete(storage_clm.get_lease_async("1")) - - -@pytest.mark.liveTest -def test_aquire_renew_release_lease(storage_clm): - """ - Test aquire lease - """ - loop = asyncio.get_event_loop() - lease = loop.run_until_complete(storage_clm.get_lease_async("1")) - assert lease is None - loop.run_until_complete(storage_clm.create_lease_if_not_exists_async("1")) - lease = loop.run_until_complete(storage_clm.get_lease_async("1")) - loop.run_until_complete(storage_clm.acquire_lease_async(lease)) - loop.run_until_complete(storage_clm.renew_lease_async(lease)) - loop.run_until_complete(storage_clm.release_lease_async(lease)) - assert lease.partition_id == "1" - assert lease.epoch == 1 - assert loop.run_until_complete(lease.state()) == "available" - - -@pytest.mark.liveTest -def test_delete_lease(storage_clm): - """ - Test delete lease - """ - loop = asyncio.get_event_loop() - lease = loop.run_until_complete(storage_clm.get_lease_async("1")) - assert lease is None - loop.run_until_complete(storage_clm.create_lease_if_not_exists_async("1")) - lease = loop.run_until_complete(storage_clm.get_lease_async("1")) - loop.run_until_complete(storage_clm.delete_lease_async(lease)) - lease = loop.run_until_complete(storage_clm.get_lease_async("1")) - assert lease == None - - -@pytest.mark.liveTest -def test_checkpointing(storage_clm): - """ - Test checkpointing - 
""" - loop = asyncio.get_event_loop() - local_checkpoint = loop.run_until_complete(storage_clm.create_checkpoint_if_not_exists_async("1")) - assert local_checkpoint.partition_id == "1" - assert local_checkpoint.offset == "-1" - lease = loop.run_until_complete(storage_clm.get_lease_async("1")) - loop.run_until_complete(storage_clm.acquire_lease_async(lease)) - - # Test EPH context encoded as bytes - event_processor_context = {'some_string_data': 'abc', 'some_int_data': 123, 'a_list': [42]} - cloud_event_processor_context_asbytes = json.dumps(event_processor_context).encode('utf-8') - lease.event_processor_context = base64.b64encode(cloud_event_processor_context_asbytes).decode('ascii') - loop.run_until_complete(storage_clm.update_checkpoint_async(lease, local_checkpoint)) - - cloud_lease = loop.run_until_complete(storage_clm.get_lease_async("1")) - cloud_event_processor_context_asbytes = cloud_lease.event_processor_context.encode('ascii') - event_processor_context_decoded = base64.b64decode(cloud_event_processor_context_asbytes).decode('utf-8') - cloud_event_processor_context = json.loads(event_processor_context_decoded) - assert cloud_event_processor_context['some_string_data'] == 'abc' - assert cloud_event_processor_context['some_int_data'] == 123 - assert cloud_event_processor_context['a_list'] == [42] - - # Test EPH context as JSON object - lease.event_processor_context = {'some_string_data': 'abc', 'some_int_data': 123, 'a_list': [42]} - loop.run_until_complete(storage_clm.update_checkpoint_async(lease, local_checkpoint)) - - cloud_lease = loop.run_until_complete(storage_clm.get_lease_async("1")) - assert cloud_lease.event_processor_context['some_string_data'] == 'abc' - assert cloud_lease.event_processor_context['some_int_data'] == 123 - assert cloud_lease.event_processor_context['a_list'] == [42] - - cloud_checkpoint = loop.run_until_complete(storage_clm.get_checkpoint_async("1")) - lease.offset = cloud_checkpoint.offset - lease.sequence_number = cloud_checkpoint.sequence_number - lease.event_processor_context = None - assert cloud_checkpoint.partition_id == "1" - assert cloud_checkpoint.offset == "-1" - modify_checkpoint = cloud_checkpoint - modify_checkpoint.offset = "512" - modify_checkpoint.sequence_number = "32" - time.sleep(35) - loop.run_until_complete(storage_clm.update_checkpoint_async(lease, modify_checkpoint)) - cloud_lease = loop.run_until_complete(storage_clm.get_lease_async("1")) - assert cloud_lease.event_processor_context is None - - cloud_checkpoint = loop.run_until_complete(storage_clm.get_checkpoint_async("1")) - assert cloud_checkpoint.partition_id == "1" - assert cloud_checkpoint.offset == "512" - loop.run_until_complete(storage_clm.release_lease_async(lease)) diff --git a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_eh_partition_pump.py b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_eh_partition_pump.py deleted file mode 100644 index e21a7d675f6d..000000000000 --- a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_eh_partition_pump.py +++ /dev/null @@ -1,30 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
-# ----------------------------------------------------------------------------------- - -import unittest -import asyncio -import logging -import pytest - - -async def wait_and_close(host): - """ - Run EventProcessorHost for 2 minutes then shutdown. - """ - await asyncio.sleep(60) - await host.close_async() - - -@pytest.mark.liveTest -def test_partition_pump_async(eh_partition_pump): - """ - Test that event hub partition pump opens and processess messages sucessfully then closes - """ - pytest.skip("Not working yet") - loop = asyncio.get_event_loop() - tasks = asyncio.gather( - eh_partition_pump.open_async(), - wait_and_close(eh_partition_pump)) - loop.run_until_complete(tasks) diff --git a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_longrunning_eph.py b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_longrunning_eph.py deleted file mode 100644 index 41a7db1b444d..000000000000 --- a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_longrunning_eph.py +++ /dev/null @@ -1,227 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ----------------------------------------------------------------------------------- - -import logging -import asyncio -import sys -import os -import argparse -import time -import json -import pytest -from logging.handlers import RotatingFileHandler - -from azure.eventhub.aio import EventHubClient -from azure.eventhub import EventData -from azure.eventprocessorhost import ( - AbstractEventProcessor, - AzureStorageCheckpointLeaseManager, - EventHubConfig, - EventProcessorHost, - EPHOptions) - - -def get_logger(filename, level=logging.INFO): - azure_logger = logging.getLogger("azure.eventprocessorhost") - azure_logger.setLevel(level) - uamqp_logger = logging.getLogger("uamqp") - uamqp_logger.setLevel(logging.INFO) - - formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s') - console_handler = logging.StreamHandler(stream=sys.stdout) - console_handler.setFormatter(formatter) - if not azure_logger.handlers: - azure_logger.addHandler(console_handler) - if not uamqp_logger.handlers: - uamqp_logger.addHandler(console_handler) - - if filename: - file_handler = RotatingFileHandler(filename, maxBytes=20*1024*1024, backupCount=3) - file_handler.setFormatter(formatter) - azure_logger.addHandler(file_handler) - uamqp_logger.addHandler(file_handler) - - return azure_logger - - -logger = get_logger("eph_test_async.log", logging.INFO) - - -class EventProcessor(AbstractEventProcessor): - """ - Example Implmentation of AbstractEventProcessor - """ - - def __init__(self, params=None): - """ - Init Event processor - """ - super().__init__(params) - self._msg_counter = 0 - - async def open_async(self, context): - """ - Called by processor host to initialize the event processor. - """ - assert hasattr(context, 'event_processor_context') - assert context.event_processor_context is None - logger.info("Connection established {}. State {}".format( - context.partition_id, context.event_processor_context)) - - async def close_async(self, context, reason): - """ - Called by processor host to indicate that the event processor is being stopped. 
- :param context: Information about the partition - :type context: ~azure.eventprocessorhost.PartitionContext - """ - logger.info("Connection closed (reason {}, id {}, offset {}, sq_number {}, state {})".format( - reason, - context.partition_id, - context.offset, - context.sequence_number, - context.event_processor_context)) - - async def process_events_async(self, context, messages): - """ - Called by the processor host when a batch of events has arrived. - This is where the real work of the event processor is done. - :param context: Information about the partition - :type context: ~azure.eventprocessorhost.PartitionContext - :param messages: The events to be processed. - :type messages: list[~azure.eventhub.common.EventData] - """ - assert context.event_processor_context is None - print("Processing id {}, offset {}, sq_number {}, state {})".format( - context.partition_id, - context.offset, - context.sequence_number, - context.event_processor_context)) - await context.checkpoint_async() - - async def process_error_async(self, context, error): - """ - Called when the underlying client experiences an error while receiving. - EventProcessorHost will take care of recovering from the error and - continuing to pump messages,so no action is required from - :param context: Information about the partition - :type context: ~azure.eventprocessorhost.PartitionContext - :param error: The error that occured. - """ - logger.info("Event Processor Error for partition {}, {!r}".format(context.partition_id, error)) - - -async def wait_and_close(host, duration): - """ - Run EventProcessorHost for 30 seconds then shutdown. - """ - await asyncio.sleep(duration) - await host.close_async() - - -async def pump(pid, sender, duration): - deadline = time.time() + duration - total = 0 - - try: - async with sender: - event_list = [] - while time.time() < deadline: - data = EventData(body=b"D" * 512) - event_list.append(data) - total += 1 - if total % 100 == 0: - await sender.send(event_list) - event_list = [] - logger.info("{}: Send total {}".format(pid, total)) - except Exception as err: - logger.error("{}: Send failed {}".format(pid, err)) - raise - print("{}: Final Sent total {}".format(pid, total)) - - -@pytest.mark.liveTest -@pytest.mark.asyncio -async def test_long_running_eph(live_eventhub): - parser = argparse.ArgumentParser() - parser.add_argument("--duration", help="Duration in seconds of the test", type=int, default=30) - parser.add_argument("--storage-account", help="Storage account name", default=os.environ.get('AZURE_STORAGE_ACCOUNT')) - parser.add_argument("--storage-key", help="Storage account access key", default=os.environ.get('AZURE_STORAGE_ACCESS_KEY')) - parser.add_argument("--container", help="Lease container name", default="nocontextleases") - parser.add_argument("--eventhub", help="Name of EventHub", default=live_eventhub['event_hub']) - parser.add_argument("--namespace", help="Namespace of EventHub", default=live_eventhub['namespace']) - parser.add_argument("--suffix", help="Namespace of EventHub", default="servicebus.windows.net") - parser.add_argument("--sas-policy", help="Name of the shared access policy to authenticate with", default=live_eventhub['key_name']) - parser.add_argument("--sas-key", help="Shared access key", default=live_eventhub['access_key']) - - loop = asyncio.get_event_loop() - args, _ = parser.parse_known_args() - if not args.namespace or not args.eventhub: - try: - import pytest - pytest.skip("Must specify '--namespace' and '--eventhub'") - except ImportError: - raise 
ValueError("Must specify '--namespace' and '--eventhub'") - - # Queue up some events in the Eventhub - conn_str = "Endpoint=sb://{}/;SharedAccessKeyName={};SharedAccessKey={};EntityPath={}".format( - live_eventhub['hostname'], - live_eventhub['key_name'], - live_eventhub['access_key'], - live_eventhub['event_hub']) - send_client = EventHubClient.from_connection_string(conn_str) - pumps = [] - for pid in ["0", "1"]: - sender = send_client.create_producer(partition_id=pid, send_timeout=0) - pumps.append(pump(pid, sender, 15)) - results = await asyncio.gather(*pumps, return_exceptions=True) - - assert not any(results) - - # Eventhub config and storage manager - eh_config = EventHubConfig( - args.namespace, - args.eventhub, - args.sas_policy, - args.sas_key, - consumer_group="$default", - namespace_suffix=args.suffix) - eh_options = EPHOptions() - eh_options.release_pump_on_timeout = True - eh_options.debug_trace = False - eh_options.receive_timeout = 120 - storage_manager = AzureStorageCheckpointLeaseManager( - storage_account_name=args.storage_account, - storage_account_key=args.storage_key, - lease_renew_interval=30, - lease_container_name=args.container, - lease_duration=60) - - # Event loop and host - host = EventProcessorHost( - EventProcessor, - eh_config, - storage_manager, - ep_params=["param1", "param2"], - eph_options=eh_options, - loop=loop) - - tasks = asyncio.gather( - host.open_async(), - wait_and_close(host, args.duration), return_exceptions=True) - results = await tasks - assert not any(results) - - -if __name__ == '__main__': - config = {} - config['hostname'] = os.environ['EVENT_HUB_HOSTNAME'] - config['event_hub'] = os.environ['EVENT_HUB_NAME'] - config['key_name'] = os.environ['EVENT_HUB_SAS_POLICY'] - config['access_key'] = os.environ['EVENT_HUB_SAS_KEY'] - config['namespace'] = os.environ['EVENT_HUB_NAMESPACE'] - config['consumer_group'] = "$Default" - config['partition'] = "0" - loop = asyncio.get_event_loop() - loop.run_until_complete(test_long_running_eph(config)) diff --git a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_longrunning_eph_with_context.py b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_longrunning_eph_with_context.py deleted file mode 100644 index 957896c816f1..000000000000 --- a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_longrunning_eph_with_context.py +++ /dev/null @@ -1,231 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
-# ----------------------------------------------------------------------------------- - -import logging -import asyncio -import sys -import os -import argparse -import time -import json -import pytest -from logging.handlers import RotatingFileHandler - -from azure.eventhub.aio import EventHubClient -from azure.eventhub import EventData -from azure.eventprocessorhost import ( - AbstractEventProcessor, - AzureStorageCheckpointLeaseManager, - EventHubConfig, - EventProcessorHost, - EPHOptions) - - -def get_logger(filename, level=logging.INFO): - azure_logger = logging.getLogger("azure.eventprocessorhost") - azure_logger.setLevel(level) - uamqp_logger = logging.getLogger("uamqp") - uamqp_logger.setLevel(logging.INFO) - - formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s') - console_handler = logging.StreamHandler(stream=sys.stdout) - console_handler.setFormatter(formatter) - if not azure_logger.handlers: - azure_logger.addHandler(console_handler) - if not uamqp_logger.handlers: - uamqp_logger.addHandler(console_handler) - - if filename: - file_handler = RotatingFileHandler(filename, maxBytes=20*1024*1024, backupCount=3) - file_handler.setFormatter(formatter) - azure_logger.addHandler(file_handler) - uamqp_logger.addHandler(file_handler) - - return azure_logger - - -logger = get_logger("eph_test_async.log", logging.INFO) - - -class EventProcessor(AbstractEventProcessor): - """ - Example Implmentation of AbstractEventProcessor - """ - - def __init__(self, params=None): - """ - Init Event processor - """ - super().__init__(params) - self._params = params - self._msg_counter = 0 - - async def open_async(self, context): - """ - Called by processor host to initialize the event processor. - """ - assert hasattr(context, 'event_processor_context') - logger.info("Connection established {}. State {}".format( - context.partition_id, context.event_processor_context)) - - async def close_async(self, context, reason): - """ - Called by processor host to indicate that the event processor is being stopped. - :param context: Information about the partition - :type context: ~azure.eventprocessorhost.PartitionContext - """ - logger.info("Connection closed (reason {}, id {}, offset {}, sq_number {}, state {})".format( - reason, - context.partition_id, - context.offset, - context.sequence_number, - context.event_processor_context)) - - async def process_events_async(self, context, messages): - """ - Called by the processor host when a batch of events has arrived. - This is where the real work of the event processor is done. - :param context: Information about the partition - :type context: ~azure.eventprocessorhost.PartitionContext - :param messages: The events to be processed. - :type messages: list[~azure.eventhub.common.EventData] - """ - print("Processing id {}, offset {}, sq_number {}, state {})".format( - context.partition_id, - context.offset, - context.sequence_number, - context.event_processor_context)) - assert hasattr(context, 'event_processor_context') - if self._msg_counter > 1: - assert context.event_processor_context == json.dumps( - {"Sequence": self._msg_counter, "Data": self._params}) - self._msg_counter += 1 - await context.checkpoint_async( - json.dumps({"Sequence": self._msg_counter, "Data": self._params})) - - async def process_error_async(self, context, error): - """ - Called when the underlying client experiences an error while receiving. 
- EventProcessorHost will take care of recovering from the error and - continuing to pump messages,so no action is required from - :param context: Information about the partition - :type context: ~azure.eventprocessorhost.PartitionContext - :param error: The error that occured. - """ - logger.info("Event Processor Error for partition {}, {!r}".format(context.partition_id, error)) - - -async def wait_and_close(host, duration): - """ - Run EventProcessorHost for 30 seconds then shutdown. - """ - await asyncio.sleep(duration) - await host.close_async() - - -async def pump(pid, sender, duration): - deadline = time.time() + duration - total = 0 - - try: - async with sender: - event_list = [] - while time.time() < deadline: - data = EventData(body=b"D" * 512) - event_list.append(data) - total += 1 - if total % 100 == 0: - await sender.send(event_list) - event_list = [] - logger.info("{}: Send total {}".format(pid, total)) - except Exception as err: - logger.error("{}: Send failed {}".format(pid, err)) - raise - print("{}: Final Sent total {}".format(pid, total)) - - -@pytest.mark.liveTest -@pytest.mark.asyncio -async def test_long_running_context_eph(live_eventhub): - parser = argparse.ArgumentParser() - parser.add_argument("--duration", help="Duration in seconds of the test", type=int, default=30) - parser.add_argument("--storage-account", help="Storage account name", default=os.environ.get('AZURE_STORAGE_ACCOUNT')) - parser.add_argument("--storage-key", help="Storage account access key", default=os.environ.get('AZURE_STORAGE_ACCESS_KEY')) - parser.add_argument("--container", help="Lease container name", default="contextleases") - parser.add_argument("--eventhub", help="Name of EventHub", default=live_eventhub['event_hub']) - parser.add_argument("--namespace", help="Namespace of EventHub", default=live_eventhub['namespace']) - parser.add_argument("--suffix", help="Namespace of EventHub", default="servicebus.windows.net") - parser.add_argument("--sas-policy", help="Name of the shared access policy to authenticate with", default=live_eventhub['key_name']) - parser.add_argument("--sas-key", help="Shared access key", default=live_eventhub['access_key']) - - loop = asyncio.get_event_loop() - args, _ = parser.parse_known_args() - if not args.namespace or not args.eventhub: - try: - import pytest - pytest.skip("Must specify '--namespace' and '--eventhub'") - except ImportError: - raise ValueError("Must specify '--namespace' and '--eventhub'") - - # Queue up some events in the Eventhub - conn_str = "Endpoint=sb://{}/;SharedAccessKeyName={};SharedAccessKey={};EntityPath={}".format( - live_eventhub['hostname'], - live_eventhub['key_name'], - live_eventhub['access_key'], - live_eventhub['event_hub']) - send_client = EventHubClient.from_connection_string(conn_str) - pumps = [] - for pid in ["0", "1"]: - sender = send_client.create_producer(partition_id=pid, send_timeout=0) - pumps.append(pump(pid, sender, 15)) - results = await asyncio.gather(*pumps, return_exceptions=True) - assert not any(results) - - # Eventhub config and storage manager - eh_config = EventHubConfig( - args.namespace, - args.eventhub, - args.sas_policy, - args.sas_key, - consumer_group="$default", - namespace_suffix=args.suffix) - eh_options = EPHOptions() - eh_options.release_pump_on_timeout = True - eh_options.debug_trace = False - eh_options.receive_timeout = 120 - storage_manager = AzureStorageCheckpointLeaseManager( - storage_account_name=args.storage_account, - storage_account_key=args.storage_key, - lease_renew_interval=30, - 
lease_container_name=args.container, - lease_duration=60) - - # Event loop and host - host = EventProcessorHost( - EventProcessor, - eh_config, - storage_manager, - ep_params=["param1","param2"], - eph_options=eh_options, - loop=loop) - - tasks = asyncio.gather( - host.open_async(), - wait_and_close(host, args.duration), return_exceptions=True) - results = await tasks - assert not any(results) - - -if __name__ == '__main__': - config = {} - config['hostname'] = os.environ['EVENT_HUB_HOSTNAME'] - config['event_hub'] = os.environ['EVENT_HUB_NAME'] - config['key_name'] = os.environ['EVENT_HUB_SAS_POLICY'] - config['access_key'] = os.environ['EVENT_HUB_SAS_KEY'] - config['namespace'] = os.environ['EVENT_HUB_NAMESPACE'] - config['consumer_group'] = "$Default" - config['partition'] = "0" - loop = asyncio.get_event_loop() - loop.run_until_complete(test_long_running_context_eph(config)) diff --git a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_partition_manager.py b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_partition_manager.py deleted file mode 100644 index aab0a1b4e501..000000000000 --- a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_partition_manager.py +++ /dev/null @@ -1,17 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ----------------------------------------------------------------------------------- - -import asyncio -import pytest - - -@pytest.mark.liveTest -def test_get_partition_ids(partition_manager): - """ - Test that partition manger returns all the partitions for an event hub - """ - loop = asyncio.get_event_loop() - pids = loop.run_until_complete(partition_manager.get_partition_ids_async()) - assert pids == ["0", "1"] diff --git a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_partition_pump.py b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_partition_pump.py deleted file mode 100644 index 88c3eea4cb7e..000000000000 --- a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_partition_pump.py +++ /dev/null @@ -1,40 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
-# ----------------------------------------------------------------------------------- - -import asyncio -import pytest - - -@pytest.mark.liveTest -def test_open_async(partition_pump): - """ - Test that partition pump opens sucessfully - """ - loop = asyncio.get_event_loop() - loop.run_until_complete(partition_pump.open_async()) # Simulate Open - - -@pytest.mark.liveTest -def test_process_events_async(partition_pump): - """ - Test that the partition pump processes a list of mock events (["event1", "event2"]) - properly - """ - loop = asyncio.get_event_loop() - loop.run_until_complete(partition_pump.open_async()) # Simulate Open - _mock_events = ["event1", "event2"] # Mock Events - loop.run_until_complete(partition_pump.process_events_async(_mock_events)) # Simulate Process - - -@pytest.mark.liveTest -def test_close_async(partition_pump): - """ - Test that partition pump closes - """ - loop = asyncio.get_event_loop() - loop.run_until_complete(partition_pump.open_async()) # Simulate Open - _mock_events = ["event1", "event2"] # Mock Events - loop.run_until_complete(partition_pump.process_events_async(_mock_events)) # Simulate Process - loop.run_until_complete(partition_pump.close_async("Finished")) # Simulate Close From 598245df6687a2e55bcdea909336524a6a51df82 Mon Sep 17 00:00:00 2001 From: yijxie Date: Fri, 2 Aug 2019 13:31:41 -0700 Subject: [PATCH 37/42] Revert "Revert "Merge branch 'eventhubs_dev' into eventhubs_eph"" This reverts commit d688090f53e7d320b1d156b04b53d5e9502f87a0. --- .../azure/eventhub/_consumer_producer_mixin.py | 2 ++ .../azure/eventhub/aio/_consumer_producer_mixin_async.py | 2 ++ sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py | 2 +- .../azure-eventhubs/tests/asynctests/test_negative_async.py | 6 ++++++ sdk/eventhub/azure-eventhubs/tests/test_negative.py | 4 +++- sdk/eventhub/azure-eventhubs/tests/test_receive.py | 4 ++-- 6 files changed, 16 insertions(+), 4 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py index 207a5486a1c1..bebef7a51982 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py @@ -65,6 +65,8 @@ def _open(self, timeout_time=None): """ # pylint: disable=protected-access if not self.running: + if self._handler: + self._handler.close() if self.redirected: alt_creds = { "username": self.client._auth_config.get("iot_username"), diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py index 9ecd47757221..aa539110e50a 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py @@ -66,6 +66,8 @@ async def _open(self, timeout_time=None): """ # pylint: disable=protected-access if not self.running: + if self._handler: + await self._handler.close_async() if self.redirected: alt_creds = { "username": self.client._auth_config.get("iot_username"), diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py index a9ed80c5548c..dd4b7aa2396a 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py @@ -215,7 +215,7 @@ def send(self, event_data, 
partition_key=None, timeout=None): wrapper_event_data = event_data else: if partition_key: - event_data = self._set_partition_key(event_data, partition_key) + event_data = _set_partition_key(event_data, partition_key) wrapper_event_data = EventDataBatch._from_batch(event_data, partition_key) # pylint: disable=protected-access wrapper_event_data.message.on_send_complete = self._on_outcome self.unsent_events = [wrapper_event_data.message] diff --git a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py index 0ab4fe53f006..4406da855f59 100644 --- a/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py +++ b/sdk/eventhub/azure-eventhubs/tests/asynctests/test_negative_async.py @@ -30,6 +30,7 @@ async def test_send_with_invalid_hostname_async(invalid_hostname, connstr_receiv sender = client.create_producer() with pytest.raises(AuthenticationError): await sender.send(EventData("test data")) + await sender.close() @pytest.mark.liveTest @@ -39,6 +40,7 @@ async def test_receive_with_invalid_hostname_async(invalid_hostname): receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) with pytest.raises(AuthenticationError): await receiver.receive(timeout=3) + await receiver.close() @pytest.mark.liveTest @@ -49,6 +51,7 @@ async def test_send_with_invalid_key_async(invalid_key, connstr_receivers): sender = client.create_producer() with pytest.raises(AuthenticationError): await sender.send(EventData("test data")) + await sender.close() @pytest.mark.liveTest @@ -58,6 +61,7 @@ async def test_receive_with_invalid_key_async(invalid_key): receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) with pytest.raises(AuthenticationError): await receiver.receive(timeout=3) + await receiver.close() @pytest.mark.liveTest @@ -68,6 +72,7 @@ async def test_send_with_invalid_policy_async(invalid_policy, connstr_receivers) sender = client.create_producer() with pytest.raises(AuthenticationError): await sender.send(EventData("test data")) + await sender.close() @pytest.mark.liveTest @@ -77,6 +82,7 @@ async def test_receive_with_invalid_policy_async(invalid_policy): receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) with pytest.raises(AuthenticationError): await receiver.receive(timeout=3) + await receiver.close() @pytest.mark.liveTest diff --git a/sdk/eventhub/azure-eventhubs/tests/test_negative.py b/sdk/eventhub/azure-eventhubs/tests/test_negative.py index 3682461f9db2..a1fee7605818 100644 --- a/sdk/eventhub/azure-eventhubs/tests/test_negative.py +++ b/sdk/eventhub/azure-eventhubs/tests/test_negative.py @@ -27,6 +27,7 @@ def test_send_with_invalid_hostname(invalid_hostname, connstr_receivers): sender = client.create_producer() with pytest.raises(AuthenticationError): sender.send(EventData("test data")) + sender.close() @pytest.mark.liveTest @@ -47,6 +48,7 @@ def test_send_with_invalid_key(invalid_key, connstr_receivers): sender.send(EventData("test data")) sender.close() + @pytest.mark.liveTest def test_receive_with_invalid_key_sync(invalid_key): client = EventHubClient.from_connection_string(invalid_key, network_tracing=False) @@ -96,13 +98,13 @@ def test_non_existing_entity_sender(connection_str): sender = client.create_producer(partition_id="1") with pytest.raises(AuthenticationError): sender.send(EventData("test data")) + sender.close() 
@pytest.mark.liveTest def test_non_existing_entity_receiver(connection_str): client = EventHubClient.from_connection_string(connection_str, event_hub_path="nemo", network_tracing=False) receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) - with pytest.raises(AuthenticationError): receiver.receive(timeout=5) receiver.close() diff --git a/sdk/eventhub/azure-eventhubs/tests/test_receive.py b/sdk/eventhub/azure-eventhubs/tests/test_receive.py index 35c5e39c992b..d241a8e6e585 100644 --- a/sdk/eventhub/azure-eventhubs/tests/test_receive.py +++ b/sdk/eventhub/azure-eventhubs/tests/test_receive.py @@ -148,10 +148,10 @@ def test_receive_with_custom_datetime_sync(connstr_senders): receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition(offset)) with receiver: all_received = [] - received = receiver.receive(timeout=1) + received = receiver.receive(timeout=5) while received: all_received.extend(received) - received = receiver.receive(timeout=1) + received = receiver.receive(timeout=5) assert len(all_received) == 5 for received_event in all_received: From 97dfce513a3c62de6474dca9c7eea1698cf8a92a Mon Sep 17 00:00:00 2001 From: Adam Ling <47871814+yunhaoling@users.noreply.github.com> Date: Fri, 2 Aug 2019 15:04:28 -0700 Subject: [PATCH 38/42] Update sample codes and docstring (#6643) --- .../eventhub/_consumer_producer_mixin.py | 2 +- .../aio/_consumer_producer_mixin_async.py | 2 +- .../azure/eventhub/aio/producer_async.py | 11 +++- .../azure/eventhub/producer.py | 9 +++ .../test_examples_eventhub_async.py | 13 +++- .../examples/event_data_batch.py | 64 +++++++++++++++++++ .../examples/test_examples_eventhub.py | 11 ++++ 7 files changed, 108 insertions(+), 4 deletions(-) create mode 100644 sdk/eventhub/azure-eventhubs/examples/event_data_batch.py diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py index 3659b7c680c1..3e974c622a45 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py @@ -46,7 +46,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): def _check_closed(self): if self.error: - raise EventHubError("{} has been closed. Please create a new consumer to receive event data.".format(self.name)) + raise EventHubError("{} has been closed. Please create a new one to handle event data.".format(self.name)) def _create_handler(self): pass diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py index 21dc6bb8a012..3027fcfc3287 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py @@ -47,7 +47,7 @@ async def __aexit__(self, exc_type, exc_val, exc_tb): def _check_closed(self): if self.error: - raise EventHubError("{} has been closed. Please create a new consumer to receive event data.".format(self.name)) + raise EventHubError("{} has been closed. 
Please create a new one to handle event data.".format(self.name)) def _create_handler(self): pass diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py index 10de6017026b..a231e12eb63c 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/producer_async.py @@ -163,6 +163,15 @@ async def create_batch(self, max_size=None, partition_key=None): :type partition_key: str :return: an EventDataBatch instance :rtype: ~azure.eventhub.EventDataBatch + + Example: + .. literalinclude:: ../examples/async_examples/test_examples_eventhub_async.py + :start-after: [START eventhub_client_async_create_batch] + :end-before: [END eventhub_client_async_create_batch] + :language: python + :dedent: 4 + :caption: Create EventDataBatch object within limited size + """ if not self._max_message_size_on_link: @@ -195,7 +204,7 @@ async def send(self, event_data, *, partition_key=None, timeout=None): :rtype: None Example: - .. literalinclude:: ../examples/test_examples_eventhub.py + .. literalinclude:: ../examples/async_examples/test_examples_eventhub_async.py :start-after: [START eventhub_client_async_send] :end-before: [END eventhub_client_async_send] :language: python diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py index f49fc280814e..13ef7744772b 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py @@ -170,6 +170,15 @@ def create_batch(self, max_size=None, partition_key=None): :type partition_key: str :return: an EventDataBatch instance :rtype: ~azure.eventhub.EventDataBatch + + Example: + .. literalinclude:: ../examples/test_examples_eventhub.py + :start-after: [START eventhub_client_sync_create_batch] + :end-before: [END eventhub_client_sync_create_batch] + :language: python + :dedent: 4 + :caption: Create EventDataBatch object within limited size + """ if not self._max_message_size_on_link: diff --git a/sdk/eventhub/azure-eventhubs/examples/async_examples/test_examples_eventhub_async.py b/sdk/eventhub/azure-eventhubs/examples/async_examples/test_examples_eventhub_async.py index 048f5af1623c..896f2a007b21 100644 --- a/sdk/eventhub/azure-eventhubs/examples/async_examples/test_examples_eventhub_async.py +++ b/sdk/eventhub/azure-eventhubs/examples/async_examples/test_examples_eventhub_async.py @@ -23,7 +23,7 @@ async def test_example_eventhub_async_send_and_receive(live_eventhub_config): os.environ['EVENT_HUB_HOSTNAME'], os.environ['EVENT_HUB_SAS_POLICY'], os.environ['EVENT_HUB_SAS_KEY'], - os.environ['EVENT_HUB_NAME']) + os.environ['EVENT_HUB_NAME']) client = EventHubClient.from_connection_string(connection_str) # [END create_eventhub_client_async] @@ -49,6 +49,17 @@ async def test_example_eventhub_async_send_and_receive(live_eventhub_config): await consumer.receive(timeout=1) + # [START eventhub_client_async_create_batch] + event_data_batch = await producer.create_batch(max_size=10000) + while True: + try: + event_data_batch.try_add(EventData('Message inside EventBatchData')) + except ValueError: + # The EventDataBatch object reaches its max_size. + # You can send the full EventDataBatch object and create a new one here. 
+ break + # [END eventhub_client_async_create_batch] + # [START eventhub_client_async_send] async with producer: event_data = EventData(b"A single event") diff --git a/sdk/eventhub/azure-eventhubs/examples/event_data_batch.py b/sdk/eventhub/azure-eventhubs/examples/event_data_batch.py new file mode 100644 index 000000000000..3cf6dc88f177 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/examples/event_data_batch.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python + +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +""" +An example to show creating and sending EventBatchData within limited size. +""" + +# pylint: disable=C0111 + +import logging +import time +import os + +from azure.eventhub import EventHubClient, EventData, EventHubSharedKeyCredential + +import examples +logger = examples.get_logger(logging.INFO) + + +HOSTNAME = os.environ.get('EVENT_HUB_HOSTNAME') # .servicebus.windows.net +EVENT_HUB = os.environ.get('EVENT_HUB_NAME') + +USER = os.environ.get('EVENT_HUB_SAS_POLICY') +KEY = os.environ.get('EVENT_HUB_SAS_KEY') + + +def create_batch_data(producer): + event_data_batch = producer.create_batch(max_size=10000) + while True: + try: + event_data_batch.try_add(EventData('Message inside EventBatchData')) + except ValueError: + # EventDataBatch object reaches max_size. + # New EventDataBatch object can be created here to send more data + break + return event_data_batch + + +try: + if not HOSTNAME: + raise ValueError("No EventHubs URL supplied.") + + client = EventHubClient(host=HOSTNAME, event_hub_path=EVENT_HUB, credential=EventHubSharedKeyCredential(USER, KEY), + network_tracing=False) + producer = client.create_producer() + + try: + start_time = time.time() + with producer: + event_data_batch = create_batch_data(producer) + producer.send(event_data_batch) + except: + raise + finally: + end_time = time.time() + run_time = end_time - start_time + logger.info("Runtime: {} seconds".format(run_time)) + +except KeyboardInterrupt: + pass diff --git a/sdk/eventhub/azure-eventhubs/examples/test_examples_eventhub.py b/sdk/eventhub/azure-eventhubs/examples/test_examples_eventhub.py index d8483dc6c032..04e29d08256c 100644 --- a/sdk/eventhub/azure-eventhubs/examples/test_examples_eventhub.py +++ b/sdk/eventhub/azure-eventhubs/examples/test_examples_eventhub.py @@ -86,6 +86,17 @@ def test_example_eventhub_sync_send_and_receive(live_eventhub_config): event_data = EventData(body=list_data) # [END create_event_data] + # [START eventhub_client_sync_create_batch] + event_data_batch = producer.create_batch(max_size=10000) + while True: + try: + event_data_batch.try_add(EventData('Message inside EventBatchData')) + except ValueError: + # The EventDataBatch object reaches its max_size. + # You can send the full EventDataBatch object and create a new one here. 
+ break + # [END eventhub_client_sync_create_batch] + # [START eventhub_client_sync_send] with producer: event_data = EventData(b"A single event") From 64c5c7d3eb9dde820ab4eaefbe2e487510183fe5 Mon Sep 17 00:00:00 2001 From: yijxie Date: Fri, 2 Aug 2019 18:50:53 -0700 Subject: [PATCH 39/42] Check tablename to prevent sql injection --- .../sqlite3_partition_manager.py | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py index 4ba7b489e487..f1d3a39bd5b3 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/sqlite3_partition_manager.py @@ -9,13 +9,20 @@ from .partition_manager import PartitionManager +def _check_table_name(table_name: str): + for c in table_name: + if not (c.isalnum() or c == "_"): + raise ValueError("Table name \"{}\" is not in correct format".format(table_name)) + return table_name + + class Sqlite3PartitionManager(PartitionManager): """An implementation of PartitionManager by using the sqlite3 in Python standard library. Sqlite3 is a mini sql database that runs in memory or files. """ - def __init__(self, db_filename=":memory:", ownership_table="ownership"): + def __init__(self, db_filename: str = ":memory:", ownership_table: str = "ownership"): """ :param db_filename: name of file that saves the sql data. @@ -23,7 +30,7 @@ def __init__(self, db_filename=":memory:", ownership_table="ownership"): :param ownership_table: The table name of the sqlite3 database. """ super(Sqlite3PartitionManager, self).__init__() - self.ownership_table = ownership_table + self.ownership_table = _check_table_name(ownership_table) conn = sqlite3.connect(db_filename) c = conn.cursor() try: @@ -50,7 +57,7 @@ async def list_ownership(self, eventhub_name, consumer_group_name): "sequence_number", "offset", "last_modified_time", "etag"] cursor.execute("select " + ",".join(fields) + - " from "+self.ownership_table+" where eventhub_name=? " + " from "+_check_table_name(self.ownership_table)+" where eventhub_name=? " "and consumer_group_name=?", (eventhub_name, consumer_group_name)) result_list = [] @@ -66,21 +73,21 @@ async def claim_ownership(self, partitions): cursor = self.conn.cursor() try: for p in partitions: - cursor.execute("select * from " + self.ownership_table + + cursor.execute("select * from " + _check_table_name(self.ownership_table) + " where eventhub_name=? " "and consumer_group_name=? " "and partition_id =?", (p["eventhub_name"], p["consumer_group_name"], p["partition_id"])) if not cursor.fetchall(): - cursor.execute("insert into " + self.ownership_table + + cursor.execute("insert into " + _check_table_name(self.ownership_table) + " (eventhub_name,consumer_group_name,partition_id,owner_id,owner_level,last_modified_time,etag) " "values (?,?,?,?,?,?,?)", (p["eventhub_name"], p["consumer_group_name"], p["partition_id"], p["owner_id"], p["owner_level"], time.time(), str(uuid.uuid4()) )) else: - cursor.execute("update "+self.ownership_table+" set owner_id=?, owner_level=?, last_modified_time=?, etag=? " + cursor.execute("update " + _check_table_name(self.ownership_table) + " set owner_id=?, owner_level=?, last_modified_time=?, etag=? " "where eventhub_name=? and consumer_group_name=? 
and partition_id=?", (p["owner_id"], p["owner_level"], time.time(), str(uuid.uuid4()), p["eventhub_name"], p["consumer_group_name"], p["partition_id"])) @@ -93,7 +100,7 @@ async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_ offset, sequence_number): cursor = self.conn.cursor() try: - cursor.execute("update "+self.ownership_table+" set offset=?, sequence_number=? where eventhub_name=? and consumer_group_name=? and partition_id=?", + cursor.execute("update " + _check_table_name(self.ownership_table) + " set offset=?, sequence_number=? where eventhub_name=? and consumer_group_name=? and partition_id=?", (offset, sequence_number, eventhub_name, consumer_group_name, partition_id)) self.conn.commit() finally: From 13014dd8c08c4bacde15468427512181550c9e40 Mon Sep 17 00:00:00 2001 From: yijxie Date: Fri, 2 Aug 2019 18:57:29 -0700 Subject: [PATCH 40/42] PR review update --- .../eventprocessor/checkpoint_manager.py | 18 +++--- .../eventprocessor/event_processor.py | 60 +++++++++---------- .../eventprocessor/partition_processor.py | 5 +- 3 files changed, 40 insertions(+), 43 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py index 653c058cb20c..88a3c3616592 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/checkpoint_manager.py @@ -11,21 +11,21 @@ class CheckpointManager(object): """Every PartitionProcessor has a CheckpointManager to save the partition's checkpoint. """ - def __init__(self, partition_id, eventhub_name, consumer_group_name, owner_id, partition_manager: PartitionManager): - self._partition_id = partition_id - self._eventhub_name = eventhub_name - self._consumer_group_name = consumer_group_name - self._owner_id = owner_id - self._partition_manager = partition_manager + def __init__(self, partition_id: str, eventhub_name: str, consumer_group_name: str, owner_id: str, partition_manager: PartitionManager): + self.partition_id = partition_id + self.eventhub_name = eventhub_name + self.consumer_group_name = consumer_group_name + self.owner_id = owner_id + self.partition_manager = partition_manager - async def update_checkpoint(self, offset, sequence_number): + async def update_checkpoint(self, offset, sequence_number=None): """Users call this method in PartitionProcessor.process_events() to save checkpoints :param offset: offset of the processed EventData :param sequence_number: sequence_number of the processed EventData :return: None """ - await self._partition_manager.update_checkpoint( - self._eventhub_name, self._consumer_group_name, self._partition_id, self._owner_id, offset, + await self.partition_manager.update_checkpoint( + self.eventhub_name, self.consumer_group_name, self.partition_id, self.owner_id, offset, sequence_number ) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py index 9ec4d5fdf5ce..7e06c2c81eda 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/event_processor.py @@ -22,18 +22,19 @@ class EventProcessor(object): def __init__(self, eventhub_client: EventHubClient, consumer_group_name: str, - partition_processor_factory: Callable[[str, str, str, CheckpointManager], 
PartitionProcessor], + partition_processor_factory: Callable[[CheckpointManager], PartitionProcessor], partition_manager: PartitionManager, **kwargs): """An EventProcessor automatically creates and runs consumers for all partitions of the eventhub. It provides the user a convenient way to receive events from multiple partitions and save checkpoints. If multiple EventProcessors are running for an event hub, they will automatically balance loading. This feature - won't be availabe until preview 3. + won't be available until preview 3. :param eventhub_client: an instance of azure.eventhub.aio.EventClient object :param consumer_group_name: the consumer group that is used to receive events from the event hub that the eventhub_client is going to receive events from - :param partition_processor_factory: a callable (constructor, function, etc) that is called to return a PartitionProcessor + :param partition_processor_factory: a callable (type or function) that is called to return a PartitionProcessor + TODO: elaborate an example :param partition_manager: an instance of a PartitionManager implementation :param initial_event_position: the offset to start a partition consumer if the partition has no checkpoint yet :type initial_event_position: int or str @@ -48,29 +49,31 @@ def __init__(self, eventhub_client: EventHubClient, consumer_group_name: str, self._receive_timeout = eventhub_client.config.receive_timeout self._tasks: List[asyncio.Task] = [] self._id = str(uuid.uuid4()) - self._partition_ids = None - @property - def id(self): - return self._id + def __repr__(self): + return f'EventProcessor: id {self._id}' async def start(self): """Start the EventProcessor. + 1. retrieve the partition ids from eventhubs 2. claim partition ownership of these partitions. 3. repeatedly call EvenHubConsumer.receive() to retrieve events and call user defined PartitionProcessor.process_events() + + :return None """ log.info("EventProcessor %r is being started", self._id) partition_ids = await self._eventhub_client.get_partition_ids() - self._partition_ids = partition_ids - claimed_list = await self._claim_partitions() + claimed_list = await self._claim_partitions(partition_ids) await self._start_claimed_partitions(claimed_list) async def stop(self): """Stop all the partition consumer This method cancels tasks that are running EventHubConsumer.receive() for the partitions owned by this EventProcessor. 
+ + :return None """ for i in range(len(self._tasks)): task = self._tasks.pop() @@ -78,14 +81,14 @@ async def stop(self): log.info("EventProcessor %r has been cancelled", self._id) await asyncio.sleep(2) # give some time to finish after cancelled - async def _claim_partitions(self): + async def _claim_partitions(self, partition_ids): partitions_ownership = await self._partition_manager.list_ownership(self._eventhub_name, self._consumer_group_name) partitions_ownership_dict = dict() for ownership in partitions_ownership: partitions_ownership_dict[ownership["partition_id"]] = ownership to_claim_list = [] - for pid in self._partition_ids: + for pid in partition_ids: p_ownership = partitions_ownership_dict.get(pid) if p_ownership: to_claim_list.append(p_ownership) @@ -103,9 +106,6 @@ async def _start_claimed_partitions(self, claimed_partitions): consumer = self._eventhub_client.create_consumer(self._consumer_group_name, partition_id, EventPosition(str(offset))) partition_processor = self._partition_processor_factory( - eventhub_name=self._eventhub_name, - consumer_group_name=self._consumer_group_name, - partition_id=partition_id, checkpoint_manager=CheckpointManager(partition_id, self._eventhub_name, self._consumer_group_name, self._id, self._partition_manager) ) @@ -131,10 +131,10 @@ async def _receive(partition_consumer, partition_processor, receive_timeout): log.info( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " "is cancelled", - partition_processor._checkpoint_manager._owner_id, - partition_processor._eventhub_name, - partition_processor._partition_id, - partition_processor._consumer_group_name + partition_processor._checkpoint_manager.owner_id, + partition_processor._checkpoint_manager.eventhub_name, + partition_processor._checkpoint_manager.partition_id, + partition_processor._checkpoint_manager.consumer_group_name ) await partition_processor.process_error(cancelled_error) await partition_processor.close(reason=CloseReason.SHUTDOWN) @@ -144,10 +144,10 @@ async def _receive(partition_consumer, partition_processor, receive_timeout): log.warning( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " "has met an exception receiving events. It's being closed. 
The exception is %r.", - partition_processor._checkpoint_manager._owner_id, - partition_processor._eventhub_name, - partition_processor._partition_id, - partition_processor._consumer_group_name, + partition_processor._checkpoint_manager.owner_id, + partition_processor._checkpoint_manager.eventhub_name, + partition_processor._checkpoint_manager.partition_id, + partition_processor._checkpoint_manager.consumer_group_name, eh_err ) await partition_processor.process_error(eh_err) @@ -159,10 +159,10 @@ async def _receive(partition_consumer, partition_processor, receive_timeout): log.info( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " "is cancelled.", - partition_processor._checkpoint_manager._owner_id, - partition_processor._eventhub_name, - partition_processor._partition_id, - partition_processor._consumer_group_name + partition_processor._checkpoint_manager.owner_id, + partition_processor._checkpoint_manager.eventhub_name, + partition_processor._checkpoint_manager.partition_id, + partition_processor._checkpoint_manager.consumer_group_name ) await partition_processor.process_error(cancelled_error) await partition_processor.close(reason=CloseReason.SHUTDOWN) @@ -171,10 +171,10 @@ async def _receive(partition_consumer, partition_processor, receive_timeout): log.warning( "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r " "has met an exception from user code process_events. It's being closed. The exception is %r.", - partition_processor._checkpoint_manager._owner_id, - partition_processor._eventhub_name, - partition_processor._partition_id, - partition_processor._consumer_group_name, + partition_processor._checkpoint_manager.owner_id, + partition_processor._checkpoint_manager.eventhub_name, + partition_processor._checkpoint_manager.partition_id, + partition_processor._checkpoint_manager.consumer_group_name, exp ) await partition_processor.process_error(exp) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py index 8818a2b89f0f..1a17cd513876 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/eventprocessor/partition_processor.py @@ -18,10 +18,7 @@ class CloseReason(Enum): class PartitionProcessor(ABC): - def __init__(self, eventhub_name, consumer_group_name, partition_id, checkpoint_manager: CheckpointManager): - self._partition_id = partition_id - self._eventhub_name = eventhub_name - self._consumer_group_name = consumer_group_name + def __init__(self, checkpoint_manager: CheckpointManager): self._checkpoint_manager = checkpoint_manager async def close(self, reason): From 3e82e90ebf884a6699ccdf9df14287587ab328e0 Mon Sep 17 00:00:00 2001 From: yijxie Date: Fri, 2 Aug 2019 19:28:06 -0700 Subject: [PATCH 41/42] Removed old EPH stuffs. Added new EPH stuffs. 
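The new azure.eventhub.eventprocessor package replaces the old eventprocessorhost module. The `partition_processor_factory` parameter documented in the previous patch still carries a "TODO: elaborate an example", so here is a minimal sketch of how the new pieces are expected to fit together. It is illustrative only: the connection string, consumer group name and 60-second run window are placeholders, the import paths assume the package layout introduced in this PR, and it assumes `process_events` is the only method a PartitionProcessor subclass has to override.

    import asyncio

    from azure.eventhub.aio import EventHubClient
    from azure.eventhub.eventprocessor import (
        EventProcessor, PartitionProcessor, Sqlite3PartitionManager
    )

    CONNECTION_STR = "<< event hub connection string >>"  # placeholder


    class MyPartitionProcessor(PartitionProcessor):
        async def process_events(self, events):
            if events:
                # Do something useful with the events, then record progress through
                # the CheckpointManager that EventProcessor injected into this instance.
                await self._checkpoint_manager.update_checkpoint(
                    events[-1].offset, events[-1].sequence_number)


    async def main():
        client = EventHubClient.from_connection_string(CONNECTION_STR)
        partition_manager = Sqlite3PartitionManager()  # in-memory ownership/checkpoint store
        event_processor = EventProcessor(
            client, "$default", MyPartitionProcessor, partition_manager)
        asyncio.ensure_future(event_processor.start())
        await asyncio.sleep(60)  # let the processor pump events for a while
        await event_processor.stop()


    if __name__ == '__main__':
        loop = asyncio.get_event_loop()
        loop.run_until_complete(main())

Because the factory is just a callable that accepts a `checkpoint_manager` keyword argument, the subclass itself can be passed as the factory, as shown above.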
--- sdk/eventhub/azure-eventhubs/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/setup.py b/sdk/eventhub/azure-eventhubs/setup.py index 640b69a6e519..41b8e7c36800 100644 --- a/sdk/eventhub/azure-eventhubs/setup.py +++ b/sdk/eventhub/azure-eventhubs/setup.py @@ -47,8 +47,8 @@ exclude_packages.extend([ '*.aio', '*.aio.*', - '*.eventprocessorhost', - '*.eventprocessorhost.*' + '*.eventprocessor', + '*.eventprocessor.*' ]) setup( From 0f670d89846c9721c3d3c648c16bebf63e449d6d Mon Sep 17 00:00:00 2001 From: Adam Ling <47871814+yunhaoling@users.noreply.github.com> Date: Fri, 2 Aug 2019 19:53:58 -0700 Subject: [PATCH 42/42] Small fix (#6650) * Draft for changelog * Improve syntax for kwargs --- sdk/eventhub/azure-eventhubs/HISTORY.md | 15 +++++++++++++++ .../azure/eventhub/_consumer_producer_mixin.py | 3 +-- .../aio/_consumer_producer_mixin_async.py | 3 +-- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/sdk/eventhub/azure-eventhubs/HISTORY.md b/sdk/eventhub/azure-eventhubs/HISTORY.md index c1b859b55f00..11ae91dc04fb 100644 --- a/sdk/eventhub/azure-eventhubs/HISTORY.md +++ b/sdk/eventhub/azure-eventhubs/HISTORY.md @@ -1,5 +1,20 @@ # Release History +## 5.0.0b2 (2019-08-06) + +**New features** + +- Added ability to create and send EventDataBatch object with limited data size. +- Added new configuration parameters for exponential delay among each retry operation. + - `retry_total`: The total number of attempts to redo the failed operation. + - `backoff_factor`: The delay time factor. + - `backoff_max`: The maximum delay time in total. + +**Breaking changes** + +- New `EventProcessor` design + - The `EventProcessorHost` was waived. + ## 5.0.0b1 (2019-06-25) Version 5.0.0b1 is a preview of our efforts to create a client library that is user friendly and idiomatic to the Python ecosystem. The reasons for most of the changes in this update can be found in the [Azure SDK Design Guidelines for Python](https://azuresdkspecs.z5.web.core.windows.net/PythonSpec.html). For more information, please visit https://aka.ms/azure-sdk-preview1-python. diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py index 3e974c622a45..9124ff261949 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py @@ -15,14 +15,13 @@ def _retry_decorator(to_be_wrapped_func): def wrapped_func(self, *args, **kwargs): - timeout = kwargs.get("timeout", None) + timeout = kwargs.pop("timeout", None) if not timeout: timeout = 100000 # timeout None or 0 mean no timeout. 
100000 seconds is equivalent to no timeout timeout_time = time.time() + timeout max_retries = self.client.config.max_retries retry_count = 0 last_exception = None - kwargs.pop("timeout", None) while True: try: return to_be_wrapped_func(self, timeout_time=timeout_time, last_exception=last_exception, **kwargs) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py index 3027fcfc3287..a90198f42f54 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/aio/_consumer_producer_mixin_async.py @@ -15,14 +15,13 @@ def _retry_decorator(to_be_wrapped_func): async def wrapped_func(self, *args, **kwargs): - timeout = kwargs.get("timeout", None) + timeout = kwargs.pop("timeout", None) if not timeout: timeout = 100000 # timeout None or 0 mean no timeout. 100000 seconds is equivalent to no timeout timeout_time = time.time() + timeout max_retries = self.client.config.max_retries retry_count = 0 last_exception = None - kwargs.pop("timeout", None) while True: try: return await to_be_wrapped_func(self, timeout_time=timeout_time, last_exception=last_exception, **kwargs)
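The final hunks replace `kwargs.get("timeout", None)` followed by a separate `kwargs.pop("timeout", None)` with a single `pop`: `pop` both reads the caller-supplied timeout and removes it from `kwargs`, so the wrapped send/receive call never receives an unexpected `timeout` keyword. A standalone sketch of the pattern follows; it is simplified (it ignores `max_retries`, and the backoff factor and cap are illustrative values, not the SDK's defaults):

    import functools
    import time


    def retry_with_timeout(func):
        """Sketch of a method decorator following the pattern in the diff above."""
        @functools.wraps(func)
        def wrapped(self, *args, **kwargs):
            # pop() removes "timeout" from kwargs, so the wrapped method never
            # receives it as an unexpected keyword argument.
            timeout = kwargs.pop("timeout", None) or 100000  # None/0 means "no timeout"
            timeout_time = time.time() + timeout
            last_exception = None
            retry_count = 0
            while True:
                try:
                    return func(self, *args, timeout_time=timeout_time,
                                last_exception=last_exception, **kwargs)
                except Exception as exception:  # the real code narrows this to retryable errors
                    last_exception = exception
                    retry_count += 1
                    if time.time() > timeout_time:
                        raise
                    # Illustrative exponential backoff: factor 0.8, capped at 120 seconds.
                    time.sleep(min(0.8 * (2 ** retry_count), 120))
        return wrapped

A method decorated this way can be called as, say, `producer.send(batch, timeout=30)`, while its implementation only ever sees the derived `timeout_time` and `last_exception` keywords.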