diff --git a/sdk/eventhub/azure-eventhubs-checkpointstoreblob-aio/setup.py b/sdk/eventhub/azure-eventhubs-checkpointstoreblob-aio/setup.py index 27c12b3bfcc1..9a3312220d84 100644 --- a/sdk/eventhub/azure-eventhubs-checkpointstoreblob-aio/setup.py +++ b/sdk/eventhub/azure-eventhubs-checkpointstoreblob-aio/setup.py @@ -67,7 +67,7 @@ python_requires=">=3.5.3", install_requires=[ 'azure-storage-blob<=12.1,>=12.0.0b2', - 'azure-eventhub<6.0.0,>=5.0.0b3', + 'azure-eventhub<6.0.0,>=5.0.0b5', 'aiohttp<4.0,>=3.0', ], extras_require={ diff --git a/sdk/eventhub/azure-eventhubs-checkpointstoreblob/HISTORY.md b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/HISTORY.md new file mode 100644 index 000000000000..95536734e9b8 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/HISTORY.md @@ -0,0 +1,9 @@ +# Release History + +## 2019-11-04 1.0.0b5 + +**New features** + +- `BlobPartitionManager` that uses Azure Blob Storage Block Blob to store EventProcessor checkpoint data + +![Impressions](https://azure-sdk-impressions.azurewebsites.net/api/impressions/azure-sdk-for-python/sdk/eventhub/azure-eventhubs-checkpointstoreblob/HISTORY.png) \ No newline at end of file diff --git a/sdk/eventhub/azure-eventhubs-checkpointstoreblob/LICENSE b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/LICENSE new file mode 100644 index 000000000000..21071075c245 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/LICENSE @@ -0,0 +1,21 @@ + MIT License + + Copyright (c) Microsoft Corporation. All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE diff --git a/sdk/eventhub/azure-eventhubs-checkpointstoreblob/MANIFEST.in b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/MANIFEST.in new file mode 100644 index 000000000000..7012aaaa132a --- /dev/null +++ b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/MANIFEST.in @@ -0,0 +1,4 @@ +include *.md +include azure/__init__.py +include azure/eventhub/__init__.py +include azure/eventhub/extensions/__init__.py diff --git a/sdk/eventhub/azure-eventhubs-checkpointstoreblob/README.md b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/README.md new file mode 100644 index 000000000000..abeeae1ac384 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/README.md @@ -0,0 +1,138 @@ +# Azure EventHubs Checkpoint Store client library for Python using Storage Blobs + +Azure EventHubs Checkpoint Store is used for storing checkpoints while processing events from Azure Event Hubs. 
+This Checkpoint Store package works as a plug-in package to `EventHubConsumerClient`. It uses Azure Storage Blob as the persistent store for maintaining checkpoints and partition ownership information. + +[Source code](https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/eventhub/azure-eventhubs-checkpointstoreblob) | [Package (PyPi)](https://pypi.org/project/azure-eventhub-checkpointstoreblob/) | [API reference documentation](https://azuresdkdocs.blob.core.windows.net/$web/python/azure-eventhub/5.0.0b5/azure.eventhub.extensions.html) | [Azure Eventhubs documentation](https://docs.microsoft.com/en-us/azure/event-hubs/) | [Azure Storage documentation](https://docs.microsoft.com/en-us/azure/storage/) + +## Getting started + +### Install the package + +``` +$ pip install --pre azure-eventhub-checkpointstoreblob +``` + +**Prerequisites** + +- Python2.7, Python 3.5.3 or later. +- **Microsoft Azure Subscription:** To use Azure services, including Azure Event Hubs, you'll need a subscription. If you do not have an existing Azure account, you may sign up for a free trial or use your MSDN subscriber benefits when you [create an account](https://azure.microsoft.com/en-us/). + +- **Event Hubs namespace with an Event Hub:** To interact with Azure Event Hubs, you'll also need to have a namespace and Event Hub available. If you are not familiar with creating Azure resources, you may wish to follow the step-by-step guide for [creating an Event Hub using the Azure portal](https://docs.microsoft.com/en-us/azure/event-hubs/event-hubs-create). There, you can also find detailed instructions for using the Azure CLI, Azure PowerShell, or Azure Resource Manager (ARM) templates to create an Event Hub. + +- **Azure Storage Account:** You'll need to have an Azure Storage Account and create a Azure Blob Storage Block Container to store the checkpoint data with blobs. You may follow the guide [creating an Azure Block Blob Storage Account](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blob-create-account-block-blob). + +## Key concepts + +### Checkpointing + +Checkpointing is a process by which readers mark or commit their position within a partition event sequence. +Checkpointing is the responsibility of the consumer and occurs on a per-partition basis within a consumer group. +This responsibility means that for each consumer group, each partition reader must keep track of its current position +in the event stream, and can inform the service when it considers the data stream complete. If a reader disconnects from +a partition, when it reconnects it begins reading at the checkpoint that was previously submitted by the last reader of +that partition in that consumer group. When the reader connects, it passes the offset to the event hub to specify the +location at which to start reading. In this way, you can use checkpointing to both mark events as "complete" by +downstream applications, and to provide resiliency if a failover between readers running on different machines occurs. +It is possible to return to older data by specifying a lower offset from this checkpointing process. Through this +mechanism, checkpointing enables both failover resiliency and event stream replay. + +### Offsets & sequence numbers +Both offset & sequence number refer to the position of an event within a partition. You can think of them as a +client-side cursor. The offset is a byte numbering of the event. 
The offset/sequence number enables an event consumer
+(reader) to specify a point in the event stream from which they want to begin reading events. You can specify a
+timestamp such that you receive events enqueued only after the given timestamp. Consumers are responsible for
+storing their own offset values outside of the Event Hubs service. Within a partition, each event includes an offset,
+sequence number and the timestamp of when it was enqueued.
+
+## Examples
+- [Create an Azure Storage Blobs `ContainerClient`](#create-an-azure-storage-blobs-containerclient)
+- [Create an Azure EventHubs `EventHubConsumerClient`](#create-an-eventhubconsumerclient)
+- [Consume events using a `BlobPartitionManager`](#consume-events-using-a-blobpartitionmanager-for-checkpointing)
+
+### Create an Azure Storage Blobs `ContainerClient`
+The easiest way to create a `ContainerClient` is to use a connection string.
+```python
+from azure.storage.blob import ContainerClient
+container_client = ContainerClient.from_connection_string("my_storageaccount_connection_string", "mycontainer")
+```
+For other ways of creating a `ContainerClient`, see the [Blob Storage library](https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/storage/azure-storage-blob) for more details.
+
+### Create an `EventHubConsumerClient`
+The easiest way to create an `EventHubConsumerClient` is to use a connection string.
+```python
+from azure.eventhub import EventHubConsumerClient
+eventhub_client = EventHubConsumerClient.from_connection_string("my_eventhub_namespace_connection_string", event_hub_path="myeventhub")
+```
+For other ways of creating an `EventHubConsumerClient`, refer to the [EventHubs library](https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/eventhub/azure-eventhubs) for more details.
+
+### Consume events using a `BlobPartitionManager` for checkpointing
+```python
+from azure.eventhub import EventHubConsumerClient
+from azure.storage.blob import ContainerClient
+from azure.eventhub.extensions.checkpointstoreblob import BlobPartitionManager
+
+eventhub_connection_str = '<< CONNECTION STRING FOR THE EVENT HUBS NAMESPACE >>'
+storage_container_connection_str = '<< CONNECTION STRING OF THE STORAGE >>'
+storage_container_name = '<< STORAGE CONTAINER NAME >>'
+
+def do_operation(events):
+    # do some operations on the events.
+    pass
+
+def process_events(partition_context, events):
+    do_operation(events)
+    partition_context.update_checkpoint(events[-1])
+
+def main():
+    storage_container_client = ContainerClient.from_connection_string(storage_container_connection_str, storage_container_name)
+    partition_manager = BlobPartitionManager(storage_container_client)  # use the BlobPartitionManager to save checkpoints
+    client = EventHubConsumerClient.from_connection_string(eventhub_connection_str, partition_manager=partition_manager, receive_timeout=5, retry_total=3)
+
+    try:
+        client.receive(process_events, "$default")
+    except KeyboardInterrupt:
+        client.close()
+
+if __name__ == '__main__':
+    main()
+```
+
+## Troubleshooting
+
+### General
+Enabling logging is helpful for troubleshooting.
+Refer to [Logging](#logging) to enable loggers for related libraries.
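+
+For example, a minimal setup that turns on the loggers listed under [Logging](#logging) below (illustrative only; adjust handlers and levels to your needs) is:
+
+```python
+import logging
+import sys
+
+# Send library traces to stdout; raise or lower the per-logger levels as needed.
+logging.basicConfig(stream=sys.stdout, level=logging.WARNING)
+logging.getLogger("azure.eventhub").setLevel(logging.INFO)
+logging.getLogger("azure.eventhub.extensions.checkpointstoreblob").setLevel(logging.INFO)
+logging.getLogger("azure.storage.blob").setLevel(logging.WARNING)
+logging.getLogger("uamqp").setLevel(logging.WARNING)
+```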
+ +## Next steps + +### Examples +- [./samples/event_processor_blob_storage_example.py](https://github.com/Azure/azure-sdk-for-python/blob/master/sdk/eventhub/azure-eventhubs-checkpointstoreblob/samples/event_processor_blob_storage_example.py) - EventHubConsumerClient with blob partition manager example + +### Documentation + +Reference documentation is available at https://azuresdkdocs.blob.core.windows.net/$web/python/azure-eventhub/5.0.0b5/azure.eventhub.extensions.html + +### Logging + +- Enable `azure.eventhub.extensions.checkpointstoreblob` logger to collect traces from the library. +- Enable `azure.eventhub` logger to collect traces from the main azure-eventhub library. +- Enable `azure.storage.blob` logger to collect traces from azure storage blob library. +- Enable `uamqp` logger to collect traces from the underlying uAMQP library. +- Enable AMQP frame level trace by setting `logging_enable=True` when creating the client. + +### Provide Feedback + +If you encounter any bugs or have suggestions, please file an issue in the [Issues](https://github.com/Azure/azure-sdk-for-python/issues) section of the project. + +## Contributing + +This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.microsoft.com. + +When you submit a pull request, a CLA-bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA. + +This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). +For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. + +![Impressions](https://azure-sdk-impressions.azurewebsites.net/api/impressions/azure-sdk-for-python/sdk/eventhub/azure-eventhubs-checkpointstoreblob/README.png) diff --git a/sdk/eventhub/azure-eventhubs-checkpointstoreblob/azure/__init__.py b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/azure/__init__.py new file mode 100644 index 000000000000..62351a0ab30b --- /dev/null +++ b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/azure/__init__.py @@ -0,0 +1,5 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- +__path__ = __import__('pkgutil').extend_path(__path__, __name__) # type: ignore diff --git a/sdk/eventhub/azure-eventhubs-checkpointstoreblob/azure/eventhub/__init__.py b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/azure/eventhub/__init__.py new file mode 100644 index 000000000000..62351a0ab30b --- /dev/null +++ b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/azure/eventhub/__init__.py @@ -0,0 +1,5 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+__path__ = __import__('pkgutil').extend_path(__path__, __name__)  # type: ignore
diff --git a/sdk/eventhub/azure-eventhubs-checkpointstoreblob/azure/eventhub/extensions/__init__.py b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/azure/eventhub/extensions/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/sdk/eventhub/azure-eventhubs-checkpointstoreblob/azure/eventhub/extensions/checkpointstoreblob/__init__.py b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/azure/eventhub/extensions/checkpointstoreblob/__init__.py
new file mode 100644
index 000000000000..22debc5f7cc3
--- /dev/null
+++ b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/azure/eventhub/extensions/checkpointstoreblob/__init__.py
@@ -0,0 +1,12 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+__version__ = "1.0.0b5"
+
+from .blobstoragepm import BlobPartitionManager
+
+__all__ = [
+    "BlobPartitionManager",
+]
diff --git a/sdk/eventhub/azure-eventhubs-checkpointstoreblob/azure/eventhub/extensions/checkpointstoreblob/blobstoragepm.py b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/azure/eventhub/extensions/checkpointstoreblob/blobstoragepm.py
new file mode 100644
index 000000000000..0d506900a244
--- /dev/null
+++ b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/azure/eventhub/extensions/checkpointstoreblob/blobstoragepm.py
@@ -0,0 +1,162 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+from typing import Dict
+import logging
+import time
+import calendar
+from datetime import datetime
+from collections import defaultdict
+
+from azure.eventhub import PartitionManager, OwnershipLostError  # type: ignore  # pylint:disable=no-name-in-module
+from azure.core.exceptions import ResourceModifiedError, ResourceExistsError  # type: ignore
+from azure.storage.blob import BlobClient  # type: ignore
+
+logger = logging.getLogger(__name__)
+UPLOAD_DATA = ""
+
+
+class BlobPartitionManager(PartitionManager):
+    """A PartitionManager that uses Azure Blob Storage to store partition ownership and checkpoint data.
+
+    This class implements the methods list_ownership, claim_ownership, update_checkpoint and list_checkpoints
+    that are defined in class azure.eventhub.PartitionManager of package azure-eventhub.
+
+    """
+    def __init__(self, container_client):
+        # type: (ContainerClient) -> None
+        """Create a BlobPartitionManager.
+
+        :param container_client: The Azure Blob Storage Container client used to save checkpoint data to the
+            Azure Blob Storage container.
+ """ + self._container_client = container_client + self._cached_blob_clients = defaultdict() # type:Dict[str, BlobClient] + + @staticmethod + def _utc_to_local(utc_dt): + timestamp = calendar.timegm(utc_dt.timetuple()) + local_dt = datetime.fromtimestamp(timestamp) + return local_dt.replace(microsecond=utc_dt.microsecond) + + @staticmethod + def _to_timestamp(date): + timestamp = None + try: + timestamp = date.timestamp() + except AttributeError: # python2.7 compatible + timestamp = time.mktime(BlobPartitionManager._utc_to_local(date).timetuple())\ + + date.microsecond / 1e6 + return timestamp + + def _get_blob_client(self, blob_name): + result = self._cached_blob_clients.get(blob_name) + if not result: + result = self._container_client.get_blob_client(blob_name) + self._cached_blob_clients[blob_name] = result + return result + + def _upload_ownership(self, ownership, metadata): + etag = ownership.get("etag") + if etag: + etag_match = {"if_match": etag} + else: + etag_match = {"if_none_match": '*'} + blob_name = "{}/{}/{}/ownership/{}".format(ownership["fully_qualified_namespace"], ownership["eventhub_name"], + ownership["consumer_group_name"], ownership["partition_id"]) + uploaded_blob_properties = self._get_blob_client(blob_name).upload_blob( + data=UPLOAD_DATA, overwrite=True, metadata=metadata, **etag_match + ) + ownership["etag"] = uploaded_blob_properties["etag"] + ownership["last_modified_time"] = self._to_timestamp(uploaded_blob_properties["last_modified"]) + ownership.update(metadata) + + def list_ownership(self, fully_qualified_namespace, eventhub_name, consumer_group_name): + try: + blobs = self._container_client.list_blobs( + name_starts_with="{}/{}/{}/ownership".format( + fully_qualified_namespace, eventhub_name, consumer_group_name), + include=['metadata']) + result = [] + for b in blobs: + ownership = { + "fully_qualified_namespace": fully_qualified_namespace, + "eventhub_name": eventhub_name, + "consumer_group_name": consumer_group_name, + "partition_id": b.name.split("/")[-1], + "owner_id": b.metadata["ownerId"], + "etag": b.etag, + "last_modified_time": self._to_timestamp(b.last_modified) if b.last_modified else None + } + result.append(ownership) + return result + except Exception as err: # pylint:disable=broad-except + logger.warning("An exception occurred during list_ownership for " + "namespace %r eventhub %r consumer group %r. " + "Exception is %r", fully_qualified_namespace, eventhub_name, consumer_group_name, err) + raise + + def _claim_one_partition(self, ownership): + partition_id = ownership["partition_id"] + fully_qualified_namespace = ownership["fully_qualified_namespace"] + eventhub_name = ownership["eventhub_name"] + consumer_group_name = ownership["consumer_group_name"] + owner_id = ownership["owner_id"] + metadata = {"ownerId": owner_id} + try: + self._upload_ownership(ownership, metadata) + return ownership + except (ResourceModifiedError, ResourceExistsError): + logger.info( + "EventProcessor instance %r of namespace %r eventhub %r consumer group %r " + "lost ownership to partition %r", + owner_id, fully_qualified_namespace, eventhub_name, consumer_group_name, partition_id) + raise OwnershipLostError() + except Exception as err: # pylint:disable=broad-except + logger.warning("An exception occurred when EventProcessor instance %r claim_ownership for " + "namespace %r eventhub %r consumer group %r partition %r. " + "The ownership is now lost. 
Exception " + "is %r", + owner_id, fully_qualified_namespace, eventhub_name, consumer_group_name, partition_id, err) + return ownership # Keep the ownership if an unexpected error happens + + def claim_ownership(self, ownership_list): + gathered_results = [] + for x in ownership_list: + try: + gathered_results.append(self._claim_one_partition(x)) + except (ResourceModifiedError, ResourceExistsError): + pass + return gathered_results + + def update_checkpoint(self, fully_qualified_namespace, eventhub_name, consumer_group_name, partition_id, + offset, sequence_number): + metadata = { + "Offset": offset, + "SequenceNumber": str(sequence_number), + } + blob_name = "{}/{}/{}/checkpoint/{}".format(fully_qualified_namespace, eventhub_name, + consumer_group_name, partition_id) + self._get_blob_client(blob_name).upload_blob( + data=UPLOAD_DATA, overwrite=True, metadata=metadata + ) + + def list_checkpoints(self, fully_qualified_namespace, eventhub_name, consumer_group_name): + blobs = self._container_client.list_blobs( + name_starts_with="{}/{}/{}/checkpoint".format( + fully_qualified_namespace, eventhub_name, consumer_group_name), + include=['metadata']) + result = [] + for b in blobs: + metadata = b.metadata + checkpoint = { + "fully_qualified_namespace": fully_qualified_namespace, + "eventhub_name": eventhub_name, + "consumer_group_name": consumer_group_name, + "partition_id": b.name.split("/")[-1], + "offset": metadata["Offset"], + "sequence_number": metadata["SequenceNumber"] + } + result.append(checkpoint) + return result diff --git a/sdk/eventhub/azure-eventhubs-checkpointstoreblob/dev_requirements.txt b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/dev_requirements.txt new file mode 100644 index 000000000000..6fd64226d3da --- /dev/null +++ b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/dev_requirements.txt @@ -0,0 +1,4 @@ +-e ../../../tools/azure-sdk-tools +../../core/azure-core +-e ../../storage/azure-storage-blob +../azure-eventhubs diff --git a/sdk/eventhub/azure-eventhubs-checkpointstoreblob/samples/event_processor_blob_storage_example.py b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/samples/event_processor_blob_storage_example.py new file mode 100644 index 000000000000..f3520b79c8f1 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/samples/event_processor_blob_storage_example.py @@ -0,0 +1,37 @@ +import logging +import os +from azure.eventhub import EventHubConsumerClient +from azure.eventhub.extensions.checkpointstoreblob import BlobPartitionManager +from azure.storage.blob import ContainerClient + +RECEIVE_TIMEOUT = 5 # timeout in seconds for a receiving operation. 0 or None means no timeout +RETRY_TOTAL = 3 # max number of retries for receive operations within the receive timeout. Actual number of retries clould be less if RECEIVE_TIMEOUT is too small +CONNECTION_STR = os.environ["EVENT_HUB_CONN_STR"] +STORAGE_CONNECTION_STR = os.environ["AZURE_STORAGE_CONN_STR"] + +logging.basicConfig(level=logging.INFO) + + +def do_operation(event): + # do some sync or async operations. 
If the operation is i/o intensive, async will have better performance + print(event) + + +def process_events(partition_context, events): + if events: + print("received events: {} from partition: {}".format(len(events), partition_context.partition_id)) + for event in events: + do_operation(event) + partition_context.update_checkpoint(events[-1]) + else: + print("empty events received", "partition:", partition_context.partition_id) + + +if __name__ == '__main__': + container_client = ContainerClient.from_connection_string(STORAGE_CONNECTION_STR, "eventprocessor") + partition_manager = BlobPartitionManager(container_client=container_client) + client = EventHubConsumerClient.from_connection_string(CONNECTION_STR, partition_manager=partition_manager, receive_timeout=RECEIVE_TIMEOUT, retry_total=RETRY_TOTAL) + try: + client.receive(process_events, "$default") + except KeyboardInterrupt: + client.close() diff --git a/sdk/eventhub/azure-eventhubs-checkpointstoreblob/sdk_packaging.toml b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/sdk_packaging.toml new file mode 100644 index 000000000000..e7687fdae93b --- /dev/null +++ b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/sdk_packaging.toml @@ -0,0 +1,2 @@ +[packaging] +auto_update = false \ No newline at end of file diff --git a/sdk/eventhub/azure-eventhubs-checkpointstoreblob/setup.cfg b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/setup.cfg new file mode 100644 index 000000000000..3480374bc2f2 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/setup.cfg @@ -0,0 +1,2 @@ +[bdist_wheel] +universal=1 \ No newline at end of file diff --git a/sdk/eventhub/azure-eventhubs-checkpointstoreblob/setup.py b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/setup.py new file mode 100644 index 000000000000..251f2da9c3f3 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/setup.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python + +#------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. 
+#-------------------------------------------------------------------------- + +import re +import os.path +import sys +from io import open +from setuptools import find_packages, setup + + +# Change the PACKAGE_NAME only to change folder and different name +PACKAGE_NAME = "azure-eventhub-checkpointstoreblob" +PACKAGE_PPRINT_NAME = "Event Hubs checkpointer implementation with Blob Storage" + +package_folder_path = "azure/eventhub/extensions/checkpointstoreblob" +namespace_name = "azure.eventhub.extensions.checkpointstoreblob" + +# Version extraction inspired from 'requests' +with open(os.path.join(package_folder_path, '__init__.py'), 'r') as fd: + version = re.search(r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', + fd.read(), re.MULTILINE).group(1) + +if not version: + raise RuntimeError('Cannot find version information') + +with open('README.md') as f: + readme = f.read() +with open('HISTORY.md') as f: + history = f.read() + +exclude_packages = [ + 'tests', + 'samples', + # Exclude packages that will be covered by PEP420 or nspkg + 'azure', + 'azure.eventhub', + 'azure.eventhub.extensions', + ] + +setup( + name=PACKAGE_NAME, + version=version, + description='Microsoft Azure {} Client Library for Python'.format(PACKAGE_PPRINT_NAME), + long_description=readme + '\n\n' + history, + long_description_content_type='text/markdown', + license='MIT License', + author='Microsoft Corporation', + author_email='azpysdkhelp@microsoft.com', + url='https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/eventhub/azure-eventhubs-checkpointerblob', + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Programming Language :: Python', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'License :: OSI Approved :: MIT License', + ], + zip_safe=False, + packages=find_packages(exclude=exclude_packages), + install_requires=[ + 'azure-storage-blob<13.0.0,>=12.0.0', + 'azure-eventhub<6.0.0,>=5.0.0b5', + ], + extras_require={ + + } +) diff --git a/sdk/eventhub/azure-eventhubs-checkpointstoreblob/tests/test_storage_blob_partition_manager.py b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/tests/test_storage_blob_partition_manager.py new file mode 100644 index 000000000000..d397224a8a84 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs-checkpointstoreblob/tests/test_storage_blob_partition_manager.py @@ -0,0 +1,123 @@ +#------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. 
+#-------------------------------------------------------------------------- + +import pytest +import time +import os +import uuid +import warnings + +from azure.eventhub.extensions.checkpointstoreblob import BlobPartitionManager + + +def get_live_storage_blob_client(): + try: + storage_connection_str = os.environ['AZURE_STORAGE_CONN_STR'] + except KeyError: + return None, None + try: + from azure.storage.blob import BlobServiceClient + from azure.storage.blob import ContainerClient + except ImportError or ModuleNotFoundError: + return None, None + + container_str = str(uuid.uuid4()) + blob_service_client = BlobServiceClient.from_connection_string(storage_connection_str) + blob_service_client.create_container(container_str) + container_client = ContainerClient.from_connection_string(storage_connection_str, container_str) + return container_str, container_client + + +def remove_live_storage_blob_client(container_str): + try: + storage_connection_str = os.environ['AZURE_STORAGE_CONN_STR'] + from azure.storage.blob import BlobServiceClient + blob_service_client = BlobServiceClient.from_connection_string(storage_connection_str) + blob_service_client.delete_container(container_str) + except: + warnings.warn(UserWarning("storage container teardown failed")) + + +def _claim_and_list_ownership(live_storage_blob_client): + fully_qualified_namespace = 'test_namespace' + eventhub_name = 'eventhub' + consumer_group_name = '$default' + ownership_cnt = 8 + with live_storage_blob_client: + partition_manager = BlobPartitionManager(container_client=live_storage_blob_client) + + ownership_list = partition_manager.list_ownership( + fully_qualified_namespace=fully_qualified_namespace, + eventhub_name=eventhub_name, + consumer_group_name=consumer_group_name) + assert len(ownership_list) == 0 + + ownership_list = [] + + for i in range(ownership_cnt): + ownership = {} + ownership['fully_qualified_namespace'] = fully_qualified_namespace + ownership['eventhub_name'] = eventhub_name + ownership['consumer_group_name'] = consumer_group_name + ownership['owner_id'] = 'ownerid' + ownership['partition_id'] = str(i) + ownership['last_modified_time'] = time.time() + ownership["offset"] = "1" + ownership["sequence_number"] = "1" + ownership_list.append(ownership) + + partition_manager.claim_ownership(ownership_list) + + ownership_list = partition_manager.list_ownership( + fully_qualified_namespace=fully_qualified_namespace, + eventhub_name=eventhub_name, + consumer_group_name=consumer_group_name) + assert len(ownership_list) == ownership_cnt + + +@pytest.mark.liveTest +def test_claim_and_list_ownership(): + container_str, live_storage_blob_client = get_live_storage_blob_client() + if not live_storage_blob_client: + pytest.skip("Storage blob client can't be created") + try: + _claim_and_list_ownership(live_storage_blob_client) + finally: + remove_live_storage_blob_client(container_str) + + +def _update_checkpoint(live_storage_blob_client): + fully_qualified_namespace = 'test_namespace' + eventhub_name = 'eventhub' + consumer_group_name = '$default' + partition_cnt = 8 + + with live_storage_blob_client: + partition_manager = BlobPartitionManager(container_client=live_storage_blob_client) + for i in range(partition_cnt): + partition_manager.update_checkpoint( + fully_qualified_namespace, eventhub_name, consumer_group_name, str(i), + '2', 20) + + checkpoint_list = partition_manager.list_checkpoints( + fully_qualified_namespace=fully_qualified_namespace, + eventhub_name=eventhub_name, + consumer_group_name=consumer_group_name) 
+        assert len(checkpoint_list) == partition_cnt
+        for checkpoint in checkpoint_list:
+            assert checkpoint['offset'] == '2'
+            assert checkpoint['sequence_number'] == '20'
+
+
+@pytest.mark.liveTest
+def test_update_checkpoint():
+    container_str, live_storage_blob_client = get_live_storage_blob_client()
+    if not live_storage_blob_client:
+        pytest.skip("Storage blob client can't be created")
+    try:
+        _update_checkpoint(live_storage_blob_client)
+    finally:
+        remove_live_storage_blob_client(container_str)
diff --git a/sdk/eventhub/azure-eventhubs/HISTORY.md b/sdk/eventhub/azure-eventhubs/HISTORY.md
index 477be10e417a..a1d4d9f2b1b8 100644
--- a/sdk/eventhub/azure-eventhubs/HISTORY.md
+++ b/sdk/eventhub/azure-eventhubs/HISTORY.md
@@ -1,5 +1,28 @@
 # Release History
 
+## 5.0.0b5 (2019-11-04)
+
+**Breaking changes**
+
+- `EventHubClient` has been split into two separate clients: `EventHubProducerClient` and `EventHubConsumerClient`.
+  - Construction of both objects is the same as it was for the previous client.
+- Introduced `EventHubProducerClient` as a replacement for `EventHubProducer`.
+  - `EventHubProducerClient` supports sending events to different partitions.
+- Introduced `EventHubConsumerClient` as a replacement for `EventHubConsumer`.
+  - `EventHubConsumerClient` supports receiving events from single/all partitions.
+  - There are no longer methods that directly return `EventData`; all receiving is done via the callback method `on_events`.
+- `EventHubConsumerClient` has taken on the responsibility of `EventProcessor`.
+  - `EventHubConsumerClient` now accepts a `PartitionManager` to do load balancing and checkpointing.
+- Replaced `PartitionProcessor` by four independent callback methods accepted by the `receive` method on `EventHubConsumerClient`.
+  - `on_events(partition_context, events)` called when events are received.
+  - `on_error(partition_context, exception)` called when errors occur.
+  - `on_partition_initialize(partition_context)` called when a partition consumer is opened.
+  - `on_partition_close(partition_context, reason)` called when a partition consumer is closed.
+
+**Bug fixes**
+
+- Fixed a bug where the user-agent string was not parsed correctly.
+
 ## 5.0.0b4 (2019-10-08)
 
 **New features**
diff --git a/sdk/eventhub/azure-eventhubs/README.md b/sdk/eventhub/azure-eventhubs/README.md
index e4fc68fd38e0..046f0cd99d27 100644
--- a/sdk/eventhub/azure-eventhubs/README.md
+++ b/sdk/eventhub/azure-eventhubs/README.md
@@ -13,7 +13,7 @@ The Azure Event Hubs client library allows for publishing and consuming of Azure
 - Observe interesting operations and interactions happening within your business or other ecosystem, allowing loosely coupled systems to interact without the need to bind them together.
 - Receive events from one or more publishers, transform them to better meet the needs of your ecosystem, then publish the transformed events to a new stream for consumers to observe.
-[Source code](https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/eventhub/azure-eventhubs) | [Package (PyPi)](https://pypi.org/project/azure-eventhub/5.0.0b4) | [API reference documentation](https://azure.github.io/azure-sdk-for-python/ref/azure.eventhub) | [Product documentation](https://docs.microsoft.com/en-us/azure/event-hubs/) +[Source code](https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/eventhub/azure-eventhubs) | [Package (PyPi)](https://pypi.org/project/azure-eventhub/5.0.0b5) | [API reference documentation](https://azuresdkdocs.blob.core.windows.net/$web/python/azure-eventhub/5.0.0b5/azure.eventhub.html) | [Product documentation](https://docs.microsoft.com/en-us/azure/event-hubs/) ## Getting started @@ -53,11 +53,12 @@ There are several ways to instantiate the EventHubClient object and the followin **Create client from connection string:** ```python -from azure.eventhub import EventHubClient +from azure.eventhub import EventHubConsumerClient connection_str = '<< CONNECTION STRING FOR THE EVENT HUBS NAMESPACE >>' event_hub_path = '<< NAME OF THE EVENT HUB >>' -client = EventHubClient.from_connection_string(connection_str, event_hub_path) +consumer_client = EventHubConsumerClient.from_connection_string(connection_str, event_hub_path) + ``` - The `from_connection_string` method takes the connection string of the form @@ -67,14 +68,15 @@ entity name to your Event Hub instance. You can get the connection string from t **Create client using the azure-identity library:** ```python -from azure.eventhub import EventHubClient +from azure.eventhub import EventHubConsumerClient from azure.identity import DefaultAzureCredential credential = DefaultAzureCredential() host = '<< HOSTNAME OF THE EVENT HUB >>' event_hub_path = '<< NAME OF THE EVENT HUB >>' -client = EventHubClient(host, event_hub_path, credential) +consumer_client = EventHubConsumerClient(host, event_hub_path, credential) + ``` - This constructor takes the host name and entity name of your Event Hub instance and credential that implements the @@ -83,15 +85,11 @@ TokenCredential interface. There are implementations of the TokenCredential inte ## Key concepts -- An **Event Hub client** is the primary interface for developers interacting with the Event Hubs client library, -allowing for inspection of Event Hub metadata and providing a guided experience towards specific Event Hub operations -such as the creation of producers and consumers. - -- An **Event Hub producer** is a source of telemetry data, diagnostics information, usage logs, or other log data, +- An **EventHubProducerClient** is a source of telemetry data, diagnostics information, usage logs, or other log data, as part of an embedded device solution, a mobile device application, a game title running on a console or other device, some client or server based business solution, or a web site. -- An **Event Hub consumer** picks up such information from the Event Hub and processes it. Processing may involve aggregation, +- An **EventHubConsumerClient** picks up such information from the Event Hub and processes it. Processing may involve aggregation, complex computation, and filtering. Processing may also involve distribution or storage of the information in a raw or transformed fashion. Event Hub consumers are often robust and high-scale platform infrastructure parts with built-in analytics capabilities, like Azure Stream Analytics, Apache Spark, or Apache Storm. 
@@ -118,19 +116,19 @@ The following sections provide several code snippets covering some of the most c - [Consume events from an Event Hub](#consume-events-from-an-event-hub) - [Async publish events to an Event Hub](#async-publish-events-to-an-event-hub) - [Async consume events from an Event Hub](#async-consume-events-from-an-event-hub) -- [Consume events using an Event Processor](#consume-events-using-an-event-processor) -- [Use EventHubClient to work with IoT Hub](#use-eventhubclient-to-work-with-iot-hub) +- [Consume events using a partition manager](#consume-events-using-a-partition-manager) +- [Use EventHubConsumerClient to work with IoT Hub](#use-eventhubconsumerclient-to-work-with-iot-hub) ### Inspect an Event Hub Get the partition ids of an Event Hub. ```python -from azure.eventhub import EventHubClient +from azure.eventhub import EventHubConsumerClient connection_str = '<< CONNECTION STRING FOR THE EVENT HUBS NAMESPACE >>' event_hub_path = '<< NAME OF THE EVENT HUB >>' -client = EventHubClient.from_connection_string(connection_str, event_hub_path) +client = EventHubConsumerClient.from_connection_string(connection_str, event_hub_path) partition_ids = client.get_partition_ids() ``` @@ -141,40 +139,37 @@ Publish events to an Event Hub. #### Send a single event or an array of events ```python -from azure.eventhub import EventHubClient, EventData +from azure.eventhub import EventHubProducerClient, EventData connection_str = '<< CONNECTION STRING FOR THE EVENT HUBS NAMESPACE >>' event_hub_path = '<< NAME OF THE EVENT HUB >>' -client = EventHubClient.from_connection_string(connection_str, event_hub_path) -producer = client.create_producer(partition_id="0") +client = EventHubProducerClient.from_connection_string(connection_str, event_hub_path) try: - event_list = [] - for i in range(10): - event_list.append(EventData(b"A single event")) - - with producer: - producer.send(event_list) + event_list = [] + for i in range(10): + event_list.append(EventData(b"A single event")) + with client: + client.send(event_list) except: - raise + raise finally: pass ``` #### Send a batch of events -Use the `create_batch` method on `EventHubProcuer` to create an `EventDataBatch` object which can then be sent using the `send` method. +Use the `create_batch` method on `EventHubProducerClient` to create an `EventDataBatch` object which can then be sent using the `send` method. Events may be added to the `EventDataBatch` using the `try_add` method until the maximum batch size limit in bytes has been reached. ```python -from azure.eventhub import EventHubClient, EventData +from azure.eventhub import EventHubProducerClient, EventData try: connection_str = '<< CONNECTION STRING FOR THE EVENT HUBS NAMESPACE >>' event_hub_path = '<< NAME OF THE EVENT HUB >>' - client = EventHubClient.from_connection_string(connection_str, event_hub_path) - producer = client.create_producer(partition_id="0") + client = EventHubProducerClient.from_connection_string(connection_str, event_hub_path) - event_data_batch = producer.create_batch(max_size=10000) + event_data_batch = client.create_batch(max_size=10000) can_add = True while can_add: try: @@ -182,8 +177,8 @@ try: except ValueError: can_add = False # EventDataBatch object reaches max_size. - with producer: - producer.send(event_data_batch) + with client: + client.send(event_data_batch) except: raise finally: @@ -196,19 +191,22 @@ Consume events from an Event Hub. 
```python import logging -from azure.eventhub import EventHubClient, EventData, EventPosition +from azure.eventhub import EventHubConsumerClient connection_str = '<< CONNECTION STRING FOR THE EVENT HUBS NAMESPACE >>' event_hub_path = '<< NAME OF THE EVENT HUB >>' -client = EventHubClient.from_connection_string(connection_str, event_hub_path) -consumer = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) +client = EventHubConsumerClient.from_connection_string(connection_str, event_hub_path) + +logger = logging.getLogger("azure.eventhub") + +def on_events(partition_context, events): + logger.info("Received {} events from partition {}".format(len(events), partition_context.partition_id)) try: - logger = logging.getLogger("azure.eventhub") - with consumer: - received = consumer.receive(max_batch_size=100, timeout=5) - for event_data in received: - logger.info("Message received:{}".format(event_data)) + with client: + client.receive(on_events=on_events, consumer_group="$Default") + # receive events from specified partition: + # client.receive(on_events=on_events, consumer_group="$Default", partition_id='0') except: raise finally: @@ -221,22 +219,21 @@ Publish events to an Event Hub asynchronously. #### Send a single event or an array of events ```python -from azure.eventhub.aio import EventHubClient +from azure.eventhub.aio import EventHubProducerClient from azure.eventhub import EventData connection_str = '<< CONNECTION STRING FOR THE EVENT HUBS NAMESPACE >>' event_hub_path = '<< NAME OF THE EVENT HUB >>' -client = EventHubClient.from_connection_string(connection_str, event_hub_path) -producer = client.create_producer(partition_id="0") +client = EventHubProducerClient.from_connection_string(connection_str, event_hub_path) try: event_list = [] for i in range(10): event_list.append(EventData(b"A single event")) - async with producer: - await producer.send(event_list) # Send a list of events - await producer.send(EventData(b"A single event")) # Send a single event + async with client: + await client.send(event_list) # Send a list of events + await client.send(EventData(b"A single event")) # Send a single event except: raise finally: @@ -248,16 +245,15 @@ finally: Use the `create_batch` method on `EventHubProcuer` to create an `EventDataBatch` object which can then be sent using the `send` method. Events may be added to the `EventDataBatch` using the `try_add` method until the maximum batch size limit in bytes has been reached. ```python -from azure.eventhub.aio import EventHubClient +from azure.eventhub.aio import EventHubProducerClient from azure.eventhub import EventData try: connection_str = '<< CONNECTION STRING FOR THE EVENT HUBS NAMESPACE >>' event_hub_path = '<< NAME OF THE EVENT HUB >>' - client = EventHubClient.from_connection_string(connection_str, event_hub_path) - producer = client.create_producer(partition_id="0") + client = EventHubProducerClient.from_connection_string(connection_str, event_hub_path) - event_data_batch = await producer.create_batch(max_size=10000) + event_data_batch = await client.create_batch(max_size=10000) can_add = True while can_add: try: @@ -265,8 +261,8 @@ try: except ValueError: can_add = False # EventDataBatch object reaches max_size. - async with producer: - await producer.send(event_data_batch) + async with client: + await client.send(event_data_batch) except: raise finally: @@ -279,90 +275,88 @@ Consume events asynchronously from an EventHub. 
```python import logging -from azure.eventhub.aio import EventHubClient -from azure.eventhub import EventData, EventPosition +from azure.eventhub.aio import EventHubConsumerClient connection_str = '<< CONNECTION STRING FOR THE EVENT HUBS NAMESPACE >>' event_hub_path = '<< NAME OF THE EVENT HUB >>' -client = EventHubClient.from_connection_string(connection_str, event_hub_path) -consumer = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) +client = EventHubConsumerClient.from_connection_string(connection_str, event_hub_path) + +logger = logging.getLogger("azure.eventhub") + +async def on_events(partition_context, events): + logger.info("Received {} events from partition {}".format(len(events), partition_context.partition_id)) try: - logger = logging.getLogger("azure.eventhub") - async with consumer: - received = await consumer.receive(max_batch_size=100, timeout=5) - for event_data in received: - logger.info("Message received:{}".format(event_data)) + async with client: + received = await client.receive(on_events=on_events, consumer_group='$Default') + # receive events from specified partition: + # received = await client.receive(on_events=on_events, consumer_group='$Default', partition_id='0') except: raise finally: pass ``` -### Consume events using an Event Processor +### Consume events using a partition manager -`EventProcessor` is a high level construct which internally uses the `EventHubConsumer` mentioned in previous examples -to receive events from multiple partitions at once. +`EventHubConsumerClient` is a high level construct which allows you to receive events from multiple partitions at once +and load balance with other consumers using the same Event Hub and consumer group. -Typically, Event Processor based applications consist of one or more instances of `EventProcessor` instances which have -been configured to consume events from the same Event Hub and consumer group. They balance the workload across different -instances by distributing the partitions to be processed among themselves. -They also allow the user to track progress when events are processed using checkpoints. - -The `EventProcessor` will delegate the processing of events to a `PartitionProcessor` that you provide, -allowing you to focus on business logic while the `EventProcessor` holds responsibility for managing the -underlying consumer operations including checkpointing and load balancing. +This also allows the user to track progress when events are processed using checkpoints. A checkpoint is meant to represent the last successfully processed event by the user from a particular partition of -a consumer group in an Event Hub instance.The `EventProcessor` uses an instance of PartitionManager to update checkpoints -and to store the relevant information required by the load balancing algorithm. While for the purposes of getting -started you can use the `SamplePartitionManager` that is shipped out of the box from this library, it is recommended to -use a persistent store when running in production. Search pypi with the prefix `azure-eventhub-checkpointstore` to +a consumer group in an Event Hub instance.The `EventHubConsumerClient` uses an instance of PartitionManager to update checkpoints +and to store the relevant information required by the load balancing algorithm. + +Search pypi with the prefix `azure-eventhub-checkpointstore` to find packages that support this and use the PartitionManager implementation from one such package. 
-In the below example, we create an instance of `EventProcessor` and use an `SamplePartitionManager`. +In the below example, we create an instance of `EventHubConsumerClient` and use a `BlobPartitionManager`. You need +to [create an Azure Storage account](https://docs.microsoft.com/en-us/azure/storage/common/storage-quickstart-create-account?tabs=azure-portal) +and a [Blob Container](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-portal#create-a-container) to run the code. -[Azure Blob Storage Partition Manager](https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/eventhub/azure-eventhubs-checkpointstoreblob-aio) -is one of the `PartitionManager` implementation we provide that applies Azure Blob Storage as the persistent store. +[Azure Blob Storage Partition Manager Async](https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/eventhub/azure-eventhubs-checkpointstoreblob-aio) +and [Azure Blob Storage Partition Manager Sync](https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/eventhub/azure-eventhubs-checkpointstoreblob) +are one of the `PartitionManager` implementations we provide that applies Azure Blob Storage as the persistent store. ```python import asyncio -from azure.eventhub.aio import EventHubClient -from azure.eventhub.aio.eventprocessor import EventProcessor, PartitionProcessor, SamplePartitionManager +from azure.eventhub.aio import EventHubConsumerClient +from azure.storage.blob.aio import ContainerClient +from azure.eventhub.extensions.checkpointstoreblobaio import BlobPartitionManager +RECEIVE_TIMEOUT = 5 # timeout in seconds for a receiving operation. 0 or None means no timeout +RETRY_TOTAL = 3 # max number of retries for receive operations within the receive timeout. Actual number of retries clould be less if RECEIVE_TIMEOUT is too small connection_str = '<< CONNECTION STRING FOR THE EVENT HUBS NAMESPACE >>' +storage_connection_str = '<< CONNECTION STRING FOR THE STORAGE >>' +blob_name_str = '<>' async def do_operation(event): # do some sync or async operations. 
If the operation is i/o intensive, async will have better performance print(event) -class MyPartitionProcessor(PartitionProcessor): - async def process_events(self, events, partition_context): - if events: - await asyncio.gather(*[do_operation(event) for event in events]) - await partition_context.update_checkpoint(events[-1].offset, events[-1].sequence_number) - -async def main(): - client = EventHubClient.from_connection_string(connection_str, receive_timeout=5, retry_total=3) - partition_manager = SamplePartitionManager() # in-memory or file based PartitionManager - try: - event_processor = EventProcessor(client, "$default", MyPartitionProcessor, partition_manager) - asyncio.ensure_future(event_processor.start()) - await asyncio.sleep(60) - await event_processor.stop() - finally: - await partition_manager.close() +async def process_events(partition_context, events): + await asyncio.gather(*[do_operation(event) for event in events]) + await partition_context.update_checkpoint(events[-1]) if __name__ == '__main__': loop = asyncio.get_event_loop() - loop.run_until_complete(main()) + container_client = ContainerClient.from_connection_string(storage_connection_str, blob_name_str) + partition_manager = BlobPartitionManager(container_client=container_client) + client = EventHubConsumerClient.from_connection_string(connection_str, partition_manager=partition_manager, receive_timeout=RECEIVE_TIMEOUT, retry_total=RETRY_TOTAL) + try: + loop.run_until_complete(client.receive(process_events, "$default")) + except KeyboardInterrupt: + loop.run_until_complete(client.close()) + finally: + loop.stop() ``` -### Use EventHubClient to work with IoT Hub +### Use EventHubConsumerClient to work with IoT Hub -You can use `EventHubClient` to work with IoT Hub as well. This is useful for receiving telemetry data of IoT Hub from the +You can use `EventHubConsumerClient` to work with IoT Hub as well. This is useful for receiving telemetry data of IoT Hub from the linked EventHub. The associated connection string will not have send claims, hence sending events is not possible. - Please notice that the connection string needs to be for an @@ -370,10 +364,10 @@ linked EventHub. The associated connection string will not have send claims, hen e.g. "Endpoint=sb://my-iothub-namespace-[uid].servicebus.windows.net/;SharedAccessKeyName=my-SA-name;SharedAccessKey=my-SA-key;EntityPath=my-iot-hub-name" ```python -from azure.eventhub import EventHubClient +from azure.eventhub import EventHubConsumerClient connection_str = 'Endpoint=sb://my-iothub-namespace-[uid].servicebus.windows.net/;SharedAccessKeyName=my-SA-name;SharedAccessKey=my-SA-key;EntityPath=my-iot-hub-name' -client = EventHubClient.from_connection_string(connection_str) +client = EventHubConsumerClient.from_connection_string(connection_str) partition_ids = client.get_partition_ids() ``` @@ -397,23 +391,22 @@ For instance, this error is raised if you try to send an EventData that is alrea ### Examples -These are the samples in our repo demonstraing the usage of the library. +These are [more samples](https://github.com/Azure/azure-sdk-for-python/blob/master/sdk/eventhub/azure-eventhubs/samples) in our repo demonstrating the usage of the library. 
-- [./examples/send.py](https://github.com/Azure/azure-sdk-for-python/blob/master/sdk/eventhub/azure-eventhubs/examples/send.py) - use producer to publish events -- [./examples/recv.py](https://github.com/Azure/azure-sdk-for-python/blob/master/sdk/eventhub/azure-eventhubs/examples/recv.py) - use consumer to consume events -- [./examples/async_examples/send_async.py](https://github.com/Azure/azure-sdk-for-python/blob/master/sdk/eventhub/azure-eventhubs/examples/async_examples/send_async.py) - async/await support of a producer -- [./examples/async_examples/recv_async.py](https://github.com/Azure/azure-sdk-for-python/blob/master/sdk/eventhub/azure-eventhubs/examples/async_examples/recv_async.py) - async/await support of a consumer -- [./examples/eventprocessor/event_processor_example.py](https://github.com/Azure/azure-sdk-for-python/blob/master/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py) - event processor +- [./samples/sync_samples/send.py](https://github.com/Azure/azure-sdk-for-python/blob/master/sdk/eventhub/azure-eventhubs/examples/send.py) - use EventHubProducerClient to publish events +- [./samples/sync_samples/recv.py](https://github.com/Azure/azure-sdk-for-python/blob/master/sdk/eventhub/azure-eventhubs/examples/recv.py) - use EventHubConsumerClient to consume events +- [./samples/async_examples/send_async.py](https://github.com/Azure/azure-sdk-for-python/blob/master/sdk/eventhub/azure-eventhubs/examples/async_examples/send_async.py) - async/await support of a EventHubProducerClient +- [./samples/async_examples/recv_async.py](https://github.com/Azure/azure-sdk-for-python/blob/master/sdk/eventhub/azure-eventhubs/examples/async_examples/recv_async.py) - async/await support of a EventHubConsumerClient ### Documentation -Reference documentation is available at https://azure.github.io/azure-sdk-for-python/ref/azure.eventhub. +Reference documentation is available at https://azuresdkdocs.blob.core.windows.net/$web/python/azure-eventhub/5.0.0b5/azure.eventhub.html. ### Logging - Enable `azure.eventhub` logger to collect traces from the library. - Enable `uamqp` logger to collect traces from the underlying uAMQP library. -- Enable AMQP frame level trace by setting `network_tracing=True` when creating the client. +- Enable AMQP frame level trace by setting `logging_enable=True` when creating the client. 
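+
+For example (illustrative only, using the options documented above), verbose client and AMQP frame traces can be collected like this:
+
+```python
+import logging
+from azure.eventhub import EventHubConsumerClient
+
+logging.basicConfig(level=logging.DEBUG)  # surfaces azure.eventhub and uamqp traces
+
+client = EventHubConsumerClient.from_connection_string(
+    "<< CONNECTION STRING FOR THE EVENT HUBS NAMESPACE >>",
+    event_hub_path="<< NAME OF THE EVENT HUB >>",
+    logging_enable=True,  # also emit AMQP frame level trace
+)
+```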
### Provide Feedback diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/__init__.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/__init__.py index 888e86a98986..fdcb3bf49329 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/__init__.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/__init__.py @@ -4,15 +4,16 @@ # -------------------------------------------------------------------------------------------- __path__ = __import__('pkgutil').extend_path(__path__, __name__) # type: ignore -__version__ = "5.0.0b4" +__version__ = "5.0.0b5" from uamqp import constants # type: ignore -from azure.eventhub.common import EventData, EventDataBatch, EventPosition -from azure.eventhub.error import EventHubError, EventDataError, ConnectError, \ +from .common import EventData, EventDataBatch, EventPosition +from .error import EventHubError, EventDataError, ConnectError, \ AuthenticationError, EventDataSendError, ConnectionLostError -from azure.eventhub.client import EventHubClient -from azure.eventhub.producer import EventHubProducer -from azure.eventhub.consumer import EventHubConsumer +from ._producer_client import EventHubProducerClient +from ._consumer_client import EventHubConsumerClient from .common import EventHubSharedKeyCredential, EventHubSASTokenCredential +from ._eventprocessor.partition_manager import PartitionManager +from ._eventprocessor.common import CloseReason, OwnershipLostError TransportType = constants.TransportType @@ -26,10 +27,12 @@ "EventDataSendError", "AuthenticationError", "EventPosition", - "EventHubClient", - "EventHubProducer", - "EventHubConsumer", + "EventHubProducerClient", + "EventHubConsumerClient", "TransportType", "EventHubSharedKeyCredential", "EventHubSASTokenCredential", + "PartitionManager", + "CloseReason", + "OwnershipLostError", ] diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_connection_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_connection_manager.py index 77c12a376f97..b8af12eca3d9 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/_connection_manager.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_connection_manager.py @@ -3,13 +3,19 @@ # Licensed under the MIT License. See License.txt in the project root for license information. 
# -------------------------------------------------------------------------------------------- -from threading import RLock +from threading import Lock +from enum import Enum from uamqp import Connection, TransportType, c_uamqp # type: ignore +class _ConnectionMode(Enum): + ShareConnection = 1 + SeparateConnection = 2 + + class _SharedConnectionManager(object): #pylint:disable=too-many-instance-attributes def __init__(self, **kwargs): - self._lock = RLock() + self._lock = Lock() self._conn = None # type: Connection self._container_id = kwargs.get("container_id") @@ -74,4 +80,7 @@ def reset_connection_if_broken(self): def get_connection_manager(**kwargs): + connection_mode = kwargs.get("connection_mode", _ConnectionMode.SeparateConnection) + if connection_mode == _ConnectionMode.ShareConnection: + return _SharedConnectionManager(**kwargs) return _SeparateConnectionManager(**kwargs) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_client.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_client.py new file mode 100644 index 000000000000..c6220e7c04c4 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_client.py @@ -0,0 +1,198 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- +import logging +from typing import Any, Union, Dict, Tuple, TYPE_CHECKING, Callable, List + +from .common import EventHubSharedKeyCredential, EventHubSASTokenCredential, EventData +from .client import EventHubClient +from ._eventprocessor.event_processor import EventProcessor +from ._eventprocessor.partition_context import PartitionContext + +if TYPE_CHECKING: + from azure.core.credentials import TokenCredential # type: ignore + +log = logging.getLogger(__name__) + + +class EventHubConsumerClient(EventHubClient): + """ The EventHubConsumerClient class defines a high level interface for + receiving events from the Azure Event Hubs service. + + The main goal of `EventHubConsumerClient` is to receive events from all partitions of an EventHub with + load balancing and checkpointing. + + When multiple `EventHubConsumerClient` instances run in one process, in multiple processes, or on multiple machines, + and they use the same repository as the load balancing and checkpointing store, they will balance the partition load automatically. + To enable load balancing and/or checkpointing, partition_manager must be set when creating the + `EventHubConsumerClient`. + + An `EventHubConsumerClient` can also receive from a specific partition when you call its `receive()` method + and specify the partition_id. + Load balancing won't work in single-partition mode, but users can still save checkpoints if the partition_manager + is set. + + :param str host: The hostname of the Event Hub. + :param str event_hub_path: The path of the specific Event Hub to connect the client to. + :param credential: The credential object used for authentication which implements a particular interface + for getting tokens. + :type credential: ~azure.eventhub.EventHubSharedKeyCredential,~azure.eventhub.EventHubSASTokenCredential, + Credential objects in azure-identity and objects that implement `get_token(self, *scopes)` method + :keyword bool logging_enable: Whether to output network trace logs to the logger. Default is `False`.
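Not part of this diff: a minimal construction sketch based on the constructor parameters documented above. The namespace, Event Hub name, and shared-key values are placeholders, and the credential signature is assumed to match this preview's `EventHubSharedKeyCredential(policy, key)`.

```python
from azure.eventhub import EventHubConsumerClient, EventHubSharedKeyCredential

# Placeholder connection values for illustration only.
credential = EventHubSharedKeyCredential("<policy-name>", "<key>")

consumer_client = EventHubConsumerClient(
    host="<namespace>.servicebus.windows.net",
    event_hub_path="<event-hub-name>",
    credential=credential,
    partition_manager=None,      # pass a PartitionManager implementation to enable
                                 # load balancing and checkpointing
    load_balancing_interval=10,  # seconds between two load balancing evaluations
    logging_enable=False,
)
```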
+ :keyword float auth_timeout: The time in seconds to wait for a token to be authorized by the service. + The default value is 60 seconds. If set to 0, no timeout will be enforced from the client. + :keyword str user_agent: The user agent that needs to be appended to the built in user agent string. + :keyword int retry_total: The total number of attempts to redo the failed operation when an error happened. Default + value is 3. + :keyword transport_type: The type of transport protocol that will be used for communicating with + the Event Hubs service. Default is ~azure.eventhub.TransportType.Amqp. + :paramtype transport_type: ~azure.eventhub.TransportType + :keyword dict http_proxy: HTTP proxy settings. This must be a dictionary with the following + keys: 'proxy_hostname' (str value) and 'proxy_port' (int value). + Additionally the following keys may also be present: 'username', 'password'. + :keyword partition_manager: stores the load balancing data and checkpoint data when receiving events + if partition_manager is specified. If it's None, this EventHubConsumerClient instance will receive + events without load balancing and checkpoint. + :paramtype partition_manager: Implementation classes of ~azure.eventhub.PartitionManager + :keyword float load_balancing_interval: When load balancing kicks in, this is the interval in seconds + between two load balancing. Default is 10. + + .. admonition:: Example: + + .. literalinclude:: ../samples/sync_samples/sample_code_eventhub.py + :start-after: [START create_eventhub_consumer_client_sync] + :end-before: [END create_eventhub_consumer_client_sync] + :language: python + :dedent: 4 + :caption: Create a new instance of the EventHubConsumerClient. + """ + + def __init__(self, host, event_hub_path, credential, **kwargs): + # type:(str, str, Union[EventHubSharedKeyCredential, EventHubSASTokenCredential, TokenCredential], Any) -> None + receive_timeout = kwargs.get("receive_timeout", 3) + if receive_timeout <= 0: + raise ValueError("receive_timeout must be greater than 0.") + + kwargs['receive_timeout'] = receive_timeout + self._partition_manager = kwargs.pop("partition_manager", None) + self._load_balancing_interval = kwargs.pop("load_balancing_interval", 10) + super(EventHubConsumerClient, self).__init__( + host=host, event_hub_path=event_hub_path, credential=credential, + network_tracing=kwargs.get("logging_enable"), **kwargs) + self._event_processors = dict() # type: Dict[Tuple[str, str], EventProcessor] + self._closed = False + + @classmethod + def _stop_eventprocessor(cls, event_processor): + # pylint: disable=protected-access + eventhub_client = event_processor._eventhub_client + consumer_group = event_processor._consumer_group_name + partition_id = event_processor._partition_id + with eventhub_client._lock: + event_processor.stop() + if partition_id and (consumer_group, partition_id) in eventhub_client._event_processors: + del eventhub_client._event_processors[(consumer_group, partition_id)] + elif (consumer_group, '-1') in eventhub_client._event_processors: + del eventhub_client._event_processors[(consumer_group, "-1")] + + def receive(self, on_events, consumer_group, **kwargs): + # type: (Callable[[PartitionContext, List[EventData]], None], str, Any) -> None + """Receive events from partition(s) optionally with load balancing and checkpointing. + + :param on_events: The callback function for handling received events. 
The callback takes two + parameters: `partition_context` which contains partition information and `events` which are the received events. + Please define the callback like `on_event(partition_context, events)`. + For detailed partition context information, please refer to ~azure.eventhub.PartitionContext. + :type on_events: Callable[PartitionContext, List[EventData]] + :param str consumer_group: The name of the consumer group this consumer is associated with. + Events are read in the context of this group. The default consumer_group for an event hub is "$Default". + :keyword str partition_id: The identifier of the Event Hub partition from which events will be received. + :keyword int owner_level: The priority of the exclusive consumer. An exclusive consumer will be created + if owner_level is set. + :keyword int prefetch: The message prefetch count of the consumer. Default is 300. + :keyword bool track_last_enqueued_event_properties: Indicates whether or not the consumer should + request information on the last enqueued event on its associated partition, and track that information + as events are received. When information about the partition's last enqueued event is being tracked, + each event received from the Event Hubs service will carry metadata about the partition. This results in + a small amount of additional network bandwidth consumption that is generally a favorable trade-off when + considered against periodically making requests for partition properties using the Event Hub client. + It is set to `False` by default. + :keyword initial_event_position: Start receiving from this initial_event_position + if there isn't checkpoint data for a partition. Use the checkpoint data if it is available. This can be a + dict with partition id as the key and position as the value for individual partitions, or a single + EventPosition instance for all partitions. This parameter can be of type ~azure.eventhub.EventPosition or + dict[str,~azure.eventhub.EventPosition] where the key is partition id. + :paramtype initial_event_position: ~azure.eventhub.EventPosition, dict[str,~azure.eventhub.EventPosition] + :keyword on_error: The callback function which will be called when an error is met during the receiving + process. The callback takes two parameters: `partition_context` which contains partition information + and `error` being the exception. Please define the callback like `on_error(partition_context, error)`. + :paramtype on_error: Callable[[PartitionContext, Exception]] + :keyword on_partition_initialize: The callback function which will be called after a consumer for a certain + partition finishes initialization. The callback takes a single parameter: `partition_context` which contains + the partition information. Please define the callback like `on_partition_initialize(partition_context)`. + :paramtype on_partition_initialize: Callable[[PartitionContext]] + :keyword on_partition_close: The callback function which will be called after a consumer for a certain + partition is closed. The callback takes two parameters: `partition_context` which contains partition + information and `reason` for the close. Please define the callback like `on_partition_close(partition_context, reason)`. + Please refer to `azure.eventhub.CloseReason` for the different closing reasons. + :paramtype on_partition_close: Callable[[PartitionContext, CloseReason]] + :rtype: None + + .. admonition:: Example: + + ..
literalinclude:: ../samples/sync_samples/sample_code_eventhub.py + :start-after: [START eventhub_consumer_client_receive_sync] + :end-before: [END eventhub_consumer_client_receive_sync] + :language: python + :dedent: 4 + :caption: Receive events from the EventHub. + """ + partition_id = kwargs.get("partition_id") + + with self._lock: + error = None + if (consumer_group, '-1') in self._event_processors: + error = ValueError("This consumer client is already receiving events from all partitions for" + " consumer group {}. ".format(consumer_group)) + elif partition_id is None and any(x[0] == consumer_group for x in self._event_processors): + error = ValueError("This consumer client is already receiving events for consumer group {}. " + .format(consumer_group)) + elif (consumer_group, partition_id) in self._event_processors: + error = ValueError("This consumer is already receiving events from partition {} for consumer group {}. " + .format(partition_id, consumer_group)) + if error: + log.warning(error) + raise error + + event_processor = EventProcessor( + self, consumer_group, on_events, + partition_manager=self._partition_manager, + polling_interval=self._load_balancing_interval, + **kwargs + ) + self._event_processors[(consumer_group, partition_id or "-1")] = event_processor + + event_processor.start() + + def close(self): + # type: () -> None + """Stop retrieving events from event hubs and close the underlying AMQP connection and links. + + :rtype: None + + .. admonition:: Example: + + .. literalinclude:: ../samples/sync_samples/sample_code_eventhub.py + :start-after: [START eventhub_consumer_client_close_sync] + :end-before: [END eventhub_consumer_client_close_sync] + :language: python + :dedent: 4 + :caption: Close down the client. + + """ + with self._lock: + for _ in range(len(self._event_processors)): + _, ep = self._event_processors.popitem() + ep.stop() + super(EventHubConsumerClient, self).close() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py index 44ea7d09b493..5da486ea1b11 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_consumer_producer_mixin.py @@ -8,7 +8,7 @@ import time from uamqp import errors, constants, compat # type: ignore -from azure.eventhub.error import EventHubError, _handle_exception +from .error import EventHubError, _handle_exception log = logging.getLogger(__name__) @@ -94,15 +94,6 @@ def close(self): """ Close down the handler. If the handler has already closed, this will be a no op. - - Example: - .. literalinclude:: ../examples/test_examples_eventhub.py - :start-after: [START eventhub_client_receiver_close] - :end-before: [END eventhub_client_receiver_close] - :language: python - :dedent: 4 - :caption: Close down the handler. - """ if self._handler: self._handler.close() # this will close link if sharing connection. Otherwise close connection diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/__init__.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/__init__.py new file mode 100644 index 000000000000..34913fb394d7 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/__init__.py @@ -0,0 +1,4 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/common.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/common.py new file mode 100644 index 000000000000..90a53a20cd53 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/common.py @@ -0,0 +1,23 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# ----------------------------------------------------------------------------------- + +from enum import Enum + + +class CloseReason(Enum): + """ + A partition consumer is closed for one of two reasons: + SHUTDOWN: It is explicitly required to stop; this would happen when the EventHubConsumerClient is closed. + OWNERSHIP_LOST: It loses the ownership of a partition; this would happen when another EventHubConsumerClient + instance claims ownership of the partition. + """ + SHUTDOWN = 0 + OWNERSHIP_LOST = 1 + + +class OwnershipLostError(Exception): + """Raised when update_checkpoint detects that the ownership of a partition has been lost. + + """ diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/event_processor.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/event_processor.py new file mode 100644 index 000000000000..5445bc8da490 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/event_processor.py @@ -0,0 +1,306 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# ----------------------------------------------------------------------------------- + +from contextlib import contextmanager +from typing import Dict, Type +import uuid +import logging +import time +import threading + +from uamqp.compat import queue # type: ignore + +from azure.core.tracing import SpanKind # type: ignore +from azure.core.settings import settings # type: ignore + +from azure.eventhub import EventPosition +from .partition_context import PartitionContext +from .ownership_manager import OwnershipManager +from .common import CloseReason + +log = logging.getLogger(__name__) + + +class EventProcessor(object):  # pylint:disable=too-many-instance-attributes + """ + An EventProcessor constantly receives events from one or multiple partitions of the Event Hub + in the context of a given consumer group.
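For illustration (not part of this diff), a sketch wiring the `receive` callbacks to the `CloseReason` values above, assuming a `consumer_client` constructed as in the earlier sketch with a partition manager configured for checkpointing:

```python
from azure.eventhub import CloseReason

def on_events(partition_context, events):
    for event in events:
        print("Event from partition", partition_context.partition_id)
    if events:
        # Persist progress through the configured partition manager
        # (only logged and skipped when no partition_manager was set).
        partition_context.update_checkpoint(events[-1])

def on_error(partition_context, error):
    print("Error on partition", partition_context.partition_id, error)

def on_partition_close(partition_context, reason):
    if reason == CloseReason.OWNERSHIP_LOST:
        print("Partition", partition_context.partition_id, "was claimed by another consumer")
    else:  # CloseReason.SHUTDOWN
        print("Consumer for partition", partition_context.partition_id, "was shut down")

# receive() blocks while callbacks are dispatched; call consumer_client.close()
# (for example from another thread) to stop receiving.
consumer_client.receive(
    on_events,
    consumer_group="$Default",
    on_error=on_error,
    on_partition_close=on_partition_close,
)
```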
+ + """ + def __init__(self, eventhub_client, consumer_group_name, on_event, **kwargs): + self._consumer_group_name = consumer_group_name + self._eventhub_client = eventhub_client + self._namespace = eventhub_client._address.hostname # pylint: disable=protected-access + self._eventhub_name = eventhub_client.eh_name + self._event_handler = on_event + self._partition_id = kwargs.get("partition_id", None) + self._error_handler = kwargs.get("on_error", None) + self._partition_initialize_handler = kwargs.get("on_partition_initialize", None) + self._partition_close_handler = kwargs.get("on_partition_close", None) + self._partition_manager = kwargs.get("partition_manager", None) + self._initial_event_position = kwargs.get("initial_event_position", EventPosition("-1")) + + self._polling_interval = kwargs.get("polling_interval", 10.0) + self._ownership_timeout = self._polling_interval * 2 + + self._partition_contexts = {} + + # Receive parameters + self._owner_level = kwargs.get("owner_level", None) + self._prefetch = kwargs.get("prefetch", None) + self._track_last_enqueued_event_properties = kwargs.get("track_last_enqueued_event_properties", False) + self._last_enqueued_event_properties = {} + self._id = str(uuid.uuid4()) + self._running = False + self._lock = threading.RLock() + + # Each partition consumer is working in its own thread + self._working_threads = {} # type: Dict[str, threading.Thread] + self._threads_stop_flags = {} # type: Dict[str, bool] + + self._callback_queue = queue.Queue(maxsize=100) # Right now the limitation of receiving speed is ~10k + + def __repr__(self): + return 'EventProcessor: id {}'.format(self._id) + + def _cancel_tasks_for_partitions(self, to_cancel_partitions): + with self._lock: + for partition_id in to_cancel_partitions: + if partition_id in self._working_threads: + self._threads_stop_flags[partition_id] = True # the cancellation token sent to thread to stop + + if to_cancel_partitions: + log.info("EventProcesor %r has cancelled partitions %r", self._id, to_cancel_partitions) + + def _create_tasks_for_claimed_ownership(self, claimed_partitions, checkpoints=None): + with self._lock: + for partition_id in claimed_partitions: + if partition_id not in self._working_threads or not self._working_threads[partition_id].is_alive(): + checkpoint = checkpoints.get(partition_id) if checkpoints else None + self._working_threads[partition_id] = threading.Thread(target=self._receive, + args=(partition_id, checkpoint)) + self._working_threads[partition_id].daemon = True + self._threads_stop_flags[partition_id] = False + self._working_threads[partition_id].start() + log.info("Working thread started, ownership %r, checkpoint %r", partition_id, checkpoint) + + @contextmanager + def _context(self, events): + # Tracing + span_impl_type = settings.tracing_implementation() # type: Type[AbstractSpan] + if span_impl_type is None: + yield + else: + child = span_impl_type(name="Azure.EventHubs.process") + self._eventhub_client._add_span_request_attributes(child) # pylint: disable=protected-access + child.kind = SpanKind.SERVER + + for event in events: + event._trace_link_message(child) # pylint: disable=protected-access + with child: + yield + + def _process_error(self, partition_context, err): + log.warning( + "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r" + " has met an error. 
The exception is %r.", + partition_context.owner_id, + partition_context.eventhub_name, + partition_context.partition_id, + partition_context.consumer_group_name, + err + ) + if self._error_handler: + self._callback_queue.put((self._error_handler, partition_context, err), block=True) + + def _process_close(self, partition_context, reason): + if self._partition_close_handler: + log.info( + "PartitionProcessor of EventProcessor instance %r of eventhub %r partition %r consumer group %r" + " is being closed. Reason is: %r", + partition_context.owner_id, + partition_context.eventhub_name, + partition_context.partition_id, + partition_context.consumer_group_name, + reason + ) + if self._partition_close_handler: + self._callback_queue.put((self._partition_close_handler, partition_context, reason), block=True) + + def _handle_callback(self, callback_and_args): + callback = callback_and_args[0] + try: + callback(*callback_and_args[1:]) + except Exception as exp: # pylint:disable=broad-except + partition_context = callback_and_args[1] + if callback != self._error_handler: + self._process_error(partition_context, exp) + else: + log.warning( + "EventProcessor instance %r of eventhub %r partition %r consumer group %r" + " has another error during running process_error(). The exception is %r.", + partition_context.owner_id, + partition_context.eventhub_name, + partition_context.partition_id, + partition_context.consumer_group_name, + exp + ) + + def _receive(self, partition_id, checkpoint=None): # pylint: disable=too-many-statements + try: # pylint:disable =too-many-nested-blocks + log.info("start ownership %r, checkpoint %r", partition_id, checkpoint) + namespace = self._namespace + eventhub_name = self._eventhub_name + consumer_group_name = self._consumer_group_name + owner_id = self._id + checkpoint_offset = checkpoint.get("offset") if checkpoint else None + if checkpoint_offset: + initial_event_position = EventPosition(checkpoint_offset) + elif isinstance(self._initial_event_position, EventPosition): + initial_event_position = self._initial_event_position + elif isinstance(self._initial_event_position, dict): + initial_event_position = self._initial_event_position.get(partition_id, EventPosition("-1")) + else: + initial_event_position = EventPosition(self._initial_event_position) + if partition_id in self._partition_contexts: + partition_context = self._partition_contexts[partition_id] + else: + partition_context = PartitionContext( + namespace, + eventhub_name, + consumer_group_name, + partition_id, + owner_id, + self._partition_manager + ) + self._partition_contexts[partition_id] = partition_context + + partition_consumer = self._eventhub_client._create_consumer( # pylint: disable=protected-access + consumer_group_name, + partition_id, + initial_event_position, + owner_level=self._owner_level, + track_last_enqueued_event_properties=self._track_last_enqueued_event_properties, + prefetch=self._prefetch, + ) + + try: + if self._partition_initialize_handler: + self._callback_queue.put((self._partition_initialize_handler, partition_context), block=True) + while self._threads_stop_flags[partition_id] is False: + try: + events = partition_consumer.receive() + if events: + if self._track_last_enqueued_event_properties: + self._last_enqueued_event_properties[partition_id] = \ + partition_consumer.last_enqueued_event_properties + with self._context(events): + self._callback_queue.put((self._event_handler, partition_context, events), block=True) + except Exception as error: # pylint:disable=broad-except + 
self._process_error(partition_context, error) + break + # Go to finally to stop this partition processor. + # Later an EventProcessor(this one or another one) will pick up this partition again. + finally: + partition_consumer.close() + if self._running: + # Event processor is running but the partition consumer has been stopped. + self._process_close(partition_context, CloseReason.OWNERSHIP_LOST) + else: + self._process_close(partition_context, CloseReason.SHUTDOWN) + finally: + with self._lock: + del self._working_threads[partition_id] + self._threads_stop_flags[partition_id] = True + + def _start(self): + """Start the EventProcessor. + + The EventProcessor will try to claim and balance partition ownership with other `EventProcessor` + and start receiving EventData from EventHub and processing events. + + :return: None + + """ + ownership_manager = OwnershipManager(self._eventhub_client, self._consumer_group_name, self._id, + self._partition_manager, self._ownership_timeout, self._partition_id) + while self._running: + try: + checkpoints = ownership_manager.get_checkpoints() if self._partition_manager else None + claimed_partition_ids = ownership_manager.claim_ownership() + if claimed_partition_ids: + to_cancel_list = set(self._working_threads.keys()) - set(claimed_partition_ids) + self._create_tasks_for_claimed_ownership(claimed_partition_ids, checkpoints) + else: + log.info("EventProcessor %r hasn't claimed an ownership. It keeps claiming.", self._id) + to_cancel_list = set(self._working_threads.keys()) + if to_cancel_list: + self._cancel_tasks_for_partitions(to_cancel_list) + except Exception as err: # pylint:disable=broad-except + log.warning("An exception (%r) occurred during balancing and claiming ownership for " + "eventhub %r consumer group %r. Retrying after %r seconds", + err, self._eventhub_name, self._consumer_group_name, self._polling_interval) + # ownership_manager.get_checkpoints() and ownership_manager.claim_ownership() may raise exceptions + # when there are load balancing and/or checkpointing (partition_manager isn't None). + # They're swallowed here to retry every self._polling_interval seconds. + # Meanwhile this event processor won't lose the partitions it has claimed before. + # If it keeps failing, other EventProcessors will start to claim ownership of the partitions + # that this EventProcessor is working on. So two or multiple EventProcessors may be working + # on the same partition. + time.sleep(self._polling_interval) + + def _get_last_enqueued_event_properties(self, partition_id): + if partition_id in self._working_threads and partition_id in self._last_enqueued_event_properties: + return self._last_enqueued_event_properties[partition_id] + raise ValueError("You're not receiving events from partition {}".format(partition_id)) + + def start(self): + if not self._running: + log.info("EventProcessor %r is being started", self._id) + self._running = True + thread = threading.Thread(target=self._start) + thread.daemon = True + thread.start() + + while self._running or self._callback_queue.qsize() or self._working_threads: + try: + callback_and_args = self._callback_queue.get(block=False) + self._handle_callback(callback_and_args) + self._callback_queue.task_done() + except queue.Empty: + # ignore queue empty exception + pass + + else: + log.info("EventProcessor %r has already started.", self._id) + + def stop(self): + """Stop the EventProcessor. 
+ + The EventProcessor will stop receiving events from EventHubs and release the ownership of the partitions + it is working on. + Other running EventProcessors will take over these released partitions. + + A stopped EventProcessor can be restarted by calling its `start` method again. + + :return: None + + """ + if not self._running: + log.info("EventProcessor %r has already been stopped.", self._id) + return + + self._running = False + + with self._lock: + to_join_threads = [x for x in self._working_threads.values()] + self._cancel_tasks_for_partitions(list(self._working_threads.keys())) + + for thread in to_join_threads: + thread.join() + + self._threads_stop_flags.clear() + + log.info("EventProcessor %r has been stopped.", self._id) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/local_partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/local_partition_manager.py new file mode 100644 index 000000000000..1933d58e9f45 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/local_partition_manager.py @@ -0,0 +1,24 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# ----------------------------------------------------------------------------------- + +from .sqlite3_partition_manager import Sqlite3PartitionManager + + +class InMemoryPartitionManager(Sqlite3PartitionManager): + """A partition manager that stores checkpoint and load balancer partition ownership data in memory. + This is for mock tests only. + + """ + def __init__(self): + super(InMemoryPartitionManager, self).__init__(db_filename=":memory:") + + +class FileBasedPartitionManager(Sqlite3PartitionManager): + """A partition manager that stores checkpoint and load balancer partition ownership data in a file. + Do not use this in production. + + """ + def __init__(self, filename): + super(FileBasedPartitionManager, self).__init__(db_filename=filename) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/ownership_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/ownership_manager.py new file mode 100644 index 000000000000..7b1bc9c7a46e --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/ownership_manager.py @@ -0,0 +1,142 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# ----------------------------------------------------------------------------------- + +import time +import random +from typing import List +from collections import Counter, defaultdict + + +class OwnershipManager(object): + """Increases or decreases the number of partitions owned by an EventProcessor + so that the number of owned partitions is balanced among multiple EventProcessors. + + An EventProcessor calls claim_ownership() of this class every x seconds, + where x is set by keyword argument "polling_interval" in EventProcessor, + to claim the ownership of partitions, create tasks for the claimed ownership, and cancel tasks that no longer belong + to the claimed ownership.
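To make the balancing rules above concrete (illustrative only, not part of this diff): an ownership record carries the fields defined by the `PartitionManager` contract further down, and it becomes claimable once it has not been renewed within the ownership timeout, which is twice the polling interval.

```python
import time

# Illustrative ownership record; field names follow the PartitionManager contract.
ownership = {
    "fully_qualified_namespace": "<namespace>.servicebus.windows.net",
    "eventhub_name": "<event-hub-name>",
    "consumer_group_name": "$Default",
    "partition_id": "0",
    "owner_id": "<event-processor-uuid>",
    "last_modified_time": time.time(),
    "etag": "<etag>",
}

# With the default polling_interval of 10 seconds the timeout is 20 seconds:
# an ownership not renewed within that window is treated as expired and claimable.
polling_interval = 10.0
ownership_timeout = 2 * polling_interval
is_expired = ownership["last_modified_time"] + ownership_timeout < time.time()
```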
+ + """ + def __init__( + self, eventhub_client, consumer_group_name, owner_id, + partition_manager, ownership_timeout, partition_id, + ): + self.cached_parition_ids = [] # type: List[str] + self.eventhub_client = eventhub_client + self.fully_qualified_namespace = eventhub_client._address.hostname # pylint: disable=protected-access + self.eventhub_name = eventhub_client.eh_name + self.consumer_group_name = consumer_group_name + self.owner_id = owner_id + self.partition_manager = partition_manager + self.ownership_timeout = ownership_timeout + self.partition_id = partition_id + self._initializing = True + + def claim_ownership(self): + """Claims ownership for this EventProcessor + """ + if not self.cached_parition_ids: + self._retrieve_partition_ids() + + if self.partition_id is not None: + if self.partition_id in self.cached_parition_ids: + return [self.partition_id] + raise ValueError( + "Wrong partition id:{}. The eventhub has partitions: {}.". + format(self.partition_id, self.cached_parition_ids)) + + if self.partition_manager is None: + return self.cached_parition_ids + + ownership_list = self.partition_manager.list_ownership( + self.fully_qualified_namespace, self.eventhub_name, self.consumer_group_name + ) + to_claim = self._balance_ownership(ownership_list, self.cached_parition_ids) + claimed_list = self.partition_manager.claim_ownership(to_claim) if to_claim else [] + return [x["partition_id"] for x in claimed_list] + + def _retrieve_partition_ids(self): + """List all partition ids of the event hub that the EventProcessor is working on. + """ + self.cached_parition_ids = self.eventhub_client.get_partition_ids() + + def _balance_ownership(self, ownership_list, all_partition_ids): + """Balances and claims ownership of partitions for this EventProcessor. + """ + + now = time.time() + ownership_dict = {x["partition_id"]: x for x in ownership_list} # put the list to dict for fast lookup + not_owned_partition_ids = [pid for pid in all_partition_ids if pid not in ownership_dict] + timed_out_partitions = [x for x in ownership_list + if x["last_modified_time"] + self.ownership_timeout < now] + if self._initializing: # greedily claim all available partitions when an EventProcessor is started. + to_claim = timed_out_partitions + for to_claim_item in to_claim: + to_claim_item["owner_id"] = self.owner_id + for pid in not_owned_partition_ids: + to_claim.append( + { + "fully_qualified_namespace": self.fully_qualified_namespace, + "partition_id": pid, + "eventhub_name": self.eventhub_name, + "consumer_group_name": self.consumer_group_name, + "owner_id": self.owner_id + } + ) + self._initializing = False + if to_claim: # if no expired or unclaimed partitions, go ahead with balancing + return to_claim + + timed_out_partition_ids = [ownership["partition_id"] for ownership in timed_out_partitions] + claimable_partition_ids = not_owned_partition_ids + timed_out_partition_ids + + active_ownership = [ownership for ownership in ownership_list + if ownership["last_modified_time"] + self.ownership_timeout >= now] + active_ownership_by_owner = defaultdict(list) + for ownership in active_ownership: + active_ownership_by_owner[ownership["owner_id"]].append(ownership) + active_ownership_self = active_ownership_by_owner[self.owner_id] + + # calculate expected count per owner + all_partition_count = len(all_partition_ids) + # owners_count is the number of active owners. If self.owner_id is not yet among the active owners, + # then plus 1 to include self. This will make owners_count >= 1. 
+ owners_count = len(active_ownership_by_owner) + \ + (0 if self.owner_id in active_ownership_by_owner else 1) + expected_count_per_owner = all_partition_count // owners_count + # end of calculating expected count per owner + + to_claim = active_ownership_self + if len(active_ownership_self) < expected_count_per_owner: + # Either claims an inactive partition, or steals from other owners + if claimable_partition_ids: # claim an inactive partition if there is + random_partition_id = random.choice(claimable_partition_ids) + random_chosen_to_claim = ownership_dict.get( + random_partition_id, + {"fully_qualified_namespace": self.fully_qualified_namespace, + "partition_id": random_partition_id, + "eventhub_name": self.eventhub_name, + "consumer_group_name": self.consumer_group_name, + } + ) + random_chosen_to_claim["owner_id"] = self.owner_id + to_claim.append(random_chosen_to_claim) + else: # steal from another owner that has the most count + active_ownership_count_group_by_owner = Counter( + dict((x, len(y)) for x, y in active_ownership_by_owner.items())) + most_frequent_owner_id = active_ownership_count_group_by_owner.most_common(1)[0][0] + # randomly choose a partition to steal from the most_frequent_owner + to_steal_partition = random.choice(active_ownership_by_owner[most_frequent_owner_id]) + to_steal_partition["owner_id"] = self.owner_id + to_claim.append(to_steal_partition) + return to_claim + + def get_checkpoints(self): + if self.partition_manager: + checkpoints = self.partition_manager.list_checkpoints( + self.fully_qualified_namespace, self.eventhub_name, self.consumer_group_name) + return {x["partition_id"]: x for x in checkpoints} + + return {} diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/partition_context.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/partition_context.py new file mode 100644 index 000000000000..7dac5a2e713f --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/partition_context.py @@ -0,0 +1,45 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# ----------------------------------------------------------------------------------- + +import logging +from .partition_manager import PartitionManager + +_LOGGER = logging.getLogger(__name__) + + +class PartitionContext(object): + """Contains partition related context information for a PartitionProcessor instance to use. + + Users can use update_checkpoint() of this class to save checkpoint data. + """ + def __init__(self, fully_qualified_namespace, eventhub_name, consumer_group_name, + partition_id, owner_id, partition_manager=None): + # type: (str, str, str, str, str, PartitionManager) -> None + self.fully_qualified_namespace = fully_qualified_namespace + self.partition_id = partition_id + self.eventhub_name = eventhub_name + self.consumer_group_name = consumer_group_name + self.owner_id = owner_id + self._partition_manager = partition_manager + + def update_checkpoint(self, event): + """ + Updates the checkpoint using the given information for the associated partition and consumer group in the + chosen storage service. + + :param ~azure.eventhub.EventData event: The EventData instance which contains the offset and + sequence number information used for checkpoint. 
+ :rtype: None + """ + if self._partition_manager: + self._partition_manager.update_checkpoint( + self.fully_qualified_namespace, self.eventhub_name, self.consumer_group_name, + self.partition_id, event.offset, event.sequence_number + ) + else: + _LOGGER.info( + "namespace %r, eventhub %r, consumer_group %r, partition_id %r " + "update_checkpoint is called without partition manager. No checkpoint is updated.", + self.fully_qualified_namespace, self.eventhub_name, self.consumer_group_name, self.partition_id) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/partition_manager.py new file mode 100644 index 000000000000..0980c37a74a6 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/partition_manager.py @@ -0,0 +1,92 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# ----------------------------------------------------------------------------------- + +from typing import Iterable, Dict, Any +from abc import abstractmethod + + +class PartitionManager(object): + """ + PartitionManager deals with the interaction with the chosen storage service. + It's able to list/claim ownership and save checkpoint. + """ + + @abstractmethod + def list_ownership(self, fully_qualified_namespace, eventhub_name, consumer_group_name): + # type: (str, str, str) -> Iterable[Dict[str, Any]] + """ + Retrieves a complete ownership list from the chosen storage service. + + :param str fully_qualified_namespace: The fully qualified namespace that the event hub belongs to. + The format is like ".servicebus.windows.net" + :param str eventhub_name: The name of the specific Event Hub the ownership are associated with, relative to + the Event Hubs namespace that contains it. + :param str consumer_group_name: The name of the consumer group the ownership are associated with. + :rtype: Iterable[Dict[str, Any]], Iterable of dictionaries containing partition ownership information: + * fully_qualified_namespace + * eventhub_name + * consumer_group_name + * owner_id + * partition_id + * last_modified_time + * etag + """ + + @abstractmethod + def claim_ownership(self, ownership_list): + # type: (Iterable[Dict[str, Any]]) -> Iterable[Dict[str, Any]] + """ + Tries to claim a list of specified ownership. + + :param Iterable[Dict[str,Any]] ownership_list: Iterable of dictionaries containing all the ownership to claim. + :rtype: Iterable[Dict[str,Any]], Iterable of dictionaries containing partition ownership information: + * fully_qualified_namespace + * eventhub_name + * consumer_group_name + * owner_id + * partition_id + * last_modified_time + * etag + """ + + @abstractmethod + def update_checkpoint(self, fully_qualified_namespace, eventhub_name, consumer_group_name, + partition_id, offset, sequence_number): + # type: (str, str, str, str, str, int) -> None + """ + Updates the checkpoint using the given information for the associated partition and + consumer group in the chosen storage service. + + :param str fully_qualified_namespace: The fully qualified namespace that the event hub belongs to. + The format is like ".servicebus.windows.net" + :param str eventhub_name: The name of the specific Event Hub the ownership are associated with, relative to + the Event Hubs namespace that contains it. 
+ :param str consumer_group_name: The name of the consumer group the ownership are associated with. + :param str partition_id: The partition id which the checkpoint is created for. + :param str offset: The offset of the ~azure.eventhub.EventData the new checkpoint will be associated with. + :param int sequence_number: The sequence_number of the ~azure.eventhub.EventData the new checkpoint + will be associated with. + :rtype: None + :raise: `OwnershipLostError` + """ + + @abstractmethod + def list_checkpoints(self, fully_qualified_namespace, eventhub_name, consumer_group_name): + # type: (str, str, str) -> Iterable[Dict[str, Any]] + """List the updated checkpoints from the store + + :param str fully_qualified_namespace: The fully qualified namespace that the event hub belongs to. + The format is like ".servicebus.windows.net" + :param str eventhub_name: The name of the specific Event Hub the ownership are associated with, relative to + the Event Hubs namespace that contains it. + :param str consumer_group_name: The name of the consumer group the ownership are associated with. + :rtype: Iterable[Dict[str,Any]], Iterable of dictionaries containing partition ownership information: + * fully_qualified_namespace + * eventhub_name + * consumer_group_name + * partition_id + * sequence_number + * offset + """ diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/sqlite3_partition_manager.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/sqlite3_partition_manager.py new file mode 100644 index 000000000000..802f5954bedf --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_eventprocessor/sqlite3_partition_manager.py @@ -0,0 +1,174 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# ----------------------------------------------------------------------------------- + +import time +import threading +import uuid +import sqlite3 +import logging +from .partition_manager import PartitionManager + +logger = logging.getLogger(__name__) + + +def _check_table_name(table_name): + for c in table_name: + if not (c.isalnum() or c == "_"): + raise ValueError("Table name \"{}\" is not in correct format".format(table_name)) + return table_name + + +class Sqlite3PartitionManager(PartitionManager): + """An implementation of PartitionManager by using the sqlite3 in Python standard library. + Sqlite3 is a mini sql database that runs in memory or files. + Please don't use this PartitionManager for production use. 
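Not part of this diff: a minimal in-memory sketch of the `PartitionManager` interface documented above. The class name is hypothetical, the store is not thread-safe, and it is for illustration only; a real store must persist data and enforce etag semantics atomically.

```python
import time
import uuid

from azure.eventhub import PartitionManager


class DictPartitionManager(PartitionManager):
    """Hypothetical in-memory store for illustration; not thread-safe, not for production."""

    def __init__(self):
        self._ownership = {}    # (namespace, eventhub, group, partition) -> ownership dict
        self._checkpoints = {}  # same key -> checkpoint dict

    def list_ownership(self, fully_qualified_namespace, eventhub_name, consumer_group_name):
        scope = (fully_qualified_namespace, eventhub_name, consumer_group_name)
        return [o for key, o in self._ownership.items() if key[:3] == scope]

    def claim_ownership(self, ownership_list):
        claimed = []
        for ownership in ownership_list:
            key = (ownership["fully_qualified_namespace"], ownership["eventhub_name"],
                   ownership["consumer_group_name"], ownership["partition_id"])
            current = self._ownership.get(key)
            # Claim only if the partition is unowned or the caller holds the latest etag.
            if current is None or current.get("etag") == ownership.get("etag"):
                ownership["etag"] = str(uuid.uuid4())
                ownership["last_modified_time"] = time.time()
                self._ownership[key] = ownership
                claimed.append(ownership)
        return claimed

    def update_checkpoint(self, fully_qualified_namespace, eventhub_name, consumer_group_name,
                          partition_id, offset, sequence_number):
        key = (fully_qualified_namespace, eventhub_name, consumer_group_name, partition_id)
        self._checkpoints[key] = {
            "fully_qualified_namespace": fully_qualified_namespace,
            "eventhub_name": eventhub_name,
            "consumer_group_name": consumer_group_name,
            "partition_id": partition_id,
            "offset": offset,
            "sequence_number": sequence_number,
        }

    def list_checkpoints(self, fully_qualified_namespace, eventhub_name, consumer_group_name):
        scope = (fully_qualified_namespace, eventhub_name, consumer_group_name)
        return [c for key, c in self._checkpoints.items() if key[:3] == scope]
```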
+ + + """ + primary_keys_dict = {"fully_qualified_namespace": "text", "eventhub_name": "text", + "consumer_group_name": "text", "partition_id": "text"} + primary_keys = list(primary_keys_dict.keys()) + + ownership_data_fields_dict = {"owner_id": "text", "last_modified_time": "real", "etag": "text"} + ownership_fields_dict = dict(primary_keys_dict) + ownership_fields_dict.update(ownership_data_fields_dict) + ownership_data_fields = list(ownership_data_fields_dict.keys()) + ownership_fields = primary_keys + ownership_data_fields + + checkpoint_data_fields_dict = {"sequence_number": "integer", "offset": "text"} + checkpoint_data_fields = list(checkpoint_data_fields_dict.keys()) + checkpoint_fields_dict = dict(primary_keys_dict) + checkpoint_fields_dict.update(checkpoint_data_fields_dict) + checkpoint_fields = primary_keys + checkpoint_data_fields + + def __init__(self, db_filename=":memory:", + ownership_table="ownership", checkpoint_table="checkpoint"): + # type: (str, str, str) -> None + """ + :param db_filename: name of file that saves the sql data. Sqlite3 will run in memory without + a file when db_filename is ":memory:". + :param ownership_table: The table name of the sqlite3 database. + + """ + super(Sqlite3PartitionManager, self).__init__() + self.ownership_table = _check_table_name(ownership_table) + self.checkpoint_table = _check_table_name(checkpoint_table) + conn = sqlite3.connect(db_filename, check_same_thread=False) + self._lock = threading.RLock() + + c = conn.cursor() + try: + ownership_sql = "create table if not exists " + self.ownership_table\ + + "("\ + + ",".join([x[0]+" "+x[1] for x in self.ownership_fields_dict.items()])\ + + ", constraint pk_ownership PRIMARY KEY ("\ + + ",".join(self.primary_keys)\ + + "))" + c.execute(ownership_sql) + + checkpoint_sql = "create table if not exists " + self.checkpoint_table \ + + "(" \ + + ",".join([x[0] + " " + x[1] for x in self.checkpoint_fields_dict.items()]) \ + + ", constraint pk_ownership PRIMARY KEY (" \ + + ",".join(self.primary_keys) \ + + "))" + c.execute(checkpoint_sql) + finally: + c.close() + self.conn = conn + + def list_ownership(self, fully_qualified_namespace, eventhub_name, consumer_group_name): + cursor = self.conn.cursor() + try: + cursor.execute("select " + ",".join(self.ownership_fields) + + " from "+_check_table_name(self.ownership_table) + + " where fully_qualified_namespace=? and eventhub_name=? and consumer_group_name=?", + (fully_qualified_namespace, eventhub_name, consumer_group_name)) + return [dict(zip(self.ownership_fields, row)) for row in cursor.fetchall()] + finally: + cursor.close() + + def claim_ownership(self, ownership_list): + with self._lock: + result = [] + cursor = self.conn.cursor() + try: + for p in ownership_list: + cursor.execute("select etag from " + _check_table_name(self.ownership_table) + + " where "+ " and ".join([field+"=?" for field in self.primary_keys]), + tuple(p.get(field) for field in self.primary_keys)) + cursor_fetch = cursor.fetchall() + if not cursor_fetch: + p["last_modified_time"] = time.time() + p["etag"] = str(uuid.uuid4()) + try: + sql = "insert into " + _check_table_name(self.ownership_table) + " (" \ + + ",".join(self.ownership_fields) \ + + ") values ("+",".join(["?"] * len(self.ownership_fields)) + ")" + cursor.execute(sql, tuple(p.get(field) for field in self.ownership_fields)) + except sqlite3.OperationalError as op_err: + logger.info("EventProcessor %r failed to claim partition %r " + "because it was claimed by another EventProcessor at the same time. 
" + "The Sqlite3 exception is %r", p["owner_id"], p["partition_id"], op_err) + continue + else: + result.append(p) + else: + if p.get("etag") == cursor_fetch[0][0]: + p["last_modified_time"] = time.time() + p["etag"] = str(uuid.uuid4()) + sql = "update " + _check_table_name(self.ownership_table) + " set "\ + + ','.join([field+"=?" for field in self.ownership_data_fields])\ + + " where "\ + + " and ".join([field+"=?" for field in self.primary_keys]) + + cursor.execute(sql, tuple(p.get(field) for field in self.ownership_data_fields) + + tuple(p.get(field) for field in self.primary_keys)) + result.append(p) + else: + logger.info("EventProcessor %r failed to claim partition %r " + "because it was claimed by another EventProcessor at the same time", + p["owner_id"], + p["partition_id"]) + self.conn.commit() + return result + finally: + cursor.close() + + def update_checkpoint( + self, fully_qualified_namespace, eventhub_name, consumer_group_name, partition_id, offset, sequence_number): + with self._lock: + cursor = self.conn.cursor() + localvars = locals() + try: + cursor.execute("insert or replace into " + self.checkpoint_table + "(" + + ",".join([field for field in self.checkpoint_fields]) + + ") values (" + + ",".join(["?"] * len(self.checkpoint_fields)) + + ")", + tuple(localvars[field] for field in self.checkpoint_fields) + ) + self.conn.commit() + finally: + cursor.close() + + def list_checkpoints(self, fully_qualified_namespace, eventhub_name, consumer_group_name): + cursor = self.conn.cursor() + try: + cursor.execute("select " + + ",".join(self.checkpoint_fields) + + " from " + + self.checkpoint_table + + " where fully_qualified_namespace=? and eventhub_name=? and consumer_group_name=?", + (fully_qualified_namespace, eventhub_name, consumer_group_name) + ) + return [dict(zip(self.checkpoint_fields, row)) for row in cursor.fetchall()] + + finally: + cursor.close() + + def close(self): + with self._lock: + self.conn.close() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/_producer_client.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/_producer_client.py new file mode 100644 index 000000000000..fc4b13c45226 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/_producer_client.py @@ -0,0 +1,172 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- +import logging +import threading + +from typing import Any, Union, TYPE_CHECKING, Iterable, List +from uamqp import constants # type:ignore +from .client import EventHubClient +from .producer import EventHubProducer +from .common import EventData, \ + EventHubSharedKeyCredential, EventHubSASTokenCredential, EventDataBatch + +if TYPE_CHECKING: + from azure.core.credentials import TokenCredential # type: ignore + +log = logging.getLogger(__name__) + + +class EventHubProducerClient(EventHubClient): + """ + The EventHubProducerClient class defines a high level interface for + sending events to the Azure Event Hubs service. + + :param str host: The hostname of the Event Hub. + :param str event_hub_path: The path of the specific Event Hub to connect the client to. + :param credential: The credential object used for authentication which implements particular interface + of getting tokens. 
It accepts ~azure.eventhub.EventHubSharedKeyCredential, + ~azure.eventhub.EventHubSASTokenCredential, credential objects generated by the azure-identity library and + objects that implement `get_token(self, *scopes)` method. + :keyword bool logging_enable: Whether to output network trace logs to the logger. Default is `False`. + :keyword float auth_timeout: The time in seconds to wait for a token to be authorized by the service. + The default value is 60 seconds. If set to 0, no timeout will be enforced from the client. + :keyword str user_agent: The user agent that needs to be appended to the built in user agent string. + :keyword int retry_total: The total number of attempts to redo the failed operation when an error happened. + Default value is 3. + :keyword transport_type: The type of transport protocol that will be used for + communicating with the Event Hubs service. Default is ~azure.eventhub.TransportType.Amqp. + :paramtype transport_type: ~azure.eventhub.TransportType + :keyword dict http_proxy: HTTP proxy settings. This must be a dictionary with the following + keys: 'proxy_hostname' (str value) and 'proxy_port' (int value). + Additionally the following keys may also be present: 'username', 'password'. + + .. admonition:: Example: + + .. literalinclude:: ../samples/sync_samples/sample_code_eventhub.py + :start-after: [START create_eventhub_producer_client_sync] + :end-before: [END create_eventhub_producer_client_sync] + :language: python + :dedent: 4 + :caption: Create a new instance of the EventHubProducerClient. + + """ + def __init__(self, host, event_hub_path, credential, **kwargs): + # type:(str, str, Union[EventHubSharedKeyCredential, EventHubSASTokenCredential, TokenCredential], Any) -> None + super(EventHubProducerClient, self).__init__( + host=host, event_hub_path=event_hub_path, credential=credential, + network_tracing=kwargs.get("logging_enable"), **kwargs) + self._producers = [] # type: List[EventHubProducer] + self._client_lock = threading.Lock() + self._producers_locks = [] # type: List[threading.Lock] + self._max_message_size_on_link = 0 + + def _init_locks_for_producers(self): + if not self._producers: + with self._client_lock: + if not self._producers: + num_of_producers = len(self.get_partition_ids()) + 1 + self._producers = [None] * num_of_producers + for _ in range(num_of_producers): + self._producers_locks.append(threading.Lock()) + + def send(self, event_data, **kwargs): + # type: (Union[EventData, EventDataBatch, Iterable[EventData]], Any) -> None + """ + Sends event data and blocks until acknowledgement is received or operation times out. + + :param event_data: The event to be sent. It can be an EventData object, or iterable of EventData objects. + :type event_data: ~azure.eventhub.EventData, ~azure.eventhub.EventDataBatch, EventData Iterator/Generator/list + :keyword str partition_key: With the given partition_key, event data will land to + a particular partition of the Event Hub decided by the service. + :keyword str partition_id: The specific partition ID to send to. Default is None, in which case the service + will assign to all partitions using round-robin. + :keyword float timeout: The maximum wait time to send the event data. + If not specified, the default wait time specified when the producer was created will be used. 
+ :rtype: None + :raises: ~azure.eventhub.AuthenticationError, ~azure.eventhub.ConnectError, ~azure.eventhub.ConnectionLostError, + ~azure.eventhub.EventDataError, ~azure.eventhub.EventDataSendError, ~azure.eventhub.EventHubError + + .. admonition:: Example: + + .. literalinclude:: ../samples/sync_samples/sample_code_eventhub.py + :start-after: [START eventhub_producer_client_send_sync] + :end-before: [END eventhub_producer_client_send_sync] + :language: python + :dedent: 4 + :caption: Sends event data + + """ + partition_id = kwargs.pop("partition_id", None) + + self._init_locks_for_producers() + + producer_index = int(partition_id) if partition_id is not None else -1 + if self._producers[producer_index] is None or\ + self._producers[producer_index]._closed: # pylint:disable=protected-access + with self._producers_locks[producer_index]: + if self._producers[producer_index] is None: + self._producers[producer_index] = self._create_producer(partition_id=partition_id) + + with self._producers_locks[producer_index]: + self._producers[producer_index].send(event_data, **kwargs) + + def create_batch(self, max_size=None): + # type:(int) -> EventDataBatch + """ + Create an EventDataBatch object with max size being max_size. + The max_size should be no greater than the max allowed message size defined by the service side. + + :param int max_size: The maximum size of bytes data that an EventDataBatch object can hold. + :rtype: ~azure.eventhub.EventDataBatch + + .. admonition:: Example: + + .. literalinclude:: ../samples/sync_samples/sample_code_eventhub.py + :start-after: [START eventhub_producer_client_create_batch_sync] + :end-before: [END eventhub_producer_client_create_batch_sync] + :language: python + :dedent: 4 + :caption: Create EventDataBatch object within limited size + + """ + # pylint: disable=protected-access + if not self._max_message_size_on_link: + self._init_locks_for_producers() + with self._producers_locks[-1]: + if self._producers[-1] is None: + self._producers[-1] = self._create_producer(partition_id=None) + self._producers[-1]._open_with_retry() # pylint: disable=protected-access + with self._client_lock: + self._max_message_size_on_link =\ + self._producers[-1]._handler.message_handler._link.peer_max_message_size \ + or constants.MAX_MESSAGE_LENGTH_BYTES + + if max_size and max_size > self._max_message_size_on_link: + raise ValueError('Max message size: {} is too large, acceptable max batch size is: {} bytes.' + .format(max_size, self._max_message_size_on_link)) + + return EventDataBatch(max_size=(max_size or self._max_message_size_on_link)) + + def close(self): + # type: () -> None + """ + Close down the client. If the client has already closed, this will be a no op. + + :rtype: None + + .. admonition:: Example: + + .. literalinclude:: ../samples/sync_samples/sample_code_eventhub.py + :start-after: [START eventhub_producer_client_close_sync] + :end-before: [END eventhub_producer_client_close_sync] + :language: python + :dedent: 4 + :caption: Close down the client. 
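A producer usage sketch tying together `send`, `create_batch`, and `close` as documented above (not part of this diff; connection values are placeholders, and `EventDataBatch.try_add` is assumed to be the batch-append method in this preview).

```python
from azure.eventhub import EventData, EventHubProducerClient, EventHubSharedKeyCredential

producer = EventHubProducerClient(
    host="<namespace>.servicebus.windows.net",
    event_hub_path="<event-hub-name>",
    credential=EventHubSharedKeyCredential("<policy-name>", "<key>"),
)

try:
    # Batch sized against the maximum message size negotiated on the AMQP link.
    batch = producer.create_batch()
    batch.try_add(EventData("first event"))   # assumed EventDataBatch API in this preview
    batch.try_add(EventData("second event"))
    producer.send(batch)

    # Events sharing a partition_key are routed to the same partition by the service.
    producer.send([EventData("third event")], partition_key="device-42")
finally:
    producer.close()
```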
+ + """ + for p in self._producers: + if p: + p.close() + self._conn_manager.close_connection() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py index 9a7ca85e3330..60e04cc33cf7 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/client.py @@ -17,9 +17,9 @@ from uamqp import authentication # type: ignore from uamqp import constants # type: ignore -from azure.eventhub.producer import EventHubProducer -from azure.eventhub.consumer import EventHubConsumer -from azure.eventhub.common import parse_sas_token, EventPosition +from .producer import EventHubProducer +from .consumer import EventHubConsumer +from .common import parse_sas_token, EventPosition from .client_abstract import EventHubClientAbstract from .common import EventHubSASTokenCredential, EventHubSharedKeyCredential from ._connection_manager import get_connection_manager @@ -35,15 +35,6 @@ class EventHubClient(EventHubClientAbstract): """ The EventHubClient class defines a high level interface for sending events to and receiving events from the Azure Event Hubs service. - - Example: - .. literalinclude:: ../examples/test_examples_eventhub.py - :start-after: [START create_eventhub_client] - :end-before: [END create_eventhub_client] - :language: python - :dedent: 4 - :caption: Create a new instance of the Event Hub client - """ def __init__(self, host, event_hub_path, credential, **kwargs): @@ -62,11 +53,6 @@ def _create_auth(self): """ Create an ~uamqp.authentication.SASTokenAuth instance to authenticate the session. - - :param username: The name of the shared access policy. - :type username: str - :param password: The shared access key. - :type password: str """ http_proxy = self._config.http_proxy transport_type = self._config.transport_type @@ -143,7 +129,6 @@ def _management_request(self, mgmt_msg, op_type): log.info("%r returns an exception %r", self._container_id, last_exception) # pylint:disable=specify-parameter-names-in-call raise last_exception - def get_properties(self): # type:() -> Dict[str, Any] """ @@ -191,8 +176,7 @@ def get_partition_properties(self, partition): -'last_enqueued_time_utc' -'is_empty' - :param partition: The target partition id. - :type partition: str + :param str partition: The target partition id. :rtype: dict :raises: ~azure.eventhub.ConnectError """ @@ -212,7 +196,7 @@ def get_partition_properties(self, partition): output['is_empty'] = partition_info[b'is_partition_empty'] return output - def create_consumer(self, consumer_group, partition_id, event_position, **kwargs): + def _create_consumer(self, consumer_group, partition_id, event_position, **kwargs): # type: (str, str, EventPosition, Any) -> EventHubConsumer """ Create a consumer to the client for a particular consumer group and partition. @@ -238,15 +222,6 @@ def create_consumer(self, consumer_group, partition_id, event_position, **kwargs It is set to `False` by default. :type track_last_enqueued_event_properties: bool :rtype: ~azure.eventhub.consumer.EventHubConsumer - - Example: - .. literalinclude:: ../examples/test_examples_eventhub.py - :start-after: [START create_eventhub_client_receiver] - :end-before: [END create_eventhub_client_receiver] - :language: python - :dedent: 4 - :caption: Add a consumer to the client for a particular consumer group and partition. 
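The `get_properties` and `get_partition_properties` hunks above describe the metadata surface of the client. A brief sketch of how they might be called; it assumes the public producer client inherits these getters (plus `get_partition_ids`, which `send` relies on above), and all connection values are placeholders.

```python
from azure.eventhub import EventHubProducerClient, EventHubSharedKeyCredential

client = EventHubProducerClient(
    host="<namespace>.servicebus.windows.net",
    event_hub_path="<event-hub-name>",
    credential=EventHubSharedKeyCredential("<policy-name>", "<shared-access-key>"),
)

print(client.get_properties())            # event hub level metadata as a dict
for pid in client.get_partition_ids():    # e.g. ["0", "1"]
    props = client.get_partition_properties(pid)
    print(pid, props["last_enqueued_time_utc"], props["is_empty"])
client.close()
```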
- """ owner_level = kwargs.get("owner_level") prefetch = kwargs.get("prefetch") or self._config.prefetch @@ -260,7 +235,7 @@ def create_consumer(self, consumer_group, partition_id, event_position, **kwargs track_last_enqueued_event_properties=track_last_enqueued_event_properties) return handler - def create_producer(self, partition_id=None, send_timeout=None): + def _create_producer(self, partition_id=None, send_timeout=None): # type: (str, float) -> EventHubProducer """ Create an producer to send EventData object to an EventHub. @@ -276,15 +251,6 @@ def create_producer(self, partition_id=None, send_timeout=None): queued. Default value is 60 seconds. If set to 0, there will be no timeout. :type send_timeout: float :rtype: ~azure.eventhub.producer.EventHubProducer - - Example: - .. literalinclude:: ../examples/test_examples_eventhub.py - :start-after: [START create_eventhub_client_sender] - :end-before: [END create_eventhub_client_sender] - :language: python - :dedent: 4 - :caption: Add a producer to the client to send EventData. - """ target = "amqps://{}{}".format(self._address.hostname, self._address.path) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/client_abstract.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/client_abstract.py index 00f3e867b598..6a8979369c90 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/client_abstract.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/client_abstract.py @@ -14,7 +14,7 @@ from uamqp import types # type: ignore from azure.eventhub import __version__ -from azure.eventhub.configuration import _Configuration +from .configuration import Configuration from .common import EventHubSharedKeyCredential, EventHubSASTokenCredential, _Address try: @@ -94,8 +94,6 @@ class EventHubClientAbstract(object): # pylint:disable=too-many-instance-attrib def __init__(self, host, event_hub_path, credential, **kwargs): # type:(str, str, Union[EventHubSharedKeyCredential, EventHubSASTokenCredential, TokenCredential], Any) -> None """ - Constructs a new EventHubClient. - :param host: The hostname of the Event Hub. :type host: str :param event_hub_path: The path of the specific Event Hub to connect the client to. @@ -106,10 +104,10 @@ def __init__(self, host, event_hub_path, credential, **kwargs): :param credential: The credential object used for authentication which implements particular interface of getting tokens. It accepts ~azure.eventhub.EventHubSharedKeyCredential, ~azure.eventhub.EventHubSASTokenCredential, credential objects generated by the azure-identity library and - objects that implement get_token(self, *scopes) method. + objects that implement `get_token(self, *scopes)` method. :param http_proxy: HTTP proxy settings. This must be a dictionary with the following - keys: 'proxy_hostname' (str value) and 'proxy_port' (int value). - Additionally the following keys may also be present: 'username', 'password'. + keys - 'proxy_hostname' (str value) and 'proxy_port' (int value). + Additionally the following keys may also be present - 'username', 'password'. :type http_proxy: dict[str, Any] :param auth_timeout: The time in seconds to wait for a token to be authorized by the service. The default value is 60 seconds. If set to 0, no timeout will be enforced from the client. @@ -122,17 +120,6 @@ def __init__(self, host, event_hub_path, credential, **kwargs): :param transport_type: The type of transport protocol that will be used for communicating with the Event Hubs service. Default is ~azure.eventhub.TransportType.Amqp. 
:type transport_type: ~azure.eventhub.TransportType - :param prefetch: The message prefetch count of the consumer. Default is 300. - :type prefetch: int - :param max_batch_size: Receive a batch of events. Batch size will be up to the maximum specified, but - will return as soon as service returns no new events. Default value is the same as prefetch. - :type max_batch_size: int - :param receive_timeout: The timeout in seconds to receive a batch of events from an Event Hub. - Default value is 0 seconds. - :type receive_timeout: float - :param send_timeout: The timeout in seconds for an individual event to be sent from the time that it is - queued. Default value is 60 seconds. If set to 0, there will be no timeout. - :type send_timeout: float """ self.eh_name = event_hub_path self._host = host @@ -145,7 +132,7 @@ def __init__(self, host, event_hub_path, credential, **kwargs): self._auto_reconnect = kwargs.get("auto_reconnect", True) self._mgmt_target = "amqps://{}/{}".format(self._host, self.eh_name) self._auth_uri = "sb://{}{}".format(self._address.hostname, self._address.path) - self._config = _Configuration(**kwargs) + self._config = Configuration(**kwargs) self._debug = self._config.network_tracing log.info("%r: Created the Event Hub client", self._container_id) @@ -162,7 +149,7 @@ def _create_properties(self, user_agent=None): # pylint: disable=no-self-use :rtype: dict """ properties = {} - product = "azure-eventhub" + product = "azsdk-python-eventhubs" properties[types.AMQPSymbol("product")] = product properties[types.AMQPSymbol("version")] = __version__ framework = "Python {}.{}.{}, {}".format( @@ -190,51 +177,34 @@ def _add_span_request_attributes(self, span): @classmethod def from_connection_string(cls, conn_str, **kwargs): - """Create an EventHubClient from an EventHub connection string. + """ + Create an EventHubProducerClient/EventHubConsumerClient from a connection string. - :param conn_str: The connection string of an eventhub - :type conn_str: str - :param event_hub_path: The path of the specific Event Hub to connect the client to, if the EntityName is - not included in the connection string. - :type event_hub_path: str - :param network_tracing: Whether to output network trace logs to the logger. Default - is `False`. - :type network_tracing: bool - :param http_proxy: HTTP proxy settings. This must be a dictionary with the following - keys: 'proxy_hostname' (str value) and 'proxy_port' (int value). - Additionally the following keys may also be present: 'username', 'password'. - :type http_proxy: dict[str, Any] - :param auth_timeout: The time in seconds to wait for a token to be authorized by the service. + :param str conn_str: The connection string of an eventhub. + :keyword str event_hub_path: The path of the specific Event Hub to connect the client to. + :keyword credential: The credential object used for authentication which implements particular interface + of getting tokens. It accepts ~azure.eventhub.EventHubSharedKeyCredential, + ~azure.eventhub.EventHubSASTokenCredential, credential objects generated by the azure-identity library and + objects that implement `get_token(self, *scopes)` method. + :keyword bool network_tracing: Whether to output network trace logs to the logger. Default is `False`. + :keyword dict[str, Any] http_proxy: HTTP proxy settings. This must be a dictionary with the following + keys - 'proxy_hostname' (str value) and 'proxy_port' (int value). + Additionally the following keys may also be present - 'username', 'password'. 
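Since `from_connection_string` accepts the same keyword arguments described here, a hedged sketch of constructing a client through an HTTP proxy. The proxy values and connection string are placeholders, and `TransportType` is assumed to be importable from `azure.eventhub` as the default value in the docstring suggests.

```python
from azure.eventhub import EventHubProducerClient, TransportType

producer_client = EventHubProducerClient.from_connection_string(
    "<event-hubs-connection-string>",
    event_hub_path="<event-hub-name>",
    http_proxy={
        "proxy_hostname": "127.0.0.1",   # str, required
        "proxy_port": 8899,              # int, required
        "username": "proxy-user",        # optional
        "password": "proxy-password",    # optional
    },
    transport_type=TransportType.Amqp,
    retry_total=3,
)
```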
+ :keyword float auth_timeout: The time in seconds to wait for a token to be authorized by the service. The default value is 60 seconds. If set to 0, no timeout will be enforced from the client. - :type auth_timeout: float - :param user_agent: The user agent that needs to be appended to the built in user agent string. - :type user_agent: str - :param retry_total: The total number of attempts to redo the failed operation when an error happened. Default - value is 3. - :type retry_total: int + :keyword str user_agent: The user agent that needs to be appended to the built in user agent string. + :keyword int retry_total: The total number of attempts to redo the failed operation when an error happened. + Default value is 3. :param transport_type: The type of transport protocol that will be used for communicating with the Event Hubs service. Default is ~azure.eventhub.TransportType.Amqp. :type transport_type: ~azure.eventhub.TransportType - :param prefetch: The message prefetch count of the consumer. Default is 300. - :type prefetch: int - :param max_batch_size: Receive a batch of events. Batch size will be up to the maximum specified, but - will return as soon as service returns no new events. Default value is the same as prefetch. - :type max_batch_size: int - :param receive_timeout: The timeout in seconds to receive a batch of events from an Event Hub. - Default value is 0 seconds, meaning there is no timeout. - :type receive_timeout: float - :param send_timeout: The timeout in seconds for an individual event to be sent from the time that it is - queued. Default value is 60 seconds. If set to 0, there will be no timeout. - :type send_timeout: float - - Example: - .. literalinclude:: ../examples/test_examples_eventhub.py - :start-after: [START create_eventhub_client_connstr] - :end-before: [END create_eventhub_client_connstr] - :language: python - :dedent: 4 - :caption: Create an EventHubClient from a connection string. - + :keyword partition_manager: **Only for EventHubConsumerClient** + stores the load balancing data and checkpoint data when receiving events + if partition_manager is specified. If it's None, this EventHubConsumerClient instance will receive + events without load balancing and checkpoint. + :paramtype partition_manager: Implementation classes of ~azure.eventhub.aio.PartitionManager + :keyword float load_balancing_interval: **Only for EventHubConsumerClient** + When load balancing kicks in, this is the interval in seconds between two load balancing. Default is 10. """ event_hub_path = kwargs.pop("event_hub_path", None) address, policy, key, entity = _parse_conn_str(conn_str) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/common.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/common.py index 8110baf344eb..c1a82957a687 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/common.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/common.py @@ -15,7 +15,7 @@ from azure.core.settings import settings # type: ignore -from azure.eventhub.error import EventDataError +from .error import EventDataError log = logging.getLogger(__name__) @@ -43,8 +43,12 @@ class EventData(object): """ The EventData class is a holder of event content. - Example: - .. literalinclude:: ../examples/test_examples_eventhub.py + :param body: The data to send in a single message. body can be type of str or bytes. + :type body: str or bytes + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/sync_samples/sample_code_eventhub.py :start-after: [START create_event_data] :end-before: [END create_event_data] :language: python @@ -64,13 +68,6 @@ class EventData(object): PROP_RUNTIME_INFO_RETRIEVAL_TIME_UTC = b"runtime_info_retrieval_time_utc" def __init__(self, body=None): - """ - Initialize EventData. - - :param body: The data to send in a single message. - :type body: str, bytes or list - """ - self._last_enqueued_event_properties = {} if body and isinstance(body, list): self.message = Message(body[0]) @@ -81,6 +78,7 @@ def __init__(self, body=None): else: self.message = Message(body) self.message.annotations = {} + self.message.application_properties = {} def __str__(self): dic = { @@ -233,8 +231,7 @@ def application_properties(self, value): """ Application defined properties on the message. - :param value: The application properties for the EventData. - :type value: dict + :param dict value: The application properties for the EventData. """ properties = None if value is None else dict(value) self.message.application_properties = properties @@ -260,6 +257,22 @@ def body(self): except TypeError: raise ValueError("Message data empty.") + @property + def last_enqueued_event_properties(self): + """ + The latest enqueued event information. This property will be updated each time an event is received when + the receiver is created with `track_last_enqueued_event_properties` being `True`. + The dict includes following information of the partition: + + - `sequence_number` + - `offset` + - `enqueued_time` + - `retrieval_time` + + :rtype: dict or None + """ + return self._get_last_enqueued_event_properties() + def body_as_str(self, encoding='UTF-8'): """ The body of the event data as a string if the data is of a @@ -306,10 +319,15 @@ class EventDataBatch(object): Use `try_add` method to add events until the maximum batch size limit in bytes has been reached - a `ValueError` will be raised. - Use `send` method of ~azure.eventhub.EventHubProducer or ~azure.eventhub.aio.EventHubProducer for sending. + Use `send` method of ~azure.eventhub.EventHubProducerClient or ~azure.eventhub.aio.EventHubProducerClient + for sending. The `send` method accepts partition_key as a parameter for sending a particular partition. + + **Please use the create_batch method of EventHubProducerClient + to create an EventDataBatch object instead of instantiating an EventDataBatch object directly.** - Please use the `create_batch` method of `EventHubProducer` - to create an `EventDataBatch` object instead of instantiating an `EventDataBatch` object directly. + :param int max_size: The maximum size of bytes data that an EventDataBatch object can hold. + :param str partition_key: With the given partition_key, event data will land to a particular partition of the + Event Hub decided by the service. 
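The hunks above document `EventData` construction along with its `application_properties`, `body_as_str`, and `last_enqueued_event_properties` members, and the `EventDataBatch` sizing rules. A small illustrative sketch of the `EventData` pieces; the values are arbitrary.

```python
from azure.eventhub import EventData

event = EventData("String data")                         # body can be str or bytes
event.application_properties = {"origin": "sensor-1"}    # application-defined metadata

print(event.body_as_str())                               # "String data"

# Populated only when the receiver was created with
# track_last_enqueued_event_properties=True; otherwise empty/None.
print(event.last_enqueued_event_properties)
```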
""" def __init__(self, max_size=None, partition_key=None): @@ -324,14 +342,6 @@ def __init__(self, max_size=None, partition_key=None): def __len__(self): return self._count - @property - def size(self): - """The size in bytes - - :return: int - """ - return self._size - @staticmethod def _from_batch(batch_data, partition_key=None): batch_data_instance = EventDataBatch(partition_key=partition_key) @@ -349,11 +359,20 @@ def _set_partition_key(self, value): self.message.annotations = annotations self.message.header = header + @property + def size(self): + """The size of EventDataBatch object in bytes + + :rtype: int + """ + return self._size + def try_add(self, event_data): """ - The message size is a sum up of body, properties, header, etc. - :param event_data: ~azure.eventhub.EventData - :return: None + Try to add an EventData object, the size of EventData is a sum up of body, application_properties, etc. + :param event_data: The EventData object which is attempted to be added. + :type event_data: ~azure.eventhub.EventData + :rtype: None :raise: ValueError, when exceeding the size limit. """ if event_data is None: @@ -387,7 +406,13 @@ def try_add(self, event_data): class EventPosition(object): """ - The position(offset, sequence or timestamp) where a consumer starts. Examples: + The position(offset, sequence or timestamp) where a consumer starts. + + :param value: The event position value. The value can be type of datetime.datetime or int or str. + :type value: int, str or datetime.datetime + :param bool inclusive: Whether to include the supplied value as the start point. + + Examples: Beginning of the event stream: >>> event_pos = EventPosition("-1") @@ -404,14 +429,6 @@ class EventPosition(object): """ def __init__(self, value, inclusive=False): - """ - Initialize EventPosition. - - :param value: The event position value. - :type value: ~datetime.datetime or int or str - :param inclusive: Whether to include the supplied value as the start point. - :type inclusive: bool - """ self.value = value if value is not None else "-1" self.inclusive = inclusive @@ -437,14 +454,12 @@ def _selector(self): class EventHubSASTokenCredential(object): """ SAS token used for authentication. + + :param token: A SAS token or function that returns a SAS token. If a function is supplied, + it will be used to retrieve subsequent tokens in the case of token expiry. The function should + take no arguments. The token can be type of str or Callable object. """ def __init__(self, token): - """ - :param token: A SAS token or function that returns a SAS token. If a function is supplied, - it will be used to retrieve subsequent tokens in the case of token expiry. The function should - take no arguments. - :type token: str or callable - """ self.token = token def get_sas_token(self): @@ -457,15 +472,11 @@ def get_sas_token(self): class EventHubSharedKeyCredential(object): """ The shared access key credential used for authentication. + + :param str policy: The name of the shared access policy. + :param str key: The shared access key. """ def __init__(self, policy, key): - """ - :param policy: The name of the shared access policy. - :type policy: str - :param key: The shared access key. 
- :type key: str - """ - self.policy = policy self.key = key diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/configuration.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/configuration.py index e7ea5b43df46..d6bd9ab881bd 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/configuration.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/configuration.py @@ -5,7 +5,7 @@ from uamqp.constants import TransportType # type: ignore -class _Configuration(object): # pylint:disable=too-many-instance-attributes +class Configuration(object): # pylint:disable=too-many-instance-attributes def __init__(self, **kwargs): self.user_agent = kwargs.get("user_agent") self.retry_total = kwargs.get('retry_total', 3) diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py index 87869aaa3a7b..0550bb618a74 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/consumer.py @@ -14,8 +14,8 @@ from uamqp import types, errors, utils # type: ignore from uamqp import ReceiveClient, Source # type: ignore -from azure.eventhub.common import EventData, EventPosition -from azure.eventhub.error import _error_handler +from .common import EventData, EventPosition +from .error import _error_handler from ._consumer_producer_mixin import ConsumerProducerMixin @@ -235,15 +235,6 @@ def receive(self, max_batch_size=None, timeout=None): :rtype: list[~azure.eventhub.common.EventData] :raises: ~azure.eventhub.AuthenticationError, ~azure.eventhub.ConnectError, ~azure.eventhub.ConnectionLostError, ~azure.eventhub.EventHubError - - Example: - .. literalinclude:: ../examples/test_examples_eventhub.py - :start-after: [START eventhub_client_sync_receive] - :end-before: [END eventhub_client_sync_receive] - :language: python - :dedent: 4 - :caption: Receive events from the EventHub. - """ self._check_closed() @@ -257,15 +248,6 @@ def close(self): # pylint:disable=useless-super-delegation """ Close down the handler. If the handler has already closed, this will be a no op. - - Example: - .. literalinclude:: ../examples/test_examples_eventhub.py - :start-after: [START eventhub_client_receiver_close] - :end-before: [END eventhub_client_receiver_close] - :language: python - :dedent: 4 - :caption: Close down the handler. - """ super(EventHubConsumer, self).close() diff --git a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py index a7e37980dc18..8b50a5e26748 100644 --- a/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py +++ b/sdk/eventhub/azure-eventhubs/azure/eventhub/producer.py @@ -15,8 +15,8 @@ from azure.core.tracing import SpanKind, AbstractSpan # type: ignore from azure.core.settings import settings # type: ignore -from azure.eventhub.common import EventData, EventDataBatch -from azure.eventhub.error import _error_handler, OperationTimeoutError, EventDataError +from .common import EventData, EventDataBatch +from .error import _error_handler, OperationTimeoutError, EventDataError from ._consumer_producer_mixin import ConsumerProducerMixin @@ -164,15 +164,6 @@ def create_batch(self, max_size=None, partition_key=None): :type partition_key: str :return: an EventDataBatch instance :rtype: ~azure.eventhub.EventDataBatch - - Example: - .. 
literalinclude:: ../examples/test_examples_eventhub.py - :start-after: [START eventhub_client_sync_create_batch] - :end-before: [END eventhub_client_sync_create_batch] - :language: python - :dedent: 4 - :caption: Create EventDataBatch object within limited size - """ if not self._max_message_size_on_link: @@ -204,15 +195,6 @@ def send(self, event_data, partition_key=None, timeout=None): ~azure.eventhub.EventDataError, ~azure.eventhub.EventDataSendError, ~azure.eventhub.EventHubError :return: None :rtype: None - - Example: - .. literalinclude:: ../examples/test_examples_eventhub.py - :start-after: [START eventhub_client_sync_send] - :end-before: [END eventhub_client_sync_send] - :language: python - :dedent: 4 - :caption: Sends an event data and blocks until acknowledgement is received or operation times out. - """ # Tracing code span_impl_type = settings.tracing_implementation() # type: Type[AbstractSpan] @@ -252,14 +234,5 @@ def close(self): # pylint:disable=useless-super-delegation """ Close down the handler. If the handler has already closed, this will be a no op. - - Example: - .. literalinclude:: ../examples/test_examples_eventhub.py - :start-after: [START eventhub_client_sender_close] - :end-before: [END eventhub_client_sender_close] - :language: python - :dedent: 4 - :caption: Close down the handler. - """ super(EventHubProducer, self).close() diff --git a/sdk/eventhub/azure-eventhubs/conftest.py b/sdk/eventhub/azure-eventhubs/conftest.py index 923ed270827b..b55f0ad6f204 100644 --- a/sdk/eventhub/azure-eventhubs/conftest.py +++ b/sdk/eventhub/azure-eventhubs/conftest.py @@ -18,9 +18,10 @@ collect_ignore.append("tests/livetest/asynctests") collect_ignore.append("tests/eventprocessor") collect_ignore.append("features") - collect_ignore.append("examples/async_examples") + collect_ignore.append("samples/async_samples") -from azure.eventhub import EventHubClient, EventPosition +from azure.eventhub.client import EventHubClient +from azure.eventhub import EventPosition def pytest_addoption(parser): @@ -155,7 +156,7 @@ def invalid_policy(live_eventhub_config): @pytest.fixture() def aad_credential(): try: - return os.environ['AAD_CLIENT_ID'], os.environ['AAD_SECRET'], os.environ['AAD_TENANT_ID'] + return os.environ['AZURE_CLIENT_ID'], os.environ['AZURE_CLIENT_SECRET'], os.environ['AZURE_TENANT_ID'] except KeyError: pytest.skip('No Azure Active Directory credential found') @@ -166,13 +167,14 @@ def connstr_receivers(connection_str): partitions = client.get_partition_ids() receivers = [] for p in partitions: - receiver = client.create_consumer(consumer_group="$default", partition_id=p, event_position=EventPosition("-1"), prefetch=500) + receiver = client._create_consumer(consumer_group="$default", partition_id=p, event_position=EventPosition("-1"), prefetch=500) receiver._open() receivers.append(receiver) yield connection_str, receivers for r in receivers: r.close() + client.close() @pytest.fixture() @@ -182,8 +184,9 @@ def connstr_senders(connection_str): senders = [] for p in partitions: - sender = client.create_producer(partition_id=p) + sender = client._create_producer(partition_id=p) senders.append(sender) yield connection_str, senders for s in senders: s.close() + client.close() diff --git a/sdk/eventhub/azure-eventhubs/examples/async_examples/test_examples_eventhub_async.py b/sdk/eventhub/azure-eventhubs/examples/async_examples/test_examples_eventhub_async.py deleted file mode 100644 index eeb20b3594af..000000000000 --- 
a/sdk/eventhub/azure-eventhubs/examples/async_examples/test_examples_eventhub_async.py +++ /dev/null @@ -1,110 +0,0 @@ -#------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -#-------------------------------------------------------------------------- - -import pytest -import logging -import asyncio - - -@pytest.mark.asyncio -async def test_example_eventhub_async_send_and_receive(live_eventhub_config): - # [START create_eventhub_client_async] - from azure.eventhub.aio import EventHubClient - import os - connection_str = "Endpoint=sb://{}/;SharedAccessKeyName={};SharedAccessKey={};EntityPath={}".format( - os.environ['EVENT_HUB_HOSTNAME'], - os.environ['EVENT_HUB_SAS_POLICY'], - os.environ['EVENT_HUB_SAS_KEY'], - os.environ['EVENT_HUB_NAME']) - client = EventHubClient.from_connection_string(connection_str) - # [END create_eventhub_client_async] - - from azure.eventhub import EventData, EventPosition - - # [START create_eventhub_client_async_sender] - client = EventHubClient.from_connection_string(connection_str) - # Create an async producer. - producer = client.create_producer(partition_id="0") - # [END create_eventhub_client_async_sender] - - # [START create_eventhub_client_async_receiver] - client = EventHubClient.from_connection_string(connection_str) - # Create an async consumer. - receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest')) - # Create an exclusive async consumer. - receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest'), owner_level=1) - # [END create_eventhub_client_async_receiver] - - client = EventHubClient.from_connection_string(connection_str) - producer = client.create_producer(partition_id="0") - consumer = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest')) - - await consumer.receive(timeout=1) - - # [START eventhub_client_async_create_batch] - event_data_batch = await producer.create_batch(max_size=10000) - while True: - try: - event_data_batch.try_add(EventData('Message inside EventBatchData')) - except ValueError: - # The EventDataBatch object reaches its max_size. - # You can send the full EventDataBatch object and create a new one here. 
- break - # [END eventhub_client_async_create_batch] - - # [START eventhub_client_async_send] - async with producer: - event_data = EventData(b"A single event") - await producer.send(event_data) - # [END eventhub_client_async_send] - - await asyncio.sleep(1) - - # [START eventhub_client_async_receive] - logger = logging.getLogger("azure.eventhub") - async with consumer: - received = await consumer.receive(timeout=5) - for event_data in received: - logger.info("Message received:{}".format(event_data.body_as_str())) - # [END eventhub_client_async_receive] - assert len(received) > 0 - assert received[0].body_as_str() == "A single event" - assert list(received[-1].body)[0] == b"A single event" - - -@pytest.mark.asyncio -async def test_example_eventhub_async_producer_ops(live_eventhub_config, connection_str): - from azure.eventhub.aio import EventHubClient - from azure.eventhub import EventData - - # [START eventhub_client_async_sender_close] - client = EventHubClient.from_connection_string(connection_str) - producer = client.create_producer(partition_id="0") - try: - await producer.send(EventData(b"A single event")) - finally: - # Close down the send handler. - await producer.close() - # [END eventhub_client_async_sender_close] - - -@pytest.mark.asyncio -async def test_example_eventhub_async_consumer_ops(live_eventhub_config, connection_str): - from azure.eventhub.aio import EventHubClient - from azure.eventhub import EventPosition - - # [START eventhub_client_async_receiver_close] - client = EventHubClient.from_connection_string(connection_str) - consumer = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest')) - try: - # Open and receive - await consumer.receive(timeout=1) - except: - raise - finally: - # Close down the receive handler. - await consumer.close() - # [END eventhub_client_async_receiver_close] diff --git a/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py b/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py deleted file mode 100644 index c0826e274704..000000000000 --- a/sdk/eventhub/azure-eventhubs/examples/eventprocessor/event_processor_example.py +++ /dev/null @@ -1,39 +0,0 @@ -import asyncio -import logging -import os -from azure.eventhub.aio import EventHubClient -from azure.eventhub.aio.eventprocessor import EventProcessor, PartitionProcessor -from azure.eventhub.aio.eventprocessor import SamplePartitionManager - -RECEIVE_TIMEOUT = 5 # timeout in seconds for a receiving operation. 0 or None means no timeout -RETRY_TOTAL = 3 # max number of retries for receive operations within the receive timeout. Actual number of retries clould be less if RECEIVE_TIMEOUT is too small -CONNECTION_STR = os.environ["EVENT_HUB_CONN_STR"] - -logging.basicConfig(level=logging.INFO) - - -async def do_operation(event): - # do some sync or async operations. 
If the operation is i/o intensive, async will have better performance - print(event) - - -class MyPartitionProcessor(PartitionProcessor): - async def process_events(self, events, partition_context): - if events: - await asyncio.gather(*[do_operation(event) for event in events]) - await partition_context.update_checkpoint(events[-1].offset, events[-1].sequence_number) - else: - print("empty events received", "partition:", partition_context.partition_id) - - -if __name__ == '__main__': - loop = asyncio.get_event_loop() - client = EventHubClient.from_connection_string(CONNECTION_STR, receive_timeout=RECEIVE_TIMEOUT, retry_total=RETRY_TOTAL) - partition_manager = SamplePartitionManager(db_filename="eventprocessor_test_db") - event_processor = EventProcessor(client, "$default", MyPartitionProcessor, partition_manager, polling_interval=1) - try: - loop.run_until_complete(event_processor.start()) - except KeyboardInterrupt: - loop.run_until_complete(event_processor.stop()) - finally: - loop.stop() diff --git a/sdk/eventhub/azure-eventhubs/examples/test_examples_eventhub.py b/sdk/eventhub/azure-eventhubs/examples/test_examples_eventhub.py deleted file mode 100644 index 8ff334ef971f..000000000000 --- a/sdk/eventhub/azure-eventhubs/examples/test_examples_eventhub.py +++ /dev/null @@ -1,132 +0,0 @@ -#------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -#-------------------------------------------------------------------------- - -import time -import logging - - -def create_eventhub_client(live_eventhub_config): - # [START create_eventhub_client] - import os - from azure.eventhub import EventHubClient, EventHubSharedKeyCredential - - host = os.environ['EVENT_HUB_HOSTNAME'] - event_hub_path = os.environ['EVENT_HUB_NAME'] - shared_access_policy = os.environ['EVENT_HUB_SAS_POLICY'] - shared_access_key = os.environ['EVENT_HUB_SAS_KEY'] - - client = EventHubClient( - host=host, - event_hub_path=event_hub_path, - credential=EventHubSharedKeyCredential(shared_access_policy, shared_access_key) - ) - # [END create_eventhub_client] - return client - - -def test_example_eventhub_sync_send_and_receive(live_eventhub_config): - # [START create_eventhub_client_connstr] - import os - from azure.eventhub import EventHubClient - - connection_str = "Endpoint=sb://{}/;SharedAccessKeyName={};SharedAccessKey={};EntityPath={}".format( - os.environ['EVENT_HUB_HOSTNAME'], - os.environ['EVENT_HUB_SAS_POLICY'], - os.environ['EVENT_HUB_SAS_KEY'], - os.environ['EVENT_HUB_NAME']) - client = EventHubClient.from_connection_string(connection_str) - # [END create_eventhub_client_connstr] - - from azure.eventhub import EventData, EventPosition - - # [START create_eventhub_client_sender] - client = EventHubClient.from_connection_string(connection_str) - # Create a producer. - producer = client.create_producer(partition_id="0") - # [END create_eventhub_client_sender] - - # [START create_eventhub_client_receiver] - client = EventHubClient.from_connection_string(connection_str) - # Create a consumer. - consumer = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest')) - # Create an exclusive consumer object. 
- exclusive_consumer = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1"), owner_level=1) - # [END create_eventhub_client_receiver] - - client = EventHubClient.from_connection_string(connection_str) - producer = client.create_producer(partition_id="0") - consumer = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest')) - try: - consumer.receive(timeout=1) - - # [START create_event_data] - event_data = EventData("String data") - event_data = EventData(b"Bytes data") - event_data = EventData([b"A", b"B", b"C"]) - - list_data = ['Message {}'.format(i) for i in range(10)] - event_data = EventData(body=list_data) - # [END create_event_data] - - # [START eventhub_client_sync_create_batch] - event_data_batch = producer.create_batch(max_size=10000) - while True: - try: - event_data_batch.try_add(EventData('Message inside EventBatchData')) - except ValueError: - # The EventDataBatch object reaches its max_size. - # You can send the full EventDataBatch object and create a new one here. - break - # [END eventhub_client_sync_create_batch] - - # [START eventhub_client_sync_send] - with producer: - event_data = EventData(b"A single event") - producer.send(event_data) - # [END eventhub_client_sync_send] - time.sleep(1) - - # [START eventhub_client_sync_receive] - with consumer: - logger = logging.getLogger("azure.eventhub") - received = consumer.receive(timeout=5, max_batch_size=1) - for event_data in received: - logger.info("Message received:{}".format(event_data.body_as_str())) - # [END eventhub_client_sync_receive] - assert len(received) > 0 - assert received[0].body_as_str() == "A single event" - assert list(received[-1].body)[0] == b"A single event" - finally: - pass - - -def test_example_eventhub_producer_ops(live_eventhub_config, connection_str): - from azure.eventhub import EventHubClient, EventData - - # [START eventhub_client_sender_close] - client = EventHubClient.from_connection_string(connection_str) - producer = client.create_producer(partition_id="0") - try: - producer.send(EventData(b"A single event")) - finally: - # Close down the send handler. - producer.close() - # [END eventhub_client_sender_close] - - -def test_example_eventhub_consumer_ops(live_eventhub_config, connection_str): - from azure.eventhub import EventHubClient - from azure.eventhub import EventPosition - - # [START eventhub_client_receiver_close] - client = EventHubClient.from_connection_string(connection_str) - consumer = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest')) - try: - consumer.receive(timeout=1) - finally: - # Close down the receive handler. 
- consumer.close() - # [END eventhub_client_receiver_close] diff --git a/sdk/eventhub/azure-eventhubs/setup.py b/sdk/eventhub/azure-eventhubs/setup.py index 36d34049abc7..d104756ba8aa 100644 --- a/sdk/eventhub/azure-eventhubs/setup.py +++ b/sdk/eventhub/azure-eventhubs/setup.py @@ -38,7 +38,7 @@ exclude_packages = [ 'tests', 'stress', - 'examples', + 'samples', # Exclude packages that will be covered by PEP420 or nspkg 'azure', ] diff --git a/sdk/eventhub/azure-eventhubs/tests/eventprocessor/test_eventprocessor.py b/sdk/eventhub/azure-eventhubs/tests/eventprocessor/test_eventprocessor.py deleted file mode 100644 index 28ff7cd6554b..000000000000 --- a/sdk/eventhub/azure-eventhubs/tests/eventprocessor/test_eventprocessor.py +++ /dev/null @@ -1,314 +0,0 @@ -#------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -#-------------------------------------------------------------------------- - -import pytest -import asyncio - -from azure.eventhub import EventData, EventHubError -from azure.eventhub.aio import EventHubClient -from azure.eventhub.aio.eventprocessor import EventProcessor, SamplePartitionManager, PartitionProcessor, \ - CloseReason, OwnershipLostError - - -class LoadBalancerPartitionProcessor(PartitionProcessor): - async def process_events(self, events, partition_context): - pass - -@pytest.mark.liveTest -@pytest.mark.asyncio -async def test_loadbalancer_balance(connstr_senders): - - connection_str, senders = connstr_senders - for sender in senders: - sender.send(EventData("EventProcessor Test")) - eventhub_client = EventHubClient.from_connection_string(connection_str, receive_timeout=3) - partition_manager = SamplePartitionManager() - - event_processor1 = EventProcessor(eventhub_client, "$default", LoadBalancerPartitionProcessor, - partition_manager, polling_interval=1) - asyncio.ensure_future(event_processor1.start()) - await asyncio.sleep(5) - assert len(event_processor1._tasks) == 2 # event_processor1 claims two partitions - - event_processor2 = EventProcessor(eventhub_client, "$default", LoadBalancerPartitionProcessor, - partition_manager, polling_interval=1) - - asyncio.ensure_future(event_processor2.start()) - await asyncio.sleep(5) - assert len(event_processor1._tasks) == 1 # two event processors balance. 
So each has 1 task - assert len(event_processor2._tasks) == 1 - - event_processor3 = EventProcessor(eventhub_client, "$default", LoadBalancerPartitionProcessor, - partition_manager, polling_interval=1) - asyncio.ensure_future(event_processor3.start()) - await asyncio.sleep(5) - assert len(event_processor3._tasks) == 0 - await event_processor3.stop() - - await event_processor1.stop() - await asyncio.sleep(5) - assert len(event_processor2._tasks) == 2 # event_procesor2 takes another one after event_processor1 stops - await event_processor2.stop() - - -@pytest.mark.asyncio -async def test_load_balancer_abandon(): - class TestPartitionProcessor(PartitionProcessor): - async def process_events(self, events, partition_context): - await asyncio.sleep(0.1) - - class MockEventHubClient(object): - eh_name = "test_eh_name" - - def create_consumer(self, consumer_group_name, partition_id, event_position): - return MockEventhubConsumer() - - async def get_partition_ids(self): - return [str(pid) for pid in range(6)] - - class MockEventhubConsumer(object): - async def receive(self): - return [] - - partition_manager = SamplePartitionManager() - - event_processor = EventProcessor(MockEventHubClient(), "$default", TestPartitionProcessor, - partition_manager, polling_interval=0.5) - asyncio.ensure_future(event_processor.start()) - await asyncio.sleep(5) - - ep_list = [] - for _ in range(2): - ep = EventProcessor(MockEventHubClient(), "$default", TestPartitionProcessor, - partition_manager, polling_interval=0.5) - asyncio.ensure_future(ep.start()) - ep_list.append(ep) - await asyncio.sleep(5) - assert len(event_processor._tasks) == 2 - for ep in ep_list: - await ep.stop() - await event_processor.stop() - -@pytest.mark.liveTest -@pytest.mark.asyncio -async def test_loadbalancer_list_ownership_error(connstr_senders): - class ErrorPartitionManager(SamplePartitionManager): - async def list_ownership(self, eventhub_name, consumer_group_name): - raise RuntimeError("Test runtime error") - - connection_str, senders = connstr_senders - for sender in senders: - sender.send(EventData("EventProcessor Test")) - eventhub_client = EventHubClient.from_connection_string(connection_str, receive_timeout=3) - partition_manager = ErrorPartitionManager() - - event_processor = EventProcessor(eventhub_client, "$default", LoadBalancerPartitionProcessor, - partition_manager, polling_interval=1) - asyncio.ensure_future(event_processor.start()) - await asyncio.sleep(5) - assert event_processor._running is True - assert len(event_processor._tasks) == 0 - await event_processor.stop() - - -@pytest.mark.liveTest -@pytest.mark.asyncio -async def test_partition_processor(connstr_senders): - partition_processor1 = None - partition_processor2 = None - - class TestPartitionProcessor(PartitionProcessor): - def __init__(self): - self.initialize_called = False - self.error = None - self.close_reason = None - self.received_events = [] - self.checkpoint = None - - async def initialize(self, partition_context): - nonlocal partition_processor1, partition_processor2 - if partition_context.partition_id == "1": - partition_processor1 = self - else: - partition_processor2 = self - - async def process_events(self, events, partition_context): - self.received_events.extend(events) - if events: - offset, sn = events[-1].offset, events[-1].sequence_number - await partition_context.update_checkpoint(offset, sn) - self.checkpoint = (offset, sn) - - async def process_error(self, error, partition_context): - self.error = error - assert partition_context is not None 
- - async def close(self, reason, partition_context): - self.close_reason = reason - assert partition_context is not None - - connection_str, senders = connstr_senders - for sender in senders: - sender.send(EventData("EventProcessor Test")) - eventhub_client = EventHubClient.from_connection_string(connection_str, receive_timeout=3) - partition_manager = SamplePartitionManager() - - event_processor = EventProcessor(eventhub_client, "$default", TestPartitionProcessor, - partition_manager, polling_interval=1) - asyncio.ensure_future(event_processor.start()) - await asyncio.sleep(10) - await event_processor.stop() - assert partition_processor1 is not None and partition_processor2 is not None - assert len(partition_processor1.received_events) == 1 and len(partition_processor2.received_events) == 1 - assert partition_processor1.checkpoint is not None - assert partition_processor1.close_reason == CloseReason.SHUTDOWN - assert partition_processor1.error is None - - -@pytest.mark.liveTest -@pytest.mark.asyncio -async def test_partition_processor_process_events_error(connstr_senders): - class ErrorPartitionProcessor(PartitionProcessor): - async def process_events(self, events, partition_context): - if partition_context.partition_id == "1": - raise RuntimeError("processing events error") - else: - pass - - async def process_error(self, error, partition_context): - if partition_context.partition_id == "1": - assert isinstance(error, RuntimeError) - else: - raise RuntimeError("There shouldn't be an error for partition other than 1") - - async def close(self, reason, partition_context): - if partition_context.partition_id == "1": - assert reason == CloseReason.PROCESS_EVENTS_ERROR - else: - assert reason == CloseReason.SHUTDOWN - - connection_str, senders = connstr_senders - for sender in senders: - sender.send(EventData("EventProcessor Test")) - eventhub_client = EventHubClient.from_connection_string(connection_str, receive_timeout=3) - partition_manager = SamplePartitionManager() - - event_processor = EventProcessor(eventhub_client, "$default", ErrorPartitionProcessor, - partition_manager, polling_interval=1) - asyncio.ensure_future(event_processor.start()) - await asyncio.sleep(10) - await event_processor.stop() - - -@pytest.mark.asyncio -async def test_partition_processor_process_eventhub_consumer_error(): - class TestPartitionProcessor(PartitionProcessor): - async def process_events(self, events, partition_context): - pass - - async def process_error(self, error, partition_context): - assert isinstance(error, EventHubError) - - async def close(self, reason, partition_context): - assert reason == CloseReason.EVENTHUB_EXCEPTION - - class MockEventHubClient(object): - eh_name = "test_eh_name" - - def create_consumer(self, consumer_group_name, partition_id, event_position): - return MockEventhubConsumer() - - async def get_partition_ids(self): - return ["0", "1"] - - class MockEventhubConsumer(object): - async def receive(self): - raise EventHubError("Mock EventHubConsumer EventHubError") - - eventhub_client = MockEventHubClient() - partition_manager = SamplePartitionManager() - - event_processor = EventProcessor(eventhub_client, "$default", TestPartitionProcessor, - partition_manager, polling_interval=1) - asyncio.ensure_future(event_processor.start()) - await asyncio.sleep(5) - await event_processor.stop() - - -@pytest.mark.asyncio -async def test_partition_processor_process_error_close_error(): - class TestPartitionProcessor(PartitionProcessor): - async def initialize(self, partition_context): - 
raise RuntimeError("initialize error") - - async def process_events(self, events, partition_context): - raise RuntimeError("process_events error") - - async def process_error(self, error, partition_context): - assert isinstance(error, RuntimeError) - raise RuntimeError("process_error error") - - async def close(self, reason, partition_context): - assert reason == CloseReason.PROCESS_EVENTS_ERROR - raise RuntimeError("close error") - - class MockEventHubClient(object): - eh_name = "test_eh_name" - - def create_consumer(self, consumer_group_name, partition_id, event_position): - return MockEventhubConsumer() - - async def get_partition_ids(self): - return ["0", "1"] - - class MockEventhubConsumer(object): - async def receive(self): - return [EventData("mock events")] - - eventhub_client = MockEventHubClient() #EventHubClient.from_connection_string(connection_str, receive_timeout=3) - partition_manager = SamplePartitionManager() - - event_processor = EventProcessor(eventhub_client, "$default", TestPartitionProcessor, - partition_manager, polling_interval=1) - asyncio.ensure_future(event_processor.start()) - await asyncio.sleep(5) - await event_processor.stop() - - -@pytest.mark.liveTest -@pytest.mark.asyncio -async def test_partition_processor_process_update_checkpoint_error(connstr_senders): - class ErrorPartitionManager(SamplePartitionManager): - async def update_checkpoint(self, eventhub_name, consumer_group_name, partition_id, owner_id, - offset, sequence_number): - if partition_id == "1": - raise OwnershipLostError("Mocked ownership lost") - - class TestPartitionProcessor(PartitionProcessor): - async def process_events(self, events, partition_context): - if events: - await partition_context.update_checkpoint(events[-1].offset, events[-1].sequence_number) - - async def process_error(self, error, partition_context): - assert isinstance(error, OwnershipLostError) - - async def close(self, reason, partition_context): - if partition_context.partition_id == "1": - assert reason == CloseReason.OWNERSHIP_LOST - else: - assert reason == CloseReason.SHUTDOWN - - connection_str, senders = connstr_senders - for sender in senders: - sender.send(EventData("EventProcessor Test")) - eventhub_client = EventHubClient.from_connection_string(connection_str, receive_timeout=3) - partition_manager = ErrorPartitionManager() - - event_processor = EventProcessor(eventhub_client, "$default", TestPartitionProcessor, - partition_manager, polling_interval=1) - asyncio.ensure_future(event_processor.start()) - await asyncio.sleep(10) - await event_processor.stop() diff --git a/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_auth.py b/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_auth.py index 531967a671f9..eb2d028c3964 100644 --- a/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_auth.py +++ b/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_auth.py @@ -3,38 +3,38 @@ # Licensed under the MIT License. See License.txt in the project root for # license information. 
#-------------------------------------------------------------------------- - import pytest import time +import threading -from azure.eventhub import EventData, EventHubClient, EventPosition +from azure.eventhub import EventData, EventHubProducerClient, EventHubConsumerClient @pytest.mark.liveTest def test_client_secret_credential(aad_credential, live_eventhub): try: - from azure.identity import ClientSecretCredential + from azure.identity import EnvironmentCredential except ImportError: pytest.skip("No azure identity library") - client_id, secret, tenant_id = aad_credential - credential = ClientSecretCredential(client_id=client_id, client_secret=secret, tenant_id=tenant_id) - client = EventHubClient(host=live_eventhub['hostname'], - event_hub_path=live_eventhub['event_hub'], - credential=credential, - user_agent='customized information') - sender = client.create_producer(partition_id='0') - receiver = client.create_consumer(consumer_group="$default", partition_id='0', event_position=EventPosition("@latest")) - - with receiver: - received = receiver.receive(timeout=3) - assert len(received) == 0 - - with sender: - event = EventData(body='A single message') - sender.send(event) - time.sleep(1) - - received = receiver.receive(timeout=3) - - assert len(received) == 1 - assert list(received[0].body)[0] == 'A single message'.encode('utf-8') + credential = EnvironmentCredential() + producer_client = EventHubProducerClient(host=live_eventhub['hostname'], + event_hub_path=live_eventhub['event_hub'], + credential=credential, + user_agent='customized information') + consumer_client = EventHubConsumerClient(host=live_eventhub['hostname'], + event_hub_path=live_eventhub['event_hub'], + credential=credential, + user_agent='customized information') + with producer_client: + producer_client.send(EventData(body='A single message')) + + def on_events(partition_context, events): + assert partition_context.partition_id == '0' + assert len(events) == 1 + assert list(events[0].body)[0] == 'A single message'.encode('utf-8') + with consumer_client: + worker = threading.Thread(target=consumer_client.receive, args=(on_events,), + kwargs={"consumer_group": '$default', + "partition_id": '0'}) + worker.start() + time.sleep(2) diff --git a/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_consumer_client.py b/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_consumer_client.py new file mode 100644 index 000000000000..18105de695fe --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_consumer_client.py @@ -0,0 +1,80 @@ +import time +import pytest +import threading +from azure.eventhub import EventData +from azure.eventhub import EventHubConsumerClient +from azure.eventhub._eventprocessor.local_partition_manager import InMemoryPartitionManager + + +@pytest.mark.liveTest +def test_receive_no_partition(connstr_senders): + connection_str, senders = connstr_senders + senders[0].send(EventData("Test EventData")) + senders[1].send(EventData("Test EventData")) + client = EventHubConsumerClient.from_connection_string(connection_str, receive_timeout=1) + + recv_cnt = {"received": 0} # substitution for nonlocal variable, 2.7 compatible + + def on_events(partition_context, events): + recv_cnt["received"] += len(events) + + with client: + worker = threading.Thread(target=client.receive, + args=(on_events,), + kwargs={"consumer_group": "$default", "initial_event_position": "-1"}) + worker.start() + time.sleep(10) + assert recv_cnt["received"] == 2 + + +@pytest.mark.liveTest +def 
test_receive_partition(connstr_senders): + connection_str, senders = connstr_senders + senders[0].send(EventData("Test EventData")) + client = EventHubConsumerClient.from_connection_string(connection_str) + + recv_cnt = {"received": 0} # substitution for nonlocal variable, 2.7 compatible + + def on_events(partition_context, events): + recv_cnt["received"] += len(events) + assert partition_context.partition_id == "0" + assert partition_context.consumer_group_name == "$default" + assert partition_context.fully_qualified_namespace in connection_str + assert partition_context.eventhub_name == senders[0]._client.eh_name + + with client: + worker = threading.Thread(target=client.receive, + args=(on_events,), + kwargs={"consumer_group": "$default", "initial_event_position": "-1", + "partition_id": "0"}) + worker.start() + time.sleep(10) + assert recv_cnt["received"] == 1 + + +@pytest.mark.liveTest +def test_receive_load_balancing(connstr_senders): + connection_str, senders = connstr_senders + pm = InMemoryPartitionManager() + client1 = EventHubConsumerClient.from_connection_string( + connection_str, partition_manager=pm, load_balancing_interval=1) + client2 = EventHubConsumerClient.from_connection_string( + connection_str, partition_manager=pm, load_balancing_interval=1) + + def on_events(partition_context, events): + pass + + with client1, client2: + worker1 = threading.Thread(target=client1.receive, + args=(on_events,), + kwargs={"consumer_group": "$default", "initial_event_position": "-1"}) + + worker2 = threading.Thread(target=client2.receive, + args=(on_events,), + kwargs={"consumer_group": "$default", "initial_event_position": "-1"}) + + worker1.start() + worker2.start() + time.sleep(20) + assert len(client1._event_processors[("$default", "-1")]._working_threads) == 1 + assert len(client2._event_processors[("$default", "-1")]._working_threads) == 1 diff --git a/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_eventprocessor.py b/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_eventprocessor.py new file mode 100644 index 000000000000..1927a4ed865a --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_eventprocessor.py @@ -0,0 +1,408 @@ +#------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. 
+#-------------------------------------------------------------------------- + +import pytest +import threading +import time + +from azure.eventhub import EventData, EventHubError +from azure.eventhub.client import EventHubClient +from azure.eventhub._eventprocessor.event_processor import EventProcessor +from azure.eventhub import CloseReason +from azure.eventhub._eventprocessor.local_partition_manager import InMemoryPartitionManager +from azure.eventhub._eventprocessor.common import OwnershipLostError +from azure.eventhub.common import _Address + + +def event_handler(partition_context, events): + pass + + +def test_loadbalancer_balance(): + + class MockEventHubClient(object): + eh_name = "test_eh_name" + + def __init__(self): + self._address = _Address(hostname="test", path=MockEventHubClient.eh_name) + + def _create_consumer(self, consumer_group_name, partition_id, event_position, **kwargs): + return MockEventhubConsumer() + + def get_partition_ids(self): + return ["0", "1"] + + class MockEventhubConsumer(object): + def receive(self): + time.sleep(0.1) + return [] + def close(self): + pass + + eventhub_client = MockEventHubClient() + partition_manager = InMemoryPartitionManager() + threads = [] + event_processor1 = EventProcessor(eventhub_client=eventhub_client, + consumer_group_name='$default', + partition_manager=partition_manager, + on_event=event_handler, + polling_interval=3, + receive_timeout=1) + + thread1 = threading.Thread(target=event_processor1.start) + thread1.start() + threads.append(thread1) + + time.sleep(2) + ep1_after_start = len(event_processor1._working_threads) + event_processor2 = EventProcessor(eventhub_client=eventhub_client, + consumer_group_name='$default', + partition_manager=partition_manager, + on_event=event_handler, + polling_interval=3, + receive_timeout=1) + + thread2 = threading.Thread(target=event_processor2.start) + thread2.start() + threads.append(thread2) + time.sleep(10) + ep2_after_start = len(event_processor2._working_threads) + + event_processor1.stop() + thread1.join() + time.sleep(10) + ep2_after_ep1_stopped = len(event_processor2._working_threads) + event_processor2.stop() + + assert ep1_after_start == 2 + assert ep2_after_start == 1 + assert ep2_after_ep1_stopped == 2 + + +def test_loadbalancer_list_ownership_error(): + class ErrorPartitionManager(InMemoryPartitionManager): + def list_ownership(self, fully_qualified_namespace, eventhub_name, consumer_group_name): + raise RuntimeError("Test runtime error") + + class MockEventHubClient(object): + eh_name = "test_eh_name" + + def __init__(self): + self._address = _Address(hostname="test", path=MockEventHubClient.eh_name) + + def _create_consumer(self, consumer_group_name, partition_id, event_position, **kwargs): + return MockEventhubConsumer() + + def get_partition_ids(self): + return ["0", "1"] + + class MockEventhubConsumer(object): + def receive(self): + time.sleep(0.5) + return [] + + def close(self): + pass + + eventhub_client = MockEventHubClient() + partition_manager = ErrorPartitionManager() + + event_processor = EventProcessor(eventhub_client=eventhub_client, + consumer_group_name='$default', + partition_manager=partition_manager, + on_event=event_handler, + polling_interval=1) + + thread = threading.Thread(target=event_processor.start) + thread.start() + time.sleep(2) + event_processor_running = event_processor._running + event_processor_partitions = len(event_processor._working_threads) + event_processor.stop() + thread.join() + assert event_processor_running is True + assert 
event_processor_partitions == 0 + + +def test_partition_processor(): + assert_map = {} + event_map = {} + + def partition_initialize_handler(partition_context): + assert_map["initialize"] = "called" + assert partition_context + + def event_handler(partition_context, events): + event_map[partition_context.partition_id] = event_map.get(partition_context.partition_id, 0) + len(events) + partition_context.update_checkpoint(events[-1]) + assert_map["checkpoint"] = "checkpoint called" + + def partition_close_handler(partition_context, reason): + assert_map["close_reason"] = reason + + def error_handler(partition_context, err): + assert_map["error"] = err + + class MockEventHubClient(object): + eh_name = "test_eh_name" + + def __init__(self): + self._address = _Address(hostname="test", path=MockEventHubClient.eh_name) + + def _create_consumer(self, consumer_group_name, partition_id, event_position, **kwargs): + return MockEventhubConsumer() + + def get_partition_ids(self): + return ["0", "1"] + + class MockEventhubConsumer(object): + def receive(self): + time.sleep(0.5) + return [EventData("test data")] + + def close(self): + pass + + eventhub_client = MockEventHubClient() + + partition_manager = InMemoryPartitionManager() + + event_processor = EventProcessor(eventhub_client=eventhub_client, + consumer_group_name='$default', + partition_manager=partition_manager, + on_event=event_handler, + on_error=error_handler, + on_partition_initialize=partition_initialize_handler, + on_partition_close=partition_close_handler, + polling_interval=1) + + thread = threading.Thread(target=event_processor.start) + thread.start() + time.sleep(2) + ep_partitions = len(event_processor._working_threads) + event_processor.stop() + time.sleep(2) + assert ep_partitions == 2 + assert assert_map["initialize"] == "called" + assert event_map['0'] > 1 and event_map['1'] > 1 + assert assert_map["checkpoint"] == "checkpoint called" + assert "error" not in assert_map + assert assert_map["close_reason"] == CloseReason.SHUTDOWN + + +def test_partition_processor_process_events_error(): + assert_result = {} + def event_handler(partition_context, events): + if partition_context.partition_id == "1": + raise RuntimeError("processing events error") + else: + pass + + def error_handler(partition_context, error): + if partition_context.partition_id == "1": + assert_result["error"] = "runtime error" + else: + assert_result["error"] = "not an error" + + def partition_close_handler(partition_context, reason): + if partition_context.partition_id == "1": + assert reason == CloseReason.OWNERSHIP_LOST + else: + assert reason == CloseReason.SHUTDOWN + + class MockEventHubClient(object): + eh_name = "test_eh_name" + + def __init__(self): + self._address = _Address(hostname="test", path=MockEventHubClient.eh_name) + + def _create_consumer(self, consumer_group_name, partition_id, event_position, **kwargs): + return MockEventhubConsumer() + + def get_partition_ids(self): + return ["0", "1"] + + class MockEventhubConsumer(object): + def receive(self): + time.sleep(0.5) + return [EventData("test data")] + + def close(self): + pass + + eventhub_client = MockEventHubClient() + partition_manager = InMemoryPartitionManager() + + event_processor = EventProcessor(eventhub_client=eventhub_client, + consumer_group_name='$default', + partition_manager=partition_manager, + on_event=event_handler, + on_error=error_handler, + on_partition_close=partition_close_handler, + polling_interval=1) + thread = threading.Thread(target=event_processor.start) + thread.start() 
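+    # The processor now runs on a background thread and will claim both mock
+    # partitions ("0" and "1"). on_event raises only for partition "1", so the
+    # sleep that follows gives the worker threads time to route that
+    # RuntimeError into error_handler before the processor is stopped and joined.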
+ time.sleep(2) + event_processor.stop() + thread.join() + assert assert_result["error"] == "runtime error" + + +def test_partition_processor_process_eventhub_consumer_error(): + assert_result = {} + def event_handler(partition_context, events): + pass + + def error_handler(partition_context, error): + assert_result["error"] = error + + def partition_close_handler(partition_context, reason): + assert_result["reason"] = CloseReason.OWNERSHIP_LOST + + class MockEventHubClient(object): + eh_name = "test_eh_name" + + def __init__(self): + self._address = _Address(hostname="test", path=MockEventHubClient.eh_name) + + def _create_consumer(self, consumer_group_name, partition_id, event_position, **kwargs): + return MockEventhubConsumer() + + def get_partition_ids(self): + return ["0", "1"] + + class MockEventhubConsumer(object): + def receive(self): + time.sleep(0.5) + raise EventHubError("Mock EventHubConsumer EventHubError") + def close(self): + pass + + eventhub_client = MockEventHubClient() + partition_manager = InMemoryPartitionManager() + + event_processor = EventProcessor(eventhub_client=eventhub_client, + consumer_group_name='$default', + partition_manager=partition_manager, + on_event=event_handler, + on_error=error_handler, + on_partition_close=partition_close_handler, + polling_interval=1) + thread = threading.Thread(target=event_processor.start) + thread.start() + time.sleep(2) + event_processor.stop() + thread.join() + assert isinstance(assert_result["error"], EventHubError) + assert assert_result["reason"] == CloseReason.OWNERSHIP_LOST + + +def test_partition_processor_process_error_close_error(): + def partition_initialize_handler(partition_context): + raise RuntimeError("initialize error") + + def event_handler(partition_context, events): + raise RuntimeError("process_events error") + + def error_handler(partition_context, error): + assert isinstance(error, RuntimeError) + raise RuntimeError("process_error error") + + def partition_close_handler(partition_context, reason): + assert reason == CloseReason.OWNERSHIP_LOST + raise RuntimeError("close error") + + class MockEventHubClient(object): + eh_name = "test_eh_name" + + def __init__(self): + self._address = _Address(hostname="test", path=MockEventHubClient.eh_name) + + def _create_consumer(self, consumer_group_name, partition_id, event_position, **kwargs): + return MockEventhubConsumer() + + def get_partition_ids(self): + return ["0", "1"] + + class MockEventhubConsumer(object): + def receive(self): + time.sleep(0.5) + return [EventData("mock events")] + def close(self): + pass + + eventhub_client = MockEventHubClient() # EventHubClient.from_connection_string(connection_str, receive_timeout=3) + partition_manager = InMemoryPartitionManager() + + event_processor = EventProcessor(eventhub_client=eventhub_client, + consumer_group_name='$default', + partition_manager=partition_manager, + on_event=event_handler, + on_error=error_handler, + on_partition_initialize=partition_initialize_handler, + on_partition_close=partition_close_handler, + polling_interval=1) + thread = threading.Thread(target=event_processor.start) + thread.start() + time.sleep(2) + event_processor.stop() + thread.join() + + +def test_partition_processor_process_update_checkpoint_error(): + assert_map = {} + class ErrorPartitionManager(InMemoryPartitionManager): + def update_checkpoint( + self, fully_qualified_namespace, eventhub_name, + consumer_group_name, partition_id, offset, sequence_number): + if partition_id == "1": + raise ValueError("Mocked error") + + def 
event_handler(partition_context, events): + if events: + partition_context.update_checkpoint(events[-1]) + + def error_handler(partition_context, error): + assert_map["error"] = error + + def partition_close_handler(partition_context, reason): + pass + + class MockEventHubClient(object): + eh_name = "test_eh_name" + + def __init__(self): + self._address = _Address(hostname="test", path=MockEventHubClient.eh_name) + + def _create_consumer(self, consumer_group_name, partition_id, event_position, **kwargs): + return MockEventhubConsumer() + + def get_partition_ids(self): + return ["0", "1"] + + class MockEventhubConsumer(object): + def receive(self): + time.sleep(0.5) + return [EventData("test data")] + + def close(self): + pass + + eventhub_client = MockEventHubClient() + partition_manager = ErrorPartitionManager() + + event_processor = EventProcessor(eventhub_client=eventhub_client, + consumer_group_name='$default', + partition_manager=partition_manager, + on_event=event_handler, + on_error=error_handler, + on_partition_close=partition_close_handler, + polling_interval=1) + thread = threading.Thread(target=event_processor.start) + thread.start() + time.sleep(2) + event_processor.stop() + assert isinstance(assert_map["error"], ValueError) diff --git a/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_negative.py b/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_negative.py index a08751e4f606..21e434c0b9be 100644 --- a/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_negative.py +++ b/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_negative.py @@ -4,7 +4,6 @@ # license information. #-------------------------------------------------------------------------- -import os import pytest import time import sys @@ -12,74 +11,79 @@ from azure.eventhub import ( EventData, EventPosition, - EventHubError, AuthenticationError, ConnectError, - EventDataError, - EventDataSendError, - EventHubClient) + EventDataSendError) + +from azure.eventhub.client import EventHubClient @pytest.mark.liveTest def test_send_with_invalid_hostname(invalid_hostname): - client = EventHubClient.from_connection_string(invalid_hostname, network_tracing=False) - sender = client.create_producer() + client = EventHubClient.from_connection_string(invalid_hostname) + sender = client._create_producer() with pytest.raises(AuthenticationError): sender.send(EventData("test data")) sender.close() + client.close() @pytest.mark.liveTest def test_receive_with_invalid_hostname_sync(invalid_hostname): - client = EventHubClient.from_connection_string(invalid_hostname, network_tracing=False) - receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) + client = EventHubClient.from_connection_string(invalid_hostname) + receiver = client._create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) with pytest.raises(AuthenticationError): receiver.receive(timeout=5) receiver.close() + client.close() @pytest.mark.liveTest def test_send_with_invalid_key(invalid_key): - client = EventHubClient.from_connection_string(invalid_key, network_tracing=False) - sender = client.create_producer() + client = EventHubClient.from_connection_string(invalid_key) + sender = client._create_producer() with pytest.raises(AuthenticationError): sender.send(EventData("test data")) sender.close() + client.close() @pytest.mark.liveTest def test_receive_with_invalid_key_sync(invalid_key): - client = 
EventHubClient.from_connection_string(invalid_key, network_tracing=False) - receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) + client = EventHubClient.from_connection_string(invalid_key) + receiver = client._create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) with pytest.raises(AuthenticationError): receiver.receive(timeout=10) receiver.close() + client.close() @pytest.mark.liveTest def test_send_with_invalid_policy(invalid_policy): - client = EventHubClient.from_connection_string(invalid_policy, network_tracing=False) - sender = client.create_producer() + client = EventHubClient.from_connection_string(invalid_policy) + sender = client._create_producer() with pytest.raises(AuthenticationError): sender.send(EventData("test data")) sender.close() + client.close() @pytest.mark.liveTest def test_receive_with_invalid_policy_sync(invalid_policy): - client = EventHubClient.from_connection_string(invalid_policy, network_tracing=False) - receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) + client = EventHubClient.from_connection_string(invalid_policy) + receiver = client._create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) with pytest.raises(AuthenticationError): receiver.receive(timeout=5) receiver.close() + client.close() @pytest.mark.liveTest def test_send_partition_key_with_partition_sync(connection_str): pytest.skip("Skipped tentatively. Confirm whether to throw ValueError or just warn users") - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - sender = client.create_producer(partition_id="1") + client = EventHubClient.from_connection_string(connection_str) + sender = client._create_producer(partition_id="1") try: data = EventData(b"Data") data._set_partition_key(b"PKey") @@ -87,84 +91,90 @@ def test_send_partition_key_with_partition_sync(connection_str): sender.send(data) finally: sender.close() + client.close() @pytest.mark.liveTest def test_non_existing_entity_sender(connection_str): - client = EventHubClient.from_connection_string(connection_str, event_hub_path="nemo", network_tracing=False) - sender = client.create_producer(partition_id="1") + client = EventHubClient.from_connection_string(connection_str, event_hub_path="nemo") + sender = client._create_producer(partition_id="1") with pytest.raises(AuthenticationError): sender.send(EventData("test data")) sender.close() + client.close() @pytest.mark.liveTest def test_non_existing_entity_receiver(connection_str): - client = EventHubClient.from_connection_string(connection_str, event_hub_path="nemo", network_tracing=False) - receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) + client = EventHubClient.from_connection_string(connection_str, event_hub_path="nemo") + receiver = client._create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition("-1")) with pytest.raises(AuthenticationError): receiver.receive(timeout=5) receiver.close() + client.close() @pytest.mark.liveTest def test_receive_from_invalid_partitions_sync(connection_str): partitions = ["XYZ", "-1", "1000", "-"] for p in partitions: - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - receiver = client.create_consumer(consumer_group="$default", partition_id=p, event_position=EventPosition("-1")) + 
client = EventHubClient.from_connection_string(connection_str) + receiver = client._create_consumer(consumer_group="$default", partition_id=p, event_position=EventPosition("-1")) try: with pytest.raises(ConnectError): receiver.receive(timeout=5) finally: receiver.close() + client.close() @pytest.mark.liveTest def test_send_to_invalid_partitions(connection_str): partitions = ["XYZ", "-1", "1000", "-"] for p in partitions: - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - sender = client.create_producer(partition_id=p) + client = EventHubClient.from_connection_string(connection_str) + sender = client._create_producer(partition_id=p) try: with pytest.raises(ConnectError): sender.send(EventData("test data")) finally: sender.close() + client.close() @pytest.mark.liveTest def test_send_too_large_message(connection_str): if sys.platform.startswith('darwin'): pytest.skip("Skipping on OSX - open issue regarding message size") - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - sender = client.create_producer() + client = EventHubClient.from_connection_string(connection_str) + sender = client._create_producer() try: data = EventData(b"A" * 1100000) with pytest.raises(EventDataSendError): sender.send(data) finally: sender.close() + client.close() @pytest.mark.liveTest def test_send_null_body(connection_str): - partitions = ["XYZ", "-1", "1000", "-"] - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - sender = client.create_producer() + client = EventHubClient.from_connection_string(connection_str) + sender = client._create_producer() try: with pytest.raises(ValueError): data = EventData(None) sender.send(data) finally: sender.close() + client.close() @pytest.mark.liveTest def test_message_body_types(connstr_senders): connection_str, senders = connstr_senders - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest')) + client = EventHubClient.from_connection_string(connection_str) + receiver = client._create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest')) try: received = receiver.receive(timeout=5) assert len(received) == 0 @@ -212,21 +222,24 @@ def test_message_body_types(connstr_senders): raise finally: receiver.close() + client.close() @pytest.mark.liveTest def test_create_batch_with_invalid_hostname_sync(invalid_hostname): - client = EventHubClient.from_connection_string(invalid_hostname, network_tracing=False) - sender = client.create_producer() + client = EventHubClient.from_connection_string(invalid_hostname) + sender = client._create_producer() with pytest.raises(AuthenticationError): - batch_event_data = sender.create_batch(max_size=300, partition_key="key") + sender.create_batch(max_size=300) sender.close() + client.close() @pytest.mark.liveTest def test_create_batch_with_too_large_size_sync(connection_str): - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - sender = client.create_producer() + client = EventHubClient.from_connection_string(connection_str) + sender = client._create_producer() with pytest.raises(ValueError): - batch_event_data = sender.create_batch(max_size=5 * 1024 * 1024, partition_key="key") + sender.create_batch(max_size=5 * 1024 * 1024) sender.close() + client.close() diff --git 
a/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_producer_client.py b/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_producer_client.py new file mode 100644 index 000000000000..5e3a9b7ba7f4 --- /dev/null +++ b/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_producer_client.py @@ -0,0 +1,59 @@ +import pytest +from azure.eventhub import EventData +from azure.eventhub import EventHubProducerClient + + +@pytest.mark.liveTest +def test_send_with_partition_key(connstr_receivers): + connection_str, receivers = connstr_receivers + client = EventHubProducerClient.from_connection_string(connection_str) + + with client: + data_val = 0 + for partition in [b"a", b"b", b"c", b"d", b"e", b"f"]: + partition_key = b"test_partition_" + partition + for i in range(50): + data = EventData(str(data_val)) + data_val += 1 + client.send(data, partition_key=partition_key) + + found_partition_keys = {} + for index, partition in enumerate(receivers): + received = partition.receive(timeout=5) + for message in received: + try: + existing = found_partition_keys[message.partition_key] + assert existing == index + except KeyError: + found_partition_keys[message.partition_key] = index + + +@pytest.mark.liveTest +def test_send_partition(connstr_receivers): + connection_str, receivers = connstr_receivers + client = EventHubProducerClient.from_connection_string(connection_str) + with client: + client.send(EventData(b"Data"), partition_id="1") + + partition_0 = receivers[0].receive(timeout=2) + assert len(partition_0) == 0 + partition_1 = receivers[1].receive(timeout=2) + assert len(partition_1) == 1 + client.close() + + +@pytest.mark.liveTest +def test_send_no_partition_batch(connstr_receivers): + connection_str, receivers = connstr_receivers + client = EventHubProducerClient.from_connection_string(connection_str) + with client: + event_batch = client.create_batch() + try: + while True: + event_batch.try_add(EventData(b"Data")) + except ValueError: + client.send(event_batch) + + partition_0 = receivers[0].receive(timeout=2) + partition_1 = receivers[1].receive(timeout=2) + assert len(partition_0) + len(partition_1) > 10 diff --git a/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_properties.py b/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_properties.py index d16820a00083..9711c878afd3 100644 --- a/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_properties.py +++ b/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_properties.py @@ -5,32 +5,32 @@ #-------------------------------------------------------------------------- import pytest -from azure.eventhub import EventHubClient, EventHubSharedKeyCredential +from azure.eventhub import EventHubSharedKeyCredential +from azure.eventhub.client import EventHubClient @pytest.mark.liveTest def test_get_properties(live_eventhub): client = EventHubClient(live_eventhub['hostname'], live_eventhub['event_hub'], - EventHubSharedKeyCredential(live_eventhub['key_name'], live_eventhub['access_key']) - ) + EventHubSharedKeyCredential(live_eventhub['key_name'], live_eventhub['access_key'])) properties = client.get_properties() assert properties['path'] == live_eventhub['event_hub'] and properties['partition_ids'] == ['0', '1'] + client.close() @pytest.mark.liveTest def test_get_partition_ids(live_eventhub): client = EventHubClient(live_eventhub['hostname'], live_eventhub['event_hub'], - EventHubSharedKeyCredential(live_eventhub['key_name'], live_eventhub['access_key']) - ) + 
EventHubSharedKeyCredential(live_eventhub['key_name'], live_eventhub['access_key'])) partition_ids = client.get_partition_ids() assert partition_ids == ['0', '1'] + client.close() @pytest.mark.liveTest def test_get_partition_properties(live_eventhub): client = EventHubClient(live_eventhub['hostname'], live_eventhub['event_hub'], - EventHubSharedKeyCredential(live_eventhub['key_name'], live_eventhub['access_key']) - ) + EventHubSharedKeyCredential(live_eventhub['key_name'], live_eventhub['access_key'])) properties = client.get_partition_properties('0') assert properties['event_hub_path'] == live_eventhub['event_hub'] \ and properties['id'] == '0' \ @@ -39,3 +39,4 @@ def test_get_partition_properties(live_eventhub): and 'last_enqueued_offset' in properties \ and 'last_enqueued_time_utc' in properties \ and 'is_empty' in properties + client.close() diff --git a/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_receive.py b/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_receive.py index 9bd1ea38ee03..6ca06d8b148b 100644 --- a/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_receive.py +++ b/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_receive.py @@ -9,35 +9,15 @@ import time import datetime -from azure.eventhub import EventData, EventHubClient, EventPosition, TransportType - - -# def test_receive_without_events(connstr_senders): -# connection_str, senders = connstr_senders -# client = EventHubClient.from_connection_string(connection_str, network_tracing=False) -# receiver = client.create_consumer(consumer_group="$default", partition_id"$default", "0", event_position=EventPosition('@latest')) -# finish = datetime.datetime.now() + datetime.timedelta(seconds=240) -# count = 0 -# try: -# client.run() -# while True: #datetime.datetime.now() < finish: -# senders[0].send(EventData("Receiving an event {}".format(count))) -# received = receiver.receive(timeout=1) -# if received: -# print(received[0].body_as_str()) -# count += 1 -# time.sleep(1) -# except: -# raise -# finally: -# client.stop() +from azure.eventhub import EventData, EventPosition, TransportType +from azure.eventhub.client import EventHubClient @pytest.mark.liveTest def test_receive_end_of_stream(connstr_senders): connection_str, senders = connstr_senders - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest')) + client = EventHubClient.from_connection_string(connection_str) + receiver = client._create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest')) with receiver: received = receiver.receive(timeout=5) assert len(received) == 0 @@ -47,15 +27,16 @@ def test_receive_end_of_stream(connstr_senders): assert received[0].body_as_str() == "Receiving only a single event" assert list(received[-1].body)[0] == b"Receiving only a single event" + client.close() @pytest.mark.liveTest def test_receive_with_offset_sync(connstr_senders): connection_str, senders = connstr_senders - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) + client = EventHubClient.from_connection_string(connection_str) partitions = client.get_properties() assert partitions["partition_ids"] == ["0", "1"] - receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest')) + receiver = client._create_consumer(consumer_group="$default", partition_id="0", 
event_position=EventPosition('@latest')) with receiver: more_partitions = client.get_properties() assert more_partitions["partition_ids"] == ["0", "1"] @@ -70,20 +51,21 @@ def test_receive_with_offset_sync(connstr_senders): assert list(received[0].body) == [b'Data'] assert received[0].body_as_str() == "Data" - offset_receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition(offset, inclusive=False)) + offset_receiver = client._create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition(offset, inclusive=False)) with offset_receiver: received = offset_receiver.receive(timeout=5) assert len(received) == 0 senders[0].send(EventData(b"Message after offset")) received = offset_receiver.receive(timeout=5) assert len(received) == 1 + client.close() @pytest.mark.liveTest def test_receive_with_inclusive_offset(connstr_senders): connection_str, senders = connstr_senders - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest')) + client = EventHubClient.from_connection_string(connection_str) + receiver = client._create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest')) with receiver: received = receiver.receive(timeout=5) @@ -97,19 +79,20 @@ def test_receive_with_inclusive_offset(connstr_senders): assert list(received[0].body) == [b'Data'] assert received[0].body_as_str() == "Data" - offset_receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition(offset, inclusive=True)) + offset_receiver = client._create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition(offset, inclusive=True)) with offset_receiver: received = offset_receiver.receive(timeout=5) assert len(received) == 1 + client.close() @pytest.mark.liveTest def test_receive_with_datetime_sync(connstr_senders): connection_str, senders = connstr_senders - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) + client = EventHubClient.from_connection_string(connection_str) partitions = client.get_properties() assert partitions["partition_ids"] == ["0", "1"] - receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest')) + receiver = client._create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest')) with receiver: more_partitions = client.get_properties() assert more_partitions["partition_ids"] == ["0", "1"] @@ -123,19 +106,20 @@ def test_receive_with_datetime_sync(connstr_senders): assert list(received[0].body) == [b'Data'] assert received[0].body_as_str() == "Data" - offset_receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition(offset)) + offset_receiver = client._create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition(offset)) with offset_receiver: received = offset_receiver.receive(timeout=5) assert len(received) == 0 senders[0].send(EventData(b"Message after timestamp")) received = offset_receiver.receive(timeout=5) assert len(received) == 1 + client.close() @pytest.mark.liveTest def test_receive_with_custom_datetime_sync(connstr_senders): connection_str, senders = connstr_senders - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) + client = 
EventHubClient.from_connection_string(connection_str) for i in range(5): senders[0].send(EventData(b"Message before timestamp")) time.sleep(65) @@ -145,7 +129,7 @@ def test_receive_with_custom_datetime_sync(connstr_senders): for i in range(5): senders[0].send(EventData(b"Message after timestamp")) - receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition(offset)) + receiver = client._create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition(offset)) with receiver: all_received = [] received = receiver.receive(timeout=5) @@ -157,13 +141,14 @@ def test_receive_with_custom_datetime_sync(connstr_senders): for received_event in all_received: assert received_event.body_as_str() == "Message after timestamp" assert received_event.enqueued_time > offset + client.close() @pytest.mark.liveTest def test_receive_with_sequence_no(connstr_senders): connection_str, senders = connstr_senders - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest')) + client = EventHubClient.from_connection_string(connection_str) + receiver = client._create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest')) with receiver: received = receiver.receive(timeout=5) @@ -174,7 +159,7 @@ def test_receive_with_sequence_no(connstr_senders): assert len(received) == 1 offset = received[0].sequence_number - offset_receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition(offset, False)) + offset_receiver = client._create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition(offset, False)) with offset_receiver: received = offset_receiver.receive(timeout=5) assert len(received) == 0 @@ -182,12 +167,14 @@ def test_receive_with_sequence_no(connstr_senders): time.sleep(1) received = offset_receiver.receive(timeout=5) assert len(received) == 1 + client.close() + @pytest.mark.liveTest def test_receive_with_inclusive_sequence_no(connstr_senders): connection_str, senders = connstr_senders - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest')) + client = EventHubClient.from_connection_string(connection_str) + receiver = client._create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest')) with receiver: received = receiver.receive(timeout=5) assert len(received) == 0 @@ -195,17 +182,18 @@ def test_receive_with_inclusive_sequence_no(connstr_senders): received = receiver.receive(timeout=5) assert len(received) == 1 offset = received[0].sequence_number - offset_receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition(offset, inclusive=True)) + offset_receiver = client._create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition(offset, inclusive=True)) with offset_receiver: received = offset_receiver.receive(timeout=5) assert len(received) == 1 + client.close() @pytest.mark.liveTest def test_receive_batch(connstr_senders): connection_str, senders = connstr_senders - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - receiver = client.create_consumer(consumer_group="$default", 
partition_id="0", event_position=EventPosition('@latest'), prefetch=500) + client = EventHubClient.from_connection_string(connection_str) + receiver = client._create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest'), prefetch=500) with receiver: received = receiver.receive(timeout=5) assert len(received) == 0 @@ -219,6 +207,7 @@ def test_receive_batch(connstr_senders): assert event.sequence_number is not None assert event.offset assert event.enqueued_time + client.close() @pytest.mark.liveTest @@ -239,8 +228,8 @@ def batched(): ed.application_properties = batch_app_prop yield ed - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest'), prefetch=500) + client = EventHubClient.from_connection_string(connection_str) + receiver = client._create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest'), prefetch=500) with receiver: received = receiver.receive(timeout=5) assert len(received) == 0 @@ -256,13 +245,14 @@ def batched(): assert list(message.body)[0] == "Event Data {}".format(index).encode('utf-8') assert (app_prop_key.encode('utf-8') in message.application_properties) \ and (dict(message.application_properties)[app_prop_key.encode('utf-8')] == app_prop_value.encode('utf-8')) + client.close() @pytest.mark.liveTest def test_receive_over_websocket_sync(connstr_senders): connection_str, senders = connstr_senders - client = EventHubClient.from_connection_string(connection_str, transport_type=TransportType.AmqpOverWebsocket, network_tracing=False) - receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest'), prefetch=500) + client = EventHubClient.from_connection_string(connection_str, transport_type=TransportType.AmqpOverWebsocket) + receiver = client._create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest'), prefetch=500) event_list = [] for i in range(20): @@ -278,6 +268,7 @@ def test_receive_over_websocket_sync(connstr_senders): received = receiver.receive(max_batch_size=50, timeout=5) assert len(received) == 20 + client.close() @pytest.mark.liveTest @@ -287,9 +278,8 @@ def test_receive_run_time_metric(connstr_senders): if StrictVersion(uamqp_version) < StrictVersion('1.2.3'): pytest.skip("Disabled for uamqp 1.2.2. 
Will enable after uamqp 1.2.3 is released.") connection_str, senders = connstr_senders - client = EventHubClient.from_connection_string(connection_str, transport_type=TransportType.AmqpOverWebsocket, - network_tracing=False) - receiver = client.create_consumer(consumer_group="$default", partition_id="0", + client = EventHubClient.from_connection_string(connection_str, transport_type=TransportType.AmqpOverWebsocket) + receiver = client._create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest'), prefetch=500, track_last_enqueued_event_properties=True) @@ -312,3 +302,4 @@ def test_receive_run_time_metric(connstr_senders): assert receiver.last_enqueued_event_properties.get('offset', None) assert receiver.last_enqueued_event_properties.get('enqueued_time', None) assert receiver.last_enqueued_event_properties.get('retrieval_time', None) + client.close() diff --git a/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_receiver_iterator.py b/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_receiver_iterator.py index 803d27953341..0e30aed027e3 100644 --- a/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_receiver_iterator.py +++ b/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_receiver_iterator.py @@ -5,18 +5,16 @@ #-------------------------------------------------------------------------- import pytest -import time -import datetime -from threading import Thread -from azure.eventhub import EventData, EventHubClient, EventPosition, TransportType +from azure.eventhub import EventData, EventPosition +from azure.eventhub.client import EventHubClient @pytest.mark.liveTest def test_receive_iterator(connstr_senders): connection_str, senders = connstr_senders - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - receiver = client.create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest')) + client = EventHubClient.from_connection_string(connection_str) + receiver = client._create_consumer(consumer_group="$default", partition_id="0", event_position=EventPosition('@latest')) with receiver: received = receiver.receive(timeout=5) assert len(received) == 0 @@ -29,3 +27,4 @@ def test_receive_iterator(connstr_senders): assert len(received) == 1 assert received[0].body_as_str() == "Receiving only a single event" assert list(received[-1].body)[0] == b"Receiving only a single event" + client.close() diff --git a/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_reconnect.py b/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_reconnect.py index d9a76238acea..08281996c197 100644 --- a/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_reconnect.py +++ b/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_reconnect.py @@ -4,21 +4,17 @@ # license information. 
#-------------------------------------------------------------------------- -import os import time import pytest -from azure.eventhub import ( - EventData, - EventPosition, - EventHubError, - EventHubClient) +from azure.eventhub import EventData +from azure.eventhub.client import EventHubClient @pytest.mark.liveTest def test_send_with_long_interval_sync(connstr_receivers, sleep): connection_str, receivers = connstr_receivers - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - sender = client.create_producer() + client = EventHubClient.from_connection_string(connection_str) + sender = client._create_producer() with sender: sender.send(EventData(b"A single event")) for _ in range(1): @@ -36,3 +32,4 @@ def test_send_with_long_interval_sync(connstr_receivers, sleep): assert len(received) == 2 assert list(received[0].body)[0] == b"A single event" + client.close() diff --git a/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_send.py b/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_send.py index 9346932b6115..568bf3405e51 100644 --- a/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_send.py +++ b/sdk/eventhub/azure-eventhubs/tests/livetest/synctests/test_send.py @@ -10,14 +10,15 @@ import json import sys -from azure.eventhub import EventData, EventHubClient, TransportType +from azure.eventhub import EventData, TransportType +from azure.eventhub.client import EventHubClient @pytest.mark.liveTest def test_send_with_partition_key(connstr_receivers): connection_str, receivers = connstr_receivers - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - sender = client.create_producer() + client = EventHubClient.from_connection_string(connection_str) + sender = client._create_producer() with sender: data_val = 0 for partition in [b"a", b"b", b"c", b"d", b"e", b"f"]: @@ -37,6 +38,7 @@ def test_send_with_partition_key(connstr_receivers): assert existing == index except KeyError: found_partition_keys[message.partition_key] = index + client.close() @pytest.mark.liveTest @@ -44,8 +46,8 @@ def test_send_and_receive_large_body_size(connstr_receivers): if sys.platform.startswith('darwin'): pytest.skip("Skipping on OSX - open issue regarding message size") connection_str, receivers = connstr_receivers - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - sender = client.create_producer() + client = EventHubClient.from_connection_string(connection_str) + sender = client._create_producer() with sender: payload = 250 * 1024 sender.send(EventData("A" * payload)) @@ -56,13 +58,14 @@ def test_send_and_receive_large_body_size(connstr_receivers): assert len(received) == 1 assert len(list(received[0].body)[0]) == payload + client.close() @pytest.mark.liveTest def test_send_and_receive_zero_length_body(connstr_receivers): connection_str, receivers = connstr_receivers - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - sender = client.create_producer() + client = EventHubClient.from_connection_string(connection_str) + sender = client._create_producer() with sender: sender.send(EventData("")) @@ -72,13 +75,14 @@ def test_send_and_receive_zero_length_body(connstr_receivers): assert len(received) == 1 assert list(received[0].body)[0] == b"" + client.close() @pytest.mark.liveTest def test_send_single_event(connstr_receivers): connection_str, receivers = connstr_receivers - client = EventHubClient.from_connection_string(connection_str, 
network_tracing=False) - sender = client.create_producer() + client = EventHubClient.from_connection_string(connection_str) + sender = client._create_producer() with sender: sender.send(EventData(b"A single event")) @@ -88,6 +92,7 @@ def test_send_single_event(connstr_receivers): assert len(received) == 1 assert list(received[0].body)[0] == b"A single event" + client.close() @pytest.mark.liveTest @@ -98,8 +103,8 @@ def batched(): for i in range(10): yield EventData("Event number {}".format(i)) - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - sender = client.create_producer() + client = EventHubClient.from_connection_string(connection_str) + sender = client._create_producer() with sender: sender.send(batched()) @@ -111,13 +116,14 @@ def batched(): assert len(received) == 10 for index, message in enumerate(received): assert list(message.body)[0] == "Event number {}".format(index).encode('utf-8') + client.close() @pytest.mark.liveTest def test_send_partition(connstr_receivers): connection_str, receivers = connstr_receivers - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - sender = client.create_producer(partition_id="1") + client = EventHubClient.from_connection_string(connection_str) + sender = client._create_producer(partition_id="1") with sender: sender.send(EventData(b"Data")) @@ -125,13 +131,14 @@ def test_send_partition(connstr_receivers): assert len(partition_0) == 0 partition_1 = receivers[1].receive(timeout=2) assert len(partition_1) == 1 + client.close() @pytest.mark.liveTest def test_send_non_ascii(connstr_receivers): connection_str, receivers = connstr_receivers - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - sender = client.create_producer(partition_id="0") + client = EventHubClient.from_connection_string(connection_str) + sender = client._create_producer(partition_id="0") with sender: sender.send(EventData(u"é,è,à,ù,â,ê,î,ô,û")) sender.send(EventData(json.dumps({"foo": u"漢字"}))) @@ -140,6 +147,7 @@ def test_send_non_ascii(connstr_receivers): assert len(partition_0) == 2 assert partition_0[0].body_as_str() == u"é,è,à,ù,â,ê,î,ô,û" assert partition_0[1].body_as_json() == {"foo": u"漢字"} + client.close() @pytest.mark.liveTest @@ -150,8 +158,8 @@ def batched(): for i in range(10): yield EventData("Event number {}".format(i)) - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - sender = client.create_producer(partition_id="1") + client = EventHubClient.from_connection_string(connection_str) + sender = client._create_producer(partition_id="1") with sender: sender.send(batched()) time.sleep(1) @@ -160,13 +168,14 @@ def batched(): assert len(partition_0) == 0 partition_1 = receivers[1].receive(timeout=2) assert len(partition_1) == 10 + client.close() @pytest.mark.liveTest def test_send_array_sync(connstr_receivers): connection_str, receivers = connstr_receivers - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - sender = client.create_producer() + client = EventHubClient.from_connection_string(connection_str) + sender = client._create_producer() with sender: sender.send(EventData([b"A", b"B", b"C"])) @@ -176,14 +185,15 @@ def test_send_array_sync(connstr_receivers): assert len(received) == 1 assert list(received[0].body) == [b"A", b"B", b"C"] + client.close() @pytest.mark.liveTest def test_send_multiple_clients(connstr_receivers): connection_str, receivers = connstr_receivers - client = 
EventHubClient.from_connection_string(connection_str, network_tracing=False) - sender_0 = client.create_producer(partition_id="0") - sender_1 = client.create_producer(partition_id="1") + client = EventHubClient.from_connection_string(connection_str) + sender_0 = client._create_producer(partition_id="0") + sender_1 = client._create_producer(partition_id="1") with sender_0: sender_0.send(EventData(b"Message 0")) with sender_1: @@ -193,6 +203,7 @@ def test_send_multiple_clients(connstr_receivers): assert len(partition_0) == 1 partition_1 = receivers[1].receive(timeout=2) assert len(partition_1) == 1 + client.close() @pytest.mark.liveTest @@ -212,8 +223,8 @@ def batched(): ed.application_properties = app_prop yield ed - client = EventHubClient.from_connection_string(connection_str, network_tracing=False) - sender = client.create_producer() + client = EventHubClient.from_connection_string(connection_str) + sender = client._create_producer() with sender: sender.send(batched()) @@ -228,13 +239,14 @@ def batched(): assert list(message.body)[0] == "Event number {}".format(index).encode('utf-8') assert (app_prop_key.encode('utf-8') in message.application_properties) \ and (dict(message.application_properties)[app_prop_key.encode('utf-8')] == app_prop_value.encode('utf-8')) + client.close() @pytest.mark.liveTest def test_send_over_websocket_sync(connstr_receivers): connection_str, receivers = connstr_receivers - client = EventHubClient.from_connection_string(connection_str, transport_type=TransportType.AmqpOverWebsocket, network_tracing=False) - sender = client.create_producer() + client = EventHubClient.from_connection_string(connection_str, transport_type=TransportType.AmqpOverWebsocket) + sender = client._create_producer() event_list = [] for i in range(20): @@ -249,15 +261,16 @@ def test_send_over_websocket_sync(connstr_receivers): received.extend(r.receive(timeout=3)) assert len(received) == 20 + client.close() @pytest.mark.liveTest def test_send_with_create_event_batch_sync(connstr_receivers): connection_str, receivers = connstr_receivers - client = EventHubClient.from_connection_string(connection_str, transport_type=TransportType.AmqpOverWebsocket, network_tracing=False) - sender = client.create_producer() + client = EventHubClient.from_connection_string(connection_str, transport_type=TransportType.AmqpOverWebsocket) + sender = client._create_producer() - event_data_batch = sender.create_batch(max_size=100000, partition_key="0") + event_data_batch = sender.create_batch(max_size=100000) while True: try: event_data_batch.try_add(EventData('A single event data')) @@ -275,3 +288,4 @@ def test_send_with_create_event_batch_sync(connstr_receivers): sender.send(event_data_batch) sender.close() + client.close() diff --git a/sdk/eventhub/azure-eventhubs/tests/unittest/test_event_data.py b/sdk/eventhub/azure-eventhubs/tests/unittest/test_event_data.py index a36ac52f2571..44612759109f 100644 --- a/sdk/eventhub/azure-eventhubs/tests/unittest/test_event_data.py +++ b/sdk/eventhub/azure-eventhubs/tests/unittest/test_event_data.py @@ -18,11 +18,11 @@ def test_constructor(test_input, expected_result): event_data = EventData(test_input) assert event_data.body_as_str() == expected_result assert event_data.partition_key is None - assert event_data.application_properties is None + assert len(event_data.application_properties) == 0 assert event_data.enqueued_time is None assert event_data.offset is None assert event_data.sequence_number is None - assert event_data.system_properties == {} + assert 
len(event_data.system_properties) == 0
     with pytest.raises(TypeError):
         event_data.body_as_json()
diff --git a/sdk/eventhub/ci.yml b/sdk/eventhub/ci.yml
index 12f76e3a252f..d04759a7cea4 100644
--- a/sdk/eventhub/ci.yml
+++ b/sdk/eventhub/ci.yml
@@ -46,3 +46,5 @@ stages:
       safeName: azureeventhub
     - name: azure_eventhub_checkpointstoreblob_aio
       safeName: azureeventhubcheckpointstoreblobaio
+    - name: azure_eventhub_checkpointstoreblob
+      safeName: azureeventhubcheckpointstoreblob
diff --git a/shared_requirements.txt b/shared_requirements.txt
index 5543a4fbcb90..d950629eff9b 100644
--- a/shared_requirements.txt
+++ b/shared_requirements.txt
@@ -9,7 +9,7 @@ azure-common~=1.1
 azure-core<2.0.0,>=1.0.0
 azure-cosmosdb-table~=1.0
 azure-datalake-store~=0.0.18
-azure-eventhub<6.0.0,>=5.0.0b3
+azure-eventhub<6.0.0,>=5.0.0b5
 azure-eventgrid~=1.1
 azure-graphrbac~=0.40.0
 azure-keyvault==4.0.0b5
@@ -113,6 +113,7 @@ opencensus>=0.6.0
 opencensus-ext-threading
 opencensus-ext-azure>=0.3.1
 #override azure-cognitiveservices-inkrecognizer azure-core<2.0.0,>=1.0.0b2
+#override azure-eventhub-checkpointstoreblob azure-storage-blob<13.0.0,>=12.0.0
 #override azure-eventhub-checkpointstoreblob-aio azure-storage-blob<=12.1,>=12.0.0b2
 #override azure-eventhub-checkpointstoreblob-aio aiohttp<4.0,>=3.0
 #override azure-eventhub uamqp<2.0,>=1.2.3
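
The synchronous consumer tests above drive the blocking `EventHubConsumerClient.receive` call from a worker thread. A minimal standalone sketch of that same pattern, using only the calls exercised in `test_receive_partition`; the connection string, the handler body, and the fixed sleep are placeholders, not a recommended shutdown strategy:

```
import threading
import time

from azure.eventhub import EventHubConsumerClient

CONNECTION_STR = "<event hubs connection string>"  # placeholder


def on_events(partition_context, events):
    # Handle a batch of events for the partition this handler is bound to.
    pass


client = EventHubConsumerClient.from_connection_string(CONNECTION_STR)
with client:
    # receive() blocks, so the tests run it on a worker thread and stop it by
    # exiting the `with` block (which closes the client) after a fixed wait.
    worker = threading.Thread(
        target=client.receive,
        args=(on_events,),
        kwargs={"consumer_group": "$default",
                "initial_event_position": "-1",
                "partition_id": "0"})
    worker.start()
    time.sleep(10)
```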