From d95acb2bea0f092589728a8a3a378c3d8c7f1e2c Mon Sep 17 00:00:00 2001
From: Alan Boudreault
Date: Thu, 10 Oct 2019 14:09:48 -0400
Subject: [PATCH 001/211] ngdg

---
 CHANGELOG.rst                                 |   23 +
 build.yaml                                    |   14 +
 cassandra/__init__.py                         |    2 +-
 cassandra/cluster.py                          |  137 +-
 cassandra/datastax/graph/__init__.py          |    3 +-
 cassandra/datastax/graph/fluent/__init__.py   |  137 +-
 .../datastax/graph/fluent/_predicates.py      |   37 +-
 cassandra/datastax/graph/fluent/_query.py     |  112 +-
 .../datastax/graph/fluent/_serializers.py     |  195 ++-
 cassandra/datastax/graph/graphson.py          |  585 +++++++-
 cassandra/datastax/graph/query.py             |   56 +-
 cassandra/metadata.py                         |  397 +++++-
 test-requirements.txt                         |    3 +-
 tests/integration/__init__.py                 |   35 +
 tests/integration/advanced/__init__.py        |  578 +-------
 tests/integration/advanced/graph/__init__.py  | 1192 +++++++++++++++++
 .../advanced/graph/fluent/test_graph.py       |  656 ++++++---
 .../advanced/graph/fluent/test_search.py      |  402 ++++--
 .../integration/advanced/graph/test_graph.py  |  271 ++++
 .../advanced/graph/test_graph_cont_paging.py  |   78 ++
 .../advanced/graph/test_graph_datatype.py     |  260 ++++
 .../advanced/graph/test_graph_query.py        |  575 ++++++++
 .../integration/advanced/test_adv_metadata.py |   89 +-
 tests/integration/advanced/test_auth.py       |    9 +-
 tests/integration/advanced/test_spark.py      |    7 +-
 tests/integration/simulacron/utils.py         |    7 +-
 tests/integration/standard/test_metadata.py   |   17 +-
 tests/unit/advanced/test_graph.py             |   20 +-
 tests/unit/advanced/test_insights.py          |    4 +-
 tests/unit/advanced/test_metadata.py          |  141 ++
 30 files changed, 4921 insertions(+), 1121 deletions(-)
 create mode 100644 tests/integration/advanced/graph/test_graph.py
 create mode 100644 tests/integration/advanced/graph/test_graph_cont_paging.py
 create mode 100644 tests/integration/advanced/graph/test_graph_datatype.py
 create mode 100644 tests/integration/advanced/graph/test_graph_query.py
 create mode 100644 tests/unit/advanced/test_metadata.py

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 3681885ff0..3fdd3a7893 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,3 +1,26 @@
+3.22.0
+======
+Unreleased
+
+Features
+--------
+
+* Parse new schema metadata in NGDG and generate table edges CQL syntax (PYTHON-996)
+* Add GraphSON3 support (PYTHON-788)
+* Use GraphSON3 as default for Native graphs (PYTHON-1004)
+* Add Tuple and UDT types for native graph (PYTHON-1005)
+* Add Duration type for native graph (PYTHON-1000)
+* Add gx:ByteBuffer graphson type support for Blob field (PYTHON-1027)
+* Enable Paging Through DSE Driver for Gremlin Traversals (PYTHON-1045)
+* Provide numerical wrappers to ensure proper graphson schema definition (PYTHON-1051)
+* Resolve the row_factory automatically for native graphs (PYTHON-1056)
+* Add g:TraversalMetrics/g:Metrics graph deserializers (PYTHON-1057)
+* Add g:BulkSet graph deserializers (PYTHON-1060)
+* Update Graph Engine names and the way to create a Classic/Native Graph (PYTHON-1090)
+* Update Native to Core Graph Engine
+* Add graphson3 and native graph support (PYTHON-1039)
+* Expose filter predicates for cql collections (PYTHON-1019)
+
 3.21.0
 ======
 Unreleased
diff --git a/build.yaml b/build.yaml
index f94b78e259..1d6f63c274 100644
--- a/build.yaml
+++ b/build.yaml
@@ -55,6 +55,18 @@ schedules:
     env_vars: |
       EVENT_LOOP_MANAGER='libev'
 
+ngdg:
+  schedule: adhoc
+  branches:
+    include: [ngdg_master_ft]
+  env_vars: |
+    EVENT_LOOP_MANAGER='libev'
+    EXCLUDE_LONG=1
+  matrix:
+    exclude:
+      - python: [2.7, 3.4, 3.6, 3.7]
+      - cassandra: ['dse-4.8', 'dse-5.0', 'dse-6.0', 'dse-6.7']
+
 weekly_master:
   schedule: 0 10 * * 6
   branches:
@@ -166,6 +178,7 @@ cassandra:
   - 'dse-5.1'
   - 'dse-6.0'
   - 'dse-6.7'
+  - 'dse-6.8'
 
 env:
   CYTHON:
@@ -177,6 +190,7 @@ build:
     export JAVA_HOME=$CCM_JAVA_HOME
     export PATH=$JAVA_HOME/bin:$PATH
     export PYTHONPATH=""
+    export CCM_MAX_HEAP_SIZE=1024M
 
     # Install latest setuptools
     pip install --upgrade pip
diff --git a/cassandra/__init__.py b/cassandra/__init__.py
index b8f4b9af39..2a4bd8f0e6 100644
--- a/cassandra/__init__.py
+++ b/cassandra/__init__.py
@@ -22,7 +22,7 @@ def emit(self, record):
 logging.getLogger('cassandra').addHandler(NullHandler())
 
-__version_info__ = (3, 19, 0, 'post0')
+__version_info__ = (3, 19, 0, '20190910+labs')
 __version__ = '.'.join(map(str, __version_info__))
 
diff --git a/cassandra/cluster.py b/cassandra/cluster.py
index 1016be72be..b6442fe726 100644
--- a/cassandra/cluster.py
+++ b/cassandra/cluster.py
@@ -83,14 +83,16 @@
 from cassandra.marshal import int64_pack
 from cassandra.timestamps import MonotonicTimestampGenerator
 from cassandra.compat import Mapping
-from cassandra.util import _resolve_contact_points_to_string_map
+from cassandra.util import _resolve_contact_points_to_string_map, Version
 
 from cassandra.datastax.insights.reporter import MonitorReporter
 from cassandra.datastax.insights.util import version_supports_insights
 from cassandra.datastax.graph import (graph_object_row_factory, GraphOptions, GraphSON1Serializer,
-                                      GraphProtocol, GraphSON2Serializer, GraphStatement, SimpleGraphStatement)
-from cassandra.datastax.graph.query import _request_timeout_key
+                                      GraphProtocol, GraphSON2Serializer, GraphStatement, SimpleGraphStatement,
+                                      graph_graphson2_row_factory, graph_graphson3_row_factory,
+                                      GraphSON3Serializer)
+from cassandra.datastax.graph.query import _request_timeout_key, _GraphSONContextRowFactory
 
 if six.PY3:
     long = int
@@ -141,6 +143,7 @@ def _is_gevent_monkey_patched():
 DEFAULT_MIN_CONNECTIONS_PER_REMOTE_HOST = 1
 DEFAULT_MAX_CONNECTIONS_PER_REMOTE_HOST = 2
 
+_GRAPH_PAGING_MIN_DSE_VERSION = Version('6.8.0')
 
 _NOT_SET = object()
 
@@ -395,20 +398,21 @@ class GraphExecutionProfile(ExecutionProfile):
 
     def __init__(self, load_balancing_policy=_NOT_SET, retry_policy=None,
                  consistency_level=ConsistencyLevel.LOCAL_ONE, serial_consistency_level=None,
-                 request_timeout=30.0, row_factory=graph_object_row_factory,
-                 graph_options=None):
+                 request_timeout=30.0, row_factory=None,
+                 graph_options=None, continuous_paging_options=_NOT_SET):
         """
         Default execution profile for graph execution.
 
-        See :class:`.ExecutionProfile`
-        for base attributes.
+        See :class:`.ExecutionProfile` for base attributes. Note that if not explicitly set,
+        the row_factory and graph_options.graph_protocol are resolved during the query execution.
 
         In addition to default parameters shown in the signature, this profile
         also defaults ``retry_policy`` to :class:`cassandra.policies.NeverRetryPolicy`.
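+
+        Example, a minimal sketch (the profile key ``'graph_ep'`` and graph name
+        ``'my_graph'`` are illustrative placeholders, not driver defaults)::
+
+            ep = GraphExecutionProfile(
+                graph_options=GraphOptions(graph_name='my_graph'))
+            cluster = Cluster(execution_profiles={'graph_ep': ep})
+            session = cluster.connect()
+            session.execute_graph('g.V().limit(1)', execution_profile='graph_ep')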
""" retry_policy = retry_policy or NeverRetryPolicy() super(GraphExecutionProfile, self).__init__(load_balancing_policy, retry_policy, consistency_level, - serial_consistency_level, request_timeout, row_factory) + serial_consistency_level, request_timeout, row_factory, + continuous_paging_options=continuous_paging_options) self.graph_options = graph_options or GraphOptions(graph_source=b'g', graph_language=b'gremlin-groovy') @@ -417,7 +421,7 @@ class GraphAnalyticsExecutionProfile(GraphExecutionProfile): def __init__(self, load_balancing_policy=None, retry_policy=None, consistency_level=ConsistencyLevel.LOCAL_ONE, serial_consistency_level=None, - request_timeout=3600. * 24. * 7., row_factory=graph_object_row_factory, + request_timeout=3600. * 24. * 7., row_factory=None, graph_options=None): """ Execution profile with timeout and load balancing appropriate for graph analytics queries. @@ -2434,6 +2438,7 @@ def default_serial_consistency_level(self, cl): _profile_manager = None _metrics = None _request_init_callbacks = None + _graph_paging_available = False def __init__(self, cluster, hosts, keyspace=None): self.cluster = cluster @@ -2466,6 +2471,8 @@ def __init__(self, cluster, hosts, keyspace=None): msg += " using keyspace '%s'" % self.keyspace raise NoHostAvailable(msg, [h.address for h in hosts]) + self._graph_paging_available = self._check_graph_paging_available() + cc_host = self.cluster.get_control_connection_host() valid_insights_version = (cc_host and version_supports_insights(cc_host.dse_version)) if self.cluster.monitor_reporting_enabled and valid_insights_version: @@ -2605,18 +2612,31 @@ def execute_graph_async(self, query, parameters=None, trace=False, execution_pro if not isinstance(query, GraphStatement): query = SimpleGraphStatement(query) - execution_profile = self._maybe_get_execution_profile(execution_profile) # look up instance here so we can apply the extended attributes + # Clone and look up instance here so we can resolve and apply the extended attributes + execution_profile = self.execution_profile_clone_update(execution_profile) + + if not hasattr(execution_profile, 'graph_options'): + raise ValueError( + "Execution profile for graph queries must derive from GraphExecutionProfile, and provide graph_options") + + self._resolve_execution_profile_options(execution_profile) + # make sure the graphson context row factory is binded to this cluster try: - options = execution_profile.graph_options.copy() - except AttributeError: - raise ValueError("Execution profile for graph queries must derive from GraphExecutionProfile, and provide graph_options") + if issubclass(execution_profile.row_factory, _GraphSONContextRowFactory): + execution_profile.row_factory = execution_profile.row_factory(self.cluster) + except TypeError: + # issubclass might fail if arg1 is an instance + pass + + # set graph paging if needed + self._maybe_set_graph_paging(execution_profile) graph_parameters = None if parameters: - graph_parameters = self._transform_params(parameters, graph_options=options) + graph_parameters = self._transform_params(parameters, graph_options=execution_profile.graph_options) - custom_payload = options.get_options_map() + custom_payload = execution_profile.graph_options.get_options_map() if execute_as: custom_payload[_proxy_execute_key] = six.b(execute_as) custom_payload[_request_timeout_key] = int64_pack(long(execution_profile.request_timeout * 1000)) @@ -2627,12 +2647,81 @@ def execute_graph_async(self, query, parameters=None, trace=False, execution_pro 
future.message.query_params = graph_parameters future._protocol_handler = self.client_protocol_handler - if options.is_analytics_source and isinstance(execution_profile.load_balancing_policy, DefaultLoadBalancingPolicy): + if execution_profile.graph_options.is_analytics_source and \ + isinstance(execution_profile.load_balancing_policy, DefaultLoadBalancingPolicy): self._target_analytics_master(future) else: future.send_request() return future + def _maybe_set_graph_paging(self, execution_profile): + graph_paging = execution_profile.continuous_paging_options + if execution_profile.continuous_paging_options is _NOT_SET: + graph_paging = ContinuousPagingOptions() if self._graph_paging_available else None + + execution_profile.continuous_paging_options = graph_paging + + def _check_graph_paging_available(self): + """Verify if we can enable graph paging. This executed only once when the session is created.""" + + if not ProtocolVersion.has_continuous_paging_next_pages(self._protocol_version): + return False + + for host in self.cluster.metadata.all_hosts(): + if host.dse_version is None: + return False + + version = Version(host.dse_version) + if version < _GRAPH_PAGING_MIN_DSE_VERSION: + return False + + return True + + def _resolve_execution_profile_options(self, execution_profile): + """ + Determine the GraphSON protocol and row factory for a graph query. This is useful + to configure automatically the execution profile when executing a query on a + core graph. + If `graph_protocol` is not explicitly specified, the following rules apply: + - Default to GraphProtocol.GRAPHSON_1_0, or GRAPHSON_2_0 if the `graph_language` is not gremlin-groovy. + - If `graph_options.graph_name` is specified and is a Core graph, set GraphSON_3_0. + If `row_factory` is not explicitly specified, the following rules apply: + - Default to graph_object_row_factory. + - If `graph_options.graph_name` is specified and is a Core graph, set graph_graphson3_row_factory. + """ + if execution_profile.graph_options.graph_protocol is not None and \ + execution_profile.row_factory is not None: + return + + graph_options = execution_profile.graph_options + + is_core_graph = False + if graph_options.graph_name: + # graph_options.graph_name is bytes ... + name = graph_options.graph_name.decode('utf-8') + if name in self.cluster.metadata.keyspaces: + ks_metadata = self.cluster.metadata.keyspaces[name] + if ks_metadata.graph_engine == 'Core': + is_core_graph = True + + if is_core_graph: + graph_protocol = GraphProtocol.GRAPHSON_3_0 + row_factory = graph_graphson3_row_factory + else: + if graph_options.graph_language == GraphOptions.DEFAULT_GRAPH_LANGUAGE: + graph_protocol = GraphOptions.DEFAULT_GRAPH_PROTOCOL + row_factory = graph_object_row_factory + else: + # if not gremlin-groovy, GraphSON_2_0 + graph_protocol = GraphProtocol.GRAPHSON_2_0 + row_factory = graph_graphson2_row_factory + + # Only apply if not set explicitly + if graph_options.graph_protocol is None: + graph_options.graph_protocol = graph_protocol + if execution_profile.row_factory is None: + execution_profile.row_factory = row_factory + def _transform_params(self, parameters, graph_options): if not isinstance(parameters, dict): raise ValueError('The parameters must be a dictionary. 
Unnamed parameters are not allowed.') @@ -2640,12 +2729,16 @@ def _transform_params(self, parameters, graph_options): # Serialize python types to graphson serializer = GraphSON1Serializer if graph_options.graph_protocol == GraphProtocol.GRAPHSON_2_0: - serializer = GraphSON2Serializer - - serialized_parameters = { - p: serializer.serialize(v) - for p, v in six.iteritems(parameters) - } + serializer = GraphSON2Serializer() + elif graph_options.graph_protocol == GraphProtocol.GRAPHSON_3_0: + # only required for core graphs + context = { + 'cluster': self.cluster, + 'graph_name': graph_options.graph_name.decode('utf-8') if graph_options.graph_name else None + } + serializer = GraphSON3Serializer(context) + + serialized_parameters = serializer.serialize(parameters) return [json.dumps(serialized_parameters).encode('utf-8')] def _target_analytics_master(self, future): diff --git a/cassandra/datastax/graph/__init__.py b/cassandra/datastax/graph/__init__.py index 0c03c9249d..d828c7f707 100644 --- a/cassandra/datastax/graph/__init__.py +++ b/cassandra/datastax/graph/__init__.py @@ -17,6 +17,7 @@ from cassandra.datastax.graph.query import ( GraphOptions, GraphProtocol, GraphStatement, SimpleGraphStatement, Result, graph_object_row_factory, single_object_row_factory, - graph_result_row_factory, graph_graphson2_row_factory + graph_result_row_factory, graph_graphson2_row_factory, + graph_graphson3_row_factory ) from cassandra.datastax.graph.graphson import * diff --git a/cassandra/datastax/graph/fluent/__init__.py b/cassandra/datastax/graph/fluent/__init__.py index 193272d986..7d1ba0b60c 100644 --- a/cassandra/datastax/graph/fluent/__init__.py +++ b/cassandra/datastax/graph/fluent/__init__.py @@ -31,29 +31,29 @@ from cassandra.cluster import Session, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT from cassandra.datastax.graph import GraphOptions, GraphProtocol + from cassandra.datastax.graph.query import _GraphSONContextRowFactory from cassandra.datastax.graph.fluent.serializers import ( - GremlinGraphSONReader, - deserializers, - gremlin_deserializers + GremlinGraphSONReaderV2, + GremlinGraphSONReaderV3, + dse_graphson2_deserializers, + gremlin_graphson2_deserializers, + dse_graphson3_deserializers, + gremlin_graphson3_deserializers ) from cassandra.datastax.graph.fluent.query import _DefaultTraversalBatch, _query_from_traversal log = logging.getLogger(__name__) - __all__ = ['BaseGraphRowFactory', 'dse_graphson_reader', 'graphson_reader', 'graph_traversal_row_factory', + __all__ = ['BaseGraphRowFactory', 'graph_traversal_row_factory', 'graph_traversal_dse_object_row_factory', 'DSESessionRemoteGraphConnection', 'DseGraph'] - # Create our custom GraphSONReader/Writer - dse_graphson_reader = GremlinGraphSONReader(deserializer_map=deserializers) - graphson_reader = GremlinGraphSONReader(deserializer_map=gremlin_deserializers) - # Traversal result keys _bulk_key = 'bulk' _result_key = 'result' - class BaseGraphRowFactory(object): + class BaseGraphRowFactory(_GraphSONContextRowFactory): """ Base row factory for graph traversal. 
This class basically wraps a graphson reader function to handle additional features of Gremlin/DSE
@@ -61,37 +61,51 @@ class BaseGraphRowFactory(object):
         and is callable.
 
         Currently supported:
           - bulk results
+        """
+
+        def __call__(self, column_names, rows):
+            for row in rows:
+                parsed_row = self.graphson_reader.readObject(row[0])
+                yield parsed_row[_result_key]
+                bulk = parsed_row.get(_bulk_key, 1)
+                for _ in range(bulk - 1):
+                    yield copy.deepcopy(parsed_row[_result_key])
 
-        :param graphson_reader: The function used to read the graphson.
-        Use example::
+    class _GremlinGraphSON2RowFactory(BaseGraphRowFactory):
+        """Row Factory that returns the decoded graphson2."""
+        graphson_reader_class = GremlinGraphSONReaderV2
+        graphson_reader_kwargs = {'deserializer_map': gremlin_graphson2_deserializers}
 
-            my_custom_row_factory = BaseGraphRowFactory(custom_graphson_reader.readObject)
-        """
-        def __init__(self, graphson_reader):
-            self._graphson_reader = graphson_reader
+    class _DseGraphSON2RowFactory(BaseGraphRowFactory):
+        """Row Factory that returns the decoded graphson2 as DSE types."""
+        graphson_reader_class = GremlinGraphSONReaderV2
+        graphson_reader_kwargs = {'deserializer_map': dse_graphson2_deserializers}
 
-        def __call__(self, column_names, rows):
-            results = []
+    gremlin_graphson2_traversal_row_factory = _GremlinGraphSON2RowFactory
+    # TODO remove in next major
+    graph_traversal_row_factory = gremlin_graphson2_traversal_row_factory
 
-            for row in rows:
-                parsed_row = self._graphson_reader(row[0])
-                bulk = parsed_row.get(_bulk_key, 1)
-                if bulk > 1:  # Avoid deepcopy call if bulk <= 1
-                    results.extend([copy.deepcopy(parsed_row[_result_key])
-                                    for _ in range(bulk - 1)])
+    dse_graphson2_traversal_row_factory = _DseGraphSON2RowFactory
+    # TODO remove in next major
+    graph_traversal_dse_object_row_factory = dse_graphson2_traversal_row_factory
 
-                results.append(parsed_row[_result_key])
-            return results
+    class _GremlinGraphSON3RowFactory(BaseGraphRowFactory):
+        """Row Factory that returns the decoded graphson3."""
+        graphson_reader_class = GremlinGraphSONReaderV3
+        graphson_reader_kwargs = {'deserializer_map': gremlin_graphson3_deserializers}
 
-    graph_traversal_row_factory = BaseGraphRowFactory(graphson_reader.readObject)
-    graph_traversal_row_factory.__doc__ = "Row Factory that returns the decoded graphson."
+    class _DseGraphSON3RowFactory(BaseGraphRowFactory):
+        """Row Factory that returns the decoded graphson3 as DSE types."""
+        graphson_reader_class = GremlinGraphSONReaderV3
+        graphson_reader_kwargs = {'deserializer_map': dse_graphson3_deserializers}
 
-    graph_traversal_dse_object_row_factory = BaseGraphRowFactory(dse_graphson_reader.readObject)
-    graph_traversal_dse_object_row_factory.__doc__ = "Row Factory that returns the decoded graphson as DSE types."
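+    # Illustrative note (not driver code): `BaseGraphRowFactory.__call__` above
+    # expands bulked traversal results lazily. Given a row whose GraphSON
+    # payload decodes to {'result': 'v1', 'bulk': 3}, the generator yields the
+    # result once, then `bulk - 1` deep copies, so the caller sees three
+    # independent objects:
+    #
+    #     factory = graph_traversal_row_factory(cluster)  # hypothetical instance
+    #     list(factory(None, rows))  # -> ['v1', 'v1', 'v1']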
+
+    gremlin_graphson3_traversal_row_factory = _GremlinGraphSON3RowFactory
+    dse_graphson3_traversal_row_factory = _DseGraphSON3RowFactory
 
 
     class DSESessionRemoteGraphConnection(RemoteConnection):
@@ -117,21 +131,35 @@ def __init__(self, session, graph_name=None, execution_profile=EXEC_PROFILE_GRAP
             self.graph_name = graph_name
             self.execution_profile = execution_profile
 
-        def submit(self, bytecode):
+        def _traversers_generator(self, traversers):
+            for t in traversers:
+                yield Traverser(t)
 
-            query = DseGraph.query_from_traversal(bytecode)
-            ep = self.session.execution_profile_clone_update(self.execution_profile,
-                                                             row_factory=graph_traversal_row_factory)
-            graph_options = ep.graph_options.copy()
+        def submit(self, bytecode):
+            ep = self.session.execution_profile_clone_update(self.execution_profile)
+            graph_options = ep.graph_options
+            graph_options.graph_name = self.graph_name or graph_options.graph_name
+            # a traversal cannot use anything other than bytecode
             graph_options.graph_language = DseGraph.DSE_GRAPH_QUERY_LANGUAGE
-            if self.graph_name:
-                graph_options.graph_name = self.graph_name
-
-            ep.graph_options = graph_options
-
+            # We resolve the execution profile options here to know which gremlin row factory to set
+            self.session._resolve_execution_profile_options(ep)
+
+            context = None
+            if graph_options.graph_protocol == GraphProtocol.GRAPHSON_2_0:
+                row_factory = gremlin_graphson2_traversal_row_factory
+            elif graph_options.graph_protocol == GraphProtocol.GRAPHSON_3_0:
+                row_factory = gremlin_graphson3_traversal_row_factory
+                context = {
+                    'cluster': self.session.cluster,
+                    'graph_name': graph_options.graph_name.decode('utf-8')
+                }
+            else:
+                raise ValueError('Unknown graph protocol: {}'.format(graph_options.graph_protocol))
+
+            ep.row_factory = row_factory
+            query = DseGraph.query_from_traversal(bytecode, graph_options.graph_protocol, context)
             traversers = self.session.execute_graph(query, execution_profile=ep)
-            traversers = [Traverser(t) for t in traversers]
-            return RemoteTraversal(iter(traversers), TraversalSideEffects())
+            return RemoteTraversal(self._traversers_generator(traversers), TraversalSideEffects())
 
         def __str__(self):
             return "<DSESessionRemoteGraphConnection: graph_name={}>".format(self.graph_name)
@@ -149,12 +177,20 @@ class DseGraph(object):
         Graph query language, Default is 'bytecode-json' (GraphSON).
         """
 
+    DSE_GRAPH_QUERY_PROTOCOL = GraphProtocol.GRAPHSON_2_0
+    """
+    Graph query protocol, Default is GraphProtocol.GRAPHSON_2_0.
+    """
+
     @staticmethod
-    def query_from_traversal(traversal):
+    def query_from_traversal(traversal, graph_protocol=DSE_GRAPH_QUERY_PROTOCOL, context=None):
         """
         From a GraphTraversal, return a query string based on the language specified in `DseGraph.DSE_GRAPH_QUERY_LANGUAGE`.
 
         :param traversal: The GraphTraversal object
+        :param graph_protocol: The graph protocol. Default is `DseGraph.DSE_GRAPH_QUERY_PROTOCOL`.
+        :param context: The dict of the serialization context, needed for GraphSON3 (tuple, udt).
+ e.g: {'cluster': dse_cluster, 'graph_name': name} """ if isinstance(traversal, GraphTraversal): @@ -165,7 +201,7 @@ def query_from_traversal(traversal): log.warning("GraphTraversal session, graph_name and execution_profile are " "only taken into account when executed with TinkerPop.") - return _query_from_traversal(traversal) + return _query_from_traversal(traversal, graph_protocol, context) @staticmethod def traversal_source(session=None, graph_name=None, execution_profile=EXEC_PROFILE_GRAPH_DEFAULT, @@ -201,18 +237,27 @@ def traversal_source(session=None, graph_name=None, execution_profile=EXEC_PROFI return traversal_source @staticmethod - def create_execution_profile(graph_name): + def create_execution_profile(graph_name, graph_protocol=DSE_GRAPH_QUERY_PROTOCOL, **kwargs): """ Creates an ExecutionProfile for GraphTraversal execution. You need to register that execution profile to the cluster by using `cluster.add_execution_profile`. :param graph_name: The graph name + :param graph_protocol: (Optional) The graph protocol, default is `DSE_GRAPH_QUERY_PROTOCOL`. """ - ep = GraphExecutionProfile(row_factory=graph_traversal_dse_object_row_factory, + if graph_protocol == GraphProtocol.GRAPHSON_2_0: + row_factory = dse_graphson2_traversal_row_factory + elif graph_protocol == GraphProtocol.GRAPHSON_3_0: + row_factory = dse_graphson3_traversal_row_factory + else: + raise ValueError('Unknown graph protocol: {}'.format(graph_protocol)) + + ep = GraphExecutionProfile(row_factory=row_factory, graph_options=GraphOptions(graph_name=graph_name, graph_language=DseGraph.DSE_GRAPH_QUERY_LANGUAGE, - graph_protocol=GraphProtocol.GRAPHSON_2_0)) + graph_protocol=graph_protocol), + **kwargs) return ep @staticmethod diff --git a/cassandra/datastax/graph/fluent/_predicates.py b/cassandra/datastax/graph/fluent/_predicates.py index b63dd90043..95bd533d5e 100644 --- a/cassandra/datastax/graph/fluent/_predicates.py +++ b/cassandra/datastax/graph/fluent/_predicates.py @@ -18,7 +18,7 @@ from cassandra.util import Distance -__all__ = ['GeoP', 'TextDistanceP', 'Search', 'GeoUnit', 'Geo'] +__all__ = ['GeoP', 'TextDistanceP', 'Search', 'GeoUnit', 'Geo', 'CqlCollection'] class GeoP(object): @@ -138,6 +138,41 @@ def phrase(value, proximity): return TextDistanceP.phrase(value, proximity) +class CqlCollection(object): + + @staticmethod + def contains(value): + """ + Search for a value inside a cql list/set column. + :param value: the value to look for. + """ + return P('contains', value) + + @staticmethod + def contains_value(value): + """ + Search for a map value. + :param value: the value to look for. + """ + return P('containsValue', value) + + @staticmethod + def contains_key(value): + """ + Search for a map key. + :param value: the value to look for. + """ + return P('containsKey', value) + + @staticmethod + def entry_eq(value): + """ + Search for a map entry. + :param value: the value to look for. + """ + return P('entryEq', value) + + class GeoUnit(object): _EARTH_MEAN_RADIUS_KM = 6371.0087714 _DEGREES_TO_RADIANS = math.pi / 180 diff --git a/cassandra/datastax/graph/fluent/_query.py b/cassandra/datastax/graph/fluent/_query.py index b5d24df05b..bd89046852 100644 --- a/cassandra/datastax/graph/fluent/_query.py +++ b/cassandra/datastax/graph/fluent/_query.py @@ -12,29 +12,104 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import six import logging -from cassandra.graph import SimpleGraphStatement +from cassandra.graph import SimpleGraphStatement, GraphProtocol from cassandra.cluster import EXEC_PROFILE_GRAPH_DEFAULT from gremlin_python.process.graph_traversal import GraphTraversal -from gremlin_python.structure.io.graphsonV2d0 import GraphSONWriter +from gremlin_python.structure.io.graphsonV2d0 import GraphSONWriter as GraphSONWriterV2 +from gremlin_python.structure.io.graphsonV3d0 import GraphSONWriter as GraphSONWriterV3 -from cassandra.datastax.graph.fluent.serializers import serializers +from cassandra.datastax.graph.fluent.serializers import GremlinUserTypeIO, \ + dse_graphson2_serializers, dse_graphson3_serializers log = logging.getLogger(__name__) -graphson_writer = GraphSONWriter(serializer_map=serializers) __all__ = ['TraversalBatch', '_query_from_traversal', '_DefaultTraversalBatch'] -def _query_from_traversal(traversal): +class _GremlinGraphSONWriterAdapter(object): + + def __init__(self, context, **kwargs): + super(_GremlinGraphSONWriterAdapter, self).__init__(**kwargs) + self.context = context + self.user_types = None + + def serialize(self, value, _): + return self.toDict(value) + + def get_serializer(self, value): + serializer = None + try: + serializer = self.serializers[type(value)] + except KeyError: + for key, ser in self.serializers.items(): + if isinstance(value, key): + serializer = ser + + if self.context: + # Check if UDT + if self.user_types is None: + try: + user_types = self.context['cluster']._user_types[self.context['graph_name']] + self.user_types = dict(map(reversed, six.iteritems(user_types))) + except KeyError: + self.user_types = {} + + # Custom detection to map a namedtuple to udt + if (tuple in self.serializers and serializer is self.serializers[tuple] and hasattr(value, '_fields') or + (not serializer and type(value) in self.user_types)): + serializer = GremlinUserTypeIO + + if serializer: + try: + # A serializer can have specialized serializers (e.g for Int32 and Int64, so value dependant) + serializer = serializer.get_specialized_serializer(value) + except AttributeError: + pass + + return serializer + + def toDict(self, obj): + serializer = self.get_serializer(obj) + return serializer.dictify(obj, self) if serializer else obj + + def definition(self, value): + serializer = self.get_serializer(value) + return serializer.definition(value, self) + + +class GremlinGraphSON2Writer(_GremlinGraphSONWriterAdapter, GraphSONWriterV2): + pass + + +class GremlinGraphSON3Writer(_GremlinGraphSONWriterAdapter, GraphSONWriterV3): + pass + + +graphson2_writer = GremlinGraphSON2Writer +graphson3_writer = GremlinGraphSON3Writer + + +def _query_from_traversal(traversal, graph_protocol, context=None): """ From a GraphTraversal, return a query string. :param traversal: The GraphTraversal object + :param graphson_protocol: The graph protocol to determine the output format. 
""" + if graph_protocol == GraphProtocol.GRAPHSON_2_0: + graphson_writer = graphson2_writer(context, serializer_map=dse_graphson2_serializers) + elif graph_protocol == GraphProtocol.GRAPHSON_3_0: + if context is None: + raise ValueError('Missing context for GraphSON3 serialization requires.') + graphson_writer = graphson3_writer(context, serializer_map=dse_graphson3_serializers) + else: + raise ValueError('Unknown graph protocol: {}'.format(graph_protocol)) + try: query = graphson_writer.writeObject(traversal) except Exception: @@ -87,9 +162,11 @@ def execute(self): """ raise NotImplementedError() - def as_graph_statement(self): + def as_graph_statement(self, graph_protocol=GraphProtocol.GRAPHSON_2_0): """ Return the traversal batch as GraphStatement. + + :param graph_protocol: The graph protocol for the GraphSONWriter. Default is GraphProtocol.GRAPHSON_2_0. """ raise NotImplementedError() @@ -115,32 +192,35 @@ def __init__(self, *args, **kwargs): super(_DefaultTraversalBatch, self).__init__(*args, **kwargs) self._traversals = [] - @property - def _query(self): - return u"[{0}]".format(','.join(self._traversals)) - def add(self, traversal): if not isinstance(traversal, GraphTraversal): raise ValueError('traversal should be a gremlin GraphTraversal') - query = _query_from_traversal(traversal) - self._traversals.append(query) - + self._traversals.append(traversal) return self def add_all(self, traversals): for traversal in traversals: self.add(traversal) - def as_graph_statement(self): - return SimpleGraphStatement(self._query) + def as_graph_statement(self, graph_protocol=GraphProtocol.GRAPHSON_2_0, context=None): + statements = [_query_from_traversal(t, graph_protocol, context) for t in self._traversals] + query = u"[{0}]".format(','.join(statements)) + return SimpleGraphStatement(query) def execute(self): if self._session is None: raise ValueError('A DSE Session must be provided to execute the traversal batch.') execution_profile = self._execution_profile if self._execution_profile else EXEC_PROFILE_GRAPH_DEFAULT - return self._session.execute_graph(self._query, execution_profile=execution_profile) + graph_options = self._session.get_execution_profile(execution_profile).graph_options + context = { + 'cluster': self._session.cluster, + 'graph_name': graph_options.graph_name + } + statement = self.as_graph_statement(graph_options.graph_protocol, context=context) \ + if graph_options.graph_protocol else self.as_graph_statement(context=context) + return self._session.execute_graph(statement, execution_profile=execution_profile) def clear(self): del self._traversals[:] diff --git a/cassandra/datastax/graph/fluent/_serializers.py b/cassandra/datastax/graph/fluent/_serializers.py index 56591603af..db8e715ef8 100644 --- a/cassandra/datastax/graph/fluent/_serializers.py +++ b/cassandra/datastax/graph/fluent/_serializers.py @@ -17,33 +17,89 @@ import six from gremlin_python.structure.io.graphsonV2d0 import ( - GraphSONReader, - GraphSONUtil, - VertexDeserializer, - VertexPropertyDeserializer, - PropertyDeserializer, - EdgeDeserializer, - PathDeserializer + GraphSONReader as GraphSONReaderV2, + GraphSONUtil as GraphSONUtil, # no difference between v2 and v3 + VertexDeserializer as VertexDeserializerV2, + VertexPropertyDeserializer as VertexPropertyDeserializerV2, + PropertyDeserializer as PropertyDeserializerV2, + EdgeDeserializer as EdgeDeserializerV2, + PathDeserializer as PathDeserializerV2 ) -from cassandra.datastax.graph.graphson import ( - GraphSON2Serializer, - GraphSON2Deserializer +from 
gremlin_python.structure.io.graphsonV3d0 import ( + GraphSONReader as GraphSONReaderV3, + VertexDeserializer as VertexDeserializerV3, + VertexPropertyDeserializer as VertexPropertyDeserializerV3, + PropertyDeserializer as PropertyDeserializerV3, + EdgeDeserializer as EdgeDeserializerV3, + PathDeserializer as PathDeserializerV3 ) +try: + from gremlin_python.structure.io.graphsonV2d0 import ( + TraversalMetricsDeserializer as TraversalMetricsDeserializerV2, + MetricsDeserializer as MetricsDeserializerV2 + ) + from gremlin_python.structure.io.graphsonV3d0 import ( + TraversalMetricsDeserializer as TraversalMetricsDeserializerV3, + MetricsDeserializer as MetricsDeserializerV3 + ) +except ImportError: + TraversalMetricsDeserializerV2 = MetricsDeserializerV2 = None + TraversalMetricsDeserializerV3 = MetricsDeserializerV3 = None + +from cassandra.graph import ( + GraphSON2Serializer, + GraphSON2Deserializer, + GraphSON3Serializer, + GraphSON3Deserializer +) +from cassandra.graph.graphson import UserTypeIO, TypeWrapperTypeIO from cassandra.datastax.graph.fluent.predicates import GeoP, TextDistanceP from cassandra.util import Distance __all__ = ['GremlinGraphSONReader', 'GeoPSerializer', 'TextDistancePSerializer', - 'DistanceIO', 'gremlin_deserializers', 'deserializers', 'serializers'] + 'DistanceIO', 'gremlin_deserializers', 'deserializers', 'serializers', + 'GremlinGraphSONReaderV2', 'GremlinGraphSONReaderV3', 'dse_graphson2_serializers', + 'dse_graphson2_deserializers', 'dse_graphson3_serializers', 'dse_graphson3_deserializers', + 'gremlin_graphson2_deserializers', 'gremlin_graphson3_deserializers', 'GremlinUserTypeIO'] class _GremlinGraphSONTypeSerializer(object): + TYPE_KEY = "@type" + VALUE_KEY = "@value" + serializer = None - @classmethod - def dictify(cls, v, _): - return GraphSON2Serializer.serialize(v) + def __init__(self, serializer): + self.serializer = serializer + + def dictify(self, v, writer): + value = self.serializer.serialize(v, writer) + if self.serializer is TypeWrapperTypeIO: + graphson_base_type = v.type_io.graphson_base_type + graphson_type = v.type_io.graphson_type + else: + graphson_base_type = self.serializer.graphson_base_type + graphson_type = self.serializer.graphson_type + + if graphson_base_type is None: + out = value + else: + out = {self.TYPE_KEY: graphson_type} + if value is not None: + out[self.VALUE_KEY] = value + + return out + + def definition(self, value, writer=None): + return self.serializer.definition(value, writer) + + def get_specialized_serializer(self, value): + ser = self.serializer.get_specialized_serializer(value) + if ser is not self.serializer: + return _GremlinGraphSONTypeSerializer(ser) + return self class _GremlinGraphSONTypeDeserializer(object): @@ -54,22 +110,44 @@ def __init__(self, deserializer): self.deserializer = deserializer def objectify(self, v, reader): - return self.deserializer.deserialize(v, reader=reader) + return self.deserializer.deserialize(v, reader) -def _make_gremlin_deserializer(graphson_type): +def _make_gremlin_graphson2_deserializer(graphson_type): return _GremlinGraphSONTypeDeserializer( GraphSON2Deserializer.get_deserializer(graphson_type.graphson_type) ) -class GremlinGraphSONReader(GraphSONReader): +def _make_gremlin_graphson3_deserializer(graphson_type): + return _GremlinGraphSONTypeDeserializer( + GraphSON3Deserializer.get_deserializer(graphson_type.graphson_type) + ) + + +class _GremlinGraphSONReader(object): """Gremlin GraphSONReader Adapter, required to use gremlin types""" + context = None + + def 
__init__(self, context, deserializer_map=None):
+        self.context = context
+        super(_GremlinGraphSONReader, self).__init__(deserializer_map)
+
     def deserialize(self, obj):
         return self.toObject(obj)
 
 
+class GremlinGraphSONReaderV2(_GremlinGraphSONReader, GraphSONReaderV2):
+    pass
+
+# TODO remove next major
+GremlinGraphSONReader = GremlinGraphSONReaderV2
+
+class GremlinGraphSONReaderV3(_GremlinGraphSONReader, GraphSONReaderV3):
+    pass
+
+
 class GeoPSerializer(object):
     @classmethod
     def dictify(cls, p, writer):
@@ -100,32 +178,85 @@ def dictify(cls, v, _):
         return GraphSONUtil.typedValue('Distance', six.text_type(v), prefix='dse')
 
 
-serializers = OrderedDict([
-    (t, _GremlinGraphSONTypeSerializer)
-    for t in six.iterkeys(GraphSON2Serializer.get_type_definitions())
+GremlinUserTypeIO = _GremlinGraphSONTypeSerializer(UserTypeIO)
+
+# GraphSON2
+dse_graphson2_serializers = OrderedDict([
+    (t, _GremlinGraphSONTypeSerializer(s))
+    for t, s in six.iteritems(GraphSON2Serializer.get_type_definitions())
 ])
 
-# Predicates
-serializers.update(OrderedDict([
+dse_graphson2_serializers.update(OrderedDict([
     (Distance, DistanceIO),
     (GeoP, GeoPSerializer),
     (TextDistanceP, TextDistancePSerializer)
 ]))
 
-deserializers = {
-    k: _make_gremlin_deserializer(v)
+# TODO remove next major, this is just in case someone was using it
+serializers = dse_graphson2_serializers
+
+dse_graphson2_deserializers = {
+    k: _make_gremlin_graphson2_deserializer(v)
     for k, v in six.iteritems(GraphSON2Deserializer.get_type_definitions())
 }
 
-deserializers.update({
+dse_graphson2_deserializers.update({
     "dse:Distance": DistanceIO,
 })
 
-gremlin_deserializers = deserializers.copy()
-gremlin_deserializers.update({
-    'g:Vertex': VertexDeserializer,
-    'g:VertexProperty': VertexPropertyDeserializer,
-    'g:Edge': EdgeDeserializer,
-    'g:Property': PropertyDeserializer,
-    'g:Path': PathDeserializer
+# TODO remove next major, this is just in case someone was using it
+deserializers = dse_graphson2_deserializers
+
+gremlin_graphson2_deserializers = dse_graphson2_deserializers.copy()
+gremlin_graphson2_deserializers.update({
+    'g:Vertex': VertexDeserializerV2,
+    'g:VertexProperty': VertexPropertyDeserializerV2,
+    'g:Edge': EdgeDeserializerV2,
+    'g:Property': PropertyDeserializerV2,
+    'g:Path': PathDeserializerV2
 })
+
+if TraversalMetricsDeserializerV2:
+    gremlin_graphson2_deserializers.update({
+        'g:TraversalMetrics': TraversalMetricsDeserializerV2,
+        'g:Metrics': MetricsDeserializerV2
+    })
+
+# TODO remove next major, this is just in case someone was using it
+gremlin_deserializers = gremlin_graphson2_deserializers
+
+# GraphSON3
+dse_graphson3_serializers = OrderedDict([
+    (t, _GremlinGraphSONTypeSerializer(s))
+    for t, s in six.iteritems(GraphSON3Serializer.get_type_definitions())
+])
+
+dse_graphson3_serializers.update(OrderedDict([
+    (Distance, DistanceIO),
+    (GeoP, GeoPSerializer),
+    (TextDistanceP, TextDistancePSerializer)
+]))
+
+dse_graphson3_deserializers = {
+    k: _make_gremlin_graphson3_deserializer(v)
+    for k, v in six.iteritems(GraphSON3Deserializer.get_type_definitions())
+}
+
+dse_graphson3_deserializers.update({
+    "dse:Distance": DistanceIO
+})
+
+gremlin_graphson3_deserializers = dse_graphson3_deserializers.copy()
+gremlin_graphson3_deserializers.update({
+    'g:Vertex': VertexDeserializerV3,
+    'g:VertexProperty': VertexPropertyDeserializerV3,
+    'g:Edge': EdgeDeserializerV3,
+    'g:Property': PropertyDeserializerV3,
+    'g:Path': PathDeserializerV3
+})
+
+if TraversalMetricsDeserializerV3:
+    gremlin_graphson3_deserializers.update({
+
'g:TraversalMetrics': TraversalMetricsDeserializerV3, + 'g:Metrics': MetricsDeserializerV3 + }) diff --git a/cassandra/datastax/graph/graphson.py b/cassandra/datastax/graph/graphson.py index 620adf045e..8419c7992b 100644 --- a/cassandra/datastax/graph/graphson.py +++ b/cassandra/datastax/graph/graphson.py @@ -19,52 +19,73 @@ import json from decimal import Decimal from collections import OrderedDict +import logging +import itertools +from functools import partial import six -if six.PY3: +try: import ipaddress +except: + ipaddress = None -from cassandra.util import Polygon, Point, LineString + +from cassandra.cqltypes import cql_types_from_string +from cassandra.metadata import UserType +from cassandra.util import Polygon, Point, LineString, Duration from cassandra.datastax.graph.types import Vertex, VertexProperty, Edge, Path __all__ = ['GraphSON1Serializer', 'GraphSON1Deserializer', 'GraphSON1TypeDeserializer', - 'GraphSON2Serializer', 'GraphSON2Deserializer', - 'GraphSON2Reader', 'BooleanTypeIO', 'Int16TypeIO', 'Int32TypeIO', 'DoubleTypeIO', + 'GraphSON2Serializer', 'GraphSON2Deserializer', 'GraphSON2Reader', + 'GraphSON3Serializer', 'GraphSON3Deserializer', 'GraphSON3Reader', + 'to_bigint', 'to_int', 'to_double', 'to_float', 'to_smallint', + 'BooleanTypeIO', 'Int16TypeIO', 'Int32TypeIO', 'DoubleTypeIO', 'FloatTypeIO', 'UUIDTypeIO', 'BigDecimalTypeIO', 'DurationTypeIO', 'InetTypeIO', 'InstantTypeIO', 'LocalDateTypeIO', 'LocalTimeTypeIO', 'Int64TypeIO', 'BigIntegerTypeIO', - 'LocalDateTypeIO', 'PolygonTypeIO', 'PointTypeIO', 'LineStringTypeIO', 'BlobTypeIO'] + 'LocalDateTypeIO', 'PolygonTypeIO', 'PointTypeIO', 'LineStringTypeIO', 'BlobTypeIO', + 'GraphSON3Serializer', 'GraphSON3Deserializer', 'UserTypeIO', 'TypeWrapperTypeIO'] """ Supported types: -DSE Graph GraphSON 2.0 Python Driver ------------- | -------------- | ------------ -text | ------ | str -boolean | g:Boolean | bool -bigint | g:Int64 | long -int | g:Int32 | int -double | g:Double | float -float | g:Float | float -uuid | g:UUID | UUID -bigdecimal | gx:BigDecimal | Decimal -duration | gx:Duration | timedelta -inet | gx:InetAddress | str (unicode), IPV4Address/IPV6Address (PY3) -timestamp | gx:Instant | datetime.datetime -date | gx:LocalDate | datetime.date -time | gx:LocalTime | datetime.time -smallint | gx:Int16 | int -varint | gx:BigInteger | long -date | gx:LocalDate | Date -polygon | dse:Polygon | Polygon -point | dse:Point | Point -linestring | dse:LineString | LineString -blob | dse:Blob | bytearray, buffer (PY2), memoryview (PY3), bytes (PY3) +DSE Graph GraphSON 2.0 GraphSON 3.0 | Python Driver +------------ | -------------- | -------------- | ------------ +text | string | string | str +boolean | g:Boolean | g:Boolean | bool +bigint | g:Int64 | g:Int64 | long +int | g:Int32 | g:Int32 | int +double | g:Double | g:Double | float +float | g:Float | g:Float | float +uuid | g:UUID | g:UUID | UUID +bigdecimal | gx:BigDecimal | gx:BigDecimal | Decimal +duration | gx:Duration | N/A | timedelta (Classic graph only) +DSE Duration | N/A | dse:Duration | Duration (Core graph only) +inet | gx:InetAddress | gx:InetAddress | str (unicode), IPV4Address/IPV6Address (PY3) +timestamp | gx:Instant | gx:Instant | datetime.datetime +date | gx:LocalDate | gx:LocalDate | datetime.date +time | gx:LocalTime | gx:LocalTime | datetime.time +smallint | gx:Int16 | gx:Int16 | int +varint | gx:BigInteger | gx:BigInteger | long +date | gx:LocalDate | gx:LocalDate | Date +polygon | dse:Polygon | dse:Polygon | Polygon +point | dse:Point | dse:Point | Point 
+linestring | dse:Linestring | dse:LineString | LineString +blob | dse:Blob | dse:Blob | bytearray, buffer (PY2), memoryview (PY3), bytes (PY3) +blob | gx:ByteBuffer | gx:ByteBuffer | bytearray, buffer (PY2), memoryview (PY3), bytes (PY3) +list | N/A | g:List | list (Core graph only) +map | N/A | g:Map | dict (Core graph only) +set | N/A | g:Set | set or list (Core graph only) + Can return a list due to numerical values returned by Java +tuple | N/A | dse:Tuple | tuple (Core graph only) +udt | N/A | dse:UDT | class or namedtuple (Core graph only) """ MAX_INT32 = 2 ** 32 - 1 MIN_INT32 = -2 ** 31 +log = logging.getLogger(__name__) + class _GraphSONTypeType(type): """GraphSONType metaclass, required to create a class property.""" @@ -80,9 +101,14 @@ class GraphSONTypeIO(object): prefix = 'g' graphson_base_type = None + cql_type = None + + @classmethod + def definition(cls, value, writer=None): + return {'cqlType': cls.cql_type} @classmethod - def serialize(cls, value): + def serialize(cls, value, writer=None): return six.text_type(value) @classmethod @@ -94,18 +120,23 @@ def get_specialized_serializer(cls, value): return cls +class TextTypeIO(GraphSONTypeIO): + cql_type = 'text' + + class BooleanTypeIO(GraphSONTypeIO): graphson_base_type = 'Boolean' + cql_type = 'boolean' @classmethod - def serialize(cls, value): + def serialize(cls, value, writer=None): return bool(value) class IntegerTypeIO(GraphSONTypeIO): @classmethod - def serialize(cls, value): + def serialize(cls, value, writer=None): return value @classmethod @@ -119,14 +150,17 @@ def get_specialized_serializer(cls, value): class Int16TypeIO(IntegerTypeIO): prefix = 'gx' graphson_base_type = 'Int16' + cql_type = 'smallint' class Int32TypeIO(IntegerTypeIO): graphson_base_type = 'Int32' + cql_type = 'int' class Int64TypeIO(IntegerTypeIO): graphson_base_type = 'Int64' + cql_type = 'bigint' @classmethod def deserialize(cls, value, reader=None): @@ -137,6 +171,11 @@ def deserialize(cls, value, reader=None): class FloatTypeIO(GraphSONTypeIO): graphson_base_type = 'Float' + cql_type = 'float' + + @classmethod + def serialize(cls, value, writer=None): + return value @classmethod def deserialize(cls, value, reader=None): @@ -145,6 +184,7 @@ def deserialize(cls, value, reader=None): class DoubleTypeIO(FloatTypeIO): graphson_base_type = 'Double' + cql_type = 'double' class BigIntegerTypeIO(IntegerTypeIO): @@ -157,9 +197,10 @@ class LocalDateTypeIO(GraphSONTypeIO): prefix = 'gx' graphson_base_type = 'LocalDate' + cql_type = 'date' @classmethod - def serialize(cls, value): + def serialize(cls, value, writer=None): return value.isoformat() @classmethod @@ -170,20 +211,14 @@ def deserialize(cls, value, reader=None): # negative date return value - @classmethod - def get_specialized_serializer(cls, value): - if isinstance(value, datetime.datetime): - return InstantTypeIO - - return cls - class InstantTypeIO(GraphSONTypeIO): prefix = 'gx' graphson_base_type = 'Instant' + cql_type = 'timestamp' @classmethod - def serialize(cls, value): + def serialize(cls, value, writer=None): if isinstance(value, datetime.datetime): value = datetime.datetime(*value.utctimetuple()[:6]).replace(microsecond=value.microsecond) else: @@ -209,9 +244,10 @@ class LocalTimeTypeIO(GraphSONTypeIO): prefix = 'gx' graphson_base_type = 'LocalTime' + cql_type = 'time' @classmethod - def serialize(cls, value): + def serialize(cls, value, writer=None): return value.strftime(cls.FORMATS[2]) @classmethod @@ -233,9 +269,10 @@ def deserialize(cls, value, reader=None): class 
BlobTypeIO(GraphSONTypeIO): prefix = 'dse' graphson_base_type = 'Blob' + cql_type = 'blob' @classmethod - def serialize(cls, value): + def serialize(cls, value, writer=None): value = base64.b64encode(value) if six.PY3: value = value.decode('utf-8') @@ -246,8 +283,14 @@ def deserialize(cls, value, reader=None): return bytearray(base64.b64decode(value)) +class ByteBufferTypeIO(BlobTypeIO): + prefix = 'gx' + graphson_base_type = 'ByteBuffer' + + class UUIDTypeIO(GraphSONTypeIO): graphson_base_type = 'UUID' + cql_type = 'uuid' @classmethod def deserialize(cls, value, reader=None): @@ -257,6 +300,7 @@ def deserialize(cls, value, reader=None): class BigDecimalTypeIO(GraphSONTypeIO): prefix = 'gx' graphson_base_type = 'BigDecimal' + cql_type = 'bigdecimal' @classmethod def deserialize(cls, value, reader=None): @@ -266,6 +310,7 @@ def deserialize(cls, value, reader=None): class DurationTypeIO(GraphSONTypeIO): prefix = 'gx' graphson_base_type = 'Duration' + cql_type = 'duration' _duration_regex = re.compile(r""" ^P((?P\d+)D)? @@ -280,7 +325,7 @@ class DurationTypeIO(GraphSONTypeIO): _seconds_in_day = 24 * _seconds_in_hour @classmethod - def serialize(cls, value): + def serialize(cls, value, writer=None): total_seconds = int(value.total_seconds()) days, total_seconds = divmod(total_seconds, cls._seconds_in_day) hours, total_seconds = divmod(total_seconds, cls._seconds_in_hour) @@ -303,9 +348,47 @@ def deserialize(cls, value, reader=None): minutes=duration['minutes'], seconds=duration['seconds']) +class DseDurationTypeIO(GraphSONTypeIO): + prefix = 'dse' + graphson_base_type = 'Duration' + cql_type = 'duration' + + @classmethod + def serialize(cls, value, writer=None): + return { + 'months': value.months, + 'days': value.days, + 'nanos': value.nanoseconds + } + + @classmethod + def deserialize(cls, value, reader=None): + return Duration( + reader.deserialize(value['months']), + reader.deserialize(value['days']), + reader.deserialize(value['nanos']) + ) + + +class TypeWrapperTypeIO(GraphSONTypeIO): + + @classmethod + def definition(cls, value, writer=None): + return {'cqlType': value.type_io.cql_type} + + @classmethod + def serialize(cls, value, writer=None): + return value.type_io.serialize(value.value) + + @classmethod + def deserialize(cls, value, reader=None): + return value.type_io.deserialize(value.value) + + class PointTypeIO(GraphSONTypeIO): prefix = 'dse' graphson_base_type = 'Point' + cql_type = "org.apache.cassandra.db.marshal.PointType" @classmethod def deserialize(cls, value, reader=None): @@ -315,6 +398,7 @@ def deserialize(cls, value, reader=None): class LineStringTypeIO(GraphSONTypeIO): prefix = 'dse' graphson_base_type = 'LineString' + cql_type = "org.apache.cassandra.db.marshal.LineStringType" @classmethod def deserialize(cls, value, reader=None): @@ -324,6 +408,7 @@ def deserialize(cls, value, reader=None): class PolygonTypeIO(GraphSONTypeIO): prefix = 'dse' graphson_base_type = 'Polygon' + cql_type = "org.apache.cassandra.db.marshal.PolygonType" @classmethod def deserialize(cls, value, reader=None): @@ -333,6 +418,7 @@ def deserialize(cls, value, reader=None): class InetTypeIO(GraphSONTypeIO): prefix = 'gx' graphson_base_type = 'InetAddress' + cql_type = 'inet' class VertexTypeIO(GraphSONTypeIO): @@ -397,13 +483,268 @@ class PathTypeIO(GraphSONTypeIO): @classmethod def deserialize(cls, value, reader=None): - labels = [set(label) for label in value['labels']] - objects = [reader.deserialize(obj) for obj in value['objects']] + labels = [set(label) for label in 
reader.deserialize(value['labels'])] + objects = [obj for obj in reader.deserialize(value['objects'])] p = Path(labels, []) p.objects = objects # avoid the object processing in Path.__init__ return p +class TraversalMetricsTypeIO(GraphSONTypeIO): + graphson_base_type = 'TraversalMetrics' + + @classmethod + def deserialize(cls, value, reader=None): + return reader.deserialize(value) + + +class MetricsTypeIO(GraphSONTypeIO): + graphson_base_type = 'Metrics' + + @classmethod + def deserialize(cls, value, reader=None): + return reader.deserialize(value) + + +class JsonMapTypeIO(GraphSONTypeIO): + """In GraphSON2, dict are simply serialized as json map""" + + @classmethod + def serialize(cls, value, writer=None): + out = {} + for k, v in six.iteritems(value): + out[k] = writer.serialize(v, writer) + + return out + + +class MapTypeIO(GraphSONTypeIO): + """In GraphSON3, dict has its own type""" + + graphson_base_type = 'Map' + cql_type = 'map' + + @classmethod + def definition(cls, value, writer=None): + out = OrderedDict([('cqlType', cls.cql_type)]) + out['definition'] = [] + for k, v in six.iteritems(value): + # we just need the first pair to write the def + out['definition'].append(writer.definition(k)) + out['definition'].append(writer.definition(v)) + break + return out + + @classmethod + def serialize(cls, value, writer=None): + out = [] + for k, v in six.iteritems(value): + out.append(writer.serialize(k, writer)) + out.append(writer.serialize(v, writer)) + + return out + + @classmethod + def deserialize(cls, value, reader=None): + out = {} + a, b = itertools.tee(value) + for key, val in zip( + itertools.islice(a, 0, None, 2), + itertools.islice(b, 1, None, 2) + ): + out[reader.deserialize(key)] = reader.deserialize(val) + return out + + +class ListTypeIO(GraphSONTypeIO): + """In GraphSON3, list has its own type""" + + graphson_base_type = 'List' + cql_type = 'list' + + @classmethod + def definition(cls, value, writer=None): + out = OrderedDict([('cqlType', cls.cql_type)]) + out['definition'] = [] + if value: + out['definition'].append(writer.definition(value[0])) + return out + + @classmethod + def serialize(cls, value, writer=None): + return [writer.serialize(v, writer) for v in value] + + @classmethod + def deserialize(cls, value, reader=None): + return [reader.deserialize(obj) for obj in value] + + +class SetTypeIO(GraphSONTypeIO): + """In GraphSON3, set has its own type""" + + graphson_base_type = 'Set' + cql_type = 'set' + + @classmethod + def definition(cls, value, writer=None): + out = OrderedDict([('cqlType', cls.cql_type)]) + out['definition'] = [] + for v in value: + # we only take into account the first value for the definition + out['definition'].append(writer.definition(v)) + break + return out + + @classmethod + def serialize(cls, value, writer=None): + return [writer.serialize(v, writer) for v in value] + + @classmethod + def deserialize(cls, value, reader=None): + lst = [reader.deserialize(obj) for obj in value] + + s = set(lst) + if len(s) != len(lst): + log.warning("Coercing g:Set to list due to numerical values returned by Java. 
" + "See TINKERPOP-1844 for details.") + return lst + + return s + + +class BulkSetTypeIO(GraphSONTypeIO): + graphson_base_type = "BulkSet" + + @classmethod + def deserialize(cls, value, reader=None): + out = [] + + a, b = itertools.tee(value) + for val, bulk in zip( + itertools.islice(a, 0, None, 2), + itertools.islice(b, 1, None, 2) + ): + val = reader.deserialize(val) + bulk = reader.deserialize(bulk) + for n in range(bulk): + out.append(val) + + return out + + +class TupleTypeIO(GraphSONTypeIO): + prefix = 'dse' + graphson_base_type = 'Tuple' + cql_type = 'tuple' + + @classmethod + def definition(cls, value, writer=None): + out = OrderedDict() + out['cqlType'] = cls.cql_type + serializers = [writer.get_serializer(s) for s in value] + out['definition'] = [s.definition(v, writer) for v, s in zip(value, serializers)] + return out + + @classmethod + def serialize(cls, value, writer=None): + out = cls.definition(value, writer) + out['value'] = [writer.serialize(v, writer) for v in value] + return out + + @classmethod + def deserialize(cls, value, reader=None): + return tuple(reader.deserialize(obj) for obj in value['value']) + + +class UserTypeIO(GraphSONTypeIO): + prefix = 'dse' + graphson_base_type = 'UDT' + cql_type = 'udt' + + FROZEN_REMOVAL_REGEX = re.compile(r'frozen<"*([^"]+)"*>') + + @classmethod + def cql_types_from_string(cls, typ): + # sanitizing: remove frozen references and double quotes... + return cql_types_from_string( + re.sub(cls.FROZEN_REMOVAL_REGEX, r'\1', typ) + ) + + @classmethod + def get_udt_definition(cls, value, writer): + user_type_name = writer.user_types[type(value)] + keyspace = writer.context['graph_name'] + return writer.context['cluster'].metadata.keyspaces[keyspace].user_types[user_type_name] + + @classmethod + def is_collection(cls, typ): + return typ in ['list', 'tuple', 'map', 'set'] + + @classmethod + def is_udt(cls, typ, writer): + keyspace = writer.context['graph_name'] + if keyspace in writer.context['cluster'].metadata.keyspaces: + return typ in writer.context['cluster'].metadata.keyspaces[keyspace].user_types + return False + + @classmethod + def field_definition(cls, types, writer, name=None): + """ + Build the udt field definition. This is required when we have a complex udt type. 
+ """ + index = -1 + out = [OrderedDict() if name is None else OrderedDict([('fieldName', name)])] + + while types: + index += 1 + typ = types.pop(0) + if index > 0: + out.append(OrderedDict()) + + if cls.is_udt(typ, writer): + keyspace = writer.context['graph_name'] + udt = writer.context['cluster'].metadata.keyspaces[keyspace].user_types[typ] + out[index].update(cls.definition(udt, writer)) + elif cls.is_collection(typ): + out[index]['cqlType'] = typ + definition = cls.field_definition(types, writer) + out[index]['definition'] = definition if isinstance(definition, list) else [definition] + else: + out[index]['cqlType'] = typ + + return out if len(out) > 1 else out[0] + + @classmethod + def definition(cls, value, writer=None): + udt = value if isinstance(value, UserType) else cls.get_udt_definition(value, writer) + return OrderedDict([ + ('cqlType', cls.cql_type), + ('keyspace', udt.keyspace), + ('name', udt.name), + ('definition', [ + cls.field_definition(cls.cql_types_from_string(typ), writer, name=name) + for name, typ in zip(udt.field_names, udt.field_types)]) + ]) + + @classmethod + def serialize(cls, value, writer=None): + udt = cls.get_udt_definition(value, writer) + out = cls.definition(value, writer) + out['value'] = [] + for name, typ in zip(udt.field_names, udt.field_types): + out['value'].append(writer.serialize(getattr(value, name), writer)) + return out + + @classmethod + def deserialize(cls, value, reader=None): + udt_class = reader.context['cluster']._user_types[value['keyspace']][value['name']] + kwargs = zip( + list(map(lambda v: v['fieldName'], value['definition'])), + [reader.deserialize(v) for v in value['value']] + ) + return udt_class(**dict(kwargs)) + + class _BaseGraphSONSerializer(object): _serializers = OrderedDict() @@ -448,15 +789,19 @@ def get_serializer(cls, value): return serializer @classmethod - def serialize(cls, value): + def serialize(cls, value, writer=None): """ - Serialize a python object to graphson. + Serialize a python object to GraphSON. + + e.g 'P42DT10H5M37S' + e.g. {'key': value} :param value: The python object to serialize. + :param writer: A graphson serializer for recursive types (Optional) """ serializer = cls.get_serializer(value) if serializer: - return serializer.serialize(value) + return serializer.serialize(value, writer or cls) return value @@ -470,27 +815,34 @@ class GraphSON1Serializer(_BaseGraphSONSerializer): # We want that iteration order to be consistent, so we use an OrderedDict, # not a dict. 
_serializers = OrderedDict([ + (str, TextTypeIO), (bool, BooleanTypeIO), - (bytearray, BlobTypeIO), + (bytearray, ByteBufferTypeIO), (Decimal, BigDecimalTypeIO), (datetime.date, LocalDateTypeIO), (datetime.time, LocalTimeTypeIO), (datetime.timedelta, DurationTypeIO), + (datetime.datetime, InstantTypeIO), (uuid.UUID, UUIDTypeIO), (Polygon, PolygonTypeIO), (Point, PointTypeIO), - (LineString, LineStringTypeIO) + (LineString, LineStringTypeIO), + (dict, JsonMapTypeIO), + (float, FloatTypeIO) ]) -if six.PY2: - GraphSON1Serializer.register(buffer, BlobTypeIO) -else: - GraphSON1Serializer.register(memoryview, BlobTypeIO) - GraphSON1Serializer.register(bytes, BlobTypeIO) +if ipaddress: GraphSON1Serializer.register(ipaddress.IPv4Address, InetTypeIO) GraphSON1Serializer.register(ipaddress.IPv6Address, InetTypeIO) +if six.PY2: + GraphSON1Serializer.register(buffer, ByteBufferTypeIO) + GraphSON1Serializer.register(unicode, TextTypeIO) +else: + GraphSON1Serializer.register(memoryview, ByteBufferTypeIO) + GraphSON1Serializer.register(bytes, ByteBufferTypeIO) + class _BaseGraphSONDeserializer(object): @@ -526,7 +878,7 @@ class GraphSON1Deserializer(_BaseGraphSONDeserializer): """ Deserialize graphson1 types to python objects. """ - _TYPES = [UUIDTypeIO, BigDecimalTypeIO, InstantTypeIO, BlobTypeIO, + _TYPES = [UUIDTypeIO, BigDecimalTypeIO, InstantTypeIO, BlobTypeIO, ByteBufferTypeIO, PointTypeIO, LineStringTypeIO, PolygonTypeIO, LocalDateTypeIO, LocalTimeTypeIO, DurationTypeIO, InetTypeIO] @@ -581,7 +933,7 @@ def deserialize_decimal(cls, value): @classmethod def deserialize_blob(cls, value): - return cls._deserializers[BlobTypeIO.graphson_type].deserialize(value) + return cls._deserializers[ByteBufferTypeIO.graphson_type].deserialize(value) @classmethod def deserialize_point(cls, value): @@ -604,7 +956,7 @@ def deserialize_boolean(cls, value): return value -# Remove in the next major +# TODO Remove in the next major GraphSON1TypeDeserializer = GraphSON1Deserializer GraphSON1TypeSerializer = GraphSON1Serializer @@ -615,8 +967,7 @@ class GraphSON2Serializer(_BaseGraphSONSerializer): _serializers = GraphSON1Serializer.get_type_definitions() - @classmethod - def serialize(cls, value): + def serialize(self, value, writer=None): """ Serialize a type to GraphSON2. @@ -624,15 +975,24 @@ def serialize(cls, value): :param value: The python object to serialize. """ - serializer = cls.get_serializer(value) + serializer = self.get_serializer(value) if not serializer: - # if no serializer found, we can't type it. `value` will be jsonized as string. 
- return value
+ raise ValueError("Unable to find a serializer for value of type: {0}".format(type(value)))
+
+ val = serializer.serialize(value, writer or self)
+ if serializer is TypeWrapperTypeIO:
+ graphson_base_type = value.type_io.graphson_base_type
+ graphson_type = value.type_io.graphson_type
+ else:
+ graphson_base_type = serializer.graphson_base_type
+ graphson_type = serializer.graphson_type
 
- value = serializer.serialize(value)
- out = {cls.TYPE_KEY: serializer.graphson_type}
- if value is not None:
- out[cls.VALUE_KEY] = value
+ if graphson_base_type is None:
+ out = val
+ else:
+ out = {self.TYPE_KEY: graphson_type}
+ if val is not None:
+ out[self.VALUE_KEY] = val
 
 return out
 
@@ -647,7 +1007,7 @@ class GraphSON2Deserializer(_BaseGraphSONDeserializer):
 
 _TYPES = GraphSON1Deserializer._TYPES + [
 Int16TypeIO, Int32TypeIO, Int64TypeIO, DoubleTypeIO,
 FloatTypeIO, BigIntegerTypeIO, VertexTypeIO, VertexPropertyTypeIO, EdgeTypeIO,
- PathTypeIO, PropertyTypeIO]
+ PathTypeIO, PropertyTypeIO, TraversalMetricsTypeIO, MetricsTypeIO]
 
 _deserializers = {
 t.graphson_type: t
@@ -660,10 +1020,11 @@ class GraphSON2Reader(object):
 
 GraphSON2 Reader that parses JSON and deserializes it to python objects.
 """
 
- def __init__(self, extra_deserializer_map=None):
+ def __init__(self, context, extra_deserializer_map=None):
 """
+ :param context: A dict of the context, mostly used as context for udt deserialization.
 :param extra_deserializer_map: map from GraphSON type tag to deserializer instance implementing `deserialize`
 """
+ self.context = context
 self.deserializers = GraphSON2Deserializer.get_type_definitions()
 if extra_deserializer_map:
 self.deserializers.update(extra_deserializer_map)
@@ -690,3 +1051,91 @@ def deserialize(self, obj):
 return [self.deserialize(o) for o in obj]
 else:
 return obj
+
+
+class TypeIOWrapper(object):
+ """Used to force a graphson type during serialization"""
+
+ type_io = None
+ value = None
+
+ def __init__(self, type_io, value):
+ self.type_io = type_io
+ self.value = value
+
+
+def _wrap_value(type_io, value):
+ return TypeIOWrapper(type_io, value)
+
+
+to_bigint = partial(_wrap_value, Int64TypeIO)
+to_int = partial(_wrap_value, Int32TypeIO)
+to_smallint = partial(_wrap_value, Int16TypeIO)
+to_double = partial(_wrap_value, DoubleTypeIO)
+to_float = partial(_wrap_value, FloatTypeIO)
+
+
+class GraphSON3Serializer(GraphSON2Serializer):
+
+ _serializers = GraphSON2Serializer.get_type_definitions()
+
+ context = None
+ """A dict of the serialization context"""
+
+ def __init__(self, context):
+ self.context = context
+ self.user_types = None
+
+ def definition(self, value):
+ serializer = self.get_serializer(value)
+ return serializer.definition(value, self)
+
+ def get_serializer(self, value):
+ """Custom get_serializer to support UDT/Tuple"""
+
+ serializer = super(GraphSON3Serializer, self).get_serializer(value)
+ is_namedtuple_udt = serializer is TupleTypeIO and hasattr(value, '_fields')
+ if not serializer or is_namedtuple_udt:
+ # Check if UDT
+ if self.user_types is None:
+ try:
+ user_types = self.context['cluster']._user_types[self.context['graph_name']]
+ self.user_types = dict(map(reversed, six.iteritems(user_types)))
+ except KeyError:
+ self.user_types = {}
+
+ serializer = UserTypeIO if (is_namedtuple_udt or (type(value) in self.user_types)) else serializer
+
+ return serializer
+
+
+GraphSON3Serializer.register(dict, MapTypeIO)
+GraphSON3Serializer.register(list, ListTypeIO)
+GraphSON3Serializer.register(set, SetTypeIO)
+GraphSON3Serializer.register(tuple, TupleTypeIO)
+GraphSON3Serializer.register(Duration, DseDurationTypeIO)
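+# The to_* wrappers above let a caller pin the GraphSON numeric type where a
+# plain Python int would be ambiguous; a usage sketch (vertex label and
+# property name are illustrative only):
+#
+#     from cassandra.datastax.graph.graphson import to_smallint
+#
+#     session.execute_graph("g.addV('person').property('age', age)",
+#                           {'age': to_smallint(42)})  # sent as gx:Int16
+#
+# Wrapped values are handled by TypeWrapperTypeIO, registered below.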
+GraphSON3Serializer.register(TypeIOWrapper, TypeWrapperTypeIO)
+
+
+class GraphSON3Deserializer(GraphSON2Deserializer):
+ _TYPES = GraphSON2Deserializer._TYPES + [MapTypeIO, ListTypeIO,
+ SetTypeIO, TupleTypeIO,
+ UserTypeIO, DseDurationTypeIO, BulkSetTypeIO]
+
+ _deserializers = {t.graphson_type: t for t in _TYPES}
+
+
+class GraphSON3Reader(GraphSON2Reader):
+ """
+ GraphSON3 Reader that parses JSON and deserializes it to python objects.
+ """
+
+ def __init__(self, context, extra_deserializer_map=None):
+ """
+ :param context: A dict of the context, mostly used as context for udt deserialization.
+ :param extra_deserializer_map: map from GraphSON type tag to deserializer instance implementing `deserialize`
+ """
+ self.context = context
+ self.deserializers = GraphSON3Deserializer.get_type_definitions()
+ if extra_deserializer_map:
+ self.deserializers.update(extra_deserializer_map)
diff --git a/cassandra/datastax/graph/query.py b/cassandra/datastax/graph/query.py
index 50a03b5561..7c0e265dbf 100644
--- a/cassandra/datastax/graph/query.py
+++ b/cassandra/datastax/graph/query.py
@@ -19,14 +19,14 @@
 from cassandra import ConsistencyLevel
 from cassandra.query import Statement, SimpleStatement
 
-from cassandra.datastax.graph.types import Vertex, Edge, Path
-from cassandra.datastax.graph.graphson import GraphSON2Reader
+from cassandra.datastax.graph.types import Vertex, Edge, Path, VertexProperty
+from cassandra.datastax.graph.graphson import GraphSON2Reader, GraphSON3Reader
 
 __all__ = [
 'GraphProtocol', 'GraphOptions', 'GraphStatement', 'SimpleGraphStatement',
 'single_object_row_factory', 'graph_result_row_factory', 'graph_object_row_factory',
- 'graph_graphson2_row_factory', 'Result'
+ 'graph_graphson2_row_factory', 'Result', 'graph_graphson3_row_factory'
 ]
 
 # (attr, description, server option)
@@ -45,21 +45,24 @@
 # this is defined by the execution profile attribute, not in graph options
 _request_timeout_key = 'request-timeout'
 
-_graphson2_reader = GraphSON2Reader()
-
 
 class GraphProtocol(object):
 
- GRAPHSON_1_0 = 'graphson-1.0'
+ GRAPHSON_1_0 = b'graphson-1.0'
 """
 GraphSON1
 """
 
- GRAPHSON_2_0 = 'graphson-2.0'
+ GRAPHSON_2_0 = b'graphson-2.0'
 """
 GraphSON2
 """
 
+ GRAPHSON_3_0 = b'graphson-3.0'
+ """
+ GraphSON3
+ """
+
 
 class GraphOptions(object):
 """
@@ -67,11 +70,13 @@ class GraphOptions(object):
 """
 # See _graph_options map above for notes on valid options
 
+ DEFAULT_GRAPH_PROTOCOL = GraphProtocol.GRAPHSON_1_0
+ DEFAULT_GRAPH_LANGUAGE = b'gremlin-groovy'
+
 def __init__(self, **kwargs):
 self._graph_options = {}
 kwargs.setdefault('graph_source', 'g')
- kwargs.setdefault('graph_language', 'gremlin-groovy')
- kwargs.setdefault('graph_protocol', GraphProtocol.GRAPHSON_1_0)
+ kwargs.setdefault('graph_language', GraphOptions.DEFAULT_GRAPH_LANGUAGE)
 for attr, value in six.iteritems(kwargs):
 if attr not in _graph_option_names:
 warn("Unknown keyword argument received for GraphOptions: {0}".format(attr))
@@ -222,11 +227,31 @@ def _graph_object_sequence(objects):
 yield res
 
 
-def graph_graphson2_row_factory(column_names, rows):
- """
- Row Factory that returns the decoded graphson as DSE types.
- """ - return [_graphson2_reader.read(row[0])['result'] for row in rows] +class _GraphSONContextRowFactory(object): + graphson_reader_class = None + graphson_reader_kwargs = None + + def __init__(self, cluster): + context = {'cluster': cluster} + kwargs = self.graphson_reader_kwargs or {} + self.graphson_reader = self.graphson_reader_class(context, **kwargs) + + def __call__(self, column_names, rows): + return [self.graphson_reader.read(row[0])['result'] for row in rows] + + +class _GraphSON2RowFactory(_GraphSONContextRowFactory): + """Row factory to deserialize GraphSON2 results.""" + graphson_reader_class = GraphSON2Reader + + +class _GraphSON3RowFactory(_GraphSONContextRowFactory): + """Row factory to deserialize GraphSON3 results.""" + graphson_reader_class = GraphSON3Reader + + +graph_graphson2_row_factory = _GraphSON2RowFactory +graph_graphson3_row_factory = _GraphSON3RowFactory class Result(object): @@ -302,3 +327,6 @@ def as_path(self): return Path(self.labels, self.objects) except (AttributeError, ValueError, TypeError): raise TypeError("Could not create Path from %r" % (self,)) + + def as_vertex_property(self): + return VertexProperty(self.value.get('label'), self.value.get('value'), self.value.get('properties', {})) diff --git a/cassandra/metadata.py b/cassandra/metadata.py index e9c03f583b..1caeec0542 100644 --- a/cassandra/metadata.py +++ b/cassandra/metadata.py @@ -666,10 +666,15 @@ class KeyspaceMetadata(object): .. versionadded:: 3.15 """ + graph_engine = None + """ + A string indicating whether a graph engine is enabled for this keyspace (Core/Classic). + """ + _exc_info = None """ set if metadata parsing failed """ - def __init__(self, name, durable_writes, strategy_class, strategy_options): + def __init__(self, name, durable_writes, strategy_class, strategy_options, graph_engine=None): self.name = name self.durable_writes = durable_writes self.replication_strategy = ReplicationStrategy.create(strategy_class, strategy_options) @@ -679,17 +684,28 @@ def __init__(self, name, durable_writes, strategy_class, strategy_options): self.functions = {} self.aggregates = {} self.views = {} + self.graph_engine = graph_engine + + @property + def is_graph_enabled(self): + return self.graph_engine is not None def export_as_string(self): """ Returns a CQL query string that can be used to recreate the entire keyspace, including user-defined types and tables. 
""" - cql = "\n\n".join([self.as_cql_query() + ';'] + - self.user_type_strings() + - [f.export_as_string() for f in self.functions.values()] + - [a.export_as_string() for a in self.aggregates.values()] + - [t.export_as_string() for t in self.tables.values()]) + # Make sure tables with vertex are exported before tables with edges + tables_with_vertex = [t for t in self.tables.values() if hasattr(t, 'vertex') and t.vertex] + other_tables = [t for t in self.tables.values() if t not in tables_with_vertex] + + cql = "\n\n".join( + [self.as_cql_query() + ';'] + + self.user_type_strings() + + [f.export_as_string() for f in self.functions.values()] + + [a.export_as_string() for a in self.aggregates.values()] + + [t.export_as_string() for t in tables_with_vertex + other_tables]) + if self._exc_info: import traceback ret = "/*\nWarning: Keyspace %s is incomplete because of an error processing metadata.\n" % \ @@ -715,7 +731,10 @@ def as_cql_query(self): ret = "CREATE KEYSPACE %s WITH replication = %s " % ( protect_name(self.name), self.replication_strategy.export_for_schema()) - return ret + (' AND durable_writes = %s' % ("true" if self.durable_writes else "false")) + ret = ret + (' AND durable_writes = %s' % ("true" if self.durable_writes else "false")) + if self.graph_engine is not None: + ret = ret + (" AND graph_engine = '%s'" % self.graph_engine) + return ret def user_type_strings(self): user_type_strings = [] @@ -1340,6 +1359,90 @@ def _make_option_strings(cls, options_map): return list(sorted(ret)) +class TableMetadataV3(TableMetadata): + """ + For C* 3.0+. `option_maps` take a superset of map names, so if nothing + changes structurally, new option maps can just be appended to the list. + """ + compaction_options = {} + + option_maps = [ + 'compaction', 'compression', 'caching', + 'nodesync' # added DSE 6.0 + ] + + @property + def is_cql_compatible(self): + return True + + @classmethod + def _make_option_strings(cls, options_map): + ret = [] + options_copy = dict(options_map.items()) + + for option in cls.option_maps: + value = options_copy.get(option) + if isinstance(value, Mapping): + del options_copy[option] + params = ("'%s': '%s'" % (k, v) for k, v in value.items()) + ret.append("%s = {%s}" % (option, ', '.join(params))) + + for name, value in options_copy.items(): + if value is not None: + if name == "comment": + value = value or "" + ret.append("%s = %s" % (name, protect_value(value))) + + return list(sorted(ret)) + + +# TODO This should inherit V4 later? 
+class TableMetadataDSE68(TableMetadataV3): + + vertex = None + """A :class:`.VertexMetadata` instance, if graph enabled""" + + edge = None + """A :class:`.EdgeMetadata` instance, if graph enabled""" + + def as_cql_query(self, formatted=False): + ret = super(TableMetadataDSE68, self).as_cql_query(formatted) + + if self.vertex: + ret += " AND VERTEX LABEL %s" % protect_name(self.vertex.label_name) + + if self.edge: + ret += " AND EDGE LABEL %s" % protect_name(self.edge.label_name) + + ret += self._export_edge_as_cql( + self.edge.from_label, + self.edge.from_partition_key_columns, + self.edge.from_clustering_columns, "FROM") + + ret += self._export_edge_as_cql( + self.edge.to_label, + self.edge.to_partition_key_columns, + self.edge.to_clustering_columns, "TO") + + return ret + + @staticmethod + def _export_edge_as_cql(label_name, partition_keys, + clustering_columns, keyword): + ret = " %s %s(" % (keyword, protect_name(label_name)) + + if len(partition_keys) == 1: + ret += protect_name(partition_keys[0]) + else: + ret += "(%s)" % ", ".join([protect_name(k) for k in partition_keys]) + + if clustering_columns: + ret += ", %s" % ", ".join([protect_name(k) for k in clustering_columns]) + ret += ")" + + return ret + + class TableExtensionInterface(object): """ Defines CQL/DDL for Cassandra table extensions. @@ -2301,6 +2404,8 @@ class SchemaParserV3(SchemaParserV22): _function_agg_arument_type_col = 'argument_types' + _table_metadata_class = TableMetadataV3 + recognized_table_options = ( 'bloom_filter_fp_chance', 'caching', @@ -2384,7 +2489,7 @@ def _build_table_metadata(self, row, col_rows=None, trigger_rows=None, index_row trigger_rows = trigger_rows or self.keyspace_table_trigger_rows[keyspace_name][table_name] index_rows = index_rows or self.keyspace_table_index_rows[keyspace_name][table_name] - table_meta = TableMetadataV3(keyspace_name, table_name, virtual=virtual) + table_meta = self._table_metadata_class(keyspace_name, table_name, virtual=virtual) try: table_meta.options = self._build_table_options(row) flags = row.get('flags', set()) @@ -2640,15 +2745,15 @@ def _query_all(self): # ignore them if we got an error self.virtual_keyspaces_result = self._handle_results( virtual_ks_success, virtual_ks_result, - expected_failures=InvalidRequest + expected_failures=(InvalidRequest,) ) self.virtual_tables_result = self._handle_results( virtual_table_success, virtual_table_result, - expected_failures=InvalidRequest + expected_failures=(InvalidRequest,) ) self.virtual_columns_result = self._handle_results( virtual_column_success, virtual_column_result, - expected_failures=InvalidRequest + expected_failures=(InvalidRequest,) ) self._aggregate_results() @@ -2703,41 +2808,174 @@ class SchemaParserDSE67(SchemaParserV4): ("nodesync",)) -class TableMetadataV3(TableMetadata): +class SchemaParserDSE68(SchemaParserDSE67): """ - For C* 3.0+. `option_maps` take a superset of map names, so if nothing - changes structurally, new option maps can just be appended to the list. 
+ For DSE 6.8+ """ - compaction_options = {} - option_maps = [ - 'compaction', 'compression', 'caching', - 'nodesync' # added DSE 6.0 - ] + _SELECT_VERTICES = "SELECT * FROM system_schema.vertices" + _SELECT_EDGES = "SELECT * FROM system_schema.edges" - @property - def is_cql_compatible(self): - return True + _table_metadata_class = TableMetadataDSE68 - @classmethod - def _make_option_strings(cls, options_map): - ret = [] - options_copy = dict(options_map.items()) + def __init__(self, connection, timeout): + super(SchemaParserDSE68, self).__init__(connection, timeout) + self.keyspace_table_vertex_rows = defaultdict(lambda: defaultdict(list)) + self.keyspace_table_edge_rows = defaultdict(lambda: defaultdict(list)) - for option in cls.option_maps: - value = options_copy.get(option) - if isinstance(value, Mapping): - del options_copy[option] - params = ("'%s': '%s'" % (k, v) for k, v in value.items()) - ret.append("%s = {%s}" % (option, ', '.join(params))) + def get_all_keyspaces(self): + for keyspace_meta in super(SchemaParserDSE68, self).get_all_keyspaces(): - for name, value in options_copy.items(): - if value is not None: - if name == "comment": - value = value or "" - ret.append("%s = %s" % (name, protect_value(value))) + def _build_table_graph_metadata(table_meta): + for row in self.keyspace_table_vertex_rows[keyspace_meta.name][table_meta.name]: + vertex_meta = self._build_table_vertex_metadata(row) + table_meta.vertex = vertex_meta - return list(sorted(ret)) + for row in self.keyspace_table_edge_rows[keyspace_meta.name][table_meta.name]: + edge_meta = self._build_table_edge_metadata(keyspace_meta, row) + table_meta.edge = edge_meta + + # Make sure we process vertices before edges + for t in [t for t in six.itervalues(keyspace_meta.tables) + if t.name in self.keyspace_table_vertex_rows[keyspace_meta.name]]: + _build_table_graph_metadata(t) + + # all other tables... 
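+ # (edges second: _build_table_edge_metadata resolves the from/to labels
+ # through the vertex metadata attached to the vertex tables above)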
+ for t in [t for t in six.itervalues(keyspace_meta.tables)
+ if t.name not in self.keyspace_table_vertex_rows[keyspace_meta.name]]:
+ _build_table_graph_metadata(t)
+
+ yield keyspace_meta
+
+ def get_table(self, keyspaces, keyspace, table):
+ table_meta = super(SchemaParserDSE68, self).get_table(keyspaces, keyspace, table)
+ cl = ConsistencyLevel.ONE
+ where_clause = bind_params(" WHERE keyspace_name = %%s AND %s = %%s" % (self._table_name_col), (keyspace, table), _encoder)
+ vertices_query = QueryMessage(query=self._SELECT_VERTICES + where_clause, consistency_level=cl)
+ edges_query = QueryMessage(query=self._SELECT_EDGES + where_clause, consistency_level=cl)
+
+ (vertices_success, vertices_result), (edges_success, edges_result) \
+ = self.connection.wait_for_responses(vertices_query, edges_query, timeout=self.timeout, fail_on_error=False)
+ vertices_result = self._handle_results(vertices_success, vertices_result)
+ edges_result = self._handle_results(edges_success, edges_result)
+
+ if vertices_result:
+ table_meta.vertex = self._build_table_vertex_metadata(vertices_result[0])
+ elif edges_result:
+ table_meta.edge = self._build_table_edge_metadata(keyspaces[keyspace], edges_result[0])
+
+ return table_meta
+
+ @staticmethod
+ def _build_keyspace_metadata_internal(row):
+ name = row["keyspace_name"]
+ durable_writes = row.get("durable_writes", None)
+ replication = dict(row.get("replication")) if 'replication' in row else {}
+ replication_class = replication.pop("class") if 'class' in replication else None
+ graph_engine = row.get("graph_engine", None)
+ return KeyspaceMetadata(name, durable_writes, replication_class, replication, graph_engine)
+
+ @staticmethod
+ def _build_table_vertex_metadata(row):
+ return VertexMetadata(row.get("keyspace_name"), row.get("table_name"),
+ row.get("label_name"))
+
+ @staticmethod
+ def _build_table_edge_metadata(keyspace_meta, row):
+ from_table = row.get("from_table")
+ from_table_meta = keyspace_meta.tables.get(from_table)
+ from_label = from_table_meta.vertex.label_name
+ to_table = row.get("to_table")
+ to_table_meta = keyspace_meta.tables.get(to_table)
+ to_label = to_table_meta.vertex.label_name
+
+ return EdgeMetadata(
+ row.get("keyspace_name"), row.get("table_name"),
+ row.get("label_name"), from_table, from_label,
+ row.get("from_partition_key_columns"),
+ row.get("from_clustering_columns"), to_table, to_label,
+ row.get("to_partition_key_columns"),
+ row.get("to_clustering_columns"))
+
+ def _query_all(self):
+ cl = ConsistencyLevel.ONE
+ queries = [
+ # copied from v4
+ QueryMessage(query=self._SELECT_KEYSPACES, consistency_level=cl),
+ QueryMessage(query=self._SELECT_TABLES, consistency_level=cl),
+ QueryMessage(query=self._SELECT_COLUMNS, consistency_level=cl),
+ QueryMessage(query=self._SELECT_TYPES, consistency_level=cl),
+ QueryMessage(query=self._SELECT_FUNCTIONS, consistency_level=cl),
+ QueryMessage(query=self._SELECT_AGGREGATES, consistency_level=cl),
+ QueryMessage(query=self._SELECT_TRIGGERS, consistency_level=cl),
+ QueryMessage(query=self._SELECT_INDEXES, consistency_level=cl),
+ QueryMessage(query=self._SELECT_VIEWS, consistency_level=cl),
+ QueryMessage(query=self._SELECT_VIRTUAL_KEYSPACES, consistency_level=cl),
+ QueryMessage(query=self._SELECT_VIRTUAL_TABLES, consistency_level=cl),
+ QueryMessage(query=self._SELECT_VIRTUAL_COLUMNS, consistency_level=cl),
+ # dse6.8 only
+ QueryMessage(query=self._SELECT_VERTICES, consistency_level=cl),
+ QueryMessage(query=self._SELECT_EDGES, consistency_level=cl)
+ ]
+
+ responses = 
self.connection.wait_for_responses( + *queries, timeout=self.timeout, fail_on_error=False) + ( + # copied from V4 + (ks_success, ks_result), + (table_success, table_result), + (col_success, col_result), + (types_success, types_result), + (functions_success, functions_result), + (aggregates_success, aggregates_result), + (triggers_success, triggers_result), + (indexes_success, indexes_result), + (views_success, views_result), + (virtual_ks_success, virtual_ks_result), + (virtual_table_success, virtual_table_result), + (virtual_column_success, virtual_column_result), + # dse6.8 responses + (vertices_success, vertices_result), + (edges_success, edges_result) + ) = responses + + # copied from V4 + self.keyspaces_result = self._handle_results(ks_success, ks_result) + self.tables_result = self._handle_results(table_success, table_result) + self.columns_result = self._handle_results(col_success, col_result) + self.triggers_result = self._handle_results(triggers_success, triggers_result) + self.types_result = self._handle_results(types_success, types_result) + self.functions_result = self._handle_results(functions_success, functions_result) + self.aggregates_result = self._handle_results(aggregates_success, aggregates_result) + self.indexes_result = self._handle_results(indexes_success, indexes_result) + self.views_result = self._handle_results(views_success, views_result) + + self.virtual_keyspaces_result = self._handle_results(virtual_ks_success, + virtual_ks_result) + self.virtual_tables_result = self._handle_results(virtual_table_success, + virtual_table_result) + self.virtual_columns_result = self._handle_results(virtual_column_success, + virtual_column_result) + # dse6.8-only results + self.vertices_result = self._handle_results(vertices_success, vertices_result) + self.edges_result = self._handle_results(edges_success, edges_result) + + self._aggregate_results() + + def _aggregate_results(self): + super(SchemaParserDSE68, self)._aggregate_results() + + m = self.keyspace_table_vertex_rows + for row in self.vertices_result: + ksname = row["keyspace_name"] + cfname = row['table_name'] + m[ksname][cfname].append(row) + + m = self.keyspace_table_edge_rows + for row in self.edges_result: + ksname = row["keyspace_name"] + cfname = row['table_name'] + m[ksname][cfname].append(row) class MaterializedViewMetadata(object): @@ -2746,8 +2984,7 @@ class MaterializedViewMetadata(object): """ keyspace_name = None - - """ A string name of the view.""" + """ A string name of the keyspace of this view.""" name = None """ A string name of the view.""" @@ -2849,11 +3086,89 @@ def export_as_string(self): return self.as_cql_query(formatted=True) + ";" +class VertexMetadata(object): + """ + A representation of a vertex on a table + """ + + keyspace_name = None + """ A string name of the keyspace. """ + + table_name = None + """ A string name of the table this vertex is on. 
""" + + label_name = None + """ A string name of the label of this vertex.""" + + def __init__(self, keyspace_name, table_name, label_name): + self.keyspace_name = keyspace_name + self.table_name = table_name + self.label_name = label_name + + +class EdgeMetadata(object): + """ + A representation of an edge on a table + """ + + keyspace_name = None + """A string name of the keyspace """ + + table_name = None + """A string name of the table this edge is on""" + + label_name = None + """A string name of the label of this edge""" + + from_table = None + """A string name of the from table of this edge (incoming vertex)""" + + from_label = None + """A string name of the from table label of this edge (incoming vertex)""" + + from_partition_key_columns = None + """The columns that match the partition key of the incoming vertex table.""" + + from_clustering_columns = None + """The columns that match the clustering columns of the incoming vertex table.""" + + to_table = None + """A string name of the to table of this edge (outgoing vertex)""" + + to_label = None + """A string name of the to table label of this edge (outgoing vertex)""" + + to_partition_key_columns = None + """The columns that match the partition key of the outgoing vertex table.""" + + to_clustering_columns = None + """The columns that match the clustering columns of the outgoing vertex table.""" + + def __init__( + self, keyspace_name, table_name, label_name, from_table, + from_label, from_partition_key_columns, from_clustering_columns, + to_table, to_label, to_partition_key_columns, + to_clustering_columns): + self.keyspace_name = keyspace_name + self.table_name = table_name + self.label_name = label_name + self.from_table = from_table + self.from_label = from_label + self.from_partition_key_columns = from_partition_key_columns + self.from_clustering_columns = from_clustering_columns + self.to_table = to_table + self.to_label = to_label + self.to_partition_key_columns = to_partition_key_columns + self.to_clustering_columns = to_clustering_columns + + def get_schema_parser(connection, server_version, dse_version, timeout): version = Version(server_version) if dse_version: v = Version(dse_version) - if v >= Version('6.7.0'): + if v >= Version('6.8.0'): + return SchemaParserDSE68(connection, timeout) + elif v >= Version('6.7.0'): return SchemaParserDSE67(connection, timeout) elif v >= Version('6.0.0'): return SchemaParserDSE60(connection, timeout) diff --git a/test-requirements.txt b/test-requirements.txt index 2760e27b47..0cc44c3c97 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -18,4 +18,5 @@ requests backports.ssl_match_hostname; python_version < '2.7.9' futurist; python_version >= '3.7' asynctest; python_version > '3.4' -gremlinpython>=3.3.4,<3.3.9 +gremlinpython>=3.4.0,<3.5.0 +ipaddress; python_version < '3.3.0' diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index 5870191ea8..c8e3163094 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -22,6 +22,7 @@ import unittest2 as unittest except ImportError: import unittest # noqa + from packaging.version import Version import logging import socket @@ -130,6 +131,11 @@ def _get_cass_version_from_dse(dse_version): cass_ver = "4.0.0.67" else: cass_ver = '4.0.0.' + ''.join(dse_version.split('.')) + elif dse_version.startswith('6.8'): + if dse_version == '6.8.0': + cass_ver = "4.0.0.68" + else: + cass_ver = '4.0.0.' 
+ ''.join(dse_version.split('.')) else: log.error("Unknown dse version found {0}, defaulting to 2.1".format(dse_version)) cass_ver = "2.1" @@ -338,6 +344,7 @@ def _id_and_mark(f): lessthanorequalcass40 = unittest.skipUnless(CASSANDRA_VERSION <= Version('4.0'), 'Cassandra version less or equal to 4.0 required') lessthancass40 = unittest.skipUnless(CASSANDRA_VERSION < Version('4.0'), 'Cassandra version less than 4.0 required') lessthancass30 = unittest.skipUnless(CASSANDRA_VERSION < Version('3.0'), 'Cassandra version less then 3.0 required') +greaterthanorequaldse68 = unittest.skipUnless(DSE_VERSION and DSE_VERSION >= Version('6.8'), "DSE 6.8 or greater required for this test") greaterthanorequaldse67 = unittest.skipUnless(DSE_VERSION and DSE_VERSION >= Version('6.7'), "DSE 6.7 or greater required for this test") greaterthanorequaldse60 = unittest.skipUnless(DSE_VERSION and DSE_VERSION >= Version('6.0'), "DSE 6.0 or greater required for this test") greaterthanorequaldse51 = unittest.skipUnless(DSE_VERSION and DSE_VERSION >= Version('5.1'), "DSE 5.1 or greater required for this test") @@ -398,12 +405,23 @@ def use_single_node(start=True, workloads=[], configuration_options={}, dse_opti configuration_options=configuration_options, dse_options=dse_options) +def check_log_error(): + global CCM_CLUSTER + log.debug("Checking log error of cluster {0}".format(CCM_CLUSTER.name)) + for node in CCM_CLUSTER.nodelist(): + errors = node.grep_log_for_errors() + for error in errors: + for line in error: + print(line) + + def remove_cluster(): if USE_CASS_EXTERNAL or KEEP_TEST_CLUSTER: return global CCM_CLUSTER if CCM_CLUSTER: + check_log_error() log.debug("Removing cluster {0}".format(CCM_CLUSTER.name)) tries = 0 while tries < 100: @@ -506,6 +524,23 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, if dse_version >= Version('5.0'): CCM_CLUSTER.set_configuration_options({'enable_user_defined_functions': True}) CCM_CLUSTER.set_configuration_options({'enable_scripted_user_defined_functions': True}) + if dse_version >= Version('5.1'): + # For Inet4Address + CCM_CLUSTER.set_dse_configuration_options({ + 'graph': { + 'gremlin_server': { + 'scriptEngines': { + 'gremlin-groovy': { + 'config': { + 'sandbox_rules': { + 'whitelist_packages': ['java.net'] + } + } + } + } + } + } + }) if 'spark' in workloads: config_options = {"initial_spark_worker_resources": 0.1} if dse_version >= Version('6.7'): diff --git a/tests/integration/advanced/__init__.py b/tests/integration/advanced/__init__.py index a943beac41..c5da6c0154 100644 --- a/tests/integration/advanced/__init__.py +++ b/tests/integration/advanced/__init__.py @@ -18,147 +18,28 @@ import unittest # noqa from six.moves.urllib.request import build_opener, Request, HTTPHandler -import sys import re import os import time from os.path import expanduser -from uuid import UUID -from decimal import Decimal + from ccmlib import common -import datetime -import six -from packaging.version import Version - -from cassandra.cluster import Cluster, EXEC_PROFILE_GRAPH_DEFAULT, EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT - -from tests.integration import PROTOCOL_VERSION, DSE_VERSION, get_server_versions, BasicKeyspaceUnitTestCase, \ - drop_keyspace_shutdown_cluster, get_node, USE_CASS_EXTERNAL, CASSANDRA_IP -from tests.integration import use_singledc, use_single_node, wait_for_node_socket -from cassandra.protocol import ServerError -from cassandra.util import Point, LineString, Polygon -from cassandra.graph import Edge, Vertex, Path -from cassandra.graph 
import GraphSON1Deserializer -from cassandra.graph.graphson import InetTypeIO -from cassandra.datastax.graph.query import _graphson2_reader -from cassandra.cluster import (GraphAnalyticsExecutionProfile, GraphExecutionProfile, - EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT, default_lbp_factory) -from cassandra.policies import DSELoadBalancingPolicy -from cassandra.graph.query import GraphOptions, GraphProtocol, graph_graphson2_row_factory -home = expanduser('~') +from cassandra.cluster import Cluster + +from tests.integration import PROTOCOL_VERSION, get_server_versions, BasicKeyspaceUnitTestCase, \ + drop_keyspace_shutdown_cluster, get_node, USE_CASS_EXTERNAL, set_default_cass_ip +from tests.integration import use_singledc, use_single_node, wait_for_node_socket, CASSANDRA_IP +home = expanduser('~') # Home directory of the Embedded Apache Directory Server to use ADS_HOME = os.getenv('ADS_HOME', home) -MAKE_STRICT = "schema.config().option('graph.schema_mode').set('production')" -MAKE_NON_STRICT = "schema.config().option('graph.schema_mode').set('development')" -ALLOW_SCANS = "schema.config().option('graph.allow_scan').set('true')" - -# A map of common types and their corresponding groovy declaration for use in schema creation and insertion -MAX_LONG = 9223372036854775807 -MIN_LONG = -9223372036854775808 -ZERO_LONG = 0 - -if sys.version_info < (3, 0): - MAX_LONG = long(MAX_LONG) - MIN_LONG = long(MIN_LONG) - ZERO_LONG = long(ZERO_LONG) - -deserializers = GraphSON1Deserializer()._deserializers - -TYPE_MAP = {"point1": ["Point()", Point(.5, .13), GraphSON1Deserializer.deserialize_point], - "point2": ["Point()", Point(-5, .0), GraphSON1Deserializer.deserialize_point], - - "linestring1": ["Linestring()", LineString(((1.0, 2.0), (3.0, 4.0), (-89.0, 90.0))), - GraphSON1Deserializer.deserialize_linestring], - "polygon1": ["Polygon()", Polygon([(10.0, 10.0), (80.0, 10.0), (80., 88.0), (10., 89.0), (10., 10.0)], - [[(20., 20.0), (20., 30.0), (30., 30.0), (30., 20.0), (20., 20.0)], - [(40., 20.0), (40., 30.0), (50., 30.0), (50., 20.0), (40., 20.0)]]), - GraphSON1Deserializer.deserialize_polygon], - "smallint1": ["Smallint()", 1, GraphSON1Deserializer.deserialize_smallint], - "varint1": ["Varint()", 2147483647, GraphSON1Deserializer.deserialize_varint], - - "bigint1": ["Bigint()", MAX_LONG, GraphSON1Deserializer.deserialize_bigint], - "bigint2": ["Bigint()", MIN_LONG, GraphSON1Deserializer.deserialize_bigint], - "bigint3": ["Bigint()", ZERO_LONG, GraphSON1Deserializer.deserialize_bigint], - - "int1": ["Int()", 100, GraphSON1Deserializer.deserialize_int], - "float1": ["Float()", .5, GraphSON1Deserializer.deserialize_float], - "double1": ["Double()", .3415681, GraphSON1Deserializer.deserialize_double], - "uuid1": ["Uuid()", UUID('12345678123456781234567812345678'), GraphSON1Deserializer.deserialize_uuid], - "decimal1": ["Decimal()", Decimal(10), GraphSON1Deserializer.deserialize_decimal], - "blob1": ["Blob()", bytearray(b"Hello World"), GraphSON1Deserializer.deserialize_blob], - - "timestamp1": ["Timestamp()", datetime.datetime.now().replace(microsecond=0), - GraphSON1Deserializer.deserialize_timestamp], - "timestamp2": ["Timestamp()", datetime.datetime.max.replace(microsecond=0), - GraphSON1Deserializer.deserialize_timestamp], - # These are valid values but are pending for DSP-14093 to be fixed - #"timestamp3": ["Timestamp()", datetime.datetime(159, 1, 1, 23, 59, 59), - # GraphSON1TypeDeserializer.deserialize_timestamp], - #"timestamp4": ["Timestamp()", datetime.datetime.min, - # 
GraphSON1TypeDeserializer.deserialize_timestamp], - - "duration1": ["Duration()", datetime.timedelta(1, 16, 0), - GraphSON1Deserializer.deserialize_duration], - "duration2": ["Duration()", datetime.timedelta(days=1, seconds=16, milliseconds=15), - GraphSON1Deserializer.deserialize_duration], - } - - -if six.PY2: - TYPE_MAP["blob3"] = ["Blob()", buffer(b"Hello World"), GraphSON1Deserializer.deserialize_blob] - - TYPE_MAP["inet1"] = ["Inet()", "127.0.0.1", GraphSON1Deserializer.deserialize_inet] - TYPE_MAP["inet2"] = ["Inet()", "2001:db8:85a3:8d3:1319:8a2e:370:7348", GraphSON1Deserializer.deserialize_inet] - -else: - TYPE_MAP["blob4"] = ["Blob()", bytes(b"Hello World Again"), GraphSON1Deserializer.deserialize_blob] - TYPE_MAP["blob5"] = ["Blob()", memoryview(b"And Again Hello World"), GraphSON1Deserializer.deserialize_blob] - - import ipaddress - deserializer_plus_to_ipaddressv4 = lambda x: ipaddress.IPv4Address(GraphSON1Deserializer.deserialize_inet(x)) - deserializer_plus_to_ipaddressv6 = lambda x: ipaddress.IPv6Address(GraphSON1Deserializer.deserialize_inet(x)) - - def generic_ip_deserializer(string_ip_adress): - if ":" in string_ip_adress: - return deserializer_plus_to_ipaddressv6(string_ip_adress) - return deserializer_plus_to_ipaddressv4(string_ip_adress) - - class GenericIpAddressIO(InetTypeIO): - @classmethod - def deserialize(cls, value, reader=None): - return generic_ip_deserializer(value) - - _graphson2_reader.deserializers[GenericIpAddressIO.graphson_type] = GenericIpAddressIO - - TYPE_MAP["inet1"] = ["Inet()", ipaddress.IPv4Address("127.0.0.1"), deserializer_plus_to_ipaddressv4] - TYPE_MAP["inet2"] = ["Inet()", ipaddress.IPv6Address("2001:db8:85a3:8d3:1319:8a2e:370:7348"), - deserializer_plus_to_ipaddressv6] - -if DSE_VERSION and DSE_VERSION >= Version("5.1"): - TYPE_MAP["datetime1"]= ["Date()", datetime.date.today(), GraphSON1Deserializer.deserialize_date] - TYPE_MAP["time1"] = ["Time()", datetime.time(12, 6, 12, 444), GraphSON1Deserializer.deserialize_time] - TYPE_MAP["time2"] = ["Time()", datetime.time(12, 6, 12), GraphSON1Deserializer.deserialize_time] - TYPE_MAP["time3"] = ["Time()", datetime.time(12, 6), GraphSON1Deserializer.deserialize_time] - TYPE_MAP["time4"] = ["Time()", datetime.time.min, GraphSON1Deserializer.deserialize_time] - TYPE_MAP["time5"] = ["Time()", datetime.time.max, GraphSON1Deserializer.deserialize_time] - TYPE_MAP["blob2"] = ["Blob()", bytearray(b"AKDLIElksadlaswqA" * 100000), GraphSON1Deserializer.deserialize_blob] - TYPE_MAP["datetime1"]= ["Date()", datetime.date.today(), GraphSON1Deserializer.deserialize_date] - TYPE_MAP["datetime2"]= ["Date()", datetime.date(159, 1, 3), GraphSON1Deserializer.deserialize_date] - TYPE_MAP["datetime3"]= ["Date()", datetime.date.min, GraphSON1Deserializer.deserialize_date] - TYPE_MAP["datetime4"]= ["Date()", datetime.date.max, GraphSON1Deserializer.deserialize_date] - TYPE_MAP["time1"] = ["Time()", datetime.time(12, 6, 12, 444), GraphSON1Deserializer.deserialize_time] - TYPE_MAP["time2"] = ["Time()", datetime.time(12, 6, 12), GraphSON1Deserializer.deserialize_time] - TYPE_MAP["time3"] = ["Time()", datetime.time(12, 6), GraphSON1Deserializer.deserialize_time] - TYPE_MAP["time4"] = ["Time()", datetime.time.min, GraphSON1Deserializer.deserialize_time] - TYPE_MAP["time5"] = ["Time()", datetime.time.max, GraphSON1Deserializer.deserialize_time] - TYPE_MAP["blob2"] = ["Blob()", bytearray(b"AKDLIElksadlaswqA" * 100000), GraphSON1Deserializer.deserialize_blob] + def find_spark_master(session): - # Itterate over the nodes the 
one with port 7080 open is the spark master + # Iterate over the nodes the one with port 7080 open is the spark master for host in session.hosts: ip = host.address port = 7077 @@ -216,7 +97,7 @@ def use_cluster_with_graph(num_nodes): when started all at once. """ if USE_CASS_EXTERNAL: - set_default_dse_ip() + set_default_cass_ip() return # Create the cluster but don't start it. @@ -248,178 +129,12 @@ def use_cluster_with_graph(num_nodes): wait_for_spark_workers(3, 120) -def reset_graph(session, graph_name): - session.execute_graph('system.graph(name).ifNotExists().create()', {'name': graph_name}, - execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT) - wait_for_graph_inserted(session, graph_name) - - -def wait_for_graph_inserted(session, graph_name): - count = 0 - exists = session.execute_graph('system.graph(name).exists()', {'name': graph_name}, - execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)[0].value - while not exists and count < 50: - time.sleep(1) - exists = session.execute_graph('system.graph(name).exists()', {'name': graph_name}, - execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)[0].value - return exists - - -class BasicGraphUnitTestCase(BasicKeyspaceUnitTestCase): - """ - This is basic graph unit test case that provides various utility methods that can be leveraged for testcase setup and tear - down - """ - @property - def graph_name(self): - return self._testMethodName.lower() - - def session_setup(self): - lbp = DSELoadBalancingPolicy(default_lbp_factory()) - - ep_graphson2 = GraphExecutionProfile( - request_timeout=60, - load_balancing_policy=lbp, - graph_options=GraphOptions( - graph_name=self.graph_name, - graph_protocol=GraphProtocol.GRAPHSON_2_0 - ), - row_factory=graph_graphson2_row_factory) - - ep_graphson1 = GraphExecutionProfile( - request_timeout=60, - load_balancing_policy=lbp, - graph_options=GraphOptions( - graph_name=self.graph_name - ) - ) - - ep_analytics = GraphAnalyticsExecutionProfile( - request_timeout=60, - load_balancing_policy=lbp, - graph_options = GraphOptions( - graph_language=b'gremlin-groovy', - graph_name=self.graph_name - ) - ) - - self.cluster = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={ - EXEC_PROFILE_GRAPH_DEFAULT: ep_graphson1, - EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT: ep_analytics, - "graphson2": ep_graphson2 - }) - self.session = self.cluster.connect() - self.ks_name = self._testMethodName.lower() - self.cass_version, self.cql_version = get_server_versions() - - def setUp(self): - if DSE_VERSION: - self.session_setup() - self.reset_graph() - - self.clear_schema() - - def tearDown(self): - if DSE_VERSION: - self.cluster.shutdown() - - def clear_schema(self): - self.session.execute_graph('schema.clear()') - - def reset_graph(self): - reset_graph(self.session, self.graph_name) - - - def wait_for_graph_inserted(self): - wait_for_graph_inserted(self.session, self.graph_name) - - -class BasicSharedGraphUnitTestCase(BasicKeyspaceUnitTestCase): - """ - This is basic graph unit test case that provides various utility methods that can be leveraged for testcase setup and tear - down - """ - - @classmethod - def session_setup(cls): - cls.cluster = Cluster(protocol_version=PROTOCOL_VERSION) - cls.session = cls.cluster.connect() - cls.ks_name = cls.__name__.lower() - cls.cass_version, cls.cql_version = get_server_versions() - cls.graph_name = cls.__name__.lower() - - @classmethod - def setUpClass(cls): - if DSE_VERSION: - cls.session_setup() - cls.reset_graph() - profiles = cls.cluster.profile_manager.profiles - 
profiles[EXEC_PROFILE_GRAPH_DEFAULT].request_timeout = 60 - profiles[EXEC_PROFILE_GRAPH_DEFAULT].graph_options.graph_name = cls.graph_name - profiles[EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT].request_timeout = 60 - profiles[EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT].graph_options.graph_name = cls.graph_name - cls.clear_schema() - - @classmethod - def tearDownClass(cls): - if DSE_VERSION: - cls.cluster.shutdown() - - @classmethod - def clear_schema(self): - self.session.execute_graph('schema.clear()') - - @classmethod - def reset_graph(self): - reset_graph(self.session, self.graph_name) - - def wait_for_graph_inserted(self): - wait_for_graph_inserted(self.session, self.graph_name) - - -def fetchCustomGeoType(type): - if type.lower().startswith("point"): - return getPointType() - elif type.lower().startswith("line"): - return getLineType() - elif type.lower().startswith("poly"): - return getPolygonType() - else: - return None - - -geo_condition = DSE_VERSION and DSE_VERSION < Version('5.1') -def getPointType(): - if geo_condition: - return "Point()" - - return "Point().withGeoBounds()" - -def getPointTypeWithBounds(lowerX, lowerY, upperX, upperY): - if geo_condition: - return "Point()" - - return "Point().withBounds({0}, {1}, {2}, {3})".format(lowerX, lowerY, upperX, upperY) - -def getLineType(): - if geo_condition: - return "Linestring()" - - return "Linestring().withGeoBounds()" - -def getPolygonType(): - if geo_condition: - return "Polygon()" - - return "Polygon().withGeoBounds()" - - class BasicGeometricUnitTestCase(BasicKeyspaceUnitTestCase): """ This base test class is used by all the geomteric tests. It contains class level teardown and setup methods. It also contains the test fixtures used by those tests """ + @classmethod def common_dse_setup(cls, rf, keyspace_creation=True): cls.cluster = Cluster(protocol_version=PROTOCOL_VERSION) @@ -432,277 +147,22 @@ def common_dse_setup(cls, rf, keyspace_creation=True): @classmethod def setUpClass(cls): - if DSE_VERSION: - cls.common_dse_setup(1) - cls.initalizeTables() + cls.common_dse_setup(1) + cls.initalizeTables() @classmethod def tearDownClass(cls): - if DSE_VERSION: - drop_keyspace_shutdown_cluster(cls.ks_name, cls.session, cls.cluster) + drop_keyspace_shutdown_cluster(cls.ks_name, cls.session, cls.cluster) @classmethod def initalizeTables(cls): udt_type = "CREATE TYPE udt1 (g {0})".format(cls.cql_type_name) - large_table = "CREATE TABLE tbl (k uuid PRIMARY KEY, g {0}, l list<{0}>, s set<{0}>, m0 map<{0},int>, m1 map, t tuple<{0},{0},{0}>, u frozen)".format(cls.cql_type_name) - simple_table = "CREATE TABLE tblpk (k {0} primary key, v int)".format( cls.cql_type_name) - cluster_table = "CREATE TABLE tblclustering (k0 int, k1 {0}, v int, primary key (k0, k1))".format(cls.cql_type_name) + large_table = "CREATE TABLE tbl (k uuid PRIMARY KEY, g {0}, l list<{0}>, s set<{0}>, m0 map<{0},int>, m1 map, t tuple<{0},{0},{0}>, u frozen)".format( + cls.cql_type_name) + simple_table = "CREATE TABLE tblpk (k {0} primary key, v int)".format(cls.cql_type_name) + cluster_table = "CREATE TABLE tblclustering (k0 int, k1 {0}, v int, primary key (k0, k1))".format( + cls.cql_type_name) cls.session.execute(udt_type) cls.session.execute(large_table) cls.session.execute(simple_table) cls.session.execute(cluster_table) - - -def generate_line_graph(length): - query_parts = [] - query_parts.append(ALLOW_SCANS+';') - query_parts.append("schema.propertyKey('index').Int().ifNotExists().create();") - 
query_parts.append("schema.propertyKey('distance').Int().ifNotExists().create();") - query_parts.append("schema.vertexLabel('lp').properties('index').ifNotExists().create();") - query_parts.append("schema.edgeLabel('goesTo').properties('distance').connection('lp', 'lp').ifNotExists().create();") - for index in range(0, length): - query_parts.append('''Vertex vertex{0} = graph.addVertex(label, 'lp', 'index', {0}); '''.format(index)) - if index is not 0: - query_parts.append('''vertex{0}.addEdge('goesTo', vertex{1}, 'distance', 5); '''.format(index-1, index)) - final_graph_generation_statement = "".join(query_parts) - return final_graph_generation_statement - - -def generate_classic(session): - to_run = [MAKE_STRICT, ALLOW_SCANS, '''schema.propertyKey('name').Text().ifNotExists().create(); - schema.propertyKey('age').Int().ifNotExists().create(); - schema.propertyKey('lang').Text().ifNotExists().create(); - schema.propertyKey('weight').Float().ifNotExists().create(); - schema.vertexLabel('person').properties('name', 'age').ifNotExists().create(); - schema.vertexLabel('software').properties('name', 'lang').ifNotExists().create(); - schema.edgeLabel('created').properties('weight').connection('person', 'software').ifNotExists().create(); - schema.edgeLabel('created').connection('software', 'software').add(); - schema.edgeLabel('knows').properties('weight').connection('person', 'person').ifNotExists().create();''', - '''Vertex marko = graph.addVertex(label, 'person', 'name', 'marko', 'age', 29); - Vertex vadas = graph.addVertex(label, 'person', 'name', 'vadas', 'age', 27); - Vertex lop = graph.addVertex(label, 'software', 'name', 'lop', 'lang', 'java'); - Vertex josh = graph.addVertex(label, 'person', 'name', 'josh', 'age', 32); - Vertex ripple = graph.addVertex(label, 'software', 'name', 'ripple', 'lang', 'java'); - Vertex peter = graph.addVertex(label, 'person', 'name', 'peter', 'age', 35); - marko.addEdge('knows', vadas, 'weight', 0.5f); - marko.addEdge('knows', josh, 'weight', 1.0f); - marko.addEdge('created', lop, 'weight', 0.4f); - josh.addEdge('created', ripple, 'weight', 1.0f); - josh.addEdge('created', lop, 'weight', 0.4f); - peter.addEdge('created', lop, 'weight', 0.2f);'''] - - for run in to_run: - succeed = False - count = 0 - # Retry up to 10 times this is an issue for - # Graph Mult-NodeClusters - while count < 10 and not succeed: - try: - session.execute_graph(run) - succeed = True - except (ServerError): - print("error creating classic graph retrying") - time.sleep(.5) - count += 1 - - -def generate_multi_field_graph(session): - to_run = [ALLOW_SCANS, - '''schema.propertyKey('shortvalue').Smallint().ifNotExists().create(); - schema.vertexLabel('shortvertex').properties('shortvalue').ifNotExists().create(); - short s1 = 5000; graph.addVertex(label, "shortvertex", "shortvalue", s1);''', - '''schema.propertyKey('intvalue').Int().ifNotExists().create(); - schema.vertexLabel('intvertex').properties('intvalue').ifNotExists().create(); - int i1 = 1000000000; graph.addVertex(label, "intvertex", "intvalue", i1);''', - '''schema.propertyKey('intvalue2').Int().ifNotExists().create(); - schema.vertexLabel('intvertex2').properties('intvalue2').ifNotExists().create(); - Integer i2 = 100000000; graph.addVertex(label, "intvertex2", "intvalue2", i2);''', - '''schema.propertyKey('longvalue').Bigint().ifNotExists().create(); - schema.vertexLabel('longvertex').properties('longvalue').ifNotExists().create(); - long l1 = 9223372036854775807; graph.addVertex(label, "longvertex", "longvalue", l1);''', - 
'''schema.propertyKey('longvalue2').Bigint().ifNotExists().create(); - schema.vertexLabel('longvertex2').properties('longvalue2').ifNotExists().create(); - Long l2 = 100000000000000000L; graph.addVertex(label, "longvertex2", "longvalue2", l2);''', - '''schema.propertyKey('floatvalue').Float().ifNotExists().create(); - schema.vertexLabel('floatvertex').properties('floatvalue').ifNotExists().create(); - float f1 = 3.5f; graph.addVertex(label, "floatvertex", "floatvalue", f1);''', - '''schema.propertyKey('doublevalue').Double().ifNotExists().create(); - schema.vertexLabel('doublevertex').properties('doublevalue').ifNotExists().create(); - double d1 = 3.5e40; graph.addVertex(label, "doublevertex", "doublevalue", d1);''', - '''schema.propertyKey('doublevalue2').Double().ifNotExists().create(); - schema.vertexLabel('doublevertex2').properties('doublevalue2').ifNotExists().create(); - Double d2 = 3.5e40d; graph.addVertex(label, "doublevertex2", "doublevalue2", d2);'''] - - - for run in to_run: - session.execute_graph(run) - - if DSE_VERSION >= Version('5.1'): - to_run_51=['''schema.propertyKey('datevalue1').Date().ifNotExists().create(); - schema.vertexLabel('datevertex1').properties('datevalue1').ifNotExists().create();''', - '''schema.propertyKey('negdatevalue2').Date().ifNotExists().create(); - schema.vertexLabel('negdatevertex2').properties('negdatevalue2').ifNotExists().create();'''] - for i in range(1,4): - to_run_51.append('''schema.propertyKey('timevalue{0}').Time().ifNotExists().create(); - schema.vertexLabel('timevertex{0}').properties('timevalue{0}').ifNotExists().create();'''.format(i)) - - for run in to_run_51: - session.execute_graph(run) - - session.execute_graph('''graph.addVertex(label, "datevertex1", "datevalue1", date1);''', - {'date1': '1999-07-29' }) - session.execute_graph('''graph.addVertex(label, "negdatevertex2", "negdatevalue2", date2);''', - {'date2': '-1999-07-28' }) - - session.execute_graph('''graph.addVertex(label, "timevertex1", "timevalue1", time1);''', - {'time1': '14:02'}) - session.execute_graph('''graph.addVertex(label, "timevertex2", "timevalue2", time2);''', - {'time2': '14:02:20'}) - session.execute_graph('''graph.addVertex(label, "timevertex3", "timevalue3", time3);''', - {'time3': '14:02:20.222'}) - - -def generate_type_graph_schema(session, prime_schema=True): - """ - This method will prime the schema for all types in the TYPE_MAP - """ - session.execute_graph(ALLOW_SCANS) - if(prime_schema): - create_vertex= "schema.vertexLabel('{0}').ifNotExists().create();".\ - format(generate_type_graph_schema.single_vertex) - session.execute_graph(create_vertex) - for key in TYPE_MAP.keys(): - prop_type = fetchCustomGeoType(key) - if prop_type is None: - prop_type=TYPE_MAP[key][0] - vertex_label = key - prop_name = key+"value" - insert_string = "" - insert_string += "schema.propertyKey('{0}').{1}.ifNotExists().create();".format(prop_name, prop_type) - insert_string += "schema.vertexLabel('{}').properties('{}').add();".\ - format(generate_type_graph_schema.single_vertex, prop_name) - session.execute_graph(insert_string) - else: - session.execute_graph(MAKE_NON_STRICT) -generate_type_graph_schema.single_vertex = "single_vertex_label" - -def generate_address_book_graph(session, size): - to_run = [ALLOW_SCANS, - "schema.propertyKey('name').Text().create()\n" + - "schema.propertyKey('pointPropWithBoundsWithSearchIndex')." + getPointTypeWithBounds(-100, -100, 100, 100) + ".create()\n" + - "schema.propertyKey('pointPropWithBounds')." 
+ getPointTypeWithBounds(-100, -100, 100, 100) + ".create()\n" + - "schema.propertyKey('pointPropWithGeoBoundsWithSearchIndex')." + getPointType() + ".create()\n" + - "schema.propertyKey('pointPropWithGeoBounds')." + getPointType() + ".create()\n" + - "schema.propertyKey('city').Text().create()\n" + - "schema.propertyKey('state').Text().create()\n" + - "schema.propertyKey('description').Text().create()\n" + - "schema.vertexLabel('person').properties('name', 'city', 'state', 'description', 'pointPropWithBoundsWithSearchIndex', 'pointPropWithBounds', 'pointPropWithGeoBoundsWithSearchIndex', 'pointPropWithGeoBounds').create()", - "schema.vertexLabel('person').index('searchPointWithBounds').secondary().by('pointPropWithBounds').add()", - "schema.vertexLabel('person').index('searchPointWithGeoBounds').secondary().by('pointPropWithGeoBounds').add()", - - "g.addV('person').property('name', 'Paul Thomas Joe').property('city', 'Rochester').property('state', 'MN').property('pointPropWithBoundsWithSearchIndex', Geo.point(-92.46295, 44.0234)).property('pointPropWithBounds', Geo.point(-92.46295, 44.0234)).property('pointPropWithGeoBoundsWithSearchIndex', Geo.point(-92.46295, 44.0234)).property('pointPropWithGeoBounds', Geo.point(-92.46295, 44.0234)).property('description', 'Lives by the hospital')", - "g.addV('person').property('name', 'George Bill Steve').property('city', 'Minneapolis').property('state', 'MN').property('pointPropWithBoundsWithSearchIndex', Geo.point(-93.266667, 44.093333)).property('pointPropWithBounds', Geo.point(-93.266667, 44.093333)).property('pointPropWithGeoBoundsWithSearchIndex', Geo.point(-93.266667, 44.093333)).property('pointPropWithGeoBounds', Geo.point(-93.266667, 44.093333)).property('description', 'A cold dude')", - "g.addV('person').property('name', 'James Paul Smith').property('city', 'Chicago').property('state', 'IL').property('pointPropWithBoundsWithSearchIndex', Geo.point(-87.684722, 41.836944)).property('description', 'Likes to hang out')", - "g.addV('person').property('name', 'Jill Alice').property('city', 'Atlanta').property('state', 'GA').property('pointPropWithBoundsWithSearchIndex', Geo.point(-84.39, 33.755)).property('description', 'Enjoys a nice cold coca cola')", - ] - - if not Version('5.0') <= DSE_VERSION < Version('5.1'): - to_run.append("schema.vertexLabel('person').index('search').search().by('pointPropWithBoundsWithSearchIndex').withError(0.00001, 0.0).by('pointPropWithGeoBoundsWithSearchIndex').withError(0.00001, 0.0).add()") - - for run in to_run: - session.execute_graph(run) - - -def generate_large_complex_graph(session, size): - prof = session.execution_profile_clone_update(EXEC_PROFILE_GRAPH_DEFAULT, request_timeout=32) - to_run = ''' - schema.config().option('graph.schema_mode').set('development'); - schema.config().option('graph.allow_scan').set('true'); - ''' - session.execute_graph(to_run, execution_profile=prof) - to_run = ''' - int size = 2000; - List ids = new ArrayList(); - schema.propertyKey('ts').Int().single().ifNotExists().create(); - schema.propertyKey('sin').Int().single().ifNotExists().create(); - schema.propertyKey('cos').Int().single().ifNotExists().create(); - schema.propertyKey('ii').Int().single().ifNotExists().create(); - schema.vertexLabel('lcg').properties('ts', 'sin', 'cos', 'ii').ifNotExists().create(); - schema.edgeLabel('linked').connection('lcg', 'lcg').ifNotExists().create(); - Vertex v = graph.addVertex(label, 'lcg'); - v.property("ts", 100001); - v.property("sin", 0); - v.property("cos", 1); - v.property("ii", 0); - 
ids.add(v.id()); - Random rand = new Random(); - for (int ii = 1; ii < size; ii++) { - v = graph.addVertex(label, 'lcg'); - v.property("ii", ii); - v.property("ts", 100001 + ii); - v.property("sin", Math.sin(ii/5.0)); - v.property("cos", Math.cos(ii/5.0)); - Vertex u = g.V(ids.get(rand.nextInt(ids.size()))).next(); - v.addEdge("linked", u); - ids.add(u.id()); - ids.add(v.id()); - } - g.V().count();''' - - session.execute_graph(to_run, execution_profile=prof) - - -def validate_classic_vertex(test, vertex): - vertex_props = vertex.properties.keys() - test.assertEqual(len(vertex_props), 2) - test.assertIn('name', vertex_props) - test.assertTrue('lang' in vertex_props or 'age' in vertex_props) - - -def validate_classic_vertex_return_type(test, vertex): - validate_generic_vertex_result_type(vertex) - vertex_props = vertex.properties - test.assertIn('name', vertex_props) - test.assertTrue('lang' in vertex_props or 'age' in vertex_props) - - -def validate_generic_vertex_result_type(test, vertex): - test.assertIsInstance(vertex, Vertex) - for attr in ('id', 'type', 'label', 'properties'): - test.assertIsNotNone(getattr(vertex, attr)) - - -def validate_classic_edge_properties(test, edge_properties): - test.assertEqual(len(edge_properties.keys()), 1) - test.assertIn('weight', edge_properties) - test.assertIsInstance(edge_properties, dict) - - -def validate_classic_edge(test, edge): - validate_generic_edge_result_type(test, edge) - validate_classic_edge_properties(test, edge.properties) - - -def validate_line_edge(test, edge): - validate_generic_edge_result_type(test, edge) - edge_props = edge.properties - test.assertEqual(len(edge_props.keys()), 1) - test.assertIn('distance', edge_props) - - -def validate_generic_edge_result_type(test, edge): - test.assertIsInstance(edge, Edge) - for attr in ('properties', 'outV', 'outVLabel', 'inV', 'inVLabel', 'label', 'type', 'id'): - test.assertIsNotNone(getattr(edge, attr)) - - -def validate_path_result_type(test, path): - test.assertIsInstance(path, Path) - test.assertIsNotNone(path.labels) - for obj in path.objects: - if isinstance(obj, Edge): - validate_classic_edge(test, obj) - elif isinstance(obj, Vertex): - validate_classic_vertex(test, obj) - else: - test.fail("Invalid object found in path " + str(object.type)) diff --git a/tests/integration/advanced/graph/__init__.py b/tests/integration/advanced/graph/__init__.py index 2c9ca172f8..6002d57f78 100644 --- a/tests/integration/advanced/graph/__init__.py +++ b/tests/integration/advanced/graph/__init__.py @@ -11,3 +11,1195 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
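+
+# A minimal usage sketch (illustrative, assuming the helpers defined below):
+# subclass GraphUnitTestCase, name matrix tests with a leading '_test', and let
+# GraphTestConfiguration.generate_tests() emit one concrete test per supported
+# (schema, graphson) combination, e.g.:
+#
+#     @GraphTestConfiguration.generate_tests(schema=ClassicGraphSchema)
+#     class ExampleGraphTest(GraphUnitTestCase):
+#         def _test_count(self, schema, graphson):
+#             self.execute_graph(schema.fixtures.classic(), graphson)
+#             rs = self.execute_graph("g.V().count()", graphson)
+#             assert self.resultset_to_list(rs)[0] == 7  # classic fixture creates 7 vertices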
+
+import sys
+import time
+import logging
+import inspect
+from packaging.version import Version
+import ipaddress
+from uuid import UUID
+from decimal import Decimal
+import datetime
+
+from cassandra.util import Point, LineString, Polygon, Duration
+import six
+
+from cassandra.cluster import EXEC_PROFILE_GRAPH_DEFAULT, EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT
+from cassandra.cluster import GraphAnalyticsExecutionProfile, GraphExecutionProfile, EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT, \
+    default_lbp_factory
+from cassandra.policies import DSELoadBalancingPolicy
+
+from cassandra.graph import GraphSON1Deserializer
+from cassandra.graph.graphson import InetTypeIO, GraphSON2Deserializer, GraphSON3Deserializer
+from cassandra.graph import Edge, Vertex, Path
+from cassandra.graph.query import GraphOptions, GraphProtocol, graph_graphson2_row_factory, \
+    graph_graphson3_row_factory
+
+from tests.integration import DSE_VERSION
+from tests.integration.advanced import *
+
+
+def setup_module():
+    if DSE_VERSION:
+        dse_options = {'graph': {'realtime_evaluation_timeout_in_seconds': 60}}
+        use_single_node_with_graph(dse_options=dse_options)
+
+
+log = logging.getLogger(__name__)
+
+MAX_LONG = 9223372036854775807
+MIN_LONG = -9223372036854775808
+ZERO_LONG = 0
+
+if sys.version_info < (3, 0):
+    MAX_LONG = long(MAX_LONG)
+    MIN_LONG = long(MIN_LONG)
+    ZERO_LONG = long(ZERO_LONG)
+
+MAKE_STRICT = "schema.config().option('graph.schema_mode').set('production')"
+MAKE_NON_STRICT = "schema.config().option('graph.schema_mode').set('development')"
+ALLOW_SCANS = "schema.config().option('graph.allow_scan').set('true')"
+
+deserializer_plus_to_ipaddressv4 = lambda x: ipaddress.IPv4Address(GraphSON1Deserializer.deserialize_inet(x))
+deserializer_plus_to_ipaddressv6 = lambda x: ipaddress.IPv6Address(GraphSON1Deserializer.deserialize_inet(x))
+
+
+def generic_ip_deserializer(string_ip_address):
+    if ":" in string_ip_address:
+        return deserializer_plus_to_ipaddressv6(string_ip_address)
+    return deserializer_plus_to_ipaddressv4(string_ip_address)
+
+
+class GenericIpAddressIO(InetTypeIO):
+    @classmethod
+    def deserialize(cls, value, reader=None):
+        return generic_ip_deserializer(value)
+
+GraphSON2Deserializer._deserializers[GenericIpAddressIO.graphson_type] = GenericIpAddressIO
+GraphSON3Deserializer._deserializers[GenericIpAddressIO.graphson_type] = GenericIpAddressIO
+
+if DSE_VERSION:
+    if DSE_VERSION >= Version('6.8.0'):
+        CREATE_CLASSIC_GRAPH = "system.graph(name).engine(Classic).create()"
+    else:
+        CREATE_CLASSIC_GRAPH = "system.graph(name).create()"
+
+
+def reset_graph(session, graph_name):
+    ks = list(session.execute(
+        "SELECT * FROM system_schema.keyspaces WHERE keyspace_name = '{}';".format(graph_name)))
+    if ks:
+        try:
+            session.execute_graph('system.graph(name).drop()', {'name': graph_name},
+                                  execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)
+        except Exception:
+            pass
+
+    session.execute_graph(CREATE_CLASSIC_GRAPH, {'name': graph_name},
+                          execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)
+    wait_for_graph_inserted(session, graph_name)
+
+
+def wait_for_graph_inserted(session, graph_name):
+    # poll until the graph exists, giving up after ~50 attempts
+    count = 0
+    exists = session.execute_graph('system.graph(name).exists()', {'name': graph_name},
+                                   execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)[0].value
+    while not exists and count < 50:
+        time.sleep(1)
+        count += 1
+        exists = session.execute_graph('system.graph(name).exists()', {'name': graph_name},
+                                       execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)[0].value
+    return exists
+
+
+class BasicGraphUnitTestCase(BasicKeyspaceUnitTestCase):
+    
""" + This is basic graph unit test case that provides various utility methods that can be leveraged for testcase setup and tear + down + """ + + @property + def graph_name(self): + return self._testMethodName.lower() + + def session_setup(self): + lbp = DSELoadBalancingPolicy(default_lbp_factory()) + + ep_graphson2 = GraphExecutionProfile( + request_timeout=60, + load_balancing_policy=lbp, + graph_options=GraphOptions( + graph_name=self.graph_name, + graph_protocol=GraphProtocol.GRAPHSON_2_0 + ), + row_factory=graph_graphson2_row_factory) + + ep_graphson3 = GraphExecutionProfile( + request_timeout=60, + load_balancing_policy=lbp, + graph_options=GraphOptions( + graph_name=self.graph_name, + graph_protocol=GraphProtocol.GRAPHSON_3_0 + ), + row_factory=graph_graphson3_row_factory) + + ep_graphson1 = GraphExecutionProfile( + request_timeout=60, + load_balancing_policy=lbp, + graph_options=GraphOptions( + graph_name=self.graph_name + ) + ) + + ep_analytics = GraphAnalyticsExecutionProfile( + request_timeout=60, + load_balancing_policy=lbp, + graph_options=GraphOptions( + graph_source=b'a', + graph_language=b'gremlin-groovy', + graph_name=self.graph_name + ) + ) + + self.cluster = Cluster(protocol_version=PROTOCOL_VERSION, + execution_profiles={ + EXEC_PROFILE_GRAPH_DEFAULT: ep_graphson1, + EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT: ep_analytics, + "graphson1": ep_graphson1, + "graphson2": ep_graphson2, + "graphson3": ep_graphson3 + }) + + self.session = self.cluster.connect() + self.ks_name = self._testMethodName.lower() + self.cass_version, self.cql_version = get_server_versions() + + def setUp(self): + self.session_setup() + self.reset_graph() + self.clear_schema() + # enable dev and scan modes + self.session.execute_graph(MAKE_NON_STRICT) + self.session.execute_graph(ALLOW_SCANS) + + def tearDown(self): + self.cluster.shutdown() + + def clear_schema(self): + self.session.execute_graph(""" + schema.clear(); + """) + + def reset_graph(self): + reset_graph(self.session, self.graph_name) + + def wait_for_graph_inserted(self): + wait_for_graph_inserted(self.session, self.graph_name) + + def _execute(self, query, graphson, params=None, execution_profile_options=None, **kwargs): + queries = query if isinstance(query, list) else [query] + ep = self.get_execution_profile(graphson) + if execution_profile_options: + ep = self.session.execution_profile_clone_update(ep, **execution_profile_options) + + results = [] + for query in queries: + log.debug(query) + rf = self.session.execute_graph_async(query, parameters=params, execution_profile=ep, **kwargs) + results.append(rf.result()) + self.assertEqual(rf.message.custom_payload['graph-results'], graphson) + + return results[0] if len(results) == 1 else results + + def get_execution_profile(self, graphson, traversal=False): + ep = 'graphson1' + if graphson == GraphProtocol.GRAPHSON_2_0: + ep = 'graphson2' + elif graphson == GraphProtocol.GRAPHSON_3_0: + ep = 'graphson3' + + return ep if traversal is False else 'traversal_' + ep + + def resultset_to_list(self, rs): + results_list = [] + for result in rs: + try: + results_list.append(result.value) + except: + results_list.append(result) + + return results_list + + +class GraphUnitTestCase(BasicKeyspaceUnitTestCase): + + @property + def graph_name(self): + return self._testMethodName.lower() + + def session_setup(self): + lbp = DSELoadBalancingPolicy(default_lbp_factory()) + + ep_graphson2 = GraphExecutionProfile( + request_timeout=60, + load_balancing_policy=lbp, + graph_options=GraphOptions( + 
graph_name=self.graph_name,
+                graph_protocol=GraphProtocol.GRAPHSON_2_0
+            ),
+            row_factory=graph_graphson2_row_factory)
+
+        ep_graphson3 = GraphExecutionProfile(
+            request_timeout=60,
+            load_balancing_policy=lbp,
+            graph_options=GraphOptions(
+                graph_name=self.graph_name,
+                graph_protocol=GraphProtocol.GRAPHSON_3_0
+            ),
+            row_factory=graph_graphson3_row_factory)
+
+        ep_graphson1 = GraphExecutionProfile(
+            request_timeout=60,
+            load_balancing_policy=lbp,
+            graph_options=GraphOptions(
+                graph_name=self.graph_name,
+                graph_language='gremlin-groovy'
+            )
+        )
+
+        ep_analytics = GraphAnalyticsExecutionProfile(
+            request_timeout=60,
+            load_balancing_policy=lbp,
+            graph_options=GraphOptions(
+                graph_source=b'a',
+                graph_language=b'gremlin-groovy',
+                graph_name=self.graph_name
+            )
+        )
+
+        self.cluster = Cluster(protocol_version=PROTOCOL_VERSION,
+                               execution_profiles={
+                                   EXEC_PROFILE_GRAPH_DEFAULT: ep_graphson1,
+                                   EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT: ep_analytics,
+                                   "graphson1": ep_graphson1,
+                                   "graphson2": ep_graphson2,
+                                   "graphson3": ep_graphson3
+                               })
+
+        self.session = self.cluster.connect()
+        self.ks_name = self._testMethodName.lower()
+        self.cass_version, self.cql_version = get_server_versions()
+
+    def setUp(self):
+        """basic setup only"""
+        self.session_setup()
+
+    def setup_graph(self, schema):
+        """Config-dependent setup"""
+        schema.drop_graph(self.session, self.graph_name)
+        schema.create_graph(self.session, self.graph_name)
+        schema.clear(self.session)
+        if schema is ClassicGraphSchema:
+            # enable dev and scan modes
+            self.session.execute_graph(MAKE_NON_STRICT)
+            self.session.execute_graph(ALLOW_SCANS)
+
+    def teardown_graph(self, schema):
+        schema.drop_graph(self.session, self.graph_name)
+
+    def tearDown(self):
+        self.cluster.shutdown()
+
+    def execute_graph_queries(self, queries, params=None, execution_profile=EXEC_PROFILE_GRAPH_DEFAULT,
+                              verify_graphson=False, **kwargs):
+        results = []
+        for query in queries:
+            log.debug(query)
+            rf = self.session.execute_graph_async(query, parameters=params,
+                                                  execution_profile=execution_profile, **kwargs)
+            if verify_graphson:
+                self.assertEqual(rf.message.custom_payload['graph-results'], verify_graphson)
+            results.append(rf.result())
+
+        return results
+
+    def execute_graph(self, query, graphson, params=None, execution_profile_options=None, traversal=False, **kwargs):
+        queries = query if isinstance(query, list) else [query]
+        ep = self.get_execution_profile(graphson)
+        if traversal:
+            ep = 'traversal_' + ep
+        if execution_profile_options:
+            ep = self.session.execution_profile_clone_update(ep, **execution_profile_options)
+
+        results = self.execute_graph_queries(queries, params, ep, verify_graphson=graphson, **kwargs)
+
+        return results[0] if len(results) == 1 else results
+
+    def get_execution_profile(self, graphson, traversal=False):
+        ep = 'graphson1'
+        if graphson == GraphProtocol.GRAPHSON_2_0:
+            ep = 'graphson2'
+        elif graphson == GraphProtocol.GRAPHSON_3_0:
+            ep = 'graphson3'
+
+        return ep if traversal is False else 'traversal_' + ep
+
+    def resultset_to_list(self, rs):
+        results_list = []
+        for result in rs:
+            try:
+                results_list.append(result.value)
+            except Exception:
+                results_list.append(result)
+
+        return results_list
+
+
+class BasicSharedGraphUnitTestCase(BasicKeyspaceUnitTestCase):
+    """
+    A basic graph unit test case that shares one cluster and session across the
+    test class and provides utility methods for setup and teardown.
+    """
+
+    @classmethod
+    def session_setup(cls):
+        cls.cluster = Cluster(protocol_version=PROTOCOL_VERSION)
+        cls.session = 
cls.cluster.connect()
+        cls.ks_name = cls.__name__.lower()
+        cls.cass_version, cls.cql_version = get_server_versions()
+        cls.graph_name = cls.__name__.lower()
+
+    @classmethod
+    def setUpClass(cls):
+        if DSE_VERSION:
+            cls.session_setup()
+            cls.reset_graph()
+            profiles = cls.cluster.profile_manager.profiles
+            profiles[EXEC_PROFILE_GRAPH_DEFAULT].request_timeout = 60
+            profiles[EXEC_PROFILE_GRAPH_DEFAULT].graph_options.graph_name = cls.graph_name
+            profiles[EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT].request_timeout = 60
+            profiles[EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT].graph_options.graph_name = cls.graph_name
+
+    @classmethod
+    def tearDownClass(cls):
+        if DSE_VERSION:
+            cls.cluster.shutdown()
+
+    @classmethod
+    def clear_schema(cls):
+        cls.session.execute_graph('schema.clear()')
+
+    @classmethod
+    def reset_graph(cls):
+        reset_graph(cls.session, cls.graph_name)
+
+    def wait_for_graph_inserted(self):
+        wait_for_graph_inserted(self.session, self.graph_name)
+
+
+class GraphFixtures(object):
+
+    @staticmethod
+    def line(length, single_script=True):
+        raise NotImplementedError()
+
+    @staticmethod
+    def classic():
+        raise NotImplementedError()
+
+    @staticmethod
+    def multiple_fields():
+        raise NotImplementedError()
+
+    @staticmethod
+    def large():
+        raise NotImplementedError()
+
+
+class ClassicGraphFixtures(GraphFixtures):
+
+    @staticmethod
+    def datatypes():
+        data = {
+            "point1": ["Point()", Point(.5, .13), GraphSON1Deserializer.deserialize_point],
+            "point2": ["Point()", Point(-5, .0), GraphSON1Deserializer.deserialize_point],
+
+            "linestring1": ["Linestring()", LineString(((1.0, 2.0), (3.0, 4.0), (-89.0, 90.0))),
+                            GraphSON1Deserializer.deserialize_linestring],
+            "polygon1": ["Polygon()", Polygon([(10.0, 10.0), (80.0, 10.0), (80., 88.0), (10., 89.0), (10., 10.0)],
+                                              [[(20., 20.0), (20., 30.0), (30., 30.0), (30., 20.0), (20., 20.0)],
+                                               [(40., 20.0), (40., 30.0), (50., 30.0), (50., 20.0), (40., 20.0)]]),
+                         GraphSON1Deserializer.deserialize_polygon],
+            "smallint1": ["Smallint()", 1, GraphSON1Deserializer.deserialize_smallint],
+            "bigint1": ["Bigint()", MAX_LONG, GraphSON1Deserializer.deserialize_bigint],
+            "bigint2": ["Bigint()", MIN_LONG, GraphSON1Deserializer.deserialize_bigint],
+            "bigint3": ["Bigint()", ZERO_LONG, GraphSON1Deserializer.deserialize_bigint],
+            "varint1": ["Varint()", 2147483647, GraphSON1Deserializer.deserialize_varint],
+            "int1": ["Int()", 100, GraphSON1Deserializer.deserialize_int],
+            "float1": ["Float()", 0.3415681, GraphSON1Deserializer.deserialize_float],
+            "double1": ["Double()", 0.34156811237335205, GraphSON1Deserializer.deserialize_double],
+            "uuid1": ["Uuid()", UUID('12345678123456781234567812345678'), GraphSON1Deserializer.deserialize_uuid],
+            "decimal1": ["Decimal()", Decimal(10), GraphSON1Deserializer.deserialize_decimal],
+            "blob1": ["Blob()", bytearray(b"Hello World"), GraphSON1Deserializer.deserialize_blob],
+
+            "timestamp1": ["Timestamp()", datetime.datetime.utcnow().replace(microsecond=0),
+                           GraphSON1Deserializer.deserialize_timestamp],
+            "timestamp2": ["Timestamp()", datetime.datetime.max.replace(microsecond=0),
+                           GraphSON1Deserializer.deserialize_timestamp],
+            # These are valid values but are pending for DSP-14093 to be fixed
+            #"timestamp3": ["Timestamp()", datetime.datetime(159, 1, 1, 23, 59, 59),
+            #               GraphSON1TypeDeserializer.deserialize_timestamp],
+            #"timestamp4": ["Timestamp()", datetime.datetime.min,
+            #               GraphSON1TypeDeserializer.deserialize_timestamp],
+            "inet1": ["Inet()", 
ipaddress.IPv4Address(u"127.0.0.1"), deserializer_plus_to_ipaddressv4],
+            "inet2": ["Inet()", ipaddress.IPv6Address(u"2001:db8:85a3:8d3:1319:8a2e:370:7348"),
+                      deserializer_plus_to_ipaddressv6],
+            "duration1": ["Duration()", datetime.timedelta(1, 16, 0),
+                          GraphSON1Deserializer.deserialize_duration],
+            "duration2": ["Duration()", datetime.timedelta(days=1, seconds=16, milliseconds=15),
+                          GraphSON1Deserializer.deserialize_duration]
+        }
+
+        if six.PY2:
+            data["blob2"] = ["Blob()", buffer(b"Hello World"), GraphSON1Deserializer.deserialize_blob]
+        else:
+            data["blob3"] = ["Blob()", bytes(b"Hello World Again"), GraphSON1Deserializer.deserialize_blob]
+            data["blob4"] = ["Blob()", memoryview(b"And Again Hello World"), GraphSON1Deserializer.deserialize_blob]
+
+        if DSE_VERSION >= Version("5.1"):
+            data["time1"] = ["Time()", datetime.time(12, 6, 12, 444), GraphSON1Deserializer.deserialize_time]
+            data["time2"] = ["Time()", datetime.time(12, 6, 12), GraphSON1Deserializer.deserialize_time]
+            data["time3"] = ["Time()", datetime.time(12, 6), GraphSON1Deserializer.deserialize_time]
+            data["time4"] = ["Time()", datetime.time.min, GraphSON1Deserializer.deserialize_time]
+            data["time5"] = ["Time()", datetime.time.max, GraphSON1Deserializer.deserialize_time]
+            data["blob5"] = ["Blob()", bytearray(b"AKDLIElksadlaswqA" * 10000), GraphSON1Deserializer.deserialize_blob]
+            data["datetime1"] = ["Date()", datetime.date.today(), GraphSON1Deserializer.deserialize_date]
+            data["datetime2"] = ["Date()", datetime.date(159, 1, 3), GraphSON1Deserializer.deserialize_date]
+            data["datetime3"] = ["Date()", datetime.date.min, GraphSON1Deserializer.deserialize_date]
+            data["datetime4"] = ["Date()", datetime.date.max, GraphSON1Deserializer.deserialize_date]
+
+        return data
+
+    @staticmethod
+    def line(length, single_script=False):
+        queries = [ALLOW_SCANS + ';',
+                   """schema.propertyKey('index').Int().ifNotExists().create();
+                      schema.propertyKey('distance').Int().ifNotExists().create();
+                      schema.vertexLabel('lp').properties('index').ifNotExists().create();
+                      schema.edgeLabel('goesTo').properties('distance').connection('lp', 'lp').ifNotExists().create();"""]
+
+        vertex_script = ["Vertex vertex0 = graph.addVertex(label, 'lp', 'index', 0);"]
+        for index in range(1, length):
+            if not single_script and len(vertex_script) > 25:
+                queries.append("\n".join(vertex_script))
+                vertex_script = [
+                    "Vertex vertex{pindex} = g.V().hasLabel('lp').has('index', {pindex}).next()".format(
+                        pindex=index-1)]
+
+            vertex_script.append('''
+                Vertex vertex{vindex} = graph.addVertex(label, 'lp', 'index', {vindex});
+                vertex{pindex}.addEdge('goesTo', vertex{vindex}, 'distance', 5); '''.format(
+                vindex=index, pindex=index - 1))
+
+        queries.append("\n".join(vertex_script))
+        return queries
+
+    @staticmethod
+    def classic():
+        queries = [ALLOW_SCANS,
+                   '''schema.propertyKey('name').Text().ifNotExists().create();
+                      schema.propertyKey('age').Int().ifNotExists().create();
+                      schema.propertyKey('lang').Text().ifNotExists().create();
+                      schema.propertyKey('weight').Float().ifNotExists().create();
+                      
schema.vertexLabel('person').properties('name', 'age').ifNotExists().create();
+                      schema.vertexLabel('software').properties('name', 'lang').ifNotExists().create();
+                      schema.edgeLabel('created').properties('weight').connection('person', 'software').ifNotExists().create();
+                      schema.edgeLabel('created').connection('software', 'software').add();
+                      schema.edgeLabel('knows').properties('weight').connection('person', 'person').ifNotExists().create();''',
+
+                   '''Vertex marko = graph.addVertex(label, 'person', 'name', 'marko', 'age', 29);
+                      Vertex vadas = graph.addVertex(label, 'person', 'name', 'vadas', 'age', 27);
+                      Vertex lop = graph.addVertex(label, 'software', 'name', 'lop', 'lang', 'java');
+                      Vertex josh = graph.addVertex(label, 'person', 'name', 'josh', 'age', 32);
+                      Vertex ripple = graph.addVertex(label, 'software', 'name', 'ripple', 'lang', 'java');
+                      Vertex peter = graph.addVertex(label, 'person', 'name', 'peter', 'age', 35);
+                      Vertex carl = graph.addVertex(label, 'person', 'name', 'carl', 'age', 35);
+                      marko.addEdge('knows', vadas, 'weight', 0.5f);
+                      marko.addEdge('knows', josh, 'weight', 1.0f);
+                      marko.addEdge('created', lop, 'weight', 0.4f);
+                      josh.addEdge('created', ripple, 'weight', 1.0f);
+                      josh.addEdge('created', lop, 'weight', 0.4f);
+                      peter.addEdge('created', lop, 'weight', 0.2f);''']
+
+        return "\n".join(queries)
+
+    @staticmethod
+    def multiple_fields():
+        query_params = {}
+        queries = [ALLOW_SCANS,
+                   '''schema.propertyKey('shortvalue').Smallint().ifNotExists().create();
+                      schema.vertexLabel('shortvertex').properties('shortvalue').ifNotExists().create();
+                      short s1 = 5000; graph.addVertex(label, "shortvertex", "shortvalue", s1);
+                      schema.propertyKey('intvalue').Int().ifNotExists().create();
+                      schema.vertexLabel('intvertex').properties('intvalue').ifNotExists().create();
+                      int i1 = 1000000000; graph.addVertex(label, "intvertex", "intvalue", i1);
+                      schema.propertyKey('intvalue2').Int().ifNotExists().create();
+                      schema.vertexLabel('intvertex2').properties('intvalue2').ifNotExists().create();
+                      Integer i2 = 100000000; graph.addVertex(label, "intvertex2", "intvalue2", i2);
+                      schema.propertyKey('longvalue').Bigint().ifNotExists().create();
+                      schema.vertexLabel('longvertex').properties('longvalue').ifNotExists().create();
+                      long l1 = 9223372036854775807; graph.addVertex(label, "longvertex", "longvalue", l1);
+                      schema.propertyKey('longvalue2').Bigint().ifNotExists().create();
+                      schema.vertexLabel('longvertex2').properties('longvalue2').ifNotExists().create();
+                      Long l2 = 100000000000000000L; graph.addVertex(label, "longvertex2", "longvalue2", l2);
+                      schema.propertyKey('floatvalue').Float().ifNotExists().create();
+                      schema.vertexLabel('floatvertex').properties('floatvalue').ifNotExists().create();
+                      float f1 = 3.5f; graph.addVertex(label, "floatvertex", "floatvalue", f1);
+                      schema.propertyKey('doublevalue').Double().ifNotExists().create();
+                      schema.vertexLabel('doublevertex').properties('doublevalue').ifNotExists().create();
+                      double d1 = 3.5e40; graph.addVertex(label, "doublevertex", "doublevalue", d1);
+                      schema.propertyKey('doublevalue2').Double().ifNotExists().create();
+                      schema.vertexLabel('doublevertex2').properties('doublevalue2').ifNotExists().create();
+                      Double d2 = 3.5e40d; graph.addVertex(label, "doublevertex2", "doublevalue2", d2);''']
+
+        if DSE_VERSION >= Version('5.1'):
+            queries.append('''schema.propertyKey('datevalue1').Date().ifNotExists().create();
+                              schema.vertexLabel('datevertex1').properties('datevalue1').ifNotExists().create();
+                              
schema.propertyKey('negdatevalue2').Date().ifNotExists().create(); + schema.vertexLabel('negdatevertex2').properties('negdatevalue2').ifNotExists().create();''') + + for i in range(1, 4): + queries.append('''schema.propertyKey('timevalue{0}').Time().ifNotExists().create(); + schema.vertexLabel('timevertex{0}').properties('timevalue{0}').ifNotExists().create();'''.format( + i)) + + queries.append('graph.addVertex(label, "datevertex1", "datevalue1", date1);') + query_params['date1'] = '1999-07-29' + + queries.append('graph.addVertex(label, "negdatevertex2", "negdatevalue2", date2);') + query_params['date2'] = '-1999-07-28' + + queries.append('graph.addVertex(label, "timevertex1", "timevalue1", time1);') + query_params['time1'] = '14:02' + queries.append('graph.addVertex(label, "timevertex2", "timevalue2", time2);') + query_params['time2'] = '14:02:20' + queries.append('graph.addVertex(label, "timevertex3", "timevalue3", time3);') + query_params['time3'] = '14:02:20.222' + + return queries, query_params + + @staticmethod + def large(): + query_parts = [''' + int size = 2000; + List ids = new ArrayList(); + schema.propertyKey('ts').Int().single().ifNotExists().create(); + schema.propertyKey('sin').Int().single().ifNotExists().create(); + schema.propertyKey('cos').Int().single().ifNotExists().create(); + schema.propertyKey('ii').Int().single().ifNotExists().create(); + schema.vertexLabel('lcg').properties('ts', 'sin', 'cos', 'ii').ifNotExists().create(); + schema.edgeLabel('linked').connection('lcg', 'lcg').ifNotExists().create(); + Vertex v = graph.addVertex(label, 'lcg'); + v.property("ts", 100001); + v.property("sin", 0); + v.property("cos", 1); + v.property("ii", 0); + ids.add(v.id()); + Random rand = new Random(); + for (int ii = 1; ii < size; ii++) { + v = graph.addVertex(label, 'lcg'); + v.property("ii", ii); + v.property("ts", 100001 + ii); + v.property("sin", Math.sin(ii/5.0)); + v.property("cos", Math.cos(ii/5.0)); + Vertex u = g.V(ids.get(rand.nextInt(ids.size()))).next(); + v.addEdge("linked", u); + ids.add(v.id()); + } + g.V().count();'''] + + return "\n".join(query_parts) + + @staticmethod + def address_book(): + p1 = "Point()" + p2 = "Point()" + if DSE_VERSION >= Version('5.1'): + p1 = "Point().withBounds(-100, -100, 100, 100)" + p2 = "Point().withGeoBounds()" + + queries = [ + ALLOW_SCANS, + "schema.propertyKey('name').Text().ifNotExists().create()", + "schema.propertyKey('pointPropWithBoundsWithSearchIndex').{}.ifNotExists().create()".format(p1), + "schema.propertyKey('pointPropWithBounds').{}.ifNotExists().create()".format(p1), + "schema.propertyKey('pointPropWithGeoBoundsWithSearchIndex').{}.ifNotExists().create()".format(p2), + "schema.propertyKey('pointPropWithGeoBounds').{}.ifNotExists().create()".format(p2), + "schema.propertyKey('city').Text().ifNotExists().create()", + "schema.propertyKey('state').Text().ifNotExists().create()", + "schema.propertyKey('description').Text().ifNotExists().create()", + "schema.vertexLabel('person').properties('name', 'city', 'state', 'description', 'pointPropWithBoundsWithSearchIndex', 'pointPropWithBounds', 'pointPropWithGeoBoundsWithSearchIndex', 'pointPropWithGeoBounds').ifNotExists().create()", + "schema.vertexLabel('person').index('searchPointWithBounds').secondary().by('pointPropWithBounds').ifNotExists().add()", + "schema.vertexLabel('person').index('searchPointWithGeoBounds').secondary().by('pointPropWithGeoBounds').ifNotExists().add()", + + "g.addV('person').property('name', 'Paul Thomas Joe').property('city', 
'Rochester').property('state', 'MN').property('pointPropWithBoundsWithSearchIndex', Geo.point(-92.46295, 44.0234)).property('pointPropWithBounds', Geo.point(-92.46295, 44.0234)).property('pointPropWithGeoBoundsWithSearchIndex', Geo.point(-92.46295, 44.0234)).property('pointPropWithGeoBounds', Geo.point(-92.46295, 44.0234)).property('description', 'Lives by the hospital').next()", + "g.addV('person').property('name', 'George Bill Steve').property('city', 'Minneapolis').property('state', 'MN').property('pointPropWithBoundsWithSearchIndex', Geo.point(-93.266667, 44.093333)).property('pointPropWithBounds', Geo.point(-93.266667, 44.093333)).property('pointPropWithGeoBoundsWithSearchIndex', Geo.point(-93.266667, 44.093333)).property('pointPropWithGeoBounds', Geo.point(-93.266667, 44.093333)).property('description', 'A cold dude').next()", + "g.addV('person').property('name', 'James Paul Smith').property('city', 'Chicago').property('state', 'IL').property('pointPropWithBoundsWithSearchIndex', Geo.point(-87.684722, 41.836944)).property('description', 'Likes to hang out').next()", + "g.addV('person').property('name', 'Jill Alice').property('city', 'Atlanta').property('state', 'GA').property('pointPropWithBoundsWithSearchIndex', Geo.point(-84.39, 33.755)).property('description', 'Enjoys a nice cold coca cola').next()" + ] + + if not Version('5.0') <= DSE_VERSION < Version('5.1'): + queries.append("schema.vertexLabel('person').index('search').search().by('pointPropWithBoundsWithSearchIndex').withError(0.00001, 0.0).by('pointPropWithGeoBoundsWithSearchIndex').withError(0.00001, 0.0).ifNotExists().add()") + + return "\n".join(queries) + + +class CoreGraphFixtures(GraphFixtures): + + @staticmethod + def datatypes(): + data = ClassicGraphFixtures.datatypes() + del data['duration1'] + del data['duration2'] + + # Core Graphs only types + data["map1"] = ["mapOf(Text, Text)", {'test': 'test'}, None] + data["map2"] = ["mapOf(Text, Point)", {'test': Point(.5, .13)}, None] + data["map3"] = ["frozen(mapOf(Int, Varchar))", {42: 'test'}, None] + + data["list1"] = ["listOf(Text)", ['test', 'hello', 'world'], None] + data["list2"] = ["listOf(Int)", [42, 632, 32], None] + data["list3"] = ["listOf(Point)", [Point(.5, .13), Point(42.5, .13)], None] + data["list4"] = ["frozen(listOf(Int))", [42, 55, 33], None] + + data["set1"] = ["setOf(Text)", {'test', 'hello', 'world'}, None] + data["set2"] = ["setOf(Int)", {42, 632, 32}, None] + data["set3"] = ["setOf(Point)", {Point(.5, .13), Point(42.5, .13)}, None] + data["set4"] = ["frozen(setOf(Int))", {42, 55, 33}, None] + + data["tuple1"] = ["tupleOf(Int, Text)", (42, "world"), None] + data["tuple2"] = ["tupleOf(Int, tupleOf(Text, tupleOf(Text, Point)))", (42, ("world", ('this', Point(.5, .13)))), None] + data["tuple3"] = ["tupleOf(Int, tupleOf(Text, frozen(mapOf(Text, Text))))", (42, ("world", {'test': 'test'})), None] + data["tuple4"] = ["tupleOf(Int, tupleOf(Text, frozen(listOf(Int))))", (42, ("world", [65, 89])), None] + data["tuple5"] = ["tupleOf(Int, tupleOf(Text, frozen(setOf(Int))))", (42, ("world", {65, 55})), None] + data["tuple6"] = ["tupleOf(Int, tupleOf(Text, tupleOf(Text, LineString)))", + (42, ("world", ('this', LineString(((1.0, 2.0), (3.0, 4.0), (-89.0, 90.0)))))), None] + + data["tuple7"] = ["tupleOf(Int, tupleOf(Text, tupleOf(Text, Polygon)))", + (42, ("world", ('this', Polygon([(10.0, 10.0), (80.0, 10.0), (80., 88.0), (10., 89.0), (10., 10.0)], + [[(20., 20.0), (20., 30.0), (30., 30.0), (30., 20.0), (20., 20.0)], + [(40., 20.0), (40., 30.0), (50., 30.0), 
(50., 20.0), (40., 20.0)]])))), None]
+        data["dse_duration1"] = ["Duration()", Duration(42, 12, 10303312), None]
+        data["dse_duration2"] = ["Duration()", Duration(50, 32, 11), None]
+
+        return data
+
+    @staticmethod
+    def line(length, single_script=False):
+        queries = ["""
+            schema.vertexLabel('lp').ifNotExists().partitionBy('index', Int).create();
+            schema.edgeLabel('goesTo').ifNotExists().from('lp').to('lp').property('distance', Int).create();
+        """]
+
+        vertex_script = ["g.addV('lp').property('index', 0).next();"]
+        for index in range(1, length):
+            if not single_script and len(vertex_script) > 25:
+                queries.append("\n".join(vertex_script))
+                vertex_script = []
+
+            vertex_script.append('''
+                g.addV('lp').property('index', {index}).next();
+                g.V().hasLabel('lp').has('index', {pindex}).as('pp').V().hasLabel('lp').has('index', {index}).as('p').
+                addE('goesTo').from('pp').to('p').property('distance', 5).next();
+            '''.format(
+                index=index, pindex=index - 1))
+
+        queries.append("\n".join(vertex_script))
+        return queries
+
+    @staticmethod
+    def classic():
+        queries = [
+            '''
+            schema.vertexLabel('person').ifNotExists().partitionBy('name', Text).property('age', Int).create();
+            schema.vertexLabel('software').ifNotExists().partitionBy('name', Text).property('lang', Text).create();
+            schema.edgeLabel('created').ifNotExists().from('person').to('software').property('weight', Double).create();
+            schema.edgeLabel('knows').ifNotExists().from('person').to('person').property('weight', Double).create();
+            ''',
+
+            '''
+            Vertex marko = g.addV('person').property('name', 'marko').property('age', 29).next();
+            Vertex vadas = g.addV('person').property('name', 'vadas').property('age', 27).next();
+            Vertex lop = g.addV('software').property('name', 'lop').property('lang', 'java').next();
+            Vertex josh = g.addV('person').property('name', 'josh').property('age', 32).next();
+            Vertex peter = g.addV('person').property('name', 'peter').property('age', 35).next();
+            Vertex carl = g.addV('person').property('name', 'carl').property('age', 35).next();
+            Vertex ripple = g.addV('software').property('name', 'ripple').property('lang', 'java').next();
+
+            // TODO, switch to VertexReference and use v.id()
+            g.V().hasLabel('person').has('name', 'vadas').as('v').V().hasLabel('person').has('name', 'marko').as('m').addE('knows').from('m').to('v').property('weight', 0.5d).next();
+            g.V().hasLabel('person').has('name', 'josh').as('j').V().hasLabel('person').has('name', 'marko').as('m').addE('knows').from('m').to('j').property('weight', 1.0d).next();
+            g.V().hasLabel('software').has('name', 'lop').as('l').V().hasLabel('person').has('name', 'marko').as('m').addE('created').from('m').to('l').property('weight', 0.4d).next();
+            g.V().hasLabel('software').has('name', 'ripple').as('r').V().hasLabel('person').has('name', 'josh').as('j').addE('created').from('j').to('r').property('weight', 1.0d).next();
+            g.V().hasLabel('software').has('name', 'lop').as('l').V().hasLabel('person').has('name', 'josh').as('j').addE('created').from('j').to('l').property('weight', 0.4d).next();
+            g.V().hasLabel('software').has('name', 'lop').as('l').V().hasLabel('person').has('name', 'peter').as('p').addE('created').from('p').to('l').property('weight', 0.2d).next();
+
+            ''']
+
+        return queries
+
+    @staticmethod
+    def multiple_fields():
+        ## no generic test currently needs this
+        raise NotImplementedError()
+
+    @staticmethod
+    def large():
+        query_parts = [
+            '''
+            schema.vertexLabel('lcg').ifNotExists().partitionBy('ts', Int).property('sin', Double).
+            property('cos', Double).property('ii', Int).create();
+            schema.edgeLabel('linked').ifNotExists().from('lcg').to('lcg').create();
+            ''',
+
+            '''
+            int size = 2000;
+            List ids = new ArrayList();
+            v = g.addV('lcg').property('ts', 100001).property('sin', 0d).property('cos', 1d).property('ii', 0).next();
+            ids.add(v.id());
+            Random rand = new Random();
+            for (int ii = 1; ii < size; ii++) {
+                v = g.addV('lcg').property('ts', 100001 + ii).property('sin', Math.sin(ii/5.0)).property('cos', Math.cos(ii/5.0)).property('ii', ii).next();
+
+                uid = ids.get(rand.nextInt(ids.size()));
+                g.V(v.id()).as('v').V(uid).as('u').addE('linked').from('v').to('u').next();
+                ids.add(v.id());
+            }
+            g.V().count();'''
+        ]
+
+        return query_parts
+
+    @staticmethod
+    def address_book():
+        queries = [
+            "schema.vertexLabel('person').ifNotExists().partitionBy('name', Text)."
+            "property('pointPropWithBoundsWithSearchIndex', Point)."
+            "property('pointPropWithBounds', Point)."
+            "property('pointPropWithGeoBoundsWithSearchIndex', Point)."
+            "property('pointPropWithGeoBounds', Point)."
+            "property('city', Text)."
+            "property('state', Text)."
+            "property('description', Text).create()",
+            "schema.vertexLabel('person').searchIndex().by('name').by('pointPropWithBounds').by('pointPropWithGeoBounds').by('description').asText().create()",
+            "g.addV('person').property('name', 'Paul Thomas Joe').property('city', 'Rochester').property('state', 'MN').property('pointPropWithBoundsWithSearchIndex', Geo.point(-92.46295, 44.0234)).property('pointPropWithBounds', Geo.point(-92.46295, 44.0234)).property('pointPropWithGeoBoundsWithSearchIndex', Geo.point(-92.46295, 44.0234)).property('pointPropWithGeoBounds', Geo.point(-92.46295, 44.0234)).property('description', 'Lives by the hospital').next()",
+            "g.addV('person').property('name', 'George Bill Steve').property('city', 'Minneapolis').property('state', 'MN').property('pointPropWithBoundsWithSearchIndex', Geo.point(-93.266667, 44.093333)).property('pointPropWithBounds', Geo.point(-93.266667, 44.093333)).property('pointPropWithGeoBoundsWithSearchIndex', Geo.point(-93.266667, 44.093333)).property('pointPropWithGeoBounds', Geo.point(-93.266667, 44.093333)).property('description', 'A cold dude').next()",
+            "g.addV('person').property('name', 'James Paul Smith').property('city', 'Chicago').property('state', 'IL').property('pointPropWithBoundsWithSearchIndex', Geo.point(-87.684722, 41.836944)).property('description', 'Likes to hang out').next()",
+            "g.addV('person').property('name', 'Jill Alice').property('city', 'Atlanta').property('state', 'GA').property('pointPropWithBoundsWithSearchIndex', Geo.point(-84.39, 33.755)).property('description', 'Enjoys a nice cold coca cola').next()"
+        ]
+
+        if not Version('5.0') <= DSE_VERSION < Version('5.1'):
+            queries.append("schema.vertexLabel('person').searchIndex().by('pointPropWithBoundsWithSearchIndex').by('pointPropWithGeoBounds')"
+                           ".by('pointPropWithGeoBoundsWithSearchIndex').create()")
+
+        return queries
+
+
+def validate_classic_vertex(test, vertex):
+    vertex_props = vertex.properties.keys()
+    test.assertEqual(len(vertex_props), 2)
+    test.assertIn('name', vertex_props)
+    test.assertTrue('lang' in vertex_props or 'age' in vertex_props)
+
+
+def validate_classic_vertex_return_type(test, vertex):
+    validate_generic_vertex_result_type(test, vertex)
+    vertex_props = vertex.properties
+    test.assertIn('name', vertex_props)
+    test.assertTrue('lang' in vertex_props or 'age' in vertex_props)
+
+
+def validate_generic_vertex_result_type(test, vertex):
+    
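"""Shared structural check: the result is a Vertex with id, type, label and properties set."""
+    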
test.assertIsInstance(vertex, Vertex)
+    for attr in ('id', 'type', 'label', 'properties'):
+        test.assertIsNotNone(getattr(vertex, attr))
+
+
+def validate_classic_edge_properties(test, edge_properties):
+    test.assertEqual(len(edge_properties.keys()), 1)
+    test.assertIn('weight', edge_properties)
+    test.assertIsInstance(edge_properties, dict)
+
+
+def validate_classic_edge(test, edge):
+    validate_generic_edge_result_type(test, edge)
+    validate_classic_edge_properties(test, edge.properties)
+
+
+def validate_line_edge(test, edge):
+    validate_generic_edge_result_type(test, edge)
+    edge_props = edge.properties
+    test.assertEqual(len(edge_props.keys()), 1)
+    test.assertIn('distance', edge_props)
+
+
+def validate_generic_edge_result_type(test, edge):
+    test.assertIsInstance(edge, Edge)
+    for attr in ('properties', 'outV', 'outVLabel', 'inV', 'inVLabel', 'label', 'type', 'id'):
+        test.assertIsNotNone(getattr(edge, attr))
+
+
+def validate_path_result_type(test, path):
+    test.assertIsInstance(path, Path)
+    test.assertIsNotNone(path.labels)
+    for obj in path.objects:
+        if isinstance(obj, Edge):
+            validate_classic_edge(test, obj)
+        elif isinstance(obj, Vertex):
+            validate_classic_vertex(test, obj)
+        else:
+            test.fail("Invalid object found in path " + str(type(obj)))
+
+
+class GraphTestConfiguration(object):
+    """Possible configurations:
+
+    ClassicGraphSchema:
+        graphson1
+        graphson2
+        graphson3
+
+    CoreGraphSchema:
+        graphson3
+    """
+
+    @classmethod
+    def schemas(cls):
+        schemas = [ClassicGraphSchema]
+        if DSE_VERSION >= Version("6.8"):
+            schemas.append(CoreGraphSchema)
+        return schemas
+
+    @classmethod
+    def graphson_versions(cls):
+        graphson_versions = [GraphProtocol.GRAPHSON_1_0]
+        if DSE_VERSION >= Version("6.0"):
+            graphson_versions.append(GraphProtocol.GRAPHSON_2_0)
+        if DSE_VERSION >= Version("6.8"):
+            graphson_versions.append(GraphProtocol.GRAPHSON_3_0)
+        return graphson_versions
+
+    @classmethod
+    def schema_configurations(cls, schema=None):
+        schemas = cls.schemas() if schema is None else [schema]
+        configurations = []
+        for s in schemas:
+            configurations.append(s)
+
+        return configurations
+
+    @classmethod
+    def configurations(cls, schema=None, graphson=None):
+        schemas = cls.schemas() if schema is None else [schema]
+        graphson_versions = cls.graphson_versions() if graphson is None else [graphson]
+
+        configurations = []
+        for s in schemas:
+            for g in graphson_versions:
+                # core graphs only support GraphSON3
+                if s is CoreGraphSchema and g != GraphProtocol.GRAPHSON_3_0:
+                    continue
+                configurations.append((s, g))
+
+        return configurations
+
+    @staticmethod
+    def _make_graph_schema_test_method(func, schema):
+        def test_input(self):
+            self.setup_graph(schema)
+            try:
+                func(self, schema)
+            finally:
+                self.teardown_graph(schema)
+
+        schema_name = 'classic' if schema is ClassicGraphSchema else 'core'
+        test_input.__name__ = '{func}_{schema}'.format(
+            func=func.__name__.lstrip('_'), schema=schema_name)
+        return test_input
+
+    @staticmethod
+    def _make_graph_test_method(func, schema, graphson):
+        def test_input(self):
+            self.setup_graph(schema)
+            try:
+                func(self, schema, graphson)
+            finally:
+                self.teardown_graph(schema)
+
+        graphson_name = 'graphson1'
+        if graphson == GraphProtocol.GRAPHSON_2_0:
+            graphson_name = 'graphson2'
+        elif graphson == GraphProtocol.GRAPHSON_3_0:
+            graphson_name = 'graphson3'
+
+        schema_name = 'classic' if schema is ClassicGraphSchema else 'core'
+
+        # avoid keyspace name too long issue
+        if DSE_VERSION < Version('6.7'):
+            schema_name = schema_name[0]
+            graphson_name = 
'g' + graphson_name[-1]
+
+        test_input.__name__ = '{func}_{schema}_{graphson}'.format(
+            func=func.__name__.lstrip('_'), schema=schema_name, graphson=graphson_name)
+        return test_input
+
+    @classmethod
+    def generate_tests(cls, schema=None, graphson=None, traversal=False):
+        """Generate tests for a graph configuration"""
+        def decorator(klass):
+            if DSE_VERSION:
+                predicate = inspect.ismethod if six.PY2 else inspect.isfunction
+                for name, func in inspect.getmembers(klass, predicate=predicate):
+                    if not name.startswith('_test'):
+                        continue
+                    for _schema, _graphson in cls.configurations(schema, graphson):
+                        if traversal and _graphson == GraphProtocol.GRAPHSON_1_0:
+                            continue
+                        test_input = cls._make_graph_test_method(func, _schema, _graphson)
+                        log.debug("Generated test '{}.{}'".format(klass.__name__, test_input.__name__))
+                        setattr(klass, test_input.__name__, test_input)
+            return klass
+
+        return decorator
+
+    @classmethod
+    def generate_schema_tests(cls, schema=None):
+        """Generate schema tests for a graph configuration"""
+        def decorator(klass):
+            if DSE_VERSION:
+                predicate = inspect.ismethod if six.PY2 else inspect.isfunction
+                for name, func in inspect.getmembers(klass, predicate=predicate):
+                    if not name.startswith('_test'):
+                        continue
+                    for _schema in cls.schema_configurations(schema):
+                        test_input = cls._make_graph_schema_test_method(func, _schema)
+                        log.debug("Generated test '{}.{}'".format(klass.__name__, test_input.__name__))
+                        setattr(klass, test_input.__name__, test_input)
+            return klass
+
+        return decorator
+
+
+class VertexLabel(object):
+    """
+    Helper that represents a new VertexLabel:
+
+    VertexLabel(['Int()', 'Float()'])  # a vertex with two auto-named properties, e.g. property-v1-1 and property-v1-2
+    VertexLabel([('int1', 'Int()'), 'Float()'])  # a vertex with property int1 and one auto-named property
+    """
+
+    id = 0
+    label = None
+    properties = None
+
+    def __init__(self, properties):
+        VertexLabel.id += 1
+        self.id = VertexLabel.id
+        self.label = "vertex{}".format(self.id)
+        self.properties = {'pkid': self.id}
+        property_count = 0
+        for p in properties:
+            if isinstance(p, tuple):
+                name, typ = p
+            else:
+                property_count += 1
+                name = "property-v{}-{}".format(self.id, property_count)
+                typ = p
+            self.properties[name] = typ
+
+    @property
+    def non_pk_properties(self):
+        return {p: v for p, v in six.iteritems(self.properties) if p != 'pkid'}
+
+
+class GraphSchema(object):
+
+    has_geo_bounds = DSE_VERSION and DSE_VERSION >= Version('5.1')
+    fixtures = GraphFixtures
+
+    @classmethod
+    def sanitize_type(cls, typ):
+        if typ.lower().startswith("point"):
+            return cls.sanitize_point_type()
+        elif typ.lower().startswith("line"):
+            return cls.sanitize_line_type()
+        elif typ.lower().startswith("poly"):
+            return cls.sanitize_polygon_type()
+        else:
+            return typ
+
+    @classmethod
+    def sanitize_point_type(cls):
+        return "Point().withGeoBounds()" if cls.has_geo_bounds else "Point()"
+
+    @classmethod
+    def sanitize_line_type(cls):
+        return "Linestring().withGeoBounds()" if cls.has_geo_bounds else "Linestring()"
+
+    @classmethod
+    def sanitize_polygon_type(cls):
+        return "Polygon().withGeoBounds()" if cls.has_geo_bounds else "Polygon()"
+
+    @staticmethod
+    def drop_graph(session, graph_name):
+        ks = list(session.execute(
+            "SELECT * FROM system_schema.keyspaces WHERE keyspace_name = '{}';".format(graph_name)))
+        if not ks:
+            return
+
+        try:
+            session.execute_graph('system.graph(name).drop()', {'name': graph_name},
+                                  execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)
+        except Exception:
+            pass
+
+    @staticmethod
+    def 
create_graph(session, graph_name):
+        raise NotImplementedError()
+
+    @staticmethod
+    def clear(session):
+        pass
+
+    @staticmethod
+    def create_vertex_label(session, vertex_label, execution_profile=EXEC_PROFILE_GRAPH_DEFAULT):
+        raise NotImplementedError()
+
+    @staticmethod
+    def add_vertex(session, vertex_label, name, value, execution_profile=EXEC_PROFILE_GRAPH_DEFAULT):
+        raise NotImplementedError()
+
+    @classmethod
+    def ensure_properties(cls, session, obj, execution_profile=EXEC_PROFILE_GRAPH_DEFAULT):
+        if not isinstance(obj, (Vertex, Edge)):
+            return
+
+        # This pre-processing is needed due to a change in TinkerPop:
+        # properties are no longer returned automatically with some queries.
+        if not obj.properties:
+            if isinstance(obj, Edge):
+                obj.properties = {}
+                for p in cls.get_edge_properties(session, obj, execution_profile=execution_profile):
+                    obj.properties.update(p)
+            elif isinstance(obj, Vertex):
+                obj.properties = {
+                    p.label: p
+                    for p in cls.get_vertex_properties(session, obj, execution_profile=execution_profile)
+                }
+
+    @staticmethod
+    def get_vertex_properties(session, vertex, execution_profile=EXEC_PROFILE_GRAPH_DEFAULT):
+        return session.execute_graph("g.V(vertex_id).properties().toList()", {'vertex_id': vertex.id},
+                                     execution_profile=execution_profile)
+
+    @staticmethod
+    def get_edge_properties(session, edge, execution_profile=EXEC_PROFILE_GRAPH_DEFAULT):
+        return session.execute_graph("g.E(edge_id).properties().toList()", {'edge_id': edge.id},
+                                     execution_profile=execution_profile)
+
+
+class ClassicGraphSchema(GraphSchema):
+
+    fixtures = ClassicGraphFixtures
+
+    @staticmethod
+    def create_graph(session, graph_name):
+        session.execute_graph(CREATE_CLASSIC_GRAPH, {'name': graph_name},
+                              execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)
+        wait_for_graph_inserted(session, graph_name)
+
+    @staticmethod
+    def clear(session):
+        session.execute_graph('schema.clear()')
+
+    @classmethod
+    def create_vertex_label(cls, session, vertex_label, execution_profile=EXEC_PROFILE_GRAPH_DEFAULT):
+        statements = ["schema.propertyKey('pkid').Int().ifNotExists().create();"]
+        for k, v in six.iteritems(vertex_label.non_pk_properties):
+            typ = cls.sanitize_type(v)
+            statements.append("schema.propertyKey('{name}').{type}.create();".format(
+                name=k, type=typ
+            ))
+
+        statements.append("schema.vertexLabel('{label}').partitionKey('pkid').properties(".format(
+            label=vertex_label.label))
+        property_names = [name for name in six.iterkeys(vertex_label.non_pk_properties)]
+        statements.append(", ".join(["'{}'".format(p) for p in property_names]))
+        statements.append(").create();")
+
+        to_run = "\n".join(statements)
+        session.execute_graph(to_run, execution_profile=execution_profile)
+
+    @staticmethod
+    def add_vertex(session, vertex_label, name, value, execution_profile=EXEC_PROFILE_GRAPH_DEFAULT):
+        statement = "g.addV('{label}').property('pkid', {pkid}).property('{property_name}', val);".format(
+            pkid=vertex_label.id, label=vertex_label.label, property_name=name)
+        parameters = {'val': value}
+        return session.execute_graph(statement, parameters, execution_profile=execution_profile)
+
+
+class CoreGraphSchema(GraphSchema):
+
+    fixtures = CoreGraphFixtures
+
+    @classmethod
+    def sanitize_type(cls, typ):
+        typ = super(CoreGraphSchema, cls).sanitize_type(typ)
+        return typ.replace('()', '')
+
+    @classmethod
+    def sanitize_point_type(cls):
+        return "Point"
+
+    @classmethod
+    def sanitize_line_type(cls):
+        return "LineString"
+
+    @classmethod
+    def sanitize_polygon_type(cls):
+        return "Polygon"
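+
+    # For illustration, the overrides above yield, e.g.:
+    #   ClassicGraphSchema.sanitize_type('Point()')  ->  "Point().withGeoBounds()"  (DSE >= 5.1)
+    #   CoreGraphSchema.sanitize_type('Point()')     ->  "Point"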
+ + @staticmethod + def create_graph(session, graph_name): + session.execute_graph('system.graph(name).create()', {'name': graph_name}, + execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT) + wait_for_graph_inserted(session, graph_name) + + @classmethod + def create_vertex_label(cls, session, vertex_label, execution_profile=EXEC_PROFILE_GRAPH_DEFAULT): + statements = ["schema.vertexLabel('{label}').partitionBy('pkid', Int)".format( + label=vertex_label.label)] + + for name, typ in six.iteritems(vertex_label.non_pk_properties): + typ = cls.sanitize_type(typ) + statements.append(".property('{name}', {type})".format(name=name, type=typ)) + statements.append(".create();") + + to_run = "\n".join(statements) + session.execute_graph(to_run, execution_profile=execution_profile) + + @staticmethod + def add_vertex(session, vertex_label, name, value, execution_profile=EXEC_PROFILE_GRAPH_DEFAULT): + statement = "g.addV('{label}').property('pkid', {pkid}).property('{property_name}', val);".format( + pkid=vertex_label.id, label=vertex_label.label, property_name=name) + parameters = {'val': value} + return session.execute_graph(statement, parameters, execution_profile=execution_profile) diff --git a/tests/integration/advanced/graph/fluent/test_graph.py b/tests/integration/advanced/graph/fluent/test_graph.py index 623e275352..174c15277b 100644 --- a/tests/integration/advanced/graph/fluent/test_graph.py +++ b/tests/integration/advanced/graph/fluent/test_graph.py @@ -12,49 +12,65 @@ # See the License for the specific language governing permissions and # limitations under the License. - import sys +import datetime +import six +import time +from collections import namedtuple +from packaging.version import Version + +from cassandra import cluster +from cassandra.cluster import ContinuousPagingOptions from cassandra.datastax.graph.fluent import DseGraph +from cassandra.graph import Vertex, Edge, VertexProperty, GraphProtocol +from cassandra.util import Point, Polygon, LineString + from gremlin_python.process.graph_traversal import GraphTraversal, GraphTraversalSource from gremlin_python.process.traversal import P -from tests.integration import DSE_VERSION, requiredse, greaterthanorequaldse60 -from tests.integration.advanced import BasicGraphUnitTestCase, use_single_node_with_graph_and_solr, \ - use_single_node_with_graph, generate_classic, generate_line_graph, generate_multi_field_graph, \ - generate_large_complex_graph, generate_type_graph_schema, validate_classic_vertex, validate_classic_edge, \ - validate_generic_vertex_result_type, validate_classic_edge_properties, validate_line_edge, \ - validate_generic_edge_result_type, validate_path_result_type, TYPE_MAP +from gremlin_python.structure.graph import Edge as TravEdge +from gremlin_python.structure.graph import Vertex as TravVertex, VertexProperty as TravVertexProperty +from tests.integration import DSE_VERSION, greaterthanorequaldse68 +from tests.integration.advanced.graph import GraphUnitTestCase, \ + ClassicGraphSchema, CoreGraphSchema, \ + validate_classic_vertex, validate_classic_edge, validate_generic_vertex_result_type,\ + validate_classic_edge_properties, validate_line_edge, \ + validate_generic_edge_result_type, validate_path_result_type, VertexLabel, \ + GraphTestConfiguration +from tests.integration import greaterthanorequaldse60, requiredse -from gremlin_python.structure.graph import Edge as TravEdge -from gremlin_python.structure.graph import Vertex as TravVertex -from cassandra.graph import Vertex, Edge -from cassandra.util import Point, 
Polygon, LineString -import datetime -from six import string_types -import six -if six.PY3: - import ipaddress +try: + import unittest2 as unittest +except ImportError: + import unittest # noqa -def setup_module(): - if DSE_VERSION: - dse_options = {'graph': {'realtime_evaluation_timeout_in_seconds': 60}} - use_single_node_with_graph(dse_options=dse_options) +import ipaddress def check_equality_base(testcase, original, read_value): if isinstance(original, float): testcase.assertAlmostEqual(original, read_value, delta=.01) - elif six.PY3 and isinstance(original, ipaddress.IPv4Address): + elif isinstance(original, ipaddress.IPv4Address): testcase.assertAlmostEqual(original, ipaddress.IPv4Address(read_value)) - elif six.PY3 and isinstance(original, ipaddress.IPv6Address): + elif isinstance(original, ipaddress.IPv6Address): testcase.assertAlmostEqual(original, ipaddress.IPv6Address(read_value)) else: testcase.assertEqual(original, read_value) -class AbstractTraversalTest(): +class _AbstractTraversalTest(GraphUnitTestCase): + + def setUp(self): + super(_AbstractTraversalTest, self).setUp() + self.ep_graphson2 = DseGraph().create_execution_profile(self.graph_name, + graph_protocol=GraphProtocol.GRAPHSON_2_0) + self.ep_graphson3 = DseGraph().create_execution_profile(self.graph_name, + graph_protocol=GraphProtocol.GRAPHSON_3_0) - def test_basic_query(self): + self.cluster.add_execution_profile('traversal_graphson2', self.ep_graphson2) + self.cluster.add_execution_profile('traversal_graphson3', self.ep_graphson3) + + def _test_basic_query(self, schema, graphson): """ Test to validate that basic graph queries works @@ -70,16 +86,15 @@ def test_basic_query(self): @test_category dse graph """ - - g = self.fetch_traversal_source() - generate_classic(self.session) - traversal =g.V().has('name', 'marko').out('knows').values('name') - results_list = self.execute_traversal(traversal) + g = self.fetch_traversal_source(graphson) + self.execute_graph(schema.fixtures.classic(), graphson) + traversal = g.V().has('name', 'marko').out('knows').values('name') + results_list = self.execute_traversal(traversal, graphson) self.assertEqual(len(results_list), 2) self.assertIn('vadas', results_list) self.assertIn('josh', results_list) - def test_classic_graph(self): + def _test_classic_graph(self, schema, graphson): """ Test to validate that basic graph generation, and vertex and edges are surfaced correctly @@ -95,38 +110,40 @@ def test_classic_graph(self): @test_category dse graph """ - generate_classic(self.session) - g = self.fetch_traversal_source() - traversal = g.V() - vert_list = self.execute_traversal(traversal) + self.execute_graph(schema.fixtures.classic(), graphson) + ep = self.get_execution_profile(graphson) + g = self.fetch_traversal_source(graphson) + traversal = g.V() + vert_list = self.execute_traversal(traversal, graphson) for vertex in vert_list: + schema.ensure_properties(self.session, vertex, execution_profile=ep) self._validate_classic_vertex(g, vertex) - traversal = g.E() - edge_list = self.execute_traversal(traversal) + traversal = g.E() + edge_list = self.execute_traversal(traversal, graphson) for edge in edge_list: + schema.ensure_properties(self.session, edge, execution_profile=ep) self._validate_classic_edge(g, edge) - def test_graph_classic_path(self): + def _test_graph_classic_path(self, schema, graphson): """ Test to validate that the path version of the result type is generated correctly. 
It also tests basic path results as that is not covered elsewhere @since 1.0.0 @jira_ticket PYTHON-641 - @expected_result path object should be unpacked correctly including all nested edges and verticies + @expected_result path object should be unpacked correctly including all nested edges and vertices @test_category dse graph """ - generate_classic(self.session) - g = self.fetch_traversal_source() + self.execute_graph(schema.fixtures.classic(), graphson) + g = self.fetch_traversal_source(graphson) traversal = g.V().hasLabel('person').has('name', 'marko').as_('a').outE('knows').inV().as_('c', 'd').outE('created').as_('e', 'f', 'g').inV().path() - path_list = self.execute_traversal(traversal) + path_list = self.execute_traversal(traversal, graphson) self.assertEqual(len(path_list), 2) for path in path_list: self._validate_path_result_type(g, path) - - def test_range_query(self): + def _test_range_query(self, schema, graphson): """ Test to validate range queries are handled correctly. @@ -141,18 +158,18 @@ def test_range_query(self): @test_category dse graph """ + self.execute_graph(schema.fixtures.line(150), graphson) + ep = self.get_execution_profile(graphson) + g = self.fetch_traversal_source(graphson) - query_to_run = generate_line_graph(150) - self.session.execute_graph(query_to_run) - g = self.fetch_traversal_source() - - traversal = g.E().range(0,10) - edges = self.execute_traversal(traversal) + traversal = g.E().range(0, 10) + edges = self.execute_traversal(traversal, graphson) self.assertEqual(len(edges), 10) for edge in edges: + schema.ensure_properties(self.session, edge, execution_profile=ep) self._validate_line_edge(g, edge) - def test_result_types(self): + def _test_result_types(self, schema, graphson): """ Test to validate that the edge and vertex version of results are constructed correctly. @@ -161,14 +178,14 @@ def test_result_types(self): @expected_result edge/vertex result types should be unpacked correctly. @test_category dse graph """ - generate_multi_field_graph(self.session) # TODO: we could just make a single vertex with properties of all types, or even a simple query that just uses a sequence of groovy expressions - g = self.fetch_traversal_source() + self.execute_graph(schema.fixtures.line(150), graphson) + g = self.fetch_traversal_source(graphson) traversal = g.V() - vertices = self.execute_traversal(traversal) + vertices = self.execute_traversal(traversal, graphson) for vertex in vertices: self._validate_type(g, vertex) - def test_large_result_set(self): + def _test_large_result_set(self, schema, graphson): """ Test to validate that large result sets return correctly. 
@@ -180,14 +197,14 @@ def test_large_result_set(self): @test_category dse graph """ - generate_large_complex_graph(self.session, 5000) - g = self.fetch_traversal_source() + self.execute_graph(schema.fixtures.large(), graphson) + g = self.fetch_traversal_source(graphson) traversal = g.V() - vertices = self.execute_traversal(traversal) + vertices = self.execute_traversal(traversal, graphson) for vertex in vertices: - self._validate_generic_vertex_result_type(g,vertex) + self._validate_generic_vertex_result_type(g, vertex) - def test_vertex_meta_properties(self): + def _test_vertex_meta_properties(self, schema, graphson): """ Test verifying vertex property properties @@ -196,6 +213,9 @@ def test_vertex_meta_properties(self): @test_category dse graph """ + if schema is not ClassicGraphSchema: + raise unittest.SkipTest('skipped because multiple properties are only supported with classic graphs') + s = self.session s.execute_graph("schema.propertyKey('k0').Text().ifNotExists().create();") s.execute_graph("schema.propertyKey('k1').Text().ifNotExists().create();") @@ -206,15 +226,15 @@ def test_vertex_meta_properties(self): v.property('key', 'meta_prop', 'k0', 'v0', 'k1', 'v1') v''')[0] - g = self.fetch_traversal_source() + g = self.fetch_traversal_source(graphson) traversal = g.V() # This should contain key, and value where value is a property # This should be a vertex property and should contain sub properties - results = self.execute_traversal(traversal) + results = self.execute_traversal(traversal, graphson) self._validate_meta_property(g, results[0]) - def test_vertex_multiple_properties(self): + def _test_vertex_multiple_properties(self, schema, graphson): """ Test verifying vertex property form for various Cardinality @@ -229,6 +249,9 @@ def test_vertex_multiple_properties(self): @test_category dse graph """ + if schema is not ClassicGraphSchema: + raise unittest.SkipTest('skipped because multiple properties are only supported with classic graphs') + s = self.session s.execute_graph('''Schema schema = graph.schema(); schema.propertyKey('mult_key').Text().multiple().ifNotExists().create(); @@ -243,10 +266,10 @@ def test_vertex_multiple_properties(self): mpw2v = s.execute_graph('''g.addV('MPW2').property('mult_key', 'value0').property('mult_key', 'value1')''')[0] - g = self.fetch_traversal_source() + g = self.fetch_traversal_source(graphson) traversal = g.V(mpw1v.id).properties() - vertex_props = self.execute_traversal(traversal) + vertex_props = self.execute_traversal(traversal, graphson) self.assertEqual(len(vertex_props), 1) @@ -254,10 +277,10 @@ def test_vertex_multiple_properties(self): self.assertEqual(vertex_props[0].value, "value") # multiple_with_two_values - #v = s.execute_graph('''g.addV(label, 'MPW2', 'mult_key', 'value0', 'mult_key', 'value1')''')[0] + #v = s.execute_graph('''g.addV(label, 'MPW2', 'mult_key', 'value0', 'mult_key', 'value1')''')[0] traversal = g.V(mpw2v.id).properties() - vertex_props = self.execute_traversal(traversal) + vertex_props = self.execute_traversal(traversal, graphson) self.assertEqual(len(vertex_props), 2) self.assertEqual(self.fetch_key_from_prop(vertex_props[0]), 'mult_key') @@ -270,18 +293,16 @@ def test_vertex_multiple_properties(self): v.property('single_key', 'value') v''')[0] traversal = g.V(v.id).properties() - vertex_props = self.execute_traversal(traversal) + vertex_props = self.execute_traversal(traversal, graphson) self.assertEqual(len(vertex_props), 1) self.assertEqual(self.fetch_key_from_prop(vertex_props[0]), "single_key") 
self.assertEqual(vertex_props[0].value, "value") - def should_parse_meta_properties(self): g = self.fetch_traversal_source() g.addV("meta_v").property("meta_prop", "hello", "sub_prop", "hi", "sub_prop2", "hi2") - - def test_all_graph_types_with_schema(self): + def _test_all_graph_types_with_schema(self, schema, graphson): """ Exhaustively goes through each type that is supported by dse_graph. creates a vertex for each type using a dse-tinkerpop traversal, @@ -294,31 +315,24 @@ def test_all_graph_types_with_schema(self): @test_category dse graph """ - generate_type_graph_schema(self.session) - # if result set is not parsed correctly this will throw an exception + self._write_and_read_data_types(schema, graphson) - self._write_and_read_data_types() - - - def test_all_graph_types_without_schema(self): + def _test_all_graph_types_without_schema(self, schema, graphson): """ Exhaustively goes through each type that is supported by dse_graph. creates a vertex for each type using a dse-tinkerpop traversal, It then attempts to fetch it from the server and compares it to what was inserted Do not prime the graph with the correct schema first - @since 1.0.0 @jira_ticket PYTHON-641 @expected_result inserted objects are equivalent to those retrieved - @test_category dse graph """ + if schema is not ClassicGraphSchema: + raise unittest.SkipTest('schema-less is only for classic graphs') + self._write_and_read_data_types(schema, graphson, use_schema=False) - # Prime graph using common utilites - generate_type_graph_schema(self.session, prime_schema=False) - self._write_and_read_data_types() - - def test_dsl(self): + def _test_dsl(self, schema, graphson): """ The test creates a SocialTraversal and a SocialTraversalSource as part of a DSL. Then calls it's method and checks the results to verify @@ -342,17 +356,24 @@ def __init__(self, *args, **kwargs): def people(self, *names): return self.get_graph_traversal().V().has("name", P.within(*names)) - generate_classic(self.session) - g = self.fetch_traversal_source(traversal_class=SocialTraversalSource) + self.execute_graph(schema.fixtures.classic(), graphson) + if schema is CoreGraphSchema: + self.execute_graph(""" + schema.edgeLabel('knows').from('person').to('person').materializedView('person__knows__person_by_in_name'). 
+ ifNotExists().partitionBy('in_name').clusterBy('out_name', Asc).create() + """, graphson) + time.sleep(1) # give some time to the MV to be populated + g = self.fetch_traversal_source(graphson, traversal_class=SocialTraversalSource) traversal = g.people("marko", "albert").knows("vadas") - results = self.execute_traversal(traversal) - + results = self.execute_traversal(traversal, graphson) self.assertEqual(len(results), 1) only_vertex = results[0] + schema.ensure_properties(self.session, only_vertex, + execution_profile=self.get_execution_profile(graphson)) self._validate_classic_vertex(g, only_vertex) - def test_bulked_results(self): + def _test_bulked_results(self, schema, graphson): """ Send a query expecting a bulked result and the driver "undoes" the bulk and returns the expected list @@ -363,32 +384,182 @@ def test_bulked_results(self): @test_category dse graph """ - generate_classic(self.session) - g = self.fetch_traversal_source() + self.execute_graph(schema.fixtures.classic(), graphson) + g = self.fetch_traversal_source(graphson) barrier_traversal = g.E().label().barrier() - results = self.execute_traversal(barrier_traversal) - self.assertEqual(["created", "created", "created", "created", "knows", "knows"], results) - - def _write_and_read_data_types(self): - g = self.fetch_traversal_source() - for key in TYPE_MAP.keys(): - vertex_label = generate_type_graph_schema.single_vertex - property_name = key + "value" - data_value = TYPE_MAP[key][1] - - write_traversal = g.addV(vertex_label).property(property_name, data_value) - self.execute_traversal(write_traversal) - - read_traversal = g.V().hasLabel(vertex_label).has(property_name).values() - results = self.execute_traversal(read_traversal) - - self._check_equality(g, data_value, results[0]) - - def fetch_edge_props(self, g, edge): + results = self.execute_traversal(barrier_traversal, graphson) + self.assertEqual(sorted(["created", "created", "created", "created", "knows", "knows"]), sorted(results)) + + def _test_udt_with_classes(self, schema, graphson): + class Address(object): + + def __init__(self, address, city, state): + self.address = address + self.city = city + self.state = state + + def __eq__(self, other): + return self.address == other.address and self.city == other.city and self.state == other.state + + class AddressWithTags(object): + + def __init__(self, address, city, state, tags): + self.address = address + self.city = city + self.state = state + self.tags = tags + + def __eq__(self, other): + return (self.address == other.address and self.city == other.city + and self.state == other.state and self.tags == other.tags) + + class ComplexAddress(object): + + def __init__(self, address, address_tags, city, state, props): + self.address = address + self.address_tags = address_tags + self.city = city + self.state = state + self.props = props + + def __eq__(self, other): + return (self.address == other.address and self.address_tags == other.address_tags + and self.city == other.city and self.state == other.state + and self.props == other.props) + + class ComplexAddressWithOwners(object): + + def __init__(self, address, address_tags, city, state, props, owners): + self.address = address + self.address_tags = address_tags + self.city = city + self.state = state + self.props = props + self.owners = owners + + def __eq__(self, other): + return (self.address == other.address and self.address_tags == other.address_tags + and self.city == other.city and self.state == other.state + and self.props == other.props and self.owners == 
other.owners) + + self.__test_udt(schema, graphson, Address, AddressWithTags, ComplexAddress, ComplexAddressWithOwners) + + def _test_udt_with_namedtuples(self, schema, graphson): + AddressTuple = namedtuple('Address', ('address', 'city', 'state')) + AddressWithTagsTuple = namedtuple('AddressWithTags', ('address', 'city', 'state', 'tags')) + ComplexAddressTuple = namedtuple('ComplexAddress', ('address', 'address_tags', 'city', 'state', 'props')) + ComplexAddressWithOwnersTuple = namedtuple('ComplexAddressWithOwners', ('address', 'address_tags', 'city', + 'state', 'props', 'owners')) + + self.__test_udt(schema, graphson, AddressTuple, AddressWithTagsTuple, + ComplexAddressTuple, ComplexAddressWithOwnersTuple) + + def _write_and_read_data_types(self, schema, graphson, use_schema=True): + g = self.fetch_traversal_source(graphson) + ep = self.get_execution_profile(graphson) + for data in six.itervalues(schema.fixtures.datatypes()): + typ, value, deserializer = data + vertex_label = VertexLabel([typ]) + property_name = next(six.iterkeys(vertex_label.non_pk_properties)) + if use_schema or schema is CoreGraphSchema: + schema.create_vertex_label(self.session, vertex_label, execution_profile=ep) + + write_traversal = g.addV(str(vertex_label.label)).property('pkid', vertex_label.id).\ + property(property_name, value) + self.execute_traversal(write_traversal, graphson) + + read_traversal = g.V().hasLabel(str(vertex_label.label)).has(property_name).properties() + results = self.execute_traversal(read_traversal, graphson) + + for result in results: + if result.label == 'pkid': + continue + self._check_equality(g, value, result.value) + + def __test_udt(self, schema, graphson, address_class, address_with_tags_class, + complex_address_class, complex_address_with_owners_class): + if schema is not CoreGraphSchema or DSE_VERSION < Version('6.8'): + raise unittest.SkipTest("Graph UDT is only supported with DSE 6.8+ and Core graphs.") + + ep = self.get_execution_profile(graphson) + + Address = address_class + AddressWithTags = address_with_tags_class + ComplexAddress = complex_address_class + ComplexAddressWithOwners = complex_address_with_owners_class + + # setup udt + self.session.execute_graph(""" + schema.type('address').property('address', Text).property('city', Text).property('state', Text).create(); + schema.type('addressTags').property('address', Text).property('city', Text).property('state', Text). + property('tags', setOf(Text)).create(); + schema.type('complexAddress').property('address', Text).property('address_tags', frozen(typeOf('addressTags'))). + property('city', Text).property('state', Text).property('props', mapOf(Text, Int)).create(); + schema.type('complexAddressWithOwners').property('address', Text). + property('address_tags', frozen(typeOf('addressTags'))). + property('city', Text).property('state', Text).property('props', mapOf(Text, Int)). 
+ property('owners', frozen(listOf(tupleOf(Text, Int)))).create();
+        """, execution_profile=ep)
+
+        time.sleep(2)  # wait for the UDT to be discovered
+        self.session.cluster.register_user_type(self.graph_name, 'address', Address)
+        self.session.cluster.register_user_type(self.graph_name, 'addressTags', AddressWithTags)
+        self.session.cluster.register_user_type(self.graph_name, 'complexAddress', ComplexAddress)
+        self.session.cluster.register_user_type(self.graph_name, 'complexAddressWithOwners', ComplexAddressWithOwners)
+
+        data = {
+            "udt1": ["typeOf('address')", Address('1440 Rd Smith', 'Quebec', 'QC')],
+            "udt2": ["tupleOf(typeOf('address'), Text)", (Address('1440 Rd Smith', 'Quebec', 'QC'), 'hello')],
+            "udt3": ["tupleOf(frozen(typeOf('address')), Text)", (Address('1440 Rd Smith', 'Quebec', 'QC'), 'hello')],
+            "udt4": ["tupleOf(tupleOf(Int, typeOf('address')), Text)",
+                     ((42, Address('1440 Rd Smith', 'Quebec', 'QC')), 'hello')],
+            "udt5": ["tupleOf(tupleOf(Int, typeOf('addressTags')), Text)",
+                     ((42, AddressWithTags('1440 Rd Smith', 'Quebec', 'QC', {'t1', 't2'})), 'hello')],
+            "udt6": ["tupleOf(tupleOf(Int, typeOf('complexAddress')), Text)",
+                     ((42, ComplexAddress('1440 Rd Smith',
+                                          AddressWithTags('1440 Rd Smith', 'Quebec', 'QC', {'t1', 't2'}),
+                                          'Quebec', 'QC', {'p1': 42, 'p2': 33})), 'hello')],
+            "udt7": ["tupleOf(tupleOf(Int, frozen(typeOf('complexAddressWithOwners'))), Text)",
+                     ((42, ComplexAddressWithOwners(
+                         '1440 Rd Smith',
+                         AddressWithTags('1440 Rd Smith', 'Quebec', 'QC', {'t1', 't2'}),
+                         'Quebec', 'QC', {'p1': 42, 'p2': 33}, [('Mike', 43), ('Gina', 39)])
+                      ), 'hello')]
+        }
+
+        g = self.fetch_traversal_source(graphson)
+        for typ, value in six.itervalues(data):
+            vertex_label = VertexLabel([typ])
+            property_name = next(six.iterkeys(vertex_label.non_pk_properties))
+            schema.create_vertex_label(self.session, vertex_label, execution_profile=ep)
+
+            write_traversal = g.addV(str(vertex_label.label)).property('pkid', vertex_label.id).
\ + property(property_name, value) + self.execute_traversal(write_traversal, graphson) + + #vertex = list(schema.add_vertex(self.session, vertex_label, property_name, value, execution_profile=ep))[0] + #vertex_properties = list(schema.get_vertex_properties( + # self.session, vertex, execution_profile=ep)) + + read_traversal = g.V().hasLabel(str(vertex_label.label)).has(property_name).properties() + vertex_properties = self.execute_traversal(read_traversal, graphson) + + self.assertEqual(len(vertex_properties), 2) # include pkid + for vp in vertex_properties: + if vp.label == 'pkid': + continue + + self.assertIsInstance(vp, (VertexProperty, TravVertexProperty)) + self.assertEqual(vp.label, property_name) + self.assertEqual(vp.value, value) + + @staticmethod + def fetch_edge_props(g, edge): edge_props = g.E(edge.id).properties().toList() return edge_props - def fetch_vertex_props(self, g, vertex): + @staticmethod + def fetch_vertex_props(g, vertex): vertex_props = g.V(vertex.id).properties().toList() return vertex_props @@ -398,24 +569,21 @@ def _check_equality(self, g, original, read_value): @requiredse -class ImplicitExecutionTest(AbstractTraversalTest, BasicGraphUnitTestCase): +@GraphTestConfiguration.generate_tests(traversal=True) +class ImplicitExecutionTest(_AbstractTraversalTest): """ This test class will execute all tests of the AbstractTraversalTestClass using implicit execution This all traversal will be run directly using toList() """ - def setUp(self): - super(ImplicitExecutionTest, self).setUp() - if DSE_VERSION: - self.ep = DseGraph().create_execution_profile(self.graph_name) - self.cluster.add_execution_profile(self.graph_name, self.ep) - - def fetch_key_from_prop(self, property): + @staticmethod + def fetch_key_from_prop(property): return property.key - def fetch_traversal_source(self, **kwargs): - return DseGraph().traversal_source(self.session, self.graph_name, execution_profile=self.ep, **kwargs) + def fetch_traversal_source(self, graphson, **kwargs): + ep = self.get_execution_profile(graphson, traversal=True) + return DseGraph().traversal_source(self.session, self.graph_name, execution_profile=ep, **kwargs) - def execute_traversal(self, traversal): + def execute_traversal(self, traversal, graphson=None): return traversal.toList() def _validate_classic_vertex(self, g, vertex): @@ -426,12 +594,12 @@ def _validate_classic_vertex(self, g, vertex): self.assertIn('name', vertex_prop_keys) self.assertTrue('lang' in vertex_prop_keys or 'age' in vertex_prop_keys) - def _validate_generic_vertex_result_type(self,g, vertex): + def _validate_generic_vertex_result_type(self, g, vertex): # Checks a vertex object for it's generic properties properties = self.fetch_vertex_props(g, vertex) for attr in ('id', 'label'): self.assertIsNotNone(getattr(vertex, attr)) - self.assertTrue( len(properties)>2) + self.assertTrue(len(properties) > 2) def _validate_classic_edge_properties(self, g, edge): # Checks the properties on a classic edge for correctness @@ -467,13 +635,13 @@ def _validate_path_result_type(self, g, objects_path): self.fail("Invalid object found in path " + str(object.type)) def _validate_meta_property(self, g, vertex): - meta_props = g.V(vertex.id).properties().toList() + meta_props = g.V(vertex.id).properties().toList() self.assertEqual(len(meta_props), 1) meta_prop = meta_props[0] - self.assertEqual(meta_prop.value,"meta_prop") - self.assertEqual(meta_prop.key,"key") + self.assertEqual(meta_prop.value, "meta_prop") + self.assertEqual(meta_prop.key, "key") - nested_props = 
vertex_props = g.V(vertex.id).properties().properties().toList() + nested_props = g.V(vertex.id).properties().properties().toList() self.assertEqual(len(nested_props), 2) for nested_prop in nested_props: self.assertTrue(nested_prop.key in ['k0', 'k1']) @@ -487,37 +655,42 @@ def _validate_type(self, g, vertex): _validate_prop(key, value, self) -@requiredse -class ExplicitExecutionBase(BasicGraphUnitTestCase): - def setUp(self): - super(ExplicitExecutionBase, self).setUp() - if DSE_VERSION: - self.ep = DseGraph().create_execution_profile(self.graph_name) - self.cluster.add_execution_profile(self.graph_name, self.ep) - - def fetch_traversal_source(self, **kwargs): - return DseGraph().traversal_source(self.session, self.graph_name, **kwargs) - - def execute_traversal(self, traversal): - query = DseGraph.query_from_traversal(traversal) - #Use an ep that is configured with the correct row factory, and bytecode-json language flat set - result_set = self.session.execute_graph(query, execution_profile=self.ep) +class ExplicitExecutionBase(GraphUnitTestCase): + + def fetch_traversal_source(self, graphson, **kwargs): + ep = self.get_execution_profile(graphson, traversal=True) + return DseGraph().traversal_source(self.session, self.graph_name, execution_profile=ep, **kwargs) + + def execute_traversal(self, traversal, graphson): + ep = self.get_execution_profile(graphson, traversal=True) + ep = self.session.get_execution_profile(ep) + context = None + if graphson == GraphProtocol.GRAPHSON_3_0: + context = { + 'cluster': self.cluster, + 'graph_name': ep.graph_options.graph_name.decode('utf-8') if ep.graph_options.graph_name else None + } + query = DseGraph.query_from_traversal(traversal, graphson, context=context) + # Use an ep that is configured with the correct row factory, and bytecode-json language flat set + result_set = self.execute_graph(query, graphson, traversal=True) return list(result_set) @requiredse -class ExplicitExecutionTest(ExplicitExecutionBase, AbstractTraversalTest): +@GraphTestConfiguration.generate_tests(traversal=True) +class ExplicitExecutionTest(ExplicitExecutionBase, _AbstractTraversalTest): """ This test class will execute all tests of the AbstractTraversalTestClass using Explicit execution All queries will be run by converting them to byte code, and calling execute graph explicitly with a generated ep. 
""" - def fetch_key_from_prop(self, property): + @staticmethod + def fetch_key_from_prop(property): return property.label def _validate_classic_vertex(self, g, vertex): validate_classic_vertex(self, vertex) - def _validate_generic_vertex_result_type(self,g, vertex): + def _validate_generic_vertex_result_type(self, g, vertex): validate_generic_vertex_result_type(self, vertex) def _validate_classic_edge_properties(self, g, edge): @@ -534,7 +707,7 @@ def _validate_generic_edge_result_type(self, edge): def _validate_type(self, g, vertex): for key in vertex.properties: - value = vertex.properties[key][0].value + value = vertex.properties[key][0].value _validate_prop(key, value, self) def _validate_path_result_type(self, g, path_obj): @@ -546,12 +719,12 @@ def _validate_path_result_type(self, g, path_obj): props = [] if isinstance(obj, Edge): obj.properties = { - p['key']: p['value'] + p.key: p.value for p in self.fetch_edge_props(g, obj) } elif isinstance(obj, Vertex): obj.properties = { - p['label']: p['value'] + p.label: p.value for p in self.fetch_vertex_props(g, obj) } @@ -568,6 +741,9 @@ def _validate_meta_property(self, g, vertex): def _validate_prop(key, value, unittest): + if key == 'index': + return + if any(key.startswith(t) for t in ('int', 'short')): typ = int @@ -585,7 +761,7 @@ def _validate_prop(key, value, unittest): elif any(key.startswith(t) for t in ('Linestring',)): typ = LineString elif any(key.startswith(t) for t in ('neg',)): - typ = string_types + typ = six.string_types elif any(key.startswith(t) for t in ('date',)): typ = datetime.date elif any(key.startswith(t) for t in ('time',)): @@ -595,20 +771,21 @@ def _validate_prop(key, value, unittest): unittest.assertIsInstance(value, typ) -@requiredse @greaterthanorequaldse60 +@GraphTestConfiguration.generate_tests(traversal=True) class BatchStatementTests(ExplicitExecutionBase): def setUp(self): super(BatchStatementTests, self).setUp() - self.g = self.fetch_traversal_source() + self.ep_graphson2 = DseGraph().create_execution_profile(self.graph_name, + graph_protocol=GraphProtocol.GRAPHSON_2_0) + self.ep_graphson3 = DseGraph().create_execution_profile(self.graph_name, + graph_protocol=GraphProtocol.GRAPHSON_3_0) - if hasattr(self, "batch"): - self.batch.clear() - else: - self.batch = DseGraph.batch(session=self.session, execution_profile=self.ep) + self.cluster.add_execution_profile('traversal_graphson2', self.ep_graphson2) + self.cluster.add_execution_profile('traversal_graphson3', self.ep_graphson3) - def test_batch_with_schema(self): + def _test_batch_with_schema(self, schema, graphson): """ Sends a Batch statement and verifies it has succeeded with a schema created @@ -618,10 +795,9 @@ def test_batch_with_schema(self): @test_category dse graph """ - generate_type_graph_schema(self.session) - self._send_batch_and_read_results() + self._send_batch_and_read_results(schema, graphson) - def test_batch_without_schema(self): + def _test_batch_without_schema(self, schema, graphson): """ Sends a Batch statement and verifies it has succeeded without a schema created @@ -631,10 +807,11 @@ def test_batch_without_schema(self): @test_category dse graph """ - generate_type_graph_schema(self.session) - self._send_batch_and_read_results() + if schema is not ClassicGraphSchema: + raise unittest.SkipTest('schema-less is only for classic graphs') + self._send_batch_and_read_results(schema, graphson, use_schema=False) - def test_batch_with_schema_add_all(self): + def _test_batch_with_schema_add_all(self, schema, graphson): """ Sends a Batch 
statement and verifies it has succeeded with a schema created. Uses :method:`dse_graph.query._BatchGraphStatement.add_all` to add the statements @@ -646,10 +823,9 @@ def test_batch_with_schema_add_all(self): @test_category dse graph """ - generate_type_graph_schema(self.session) - self._send_batch_and_read_results(add_all=True) + self._send_batch_and_read_results(schema, graphson, add_all=True) - def test_batch_without_schema_add_all(self): + def _test_batch_without_schema_add_all(self, schema, graphson): """ Sends a Batch statement and verifies it has succeeded without a schema created Uses :method:`dse_graph.query._BatchGraphStatement.add_all` to add the statements @@ -661,8 +837,9 @@ def test_batch_without_schema_add_all(self): @test_category dse graph """ - generate_type_graph_schema(self.session, prime_schema=False) - self._send_batch_and_read_results(add_all=True) + if schema is not ClassicGraphSchema: + raise unittest.SkipTest('schema-less is only for classic graphs') + self._send_batch_and_read_results(schema, graphson, add_all=True, use_schema=False) def test_only_graph_traversals_are_accepted(self): """ @@ -674,42 +851,141 @@ def test_only_graph_traversals_are_accepted(self): @test_category dse graph """ - self.assertRaises(ValueError, self.batch.add, '{"@value":{"step":[["addV","poc_int"],' - '["property","bigint1value",{"@value":12,"@type":"g:Int32"}]]},' - '"@type":"g:Bytecode"}') + batch = DseGraph.batch() + self.assertRaises(ValueError, batch.add, '{"@value":{"step":[["addV","poc_int"],' + '["property","bigint1value",{"@value":12,"@type":"g:Int32"}]]},' + '"@type":"g:Bytecode"}') another_batch = DseGraph.batch() - self.assertRaises(ValueError, self.batch.add, another_batch) + self.assertRaises(ValueError, batch.add, another_batch) - def _send_batch_and_read_results(self, add_all=False): - # For each supported type fetch create a vetex containing that type - vertex_label = generate_type_graph_schema.single_vertex + def _send_batch_and_read_results(self, schema, graphson, add_all=False, use_schema=True): traversals = [] - for key in TYPE_MAP.keys(): - property_name = key + "value" - traversal = self.g.addV(vertex_label).property(property_name, TYPE_MAP[key][1]) + datatypes = schema.fixtures.datatypes() + values = {} + g = self.fetch_traversal_source(graphson) + ep = self.get_execution_profile(graphson) + batch = DseGraph.batch(session=self.session, + execution_profile=self.get_execution_profile(graphson, traversal=True)) + for data in six.itervalues(datatypes): + typ, value, deserializer = data + vertex_label = VertexLabel([typ]) + property_name = next(six.iterkeys(vertex_label.non_pk_properties)) + values[property_name] = value + if use_schema or schema is CoreGraphSchema: + schema.create_vertex_label(self.session, vertex_label, execution_profile=ep) + + traversal = g.addV(str(vertex_label.label)).property('pkid', vertex_label.id).property(property_name, value) if not add_all: - self.batch.add(traversal) + batch.add(traversal) traversals.append(traversal) if add_all: - self.batch.add_all(traversals) - - self.assertEqual(len(TYPE_MAP), len(self.batch)) + batch.add_all(traversals) - self.batch.execute() + self.assertEqual(len(datatypes), len(batch)) - traversal = self.g.V() - vertices = self.execute_traversal(traversal) + batch.execute() - self.assertEqual(len(vertices), len(TYPE_MAP), "g.V() returned {}".format(vertices)) + vertices = self.execute_traversal(g.V(), graphson) + self.assertEqual(len(vertices), len(datatypes), "g.V() returned {}".format(vertices)) # Iterate over 
all the vertices and check that they match the original input
         for vertex in vertices:
-            key = list(vertex.properties.keys())[0].replace("value", "")
-            original = TYPE_MAP[key][1]
-            self._check_equality(self.g, original, vertex)
+            schema.ensure_properties(self.session, vertex, execution_profile=ep)
+            key = [k for k in list(vertex.properties.keys()) if k != 'pkid'][0].replace("value", "")
+            original = values[key]
+            self._check_equality(original, vertex)
 
-    def _check_equality(self,g, original, vertex):
+    def _check_equality(self, original, vertex):
         for key in vertex.properties:
-            value = vertex.properties[key][0].value
+            if key == 'pkid':
+                continue
+            value = vertex.properties[key].value \
+                if isinstance(vertex.properties[key], VertexProperty) else vertex.properties[key][0].value
             check_equality_base(self, original, value)
+
+
+class ContinuousPagingOptionsForTests(ContinuousPagingOptions):
+    def __init__(self,
+                 page_unit=ContinuousPagingOptions.PagingUnit.ROWS, max_pages=1,  # a single page by default, so tests can detect paging
+                 max_pages_per_second=0, max_queue_size=4):
+        super(ContinuousPagingOptionsForTests, self).__init__(page_unit, max_pages, max_pages_per_second,
+                                                              max_queue_size)
+
+
+def reset_paging_options():
+    cluster.ContinuousPagingOptions = ContinuousPagingOptions
+
+
+@greaterthanorequaldse68
+@GraphTestConfiguration.generate_tests(schema=CoreGraphSchema)
+class GraphPagingTest(GraphUnitTestCase):
+
+    def setUp(self):
+        super(GraphPagingTest, self).setUp()
+        self.addCleanup(reset_paging_options)
+        self.ep_graphson3 = DseGraph().create_execution_profile(self.graph_name,
+                                                                graph_protocol=GraphProtocol.GRAPHSON_3_0)
+        self.cluster.add_execution_profile('traversal_graphson3', self.ep_graphson3)
+
+    def _setup_data(self, schema, graphson):
+        self.execute_graph(
+            "schema.vertexLabel('person').ifNotExists().partitionBy('name', Text).property('age', Int).create();",
+            graphson)
+        for i in range(100):
+            self.execute_graph("g.addV('person').property('name', 'batman-{}')".format(i), graphson)
+
+    def _test_cont_paging_is_enabled_by_default(self, schema, graphson):
+        """
+        Test that graph paging is automatically enabled with a >=6.8 cluster.
+
+        @jira_ticket PYTHON-1045
+        @expected_result the default continuous paging options are used
+
+        @test_category dse graph
+        """
+        # with traversals we don't have access to the response future, so this is a hack to verify that paging is on
+        cluster.ContinuousPagingOptions = ContinuousPagingOptionsForTests
+        ep = self.get_execution_profile(graphson, traversal=True)
+        self._setup_data(schema, graphson)
+        self.session.default_fetch_size = 10
+        g = DseGraph.traversal_source(self.session, execution_profile=ep)
+        results = g.V().toList()
+        self.assertEqual(len(results), 10)  # only 10 results due to our hack
+
+    def _test_cont_paging_can_be_disabled(self, schema, graphson):
+        """
+        Test that graph paging can be disabled.
+
+        @jira_ticket PYTHON-1045
+        @expected_result the default continuous paging options are not used
+
+        @test_category dse graph
+        """
+        # with traversals we don't have access to the response future, so this is a hack to verify whether paging is on
+        cluster.ContinuousPagingOptions = ContinuousPagingOptionsForTests
+        ep = self.get_execution_profile(graphson, traversal=True)
+        ep = self.session.execution_profile_clone_update(ep, continuous_paging_options=None)
+        self._setup_data(schema, graphson)
+        self.session.default_fetch_size = 10
+        g = DseGraph.traversal_source(self.session, execution_profile=ep)
+        results = g.V().toList()
+        self.assertEqual(len(results), 100)  # 100 results since paging is disabled
+
+    def _test_cont_paging_with_custom_options(self, schema, graphson):
+        """
+        Test that we can specify custom paging options.
+
+        @jira_ticket PYTHON-1045
+        @expected_result we get only the desired number of results
+
+        @test_category dse graph
+        """
+        ep = self.get_execution_profile(graphson, traversal=True)
+        ep = self.session.execution_profile_clone_update(ep,
+                                                         continuous_paging_options=ContinuousPagingOptions(max_pages=1))
+        self._setup_data(schema, graphson)
+        self.session.default_fetch_size = 10
+        g = DseGraph.traversal_source(self.session, execution_profile=ep)
+        results = g.V().toList()
+        self.assertEqual(len(results), 10)  # only 10 results since max_pages=1
diff --git a/tests/integration/advanced/graph/fluent/test_search.py b/tests/integration/advanced/graph/fluent/test_search.py
index dde4e73ab4..d50016d576 100644
--- a/tests/integration/advanced/graph/fluent/test_search.py
+++ b/tests/integration/advanced/graph/fluent/test_search.py
@@ -12,13 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from cassandra.datastax.graph.fluent import DseGraph
-import time
-from cassandra.datastax.graph.fluent.predicates import Search, Geo, GeoUnit
-from tests.integration.advanced import BasicSharedGraphUnitTestCase, generate_address_book_graph, use_single_node_with_graph_and_solr
-from tests.integration import greaterthanorequaldse51, requiredse, DSE_VERSION
 from cassandra.util import Distance
 from cassandra import InvalidRequest
+from cassandra.graph import GraphProtocol
+from cassandra.datastax.graph.fluent import DseGraph
+from cassandra.datastax.graph.fluent.predicates import Search, Geo, GeoUnit, CqlCollection
+
+from tests.integration.advanced import use_single_node_with_graph_and_solr
+from tests.integration.advanced.graph import GraphUnitTestCase, CoreGraphSchema, ClassicGraphSchema, GraphTestConfiguration
+from tests.integration import greaterthanorequaldse51, DSE_VERSION, requiredse
 
 
 def setup_module():
@@ -26,9 +28,23 @@ def setup_module():
     use_single_node_with_graph_and_solr()
 
 
-class AbstractSearchTest():
+class AbstractSearchTest(GraphUnitTestCase):
+
+    def setUp(self):
+        super(AbstractSearchTest, self).setUp()
+        self.ep_graphson2 = DseGraph().create_execution_profile(self.graph_name,
+                                                                graph_protocol=GraphProtocol.GRAPHSON_2_0)
+        self.ep_graphson3 = DseGraph().create_execution_profile(self.graph_name,
+                                                                graph_protocol=GraphProtocol.GRAPHSON_3_0)
 
-    def test_search_by_prefix(self):
+        self.cluster.add_execution_profile('traversal_graphson2', self.ep_graphson2)
+        self.cluster.add_execution_profile('traversal_graphson3', self.ep_graphson3)
+
+    def fetch_traversal_source(self, graphson):
+        ep = self.get_execution_profile(graphson, traversal=True)
+        return DseGraph().traversal_source(self.session, self.graph_name, execution_profile=ep)
+
+    def _test_search_by_prefix(self, schema, graphson):
         """
         Test to validate that solr searches by prefix function.
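# A short sketch of the Search predicate introduced here; 'g' is assumed to be a
# traversal source over the address-book fixture, as returned by fetch_traversal_source():
from cassandra.datastax.graph.fluent.predicates import Search

traversal = g.V().has('person', 'name', Search.prefix('Paul')).values('name')
names = traversal.toList()  # against the fixture data this yields ['Paul Thomas Joe']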
@@ -38,13 +54,14 @@ def test_search_by_prefix(self): @test_category dse graph """ - g = self.fetch_traversal_source() + self.execute_graph(schema.fixtures.address_book(), graphson) + g = self.fetch_traversal_source(graphson) traversal = g.V().has("person", "name", Search.prefix("Paul")).values("name") - results_list = self.execute_traversal(traversal) + results_list = self.execute_traversal(traversal, graphson) self.assertEqual(len(results_list), 1) self.assertEqual(results_list[0], "Paul Thomas Joe") - def test_search_by_regex(self): + def _test_search_by_regex(self, schema, graphson): """ Test to validate that solr searches by regex function. @@ -54,14 +71,15 @@ def test_search_by_regex(self): @test_category dse graph """ - g = self.fetch_traversal_source() - traversal = g.V().has("person", "name", Search.regex(".*Paul.*")).values("name") - results_list = self.execute_traversal(traversal) + self.execute_graph(schema.fixtures.address_book(), graphson) + g = self.fetch_traversal_source(graphson) + traversal = g.V().has("person", "name", Search.regex(".*Paul.*")).values("name") + results_list = self.execute_traversal(traversal, graphson) self.assertEqual(len(results_list), 2) - self.assertIn("Paul Thomas Joe", results_list ) - self.assertIn("James Paul Smith", results_list ) + self.assertIn("Paul Thomas Joe", results_list) + self.assertIn("James Paul Smith", results_list) - def test_search_by_token(self): + def _test_search_by_token(self, schema, graphson): """ Test to validate that solr searches by token. @@ -71,15 +89,15 @@ def test_search_by_token(self): @test_category dse graph """ - g = self.fetch_traversal_source() - traversal = g.V().has("person", "description", Search.token("cold")).values("name") - results_list = self.execute_traversal(traversal) + self.execute_graph(schema.fixtures.address_book(), graphson) + g = self.fetch_traversal_source(graphson) + traversal = g.V().has("person", "description", Search.token("cold")).values("name") + results_list = self.execute_traversal(traversal, graphson) self.assertEqual(len(results_list), 2) - self.assertIn("Jill Alice", results_list ) + self.assertIn("Jill Alice", results_list) self.assertIn("George Bill Steve", results_list) - - def test_search_by_token_prefix(self): + def _test_search_by_token_prefix(self, schema, graphson): """ Test to validate that solr searches by token prefix. @@ -89,15 +107,15 @@ def test_search_by_token_prefix(self): @test_category dse graph """ - g = self.fetch_traversal_source() + self.execute_graph(schema.fixtures.address_book(), graphson) + g = self.fetch_traversal_source(graphson) traversal = g.V().has("person", "description", Search.token_prefix("h")).values("name") - results_list = self.execute_traversal(traversal) + results_list = self.execute_traversal(traversal, graphson) self.assertEqual(len(results_list), 2) - self.assertIn("Paul Thomas Joe", results_list ) - self.assertIn( "James Paul Smith", results_list ) + self.assertIn("Paul Thomas Joe", results_list) + self.assertIn( "James Paul Smith", results_list) - - def test_search_by_token_regex(self): + def _test_search_by_token_regex(self, schema, graphson): """ Test to validate that solr searches by token regex. 
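# The token-based Search predicates used below follow the same shape; a sketch, again
# assuming a traversal source 'g' over the address-book fixture:
g.V().has('person', 'description', Search.token('cold')).values('name').toList()
g.V().has('person', 'description', Search.token_prefix('h')).values('name').toList()
g.V().has('person', 'description', Search.token_regex('(nice|hospital)')).values('name').toList()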
@@ -107,15 +125,15 @@ def test_search_by_token_regex(self): @test_category dse graph """ - - g = self.fetch_traversal_source() - traversal = g.V().has("person", "description", Search.token_regex("(nice|hospital)")).values("name") - results_list = self.execute_traversal(traversal) + self.execute_graph(schema.fixtures.address_book(), graphson) + g = self.fetch_traversal_source(graphson) + traversal = g.V().has("person", "description", Search.token_regex("(nice|hospital)")).values("name") + results_list = self.execute_traversal(traversal, graphson) self.assertEqual(len(results_list), 2) self.assertIn("Paul Thomas Joe", results_list ) self.assertIn( "Jill Alice", results_list ) - def _assert_in_distance(self, inside, names): + def _assert_in_distance(self, schema, graphson, inside, names): """ Helper function that asserts that an exception is arisen if geodetic predicates are used in cartesian geometry. Also asserts that the expected list is equal to the returned from @@ -124,27 +142,33 @@ def _assert_in_distance(self, inside, names): def assert_equal_list(L1, L2): return len(L1) == len(L2) and sorted(L1) == sorted(L2) - g = self.fetch_traversal_source() + self.execute_graph(schema.fixtures.address_book(), graphson) + g = self.fetch_traversal_source(graphson) traversal = g.V().has("person", "pointPropWithBoundsWithSearchIndex", inside).values("name") - # throws an exception because of a SOLR/Search limitation in the indexing process - # may be resolved in the future - self.assertRaises(InvalidRequest, self.execute_traversal, traversal) + if schema is ClassicGraphSchema: + # throws an exception because of a SOLR/Search limitation in the indexing process + # may be resolved in the future + self.assertRaises(InvalidRequest, self.execute_traversal, traversal, graphson) + else: + traversal = g.V().has("person", "pointPropWithBoundsWithSearchIndex", inside).values("name") + results_list = self.execute_traversal(traversal, graphson) + assert_equal_list(names, results_list) traversal = g.V().has("person", "pointPropWithBounds", inside).values("name") - results_list = self.execute_traversal(traversal) + results_list = self.execute_traversal(traversal, graphson) assert_equal_list(names, results_list) traversal = g.V().has("person", "pointPropWithGeoBoundsWithSearchIndex", inside).values("name") - results_list = self.execute_traversal(traversal) + results_list = self.execute_traversal(traversal, graphson) assert_equal_list(names, results_list) traversal = g.V().has("person", "pointPropWithGeoBounds", inside).values("name") - results_list = self.execute_traversal(traversal) + results_list = self.execute_traversal(traversal, graphson) assert_equal_list(names, results_list) @greaterthanorequaldse51 - def test_search_by_distance(self): + def _test_search_by_distance(self, schema, graphson): """ Test to validate that solr searches by distance. @@ -154,13 +178,13 @@ def test_search_by_distance(self): @test_category dse graph """ - self._assert_in_distance( + self._assert_in_distance(schema, graphson, Geo.inside(Distance(-92, 44, 2)), ["Paul Thomas Joe", "George Bill Steve"] ) @greaterthanorequaldse51 - def test_search_by_distance_with_meters_units(self): + def _test_search_by_distance_meters_units(self, schema, graphson): """ Test to validate that solr searches by distance. 
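# A sketch of the geodetic predicate these distance tests exercise, assuming 'g' over the
# address-book fixture; Distance takes (x, y, radius) and GeoUnit converts the radius:
from cassandra.util import Distance
from cassandra.datastax.graph.fluent.predicates import Geo, GeoUnit

inside = Geo.inside(Distance(-92, 44, 56000), GeoUnit.METERS)
g.V().has('person', 'pointPropWithGeoBounds', inside).values('name').toList()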
@@ -170,13 +194,13 @@ def test_search_by_distance_with_meters_units(self): @test_category dse graph """ - self._assert_in_distance( + self._assert_in_distance(schema, graphson, Geo.inside(Distance(-92, 44, 56000), GeoUnit.METERS), ["Paul Thomas Joe"] ) @greaterthanorequaldse51 - def test_search_by_distance_with_miles_units(self): + def _test_search_by_distance_miles_units(self, schema, graphson): """ Test to validate that solr searches by distance. @@ -186,13 +210,13 @@ def test_search_by_distance_with_miles_units(self): @test_category dse graph """ - self._assert_in_distance( + self._assert_in_distance(schema, graphson, Geo.inside(Distance(-92, 44, 70), GeoUnit.MILES), ["Paul Thomas Joe", "George Bill Steve"] ) @greaterthanorequaldse51 - def test_search_by_distance_check_limit(self): + def _test_search_by_distance_check_limit(self, schema, graphson): """ Test to validate that solr searches by distance using several units. It will also validate that and exception is arisen if geodetic predicates are used against cartesian geometry @@ -205,29 +229,29 @@ def test_search_by_distance_check_limit(self): @test_category dse graph """ # Paul Thomas Joe and George Bill Steve are 64.6923761881464 km apart - self._assert_in_distance( + self._assert_in_distance(schema, graphson, Geo.inside(Distance(-92.46295, 44.0234, 65), GeoUnit.KILOMETERS), ["George Bill Steve", "Paul Thomas Joe"] ) - self._assert_in_distance( + self._assert_in_distance(schema, graphson, Geo.inside(Distance(-92.46295, 44.0234, 64), GeoUnit.KILOMETERS), ["Paul Thomas Joe"] ) # Paul Thomas Joe and George Bill Steve are 40.19797892069464 miles apart - self._assert_in_distance( + self._assert_in_distance(schema, graphson, Geo.inside(Distance(-92.46295, 44.0234, 41), GeoUnit.MILES), ["George Bill Steve", "Paul Thomas Joe"] ) - self._assert_in_distance( + self._assert_in_distance(schema, graphson, Geo.inside(Distance(-92.46295, 44.0234, 40), GeoUnit.MILES), ["Paul Thomas Joe"] ) @greaterthanorequaldse51 - def test_search_by_fuzzy(self): + def _test_search_by_fuzzy(self, schema, graphson): """ Test to validate that solr searches by distance. @@ -237,18 +261,19 @@ def test_search_by_fuzzy(self): @test_category dse graph """ - g = self.fetch_traversal_source() - traversal = g.V().has("person", "name", Search.fuzzy("Paul Thamas Joe" ,1)).values("name") - results_list = self.execute_traversal(traversal) + self.execute_graph(schema.fixtures.address_book(), graphson) + g = self.fetch_traversal_source(graphson) + traversal = g.V().has("person", "name", Search.fuzzy("Paul Thamas Joe", 1)).values("name") + results_list = self.execute_traversal(traversal, graphson) self.assertEqual(len(results_list), 1) - self.assertIn("Paul Thomas Joe", results_list ) + self.assertIn("Paul Thomas Joe", results_list) - traversal = g.V().has("person", "name", Search.fuzzy("Paul Thames Joe" ,1)).values("name") - results_list = self.execute_traversal(traversal) + traversal = g.V().has("person", "name", Search.fuzzy("Paul Thames Joe", 1)).values("name") + results_list = self.execute_traversal(traversal, graphson) self.assertEqual(len(results_list), 0) @greaterthanorequaldse51 - def test_search_by_fuzzy_token(self): + def _test_search_by_fuzzy_token(self, schema, graphson): """ Test to validate that fuzzy searches. 
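# The fuzzy and phrase predicates covered next, sketched with the same assumed 'g'; the
# integer argument is the permitted edit (or word) distance:
g.V().has('person', 'name', Search.fuzzy('Paul Thamas Joe', 1)).values('name').toList()
g.V().has('person', 'description', Search.token_fuzzy('lives', 1)).values('name').toList()
g.V().has('person', 'description', Search.phrase('a cold', 2)).values('name').toList()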
@@ -258,21 +283,22 @@ def test_search_by_fuzzy_token(self): @test_category dse graph """ - g = self.fetch_traversal_source() - traversal = g.V().has("person", "description", Search.token_fuzzy("lives", 1)).values("name"); + self.execute_graph(schema.fixtures.address_book(), graphson) + g = self.fetch_traversal_source(graphson) + traversal = g.V().has("person", "description", Search.token_fuzzy("lives", 1)).values("name") # Should match 'Paul Thomas Joe' since description contains 'Lives' # Should match 'James Paul Joe' since description contains 'Likes' - results_list = self.execute_traversal(traversal) + results_list = self.execute_traversal(traversal, graphson) self.assertEqual(len(results_list), 2) - self.assertIn("Paul Thomas Joe", results_list ) - self.assertIn("James Paul Smith", results_list ) + self.assertIn("Paul Thomas Joe", results_list) + self.assertIn("James Paul Smith", results_list) - traversal = g.V().has("person", "description", Search.token_fuzzy("loues", 1)).values("name"); - results_list = self.execute_traversal(traversal) + traversal = g.V().has("person", "description", Search.token_fuzzy("loues", 1)).values("name") + results_list = self.execute_traversal(traversal, graphson) self.assertEqual(len(results_list), 0) @greaterthanorequaldse51 - def test_search_by_phrase(self): + def _test_search_by_phrase(self, schema, graphson): """ Test to validate that phrase searches. @@ -282,66 +308,232 @@ def test_search_by_phrase(self): @test_category dse graph """ - g = self.fetch_traversal_source() - traversal = g.V().has("person", "description", Search.phrase("a cold", 2)).values("name"); + self.execute_graph(schema.fixtures.address_book(), graphson) + g = self.fetch_traversal_source(graphson) + traversal = g.V().has("person", "description", Search.phrase("a cold", 2)).values("name") #Should match 'George Bill Steve' since 'A cold dude' is at distance of 0 for 'a cold'. #Should match 'Jill Alice' since 'Enjoys a very nice cold coca cola' is at distance of 2 for 'a cold'. 
-        results_list = self.execute_traversal(traversal)
+        results_list = self.execute_traversal(traversal, graphson)
         self.assertEqual(len(results_list), 2)
-        self.assertIn('George Bill Steve', results_list )
-        self.assertIn('Jill Alice', results_list )
+        self.assertIn('George Bill Steve', results_list)
+        self.assertIn('Jill Alice', results_list)
 
-        traversal = g.V().has("user", "description", Search.phrase("a bald", 2)).values("name");
-        results_list = self.execute_traversal(traversal)
+        traversal = g.V().has("person", "description", Search.phrase("a bald", 2)).values("name")
+        results_list = self.execute_traversal(traversal, graphson)
         self.assertEqual(len(results_list), 0)
 
-
 @requiredse
-class ImplicitSearchTest(AbstractSearchTest, BasicSharedGraphUnitTestCase):
+@GraphTestConfiguration.generate_tests(traversal=True)
+class ImplicitSearchTest(AbstractSearchTest):
     """
     This test class will execute all tests of the AbstractSearchTest using implicit execution
     All traversals will be run directly using toList()
     """
-    @classmethod
-    def setUpClass(self):
-        super(ImplicitSearchTest, self).setUpClass()
-        if DSE_VERSION:
-            self.ep = DseGraph().create_execution_profile(self.graph_name)
-            self.cluster.add_execution_profile(self.graph_name, self.ep)
-            generate_address_book_graph(self.session, 0)
-            time.sleep(20)
-
     def fetch_key_from_prop(self, property):
         return property.key
 
-    def fetch_traversal_source(self):
-        return DseGraph().traversal_source(self.session, self.graph_name, execution_profile=self.ep)
-
-    def execute_traversal(self, traversal):
+    def execute_traversal(self, traversal, graphson=None):
         return traversal.toList()
 
 
 @requiredse
-class ExplicitSearchTest(AbstractSearchTest, BasicSharedGraphUnitTestCase):
+@GraphTestConfiguration.generate_tests(traversal=True)
+class ExplicitSearchTest(AbstractSearchTest):
     """
     This test class will execute all tests of the AbstractSearchTest using explicit execution
     All traversals will be converted to byte code then they will be executed explicitly.
""" - @classmethod - def setUpClass(self): - super(ExplicitSearchTest, self).setUpClass() - if DSE_VERSION: - self.ep = DseGraph().create_execution_profile(self.graph_name) - self.cluster.add_execution_profile(self.graph_name, self.ep) - generate_address_book_graph(self.session, 0) - time.sleep(20) - - def fetch_traversal_source(self): - return DseGraph().traversal_source(self.session, self.graph_name) - - def execute_traversal(self, traversal): - query = DseGraph.query_from_traversal(traversal) + + def execute_traversal(self, traversal, graphson): + ep = self.get_execution_profile(graphson, traversal=True) + ep = self.session.get_execution_profile(ep) + context = None + if graphson == GraphProtocol.GRAPHSON_3_0: + context = { + 'cluster': self.cluster, + 'graph_name': ep.graph_options.graph_name.decode('utf-8') if ep.graph_options.graph_name else None + } + query = DseGraph.query_from_traversal(traversal, graphson, context=context) #Use an ep that is configured with the correct row factory, and bytecode-json language flat set - result_set = self.session.execute_graph(query, execution_profile=self.ep) + result_set = self.execute_graph(query, graphson, traversal=True) + return list(result_set) + + +@requiredse +class BaseCqlCollectionPredicatesTest(GraphUnitTestCase): + + def setUp(self): + super(BaseCqlCollectionPredicatesTest, self).setUp() + self.ep_graphson3 = DseGraph().create_execution_profile(self.graph_name, + graph_protocol=GraphProtocol.GRAPHSON_3_0) + self.cluster.add_execution_profile('traversal_graphson3', self.ep_graphson3) + + def fetch_traversal_source(self, graphson): + ep = self.get_execution_profile(graphson, traversal=True) + return DseGraph().traversal_source(self.session, self.graph_name, execution_profile=ep) + + def setup_vertex_label(self, graphson): + ep = self.get_execution_profile(graphson) + self.session.execute_graph(""" + schema.vertexLabel('cqlcollections').ifNotExists().partitionBy('name', Varchar) + .property('list', listOf(Text)) + .property('frozen_list', frozen(listOf(Text))) + .property('set', setOf(Text)) + .property('frozen_set', frozen(setOf(Text))) + .property('map_keys', mapOf(Int, Text)) + .property('map_values', mapOf(Int, Text)) + .property('map_entries', mapOf(Int, Text)) + .property('frozen_map', frozen(mapOf(Int, Text))) + .create() + """, execution_profile=ep) + + self.session.execute_graph(""" + schema.vertexLabel('cqlcollections').secondaryIndex('list').by('list').create(); + schema.vertexLabel('cqlcollections').secondaryIndex('frozen_list').by('frozen_list').indexFull().create(); + schema.vertexLabel('cqlcollections').secondaryIndex('set').by('set').create(); + schema.vertexLabel('cqlcollections').secondaryIndex('frozen_set').by('frozen_set').indexFull().create(); + schema.vertexLabel('cqlcollections').secondaryIndex('map_keys').by('map_keys').indexKeys().create(); + schema.vertexLabel('cqlcollections').secondaryIndex('map_values').by('map_values').indexValues().create(); + schema.vertexLabel('cqlcollections').secondaryIndex('map_entries').by('map_entries').indexEntries().create(); + schema.vertexLabel('cqlcollections').secondaryIndex('frozen_map').by('frozen_map').indexFull().create(); + """, execution_profile=ep) + + def _test_contains_list(self, schema, graphson): + """ + Test to validate that the cql predicate contains works with list + + @since TODO dse 6.8 + @jira_ticket PYTHON-1039 + @expected_result contains predicate work on a list + + @test_category dse graph + """ + self.setup_vertex_label(graphson) + g = 
self.fetch_traversal_source(graphson) + traversal = g.addV("cqlcollections").property("name", "list1").property("list", ['item1', 'item2']) + self.execute_traversal(traversal, graphson) + traversal = g.addV("cqlcollections").property("name", "list2").property("list", ['item3', 'item4']) + self.execute_traversal(traversal, graphson) + traversal = g.V().has("cqlcollections", "list", CqlCollection.contains("item1")).values("name") + results_list = self.execute_traversal(traversal, graphson) + self.assertEqual(len(results_list), 1) + self.assertIn("list1", results_list) + + def _test_contains_set(self, schema, graphson): + """ + Test to validate that the cql predicate contains works with set + + @since TODO dse 6.8 + @jira_ticket PYTHON-1039 + @expected_result contains predicate work on a set + + @test_category dse graph + """ + self.setup_vertex_label(graphson) + g = self.fetch_traversal_source(graphson) + traversal = g.addV("cqlcollections").property("name", "set1").property("set", {'item1', 'item2'}) + self.execute_traversal(traversal, graphson) + traversal = g.addV("cqlcollections").property("name", "set2").property("set", {'item3', 'item4'}) + self.execute_traversal(traversal, graphson) + traversal = g.V().has("cqlcollections", "set", CqlCollection.contains("item1")).values("name") + results_list = self.execute_traversal(traversal, graphson) + self.assertEqual(len(results_list), 1) + self.assertIn("set1", results_list) + + def _test_contains_key_map(self, schema, graphson): + """ + Test to validate that the cql predicate contains_key works with map + + @since TODO dse 6.8 + @jira_ticket PYTHON-1039 + @expected_result contains_key predicate work on a map + + @test_category dse graph + """ + self.setup_vertex_label(graphson) + g = self.fetch_traversal_source(graphson) + traversal = g.addV("cqlcollections").property("name", "map1").property("map_keys", {0: 'item1', 1: 'item2'}) + self.execute_traversal(traversal, graphson) + traversal = g.addV("cqlcollections").property("name", "map2").property("map_keys", {2: 'item3', 3: 'item4'}) + self.execute_traversal(traversal, graphson) + traversal = g.V().has("cqlcollections", "map_keys", CqlCollection.contains_key(0)).values("name") + results_list = self.execute_traversal(traversal, graphson) + self.assertEqual(len(results_list), 1) + self.assertIn("map1", results_list) + + def _test_contains_value_map(self, schema, graphson): + """ + Test to validate that the cql predicate contains_value works with map + + @since TODO dse 6.8 + @jira_ticket PYTHON-1039 + @expected_result contains_value predicate work on a map + + @test_category dse graph + """ + self.setup_vertex_label(graphson) + g = self.fetch_traversal_source(graphson) + traversal = g.addV("cqlcollections").property("name", "map1").property("map_values", {0: 'item1', 1: 'item2'}) + self.execute_traversal(traversal, graphson) + traversal = g.addV("cqlcollections").property("name", "map2").property("map_values", {2: 'item3', 3: 'item4'}) + self.execute_traversal(traversal, graphson) + traversal = g.V().has("cqlcollections", "map_values", CqlCollection.contains_value('item3')).values("name") + results_list = self.execute_traversal(traversal, graphson) + self.assertEqual(len(results_list), 1) + self.assertIn("map2", results_list) + + def _test_entry_eq_map(self, schema, graphson): + """ + Test to validate that the cql predicate entry_eq works with map + + @since TODO dse 6.8 + @jira_ticket PYTHON-1039 + @expected_result entry_eq predicate work on a map + + @test_category dse graph + """ + 
self.setup_vertex_label(graphson) + g = self.fetch_traversal_source(graphson) + traversal = g.addV("cqlcollections").property("name", "map1").property("map_entries", {0: 'item1', 1: 'item2'}) + self.execute_traversal(traversal, graphson) + traversal = g.addV("cqlcollections").property("name", "map2").property("map_entries", {2: 'item3', 3: 'item4'}) + self.execute_traversal(traversal, graphson) + traversal = g.V().has("cqlcollections", "map_entries", CqlCollection.entry_eq([2, 'item3'])).values("name") + results_list = self.execute_traversal(traversal, graphson) + self.assertEqual(len(results_list), 1) + self.assertIn("map2", results_list) + + +@requiredse +@GraphTestConfiguration.generate_tests(traversal=True, schema=CoreGraphSchema) +class ImplicitCqlCollectionPredicatesTest(BaseCqlCollectionPredicatesTest): + """ + This test class will execute all tests of the BaseCqlCollectionTest using implicit execution + All traversals will be run directly using toList() + """ + + def execute_traversal(self, traversal, graphson=None): + return traversal.toList() + + +@requiredse +@GraphTestConfiguration.generate_tests(traversal=True, schema=CoreGraphSchema) +class ExplicitCqlCollectionPredicatesTest(BaseCqlCollectionPredicatesTest): + """ + This test class will execute all tests of the AbstractSearchTest using implicit execution + All traversals will be converted to byte code then they will be executed explicitly. + """ + + def execute_traversal(self, traversal, graphson): + ep = self.get_execution_profile(graphson, traversal=True) + ep = self.session.get_execution_profile(ep) + context = None + if graphson == GraphProtocol.GRAPHSON_3_0: + context = { + 'cluster': self.cluster, + 'graph_name': ep.graph_options.graph_name.decode('utf-8') if ep.graph_options.graph_name else None + } + query = DseGraph.query_from_traversal(traversal, graphson, context=context) + result_set = self.execute_graph(query, graphson, traversal=True) return list(result_set) diff --git a/tests/integration/advanced/graph/test_graph.py b/tests/integration/advanced/graph/test_graph.py new file mode 100644 index 0000000000..898779f789 --- /dev/null +++ b/tests/integration/advanced/graph/test_graph.py @@ -0,0 +1,271 @@ +# Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
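# A minimal sketch of the CqlCollection predicates exercised above, assuming a GraphSON 3
# traversal source 'g' and the 'cqlcollections' vertex label created by setup_vertex_label():
from cassandra.datastax.graph.fluent.predicates import CqlCollection

g.V().has('cqlcollections', 'list', CqlCollection.contains('item1')).values('name').toList()
g.V().has('cqlcollections', 'map_keys', CqlCollection.contains_key(0)).values('name').toList()
g.V().has('cqlcollections', 'map_values', CqlCollection.contains_value('item3')).values('name').toList()
g.V().has('cqlcollections', 'map_entries', CqlCollection.entry_eq([2, 'item3'])).values('name').toList()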
+ +import six +import re + +from cassandra import OperationTimedOut, InvalidRequest +from cassandra.protocol import SyntaxException +from cassandra.policies import WhiteListRoundRobinPolicy +from cassandra.cluster import NoHostAvailable +from cassandra.cluster import EXEC_PROFILE_GRAPH_DEFAULT, GraphExecutionProfile, Cluster +from cassandra.graph import single_object_row_factory, Vertex, graph_object_row_factory, \ + graph_graphson2_row_factory, graph_graphson3_row_factory +from cassandra.util import SortedSet + +from tests.integration import PROTOCOL_VERSION, DSE_VERSION, greaterthanorequaldse51, greaterthanorequaldse68, requiredse +from tests.integration.advanced.graph import BasicGraphUnitTestCase, GraphUnitTestCase, \ + GraphProtocol, ClassicGraphSchema, CoreGraphSchema, use_single_node_with_graph + + +def setup_module(): + if DSE_VERSION: + dse_options = {'graph': {'realtime_evaluation_timeout_in_seconds': 60}} + use_single_node_with_graph(dse_options=dse_options) + + +@requiredse +class GraphTimeoutTests(BasicGraphUnitTestCase): + + def test_should_wait_indefinitely_by_default(self): + """ + Tests that by default the client should wait indefinitely for server timeouts + + @since 1.0.0 + @jira_ticket PYTHON-589 + + @test_category dse graph + """ + desired_timeout = 1000 + + graph_source = "test_timeout_1" + ep_name = graph_source + ep = self.session.execution_profile_clone_update(EXEC_PROFILE_GRAPH_DEFAULT) + ep.graph_options = ep.graph_options.copy() + ep.graph_options.graph_source = graph_source + self.cluster.add_execution_profile(ep_name, ep) + + to_run = '''graph.schema().config().option("graph.traversal_sources.{0}.evaluation_timeout").set('{1} ms')'''.format( + graph_source, desired_timeout) + self.session.execute_graph(to_run, execution_profile=ep_name) + with self.assertRaises(InvalidRequest) as ir: + self.session.execute_graph("java.util.concurrent.TimeUnit.MILLISECONDS.sleep(35000L);1+1", + execution_profile=ep_name) + self.assertTrue("evaluation exceeded the configured threshold of 1000" in str(ir.exception) or + "evaluation exceeded the configured threshold of evaluation_timeout at 1000" in str( + ir.exception)) + + def test_request_timeout_less_then_server(self): + """ + Tests that with explicit request_timeouts set, that a server timeout is honored if it's relieved prior to the + client timeout + + @since 1.0.0 + @jira_ticket PYTHON-589 + + @test_category dse graph + """ + desired_timeout = 1000 + graph_source = "test_timeout_2" + ep_name = graph_source + ep = self.session.execution_profile_clone_update(EXEC_PROFILE_GRAPH_DEFAULT, request_timeout=32) + ep.graph_options = ep.graph_options.copy() + ep.graph_options.graph_source = graph_source + self.cluster.add_execution_profile(ep_name, ep) + + to_run = '''graph.schema().config().option("graph.traversal_sources.{0}.evaluation_timeout").set('{1} ms')'''.format( + graph_source, desired_timeout) + self.session.execute_graph(to_run, execution_profile=ep_name) + with self.assertRaises(InvalidRequest) as ir: + self.session.execute_graph("java.util.concurrent.TimeUnit.MILLISECONDS.sleep(35000L);1+1", + execution_profile=ep_name) + self.assertTrue("evaluation exceeded the configured threshold of 1000" in str(ir.exception) or + "evaluation exceeded the configured threshold of evaluation_timeout at 1000" in str( + ir.exception)) + + def test_server_timeout_less_then_request(self): + """ + Tests that with explicit request_timeouts set, that a client timeout is honored if it's triggered prior to the + server sending a timeout. 
+ + @since 1.0.0 + @jira_ticket PYTHON-589 + + @test_category dse graph + """ + graph_source = "test_timeout_3" + ep_name = graph_source + ep = self.session.execution_profile_clone_update(EXEC_PROFILE_GRAPH_DEFAULT, request_timeout=1) + ep.graph_options = ep.graph_options.copy() + ep.graph_options.graph_source = graph_source + self.cluster.add_execution_profile(ep_name, ep) + server_timeout = 10000 + to_run = '''graph.schema().config().option("graph.traversal_sources.{0}.evaluation_timeout").set('{1} ms')'''.format( + graph_source, server_timeout) + self.session.execute_graph(to_run, execution_profile=ep_name) + + with self.assertRaises(Exception) as e: + self.session.execute_graph("java.util.concurrent.TimeUnit.MILLISECONDS.sleep(35000L);1+1", + execution_profile=ep_name) + self.assertTrue(isinstance(e, InvalidRequest) or isinstance(e, OperationTimedOut)) + + +@requiredse +class GraphProfileTests(BasicGraphUnitTestCase): + def test_graph_profile(self): + """ + Test verifying various aspects of graph config properties. + + @since 1.0.0 + @jira_ticket PYTHON-570 + + @test_category dse graph + """ + hosts = self.cluster.metadata.all_hosts() + first_host = hosts[0].address + second_hosts = "1.2.3.4" + + self._execute(ClassicGraphSchema.fixtures.classic(), graphson=GraphProtocol.GRAPHSON_1_0) + # Create various execution policies + exec_dif_factory = GraphExecutionProfile(row_factory=single_object_row_factory) + exec_dif_factory.graph_options.graph_name = self.graph_name + exec_dif_lbp = GraphExecutionProfile(load_balancing_policy=WhiteListRoundRobinPolicy([first_host])) + exec_dif_lbp.graph_options.graph_name = self.graph_name + exec_bad_lbp = GraphExecutionProfile(load_balancing_policy=WhiteListRoundRobinPolicy([second_hosts])) + exec_dif_lbp.graph_options.graph_name = self.graph_name + exec_short_timeout = GraphExecutionProfile(request_timeout=1, + load_balancing_policy=WhiteListRoundRobinPolicy([first_host])) + exec_short_timeout.graph_options.graph_name = self.graph_name + + # Add a single execution policy on cluster creation + local_cluster = Cluster(protocol_version=PROTOCOL_VERSION, + execution_profiles={"exec_dif_factory": exec_dif_factory}) + local_session = local_cluster.connect() + self.addCleanup(local_cluster.shutdown) + + rs1 = self.session.execute_graph('g.V()') + rs2 = local_session.execute_graph('g.V()', execution_profile='exec_dif_factory') + + # Verify default and non default policy works + self.assertFalse(isinstance(rs2[0], Vertex)) + self.assertTrue(isinstance(rs1[0], Vertex)) + # Add other policies validate that lbp are honored + local_cluster.add_execution_profile("exec_dif_ldp", exec_dif_lbp) + local_session.execute_graph('g.V()', execution_profile="exec_dif_ldp") + local_cluster.add_execution_profile("exec_bad_lbp", exec_bad_lbp) + with self.assertRaises(NoHostAvailable): + local_session.execute_graph('g.V()', execution_profile="exec_bad_lbp") + + # Try with missing EP + with self.assertRaises(ValueError): + local_session.execute_graph('g.V()', execution_profile='bad_exec_profile') + + # Validate that timeout is honored + local_cluster.add_execution_profile("exec_short_timeout", exec_short_timeout) + with self.assertRaises(Exception) as e: + self.assertTrue(isinstance(e, InvalidRequest) or isinstance(e, OperationTimedOut)) + local_session.execute_graph('java.util.concurrent.TimeUnit.MILLISECONDS.sleep(2000L);', + execution_profile='exec_short_timeout') + + +@requiredse +class GraphMetadataTest(BasicGraphUnitTestCase): + + @greaterthanorequaldse51 + def 
test_dse_workloads(self): + """ + Test to ensure dse_workloads is populated appropriately. + Field added in DSE 5.1 + + @since DSE 2.0 + @jira_ticket PYTHON-667 + @expected_result dse_workloads set is set on host model + + @test_category metadata + """ + for host in self.cluster.metadata.all_hosts(): + self.assertIsInstance(host.dse_workloads, SortedSet) + self.assertIn("Cassandra", host.dse_workloads) + self.assertIn("Graph", host.dse_workloads) + + +@requiredse +class GraphExecutionProfileOptionsResolveTest(GraphUnitTestCase): + """ + Test that the execution profile options are properly resolved for graph queries. + + @since DSE 6.8 + @jira_ticket PYTHON-1004 PYTHON-1056 + @expected_result execution profile options are properly determined following the rules. + """ + + def test_default_options(self): + ep = self.session.get_execution_profile(EXEC_PROFILE_GRAPH_DEFAULT) + self.assertEqual(ep.graph_options.graph_protocol, None) + self.assertEqual(ep.row_factory, None) + self.session._resolve_execution_profile_options(ep) + self.assertEqual(ep.graph_options.graph_protocol, GraphProtocol.GRAPHSON_1_0) + self.assertEqual(ep.row_factory, graph_object_row_factory) + + def test_default_options_when_not_groovy(self): + ep = self.session.get_execution_profile(EXEC_PROFILE_GRAPH_DEFAULT) + self.assertEqual(ep.graph_options.graph_protocol, None) + self.assertEqual(ep.row_factory, None) + ep.graph_options.graph_language = 'whatever' + self.session._resolve_execution_profile_options(ep) + self.assertEqual(ep.graph_options.graph_protocol, GraphProtocol.GRAPHSON_2_0) + self.assertEqual(ep.row_factory, graph_graphson2_row_factory) + + def test_default_options_when_explicitly_specified(self): + ep = self.session.get_execution_profile(EXEC_PROFILE_GRAPH_DEFAULT) + self.assertEqual(ep.graph_options.graph_protocol, None) + self.assertEqual(ep.row_factory, None) + obj = object() + ep.graph_options.graph_protocol = obj + ep.row_factory = obj + self.session._resolve_execution_profile_options(ep) + self.assertEqual(ep.graph_options.graph_protocol, obj) + self.assertEqual(ep.row_factory, obj) + + @greaterthanorequaldse68 + def test_graph_protocol_default_for_core_is_graphson3(self): + """Test that graphson3 is automatically resolved for a core graph query""" + self.setup_graph(CoreGraphSchema) + ep = self.session.get_execution_profile(EXEC_PROFILE_GRAPH_DEFAULT) + self.assertEqual(ep.graph_options.graph_protocol, None) + self.assertEqual(ep.row_factory, None) + # Ensure we have the graph metadata + self.session.cluster.refresh_schema_metadata() + self.session._resolve_execution_profile_options(ep) + self.assertEqual(ep.graph_options.graph_protocol, GraphProtocol.GRAPHSON_3_0) + self.assertEqual(ep.row_factory, graph_graphson3_row_factory) + + self.execute_graph_queries(CoreGraphSchema.fixtures.classic(), verify_graphson=GraphProtocol.GRAPHSON_3_0) + + @greaterthanorequaldse68 + def test_graph_protocol_default_for_core_fallback_to_graphson1_if_no_graph_name(self): + """Test that graphson1 is set when we cannot detect if it's a core graph""" + self.setup_graph(CoreGraphSchema) + default_ep = self.session.get_execution_profile(EXEC_PROFILE_GRAPH_DEFAULT) + graph_options = default_ep.graph_options.copy() + graph_options.graph_name = None + ep = self.session.execution_profile_clone_update(EXEC_PROFILE_GRAPH_DEFAULT, graph_options=graph_options) + self.session._resolve_execution_profile_options(ep) + self.assertEqual(ep.graph_options.graph_protocol, GraphProtocol.GRAPHSON_1_0) + self.assertEqual(ep.row_factory, 
graph_object_row_factory) + + regex = re.compile(".*Variable.*is unknown.*", re.S) + with six.assertRaisesRegex(self, SyntaxException, regex): + self.execute_graph_queries(CoreGraphSchema.fixtures.classic(), + execution_profile=ep, verify_graphson=GraphProtocol.GRAPHSON_1_0) diff --git a/tests/integration/advanced/graph/test_graph_cont_paging.py b/tests/integration/advanced/graph/test_graph_cont_paging.py new file mode 100644 index 0000000000..065d01d939 --- /dev/null +++ b/tests/integration/advanced/graph/test_graph_cont_paging.py @@ -0,0 +1,78 @@ +# Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from cassandra.cluster import ContinuousPagingOptions + +from tests.integration import greaterthanorequaldse68 +from tests.integration.advanced.graph import GraphUnitTestCase, CoreGraphSchema, GraphTestConfiguration + + +@greaterthanorequaldse68 +@GraphTestConfiguration.generate_tests(schema=CoreGraphSchema) +class GraphPagingTest(GraphUnitTestCase): + + def _setup_data(self, schema, graphson): + self.execute_graph("schema.vertexLabel('person').ifNotExists().partitionBy('name', Text).property('age', Int).create();", graphson) + for i in range(100): + self.execute_graph("g.addV('person').property('name', 'batman-{}')".format(i), graphson) + + def _test_cont_paging_is_enabled_by_default(self, schema, graphson): + """ + Test that graph paging is automatically enabled with a >=6.8 cluster. + + @jira_ticket PYTHON-1045 + @expected_result the response future has a continuous_paging_session since graph paging is enabled + + @test_category dse graph + """ + ep = self.get_execution_profile(graphson) + self._setup_data(schema, graphson) + rf = self.session.execute_graph_async("g.V()", execution_profile=ep) + results = list(rf.result()) + self.assertIsNotNone(rf._continuous_paging_session) + self.assertEqual(len(results), 100) + + def _test_cont_paging_can_be_disabled(self, schema, graphson): + """ + Test that graph paging can be disabled. + + @jira_ticket PYTHON-1045 + @expected_result the response future doesn't have a continuous_paging_session since graph paging is disabled + + @test_category dse graph + """ + ep = self.get_execution_profile(graphson) + new_ep = self.session.execution_profile_clone_update(ep, continuous_paging_options=None) + self._setup_data(schema, graphson) + rf = self.session.execute_graph_async("g.V()", execution_profile=new_ep) + results = list(rf.result()) + self.assertIsNone(rf._continuous_paging_session) + self.assertEqual(len(results), 100) + + def _test_cont_paging_with_custom_options(self, schema, graphson): + """ + Test that we can specify custom paging options. 
+ + @jira_ticket PYTHON-1045 + @expected_result we get only the desired number of results + + @test_category dse graph + """ + ep = self.get_execution_profile(graphson) + new_ep = self.session.execution_profile_clone_update( + ep, continuous_paging_options=ContinuousPagingOptions(max_pages=1)) + self._setup_data(schema, graphson) + self.session.default_fetch_size = 10 + results = list(self.session.execute_graph("g.V()", execution_profile=new_ep)) + self.assertEqual(len(results), 10) diff --git a/tests/integration/advanced/graph/test_graph_datatype.py b/tests/integration/advanced/graph/test_graph_datatype.py new file mode 100644 index 0000000000..d4d28b80df --- /dev/null +++ b/tests/integration/advanced/graph/test_graph_datatype.py @@ -0,0 +1,260 @@ +# Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + import unittest2 as unittest +except ImportError: + import unittest # noqa + +import time +import six +import logging +from packaging.version import Version +from collections import namedtuple + +from cassandra.cluster import EXEC_PROFILE_GRAPH_DEFAULT +from cassandra.graph import graph_result_row_factory +from cassandra.graph.query import GraphProtocol +from cassandra.graph.types import VertexProperty + +from tests.integration.advanced.graph import BasicGraphUnitTestCase, ClassicGraphFixtures, \ + ClassicGraphSchema, CoreGraphSchema +from tests.integration.advanced.graph import VertexLabel, GraphTestConfiguration, GraphUnitTestCase +from tests.integration import DSE_VERSION, requiredse + +log = logging.getLogger(__name__) + + +@requiredse +class GraphBasicDataTypesTests(BasicGraphUnitTestCase): + + def test_result_types(self): + """ + Test to validate that the edge and vertex version of results are constructed correctly. + + @since 1.0.0 + @jira_ticket PYTHON-479 + @expected_result edge/vertex result types should be unpacked correctly. 
+ @test_category dse graph + """ + queries, params = ClassicGraphFixtures.multiple_fields() + for query in queries: + self.session.execute_graph(query, params) + + prof = self.session.execution_profile_clone_update(EXEC_PROFILE_GRAPH_DEFAULT, row_factory=graph_result_row_factory) # requires simplified row factory to avoid shedding id/~type information used for validation below + rs = self.session.execute_graph("g.V()", execution_profile=prof) + + for result in rs: + self._validate_type(result) + + def _validate_type(self, vertex): + for properties in vertex.properties.values(): + prop = properties[0] + + if DSE_VERSION >= Version("5.1"): + type_indicator = prop['id']['~label'] + else: + type_indicator = prop['id']['~type'] + + if any(type_indicator.startswith(t) for t in + ('int', 'short', 'long', 'bigint', 'decimal', 'smallint', 'varint')): + typ = six.integer_types + elif any(type_indicator.startswith(t) for t in ('float', 'double')): + typ = float + elif any(type_indicator.startswith(t) for t in ('duration', 'date', 'negdate', 'time', + 'blob', 'timestamp', 'point', 'linestring', 'polygon', + 'inet', 'uuid')): + typ = six.text_type + else: + pass + self.fail("Received unexpected type: %s" % type_indicator) + self.assertIsInstance(prop['value'], typ) + + +class GenericGraphDataTypeTest(GraphUnitTestCase): + + def _test_all_datatypes(self, schema, graphson): + ep = self.get_execution_profile(graphson) + + for data in six.itervalues(schema.fixtures.datatypes()): + typ, value, deserializer = data + vertex_label = VertexLabel([typ]) + property_name = next(six.iterkeys(vertex_label.non_pk_properties)) + schema.create_vertex_label(self.session, vertex_label, execution_profile=ep) + vertex = list(schema.add_vertex(self.session, vertex_label, property_name, value, execution_profile=ep))[0] + + vertex_properties = list(schema.get_vertex_properties( + self.session, vertex, execution_profile=ep)) + + if graphson == GraphProtocol.GRAPHSON_1_0: + vertex_properties = [vp.as_vertex_property() for vp in vertex_properties] + + prop_returned = 1 if DSE_VERSION < Version('5.1') else 2 # include pkid >=5.1 + self.assertEqual(len(vertex_properties), prop_returned) + for vp in vertex_properties: + if vp.label == 'pkid': + continue + + self.assertIsInstance(vp, VertexProperty) + self.assertEqual(vp.label, property_name) + if graphson == GraphProtocol.GRAPHSON_1_0: + deserialized_value = deserializer(vp.value) + self.assertEqual(deserialized_value, value) + else: + self.assertEqual(vp.value, value) + + def __test_udt(self, schema, graphson, address_class, address_with_tags_class, + complex_address_class, complex_address_with_owners_class): + if schema is not CoreGraphSchema or DSE_VERSION < Version('6.8'): + raise unittest.SkipTest("Graph UDT is only supported with DSE 6.8+ and Core graphs.") + + ep = self.get_execution_profile(graphson) + + Address = address_class + AddressWithTags = address_with_tags_class + ComplexAddress = complex_address_class + ComplexAddressWithOwners = complex_address_with_owners_class + + # setup udt + self.session.execute_graph(""" + schema.type('address').property('address', Text).property('city', Text).property('state', Text).create(); + schema.type('addressTags').property('address', Text).property('city', Text).property('state', Text). + property('tags', setOf(Text)).create(); + schema.type('complexAddress').property('address', Text).property('address_tags', frozen(typeOf('addressTags'))). 
+ property('city', Text).property('state', Text).property('props', mapOf(Text, Int)).create(); + schema.type('complexAddressWithOwners').property('address', Text). + property('address_tags', frozen(typeOf('addressTags'))). + property('city', Text).property('state', Text).property('props', mapOf(Text, Int)). + property('owners', frozen(listOf(tupleOf(Text, Int)))).create(); + """, execution_profile=ep) + + time.sleep(2) # wait the UDT to be discovered + self.session.cluster.register_user_type(self.graph_name, 'address', Address) + self.session.cluster.register_user_type(self.graph_name, 'addressTags', AddressWithTags) + self.session.cluster.register_user_type(self.graph_name, 'complexAddress', ComplexAddress) + self.session.cluster.register_user_type(self.graph_name, 'complexAddressWithOwners', ComplexAddressWithOwners) + + data = { + "udt1": ["typeOf('address')", Address('1440 Rd Smith', 'Quebec', 'QC')], + "udt2": ["tupleOf(typeOf('address'), Text)", (Address('1440 Rd Smith', 'Quebec', 'QC'), 'hello')], + "udt3": ["tupleOf(frozen(typeOf('address')), Text)", (Address('1440 Rd Smith', 'Quebec', 'QC'), 'hello')], + "udt4": ["tupleOf(tupleOf(Int, typeOf('address')), Text)", + ((42, Address('1440 Rd Smith', 'Quebec', 'QC')), 'hello')], + "udt5": ["tupleOf(tupleOf(Int, typeOf('addressTags')), Text)", + ((42, AddressWithTags('1440 Rd Smith', 'Quebec', 'QC', {'t1', 't2'})), 'hello')], + "udt6": ["tupleOf(tupleOf(Int, typeOf('complexAddress')), Text)", + ((42, ComplexAddress('1440 Rd Smith', + AddressWithTags('1440 Rd Smith', 'Quebec', 'QC', {'t1', 't2'}), + 'Quebec', 'QC', {'p1': 42, 'p2': 33})), 'hello')], + "udt7": ["tupleOf(tupleOf(Int, frozen(typeOf('complexAddressWithOwners'))), Text)", + ((42, ComplexAddressWithOwners( + '1440 Rd Smith', + AddressWithTags('1440 CRd Smith', 'Quebec', 'QC', {'t1', 't2'}), + 'Quebec', 'QC', {'p1': 42, 'p2': 33}, [('Mike', 43), ('Gina', 39)]) + ), 'hello')] + } + + for typ, value in six.itervalues(data): + vertex_label = VertexLabel([typ]) + property_name = next(six.iterkeys(vertex_label.non_pk_properties)) + schema.create_vertex_label(self.session, vertex_label, execution_profile=ep) + + vertex = list(schema.add_vertex(self.session, vertex_label, property_name, value, execution_profile=ep))[0] + vertex_properties = list(schema.get_vertex_properties( + self.session, vertex, execution_profile=ep)) + + self.assertEqual(len(vertex_properties), 2) # include pkid + for vp in vertex_properties: + if vp.label == 'pkid': + continue + + self.assertIsInstance(vp, VertexProperty) + self.assertEqual(vp.label, property_name) + self.assertEqual(vp.value, value) + + def _test_udt_with_classes(self, schema, graphson): + class Address(object): + + def __init__(self, address, city, state): + self.address = address + self.city = city + self.state = state + + def __eq__(self, other): + return self.address == other.address and self.city == other.city and self.state == other.state + + class AddressWithTags(object): + + def __init__(self, address, city, state, tags): + self.address = address + self.city = city + self.state = state + self.tags = tags + + def __eq__(self, other): + return (self.address == other.address and self.city == other.city + and self.state == other.state and self.tags == other.tags) + + class ComplexAddress(object): + + def __init__(self, address, address_tags, city, state, props): + self.address = address + self.address_tags = address_tags + self.city = city + self.state = state + self.props = props + + def __eq__(self, other): + return (self.address == 
other.address and self.address_tags == other.address_tags + and self.city == other.city and self.state == other.state + and self.props == other.props) + + class ComplexAddressWithOwners(object): + + def __init__(self, address, address_tags, city, state, props, owners): + self.address = address + self.address_tags = address_tags + self.city = city + self.state = state + self.props = props + self.owners = owners + + def __eq__(self, other): + return (self.address == other.address and self.address_tags == other.address_tags + and self.city == other.city and self.state == other.state + and self.props == other.props and self.owners == other.owners) + + self.__test_udt(schema, graphson, Address, AddressWithTags, ComplexAddress, ComplexAddressWithOwners) + + def _test_udt_with_namedtuples(self, schema, graphson): + AddressTuple = namedtuple('Address', ('address', 'city', 'state')) + AddressWithTagsTuple = namedtuple('AddressWithTags', ('address', 'city', 'state', 'tags')) + ComplexAddressTuple = namedtuple('ComplexAddress', ('address', 'address_tags', 'city', 'state', 'props')) + ComplexAddressWithOwnersTuple = namedtuple('ComplexAddressWithOwners', ('address', 'address_tags', 'city', + 'state', 'props', 'owners')) + + self.__test_udt(schema, graphson, AddressTuple, AddressWithTagsTuple, + ComplexAddressTuple, ComplexAddressWithOwnersTuple) + + +@requiredse +@GraphTestConfiguration.generate_tests(schema=ClassicGraphSchema) +class ClassicGraphDataTypeTest(GenericGraphDataTypeTest): + pass + + +@requiredse +@GraphTestConfiguration.generate_tests(schema=CoreGraphSchema) +class CoreGraphDataTypeTest(GenericGraphDataTypeTest): + pass diff --git a/tests/integration/advanced/graph/test_graph_query.py b/tests/integration/advanced/graph/test_graph_query.py new file mode 100644 index 0000000000..5ef4e2c749 --- /dev/null +++ b/tests/integration/advanced/graph/test_graph_query.py @@ -0,0 +1,575 @@ +# Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
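The UDT tests above rely on the same registration mechanism used for regular CQL user types. A minimal sketch, assuming a core graph keyspace 'demo' in which the 'address' type from the fixture above has already been created (keyspace, label and property names are hypothetical):

    from collections import namedtuple

    from cassandra.cluster import Cluster

    cluster = Cluster()
    session = cluster.connect()

    # Map the graph UDT onto a Python class; any class whose attribute names
    # match the UDT field names works, a namedtuple is simply the shortest.
    Address = namedtuple('Address', ('address', 'city', 'state'))
    cluster.register_user_type('demo', 'address', Address)

    # Once registered, the class round-trips through graph parameters and results.
    session.execute_graph(
        "g.addV('store').property('name', n).property('addr', a)",
        {'n': 'hq', 'a': Address('1440 Rd Smith', 'Quebec', 'QC')})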
+ + +import sys +import six +from packaging.version import Version + +from copy import copy +from itertools import chain +import json +import time + +try: + import unittest2 as unittest +except ImportError: + import unittest # noqa + +from cassandra import OperationTimedOut, ConsistencyLevel, InvalidRequest +from cassandra.cluster import EXEC_PROFILE_GRAPH_DEFAULT, NoHostAvailable +from cassandra.protocol import ServerError, SyntaxException +from cassandra.query import QueryTrace +from cassandra.util import Point +from cassandra.graph import (SimpleGraphStatement, single_object_row_factory, + Result, GraphOptions, GraphProtocol, to_bigint) +from cassandra.datastax.graph.query import _graph_options + +from tests.integration import DSE_VERSION, requiredse +from tests.integration.advanced.graph import BasicGraphUnitTestCase, GraphTestConfiguration, \ + validate_classic_vertex, GraphUnitTestCase, validate_classic_edge, validate_path_result_type, \ + validate_line_edge, validate_generic_vertex_result_type, \ + ClassicGraphSchema, CoreGraphSchema, VertexLabel + + +@requiredse +class BasicGraphQueryTest(BasicGraphUnitTestCase): + + def test_consistency_passing(self): + """ + Test to validated that graph consistency levels are properly surfaced to the base driver + + @since 1.0.0 + @jira_ticket PYTHON-509 + @expected_result graph consistency levels are surfaced correctly + @test_category dse graph + """ + cl_attrs = ('graph_read_consistency_level', 'graph_write_consistency_level') + + # Iterates over the graph options and constructs an array containing + # The graph_options that correlate to graoh read and write consistency levels + graph_params = [a[2] for a in _graph_options if a[0] in cl_attrs] + + s = self.session + default_profile = s.cluster.profile_manager.profiles[EXEC_PROFILE_GRAPH_DEFAULT] + default_graph_opts = default_profile.graph_options + try: + # Checks the default graph attributes and ensures that both graph_read_consistency_level and graph_write_consistency_level + # Are None by default + for attr in cl_attrs: + self.assertIsNone(getattr(default_graph_opts, attr)) + + res = s.execute_graph("null") + for param in graph_params: + self.assertNotIn(param, res.response_future.message.custom_payload) + + # session defaults are passed + opts = GraphOptions() + opts.update(default_graph_opts) + cl = {0: ConsistencyLevel.ONE, 1: ConsistencyLevel.LOCAL_QUORUM} + for k, v in cl.items(): + setattr(opts, cl_attrs[k], v) + default_profile.graph_options = opts + + res = s.execute_graph("null") + + for k, v in cl.items(): + self.assertEqual(res.response_future.message.custom_payload[graph_params[k]], six.b(ConsistencyLevel.value_to_name[v])) + + # passed profile values override session defaults + cl = {0: ConsistencyLevel.ALL, 1: ConsistencyLevel.QUORUM} + opts = GraphOptions() + opts.update(default_graph_opts) + for k, v in cl.items(): + attr_name = cl_attrs[k] + setattr(opts, attr_name, v) + self.assertNotEqual(getattr(default_profile.graph_options, attr_name), getattr(opts, attr_name)) + tmp_profile = s.execution_profile_clone_update(EXEC_PROFILE_GRAPH_DEFAULT, graph_options=opts) + res = s.execute_graph("null", execution_profile=tmp_profile) + + for k, v in cl.items(): + self.assertEqual(res.response_future.message.custom_payload[graph_params[k]], six.b(ConsistencyLevel.value_to_name[v])) + finally: + default_profile.graph_options = default_graph_opts + + def test_execute_graph_row_factory(self): + s = self.session + + # default Results + default_profile = 
s.cluster.profile_manager.profiles[EXEC_PROFILE_GRAPH_DEFAULT] + self.assertEqual(default_profile.row_factory, None) # will be resolved to graph_object_row_factory + result = s.execute_graph("123")[0] + self.assertIsInstance(result, Result) + self.assertEqual(result.value, 123) + + # other via parameter + prof = s.execution_profile_clone_update(EXEC_PROFILE_GRAPH_DEFAULT, row_factory=single_object_row_factory) + rs = s.execute_graph("123", execution_profile=prof) + self.assertEqual(rs.response_future.row_factory, single_object_row_factory) + self.assertEqual(json.loads(rs[0]), {'result': 123}) + + def test_execute_graph_timeout(self): + s = self.session + + value = [1, 2, 3] + query = "[%r]" % (value,) + + # default is passed down + default_graph_profile = s.cluster.profile_manager.profiles[EXEC_PROFILE_GRAPH_DEFAULT] + rs = self.session.execute_graph(query) + self.assertEqual(rs[0].value, value) + self.assertEqual(rs.response_future.timeout, default_graph_profile.request_timeout) + + # tiny timeout times out as expected + tmp_profile = copy(default_graph_profile) + tmp_profile.request_timeout = sys.float_info.min + + max_retry_count = 10 + for _ in range(max_retry_count): + start = time.time() + try: + with self.assertRaises(OperationTimedOut): + s.execute_graph(query, execution_profile=tmp_profile) + break + except: + end = time.time() + self.assertAlmostEqual(start, end, 1) + else: + raise Exception("session.execute_graph didn't time out in {0} tries".format(max_retry_count)) + + def test_profile_graph_options(self): + s = self.session + statement = SimpleGraphStatement("true") + ep = self.session.execution_profile_clone_update(EXEC_PROFILE_GRAPH_DEFAULT) + self.assertTrue(s.execute_graph(statement, execution_profile=ep)[0].value) + + # bad graph name to verify it's passed + ep.graph_options = ep.graph_options.copy() + ep.graph_options.graph_name = "definitely_not_correct" + try: + s.execute_graph(statement, execution_profile=ep) + except NoHostAvailable: + self.assertTrue(DSE_VERSION >= Version("6.0")) + else: + if DSE_VERSION < Version("6.8"): # >6.8 returns true + self.fail("Should have risen ServerError or InvalidRequest") + + def test_additional_custom_payload(self): + s = self.session + custom_payload = {'some': 'example'.encode('utf-8'), 'items': 'here'.encode('utf-8')} + sgs = SimpleGraphStatement("null", custom_payload=custom_payload) + future = s.execute_graph_async(sgs) + + default_profile = s.cluster.profile_manager.profiles[EXEC_PROFILE_GRAPH_DEFAULT] + default_graph_opts = default_profile.graph_options + for k, v in chain(custom_payload.items(), default_graph_opts.get_options_map().items()): + self.assertEqual(future.message.custom_payload[k], v) + + +class GenericGraphQueryTest(GraphUnitTestCase): + + def _test_basic_query(self, schema, graphson): + """ + Test to validate that basic graph query results can be executed with a sane result set. + + Creates a simple classic tinkerpot graph, and attempts to find all vertices + related the vertex marco, that have a label of knows. + See reference graph here + http://www.tinkerpop.com/docs/3.0.0.M1/ + + @since 1.0.0 + @jira_ticket PYTHON-457 + @expected_result graph should find two vertices related to marco via 'knows' edges. 
+
+        @test_category dse graph
+        """
+        self.execute_graph(schema.fixtures.classic(), graphson)
+        rs = self.execute_graph('''g.V().has('name','marko').out('knows').values('name')''', graphson)
+        self.assertFalse(rs.has_more_pages)
+        results_list = self.resultset_to_list(rs)
+        self.assertEqual(len(results_list), 2)
+        self.assertIn('vadas', results_list)
+        self.assertIn('josh', results_list)
+
+    def _test_geometric_graph_types(self, schema, graphson):
+        """
+        Test to validate that geometric types function correctly
+
+        Creates a very simple graph, and tries to insert a simple point type
+
+        @since 1.0.0
+        @jira_ticket DSP-8087
+        @expected_result json types associated with the insert are parsed correctly
+
+        @test_category dse graph
+        """
+        vertex_label = VertexLabel([('pointP', "Point()")])
+        ep = self.get_execution_profile(graphson)
+        schema.create_vertex_label(self.session, vertex_label, ep)
+        # import org.apache.cassandra.db.marshal.geometry.Point;
+        rs = schema.add_vertex(self.session, vertex_label, 'pointP', Point(0, 1), ep)
+
+        # if the result set is not parsed correctly this will throw an exception
+        self.assertIsNotNone(rs)
+
+    def _test_execute_graph_trace(self, schema, graphson):
+        value = [1, 2, 3]
+        query = "[%r]" % (value,)
+
+        # default is no trace
+        rs = self.execute_graph(query, graphson)
+        results = self.resultset_to_list(rs)
+        self.assertEqual(results[0], value)
+        self.assertIsNone(rs.get_query_trace())
+
+        # request trace
+        rs = self.execute_graph(query, graphson, trace=True)
+        results = self.resultset_to_list(rs)
+        self.assertEqual(results[0], value)
+        qt = rs.get_query_trace(max_wait_sec=10)
+        self.assertIsInstance(qt, QueryTrace)
+        self.assertIsNotNone(qt.duration)
+
+    def _test_range_query(self, schema, graphson):
+        """
+        Test to validate that range queries are handled correctly.
+
+        Creates a very large line graph script and executes it. Then proceeds to do a
+        range-limited query against it, and ensures that the results are formatted
+        correctly and that the result set is properly sized.
+
+        @since 1.0.0
+        @jira_ticket PYTHON-457
+        @expected_result result set should be properly formatted and properly sized
+
+        @test_category dse graph
+        """
+        self.execute_graph(schema.fixtures.line(150), graphson)
+        rs = self.execute_graph("g.E().range(0,10)", graphson)
+        self.assertFalse(rs.has_more_pages)
+        results = self.resultset_to_list(rs)
+        self.assertEqual(len(results), 10)
+        ep = self.get_execution_profile(graphson)
+        for result in results:
+            schema.ensure_properties(self.session, result, execution_profile=ep)
+            validate_line_edge(self, result)
+
+    def _test_classic_graph(self, schema, graphson):
+        """
+        Test to validate that basic graph generation works, and that vertices and edges are surfaced correctly
+
+        Creates a simple classic tinkerpop graph, and iterates over the vertices and edges,
+        ensuring that each one is correct. See reference graph here
+        http://www.tinkerpop.com/docs/3.0.0.M1/
+
+        @since 1.0.0
+        @jira_ticket PYTHON-457
+        @expected_result graph should generate, and all vertex and edge results should be surfaced correctly
+
+        @test_category dse graph
+        """
+        self.execute_graph(schema.fixtures.classic(), graphson)
+        rs = self.execute_graph('g.V()', graphson)
+        ep = self.get_execution_profile(graphson)
+        for vertex in rs:
+            schema.ensure_properties(self.session, vertex, execution_profile=ep)
+            validate_classic_vertex(self, vertex)
+        rs = self.execute_graph('g.E()', graphson)
+        for edge in rs:
+            schema.ensure_properties(self.session, edge, execution_profile=ep)
+            validate_classic_edge(self, edge)
+
+    def _test_graph_classic_path(self, schema, graphson):
+        """
+        Test to validate that the path version of the result type is generated correctly. It also
+        tests basic path results, as that is not covered elsewhere
+
+        @since 1.0.0
+        @jira_ticket PYTHON-479
+        @expected_result path object should be unpacked correctly, including all nested edges and vertices
+        @test_category dse graph
+        """
+        self.execute_graph(schema.fixtures.classic(), graphson)
+        rs = self.execute_graph("g.V().hasLabel('person').has('name', 'marko').as('a').outE('knows').inV().as('c', 'd')."
+                                " outE('created').as('e', 'f', 'g').inV().path()",
+                                graphson)
+        rs_list = list(rs)
+        self.assertEqual(len(rs_list), 2)
+        for result in rs_list:
+            try:
+                path = result.as_path()
+            except:
+                path = result
+
+            ep = self.get_execution_profile(graphson)
+            for obj in path.objects:
+                schema.ensure_properties(self.session, obj, ep)
+
+            validate_path_result_type(self, path)
+
+    def _test_large_create_script(self, schema, graphson):
+        """
+        Test to validate that server errors due to large groovy scripts are properly surfaced
+
+        Creates a very large line graph script and executes it. Then proceeds to create a line graph
+        script that is too large for the server to handle, and expects a server error to be returned
+
+        @since 1.0.0
+        @jira_ticket PYTHON-457
+        @expected_result server errors due to oversized scripts should be surfaced
+
+        @test_category dse graph
+        """
+        self.execute_graph(schema.fixtures.line(150), graphson)
+        self.execute_graph(schema.fixtures.line(300), graphson)  # this should pass since the queries are split
+        self.assertRaises(SyntaxException, self.execute_graph, schema.fixtures.line(300, single_script=True), graphson)  # this one is not split and is too big
+
+    def _test_large_result_set(self, schema, graphson):
+        """
+        Test to validate that large result sets return correctly.
+
+        Creates a very large graph. Ensures that large result sets are handled appropriately.
+ + @since 1.0.0 + @jira_ticket PYTHON-457 + @expected_result when limits of result sets are hit errors should be surfaced appropriately + + @test_category dse graph + """ + self.execute_graph(schema.fixtures.large(), graphson, execution_profile_options={'request_timeout': 32}) + rs = self.execute_graph("g.V()", graphson) + for result in rs: + validate_generic_vertex_result_type(self, result) + + def _test_param_passing(self, schema, graphson): + """ + Test to validate that parameter passing works as expected + + @since 1.0.0 + @jira_ticket PYTHON-457 + @expected_result parameters work as expected + + @test_category dse graph + """ + + # unused parameters are passed, but ignored + self.execute_graph("null", graphson, params={"doesn't": "matter", "what's": "passed"}) + + # multiple params + rs = self.execute_graph("[a, b]", graphson, params={'a': 0, 'b': 1}) + results = self.resultset_to_list(rs) + self.assertEqual(results[0], 0) + self.assertEqual(results[1], 1) + + if graphson == GraphProtocol.GRAPHSON_1_0: + # different value types + for param in (None, "string", 1234, 5.678, True, False): + result = self.resultset_to_list(self.execute_graph('x', graphson, params={'x': param}))[0] + self.assertEqual(result, param) + + def _test_vertex_property_properties(self, schema, graphson): + """ + Test verifying vertex property properties + + @since 1.0.0 + @jira_ticket PYTHON-487 + + @test_category dse graph + """ + if schema is not ClassicGraphSchema: + raise unittest.SkipTest('skipped because rich properties are only supported with classic graphs') + + self.execute_graph("schema.propertyKey('k0').Text().ifNotExists().create();", graphson) + self.execute_graph("schema.propertyKey('k1').Text().ifNotExists().create();", graphson) + self.execute_graph("schema.propertyKey('key').Text().properties('k0', 'k1').ifNotExists().create();", graphson) + self.execute_graph("schema.vertexLabel('MLP').properties('key').ifNotExists().create();", graphson) + v = self.execute_graph('''v = graph.addVertex('MLP') + v.property('key', 'value', 'k0', 'v0', 'k1', 'v1') + v''', graphson)[0] + self.assertEqual(len(v.properties), 1) + self.assertEqual(len(v.properties['key']), 1) + p = v.properties['key'][0] + self.assertEqual(p.label, 'key') + self.assertEqual(p.value, 'value') + self.assertEqual(p.properties, {'k0': 'v0', 'k1': 'v1'}) + + def _test_vertex_multiple_properties(self, schema, graphson): + """ + Test verifying vertex property form for various Cardinality + + All key types are encoded as a list, regardless of cardinality + + Single cardinality properties have only one value -- the last one added + + Default is single (this is config dependent) + + @since 1.0.0 + @jira_ticket PYTHON-487 + + @test_category dse graph + """ + if schema is not ClassicGraphSchema: + raise unittest.SkipTest('skipped because multiple properties are only supported with classic graphs') + + self.execute_graph('''Schema schema = graph.schema(); + schema.propertyKey('mult_key').Text().multiple().ifNotExists().create(); + schema.propertyKey('single_key').Text().single().ifNotExists().create(); + schema.vertexLabel('MPW1').properties('mult_key').ifNotExists().create(); + schema.vertexLabel('SW1').properties('single_key').ifNotExists().create();''', graphson) + + v = self.execute_graph('''v = graph.addVertex('MPW1') + v.property('mult_key', 'value') + v''', graphson)[0] + self.assertEqual(len(v.properties), 1) + self.assertEqual(len(v.properties['mult_key']), 1) + self.assertEqual(v.properties['mult_key'][0].label, 'mult_key') + 
self.assertEqual(v.properties['mult_key'][0].value, 'value')
+
+        # multiple_with_two_values
+        v = self.execute_graph('''g.addV('MPW1').property('mult_key', 'value0').property('mult_key', 'value1')''', graphson)[0]
+        self.assertEqual(len(v.properties), 1)
+        self.assertEqual(len(v.properties['mult_key']), 2)
+        self.assertEqual(v.properties['mult_key'][0].label, 'mult_key')
+        self.assertEqual(v.properties['mult_key'][1].label, 'mult_key')
+        self.assertEqual(v.properties['mult_key'][0].value, 'value0')
+        self.assertEqual(v.properties['mult_key'][1].value, 'value1')
+
+        # single_with_one_value
+        v = self.execute_graph('''v = graph.addVertex('SW1')
+                                  v.property('single_key', 'value')
+                                  v''', graphson)[0]
+        self.assertEqual(len(v.properties), 1)
+        self.assertEqual(len(v.properties['single_key']), 1)
+        self.assertEqual(v.properties['single_key'][0].label, 'single_key')
+        self.assertEqual(v.properties['single_key'][0].value, 'value')
+
+        if DSE_VERSION < Version('6.8'):
+            # single_with_two_values
+            with self.assertRaises(InvalidRequest):
+                v = self.execute_graph('''
+                    v = graph.addVertex('SW1')
+                    v.property('single_key', 'value0').property('single_key', 'value1').next()
+                    v
+                ''', graphson)[0]
+        else:
+            # >=6.8 single_with_two_values, first one wins
+            v = self.execute_graph('''v = graph.addVertex('SW1')
+                                      v.property('single_key', 'value0').property('single_key', 'value1')
+                                      v''', graphson)[0]
+            self.assertEqual(v.properties['single_key'][0].value, 'value0')
+
+    def _test_result_forms(self, schema, graphson):
+        """
+        Test to validate the various forms of vertex and edge results.
+
+        Creates a simple classic graph, then verifies that vertex and edge results
+        are returned and deserialized correctly.
+
+        @since 1.0.0
+        @jira_ticket DSP-8087
+        @expected_result vertex and edge results are parsed correctly
+
+        @test_category dse graph
+        """
+        self.execute_graph(schema.fixtures.classic(), graphson)
+        ep = self.get_execution_profile(graphson)
+
+        results = self.resultset_to_list(self.session.execute_graph('g.V()', execution_profile=ep))
+        self.assertGreater(len(results), 0, "Result set was empty; this was not expected")
+        for v in results:
+            schema.ensure_properties(self.session, v, ep)
+            validate_classic_vertex(self, v)
+
+        results = self.resultset_to_list(self.session.execute_graph('g.E()', execution_profile=ep))
+        self.assertGreater(len(results), 0, "Result set was empty; this was not expected")
+        for e in results:
+            schema.ensure_properties(self.session, e, ep)
+            validate_classic_edge(self, e)
+
+    def _test_query_profile(self, schema, graphson):
+        """
+        Test to validate that profiling results are deserialized properly.
+
+        @since 1.6.0
+        @jira_ticket PYTHON-1057
+        @expected_result TraversalMetrics and Metrics are deserialized properly
+
+        @test_category dse graph
+        """
+        if graphson == GraphProtocol.GRAPHSON_1_0:
+            raise unittest.SkipTest('skipped because there is no metrics deserializer with graphson1')
+
+        ep = self.get_execution_profile(graphson)
+        results = list(self.session.execute_graph("g.V().profile()", execution_profile=ep))
+        self.assertEqual(len(results), 1)
+        self.assertIn('metrics', results[0])
+        self.assertIn('dur', results[0])
+        self.assertEqual(len(results[0]['metrics']), 2)
+        self.assertIn('dur', results[0]['metrics'][0])
+
+    def _test_query_bulkset(self, schema, graphson):
+        """
+        Test to validate that bulkset results are deserialized properly.
+ + @since 1.6.0 + @jira_ticket PYTHON-1060 + @expected_result BulkSet is deserialized properly to a list + + @test_category dse graph + """ + self.execute_graph(schema.fixtures.classic(), graphson) + ep = self.get_execution_profile(graphson) + results = list(self.session.execute_graph( + 'g.V().hasLabel("person").aggregate("x").by("age").cap("x")', + execution_profile=ep)) + self.assertEqual(len(results), 1) + results = results[0] + if type(results) is Result: + results = results.value + else: + self.assertEqual(len(results), 5) + self.assertEqual(results.count(35), 2) + + +@GraphTestConfiguration.generate_tests(schema=ClassicGraphSchema) +class ClassicGraphQueryTest(GenericGraphQueryTest): + pass + + +@GraphTestConfiguration.generate_tests(schema=CoreGraphSchema) +class CoreGraphQueryTest(GenericGraphQueryTest): + pass + + +@GraphTestConfiguration.generate_tests(schema=CoreGraphSchema) +class CoreGraphQueryWithTypeWrapperTest(GraphUnitTestCase): + + def _test_basic_query_with_type_wrapper(self, schema, graphson): + """ + Test to validate that a query using a type wrapper works. + + @since 2.8.0 + @jira_ticket PYTHON-1051 + @expected_result graph query works and doesn't raise an exception + + @test_category dse graph + """ + ep = self.get_execution_profile(graphson) + vl = VertexLabel(['tupleOf(Int, Bigint)']) + schema.create_vertex_label(self.session, vl, execution_profile=ep) + + prop_name = next(six.iterkeys(vl.non_pk_properties)) + with self.assertRaises(InvalidRequest): + schema.add_vertex(self.session, vl, prop_name, (1, 42), execution_profile=ep) + + schema.add_vertex(self.session, vl, prop_name, (1, to_bigint(42)), execution_profile=ep) diff --git a/tests/integration/advanced/test_adv_metadata.py b/tests/integration/advanced/test_adv_metadata.py index 25df0323a1..2c69a769a3 100644 --- a/tests/integration/advanced/test_adv_metadata.py +++ b/tests/integration/advanced/test_adv_metadata.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
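The type-wrapper test above is the point of PYTHON-1051: where a core graph column expects a specific numeric CQL type, a bare Python int can serialize to the wrong GraphSON type. A minimal sketch, reusing the to_bigint wrapper the test imports (session setup, label and property names are hypothetical):

    from cassandra.cluster import Cluster
    from cassandra.graph import to_bigint

    session = Cluster().connect()

    # A bare 42 would go out as a GraphSON Int and be rejected for a property
    # typed Bigint; to_bigint() pins the intended CQL type on the wire.
    session.execute_graph(
        "g.addV('wrapped').property('name', 'x').property('val', v)",
        {'v': to_bigint(42)})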
-from tests.integration import (BasicSharedKeyspaceUnitTestCase, +from packaging.version import Version + +from tests.integration import (BasicExistingKeyspaceUnitTestCase, BasicSharedKeyspaceUnitTestCase, BasicSharedKeyspaceUnitTestCaseRF1, - greaterthanorequaldse51, - greaterthanorequaldse60, use_single_node, + greaterthanorequaldse51, greaterthanorequaldse60, + greaterthanorequaldse68, use_single_node, DSE_VERSION, requiredse) try: @@ -211,7 +213,7 @@ def test_rlac_on_table(self): table_meta = self.cluster.metadata.keyspaces[self.keyspace_name].tables['reports'] self.assertTrue(restrict_cql in table_meta.export_as_string()) - @unittest.skip("Dse 5.1 doesn't current MV and RLAC remove after update") + @unittest.skip("Dse 5.1 doesn't support MV and RLAC remove after update") @greaterthanorequaldse51 def test_rlac_on_mv(self): """ @@ -284,3 +286,82 @@ def test_nodesync_on_table(self): table_3rf = self.cluster.metadata.keyspaces["test3rf"].tables['test'] self.assertNotIn('nodesync =', table_3rf.export_as_string()) self.assertIsNone(table_3rf.options['nodesync']) + + +@greaterthanorequaldse68 +class GraphMetadataTests(BasicExistingKeyspaceUnitTestCase): + """ + Various tests to ensure that graph metadata are visible through driver metadata + @since DSE6.8 + @jira_ticket PYTHON-996 + @expected_result graph metadata are fetched + @test_category metadata + """ + + @classmethod + def setUpClass(cls): + if DSE_VERSION and DSE_VERSION >= Version('6.8'): + super(GraphMetadataTests, cls).setUpClass() + cls.session.execute(""" + CREATE KEYSPACE ks_no_graph_engine WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}; + """) + cls.session.execute(""" + CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1} and graph_engine = 'Core'; + """ % (cls.ks_name,)) + + cls.session.execute(""" + CREATE TABLE %s.person (name text PRIMARY KEY) WITH VERTEX LABEL; + """ % (cls.ks_name,)) + + cls.session.execute(""" + CREATE TABLE %s.software(company text, name text, version int, PRIMARY KEY((company, name), version)) WITH VERTEX LABEL rocksolidsoftware; + """ % (cls.ks_name,)) + + cls.session.execute(""" + CREATE TABLE %s.contributors (contributor text, company_name text, software_name text, software_version int, + PRIMARY KEY (contributor, company_name, software_name, software_version) ) + WITH CLUSTERING ORDER BY (company_name ASC, software_name ASC, software_version ASC) + AND EDGE LABEL contrib FROM person(contributor) TO rocksolidsoftware((company_name, software_name), software_version); + """ % (cls.ks_name,)) + + @classmethod + def tearDownClass(cls): + if DSE_VERSION and DSE_VERSION >= Version('6.8'): + cls.session.execute('DROP KEYSPACE {0}'.format('ks_no_graph_engine')) + cls.session.execute('DROP KEYSPACE {0}'.format(cls.ks_name)) + cls.cluster.shutdown() + + def test_keyspace_metadata(self): + self.assertIsNone(self.cluster.metadata.keyspaces['ks_no_graph_engine'].graph_engine, None) + self.assertEqual(self.cluster.metadata.keyspaces[self.ks_name].graph_engine, 'Core') + + def test_keyspace_metadata_alter_graph_engine(self): + self.session.execute("ALTER KEYSPACE %s WITH graph_engine = 'Tinker'" % (self.ks_name,)) + self.assertEqual(self.cluster.metadata.keyspaces[self.ks_name].graph_engine, 'Tinker') + self.session.execute("ALTER KEYSPACE %s WITH graph_engine = 'Core'" % (self.ks_name,)) + self.assertEqual(self.cluster.metadata.keyspaces[self.ks_name].graph_engine, 'Core') + + def test_vertex_metadata(self): + vertex_meta = 
self.cluster.metadata.keyspaces[self.ks_name].tables['person'].vertex + self.assertEqual(vertex_meta.keyspace_name, self.ks_name) + self.assertEqual(vertex_meta.table_name, 'person') + self.assertEqual(vertex_meta.label_name, 'person') + + vertex_meta = self.cluster.metadata.keyspaces[self.ks_name].tables['software'].vertex + self.assertEqual(vertex_meta.keyspace_name, self.ks_name) + self.assertEqual(vertex_meta.table_name, 'software') + self.assertEqual(vertex_meta.label_name, 'rocksolidsoftware') + + def test_edge_metadata(self): + edge_meta = self.cluster.metadata.keyspaces[self.ks_name].tables['contributors'].edge + self.assertEqual(edge_meta.keyspace_name, self.ks_name) + self.assertEqual(edge_meta.table_name, 'contributors') + self.assertEqual(edge_meta.label_name, 'contrib') + self.assertEqual(edge_meta.from_table, 'person') + self.assertEqual(edge_meta.from_label, 'person') + self.assertEqual(edge_meta.from_partition_key_columns, ['contributor']) + self.assertEqual(edge_meta.from_clustering_columns, []) + self.assertEqual(edge_meta.to_table, 'software') + self.assertEqual(edge_meta.to_label, 'rocksolidsoftware') + self.assertEqual(edge_meta.to_partition_key_columns, ['company_name', 'software_name']) + self.assertEqual(edge_meta.to_clustering_columns, ['software_version']) diff --git a/tests/integration/advanced/test_auth.py b/tests/integration/advanced/test_auth.py index 533a9e70ed..59bd3dec5c 100644 --- a/tests/integration/advanced/test_auth.py +++ b/tests/integration/advanced/test_auth.py @@ -30,10 +30,9 @@ from cassandra.protocol import Unauthorized from cassandra.query import SimpleStatement from tests.integration import (get_cluster, greaterthanorequaldse51, - remove_cluster, requiredse) -from tests.integration.advanced import (ADS_HOME, DSE_VERSION, - generate_classic, reset_graph, - use_single_node_with_graph) + remove_cluster, requiredse, DSE_VERSION) +from tests.integration.advanced import ADS_HOME, use_single_node_with_graph +from tests.integration.advanced.graph import reset_graph, ClassicGraphFixtures log = logging.getLogger(__name__) @@ -225,7 +224,7 @@ def test_connect_with_kerberos_and_graph(self): reset_graph(self.session, self._testMethodName.lower()) profiles = self.cluster.profile_manager.profiles profiles[EXEC_PROFILE_GRAPH_DEFAULT].graph_options.graph_name = self._testMethodName.lower() - generate_classic(self.session) + self.session.execute_graph(ClassicGraphFixtures.classic()) rs = self.session.execute_graph('g.V()') self.assertIsNotNone(rs) diff --git a/tests/integration/advanced/test_spark.py b/tests/integration/advanced/test_spark.py index 8864e9a947..a307913abb 100644 --- a/tests/integration/advanced/test_spark.py +++ b/tests/integration/advanced/test_spark.py @@ -17,7 +17,8 @@ from cassandra.cluster import EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT from cassandra.graph import SimpleGraphStatement from tests.integration import DSE_VERSION, requiredse -from tests.integration.advanced import BasicGraphUnitTestCase, use_singledc_wth_graph_and_spark, generate_classic, find_spark_master +from tests.integration.advanced import use_singledc_wth_graph_and_spark, find_spark_master +from tests.integration.advanced.graph import BasicGraphUnitTestCase, ClassicGraphFixtures log = logging.getLogger(__name__) @@ -38,12 +39,12 @@ class SparkLBTests(BasicGraphUnitTestCase): @test_category dse graph """ def test_spark_analytic_query(self): - generate_classic(self.session) + self.session.execute_graph(ClassicGraphFixtures.classic()) spark_master = 
find_spark_master(self.session)

        # Run multiple times to ensure we don't round robin
        for i in range(3):
            to_run = SimpleGraphStatement("g.V().count()")
            rs = self.session.execute_graph(to_run, execution_profile=EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT)
-            self.assertEqual(rs[0].value, 6)
+            self.assertEqual(rs[0].value, 7)
            self.assertEqual(rs.response_future._current_host.address, spark_master)
diff --git a/tests/integration/simulacron/utils.py b/tests/integration/simulacron/utils.py
index 0d474651c2..5cee5ac3f1 100644
--- a/tests/integration/simulacron/utils.py
+++ b/tests/integration/simulacron/utils.py
@@ -17,7 +17,7 @@
 import time
 from six.moves.urllib.request import build_opener, Request, HTTPHandler
 
-from cassandra.metadata import SchemaParserV4
+from cassandra.metadata import SchemaParserV4, SchemaParserDSE68
 
 from tests.integration import CASSANDRA_VERSION, SIMULACRON_JAR, DSE_VERSION
 
@@ -355,6 +355,11 @@ def prime_driver_defaults():
                               "message": "Invalid Query!"})
     )
 
+    # prepare empty rows for NGDG
+    for query in [SchemaParserDSE68._SELECT_VERTICES,
+                  SchemaParserDSE68._SELECT_EDGES]:
+        PrimeQuery(query, result='success', then={'rows': [], 'column_types': {'row1': 'int'}})
+
 
 def prime_cluster(data_centers="3", version=None, cluster_name=DEFAULT_CLUSTER, dse_version=None):
     """
diff --git a/tests/integration/standard/test_metadata.py b/tests/integration/standard/test_metadata.py
index a5038672d4..858be75283 100644
--- a/tests/integration/standard/test_metadata.py
+++ b/tests/integration/standard/test_metadata.py
@@ -2007,14 +2007,21 @@ def test_dct_alias(self):
         dct_table = self.cluster.metadata.keyspaces.get(self.ks_name).tables.get(self.function_table_name)
 
         # Format can vary slightly between versions, strip out whitespace for consistency's sake
-        self.assertTrue("c1'org.apache.cassandra.db.marshal.DynamicCompositeType("
-                        "s=>org.apache.cassandra.db.marshal.UTF8Type,"
-                        "i=>org.apache.cassandra.db.marshal.Int32Type)'"
-                        in dct_table.as_cql_query().replace(" ", ""))
+        try:
+            self.assertTrue("c1'org.apache.cassandra.db.marshal.DynamicCompositeType("
+                            "s=>org.apache.cassandra.db.marshal.UTF8Type,"
+                            "i=>org.apache.cassandra.db.marshal.Int32Type)'"
+                            in dct_table.as_cql_query().replace(" ", ""))
+        except AssertionError:
+            # C* 4.0
+            self.assertTrue("c1'org.apache.cassandra.db.marshal.DynamicCompositeType("
+                            "i=>org.apache.cassandra.db.marshal.Int32Type,"
+                            "s=>org.apache.cassandra.db.marshal.UTF8Type)'"
+                            in dct_table.as_cql_query().replace(" ", ""))
 
 
 @greaterthanorequalcass30
-class Materia3lizedViewMetadataTestSimple(BasicSharedKeyspaceUnitTestCase):
+class MaterializedViewMetadataTestSimple(BasicSharedKeyspaceUnitTestCase):
 
     def setUp(self):
         self.session.execute("CREATE TABLE {0}.{1} (pk int PRIMARY KEY, c int)".format(self.keyspace_name, self.function_table_name))
diff --git a/tests/unit/advanced/test_graph.py b/tests/unit/advanced/test_graph.py
index 702b3c376b..f25a229f42 100644
--- a/tests/unit/advanced/test_graph.py
+++ b/tests/unit/advanced/test_graph.py
@@ -236,7 +236,9 @@ def test_path_str_repr(self):
 
 class GraphOptionTests(unittest.TestCase):
 
-    opt_mapping = dict((t[0], t[2]) for t in _graph_options if not t[0].endswith('consistency_level'))  # cl excluded from general tests because it requires mapping to names
+    opt_mapping = dict((t[0], t[2]) for t in _graph_options if not
+                       (t[0].endswith('consistency_level') or  # cl excluded from general tests because it requires mapping to names
+                        t[0] == 'graph_protocol'))  # default is None
 
     api_params = dict((p, str(i)) for i, p in enumerate(opt_mapping))
 
@@ -245,8 +247,15 @@ def test_init(self):
         self._verify_api_params(opts, self.api_params)
         self._verify_api_params(GraphOptions(), {
             'graph_source': 'g',
-            'graph_language': 'gremlin-groovy',
-            'graph_protocol': GraphProtocol.GRAPHSON_1_0
+            'graph_language': 'gremlin-groovy'
+        })
+
+    def test_with_graph_protocol(self):
+        opts = GraphOptions(graph_protocol='graphson-2-0')
+        self.assertEqual(opts._graph_options, {
+            'graph-source': b'g',
+            'graph-language': b'gremlin-groovy',
+            'graph-results': b'graphson-2-0'
         })
 
     def test_init_unknown_kwargs(self):
@@ -311,7 +320,10 @@ def test_del_attr(self):
     def _verify_api_params(self, opts, api_params):
         self.assertEqual(len(opts._graph_options), len(api_params))
         for name, value in api_params.items():
-            value = six.b(value)
+            try:
+                value = six.b(value)
+            except (TypeError, AttributeError):
+                pass  # already bytes
             self.assertEqual(getattr(opts, name), value)
             self.assertEqual(opts._graph_options[self.opt_mapping[name]], value)
 
diff --git a/tests/unit/advanced/test_insights.py b/tests/unit/advanced/test_insights.py
index b63e48ba6a..3db8883542 100644
--- a/tests/unit/advanced/test_insights.py
+++ b/tests/unit/advanced/test_insights.py
@@ -169,7 +169,7 @@ def test_graph_execution_profile(self):
             'serialConsistency': None,
             'speculativeExecution': {'namespace': 'cassandra.policies',
                                      'options': {},
                                      'type': 'NoSpeculativeExecutionPolicy'},
-            'graphOptions': {'graphProtocol': 'graphson-1.0',
+            'graphOptions': {'graphProtocol': None,
                              'language': 'gremlin-groovy',
                              'source': 'g'},
         }
@@ -195,7 +195,7 @@ def test_graph_analytics_execution_profile(self):
             'serialConsistency': None,
             'speculativeExecution': {'namespace': 'cassandra.policies',
                                      'options': {},
                                      'type': 'NoSpeculativeExecutionPolicy'},
-            'graphOptions': {'graphProtocol': 'graphson-1.0',
+            'graphOptions': {'graphProtocol': None,
                              'language': 'gremlin-groovy',
                              'source': 'a'},
         }
diff --git a/tests/unit/advanced/test_metadata.py b/tests/unit/advanced/test_metadata.py
new file mode 100644
index 0000000000..addd514169
--- /dev/null
+++ b/tests/unit/advanced/test_metadata.py
@@ -0,0 +1,141 @@
+# Copyright DataStax, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+try:
+    import unittest2 as unittest
+except ImportError:
+    import unittest  # noqa
+
+from cassandra.metadata import (
+    KeyspaceMetadata, TableMetadataDSE68,
+    VertexMetadata, EdgeMetadata
+)
+
+
+class GraphMetadataToCQLTests(unittest.TestCase):
+
+    def _create_edge_metadata(self, partition_keys=['pk1'], clustering_keys=['c1']):
+        return EdgeMetadata(
+            'keyspace', 'table', 'label', 'from_table', 'from_label',
+            partition_keys, clustering_keys, 'to_table', 'to_label',
+            partition_keys, clustering_keys)
+
+    def _create_vertex_metadata(self, label_name='label'):
+        return VertexMetadata('keyspace', 'table', label_name)
+
+    def _create_keyspace_metadata(self, graph_engine):
+        return KeyspaceMetadata(
+            'keyspace', True, 'org.apache.cassandra.locator.SimpleStrategy',
+            {'replication_factor': 1}, graph_engine=graph_engine)
+
+    def _create_table_metadata(self, with_vertex=False, with_edge=False):
+        tm = TableMetadataDSE68('keyspace', 'table')
+        if with_vertex:
+            tm.vertex = self._create_vertex_metadata() if with_vertex is True else with_vertex
+        elif with_edge:
+            tm.edge = self._create_edge_metadata() if with_edge is True else with_edge
+
+        return tm
+
+    def test_keyspace_no_graph_engine(self):
+        km = self._create_keyspace_metadata(None)
+        self.assertEqual(km.graph_engine, None)
+        self.assertNotIn(
+            "graph_engine",
+            km.as_cql_query()
+        )
+
+    def test_keyspace_with_graph_engine(self):
+        graph_engine = 'Core'
+        km = self._create_keyspace_metadata(graph_engine)
+        self.assertEqual(km.graph_engine, graph_engine)
+        cql = km.as_cql_query()
+        self.assertIn(
+            "graph_engine",
+            cql
+        )
+        self.assertIn(
+            "Core",
+            cql
+        )
+
+    def test_table_no_vertex_or_edge(self):
+        tm = self._create_table_metadata()
+        self.assertIsNone(tm.vertex)
+        self.assertIsNone(tm.edge)
+        cql = tm.as_cql_query()
+        self.assertNotIn("VERTEX LABEL", cql)
+        self.assertNotIn("EDGE LABEL", cql)
+
+    def test_table_with_vertex(self):
+        tm = self._create_table_metadata(with_vertex=True)
+        self.assertIsInstance(tm.vertex, VertexMetadata)
+        self.assertIsNone(tm.edge)
+        cql = tm.as_cql_query()
+        self.assertIn("VERTEX LABEL", cql)
+        self.assertNotIn("EDGE LABEL", cql)
+
+    def test_table_with_edge(self):
+        tm = self._create_table_metadata(with_edge=True)
+        self.assertIsNone(tm.vertex)
+        self.assertIsInstance(tm.edge, EdgeMetadata)
+        cql = tm.as_cql_query()
+        self.assertNotIn("VERTEX LABEL", cql)
+        self.assertIn("EDGE LABEL", cql)
+        self.assertIn("FROM from_label", cql)
+        self.assertIn("TO to_label", cql)
+
+    def test_vertex_with_label(self):
+        tm = self._create_table_metadata(with_vertex=True)
+        self.assertTrue(tm.as_cql_query().endswith('VERTEX LABEL label'))
+
+    def test_edge_single_partition_key_and_clustering_key(self):
+        tm = self._create_table_metadata(with_edge=True)
+        self.assertIn(
+            'FROM from_label(pk1, c1)',
+            tm.as_cql_query()
+        )
+
+    def test_edge_multiple_partition_keys(self):
+        edge = self._create_edge_metadata(partition_keys=['pk1', 'pk2'])
+        tm = self._create_table_metadata(with_edge=edge)
+        self.assertIn(
+            'FROM from_label((pk1, pk2), ',
+            tm.as_cql_query()
+        )
+
+    def test_edge_no_clustering_keys(self):
+        edge = self._create_edge_metadata(clustering_keys=[])
+        tm = self._create_table_metadata(with_edge=edge)
+        self.assertIn(
+            'FROM from_label(pk1) ',
+            tm.as_cql_query()
+        )
+
+    def test_edge_multiple_clustering_keys(self):
+        edge = self._create_edge_metadata(clustering_keys=['c1', 'c2'])
+        tm = self._create_table_metadata(with_edge=edge)
+        self.assertIn(
+            'FROM from_label(pk1, c1, c2) ',
+            tm.as_cql_query()
+        )
+
+    def test_edge_multiple_partition_and_clustering_keys(self):
+        edge = self._create_edge_metadata(partition_keys=['pk1', 'pk2'],
+                                          clustering_keys=['c1', 'c2'])
+        tm = self._create_table_metadata(with_edge=edge)
+        self.assertIn(
+            'FROM from_label((pk1, pk2), c1, c2) ',
+            tm.as_cql_query()
+        )

From 97de684894f6d8f15917da5bda852f7bea82fd07 Mon Sep 17 00:00:00 2001
From: Alan Boudreault
Date: Fri, 1 Nov 2019 09:16:07 -0400
Subject: [PATCH 002/211] Ngdg docs integration (#19)

* Add iterate_step test

* ngdg docs integration
---
 cassandra/datastax/graph/fluent/__init__.py |   2 +-
 docs/.nav                                   |   3 +
 docs/api/cassandra/graph.rst                |  27 +
 docs/api/cassandra/metadata.rst             |   9 +
 docs/classic_graph.rst                      | 299 +++++++++++
 docs/graph.rst                              | 501 +++++++++++-------
 docs/graph_fluent.rst                       |  41 +-
 docs/index.rst                              |   6 +-
 .../advanced/graph/fluent/test_graph.py     |  14 +
 9 files changed, 707 insertions(+), 195 deletions(-)
 create mode 100644 docs/classic_graph.rst

diff --git a/cassandra/datastax/graph/fluent/__init__.py b/cassandra/datastax/graph/fluent/__init__.py
index 7d1ba0b60c..0609172483 100644
--- a/cassandra/datastax/graph/fluent/__init__.py
+++ b/cassandra/datastax/graph/fluent/__init__.py
@@ -190,7 +190,7 @@ def query_from_traversal(traversal, graph_protocol=DSE_GRAPH_QUERY_PROTOCOL, con
     :param traversal: The GraphTraversal object
     :param graph_protocol: The graph protocol. Default is `DseGraph.DSE_GRAPH_QUERY_PROTOCOL`.
     :param context: The dict of the serialization context, needed for GraphSON3 (tuple, udt).
-        e.g: {'cluster': dse_cluster, 'graph_name': name}
+        e.g.: {'cluster': cluster, 'graph_name': name}
     """
 
     if isinstance(traversal, GraphTraversal):
diff --git a/docs/.nav b/docs/.nav
index 7b39d9001d..d5b54c4e13 100644
--- a/docs/.nav
+++ b/docs/.nav
@@ -3,6 +3,9 @@ getting_started
 execution_profiles
 lwt
 object_mapper
+geo_types
+graph
+classic_graph
 performance
 query_paging
 security
diff --git a/docs/api/cassandra/graph.rst b/docs/api/cassandra/graph.rst
index 2211a25d96..43ddd3086c 100644
--- a/docs/api/cassandra/graph.rst
+++ b/docs/api/cassandra/graph.rst
@@ -11,6 +11,30 @@
 
 .. autofunction:: graph_object_row_factory
 
+.. autofunction:: graph_graphson2_row_factory
+
+.. autofunction:: graph_graphson3_row_factory
+
+.. function:: to_int(value)
+
+   Wraps a value to be explicitly serialized as a graphson Int.
+
+.. function:: to_bigint(value)
+
+   Wraps a value to be explicitly serialized as a graphson Bigint.
+
+.. function:: to_smallint(value)
+
+   Wraps a value to be explicitly serialized as a graphson Smallint.
+
+.. function:: to_float(value)
+
+   Wraps a value to be explicitly serialized as a graphson Float.
+
+.. function:: to_double(value)
+
+   Wraps a value to be explicitly serialized as a graphson Double.
+
 .. autoclass:: GraphProtocol
    :members:
 
@@ -92,3 +116,6 @@
 
 .. autoclass:: GraphSON2Reader
    :members:
+
+.. autoclass:: GraphSON3Reader
+   :members:
diff --git a/docs/api/cassandra/metadata.rst b/docs/api/cassandra/metadata.rst
index ed79d04f42..b5e6dae904 100644
--- a/docs/api/cassandra/metadata.rst
+++ b/docs/api/cassandra/metadata.rst
@@ -34,6 +34,9 @@ Schemas
 .. autoclass:: TableMetadata ()
    :members:
 
+.. autoclass:: TableMetadataV3 ()
+   :members:
+
 .. autoclass:: ColumnMetadata ()
    :members:
 
@@ -43,6 +46,12 @@ Schemas
 .. autoclass:: MaterializedViewMetadata ()
    :members:
 
+.. autoclass:: VertexMetadata ()
+   :members:
+
+.. autoclass:: EdgeMetadata ()
+   :members:
+
 Tokens and Ring Topology
 ------------------------
 
diff --git a/docs/classic_graph.rst b/docs/classic_graph.rst
new file mode 100644
index 0000000000..ef68c86359
--- /dev/null
+++ b/docs/classic_graph.rst
@@ -0,0 +1,299 @@
+DataStax Classic Graph Queries
+==============================
+
+Getting Started
+~~~~~~~~~~~~~~~
+
+First, we need to create a graph in the system. To access the system API, we
+use the system execution profile ::
+
+    from cassandra.cluster import Cluster, EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT
+
+    cluster = Cluster()
+    session = cluster.connect()
+
+    graph_name = 'movies'
+    session.execute_graph("system.graph(name).ifNotExists().engine(Classic).create()", {'name': graph_name},
+                          execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)
+
+
+To execute requests on our newly created graph, we need to set up an execution
+profile. Additionally, we need to set the schema_mode to `development`
+for the schema creation::
+
+
+    from cassandra.cluster import Cluster, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT
+    from cassandra.graph import GraphOptions
+
+    graph_name = 'movies'
+    ep = GraphExecutionProfile(graph_options=GraphOptions(graph_name=graph_name))
+
+    cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep})
+    session = cluster.connect()
+
+    session.execute_graph("schema.config().option('graph.schema_mode').set('development')")
+
+
+We are ready to configure our graph schema. We will create a simple one for movies::
+
+    # properties are used to define a vertex
+    properties = """
+        schema.propertyKey("genreId").Text().create();
+        schema.propertyKey("personId").Text().create();
+        schema.propertyKey("movieId").Text().create();
+        schema.propertyKey("name").Text().create();
+        schema.propertyKey("title").Text().create();
+        schema.propertyKey("year").Int().create();
+        schema.propertyKey("country").Text().create();
+    """
+
+    session.execute_graph(properties)  # we can execute multiple statements in a single request
+
+    # A Vertex represents a "thing" in the world.
+    vertices = """
+        schema.vertexLabel("genre").properties("genreId","name").create();
+        schema.vertexLabel("person").properties("personId","name").create();
+        schema.vertexLabel("movie").properties("movieId","title","year","country").create();
+    """
+
+    session.execute_graph(vertices)
+
+    # An edge represents a relationship between two vertices
+    edges = """
+        schema.edgeLabel("belongsTo").single().connection("movie","genre").create();
+        schema.edgeLabel("actor").connection("movie","person").create();
+    """
+
+    session.execute_graph(edges)
+
+    # Indexes to execute graph requests efficiently
+    indexes = """
+        schema.vertexLabel("genre").index("genresById").materialized().by("genreId").add();
+        schema.vertexLabel("genre").index("genresByName").materialized().by("name").add();
+        schema.vertexLabel("person").index("personsById").materialized().by("personId").add();
+        schema.vertexLabel("person").index("personsByName").materialized().by("name").add();
+        schema.vertexLabel("movie").index("moviesById").materialized().by("movieId").add();
+        schema.vertexLabel("movie").index("moviesByTitle").materialized().by("title").add();
+        schema.vertexLabel("movie").index("moviesByYear").secondary().by("year").add();
+    """
+
+    session.execute_graph(indexes)
+
+Next, we'll add some data::
+
+    session.execute_graph("""
+        g.addV('genre').property('genreId', 1).property('name', 'Action').next();
+        g.addV('genre').property('genreId', 2).property('name', 'Drama').next();
+        g.addV('genre').property('genreId', 3).property('name', 'Comedy').next();
+        g.addV('genre').property('genreId', 4).property('name', 'Horror').next();
+    """)
+
+    session.execute_graph("""
+        g.addV('person').property('personId', 1).property('name', 'Mark Wahlberg').next();
+        g.addV('person').property('personId', 2).property('name', 'Leonardo DiCaprio').next();
+        g.addV('person').property('personId', 3).property('name', 'Iggy Pop').next();
+    """)
+
+    session.execute_graph("""
+        g.addV('movie').property('movieId', 1).property('title', 'The Happening').
+            property('year', 2008).property('country', 'United States').next();
+        g.addV('movie').property('movieId', 2).property('title', 'The Italian Job').
+            property('year', 2003).property('country', 'United States').next();
+
+        g.addV('movie').property('movieId', 3).property('title', 'Revolutionary Road').
+            property('year', 2008).property('country', 'United States').next();
+        g.addV('movie').property('movieId', 4).property('title', 'The Man in the Iron Mask').
+            property('year', 1998).property('country', 'United States').next();
+
+        g.addV('movie').property('movieId', 5).property('title', 'Dead Man').
+            property('year', 1995).property('country', 'United States').next();
+    """)
+
+Now that our genre, actor and movie vertices are added, we'll create the relationships (edges) between them::
+
+    session.execute_graph("""
+        genre_horror = g.V().hasLabel('genre').has('name', 'Horror').next();
+        genre_drama = g.V().hasLabel('genre').has('name', 'Drama').next();
+        genre_action = g.V().hasLabel('genre').has('name', 'Action').next();
+
+        leo = g.V().hasLabel('person').has('name', 'Leonardo DiCaprio').next();
+        mark = g.V().hasLabel('person').has('name', 'Mark Wahlberg').next();
+        iggy = g.V().hasLabel('person').has('name', 'Iggy Pop').next();
+
+        the_happening = g.V().hasLabel('movie').has('title', 'The Happening').next();
+        the_italian_job = g.V().hasLabel('movie').has('title', 'The Italian Job').next();
+        rev_road = g.V().hasLabel('movie').has('title', 'Revolutionary Road').next();
+        man_mask = g.V().hasLabel('movie').has('title', 'The Man in the Iron Mask').next();
+        dead_man = g.V().hasLabel('movie').has('title', 'Dead Man').next();
+
+        the_happening.addEdge('belongsTo', genre_horror);
+        the_italian_job.addEdge('belongsTo', genre_action);
+        rev_road.addEdge('belongsTo', genre_drama);
+        man_mask.addEdge('belongsTo', genre_drama);
+        man_mask.addEdge('belongsTo', genre_action);
+        dead_man.addEdge('belongsTo', genre_drama);
+
+        the_happening.addEdge('actor', mark);
+        the_italian_job.addEdge('actor', mark);
+        rev_road.addEdge('actor', leo);
+        man_mask.addEdge('actor', leo);
+        dead_man.addEdge('actor', iggy);
+    """)
+
+We are all set. You can now query your graph. Here are some examples::
+
+    # Find all movies of the genre Drama
+    for r in session.execute_graph("""
+        g.V().has('genre', 'name', 'Drama').in('belongsTo').valueMap();"""):
+        print(r)
+
+    # Find all movies of the same genre as the movie 'Dead Man'
+    for r in session.execute_graph("""
+        g.V().has('movie', 'title', 'Dead Man').out('belongsTo').in('belongsTo').valueMap();"""):
+        print(r)
+
+    # Find all movies of Mark Wahlberg
+    for r in session.execute_graph("""
+        g.V().has('person', 'name', 'Mark Wahlberg').in('actor').valueMap();"""):
+        print(r)
+
+To see more graph examples, see `DataStax Graph Examples `_.
+
+Graph Types
+~~~~~~~~~~~
+
+Here are the supported graph types with their python representations:
+
+========== ================
+DSE Graph  Python
+========== ================
+boolean    bool
+bigint     long, int (PY3)
+int        int
+smallint   int
+varint     int
+float      float
+double     float
+uuid       uuid.UUID
+Decimal    Decimal
+inet       str
+timestamp  datetime.datetime
+date       datetime.date
+time       datetime.time
+duration   datetime.timedelta
+point      Point
+linestring LineString
+polygon    Polygon
+blob       bytearray, buffer (PY2), memoryview (PY3), bytes (PY3)
+========== ================
+
+Graph Row Factory
+~~~~~~~~~~~~~~~~~
+
+By default (with :class:`.GraphExecutionProfile.row_factory` set to :func:`.graph.graph_object_row_factory`), known graph result
+types are unpacked and returned as specialized types (:class:`.Vertex`, :class:`.Edge`). If the result is not one of these
+types, a :class:`.graph.Result` is returned, containing the graph result parsed from JSON and removed from its outer dict.
+The class has some accessor convenience methods for accessing top-level properties by name (`type`, `properties` above),
+or lists by index::
+
+    # dicts with `__getattr__` or `__getitem__`
+    result = session.execute_graph("[[key_str: 'value', key_int: 3]]", execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)[0]  # Using system exec just because there is no graph defined
+    result  # cassandra.datastax.graph.Result({u'key_str': u'value', u'key_int': 3})
+    result.value  # {u'key_int': 3, u'key_str': u'value'} (dict)
+    result.key_str  # u'value'
+    result.key_int  # 3
+    result['key_str']  # u'value'
+    result['key_int']  # 3
+
+    # lists with `__getitem__`
+    result = session.execute_graph('[[0, 1, 2]]', execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)[0]
+    result  # cassandra.datastax.graph.Result([0, 1, 2])
+    result.value  # [0, 1, 2] (list)
+    result[1]  # 1 (list[1])
+
+You can use a different row factory by setting :attr:`.cluster.ExecutionProfile.row_factory` or passing it to
+:meth:`.Session.execute_graph`. For example, :func:`.graph.single_object_row_factory` returns the JSON result string,
+unparsed. :func:`.graph.graph_result_row_factory` returns parsed, but unmodified results (such that all metadata is retained,
+unlike :func:`.graph.graph_object_row_factory`, which sheds some as attributes and properties are unpacked). These results
+also provide convenience methods for converting to known types (:meth:`~.Result.as_vertex`, :meth:`~.Result.as_edge`, :meth:`~.Result.as_path`).
+
+Vertex and Edge properties are never unpacked since their types are unknown. If you know your graph schema and want to
+deserialize properties, use the :class:`.GraphSON1Deserializer`. It provides convenient methods to deserialize by type (e.g.
+deserialize_date, deserialize_uuid, deserialize_polygon etc.) Example::
+
+    # ...
+    from cassandra.graph import GraphSON1Deserializer
+
+    row = session.execute_graph("g.V().toList()")[0]
+    value = row.properties['my_property_key'][0].value  # accessing the VertexProperty value
+    value = GraphSON1Deserializer.deserialize_timestamp(value)
+
+    print(value)        # 2017-06-26 08:27:05
+    print(type(value))  # <class 'datetime.datetime'>
+
+
+Named Parameters
+~~~~~~~~~~~~~~~~
+
+Named parameters are passed in a dict to :meth:`.cluster.Session.execute_graph`::
+
+    result_set = session.execute_graph('[a, b]', {'a': 1, 'b': 2}, execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)
+    [r.value for r in result_set]  # [1, 2]
+
+All python types listed in `Graph Types`_ can be passed as named parameters and will be serialized
+automatically to their graph representation:
+
+Example::
+
+    session.execute_graph("""
+        g.addV('person').
+        property('name', text_value).
+        property('age', integer_value).
+        property('birthday', timestamp_value).
+        property('house_yard', polygon_value).toList()
+    """, {
+        'text_value': 'Mike Smith',
+        'integer_value': 34,
+        'timestamp_value': datetime.datetime(1967, 12, 30),
+        'polygon_value': Polygon(((30, 10), (40, 40), (20, 40), (10, 20), (30, 10)))
+    })
+
+
+As with all Execution Profile parameters, graph options can be set in the cluster default (as shown in the first example)
+or specified per execution::
+
+    ep = session.execution_profile_clone_update(EXEC_PROFILE_GRAPH_DEFAULT,
+                                                graph_options=GraphOptions(graph_name='something-else'))
+    session.execute_graph(statement, execution_profile=ep)
+
+Using GraphSON2 Protocol
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+The default graph protocol is GraphSON1. However, GraphSON1 may
+cause type conversion problems during the serialization of the query
+to the DSE Graph server, or during the deserialization of the
+responses. GraphSON2 offers better
+support for the complex data types handled by DSE Graph.
+
+DSE 5.0.4 and later offer the GraphSON2 protocol
+for graph queries. Enabling GraphSON2 can be done by `changing the
+graph protocol of the execution profile` and `setting the graphson2 row factory`::
+
+    from cassandra.cluster import Cluster, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT
+    from cassandra.graph import GraphOptions, GraphProtocol, graph_graphson2_row_factory
+
+    # Create a GraphSON2 execution profile
+    ep = GraphExecutionProfile(graph_options=GraphOptions(graph_name='types',
+                                                          graph_protocol=GraphProtocol.GRAPHSON_2_0),
+                               row_factory=graph_graphson2_row_factory)
+
+    cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep})
+    session = cluster.connect()
+    session.execute_graph(...)
+
+Using GraphSON2, all properties will be automatically deserialized to
+their Python representations. Note that it may bring significant
+behavioral changes at runtime.
+
+It is generally recommended to switch to GraphSON2 as it brings more
+consistent support for complex data types in the Graph driver and will
+be activated by default in the next major version of the driver.
diff --git a/docs/graph.rst b/docs/graph.rst
index bd81f0cf4f..47dc53d38d 100644
--- a/docs/graph.rst
+++ b/docs/graph.rst
@@ -1,11 +1,11 @@
 DataStax Graph Queries
 ======================
 
-The Cassandra driver executes graph queries over the Cassandra native protocol. Use
+The driver executes graph queries over the Cassandra native protocol. Use
 :meth:`.Session.execute_graph` or :meth:`.Session.execute_graph_async` for
-executing gremlin queries in DSE Graph.
+executing gremlin queries in DataStax Graph.
 
-Three Execution Profiles are provided suitable for graph execution:
+The driver defines three Execution Profiles suitable for graph execution:
 
 * :data:`~.cluster.EXEC_PROFILE_GRAPH_DEFAULT`
 * :data:`~.cluster.EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT`
@@ -14,8 +14,13 @@ Three Execution Profiles are provided suitable for graph execution:
 
 See :doc:`getting_started` and :doc:`execution_profiles` for more detail on working with profiles.
 
-Getting Started
-~~~~~~~~~~~~~~~
+In DSE 6.8.0, the Core graph engine has been introduced and is now the default. It
+provides better performance and scale, as well as a unified multi-model experience. This guide
+is for graphs that use the core engine. If you work with previous versions of
+DSE or existing graphs, see :doc:`classic_graph`.
+
+Getting Started with Graph and the Core Engine
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 First, we need to create a graph in the system. To access the system API, we
 use the system execution profile ::
@@ -26,129 +31,204 @@ use the system execution profile ::
     session = cluster.connect()
 
     graph_name = 'movies'
-    session.execute_graph("system.graph(name).ifNotExists().create()", {'name': graph_name},
+    session.execute_graph("system.graph(name).create()", {'name': graph_name},
                           execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)
 
 
-To execute requests on our newly created graph, we need to setup an execution
-profile. Additionally, we also need to set the schema_mode to `development`
-for the schema creation::
-
+Graphs that use the core engine only support GraphSON3. Since they are Cassandra tables under
+the hood, the driver can automatically configure the execution profile with the proper options
+(row_factory and graph_protocol) when executing queries. You only need to make sure that
+the `graph_name` is set and GraphSON3 will be automatically used::
 
     from cassandra.cluster import Cluster, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT
-    from cassandra.datastax.graph import GraphOptions
+    from cassandra.graph import GraphOptions
 
     graph_name = 'movies'
     ep = GraphExecutionProfile(graph_options=GraphOptions(graph_name=graph_name))
-
     cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep})
     session = cluster.connect()
-
-    session.execute_graph("schema.config().option('graph.schema_mode').set('development')")
+    session.execute_graph("g.addV(...)")
 
 
-We are ready to configure our graph schema. We will create a simple one for movies::
+Note that this graph engine detection is based on the metadata. You might experience
+some query errors if the graph has been newly created and is not yet in the metadata. This
+would result in a badly configured execution profile. If you really want to avoid that,
+configure your execution profile explicitly::
 
-    # properties are used to define a vertex
-    properties = """
-        schema.propertyKey("genreId").Text().create();
-        schema.propertyKey("personId").Text().create();
-        schema.propertyKey("movieId").Text().create();
-        schema.propertyKey("name").Text().create();
-        schema.propertyKey("title").Text().create();
-        schema.propertyKey("year").Int().create();
-        schema.propertyKey("country").Text().create();
-    """
+    from cassandra.cluster import Cluster, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT
+    from cassandra.graph import GraphOptions, GraphProtocol, graph_graphson3_row_factory
 
-    session.execute_graph(properties)  # we can execute multiple statements in a single request
+    graph_name = 'movies'
+    ep_graphson3 = GraphExecutionProfile(
+        row_factory=graph_graphson3_row_factory,
+        graph_options=GraphOptions(
+            graph_protocol=GraphProtocol.GRAPHSON_3_0,
+            graph_name=graph_name))
+
+    cluster = Cluster(execution_profiles={'core': ep_graphson3})
+    session = cluster.connect()
+    session.execute_graph("g.addV(...)", execution_profile='core')
+
+
+We are ready to configure our graph schema. We will create a simple one for movies::
 
     # A Vertex represents a "thing" in the world.
- vertices = """ - schema.vertexLabel("genre").properties("genreId","name").create(); - schema.vertexLabel("person").properties("personId","name").create(); - schema.vertexLabel("movie").properties("movieId","title","year","country").create(); + # Create the genre vertex + query = """ + schema.vertexLabel('genre') + .partitionBy('genreId', Int) + .property('name', Text) + .create() """ - - session.execute_graph(vertices) + session.execute_graph(query) + + # Create the person vertex + query = """ + schema.vertexLabel('person') + .partitionBy('personId', Int) + .property('name', Text) + .create() + """ + session.execute_graph(query) + + # Create the movie vertex + query = """ + schema.vertexLabel('movie') + .partitionBy('movieId', Int) + .property('title', Text) + .property('year', Int) + .property('country', Text) + .create() + """ + session.execute_graph(query) # An edge represents a relationship between two vertices - edges = """ - schema.edgeLabel("belongsTo").single().connection("movie","genre").create(); - schema.edgeLabel("actor").connection("movie","person").create(); + # Create our edges + queries = """ + schema.edgeLabel('belongsTo').from('movie').to('genre').create(); + schema.edgeLabel('actor').from('movie').to('person').create(); """ - - session.execute_graph(edges) + session.execute_graph(queries) # Indexes to execute graph requests efficiently + + # If you have a node with the search workload enabled (solr), use the following: + indexes = """ + schema.vertexLabel('genre').searchIndex() + .by("name") + .create(); + + schema.vertexLabel('person').searchIndex() + .by("name") + .create(); + + schema.vertexLabel('movie').searchIndex() + .by('title') + .by("year") + .create(); + """ + session.execute_graph(indexes) + + # Otherwise, use secondary indexes: indexes = """ - schema.vertexLabel("genre").index("genresById").materialized().by("genreId").add(); - schema.vertexLabel("genre").index("genresByName").materialized().by("name").add(); - schema.vertexLabel("person").index("personsById").materialized().by("personId").add(); - schema.vertexLabel("person").index("personsByName").materialized().by("name").add(); - schema.vertexLabel("movie").index("moviesById").materialized().by("movieId").add(); - schema.vertexLabel("movie").index("moviesByTitle").materialized().by("title").add(); - schema.vertexLabel("movie").index("moviesByYear").secondary().by("year").add(); + schema.vertexLabel('genre') + .secondaryIndex('by_genre') + .by('name') + .create() + + schema.vertexLabel('person') + .secondaryIndex('by_name') + .by('name') + .create() + + schema.vertexLabel('movie') + .secondaryIndex('by_title') + .by('title') + .create() """ + session.execute_graph(indexes) + +Add some edge indexes (materialized views):: + + indexes = """ + schema.edgeLabel('belongsTo') + .from('movie') + .to('genre') + .materializedView('movie__belongsTo__genre_by_in_genreId') + .ifNotExists() + .partitionBy(IN, 'genreId') + .clusterBy(OUT, 'movieId', Asc) + .create() + + schema.edgeLabel('actor') + .from('movie') + .to('person') + .materializedView('movie__actor__person_by_in_personId') + .ifNotExists() + .partitionBy(IN, 'personId') + .clusterBy(OUT, 'movieId', Asc) + .create() + """ + session.execute_graph(indexes) Next, we'll add some data:: session.execute_graph(""" - g.addV('genre').property('genreId', 1).property('name', 'Action').next(); - g.addV('genre').property('genreId', 2).property('name', 'Drama').next(); - g.addV('genre').property('genreId', 3).property('name', 'Comedy').next(); - 
g.addV('genre').property('genreId', 4).property('name', 'Horror').next(); + g.addV('genre').property('genreId', 1).property('name', 'Action').next(); + g.addV('genre').property('genreId', 2).property('name', 'Drama').next(); + g.addV('genre').property('genreId', 3).property('name', 'Comedy').next(); + g.addV('genre').property('genreId', 4).property('name', 'Horror').next(); """) session.execute_graph(""" - g.addV('person').property('personId', 1).property('name', 'Mark Wahlberg').next(); - g.addV('person').property('personId', 2).property('name', 'Leonardo DiCaprio').next(); - g.addV('person').property('personId', 3).property('name', 'Iggy Pop').next(); + g.addV('person').property('personId', 1).property('name', 'Mark Wahlberg').next(); + g.addV('person').property('personId', 2).property('name', 'Leonardo DiCaprio').next(); + g.addV('person').property('personId', 3).property('name', 'Iggy Pop').next(); """) session.execute_graph(""" - g.addV('movie').property('movieId', 1).property('title', 'The Happening'). - property('year', 2008).property('country', 'United States').next(); - g.addV('movie').property('movieId', 2).property('title', 'The Italian Job'). - property('year', 2003).property('country', 'United States').next(); - - g.addV('movie').property('movieId', 3).property('title', 'Revolutionary Road'). - property('year', 2008).property('country', 'United States').next(); - g.addV('movie').property('movieId', 4).property('title', 'The Man in the Iron Mask'). - property('year', 1998).property('country', 'United States').next(); - - g.addV('movie').property('movieId', 5).property('title', 'Dead Man'). - property('year', 1995).property('country', 'United States').next(); + g.addV('movie').property('movieId', 1).property('title', 'The Happening'). + property('year', 2008).property('country', 'United States').next(); + g.addV('movie').property('movieId', 2).property('title', 'The Italian Job'). + property('year', 2003).property('country', 'United States').next(); + + g.addV('movie').property('movieId', 3).property('title', 'Revolutionary Road'). + property('year', 2008).property('country', 'United States').next(); + g.addV('movie').property('movieId', 4).property('title', 'The Man in the Iron Mask'). + property('year', 1998).property('country', 'United States').next(); + + g.addV('movie').property('movieId', 5).property('title', 'Dead Man'). 
+ property('year', 1995).property('country', 'United States').next(); """) Now that our genre, actor and movie vertices are added, we'll create the relationships (edges) between them:: session.execute_graph(""" - genre_horror = g.V().hasLabel('genre').has('name', 'Horror').next(); - genre_drama = g.V().hasLabel('genre').has('name', 'Drama').next(); - genre_action = g.V().hasLabel('genre').has('name', 'Action').next(); - - leo = g.V().hasLabel('person').has('name', 'Leonardo DiCaprio').next(); - mark = g.V().hasLabel('person').has('name', 'Mark Wahlberg').next(); - iggy = g.V().hasLabel('person').has('name', 'Iggy Pop').next(); - - the_happening = g.V().hasLabel('movie').has('title', 'The Happening').next(); - the_italian_job = g.V().hasLabel('movie').has('title', 'The Italian Job').next(); - rev_road = g.V().hasLabel('movie').has('title', 'Revolutionary Road').next(); - man_mask = g.V().hasLabel('movie').has('title', 'The Man in the Iron Mask').next(); - dead_man = g.V().hasLabel('movie').has('title', 'Dead Man').next(); - - the_happening.addEdge('belongsTo', genre_horror); - the_italian_job.addEdge('belongsTo', genre_action); - rev_road.addEdge('belongsTo', genre_drama); - man_mask.addEdge('belongsTo', genre_drama); - man_mask.addEdge('belongsTo', genre_action); - dead_man.addEdge('belongsTo', genre_drama); - - the_happening.addEdge('actor', mark); - the_italian_job.addEdge('actor', mark); - rev_road.addEdge('actor', leo); - man_mask.addEdge('actor', leo); - dead_man.addEdge('actor', iggy); + genre_horror = g.V().hasLabel('genre').has('name', 'Horror').id().next(); + genre_drama = g.V().hasLabel('genre').has('name', 'Drama').id().next(); + genre_action = g.V().hasLabel('genre').has('name', 'Action').id().next(); + + leo = g.V().hasLabel('person').has('name', 'Leonardo DiCaprio').id().next(); + mark = g.V().hasLabel('person').has('name', 'Mark Wahlberg').id().next(); + iggy = g.V().hasLabel('person').has('name', 'Iggy Pop').id().next(); + + the_happening = g.V().hasLabel('movie').has('title', 'The Happening').id().next(); + the_italian_job = g.V().hasLabel('movie').has('title', 'The Italian Job').id().next(); + rev_road = g.V().hasLabel('movie').has('title', 'Revolutionary Road').id().next(); + man_mask = g.V().hasLabel('movie').has('title', 'The Man in the Iron Mask').id().next(); + dead_man = g.V().hasLabel('movie').has('title', 'Dead Man').id().next(); + + g.addE('belongsTo').from(__.V(the_happening)).to(__.V(genre_horror)).next(); + g.addE('belongsTo').from(__.V(the_italian_job)).to(__.V(genre_action)).next(); + g.addE('belongsTo').from(__.V(rev_road)).to(__.V(genre_drama)).next(); + g.addE('belongsTo').from(__.V(man_mask)).to(__.V(genre_drama)).next(); + g.addE('belongsTo').from(__.V(man_mask)).to(__.V(genre_action)).next(); + g.addE('belongsTo').from(__.V(dead_man)).to(__.V(genre_drama)).next(); + + g.addE('actor').from(__.V(the_happening)).to(__.V(mark)).next(); + g.addE('actor').from(__.V(the_italian_job)).to(__.V(mark)).next(); + g.addE('actor').from(__.V(rev_road)).to(__.V(leo)).next(); + g.addE('actor').from(__.V(man_mask)).to(__.V(leo)).next(); + g.addE('actor').from(__.V(dead_man)).to(__.V(iggy)).next(); """) We are all set. You can now query your graph. Here are some examples:: @@ -157,7 +237,7 @@ We are all set. You can now query your graph. 
Here are some examples::
 
     # Find all movies of the genre Drama
     for r in session.execute_graph("""
         g.V().has('genre', 'name', 'Drama').in('belongsTo').valueMap();"""):
         print(r)
-    
+
     # Find all movies of the same genre as the movie 'Dead Man'
     for r in session.execute_graph("""
         g.V().has('movie', 'title', 'Dead Man').out('belongsTo').in('belongsTo').valueMap();"""):
         print(r)
@@ -170,81 +250,40 @@ We are all set. You can now query your graph. Here are some examples::
 
 To see more graph examples, see `DataStax Graph Examples `_.
 
-.. _graph-types:
-
-Graph Types
-~~~~~~~~~~~
+Graph Types for the Core Engine
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Here are the supported graph types with their python representations:
 
-========== ================
-DSE Graph  Python
-========== ================
-boolean    bool
-bigint     long, int (PY3)
-int        int
-smallint   int
-varint     int
-float      float
-double     double
-uuid       uuid.UUID
-Decimal    Decimal
-inet       str
-timestamp  datetime.datetime
-date       datetime.date
-time       datetime.time
-duration   datetime.timedelta
-point      Point
-linestring LineString
-polygon    Polygon
-blob       bytearray, buffer (PY2), memoryview (PY3), bytes (PY3)
-========== ================
-
-Graph Row Factory
-~~~~~~~~~~~~~~~~~
-
-By default (with :class:`.GraphExecutionProfile.row_factory` set to :func:`.datastax.graph.graph_object_row_factory`), known graph result
-types are unpacked and returned as specialized types (:class:`.Vertex`, :class:`.Edge`). If the result is not one of these
-types, a :class:`.datastax.graph.Result` is returned, containing the graph result parsed from JSON and removed from its outer dict.
-The class has some accessor convenience methods for accessing top-level properties by name (`type`, `properties` above),
-or lists by index::
-
-    # dicts with `__getattr__` or `__getitem__`
-    result = session.execute_graph("[[key_str: 'value', key_int: 3]]", execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)[0]  # Using system exec just because there is no graph defined
-    result  # cassandra.datastax.graph.Result({u'key_str': u'value', u'key_int': 3})
-    result.value  # {u'key_int': 3, u'key_str': u'value'} (dict)
-    result.key_str  # u'value'
-    result.key_int  # 3
-    result['key_str']  # u'value'
-    result['key_int']  # 3
-
-    # lists with `__getitem__`
-    result = session.execute_graph('[[0, 1, 2]]', execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)[0]
-    result  # cassandra.datastax.graph.Result([0, 1, 2])
-    result.value  # [0, 1, 2] (list)
-    result[1]  # 1 (list[1])
-
-You can use a different row factory by setting :attr:`.cluster.ExecutionProfile.row_factory` or passing it to
-:meth:`cluster.Session.execute_graph`. For example, :func:`.datastax.graph.single_object_row_factory` returns the JSON result string,
-unparsed. :func:`.datastax.graph.graph_result_row_factory` returns parsed, but unmodified results (such that all metadata is retained,
-unlike :func:`.datastax.graph.graph_object_row_factory`, which sheds some as attributes and properties are unpacked). These results
-also provide convenience methods for converting to known types (:meth:`.datastax.graph.Result.as_vertex`, :meth:`.datastax.graph.Result.as_edge`,
- :meth:`.datastax.graph.Result.as_path`).
-
-Vertex and Edge properties are never unpacked since their types are unknown. If you know your graph schema and want to
-deserialize properties, use the :class:`.datastax.graph.GraphSON1Deserializer`. It provides convenient methods to deserialize by types (e.g.
-deserialize_date, deserialize_uuid, deserialize_polygon etc.) Example::
-
-    # ...
-    from cassandra.datastax.graph import GraphSON1Deserializer
-
-    row = session.execute_graph("g.V().toList()")[0]
-    value = row.properties['my_property_key'][0].value  # accessing the VertexProperty value
-    value = GraphSON1Deserializer.deserialize_timestamp(value)
-
-    print value  # 2017-06-26 08:27:05
-    print type(value)  # <type 'datetime.datetime'>
-
+============ =================
+DSE Graph    Python Driver
+============ =================
+text         str
+boolean      bool
+bigint       long
+int          int
+smallint     int
+varint       long
+double       float
+float        float
+uuid         UUID
+bigdecimal   Decimal
+duration     Duration (cassandra.util)
+inet         str or IPV4Address/IPV6Address (if available)
+timestamp    datetime.datetime
+date         datetime.date
+time         datetime.time
+polygon      Polygon
+point        Point
+linestring   LineString
+blob         bytearray, buffer (PY2), memoryview (PY3), bytes (PY3)
+list         list
+map          dict
+set          set or list
+             (Can return a list due to numerical values returned by Java)
+tuple        tuple
+udt          class or namedtuple
+============ =================
 
 Named Parameters
 ~~~~~~~~~~~~~~~~
@@ -254,17 +293,17 @@ Named parameters are passed in a dict to :meth:`.cluster.Session.execute_graph`:
 
     result_set = session.execute_graph('[a, b]', {'a': 1, 'b': 2}, execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)
     [r.value for r in result_set]  # [1, 2]
 
-All python types listed in `Graph Types`_ can be passed as named parameters and will be serialized
+All python types listed in `Graph Types for the Core Engine`_ can be passed as named parameters and will be serialized
 automatically to their graph representation:
 
 Example::
 
-    s.execute_graph("""
+    session.execute_graph("""
         g.addV('person').
         property('name', text_value).
         property('age', integer_value).
        property('birthday', timestamp_value).
-        property('house_yard', polygon_value).toList()
+        property('house_yard', polygon_value).next()
     """, {
         'text_value': 'Mike Smith',
         'integer_value': 34,
@@ -280,36 +319,116 @@ or specified per execution::
                                                 graph_options=GraphOptions(graph_name='something-else'))
     session.execute_graph(statement, execution_profile=ep)
 
-Using GraphSON2 Protocol
-~~~~~~~~~~~~~~~~~~~~~~~~
+CQL collections, Tuple and UDT
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-The default graph protocol used is GraphSON1. However GraphSON1 may
-cause problems of type conversion happening during the serialization
-of the query to the DSE Graph server, or the deserialization of the
-responses back from a string Gremlin query. GraphSON2 offers better
-support for the complex data types handled by DSE Graph.
+This is a very interesting feature of the core engine: we can use all CQL data types, including
+list, map, set, tuple and udt. Here is an example using all these types::
 
-DSE >=5.0.4 now offers the possibility to use the GraphSON2 protocol
-for graph queries. Enabling GraphSON2 can be done by `changing the
-graph protocol of the execution profile` and `setting the graphson2 row factory`::
+
+    query = """
+        schema.type('address')
+            .property('address', Text)
+            .property('city', Text)
+            .property('state', Text)
+            .create();
+    """
+    session.execute_graph(query)
+
+    # It works the same way as a normal CQL UDT, so we
+    # can create a udt class and register it
+    class Address(object):
+        def __init__(self, address, city, state):
+            self.address = address
+            self.city = city
+            self.state = state
+
+    session.cluster.register_user_type(graph_name, 'address', Address)
+
+    query = """
+        schema.vertexLabel('person')
+            .partitionBy('personId', Int)
+            .property('address', typeOf('address'))
+            .property('friends', listOf(Text))
+            .property('skills', setOf(Text))
+            .property('scores', mapOf(Text, Int))
+            .property('last_workout', tupleOf(Text, Date))
+            .create()
+    """
+    session.execute_graph(query)
+
+    # insertion example
+    query = """
+        g.addV('person')
+            .property('personId', pid)
+            .property('address', address)
+            .property('friends', friends)
+            .property('skills', skills)
+            .property('scores', scores)
+            .property('last_workout', last_workout)
+            .next()
+    """
 
-    from cassandra.cluster import Cluster, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT
-    from cassandra.datastax.graph import GraphOptions, GraphProtocol, graph_graphson2_row_factory
+    session.execute_graph(query, {
+        'pid': 3,
+        'address': Address('42 Smith St', 'Quebec', 'QC'),
+        'friends': ['Al', 'Mike', 'Cathy'],
+        'skills': {'food', 'fight', 'chess'},
+        'scores': {'math': 98, 'french': 3},
+        'last_workout': ('CrossFit', datetime.date(2018, 11, 20))
+    })
 
-    # Create a GraphSON2 execution profile
-    ep = GraphExecutionProfile(graph_options=GraphOptions(graph_name='types',
-                                                          graph_protocol=GraphProtocol.GRAPHSON_2_0),
-                               row_factory=graph_graphson2_row_factory)
+Limitations
+-----------
 
-    cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep})
-    s = cluster.connect()
-    s.execute_graph(...)
+Since Python is not a statically-typed language and the UDT/Tuple graphson representation is, you might
+get schema errors when trying to write numerical data. Example::
+
+    session.execute_graph("""
+        schema.vertexLabel('test_tuple').partitionBy('id', Int).property('t', tupleOf(Text, Bigint)).create()
+    """)
 
-Using GraphSON2, all properties will be automatically deserialized to
-its Python representation. Note that it may bring significant
-behavioral change at runtime.
+    session.execute_graph("""
+        g.addV('test_tuple').property('id', 0).property('t', t)
+    """,
+    {'t': ('Test', 99)}
+    )
+
+    # error: [Invalid query] message="Value component 1 is of type int, not bigint"
 
-It is generally recommended to switch to GraphSON2 as it brings more
-consistent support for complex data types in the Graph driver and will
-be activated by default in the next major version (Python dse-driver
-driver 3.0).
+This is because the server requires the client to include a GraphSON schema definition
+with every UDT or tuple query. In the general case, the driver can't determine what Graph type
+is meant by, e.g., an int value, and so it can't serialize the value with the correct type in the schema.
+The driver provides some numerical type-wrapper factories that you can use to specify types:
+
+* :func:`~.to_int`
+* :func:`~.to_bigint`
+* :func:`~.to_smallint`
+* :func:`~.to_float`
+* :func:`~.to_double`
+
+Here's a working example of the case above::
+
+    from cassandra.graph import to_bigint
+
+    session.execute_graph("""
+        g.addV('test_tuple').property('id', 0).property('t', t)
+    """,
+    {'t': ('Test', to_bigint(99))}
+    )
+
+Continuous Paging
+~~~~~~~~~~~~~~~~~
+
+This is another nice feature that comes with the core engine: continuous paging with
+graph queries. If all nodes of the cluster are >= DSE 6.8.0, it is automatically
+enabled under the hood to get the best performance. If you want to explicitly
+enable/disable it, you can do it through the execution profile::
+
+    # Disable it
+    ep = GraphExecutionProfile(..., continuous_paging_options=None)
+    cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep})
+
+    # Enable with a custom max_pages option
+    ep = GraphExecutionProfile(...,
+        continuous_paging_options=ContinuousPagingOptions(max_pages=10))
+    cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep})
diff --git a/docs/graph_fluent.rst b/docs/graph_fluent.rst
index da5eee8db7..876d69a269 100644
--- a/docs/graph_fluent.rst
+++ b/docs/graph_fluent.rst
@@ -11,7 +11,7 @@ The fluent API adds graph features to the core driver::
 
 The Graph fluent API depends on Apache TinkerPop and is not installed by default.
 Make sure the Graph requirements are properly :ref:`installed `.
 
-You might be interested in reading the :doc:`DSE Graph Getting Started documentation ` to
+You might be interested in reading the :doc:`DataStax Graph Getting Started documentation ` to
 understand the basics of creating a graph and its schema.
 
 Graph Traversal Queries
@@ -31,7 +31,7 @@ a `Session` object, or implicitly::
     g.addV('genre').property('genreId', 1).property('name', 'Action').next()
 
     # implicit execution caused by iterating over results
-    for v in g.V().has('genre', 'name', 'Drama').in('belongsTo').valueMap():
+    for v in g.V().has('genre', 'name', 'Drama').in_('belongsTo').valueMap():
        print(v)
 
 These :ref:`Python types ` are also supported transparently::
@@ -71,6 +71,27 @@ If you want to change execution property defaults, please see the :doc:`Executio
 for a more generalized discussion of the API. Graph traversal queries use the same execution
 profile defined for DSE graph. If you need to change the default properties, please refer to
 the :doc:`DSE Graph query documentation page `
 
+Configuring a Traversal Execution Profile for the Core graph engine
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To execute a traversal query with graphs that use the core engine, you need to configure
+a graphson3 execution profile:
+
+.. code-block:: python
+
+    from cassandra.cluster import Cluster, EXEC_PROFILE_GRAPH_DEFAULT
+    from cassandra.datastax.graph import GraphProtocol
+    from cassandra.datastax.graph.fluent import DseGraph
+
+    ep_graphson3 = DseGraph.create_execution_profile(
+        'my_core_graph_name',
+        graph_protocol=GraphProtocol.GRAPHSON_3_0
+    )
+    cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep_graphson3})
+    session = cluster.connect()
+
+    g = DseGraph.traversal_source(session)
+    print(g.V().toList())
+
 Explicit Graph Traversal Execution with a DSE Session
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -100,6 +121,22 @@ Below is an example of explicit execution. For this example, assume the schema h
     for result in session.execute_graph(v_query):
         pprint(result.value)
 
+Converting a traversal to a bytecode query for core graphs requires some more work, because we
+need the cluster context for UDT and tuple types:
+
+.. code-block:: python
+
+    g = DseGraph.traversal_source(session=session)
+    context = {
+        'cluster': cluster,
+        'graph_name': 'the_graph_for_the_query'
+    }
+    addV_query = DseGraph.query_from_traversal(
+        g.addV('genre').property('genreId', 1).property('name', 'Action'),
+        graph_protocol=GraphProtocol.GRAPHSON_3_0,
+        context=context
+    )
+
 Implicit Graph Traversal Execution with TinkerPop
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/docs/index.rst b/docs/index.rst
index 8ea11c6088..7eca865755 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -54,7 +54,10 @@ Contents
     Working with DSE geometry types
 
 :doc:`graph`
-    Graph queries with DSE Graph
+    Graph queries with the Core engine
+
+:doc:`classic_graph`
+    Graph queries with the Classic engine
 
 :doc:`graph_fluent`
     DataStax Graph Fluent API
@@ -84,6 +87,7 @@ Contents
    object_mapper
    geo_types
    graph
+   classic_graph
    graph_fluent
    dse_auth
    dates_and_times
diff --git a/tests/integration/advanced/graph/fluent/test_graph.py b/tests/integration/advanced/graph/fluent/test_graph.py
index 174c15277b..b2fc5d62f6 100644
--- a/tests/integration/advanced/graph/fluent/test_graph.py
+++ b/tests/integration/advanced/graph/fluent/test_graph.py
@@ -654,6 +654,20 @@ def _validate_type(self, g, vertex):
             key = prop.key
             _validate_prop(key, value, self)
 
+    def test_iterate_step(self):
+        """
+        Test to validate that the iterate() step works on all dse versions.
+
+        @jira_ticket PYTHON-1155
+        @expected_result iterate step works
+
+        @test_category dse graph
+        """
+
+        g = self.fetch_traversal_source()
+        generate_classic(self.session)
+        g.addV('person').property('name', 'Person1').iterate()
+
 
 class ExplicitExecutionBase(GraphUnitTestCase):
 

From c9b567a04b85379c803c0c53f0909b00b062b39f Mon Sep 17 00:00:00 2001
From: Alan Boudreault
Date: Mon, 4 Nov 2019 16:41:12 -0500
Subject: [PATCH 003/211] Fix test_iterate_step test

---
 tests/integration/advanced/graph/fluent/test_graph.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/integration/advanced/graph/fluent/test_graph.py b/tests/integration/advanced/graph/fluent/test_graph.py
index b2fc5d62f6..15630aa9b4 100644
--- a/tests/integration/advanced/graph/fluent/test_graph.py
+++ b/tests/integration/advanced/graph/fluent/test_graph.py
@@ -654,7 +654,7 @@ def _validate_type(self, g, vertex):
             key = prop.key
             _validate_prop(key, value, self)
 
-    def test_iterate_step(self):
+    def _test_iterate_step(self, schema, graphson):
         """
         Test to validate that the iterate() step works on all dse versions.
@@ -664,7 +664,7 @@ def test_iterate_step(self): @test_category dse graph """ - g = self.fetch_traversal_source() + g = self.fetch_traversal_source(graphson) generate_classic(self.session) g.addV('person').property('name', 'Person1').iterate() From 37128031b35212bb22121314cae16f5c8a5f49e1 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Wed, 27 Nov 2019 22:07:44 -0600 Subject: [PATCH 004/211] tmp build.yaml --- build.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/build.yaml b/build.yaml index 7c8020cb80..50fb8c1b70 100644 --- a/build.yaml +++ b/build.yaml @@ -57,17 +57,17 @@ schedules: env_vars: | EVENT_LOOP_MANAGER='libev' -ngdg: + tmpngdg: schedule: adhoc branches: - include: [ngdg_master_ft] + include: [merge_ossnext_ngdg] env_vars: | EVENT_LOOP_MANAGER='libev' EXCLUDE_LONG=1 matrix: exclude: - python: [2.7, 3.4, 3.6, 3.7] - - cassandra: ['dse-4.8', 'dse-5.0', dse-6.0', 'dse-6.7'] + - cassandra: ['2.1', '2.2', '3.0', '3.11', 'dse-4.8', 'dse-5.0', 'dse-5.1', 'dse-6.0', 'dse-6.7'] weekly_master: schedule: 0 10 * * 6 From d73a61c43b74c74b3a50af8db58c235ecda5b532 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 10 Dec 2019 14:27:58 -0500 Subject: [PATCH 005/211] version 3.21a labs --- cassandra/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cassandra/__init__.py b/cassandra/__init__.py index c0b1b2d5a6..4fc2fb7303 100644 --- a/cassandra/__init__.py +++ b/cassandra/__init__.py @@ -22,7 +22,7 @@ def emit(self, record): logging.getLogger('cassandra').addHandler(NullHandler()) -__version_info__ = (3, 20, 2, '20191104+labs') +__version_info__ = (3, '21a1', '20191210+labs') __version__ = '.'.join(map(str, __version_info__)) From c29ffbe262eda0fa50ad19b497e1a4a093dedb03 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 10 Dec 2019 14:37:55 -0500 Subject: [PATCH 006/211] version 3.21a labs --- cassandra/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cassandra/__init__.py b/cassandra/__init__.py index 4fc2fb7303..be08299b62 100644 --- a/cassandra/__init__.py +++ b/cassandra/__init__.py @@ -22,7 +22,7 @@ def emit(self, record): logging.getLogger('cassandra').addHandler(NullHandler()) -__version_info__ = (3, '21a1', '20191210+labs') +__version_info__ = (3, '21a1+20191210') __version__ = '.'.join(map(str, __version_info__)) From 2c855944d8b1788b29492e0640739daa56df5a60 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Thu, 16 Jan 2020 08:56:00 -0500 Subject: [PATCH 007/211] Moved some utils module to fix import errors in unit tests --- tests/integration/__init__.py | 2 + tests/integration/cloud/test_cloud.py | 2 +- .../integration/long/test_topology_change.py | 2 +- .../integration/simulacron/test_connection.py | 3 +- .../standard/test_custom_cluster.py | 2 +- tests/integration/util.py | 58 --------------- tests/unit/advanced/test_auth.py | 7 +- tests/unit/test_connection.py | 2 +- tests/util.py | 72 +++++++++++++++++++ 9 files changed, 84 insertions(+), 66 deletions(-) create mode 100644 tests/util.py diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index 7c89ebcfc6..48f5f1aa10 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -166,6 +166,8 @@ def _get_dse_version_from_cass(cass_version): cv_string = os.getenv('CASSANDRA_VERSION', None) mcv_string = os.getenv('MAPPED_CASSANDRA_VERSION', None) try: + print(cv_string) + sasa cassandra_version = Version(cv_string) # env var is set to test-dse for DDAC except: # fallback to 
MAPPED_CASSANDRA_VERSION diff --git a/tests/integration/cloud/test_cloud.py b/tests/integration/cloud/test_cloud.py index 952a92835b..31b5367f3c 100644 --- a/tests/integration/cloud/test_cloud.py +++ b/tests/integration/cloud/test_cloud.py @@ -31,7 +31,7 @@ from mock import patch from tests.integration import requirescloudproxy -from tests.integration.util import wait_until_not_raised +from tests.util import wait_until_not_raised from tests.integration.cloud import CloudProxyCluster, CLOUD_PROXY_SERVER DISALLOWED_CONSISTENCIES = [ diff --git a/tests/integration/long/test_topology_change.py b/tests/integration/long/test_topology_change.py index ccd20779a5..8800cd802b 100644 --- a/tests/integration/long/test_topology_change.py +++ b/tests/integration/long/test_topology_change.py @@ -4,7 +4,7 @@ from cassandra.policies import HostStateListener from tests.integration import PROTOCOL_VERSION, get_node, use_cluster, local from tests.integration.long.utils import decommission -from tests.integration.util import wait_until +from tests.util import wait_until class StateListener(HostStateListener): diff --git a/tests/integration/simulacron/test_connection.py b/tests/integration/simulacron/test_connection.py index 25824cc599..a09950dda6 100644 --- a/tests/integration/simulacron/test_connection.py +++ b/tests/integration/simulacron/test_connection.py @@ -27,8 +27,9 @@ from cassandra.policies import HostStateListener, RoundRobinPolicy from tests import connection_class, thread_pool_executor_class +from tests.util late from tests.integration import requiressimulacron, libevtest -from tests.integration.util import assert_quiescent_pool_state, late +from tests.integration.util import assert_quiescent_pool_state # important to import the patch PROTOCOL_VERSION from the simulacron module from tests.integration.simulacron import SimulacronBase, PROTOCOL_VERSION from cassandra.connection import DEFAULT_CQL_VERSION diff --git a/tests/integration/standard/test_custom_cluster.py b/tests/integration/standard/test_custom_cluster.py index 9208c35cea..1943557ee4 100644 --- a/tests/integration/standard/test_custom_cluster.py +++ b/tests/integration/standard/test_custom_cluster.py @@ -14,7 +14,7 @@ from cassandra.cluster import Cluster, NoHostAvailable from tests.integration import use_singledc, get_cluster, remove_cluster, local -from tests.integration.util import wait_until, wait_until_not_raised +from tests.util import wait_until, wait_until_not_raised try: import unittest2 as unittest diff --git a/tests/integration/util.py b/tests/integration/util.py index a2ce9d5c3f..6215449d1f 100644 --- a/tests/integration/util.py +++ b/tests/integration/util.py @@ -13,7 +13,6 @@ # limitations under the License. from tests.integration import PROTOCOL_VERSION -from functools import wraps import time @@ -50,60 +49,3 @@ def assert_quiescent_pool_state(test_case, cluster, wait=None): test_case.assertEqual(connection.highest_request_id, max(req_ids)) if PROTOCOL_VERSION < 3: test_case.assertEqual(connection.highest_request_id, connection.max_request_id) - - -def wait_until(condition, delay, max_attempts): - """ - Executes a function at regular intervals while the condition - is false and the amount of attempts < maxAttempts. - :param condition: a function - :param delay: the delay in second - :param max_attempts: the maximum number of attempts. 
So the timeout
-    of this function is delay*max_attempts
-    """
-    attempt = 0
-    while not condition() and attempt < max_attempts:
-        attempt += 1
-        time.sleep(delay)
-
-    if attempt >= max_attempts:
-        raise Exception("Condition is still False after {} attempts.".format(max_attempts))
-
-
-def wait_until_not_raised(condition, delay, max_attempts):
-    """
-    Executes a function at regular intervals while the condition
-    doesn't raise an exception and the amount of attempts < maxAttempts.
-    :param condition: a function
-    :param delay: the delay in second
-    :param max_attempts: the maximum number of attemps. So the timeout
-    of this function will be delay*max_attempts
-    """
-    def wrapped_condition():
-        try:
-            condition()
-        except:
-            return False
-
-        return True
-
-    attempt = 0
-    while attempt < (max_attempts-1):
-        attempt += 1
-        if wrapped_condition():
-            return
-
-        time.sleep(delay)
-
-    # last attempt, let the exception raise
-    condition()
-
-
-def late(seconds=1):
-    def decorator(func):
-        @wraps(func)
-        def wrapper(*args, **kwargs):
-            time.sleep(seconds)
-            func(*args, **kwargs)
-        return wrapper
-    return decorator
diff --git a/tests/unit/advanced/test_auth.py b/tests/unit/advanced/test_auth.py
index e4f7e4cf5e..bb411afe2b 100644
--- a/tests/unit/advanced/test_auth.py
+++ b/tests/unit/advanced/test_auth.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
 from puresasl import QOP
 
 try:
@@ -21,10 +22,10 @@
 
 from cassandra.auth import DSEGSSAPIAuthProvider
 
-from tests.integration import requiredse
-
+# Cannot import requiredse from tests.integration
 # This auth provider requires kerberos and puresals
-@requiredse
+DSE_VERSION = os.getenv('DSE_VERSION', None)
+@unittest.skipUnless(DSE_VERSION, "DSE required")
 class TestGSSAPI(unittest.TestCase):
 
     def test_host_resolution(self):
diff --git a/tests/unit/test_connection.py b/tests/unit/test_connection.py
index be205c33a8..68577a396e 100644
--- a/tests/unit/test_connection.py
+++ b/tests/unit/test_connection.py
@@ -31,7 +31,7 @@
 from cassandra.protocol import (write_stringmultimap, write_int, write_string,
                                 SupportedMessage, ProtocolHandler)
 
-from tests.integration.util import wait_until
+from tests.util import wait_until
 
 
 class ConnectionTest(unittest.TestCase):
diff --git a/tests/util.py b/tests/util.py
new file mode 100644
index 0000000000..c5dfd8a387
--- /dev/null
+++ b/tests/util.py
@@ -0,0 +1,72 @@
+# Copyright DataStax, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time
+from functools import wraps
+
+def wait_until(condition, delay, max_attempts):
+    """
+    Executes a function at regular intervals while the condition
+    is false and the number of attempts < max_attempts.
+    :param condition: a function
+    :param delay: the delay in seconds
+    :param max_attempts: the maximum number of attempts. So the timeout
+    of this function is delay*max_attempts
+    """
+    attempt = 0
+    while not condition() and attempt < max_attempts:
+        attempt += 1
+        time.sleep(delay)
+
+    if attempt >= max_attempts:
+        raise Exception("Condition is still False after {} attempts.".format(max_attempts))
+
+
+def wait_until_not_raised(condition, delay, max_attempts):
+    """
+    Executes a function at regular intervals while the condition
+    doesn't raise an exception and the number of attempts < max_attempts.
+    :param condition: a function
+    :param delay: the delay in seconds
+    :param max_attempts: the maximum number of attempts. So the timeout
+    of this function will be delay*max_attempts
+    """
+    def wrapped_condition():
+        try:
+            condition()
+        except:
+            return False
+
+        return True
+
+    attempt = 0
+    while attempt < (max_attempts-1):
+        attempt += 1
+        if wrapped_condition():
+            return
+
+        time.sleep(delay)
+
+    # last attempt, let the exception raise
+    condition()
+
+
+def late(seconds=1):
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            time.sleep(seconds)
+            func(*args, **kwargs)
+        return wrapper
+    return decorator

From 095f542b523e37158c881b1626a0d62a6e58a4ee Mon Sep 17 00:00:00 2001
From: Alan Boudreault
Date: Thu, 16 Jan 2020 08:57:26 -0500
Subject: [PATCH 008/211] remove test code

---
 tests/integration/__init__.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
index 48f5f1aa10..7c89ebcfc6 100644
--- a/tests/integration/__init__.py
+++ b/tests/integration/__init__.py
@@ -166,8 +166,6 @@ def _get_dse_version_from_cass(cass_version):
 cv_string = os.getenv('CASSANDRA_VERSION', None)
 mcv_string = os.getenv('MAPPED_CASSANDRA_VERSION', None)
 try:
-    print(cv_string)
-    sasa
     cassandra_version = Version(cv_string)  # env var is set to test-dse for DDAC
 except:
     # fallback to MAPPED_CASSANDRA_VERSION

From c5a374f9924ebb6b107057dafba2b6fed88a115d Mon Sep 17 00:00:00 2001
From: James Falcon
Date: Fri, 17 Jan 2020 14:47:48 -0600
Subject: [PATCH 009/211] ninja fix invalid syntax in test

---
 tests/integration/simulacron/test_connection.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/simulacron/test_connection.py b/tests/integration/simulacron/test_connection.py
index a09950dda6..afe2685dbf 100644
--- a/tests/integration/simulacron/test_connection.py
+++ b/tests/integration/simulacron/test_connection.py
@@ -27,7 +27,7 @@
 from cassandra.policies import HostStateListener, RoundRobinPolicy
 
 from tests import connection_class, thread_pool_executor_class
-from tests.util late
+from tests.util import late
 from tests.integration import requiressimulacron, libevtest
 from tests.integration.util import assert_quiescent_pool_state
 # important to import the patch PROTOCOL_VERSION from the simulacron module

From d59af15e57d2639b213644b8a9f5b697bcc693af Mon Sep 17 00:00:00 2001
From: James Falcon
Date: Fri, 17 Jan 2020 13:12:53 -0600
Subject: [PATCH 010/211] Changes from test failures.

Move session id creation before insights reporter starts. When the insights
reporter retrieves the startup data, it captures the session id. Since this
runs in a separate thread, the session id is usually created by the time this
capture actually runs. But it's a race, and sessionId can occasionally be
captured as None.

Fix tests/integration/cqlengine/model/test_model.py:TestDeprecationWarning.test_deprecation_warnings:
Asyncio throws warnings on python 3.7+ that aren't relevant to the test, so
ignore them.
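For illustration, a minimal self-contained sketch of the race (the Session and
reporter names here are hypothetical stand-ins, not the driver's actual
classes): any attribute a background thread reads must be assigned before that
thread is started.

    import threading
    import uuid

    class Session(object):
        def __init__(self):
            # assign first: the reporter thread below may read this immediately
            self.session_id = uuid.uuid4()
            # only start the reporter once session_id exists; doing these two
            # steps in the opposite order is the race described above
            self._reporter = threading.Thread(target=self._report_startup)
            self._reporter.start()

        def _report_startup(self):
            # before the fix, this could run while session_id was still unset
            print("insights startup for session {}".format(self.session_id))

    Session()._reporter.join()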
--- cassandra/cluster.py | 3 ++- tests/integration/cqlengine/model/test_model.py | 13 ++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/cassandra/cluster.py b/cassandra/cluster.py index c9a8b6d397..a1cbe9323f 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -2538,6 +2538,8 @@ def __init__(self, cluster, hosts, keyspace=None): msg += " using keyspace '%s'" % self.keyspace raise NoHostAvailable(msg, [h.address for h in hosts]) + self.session_id = uuid.uuid4() + cc_host = self.cluster.get_control_connection_host() valid_insights_version = (cc_host and version_supports_insights(cc_host.dse_version)) if self.cluster.monitor_reporting_enabled and valid_insights_version: @@ -2551,7 +2553,6 @@ def __init__(self, cluster, hosts, keyspace=None): 'not supported by server version {v} on ' 'ControlConnection host {c}'.format(v=cc_host.release_version, c=cc_host)) - self.session_id = uuid.uuid4() log.debug('Started Session with client_id {} and session_id {}'.format(self.cluster.client_id, self.session_id)) diff --git a/tests/integration/cqlengine/model/test_model.py b/tests/integration/cqlengine/model/test_model.py index 81de0ead0c..bbd9e0cbb6 100644 --- a/tests/integration/cqlengine/model/test_model.py +++ b/tests/integration/cqlengine/model/test_model.py @@ -259,10 +259,13 @@ class SensitiveModel(Model): rows[-1] rows[-1:] - self.assertEqual(len(w), 4) - self.assertIn("__table_name_case_sensitive__ will be removed in 4.0.", str(w[0].message)) - self.assertIn("__table_name_case_sensitive__ will be removed in 4.0.", str(w[1].message)) + # Asyncio complains loudly about old syntax on python 3.7+, so get rid of all of those + relevant_warnings = [warn for warn in w if "with (yield from lock)" not in str(warn.message)] + + self.assertEqual(len(relevant_warnings), 4) + self.assertIn("__table_name_case_sensitive__ will be removed in 4.0.", str(relevant_warnings[0].message)) + self.assertIn("__table_name_case_sensitive__ will be removed in 4.0.", str(relevant_warnings[1].message)) self.assertIn("ModelQuerySet indexing with negative indices support will be removed in 4.0.", - str(w[2].message)) + str(relevant_warnings[2].message)) self.assertIn("ModelQuerySet slicing with negative indices support will be removed in 4.0.", - str(w[3].message)) + str(relevant_warnings[3].message)) From 9400af8960d8616b2ff506dc95ccabafccc86334 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 21 Jan 2020 15:11:16 -0500 Subject: [PATCH 011/211] Add all() function to the ResultSet API --- CHANGELOG.rst | 9 +++++++++ cassandra/cluster.py | 9 +++++++++ tests/unit/test_resultset.py | 7 +++++++ 3 files changed, 25 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ae1b50a589..da15a1158b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,11 @@ +3.22.0 +====== +UNRELEASED + +Features +-------- +* Add all() function to the ResultSet API (PYTHON-1203) + 3.21.0 ====== January 15, 2020 @@ -31,6 +39,7 @@ Others * Remove *read_repair_chance table options (PYTHON-1140) * Avoid warnings about unspecified load balancing policy when connecting to a cloud cluster (PYTHON-1177) * Add new DSE CQL keywords (PYTHON-1122) +* Publish binary wheel distributions (PYTHON-1013) Deprecations ------------ diff --git a/cassandra/cluster.py b/cassandra/cluster.py index a1cbe9323f..59c8b61f96 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -4934,6 +4934,15 @@ def current_rows(self): """ return self._current_rows or [] + def all(self): + """ + Returns all the remaining rows as a 
list. This is basically + a convenient shortcut to `list(result_set)`. + + This function is not recommended for queries that return a large number of elements. + """ + return list(self) + def one(self): """ Return a single row of the results or None if empty. This is basically diff --git a/tests/unit/test_resultset.py b/tests/unit/test_resultset.py index c1a2562360..1af3e849b6 100644 --- a/tests/unit/test_resultset.py +++ b/tests/unit/test_resultset.py @@ -195,6 +195,13 @@ def test_one(self): self.assertEqual(rs.one(), first) + def test_all(self): + first, second = Mock(), Mock() + rs1 = ResultSet(Mock(has_more_pages=False), [first, second]) + rs2 = ResultSet(Mock(has_more_pages=False), [first, second]) + + self.assertEqual(rs1.all(), list(rs2)) + @patch('cassandra.cluster.warn') def test_indexing_deprecation(self, mocked_warn): # normally we'd use catch_warnings to test this, but that doesn't work From 3a5bd85f0cccb4a74399dc5eb3b9e69e9af532c7 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Thu, 23 Jan 2020 12:55:20 -0600 Subject: [PATCH 012/211] Test 68 (#1066) 6.8 test changes * Materialized view tests: You now can't create a MV with clustering order by unless you specify ALL of the clustering columns. So tests are updated to do that. * DuplicateRpcTest: We used to be able to update system.peers, but that's not allowed anymore, so converted it to a simulacron test. Updated another test to use mocks for the same reason. * Updated virtual keyspace test to not verify exact structure since that will change between versions. Rather, verify a few known values that should exist in all versions to verify we parsed the structure correctly. * Tombstone settings have moved under guardrails, and delete entire row instead of just cells in tombstone tests due to DB-2426. * Added a few optional env vars to allow management of an external cluster * DSE version checks --- tests/integration/__init__.py | 10 +- .../integration/cqlengine/query/test_named.py | 2 +- tests/integration/long/test_failure_types.py | 9 +- tests/integration/simulacron/test_cluster.py | 41 +++- tests/integration/standard/test_cluster.py | 185 ++++++------------ .../standard/test_control_connection.py | 3 +- tests/integration/standard/test_metadata.py | 45 ++--- tests/integration/standard/test_query.py | 8 +- 8 files changed, 141 insertions(+), 162 deletions(-) diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index 7c89ebcfc6..0a3d6542a0 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -125,7 +125,7 @@ def _get_cass_version_from_dse(dse_version): cass_ver = '4.0.0.2349' else: cass_ver = '4.0.0.' 
+ ''.join(dse_version.split('.')) - elif dse_version.startswith('6.7'): + elif Version(dse_version) >= Version('6.7'): if dse_version == '6.7.0': cass_ver = "4.0.0.67" else: @@ -472,7 +472,13 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, if CCM_CLUSTER: log.debug("Using external CCM cluster {0}".format(CCM_CLUSTER.name)) else: - log.debug("Using unnamed external cluster") + ccm_path = os.getenv("CCM_PATH", None) + ccm_name = os.getenv("CCM_NAME", None) + if ccm_path and ccm_name: + CCM_CLUSTER = CCMClusterFactory.load(ccm_path, ccm_name) + log.debug("Using external CCM cluster {0}".format(CCM_CLUSTER.name)) + else: + log.debug("Using unnamed external cluster") if set_keyspace and start: setup_keyspace(ipformat=ipformat, wait=False) return diff --git a/tests/integration/cqlengine/query/test_named.py b/tests/integration/cqlengine/query/test_named.py index 4907c26661..3a6f83b32e 100644 --- a/tests/integration/cqlengine/query/test_named.py +++ b/tests/integration/cqlengine/query/test_named.py @@ -335,7 +335,7 @@ def test_named_table_with_mv(self): SELECT * FROM {0}.scores WHERE game IS NOT NULL AND score IS NOT NULL AND user IS NOT NULL AND year IS NOT NULL AND month IS NOT NULL AND day IS NOT NULL PRIMARY KEY (game, score, user, year, month, day) - WITH CLUSTERING ORDER BY (score DESC)""".format(ks) + WITH CLUSTERING ORDER BY (score DESC, user DESC, year DESC, month DESC, day DESC)""".format(ks) self.session.execute(create_mv_alltime) diff --git a/tests/integration/long/test_failure_types.py b/tests/integration/long/test_failure_types.py index a63b36649b..486313e096 100644 --- a/tests/integration/long/test_failure_types.py +++ b/tests/integration/long/test_failure_types.py @@ -16,6 +16,8 @@ import sys import traceback import time + +from ccmlib.dse_cluster import DseCluster from mock import Mock from cassandra.policies import HostFilterPolicy, RoundRobinPolicy @@ -29,7 +31,7 @@ from tests.integration import ( use_singledc, PROTOCOL_VERSION, get_cluster, setup_keyspace, remove_cluster, get_node, start_cluster_wait_for_up, requiresmallclockgranularity, -) + local) try: @@ -40,6 +42,7 @@ log = logging.getLogger(__name__) +@local def setup_module(): """ We need some custom setup for this module. 
All unit tests in this module @@ -52,7 +55,7 @@ def setup_module(): use_singledc(start=False) ccm_cluster = get_cluster() ccm_cluster.stop() - config_options = {'tombstone_failure_threshold': 2000, 'tombstone_warn_threshold': 1000} + config_options = {'guardrails.tombstone_failure_threshold': 2000, 'guardrails.tombstone_warn_threshold': 1000} ccm_cluster.set_configuration_options(config_options) start_cluster_wait_for_up(ccm_cluster) setup_keyspace() @@ -252,7 +255,7 @@ def test_tombstone_overflow_read_failure(self): parameters = [(x,) for x in range(3000)] self.execute_concurrent_args_helper(self.session, statement, parameters) - statement = self.session.prepare("DELETE v1 FROM test3rf.test2 WHERE k = 1 AND v0 =?") + statement = self.session.prepare("DELETE FROM test3rf.test2 WHERE k = 1 AND v0 =?") parameters = [(x,) for x in range(2001)] self.execute_concurrent_args_helper(self.session, statement, parameters) diff --git a/tests/integration/simulacron/test_cluster.py b/tests/integration/simulacron/test_cluster.py index ec20c106ba..b89f564f08 100644 --- a/tests/integration/simulacron/test_cluster.py +++ b/tests/integration/simulacron/test_cluster.py @@ -16,14 +16,20 @@ except ImportError: import unittest # noqa -from tests.integration.simulacron import SimulacronCluster -from tests.integration import (requiressimulacron, PROTOCOL_VERSION) -from tests.integration.simulacron.utils import prime_query +import logging +from packaging.version import Version + +import cassandra +from tests.integration.simulacron import SimulacronCluster, SimulacronBase +from tests.integration import (requiressimulacron, PROTOCOL_VERSION, DSE_VERSION, MockLoggingHandler) +from tests.integration.simulacron.utils import prime_query, start_and_prime_singledc from cassandra import (WriteTimeout, WriteType, ConsistencyLevel, UnresolvableContactPoints) -from cassandra.cluster import Cluster +from cassandra.cluster import Cluster, ControlConnection + +PROTOCOL_VERSION = min(4, PROTOCOL_VERSION if (DSE_VERSION is None or DSE_VERSION >= Version('5.0')) else 3) @requiressimulacron class ClusterTests(SimulacronCluster): @@ -78,3 +84,30 @@ def test_connection_with_only_unresolvable_contact_points(self): self.cluster = Cluster(['dns.invalid'], protocol_version=PROTOCOL_VERSION, compression=False) + + +@requiressimulacron +class DuplicateRpcTest(SimulacronCluster): + connect = False + + def test_duplicate(self): + mock_handler = MockLoggingHandler() + logger = logging.getLogger(cassandra.cluster.__name__) + logger.addHandler(mock_handler) + address_column = "native_transport_address" if DSE_VERSION and DSE_VERSION > Version("6.0") else "rpc_address" + rows = [ + {"peer": "127.0.0.1", "data_center": "dc", "host_id": "dontcare1", "rack": "rack1", + "release_version": "3.11.4", address_column: "127.0.0.1", "schema_version": "dontcare", "tokens": "1"}, + {"peer": "127.0.0.2", "data_center": "dc", "host_id": "dontcare2", "rack": "rack1", + "release_version": "3.11.4", address_column: "127.0.0.2", "schema_version": "dontcare", "tokens": "2"}, + ] + prime_query(ControlConnection._SELECT_PEERS, rows=rows) + + cluster = Cluster(protocol_version=PROTOCOL_VERSION, compression=False) + session = cluster.connect(wait_for_all_pools=True) + + warnings = mock_handler.messages.get("warning") + self.assertEqual(len(warnings), 1) + self.assertTrue('multiple hosts with the same endpoint' in warnings[0]) + logger.removeHandler(mock_handler) + cluster.shutdown() diff --git a/tests/integration/standard/test_cluster.py 
b/tests/integration/standard/test_cluster.py index eae7d64b71..2314931b7d 100644 --- a/tests/integration/standard/test_cluster.py +++ b/tests/integration/standard/test_cluster.py @@ -27,7 +27,7 @@ from packaging.version import Version import cassandra -from cassandra.cluster import Cluster, NoHostAvailable, ExecutionProfile, EXEC_PROFILE_DEFAULT +from cassandra.cluster import Cluster, NoHostAvailable, ExecutionProfile, EXEC_PROFILE_DEFAULT, ControlConnection from cassandra.concurrent import execute_concurrent from cassandra.policies import (RoundRobinPolicy, ExponentialReconnectionPolicy, RetryPolicy, SimpleConvictionPolicy, HostDistance, @@ -502,79 +502,70 @@ def test_refresh_schema_type(self): @local @notwindows def test_refresh_schema_no_wait(self): - contact_points = [CASSANDRA_IP] - with Cluster(protocol_version=PROTOCOL_VERSION, max_schema_agreement_wait=10, - contact_points=contact_points, - execution_profiles= - {EXEC_PROFILE_DEFAULT: ExecutionProfile(load_balancing_policy= - HostFilterPolicy( - RoundRobinPolicy(), lambda host: host.address == CASSANDRA_IP - ))}) as cluster: - session = cluster.connect() - - schema_ver = session.execute("SELECT schema_version FROM system.local WHERE key='local'")[0][0] - new_schema_ver = uuid4() - session.execute("UPDATE system.local SET schema_version=%s WHERE key='local'", (new_schema_ver,)) - - try: - agreement_timeout = 1 - - # cluster agreement wait exceeded - c = Cluster(protocol_version=PROTOCOL_VERSION, max_schema_agreement_wait=agreement_timeout) - c.connect() - self.assertTrue(c.metadata.keyspaces) - - # cluster agreement wait used for refresh - original_meta = c.metadata.keyspaces - start_time = time.time() - self.assertRaisesRegexp(Exception, r"Schema metadata was not refreshed.*", c.refresh_schema_metadata) - end_time = time.time() - self.assertGreaterEqual(end_time - start_time, agreement_timeout) - self.assertIs(original_meta, c.metadata.keyspaces) - - # refresh wait overrides cluster value - original_meta = c.metadata.keyspaces - start_time = time.time() - c.refresh_schema_metadata(max_schema_agreement_wait=0) - end_time = time.time() - self.assertLess(end_time - start_time, agreement_timeout) - self.assertIsNot(original_meta, c.metadata.keyspaces) - self.assertEqual(original_meta, c.metadata.keyspaces) - - c.shutdown() - - refresh_threshold = 0.5 - # cluster agreement bypass - c = Cluster(protocol_version=PROTOCOL_VERSION, max_schema_agreement_wait=0) - start_time = time.time() - s = c.connect() - end_time = time.time() - self.assertLess(end_time - start_time, refresh_threshold) - self.assertTrue(c.metadata.keyspaces) - - # cluster agreement wait used for refresh - original_meta = c.metadata.keyspaces - start_time = time.time() - c.refresh_schema_metadata() - end_time = time.time() - self.assertLess(end_time - start_time, refresh_threshold) - self.assertIsNot(original_meta, c.metadata.keyspaces) - self.assertEqual(original_meta, c.metadata.keyspaces) - - # refresh wait overrides cluster value - original_meta = c.metadata.keyspaces - start_time = time.time() - self.assertRaisesRegexp(Exception, r"Schema metadata was not refreshed.*", c.refresh_schema_metadata, - max_schema_agreement_wait=agreement_timeout) - end_time = time.time() - self.assertGreaterEqual(end_time - start_time, agreement_timeout) - self.assertIs(original_meta, c.metadata.keyspaces) - c.shutdown() - finally: - # TODO once fixed this connect call - session = cluster.connect() - session.execute("UPDATE system.local SET schema_version=%s WHERE key='local'", 
(schema_ver,)) - + original_wait_for_responses = connection.Connection.wait_for_responses + + def patched_wait_for_responses(*args, **kwargs): + # When selecting schema version, replace the real schema UUID with an unexpected UUID + response = original_wait_for_responses(*args, **kwargs) + if len(args) > 2 and hasattr(args[2], "query") and args[2].query == "SELECT schema_version FROM system.local WHERE key='local'": + new_uuid = uuid4() + response[1].parsed_rows[0] = (new_uuid,) + return response + + with patch.object(connection.Connection, "wait_for_responses", patched_wait_for_responses): + agreement_timeout = 1 + + # cluster agreement wait exceeded + c = Cluster(protocol_version=PROTOCOL_VERSION, max_schema_agreement_wait=agreement_timeout) + c.connect() + self.assertTrue(c.metadata.keyspaces) + + # cluster agreement wait used for refresh + original_meta = c.metadata.keyspaces + start_time = time.time() + self.assertRaisesRegexp(Exception, r"Schema metadata was not refreshed.*", c.refresh_schema_metadata) + end_time = time.time() + self.assertGreaterEqual(end_time - start_time, agreement_timeout) + self.assertIs(original_meta, c.metadata.keyspaces) + + # refresh wait overrides cluster value + original_meta = c.metadata.keyspaces + start_time = time.time() + c.refresh_schema_metadata(max_schema_agreement_wait=0) + end_time = time.time() + self.assertLess(end_time - start_time, agreement_timeout) + self.assertIsNot(original_meta, c.metadata.keyspaces) + self.assertEqual(original_meta, c.metadata.keyspaces) + + c.shutdown() + + refresh_threshold = 0.5 + # cluster agreement bypass + c = Cluster(protocol_version=PROTOCOL_VERSION, max_schema_agreement_wait=0) + start_time = time.time() + s = c.connect() + end_time = time.time() + self.assertLess(end_time - start_time, refresh_threshold) + self.assertTrue(c.metadata.keyspaces) + + # cluster agreement wait used for refresh + original_meta = c.metadata.keyspaces + start_time = time.time() + c.refresh_schema_metadata() + end_time = time.time() + self.assertLess(end_time - start_time, refresh_threshold) + self.assertIsNot(original_meta, c.metadata.keyspaces) + self.assertEqual(original_meta, c.metadata.keyspaces) + + # refresh wait overrides cluster value + original_meta = c.metadata.keyspaces + start_time = time.time() + self.assertRaisesRegexp(Exception, r"Schema metadata was not refreshed.*", c.refresh_schema_metadata, + max_schema_agreement_wait=agreement_timeout) + end_time = time.time() + self.assertGreaterEqual(end_time - start_time, agreement_timeout) + self.assertIs(original_meta, c.metadata.keyspaces) + c.shutdown() def test_trace(self): """ @@ -1480,52 +1471,6 @@ def test_prepare_on_ignored_hosts(self): cluster.shutdown() -@local -class DuplicateRpcTest(unittest.TestCase): - - load_balancing_policy = HostFilterPolicy(RoundRobinPolicy(), - lambda host: host.address == "127.0.0.1") - - def setUp(self): - self.cluster = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(load_balancing_policy=self.load_balancing_policy)}) - self.session = self.cluster.connect() - - self.address_column = "native_transport_address" if DSE_VERSION and DSE_VERSION >= Version("6.0") else "rpc_address" - self.session.execute("UPDATE system.peers SET {} = '127.0.0.1' WHERE peer='127.0.0.2'". - format(self.address_column)) - - def tearDown(self): - self.session.execute("UPDATE system.peers SET {} = '127.0.0.2' WHERE peer='127.0.0.2'". 
- format(self.address_column)) - self.cluster.shutdown() - - def test_duplicate(self): - """ - Test duplicate RPC addresses. - - Modifies the system.peers table to make hosts have the same rpc address. Ensures such hosts are filtered out and a message is logged - - @since 3.4 - @jira_ticket PYTHON-366 - @expected_result only one hosts' metadata will be populated - - @test_category metadata - """ - mock_handler = MockLoggingHandler() - logger = logging.getLogger(cassandra.cluster.__name__) - logger.addHandler(mock_handler) - test_cluster = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(load_balancing_policy=self.load_balancing_policy)}) - - test_cluster.connect() - warnings = mock_handler.messages.get("warning") - self.assertEqual(len(warnings), 1) - self.assertTrue('multiple' in warnings[0]) - logger.removeHandler(mock_handler) - test_cluster.shutdown() - - @protocolv5 class BetaProtocolTest(unittest.TestCase): diff --git a/tests/integration/standard/test_control_connection.py b/tests/integration/standard/test_control_connection.py index b928cd2b68..b91d29c4e6 100644 --- a/tests/integration/standard/test_control_connection.py +++ b/tests/integration/standard/test_control_connection.py @@ -14,6 +14,7 @@ # # # +from cassandra import InvalidRequest try: import unittest2 as unittest @@ -43,7 +44,7 @@ def setUp(self): def tearDown(self): try: self.session.execute("DROP KEYSPACE keyspacetodrop ") - except (ConfigurationException): + except (ConfigurationException, InvalidRequest): # we already removed the keyspace. pass self.cluster.shutdown() diff --git a/tests/integration/standard/test_metadata.py b/tests/integration/standard/test_metadata.py index a5038672d4..6169e5d951 100644 --- a/tests/integration/standard/test_metadata.py +++ b/tests/integration/standard/test_metadata.py @@ -723,7 +723,7 @@ def test_refresh_metadata_for_mv(self): try: self.assertNotIn("mv1", cluster2.metadata.keyspaces[self.keyspace_name].tables[self.function_table_name].views) - self.session.execute("CREATE MATERIALIZED VIEW {0}.mv1 AS SELECT b FROM {0}.{1} WHERE b IS NOT NULL PRIMARY KEY (a, b)" + self.session.execute("CREATE MATERIALIZED VIEW {0}.mv1 AS SELECT a, b FROM {0}.{1} WHERE b IS NOT NULL PRIMARY KEY (a, b)" .format(self.keyspace_name, self.function_table_name)) self.assertNotIn("mv1", cluster2.metadata.keyspaces[self.keyspace_name].tables[self.function_table_name].views) @@ -745,7 +745,7 @@ def test_refresh_metadata_for_mv(self): cluster3.connect() try: self.assertNotIn("mv2", cluster3.metadata.keyspaces[self.keyspace_name].tables[self.function_table_name].views) - self.session.execute("CREATE MATERIALIZED VIEW {0}.mv2 AS SELECT b FROM {0}.{1} WHERE b IS NOT NULL PRIMARY KEY (a, b)" + self.session.execute("CREATE MATERIALIZED VIEW {0}.mv2 AS SELECT a, b FROM {0}.{1} WHERE b IS NOT NULL PRIMARY KEY (a, b)" .format(self.keyspace_name, self.function_table_name)) self.assertNotIn("mv2", cluster3.metadata.keyspaces[self.keyspace_name].tables[self.function_table_name].views) cluster3.refresh_materialized_view_metadata(self.keyspace_name, 'mv2') @@ -2007,10 +2007,15 @@ def test_dct_alias(self): dct_table = self.cluster.metadata.keyspaces.get(self.ks_name).tables.get(self.function_table_name) # Format can very slightly between versions, strip out whitespace for consistency sake - self.assertTrue("c1'org.apache.cassandra.db.marshal.DynamicCompositeType(" - "s=>org.apache.cassandra.db.marshal.UTF8Type," - "i=>org.apache.cassandra.db.marshal.Int32Type)'" - in 
dct_table.as_cql_query().replace(" ", ""))
+        table_text = dct_table.as_cql_query().replace(" ", "")
+        dynamic_type_text = "c1'org.apache.cassandra.db.marshal.DynamicCompositeType("
+        self.assertIn("c1'org.apache.cassandra.db.marshal.DynamicCompositeType(", table_text)
+        # Types within the composite can come out in random order, so grab the type definition and find each one
+        type_definition_start = table_text.index("(", table_text.find(dynamic_type_text))
+        type_definition_end = table_text.index(")")
+        type_definition_text = table_text[type_definition_start:type_definition_end]
+        self.assertIn("s=>org.apache.cassandra.db.marshal.UTF8Type", type_definition_text)
+        self.assertIn("i=>org.apache.cassandra.db.marshal.Int32Type", type_definition_text)
 
 
 @greaterthanorequalcass30
@@ -2018,7 +2023,7 @@ class MaterializedViewMetadataTestSimple(BasicSharedKeyspaceUnitTestCase):
 
     def setUp(self):
         self.session.execute("CREATE TABLE {0}.{1} (pk int PRIMARY KEY, c int)".format(self.keyspace_name, self.function_table_name))
-        self.session.execute("CREATE MATERIALIZED VIEW {0}.mv1 AS SELECT c FROM {0}.{1} WHERE c IS NOT NULL PRIMARY KEY (pk, c)".format(self.keyspace_name, self.function_table_name))
+        self.session.execute("CREATE MATERIALIZED VIEW {0}.mv1 AS SELECT pk, c FROM {0}.{1} WHERE c IS NOT NULL PRIMARY KEY (pk, c)".format(self.keyspace_name, self.function_table_name))
 
     def tearDown(self):
         self.session.execute("DROP MATERIALIZED VIEW {0}.mv1".format(self.keyspace_name))
@@ -2089,7 +2094,7 @@ def test_materialized_view_metadata_drop(self):
         self.assertDictEqual({}, self.cluster.metadata.keyspaces[self.keyspace_name].tables[self.function_table_name].views)
         self.assertDictEqual({}, self.cluster.metadata.keyspaces[self.keyspace_name].views)
 
-        self.session.execute("CREATE MATERIALIZED VIEW {0}.mv1 AS SELECT c FROM {0}.{1} WHERE c IS NOT NULL PRIMARY KEY (pk, c)".format(self.keyspace_name, self.function_table_name))
+        self.session.execute("CREATE MATERIALIZED VIEW {0}.mv1 AS SELECT pk, c FROM {0}.{1} WHERE c IS NOT NULL PRIMARY KEY (pk, c)".format(self.keyspace_name, self.function_table_name))
 
 
 @greaterthanorequalcass30
@@ -2232,7 +2237,7 @@ def test_base_table_column_addition_mv(self):
         SELECT * FROM {0}.scores WHERE game IS NOT NULL AND score IS NOT NULL AND user IS NOT NULL AND year IS NOT NULL AND month IS NOT NULL AND day IS NOT NULL
         PRIMARY KEY (game, score, user, year, month, day)
-        WITH CLUSTERING ORDER BY (score DESC)""".format(self.keyspace_name)
+        WITH CLUSTERING ORDER BY (score DESC, user ASC, year ASC, month ASC, day ASC)""".format(self.keyspace_name)
 
         self.session.execute(create_mv)
 
@@ -2462,22 +2467,6 @@ def _assert_group_keys_by_host(self, keys, table_name, stmt):
 class VirtualKeypaceTest(BasicSharedKeyspaceUnitTestCase):
     virtual_ks_names = ('system_virtual_schema', 'system_views')
 
-    virtual_ks_structure = {
-        # keyspaces
-        'system_virtual_schema': {
-            # tables: columns.
columns are a set because we're comparing unordered - 'keyspaces': {'keyspace_name'}, - 'tables': {'comment', 'keyspace_name', 'table_name'}, - 'columns': {'clustering_order', 'column_name', 'column_name_bytes', - 'keyspace_name', 'kind', 'position', 'table_name', - 'type'} - }, - 'system_views': { - 'sstable_tasks': {'keyspace_name', 'kind', 'progress', - 'table_name', 'task_id', 'total', 'unit'} - } - } - def test_existing_keyspaces_have_correct_virtual_tags(self): for name, ks in self.cluster.metadata.keyspaces.items(): if name in self.virtual_ks_names: @@ -2514,5 +2503,7 @@ def test_virtual_keyspaces_have_expected_schema_structure(self): tab.columns.keys() ) - self.assertDictEqual(ingested_virtual_ks_structure, - self.virtual_ks_structure) + # Identify a couple known values to verify we parsed the structure correctly + self.assertIn('table_name', ingested_virtual_ks_structure['system_virtual_schema']['tables']) + self.assertIn('type', ingested_virtual_ks_structure['system_virtual_schema']['columns']) + self.assertIn('total', ingested_virtual_ks_structure['system_views']['sstable_tasks']) diff --git a/tests/integration/standard/test_query.py b/tests/integration/standard/test_query.py index 62478974eb..63f94399a6 100644 --- a/tests/integration/standard/test_query.py +++ b/tests/integration/standard/test_query.py @@ -1193,25 +1193,25 @@ def test_mv_filtering(self): SELECT * FROM {0}.scores WHERE game IS NOT NULL AND score IS NOT NULL AND user IS NOT NULL AND year IS NOT NULL AND month IS NOT NULL AND day IS NOT NULL PRIMARY KEY (game, score, user, year, month, day) - WITH CLUSTERING ORDER BY (score DESC)""".format(self.keyspace_name) + WITH CLUSTERING ORDER BY (score DESC, user ASC, year ASC, month ASC, day ASC)""".format(self.keyspace_name) create_mv_dailyhigh = """CREATE MATERIALIZED VIEW {0}.dailyhigh AS SELECT * FROM {0}.scores WHERE game IS NOT NULL AND year IS NOT NULL AND month IS NOT NULL AND day IS NOT NULL AND score IS NOT NULL AND user IS NOT NULL PRIMARY KEY ((game, year, month, day), score, user) - WITH CLUSTERING ORDER BY (score DESC)""".format(self.keyspace_name) + WITH CLUSTERING ORDER BY (score DESC, user ASC)""".format(self.keyspace_name) create_mv_monthlyhigh = """CREATE MATERIALIZED VIEW {0}.monthlyhigh AS SELECT * FROM {0}.scores WHERE game IS NOT NULL AND year IS NOT NULL AND month IS NOT NULL AND score IS NOT NULL AND user IS NOT NULL AND day IS NOT NULL PRIMARY KEY ((game, year, month), score, user, day) - WITH CLUSTERING ORDER BY (score DESC)""".format(self.keyspace_name) + WITH CLUSTERING ORDER BY (score DESC, user ASC, day ASC)""".format(self.keyspace_name) create_mv_filtereduserhigh = """CREATE MATERIALIZED VIEW {0}.filtereduserhigh AS SELECT * FROM {0}.scores WHERE user in ('jbellis', 'pcmanus') AND game IS NOT NULL AND score IS NOT NULL AND year is NOT NULL AND day is not NULL and month IS NOT NULL PRIMARY KEY (game, score, user, year, month, day) - WITH CLUSTERING ORDER BY (score DESC)""".format(self.keyspace_name) + WITH CLUSTERING ORDER BY (score DESC, user ASC, year ASC, month ASC, day ASC)""".format(self.keyspace_name) self.session.execute(create_mv_alltime) self.session.execute(create_mv_dailyhigh) From d28014c2a08b1d1a0317bd3398db42a8a501dc6e Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Thu, 23 Jan 2020 15:48:41 -0500 Subject: [PATCH 013/211] Minor docs improvements --- cassandra/cluster.py | 1 + docs/api/cassandra/cluster.rst | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/cassandra/cluster.py b/cassandra/cluster.py 
b/cassandra/cluster.py
index d7a963791b..a33404c5fd 100644
--- a/cassandra/cluster.py
+++ b/cassandra/cluster.py
@@ -2754,6 +2754,7 @@
         Determine the GraphSON protocol and row factory for a graph query. This is useful
         to configure automatically the execution profile when executing a query on a
         core graph.
+
         If `graph_protocol` is not explicitly specified, the following rules apply:
         - Default to GraphProtocol.GRAPHSON_1_0, or GRAPHSON_2_0 if the `graph_language` is not gremlin-groovy.
         - If `graph_options.graph_name` is specified and is a Core graph, set GraphSON_3_0.
diff --git a/docs/api/cassandra/cluster.rst b/docs/api/cassandra/cluster.rst
index 71e110559e..459f287b4c 100644
--- a/docs/api/cassandra/cluster.rst
+++ b/docs/api/cassandra/cluster.rst
@@ -120,13 +120,19 @@
 
    .. automethod:: set_meta_refresh_enabled
 
-.. autoclass:: ExecutionProfile (load_balancing_policy=, retry_policy=None, consistency_level=LOCAL_ONE, serial_consistency_level=None, request_timeout=10.0, row_factory=, speculative_execution_policy=None)
+.. autoclass:: ExecutionProfile (load_balancing_policy=, retry_policy=None, consistency_level=ConsistencyLevel.LOCAL_ONE, serial_consistency_level=None, request_timeout=10.0, row_factory=, speculative_execution_policy=None)
    :members:
    :exclude-members: consistency_level
 
    .. autoattribute:: consistency_level
       :annotation: = LOCAL_ONE
 
+.. autoclass:: GraphExecutionProfile (load_balancing_policy=_NOT_SET, retry_policy=None, consistency_level=ConsistencyLevel.LOCAL_ONE, serial_consistency_level=None, request_timeout=30.0, row_factory=, graph_options=None, continuous_paging_options=_NOT_SET)
+   :members:
+
+.. autoclass:: GraphAnalyticsExecutionProfile (load_balancing_policy=None, retry_policy=None, consistency_level=ConsistencyLevel.LOCAL_ONE, serial_consistency_level=None, request_timeout=3600. * 24. * 7., row_factory=, graph_options=None)
+   :members:
+
 .. autodata:: EXEC_PROFILE_DEFAULT
    :annotation:

From e1739be48cd6b318e96277ab7fb646e7287f63a6 Mon Sep 17 00:00:00 2001
From: Alan Boudreault
Date: Fri, 24 Jan 2020 14:27:42 -0500
Subject: [PATCH 014/211] Remove tmp build section

---
 build.yaml            | 12 ------------
 cassandra/__init__.py |  2 +-
 2 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/build.yaml b/build.yaml
index 40c8a15d9d..985a5d66be 100644
--- a/build.yaml
+++ b/build.yaml
@@ -57,18 +57,6 @@ schedules:
     env_vars: |
       EVENT_LOOP_MANAGER='libev'
 
-  tmpngdg:
-    schedule: adhoc
-    branches:
-      include: [merge_ossnext_ngdg]
-    env_vars: |
-      EVENT_LOOP_MANAGER='libev'
-      EXCLUDE_LONG=1
-    matrix:
-      exclude:
-        - python: [2.7, 3.4, 3.6, 3.7]
-        - cassandra: ['2.1', '2.2', '3.0', '3.11', 'dse-4.8', 'dse-5.0', 'dse-5.1', 'dse-6.0', 'dse-6.7']
-
 weekly_master:
   schedule: 0 10 * * 6
   disable_pull_requests: true
diff --git a/cassandra/__init__.py b/cassandra/__init__.py
index d5d02d29e4..76c7a649de 100644
--- a/cassandra/__init__.py
+++ b/cassandra/__init__.py
@@ -22,7 +22,7 @@ def emit(self, record):
 
 logging.getLogger('cassandra').addHandler(NullHandler())
 
-__version_info__ = (3, 21, 0, 'post0+20200123')
+__version_info__ = (3, 21, 0, 'post0+20200127')
 __version__ = '.'.join(map(str, __version_info__))

From 19514c629ec886e89765c79b0296f037ae83f1c6 Mon Sep 17 00:00:00 2001
From: Alan Boudreault
Date: Tue, 28 Jan 2020 11:37:11 -0500
Subject: [PATCH 015/211] Fix some minor test failures: virtual tables; replaced
 time.sleep with wait_until
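For reference, the polling idiom these changes standardize on, as a
self-contained sketch: the wait_until_not_raised below is a simplified copy of
the helper added to tests/util.py in PATCH 007, and the discovered/register_type
names are hypothetical stand-ins for the actual driver calls.

    import threading
    import time

    def wait_until_not_raised(condition, delay, max_attempts):
        # retry `condition` until it stops raising, sleeping `delay` between tries
        for _ in range(max_attempts - 1):
            try:
                return condition()
            except Exception:
                time.sleep(delay)
        return condition()  # last attempt: let any exception propagate

    discovered = {}

    def register_type():
        # stands in for cluster.register_user_type(), which raises until the
        # new UDT is visible in the driver's schema metadata
        if 'udt' not in discovered:
            raise RuntimeError("type not discovered yet")

    # the type becomes visible a moment later; poll for it instead of
    # sleeping a fixed 2 seconds and hoping that was long enough
    threading.Timer(0.2, lambda: discovered.update(udt=True)).start()
    wait_until_not_raised(register_type, delay=0.05, max_attempts=100)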
--- build.yaml | 2 +- cassandra/metadata.py | 24 ++++++++++++------- .../advanced/graph/fluent/test_graph.py | 7 ++++-- .../advanced/graph/test_graph_query.py | 2 ++ tests/integration/simulacron/utils.py | 4 +++- 5 files changed, 27 insertions(+), 12 deletions(-) diff --git a/build.yaml b/build.yaml index 985a5d66be..460192131d 100644 --- a/build.yaml +++ b/build.yaml @@ -162,7 +162,7 @@ cassandra: - 'dse-5.1' - 'dse-6.0' - 'dse-6.7' - - 'dse-6.8' + - 'dse-6.8.0' env: CYTHON: diff --git a/cassandra/metadata.py b/cassandra/metadata.py index 4c51392a6b..b4dc19fbf0 100644 --- a/cassandra/metadata.py +++ b/cassandra/metadata.py @@ -1407,7 +1407,6 @@ def _make_option_strings(cls, options_map): return list(sorted(ret)) -# TODO This should inherit V4 later? class TableMetadataDSE68(TableMetadataV3): vertex = None @@ -2932,7 +2931,7 @@ def _query_all(self): QueryMessage(query=self._SELECT_VIRTUAL_KEYSPACES, consistency_level=cl), QueryMessage(query=self._SELECT_VIRTUAL_TABLES, consistency_level=cl), QueryMessage(query=self._SELECT_VIRTUAL_COLUMNS, consistency_level=cl), - # dse7.0 only + # dse6.8 only QueryMessage(query=self._SELECT_VERTICES, consistency_level=cl), QueryMessage(query=self._SELECT_EDGES, consistency_level=cl) ] @@ -2969,12 +2968,21 @@ def _query_all(self): self.indexes_result = self._handle_results(indexes_success, indexes_result) self.views_result = self._handle_results(views_success, views_result) - self.virtual_keyspaces_result = self._handle_results(virtual_ks_success, - virtual_ks_result) - self.virtual_tables_result = self._handle_results(virtual_table_success, - virtual_table_result) - self.virtual_columns_result = self._handle_results(virtual_column_success, - virtual_column_result) + # These tables don't exist in some DSE versions reporting 4.X so we can + # ignore them if we got an error + self.virtual_keyspaces_result = self._handle_results( + virtual_ks_success, virtual_ks_result, + expected_failures=(InvalidRequest,) + ) + self.virtual_tables_result = self._handle_results( + virtual_table_success, virtual_table_result, + expected_failures=(InvalidRequest,) + ) + self.virtual_columns_result = self._handle_results( + virtual_column_success, virtual_column_result, + expected_failures=(InvalidRequest,) + ) + # dse6.8-only results self.vertices_result = self._handle_results(vertices_success, vertices_result) self.edges_result = self._handle_results(edges_success, edges_result) diff --git a/tests/integration/advanced/graph/fluent/test_graph.py b/tests/integration/advanced/graph/fluent/test_graph.py index f1946a40b7..c3f9c2a43b 100644 --- a/tests/integration/advanced/graph/fluent/test_graph.py +++ b/tests/integration/advanced/graph/fluent/test_graph.py @@ -31,6 +31,7 @@ from gremlin_python.structure.graph import Edge as TravEdge from gremlin_python.structure.graph import Vertex as TravVertex, VertexProperty as TravVertexProperty +from tests.util import wait_until_not_raised from tests.integration import DSE_VERSION, greaterthanorequaldse68 from tests.integration.advanced.graph import GraphUnitTestCase, \ ClassicGraphSchema, CoreGraphSchema, \ @@ -509,8 +510,10 @@ def __test_udt(self, schema, graphson, address_class, address_with_tags_class, property('owners', frozen(listOf(tupleOf(Text, Int)))).create(); """, execution_profile=ep) - time.sleep(2) # wait the UDT to be discovered - self.session.cluster.register_user_type(self.graph_name, 'address', Address) + # wait max 10 seconds to get the UDT discovered. 
+        wait_until_not_raised(
+            lambda: self.session.cluster.register_user_type(self.graph_name, 'address', Address),
+            1, 10)
         self.session.cluster.register_user_type(self.graph_name, 'addressTags', AddressWithTags)
         self.session.cluster.register_user_type(self.graph_name, 'complexAddress', ComplexAddress)
         self.session.cluster.register_user_type(self.graph_name, 'complexAddressWithOwners', ComplexAddressWithOwners)
 
         data = {
             "udt1": ["typeOf('address')", Address('1440 Rd Smith', 'Quebec', 'QC')],
diff --git a/tests/integration/advanced/graph/test_graph_query.py b/tests/integration/advanced/graph/test_graph_query.py
index 5ef4e2c749..ff76288008 100644
--- a/tests/integration/advanced/graph/test_graph_query.py
+++ b/tests/integration/advanced/graph/test_graph_query.py
@@ -161,6 +161,8 @@ def test_profile_graph_options(self):
             s.execute_graph(statement, execution_profile=ep)
         except NoHostAvailable:
             self.assertTrue(DSE_VERSION >= Version("6.0"))
+        except InvalidRequest:
+            self.assertTrue(DSE_VERSION >= Version("5.1"))
         else:
             if DSE_VERSION < Version("6.8"):  # >6.8 returns true
                 self.fail("Should have risen ServerError or InvalidRequest")
diff --git a/tests/integration/simulacron/utils.py b/tests/integration/simulacron/utils.py
index 5cee5ac3f1..870b60bd46 100644
--- a/tests/integration/simulacron/utils.py
+++ b/tests/integration/simulacron/utils.py
@@ -358,7 +358,9 @@ def prime_driver_defaults():
 
     # prepare empty rows for NGDG
     for query in [SchemaParserDSE68._SELECT_VERTICES, SchemaParserDSE68._SELECT_EDGES]:
-        PrimeQuery(query, result='success', then={'rows': [], 'column_types': {'row1': 'int'}})
+        client_simulacron.submit_request(
+            PrimeQuery(query, result='success',
+                       then={'rows': [], 'column_types': {'row1': 'int'}}))
 
 
 def prime_cluster(data_centers="3", version=None, cluster_name=DEFAULT_CLUSTER, dse_version=None):

From d338e66f5cfd4ca25615a763485cf697072c4b0c Mon Sep 17 00:00:00 2001
From: Alan Boudreault
Date: Tue, 28 Jan 2020 14:19:09 -0500
Subject: [PATCH 016/211] another time-related test fix

---
 cassandra/__init__.py                               |  2 +-
 .../integration/advanced/graph/fluent/test_graph.py | 12 +++++++++---
 tests/integration/advanced/test_adv_metadata.py     |  4 ----
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/cassandra/__init__.py b/cassandra/__init__.py
index 76c7a649de..b86f1a8c90 100644
--- a/cassandra/__init__.py
+++ b/cassandra/__init__.py
@@ -22,7 +22,7 @@ def emit(self, record):
 
 logging.getLogger('cassandra').addHandler(NullHandler())
 
-__version_info__ = (3, 21, 0, 'post0+20200127')
+__version_info__ = (3, 21, 0, 'post0+20200128')
 __version__ = '.'.join(map(str, __version_info__))
 
diff --git a/tests/integration/advanced/graph/fluent/test_graph.py b/tests/integration/advanced/graph/fluent/test_graph.py
index c3f9c2a43b..4ebb0b6109 100644
--- a/tests/integration/advanced/graph/fluent/test_graph.py
+++ b/tests/integration/advanced/graph/fluent/test_graph.py
@@ -514,9 +514,15 @@ def __test_udt(self, schema, graphson, address_class, address_with_tags_class,
         wait_until_not_raised(
             lambda: self.session.cluster.register_user_type(self.graph_name, 'address', Address),
             1, 10)
-        self.session.cluster.register_user_type(self.graph_name, 'addressTags', AddressWithTags)
-        self.session.cluster.register_user_type(self.graph_name, 'complexAddress', ComplexAddress)
-        self.session.cluster.register_user_type(self.graph_name, 'complexAddressWithOwners', ComplexAddressWithOwners)
+        wait_until_not_raised(
+            lambda: self.session.cluster.register_user_type(self.graph_name, 'addressTags', AddressWithTags),
+            1, 10)
+        wait_until_not_raised(
+            lambda: 
self.session.cluster.register_user_type(self.graph_name, 'complexAddress', ComplexAddress), + 1, 10) + wait_until_not_raised( + lambda: self.session.cluster.register_user_type(self.graph_name, 'complexAddressWithOwners', ComplexAddressWithOwners), + 1, 10) data = { "udt1": ["typeOf('address')", Address('1440 Rd Smith', 'Quebec', 'QC')], diff --git a/tests/integration/advanced/test_adv_metadata.py b/tests/integration/advanced/test_adv_metadata.py index 2c69a769a3..eaa7b7f8f6 100644 --- a/tests/integration/advanced/test_adv_metadata.py +++ b/tests/integration/advanced/test_adv_metadata.py @@ -283,10 +283,6 @@ def test_nodesync_on_table(self): self.assertIn('nodesync =', table_meta.export_as_string()) self.assertIn('nodesync', table_meta.options) - table_3rf = self.cluster.metadata.keyspaces["test3rf"].tables['test'] - self.assertNotIn('nodesync =', table_3rf.export_as_string()) - self.assertIsNone(table_3rf.options['nodesync']) - @greaterthanorequaldse68 class GraphMetadataTests(BasicExistingKeyspaceUnitTestCase): From f61ed208ac353b439124497991f5ecd7f34f2641 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Thu, 30 Jan 2020 08:14:13 -0500 Subject: [PATCH 017/211] PYTHON-1204: Make graph metadata handling more robust (#47) * Make graph metadata handling more robust --- cassandra/metadata.py | 61 +++++++++++-------- .../integration/advanced/test_adv_metadata.py | 35 ++++++++++- 2 files changed, 71 insertions(+), 25 deletions(-) diff --git a/cassandra/metadata.py b/cassandra/metadata.py index b4dc19fbf0..6ca72440da 100644 --- a/cassandra/metadata.py +++ b/cassandra/metadata.py @@ -2843,26 +2843,7 @@ def __init__(self, connection, timeout): def get_all_keyspaces(self): for keyspace_meta in super(SchemaParserDSE68, self).get_all_keyspaces(): - - def _build_table_graph_metadata(table_meta): - for row in self.keyspace_table_vertex_rows[keyspace_meta.name][table_meta.name]: - vertex_meta = self._build_table_vertex_metadata(row) - table_meta.vertex = vertex_meta - - for row in self.keyspace_table_edge_rows[keyspace_meta.name][table_meta.name]: - edge_meta = self._build_table_edge_metadata(keyspace_meta, row) - table_meta.edge = edge_meta - - # Make sure we process vertices before edges - for t in [t for t in six.itervalues(keyspace_meta.tables) - if t.name in self.keyspace_table_vertex_rows[keyspace_meta.name]]: - _build_table_graph_metadata(t) - - # all other tables... 
- for t in [t for t in six.itervalues(keyspace_meta.tables) - if t.name not in self.keyspace_table_vertex_rows[keyspace_meta.name]]: - _build_table_graph_metadata(t) - + self._build_graph_metadata(keyspace_meta) yield keyspace_meta def get_table(self, keyspaces, keyspace, table): @@ -2877,10 +2858,16 @@ def get_table(self, keyspaces, keyspace, table): vertices_result = self._handle_results(vertices_success, vertices_result) edges_result = self._handle_results(edges_success, edges_result) - if vertices_result: - table_meta.vertex = self._build_table_vertex_metadata(vertices_result[0]) - elif edges_result: - table_meta.edge = self._build_table_edge_metadata(keyspaces[keyspace], edges_result[0]) + try: + if vertices_result: + table_meta.vertex = self._build_table_vertex_metadata(vertices_result[0]) + elif edges_result: + table_meta.edge = self._build_table_edge_metadata(keyspaces[keyspace], edges_result[0]) + except Exception: + table_meta.vertex = None + table_meta.edge = None + table_meta._exc_info = sys.exc_info() + log.exception("Error while parsing graph metadata for table %s.%s.", keyspace, table) return table_meta @@ -2893,6 +2880,32 @@ def _build_keyspace_metadata_internal(row): graph_engine = row.get("graph_engine", None) return KeyspaceMetadata(name, durable_writes, replication_class, replication, graph_engine) + def _build_graph_metadata(self, keyspace_meta): + + def _build_table_graph_metadata(table_meta): + for row in self.keyspace_table_vertex_rows[keyspace_meta.name][table_meta.name]: + table_meta.vertex = self._build_table_vertex_metadata(row) + + for row in self.keyspace_table_edge_rows[keyspace_meta.name][table_meta.name]: + table_meta.egde = self._build_table_edge_metadata(keyspace_meta, row) + + try: + # Make sure we process vertices before edges + for table_meta in [t for t in six.itervalues(keyspace_meta.tables) + if t.name in self.keyspace_table_vertex_rows[keyspace_meta.name]]: + _build_table_graph_metadata(table_meta) + + # all other tables... 
+ for table_meta in [t for t in six.itervalues(keyspace_meta.tables) + if t.name not in self.keyspace_table_vertex_rows[keyspace_meta.name]]: + _build_table_graph_metadata(table_meta) + except Exception: + # schema error, remove all graph metadata for this keyspace + for t in six.itervalues(keyspace_meta.tables): + t.edge = t.vertex = None + keyspace_meta._exc_info = sys.exc_info() + log.exception("Error while parsing graph metadata for keyspace %s", keyspace_meta.name) + @staticmethod def _build_table_vertex_metadata(row): return VertexMetadata(row.get("keyspace_name"), row.get("table_name"), diff --git a/tests/integration/advanced/test_adv_metadata.py b/tests/integration/advanced/test_adv_metadata.py index eaa7b7f8f6..52944aabdf 100644 --- a/tests/integration/advanced/test_adv_metadata.py +++ b/tests/integration/advanced/test_adv_metadata.py @@ -14,11 +14,12 @@ from packaging.version import Version +from cassandra.cluster import Cluster from tests.integration import (BasicExistingKeyspaceUnitTestCase, BasicSharedKeyspaceUnitTestCase, BasicSharedKeyspaceUnitTestCaseRF1, greaterthanorequaldse51, greaterthanorequaldse60, greaterthanorequaldse68, use_single_node, - DSE_VERSION, requiredse) + DSE_VERSION, requiredse, PROTOCOL_VERSION) try: import unittest2 as unittest @@ -361,3 +362,35 @@ def test_edge_metadata(self): self.assertEqual(edge_meta.to_label, 'rocksolidsoftware') self.assertEqual(edge_meta.to_partition_key_columns, ['company_name', 'software_name']) self.assertEqual(edge_meta.to_clustering_columns, ['software_version']) + + +@greaterthanorequaldse68 +class GraphMetadataSchemaErrorTests(BasicExistingKeyspaceUnitTestCase): + """ + Test that we can connect when the graph schema is broken. + """ + + def test_connection_on_graph_schema_error(self): + self.session = self.cluster.connect() + + self.session.execute(""" + CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1} and graph_engine = 'Core'; + """ % (self.ks_name,)) + + self.session.execute(""" + CREATE TABLE %s.person (name text PRIMARY KEY) WITH VERTEX LABEL; + """ % (self.ks_name,)) + + self.session.execute(""" + CREATE TABLE %s.software(company text, name text, version int, PRIMARY KEY((company, name), version)) WITH VERTEX LABEL rocksolidsoftware; + """ % (self.ks_name,)) + + self.session.execute(""" + CREATE TABLE %s.contributors (contributor text, company_name text, software_name text, software_version int, + PRIMARY KEY (contributor, company_name, software_name, software_version) ) + WITH CLUSTERING ORDER BY (company_name ASC, software_name ASC, software_version ASC) + AND EDGE LABEL contrib FROM person(contributor) TO rocksolidsoftware((company_name, software_name), software_version); + """ % (self.ks_name,)) + + self.session.execute('TRUNCATE system_schema.vertices') + Cluster(protocol_version=PROTOCOL_VERSION).connect().shutdown() From bdb5f530950c71b637662cb1f6c8f502e3ecc3ee Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Thu, 30 Jan 2020 08:19:47 -0500 Subject: [PATCH 018/211] Add python-1204 changelog entry --- CHANGELOG.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a7689ff551..f86e5048da 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -23,6 +23,7 @@ Features * Enable Paging Through DSE Driver for Gremlin Traversals (PYTHON-1045) * Expose filter predicates for cql collections (PYTHON-1019) * Add g:TraversalMetrics/Metrics deserializers (PYTHON-1057) +* Make graph metadata handling more robust (PYTHON-1204) 3.21.0 ====== From 
db96ffaae045523011c94ddd5e13f5b5eaa07176 Mon Sep 17 00:00:00 2001
From: Alan Boudreault
Date: Thu, 30 Jan 2020 13:10:49 -0500
Subject: [PATCH 019/211] Don't try to enable insights if disabled

---
 cassandra/cluster.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/cassandra/cluster.py b/cassandra/cluster.py
index a33404c5fd..a6709ba6a5 100644
--- a/cassandra/cluster.py
+++ b/cassandra/cluster.py
@@ -2546,18 +2546,19 @@ def __init__(self, cluster, hosts, keyspace=None):
         self.session_id = uuid.uuid4()
         self._graph_paging_available = self._check_graph_paging_available()
 
-        cc_host = self.cluster.get_control_connection_host()
-        valid_insights_version = (cc_host and version_supports_insights(cc_host.dse_version))
-        if self.cluster.monitor_reporting_enabled and valid_insights_version:
-            self._monitor_reporter = MonitorReporter(
-                interval_sec=self.cluster.monitor_reporting_interval,
-                session=self,
-            )
-        else:
-            if cc_host:
-                log.debug('Not starting MonitorReporter thread for Insights; '
-                          'not supported by server version {v} on '
-                          'ControlConnection host {c}'.format(v=cc_host.release_version, c=cc_host))
+        if self.cluster.monitor_reporting_enabled:
+            cc_host = self.cluster.get_control_connection_host()
+            valid_insights_version = (cc_host and version_supports_insights(cc_host.dse_version))
+            if valid_insights_version:
+                self._monitor_reporter = MonitorReporter(
+                    interval_sec=self.cluster.monitor_reporting_interval,
+                    session=self,
+                )
+            else:
+                if cc_host:
+                    log.debug('Not starting MonitorReporter thread for Insights; '
+                              'not supported by server version {v} on '
+                              'ControlConnection host {c}'.format(v=cc_host.release_version, c=cc_host))
 
         log.debug('Started Session with client_id {} and session_id {}'.format(self.cluster.client_id,
                                                                                self.session_id))

From c9da0566331099f533cd32c0fcd24f0680548978 Mon Sep 17 00:00:00 2001
From: Alan Boudreault
Date: Tue, 4 Feb 2020 13:39:32 -0500
Subject: [PATCH 020/211] Add C* 4.0 testing

---
 build.yaml                    |  5 +++--
 tests/integration/__init__.py | 16 +++++-----------
 2 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/build.yaml b/build.yaml
index b60c0950c1..c4c93a803c 100644
--- a/build.yaml
+++ b/build.yaml
@@ -46,8 +46,8 @@ schedules:
       EXCLUDE_LONG=1
     matrix:
       exclude:
-        - python: [2.7, 3.4, 3.7, 3.8]
-        - cassandra: ['2.0', '2.1', '2.2', '3.0', 'test-dse', dse-4.8', 'dse-5.0']
+        - python: [2.7, 3.4, 3.7, 3.6, 3.8]
+        - cassandra: ['2.0', '2.1', '2.2', '3.0', '3.11', 'test-dse', 'dse-4.8', 'dse-5.0', 'dse-6.0', 'dse-6.7', 'dse-6.8']
 
 release_test:
   schedule: per_commit
@@ -157,6 +157,7 @@ cassandra:
   - '2.2'
   - '3.0'
   - '3.11'
+  - '4.0'
   - 'dse-4.8'
   - 'dse-5.0'
   - 'dse-5.1'
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
index 0a3d6542a0..547a3e6e32 100644
--- a/tests/integration/__init__.py
+++ b/tests/integration/__init__.py
@@ -172,7 +172,7 @@ def _get_dse_version_from_cass(cass_version):
         cassandra_version = Version(mcv_string)
 
 CASSANDRA_VERSION = Version(mcv_string) if mcv_string else cassandra_version
-CCM_VERSION = cassandra_version if mcv_string else CASSANDRA_VERSION
+CCM_VERSION = mcv_string or cv_string
 
 CASSANDRA_IP = os.getenv('CLUSTER_IP', '127.0.0.1')
 CASSANDRA_DIR = os.getenv('CASSANDRA_DIR', None)
@@ -458,15 +458,9 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None,
     elif ccm_options is None:
         ccm_options = CCM_KWARGS.copy()
 
-    if 'version' in ccm_options and not isinstance(ccm_options['version'], Version):
-        ccm_options['version'] = 
Version(ccm_options['version']) - cassandra_version = ccm_options.get('version', CCM_VERSION) dse_version = ccm_options.get('version', DSE_VERSION) - if 'version' in ccm_options: - ccm_options['version'] = ccm_options['version'].base_version - global CCM_CLUSTER if USE_CASS_EXTERNAL: if CCM_CLUSTER: @@ -515,12 +509,12 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, CCM_CLUSTER = DseCluster(path, cluster_name, **ccm_options) CCM_CLUSTER.set_configuration_options({'start_native_transport': True}) CCM_CLUSTER.set_configuration_options({'batch_size_warn_threshold_in_kb': 5}) - if dse_version >= Version('5.0'): + if Version(dse_version) >= Version('5.0'): CCM_CLUSTER.set_configuration_options({'enable_user_defined_functions': True}) CCM_CLUSTER.set_configuration_options({'enable_scripted_user_defined_functions': True}) if 'spark' in workloads: config_options = {"initial_spark_worker_resources": 0.1} - if dse_version >= Version('6.7'): + if Version(dse_version) >= Version('6.7'): log.debug("Disabling AlwaysON SQL for a DSE 6.7 Cluster") config_options['alwayson_sql_options'] = {'enabled': False} CCM_CLUSTER.set_dse_configuration_options(config_options) @@ -532,9 +526,9 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, else: CCM_CLUSTER = CCMCluster(path, cluster_name, **ccm_options) CCM_CLUSTER.set_configuration_options({'start_native_transport': True}) - if cassandra_version >= Version('2.2'): + if Version(cassandra_version) >= Version('2.2'): CCM_CLUSTER.set_configuration_options({'enable_user_defined_functions': True}) - if cassandra_version >= Version('3.0'): + if Version(cassandra_version) >= Version('3.0'): CCM_CLUSTER.set_configuration_options({'enable_scripted_user_defined_functions': True}) common.switch_cluster(path, cluster_name) CCM_CLUSTER.set_configuration_options(configuration_options) From 2716214599148987ebc9999f7ed1ad330f5e889e Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 11 Feb 2020 09:46:31 -0500 Subject: [PATCH 021/211] Fix typo when assigning a table edge --- cassandra/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cassandra/metadata.py b/cassandra/metadata.py index 6ca72440da..5cdcef807e 100644 --- a/cassandra/metadata.py +++ b/cassandra/metadata.py @@ -2887,7 +2887,7 @@ def _build_table_graph_metadata(table_meta): table_meta.vertex = self._build_table_vertex_metadata(row) for row in self.keyspace_table_edge_rows[keyspace_meta.name][table_meta.name]: - table_meta.egde = self._build_table_edge_metadata(keyspace_meta, row) + table_meta.edge = self._build_table_edge_metadata(keyspace_meta, row) try: # Make sure we process vertices before edges From 3a2ea456d1552b898aa962959ada5c6b5a025687 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 4 Feb 2020 14:46:23 -0500 Subject: [PATCH 022/211] Make sure to only query the native_transport_address column with DSE --- CHANGELOG.rst | 4 ++++ build.yaml | 12 ++++++------ cassandra/cluster.py | 17 ++++++++++------- tests/integration/__init__.py | 4 ++-- 4 files changed, 22 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index da15a1158b..694da485ad 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,10 @@ Features -------- * Add all() function to the ResultSet API (PYTHON-1203) +Bug Fixes +--------- +* Make sure to only query the native_transport_address column with DSE (PYTHON-1205) + 3.21.0 ====== January 15, 2020 diff --git a/build.yaml b/build.yaml index c4c93a803c..3ae684ebb9 100644 
--- a/build.yaml +++ b/build.yaml @@ -9,7 +9,7 @@ schedules: matrix: exclude: - python: [3.4, 3.6, 3.7, 3.8] - - cassandra: ['2.1', '3.0', 'test-dse'] + - cassandra: ['2.1', '3.0', '4.0', 'test-dse'] commit_long_test: schedule: per_commit @@ -21,7 +21,7 @@ schedules: matrix: exclude: - python: [3.4, 3.6, 3.7, 3.8] - - cassandra: ['2.1', '3.0', 'test-dse'] + - cassandra: ['2.1', '3.0', '4.0', 'test-dse'] commit_branches: schedule: per_commit @@ -34,7 +34,7 @@ schedules: matrix: exclude: - python: [3.4, 3.6, 3.7, 3.8] - - cassandra: ['2.1', '3.0', 'test-dse'] + - cassandra: ['2.1', '3.0', '4.0', 'test-dse'] commit_branches_dev: schedule: per_commit @@ -47,7 +47,7 @@ schedules: matrix: exclude: - python: [2.7, 3.4, 3.7, 3.6, 3.8] - - cassandra: ['2.0', '2.1', '2.2', '3.0', '3.11', 'test-dse', 'dse-4.8', 'dse-5.0', 'dse-6.0', 'dse-6.7', 'dse-6.8'] + - cassandra: ['2.0', '2.1', '2.2', '3.0', '4.0', 'test-dse', 'dse-4.8', 'dse-5.0', 'dse-6.0', 'dse-6.8'] release_test: schedule: per_commit @@ -139,7 +139,7 @@ schedules: matrix: exclude: - python: [3.4, 3.6, 3.7, 3.8] - - cassandra: ['2.0', '2.1', '2.2', '3.0', 'test-dse'] + - cassandra: ['2.0', '2.1', '2.2', '3.0', '4.0', 'test-dse'] python: - 2.7 @@ -182,7 +182,7 @@ build: pip install --upgrade pip pip install -U setuptools - pip install git+ssh://git@github.com/riptano/ccm-private.git + pip install $HOME/ccm if [ -n "$CCM_IS_DSE" ]; then pip install -r test-datastax-requirements.txt diff --git a/cassandra/cluster.py b/cassandra/cluster.py index 59c8b61f96..0ca962c076 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -80,7 +80,7 @@ from cassandra.marshal import int64_pack from cassandra.timestamps import MonotonicTimestampGenerator from cassandra.compat import Mapping -from cassandra.util import _resolve_contact_points_to_string_map +from cassandra.util import _resolve_contact_points_to_string_map, Version from cassandra.datastax.insights.reporter import MonitorReporter from cassandra.datastax.insights.util import version_supports_insights @@ -3324,7 +3324,7 @@ class ControlConnection(object): _SELECT_SCHEMA_PEERS_TEMPLATE = "SELECT peer, host_id, {nt_col_name}, schema_version FROM system.peers" _SELECT_SCHEMA_LOCAL = "SELECT schema_version FROM system.local WHERE key='local'" - _MINIMUM_NATIVE_ADDRESS_VERSION = "4.0" + _MINIMUM_NATIVE_ADDRESS_DSE_VERSION = Version("6.0.0") _is_shutdown = False _timeout = None @@ -3884,14 +3884,17 @@ def _peers_query_for_version(self, connection, peers_query_template): field named nt_col_name. 
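        For illustration, a sketch of the substitution this method performs,
        using only the templates defined above (plain ``str.format``, not new
        driver API; the cut-off is _MINIMUM_NATIVE_ADDRESS_DSE_VERSION)::

            tmpl = "SELECT peer, host_id, {nt_col_name}, schema_version FROM system.peers"
            tmpl.format(nt_col_name="native_transport_address")  # DSE >= 6.0.0
            tmpl.format(nt_col_name="rpc_address")               # non-DSE hosts / older DSE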
""" host_release_version = self._cluster.metadata.get_host(connection.endpoint).release_version - if host_release_version: - use_native_address_query = host_release_version >= self._MINIMUM_NATIVE_ADDRESS_VERSION - if use_native_address_query: - select_peers_query = peers_query_template.format(nt_col_name="native_transport_address") - else: + host_dse_version = self._cluster.metadata.get_host(connection.endpoint).dse_version + uses_native_address_query = ( + host_dse_version and Version(host_dse_version) >= self._MINIMUM_NATIVE_ADDRESS_DSE_VERSION) + + if uses_native_address_query: + select_peers_query = peers_query_template.format(nt_col_name="native_transport_address") + elif host_release_version: select_peers_query = peers_query_template.format(nt_col_name="rpc_address") else: select_peers_query = self._SELECT_PEERS + return select_peers_query def _signal_error(self): diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index 547a3e6e32..992de80cda 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -172,7 +172,7 @@ def _get_dse_version_from_cass(cass_version): cassandra_version = Version(mcv_string) CASSANDRA_VERSION = Version(mcv_string) if mcv_string else cassandra_version - CCM_VERSION = mcv_string or cv_string + CCM_VERSION = mcv_string if mcv_string else cv_string CASSANDRA_IP = os.getenv('CLUSTER_IP', '127.0.0.1') CASSANDRA_DIR = os.getenv('CASSANDRA_DIR', None) @@ -454,7 +454,7 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, set_default_cass_ip() if ccm_options is None and DSE_VERSION: - ccm_options = {"version": DSE_VERSION} + ccm_options = {"version": CCM_VERSION} elif ccm_options is None: ccm_options = CCM_KWARGS.copy() From df458d2ebd701962e72d8ad6fa1a8c4356de53b2 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Fri, 14 Feb 2020 09:35:25 -0500 Subject: [PATCH 023/211] Fix test version compare --- tests/integration/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index a350bb40e3..d6f26acbcd 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -530,7 +530,7 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, if Version(dse_version) >= Version('5.0'): CCM_CLUSTER.set_configuration_options({'enable_user_defined_functions': True}) CCM_CLUSTER.set_configuration_options({'enable_scripted_user_defined_functions': True}) - if dse_version >= Version('5.1'): + if Version(dse_version) >= Version('5.1'): # For Inet4Address CCM_CLUSTER.set_dse_configuration_options({ 'graph': { From 28d85a99a0123508ce14e442de74b1b7a9577e98 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Sat, 15 Feb 2020 13:37:41 -0500 Subject: [PATCH 024/211] install pyyaml 5.2 to fix the ccm installation with py3.4 --- build.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/build.yaml b/build.yaml index 605683b085..83bed55a09 100644 --- a/build.yaml +++ b/build.yaml @@ -184,6 +184,8 @@ build: pip install --upgrade pip pip install -U setuptools + # Remove this pyyaml installation when removing Python 3.4 support + pip install PyYAML==5.2 pip install $HOME/ccm if [ -n "$CCM_IS_DSE" ]; then From 02faa6d80fc8adff10fb7a44d2a2c8b9ba3dc79e Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 4 Feb 2020 13:39:32 -0500 Subject: [PATCH 025/211] Add C* 4.0 testing --- build.yaml | 5 +++-- tests/integration/__init__.py | 16 +++++----------- 2 files changed, 8 insertions(+), 13 
deletions(-) diff --git a/build.yaml b/build.yaml index 460192131d..dabdbc7132 100644 --- a/build.yaml +++ b/build.yaml @@ -46,8 +46,8 @@ schedules: EXCLUDE_LONG=1 matrix: exclude: - - python: [2.7, 3.4, 3.7, 3.8] - - cassandra: ['2.0', '2.1', '2.2', '3.0', 'test-dse', dse-4.8', 'dse-5.0'] + - python: [2.7, 3.4, 3.7, 3.6, 3.8] + - cassandra: ['2.0', '2.1', '2.2', '3.0', '3.11', 'test-dse', 'dse-4.8', 'dse-5.0', 'dse-6.0', 'dse-6.7', 'dse-6.8'] release_test: schedule: per_commit @@ -157,6 +157,7 @@ cassandra: - '2.2' - '3.0' - '3.11' + - '4.0' - 'dse-4.8' - 'dse-5.0' - 'dse-5.1' diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index 18c9110b35..db68ae632d 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -178,7 +178,7 @@ def _get_dse_version_from_cass(cass_version): cassandra_version = Version(mcv_string) CASSANDRA_VERSION = Version(mcv_string) if mcv_string else cassandra_version - CCM_VERSION = cassandra_version if mcv_string else CASSANDRA_VERSION + CCM_VERSION = mcv_string or cv_string CASSANDRA_IP = os.getenv('CLUSTER_IP', '127.0.0.1') CASSANDRA_DIR = os.getenv('CASSANDRA_DIR', None) @@ -476,15 +476,9 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, elif ccm_options is None: ccm_options = CCM_KWARGS.copy() - if 'version' in ccm_options and not isinstance(ccm_options['version'], Version): - ccm_options['version'] = Version(ccm_options['version']) - cassandra_version = ccm_options.get('version', CCM_VERSION) dse_version = ccm_options.get('version', DSE_VERSION) - if 'version' in ccm_options: - ccm_options['version'] = ccm_options['version'].base_version - global CCM_CLUSTER if USE_CASS_EXTERNAL: if CCM_CLUSTER: @@ -533,7 +527,7 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, CCM_CLUSTER = DseCluster(path, cluster_name, **ccm_options) CCM_CLUSTER.set_configuration_options({'start_native_transport': True}) CCM_CLUSTER.set_configuration_options({'batch_size_warn_threshold_in_kb': 5}) - if dse_version >= Version('5.0'): + if Version(dse_version) >= Version('5.0'): CCM_CLUSTER.set_configuration_options({'enable_user_defined_functions': True}) CCM_CLUSTER.set_configuration_options({'enable_scripted_user_defined_functions': True}) if dse_version >= Version('5.1'): @@ -555,7 +549,7 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, }) if 'spark' in workloads: config_options = {"initial_spark_worker_resources": 0.1} - if dse_version >= Version('6.7'): + if Version(dse_version) >= Version('6.7'): log.debug("Disabling AlwaysON SQL for a DSE 6.7 Cluster") config_options['alwayson_sql_options'] = {'enabled': False} CCM_CLUSTER.set_dse_configuration_options(config_options) @@ -567,9 +561,9 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, else: CCM_CLUSTER = CCMCluster(path, cluster_name, **ccm_options) CCM_CLUSTER.set_configuration_options({'start_native_transport': True}) - if cassandra_version >= Version('2.2'): + if Version(cassandra_version) >= Version('2.2'): CCM_CLUSTER.set_configuration_options({'enable_user_defined_functions': True}) - if cassandra_version >= Version('3.0'): + if Version(cassandra_version) >= Version('3.0'): CCM_CLUSTER.set_configuration_options({'enable_scripted_user_defined_functions': True}) common.switch_cluster(path, cluster_name) CCM_CLUSTER.set_configuration_options(configuration_options) From 211bf1d7759f18c2b21412f615d950982a35bde5 Mon Sep 17 00:00:00 2001 From: Alan Boudreault 
Date: Tue, 4 Feb 2020 14:46:23 -0500 Subject: [PATCH 026/211] Make sure to only query the native_transport_address column with DSE --- CHANGELOG.rst | 4 ++++ build.yaml | 12 ++++++------ cassandra/cluster.py | 15 +++++++++------ tests/integration/__init__.py | 4 ++-- 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f86e5048da..dc11a9fddd 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -25,6 +25,10 @@ Features * Add g:TraversalMetrics/Metrics deserializers (PYTHON-1057) * Make graph metadata handling more robust (PYTHON-1204) +Bug Fixes +--------- +* Make sure to only query the native_transport_address column with DSE (PYTHON-1205) + 3.21.0 ====== January 15, 2020 diff --git a/build.yaml b/build.yaml index dabdbc7132..605683b085 100644 --- a/build.yaml +++ b/build.yaml @@ -9,7 +9,7 @@ schedules: matrix: exclude: - python: [3.4, 3.6, 3.7, 3.8] - - cassandra: ['2.1', '3.0', 'test-dse'] + - cassandra: ['2.1', '3.0', '4.0', 'test-dse'] commit_long_test: schedule: per_commit @@ -21,7 +21,7 @@ schedules: matrix: exclude: - python: [3.4, 3.6, 3.7, 3.8] - - cassandra: ['2.1', '3.0', 'test-dse'] + - cassandra: ['2.1', '3.0', '4.0', 'test-dse'] commit_branches: schedule: per_commit @@ -34,7 +34,7 @@ schedules: matrix: exclude: - python: [3.4, 3.6, 3.7, 3.8] - - cassandra: ['2.1', '3.0', 'test-dse'] + - cassandra: ['2.1', '3.0', '4.0', 'test-dse'] commit_branches_dev: schedule: per_commit @@ -47,7 +47,7 @@ schedules: matrix: exclude: - python: [2.7, 3.4, 3.7, 3.6, 3.8] - - cassandra: ['2.0', '2.1', '2.2', '3.0', '3.11', 'test-dse', 'dse-4.8', 'dse-5.0', 'dse-6.0', 'dse-6.7', 'dse-6.8'] + - cassandra: ['2.0', '2.1', '2.2', '3.0', '4.0', 'test-dse', 'dse-4.8', 'dse-5.0', 'dse-6.0', 'dse-6.8'] release_test: schedule: per_commit @@ -139,7 +139,7 @@ schedules: matrix: exclude: - python: [3.4, 3.6, 3.7, 3.8] - - cassandra: ['2.0', '2.1', '2.2', '3.0', 'test-dse'] + - cassandra: ['2.0', '2.1', '2.2', '3.0', '4.0', 'test-dse'] python: - 2.7 @@ -184,7 +184,7 @@ build: pip install --upgrade pip pip install -U setuptools - pip install git+ssh://git@github.com/riptano/ccm-private.git + pip install $HOME/ccm if [ -n "$CCM_IS_DSE" ]; then pip install -r test-datastax-requirements.txt diff --git a/cassandra/cluster.py b/cassandra/cluster.py index a6709ba6a5..2263fbdb97 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -3418,7 +3418,7 @@ class ControlConnection(object): _SELECT_SCHEMA_PEERS_TEMPLATE = "SELECT peer, host_id, {nt_col_name}, schema_version FROM system.peers" _SELECT_SCHEMA_LOCAL = "SELECT schema_version FROM system.local WHERE key='local'" - _MINIMUM_NATIVE_ADDRESS_VERSION = "4.0" + _MINIMUM_NATIVE_ADDRESS_DSE_VERSION = Version("6.0.0") _is_shutdown = False _timeout = None @@ -3978,14 +3978,17 @@ def _peers_query_for_version(self, connection, peers_query_template): field named nt_col_name. 
""" host_release_version = self._cluster.metadata.get_host(connection.endpoint).release_version - if host_release_version: - use_native_address_query = host_release_version >= self._MINIMUM_NATIVE_ADDRESS_VERSION - if use_native_address_query: - select_peers_query = peers_query_template.format(nt_col_name="native_transport_address") - else: + host_dse_version = self._cluster.metadata.get_host(connection.endpoint).dse_version + uses_native_address_query = ( + host_dse_version and Version(host_dse_version) >= self._MINIMUM_NATIVE_ADDRESS_DSE_VERSION) + + if uses_native_address_query: + select_peers_query = peers_query_template.format(nt_col_name="native_transport_address") + elif host_release_version: select_peers_query = peers_query_template.format(nt_col_name="rpc_address") else: select_peers_query = self._SELECT_PEERS + return select_peers_query def _signal_error(self): diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index db68ae632d..a350bb40e3 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -178,7 +178,7 @@ def _get_dse_version_from_cass(cass_version): cassandra_version = Version(mcv_string) CASSANDRA_VERSION = Version(mcv_string) if mcv_string else cassandra_version - CCM_VERSION = mcv_string or cv_string + CCM_VERSION = mcv_string if mcv_string else cv_string CASSANDRA_IP = os.getenv('CLUSTER_IP', '127.0.0.1') CASSANDRA_DIR = os.getenv('CASSANDRA_DIR', None) @@ -472,7 +472,7 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, set_default_cass_ip() if ccm_options is None and DSE_VERSION: - ccm_options = {"version": DSE_VERSION} + ccm_options = {"version": CCM_VERSION} elif ccm_options is None: ccm_options = CCM_KWARGS.copy() From 143d45d8ade160a4873e92020030bea061684bde Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Fri, 14 Feb 2020 09:35:25 -0500 Subject: [PATCH 027/211] Fix test version compare --- tests/integration/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index a350bb40e3..d6f26acbcd 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -530,7 +530,7 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, if Version(dse_version) >= Version('5.0'): CCM_CLUSTER.set_configuration_options({'enable_user_defined_functions': True}) CCM_CLUSTER.set_configuration_options({'enable_scripted_user_defined_functions': True}) - if dse_version >= Version('5.1'): + if Version(dse_version) >= Version('5.1'): # For Inet4Address CCM_CLUSTER.set_dse_configuration_options({ 'graph': { From 9c0b5be19dae9bfd218d59b82ba2d754a89231c2 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Sat, 15 Feb 2020 13:37:41 -0500 Subject: [PATCH 028/211] install pyyaml 5.2 to fix the ccm installation with py3.4 --- build.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/build.yaml b/build.yaml index 605683b085..83bed55a09 100644 --- a/build.yaml +++ b/build.yaml @@ -184,6 +184,8 @@ build: pip install --upgrade pip pip install -U setuptools + # Remove this pyyaml installation when removing Python 3.4 support + pip install PyYAML==5.2 pip install $HOME/ccm if [ -n "$CCM_IS_DSE" ]; then From 1979a481692748e661ea80c9997afc2bf0d428a7 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Tue, 18 Feb 2020 11:24:52 -0600 Subject: [PATCH 029/211] Test fixes: - fixing a version check in test_graph_query.py - revert guardrails prefx in test_failure_types.py - for tombstone test, 
delete cell in c* version < 4.0, else delete entire row --- tests/integration/advanced/graph/test_graph_query.py | 2 +- tests/integration/long/test_failure_types.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/integration/advanced/graph/test_graph_query.py b/tests/integration/advanced/graph/test_graph_query.py index ff76288008..1ccfc4a90c 100644 --- a/tests/integration/advanced/graph/test_graph_query.py +++ b/tests/integration/advanced/graph/test_graph_query.py @@ -162,7 +162,7 @@ def test_profile_graph_options(self): except NoHostAvailable: self.assertTrue(DSE_VERSION >= Version("6.0")) except InvalidRequest: - self.assertTrue(DSE_VERSION >= Version("5.1")) + self.assertTrue(DSE_VERSION >= Version("5.0")) else: if DSE_VERSION < Version("6.8"): # >6.8 returns true self.fail("Should have risen ServerError or InvalidRequest") diff --git a/tests/integration/long/test_failure_types.py b/tests/integration/long/test_failure_types.py index 486313e096..25854a57f7 100644 --- a/tests/integration/long/test_failure_types.py +++ b/tests/integration/long/test_failure_types.py @@ -16,8 +16,8 @@ import sys import traceback import time +from packaging.version import Version -from ccmlib.dse_cluster import DseCluster from mock import Mock from cassandra.policies import HostFilterPolicy, RoundRobinPolicy @@ -31,7 +31,7 @@ from tests.integration import ( use_singledc, PROTOCOL_VERSION, get_cluster, setup_keyspace, remove_cluster, get_node, start_cluster_wait_for_up, requiresmallclockgranularity, - local) + local, CASSANDRA_VERSION) try: @@ -55,7 +55,10 @@ def setup_module(): use_singledc(start=False) ccm_cluster = get_cluster() ccm_cluster.stop() - config_options = {'guardrails.tombstone_failure_threshold': 2000, 'guardrails.tombstone_warn_threshold': 1000} + config_options = { + 'tombstone_failure_threshold': 2000, + 'tombstone_warn_threshold': 1000, + } ccm_cluster.set_configuration_options(config_options) start_cluster_wait_for_up(ccm_cluster) setup_keyspace() @@ -255,7 +258,8 @@ def test_tombstone_overflow_read_failure(self): parameters = [(x,) for x in range(3000)] self.execute_concurrent_args_helper(self.session, statement, parameters) - statement = self.session.prepare("DELETE FROM test3rf.test2 WHERE k = 1 AND v0 =?") + column = 'v1' if CASSANDRA_VERSION < Version('4.0') else '' + statement = self.session.prepare("DELETE {} FROM test3rf.test2 WHERE k = 1 AND v0 =?".format(column)) parameters = [(x,) for x in range(2001)] self.execute_concurrent_args_helper(self.session, statement, parameters) From 2ccc9b6671be372d24b6208544301859acbfc2e1 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Mon, 24 Feb 2020 10:59:05 -0600 Subject: [PATCH 030/211] Remove platform and runtime survey from README --- README.rst | 4 ---- 1 file changed, 4 deletions(-) diff --git a/README.rst b/README.rst index 0b6c1e206d..6fabdf7531 100644 --- a/README.rst +++ b/README.rst @@ -11,10 +11,6 @@ The driver supports Python 2.7, 3.4, 3.5, 3.6, 3.7 and 3.8. **Note:** DataStax products do not support big-endian systems. -Feedback Requested ------------------- -**Help us focus our efforts!** Provide your input on the `Platform and Runtime Survey `_ (we kept it short). 
- Features -------- * `Synchronous `_ and `Asynchronous `_ APIs From dff53fb26fa5834ecfeb361b0e202221f918bf76 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Mon, 24 Feb 2020 16:09:03 -0600 Subject: [PATCH 031/211] Update docs for 3.22 release --- cassandra/cluster.py | 2 ++ docs/api/cassandra/cluster.rst | 4 ++-- .../datastax/graph/fluent/predicates.rst | 3 +++ docs/api/cassandra/datastax/graph/index.rst | 24 +++++++++++++++++++ docs/api/cassandra/metadata.rst | 3 +++ docs/index.rst | 1 - 6 files changed, 34 insertions(+), 3 deletions(-) diff --git a/cassandra/cluster.py b/cassandra/cluster.py index 2263fbdb97..58051104a1 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -426,6 +426,8 @@ def __init__(self, load_balancing_policy=_NOT_SET, retry_policy=None, See :class:`.ExecutionProfile` for base attributes. Note that if not explicitly set, the row_factory and graph_options.graph_protocol are resolved during the query execution. + These options will resolve to graph_graphson3_row_factory and GraphProtocol.GRAPHSON_3_0 + for the core graph engine (DSE 6.8+), otherwise graph_object_row_factory and GraphProtocol.GRAPHSON_1_0 In addition to default parameters shown in the signature, this profile also defaults ``retry_policy`` to :class:`cassandra.policies.NeverRetryPolicy`. diff --git a/docs/api/cassandra/cluster.rst b/docs/api/cassandra/cluster.rst index 459f287b4c..2b3d7828a8 100644 --- a/docs/api/cassandra/cluster.rst +++ b/docs/api/cassandra/cluster.rst @@ -127,10 +127,10 @@ .. autoattribute:: consistency_level :annotation: = LOCAL_ONE -.. autoclass:: GraphExecutionProfile (load_balancing_policy=_NOT_SET, retry_policy=None, consistency_level=ConsistencyLevel.LOCAL_ONE, serial_consistency_level=None, request_timeout=30.0, row_factory=, graph_options=None, continuous_paging_options=_NOT_SET) +.. autoclass:: GraphExecutionProfile (load_balancing_policy=_NOT_SET, retry_policy=None, consistency_level=ConsistencyLevel.LOCAL_ONE, serial_consistency_level=None, request_timeout=30.0, row_factory=None, graph_options=None, continuous_paging_options=_NOT_SET) :members: -.. autoclass:: GraphAnalyticsExecutionProfile (load_balancing_policy=None, retry_policy=None, consistency_level=ConsistencyLevel.LOCAL_ONE, serial_consistency_level=None, request_timeout=3600. * 24. * 7., row_factory=, graph_options=None) +.. autoclass:: GraphAnalyticsExecutionProfile (load_balancing_policy=None, retry_policy=None, consistency_level=ConsistencyLevel.LOCAL_ONE, serial_consistency_level=None, request_timeout=3600. * 24. * 7., row_factory=None, graph_options=None) :members: .. autodata:: EXEC_PROFILE_DEFAULT diff --git a/docs/api/cassandra/datastax/graph/fluent/predicates.rst b/docs/api/cassandra/datastax/graph/fluent/predicates.rst index 09f9e4c8fa..f6e86f6451 100644 --- a/docs/api/cassandra/datastax/graph/fluent/predicates.rst +++ b/docs/api/cassandra/datastax/graph/fluent/predicates.rst @@ -7,5 +7,8 @@ .. autoclass:: Search :members: +.. autoclass:: CqlCollection + :members: + .. autoclass:: Geo :members: diff --git a/docs/api/cassandra/datastax/graph/index.rst b/docs/api/cassandra/datastax/graph/index.rst index 285ca5ffc2..18a0e7c511 100644 --- a/docs/api/cassandra/datastax/graph/index.rst +++ b/docs/api/cassandra/datastax/graph/index.rst @@ -11,6 +11,30 @@ .. autofunction:: graph_object_row_factory +.. autofunction:: graph_graphson2_row_factory + +.. autofunction:: graph_graphson3_row_factory + +.. function:: to_int(value) + + Wraps a value to be explicitly serialized as a graphson Int. + +.. 
function:: to_bigint(value) + + Wraps a value to be explicitly serialized as a graphson Bigint. + +.. function:: to_smallint(value) + + Wraps a value to be explicitly serialized as a graphson Smallint. + +.. function:: to_float(value) + + Wraps a value to be explicitly serialized as a graphson Float. + +.. function:: to_double(value) + + Wraps a value to be explicitly serialized as a graphson Double. + .. autoclass:: GraphProtocol :members: diff --git a/docs/api/cassandra/metadata.rst b/docs/api/cassandra/metadata.rst index b5e6dae904..ca33e34739 100644 --- a/docs/api/cassandra/metadata.rst +++ b/docs/api/cassandra/metadata.rst @@ -37,6 +37,9 @@ Schemas .. autoclass:: TableMetadataV3 () :members: +.. autoclass:: TableMetadataDSE68 () + :members: + .. autoclass:: ColumnMetadata () :members: diff --git a/docs/index.rst b/docs/index.rst index fd5b4e683e..f79cc36ac8 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -89,7 +89,6 @@ Contents graph classic_graph graph_fluent - dse_auth dates_and_times cloud faq From e347d8f36720f41a568d60de3f906fb3080fd30d Mon Sep 17 00:00:00 2001 From: James Falcon Date: Tue, 25 Feb 2020 09:54:56 -0600 Subject: [PATCH 032/211] release 3.22: bump version and release date --- CHANGELOG.rst | 2 +- cassandra/__init__.py | 2 +- docs/installation.rst | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index dc11a9fddd..fe8c2c13e5 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,6 @@ 3.22.0 ====== -UNRELEASED +February 26, 2020 Features -------- diff --git a/cassandra/__init__.py b/cassandra/__init__.py index b86f1a8c90..c4479464c3 100644 --- a/cassandra/__init__.py +++ b/cassandra/__init__.py @@ -22,7 +22,7 @@ def emit(self, record): logging.getLogger('cassandra').addHandler(NullHandler()) -__version_info__ = (3, 21, 0, 'post0+20200128') +__version_info__ = (3, 22, 0) __version__ = '.'.join(map(str, __version_info__)) diff --git a/docs/installation.rst b/docs/installation.rst index d33ce441c9..b381425302 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -26,7 +26,7 @@ To check if the installation was successful, you can run:: python -c 'import cassandra; print cassandra.__version__' -It should print something like "3.21.0". +It should print something like "3.22.0". .. 
_installation-datastax-graph: From c79cf6014c6f5a54c8c99df8b771199faa105137 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Tue, 25 Feb 2020 10:02:24 -0600 Subject: [PATCH 033/211] release 3.22: bump docs.yaml --- docs.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs.yaml b/docs.yaml index e2e1231834..e19ccd7d9e 100644 --- a/docs.yaml +++ b/docs.yaml @@ -22,6 +22,8 @@ sections: # build extensions like libev CASS_DRIVER_NO_CYTHON=1 python setup.py build_ext --inplace --force versions: + - name: '3.22' + ref: e347d8f3 - name: '3.21' ref: 5589d96b - name: '3.20' From 0205565752b4bc88a2617cb04ae3e3696d275f77 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 25 Feb 2020 20:48:01 -0500 Subject: [PATCH 034/211] Remove temporarily the core engine graph documentation --- docs/.nav | 1 - docs/classic_graph.rst | 299 ------------------------ docs/core_graph.rst | 434 +++++++++++++++++++++++++++++++++++ docs/graph.rst | 503 +++++++++++++++-------------------------- docs/graph_fluent.rst | 38 ---- docs/index.rst | 5 +- 6 files changed, 619 insertions(+), 661 deletions(-) delete mode 100644 docs/classic_graph.rst create mode 100644 docs/core_graph.rst diff --git a/docs/.nav b/docs/.nav index d5b54c4e13..568cd6a383 100644 --- a/docs/.nav +++ b/docs/.nav @@ -5,7 +5,6 @@ lwt object_mapper geo_types graph -classic_graph performance query_paging security diff --git a/docs/classic_graph.rst b/docs/classic_graph.rst deleted file mode 100644 index ef68c86359..0000000000 --- a/docs/classic_graph.rst +++ /dev/null @@ -1,299 +0,0 @@ -DataStax Classic Graph Queries -============================== - -Getting Started -~~~~~~~~~~~~~~~ - -First, we need to create a graph in the system. To access the system API, we -use the system execution profile :: - - from cassandra.cluster import Cluster, EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT - - cluster = Cluster() - session = cluster.connect() - - graph_name = 'movies' - session.execute_graph("system.graph(name).ifNotExists().engine(Classic).create()", {'name': graph_name}, - execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT) - - -To execute requests on our newly created graph, we need to setup an execution -profile. Additionally, we also need to set the schema_mode to `development` -for the schema creation:: - - - from cassandra.cluster import Cluster, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT - from cassandra.graph import GraphOptions - - graph_name = 'movies' - ep = GraphExecutionProfile(graph_options=GraphOptions(graph_name=graph_name)) - - cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep}) - session = cluster.connect() - - session.execute_graph("schema.config().option('graph.schema_mode').set('development')") - - -We are ready to configure our graph schema. We will create a simple one for movies:: - - # properties are used to define a vertex - properties = """ - schema.propertyKey("genreId").Text().create(); - schema.propertyKey("personId").Text().create(); - schema.propertyKey("movieId").Text().create(); - schema.propertyKey("name").Text().create(); - schema.propertyKey("title").Text().create(); - schema.propertyKey("year").Int().create(); - schema.propertyKey("country").Text().create(); - """ - - session.execute_graph(properties) # we can execute multiple statements in a single request - - # A Vertex represents a "thing" in the world. 
- vertices = """ - schema.vertexLabel("genre").properties("genreId","name").create(); - schema.vertexLabel("person").properties("personId","name").create(); - schema.vertexLabel("movie").properties("movieId","title","year","country").create(); - """ - - session.execute_graph(vertices) - - # An edge represents a relationship between two vertices - edges = """ - schema.edgeLabel("belongsTo").single().connection("movie","genre").create(); - schema.edgeLabel("actor").connection("movie","person").create(); - """ - - session.execute_graph(edges) - - # Indexes to execute graph requests efficiently - indexes = """ - schema.vertexLabel("genre").index("genresById").materialized().by("genreId").add(); - schema.vertexLabel("genre").index("genresByName").materialized().by("name").add(); - schema.vertexLabel("person").index("personsById").materialized().by("personId").add(); - schema.vertexLabel("person").index("personsByName").materialized().by("name").add(); - schema.vertexLabel("movie").index("moviesById").materialized().by("movieId").add(); - schema.vertexLabel("movie").index("moviesByTitle").materialized().by("title").add(); - schema.vertexLabel("movie").index("moviesByYear").secondary().by("year").add(); - """ - -Next, we'll add some data:: - - session.execute_graph(""" - g.addV('genre').property('genreId', 1).property('name', 'Action').next(); - g.addV('genre').property('genreId', 2).property('name', 'Drama').next(); - g.addV('genre').property('genreId', 3).property('name', 'Comedy').next(); - g.addV('genre').property('genreId', 4).property('name', 'Horror').next(); - """) - - session.execute_graph(""" - g.addV('person').property('personId', 1).property('name', 'Mark Wahlberg').next(); - g.addV('person').property('personId', 2).property('name', 'Leonardo DiCaprio').next(); - g.addV('person').property('personId', 3).property('name', 'Iggy Pop').next(); - """) - - session.execute_graph(""" - g.addV('movie').property('movieId', 1).property('title', 'The Happening'). - property('year', 2008).property('country', 'United States').next(); - g.addV('movie').property('movieId', 2).property('title', 'The Italian Job'). - property('year', 2003).property('country', 'United States').next(); - - g.addV('movie').property('movieId', 3).property('title', 'Revolutionary Road'). - property('year', 2008).property('country', 'United States').next(); - g.addV('movie').property('movieId', 4).property('title', 'The Man in the Iron Mask'). - property('year', 1998).property('country', 'United States').next(); - - g.addV('movie').property('movieId', 5).property('title', 'Dead Man'). 
- property('year', 1995).property('country', 'United States').next(); - """) - -Now that our genre, actor and movie vertices are added, we'll create the relationships (edges) between them:: - - session.execute_graph(""" - genre_horror = g.V().hasLabel('genre').has('name', 'Horror').next(); - genre_drama = g.V().hasLabel('genre').has('name', 'Drama').next(); - genre_action = g.V().hasLabel('genre').has('name', 'Action').next(); - - leo = g.V().hasLabel('person').has('name', 'Leonardo DiCaprio').next(); - mark = g.V().hasLabel('person').has('name', 'Mark Wahlberg').next(); - iggy = g.V().hasLabel('person').has('name', 'Iggy Pop').next(); - - the_happening = g.V().hasLabel('movie').has('title', 'The Happening').next(); - the_italian_job = g.V().hasLabel('movie').has('title', 'The Italian Job').next(); - rev_road = g.V().hasLabel('movie').has('title', 'Revolutionary Road').next(); - man_mask = g.V().hasLabel('movie').has('title', 'The Man in the Iron Mask').next(); - dead_man = g.V().hasLabel('movie').has('title', 'Dead Man').next(); - - the_happening.addEdge('belongsTo', genre_horror); - the_italian_job.addEdge('belongsTo', genre_action); - rev_road.addEdge('belongsTo', genre_drama); - man_mask.addEdge('belongsTo', genre_drama); - man_mask.addEdge('belongsTo', genre_action); - dead_man.addEdge('belongsTo', genre_drama); - - the_happening.addEdge('actor', mark); - the_italian_job.addEdge('actor', mark); - rev_road.addEdge('actor', leo); - man_mask.addEdge('actor', leo); - dead_man.addEdge('actor', iggy); - """) - -We are all set. You can now query your graph. Here are some examples:: - - # Find all movies of the genre Drama - for r in session.execute_graph(""" - g.V().has('genre', 'name', 'Drama').in('belongsTo').valueMap();"""): - print(r) - - # Find all movies of the same genre than the movie 'Dead Man' - for r in session.execute_graph(""" - g.V().has('movie', 'title', 'Dead Man').out('belongsTo').in('belongsTo').valueMap();"""): - print(r) - - # Find all movies of Mark Wahlberg - for r in session.execute_graph(""" - g.V().has('person', 'name', 'Mark Wahlberg').in('actor').valueMap();"""): - print(r) - -To see a more graph examples, see `DataStax Graph Examples `_. - -Graph Types -~~~~~~~~~~~ - -Here are the supported graph types with their python representations: - -========== ================ -DSE Graph Python -========== ================ -boolean bool -bigint long, int (PY3) -int int -smallint int -varint int -float float -double double -uuid uuid.UUID -Decimal Decimal -inet str -timestamp datetime.datetime -date datetime.date -time datetime.time -duration datetime.timedelta -point Point -linestring LineString -polygon Polygon -blob bytearray, buffer (PY2), memoryview (PY3), bytes (PY3) -========== ================ - -Graph Row Factory -~~~~~~~~~~~~~~~~~ - -By default (with :class:`.GraphExecutionProfile.row_factory` set to :func:`.graph.graph_object_row_factory`), known graph result -types are unpacked and returned as specialized types (:class:`.Vertex`, :class:`.Edge`). If the result is not one of these -types, a :class:`.graph.Result` is returned, containing the graph result parsed from JSON and removed from its outer dict. 
-The class has some accessor convenience methods for accessing top-level properties by name (`type`, `properties` above), -or lists by index:: - - # dicts with `__getattr__` or `__getitem__` - result = session.execute_graph("[[key_str: 'value', key_int: 3]]", execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)[0] # Using system exec just because there is no graph defined - result # dse.graph.Result({u'key_str': u'value', u'key_int': 3}) - result.value # {u'key_int': 3, u'key_str': u'value'} (dict) - result.key_str # u'value' - result.key_int # 3 - result['key_str'] # u'value' - result['key_int'] # 3 - - # lists with `__getitem__` - result = session.execute_graph('[[0, 1, 2]]', execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)[0] - result # dse.graph.Result([0, 1, 2]) - result.value # [0, 1, 2] (list) - result[1] # 1 (list[1]) - -You can use a different row factory by setting :attr:`.Session.default_graph_row_factory` or passing it to -:meth:`.Session.execute_graph`. For example, :func:`.graph.single_object_row_factory` returns the JSON result string`, -unparsed. :func:`.graph.graph_result_row_factory` returns parsed, but unmodified results (such that all metadata is retained, -unlike :func:`.graph.graph_object_row_factory`, which sheds some as attributes and properties are unpacked). These results -also provide convenience methods for converting to known types (:meth:`~.Result.as_vertex`, :meth:`~.Result.as_edge`, :meth:`~.Result.as_path`). - -Vertex and Edge properties are never unpacked since their types are unknown. If you know your graph schema and want to -deserialize properties, use the :class:`.GraphSON1Deserializer`. It provides convenient methods to deserialize by types (e.g. -deserialize_date, deserialize_uuid, deserialize_polygon etc.) Example:: - - # ... - from cassandra.graph import GraphSON1Deserializer - - row = session.execute_graph("g.V().toList()")[0] - value = row.properties['my_property_key'][0].value # accessing the VertexProperty value - value = GraphSON1Deserializer.deserialize_timestamp(value) - - print(value) # 2017-06-26 08:27:05 - print(type(value)) # - - -Named Parameters -~~~~~~~~~~~~~~~~ - -Named parameters are passed in a dict to :meth:`.cluster.Session.execute_graph`:: - - result_set = session.execute_graph('[a, b]', {'a': 1, 'b': 2}, execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT) - [r.value for r in result_set] # [1, 2] - -All python types listed in `Graph Types`_ can be passed as named parameters and will be serialized -automatically to their graph representation: - -Example:: - - session.execute_graph(""" - g.addV('person'). - property('name', text_value). - property('age', integer_value). - property('birthday', timestamp_value). - property('house_yard', polygon_value).toList() - """, { - 'text_value': 'Mike Smith', - 'integer_value': 34, - 'timestamp_value': datetime.datetime(1967, 12, 30), - 'polygon_value': Polygon(((30, 10), (40, 40), (20, 40), (10, 20), (30, 10))) - }) - - -As with all Execution Profile parameters, graph options can be set in the cluster default (as shown in the first example) -or specified per execution:: - - ep = session.execution_profile_clone_update(EXEC_PROFILE_GRAPH_DEFAULT, - graph_options=GraphOptions(graph_name='something-else')) - session.execute_graph(statement, execution_profile=ep) - -Using GraphSON2 Protocol -~~~~~~~~~~~~~~~~~~~~~~~~ - -The default graph protocol used is GraphSON1. 
However GraphSON1 may -cause problems of type conversion happening during the serialization -of the query to the DSE Graph server, or the deserialization of the -responses back from a string Gremlin query. GraphSON2 offers better -support for the complex data types handled by DSE Graph. - -DSE >=5.0.4 now offers the possibility to use the GraphSON2 protocol -for graph queries. Enabling GraphSON2 can be done by `changing the -graph protocol of the execution profile` and `setting the graphson2 row factory`:: - - from cassandra.cluster import Cluster, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT - from cassandra.graph import GraphOptions, GraphProtocol, graph_graphson2_row_factory - - # Create a GraphSON2 execution profile - ep = GraphExecutionProfile(graph_options=GraphOptions(graph_name='types', - graph_protocol=GraphProtocol.GRAPHSON_2_0), - row_factory=graph_graphson2_row_factory) - - cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep}) - session = cluster.connect() - session.execute_graph(...) - -Using GraphSON2, all properties will be automatically deserialized to -its Python representation. Note that it may bring significant -behavioral change at runtime. - -It is generally recommended to switch to GraphSON2 as it brings more -consistent support for complex data types in the Graph driver and will -be activated by default in the next major version (Python dse-driver -driver 3.0). diff --git a/docs/core_graph.rst b/docs/core_graph.rst new file mode 100644 index 0000000000..47dc53d38d --- /dev/null +++ b/docs/core_graph.rst @@ -0,0 +1,434 @@ +DataStax Graph Queries +====================== + +The driver executes graph queries over the Cassandra native protocol. Use +:meth:`.Session.execute_graph` or :meth:`.Session.execute_graph_async` for +executing gremlin queries in DataStax Graph. + +The driver defines three Execution Profiles suitable for graph execution: + +* :data:`~.cluster.EXEC_PROFILE_GRAPH_DEFAULT` +* :data:`~.cluster.EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT` +* :data:`~.cluster.EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT` + +See :doc:`getting_started` and :doc:`execution_profiles` +for more detail on working with profiles. + +In DSE 6.8.0, the Core graph engine has been introduced and is now the default. It +provides a better unified multi-model, performance and scale. This guide +is for graphs that use the core engine. If you work with previous versions of +DSE or existing graphs, see :doc:`classic_graph`. + +Getting Started with Graph and the Core Engine +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +First, we need to create a graph in the system. To access the system API, we +use the system execution profile :: + + from cassandra.cluster import Cluster, EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT + + cluster = Cluster() + session = cluster.connect() + + graph_name = 'movies' + session.execute_graph("system.graph(name).create()", {'name': graph_name}, + execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT) + + +Graphs that use the core engine only support GraphSON3. Since they are Cassandra tables under +the hood, we can automatically configure the execution profile with the proper options +(row_factory and graph_protocol) when executing queries. 
You only need to make sure that +the `graph_name` is set and GraphSON3 will be automatically used:: + + from cassandra.cluster import Cluster, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT + + graph_name = 'movies' + ep = GraphExecutionProfile(graph_options=GraphOptions(graph_name=graph_name)) + cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep}) + session = cluster.connect() + session.execute_graph("g.addV(...)") + + +Note that this graph engine detection is based on the metadata. You might experience +some query errors if the graph has been newly created and is not yet in the metadata. This +would result to a badly configured execution profile. If you really want to avoid that, +configure your execution profile explicitly:: + + from cassandra.cluster import Cluster, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT + from cassandra.graph import GraphOptions, GraphProtocol, graph_graphson3_row_factory + + graph_name = 'movies' + ep_graphson3 = GraphExecutionProfile( + row_factory=graph_graphson3_row_factory, + graph_options=GraphOptions( + graph_protocol=GraphProtocol.GRAPHSON_3_0, + graph_name=graph_name)) + + cluster = Cluster(execution_profiles={'core': ep_graphson3}) + session = cluster.connect() + session.execute_graph("g.addV(...)", execution_profile='core') + + +We are ready to configure our graph schema. We will create a simple one for movies:: + + # A Vertex represents a "thing" in the world. + # Create the genre vertex + query = """ + schema.vertexLabel('genre') + .partitionBy('genreId', Int) + .property('name', Text) + .create() + """ + session.execute_graph(query) + + # Create the person vertex + query = """ + schema.vertexLabel('person') + .partitionBy('personId', Int) + .property('name', Text) + .create() + """ + session.execute_graph(query) + + # Create the movie vertex + query = """ + schema.vertexLabel('movie') + .partitionBy('movieId', Int) + .property('title', Text) + .property('year', Int) + .property('country', Text) + .create() + """ + session.execute_graph(query) + + # An edge represents a relationship between two vertices + # Create our edges + queries = """ + schema.edgeLabel('belongsTo').from('movie').to('genre').create(); + schema.edgeLabel('actor').from('movie').to('person').create(); + """ + session.execute_graph(queries) + + # Indexes to execute graph requests efficiently + + # If you have a node with the search workload enabled (solr), use the following: + indexes = """ + schema.vertexLabel('genre').searchIndex() + .by("name") + .create(); + + schema.vertexLabel('person').searchIndex() + .by("name") + .create(); + + schema.vertexLabel('movie').searchIndex() + .by('title') + .by("year") + .create(); + """ + session.execute_graph(indexes) + + # Otherwise, use secondary indexes: + indexes = """ + schema.vertexLabel('genre') + .secondaryIndex('by_genre') + .by('name') + .create() + + schema.vertexLabel('person') + .secondaryIndex('by_name') + .by('name') + .create() + + schema.vertexLabel('movie') + .secondaryIndex('by_title') + .by('title') + .create() + """ + session.execute_graph(indexes) + +Add some edge indexes (materialized views):: + + indexes = """ + schema.edgeLabel('belongsTo') + .from('movie') + .to('genre') + .materializedView('movie__belongsTo__genre_by_in_genreId') + .ifNotExists() + .partitionBy(IN, 'genreId') + .clusterBy(OUT, 'movieId', Asc) + .create() + + schema.edgeLabel('actor') + .from('movie') + .to('person') + .materializedView('movie__actor__person_by_in_personId') + .ifNotExists() + .partitionBy(IN, 'personId') + 
.clusterBy(OUT, 'movieId', Asc) + .create() + """ + session.execute_graph(indexes) + +Next, we'll add some data:: + + session.execute_graph(""" + g.addV('genre').property('genreId', 1).property('name', 'Action').next(); + g.addV('genre').property('genreId', 2).property('name', 'Drama').next(); + g.addV('genre').property('genreId', 3).property('name', 'Comedy').next(); + g.addV('genre').property('genreId', 4).property('name', 'Horror').next(); + """) + + session.execute_graph(""" + g.addV('person').property('personId', 1).property('name', 'Mark Wahlberg').next(); + g.addV('person').property('personId', 2).property('name', 'Leonardo DiCaprio').next(); + g.addV('person').property('personId', 3).property('name', 'Iggy Pop').next(); + """) + + session.execute_graph(""" + g.addV('movie').property('movieId', 1).property('title', 'The Happening'). + property('year', 2008).property('country', 'United States').next(); + g.addV('movie').property('movieId', 2).property('title', 'The Italian Job'). + property('year', 2003).property('country', 'United States').next(); + + g.addV('movie').property('movieId', 3).property('title', 'Revolutionary Road'). + property('year', 2008).property('country', 'United States').next(); + g.addV('movie').property('movieId', 4).property('title', 'The Man in the Iron Mask'). + property('year', 1998).property('country', 'United States').next(); + + g.addV('movie').property('movieId', 5).property('title', 'Dead Man'). + property('year', 1995).property('country', 'United States').next(); + """) + +Now that our genre, actor and movie vertices are added, we'll create the relationships (edges) between them:: + + session.execute_graph(""" + genre_horror = g.V().hasLabel('genre').has('name', 'Horror').id().next(); + genre_drama = g.V().hasLabel('genre').has('name', 'Drama').id().next(); + genre_action = g.V().hasLabel('genre').has('name', 'Action').id().next(); + + leo = g.V().hasLabel('person').has('name', 'Leonardo DiCaprio').id().next(); + mark = g.V().hasLabel('person').has('name', 'Mark Wahlberg').id().next(); + iggy = g.V().hasLabel('person').has('name', 'Iggy Pop').id().next(); + + the_happening = g.V().hasLabel('movie').has('title', 'The Happening').id().next(); + the_italian_job = g.V().hasLabel('movie').has('title', 'The Italian Job').id().next(); + rev_road = g.V().hasLabel('movie').has('title', 'Revolutionary Road').id().next(); + man_mask = g.V().hasLabel('movie').has('title', 'The Man in the Iron Mask').id().next(); + dead_man = g.V().hasLabel('movie').has('title', 'Dead Man').id().next(); + + g.addE('belongsTo').from(__.V(the_happening)).to(__.V(genre_horror)).next(); + g.addE('belongsTo').from(__.V(the_italian_job)).to(__.V(genre_action)).next(); + g.addE('belongsTo').from(__.V(rev_road)).to(__.V(genre_drama)).next(); + g.addE('belongsTo').from(__.V(man_mask)).to(__.V(genre_drama)).next(); + g.addE('belongsTo').from(__.V(man_mask)).to(__.V(genre_action)).next(); + g.addE('belongsTo').from(__.V(dead_man)).to(__.V(genre_drama)).next(); + + g.addE('actor').from(__.V(the_happening)).to(__.V(mark)).next(); + g.addE('actor').from(__.V(the_italian_job)).to(__.V(mark)).next(); + g.addE('actor').from(__.V(rev_road)).to(__.V(leo)).next(); + g.addE('actor').from(__.V(man_mask)).to(__.V(leo)).next(); + g.addE('actor').from(__.V(dead_man)).to(__.V(iggy)).next(); + """) + +We are all set. You can now query your graph. 
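+Any of the queries below can also be executed asynchronously. A minimal
+sketch, assuming the session and default graph execution profile configured
+above (``execute_graph_async`` returns a future; calling ``result()`` blocks
+until the response arrives)::
+
+    future = session.execute_graph_async("g.V().hasLabel('movie').count()")
+    result_set = future.result()
+    print(result_set[0].value)  # number of movie vertices
+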
Here are some examples::
+
+    # Find all movies of the genre Drama
+    for r in session.execute_graph("""
+        g.V().has('genre', 'name', 'Drama').in('belongsTo').valueMap();"""):
+        print(r)
+
+    # Find all movies of the same genre as the movie 'Dead Man'
+    for r in session.execute_graph("""
+        g.V().has('movie', 'title', 'Dead Man').out('belongsTo').in('belongsTo').valueMap();"""):
+        print(r)
+
+    # Find all movies of Mark Wahlberg
+    for r in session.execute_graph("""
+        g.V().has('person', 'name', 'Mark Wahlberg').in('actor').valueMap();"""):
+        print(r)
+
+To see more graph examples, see `DataStax Graph Examples `_.
+
+Graph Types for the Core Engine
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Here are the supported graph types with their python representations:
+
+============ =================
+DSE Graph    Python Driver
+============ =================
+text         str
+boolean      bool
+bigint       long
+int          int
+smallint     int
+varint       long
+double       float
+float        float
+uuid         UUID
+bigdecimal   Decimal
+duration     Duration (cassandra.util)
+inet         str or IPV4Address/IPV6Address (if available)
+timestamp    datetime.datetime
+date         datetime.date
+time         datetime.time
+polygon      Polygon
+point        Point
+linestring   LineString
+blob         bytearray, buffer (PY2), memoryview (PY3), bytes (PY3)
+list         list
+map          dict
+set          set or list
+             (Can return a list due to numerical values returned by Java)
+tuple        tuple
+udt          class or namedtuple
+============ =================
+
+Named Parameters
+~~~~~~~~~~~~~~~~
+
+Named parameters are passed in a dict to :meth:`.cluster.Session.execute_graph`::
+
+    result_set = session.execute_graph('[a, b]', {'a': 1, 'b': 2}, execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)
+    [r.value for r in result_set]  # [1, 2]
+
+All python types listed in `Graph Types for the Core Engine`_ can be passed as named parameters and will be serialized
+automatically to their graph representation:
+
+Example::
+
+    session.execute_graph("""
+        g.addV('person').
+            property('name', text_value).
+            property('age', integer_value).
+            property('birthday', timestamp_value).
+            property('house_yard', polygon_value).next()
+    """, {
+        'text_value': 'Mike Smith',
+        'integer_value': 34,
+        'timestamp_value': datetime.datetime(1967, 12, 30),
+        'polygon_value': Polygon(((30, 10), (40, 40), (20, 40), (10, 20), (30, 10)))
+    })
+
+
+As with all Execution Profile parameters, graph options can be set in the cluster default (as shown in the first example)
+or specified per execution::
+
+    ep = session.execution_profile_clone_update(EXEC_PROFILE_GRAPH_DEFAULT,
+                                                graph_options=GraphOptions(graph_name='something-else'))
+    session.execute_graph(statement, execution_profile=ep)
+
+CQL collections, Tuple and UDT
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This is a very interesting feature of the core engine: we can use all CQL data types, including
+list, map, set, tuple and udt. Here is an example using all these types::
+
+    query = """
+        schema.type('address')
+            .property('address', Text)
+            .property('city', Text)
+            .property('state', Text)
+            .create();
+    """
+    session.execute_graph(query)
+
+    # It works the same way as a normal CQL UDT, so we
+    # can create a udt class and register it
+    class Address(object):
+        def __init__(self, address, city, state):
+            self.address = address
+            self.city = city
+            self.state = state
+
+    session.cluster.register_user_type(graph_name, 'address', Address)
+
+    query = """
+        schema.vertexLabel('person')
+            .partitionBy('personId', Int)
+            .property('address', typeOf('address'))
+            .property('friends', listOf(Text))
+            .property('skills', setOf(Text))
+            .property('scores', mapOf(Text, Int))
+            .property('last_workout', tupleOf(Text, Date))
+            .create()
+    """
+    session.execute_graph(query)
+
+    # insertion example
+    query = """
+        g.addV('person')
+            .property('personId', pid)
+            .property('address', address)
+            .property('friends', friends)
+            .property('skills', skills)
+            .property('scores', scores)
+            .property('last_workout', last_workout)
+            .next()
+    """
+
+    session.execute_graph(query, {
+        'pid': 3,
+        'address': Address('42 Smith St', 'Quebec', 'QC'),
+        'friends': ['Al', 'Mike', 'Cathy'],
+        'skills': {'food', 'fight', 'chess'},
+        'scores': {'math': 98, 'french': 3},
+        'last_workout': ('CrossFit', datetime.date(2018, 11, 20))
+    })
+
+Limitations
+-----------
+
+Since Python is not a strongly-typed language and the UDT/Tuple graphson representation is, you might
+get schema errors when trying to write numerical data. Example::
+
+    session.execute_graph("""
+        schema.vertexLabel('test_tuple').partitionBy('id', Int).property('t', tupleOf(Text, Bigint)).create()
+    """)
+
+    session.execute_graph("""
+        g.addV('test_tuple').property('id', 0).property('t', t)
+        """,
+        {'t': ('Test', 99)}
+    )
+
+    # error: [Invalid query] message="Value component 1 is of type int, not bigint"
+
+This is because the server requires the client to include a GraphSON schema definition
+with every UDT or tuple query. In the general case, the driver can't determine what Graph type
+is meant by, e.g., an int value, and so it can't serialize the value with the correct type in the schema.
+The driver provides some numerical type-wrapper factories that you can use to specify types:
+
+* :func:`~.to_int`
+* :func:`~.to_bigint`
+* :func:`~.to_smallint`
+* :func:`~.to_float`
+* :func:`~.to_double`
+
+Here's a working example of the case above::
+
+    from cassandra.graph import to_bigint
+
+    session.execute_graph("""
+        g.addV('test_tuple').property('id', 0).property('t', t)
+        """,
+        {'t': ('Test', to_bigint(99))}
+    )
+
+Continuous Paging
+~~~~~~~~~~~~~~~~~
+
+This is another nice feature that comes with the core engine: continuous paging with
+graph queries. If all nodes of the cluster are >= DSE 6.8.0, it is automatically
+enabled under the hood to get the best performance.
+If you want to explicitly enable/disable it, you can do it through the execution profile::
+
+    # Disable it
+    ep = GraphExecutionProfile(..., continuous_paging_options=None)
+    cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep})
+
+    # Enable with a custom max_pages option
+    ep = GraphExecutionProfile(...,
+                               continuous_paging_options=ContinuousPagingOptions(max_pages=10))
+    cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep})
diff --git a/docs/graph.rst b/docs/graph.rst
index 47dc53d38d..49ec51e73b 100644
--- a/docs/graph.rst
+++ b/docs/graph.rst
@@ -1,26 +1,8 @@
 DataStax Graph Queries
 ======================
 
-The driver executes graph queries over the Cassandra native protocol. Use
-:meth:`.Session.execute_graph` or :meth:`.Session.execute_graph_async` for
-executing gremlin queries in DataStax Graph.
-
-The driver defines three Execution Profiles suitable for graph execution:
-
-* :data:`~.cluster.EXEC_PROFILE_GRAPH_DEFAULT`
-* :data:`~.cluster.EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT`
-* :data:`~.cluster.EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT`
-
-See :doc:`getting_started` and :doc:`execution_profiles`
-for more detail on working with profiles.
-
-In DSE 6.8.0, the Core graph engine has been introduced and is now the default. It
-provides a better unified multi-model, performance and scale. This guide
-is for graphs that use the core engine. If you work with previous versions of
-DSE or existing graphs, see :doc:`classic_graph`.
-
-Getting Started with Graph and the Core Engine
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Getting Started
+~~~~~~~~~~~~~~~
 
 First, we need to create a graph in the system. To access the system API, we
 use the system execution profile ::
@@ -31,204 +13,129 @@ use the system execution profile ::
     session = cluster.connect()
 
     graph_name = 'movies'
-    session.execute_graph("system.graph(name).create()", {'name': graph_name},
+    session.execute_graph("system.graph(name).ifNotExists().create()", {'name': graph_name},
                           execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)
 
-Graphs that use the core engine only support GraphSON3. Since they are Cassandra tables under
-the hood, we can automatically configure the execution profile with the proper options
-(row_factory and graph_protocol) when executing queries. You only need to make sure that
-the `graph_name` is set and GraphSON3 will be automatically used::
-
-    from cassandra.cluster import Cluster, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT
-
-    graph_name = 'movies'
-    ep = GraphExecutionProfile(graph_options=GraphOptions(graph_name=graph_name))
-    cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep})
-    session = cluster.connect()
-    session.execute_graph("g.addV(...)")
-
+To execute requests on our newly created graph, we need to setup an execution
+profile. Additionally, we also need to set the schema_mode to `development`
+for the schema creation::
 
-Note that this graph engine detection is based on the metadata. You might experience
-some query errors if the graph has been newly created and is not yet in the metadata. This
-would result to a badly configured execution profile.
If you really want to avoid that, -configure your execution profile explicitly:: from cassandra.cluster import Cluster, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT - from cassandra.graph import GraphOptions, GraphProtocol, graph_graphson3_row_factory + from cassandra.graph import GraphOptions graph_name = 'movies' - ep_graphson3 = GraphExecutionProfile( - row_factory=graph_graphson3_row_factory, - graph_options=GraphOptions( - graph_protocol=GraphProtocol.GRAPHSON_3_0, - graph_name=graph_name)) + ep = GraphExecutionProfile(graph_options=GraphOptions(graph_name=graph_name)) - cluster = Cluster(execution_profiles={'core': ep_graphson3}) + cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep}) session = cluster.connect() - session.execute_graph("g.addV(...)", execution_profile='core') + + session.execute_graph("schema.config().option('graph.schema_mode').set('development')") We are ready to configure our graph schema. We will create a simple one for movies:: - # A Vertex represents a "thing" in the world. - # Create the genre vertex - query = """ - schema.vertexLabel('genre') - .partitionBy('genreId', Int) - .property('name', Text) - .create() + # properties are used to define a vertex + properties = """ + schema.propertyKey("genreId").Text().create(); + schema.propertyKey("personId").Text().create(); + schema.propertyKey("movieId").Text().create(); + schema.propertyKey("name").Text().create(); + schema.propertyKey("title").Text().create(); + schema.propertyKey("year").Int().create(); + schema.propertyKey("country").Text().create(); """ - session.execute_graph(query) - - # Create the person vertex - query = """ - schema.vertexLabel('person') - .partitionBy('personId', Int) - .property('name', Text) - .create() - """ - session.execute_graph(query) - - # Create the movie vertex - query = """ - schema.vertexLabel('movie') - .partitionBy('movieId', Int) - .property('title', Text) - .property('year', Int) - .property('country', Text) - .create() - """ - session.execute_graph(query) - - # An edge represents a relationship between two vertices - # Create our edges - queries = """ - schema.edgeLabel('belongsTo').from('movie').to('genre').create(); - schema.edgeLabel('actor').from('movie').to('person').create(); - """ - session.execute_graph(queries) - # Indexes to execute graph requests efficiently + session.execute_graph(properties) # we can execute multiple statements in a single request - # If you have a node with the search workload enabled (solr), use the following: - indexes = """ - schema.vertexLabel('genre').searchIndex() - .by("name") - .create(); - - schema.vertexLabel('person').searchIndex() - .by("name") - .create(); - - schema.vertexLabel('movie').searchIndex() - .by('title') - .by("year") - .create(); + # A Vertex represents a "thing" in the world. 
+ vertices = """ + schema.vertexLabel("genre").properties("genreId","name").create(); + schema.vertexLabel("person").properties("personId","name").create(); + schema.vertexLabel("movie").properties("movieId","title","year","country").create(); """ - session.execute_graph(indexes) - # Otherwise, use secondary indexes: - indexes = """ - schema.vertexLabel('genre') - .secondaryIndex('by_genre') - .by('name') - .create() - - schema.vertexLabel('person') - .secondaryIndex('by_name') - .by('name') - .create() - - schema.vertexLabel('movie') - .secondaryIndex('by_title') - .by('title') - .create() + session.execute_graph(vertices) + + # An edge represents a relationship between two vertices + edges = """ + schema.edgeLabel("belongsTo").single().connection("movie","genre").create(); + schema.edgeLabel("actor").connection("movie","person").create(); """ - session.execute_graph(indexes) -Add some edge indexes (materialized views):: + session.execute_graph(edges) + # Indexes to execute graph requests efficiently indexes = """ - schema.edgeLabel('belongsTo') - .from('movie') - .to('genre') - .materializedView('movie__belongsTo__genre_by_in_genreId') - .ifNotExists() - .partitionBy(IN, 'genreId') - .clusterBy(OUT, 'movieId', Asc) - .create() - - schema.edgeLabel('actor') - .from('movie') - .to('person') - .materializedView('movie__actor__person_by_in_personId') - .ifNotExists() - .partitionBy(IN, 'personId') - .clusterBy(OUT, 'movieId', Asc) - .create() + schema.vertexLabel("genre").index("genresById").materialized().by("genreId").add(); + schema.vertexLabel("genre").index("genresByName").materialized().by("name").add(); + schema.vertexLabel("person").index("personsById").materialized().by("personId").add(); + schema.vertexLabel("person").index("personsByName").materialized().by("name").add(); + schema.vertexLabel("movie").index("moviesById").materialized().by("movieId").add(); + schema.vertexLabel("movie").index("moviesByTitle").materialized().by("title").add(); + schema.vertexLabel("movie").index("moviesByYear").secondary().by("year").add(); """ - session.execute_graph(indexes) Next, we'll add some data:: session.execute_graph(""" - g.addV('genre').property('genreId', 1).property('name', 'Action').next(); - g.addV('genre').property('genreId', 2).property('name', 'Drama').next(); - g.addV('genre').property('genreId', 3).property('name', 'Comedy').next(); - g.addV('genre').property('genreId', 4).property('name', 'Horror').next(); + g.addV('genre').property('genreId', 1).property('name', 'Action').next(); + g.addV('genre').property('genreId', 2).property('name', 'Drama').next(); + g.addV('genre').property('genreId', 3).property('name', 'Comedy').next(); + g.addV('genre').property('genreId', 4).property('name', 'Horror').next(); """) session.execute_graph(""" - g.addV('person').property('personId', 1).property('name', 'Mark Wahlberg').next(); - g.addV('person').property('personId', 2).property('name', 'Leonardo DiCaprio').next(); - g.addV('person').property('personId', 3).property('name', 'Iggy Pop').next(); + g.addV('person').property('personId', 1).property('name', 'Mark Wahlberg').next(); + g.addV('person').property('personId', 2).property('name', 'Leonardo DiCaprio').next(); + g.addV('person').property('personId', 3).property('name', 'Iggy Pop').next(); """) session.execute_graph(""" - g.addV('movie').property('movieId', 1).property('title', 'The Happening'). 
- property('year', 2008).property('country', 'United States').next(); - g.addV('movie').property('movieId', 2).property('title', 'The Italian Job'). - property('year', 2003).property('country', 'United States').next(); - - g.addV('movie').property('movieId', 3).property('title', 'Revolutionary Road'). - property('year', 2008).property('country', 'United States').next(); - g.addV('movie').property('movieId', 4).property('title', 'The Man in the Iron Mask'). - property('year', 1998).property('country', 'United States').next(); - - g.addV('movie').property('movieId', 5).property('title', 'Dead Man'). - property('year', 1995).property('country', 'United States').next(); + g.addV('movie').property('movieId', 1).property('title', 'The Happening'). + property('year', 2008).property('country', 'United States').next(); + g.addV('movie').property('movieId', 2).property('title', 'The Italian Job'). + property('year', 2003).property('country', 'United States').next(); + + g.addV('movie').property('movieId', 3).property('title', 'Revolutionary Road'). + property('year', 2008).property('country', 'United States').next(); + g.addV('movie').property('movieId', 4).property('title', 'The Man in the Iron Mask'). + property('year', 1998).property('country', 'United States').next(); + + g.addV('movie').property('movieId', 5).property('title', 'Dead Man'). + property('year', 1995).property('country', 'United States').next(); """) Now that our genre, actor and movie vertices are added, we'll create the relationships (edges) between them:: session.execute_graph(""" - genre_horror = g.V().hasLabel('genre').has('name', 'Horror').id().next(); - genre_drama = g.V().hasLabel('genre').has('name', 'Drama').id().next(); - genre_action = g.V().hasLabel('genre').has('name', 'Action').id().next(); - - leo = g.V().hasLabel('person').has('name', 'Leonardo DiCaprio').id().next(); - mark = g.V().hasLabel('person').has('name', 'Mark Wahlberg').id().next(); - iggy = g.V().hasLabel('person').has('name', 'Iggy Pop').id().next(); - - the_happening = g.V().hasLabel('movie').has('title', 'The Happening').id().next(); - the_italian_job = g.V().hasLabel('movie').has('title', 'The Italian Job').id().next(); - rev_road = g.V().hasLabel('movie').has('title', 'Revolutionary Road').id().next(); - man_mask = g.V().hasLabel('movie').has('title', 'The Man in the Iron Mask').id().next(); - dead_man = g.V().hasLabel('movie').has('title', 'Dead Man').id().next(); - - g.addE('belongsTo').from(__.V(the_happening)).to(__.V(genre_horror)).next(); - g.addE('belongsTo').from(__.V(the_italian_job)).to(__.V(genre_action)).next(); - g.addE('belongsTo').from(__.V(rev_road)).to(__.V(genre_drama)).next(); - g.addE('belongsTo').from(__.V(man_mask)).to(__.V(genre_drama)).next(); - g.addE('belongsTo').from(__.V(man_mask)).to(__.V(genre_action)).next(); - g.addE('belongsTo').from(__.V(dead_man)).to(__.V(genre_drama)).next(); - - g.addE('actor').from(__.V(the_happening)).to(__.V(mark)).next(); - g.addE('actor').from(__.V(the_italian_job)).to(__.V(mark)).next(); - g.addE('actor').from(__.V(rev_road)).to(__.V(leo)).next(); - g.addE('actor').from(__.V(man_mask)).to(__.V(leo)).next(); - g.addE('actor').from(__.V(dead_man)).to(__.V(iggy)).next(); + genre_horror = g.V().hasLabel('genre').has('name', 'Horror').next(); + genre_drama = g.V().hasLabel('genre').has('name', 'Drama').next(); + genre_action = g.V().hasLabel('genre').has('name', 'Action').next(); + + leo = g.V().hasLabel('person').has('name', 'Leonardo DiCaprio').next(); + mark = 
g.V().hasLabel('person').has('name', 'Mark Wahlberg').next(); + iggy = g.V().hasLabel('person').has('name', 'Iggy Pop').next(); + + the_happening = g.V().hasLabel('movie').has('title', 'The Happening').next(); + the_italian_job = g.V().hasLabel('movie').has('title', 'The Italian Job').next(); + rev_road = g.V().hasLabel('movie').has('title', 'Revolutionary Road').next(); + man_mask = g.V().hasLabel('movie').has('title', 'The Man in the Iron Mask').next(); + dead_man = g.V().hasLabel('movie').has('title', 'Dead Man').next(); + + the_happening.addEdge('belongsTo', genre_horror); + the_italian_job.addEdge('belongsTo', genre_action); + rev_road.addEdge('belongsTo', genre_drama); + man_mask.addEdge('belongsTo', genre_drama); + man_mask.addEdge('belongsTo', genre_action); + dead_man.addEdge('belongsTo', genre_drama); + + the_happening.addEdge('actor', mark); + the_italian_job.addEdge('actor', mark); + rev_road.addEdge('actor', leo); + man_mask.addEdge('actor', leo); + dead_man.addEdge('actor', iggy); """) We are all set. You can now query your graph. Here are some examples:: @@ -237,7 +144,7 @@ We are all set. You can now query your graph. Here are some examples:: for r in session.execute_graph(""" g.V().has('genre', 'name', 'Drama').in('belongsTo').valueMap();"""): print(r) - + # Find all movies of the same genre than the movie 'Dead Man' for r in session.execute_graph(""" g.V().has('movie', 'title', 'Dead Man').out('belongsTo').in('belongsTo').valueMap();"""): @@ -250,40 +157,78 @@ We are all set. You can now query your graph. Here are some examples:: To see a more graph examples, see `DataStax Graph Examples `_. -Graph Types for the Core Engine -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Graph Types +~~~~~~~~~~~ Here are the supported graph types with their python representations: -============ ================= -DSE Graph Python Driver -============ ================= -text str -boolean bool -bigint long -int int -smallint int -varint long -double float -float float -uuid UUID -bigdecimal Decimal -duration Duration (cassandra.util) -inet str or IPV4Address/IPV6Address (if available) -timestamp datetime.datetime -date datetime.date -time datetime.time -polygon Polygon -point Point -linestring LineString -blob bytearray, buffer (PY2), memoryview (PY3), bytes (PY3) -list list -map dict -set set or list - (Can return a list due to numerical values returned by Java) -tuple tuple -udt class or namedtuple -============ ================= +========== ================ +DSE Graph Python +========== ================ +boolean bool +bigint long, int (PY3) +int int +smallint int +varint int +float float +double double +uuid uuid.UUID +Decimal Decimal +inet str +timestamp datetime.datetime +date datetime.date +time datetime.time +duration datetime.timedelta +point Point +linestring LineString +polygon Polygon +blob bytearray, buffer (PY2), memoryview (PY3), bytes (PY3) +========== ================ + +Graph Row Factory +~~~~~~~~~~~~~~~~~ + +By default (with :class:`.GraphExecutionProfile.row_factory` set to :func:`.graph.graph_object_row_factory`), known graph result +types are unpacked and returned as specialized types (:class:`.Vertex`, :class:`.Edge`). If the result is not one of these +types, a :class:`.graph.Result` is returned, containing the graph result parsed from JSON and removed from its outer dict. 
+The class has some accessor convenience methods for accessing top-level properties by name (`type`, `properties` above),
+or lists by index::
+
+    # dicts with `__getattr__` or `__getitem__`
+    result = session.execute_graph("[[key_str: 'value', key_int: 3]]", execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)[0]  # Using system exec just because there is no graph defined
+    result  # dse.graph.Result({u'key_str': u'value', u'key_int': 3})
+    result.value  # {u'key_int': 3, u'key_str': u'value'} (dict)
+    result.key_str  # u'value'
+    result.key_int  # 3
+    result['key_str']  # u'value'
+    result['key_int']  # 3
+
+    # lists with `__getitem__`
+    result = session.execute_graph('[[0, 1, 2]]', execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)[0]
+    result  # dse.graph.Result([0, 1, 2])
+    result.value  # [0, 1, 2] (list)
+    result[1]  # 1 (list[1])
+
+You can use a different row factory by setting :attr:`.Session.default_graph_row_factory` or passing it to
+:meth:`.Session.execute_graph`. For example, :func:`.graph.single_object_row_factory` returns the JSON result string,
+unparsed. :func:`.graph.graph_result_row_factory` returns parsed, but unmodified results (such that all metadata is retained,
+unlike :func:`.graph.graph_object_row_factory`, which sheds some as attributes and properties are unpacked). These results
+also provide convenience methods for converting to known types (:meth:`~.Result.as_vertex`, :meth:`~.Result.as_edge`, :meth:`~.Result.as_path`).
+
+Vertex and Edge properties are never unpacked since their types are unknown. If you know your graph schema and want to
+deserialize properties, use the :class:`.GraphSON1Deserializer`. It provides convenient methods to deserialize by type (e.g.
+deserialize_date, deserialize_uuid, deserialize_polygon, etc.). Example::
+
+    # ...
+    from cassandra.graph import GraphSON1Deserializer
+
+    row = session.execute_graph("g.V().toList()")[0]
+    value = row.properties['my_property_key'][0].value  # accessing the VertexProperty value
+    value = GraphSON1Deserializer.deserialize_timestamp(value)
+
+    print(value)  # 2017-06-26 08:27:05
+    print(type(value))  # <type 'datetime.datetime'>
+
 
 Named Parameters
 ~~~~~~~~~~~~~~~~
@@ -293,7 +238,7 @@ Named parameters are passed in a dict to :meth:`.cluster.Session.execute_graph`:
 
     result_set = session.execute_graph('[a, b]', {'a': 1, 'b': 2}, execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)
     [r.value for r in result_set]  # [1, 2]
 
-All python types listed in `Graph Types for the Core Engine`_ can be passed as named parameters and will be serialized
+All python types listed in `Graph Types`_ can be passed as named parameters and will be serialized
 automatically to their graph representation:
 
 Example::
@@ -303,7 +248,7 @@ Example::
         property('name', text_value).
         property('age', integer_value).
         property('birthday', timestamp_value).
-        property('house_yard', polygon_value).next()
+        property('house_yard', polygon_value).toList()
     """, {
         'text_value': 'Mike Smith',
         'integer_value': 34,
@@ -319,116 +264,36 @@ or specified per execution::
                                                 graph_options=GraphOptions(graph_name='something-else'))
     session.execute_graph(statement, execution_profile=ep)
 
-CQL collections, Tuple and UDT
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Using GraphSON2 Protocol
+~~~~~~~~~~~~~~~~~~~~~~~~
 
-This is a very interesting feature of the core engine: we can use all CQL data types, including
-list, map, set, tuple and udt. Here is an example using all these types::
+The default graph protocol used is GraphSON1.
However GraphSON1 may +cause problems of type conversion happening during the serialization +of the query to the DSE Graph server, or the deserialization of the +responses back from a string Gremlin query. GraphSON2 offers better +support for the complex data types handled by DSE Graph. - query = """ - schema.type('address') - .property('address', Text) - .property('city', Text) - .property('state', Text) - .create(); - """ - session.execute_graph(query) - - # It works the same way than normal CQL UDT, so we - # can create an udt class and register it - class Address(object): - def __init__(self, address, city, state): - self.address = address - self.city = city - self.state = state - - session.cluster.register_user_type(graph_name, 'address', Address) - - query = """ - schema.vertexLabel('person') - .partitionBy('personId', Int) - .property('address', typeOf('address')) - .property('friends', listOf(Text)) - .property('skills', setOf(Text)) - .property('scores', mapOf(Text, Int)) - .property('last_workout', tupleOf(Text, Date)) - .create() - """ - session.execute_graph(query) - - # insertion example - query = """ - g.addV('person') - .property('personId', pid) - .property('address', address) - .property('friends', friends) - .property('skills', skills) - .property('scores', scores) - .property('last_workout', last_workout) - .next() - """ - - session.execute_graph(query, { - 'pid': 3, - 'address': Address('42 Smith St', 'Quebec', 'QC'), - 'friends': ['Al', 'Mike', 'Cathy'], - 'skills': {'food', 'fight', 'chess'}, - 'scores': {'math': 98, 'french': 3}, - 'last_workout': ('CrossFit', datetime.date(2018, 11, 20)) - }) - -Limitations ------------ +DSE >=5.0.4 now offers the possibility to use the GraphSON2 protocol +for graph queries. Enabling GraphSON2 can be done by `changing the +graph protocol of the execution profile` and `setting the graphson2 row factory`:: -Since Python is not a strongly-typed language and the UDT/Tuple graphson representation is, you might -get schema errors when trying to write numerical data. Example:: - - session.execute_graph(""" - schema.vertexLabel('test_tuple').partitionBy('id', Int).property('t', tupleOf(Text, Bigint)).create() - """) - - session.execute_graph(""" - g.addV('test_tuple').property('id', 0).property('t', t) - """, - {'t': ('Test', 99))} - ) - - # error: [Invalid query] message="Value component 1 is of type int, not bigint" - -This is because the server requires the client to include a GraphSON schema definition -with every UDT or tuple query. In the general case, the driver can't determine what Graph type -is meant by, e.g., an int value, and so it can't serialize the value with the correct type in the schema. -The driver provides some numerical type-wrapper factories that you can use to specify types: - -* :func:`~.to_int` -* :func:`~.to_bigint` -* :func:`~.to_smallint` -* :func:`~.to_float` -* :func:`~.to_double` - -Here's the working example of the case above:: - - from cassandra.graph import to_bigint - - session.execute_graph(""" - g.addV('test_tuple').property('id', 0).property('t', t) - """, - {'t': ('Test', to_bigint(99))} - ) - -Continuous Paging -~~~~~~~~~~~~~~~~~ + from cassandra.cluster import Cluster, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT + from cassandra.graph import GraphOptions, GraphProtocol, graph_graphson2_row_factory -This is another nice feature that comes with the core engine: continuous paging with -graph queries. 
If all nodes of the cluster are >= DSE 6.8.0, it is automatically -enabled under the hood to get the best performance. If you want to explicitly -enable/disable it, you can do it through the execution profile:: + # Create a GraphSON2 execution profile + ep = GraphExecutionProfile(graph_options=GraphOptions(graph_name='types', + graph_protocol=GraphProtocol.GRAPHSON_2_0), + row_factory=graph_graphson2_row_factory) - # Disable it - ep = GraphExecutionProfile(..., continuous_paging_options=None)) cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep}) + session = cluster.connect() + session.execute_graph(...) - # Enable with a custom max_pages option - ep = GraphExecutionProfile(..., - continuous_paging_options=ContinuousPagingOptions(max_pages=10))) - cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep}) +Using GraphSON2, all properties will be automatically deserialized to +its Python representation. Note that it may bring significant +behavioral change at runtime. + +It is generally recommended to switch to GraphSON2 as it brings more +consistent support for complex data types in the Graph driver and will +be activated by default in the next major version (Python dse-driver +driver 3.0). diff --git a/docs/graph_fluent.rst b/docs/graph_fluent.rst index 9a039847c4..c79aa1ecf4 100644 --- a/docs/graph_fluent.rst +++ b/docs/graph_fluent.rst @@ -71,28 +71,6 @@ If you want to change execution property defaults, please see the :doc:`Executio for a more generalized discussion of the API. Graph traversal queries use the same execution profile defined for DSE graph. If you need to change the default properties, please refer to the :doc:`DSE Graph query documentation page ` -Configuring a Traversal Execution Profile for the Core graph engine -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -To execute a traversal query with graphs that use the core engine, you need to configure -a graphson3 execution profile: - -.. code-block:: python - - from cassandra.cluster import Cluster, EXEC_PROFILE_GRAPH_DEFAULT - from cassandra.datastax.graph import GraphProtocol - from cassandra.datastax.graph.fluent import DseGraph - - ep_graphson3 = DseGraph.create_execution_profile( - 'my_core_graph_name', - graph_protocol=GraphProtocol.GRAPHSON_3_0 - ) - cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep_graphson3}) - - g = DseGraph.traversal_source(session) - print g.V().toList() - - Explicit Graph Traversal Execution with a DSE Session ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -121,22 +99,6 @@ Below is an example of explicit execution. For this example, assume the schema h for result in session.execute_graph(v_query): pprint(result.value) -Converting a traversal to a bytecode query for core graphs require some more work, because we -need the cluster context for UDT and tuple types: - -.. 
code-block:: python - - g = DseGraph.traversal_source(session=session) - context = { - 'cluster': cluster, - 'graph_name': 'the_graph_for_the_query' - } - addV_query = DseGraph.query_from_traversal( - g.addV('genre').property('genreId', 1).property('name', 'Action'), - graph_protocol=GraphProtocol.GRAPHSON_3_0, - context=context - ) - Implicit Graph Traversal Execution with TinkerPop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/index.rst b/docs/index.rst index f79cc36ac8..3bd6312f7a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -54,10 +54,7 @@ Contents Working with DSE geometry types :doc:`graph` - Graph queries with the Core engine - -:doc:`classic_graph` - Graph queries with the Classic engine + Graph queries :doc:`graph_fluent` DataStax Graph Fluent API From 72ee31bfbdf9176418c3d6e6268c7ffeee7ad003 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 25 Feb 2020 20:49:49 -0500 Subject: [PATCH 035/211] release 3.22: bump docs.yaml (no core graph docs) --- docs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs.yaml b/docs.yaml index e19ccd7d9e..4b34f6cb5f 100644 --- a/docs.yaml +++ b/docs.yaml @@ -23,7 +23,7 @@ sections: CASS_DRIVER_NO_CYTHON=1 python setup.py build_ext --inplace --force versions: - name: '3.22' - ref: e347d8f3 + ref: 02055657 - name: '3.21' ref: 5589d96b - name: '3.20' From c0bb0427e705e04c9ea65ac31a6c69cf6f932a94 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Wed, 26 Feb 2020 13:16:35 -0600 Subject: [PATCH 036/211] release 3.22: bump docs.yaml again --- docs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs.yaml b/docs.yaml index 4b34f6cb5f..fe5bbf548e 100644 --- a/docs.yaml +++ b/docs.yaml @@ -23,7 +23,7 @@ sections: CASS_DRIVER_NO_CYTHON=1 python setup.py build_ext --inplace --force versions: - name: '3.22' - ref: 02055657 + ref: a1f8e102 - name: '3.21' ref: 5589d96b - name: '3.20' From 75affb70779a9d8983a070564a5ec382fe4541f1 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Thu, 12 Mar 2020 15:17:56 -0500 Subject: [PATCH 037/211] PYTHON-1228 Provide log wrapper to asyncore so it doesnt error on shutdown --- cassandra/io/asyncorereactor.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/cassandra/io/asyncorereactor.py b/cassandra/io/asyncorereactor.py index 1a6b9fd3e9..e07aab4697 100644 --- a/cassandra/io/asyncorereactor.py +++ b/cassandra/io/asyncorereactor.py @@ -35,7 +35,21 @@ from cassandra.connection import Connection, ConnectionShutdown, NONBLOCKING, Timer, TimerManager -log = logging.getLogger(__name__) + +# TODO: Remove when Python 2 is removed +class LogWrapper(object): + """ PYTHON-1228. 
If our logger has disappeared, there's nothing we can do, so just execute nothing """ + def __init__(self): + self._log = logging.getLogger(__name__) + + def __getattr__(self, name): + try: + return getattr(self._log, name) + except: + return lambda *args, **kwargs: None + + +log = LogWrapper() _dispatcher_map = {} From cbcb2259c4adcab60d2c5844080058dd505e5e64 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Fri, 13 Mar 2020 09:38:20 -0500 Subject: [PATCH 038/211] Changelog entry for PYTHON-1228 --- CHANGELOG.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index fe8c2c13e5..c28d9229f9 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,11 @@ +3.23.0 +====== +Unreleased + +Bug Fixes +--------- +* Asyncore logging exception on shutdown (PYTHON-1228) + 3.22.0 ====== February 26, 2020 From 316937f105be9f9170b9bf74c20895d96f9880e7 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Mon, 16 Mar 2020 07:49:35 -0500 Subject: [PATCH 039/211] PYTHON-1226 Remove all user-facing references to Apollo or Constellation (#1070) --- CHANGELOG.rst | 2 +- README.rst | 2 +- cassandra/cluster.py | 2 +- cassandra/datastax/cloud/__init__.py | 6 +++--- docs/cloud.rst | 8 ++++---- docs/index.rst | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c28d9229f9..413b0631a2 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -154,7 +154,7 @@ October 28, 2019 Features -------- -* DataStax Apollo Support (PYTHON-1074) +* DataStax Astra Support (PYTHON-1074) * Use 4.0 schema parser in 4 alpha and snapshot builds (PYTHON-1158) Bug Fixes diff --git a/README.rst b/README.rst index 6fabdf7531..6bd3e45749 100644 --- a/README.rst +++ b/README.rst @@ -22,7 +22,7 @@ Features * Configurable `load balancing `_ and `retry policies `_ * `Concurrent execution utilities `_ * `Object mapper `_ -* `Connecting to DataStax Apollo database (cloud) `_ +* `Connecting to DataStax Astra database (cloud) `_ * DSE Graph execution API * DSE Geometric type serialization * DSE PlainText and GSSAPI authentication diff --git a/cassandra/cluster.py b/cassandra/cluster.py index 58051104a1..255d035dac 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -3469,7 +3469,7 @@ def connect(self): self._protocol_version = self._cluster.protocol_version self._set_new_connection(self._reconnect_internal()) - self._cluster.metadata.dbaas = self._connection._product_type == dscloud.PRODUCT_APOLLO + self._cluster.metadata.dbaas = self._connection._product_type == dscloud.DATASTAX_CLOUD_PRODUCT_TYPE def _set_new_connection(self, conn): """ diff --git a/cassandra/datastax/cloud/__init__.py b/cassandra/datastax/cloud/__init__.py index 46fd822b87..f79d72a7a2 100644 --- a/cassandra/datastax/cloud/__init__.py +++ b/cassandra/datastax/cloud/__init__.py @@ -41,7 +41,7 @@ __all__ = ['get_cloud_config'] -PRODUCT_APOLLO = "DATASTAX_APOLLO" +DATASTAX_CLOUD_PRODUCT_TYPE = "DATASTAX_APOLLO" class CloudConfig(object): @@ -138,7 +138,7 @@ def read_metadata_info(config, cloud_config): except Exception as e: log.exception(e) raise DriverException("Unable to connect to the metadata service at %s. " - "Check the cluster status in the Constellation cloud console. " % url) + "Check the cluster status in the cloud console. " % url) if response.code != 200: raise DriverException(("Error while fetching the metadata at: %s. 
" @@ -183,7 +183,7 @@ def _pyopenssl_context_from_cert(ca_cert_location, cert_location, key_location): except ImportError as e: six.reraise( ImportError, - ImportError("PyOpenSSL must be installed to connect to Apollo with the Eventlet or Twisted event loops"), + ImportError("PyOpenSSL must be installed to connect to Astra with the Eventlet or Twisted event loops"), sys.exc_info()[2] ) ssl_context = SSL.Context(SSL.TLSv1_METHOD) diff --git a/docs/cloud.rst b/docs/cloud.rst index 7ddb763a42..e8cc2fa750 100644 --- a/docs/cloud.rst +++ b/docs/cloud.rst @@ -2,9 +2,9 @@ Cloud ----- Connecting ========== -To connect to a DataStax Apollo cluster: +To connect to a DataStax Astra cluster: -1. Download the secure connect bundle from your Apollo account. +1. Download the secure connect bundle from your Astra account. 2. Connect to your cluster with .. code-block:: python @@ -19,9 +19,9 @@ To connect to a DataStax Apollo cluster: cluster = Cluster(cloud=cloud_config, auth_provider=auth_provider) session = cluster.connect() -Apollo Differences +Astra Differences ================== -In most circumstances, the client code for interacting with an Apollo cluster will be the same as interacting with any other Cassandra cluster. The exceptions being: +In most circumstances, the client code for interacting with an Astra cluster will be the same as interacting with any other Cassandra cluster. The exceptions being: * A cloud configuration must be passed to a :class:`~.Cluster` instance via the `cloud` attribute (as demonstrated above). * An SSL connection will be established automatically. Manual SSL configuration is not allowed, and using `ssl_context` or `ssl_options` will result in an exception. diff --git a/docs/index.rst b/docs/index.rst index 3bd6312f7a..d18cf6acd2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -48,7 +48,7 @@ Contents Some discussion on the driver's approach to working with timestamp, date, time types :doc:`cloud` - A guide to connecting to Datastax Apollo + A guide to connecting to Datastax Astra :doc:`geo_types` Working with DSE geometry types From 1842c2e4d2d85a46fc677e171851f3f493907b70 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Wed, 11 Mar 2020 20:55:30 -0500 Subject: [PATCH 040/211] PYTHON-1207 Support transient replication in metadata --- CHANGELOG.rst | 4 ++ cassandra/metadata.py | 50 ++++++++++++++++------ tests/unit/test_metadata.py | 83 +++++++++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 413b0631a2..9cd64d8fc8 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,10 @@ ====== Unreleased +Features +-------- +Transient Replication Support (PYTHON-1207) + Bug Fixes --------- * Asyncore logging exception on shutdown (PYTHON-1228) diff --git a/cassandra/metadata.py b/cassandra/metadata.py index 5cdcef807e..a839c2206c 100644 --- a/cassandra/metadata.py +++ b/cassandra/metadata.py @@ -450,18 +450,37 @@ def make_token_replica_map(self, token_to_host_owner, ring): return {} +def parse_replication_factor(input_rf): + """ + Given the inputted replication factor, returns a tuple containing number of total replicas + and number of transient replicas + """ + transient_replicas = None + try: + total_replicas = int(input_rf) + except ValueError: + try: + rf = input_rf.split('/') + total_replicas, transient_replicas = int(rf[0]), int(rf[1]) + except Exception: + raise ValueError("Unable to determine replication factor from: {}".format(input_rf)) + return total_replicas, transient_replicas + + 
class SimpleStrategy(ReplicationStrategy): replication_factor = None """ The replication factor for this keyspace. """ + transient_replicas = None + """ + The number of transient replicas for this keyspace. + """ def __init__(self, options_map): - try: - self.replication_factor = int(options_map['replication_factor']) - except Exception: - raise ValueError("SimpleStrategy requires an integer 'replication_factor' option") + self._raw_replication_factor = options_map['replication_factor'] + self.replication_factor, self.transient_replicas = parse_replication_factor(self._raw_replication_factor) def make_token_replica_map(self, token_to_host_owner, ring): replica_map = {} @@ -482,14 +501,14 @@ def export_for_schema(self): Returns a string version of these replication options which are suitable for use in a CREATE KEYSPACE statement. """ - return "{'class': 'SimpleStrategy', 'replication_factor': '%d'}" \ - % (self.replication_factor,) + return "{'class': 'SimpleStrategy', 'replication_factor': '%s'}" \ + % (self._raw_replication_factor,) def __eq__(self, other): if not isinstance(other, SimpleStrategy): return False - return self.replication_factor == other.replication_factor + return str(self._raw_replication_factor) == str(other._raw_replication_factor) class NetworkTopologyStrategy(ReplicationStrategy): @@ -500,12 +519,19 @@ class NetworkTopologyStrategy(ReplicationStrategy): """ def __init__(self, dc_replication_factors): - self.dc_replication_factors = dict( - (str(k), int(v)) for k, v in dc_replication_factors.items()) + try: + self.dc_replication_factors = dict( + (str(k), int(v)) for k, v in dc_replication_factors.items()) + except ValueError: + self.dc_replication_factors = dict( + (str(k), str(v)) for k, v in dc_replication_factors.items()) def make_token_replica_map(self, token_to_host_owner, ring): - dc_rf_map = dict((dc, int(rf)) - for dc, rf in self.dc_replication_factors.items() if rf > 0) + dc_rf_map = {} + for dc, rf in self.dc_replication_factors.items(): + total_rf = parse_replication_factor(rf)[0] + if total_rf > 0: + dc_rf_map[dc] = total_rf # build a map of DCs to lists of indexes into `ring` for tokens that # belong to that DC @@ -586,7 +612,7 @@ def export_for_schema(self): """ ret = "{'class': 'NetworkTopologyStrategy'" for dc, repl_factor in sorted(self.dc_replication_factors.items()): - ret += ", '%s': '%d'" % (dc, repl_factor) + ret += ", '%s': '%s'" % (dc, repl_factor) return ret + "}" def __eq__(self, other): diff --git a/tests/unit/test_metadata.py b/tests/unit/test_metadata.py index 0ab64a4fcc..174ac1493b 100644 --- a/tests/unit/test_metadata.py +++ b/tests/unit/test_metadata.py @@ -85,6 +85,89 @@ def test_replication_strategy(self): self.assertRaises(NotImplementedError, rs.make_token_replica_map, None, None) self.assertRaises(NotImplementedError, rs.export_for_schema) + def test_simple_replication_type_parsing(self): + """ Test equality between passing numeric and string replication factor for simple strategy """ + rs = ReplicationStrategy() + + simple_int = rs.create('SimpleStrategy', {'replication_factor': 3}) + simple_str = rs.create('SimpleStrategy', {'replication_factor': '3'}) + + self.assertEqual(simple_int.export_for_schema(), simple_str.export_for_schema()) + self.assertEqual(simple_int, simple_str) + + # make token replica map + ring = [MD5Token(0), MD5Token(1), MD5Token(2)] + hosts = [Host('dc1.{}'.format(host), SimpleConvictionPolicy) for host in range(3)] + token_to_host = dict(zip(ring, hosts)) + self.assertEqual( + 
simple_int.make_token_replica_map(token_to_host, ring), + simple_str.make_token_replica_map(token_to_host, ring) + ) + + def test_transient_replication_parsing(self): + """ Test that we can PARSE a transient replication factor for SimpleStrategy """ + rs = ReplicationStrategy() + + simple_transient = rs.create('SimpleStrategy', {'replication_factor': '3/1'}) + self.assertEqual(simple_transient.replication_factor, 3) + self.assertEqual(simple_transient.transient_replicas, 1) + self.assertIn("'replication_factor': '3/1'", simple_transient.export_for_schema()) + + simple_str = rs.create('SimpleStrategy', {'replication_factor': '3'}) + self.assertNotEqual(simple_transient, simple_str) + + # make token replica map + ring = [MD5Token(0), MD5Token(1), MD5Token(2)] + hosts = [Host('dc1.{}'.format(host), SimpleConvictionPolicy) for host in range(3)] + token_to_host = dict(zip(ring, hosts)) + self.assertEqual( + simple_transient.make_token_replica_map(token_to_host, ring), + simple_str.make_token_replica_map(token_to_host, ring) + ) + + def test_nts_replication_parsing(self): + """ Test equality between passing numeric and string replication factor for NTS """ + rs = ReplicationStrategy() + + nts_int = rs.create('NetworkTopologyStrategy', {'dc1': 3, 'dc2': 5}) + nts_str = rs.create('NetworkTopologyStrategy', {'dc1': '3', 'dc2': '5'}) + + self.assertEqual(nts_int.dc_replication_factors['dc1'], 3) + self.assertEqual(nts_str.dc_replication_factors['dc1'], 3) + + self.assertEqual(nts_int.export_for_schema(), nts_str.export_for_schema()) + self.assertEqual(nts_int, nts_str) + + # make token replica map + ring = [MD5Token(0), MD5Token(1), MD5Token(2)] + hosts = [Host('dc1.{}'.format(host), SimpleConvictionPolicy) for host in range(3)] + token_to_host = dict(zip(ring, hosts)) + self.assertEqual( + nts_int.make_token_replica_map(token_to_host, ring), + nts_str.make_token_replica_map(token_to_host, ring) + ) + + def test_nts_transient_parsing(self): + """ Test that we can PARSE a transient replication factor for NTS """ + rs = ReplicationStrategy() + + nts_transient = rs.create('NetworkTopologyStrategy', {'dc1': '3/1', 'dc2': '5/1'}) + self.assertEqual(nts_transient.dc_replication_factors['dc1'], '3/1') + self.assertEqual(nts_transient.dc_replication_factors['dc2'], '5/1') + self.assertIn("'dc1': '3/1', 'dc2': '5/1'", nts_transient.export_for_schema()) + + nts_str = rs.create('NetworkTopologyStrategy', {'dc1': '3', 'dc2': '5'}) + self.assertNotEqual(nts_transient, nts_str) + + # make token replica map + ring = [MD5Token(0), MD5Token(1), MD5Token(2)] + hosts = [Host('dc1.{}'.format(host), SimpleConvictionPolicy) for host in range(3)] + token_to_host = dict(zip(ring, hosts)) + self.assertEqual( + nts_transient.make_token_replica_map(token_to_host, ring), + nts_str.make_token_replica_map(token_to_host, ring) + ) + def test_nts_make_token_replica_map(self): token_to_host_owner = {} From f59be8da5b1a4c2ad1747bcf15e21eb07c44a1e9 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Wed, 26 Feb 2020 13:16:35 -0600 Subject: [PATCH 041/211] release 3.22: bump docs.yaml again --- docs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs.yaml b/docs.yaml index 4b34f6cb5f..fe5bbf548e 100644 --- a/docs.yaml +++ b/docs.yaml @@ -23,7 +23,7 @@ sections: CASS_DRIVER_NO_CYTHON=1 python setup.py build_ext --inplace --force versions: - name: '3.22' - ref: 02055657 + ref: a1f8e102 - name: '3.21' ref: 5589d96b - name: '3.20' From 291ae2e4e16447db65c98c003163adb5f978b7e1 Mon Sep 17 00:00:00 2001 From: James 
Falcon Date: Thu, 12 Mar 2020 19:26:21 -0500 Subject: [PATCH 042/211] 4.0 test changes: - Add TestCluster class to integration tests to allow easier setting of default options and updated all call points - Removed setting __defaults__ on stuff - Changed a bunch of '4.0' checks to be '4.0-a' - Enabled materialized views, sasi, and transient replication in standard test startup options - Updated materialized views to specify not null for primary keys as required in 4.0 - Added new protocol error message received from 4.0 in test_no_connection_refused_on_timeout --- tests/integration/__init__.py | 71 +++++---- tests/integration/advanced/__init__.py | 11 +- tests/integration/advanced/graph/__init__.py | 32 ++-- .../integration/advanced/graph/test_graph.py | 8 +- .../integration/advanced/test_adv_metadata.py | 5 +- tests/integration/advanced/test_auth.py | 14 +- .../integration/advanced/test_cont_paging.py | 6 +- .../test_cqlengine_where_operators.py | 7 +- .../advanced/test_unixsocketendpoint.py | 6 +- tests/integration/cloud/__init__.py | 3 +- tests/integration/cloud/test_cloud.py | 2 +- tests/integration/cqlengine/__init__.py | 5 +- .../cqlengine/advanced/test_cont_paging.py | 8 +- .../cqlengine/connections/test_connection.py | 16 +- .../cqlengine/query/test_queryset.py | 6 +- .../statements/test_base_statement.py | 5 +- .../integration/cqlengine/test_connections.py | 7 +- tests/integration/long/test_consistency.py | 41 ++--- tests/integration/long/test_failure_types.py | 9 +- tests/integration/long/test_ipv6.py | 15 +- tests/integration/long/test_large_data.py | 10 +- .../long/test_loadbalancingpolicies.py | 35 +++-- tests/integration/long/test_policies.py | 6 +- tests/integration/long/test_schema.py | 11 +- tests/integration/long/test_ssl.py | 24 +-- .../integration/long/test_topology_change.py | 5 +- tests/integration/long/utils.py | 2 +- .../standard/test_authentication.py | 14 +- .../test_authentication_misconfiguration.py | 5 +- .../standard/test_client_warnings.py | 5 +- tests/integration/standard/test_cluster.py | 144 +++++++++--------- tests/integration/standard/test_concurrent.py | 7 +- tests/integration/standard/test_connection.py | 36 +++-- .../standard/test_control_connection.py | 5 +- .../standard/test_custom_cluster.py | 14 +- .../standard/test_custom_payload.py | 6 +- .../standard/test_custom_protocol_handler.py | 22 +-- .../standard/test_cython_protocol_handlers.py | 28 ++-- tests/integration/standard/test_dse.py | 6 +- tests/integration/standard/test_metadata.py | 84 +++++----- tests/integration/standard/test_metrics.py | 36 +++-- tests/integration/standard/test_policies.py | 12 +- .../standard/test_prepared_statements.py | 16 +- tests/integration/standard/test_query.py | 66 ++++---- .../integration/standard/test_query_paging.py | 7 +- tests/integration/standard/test_routing.py | 6 +- .../standard/test_row_factories.py | 17 ++- tests/integration/standard/test_types.py | 33 ++-- tests/integration/standard/test_udts.py | 33 ++-- 49 files changed, 491 insertions(+), 481 deletions(-) diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index d6f26acbcd..65cd6a2f1f 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -75,7 +75,7 @@ def get_server_versions(): if cass_version is not None: return (cass_version, cql_version) - c = Cluster() + c = TestCluster() s = c.connect() row = s.execute('SELECT cql_version, release_version FROM system.local')[0] @@ -199,33 +199,14 @@ def _get_dse_version_from_cass(cass_version): 
CCM_KWARGS['version'] = CCM_VERSION -#This changes the default contact_point parameter in Cluster -def set_default_cass_ip(): - if CASSANDRA_IP.startswith("127.0.0."): - return - defaults = list(Cluster.__init__.__defaults__) - defaults = [[CASSANDRA_IP]] + defaults[1:] - try: - Cluster.__init__.__defaults__ = tuple(defaults) - except: - Cluster.__init__.__func__.__defaults__ = tuple(defaults) - - -def set_default_beta_flag_true(): - defaults = list(Cluster.__init__.__defaults__) - defaults = (defaults[:28] + [True] + defaults[29:]) - try: - Cluster.__init__.__defaults__ = tuple(defaults) - except: - Cluster.__init__.__func__.__defaults__ = tuple(defaults) - - +ALLOW_BETA_PROTOCOL = False def get_default_protocol(): - if CASSANDRA_VERSION >= Version('4.0'): + if CASSANDRA_VERSION >= Version('4.0-a'): if DSE_VERSION: return ProtocolVersion.DSE_V2 else: - set_default_beta_flag_true() + global ALLOW_BETA_PROTOCOL + ALLOW_BETA_PROTOCOL = True return ProtocolVersion.V5 if CASSANDRA_VERSION >= Version('3.10'): if DSE_VERSION: @@ -254,7 +235,7 @@ def get_supported_protocol_versions(): 4.0(C*) -> 5(beta),4,3 4.0(DSE) -> DSE_v2, DSE_V1,4,3 ` """ - if CASSANDRA_VERSION >= Version('4.0'): + if CASSANDRA_VERSION >= Version('4.0-a'): if DSE_VERSION: return (3, 4, ProtocolVersion.DSE_V1, ProtocolVersion.DSE_V2) else: @@ -293,7 +274,7 @@ def get_unsupported_upper_protocol(): supported by the version of C* running """ - if CASSANDRA_VERSION >= Version('4.0'): + if CASSANDRA_VERSION >= Version('4.0-a'): if DSE_VERSION: return None else: @@ -341,9 +322,9 @@ def _id_and_mark(f): greaterthanorequalcass36 = unittest.skipUnless(CASSANDRA_VERSION >= Version('3.6'), 'Cassandra version 3.6 or greater required') greaterthanorequalcass3_10 = unittest.skipUnless(CASSANDRA_VERSION >= Version('3.10'), 'Cassandra version 3.10 or greater required') greaterthanorequalcass3_11 = unittest.skipUnless(CASSANDRA_VERSION >= Version('3.11'), 'Cassandra version 3.11 or greater required') -greaterthanorequalcass40 = unittest.skipUnless(CASSANDRA_VERSION >= Version('4.0'), 'Cassandra version 4.0 or greater required') -lessthanorequalcass40 = unittest.skipUnless(CASSANDRA_VERSION <= Version('4.0'), 'Cassandra version less or equal to 4.0 required') -lessthancass40 = unittest.skipUnless(CASSANDRA_VERSION < Version('4.0'), 'Cassandra version less than 4.0 required') +greaterthanorequalcass40 = unittest.skipUnless(CASSANDRA_VERSION >= Version('4.0-a'), 'Cassandra version 4.0 or greater required') +lessthanorequalcass40 = unittest.skipUnless(CASSANDRA_VERSION <= Version('4.0-a'), 'Cassandra version less or equal to 4.0 required') +lessthancass40 = unittest.skipUnless(CASSANDRA_VERSION < Version('4.0-a'), 'Cassandra version less than 4.0 required') lessthancass30 = unittest.skipUnless(CASSANDRA_VERSION < Version('3.0'), 'Cassandra version less then 3.0 required') greaterthanorequaldse68 = unittest.skipUnless(DSE_VERSION and DSE_VERSION >= Version('6.8'), "DSE 6.8 or greater required for this test") greaterthanorequaldse67 = unittest.skipUnless(DSE_VERSION and DSE_VERSION >= Version('6.7'), "DSE 6.7 or greater required for this test") @@ -469,7 +450,6 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, dse_cluster = True if DSE_VERSION else False if not workloads: workloads = [] - set_default_cass_ip() if ccm_options is None and DSE_VERSION: ccm_options = {"version": CCM_VERSION} @@ -527,9 +507,6 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, CCM_CLUSTER = DseCluster(path, 
cluster_name, **ccm_options) CCM_CLUSTER.set_configuration_options({'start_native_transport': True}) CCM_CLUSTER.set_configuration_options({'batch_size_warn_threshold_in_kb': 5}) - if Version(dse_version) >= Version('5.0'): - CCM_CLUSTER.set_configuration_options({'enable_user_defined_functions': True}) - CCM_CLUSTER.set_configuration_options({'enable_scripted_user_defined_functions': True}) if Version(dse_version) >= Version('5.1'): # For Inet4Address CCM_CLUSTER.set_dse_configuration_options({ @@ -565,6 +542,12 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, CCM_CLUSTER.set_configuration_options({'enable_user_defined_functions': True}) if Version(cassandra_version) >= Version('3.0'): CCM_CLUSTER.set_configuration_options({'enable_scripted_user_defined_functions': True}) + if Version(cassandra_version) >= Version('4.0-a'): + CCM_CLUSTER.set_configuration_options({ + 'enable_materialized_views': True, + 'enable_sasi_indexes': True, + 'enable_transient_replication': True, + }) common.switch_cluster(path, cluster_name) CCM_CLUSTER.set_configuration_options(configuration_options) CCM_CLUSTER.populate(nodes, ipformat=ipformat) @@ -699,9 +682,9 @@ def setup_keyspace(ipformat=None, wait=True, protocol_version=None): _protocol_version = PROTOCOL_VERSION if not ipformat: - cluster = Cluster(protocol_version=_protocol_version) + cluster = TestCluster(protocol_version=_protocol_version) else: - cluster = Cluster(contact_points=["::1"], protocol_version=_protocol_version) + cluster = TestCluster(contact_points=["::1"], protocol_version=_protocol_version) session = cluster.connect() try: @@ -795,7 +778,7 @@ def create_keyspace(cls, rf): @classmethod def common_setup(cls, rf, keyspace_creation=True, create_class_table=False, **cluster_kwargs): - cls.cluster = Cluster(protocol_version=PROTOCOL_VERSION, **cluster_kwargs) + cls.cluster = TestCluster(**cluster_kwargs) cls.session = cls.cluster.connect(wait_for_all_pools=True) cls.ks_name = cls.__name__.lower() if keyspace_creation: @@ -981,3 +964,19 @@ def assert_startswith(s, prefix): raise AssertionError( '{} does not start with {}'.format(repr(s), repr(prefix)) ) + + +class TestCluster(object): + DEFAULT_PROTOCOL_VERSION = default_protocol_version + DEFAULT_CASSANDRA_IP = CASSANDRA_IP + DEFAULT_ALLOW_BETA = ALLOW_BETA_PROTOCOL + + def __new__(cls, **kwargs): + if 'protocol_version' not in kwargs: + kwargs['protocol_version'] = cls.DEFAULT_PROTOCOL_VERSION + if 'contact_points' not in kwargs: + kwargs['contact_points'] = [cls.DEFAULT_CASSANDRA_IP] + if 'allow_beta_protocol_version' not in kwargs: + kwargs['allow_beta_protocol_version'] = cls.DEFAULT_ALLOW_BETA + return Cluster(**kwargs) + diff --git a/tests/integration/advanced/__init__.py b/tests/integration/advanced/__init__.py index c5da6c0154..b2820e037b 100644 --- a/tests/integration/advanced/__init__.py +++ b/tests/integration/advanced/__init__.py @@ -25,10 +25,8 @@ from ccmlib import common -from cassandra.cluster import Cluster - -from tests.integration import PROTOCOL_VERSION, get_server_versions, BasicKeyspaceUnitTestCase, \ - drop_keyspace_shutdown_cluster, get_node, USE_CASS_EXTERNAL, set_default_cass_ip +from tests.integration import get_server_versions, BasicKeyspaceUnitTestCase, \ + drop_keyspace_shutdown_cluster, get_node, USE_CASS_EXTERNAL, TestCluster from tests.integration import use_singledc, use_single_node, wait_for_node_socket, CASSANDRA_IP home = expanduser('~') @@ -97,7 +95,6 @@ def use_cluster_with_graph(num_nodes): when started all at once. 
""" if USE_CASS_EXTERNAL: - set_default_cass_ip() return # Create the cluster but don't start it. @@ -109,7 +106,7 @@ def use_cluster_with_graph(num_nodes): # Wait for spark master to start up spark_master_http = ("localhost", 7080) common.check_socket_listening(spark_master_http, timeout=60) - tmp_cluster = Cluster(protocol_version=PROTOCOL_VERSION) + tmp_cluster = TestCluster() # Start up remaining nodes. try: @@ -137,7 +134,7 @@ class BasicGeometricUnitTestCase(BasicKeyspaceUnitTestCase): @classmethod def common_dse_setup(cls, rf, keyspace_creation=True): - cls.cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cls.cluster = TestCluster() cls.session = cls.cluster.connect() cls.ks_name = cls.__name__.lower() if keyspace_creation: diff --git a/tests/integration/advanced/graph/__init__.py b/tests/integration/advanced/graph/__init__.py index 6002d57f78..0573cf2557 100644 --- a/tests/integration/advanced/graph/__init__.py +++ b/tests/integration/advanced/graph/__init__.py @@ -160,14 +160,13 @@ def session_setup(self): ) ) - self.cluster = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={ - EXEC_PROFILE_GRAPH_DEFAULT: ep_graphson1, - EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT: ep_analytics, - "graphson1": ep_graphson1, - "graphson2": ep_graphson2, - "graphson3": ep_graphson3 - }) + self.cluster = TestCluster(execution_profiles={ + EXEC_PROFILE_GRAPH_DEFAULT: ep_graphson1, + EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT: ep_analytics, + "graphson1": ep_graphson1, + "graphson2": ep_graphson2, + "graphson3": ep_graphson3 + }) self.session = self.cluster.connect() self.ks_name = self._testMethodName.lower() @@ -276,14 +275,13 @@ def session_setup(self): ) ) - self.cluster = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={ - EXEC_PROFILE_GRAPH_DEFAULT: ep_graphson1, - EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT: ep_analytics, - "graphson1": ep_graphson1, - "graphson2": ep_graphson2, - "graphson3": ep_graphson3 - }) + self.cluster = TestCluster(execution_profiles={ + EXEC_PROFILE_GRAPH_DEFAULT: ep_graphson1, + EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT: ep_analytics, + "graphson1": ep_graphson1, + "graphson2": ep_graphson2, + "graphson3": ep_graphson3 + }) self.session = self.cluster.connect() self.ks_name = self._testMethodName.lower() @@ -362,7 +360,7 @@ class BasicSharedGraphUnitTestCase(BasicKeyspaceUnitTestCase): @classmethod def session_setup(cls): - cls.cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cls.cluster = TestCluster() cls.session = cls.cluster.connect() cls.ks_name = cls.__name__.lower() cls.cass_version, cls.cql_version = get_server_versions() diff --git a/tests/integration/advanced/graph/test_graph.py b/tests/integration/advanced/graph/test_graph.py index 898779f789..020d631d69 100644 --- a/tests/integration/advanced/graph/test_graph.py +++ b/tests/integration/advanced/graph/test_graph.py @@ -19,12 +19,13 @@ from cassandra.protocol import SyntaxException from cassandra.policies import WhiteListRoundRobinPolicy from cassandra.cluster import NoHostAvailable -from cassandra.cluster import EXEC_PROFILE_GRAPH_DEFAULT, GraphExecutionProfile, Cluster +from cassandra.cluster import EXEC_PROFILE_GRAPH_DEFAULT, GraphExecutionProfile, from cassandra.graph import single_object_row_factory, Vertex, graph_object_row_factory, \ graph_graphson2_row_factory, graph_graphson3_row_factory from cassandra.util import SortedSet -from tests.integration import PROTOCOL_VERSION, DSE_VERSION, greaterthanorequaldse51, greaterthanorequaldse68, requiredse +from tests.integration import 
DSE_VERSION, greaterthanorequaldse51, greaterthanorequaldse68, \ + requiredse, TestCluster from tests.integration.advanced.graph import BasicGraphUnitTestCase, GraphUnitTestCase, \ GraphProtocol, ClassicGraphSchema, CoreGraphSchema, use_single_node_with_graph @@ -149,8 +150,7 @@ def test_graph_profile(self): exec_short_timeout.graph_options.graph_name = self.graph_name # Add a single execution policy on cluster creation - local_cluster = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={"exec_dif_factory": exec_dif_factory}) + local_cluster = TestCluster(execution_profiles={"exec_dif_factory": exec_dif_factory}) local_session = local_cluster.connect() self.addCleanup(local_cluster.shutdown) diff --git a/tests/integration/advanced/test_adv_metadata.py b/tests/integration/advanced/test_adv_metadata.py index 52944aabdf..b3af6fa5d1 100644 --- a/tests/integration/advanced/test_adv_metadata.py +++ b/tests/integration/advanced/test_adv_metadata.py @@ -14,12 +14,11 @@ from packaging.version import Version -from cassandra.cluster import Cluster from tests.integration import (BasicExistingKeyspaceUnitTestCase, BasicSharedKeyspaceUnitTestCase, BasicSharedKeyspaceUnitTestCaseRF1, greaterthanorequaldse51, greaterthanorequaldse60, greaterthanorequaldse68, use_single_node, - DSE_VERSION, requiredse, PROTOCOL_VERSION) + DSE_VERSION, requiredse, TestCluster) try: import unittest2 as unittest @@ -393,4 +392,4 @@ def test_connection_on_graph_schema_error(self): """ % (self.ks_name,)) self.session.execute('TRUNCATE system_schema.vertices') - Cluster(protocol_version=PROTOCOL_VERSION).connect().shutdown() + TestCluster().connect().shutdown() diff --git a/tests/integration/advanced/test_auth.py b/tests/integration/advanced/test_auth.py index 59bd3dec5c..748304aef4 100644 --- a/tests/integration/advanced/test_auth.py +++ b/tests/integration/advanced/test_auth.py @@ -26,11 +26,11 @@ from cassandra.auth import (DSEGSSAPIAuthProvider, DSEPlainTextAuthProvider, SaslAuthProvider, TransitionalModePlainTextAuthProvider) -from cassandra.cluster import EXEC_PROFILE_GRAPH_DEFAULT, Cluster, NoHostAvailable +from cassandra.cluster import EXEC_PROFILE_GRAPH_DEFAULT, NoHostAvailable from cassandra.protocol import Unauthorized from cassandra.query import SimpleStatement from tests.integration import (get_cluster, greaterthanorequaldse51, - remove_cluster, requiredse, DSE_VERSION) + remove_cluster, requiredse, DSE_VERSION, TestCluster) from tests.integration.advanced import ADS_HOME, use_single_node_with_graph from tests.integration.advanced.graph import reset_graph, ClassicGraphFixtures @@ -157,7 +157,7 @@ def connect_and_query(self, auth_provider, query=None): Runs a simple system query with the auth_provided specified. 
""" os.environ['KRB5_CONFIG'] = self.krb_conf - self.cluster = Cluster(auth_provider=auth_provider) + self.cluster = TestCluster(auth_provider=auth_provider) self.session = self.cluster.connect() query = query if query else "SELECT * FROM system.local" statement = SimpleStatement(query) @@ -320,7 +320,7 @@ def _remove_proxy_setup(self): os.environ['KRB5_CONFIG'] = self.krb_conf self.refresh_kerberos_tickets(self.cassandra_keytab, "cassandra@DATASTAX.COM", self.krb_conf) auth_provider = DSEGSSAPIAuthProvider(service='dse', qops=["auth"], principal='cassandra@DATASTAX.COM') - cluster = Cluster(auth_provider=auth_provider) + cluster = TestCluster(auth_provider=auth_provider) session = cluster.connect() session.execute("REVOKE PROXY.LOGIN ON ROLE '{0}' FROM '{1}'".format('charlie@DATASTAX.COM', 'bob@DATASTAX.COM')) @@ -338,7 +338,7 @@ def _setup_for_proxy(self, grant=True): os.environ['KRB5_CONFIG'] = self.krb_conf self.refresh_kerberos_tickets(self.cassandra_keytab, "cassandra@DATASTAX.COM", self.krb_conf) auth_provider = DSEGSSAPIAuthProvider(service='dse', qops=["auth"], principal='cassandra@DATASTAX.COM') - cluster = Cluster(auth_provider=auth_provider) + cluster = TestCluster(auth_provider=auth_provider) session = cluster.connect() stmts = [ @@ -403,7 +403,7 @@ def setUpClass(self): # Create users and test keyspace self.user_role = 'user1' self.server_role = 'server' - self.root_cluster = Cluster(auth_provider=DSEPlainTextAuthProvider('cassandra', 'cassandra')) + self.root_cluster = TestCluster(auth_provider=DSEPlainTextAuthProvider('cassandra', 'cassandra')) self.root_session = self.root_cluster.connect() stmts = [ @@ -469,7 +469,7 @@ def get_sasl_options(self, mechanism='PLAIN'): return sasl_options def connect_and_query(self, auth_provider, execute_as=None, query="SELECT * FROM testproxy.testproxy"): - self.cluster = Cluster(auth_provider=auth_provider) + self.cluster = TestCluster(auth_provider=auth_provider) self.session = self.cluster.connect() rs = self.session.execute(query, execute_as=execute_as) return rs diff --git a/tests/integration/advanced/test_cont_paging.py b/tests/integration/advanced/test_cont_paging.py index 82b3fe2960..c5f1cbfff3 100644 --- a/tests/integration/advanced/test_cont_paging.py +++ b/tests/integration/advanced/test_cont_paging.py @@ -13,7 +13,7 @@ # limitations under the License. 
from tests.integration import use_singledc, greaterthanorequaldse51, BasicSharedKeyspaceUnitTestCaseRF3WM, \ - DSE_VERSION, ProtocolVersion, greaterthanorequaldse60, requiredse + DSE_VERSION, ProtocolVersion, greaterthanorequaldse60, requiredse, TestCluster import logging log = logging.getLogger(__name__) @@ -28,7 +28,7 @@ from packaging.version import Version import time -from cassandra.cluster import Cluster, ExecutionProfile, ContinuousPagingOptions +from cassandra.cluster import ExecutionProfile, ContinuousPagingOptions from cassandra.concurrent import execute_concurrent from cassandra.query import SimpleStatement @@ -64,7 +64,7 @@ def tearDownClass(cls): @classmethod def create_cluster(cls): - cls.cluster_with_profiles = Cluster(protocol_version=cls.protocol_version, execution_profiles=cls.execution_profiles) + cls.cluster_with_profiles = TestCluster(protocol_version=cls.protocol_version, execution_profiles=cls.execution_profiles) cls.session_with_profiles = cls.cluster_with_profiles.connect(wait_for_all_pools=True) statements_and_params = zip( diff --git a/tests/integration/advanced/test_cqlengine_where_operators.py b/tests/integration/advanced/test_cqlengine_where_operators.py index 9497feabd7..8ade3db09d 100644 --- a/tests/integration/advanced/test_cqlengine_where_operators.py +++ b/tests/integration/advanced/test_cqlengine_where_operators.py @@ -20,13 +20,12 @@ import os import time -from cassandra.cluster import Cluster from cassandra.cqlengine import columns, connection, models from cassandra.cqlengine.management import (CQLENG_ALLOW_SCHEMA_MANAGEMENT, create_keyspace_simple, drop_table, sync_table) from cassandra.cqlengine.statements import IsNotNull -from tests.integration import DSE_VERSION, requiredse, CASSANDRA_IP, greaterthanorequaldse60 +from tests.integration import DSE_VERSION, requiredse, CASSANDRA_IP, greaterthanorequaldse60, TestCluster from tests.integration.advanced import use_single_node_with_graph_and_solr from tests.integration.cqlengine import DEFAULT_KEYSPACE @@ -65,7 +64,7 @@ class IsNotNullTests(unittest.TestCase): @classmethod def setUpClass(cls): if DSE_VERSION: - cls.cluster = Cluster() + cls.cluster = TestCluster() @greaterthanorequaldse60 def test_is_not_null_execution(self): @@ -81,7 +80,7 @@ def test_is_not_null_execution(self): @test_category cqlengine """ - cluster = Cluster() + cluster = TestCluster() self.addCleanup(cluster.shutdown) session = cluster.connect() diff --git a/tests/integration/advanced/test_unixsocketendpoint.py b/tests/integration/advanced/test_unixsocketendpoint.py index e435314637..1f6665964a 100644 --- a/tests/integration/advanced/test_unixsocketendpoint.py +++ b/tests/integration/advanced/test_unixsocketendpoint.py @@ -20,12 +20,12 @@ import subprocess import logging -from cassandra.cluster import Cluster, ExecutionProfile, EXEC_PROFILE_DEFAULT +from cassandra.cluster import ExecutionProfile, EXEC_PROFILE_DEFAULT from cassandra.connection import UnixSocketEndPoint from cassandra.policies import WhiteListRoundRobinPolicy, RoundRobinPolicy from tests import notwindows -from tests.integration import use_single_node +from tests.integration import use_single_node, TestCluster log = logging.getLogger() log.setLevel('DEBUG') @@ -65,7 +65,7 @@ def setUpClass(cls): lbp = UnixSocketWhiteListRoundRobinPolicy([UNIX_SOCKET_PATH]) ep = ExecutionProfile(load_balancing_policy=lbp) endpoint = UnixSocketEndPoint(UNIX_SOCKET_PATH) - cls.cluster = Cluster([endpoint], execution_profiles={EXEC_PROFILE_DEFAULT: ep}) + cls.cluster = 
TestCluster(contact_points=[endpoint], execution_profiles={EXEC_PROFILE_DEFAULT: ep}) @classmethod def tearDownClass(cls): diff --git a/tests/integration/cloud/__init__.py b/tests/integration/cloud/__init__.py index 83f5e21ce3..ca05ae4ce5 100644 --- a/tests/integration/cloud/__init__.py +++ b/tests/integration/cloud/__init__.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License +from cassandra.cluster import Cluster try: import unittest2 as unittest @@ -20,8 +21,6 @@ import os import subprocess -from cassandra.cluster import Cluster - from tests.integration import CLOUD_PROXY_PATH, USE_CASS_EXTERNAL diff --git a/tests/integration/cloud/test_cloud.py b/tests/integration/cloud/test_cloud.py index 31b5367f3c..ef76b71303 100644 --- a/tests/integration/cloud/test_cloud.py +++ b/tests/integration/cloud/test_cloud.py @@ -30,7 +30,7 @@ from mock import patch -from tests.integration import requirescloudproxy +from tests.integration import requirescloudproxy, TestCluster from tests.util import wait_until_not_raised from tests.integration.cloud import CloudProxyCluster, CLOUD_PROXY_SERVER diff --git a/tests/integration/cqlengine/__init__.py b/tests/integration/cqlengine/__init__.py index d098ea7014..e68baaabf1 100644 --- a/tests/integration/cqlengine/__init__.py +++ b/tests/integration/cqlengine/__init__.py @@ -24,7 +24,8 @@ from cassandra.cqlengine.management import create_keyspace_simple, drop_keyspace, CQLENG_ALLOW_SCHEMA_MANAGEMENT import cassandra -from tests.integration import get_server_versions, use_single_node, PROTOCOL_VERSION, CASSANDRA_IP, set_default_cass_ip +from tests.integration import get_server_versions, use_single_node, PROTOCOL_VERSION, CASSANDRA_IP, ALLOW_BETA_PROTOCOL + DEFAULT_KEYSPACE = 'cqlengine_test' @@ -35,7 +36,6 @@ def setup_package(): warnings.simplefilter('always') # for testing warnings, make sure all are let through os.environ[CQLENG_ALLOW_SCHEMA_MANAGEMENT] = '1' - set_default_cass_ip() use_single_node() setup_connection(DEFAULT_KEYSPACE) @@ -55,6 +55,7 @@ def setup_connection(keyspace_name): connection.setup([CASSANDRA_IP], consistency=ConsistencyLevel.ONE, protocol_version=PROTOCOL_VERSION, + allow_beta_protocol_version=ALLOW_BETA_PROTOCOL, default_keyspace=keyspace_name) diff --git a/tests/integration/cqlengine/advanced/test_cont_paging.py b/tests/integration/cqlengine/advanced/test_cont_paging.py index ec7b196f1a..38b4355312 100644 --- a/tests/integration/cqlengine/advanced/test_cont_paging.py +++ b/tests/integration/cqlengine/advanced/test_cont_paging.py @@ -21,13 +21,13 @@ from packaging.version import Version -from cassandra.cluster import (EXEC_PROFILE_DEFAULT, Cluster, +from cassandra.cluster import (EXEC_PROFILE_DEFAULT, ContinuousPagingOptions, ExecutionProfile, ProtocolVersion) from cassandra.cqlengine import columns, connection, models from cassandra.cqlengine.management import drop_table, sync_table from tests.integration import (DSE_VERSION, greaterthanorequaldse51, - greaterthanorequaldse60, requiredse) + greaterthanorequaldse60, requiredse, TestCluster) class TestMultiKeyModel(models.Model): @@ -76,8 +76,8 @@ def tearDownClass(cls): def _create_cluster_with_cp_options(cls, name, cp_options): execution_profiles = {EXEC_PROFILE_DEFAULT: ExecutionProfile(continuous_paging_options=cp_options)} - cls.cluster_default = Cluster(protocol_version=cls.protocol_version, - execution_profiles=execution_profiles) + cls.cluster_default = 
TestCluster(protocol_version=cls.protocol_version, + execution_profiles=execution_profiles) cls.session_default = cls.cluster_default.connect(wait_for_all_pools=True) connection.register_connection(name, default=True, session=cls.session_default) cls.connections.add(name) diff --git a/tests/integration/cqlengine/connections/test_connection.py b/tests/integration/cqlengine/connections/test_connection.py index bbc0231565..c46df31280 100644 --- a/tests/integration/cqlengine/connections/test_connection.py +++ b/tests/integration/cqlengine/connections/test_connection.py @@ -22,11 +22,11 @@ from cassandra.cqlengine.models import Model from cassandra.cqlengine import columns, connection, models from cassandra.cqlengine.management import sync_table -from cassandra.cluster import Cluster, ExecutionProfile, _clusters_for_shutdown, _ConfigMode, EXEC_PROFILE_DEFAULT +from cassandra.cluster import ExecutionProfile, _clusters_for_shutdown, _ConfigMode, EXEC_PROFILE_DEFAULT from cassandra.policies import RoundRobinPolicy from cassandra.query import dict_factory -from tests.integration import CASSANDRA_IP, PROTOCOL_VERSION, execute_with_long_wait_retry, local +from tests.integration import CASSANDRA_IP, PROTOCOL_VERSION, execute_with_long_wait_retry, local, TestCluster from tests.integration.cqlengine.base import BaseCassEngTestCase from tests.integration.cqlengine import DEFAULT_KEYSPACE, setup_connection @@ -76,7 +76,7 @@ def setUpClass(cls): cls.keyspace1 = 'ctest1' cls.keyspace2 = 'ctest2' super(SeveralConnectionsTest, cls).setUpClass() - cls.setup_cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cls.setup_cluster = TestCluster() cls.setup_session = cls.setup_cluster.connect() ddl = "CREATE KEYSPACE {0} WITH replication = {{'class': 'SimpleStrategy', 'replication_factor': '{1}'}}".format(cls.keyspace1, 1) execute_with_long_wait_retry(cls.setup_session, ddl) @@ -93,7 +93,7 @@ def tearDownClass(cls): models.DEFAULT_KEYSPACE def setUp(self): - self.c = Cluster(protocol_version=PROTOCOL_VERSION) + self.c = TestCluster() self.session1 = self.c.connect(keyspace=self.keyspace1) self.session1.row_factory = dict_factory self.session2 = self.c.connect(keyspace=self.keyspace2) @@ -149,7 +149,7 @@ def test_connection_with_legacy_settings(self): self.assertEqual(conn.cluster._config_mode, _ConfigMode.LEGACY) def test_connection_from_session_with_execution_profile(self): - cluster = Cluster(execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=dict_factory)}) + cluster = TestCluster(execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=dict_factory)}) session = cluster.connect() connection.default() connection.set_session(session) @@ -157,7 +157,7 @@ def test_connection_from_session_with_execution_profile(self): self.assertEqual(conn.cluster._config_mode, _ConfigMode.PROFILES) def test_connection_from_session_with_legacy_settings(self): - cluster = Cluster(load_balancing_policy=RoundRobinPolicy()) + cluster = TestCluster(load_balancing_policy=RoundRobinPolicy()) session = cluster.connect() session.row_factory = dict_factory connection.set_session(session) @@ -165,7 +165,7 @@ def test_connection_from_session_with_legacy_settings(self): self.assertEqual(conn.cluster._config_mode, _ConfigMode.LEGACY) def test_uncommitted_session_uses_legacy(self): - cluster = Cluster() + cluster = TestCluster() session = cluster.connect() session.row_factory = dict_factory connection.set_session(session) @@ -186,7 +186,7 @@ def test_legacy_insert_query(self): 
self.assertEqual(ConnectionModel.objects(key=0)[0].some_data, 'text0') def test_execution_profile_insert_query(self): - cluster = Cluster(execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=dict_factory)}) + cluster = TestCluster(execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=dict_factory)}) session = cluster.connect() connection.default() connection.set_session(session) diff --git a/tests/integration/cqlengine/query/test_queryset.py b/tests/integration/cqlengine/query/test_queryset.py index e5a15b7c4b..6bc9d701b8 100644 --- a/tests/integration/cqlengine/query/test_queryset.py +++ b/tests/integration/cqlengine/query/test_queryset.py @@ -23,7 +23,7 @@ from packaging.version import Version import uuid -from cassandra.cluster import Cluster, Session +from cassandra.cluster import Session from cassandra import InvalidRequest from tests.integration.cqlengine.base import BaseCassEngTestCase from cassandra.cqlengine.connection import NOT_SET @@ -42,7 +42,7 @@ from cassandra.util import uuid_from_time from cassandra.cqlengine.connection import get_session from tests.integration import PROTOCOL_VERSION, CASSANDRA_VERSION, greaterthancass20, greaterthancass21, \ - greaterthanorequalcass30 + greaterthanorequalcass30, TestCluster from tests.integration.cqlengine import execute_count, DEFAULT_KEYSPACE @@ -775,7 +775,7 @@ def test_custom_indexed_field_can_be_queried(self): with self.assertRaises(InvalidRequest): list(CustomIndexedTestModel.objects.filter(description__gte='test')) - with Cluster().connect() as session: + with TestCluster().connect() as session: session.execute("CREATE INDEX custom_index_cqlengine ON {}.{} (description)". format(DEFAULT_KEYSPACE, CustomIndexedTestModel._table_name)) diff --git a/tests/integration/cqlengine/statements/test_base_statement.py b/tests/integration/cqlengine/statements/test_base_statement.py index db7d1ebd6a..474c45d02b 100644 --- a/tests/integration/cqlengine/statements/test_base_statement.py +++ b/tests/integration/cqlengine/statements/test_base_statement.py @@ -20,7 +20,6 @@ import six from cassandra.query import FETCH_SIZE_UNSET -from cassandra.cluster import Cluster, ConsistencyLevel from cassandra.cqlengine.statements import BaseCQLStatement from cassandra.cqlengine.management import sync_table, drop_table from cassandra.cqlengine.statements import InsertStatement, UpdateStatement, SelectStatement, DeleteStatement, \ @@ -30,7 +29,7 @@ from tests.integration.cqlengine.base import BaseCassEngTestCase, TestQueryUpdateModel from tests.integration.cqlengine import DEFAULT_KEYSPACE -from tests.integration import greaterthanorequalcass3_10 +from tests.integration import greaterthanorequalcass3_10, TestCluster from cassandra.cqlengine.connection import execute @@ -116,7 +115,7 @@ def test_like_operator(self): @test_category data_types:object_mapper """ - cluster = Cluster() + cluster = TestCluster() session = cluster.connect() self.addCleanup(cluster.shutdown) diff --git a/tests/integration/cqlengine/test_connections.py b/tests/integration/cqlengine/test_connections.py index 10dee66ddc..15adff3380 100644 --- a/tests/integration/cqlengine/test_connections.py +++ b/tests/integration/cqlengine/test_connections.py @@ -13,7 +13,6 @@ # limitations under the License. 
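# Editor's note: an illustrative sketch, not part of the patch. The cqlengine
# hunks above all perform the same migration: build the cluster through
# TestCluster so the suite defaults apply, connect, and hand the session to
# cqlengine's connection registry (mirroring test_connection_creation_from_session).
# The connection name 'sketch_conn' is made up for this example; a live test
# node is assumed for connect().
from cassandra.cqlengine import connection
from tests.integration import TestCluster

cluster = TestCluster()
session = cluster.connect()
connection.register_connection('sketch_conn', session=session)  # cqlengine can now route queries through it
# ... cqlengine model operations would run here ...
connection.unregister_connection('sketch_conn')
cluster.shutdown()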
from cassandra import InvalidRequest -from cassandra.cluster import Cluster from cassandra.cluster import NoHostAvailable from cassandra.cqlengine import columns, CQLEngineException from cassandra.cqlengine import connection as conn @@ -23,7 +22,7 @@ from tests.integration.cqlengine import setup_connection, DEFAULT_KEYSPACE from tests.integration.cqlengine.base import BaseCassEngTestCase from tests.integration.cqlengine.query import test_queryset -from tests.integration import local, CASSANDRA_IP +from tests.integration import local, CASSANDRA_IP, TestCluster class TestModel(Model): @@ -227,7 +226,7 @@ def test_connection_creation_from_session(self): @test_category object_mapper """ - cluster = Cluster([CASSANDRA_IP]) + cluster = TestCluster() session = cluster.connect() connection_name = 'from_session' conn.register_connection(connection_name, session=session) @@ -258,7 +257,7 @@ def test_connection_param_validation(self): @test_category object_mapper """ - cluster = Cluster([CASSANDRA_IP]) + cluster = TestCluster() session = cluster.connect() with self.assertRaises(CQLEngineException): conn.register_connection("bad_coonection1", session=session, consistency="not_null") diff --git a/tests/integration/long/test_consistency.py b/tests/integration/long/test_consistency.py index a4507a9bf0..bbf446861a 100644 --- a/tests/integration/long/test_consistency.py +++ b/tests/integration/long/test_consistency.py @@ -19,10 +19,10 @@ import traceback from cassandra import ConsistencyLevel, OperationTimedOut, ReadTimeout, WriteTimeout, Unavailable -from cassandra.cluster import Cluster, ExecutionProfile, EXEC_PROFILE_DEFAULT +from cassandra.cluster import ExecutionProfile, EXEC_PROFILE_DEFAULT from cassandra.policies import TokenAwarePolicy, RoundRobinPolicy, DowngradingConsistencyRetryPolicy from cassandra.query import SimpleStatement -from tests.integration import use_singledc, PROTOCOL_VERSION, execute_until_pass +from tests.integration import use_singledc, execute_until_pass, TestCluster from tests.integration.long.utils import ( force_stop, create_schema, wait_for_down, wait_for_up, start, CoordinatorStats @@ -129,8 +129,9 @@ def _assert_reads_fail(self, session, keyspace, consistency_levels): pass def _test_tokenaware_one_node_down(self, keyspace, rf, accepted): - cluster = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(TokenAwarePolicy(RoundRobinPolicy()))}) + cluster = TestCluster( + execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(TokenAwarePolicy(RoundRobinPolicy()))} + ) session = cluster.connect(wait_for_all_pools=True) wait_for_up(cluster, 1) wait_for_up(cluster, 2) @@ -180,8 +181,9 @@ def test_rfthree_tokenaware_one_node_down(self): def test_rfthree_tokenaware_none_down(self): keyspace = 'test_rfthree_tokenaware_none_down' - cluster = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(TokenAwarePolicy(RoundRobinPolicy()))}) + cluster = TestCluster( + execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(TokenAwarePolicy(RoundRobinPolicy()))} + ) session = cluster.connect(wait_for_all_pools=True) wait_for_up(cluster, 1) wait_for_up(cluster, 2) @@ -203,9 +205,10 @@ def test_rfthree_tokenaware_none_down(self): cluster.shutdown() def _test_downgrading_cl(self, keyspace, rf, accepted): - cluster = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(TokenAwarePolicy(RoundRobinPolicy()), - 
DowngradingConsistencyRetryPolicy())}) + cluster = TestCluster(execution_profiles={ + EXEC_PROFILE_DEFAULT: ExecutionProfile(TokenAwarePolicy(RoundRobinPolicy()), + DowngradingConsistencyRetryPolicy()) + }) session = cluster.connect(wait_for_all_pools=True) create_schema(cluster, session, keyspace, replication_factor=rf) @@ -246,16 +249,18 @@ def test_rftwo_downgradingcl(self): def test_rfthree_roundrobin_downgradingcl(self): keyspace = 'test_rfthree_roundrobin_downgradingcl' - with Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(RoundRobinPolicy(), - DowngradingConsistencyRetryPolicy())}) as cluster: + with TestCluster(execution_profiles={ + EXEC_PROFILE_DEFAULT: ExecutionProfile(RoundRobinPolicy(), + DowngradingConsistencyRetryPolicy()) + }) as cluster: self.rfthree_downgradingcl(cluster, keyspace, True) def test_rfthree_tokenaware_downgradingcl(self): keyspace = 'test_rfthree_tokenaware_downgradingcl' - with Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(TokenAwarePolicy(RoundRobinPolicy()), - DowngradingConsistencyRetryPolicy())}) as cluster: + with TestCluster(execution_profiles={ + EXEC_PROFILE_DEFAULT: ExecutionProfile(TokenAwarePolicy(RoundRobinPolicy()), + DowngradingConsistencyRetryPolicy()) + }) as cluster: self.rfthree_downgradingcl(cluster, keyspace, False) def rfthree_downgradingcl(self, cluster, keyspace, roundrobin): @@ -334,7 +339,7 @@ def test_pool_with_host_down(self): all_contact_points = ["127.0.0.1", "127.0.0.2", "127.0.0.3"] # Connect up and find out which host will bet queries routed to to first - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() cluster.connect(wait_for_all_pools=True) hosts = cluster.metadata.all_hosts() address = hosts[0].address @@ -344,13 +349,13 @@ def test_pool_with_host_down(self): # We now register a cluster that has it's Control Connection NOT on the node that we are shutting down. # We do this so we don't miss the event contact_point = '127.0.0.{0}'.format(self.get_node_not_x(node_to_stop)) - cluster = Cluster(contact_points=[contact_point], protocol_version=PROTOCOL_VERSION) + cluster = TestCluster(contact_points=[contact_point]) cluster.connect(wait_for_all_pools=True) try: force_stop(node_to_stop) wait_for_down(cluster, node_to_stop) # Attempt a query against that node. 
It should complete - cluster2 = Cluster(contact_points=all_contact_points, protocol_version=PROTOCOL_VERSION) + cluster2 = TestCluster(contact_points=all_contact_points) session2 = cluster2.connect() session2.execute("SELECT * FROM system.local") finally: diff --git a/tests/integration/long/test_failure_types.py b/tests/integration/long/test_failure_types.py index 25854a57f7..6bdff8d15d 100644 --- a/tests/integration/long/test_failure_types.py +++ b/tests/integration/long/test_failure_types.py @@ -25,13 +25,13 @@ ConsistencyLevel, OperationTimedOut, ReadTimeout, WriteTimeout, ReadFailure, WriteFailure, FunctionFailure, ProtocolVersion, ) -from cassandra.cluster import Cluster, ExecutionProfile, EXEC_PROFILE_DEFAULT +from cassandra.cluster import ExecutionProfile, EXEC_PROFILE_DEFAULT from cassandra.concurrent import execute_concurrent_with_args from cassandra.query import SimpleStatement from tests.integration import ( use_singledc, PROTOCOL_VERSION, get_cluster, setup_keyspace, remove_cluster, get_node, start_cluster_wait_for_up, requiresmallclockgranularity, - local, CASSANDRA_VERSION) + local, CASSANDRA_VERSION, TestCluster) try: @@ -83,7 +83,7 @@ def setUp(self): raise unittest.SkipTest( "Native protocol 4,0+ is required for custom payloads, currently using %r" % (PROTOCOL_VERSION,)) - self.cluster = Cluster(protocol_version=PROTOCOL_VERSION) + self.cluster = TestCluster() self.session = self.cluster.connect() self.nodes_currently_failing = [] self.node1, self.node2, self.node3 = get_cluster().nodes.values() @@ -332,8 +332,7 @@ def setUp(self): """ Setup sessions and pause node1 """ - self.cluster = Cluster( - protocol_version=PROTOCOL_VERSION, + self.cluster = TestCluster( execution_profiles={ EXEC_PROFILE_DEFAULT: ExecutionProfile( load_balancing_policy=HostFilterPolicy( diff --git a/tests/integration/long/test_ipv6.py b/tests/integration/long/test_ipv6.py index 5f2bdbddf3..a49c1677e8 100644 --- a/tests/integration/long/test_ipv6.py +++ b/tests/integration/long/test_ipv6.py @@ -15,11 +15,11 @@ import os, socket, errno from ccmlib import common -from cassandra.cluster import Cluster, NoHostAvailable +from cassandra.cluster import NoHostAvailable from cassandra.io.asyncorereactor import AsyncoreConnection from tests import is_monkey_patched -from tests.integration import use_cluster, remove_cluster, PROTOCOL_VERSION +from tests.integration import use_cluster, remove_cluster, TestCluster if is_monkey_patched(): LibevConnection = -1 @@ -75,8 +75,7 @@ class IPV6ConnectionTest(object): connection_class = None def test_connect(self): - cluster = Cluster(connection_class=self.connection_class, contact_points=['::1'], connect_timeout=10, - protocol_version=PROTOCOL_VERSION) + cluster = TestCluster(connection_class=self.connection_class, contact_points=['::1'], connect_timeout=10) session = cluster.connect() future = session.execute_async("SELECT * FROM system.local") future.result() @@ -84,16 +83,16 @@ def test_connect(self): cluster.shutdown() def test_error(self): - cluster = Cluster(connection_class=self.connection_class, contact_points=['::1'], port=9043, - connect_timeout=10, protocol_version=PROTOCOL_VERSION) + cluster = TestCluster(connection_class=self.connection_class, contact_points=['::1'], port=9043, + connect_timeout=10) self.assertRaisesRegexp(NoHostAvailable, '\(\'Unable to connect.*%s.*::1\', 9043.*Connection refused.*' % errno.ECONNREFUSED, cluster.connect) def test_error_multiple(self): if len(socket.getaddrinfo('localhost', 9043, socket.AF_UNSPEC, socket.SOCK_STREAM)) < 2: 
raise unittest.SkipTest('localhost only resolves one address') - cluster = Cluster(connection_class=self.connection_class, contact_points=['localhost'], port=9043, - connect_timeout=10, protocol_version=PROTOCOL_VERSION) + cluster = TestCluster(connection_class=self.connection_class, contact_points=['localhost'], port=9043, + connect_timeout=10) self.assertRaisesRegexp(NoHostAvailable, '\(\'Unable to connect.*Tried connecting to \[\(.*\(.*\].*Last error', cluster.connect) diff --git a/tests/integration/long/test_large_data.py b/tests/integration/long/test_large_data.py index 071268d86c..ce7e4398da 100644 --- a/tests/integration/long/test_large_data.py +++ b/tests/integration/long/test_large_data.py @@ -21,10 +21,10 @@ import logging, sys, traceback, time from cassandra import ConsistencyLevel, OperationTimedOut, WriteTimeout -from cassandra.cluster import Cluster, ExecutionProfile, EXEC_PROFILE_DEFAULT +from cassandra.cluster import ExecutionProfile, EXEC_PROFILE_DEFAULT from cassandra.query import dict_factory from cassandra.query import SimpleStatement -from tests.integration import use_singledc, PROTOCOL_VERSION +from tests.integration import use_singledc, PROTOCOL_VERSION, TestCluster from tests.integration.long.utils import create_schema try: @@ -61,9 +61,9 @@ def setUp(self): self.keyspace = 'large_data' def make_session_and_keyspace(self): - cluster = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(request_timeout=20, - row_factory=dict_factory)}) + cluster = TestCluster(execution_profiles={ + EXEC_PROFILE_DEFAULT: ExecutionProfile(request_timeout=20, row_factory=dict_factory) + }) session = cluster.connect() create_schema(cluster, session, self.keyspace) return session diff --git a/tests/integration/long/test_loadbalancingpolicies.py b/tests/integration/long/test_loadbalancingpolicies.py index 8a5b7fe4c8..c3bf911ed0 100644 --- a/tests/integration/long/test_loadbalancingpolicies.py +++ b/tests/integration/long/test_loadbalancingpolicies.py @@ -19,7 +19,7 @@ from cassandra import ConsistencyLevel, Unavailable, OperationTimedOut, ReadTimeout, ReadFailure, \ WriteTimeout, WriteFailure -from cassandra.cluster import Cluster, NoHostAvailable, ExecutionProfile, EXEC_PROFILE_DEFAULT +from cassandra.cluster import NoHostAvailable, ExecutionProfile, EXEC_PROFILE_DEFAULT from cassandra.concurrent import execute_concurrent_with_args from cassandra.metadata import murmur3 from cassandra.policies import ( @@ -29,7 +29,7 @@ ) from cassandra.query import SimpleStatement -from tests.integration import use_singledc, use_multidc, remove_cluster, PROTOCOL_VERSION +from tests.integration import use_singledc, use_multidc, remove_cluster, TestCluster from tests.integration.long.utils import (wait_for_up, create_schema, CoordinatorStats, force_stop, wait_for_down, decommission, start, @@ -62,8 +62,11 @@ def teardown_class(cls): def _connect_probe_cluster(self): if not self.probe_cluster: # distinct cluster so we can see the status of nodes ignored by the LBP being tested - self.probe_cluster = Cluster(schema_metadata_enabled=False, token_metadata_enabled=False, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(load_balancing_policy=RoundRobinPolicy())}) + self.probe_cluster = TestCluster( + schema_metadata_enabled=False, + token_metadata_enabled=False, + execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(load_balancing_policy=RoundRobinPolicy())} + ) self.probe_session = self.probe_cluster.connect() def _wait_for_nodes_up(self, 
nodes, cluster=None): @@ -90,8 +93,8 @@ def _wait_for_nodes_down(self, nodes, cluster=None): def _cluster_session_with_lbp(self, lbp): # create a cluster with no delay on events - cluster = Cluster(protocol_version=PROTOCOL_VERSION, topology_event_refresh_window=0, status_event_refresh_window=0, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(load_balancing_policy=lbp)}) + cluster = TestCluster(topology_event_refresh_window=0, status_event_refresh_window=0, + execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(load_balancing_policy=lbp)}) session = cluster.connect() return cluster, session @@ -180,7 +183,7 @@ def test_token_aware_is_used_by_default(self): @test_category load_balancing:token_aware """ - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() if murmur3 is not None: self.assertTrue(isinstance(cluster.profile_manager.default.load_balancing_policy, TokenAwarePolicy)) @@ -659,11 +662,14 @@ def test_white_list(self): use_singledc() keyspace = 'test_white_list' - cluster = Cluster(('127.0.0.2',), protocol_version=PROTOCOL_VERSION, - topology_event_refresh_window=0, status_event_refresh_window=0, - execution_profiles={EXEC_PROFILE_DEFAULT: - ExecutionProfile(load_balancing_policy= - WhiteListRoundRobinPolicy((IP_FORMAT % 2,)))}) + cluster = TestCluster( + contact_points=('127.0.0.2',), topology_event_refresh_window=0, status_event_refresh_window=0, + execution_profiles={ + EXEC_PROFILE_DEFAULT: ExecutionProfile( + load_balancing_policy=WhiteListRoundRobinPolicy((IP_FORMAT % 2,)) + ) + } + ) session = cluster.connect() self._wait_for_nodes_up([1, 2, 3]) @@ -709,9 +715,8 @@ def test_black_list_with_host_filter_policy(self): child_policy=RoundRobinPolicy(), predicate=lambda host: host.address != ignored_address ) - cluster = Cluster( - (IP_FORMAT % 1,), - protocol_version=PROTOCOL_VERSION, + cluster = TestCluster( + contact_points=(IP_FORMAT % 1,), topology_event_refresh_window=0, status_event_refresh_window=0, execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(load_balancing_policy=hfp)} diff --git a/tests/integration/long/test_policies.py b/tests/integration/long/test_policies.py index d694476fb5..0648e6cc93 100644 --- a/tests/integration/long/test_policies.py +++ b/tests/integration/long/test_policies.py @@ -18,9 +18,9 @@ import unittest # noqa from cassandra import ConsistencyLevel, Unavailable -from cassandra.cluster import Cluster, ExecutionProfile, EXEC_PROFILE_DEFAULT +from cassandra.cluster import ExecutionProfile, EXEC_PROFILE_DEFAULT -from tests.integration import use_cluster, get_cluster, get_node +from tests.integration import use_cluster, get_cluster, get_node, TestCluster def setup_module(): @@ -47,7 +47,7 @@ def test_should_rethrow_on_unvailable_with_default_policy_if_cas(self): ep = ExecutionProfile(consistency_level=ConsistencyLevel.ALL, serial_consistency_level=ConsistencyLevel.SERIAL) - cluster = Cluster(execution_profiles={EXEC_PROFILE_DEFAULT: ep}) + cluster = TestCluster(execution_profiles={EXEC_PROFILE_DEFAULT: ep}) session = cluster.connect() session.execute("CREATE KEYSPACE test_retry_policy_cas WITH replication = {'class':'SimpleStrategy','replication_factor': 3};") diff --git a/tests/integration/long/test_schema.py b/tests/integration/long/test_schema.py index 2ad854688d..e2945a117b 100644 --- a/tests/integration/long/test_schema.py +++ b/tests/integration/long/test_schema.py @@ -15,10 +15,9 @@ import logging from cassandra import ConsistencyLevel, AlreadyExists -from cassandra.cluster import Cluster from 
cassandra.query import SimpleStatement -from tests.integration import use_singledc, PROTOCOL_VERSION, execute_until_pass +from tests.integration import use_singledc, execute_until_pass, TestCluster import time @@ -38,7 +37,7 @@ class SchemaTests(unittest.TestCase): @classmethod def setup_class(cls): - cls.cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cls.cluster = TestCluster() cls.session = cls.cluster.connect(wait_for_all_pools=True) @classmethod @@ -99,7 +98,7 @@ def test_for_schema_disagreements_same_keyspace(self): Tests for any schema disagreements using the same keyspace multiple times """ - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() session = cluster.connect(wait_for_all_pools=True) for i in range(30): @@ -133,7 +132,7 @@ def test_for_schema_disagreement_attribute(self): @test_category schema """ # This should yield a schema disagreement - cluster = Cluster(protocol_version=PROTOCOL_VERSION, max_schema_agreement_wait=0.001) + cluster = TestCluster(max_schema_agreement_wait=0.001) session = cluster.connect(wait_for_all_pools=True) rs = session.execute("CREATE KEYSPACE test_schema_disagreement WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3}") @@ -146,7 +145,7 @@ def test_for_schema_disagreement_attribute(self): cluster.shutdown() # These should have schema agreement - cluster = Cluster(protocol_version=PROTOCOL_VERSION, max_schema_agreement_wait=100) + cluster = TestCluster(max_schema_agreement_wait=100) session = cluster.connect() rs = session.execute("CREATE KEYSPACE test_schema_disagreement WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3}") self.check_and_wait_for_agreement(session, rs, True) diff --git a/tests/integration/long/test_ssl.py b/tests/integration/long/test_ssl.py index 49cad63c68..7698849945 100644 --- a/tests/integration/long/test_ssl.py +++ b/tests/integration/long/test_ssl.py @@ -18,7 +18,7 @@ import unittest import os, sys, traceback, logging, ssl, time, math, uuid -from cassandra.cluster import Cluster, NoHostAvailable +from cassandra.cluster import NoHostAvailable from cassandra.connection import DefaultEndPoint from cassandra import ConsistencyLevel from cassandra.query import SimpleStatement @@ -26,7 +26,7 @@ from OpenSSL import SSL, crypto from tests.integration import ( - PROTOCOL_VERSION, get_cluster, remove_cluster, use_single_node, start_cluster_wait_for_up, EVENT_LOOP_MANAGER, + get_cluster, remove_cluster, use_single_node, start_cluster_wait_for_up, EVENT_LOOP_MANAGER, TestCluster ) if not hasattr(ssl, 'match_hostname'): @@ -103,9 +103,8 @@ def validate_ssl_options(**kwargs): if tries > 5: raise RuntimeError("Failed to connect to SSL cluster after 5 attempts") try: - cluster = Cluster( + cluster = TestCluster( contact_points=[DefaultEndPoint(hostname)], - protocol_version=PROTOCOL_VERSION, ssl_options=ssl_options, ssl_context=ssl_context ) @@ -185,7 +184,7 @@ def test_can_connect_with_ssl_long_running(self): if tries > 5: raise RuntimeError("Failed to connect to SSL cluster after 5 attempts") try: - cluster = Cluster(protocol_version=PROTOCOL_VERSION, ssl_options=ssl_options) + cluster = TestCluster(ssl_options=ssl_options) session = cluster.connect(wait_for_all_pools=True) break except Exception: @@ -291,8 +290,8 @@ def test_cannot_connect_without_client_auth(self): @test_category connection:ssl """ - cluster = Cluster(protocol_version=PROTOCOL_VERSION, ssl_options={'ca_certs': CLIENT_CA_CERTS, - 'ssl_version': ssl_version}) + cluster = 
TestCluster(ssl_options={'ca_certs': CLIENT_CA_CERTS, + 'ssl_version': ssl_version}) with self.assertRaises(NoHostAvailable) as _: cluster.connect() @@ -320,10 +319,11 @@ def test_cannot_connect_with_bad_client_auth(self): # I don't set the bad certfile for pyopenssl because it hangs ssl_options['certfile'] = DRIVER_CERTFILE_BAD - cluster = Cluster(protocol_version=PROTOCOL_VERSION, - ssl_options={'ca_certs': CLIENT_CA_CERTS, - 'ssl_version': ssl_version, - 'keyfile': DRIVER_KEYFILE}) + cluster = TestCluster( + ssl_options={'ca_certs': CLIENT_CA_CERTS, + 'ssl_version': ssl_version, + 'keyfile': DRIVER_KEYFILE} + ) with self.assertRaises(NoHostAvailable) as _: cluster.connect() @@ -364,7 +364,7 @@ def test_ssl_want_write_errors_are_retried(self): """ ssl_options = {'ca_certs': CLIENT_CA_CERTS, 'ssl_version': ssl_version} - cluster = Cluster(protocol_version=PROTOCOL_VERSION, ssl_options=ssl_options) + cluster = TestCluster(ssl_options=ssl_options) session = cluster.connect(wait_for_all_pools=True) try: session.execute('drop keyspace ssl_error_test') diff --git a/tests/integration/long/test_topology_change.py b/tests/integration/long/test_topology_change.py index 8800cd802b..5b12eef28c 100644 --- a/tests/integration/long/test_topology_change.py +++ b/tests/integration/long/test_topology_change.py @@ -1,8 +1,7 @@ from unittest import TestCase -from cassandra.cluster import Cluster from cassandra.policies import HostStateListener -from tests.integration import PROTOCOL_VERSION, get_node, use_cluster, local +from tests.integration import get_node, use_cluster, local, TestCluster from tests.integration.long.utils import decommission from tests.util import wait_until @@ -32,7 +31,7 @@ def test_removed_node_stops_reconnecting(self): use_cluster("test_down_then_removed", [3], start=True) state_listener = StateListener() - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() self.addCleanup(cluster.shutdown) cluster.register_listener(state_listener) session = cluster.connect(wait_for_all_pools=True) diff --git a/tests/integration/long/utils.py b/tests/integration/long/utils.py index 421e694a9a..a5b5bdd226 100644 --- a/tests/integration/long/utils.py +++ b/tests/integration/long/utils.py @@ -93,7 +93,7 @@ def force_stop(node): def decommission(node): - if (DSE_VERSION and DSE_VERSION >= Version("5.1")) or CASSANDRA_VERSION >= Version("4.0"): + if (DSE_VERSION and DSE_VERSION >= Version("5.1")) or CASSANDRA_VERSION >= Version("4.0-a"): # CASSANDRA-12510 get_node(node).decommission(force=True) else: diff --git a/tests/integration/standard/test_authentication.py b/tests/integration/standard/test_authentication.py index 4c32e9de3d..9755c5098b 100644 --- a/tests/integration/standard/test_authentication.py +++ b/tests/integration/standard/test_authentication.py @@ -15,11 +15,11 @@ import logging import time -from cassandra.cluster import Cluster, NoHostAvailable +from cassandra.cluster import NoHostAvailable from cassandra.auth import PlainTextAuthProvider, SASLClient, SaslAuthProvider from tests.integration import use_singledc, get_cluster, remove_cluster, PROTOCOL_VERSION, CASSANDRA_IP, \ - set_default_cass_ip, USE_CASS_EXTERNAL, start_cluster_wait_for_up + USE_CASS_EXTERNAL, start_cluster_wait_for_up, TestCluster from tests.integration.util import assert_quiescent_pool_state try: @@ -44,8 +44,6 @@ def setup_module(): ccm_cluster.set_configuration_options(config_options) log.debug("Starting ccm test cluster with %s", config_options) start_cluster_wait_for_up(ccm_cluster) - else: 
- set_default_cass_ip() def teardown_module(): @@ -77,14 +75,12 @@ def cluster_as(self, usr, pwd): # to ensure the role manager is setup for _ in range(5): try: - cluster = Cluster( - protocol_version=PROTOCOL_VERSION, + cluster = TestCluster( idle_heartbeat_interval=0, auth_provider=self.get_authentication_provider(username='cassandra', password='cassandra')) cluster.connect(wait_for_all_pools=True) - return Cluster( - protocol_version=PROTOCOL_VERSION, + return TestCluster( idle_heartbeat_interval=0, auth_provider=self.get_authentication_provider(username=usr, password=pwd)) except Exception as e: @@ -147,7 +143,7 @@ def test_connect_empty_pwd(self): cluster.shutdown() def test_connect_no_auth_provider(self): - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() try: self.assertRaisesRegexp(NoHostAvailable, '.*AuthenticationFailed.*', diff --git a/tests/integration/standard/test_authentication_misconfiguration.py b/tests/integration/standard/test_authentication_misconfiguration.py index caac84dd25..546141d801 100644 --- a/tests/integration/standard/test_authentication_misconfiguration.py +++ b/tests/integration/standard/test_authentication_misconfiguration.py @@ -14,8 +14,7 @@ import unittest -from cassandra.cluster import Cluster -from tests.integration import CASSANDRA_IP, USE_CASS_EXTERNAL, use_cluster, PROTOCOL_VERSION +from tests.integration import USE_CASS_EXTERNAL, use_cluster, TestCluster class MisconfiguredAuthenticationTests(unittest.TestCase): @@ -34,7 +33,7 @@ def setUpClass(cls): cls.ccm_cluster = ccm_cluster def test_connect_no_auth_provider(self): - cluster = Cluster(protocol_version=PROTOCOL_VERSION, contact_points=[CASSANDRA_IP]) + cluster = TestCluster() cluster.connect() cluster.refresh_nodes() down_hosts = [host for host in cluster.metadata.all_hosts() if not host.is_up] diff --git a/tests/integration/standard/test_client_warnings.py b/tests/integration/standard/test_client_warnings.py index 1092af7776..c5ce5dc726 100644 --- a/tests/integration/standard/test_client_warnings.py +++ b/tests/integration/standard/test_client_warnings.py @@ -19,9 +19,8 @@ import unittest from cassandra.query import BatchStatement -from cassandra.cluster import Cluster -from tests.integration import use_singledc, PROTOCOL_VERSION, local +from tests.integration import use_singledc, PROTOCOL_VERSION, local, TestCluster def setup_module(): @@ -35,7 +34,7 @@ def setUpClass(cls): if PROTOCOL_VERSION < 4: return - cls.cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cls.cluster = TestCluster() cls.session = cls.cluster.connect() cls.session.execute("CREATE TABLE IF NOT EXISTS test1rf.client_warning (k int, v0 int, v1 int, PRIMARY KEY (k, v0))") diff --git a/tests/integration/standard/test_cluster.py b/tests/integration/standard/test_cluster.py index 2314931b7d..cdb6f1f3b7 100644 --- a/tests/integration/standard/test_cluster.py +++ b/tests/integration/standard/test_cluster.py @@ -27,7 +27,7 @@ from packaging.version import Version import cassandra -from cassandra.cluster import Cluster, NoHostAvailable, ExecutionProfile, EXEC_PROFILE_DEFAULT, ControlConnection +from cassandra.cluster import NoHostAvailable, ExecutionProfile, EXEC_PROFILE_DEFAULT, ControlConnection, Cluster from cassandra.concurrent import execute_concurrent from cassandra.policies import (RoundRobinPolicy, ExponentialReconnectionPolicy, RetryPolicy, SimpleConvictionPolicy, HostDistance, @@ -40,10 +40,10 @@ from cassandra.connection import DefaultEndPoint from tests import notwindows -from 
tests.integration import use_singledc, PROTOCOL_VERSION, get_server_versions, CASSANDRA_VERSION, \ +from tests.integration import use_singledc, get_server_versions, CASSANDRA_VERSION, \ execute_until_pass, execute_with_long_wait_retry, get_node, MockLoggingHandler, get_unsupported_lower_protocol, \ get_unsupported_upper_protocol, protocolv5, local, CASSANDRA_IP, greaterthanorequalcass30, lessthanorequalcass40, \ - DSE_VERSION + DSE_VERSION, TestCluster, PROTOCOL_VERSION from tests.integration.util import assert_quiescent_pool_state import sys @@ -81,8 +81,9 @@ def test_ignored_host_up(self): @test_category connection """ ignored_host_policy = IgnoredHostPolicy(["127.0.0.2", "127.0.0.3"]) - cluster = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(load_balancing_policy=ignored_host_policy)}) + cluster = TestCluster( + execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(load_balancing_policy=ignored_host_policy)} + ) cluster.connect() for host in cluster.metadata.all_hosts(): if str(host) == "127.0.0.1:9042": @@ -102,7 +103,7 @@ def test_host_resolution(self): @test_category connection """ - cluster = Cluster(contact_points=["localhost"], protocol_version=PROTOCOL_VERSION, connect_timeout=1) + cluster = TestCluster(contact_points=["localhost"], connect_timeout=1) self.assertTrue(DefaultEndPoint('127.0.0.1') in cluster.endpoints_resolved) @local @@ -116,11 +117,14 @@ def test_host_duplication(self): @test_category connection """ - cluster = Cluster(contact_points=["localhost", "127.0.0.1", "localhost", "localhost", "localhost"], protocol_version=PROTOCOL_VERSION, connect_timeout=1) + cluster = TestCluster( + contact_points=["localhost", "127.0.0.1", "localhost", "localhost", "localhost"], + connect_timeout=1 + ) cluster.connect(wait_for_all_pools=True) self.assertEqual(len(cluster.metadata.all_hosts()), 3) cluster.shutdown() - cluster = Cluster(contact_points=["127.0.0.1", "localhost"], protocol_version=PROTOCOL_VERSION, connect_timeout=1) + cluster = TestCluster(contact_points=["127.0.0.1", "localhost"], connect_timeout=1) cluster.connect(wait_for_all_pools=True) self.assertEqual(len(cluster.metadata.all_hosts()), 3) cluster.shutdown() @@ -144,7 +148,7 @@ def test_raise_error_on_control_connection_timeout(self): """ get_node(1).pause() - cluster = Cluster(contact_points=['127.0.0.1'], protocol_version=PROTOCOL_VERSION, connect_timeout=1) + cluster = TestCluster(contact_points=['127.0.0.1'], connect_timeout=1) with self.assertRaisesRegexp(NoHostAvailable, "OperationTimedOut\('errors=Timed out creating connection \(1 seconds\)"): cluster.connect() @@ -157,7 +161,7 @@ def test_basic(self): Test basic connection and usage """ - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() session = cluster.connect() result = execute_until_pass(session, """ @@ -213,20 +217,19 @@ def cleanup(): self.addCleanup(cleanup) # Test with empty list - self.cluster_to_shutdown = Cluster([], protocol_version=PROTOCOL_VERSION) + self.cluster_to_shutdown = TestCluster(contact_points=[]) with self.assertRaises(NoHostAvailable): self.cluster_to_shutdown.connect() self.cluster_to_shutdown.shutdown() # Test with only invalid - self.cluster_to_shutdown = Cluster(('1.2.3.4',), protocol_version=PROTOCOL_VERSION) + self.cluster_to_shutdown = TestCluster(contact_points=('1.2.3.4',)) with self.assertRaises(NoHostAvailable): self.cluster_to_shutdown.connect() self.cluster_to_shutdown.shutdown() # Test with valid and invalid hosts - 
self.cluster_to_shutdown = Cluster(("127.0.0.1", "127.0.0.2", "1.2.3.4"), - protocol_version=PROTOCOL_VERSION) + self.cluster_to_shutdown = TestCluster(contact_points=("127.0.0.1", "127.0.0.2", "1.2.3.4")) self.cluster_to_shutdown.connect() self.cluster_to_shutdown.shutdown() @@ -298,7 +301,7 @@ def test_invalid_protocol_negotation(self): upper_bound = get_unsupported_upper_protocol() log.debug('got upper_bound of {}'.format(upper_bound)) if upper_bound is not None: - cluster = Cluster(protocol_version=upper_bound) + cluster = TestCluster(protocol_version=upper_bound) with self.assertRaises(NoHostAvailable): cluster.connect() cluster.shutdown() @@ -306,7 +309,7 @@ def test_invalid_protocol_negotation(self): lower_bound = get_unsupported_lower_protocol() log.debug('got lower_bound of {}'.format(lower_bound)) if lower_bound is not None: - cluster = Cluster(protocol_version=lower_bound) + cluster = TestCluster(protocol_version=lower_bound) with self.assertRaises(NoHostAvailable): cluster.connect() cluster.shutdown() @@ -316,7 +319,7 @@ def test_connect_on_keyspace(self): Ensure clusters that connect on a keyspace, do """ - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() session = cluster.connect() result = session.execute( """ @@ -334,7 +337,7 @@ def test_connect_on_keyspace(self): cluster.shutdown() def test_set_keyspace_twice(self): - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() session = cluster.connect() session.execute("USE system") session.execute("USE system") @@ -345,7 +348,7 @@ def test_default_connections(self): Ensure errors are not thrown when using non-default policies """ - Cluster( + TestCluster( reconnection_policy=ExponentialReconnectionPolicy(1.0, 600.0), conviction_policy_factory=SimpleConvictionPolicy, protocol_version=PROTOCOL_VERSION @@ -355,7 +358,7 @@ def test_connect_to_already_shutdown_cluster(self): """ Ensure you cannot connect to a cluster that's been shutdown """ - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() cluster.shutdown() self.assertRaises(Exception, cluster.connect) @@ -364,7 +367,7 @@ def test_auth_provider_is_callable(self): Ensure that auth_providers are always callable """ self.assertRaises(TypeError, Cluster, auth_provider=1, protocol_version=1) - c = Cluster(protocol_version=1) + c = TestCluster(protocol_version=1) self.assertRaises(TypeError, setattr, c, 'auth_provider', 1) def test_v2_auth_provider(self): @@ -373,7 +376,7 @@ def test_v2_auth_provider(self): """ bad_auth_provider = lambda x: {'username': 'foo', 'password': 'bar'} self.assertRaises(TypeError, Cluster, auth_provider=bad_auth_provider, protocol_version=2) - c = Cluster(protocol_version=2) + c = TestCluster(protocol_version=2) self.assertRaises(TypeError, setattr, c, 'auth_provider', bad_auth_provider) def test_conviction_policy_factory_is_callable(self): @@ -389,8 +392,8 @@ def test_connect_to_bad_hosts(self): when a cluster cannot connect to given hosts """ - cluster = Cluster(['127.1.2.9', '127.1.2.10'], - protocol_version=PROTOCOL_VERSION) + cluster = TestCluster(contact_points=['127.1.2.9', '127.1.2.10'], + protocol_version=PROTOCOL_VERSION) self.assertRaises(NoHostAvailable, cluster.connect) def test_cluster_settings(self): @@ -400,7 +403,7 @@ def test_cluster_settings(self): if PROTOCOL_VERSION >= 3: raise unittest.SkipTest("min/max requests and core/max conns aren't used with v3 protocol") - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() 
min_requests_per_connection = cluster.get_min_requests_per_connection(HostDistance.LOCAL) self.assertEqual(cassandra.cluster.DEFAULT_MIN_REQUESTS, min_requests_per_connection) @@ -423,7 +426,7 @@ def test_cluster_settings(self): self.assertEqual(cluster.get_max_connections_per_host(HostDistance.LOCAL), max_connections_per_host + 1) def test_refresh_schema(self): - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() session = cluster.connect() original_meta = cluster.metadata.keyspaces @@ -435,7 +438,7 @@ def test_refresh_schema(self): cluster.shutdown() def test_refresh_schema_keyspace(self): - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() session = cluster.connect() original_meta = cluster.metadata.keyspaces @@ -451,7 +454,7 @@ def test_refresh_schema_keyspace(self): cluster.shutdown() def test_refresh_schema_table(self): - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() session = cluster.connect() original_meta = cluster.metadata.keyspaces @@ -477,7 +480,7 @@ def test_refresh_schema_type(self): raise unittest.SkipTest('UDTs are not specified in change events for protocol v2') # We may want to refresh types on keyspace change events in that case(?) - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() session = cluster.connect() keyspace_name = 'test1rf' @@ -516,7 +519,7 @@ def patched_wait_for_responses(*args, **kwargs): agreement_timeout = 1 # cluster agreement wait exceeded - c = Cluster(protocol_version=PROTOCOL_VERSION, max_schema_agreement_wait=agreement_timeout) + c = TestCluster(max_schema_agreement_wait=agreement_timeout) c.connect() self.assertTrue(c.metadata.keyspaces) @@ -541,7 +544,7 @@ def patched_wait_for_responses(*args, **kwargs): refresh_threshold = 0.5 # cluster agreement bypass - c = Cluster(protocol_version=PROTOCOL_VERSION, max_schema_agreement_wait=0) + c = TestCluster(max_schema_agreement_wait=0) start_time = time.time() s = c.connect() end_time = time.time() @@ -572,7 +575,7 @@ def test_trace(self): Ensure trace can be requested for async and non-async queries """ - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() session = cluster.connect() result = session.execute( "SELECT * FROM system.local", trace=True) @@ -618,7 +621,7 @@ def test_trace_unavailable(self): @test_category query """ - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() self.addCleanup(cluster.shutdown) session = cluster.connect() @@ -660,7 +663,7 @@ def test_one_returns_none(self): @test_category query """ - with Cluster() as cluster: + with TestCluster() as cluster: session = cluster.connect() self.assertIsNone(session.execute("SELECT * from system.local WHERE key='madeup_key'").one()) @@ -669,7 +672,7 @@ def test_string_coverage(self): Ensure str(future) returns without error """ - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() session = cluster.connect() query = "SELECT * FROM system.local" @@ -726,7 +729,7 @@ def test_can_connect_with_sslauth(self): def _warning_are_issued_when_auth(self, auth_provider): with MockLoggingHandler().set_module_name(connection.__name__) as mock_handler: - with Cluster(auth_provider=auth_provider) as cluster: + with TestCluster(auth_provider=auth_provider) as cluster: session = cluster.connect() self.assertIsNotNone(session.execute("SELECT * from system.local")) @@ -740,8 +743,8 @@ def _warning_are_issued_when_auth(self, auth_provider): def 
test_idle_heartbeat(self): interval = 2 - cluster = Cluster(protocol_version=PROTOCOL_VERSION, idle_heartbeat_interval=interval, - monitor_reporting_enabled=False) + cluster = TestCluster(idle_heartbeat_interval=interval, + monitor_reporting_enabled=False) if PROTOCOL_VERSION < 3: cluster.set_core_connections_per_host(HostDistance.LOCAL, 1) session = cluster.connect(wait_for_all_pools=True) @@ -803,7 +806,7 @@ def test_idle_heartbeat_disabled(self): self.assertTrue(Cluster.idle_heartbeat_interval) # heartbeat disabled with '0' - cluster = Cluster(protocol_version=PROTOCOL_VERSION, idle_heartbeat_interval=0) + cluster = TestCluster(idle_heartbeat_interval=0) self.assertEqual(cluster.idle_heartbeat_interval, 0) session = cluster.connect() @@ -819,7 +822,7 @@ def test_idle_heartbeat_disabled(self): def test_pool_management(self): # Ensure that in_flight and request_ids quiesce after cluster operations - cluster = Cluster(protocol_version=PROTOCOL_VERSION, idle_heartbeat_interval=0) # no idle heartbeat here, pool management is tested in test_idle_heartbeat + cluster = TestCluster(idle_heartbeat_interval=0) # no idle heartbeat here, pool management is tested in test_idle_heartbeat session = cluster.connect() session2 = cluster.connect() @@ -863,7 +866,7 @@ def test_profile_load_balancing(self): RoundRobinPolicy(), lambda host: host.address == CASSANDRA_IP ) ) - with Cluster(execution_profiles={'node1': node1}, monitor_reporting_enabled=False) as cluster: + with TestCluster(execution_profiles={'node1': node1}, monitor_reporting_enabled=False) as cluster: session = cluster.connect(wait_for_all_pools=True) # default is DCA RR for all hosts @@ -904,7 +907,7 @@ def test_profile_load_balancing(self): self.assertTrue(session.execute(query, execution_profile='node1')[0].release_version) def test_setting_lbp_legacy(self): - cluster = Cluster() + cluster = TestCluster() self.addCleanup(cluster.shutdown) cluster.load_balancing_policy = RoundRobinPolicy() self.assertEqual( @@ -932,7 +935,7 @@ def test_profile_lb_swap(self): rr1 = ExecutionProfile(load_balancing_policy=RoundRobinPolicy()) rr2 = ExecutionProfile(load_balancing_policy=RoundRobinPolicy()) exec_profiles = {'rr1': rr1, 'rr2': rr2} - with Cluster(execution_profiles=exec_profiles) as cluster: + with TestCluster(execution_profiles=exec_profiles) as cluster: session = cluster.connect(wait_for_all_pools=True) # default is DCA RR for all hosts @@ -959,7 +962,7 @@ def test_ta_lbp(self): """ query = "select release_version from system.local" ta1 = ExecutionProfile() - with Cluster() as cluster: + with TestCluster() as cluster: session = cluster.connect() cluster.add_execution_profile("ta1", ta1) rs = session.execute(query, execution_profile='ta1') @@ -980,7 +983,7 @@ def test_clone_shared_lbp(self): query = "select release_version from system.local" rr1 = ExecutionProfile(load_balancing_policy=RoundRobinPolicy()) exec_profiles = {'rr1': rr1} - with Cluster(execution_profiles=exec_profiles) as cluster: + with TestCluster(execution_profiles=exec_profiles) as cluster: session = cluster.connect(wait_for_all_pools=True) self.assertGreater(len(cluster.metadata.all_hosts()), 1, "We only have one host connected at this point") @@ -1008,7 +1011,7 @@ def test_missing_exec_prof(self): rr1 = ExecutionProfile(load_balancing_policy=RoundRobinPolicy()) rr2 = ExecutionProfile(load_balancing_policy=RoundRobinPolicy()) exec_profiles = {'rr1': rr1, 'rr2': rr2} - with Cluster(execution_profiles=exec_profiles) as cluster: + with TestCluster(execution_profiles=exec_profiles) 
as cluster: session = cluster.connect() with self.assertRaises(ValueError): session.execute(query, execution_profile='rr3') @@ -1035,7 +1038,7 @@ def test_profile_pool_management(self): RoundRobinPolicy(), lambda host: host.address == "127.0.0.2" ) ) - with Cluster(execution_profiles={EXEC_PROFILE_DEFAULT: node1, 'node2': node2}) as cluster: + with TestCluster(execution_profiles={EXEC_PROFILE_DEFAULT: node1, 'node2': node2}) as cluster: session = cluster.connect(wait_for_all_pools=True) pools = session.get_pool_state() # there are more hosts, but we connected to the ones in the lbp aggregate @@ -1070,7 +1073,7 @@ def test_add_profile_timeout(self): RoundRobinPolicy(), lambda host: host.address == "127.0.0.1" ) ) - with Cluster(execution_profiles={EXEC_PROFILE_DEFAULT: node1}) as cluster: + with TestCluster(execution_profiles={EXEC_PROFILE_DEFAULT: node1}) as cluster: session = cluster.connect(wait_for_all_pools=True) pools = session.get_pool_state() self.assertGreater(len(cluster.metadata.all_hosts()), 2) @@ -1096,7 +1099,7 @@ def test_add_profile_timeout(self): @notwindows def test_execute_query_timeout(self): - with Cluster() as cluster: + with TestCluster() as cluster: session = cluster.connect(wait_for_all_pools=True) query = "SELECT * FROM system.local" @@ -1142,8 +1145,7 @@ def test_replicas_are_queried(self): tap_profile = ExecutionProfile( load_balancing_policy=TokenAwarePolicy(RoundRobinPolicy()) ) - with Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: tap_profile}) as cluster: + with TestCluster(execution_profiles={EXEC_PROFILE_DEFAULT: tap_profile}) as cluster: session = cluster.connect(wait_for_all_pools=True) session.execute(''' CREATE TABLE test1rf.table_with_big_key ( @@ -1168,9 +1170,8 @@ def test_replicas_are_queried(self): log = logging.getLogger(__name__) log.info("The only replica found was: {}".format(only_replica)) available_hosts = [host for host in ["127.0.0.1", "127.0.0.2", "127.0.0.3"] if host != only_replica] - with Cluster(contact_points=available_hosts, - protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: hfp_profile}) as cluster: + with TestCluster(contact_points=available_hosts, + execution_profiles={EXEC_PROFILE_DEFAULT: hfp_profile}) as cluster: session = cluster.connect(wait_for_all_pools=True) prepared = session.prepare("""SELECT * from test1rf.table_with_big_key @@ -1196,10 +1197,10 @@ def test_compact_option(self): @test_category connection """ - nc_cluster = Cluster(protocol_version=PROTOCOL_VERSION, no_compact=True) + nc_cluster = TestCluster(no_compact=True) nc_session = nc_cluster.connect() - cluster = Cluster(protocol_version=PROTOCOL_VERSION, no_compact=False) + cluster = TestCluster(no_compact=False) session = cluster.connect() self.addCleanup(cluster.shutdown) @@ -1284,7 +1285,7 @@ def test_address_translator_basic(self): @test_category metadata """ lh_ad = LocalHostAdressTranslator({'127.0.0.1': '127.0.0.1', '127.0.0.2': '127.0.0.1', '127.0.0.3': '127.0.0.1'}) - c = Cluster(address_translator=lh_ad) + c = TestCluster(address_translator=lh_ad) c.connect() self.assertEqual(len(c.metadata.all_hosts()), 1) c.shutdown() @@ -1304,7 +1305,7 @@ def test_address_translator_with_mixed_nodes(self): """ adder_map = {'127.0.0.1': '127.0.0.1', '127.0.0.2': '127.0.0.3', '127.0.0.3': '127.0.0.2'} lh_ad = LocalHostAdressTranslator(adder_map) - c = Cluster(address_translator=lh_ad) + c = TestCluster(address_translator=lh_ad) c.connect() for host in c.metadata.all_hosts(): 
self.assertEqual(adder_map.get(host.address), host.broadcast_address) @@ -1330,7 +1331,7 @@ def test_no_connect(self): @test_category configuration """ - with Cluster() as cluster: + with TestCluster() as cluster: self.assertFalse(cluster.is_shutdown) self.assertTrue(cluster.is_shutdown) @@ -1344,7 +1345,7 @@ def test_simple_nested(self): @test_category configuration """ - with Cluster(**self.cluster_kwargs) as cluster: + with TestCluster(**self.cluster_kwargs) as cluster: with cluster.connect() as session: self.assertFalse(cluster.is_shutdown) self.assertFalse(session.is_shutdown) @@ -1362,7 +1363,7 @@ def test_cluster_no_session(self): @test_category configuration """ - with Cluster(**self.cluster_kwargs) as cluster: + with TestCluster(**self.cluster_kwargs) as cluster: session = cluster.connect() self.assertFalse(cluster.is_shutdown) self.assertFalse(session.is_shutdown) @@ -1380,7 +1381,7 @@ def test_session_no_cluster(self): @test_category configuration """ - cluster = Cluster(**self.cluster_kwargs) + cluster = TestCluster(**self.cluster_kwargs) unmanaged_session = cluster.connect() with cluster.connect() as session: self.assertFalse(cluster.is_shutdown) @@ -1411,7 +1412,7 @@ def test_down_event_with_active_connection(self): @test_category connection """ - with Cluster(protocol_version=PROTOCOL_VERSION) as cluster: + with TestCluster() as cluster: session = cluster.connect(wait_for_all_pools=True) random_host = cluster.metadata.all_hosts()[0] cluster.on_down(random_host, False) @@ -1440,8 +1441,9 @@ class DontPrepareOnIgnoredHostsTest(unittest.TestCase): def test_prepare_on_ignored_hosts(self): - cluster = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(load_balancing_policy=self.ignore_node_3_policy)}) + cluster = TestCluster( + execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(load_balancing_policy=self.ignore_node_3_policy)} + ) session = cluster.connect() cluster.reprepare_on_up, cluster.prepare_on_all_hosts = True, False @@ -1486,7 +1488,7 @@ def test_invalid_protocol_version_beta_option(self): @test_category connection """ - cluster = Cluster(protocol_version=cassandra.ProtocolVersion.MAX_SUPPORTED, allow_beta_protocol_version=False) + cluster = TestCluster(protocol_version=cassandra.ProtocolVersion.V5, allow_beta_protocol_version=False) try: with self.assertRaises(NoHostAvailable): cluster.connect() @@ -1504,9 +1506,9 @@ def test_valid_protocol_version_beta_options_connect(self): @test_category connection """ - cluster = Cluster(protocol_version=cassandra.ProtocolVersion.MAX_SUPPORTED, allow_beta_protocol_version=True) + cluster = Cluster(protocol_version=cassandra.ProtocolVersion.V5, allow_beta_protocol_version=True) session = cluster.connect() - self.assertEqual(cluster.protocol_version, cassandra.ProtocolVersion.MAX_SUPPORTED) + self.assertEqual(cluster.protocol_version, cassandra.ProtocolVersion.V5) self.assertTrue(session.execute("select release_version from system.local")[0]) cluster.shutdown() @@ -1524,7 +1526,7 @@ def test_deprecation_warnings_legacy_parameters(self): @test_category logs """ with warnings.catch_warnings(record=True) as w: - Cluster(load_balancing_policy=RoundRobinPolicy()) + TestCluster(load_balancing_policy=RoundRobinPolicy()) self.assertEqual(len(w), 1) self.assertIn("Legacy execution parameters will be removed in 4.0. 
Consider using execution profiles.", str(w[0].message)) @@ -1541,7 +1543,7 @@ def test_deprecation_warnings_meta_refreshed(self): @test_category logs """ with warnings.catch_warnings(record=True) as w: - cluster = Cluster() + cluster = TestCluster() cluster.set_meta_refresh_enabled(True) self.assertEqual(len(w), 1) self.assertIn("Cluster.set_meta_refresh_enabled is deprecated and will be removed in 4.0.", @@ -1559,7 +1561,7 @@ def test_deprecation_warning_default_consistency_level(self): @test_category logs """ with warnings.catch_warnings(record=True) as w: - cluster = Cluster() + cluster = TestCluster() session = cluster.connect() session.default_consistency_level = ConsistencyLevel.ONE self.assertEqual(len(w), 1) diff --git a/tests/integration/standard/test_concurrent.py b/tests/integration/standard/test_concurrent.py index 954e5f28f4..8bd65c7f6f 100644 --- a/tests/integration/standard/test_concurrent.py +++ b/tests/integration/standard/test_concurrent.py @@ -17,12 +17,12 @@ from cassandra import InvalidRequest, ConsistencyLevel, ReadTimeout, WriteTimeout, OperationTimedOut, \ ReadFailure, WriteFailure -from cassandra.cluster import Cluster, ExecutionProfile, EXEC_PROFILE_DEFAULT +from cassandra.cluster import ExecutionProfile, EXEC_PROFILE_DEFAULT from cassandra.concurrent import execute_concurrent, execute_concurrent_with_args, ExecutionResult from cassandra.policies import HostDistance from cassandra.query import tuple_factory, SimpleStatement -from tests.integration import use_singledc, PROTOCOL_VERSION +from tests.integration import use_singledc, PROTOCOL_VERSION, TestCluster from six import next @@ -42,8 +42,7 @@ class ClusterTests(unittest.TestCase): @classmethod def setUpClass(cls): - cls.cluster = Cluster( - protocol_version=PROTOCOL_VERSION, + cls.cluster = TestCluster( execution_profiles = { EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=tuple_factory) } diff --git a/tests/integration/standard/test_connection.py b/tests/integration/standard/test_connection.py index 4af48a562c..aaa5a27dfd 100644 --- a/tests/integration/standard/test_connection.py +++ b/tests/integration/standard/test_connection.py @@ -28,7 +28,7 @@ from unittest import SkipTest from cassandra import ConsistencyLevel, OperationTimedOut -from cassandra.cluster import NoHostAvailable, ConnectionShutdown, Cluster, ExecutionProfile, EXEC_PROFILE_DEFAULT +from cassandra.cluster import NoHostAvailable, ConnectionShutdown, ExecutionProfile, EXEC_PROFILE_DEFAULT import cassandra.io.asyncorereactor from cassandra.io.asyncorereactor import AsyncoreConnection from cassandra.protocol import QueryMessage @@ -37,8 +37,9 @@ from cassandra.pool import HostConnectionPool from tests import is_monkey_patched -from tests.integration import use_singledc, PROTOCOL_VERSION, get_node, CASSANDRA_IP, local, \ - requiresmallclockgranularity, greaterthancass20 +from tests.integration import use_singledc, get_node, CASSANDRA_IP, local, \ + requiresmallclockgranularity, greaterthancass20, TestCluster + try: from cassandra.io.libevreactor import LibevConnection import cassandra.io.libevreactor @@ -56,15 +57,13 @@ def setup_module(): class ConnectionTimeoutTest(unittest.TestCase): def setUp(self): - self.cluster = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles= - {EXEC_PROFILE_DEFAULT: ExecutionProfile( - load_balancing_policy=HostFilterPolicy( - RoundRobinPolicy(), predicate=lambda host: host.address == CASSANDRA_IP - ) - ) - } - ) + self.cluster = TestCluster(execution_profiles={ + EXEC_PROFILE_DEFAULT: 
ExecutionProfile( + load_balancing_policy=HostFilterPolicy( + RoundRobinPolicy(), predicate=lambda host: host.address == CASSANDRA_IP + ) + ) + }) self.session = self.cluster.connect() @@ -118,7 +117,7 @@ class HeartbeatTest(unittest.TestCase): """ def setUp(self): - self.cluster = Cluster(protocol_version=PROTOCOL_VERSION, idle_heartbeat_interval=1) + self.cluster = TestCluster(idle_heartbeat_interval=1) self.session = self.cluster.connect(wait_for_all_pools=True) def tearDown(self): @@ -217,7 +216,12 @@ def get_connection(self, timeout=5): for i in range(5): try: contact_point = CASSANDRA_IP - conn = self.klass.factory(endpoint=contact_point, timeout=timeout, protocol_version=PROTOCOL_VERSION) + conn = self.klass.factory( + endpoint=contact_point, + timeout=timeout, + protocol_version=TestCluster.DEFAULT_PROTOCOL_VERSION, + allow_beta_protocol_version=TestCluster.DEFAULT_ALLOW_BETA + ) break except (OperationTimedOut, NoHostAvailable, ConnectionShutdown) as e: continue @@ -412,10 +416,10 @@ class C1(self.klass): class C2(self.klass): pass - clusterC1 = Cluster(connection_class=C1) + clusterC1 = TestCluster(connection_class=C1) clusterC1.connect(wait_for_all_pools=True) - clusterC2 = Cluster(connection_class=C2) + clusterC2 = TestCluster(connection_class=C2) clusterC2.connect(wait_for_all_pools=True) self.addCleanup(clusterC1.shutdown) self.addCleanup(clusterC2.shutdown) diff --git a/tests/integration/standard/test_control_connection.py b/tests/integration/standard/test_control_connection.py index b91d29c4e6..fe02de43da 100644 --- a/tests/integration/standard/test_control_connection.py +++ b/tests/integration/standard/test_control_connection.py @@ -22,9 +22,8 @@ import unittest # noqa -from cassandra.cluster import Cluster from cassandra.protocol import ConfigurationException -from tests.integration import use_singledc, PROTOCOL_VERSION +from tests.integration import use_singledc, PROTOCOL_VERSION, TestCluster from tests.integration.datatype_utils import update_datatypes @@ -39,7 +38,7 @@ def setUp(self): raise unittest.SkipTest( "Native protocol 3,0+ is required for UDTs using %r" % (PROTOCOL_VERSION,)) - self.cluster = Cluster(protocol_version=PROTOCOL_VERSION) + self.cluster = TestCluster() def tearDown(self): try: diff --git a/tests/integration/standard/test_custom_cluster.py b/tests/integration/standard/test_custom_cluster.py index 1943557ee4..84e0737086 100644 --- a/tests/integration/standard/test_custom_cluster.py +++ b/tests/integration/standard/test_custom_cluster.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
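Every hunk in this patch swaps direct Cluster(protocol_version=PROTOCOL_VERSION, ...) construction for a shared TestCluster helper imported from tests.integration, and the connection-factory change above additionally reads class-level defaults (TestCluster.DEFAULT_PROTOCOL_VERSION, TestCluster.DEFAULT_ALLOW_BETA). A minimal sketch of what such a helper could look like, assuming it only injects environment-driven defaults and hands back a plain Cluster; the real definition lives in tests/integration/__init__.py and may differ:

    # Hypothetical sketch; the actual helper is defined in tests/integration/__init__.py.
    from cassandra.cluster import Cluster

    class TestCluster(object):
        # Assumed to be derived from the test environment (protocol/beta settings).
        DEFAULT_PROTOCOL_VERSION = 4
        DEFAULT_ALLOW_BETA = False

        def __new__(cls, **kwargs):
            # Returning a plain Cluster keeps context-manager usage
            # ("with TestCluster() as cluster:") working unchanged.
            kwargs.setdefault('protocol_version', cls.DEFAULT_PROTOCOL_VERSION)
            kwargs.setdefault('allow_beta_protocol_version', cls.DEFAULT_ALLOW_BETA)
            return Cluster(**kwargs)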
-from cassandra.cluster import Cluster, NoHostAvailable -from tests.integration import use_singledc, get_cluster, remove_cluster, local +from cassandra.cluster import NoHostAvailable +from tests.integration import use_singledc, get_cluster, remove_cluster, local, TestCluster from tests.util import wait_until, wait_until_not_raised try: @@ -31,9 +31,9 @@ def setup_module(): # can't use wait_for_binary_proto cause ccm tries on port 9042 ccm_cluster.start(wait_for_binary_proto=False) # wait until all nodes are up - wait_until_not_raised(lambda: Cluster(['127.0.0.1'], port=9046).connect().shutdown(), 1, 20) - wait_until_not_raised(lambda: Cluster(['127.0.0.2'], port=9046).connect().shutdown(), 1, 20) - wait_until_not_raised(lambda: Cluster(['127.0.0.3'], port=9046).connect().shutdown(), 1, 20) + wait_until_not_raised(lambda: TestCluster(contact_points=['127.0.0.1'], port=9046).connect().shutdown(), 1, 20) + wait_until_not_raised(lambda: TestCluster(contact_points=['127.0.0.2'], port=9046).connect().shutdown(), 1, 20) + wait_until_not_raised(lambda: TestCluster(contact_points=['127.0.0.3'], port=9046).connect().shutdown(), 1, 20) def teardown_module(): @@ -50,11 +50,11 @@ def test_connection_honor_cluster_port(self): All hosts should be marked as up and we should be able to execute queries on it. """ - cluster = Cluster() + cluster = TestCluster() with self.assertRaises(NoHostAvailable): cluster.connect() # should fail on port 9042 - cluster = Cluster(port=9046) + cluster = TestCluster(port=9046) session = cluster.connect(wait_for_all_pools=True) wait_until(lambda: len(cluster.metadata.all_hosts()) == 3, 1, 5) diff --git a/tests/integration/standard/test_custom_payload.py b/tests/integration/standard/test_custom_payload.py index c68e9ef843..9906a8243e 100644 --- a/tests/integration/standard/test_custom_payload.py +++ b/tests/integration/standard/test_custom_payload.py @@ -21,9 +21,9 @@ import six from cassandra.query import (SimpleStatement, BatchStatement, BatchType) -from cassandra.cluster import Cluster -from tests.integration import use_singledc, PROTOCOL_VERSION, local +from tests.integration import use_singledc, PROTOCOL_VERSION, local, TestCluster + def setup_module(): use_singledc() @@ -38,7 +38,7 @@ def setUp(self): raise unittest.SkipTest( "Native protocol 4,0+ is required for custom payloads, currently using %r" % (PROTOCOL_VERSION,)) - self.cluster = Cluster(protocol_version=PROTOCOL_VERSION) + self.cluster = TestCluster() self.session = self.cluster.connect() def tearDown(self): diff --git a/tests/integration/standard/test_custom_protocol_handler.py b/tests/integration/standard/test_custom_protocol_handler.py index 2ab847677e..d5108ed47b 100644 --- a/tests/integration/standard/test_custom_protocol_handler.py +++ b/tests/integration/standard/test_custom_protocol_handler.py @@ -19,13 +19,13 @@ from cassandra.protocol import ProtocolHandler, ResultMessage, QueryMessage, UUIDType, read_int from cassandra.query import tuple_factory, SimpleStatement -from cassandra.cluster import (Cluster, ResponseFuture, ExecutionProfile, EXEC_PROFILE_DEFAULT, +from cassandra.cluster import (ResponseFuture, ExecutionProfile, EXEC_PROFILE_DEFAULT, ContinuousPagingOptions, NoHostAvailable) from cassandra import ProtocolVersion, ConsistencyLevel -from tests.integration import use_singledc, PROTOCOL_VERSION, drop_keyspace_shutdown_cluster, \ +from tests.integration import use_singledc, drop_keyspace_shutdown_cluster, \ greaterthanorequalcass30, execute_with_long_wait_retry, greaterthanorequaldse51, 
greaterthanorequalcass3_10, \ - greaterthanorequalcass31 + greaterthanorequalcass31, TestCluster from tests.integration.datatype_utils import update_datatypes, PRIMITIVE_DATATYPES from tests.integration.standard.utils import create_table_with_all_types, get_all_primitive_params from six import binary_type @@ -43,7 +43,7 @@ class CustomProtocolHandlerTest(unittest.TestCase): @classmethod def setUpClass(cls): - cls.cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cls.cluster = TestCluster() cls.session = cls.cluster.connect() cls.session.execute("CREATE KEYSPACE custserdes WITH replication = { 'class' : 'SimpleStrategy', 'replication_factor': '1'}") cls.session.set_keyspace("custserdes") @@ -68,8 +68,9 @@ def test_custom_raw_uuid_row_results(self): """ # Ensure that we get normal uuid back first - cluster = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=tuple_factory)}) + cluster = TestCluster( + execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=tuple_factory)} + ) session = cluster.connect(keyspace="custserdes") result = session.execute("SELECT schema_version FROM system.local") @@ -105,8 +106,9 @@ def test_custom_raw_row_results_all_types(self): @test_category data_types:serialization """ # Connect using a custom protocol handler that tracks the various types the result message is used with. - cluster = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=tuple_factory)}) + cluster = TestCluster( + execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=tuple_factory)} + ) session = cluster.connect(keyspace="custserdes") session.client_protocol_handler = CustomProtocolHandlerResultMessageTracked @@ -133,7 +135,7 @@ def test_protocol_divergence_v5_fail_by_continuous_paging(self): @test_category connection """ - cluster = Cluster(protocol_version=ProtocolVersion.V5, allow_beta_protocol_version=True) + cluster = TestCluster(protocol_version=ProtocolVersion.V5, allow_beta_protocol_version=True) session = cluster.connect() max_pages = 4 @@ -228,7 +230,7 @@ def _send_query_message(self, session, timeout, **kwargs): return future def _protocol_divergence_fail_by_flag_uses_int(self, version, uses_int_query_flag, int_flag = True, beta=False): - cluster = Cluster(protocol_version=version, allow_beta_protocol_version=beta) + cluster = TestCluster(protocol_version=version, allow_beta_protocol_version=beta) session = cluster.connect() query_one = SimpleStatement("INSERT INTO test3rf.test (k, v) VALUES (1, 1)") diff --git a/tests/integration/standard/test_cython_protocol_handlers.py b/tests/integration/standard/test_cython_protocol_handlers.py index c6be3760fb..4e45553be2 100644 --- a/tests/integration/standard/test_cython_protocol_handlers.py +++ b/tests/integration/standard/test_cython_protocol_handlers.py @@ -9,18 +9,17 @@ from itertools import count -from cassandra.query import tuple_factory -from cassandra.cluster import Cluster, ExecutionProfile, EXEC_PROFILE_DEFAULT +from cassandra.cluster import ExecutionProfile, EXEC_PROFILE_DEFAULT from cassandra.concurrent import execute_concurrent_with_args -from cassandra.protocol import ProtocolHandler, LazyProtocolHandler, NumpyProtocolHandler from cassandra.cython_deps import HAVE_CYTHON, HAVE_NUMPY +from cassandra.protocol import ProtocolHandler, LazyProtocolHandler, NumpyProtocolHandler +from cassandra.query import tuple_factory from tests import VERIFY_CYTHON -from 
tests.integration import use_singledc, PROTOCOL_VERSION, notprotocolv1, \ - drop_keyspace_shutdown_cluster, BasicSharedKeyspaceUnitTestCase, greaterthancass21 +from tests.integration import use_singledc, notprotocolv1, \ + drop_keyspace_shutdown_cluster, BasicSharedKeyspaceUnitTestCase, greaterthancass21, TestCluster from tests.integration.datatype_utils import update_datatypes from tests.integration.standard.utils import ( create_table_with_all_types, get_all_primitive_params, get_primitive_datatypes) - from tests.unit.cython.utils import cythontest, numpytest @@ -35,7 +34,7 @@ class CythonProtocolHandlerTest(unittest.TestCase): @classmethod def setUpClass(cls): - cls.cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cls.cluster = TestCluster() cls.session = cls.cluster.connect() cls.session.execute("CREATE KEYSPACE testspace WITH replication = " "{ 'class' : 'SimpleStrategy', 'replication_factor': '1'}") @@ -66,8 +65,9 @@ def test_cython_lazy_results_paged(self): Test Cython-based parser that returns an iterator, over multiple pages """ # arrays = { 'a': arr1, 'b': arr2, ... } - cluster = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=tuple_factory)}) + cluster = TestCluster( + execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=tuple_factory)} + ) session = cluster.connect(keyspace="testspace") session.client_protocol_handler = LazyProtocolHandler session.default_fetch_size = 2 @@ -99,8 +99,9 @@ def test_numpy_results_paged(self): Test Numpy-based parser that returns a NumPy array """ # arrays = { 'a': arr1, 'b': arr2, ... } - cluster = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=tuple_factory)}) + cluster = TestCluster( + execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=tuple_factory)} + ) session = cluster.connect(keyspace="testspace") session.client_protocol_handler = NumpyProtocolHandler session.default_fetch_size = 2 @@ -181,8 +182,9 @@ def get_data(protocol_handler): """ Get data from the test table. 
""" - cluster = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=tuple_factory)}) + cluster = TestCluster( + execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=tuple_factory)} + ) session = cluster.connect(keyspace="testspace") # use our custom protocol handler diff --git a/tests/integration/standard/test_dse.py b/tests/integration/standard/test_dse.py index 40bcb68495..1b9b5bef84 100644 --- a/tests/integration/standard/test_dse.py +++ b/tests/integration/standard/test_dse.py @@ -16,11 +16,10 @@ from packaging.version import Version -from cassandra.cluster import Cluster from tests import notwindows from tests.unit.cython.utils import notcython from tests.integration import (execute_until_pass, - execute_with_long_wait_retry, use_cluster) + execute_with_long_wait_retry, use_cluster, TestCluster) try: import unittest2 as unittest @@ -60,8 +59,7 @@ def _test_basic(self, dse_version): ) use_cluster(cluster_name=cluster_name, nodes=[3], dse_options={}) - cluster = Cluster( - allow_beta_protocol_version=(dse_version >= Version('6.7.0'))) + cluster = TestCluster() session = cluster.connect() result = execute_until_pass( session, diff --git a/tests/integration/standard/test_metadata.py b/tests/integration/standard/test_metadata.py index 9fae550f64..d3dea05fad 100644 --- a/tests/integration/standard/test_metadata.py +++ b/tests/integration/standard/test_metadata.py @@ -29,7 +29,6 @@ from cassandra import AlreadyExists, SignatureDescriptor, UserFunctionDescriptor, UserAggregateDescriptor -from cassandra.cluster import Cluster from cassandra.encoder import Encoder from cassandra.metadata import (IndexMetadata, Token, murmur3, Function, Aggregate, protect_name, protect_names, RegisteredTableExtension, _RegisteredExtensionType, get_schema_parser, @@ -42,8 +41,8 @@ greaterthanorequaldse51, greaterthanorequalcass30, lessthancass30, local, get_supported_protocol_versions, greaterthancass20, greaterthancass21, assert_startswith, greaterthanorequalcass40, - greaterthanorequaldse67, lessthancass40 -) + greaterthanorequaldse67, lessthancass40, + TestCluster) log = logging.getLogger(__name__) @@ -104,7 +103,7 @@ def test_host_release_version(self): class MetaDataRemovalTest(unittest.TestCase): def setUp(self): - self.cluster = Cluster(protocol_version=PROTOCOL_VERSION, contact_points=['127.0.0.1', '127.0.0.2', '127.0.0.3', '126.0.0.186']) + self.cluster = TestCluster(contact_points=['127.0.0.1', '127.0.0.2', '127.0.0.3', '126.0.0.186']) self.cluster.connect() def tearDown(self): @@ -138,11 +137,11 @@ def test_schema_metadata_disable(self): @test_category metadata """ # Validate metadata is missing where appropriate - no_schema = Cluster(schema_metadata_enabled=False) + no_schema = TestCluster(schema_metadata_enabled=False) no_schema_session = no_schema.connect() self.assertEqual(len(no_schema.metadata.keyspaces), 0) self.assertEqual(no_schema.metadata.export_schema_as_string(), '') - no_token = Cluster(token_metadata_enabled=False) + no_token = TestCluster(token_metadata_enabled=False) no_token_session = no_token.connect() self.assertEqual(len(no_token.metadata.token_map.token_to_host_owner), 0) @@ -548,7 +547,7 @@ def test_non_size_tiered_compaction(self): self.assertIn("'tombstone_threshold': '0.3'", cql) self.assertIn("LeveledCompactionStrategy", cql) # formerly legacy options; reintroduced in 4.0 - if CASSANDRA_VERSION < Version('4.0'): + if CASSANDRA_VERSION < Version('4.0-a'): self.assertNotIn("min_threshold", cql) 
self.assertNotIn("max_threshold", cql) @@ -570,7 +569,7 @@ def test_refresh_schema_metadata(self): @test_category metadata """ - cluster2 = Cluster(protocol_version=PROTOCOL_VERSION, schema_event_refresh_window=-1) + cluster2 = TestCluster(schema_event_refresh_window=-1) cluster2.connect() self.assertNotIn("new_keyspace", cluster2.metadata.keyspaces) @@ -653,7 +652,7 @@ def test_refresh_keyspace_metadata(self): @test_category metadata """ - cluster2 = Cluster(protocol_version=PROTOCOL_VERSION, schema_event_refresh_window=-1) + cluster2 = TestCluster(schema_event_refresh_window=-1) cluster2.connect() self.assertTrue(cluster2.metadata.keyspaces[self.keyspace_name].durable_writes) @@ -684,7 +683,7 @@ def test_refresh_table_metadata(self): table_name = "test" self.session.execute("CREATE TABLE {0}.{1} (a int PRIMARY KEY, b text)".format(self.keyspace_name, table_name)) - cluster2 = Cluster(protocol_version=PROTOCOL_VERSION, schema_event_refresh_window=-1) + cluster2 = TestCluster(schema_event_refresh_window=-1) cluster2.connect() self.assertNotIn("c", cluster2.metadata.keyspaces[self.keyspace_name].tables[table_name].columns) @@ -718,12 +717,13 @@ def test_refresh_metadata_for_mv(self): self.session.execute("CREATE TABLE {0}.{1} (a int PRIMARY KEY, b text)".format(self.keyspace_name, self.function_table_name)) - cluster2 = Cluster(protocol_version=PROTOCOL_VERSION, schema_event_refresh_window=-1) + cluster2 = TestCluster(schema_event_refresh_window=-1) cluster2.connect() try: self.assertNotIn("mv1", cluster2.metadata.keyspaces[self.keyspace_name].tables[self.function_table_name].views) - self.session.execute("CREATE MATERIALIZED VIEW {0}.mv1 AS SELECT a, b FROM {0}.{1} WHERE b IS NOT NULL PRIMARY KEY (a, b)" + self.session.execute("CREATE MATERIALIZED VIEW {0}.mv1 AS SELECT a, b FROM {0}.{1} " + "WHERE a IS NOT NULL AND b IS NOT NULL PRIMARY KEY (a, b)" .format(self.keyspace_name, self.function_table_name)) self.assertNotIn("mv1", cluster2.metadata.keyspaces[self.keyspace_name].tables[self.function_table_name].views) @@ -741,12 +741,15 @@ def test_refresh_metadata_for_mv(self): self.assertIsNot(original_meta, self.session.cluster.metadata.keyspaces[self.keyspace_name].tables[self.function_table_name].views['mv1']) self.assertEqual(original_meta.as_cql_query(), current_meta.as_cql_query()) - cluster3 = Cluster(protocol_version=PROTOCOL_VERSION, schema_event_refresh_window=-1) + cluster3 = TestCluster(schema_event_refresh_window=-1) cluster3.connect() try: self.assertNotIn("mv2", cluster3.metadata.keyspaces[self.keyspace_name].tables[self.function_table_name].views) - self.session.execute("CREATE MATERIALIZED VIEW {0}.mv2 AS SELECT a, b FROM {0}.{1} WHERE b IS NOT NULL PRIMARY KEY (a, b)" - .format(self.keyspace_name, self.function_table_name)) + self.session.execute( + "CREATE MATERIALIZED VIEW {0}.mv2 AS SELECT a, b FROM {0}.{1} " + "WHERE a IS NOT NULL AND b IS NOT NULL PRIMARY KEY (a, b)".format( + self.keyspace_name, self.function_table_name) + ) self.assertNotIn("mv2", cluster3.metadata.keyspaces[self.keyspace_name].tables[self.function_table_name].views) cluster3.refresh_materialized_view_metadata(self.keyspace_name, 'mv2') self.assertIn("mv2", cluster3.metadata.keyspaces[self.keyspace_name].tables[self.function_table_name].views) @@ -773,7 +776,7 @@ def test_refresh_user_type_metadata(self): if PROTOCOL_VERSION < 3: raise unittest.SkipTest("Protocol 3+ is required for UDTs, currently testing against {0}".format(PROTOCOL_VERSION)) - cluster2 = Cluster(protocol_version=PROTOCOL_VERSION, 
schema_event_refresh_window=-1) + cluster2 = TestCluster(schema_event_refresh_window=-1) cluster2.connect() self.assertEqual(cluster2.metadata.keyspaces[self.keyspace_name].user_types, {}) @@ -801,7 +804,7 @@ def test_refresh_user_type_metadata_proto_2(self): raise unittest.SkipTest("Protocol versions 1 and 2 are not supported in Cassandra version ".format(CASSANDRA_VERSION)) for protocol_version in (1, 2): - cluster = Cluster(protocol_version=protocol_version) + cluster = TestCluster() session = cluster.connect() self.assertEqual(cluster.metadata.keyspaces[self.keyspace_name].user_types, {}) @@ -841,7 +844,7 @@ def test_refresh_user_function_metadata(self): if PROTOCOL_VERSION < 4: raise unittest.SkipTest("Protocol 4+ is required for UDFs, currently testing against {0}".format(PROTOCOL_VERSION)) - cluster2 = Cluster(protocol_version=PROTOCOL_VERSION, schema_event_refresh_window=-1) + cluster2 = TestCluster(schema_event_refresh_window=-1) cluster2.connect() self.assertEqual(cluster2.metadata.keyspaces[self.keyspace_name].functions, {}) @@ -877,7 +880,7 @@ def test_refresh_user_aggregate_metadata(self): if PROTOCOL_VERSION < 4: raise unittest.SkipTest("Protocol 4+ is required for UDAs, currently testing against {0}".format(PROTOCOL_VERSION)) - cluster2 = Cluster(protocol_version=PROTOCOL_VERSION, schema_event_refresh_window=-1) + cluster2 = TestCluster(schema_event_refresh_window=-1) cluster2.connect() self.assertEqual(cluster2.metadata.keyspaces[self.keyspace_name].aggregates, {}) @@ -940,7 +943,10 @@ def test_table_extensions(self): v = t + 'view' s.execute("CREATE TABLE %s.%s (k text PRIMARY KEY, v int)" % (ks, t)) - s.execute("CREATE MATERIALIZED VIEW %s.%s AS SELECT * FROM %s.%s WHERE v IS NOT NULL PRIMARY KEY (v, k)" % (ks, v, ks, t)) + s.execute( + "CREATE MATERIALIZED VIEW %s.%s AS SELECT * FROM %s.%s " + "WHERE v IS NOT NULL AND k IS NOT NULL PRIMARY KEY (v, k)" % (ks, v, ks, t) + ) table_meta = ks_meta.tables[t] view_meta = table_meta.views[v] @@ -1034,7 +1040,7 @@ def test_export_schema(self): Test export schema functionality """ - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() cluster.connect() self.assertIsInstance(cluster.metadata.export_schema_as_string(), six.string_types) @@ -1045,7 +1051,7 @@ def test_export_keyspace_schema(self): Test export keyspace schema functionality """ - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() cluster.connect() for keyspace in cluster.metadata.keyspaces: @@ -1085,7 +1091,7 @@ def test_export_keyspace_schema_udts(self): if sys.version_info[0:2] != (2, 7): raise unittest.SkipTest('This test compares static strings generated from dict items, which may change orders. 
Test with 2.7.') - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() session = cluster.connect() session.execute(""" @@ -1153,7 +1159,7 @@ def test_case_sensitivity(self): Test that names that need to be escaped in CREATE statements are """ - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() session = cluster.connect() ksname = 'AnInterestingKeyspace' @@ -1198,7 +1204,7 @@ def test_already_exists_exceptions(self): Ensure AlreadyExists exception is thrown when hit """ - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() session = cluster.connect() ksname = 'test3rf' @@ -1224,7 +1230,7 @@ def test_replicas(self): if murmur3 is None: raise unittest.SkipTest('the murmur3 extension is not available') - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() self.assertEqual(cluster.metadata.get_replicas('test3rf', 'key'), []) cluster.connect('test3rf') @@ -1240,7 +1246,7 @@ def test_token_map(self): Test token mappings """ - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() cluster.connect('test3rf') ring = cluster.metadata.token_map.ring owners = list(cluster.metadata.token_map.token_to_host_owner[token] for token in ring) @@ -1264,7 +1270,7 @@ class TokenMetadataTest(unittest.TestCase): def test_token(self): expected_node_count = len(get_cluster().nodes) - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() cluster.connect() tmap = cluster.metadata.token_map self.assertTrue(issubclass(tmap.token_class, Token)) @@ -1277,7 +1283,7 @@ class KeyspaceAlterMetadata(unittest.TestCase): Test verifies that table metadata is preserved on keyspace alter """ def setUp(self): - self.cluster = Cluster(protocol_version=PROTOCOL_VERSION) + self.cluster = TestCluster() self.session = self.cluster.connect() name = self._testMethodName.lower() crt_ks = ''' @@ -1322,7 +1328,7 @@ def table_name(self): @classmethod def setup_class(cls): - cls.cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cls.cluster = TestCluster() cls.session = cls.cluster.connect() try: if cls.keyspace_name in cls.cluster.metadata.keyspaces: @@ -1431,7 +1437,7 @@ def function_name(self): @classmethod def setup_class(cls): if PROTOCOL_VERSION >= 4: - cls.cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cls.cluster = TestCluster() cls.keyspace_name = cls.__name__.lower() cls.session = cls.cluster.connect() cls.session.execute("CREATE KEYSPACE IF NOT EXISTS %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}" % cls.keyspace_name) @@ -1713,7 +1719,7 @@ def test_init_cond(self): """ # This is required until the java driver bundled with C* is updated to support v4 - c = Cluster(protocol_version=3) + c = TestCluster(protocol_version=3) s = c.connect(self.keyspace_name) encoder = Encoder() @@ -1897,7 +1903,7 @@ def function_name(self): @classmethod def setup_class(cls): - cls.cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cls.cluster = TestCluster() cls.keyspace_name = cls.__name__.lower() cls.session = cls.cluster.connect() cls.session.execute("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}" % cls.keyspace_name) @@ -2023,7 +2029,11 @@ class MaterializedViewMetadataTestSimple(BasicSharedKeyspaceUnitTestCase): def setUp(self): self.session.execute("CREATE TABLE {0}.{1} (pk int PRIMARY KEY, c int)".format(self.keyspace_name, self.function_table_name)) - self.session.execute("CREATE MATERIALIZED VIEW 
{0}.mv1 AS SELECT pk, c FROM {0}.{1} WHERE c IS NOT NULL PRIMARY KEY (pk, c)".format(self.keyspace_name, self.function_table_name)) + self.session.execute( + "CREATE MATERIALIZED VIEW {0}.mv1 AS SELECT pk, c FROM {0}.{1} " + "WHERE pk IS NOT NULL AND c IS NOT NULL PRIMARY KEY (pk, c)".format( + self.keyspace_name, self.function_table_name) + ) def tearDown(self): self.session.execute("DROP MATERIALIZED VIEW {0}.mv1".format(self.keyspace_name)) @@ -2094,7 +2104,11 @@ def test_materialized_view_metadata_drop(self): self.assertDictEqual({}, self.cluster.metadata.keyspaces[self.keyspace_name].tables[self.function_table_name].views) self.assertDictEqual({}, self.cluster.metadata.keyspaces[self.keyspace_name].views) - self.session.execute("CREATE MATERIALIZED VIEW {0}.mv1 AS SELECT pk, c FROM {0}.{1} WHERE c IS NOT NULL PRIMARY KEY (pk, c)".format(self.keyspace_name, self.function_table_name)) + self.session.execute( + "CREATE MATERIALIZED VIEW {0}.mv1 AS SELECT pk, c FROM {0}.{1} " + "WHERE pk IS NOT NULL AND c IS NOT NULL PRIMARY KEY (pk, c)".format( + self.keyspace_name, self.function_table_name) + ) @greaterthanorequalcass30 diff --git a/tests/integration/standard/test_metrics.py b/tests/integration/standard/test_metrics.py index 7d3b7976e2..676a5340ef 100644 --- a/tests/integration/standard/test_metrics.py +++ b/tests/integration/standard/test_metrics.py @@ -26,8 +26,8 @@ from cassandra import ConsistencyLevel, WriteTimeout, Unavailable, ReadTimeout from cassandra.protocol import SyntaxException -from cassandra.cluster import Cluster, NoHostAvailable, ExecutionProfile, EXEC_PROFILE_DEFAULT -from tests.integration import get_cluster, get_node, use_singledc, PROTOCOL_VERSION, execute_until_pass +from cassandra.cluster import NoHostAvailable, ExecutionProfile, EXEC_PROFILE_DEFAULT +from tests.integration import get_cluster, get_node, use_singledc, execute_until_pass, TestCluster from greplin import scales from tests.integration import BasicSharedKeyspaceUnitTestCaseRF3WM, BasicExistingKeyspaceUnitTestCase, local @@ -42,16 +42,16 @@ class MetricsTests(unittest.TestCase): def setUp(self): contact_point = ['127.0.0.2'] - self.cluster = Cluster(contact_points=contact_point, metrics_enabled=True, protocol_version=PROTOCOL_VERSION, - execution_profiles= + self.cluster = TestCluster(contact_points=contact_point, metrics_enabled=True, + execution_profiles= {EXEC_PROFILE_DEFAULT: ExecutionProfile( load_balancing_policy=HostFilterPolicy( - RoundRobinPolicy(), lambda host: host.address in contact_point), + RoundRobinPolicy(), lambda host: host.address in contact_point), retry_policy=FallthroughRetryPolicy() ) } - ) + ) self.session = self.cluster.connect("test3rf", wait_for_all_pools=True) def tearDown(self): @@ -203,8 +203,10 @@ def test_metrics_per_cluster(self): @test_category metrics """ - cluster2 = Cluster(metrics_enabled=True, protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(retry_policy=FallthroughRetryPolicy())}) + cluster2 = TestCluster( + metrics_enabled=True, + execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(retry_policy=FallthroughRetryPolicy())} + ) cluster2.connect(self.ks_name, wait_for_all_pools=True) self.assertEqual(len(cluster2.metadata.all_hosts()), 3) @@ -255,13 +257,17 @@ def test_duplicate_metrics_per_cluster(self): @test_category metrics """ - cluster2 = Cluster(metrics_enabled=True, protocol_version=PROTOCOL_VERSION, - monitor_reporting_enabled=False, - execution_profiles={EXEC_PROFILE_DEFAULT: 
ExecutionProfile(retry_policy=FallthroughRetryPolicy())}) - - cluster3 = Cluster(metrics_enabled=True, protocol_version=PROTOCOL_VERSION, - monitor_reporting_enabled=False, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(retry_policy=FallthroughRetryPolicy())}) + cluster2 = TestCluster( + metrics_enabled=True, + monitor_reporting_enabled=False, + execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(retry_policy=FallthroughRetryPolicy())} + ) + + cluster3 = TestCluster( + metrics_enabled=True, + monitor_reporting_enabled=False, + execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(retry_policy=FallthroughRetryPolicy())} + ) # Ensure duplicate metric names are not allowed cluster2.metrics.set_stats_name("appcluster") diff --git a/tests/integration/standard/test_policies.py b/tests/integration/standard/test_policies.py index 53b6494437..24facf42a0 100644 --- a/tests/integration/standard/test_policies.py +++ b/tests/integration/standard/test_policies.py @@ -17,13 +17,13 @@ except ImportError: import unittest # noqa -from cassandra.cluster import Cluster, ExecutionProfile, EXEC_PROFILE_DEFAULT +from cassandra.cluster import ExecutionProfile, EXEC_PROFILE_DEFAULT from cassandra.policies import HostFilterPolicy, RoundRobinPolicy, SimpleConvictionPolicy, \ WhiteListRoundRobinPolicy from cassandra.pool import Host from cassandra.connection import DefaultEndPoint -from tests.integration import PROTOCOL_VERSION, local, use_singledc +from tests.integration import local, use_singledc, TestCluster from concurrent.futures import wait as wait_futures @@ -55,9 +55,9 @@ def test_predicate_changes(self): hfp = ExecutionProfile( load_balancing_policy=HostFilterPolicy(RoundRobinPolicy(), predicate=predicate) ) - cluster = Cluster((contact_point,), execution_profiles={EXEC_PROFILE_DEFAULT: hfp}, - protocol_version=PROTOCOL_VERSION, topology_event_refresh_window=0, - status_event_refresh_window=0) + cluster = TestCluster(contact_points=(contact_point,), execution_profiles={EXEC_PROFILE_DEFAULT: hfp}, + topology_event_refresh_window=0, + status_event_refresh_window=0) session = cluster.connect(wait_for_all_pools=True) queried_hosts = set() @@ -84,7 +84,7 @@ class WhiteListRoundRobinPolicyTests(unittest.TestCase): def test_only_connects_to_subset(self): only_connect_hosts = {"127.0.0.1", "127.0.0.2"} white_list = ExecutionProfile(load_balancing_policy=WhiteListRoundRobinPolicy(only_connect_hosts)) - cluster = Cluster(execution_profiles={"white_list": white_list}) + cluster = TestCluster(execution_profiles={"white_list": white_list}) #cluster = Cluster(load_balancing_policy=WhiteListRoundRobinPolicy(only_connect_hosts)) session = cluster.connect(wait_for_all_pools=True) queried_hosts = set() diff --git a/tests/integration/standard/test_prepared_statements.py b/tests/integration/standard/test_prepared_statements.py index d314846e51..5c79f27346 100644 --- a/tests/integration/standard/test_prepared_statements.py +++ b/tests/integration/standard/test_prepared_statements.py @@ -13,7 +13,7 @@ # limitations under the License. 
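The conversions above follow one recurring shape: legacy Cluster keyword arguments such as load_balancing_policy and retry_policy ride on an ExecutionProfile registered under EXEC_PROFILE_DEFAULT. A condensed example of that pattern using the same policies these metrics and policy tests use (the contact address in the predicate is illustrative):

    from cassandra.cluster import Cluster, ExecutionProfile, EXEC_PROFILE_DEFAULT
    from cassandra.policies import (FallthroughRetryPolicy, HostFilterPolicy,
                                    RoundRobinPolicy)

    # Route all requests through one host and never retry, as the tests above
    # do via their default execution profile.
    profile = ExecutionProfile(
        load_balancing_policy=HostFilterPolicy(
            RoundRobinPolicy(), predicate=lambda host: host.address == '127.0.0.1'
        ),
        retry_policy=FallthroughRetryPolicy(),
    )
    cluster = Cluster(execution_profiles={EXEC_PROFILE_DEFAULT: profile})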
-from tests.integration import use_singledc, PROTOCOL_VERSION +from tests.integration import use_singledc, PROTOCOL_VERSION, TestCluster try: import unittest2 as unittest @@ -22,7 +22,6 @@ from cassandra import InvalidRequest, DriverException from cassandra import ConsistencyLevel, ProtocolVersion -from cassandra.cluster import Cluster from cassandra.query import PreparedStatement, UNSET_VALUE from tests.integration import (get_server_versions, greaterthanorequalcass40, greaterthanorequaldse50, requirecassandra, BasicSharedKeyspaceUnitTestCase) @@ -44,8 +43,7 @@ def setUpClass(cls): cls.cass_version = get_server_versions() def setUp(self): - self.cluster = Cluster(metrics_enabled=True, protocol_version=PROTOCOL_VERSION, - allow_beta_protocol_version=True) + self.cluster = TestCluster(metrics_enabled=True, allow_beta_protocol_version=True) self.session = self.cluster.connect() def tearDown(self): @@ -517,7 +515,7 @@ def test_prepare_id_is_updated_across_session(self): @since 3.12 @jira_ticket PYTHON-808 """ - one_cluster = Cluster(metrics_enabled=True, protocol_version=PROTOCOL_VERSION) + one_cluster = TestCluster(metrics_enabled=True) one_session = one_cluster.connect() self.addCleanup(one_cluster.shutdown) @@ -557,7 +555,7 @@ def test_id_is_not_updated_conditional_v4(self): @since 3.13 @jira_ticket PYTHON-847 """ - cluster = Cluster(protocol_version=ProtocolVersion.V4) + cluster = TestCluster(protocol_version=ProtocolVersion.V4) session = cluster.connect() self.addCleanup(cluster.shutdown) self._test_updated_conditional(session, 9) @@ -571,7 +569,7 @@ def test_id_is_not_updated_conditional_v5(self): @since 3.13 @jira_ticket PYTHON-847 """ - cluster = Cluster(protocol_version=ProtocolVersion.V5) + cluster = TestCluster(protocol_version=ProtocolVersion.V5) session = cluster.connect() self.addCleanup(cluster.shutdown) self._test_updated_conditional(session, 10) @@ -586,7 +584,7 @@ def test_id_is_not_updated_conditional_dsev1(self): @since 3.13 @jira_ticket PYTHON-847 """ - cluster = Cluster(protocol_version=ProtocolVersion.DSE_V1) + cluster = TestCluster(protocol_version=ProtocolVersion.DSE_V1) session = cluster.connect() self.addCleanup(cluster.shutdown) self._test_updated_conditional(session, 10) @@ -601,7 +599,7 @@ def test_id_is_not_updated_conditional_dsev2(self): @since 3.13 @jira_ticket PYTHON-847 """ - cluster = Cluster(protocol_version=ProtocolVersion.DSE_V2) + cluster = TestCluster(protocol_version=ProtocolVersion.DSE_V2) session = cluster.connect() self.addCleanup(cluster.shutdown) self._test_updated_conditional(session, 10) diff --git a/tests/integration/standard/test_query.py b/tests/integration/standard/test_query.py index 63f94399a6..3e393cdb4b 100644 --- a/tests/integration/standard/test_query.py +++ b/tests/integration/standard/test_query.py @@ -24,11 +24,11 @@ from cassandra import ConsistencyLevel, Unavailable, InvalidRequest, cluster from cassandra.query import (PreparedStatement, BoundStatement, SimpleStatement, BatchStatement, BatchType, dict_factory, TraceUnavailable) -from cassandra.cluster import Cluster, NoHostAvailable, ExecutionProfile, EXEC_PROFILE_DEFAULT +from cassandra.cluster import NoHostAvailable, ExecutionProfile, EXEC_PROFILE_DEFAULT from cassandra.policies import HostDistance, RoundRobinPolicy, WhiteListRoundRobinPolicy from tests.integration import use_singledc, PROTOCOL_VERSION, BasicSharedKeyspaceUnitTestCase, \ greaterthanprotocolv3, MockLoggingHandler, get_supported_protocol_versions, local, get_cluster, setup_keyspace, \ - USE_CASS_EXTERNAL, 
greaterthanorequalcass40, DSE_VERSION + USE_CASS_EXTERNAL, greaterthanorequalcass40, DSE_VERSION, TestCluster from tests import notwindows from tests.integration import greaterthanorequalcass30, get_node @@ -122,9 +122,9 @@ def test_trace_id_to_resultset(self): self.assertListEqual([rs_trace], rs.get_all_query_traces()) def test_trace_ignores_row_factory(self): - with Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=dict_factory)}) as cluster: - + with TestCluster( + execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=dict_factory)} + ) as cluster: s = cluster.connect() query = "SELECT * FROM system.local" statement = SimpleStatement(query) @@ -367,7 +367,7 @@ def test_host_targeting_query(self): class PreparedStatementTests(unittest.TestCase): def setUp(self): - self.cluster = Cluster(protocol_version=PROTOCOL_VERSION) + self.cluster = TestCluster() self.session = self.cluster.connect() def tearDown(self): @@ -488,7 +488,7 @@ def test_prepared_metadata_generation(self): base_line = None for proto_version in get_supported_protocol_versions(): beta_flag = True if proto_version in ProtocolVersion.BETA_VERSIONS else False - cluster = Cluster(protocol_version=proto_version, allow_beta_protocol_version=beta_flag) + cluster = TestCluster() session = cluster.connect() select_statement = session.prepare("SELECT * FROM system.local") @@ -523,7 +523,7 @@ def test_prepare_on_all_hosts(self): @jira_ticket PYTHON-556 @expected_result queries will have to re-prepared on hosts that aren't the control connection """ - clus = Cluster(protocol_version=PROTOCOL_VERSION, prepare_on_all_hosts=False, reprepare_on_up=False) + clus = TestCluster(prepare_on_all_hosts=False, reprepare_on_up=False) self.addCleanup(clus.shutdown) session = clus.connect(wait_for_all_pools=True) @@ -543,11 +543,10 @@ def test_prepare_batch_statement(self): and the batch statement will be sent. """ policy = ForcedHostIndexPolicy() - clus = Cluster( + clus = TestCluster( execution_profiles={ EXEC_PROFILE_DEFAULT: ExecutionProfile(load_balancing_policy=policy), }, - protocol_version=PROTOCOL_VERSION, prepare_on_all_hosts=False, reprepare_on_up=False, ) @@ -588,7 +587,7 @@ def test_prepare_batch_statement_after_alter(self): @expected_result queries will have to re-prepared on hosts that aren't the control connection and the batch statement will be sent. 
""" - clus = Cluster(protocol_version=PROTOCOL_VERSION, prepare_on_all_hosts=False, reprepare_on_up=False) + clus = TestCluster(prepare_on_all_hosts=False, reprepare_on_up=False) self.addCleanup(clus.shutdown) table = "test3rf.%s" % self._testMethodName.lower() @@ -647,7 +646,7 @@ def test_prepared_statement(self): Highlight the difference between Prepared and Bound statements """ - cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cluster = TestCluster() session = cluster.connect() prepared = session.prepare('INSERT INTO test3rf.test (k, v) VALUES (?, ?)') @@ -671,7 +670,7 @@ def setUp(self): "Protocol 2.0+ is required for BATCH operations, currently testing against %r" % (PROTOCOL_VERSION,)) - self.cluster = Cluster(protocol_version=PROTOCOL_VERSION) + self.cluster = TestCluster() if PROTOCOL_VERSION < 3: self.cluster.set_core_connections_per_host(HostDistance.LOCAL, 1) self.session = self.cluster.connect(wait_for_all_pools=True) @@ -802,7 +801,7 @@ def setUp(self): "Protocol 2.0+ is required for Serial Consistency, currently testing against %r" % (PROTOCOL_VERSION,)) - self.cluster = Cluster(protocol_version=PROTOCOL_VERSION) + self.cluster = TestCluster() if PROTOCOL_VERSION < 3: self.cluster.set_core_connections_per_host(HostDistance.LOCAL, 1) self.session = self.cluster.connect() @@ -894,7 +893,7 @@ def setUp(self): % (PROTOCOL_VERSION,)) serial_profile = ExecutionProfile(consistency_level=ConsistencyLevel.SERIAL) - self.cluster = Cluster(protocol_version=PROTOCOL_VERSION, execution_profiles={'serial': serial_profile}) + self.cluster = TestCluster(execution_profiles={'serial': serial_profile}) self.session = self.cluster.connect() ddl = ''' @@ -944,20 +943,15 @@ def test_no_connection_refused_on_timeout(self): continue else: # In this case result is an exception - if type(result).__name__ == "NoHostAvailable": + exception_type = type(result).__name__ + if exception_type == "NoHostAvailable": self.fail("PYTHON-91: Disconnected from Cassandra: %s" % result.message) - if type(result).__name__ == "WriteTimeout": - received_timeout = True - continue - if type(result).__name__ == "WriteFailure": - received_timeout = True - continue - if type(result).__name__ == "ReadTimeout": - continue - if type(result).__name__ == "ReadFailure": + if exception_type in ["WriteTimeout", "WriteFailure", "ReadTimeout", "ReadFailure", "ErrorMessageSub"]: + if type(result).__name__ in ["WriteTimeout", "WriteFailure"]: + received_timeout = True continue - self.fail("Unexpected exception %s: %s" % (type(result).__name__, result.message)) + self.fail("Unexpected exception %s: %s" % (exception_type, result.message)) # Make sure test passed self.assertTrue(received_timeout) @@ -1084,7 +1078,7 @@ def setUp(self): raise unittest.SkipTest( "Protocol 2.0+ is required for BATCH operations, currently testing against %r" % (PROTOCOL_VERSION,)) - self.cluster = Cluster(protocol_version=PROTOCOL_VERSION) + self.cluster = TestCluster() self.session = self.cluster.connect() query = """ INSERT INTO test3rf.test (k, v) VALUES (?, ?) 
@@ -1359,7 +1353,7 @@ def test_unicode(self): class BaseKeyspaceTests(): @classmethod def setUpClass(cls): - cls.cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cls.cluster = TestCluster() cls.session = cls.cluster.connect(wait_for_all_pools=True) cls.ks_name = cls.__name__.lower() @@ -1425,7 +1419,7 @@ def test_setting_keyspace_and_session(self): @test_category query """ - cluster = Cluster(protocol_version=ProtocolVersion.V5, allow_beta_protocol_version=True) + cluster = TestCluster(protocol_version=ProtocolVersion.V5, allow_beta_protocol_version=True) session = cluster.connect(self.alternative_ks) self.addCleanup(cluster.shutdown) @@ -1442,8 +1436,7 @@ def test_setting_keyspace_and_session_after_created(self): @test_category query """ - pv = ProtocolVersion.DSE_V2 if DSE_VERSION else ProtocolVersion.V5 - cluster = Cluster(protocol_version=pv, allow_beta_protocol_version=True) + cluster = TestCluster() session = cluster.connect() self.addCleanup(cluster.shutdown) @@ -1461,8 +1454,7 @@ def test_setting_keyspace_and_same_session(self): @test_category query """ - pv = ProtocolVersion.DSE_V2 if DSE_VERSION else ProtocolVersion.V5 - cluster = Cluster(protocol_version=pv, allow_beta_protocol_version=True) + cluster = TestCluster() session = cluster.connect(self.ks_name) self.addCleanup(cluster.shutdown) @@ -1473,7 +1465,7 @@ def test_setting_keyspace_and_same_session(self): class SimpleWithKeyspaceTests(QueryKeyspaceTests, unittest.TestCase): @unittest.skip def test_lower_protocol(self): - cluster = Cluster(protocol_version=ProtocolVersion.V4) + cluster = TestCluster(protocol_version=ProtocolVersion.V4) session = cluster.connect(self.ks_name) self.addCleanup(cluster.shutdown) @@ -1527,7 +1519,7 @@ def confirm_results(self): class PreparedWithKeyspaceTests(BaseKeyspaceTests, unittest.TestCase): def setUp(self): - self.cluster = Cluster(protocol_version=PROTOCOL_VERSION, allow_beta_protocol_version=True) + self.cluster = TestCluster() self.session = self.cluster.connect() def tearDown(self): @@ -1603,7 +1595,7 @@ def test_prepared_not_found(self): @test_category query """ - cluster = Cluster(protocol_version=PROTOCOL_VERSION, allow_beta_protocol_version=True) + cluster = TestCluster() session = self.cluster.connect("system") self.addCleanup(cluster.shutdown) @@ -1625,7 +1617,7 @@ def test_prepared_in_query_keyspace(self): @test_category query """ - cluster = Cluster(protocol_version=PROTOCOL_VERSION, allow_beta_protocol_version=True) + cluster = TestCluster() session = self.cluster.connect() self.addCleanup(cluster.shutdown) diff --git a/tests/integration/standard/test_query_paging.py b/tests/integration/standard/test_query_paging.py index b1f7b39fc6..dac4ec5ce3 100644 --- a/tests/integration/standard/test_query_paging.py +++ b/tests/integration/standard/test_query_paging.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
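The paging tests that follow drive result paging through Session.default_fetch_size (the same knob the protocol-handler tests above set to 2). A minimal sketch of how transparent paging is typically exercised; the table name is illustrative and a running cluster is assumed:

    from cassandra.cluster import Cluster
    from cassandra.query import SimpleStatement

    cluster = Cluster()
    session = cluster.connect()
    session.default_fetch_size = 100  # page size for subsequent queries

    statement = SimpleStatement("SELECT * FROM test3rf.test")  # illustrative table
    result = session.execute(statement)
    for row in result:
        # Iteration fetches additional pages from the server transparently.
        pass
    cluster.shutdown()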
-from tests.integration import use_singledc, PROTOCOL_VERSION +from tests.integration import use_singledc, PROTOCOL_VERSION, TestCluster import logging log = logging.getLogger(__name__) @@ -26,7 +26,7 @@ from threading import Event from cassandra import ConsistencyLevel -from cassandra.cluster import Cluster, EXEC_PROFILE_DEFAULT, ExecutionProfile +from cassandra.cluster import EXEC_PROFILE_DEFAULT, ExecutionProfile from cassandra.concurrent import execute_concurrent, execute_concurrent_with_args from cassandra.policies import HostDistance from cassandra.query import SimpleStatement @@ -44,8 +44,7 @@ def setUp(self): "Protocol 2.0+ is required for Paging state, currently testing against %r" % (PROTOCOL_VERSION,)) - self.cluster = Cluster( - protocol_version=PROTOCOL_VERSION, + self.cluster = TestCluster( execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(consistency_level=ConsistencyLevel.LOCAL_QUORUM)} ) if PROTOCOL_VERSION < 3: diff --git a/tests/integration/standard/test_routing.py b/tests/integration/standard/test_routing.py index bf4c7878b6..e1dabba49a 100644 --- a/tests/integration/standard/test_routing.py +++ b/tests/integration/standard/test_routing.py @@ -21,9 +21,7 @@ import logging log = logging.getLogger(__name__) -from cassandra.cluster import Cluster - -from tests.integration import use_singledc, PROTOCOL_VERSION +from tests.integration import use_singledc, TestCluster def setup_module(): @@ -38,7 +36,7 @@ def cfname(self): @classmethod def setup_class(cls): - cls.cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cls.cluster = TestCluster() cls.session = cls.cluster.connect('test1rf') @classmethod diff --git a/tests/integration/standard/test_row_factories.py b/tests/integration/standard/test_row_factories.py index 48ceb1d949..93f25d9276 100644 --- a/tests/integration/standard/test_row_factories.py +++ b/tests/integration/standard/test_row_factories.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
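The row-factory tests above and below already keep their factories on execution profiles; only the cluster construction changes to TestCluster. For reference, a short sketch of the profile-based row-factory usage these tests rely on:

    from cassandra.cluster import Cluster, ExecutionProfile, EXEC_PROFILE_DEFAULT
    from cassandra.query import dict_factory, tuple_factory

    cluster = Cluster(execution_profiles={
        EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=dict_factory),
        'tuples': ExecutionProfile(row_factory=tuple_factory),
    })
    session = cluster.connect()
    # Rows come back as dicts via the default profile...
    rows = session.execute("SELECT release_version FROM system.local")
    # ...and as tuples when the named profile is selected per request.
    tuple_rows = session.execute("SELECT release_version FROM system.local",
                                 execution_profile='tuples')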
-from tests.integration import get_server_versions, use_singledc, PROTOCOL_VERSION, BasicSharedKeyspaceUnitTestCaseWFunctionTable, BasicSharedKeyspaceUnitTestCase, execute_until_pass +from tests.integration import get_server_versions, use_singledc, \ + BasicSharedKeyspaceUnitTestCaseWFunctionTable, BasicSharedKeyspaceUnitTestCase, execute_until_pass, TestCluster try: import unittest2 as unittest except ImportError: import unittest # noqa -from cassandra.cluster import Cluster, ResultSet, ExecutionProfile, EXEC_PROFILE_DEFAULT +from cassandra.cluster import ResultSet, ExecutionProfile, EXEC_PROFILE_DEFAULT from cassandra.query import tuple_factory, named_tuple_factory, dict_factory, ordered_dict_factory from cassandra.util import OrderedDict @@ -86,8 +87,9 @@ def setUpClass(cls): cls.select = "SELECT * FROM {0}.{1}".format(cls.ks_name, cls.ks_name) def _results_from_row_factory(self, row_factory): - cluster = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=row_factory)}) + cluster = TestCluster( + execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=row_factory)} + ) with cluster: return cluster.connect().execute(self.select) @@ -174,7 +176,7 @@ class NamedTupleFactoryAndNumericColNamesTests(unittest.TestCase): """ @classmethod def setup_class(cls): - cls.cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cls.cluster = TestCluster() cls.session = cls.cluster.connect() cls._cass_version, cls._cql_version = get_server_versions() ddl = ''' @@ -211,8 +213,9 @@ def test_can_select_with_dict_factory(self): """ can SELECT numeric column using dict_factory """ - with Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=dict_factory)}) as cluster: + with TestCluster( + execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=dict_factory)} + ) as cluster: try: cluster.connect().execute('SELECT * FROM test1rf.table_num_col') except ValueError as e: diff --git a/tests/integration/standard/test_types.py b/tests/integration/standard/test_types.py index f0e56879c7..0592b7d737 100644 --- a/tests/integration/standard/test_types.py +++ b/tests/integration/standard/test_types.py @@ -25,16 +25,16 @@ import cassandra from cassandra import InvalidRequest from cassandra import util -from cassandra.cluster import Cluster, ExecutionProfile, EXEC_PROFILE_DEFAULT +from cassandra.cluster import ExecutionProfile, EXEC_PROFILE_DEFAULT from cassandra.concurrent import execute_concurrent_with_args from cassandra.cqltypes import Int32Type, EMPTY from cassandra.query import dict_factory, ordered_dict_factory from cassandra.util import sortedset, Duration from tests.unit.cython.utils import cythontest -from tests.integration import use_singledc, PROTOCOL_VERSION, execute_until_pass, notprotocolv1, \ +from tests.integration import use_singledc, execute_until_pass, notprotocolv1, \ BasicSharedKeyspaceUnitTestCase, greaterthancass21, lessthancass30, greaterthanorequaldse51, \ - DSE_VERSION, greaterthanorequalcass3_10, requiredse + DSE_VERSION, greaterthanorequalcass3_10, requiredse, TestCluster from tests.integration.datatype_utils import update_datatypes, PRIMITIVE_DATATYPES, COLLECTION_TYPES, PRIMITIVE_DATATYPES_KEYS, \ get_sample, get_all_samples, get_collection_sample @@ -136,7 +136,7 @@ def test_can_insert_primitive_datatypes(self): """ Test insertion of all datatype primitives """ - c = Cluster(protocol_version=PROTOCOL_VERSION) + c = TestCluster() s = 
c.connect(self.keyspace_name) # create table @@ -217,7 +217,7 @@ def test_can_insert_collection_datatypes(self): Test insertion of all collection types """ - c = Cluster(protocol_version=PROTOCOL_VERSION) + c = TestCluster() s = c.connect(self.keyspace_name) # use tuple encoding, to convert native python tuple into raw CQL s.encoder.mapping[tuple] = s.encoder.cql_encode_tuple @@ -449,7 +449,7 @@ def test_can_insert_tuples(self): if self.cass_version < (2, 1, 0): raise unittest.SkipTest("The tuple type was introduced in Cassandra 2.1") - c = Cluster(protocol_version=PROTOCOL_VERSION) + c = TestCluster() s = c.connect(self.keyspace_name) # use this encoder in order to insert tuples @@ -501,8 +501,9 @@ def test_can_insert_tuples_with_varying_lengths(self): if self.cass_version < (2, 1, 0): raise unittest.SkipTest("The tuple type was introduced in Cassandra 2.1") - c = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=dict_factory)}) + c = TestCluster( + execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=dict_factory)} + ) s = c.connect(self.keyspace_name) # set the encoder for tuples for the ability to write tuples @@ -539,7 +540,7 @@ def test_can_insert_tuples_all_primitive_datatypes(self): if self.cass_version < (2, 1, 0): raise unittest.SkipTest("The tuple type was introduced in Cassandra 2.1") - c = Cluster(protocol_version=PROTOCOL_VERSION) + c = TestCluster() s = c.connect(self.keyspace_name) s.encoder.mapping[tuple] = s.encoder.cql_encode_tuple @@ -567,8 +568,9 @@ def test_can_insert_tuples_all_collection_datatypes(self): if self.cass_version < (2, 1, 0): raise unittest.SkipTest("The tuple type was introduced in Cassandra 2.1") - c = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=dict_factory)}) + c = TestCluster( + execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=dict_factory)} + ) s = c.connect(self.keyspace_name) # set the encoder for tuples for the ability to write tuples @@ -665,8 +667,9 @@ def test_can_insert_nested_tuples(self): if self.cass_version < (2, 1, 0): raise unittest.SkipTest("The tuple type was introduced in Cassandra 2.1") - c = Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=dict_factory)}) + c = TestCluster( + execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=dict_factory)} + ) s = c.connect(self.keyspace_name) # set the encoder for tuples for the ability to write tuples @@ -1276,7 +1279,7 @@ def test_nested_types_with_protocol_version(self): self.read_inserts_at_level(pvr) def read_inserts_at_level(self, proto_ver): - session = Cluster(protocol_version=proto_ver).connect(self.keyspace_name) + session = TestCluster(protocol_version=proto_ver).connect(self.keyspace_name) try: results = session.execute('select * from t')[0] self.assertEqual("[SortedSet([1, 2]), SortedSet([3, 5])]", str(results.v)) @@ -1294,7 +1297,7 @@ def read_inserts_at_level(self, proto_ver): session.cluster.shutdown() def run_inserts_at_version(self, proto_ver): - session = Cluster(protocol_version=proto_ver).connect(self.keyspace_name) + session = TestCluster(protocol_version=proto_ver).connect(self.keyspace_name) try: p = session.prepare('insert into t (k, v) values (?, ?)') session.execute(p, (0, [{1, 2}, {3, 5}])) diff --git a/tests/integration/standard/test_udts.py b/tests/integration/standard/test_udts.py index 
4f23f9d5a9..6d9676f25e 100644 --- a/tests/integration/standard/test_udts.py +++ b/tests/integration/standard/test_udts.py @@ -22,12 +22,12 @@ import six from cassandra import InvalidRequest -from cassandra.cluster import Cluster, UserTypeDoesNotExist, ExecutionProfile, EXEC_PROFILE_DEFAULT +from cassandra.cluster import UserTypeDoesNotExist, ExecutionProfile, EXEC_PROFILE_DEFAULT from cassandra.query import dict_factory from cassandra.util import OrderedMap -from tests.integration import use_singledc, PROTOCOL_VERSION, execute_until_pass, \ - BasicSegregatedKeyspaceUnitTestCase, greaterthancass20, lessthancass30, greaterthanorequalcass36 +from tests.integration import use_singledc, execute_until_pass, \ + BasicSegregatedKeyspaceUnitTestCase, greaterthancass20, lessthancass30, greaterthanorequalcass36, TestCluster from tests.integration.datatype_utils import update_datatypes, PRIMITIVE_DATATYPES, PRIMITIVE_DATATYPES_KEYS, \ COLLECTION_TYPES, get_sample, get_collection_sample @@ -79,7 +79,7 @@ def test_can_insert_unprepared_registered_udts(self): Test the insertion of unprepared, registered UDTs """ - c = Cluster(protocol_version=PROTOCOL_VERSION) + c = TestCluster() s = c.connect(self.keyspace_name, wait_for_all_pools=True) s.execute("CREATE TYPE user (age int, name text)") @@ -123,7 +123,7 @@ def test_can_register_udt_before_connecting(self): Test the registration of UDTs before session creation """ - c = Cluster(protocol_version=PROTOCOL_VERSION) + c = TestCluster() s = c.connect(wait_for_all_pools=True) s.execute(""" @@ -144,7 +144,7 @@ def test_can_register_udt_before_connecting(self): # now that types are defined, shutdown and re-create Cluster c.shutdown() - c = Cluster(protocol_version=PROTOCOL_VERSION) + c = TestCluster() User1 = namedtuple('user', ('age', 'name')) User2 = namedtuple('user', ('state', 'is_cool')) @@ -181,7 +181,7 @@ def test_can_insert_prepared_unregistered_udts(self): Test the insertion of prepared, unregistered UDTs """ - c = Cluster(protocol_version=PROTOCOL_VERSION) + c = TestCluster() s = c.connect(self.keyspace_name, wait_for_all_pools=True) s.execute("CREATE TYPE user (age int, name text)") @@ -225,7 +225,7 @@ def test_can_insert_prepared_registered_udts(self): Test the insertion of prepared, registered UDTs """ - c = Cluster(protocol_version=PROTOCOL_VERSION) + c = TestCluster() s = c.connect(self.keyspace_name, wait_for_all_pools=True) s.execute("CREATE TYPE user (age int, name text)") @@ -275,7 +275,7 @@ def test_can_insert_udts_with_nulls(self): Test the insertion of UDTs with null and empty string fields """ - c = Cluster(protocol_version=PROTOCOL_VERSION) + c = TestCluster() s = c.connect(self.keyspace_name, wait_for_all_pools=True) s.execute("CREATE TYPE user (a text, b int, c uuid, d blob)") @@ -305,7 +305,7 @@ def test_can_insert_udts_with_varying_lengths(self): Test for ensuring extra-lengthy udts are properly inserted """ - c = Cluster(protocol_version=PROTOCOL_VERSION) + c = TestCluster() s = c.connect(self.keyspace_name, wait_for_all_pools=True) max_test_length = 254 @@ -385,8 +385,9 @@ def nested_udt_verification_helper(self, session, max_nesting_depth, udts): self.assertEqual(udt, result["v_{0}".format(i)]) def _cluster_default_dict_factory(self): - return Cluster(protocol_version=PROTOCOL_VERSION, - execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=dict_factory)}) + return TestCluster( + execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=dict_factory)} + ) def 
test_can_insert_nested_registered_udts(self): """ @@ -485,7 +486,7 @@ def test_raise_error_on_nonexisting_udts(self): Test for ensuring that an error is raised for operating on a nonexisting udt or an invalid keyspace """ - c = Cluster(protocol_version=PROTOCOL_VERSION) + c = TestCluster() s = c.connect(self.keyspace_name, wait_for_all_pools=True) User = namedtuple('user', ('age', 'name')) @@ -505,7 +506,7 @@ def test_can_insert_udt_all_datatypes(self): Test for inserting various types of PRIMITIVE_DATATYPES into UDT's """ - c = Cluster(protocol_version=PROTOCOL_VERSION) + c = TestCluster() s = c.connect(self.keyspace_name, wait_for_all_pools=True) # create UDT @@ -550,7 +551,7 @@ def test_can_insert_udt_all_collection_datatypes(self): Test for inserting various types of COLLECTION_TYPES into UDT's """ - c = Cluster(protocol_version=PROTOCOL_VERSION) + c = TestCluster() s = c.connect(self.keyspace_name, wait_for_all_pools=True) # create UDT @@ -617,7 +618,7 @@ def test_can_insert_nested_collections(self): if self.cass_version < (2, 1, 3): raise unittest.SkipTest("Support for nested collections was introduced in Cassandra 2.1.3") - c = Cluster(protocol_version=PROTOCOL_VERSION) + c = TestCluster() s = c.connect(self.keyspace_name, wait_for_all_pools=True) s.encoder.mapping[tuple] = s.encoder.cql_encode_tuple From 8473c044c9174a47d55cc82184b79b7075a34cf0 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Thu, 26 Mar 2020 14:12:48 -0500 Subject: [PATCH 043/211] More test fixes --- tests/integration/__init__.py | 3 +++ tests/integration/advanced/graph/test_graph.py | 2 +- tests/integration/advanced/test_unixsocketendpoint.py | 2 +- tests/integration/standard/test_query.py | 4 ++-- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index 65cd6a2f1f..32a6346c7e 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -507,6 +507,9 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, CCM_CLUSTER = DseCluster(path, cluster_name, **ccm_options) CCM_CLUSTER.set_configuration_options({'start_native_transport': True}) CCM_CLUSTER.set_configuration_options({'batch_size_warn_threshold_in_kb': 5}) + if Version(dse_version) >= Version('5.0'): + CCM_CLUSTER.set_configuration_options({'enable_user_defined_functions': True}) + CCM_CLUSTER.set_configuration_options({'enable_scripted_user_defined_functions': True}) if Version(dse_version) >= Version('5.1'): # For Inet4Address CCM_CLUSTER.set_dse_configuration_options({ diff --git a/tests/integration/advanced/graph/test_graph.py b/tests/integration/advanced/graph/test_graph.py index 020d631d69..a0b6534c34 100644 --- a/tests/integration/advanced/graph/test_graph.py +++ b/tests/integration/advanced/graph/test_graph.py @@ -19,7 +19,7 @@ from cassandra.protocol import SyntaxException from cassandra.policies import WhiteListRoundRobinPolicy from cassandra.cluster import NoHostAvailable -from cassandra.cluster import EXEC_PROFILE_GRAPH_DEFAULT, GraphExecutionProfile, +from cassandra.cluster import EXEC_PROFILE_GRAPH_DEFAULT, GraphExecutionProfile from cassandra.graph import single_object_row_factory, Vertex, graph_object_row_factory, \ graph_graphson2_row_factory, graph_graphson3_row_factory from cassandra.util import SortedSet diff --git a/tests/integration/advanced/test_unixsocketendpoint.py b/tests/integration/advanced/test_unixsocketendpoint.py index 1f6665964a..10cbc1b362 100644 --- a/tests/integration/advanced/test_unixsocketendpoint.py 
+++ b/tests/integration/advanced/test_unixsocketendpoint.py @@ -65,7 +65,7 @@ def setUpClass(cls): lbp = UnixSocketWhiteListRoundRobinPolicy([UNIX_SOCKET_PATH]) ep = ExecutionProfile(load_balancing_policy=lbp) endpoint = UnixSocketEndPoint(UNIX_SOCKET_PATH) - cls.cluster = TestCluster([endpoint], execution_profiles={EXEC_PROFILE_DEFAULT: ep}) + cls.cluster = TestCluster(contact_points=[endpoint], execution_profiles={EXEC_PROFILE_DEFAULT: ep}) @classmethod def tearDownClass(cls): diff --git a/tests/integration/standard/test_query.py b/tests/integration/standard/test_query.py index 3e393cdb4b..d6401a987e 100644 --- a/tests/integration/standard/test_query.py +++ b/tests/integration/standard/test_query.py @@ -24,7 +24,7 @@ from cassandra import ConsistencyLevel, Unavailable, InvalidRequest, cluster from cassandra.query import (PreparedStatement, BoundStatement, SimpleStatement, BatchStatement, BatchType, dict_factory, TraceUnavailable) -from cassandra.cluster import NoHostAvailable, ExecutionProfile, EXEC_PROFILE_DEFAULT +from cassandra.cluster import NoHostAvailable, ExecutionProfile, EXEC_PROFILE_DEFAULT, Cluster from cassandra.policies import HostDistance, RoundRobinPolicy, WhiteListRoundRobinPolicy from tests.integration import use_singledc, PROTOCOL_VERSION, BasicSharedKeyspaceUnitTestCase, \ greaterthanprotocolv3, MockLoggingHandler, get_supported_protocol_versions, local, get_cluster, setup_keyspace, \ @@ -488,7 +488,7 @@ def test_prepared_metadata_generation(self): base_line = None for proto_version in get_supported_protocol_versions(): beta_flag = True if proto_version in ProtocolVersion.BETA_VERSIONS else False - cluster = TestCluster() + cluster = Cluster(protocol_version=proto_version, allow_beta_protocol_version=beta_flag) session = cluster.connect() select_statement = session.prepare("SELECT * FROM system.local") From 607ff52c7521f179fc944df4dfc9ddb075fbb30d Mon Sep 17 00:00:00 2001 From: Aleksandr Sorokoumov Date: Mon, 30 Mar 2020 11:27:42 +0200 Subject: [PATCH 044/211] CASSANDRA-15663 Add 'default' to a list of reserved keywords --- cassandra/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cassandra/metadata.py b/cassandra/metadata.py index a839c2206c..9f1de99615 100644 --- a/cassandra/metadata.py +++ b/cassandra/metadata.py @@ -49,7 +49,7 @@ cql_keywords = set(( 'add', 'aggregate', 'all', 'allow', 'alter', 'and', 'apply', 'as', 'asc', 'ascii', 'authorize', 'batch', 'begin', 'bigint', 'blob', 'boolean', 'by', 'called', 'clustering', 'columnfamily', 'compact', 'contains', 'count', - 'counter', 'create', 'custom', 'date', 'decimal', 'delete', 'desc', 'describe', 'deterministic', 'distinct', 'double', 'drop', + 'counter', 'create', 'custom', 'date', 'decimal', 'default', 'delete', 'desc', 'describe', 'deterministic', 'distinct', 'double', 'drop', 'entries', 'execute', 'exists', 'filtering', 'finalfunc', 'float', 'from', 'frozen', 'full', 'function', 'functions', 'grant', 'if', 'in', 'index', 'inet', 'infinity', 'initcond', 'input', 'insert', 'int', 'into', 'is', 'json', 'key', 'keys', 'keyspace', 'keyspaces', 'language', 'limit', 'list', 'login', 'map', 'materialized', 'modify', 'monotonic', 'nan', 'nologin', From 40fe726a358d1a844c025449a580f92f352245cf Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Wed, 11 Mar 2020 09:19:18 -0400 Subject: [PATCH 045/211] Support port discovery for C* 4.0 --- CHANGELOG.rst | 3 +- build.yaml | 8 +- cassandra/cluster.py | 136 ++++++++++++------ cassandra/connection.py | 19 +-- cassandra/metadata.py | 60 +++++++- 
cassandra/pool.py | 53 ++++++- cassandra/util.py | 10 +- tests/integration/__init__.py | 14 +- tests/integration/advanced/test_auth.py | 7 +- tests/integration/simulacron/test_endpoint.py | 16 +-- .../standard/test_control_connection.py | 28 +++- tests/integration/standard/test_metadata.py | 14 +- .../standard/test_single_interface.py | 77 ++++++++++ tests/unit/test_cluster.py | 11 ++ tests/unit/test_control_connection.py | 56 +++++++- 15 files changed, 408 insertions(+), 104 deletions(-) create mode 100644 tests/integration/standard/test_single_interface.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 9cd64d8fc8..9f5deaabae 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,7 +4,8 @@ Unreleased Features -------- -Transient Replication Support (PYTHON-1207) +* Transient Replication Support (PYTHON-1207) +* Support system.peers_v2 and port discovery for C* 4.0 (PYTHON-700) Bug Fixes --------- diff --git a/build.yaml b/build.yaml index 83bed55a09..bd40809ef3 100644 --- a/build.yaml +++ b/build.yaml @@ -21,7 +21,7 @@ schedules: matrix: exclude: - python: [3.4, 3.6, 3.7, 3.8] - - cassandra: ['2.1', '3.0', '4.0', 'test-dse'] + - cassandra: ['2.1', '3.0', 'test-dse'] commit_branches: schedule: per_commit @@ -34,7 +34,7 @@ schedules: matrix: exclude: - python: [3.4, 3.6, 3.7, 3.8] - - cassandra: ['2.1', '3.0', '4.0', 'test-dse'] + - cassandra: ['2.1', '3.0', 'test-dse'] commit_branches_dev: schedule: per_commit @@ -184,9 +184,11 @@ build: pip install --upgrade pip pip install -U setuptools + pip install git+ssh://git@github.com/riptano/ccm-private.git@cassandra-7544-native-ports-with-dse-fix + # Remove this pyyaml installation when removing Python 3.4 support pip install PyYAML==5.2 - pip install $HOME/ccm + #pip install $HOME/ccm if [ -n "$CCM_IS_DSE" ]; then pip install -r test-datastax-requirements.txt diff --git a/cassandra/cluster.py b/cassandra/cluster.py index 255d035dac..7120bdb9e1 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -64,7 +64,7 @@ RESULT_KIND_SET_KEYSPACE, RESULT_KIND_ROWS, RESULT_KIND_SCHEMA_CHANGE, ProtocolHandler, RESULT_KIND_VOID) -from cassandra.metadata import Metadata, protect_name, murmur3 +from cassandra.metadata import Metadata, protect_name, murmur3, _NodeInfo from cassandra.policies import (TokenAwarePolicy, DCAwareRoundRobinPolicy, SimpleConvictionPolicy, ExponentialReconnectionPolicy, HostDistance, RetryPolicy, IdentityTranslator, NoSpeculativeExecutionPlan, @@ -581,7 +581,7 @@ class Cluster(object): contact_points = ['127.0.0.1'] """ The list of contact points to try connecting for cluster discovery. A - contact point can be a string (ip, hostname) or a + contact point can be a string (ip or hostname), a tuple (ip/hostname, port) or a :class:`.connection.EndPoint` instance. Defaults to loopback interface. 
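
As an illustration of the contact point forms accepted after this change (a
minimal sketch; the addresses and ports are placeholders, not values taken
from this patch)::

    from cassandra.cluster import Cluster
    from cassandra.connection import DefaultEndPoint

    # Plain strings resolve against the cluster-wide port, (host, port)
    # tuples carry their own port, and EndPoint instances are used as-is.
    cluster = Cluster(
        contact_points=['10.0.0.1',                        # -> 10.0.0.1:9042
                        ('10.0.0.2', 9043),                # -> 10.0.0.2:9043
                        DefaultEndPoint('10.0.0.3', 9044)],
        port=9042)
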
@@ -1152,20 +1152,24 @@ def __init__(self, self.endpoint_factory = endpoint_factory or DefaultEndPointFactory(port=self.port) self.endpoint_factory.configure(self) - raw_contact_points = [cp for cp in self.contact_points if not isinstance(cp, EndPoint)] + raw_contact_points = [] + for cp in [cp for cp in self.contact_points if not isinstance(cp, EndPoint)]: + raw_contact_points.append(cp if isinstance(cp, tuple) else (cp, port)) + self.endpoints_resolved = [cp for cp in self.contact_points if isinstance(cp, EndPoint)] self._endpoint_map_for_insights = {repr(ep): '{ip}:{port}'.format(ip=ep.address, port=ep.port) for ep in self.endpoints_resolved} - strs_resolved_map = _resolve_contact_points_to_string_map(raw_contact_points, port) + strs_resolved_map = _resolve_contact_points_to_string_map(raw_contact_points) self.endpoints_resolved.extend(list(chain( *[ - [DefaultEndPoint(x, port) for x in xs if x is not None] + [DefaultEndPoint(ip, port) for ip, port in xs if ip is not None] for xs in strs_resolved_map.values() if xs is not None ] ))) + self._endpoint_map_for_insights.update( - {key: ['{ip}:{port}'.format(ip=ip, port=port) for ip in value] + {key: ['{ip}:{port}'.format(ip=ip, port=port) for ip, port in value] for key, value in strs_resolved_map.items() if value is not None} ) @@ -3420,8 +3424,17 @@ class ControlConnection(object): _SELECT_SCHEMA_PEERS_TEMPLATE = "SELECT peer, host_id, {nt_col_name}, schema_version FROM system.peers" _SELECT_SCHEMA_LOCAL = "SELECT schema_version FROM system.local WHERE key='local'" + _SELECT_PEERS_V2 = "SELECT * FROM system.peers_v2" + _SELECT_PEERS_NO_TOKENS_V2 = "SELECT host_id, peer, peer_port, data_center, rack, native_address, native_port, release_version, schema_version FROM system.peers_v2" + _SELECT_SCHEMA_PEERS_V2 = "SELECT host_id, peer, peer_port, native_address, native_port, schema_version FROM system.peers_v2" + _MINIMUM_NATIVE_ADDRESS_DSE_VERSION = Version("6.0.0") + class PeersQueryType(object): + """internal Enum for _peers_query""" + PEERS = 0 + PEERS_SCHEMA = 1 + _is_shutdown = False _timeout = None _protocol_version = None @@ -3433,6 +3446,8 @@ class ControlConnection(object): _schema_meta_enabled = True _token_meta_enabled = True + _uses_peers_v2 = True + # for testing purposes _time = time @@ -3547,13 +3562,25 @@ def _try_connect(self, host): "SCHEMA_CHANGE": partial(_watch_callback, self_weakref, '_handle_schema_change') }, register_timeout=self._timeout) - sel_peers = self._peers_query_for_version(connection, self._SELECT_PEERS_NO_TOKENS_TEMPLATE) + sel_peers = self._get_peers_query(self.PeersQueryType.PEERS, connection) sel_local = self._SELECT_LOCAL if self._token_meta_enabled else self._SELECT_LOCAL_NO_TOKENS peers_query = QueryMessage(query=sel_peers, consistency_level=ConsistencyLevel.ONE) local_query = QueryMessage(query=sel_local, consistency_level=ConsistencyLevel.ONE) - shared_results = connection.wait_for_responses( - peers_query, local_query, timeout=self._timeout) + (peers_success, peers_result), (local_success, local_result) = connection.wait_for_responses( + peers_query, local_query, timeout=self._timeout, fail_on_error=False) + + if not local_success: + raise local_result + if not peers_success: + # error with the peers v2 query, fallback to peers v1 + self._uses_peers_v2 = False + sel_peers = self._get_peers_query(self.PeersQueryType.PEERS, connection) + peers_query = QueryMessage(query=sel_peers, consistency_level=ConsistencyLevel.ONE) + peers_result = connection.wait_for_response( + peers_query, 
timeout=self._timeout) + + shared_results = (peers_result, local_result) self._refresh_node_list_and_token_map(connection, preloaded_results=shared_results) self._refresh_schema(connection, preloaded_results=shared_results, schema_agreement_wait=-1) except Exception: @@ -3675,20 +3702,18 @@ def refresh_node_list_and_token_map(self, force_token_rebuild=False): def _refresh_node_list_and_token_map(self, connection, preloaded_results=None, force_token_rebuild=False): - if preloaded_results: log.debug("[control connection] Refreshing node list and token map using preloaded results") peers_result = preloaded_results[0] local_result = preloaded_results[1] else: cl = ConsistencyLevel.ONE + sel_peers = self._get_peers_query(self.PeersQueryType.PEERS, connection) if not self._token_meta_enabled: log.debug("[control connection] Refreshing node list without token map") - sel_peers = self._peers_query_for_version(connection, self._SELECT_PEERS_NO_TOKENS_TEMPLATE) sel_local = self._SELECT_LOCAL_NO_TOKENS else: log.debug("[control connection] Refreshing node list and token map") - sel_peers = self._SELECT_PEERS sel_local = self._SELECT_LOCAL peers_query = QueryMessage(query=sel_peers, consistency_level=cl) local_query = QueryMessage(query=sel_local, consistency_level=cl) @@ -3718,13 +3743,17 @@ def _refresh_node_list_and_token_map(self, connection, preloaded_results=None, self._update_location_info(host, datacenter, rack) host.host_id = local_row.get("host_id") host.listen_address = local_row.get("listen_address") - host.broadcast_address = local_row.get("broadcast_address") + host.listen_port = local_row.get("listen_port") + host.broadcast_address = _NodeInfo.get_broadcast_address(local_row) + host.broadcast_port = _NodeInfo.get_broadcast_port(local_row) - host.broadcast_rpc_address = self._address_from_row(local_row) + host.broadcast_rpc_address = _NodeInfo.get_broadcast_rpc_address(local_row) + host.broadcast_rpc_port = _NodeInfo.get_broadcast_rpc_port(local_row) if host.broadcast_rpc_address is None: if self._token_meta_enabled: # local rpc_address is not available, use the connection endpoint host.broadcast_rpc_address = connection.endpoint.address + host.broadcast_rpc_port = connection.endpoint.port else: # local rpc_address has not been queried yet, try to fetch it # separately, which might fail because C* < 2.1.6 doesn't have rpc_address @@ -3737,9 +3766,11 @@ def _refresh_node_list_and_token_map(self, connection, preloaded_results=None, row = dict_factory( local_rpc_address_result.column_names, local_rpc_address_result.parsed_rows) - host.broadcast_rpc_address = row[0]['rpc_address'] + host.broadcast_rpc_address = _NodeInfo.get_broadcast_rpc_address(row[0]) + host.broadcast_rpc_port = _NodeInfo.get_broadcast_rpc_port(row[0]) else: host.broadcast_rpc_address = connection.endpoint.address + host.broadcast_rpc_port = connection.endpoint.port host.release_version = local_row.get("release_version") host.dse_version = local_row.get("dse_version") @@ -3777,8 +3808,10 @@ def _refresh_node_list_and_token_map(self, connection, preloaded_results=None, should_rebuild_token_map |= self._update_location_info(host, datacenter, rack) host.host_id = row.get("host_id") - host.broadcast_address = row.get("peer") - host.broadcast_rpc_address = self._address_from_row(row) + host.broadcast_address = _NodeInfo.get_broadcast_address(row) + host.broadcast_port = _NodeInfo.get_broadcast_port(row) + host.broadcast_rpc_address = _NodeInfo.get_broadcast_rpc_address(row) + host.broadcast_rpc_port = 
_NodeInfo.get_broadcast_rpc_port(row) host.release_version = row.get("release_version") host.dse_version = row.get("dse_version") host.dse_workload = row.get("workload") @@ -3834,7 +3867,8 @@ def _refresh_nodes_if_not_up(self, host): def _handle_topology_change(self, event): change_type = event["change_type"] - host = self._cluster.metadata.get_host(event["address"][0]) + addr, port = event["address"] + host = self._cluster.metadata.get_host(addr, port) if change_type == "NEW_NODE" or change_type == "MOVED_NODE": if self._topology_event_refresh_window >= 0: delay = self._delay_for_event_type('topology_change', self._topology_event_refresh_window) @@ -3844,7 +3878,8 @@ def _handle_topology_change(self, event): def _handle_status_change(self, event): change_type = event["change_type"] - host = self._cluster.metadata.get_host(event["address"][0]) + addr, port = event["address"] + host = self._cluster.metadata.get_host(addr, port) if change_type == "UP": delay = self._delay_for_event_type('status_change', self._status_event_refresh_window) if host is None: @@ -3898,7 +3933,7 @@ def wait_for_schema_agreement(self, connection=None, preloaded_results=None, wai elapsed = 0 cl = ConsistencyLevel.ONE schema_mismatches = None - select_peers_query = self._peers_query_for_version(connection, self._SELECT_SCHEMA_PEERS_TEMPLATE) + select_peers_query = self._get_peers_query(self.PeersQueryType.PEERS_SCHEMA, connection) while elapsed < total_timeout: peers_query = QueryMessage(query=select_peers_query, consistency_level=cl) @@ -3955,43 +3990,50 @@ def _get_schema_mismatches(self, peers_result, local_result, local_address): return dict((version, list(nodes)) for version, nodes in six.iteritems(versions)) - def _address_from_row(self, row): + def _get_peers_query(self, peers_query_type, connection=None): """ - Parse the broadcast rpc address from a row and return it untranslated. - """ - addr = None - if "rpc_address" in row: - addr = row.get("rpc_address") # peers and local - if "native_transport_address" in row: - addr = row.get("native_transport_address") - if not addr or addr in ["0.0.0.0", "::"]: - addr = row.get("peer") - return addr + Determine the peers query to use. + + :param peers_query_type: Should be one of PeersQueryType enum. + + If _uses_peers_v2 is True, return the proper peers_v2 query (no templating). + Else, apply the logic below to choose the peers v1 address column name: - def _peers_query_for_version(self, connection, peers_query_template): - """ Given a connection: - find the server product version running on the connection's host, - use that to choose the column name for the transport address (see APOLLO-1130), and - use that column name in the provided peers query template. - - The provided template should be a string with a format replacement - field named nt_col_name. 
""" - host_release_version = self._cluster.metadata.get_host(connection.endpoint).release_version - host_dse_version = self._cluster.metadata.get_host(connection.endpoint).dse_version - uses_native_address_query = ( - host_dse_version and Version(host_dse_version) >= self._MINIMUM_NATIVE_ADDRESS_DSE_VERSION) + if peers_query_type not in (self.PeersQueryType.PEERS, self.PeersQueryType.PEERS_SCHEMA): + raise ValueError("Invalid peers query type: %s" % peers_query_type) - if uses_native_address_query: - select_peers_query = peers_query_template.format(nt_col_name="native_transport_address") - elif host_release_version: - select_peers_query = peers_query_template.format(nt_col_name="rpc_address") + if self._uses_peers_v2: + if peers_query_type == self.PeersQueryType.PEERS: + query = self._SELECT_PEERS_V2 if self._token_meta_enabled else self._SELECT_PEERS_NO_TOKENS_V2 + else: + query = self._SELECT_SCHEMA_PEERS_V2 else: - select_peers_query = self._SELECT_PEERS + if peers_query_type == self.PeersQueryType.PEERS and self._token_meta_enabled: + query = self._SELECT_PEERS + else: + query_template = (self._SELECT_SCHEMA_PEERS_TEMPLATE + if peers_query_type == self.PeersQueryType.PEERS_SCHEMA + else self._SELECT_PEERS_NO_TOKENS_TEMPLATE) + + host_release_version = self._cluster.metadata.get_host(connection.endpoint).release_version + host_dse_version = self._cluster.metadata.get_host(connection.endpoint).dse_version + uses_native_address_query = ( + host_dse_version and Version(host_dse_version) >= self._MINIMUM_NATIVE_ADDRESS_DSE_VERSION) + + if uses_native_address_query: + query = query_template.format(nt_col_name="native_transport_address") + elif host_release_version: + query = query_template.format(nt_col_name="rpc_address") + else: + query = self._SELECT_PEERS - return select_peers_query + return query def _signal_error(self): with self._lock: @@ -4181,7 +4223,7 @@ class ResponseFuture(object): coordinator_host = None """ - The host from which we recieved a response + The host from which we received a response """ attempted_hosts = None diff --git a/cassandra/connection.py b/cassandra/connection.py index 66af1f8521..f30be682a6 100644 --- a/cassandra/connection.py +++ b/cassandra/connection.py @@ -214,25 +214,26 @@ class DefaultEndPointFactory(EndPointFactory): port = None """ - If set, force all endpoints to use this port. + If no port is discovered in the row, this is the default port + used for endpoint creation. """ def __init__(self, port=None): self.port = port def create(self, row): - addr = None - if "rpc_address" in row: - addr = row.get("rpc_address") - if "native_transport_address" in row: - addr = row.get("native_transport_address") - if not addr or addr in ["0.0.0.0", "::"]: - addr = row.get("peer") + # TODO next major... 
move this class so we don't need this kind of hack
+        from cassandra.metadata import _NodeInfo
+        addr = _NodeInfo.get_broadcast_rpc_address(row)
+        port = _NodeInfo.get_broadcast_rpc_port(row)
+        if port is None:
+            port = self.port if self.port else 9042
 
         # create the endpoint with the translated address
+        # TODO next major, create a TranslatedEndPoint type
         return DefaultEndPoint(
             self.cluster.address_translator.translate(addr),
-            self.port if self.port is not None else 9042)
+            port)
 
 
 @total_ordering
diff --git a/cassandra/metadata.py b/cassandra/metadata.py
index a839c2206c..6cbc4e85cf 100644
--- a/cassandra/metadata.py
+++ b/cassandra/metadata.py
@@ -338,20 +338,23 @@ def remove_host(self, host):
         with self._hosts_lock:
             return bool(self._hosts.pop(host.endpoint, False))
 
-    def get_host(self, endpoint_or_address):
+    def get_host(self, endpoint_or_address, port=None):
         """
-        Find a host in the metadata for a specific endpoint. If a string inet address is passed,
-        iterate all hosts to match the :attr:`~.pool.Host.broadcast_rpc_address` attribute.
+        Find a host in the metadata for a specific endpoint. If a string inet address and port are passed,
+        iterate all hosts to match the :attr:`~.pool.Host.broadcast_rpc_address` and
+        :attr:`~.pool.Host.broadcast_rpc_port` attributes.
         """
         if not isinstance(endpoint_or_address, EndPoint):
-            return self._get_host_by_address(endpoint_or_address)
+            return self._get_host_by_address(endpoint_or_address, port)
         return self._hosts.get(endpoint_or_address)
 
-    def _get_host_by_address(self, address):
+    def _get_host_by_address(self, address, port=None):
         for host in six.itervalues(self._hosts):
-            if host.broadcast_rpc_address == address:
+            if (host.broadcast_rpc_address == address and
+                    (port is None or host.broadcast_rpc_port is None or host.broadcast_rpc_port == port)):
                 return host
+
         return None
 
     def all_hosts(self):
@@ -3316,3 +3319,48 @@ def group_keys_by_replica(session, keyspace, table, keys):
 
     return dict(keys_per_host)
 
+
+# TODO next major reorg
+class _NodeInfo(object):
+    """
+    Internal utility functions to determine the different host addresses/ports
+    from a local or peers row.
+    """
+
+    @staticmethod
+    def get_broadcast_rpc_address(row):
+        # TODO next major, change the parsing logic to avoid any
+        # overriding of a non-null value
+        addr = row.get("rpc_address")
+        if "native_address" in row:
+            addr = row.get("native_address")
+        if "native_transport_address" in row:
+            addr = row.get("native_transport_address")
+        if not addr or addr in ["0.0.0.0", "::"]:
+            addr = row.get("peer")
+
+        return addr
+
+    @staticmethod
+    def get_broadcast_rpc_port(row):
+        port = row.get("rpc_port")
+        if port is None or port == 0:
+            port = row.get("native_port")
+
+        return port if port and port > 0 else None
+
+    @staticmethod
+    def get_broadcast_address(row):
+        addr = row.get("broadcast_address")
+        if addr is None:
+            addr = row.get("peer")
+
+        return addr
+
+    @staticmethod
+    def get_broadcast_port(row):
+        port = row.get("broadcast_port")
+        if port is None or port == 0:
+            port = row.get("peer_port")
+
+        return port if port and port > 0 else None
diff --git a/cassandra/pool.py b/cassandra/pool.py
index a4429aeed6..87e8f03716 100644
--- a/cassandra/pool.py
+++ b/cassandra/pool.py
@@ -55,21 +55,60 @@ class Host(object):
 
     broadcast_address = None
     """
-    broadcast address configured for the node, *if available* ('peer' in system.peers table).
-    This is not present in the ``system.local`` table for older versions of Cassandra. It is also not queried if
-    :attr:`~.Cluster.token_metadata_enabled` is ``False``.
+    broadcast address configured for the node, *if available*:
+
+    'system.local.broadcast_address' or 'system.peers.peer' (Cassandra 2-3)
+    'system.local.broadcast_address' or 'system.peers_v2.peer' (Cassandra 4)
+
+    This is not present in the ``system.local`` table for older versions of Cassandra. It
+    is also not queried if :attr:`~.Cluster.token_metadata_enabled` is ``False``.
+    """
+
+    broadcast_port = None
+    """
+    broadcast port configured for the node, *if available*:
+
+    'system.local.broadcast_port' or 'system.peers_v2.peer_port' (Cassandra 4)
+
+    It is also not queried if :attr:`~.Cluster.token_metadata_enabled` is ``False``.
     """
 
     broadcast_rpc_address = None
     """
-    The broadcast rpc address of the node (`native_address` or `rpc_address`).
+    The broadcast rpc address of the node:
+
+    'system.local.rpc_address' or 'system.peers.rpc_address' (Cassandra 3)
+    'system.local.rpc_address' or 'system.peers.native_transport_address' (DSE 6+)
+    'system.local.rpc_address' or 'system.peers_v2.native_address' (Cassandra 4)
+    """
+
+    broadcast_rpc_port = None
+    """
+    The broadcast rpc port of the node, *if available*:
+
+    'system.local.rpc_port' or 'system.peers.native_transport_port' (DSE 6+)
+    'system.local.rpc_port' or 'system.peers_v2.native_port' (Cassandra 4)
     """
 
     listen_address = None
     """
-    listen address configured for the node, *if available*. This is only available in the ``system.local`` table for newer
-    versions of Cassandra. It is also not queried if :attr:`~.Cluster.token_metadata_enabled` is ``False``.
-    Usually the same as ``broadcast_address`` unless configured differently in cassandra.yaml.
+    listen address configured for the node, *if available*:
+
+    'system.local.listen_address'
+
+    This is only available in the ``system.local`` table for newer versions of Cassandra. It is also not
+    queried if :attr:`~.Cluster.token_metadata_enabled` is ``False``. Usually the same as ``broadcast_address``
+    unless configured differently in cassandra.yaml.
+    """
+
+    listen_port = None
+    """
+    listen port configured for the node, *if available*:
+
+    'system.local.listen_port'
+
+    This is only available in the ``system.local`` table for newer versions of Cassandra. It is also not
+    queried if :attr:`~.Cluster.token_metadata_enabled` is ``False``.
     """
 
     conviction_policy = None
diff --git a/cassandra/util.py b/cassandra/util.py
index 0651591203..0a00533b33 100644
--- a/cassandra/util.py
+++ b/cassandra/util.py
@@ -189,17 +189,17 @@ def _addrinfo_to_ip_strings(addrinfo):
     extracts the IP address from the sockaddr portion of the result.
 
     Since this is meant to be used in conjunction with _addrinfo_or_none,
-    this will pass None and EndPont instances through unaffected.
+    this will pass None and EndPoint instances through unaffected.
""" if addrinfo is None: return None - return [entry[4][0] for entry in addrinfo] + return [(entry[4][0], entry[4][1]) for entry in addrinfo] -def _resolve_contact_points_to_string_map(contact_points, port): +def _resolve_contact_points_to_string_map(contact_points): return OrderedDict( - (cp, _addrinfo_to_ip_strings(_addrinfo_or_none(cp, port))) - for cp in contact_points + ('{cp}:{port}'.format(cp=cp, port=port), _addrinfo_to_ip_strings(_addrinfo_or_none(cp, port))) + for cp, port in contact_points ) diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index 32a6346c7e..52b1286ebb 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -200,6 +200,8 @@ def _get_dse_version_from_cass(cass_version): ALLOW_BETA_PROTOCOL = False + + def get_default_protocol(): if CASSANDRA_VERSION >= Version('4.0-a'): if DSE_VERSION: @@ -340,6 +342,7 @@ def _id_and_mark(f): "This test is not suitible for environments with large clock granularity") requiressimulacron = unittest.skipIf(SIMULACRON_JAR is None or CASSANDRA_VERSION < Version("2.1"), "Simulacron jar hasn't been specified or C* version is 2.0") requirecassandra = unittest.skipIf(DSE_VERSION, "Cassandra required") +notdse = unittest.skipIf(DSE_VERSION, "DSE not supported") requiredse = unittest.skipUnless(DSE_VERSION, "DSE required") requirescloudproxy = unittest.skipIf(CLOUD_PROXY_PATH is None, "Cloud Proxy path hasn't been specified") @@ -368,6 +371,9 @@ def check_socket_listening(itf, timeout=60): return False +USE_SINGLE_INTERFACE = os.getenv('USE_SINGLE_INTERFACE', False) + + def get_cluster(): return CCM_CLUSTER @@ -380,8 +386,8 @@ def use_multidc(dc_list, workloads=[]): use_cluster(MULTIDC_CLUSTER_NAME, dc_list, start=True, workloads=workloads) -def use_singledc(start=True, workloads=[]): - use_cluster(CLUSTER_NAME, [3], start=start, workloads=workloads) +def use_singledc(start=True, workloads=[], use_single_interface=USE_SINGLE_INTERFACE): + use_cluster(CLUSTER_NAME, [3], start=start, workloads=workloads, use_single_interface=use_single_interface) def use_single_node(start=True, workloads=[], configuration_options={}, dse_options={}): @@ -446,7 +452,7 @@ def start_cluster_wait_for_up(cluster): def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, set_keyspace=True, ccm_options=None, - configuration_options={}, dse_options={}): + configuration_options={}, dse_options={}, use_single_interface=USE_SINGLE_INTERFACE): dse_cluster = True if DSE_VERSION else False if not workloads: workloads = [] @@ -553,7 +559,7 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, }) common.switch_cluster(path, cluster_name) CCM_CLUSTER.set_configuration_options(configuration_options) - CCM_CLUSTER.populate(nodes, ipformat=ipformat) + CCM_CLUSTER.populate(nodes, ipformat=ipformat, use_single_interface=use_single_interface) try: jvm_args = [] diff --git a/tests/integration/advanced/test_auth.py b/tests/integration/advanced/test_auth.py index 748304aef4..7e9aa8c23e 100644 --- a/tests/integration/advanced/test_auth.py +++ b/tests/integration/advanced/test_auth.py @@ -50,16 +50,17 @@ def teardown_module(): def wait_role_manager_setup_then_execute(session, statements): for s in statements: - e = None + exc = None for attempt in range(3): try: session.execute(s) break except Exception as e: + exc = e time.sleep(5) else: # if we didn't reach `break` - if e is not None: - raise e + if exc is not None: + raise exc @attr('long') diff --git 
a/tests/integration/simulacron/test_endpoint.py b/tests/integration/simulacron/test_endpoint.py index ba625765c7..691fcc8718 100644 --- a/tests/integration/simulacron/test_endpoint.py +++ b/tests/integration/simulacron/test_endpoint.py @@ -19,7 +19,8 @@ from functools import total_ordering from cassandra.cluster import Cluster -from cassandra.connection import DefaultEndPoint, EndPoint, EndPointFactory +from cassandra.connection import DefaultEndPoint, EndPoint, DefaultEndPointFactory +from cassandra.metadata import _NodeInfo from tests.integration import requiressimulacron from tests.integration.simulacron import SimulacronCluster, PROTOCOL_VERSION @@ -59,17 +60,10 @@ def __repr__(self): return "<%s: %s>" % (self.__class__.__name__, self.address) -class AddressEndPointFactory(EndPointFactory): +class AddressEndPointFactory(DefaultEndPointFactory): def create(self, row): - addr = None - if "rpc_address" in row: - addr = row.get("rpc_address") - if "native_transport_address" in row: - addr = row.get("native_transport_address") - if not addr or addr in ["0.0.0.0", "::"]: - addr = row.get("peer") - + addr = _NodeInfo.get_broadcast_rpc_address(row) return AddressEndPoint(addr) @@ -85,6 +79,7 @@ class EndPointTests(SimulacronCluster): def test_default_endpoint(self): hosts = self.cluster.metadata.all_hosts() + self.assertEqual(len(hosts), 3) for host in hosts: self.assertIsNotNone(host.endpoint) self.assertIsInstance(host.endpoint, DefaultEndPoint) @@ -106,6 +101,7 @@ def test_custom_endpoint(self): cluster.connect(wait_for_all_pools=True) hosts = cluster.metadata.all_hosts() + self.assertEqual(len(hosts), 3) for host in hosts: self.assertIsNotNone(host.endpoint) self.assertIsInstance(host.endpoint, AddressEndPoint) diff --git a/tests/integration/standard/test_control_connection.py b/tests/integration/standard/test_control_connection.py index fe02de43da..db7cff8506 100644 --- a/tests/integration/standard/test_control_connection.py +++ b/tests/integration/standard/test_control_connection.py @@ -23,7 +23,7 @@ from cassandra.protocol import ConfigurationException -from tests.integration import use_singledc, PROTOCOL_VERSION, TestCluster +from tests.integration import use_singledc, PROTOCOL_VERSION, TestCluster, greaterthanorequalcass40, notdse from tests.integration.datatype_utils import update_datatypes @@ -103,3 +103,29 @@ def test_get_control_connection_host(self): new_host = self.cluster.get_control_connection_host() self.assertNotEqual(host, new_host) + @notdse + @greaterthanorequalcass40 + def test_control_connection_port_discovery(self): + """ + Test to validate that the correct port is discovered when peersV2 is used (C* 4.0+). + + Unit tests already validate that the port can be picked up (or not) from the query. This validates + it picks up the correct port from a real server and is able to connect. 
+        """
+        self.cluster = TestCluster()
+
+        host = self.cluster.get_control_connection_host()
+        self.assertEqual(host, None)
+
+        self.session = self.cluster.connect()
+        cc_endpoint = self.cluster.control_connection._connection.endpoint
+
+        host = self.cluster.get_control_connection_host()
+        self.assertEqual(host.endpoint, cc_endpoint)
+        self.assertEqual(host.is_up, True)
+        hosts = self.cluster.metadata.all_hosts()
+        self.assertEqual(3, len(hosts))
+
+        for host in hosts:
+            self.assertEqual(9042, host.broadcast_rpc_port)
+            self.assertEqual(7000, host.broadcast_port)
diff --git a/tests/integration/standard/test_metadata.py b/tests/integration/standard/test_metadata.py
index d3dea05fad..bd556f357d 100644
--- a/tests/integration/standard/test_metadata.py
+++ b/tests/integration/standard/test_metadata.py
@@ -42,7 +42,7 @@
     get_supported_protocol_versions, greaterthancass20, greaterthancass21,
     assert_startswith, greaterthanorequalcass40, greaterthanorequaldse67, lessthancass40,
-    TestCluster)
+    TestCluster, DSE_VERSION)
 
 log = logging.getLogger(__name__)
 
@@ -52,11 +52,12 @@ def setup_module():
     use_singledc()
 
 
-class HostMetatDataTests(BasicExistingKeyspaceUnitTestCase):
+class HostMetaDataTests(BasicExistingKeyspaceUnitTestCase):
     @local
-    def test_broadcast_listen_address(self):
+    def test_host_addresses(self):
         """
-        Check to ensure that the broadcast, rpc_address, listen adresss and host are is populated correctly
+        Check to ensure that the broadcast_address, broadcast_rpc_address,
+        listen address, ports and host_id are populated correctly.
 
         @since 3.3
         @jira_ticket PYTHON-332
@@ -69,6 +70,11 @@ def test_broadcast_listen_address(self):
         self.assertIsNotNone(host.broadcast_address)
         self.assertIsNotNone(host.broadcast_rpc_address)
         self.assertIsNotNone(host.host_id)
+
+        if not DSE_VERSION and CASSANDRA_VERSION >= Version('4-a'):
+            self.assertIsNotNone(host.broadcast_port)
+            self.assertIsNotNone(host.broadcast_rpc_port)
+
         con = self.cluster.control_connection.get_connections()[0]
         local_host = con.host
 
diff --git a/tests/integration/standard/test_single_interface.py b/tests/integration/standard/test_single_interface.py
new file mode 100644
index 0000000000..91451a52a0
--- /dev/null
+++ b/tests/integration/standard/test_single_interface.py
@@ -0,0 +1,77 @@
+# Copyright DataStax, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
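+
+# These tests run a three-node cluster bound to a single network interface:
+# the nodes share one IP address and are distinguished only by their ports,
+# exercising the peers_v2-based port discovery added for C* 4.0.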
+
+try:
+    import unittest2 as unittest
+except ImportError:
+    import unittest # noqa
+
+import six
+
+from cassandra import ConsistencyLevel
+from cassandra.query import SimpleStatement
+
+from packaging.version import Version
+from tests.integration import use_singledc, PROTOCOL_VERSION, \
+    remove_cluster, greaterthanorequalcass40, notdse, \
+    CASSANDRA_VERSION, DSE_VERSION, TestCluster
+
+
+def setup_module():
+    if not DSE_VERSION and CASSANDRA_VERSION >= Version('4-a'):
+        remove_cluster()
+        use_singledc(use_single_interface=True)
+
+def teardown_module():
+    remove_cluster()
+
+
+@notdse
+@greaterthanorequalcass40
+class SingleInterfaceTest(unittest.TestCase):
+
+    def setUp(self):
+        self.cluster = TestCluster()
+        self.session = self.cluster.connect()
+
+    def tearDown(self):
+        if self.cluster is not None:
+            self.cluster.shutdown()
+
+    def test_single_interface(self):
+        """
+        Test that we can connect to multiple hosts bound to a single interface.
+        """
+        hosts = self.cluster.metadata._hosts
+        broadcast_rpc_ports = []
+        broadcast_ports = []
+        self.assertEqual(len(hosts), 3)
+        for endpoint, host in six.iteritems(hosts):
+
+            self.assertEqual(endpoint.address, host.broadcast_rpc_address)
+            self.assertEqual(endpoint.port, host.broadcast_rpc_port)
+
+            if host.broadcast_rpc_port in broadcast_rpc_ports:
+                self.fail("Duplicate broadcast_rpc_port")
+            broadcast_rpc_ports.append(host.broadcast_rpc_port)
+            if host.broadcast_port in broadcast_ports:
+                self.fail("Duplicate broadcast_port")
+            broadcast_ports.append(host.broadcast_port)
+
+        for _ in range(1, 100):
+            self.session.execute(SimpleStatement("select * from system_distributed.view_build_status",
+                                                 consistency_level=ConsistencyLevel.ALL))
+
+        for pool in self.session.get_pools():
+            self.assertEqual(1, pool.get_state()['open_count'])
diff --git a/tests/unit/test_cluster.py b/tests/unit/test_cluster.py
index 9478575cbf..249c0a17cc 100644
--- a/tests/unit/test_cluster.py
+++ b/tests/unit/test_cluster.py
@@ -90,6 +90,17 @@ def test_exception_types(self):
 
 class ClusterTest(unittest.TestCase):
 
+    def test_tuple_for_contact_points(self):
+        cluster = Cluster(contact_points=[('localhost', 9045), ('127.0.0.2', 9046), '127.0.0.3'], port=9999)
+        for cp in cluster.endpoints_resolved:
+            if cp.address in ('::1', '127.0.0.1'):
+                self.assertEqual(cp.port, 9045)
+            elif cp.address == '127.0.0.2':
+                self.assertEqual(cp.port, 9046)
+            else:
+                self.assertEqual(cp.address, '127.0.0.3')
+                self.assertEqual(cp.port, 9999)
+
     def test_invalid_contact_point_types(self):
         with self.assertRaises(ValueError):
             Cluster(contact_points=[None], protocol_version=4, connect_timeout=1)
diff --git a/tests/unit/test_control_connection.py b/tests/unit/test_control_connection.py
index db194fe0c7..3e75a0af27 100644
--- a/tests/unit/test_control_connection.py
+++ b/tests/unit/test_control_connection.py
@@ -49,10 +49,11 @@ def __init__(self):
         self.partitioner = None
         self.token_map = {}
 
-    def get_host(self, endpoint_or_address):
+    def get_host(self, endpoint_or_address, port=None):
         if not isinstance(endpoint_or_address, EndPoint):
             for host in six.itervalues(self.hosts):
-                if host.address == endpoint_or_address:
+                if (host.address == endpoint_or_address and
+                        (port is None or host.broadcast_rpc_port is None or host.broadcast_rpc_port == port)):
                     return host
         else:
             return self.hosts.get(endpoint_or_address)
@@ -87,7 +88,7 @@ def __init__(self):
     def add_host(self, endpoint, datacenter, rack, signal=False, refresh_nodes=True):
         host = Host(endpoint, SimpleConvictionPolicy, datacenter, rack) 
self.added_hosts.append(host)
-        return host
+        return host, True
 
     def remove_host(self, host):
         self.removed_hosts.append(host)
@@ -130,6 +131,12 @@ def __init__(self):
             [["192.168.1.1", "10.0.0.1", "a", "dc1", "rack1", ["1", "101", "201"]],
              ["192.168.1.2", "10.0.0.2", "a", "dc1", "rack1", ["2", "102", "202"]]]
         ]
+
+        self.peer_results_v2 = [
+            ["native_address", "native_port", "peer", "peer_port", "schema_version", "data_center", "rack", "tokens"],
+            [["192.168.1.1", 9042, "10.0.0.1", 7042, "a", "dc1", "rack1", ["1", "101", "201"]],
+             ["192.168.1.2", 9042, "10.0.0.2", 7040, "a", "dc1", "rack1", ["2", "102", "202"]]]
+        ]
 
         self.wait_for_responses = Mock(return_value=_node_meta_results(self.local_results, self.peer_results))
 
@@ -347,6 +354,7 @@ def test_handle_topology_change(self):
         }
         self.cluster.scheduler.reset_mock()
         self.control_connection._handle_topology_change(event)
+
         self.cluster.scheduler.schedule_unique.assert_called_once_with(ANY, self.control_connection._refresh_nodes_if_not_up, None)
 
         event = {
@@ -377,7 +385,7 @@ def test_handle_status_change(self):
         # do the same with a known Host
         event = {
             'change_type': 'UP',
-            'address': ('192.168.1.0', 9000)
+            'address': ('192.168.1.0', 9042)
         }
         self.cluster.scheduler.reset_mock()
         self.control_connection._handle_status_change(event)
@@ -470,6 +478,46 @@
             call(0.0, cc_no_topo_refresh.refresh_schema, **schema_event)])
 
+    def test_refresh_nodes_and_tokens_add_host_detects_port(self):
+        del self.connection.peer_results[:]
+        self.connection.peer_results.extend(self.connection.peer_results_v2)
+        self.connection.peer_results[1].append(
+            ["192.168.1.3", 555, "10.0.0.3", 666, "a", "dc1", "rack1", ["3", "103", "203"]]
+        )
+        self.connection.wait_for_responses = Mock(return_value=_node_meta_results(
+            self.connection.local_results, self.connection.peer_results))
+        self.cluster.scheduler.schedule = lambda delay, f, *args, **kwargs: f(*args, **kwargs)
+        self.control_connection.refresh_node_list_and_token_map()
+        self.assertEqual(1, len(self.cluster.added_hosts))
+        self.assertEqual(self.cluster.added_hosts[0].endpoint.address, "192.168.1.3")
+        self.assertEqual(self.cluster.added_hosts[0].endpoint.port, 555)
+        self.assertEqual(self.cluster.added_hosts[0].broadcast_rpc_address, "192.168.1.3")
+        self.assertEqual(self.cluster.added_hosts[0].broadcast_rpc_port, 555)
+        self.assertEqual(self.cluster.added_hosts[0].broadcast_address, "10.0.0.3")
+        self.assertEqual(self.cluster.added_hosts[0].broadcast_port, 666)
+        self.assertEqual(self.cluster.added_hosts[0].datacenter, "dc1")
+        self.assertEqual(self.cluster.added_hosts[0].rack, "rack1")
+
+    def test_refresh_nodes_and_tokens_add_host_detects_invalid_port(self):
+        del self.connection.peer_results[:]
+        self.connection.peer_results.extend(self.connection.peer_results_v2)
+        self.connection.peer_results[1].append(
+            ["192.168.1.3", -1, "10.0.0.3", 0, "a", "dc1", "rack1", ["3", "103", "203"]]
+        )
+        self.connection.wait_for_responses = Mock(return_value=_node_meta_results(
+            self.connection.local_results, self.connection.peer_results))
+        self.cluster.scheduler.schedule = lambda delay, f, *args, **kwargs: f(*args, **kwargs)
+        self.control_connection.refresh_node_list_and_token_map()
+        self.assertEqual(1, len(self.cluster.added_hosts))
+        self.assertEqual(self.cluster.added_hosts[0].endpoint.address, "192.168.1.3")
+        self.assertEqual(self.cluster.added_hosts[0].endpoint.port, 9042)  # fallback default
+        self.assertEqual(self.cluster.added_hosts[0].broadcast_rpc_address, "192.168.1.3")
+        
self.assertEqual(self.cluster.added_hosts[0].broadcast_rpc_port, None)
+        self.assertEqual(self.cluster.added_hosts[0].broadcast_address, "10.0.0.3")
+        self.assertEqual(self.cluster.added_hosts[0].broadcast_port, None)
+        self.assertEqual(self.cluster.added_hosts[0].datacenter, "dc1")
+        self.assertEqual(self.cluster.added_hosts[0].rack, "rack1")
+
 class EventTimingTest(unittest.TestCase):
     """
From 8aabd8fc4f2dfd9feba4f9af03834f622c19bf9d Mon Sep 17 00:00:00 2001
From: Alan Boudreault
Date: Tue, 31 Mar 2020 09:52:51 -0400
Subject: [PATCH 046/211] Add core graph documentation

---
 docs/.nav | 1 +
 docs/classic_graph.rst | 299 ++++++++++++++++++++++++
 docs/core_graph.rst | 434 -----------------------------------
 docs/graph.rst | 503 ++++++++++++++++++++++++++---------------
 docs/graph_fluent.rst | 36 +++
 docs/index.rst | 5 +-
 6 files changed, 659 insertions(+), 619 deletions(-)
 create mode 100644 docs/classic_graph.rst
 delete mode 100644 docs/core_graph.rst

diff --git a/docs/.nav b/docs/.nav
index 568cd6a383..d5b54c4e13 100644
--- a/docs/.nav
+++ b/docs/.nav
@@ -5,6 +5,7 @@ lwt
 object_mapper
 geo_types
 graph
+classic_graph
 performance
 query_paging
 security
diff --git a/docs/classic_graph.rst b/docs/classic_graph.rst
new file mode 100644
index 0000000000..ef68c86359
--- /dev/null
+++ b/docs/classic_graph.rst
@@ -0,0 +1,299 @@
+DataStax Classic Graph Queries
+==============================
+
+Getting Started
+~~~~~~~~~~~~~~~
+
+First, we need to create a graph in the system. To access the system API, we
+use the system execution profile ::
+
+    from cassandra.cluster import Cluster, EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT
+
+    cluster = Cluster()
+    session = cluster.connect()
+
+    graph_name = 'movies'
+    session.execute_graph("system.graph(name).ifNotExists().engine(Classic).create()", {'name': graph_name},
+                          execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)
+
+
+To execute requests on our newly created graph, we need to set up an execution
+profile. We also need to set the schema_mode to `development`
+for the schema creation::
+
+
+    from cassandra.cluster import Cluster, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT
+    from cassandra.graph import GraphOptions
+
+    graph_name = 'movies'
+    ep = GraphExecutionProfile(graph_options=GraphOptions(graph_name=graph_name))
+
+    cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep})
+    session = cluster.connect()
+
+    session.execute_graph("schema.config().option('graph.schema_mode').set('development')")
+
+
+We are ready to configure our graph schema. We will create a simple one for movies::
+
+    # properties are used to define a vertex
+    properties = """
+        schema.propertyKey("genreId").Text().create();
+        schema.propertyKey("personId").Text().create();
+        schema.propertyKey("movieId").Text().create();
+        schema.propertyKey("name").Text().create();
+        schema.propertyKey("title").Text().create();
+        schema.propertyKey("year").Int().create();
+        schema.propertyKey("country").Text().create();
+    """
+
+    session.execute_graph(properties) # we can execute multiple statements in a single request
+
+    # A Vertex represents a "thing" in the world.
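+    # Each vertexLabel below declares a label name and the property keys its
+    # vertices carry; these must match the propertyKeys created above.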
+ vertices = """ + schema.vertexLabel("genre").properties("genreId","name").create(); + schema.vertexLabel("person").properties("personId","name").create(); + schema.vertexLabel("movie").properties("movieId","title","year","country").create(); + """ + + session.execute_graph(vertices) + + # An edge represents a relationship between two vertices + edges = """ + schema.edgeLabel("belongsTo").single().connection("movie","genre").create(); + schema.edgeLabel("actor").connection("movie","person").create(); + """ + + session.execute_graph(edges) + + # Indexes to execute graph requests efficiently + indexes = """ + schema.vertexLabel("genre").index("genresById").materialized().by("genreId").add(); + schema.vertexLabel("genre").index("genresByName").materialized().by("name").add(); + schema.vertexLabel("person").index("personsById").materialized().by("personId").add(); + schema.vertexLabel("person").index("personsByName").materialized().by("name").add(); + schema.vertexLabel("movie").index("moviesById").materialized().by("movieId").add(); + schema.vertexLabel("movie").index("moviesByTitle").materialized().by("title").add(); + schema.vertexLabel("movie").index("moviesByYear").secondary().by("year").add(); + """ + +Next, we'll add some data:: + + session.execute_graph(""" + g.addV('genre').property('genreId', 1).property('name', 'Action').next(); + g.addV('genre').property('genreId', 2).property('name', 'Drama').next(); + g.addV('genre').property('genreId', 3).property('name', 'Comedy').next(); + g.addV('genre').property('genreId', 4).property('name', 'Horror').next(); + """) + + session.execute_graph(""" + g.addV('person').property('personId', 1).property('name', 'Mark Wahlberg').next(); + g.addV('person').property('personId', 2).property('name', 'Leonardo DiCaprio').next(); + g.addV('person').property('personId', 3).property('name', 'Iggy Pop').next(); + """) + + session.execute_graph(""" + g.addV('movie').property('movieId', 1).property('title', 'The Happening'). + property('year', 2008).property('country', 'United States').next(); + g.addV('movie').property('movieId', 2).property('title', 'The Italian Job'). + property('year', 2003).property('country', 'United States').next(); + + g.addV('movie').property('movieId', 3).property('title', 'Revolutionary Road'). + property('year', 2008).property('country', 'United States').next(); + g.addV('movie').property('movieId', 4).property('title', 'The Man in the Iron Mask'). + property('year', 1998).property('country', 'United States').next(); + + g.addV('movie').property('movieId', 5).property('title', 'Dead Man'). 
+            property('year', 1995).property('country', 'United States').next();
+        """)
+
+Now that our genre, actor and movie vertices are added, we'll create the relationships (edges) between them::
+
+    session.execute_graph("""
+        genre_horror = g.V().hasLabel('genre').has('name', 'Horror').next();
+        genre_drama = g.V().hasLabel('genre').has('name', 'Drama').next();
+        genre_action = g.V().hasLabel('genre').has('name', 'Action').next();
+
+        leo = g.V().hasLabel('person').has('name', 'Leonardo DiCaprio').next();
+        mark = g.V().hasLabel('person').has('name', 'Mark Wahlberg').next();
+        iggy = g.V().hasLabel('person').has('name', 'Iggy Pop').next();
+
+        the_happening = g.V().hasLabel('movie').has('title', 'The Happening').next();
+        the_italian_job = g.V().hasLabel('movie').has('title', 'The Italian Job').next();
+        rev_road = g.V().hasLabel('movie').has('title', 'Revolutionary Road').next();
+        man_mask = g.V().hasLabel('movie').has('title', 'The Man in the Iron Mask').next();
+        dead_man = g.V().hasLabel('movie').has('title', 'Dead Man').next();
+
+        the_happening.addEdge('belongsTo', genre_horror);
+        the_italian_job.addEdge('belongsTo', genre_action);
+        rev_road.addEdge('belongsTo', genre_drama);
+        man_mask.addEdge('belongsTo', genre_drama);
+        man_mask.addEdge('belongsTo', genre_action);
+        dead_man.addEdge('belongsTo', genre_drama);
+
+        the_happening.addEdge('actor', mark);
+        the_italian_job.addEdge('actor', mark);
+        rev_road.addEdge('actor', leo);
+        man_mask.addEdge('actor', leo);
+        dead_man.addEdge('actor', iggy);
+        """)
+
+We are all set. You can now query your graph. Here are some examples::
+
+    # Find all movies of the genre Drama
+    for r in session.execute_graph("""
+        g.V().has('genre', 'name', 'Drama').in('belongsTo').valueMap();"""):
+        print(r)
+
+    # Find all movies of the same genre as the movie 'Dead Man'
+    for r in session.execute_graph("""
+        g.V().has('movie', 'title', 'Dead Man').out('belongsTo').in('belongsTo').valueMap();"""):
+        print(r)
+
+    # Find all movies of Mark Wahlberg
+    for r in session.execute_graph("""
+        g.V().has('person', 'name', 'Mark Wahlberg').in('actor').valueMap();"""):
+        print(r)
+
+To see more graph examples, see `DataStax Graph Examples `_.
+
+Graph Types
+~~~~~~~~~~~
+
+Here are the supported graph types with their python representations:
+
+========== ================
+DSE Graph  Python
+========== ================
+boolean    bool
+bigint     long, int (PY3)
+int        int
+smallint   int
+varint     int
+float      float
+double     float
+uuid       uuid.UUID
+Decimal    Decimal
+inet       str
+timestamp  datetime.datetime
+date       datetime.date
+time       datetime.time
+duration   datetime.timedelta
+point      Point
+linestring LineString
+polygon    Polygon
+blob       bytearray, buffer (PY2), memoryview (PY3), bytes (PY3)
+========== ================
+
+Graph Row Factory
+~~~~~~~~~~~~~~~~~
+
+By default (with :class:`.GraphExecutionProfile.row_factory` set to :func:`.graph.graph_object_row_factory`), known graph result
+types are unpacked and returned as specialized types (:class:`.Vertex`, :class:`.Edge`). If the result is not one of these
+types, a :class:`.graph.Result` is returned, containing the graph result parsed from JSON and removed from its outer dict.
+The class has some accessor convenience methods for accessing top-level properties by name (`type`, `properties` above),
+or lists by index::
+
+    # dicts with `__getattr__` or `__getitem__`
+    result = session.execute_graph("[[key_str: 'value', key_int: 3]]", execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)[0] # Using system exec just because there is no graph defined
+    result # dse.graph.Result({u'key_str': u'value', u'key_int': 3})
+    result.value # {u'key_int': 3, u'key_str': u'value'} (dict)
+    result.key_str # u'value'
+    result.key_int # 3
+    result['key_str'] # u'value'
+    result['key_int'] # 3
+
+    # lists with `__getitem__`
+    result = session.execute_graph('[[0, 1, 2]]', execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)[0]
+    result # dse.graph.Result([0, 1, 2])
+    result.value # [0, 1, 2] (list)
+    result[1] # 1 (list[1])
+
+You can use a different row factory by setting :attr:`.Session.default_graph_row_factory` or passing it to
+:meth:`.Session.execute_graph`. For example, :func:`.graph.single_object_row_factory` returns the JSON result string,
+unparsed. :func:`.graph.graph_result_row_factory` returns parsed, but unmodified results (such that all metadata is retained,
+unlike :func:`.graph.graph_object_row_factory`, which sheds some as attributes and properties are unpacked). These results
+also provide convenience methods for converting to known types (:meth:`~.Result.as_vertex`, :meth:`~.Result.as_edge`, :meth:`~.Result.as_path`).
+
+Vertex and Edge properties are never unpacked since their types are unknown. If you know your graph schema and want to
+deserialize properties, use the :class:`.GraphSON1Deserializer`. It provides convenient methods to deserialize by type (e.g.
+deserialize_date, deserialize_uuid, deserialize_polygon, etc.). Example::
+
+    # ...
+    from cassandra.graph import GraphSON1Deserializer
+
+    row = session.execute_graph("g.V().toList()")[0]
+    value = row.properties['my_property_key'][0].value # accessing the VertexProperty value
+    value = GraphSON1Deserializer.deserialize_timestamp(value)
+
+    print(value) # 2017-06-26 08:27:05
+    print(type(value)) # <class 'datetime.datetime'>
+
+
+Named Parameters
+~~~~~~~~~~~~~~~~
+
+Named parameters are passed in a dict to :meth:`.cluster.Session.execute_graph`::
+
+    result_set = session.execute_graph('[a, b]', {'a': 1, 'b': 2}, execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)
+    [r.value for r in result_set] # [1, 2]
+
+All python types listed in `Graph Types`_ can be passed as named parameters and will be serialized
+automatically to their graph representation:
+
+Example::
+
+    session.execute_graph("""
+        g.addV('person').
+            property('name', text_value).
+            property('age', integer_value).
+            property('birthday', timestamp_value).
+            property('house_yard', polygon_value).toList()
+        """, {
+        'text_value': 'Mike Smith',
+        'integer_value': 34,
+        'timestamp_value': datetime.datetime(1967, 12, 30),
+        'polygon_value': Polygon(((30, 10), (40, 40), (20, 40), (10, 20), (30, 10)))
+    })
+
+
+As with all Execution Profile parameters, graph options can be set in the cluster default (as shown in the first example)
+or specified per execution::
+
+    ep = session.execution_profile_clone_update(EXEC_PROFILE_GRAPH_DEFAULT,
+                                                graph_options=GraphOptions(graph_name='something-else'))
+    session.execute_graph(statement, execution_profile=ep)
+
+Using GraphSON2 Protocol
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+The default graph protocol used is GraphSON1.
However, GraphSON1 may
+cause type conversion problems during the serialization
+of the query to the DSE Graph server, or during the deserialization of the
+responses back from a string Gremlin query. GraphSON2 offers better
+support for the complex data types handled by DSE Graph.
+
+DSE 5.0.4 and later offer the possibility to use the GraphSON2 protocol
+for graph queries. Enabling GraphSON2 can be done by `changing the
+graph protocol of the execution profile` and `setting the graphson2 row factory`::
+
+    from cassandra.cluster import Cluster, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT
+    from cassandra.graph import GraphOptions, GraphProtocol, graph_graphson2_row_factory
+
+    # Create a GraphSON2 execution profile
+    ep = GraphExecutionProfile(graph_options=GraphOptions(graph_name='types',
+                                                          graph_protocol=GraphProtocol.GRAPHSON_2_0),
+                               row_factory=graph_graphson2_row_factory)
+
+    cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep})
+    session = cluster.connect()
+    session.execute_graph(...)
+
+Using GraphSON2, all properties will be automatically deserialized to
+their Python representation. Note that this may bring significant
+behavioral changes at runtime.
+
+It is generally recommended to switch to GraphSON2 as it brings more
+consistent support for complex data types in the Graph driver and will
+be activated by default in the next major version (Python dse-driver
+3.0).
diff --git a/docs/core_graph.rst b/docs/core_graph.rst
deleted file mode 100644
index 47dc53d38d..0000000000
--- a/docs/core_graph.rst
+++ /dev/null
@@ -1,434 +0,0 @@
-DataStax Graph Queries
-======================
-
-The driver executes graph queries over the Cassandra native protocol. Use
-:meth:`.Session.execute_graph` or :meth:`.Session.execute_graph_async` for
-executing gremlin queries in DataStax Graph.
-
-The driver defines three Execution Profiles suitable for graph execution:
-
-* :data:`~.cluster.EXEC_PROFILE_GRAPH_DEFAULT`
-* :data:`~.cluster.EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT`
-* :data:`~.cluster.EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT`
-
-See :doc:`getting_started` and :doc:`execution_profiles`
-for more detail on working with profiles.
-
-In DSE 6.8.0, the Core graph engine has been introduced and is now the default. It
-provides a better unified multi-model, performance and scale. This guide
-is for graphs that use the core engine. If you work with previous versions of
-DSE or existing graphs, see :doc:`classic_graph`.
-
-Getting Started with Graph and the Core Engine
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-First, we need to create a graph in the system. To access the system API, we
-use the system execution profile ::
-
-    from cassandra.cluster import Cluster, EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT
-
-    cluster = Cluster()
-    session = cluster.connect()
-
-    graph_name = 'movies'
-    session.execute_graph("system.graph(name).create()", {'name': graph_name},
-                          execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)
-
-
-Graphs that use the core engine only support GraphSON3. Since they are Cassandra tables under
-the hood, we can automatically configure the execution profile with the proper options
-(row_factory and graph_protocol) when executing queries. 
You only need to make sure that -the `graph_name` is set and GraphSON3 will be automatically used:: - - from cassandra.cluster import Cluster, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT - - graph_name = 'movies' - ep = GraphExecutionProfile(graph_options=GraphOptions(graph_name=graph_name)) - cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep}) - session = cluster.connect() - session.execute_graph("g.addV(...)") - - -Note that this graph engine detection is based on the metadata. You might experience -some query errors if the graph has been newly created and is not yet in the metadata. This -would result to a badly configured execution profile. If you really want to avoid that, -configure your execution profile explicitly:: - - from cassandra.cluster import Cluster, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT - from cassandra.graph import GraphOptions, GraphProtocol, graph_graphson3_row_factory - - graph_name = 'movies' - ep_graphson3 = GraphExecutionProfile( - row_factory=graph_graphson3_row_factory, - graph_options=GraphOptions( - graph_protocol=GraphProtocol.GRAPHSON_3_0, - graph_name=graph_name)) - - cluster = Cluster(execution_profiles={'core': ep_graphson3}) - session = cluster.connect() - session.execute_graph("g.addV(...)", execution_profile='core') - - -We are ready to configure our graph schema. We will create a simple one for movies:: - - # A Vertex represents a "thing" in the world. - # Create the genre vertex - query = """ - schema.vertexLabel('genre') - .partitionBy('genreId', Int) - .property('name', Text) - .create() - """ - session.execute_graph(query) - - # Create the person vertex - query = """ - schema.vertexLabel('person') - .partitionBy('personId', Int) - .property('name', Text) - .create() - """ - session.execute_graph(query) - - # Create the movie vertex - query = """ - schema.vertexLabel('movie') - .partitionBy('movieId', Int) - .property('title', Text) - .property('year', Int) - .property('country', Text) - .create() - """ - session.execute_graph(query) - - # An edge represents a relationship between two vertices - # Create our edges - queries = """ - schema.edgeLabel('belongsTo').from('movie').to('genre').create(); - schema.edgeLabel('actor').from('movie').to('person').create(); - """ - session.execute_graph(queries) - - # Indexes to execute graph requests efficiently - - # If you have a node with the search workload enabled (solr), use the following: - indexes = """ - schema.vertexLabel('genre').searchIndex() - .by("name") - .create(); - - schema.vertexLabel('person').searchIndex() - .by("name") - .create(); - - schema.vertexLabel('movie').searchIndex() - .by('title') - .by("year") - .create(); - """ - session.execute_graph(indexes) - - # Otherwise, use secondary indexes: - indexes = """ - schema.vertexLabel('genre') - .secondaryIndex('by_genre') - .by('name') - .create() - - schema.vertexLabel('person') - .secondaryIndex('by_name') - .by('name') - .create() - - schema.vertexLabel('movie') - .secondaryIndex('by_title') - .by('title') - .create() - """ - session.execute_graph(indexes) - -Add some edge indexes (materialized views):: - - indexes = """ - schema.edgeLabel('belongsTo') - .from('movie') - .to('genre') - .materializedView('movie__belongsTo__genre_by_in_genreId') - .ifNotExists() - .partitionBy(IN, 'genreId') - .clusterBy(OUT, 'movieId', Asc) - .create() - - schema.edgeLabel('actor') - .from('movie') - .to('person') - .materializedView('movie__actor__person_by_in_personId') - .ifNotExists() - .partitionBy(IN, 'personId') - 
.clusterBy(OUT, 'movieId', Asc) - .create() - """ - session.execute_graph(indexes) - -Next, we'll add some data:: - - session.execute_graph(""" - g.addV('genre').property('genreId', 1).property('name', 'Action').next(); - g.addV('genre').property('genreId', 2).property('name', 'Drama').next(); - g.addV('genre').property('genreId', 3).property('name', 'Comedy').next(); - g.addV('genre').property('genreId', 4).property('name', 'Horror').next(); - """) - - session.execute_graph(""" - g.addV('person').property('personId', 1).property('name', 'Mark Wahlberg').next(); - g.addV('person').property('personId', 2).property('name', 'Leonardo DiCaprio').next(); - g.addV('person').property('personId', 3).property('name', 'Iggy Pop').next(); - """) - - session.execute_graph(""" - g.addV('movie').property('movieId', 1).property('title', 'The Happening'). - property('year', 2008).property('country', 'United States').next(); - g.addV('movie').property('movieId', 2).property('title', 'The Italian Job'). - property('year', 2003).property('country', 'United States').next(); - - g.addV('movie').property('movieId', 3).property('title', 'Revolutionary Road'). - property('year', 2008).property('country', 'United States').next(); - g.addV('movie').property('movieId', 4).property('title', 'The Man in the Iron Mask'). - property('year', 1998).property('country', 'United States').next(); - - g.addV('movie').property('movieId', 5).property('title', 'Dead Man'). - property('year', 1995).property('country', 'United States').next(); - """) - -Now that our genre, actor and movie vertices are added, we'll create the relationships (edges) between them:: - - session.execute_graph(""" - genre_horror = g.V().hasLabel('genre').has('name', 'Horror').id().next(); - genre_drama = g.V().hasLabel('genre').has('name', 'Drama').id().next(); - genre_action = g.V().hasLabel('genre').has('name', 'Action').id().next(); - - leo = g.V().hasLabel('person').has('name', 'Leonardo DiCaprio').id().next(); - mark = g.V().hasLabel('person').has('name', 'Mark Wahlberg').id().next(); - iggy = g.V().hasLabel('person').has('name', 'Iggy Pop').id().next(); - - the_happening = g.V().hasLabel('movie').has('title', 'The Happening').id().next(); - the_italian_job = g.V().hasLabel('movie').has('title', 'The Italian Job').id().next(); - rev_road = g.V().hasLabel('movie').has('title', 'Revolutionary Road').id().next(); - man_mask = g.V().hasLabel('movie').has('title', 'The Man in the Iron Mask').id().next(); - dead_man = g.V().hasLabel('movie').has('title', 'Dead Man').id().next(); - - g.addE('belongsTo').from(__.V(the_happening)).to(__.V(genre_horror)).next(); - g.addE('belongsTo').from(__.V(the_italian_job)).to(__.V(genre_action)).next(); - g.addE('belongsTo').from(__.V(rev_road)).to(__.V(genre_drama)).next(); - g.addE('belongsTo').from(__.V(man_mask)).to(__.V(genre_drama)).next(); - g.addE('belongsTo').from(__.V(man_mask)).to(__.V(genre_action)).next(); - g.addE('belongsTo').from(__.V(dead_man)).to(__.V(genre_drama)).next(); - - g.addE('actor').from(__.V(the_happening)).to(__.V(mark)).next(); - g.addE('actor').from(__.V(the_italian_job)).to(__.V(mark)).next(); - g.addE('actor').from(__.V(rev_road)).to(__.V(leo)).next(); - g.addE('actor').from(__.V(man_mask)).to(__.V(leo)).next(); - g.addE('actor').from(__.V(dead_man)).to(__.V(iggy)).next(); - """) - -We are all set. You can now query your graph. 
Here are some examples:: - - # Find all movies of the genre Drama - for r in session.execute_graph(""" - g.V().has('genre', 'name', 'Drama').in('belongsTo').valueMap();"""): - print(r) - - # Find all movies of the same genre than the movie 'Dead Man' - for r in session.execute_graph(""" - g.V().has('movie', 'title', 'Dead Man').out('belongsTo').in('belongsTo').valueMap();"""): - print(r) - - # Find all movies of Mark Wahlberg - for r in session.execute_graph(""" - g.V().has('person', 'name', 'Mark Wahlberg').in('actor').valueMap();"""): - print(r) - -To see a more graph examples, see `DataStax Graph Examples `_. - -Graph Types for the Core Engine -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Here are the supported graph types with their python representations: - -============ ================= -DSE Graph Python Driver -============ ================= -text str -boolean bool -bigint long -int int -smallint int -varint long -double float -float float -uuid UUID -bigdecimal Decimal -duration Duration (cassandra.util) -inet str or IPV4Address/IPV6Address (if available) -timestamp datetime.datetime -date datetime.date -time datetime.time -polygon Polygon -point Point -linestring LineString -blob bytearray, buffer (PY2), memoryview (PY3), bytes (PY3) -list list -map dict -set set or list - (Can return a list due to numerical values returned by Java) -tuple tuple -udt class or namedtuple -============ ================= - -Named Parameters -~~~~~~~~~~~~~~~~ - -Named parameters are passed in a dict to :meth:`.cluster.Session.execute_graph`:: - - result_set = session.execute_graph('[a, b]', {'a': 1, 'b': 2}, execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT) - [r.value for r in result_set] # [1, 2] - -All python types listed in `Graph Types for the Core Engine`_ can be passed as named parameters and will be serialized -automatically to their graph representation: - -Example:: - - session.execute_graph(""" - g.addV('person'). - property('name', text_value). - property('age', integer_value). - property('birthday', timestamp_value). - property('house_yard', polygon_value).next() - """, { - 'text_value': 'Mike Smith', - 'integer_value': 34, - 'timestamp_value': datetime.datetime(1967, 12, 30), - 'polygon_value': Polygon(((30, 10), (40, 40), (20, 40), (10, 20), (30, 10))) - }) - - -As with all Execution Profile parameters, graph options can be set in the cluster default (as shown in the first example) -or specified per execution:: - - ep = session.execution_profile_clone_update(EXEC_PROFILE_GRAPH_DEFAULT, - graph_options=GraphOptions(graph_name='something-else')) - session.execute_graph(statement, execution_profile=ep) - -CQL collections, Tuple and UDT -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is a very interesting feature of the core engine: we can use all CQL data types, including -list, map, set, tuple and udt. 
Here is an example using all these types:: - - query = """ - schema.type('address') - .property('address', Text) - .property('city', Text) - .property('state', Text) - .create(); - """ - session.execute_graph(query) - - # It works the same way than normal CQL UDT, so we - # can create an udt class and register it - class Address(object): - def __init__(self, address, city, state): - self.address = address - self.city = city - self.state = state - - session.cluster.register_user_type(graph_name, 'address', Address) - - query = """ - schema.vertexLabel('person') - .partitionBy('personId', Int) - .property('address', typeOf('address')) - .property('friends', listOf(Text)) - .property('skills', setOf(Text)) - .property('scores', mapOf(Text, Int)) - .property('last_workout', tupleOf(Text, Date)) - .create() - """ - session.execute_graph(query) - - # insertion example - query = """ - g.addV('person') - .property('personId', pid) - .property('address', address) - .property('friends', friends) - .property('skills', skills) - .property('scores', scores) - .property('last_workout', last_workout) - .next() - """ - - session.execute_graph(query, { - 'pid': 3, - 'address': Address('42 Smith St', 'Quebec', 'QC'), - 'friends': ['Al', 'Mike', 'Cathy'], - 'skills': {'food', 'fight', 'chess'}, - 'scores': {'math': 98, 'french': 3}, - 'last_workout': ('CrossFit', datetime.date(2018, 11, 20)) - }) - -Limitations ------------ - -Since Python is not a strongly-typed language and the UDT/Tuple graphson representation is, you might -get schema errors when trying to write numerical data. Example:: - - session.execute_graph(""" - schema.vertexLabel('test_tuple').partitionBy('id', Int).property('t', tupleOf(Text, Bigint)).create() - """) - - session.execute_graph(""" - g.addV('test_tuple').property('id', 0).property('t', t) - """, - {'t': ('Test', 99))} - ) - - # error: [Invalid query] message="Value component 1 is of type int, not bigint" - -This is because the server requires the client to include a GraphSON schema definition -with every UDT or tuple query. In the general case, the driver can't determine what Graph type -is meant by, e.g., an int value, and so it can't serialize the value with the correct type in the schema. -The driver provides some numerical type-wrapper factories that you can use to specify types: - -* :func:`~.to_int` -* :func:`~.to_bigint` -* :func:`~.to_smallint` -* :func:`~.to_float` -* :func:`~.to_double` - -Here's the working example of the case above:: - - from cassandra.graph import to_bigint - - session.execute_graph(""" - g.addV('test_tuple').property('id', 0).property('t', t) - """, - {'t': ('Test', to_bigint(99))} - ) - -Continuous Paging -~~~~~~~~~~~~~~~~~ - -This is another nice feature that comes with the core engine: continuous paging with -graph queries. If all nodes of the cluster are >= DSE 6.8.0, it is automatically -enabled under the hood to get the best performance. 
If you want to explicitly
-enable/disable it, you can do it through the execution profile::
-
-    # Disable it
-    ep = GraphExecutionProfile(..., continuous_paging_options=None))
-    cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep})
-
-    # Enable with a custom max_pages option
-    ep = GraphExecutionProfile(...,
-        continuous_paging_options=ContinuousPagingOptions(max_pages=10)))
-    cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep})
diff --git a/docs/graph.rst b/docs/graph.rst
index 49ec51e73b..47dc53d38d 100644
--- a/docs/graph.rst
+++ b/docs/graph.rst
@@ -1,8 +1,26 @@
 DataStax Graph Queries
 ======================
 
-Getting Started
-~~~~~~~~~~~~~~~
+The driver executes graph queries over the Cassandra native protocol. Use
+:meth:`.Session.execute_graph` or :meth:`.Session.execute_graph_async` for
+executing gremlin queries in DataStax Graph.
+
+The driver defines three Execution Profiles suitable for graph execution:
+
+* :data:`~.cluster.EXEC_PROFILE_GRAPH_DEFAULT`
+* :data:`~.cluster.EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT`
+* :data:`~.cluster.EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT`
+
+See :doc:`getting_started` and :doc:`execution_profiles`
+for more detail on working with profiles.
+
+In DSE 6.8.0, the Core graph engine was introduced and is now the default. It
+provides better multi-model support, performance and scale. This guide
+is for graphs that use the core engine. If you work with previous versions of
+DSE or existing graphs, see :doc:`classic_graph`.
+
+Getting Started with Graph and the Core Engine
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 First, we need to create a graph in the system. To access the system API, we
 use the system execution profile ::
@@ -13,129 +31,204 @@ use the system execution profile ::
     session = cluster.connect()
 
     graph_name = 'movies'
-    session.execute_graph("system.graph(name).ifNotExists().create()", {'name': graph_name},
+    session.execute_graph("system.graph(name).create()", {'name': graph_name},
                           execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)
 
 
-To execute requests on our newly created graph, we need to setup an execution
-profile. Additionally, we also need to set the schema_mode to `development`
-for the schema creation::
-
+Graphs that use the core engine only support GraphSON3. Since they are Cassandra tables under
+the hood, we can automatically configure the execution profile with the proper options
+(row_factory and graph_protocol) when executing queries. You only need to make sure that
+the `graph_name` is set and GraphSON3 will be automatically used::
 
     from cassandra.cluster import Cluster, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT
     from cassandra.graph import GraphOptions
 
     graph_name = 'movies'
     ep = GraphExecutionProfile(graph_options=GraphOptions(graph_name=graph_name))
-
     cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep})
     session = cluster.connect()
-
-    session.execute_graph("schema.config().option('graph.schema_mode').set('development')")
+    session.execute_graph("g.addV(...)")
 
 
-We are ready to configure our graph schema. We will create a simple one for movies::
+Note that this graph engine detection is based on the metadata. You might experience
+some query errors if the graph has been newly created and is not yet in the metadata. This
+would result in a badly configured execution profile. 
If you really want to avoid that, +configure your execution profile explicitly:: - # properties are used to define a vertex - properties = """ - schema.propertyKey("genreId").Text().create(); - schema.propertyKey("personId").Text().create(); - schema.propertyKey("movieId").Text().create(); - schema.propertyKey("name").Text().create(); - schema.propertyKey("title").Text().create(); - schema.propertyKey("year").Int().create(); - schema.propertyKey("country").Text().create(); - """ + from cassandra.cluster import Cluster, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT + from cassandra.graph import GraphOptions, GraphProtocol, graph_graphson3_row_factory + + graph_name = 'movies' + ep_graphson3 = GraphExecutionProfile( + row_factory=graph_graphson3_row_factory, + graph_options=GraphOptions( + graph_protocol=GraphProtocol.GRAPHSON_3_0, + graph_name=graph_name)) - session.execute_graph(properties) # we can execute multiple statements in a single request + cluster = Cluster(execution_profiles={'core': ep_graphson3}) + session = cluster.connect() + session.execute_graph("g.addV(...)", execution_profile='core') + + +We are ready to configure our graph schema. We will create a simple one for movies:: # A Vertex represents a "thing" in the world. - vertices = """ - schema.vertexLabel("genre").properties("genreId","name").create(); - schema.vertexLabel("person").properties("personId","name").create(); - schema.vertexLabel("movie").properties("movieId","title","year","country").create(); + # Create the genre vertex + query = """ + schema.vertexLabel('genre') + .partitionBy('genreId', Int) + .property('name', Text) + .create() """ - - session.execute_graph(vertices) + session.execute_graph(query) + + # Create the person vertex + query = """ + schema.vertexLabel('person') + .partitionBy('personId', Int) + .property('name', Text) + .create() + """ + session.execute_graph(query) + + # Create the movie vertex + query = """ + schema.vertexLabel('movie') + .partitionBy('movieId', Int) + .property('title', Text) + .property('year', Int) + .property('country', Text) + .create() + """ + session.execute_graph(query) # An edge represents a relationship between two vertices - edges = """ - schema.edgeLabel("belongsTo").single().connection("movie","genre").create(); - schema.edgeLabel("actor").connection("movie","person").create(); + # Create our edges + queries = """ + schema.edgeLabel('belongsTo').from('movie').to('genre').create(); + schema.edgeLabel('actor').from('movie').to('person').create(); """ - - session.execute_graph(edges) + session.execute_graph(queries) # Indexes to execute graph requests efficiently + + # If you have a node with the search workload enabled (solr), use the following: + indexes = """ + schema.vertexLabel('genre').searchIndex() + .by("name") + .create(); + + schema.vertexLabel('person').searchIndex() + .by("name") + .create(); + + schema.vertexLabel('movie').searchIndex() + .by('title') + .by("year") + .create(); + """ + session.execute_graph(indexes) + + # Otherwise, use secondary indexes: indexes = """ - schema.vertexLabel("genre").index("genresById").materialized().by("genreId").add(); - schema.vertexLabel("genre").index("genresByName").materialized().by("name").add(); - schema.vertexLabel("person").index("personsById").materialized().by("personId").add(); - schema.vertexLabel("person").index("personsByName").materialized().by("name").add(); - schema.vertexLabel("movie").index("moviesById").materialized().by("movieId").add(); - 
schema.vertexLabel("movie").index("moviesByTitle").materialized().by("title").add(); - schema.vertexLabel("movie").index("moviesByYear").secondary().by("year").add(); + schema.vertexLabel('genre') + .secondaryIndex('by_genre') + .by('name') + .create() + + schema.vertexLabel('person') + .secondaryIndex('by_name') + .by('name') + .create() + + schema.vertexLabel('movie') + .secondaryIndex('by_title') + .by('title') + .create() """ + session.execute_graph(indexes) + +Add some edge indexes (materialized views):: + + indexes = """ + schema.edgeLabel('belongsTo') + .from('movie') + .to('genre') + .materializedView('movie__belongsTo__genre_by_in_genreId') + .ifNotExists() + .partitionBy(IN, 'genreId') + .clusterBy(OUT, 'movieId', Asc) + .create() + + schema.edgeLabel('actor') + .from('movie') + .to('person') + .materializedView('movie__actor__person_by_in_personId') + .ifNotExists() + .partitionBy(IN, 'personId') + .clusterBy(OUT, 'movieId', Asc) + .create() + """ + session.execute_graph(indexes) Next, we'll add some data:: session.execute_graph(""" - g.addV('genre').property('genreId', 1).property('name', 'Action').next(); - g.addV('genre').property('genreId', 2).property('name', 'Drama').next(); - g.addV('genre').property('genreId', 3).property('name', 'Comedy').next(); - g.addV('genre').property('genreId', 4).property('name', 'Horror').next(); + g.addV('genre').property('genreId', 1).property('name', 'Action').next(); + g.addV('genre').property('genreId', 2).property('name', 'Drama').next(); + g.addV('genre').property('genreId', 3).property('name', 'Comedy').next(); + g.addV('genre').property('genreId', 4).property('name', 'Horror').next(); """) session.execute_graph(""" - g.addV('person').property('personId', 1).property('name', 'Mark Wahlberg').next(); - g.addV('person').property('personId', 2).property('name', 'Leonardo DiCaprio').next(); - g.addV('person').property('personId', 3).property('name', 'Iggy Pop').next(); + g.addV('person').property('personId', 1).property('name', 'Mark Wahlberg').next(); + g.addV('person').property('personId', 2).property('name', 'Leonardo DiCaprio').next(); + g.addV('person').property('personId', 3).property('name', 'Iggy Pop').next(); """) session.execute_graph(""" - g.addV('movie').property('movieId', 1).property('title', 'The Happening'). - property('year', 2008).property('country', 'United States').next(); - g.addV('movie').property('movieId', 2).property('title', 'The Italian Job'). - property('year', 2003).property('country', 'United States').next(); - - g.addV('movie').property('movieId', 3).property('title', 'Revolutionary Road'). - property('year', 2008).property('country', 'United States').next(); - g.addV('movie').property('movieId', 4).property('title', 'The Man in the Iron Mask'). - property('year', 1998).property('country', 'United States').next(); - - g.addV('movie').property('movieId', 5).property('title', 'Dead Man'). - property('year', 1995).property('country', 'United States').next(); + g.addV('movie').property('movieId', 1).property('title', 'The Happening'). + property('year', 2008).property('country', 'United States').next(); + g.addV('movie').property('movieId', 2).property('title', 'The Italian Job'). + property('year', 2003).property('country', 'United States').next(); + + g.addV('movie').property('movieId', 3).property('title', 'Revolutionary Road'). + property('year', 2008).property('country', 'United States').next(); + g.addV('movie').property('movieId', 4).property('title', 'The Man in the Iron Mask'). 
+ property('year', 1998).property('country', 'United States').next(); + + g.addV('movie').property('movieId', 5).property('title', 'Dead Man'). + property('year', 1995).property('country', 'United States').next(); """) Now that our genre, actor and movie vertices are added, we'll create the relationships (edges) between them:: session.execute_graph(""" - genre_horror = g.V().hasLabel('genre').has('name', 'Horror').next(); - genre_drama = g.V().hasLabel('genre').has('name', 'Drama').next(); - genre_action = g.V().hasLabel('genre').has('name', 'Action').next(); - - leo = g.V().hasLabel('person').has('name', 'Leonardo DiCaprio').next(); - mark = g.V().hasLabel('person').has('name', 'Mark Wahlberg').next(); - iggy = g.V().hasLabel('person').has('name', 'Iggy Pop').next(); - - the_happening = g.V().hasLabel('movie').has('title', 'The Happening').next(); - the_italian_job = g.V().hasLabel('movie').has('title', 'The Italian Job').next(); - rev_road = g.V().hasLabel('movie').has('title', 'Revolutionary Road').next(); - man_mask = g.V().hasLabel('movie').has('title', 'The Man in the Iron Mask').next(); - dead_man = g.V().hasLabel('movie').has('title', 'Dead Man').next(); - - the_happening.addEdge('belongsTo', genre_horror); - the_italian_job.addEdge('belongsTo', genre_action); - rev_road.addEdge('belongsTo', genre_drama); - man_mask.addEdge('belongsTo', genre_drama); - man_mask.addEdge('belongsTo', genre_action); - dead_man.addEdge('belongsTo', genre_drama); - - the_happening.addEdge('actor', mark); - the_italian_job.addEdge('actor', mark); - rev_road.addEdge('actor', leo); - man_mask.addEdge('actor', leo); - dead_man.addEdge('actor', iggy); + genre_horror = g.V().hasLabel('genre').has('name', 'Horror').id().next(); + genre_drama = g.V().hasLabel('genre').has('name', 'Drama').id().next(); + genre_action = g.V().hasLabel('genre').has('name', 'Action').id().next(); + + leo = g.V().hasLabel('person').has('name', 'Leonardo DiCaprio').id().next(); + mark = g.V().hasLabel('person').has('name', 'Mark Wahlberg').id().next(); + iggy = g.V().hasLabel('person').has('name', 'Iggy Pop').id().next(); + + the_happening = g.V().hasLabel('movie').has('title', 'The Happening').id().next(); + the_italian_job = g.V().hasLabel('movie').has('title', 'The Italian Job').id().next(); + rev_road = g.V().hasLabel('movie').has('title', 'Revolutionary Road').id().next(); + man_mask = g.V().hasLabel('movie').has('title', 'The Man in the Iron Mask').id().next(); + dead_man = g.V().hasLabel('movie').has('title', 'Dead Man').id().next(); + + g.addE('belongsTo').from(__.V(the_happening)).to(__.V(genre_horror)).next(); + g.addE('belongsTo').from(__.V(the_italian_job)).to(__.V(genre_action)).next(); + g.addE('belongsTo').from(__.V(rev_road)).to(__.V(genre_drama)).next(); + g.addE('belongsTo').from(__.V(man_mask)).to(__.V(genre_drama)).next(); + g.addE('belongsTo').from(__.V(man_mask)).to(__.V(genre_action)).next(); + g.addE('belongsTo').from(__.V(dead_man)).to(__.V(genre_drama)).next(); + + g.addE('actor').from(__.V(the_happening)).to(__.V(mark)).next(); + g.addE('actor').from(__.V(the_italian_job)).to(__.V(mark)).next(); + g.addE('actor').from(__.V(rev_road)).to(__.V(leo)).next(); + g.addE('actor').from(__.V(man_mask)).to(__.V(leo)).next(); + g.addE('actor').from(__.V(dead_man)).to(__.V(iggy)).next(); """) We are all set. You can now query your graph. Here are some examples:: @@ -144,7 +237,7 @@ We are all set. You can now query your graph. 
Here are some examples::
 
     # Find all movies of the genre Drama
     for r in session.execute_graph("""
         g.V().has('genre', 'name', 'Drama').in('belongsTo').valueMap();"""):
         print(r)
-    
+
     # Find all movies of the same genre as the movie 'Dead Man'
     for r in session.execute_graph("""
         g.V().has('movie', 'title', 'Dead Man').out('belongsTo').in('belongsTo').valueMap();"""):
@@ -157,78 +250,40 @@ We are all set. You can now query your graph. Here are some examples::
 
 To see more graph examples, see `DataStax Graph Examples `_.
 
-Graph Types
-~~~~~~~~~~~
+Graph Types for the Core Engine
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Here are the supported graph types with their python representations:
 
-========== ================
-DSE Graph  Python
-========== ================
-boolean    bool
-bigint     long, int (PY3)
-int        int
-smallint   int
-varint     int
-float      float
-double     double
-uuid       uuid.UUID
-Decimal    Decimal
-inet       str
-timestamp  datetime.datetime
-date       datetime.date
-time       datetime.time
-duration   datetime.timedelta
-point      Point
-linestring LineString
-polygon    Polygon
-blob       bytearray, buffer (PY2), memoryview (PY3), bytes (PY3)
-========== ================
-
-Graph Row Factory
-~~~~~~~~~~~~~~~~~
-
-By default (with :class:`.GraphExecutionProfile.row_factory` set to :func:`.graph.graph_object_row_factory`), known graph result
-types are unpacked and returned as specialized types (:class:`.Vertex`, :class:`.Edge`). If the result is not one of these
-types, a :class:`.graph.Result` is returned, containing the graph result parsed from JSON and removed from its outer dict.
-The class has some accessor convenience methods for accessing top-level properties by name (`type`, `properties` above),
-or lists by index::
-
-    # dicts with `__getattr__` or `__getitem__`
-    result = session.execute_graph("[[key_str: 'value', key_int: 3]]", execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)[0] # Using system exec just because there is no graph defined
-    result # dse.graph.Result({u'key_str': u'value', u'key_int': 3})
-    result.value # {u'key_int': 3, u'key_str': u'value'} (dict)
-    result.key_str # u'value'
-    result.key_int # 3
-    result['key_str'] # u'value'
-    result['key_int'] # 3
-
-    # lists with `__getitem__`
-    result = session.execute_graph('[[0, 1, 2]]', execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)[0]
-    result # dse.graph.Result([0, 1, 2])
-    result.value # [0, 1, 2] (list)
-    result[1] # 1 (list[1])
-
-You can use a different row factory by setting :attr:`.Session.default_graph_row_factory` or passing it to
-:meth:`.Session.execute_graph`. For example, :func:`.graph.single_object_row_factory` returns the JSON result string`,
-unparsed. :func:`.graph.graph_result_row_factory` returns parsed, but unmodified results (such that all metadata is retained,
-unlike :func:`.graph.graph_object_row_factory`, which sheds some as attributes and properties are unpacked). These results
-also provide convenience methods for converting to known types (:meth:`~.Result.as_vertex`, :meth:`~.Result.as_edge`, :meth:`~.Result.as_path`).
-
-Vertex and Edge properties are never unpacked since their types are unknown. If you know your graph schema and want to
-deserialize properties, use the :class:`.GraphSON1Deserializer`. It provides convenient methods to deserialize by types (e.g.
-deserialize_date, deserialize_uuid, deserialize_polygon etc.) Example::
-
-    # ... 
- from cassandra.graph import GraphSON1Deserializer - - row = session.execute_graph("g.V().toList()")[0] - value = row.properties['my_property_key'][0].value # accessing the VertexProperty value - value = GraphSON1Deserializer.deserialize_timestamp(value) - - print(value) # 2017-06-26 08:27:05 - print(type(value)) # - +============ ================= +DSE Graph Python Driver +============ ================= +text str +boolean bool +bigint long +int int +smallint int +varint long +double float +float float +uuid UUID +bigdecimal Decimal +duration Duration (cassandra.util) +inet str or IPV4Address/IPV6Address (if available) +timestamp datetime.datetime +date datetime.date +time datetime.time +polygon Polygon +point Point +linestring LineString +blob bytearray, buffer (PY2), memoryview (PY3), bytes (PY3) +list list +map dict +set set or list + (Can return a list due to numerical values returned by Java) +tuple tuple +udt class or namedtuple +============ ================= Named Parameters ~~~~~~~~~~~~~~~~ @@ -238,7 +293,7 @@ Named parameters are passed in a dict to :meth:`.cluster.Session.execute_graph`: result_set = session.execute_graph('[a, b]', {'a': 1, 'b': 2}, execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT) [r.value for r in result_set] # [1, 2] -All python types listed in `Graph Types`_ can be passed as named parameters and will be serialized +All python types listed in `Graph Types for the Core Engine`_ can be passed as named parameters and will be serialized automatically to their graph representation: Example:: @@ -248,7 +303,7 @@ Example:: property('name', text_value). property('age', integer_value). property('birthday', timestamp_value). - property('house_yard', polygon_value).toList() + property('house_yard', polygon_value).next() """, { 'text_value': 'Mike Smith', 'integer_value': 34, @@ -264,36 +319,116 @@ or specified per execution:: graph_options=GraphOptions(graph_name='something-else')) session.execute_graph(statement, execution_profile=ep) -Using GraphSON2 Protocol -~~~~~~~~~~~~~~~~~~~~~~~~ +CQL collections, Tuple and UDT +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The default graph protocol used is GraphSON1. However GraphSON1 may -cause problems of type conversion happening during the serialization -of the query to the DSE Graph server, or the deserialization of the -responses back from a string Gremlin query. GraphSON2 offers better -support for the complex data types handled by DSE Graph. +This is a very interesting feature of the core engine: we can use all CQL data types, including +list, map, set, tuple and udt. Here is an example using all these types:: -DSE >=5.0.4 now offers the possibility to use the GraphSON2 protocol -for graph queries. 
Enabling GraphSON2 can be done by `changing the
-graph protocol of the execution profile` and `setting the graphson2 row factory`::
+    query = """
+        schema.type('address')
+            .property('address', Text)
+            .property('city', Text)
+            .property('state', Text)
+            .create();
+    """
+    session.execute_graph(query)
+
+    # It works the same way as a normal CQL UDT, so we
+    # can create a UDT class and register it
+    class Address(object):
+        def __init__(self, address, city, state):
+            self.address = address
+            self.city = city
+            self.state = state
+
+    session.cluster.register_user_type(graph_name, 'address', Address)
+
+    query = """
+        schema.vertexLabel('person')
+            .partitionBy('personId', Int)
+            .property('address', typeOf('address'))
+            .property('friends', listOf(Text))
+            .property('skills', setOf(Text))
+            .property('scores', mapOf(Text, Int))
+            .property('last_workout', tupleOf(Text, Date))
+            .create()
+    """
+    session.execute_graph(query)
+
+    # insertion example
+    query = """
+        g.addV('person')
+            .property('personId', pid)
+            .property('address', address)
+            .property('friends', friends)
+            .property('skills', skills)
+            .property('scores', scores)
+            .property('last_workout', last_workout)
+            .next()
+    """
 
-    from cassandra.cluster import Cluster, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT
-    from cassandra.graph import GraphOptions, GraphProtocol, graph_graphson2_row_factory
+    session.execute_graph(query, {
+        'pid': 3,
+        'address': Address('42 Smith St', 'Quebec', 'QC'),
+        'friends': ['Al', 'Mike', 'Cathy'],
+        'skills': {'food', 'fight', 'chess'},
+        'scores': {'math': 98, 'french': 3},
+        'last_workout': ('CrossFit', datetime.date(2018, 11, 20))
+    })
 
-    # Create a GraphSON2 execution profile
-    ep = GraphExecutionProfile(graph_options=GraphOptions(graph_name='types',
-                                                          graph_protocol=GraphProtocol.GRAPHSON_2_0),
-                               row_factory=graph_graphson2_row_factory)
+Limitations
+-----------
 
-    cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep})
-    session = cluster.connect()
-    session.execute_graph(...)
+Since Python is not a strongly-typed language and the UDT/Tuple graphson representation is, you might
+get schema errors when trying to write numerical data. Example::
 
-Using GraphSON2, all properties will be automatically deserialized to
-its Python representation. Note that it may bring significant
-behavioral change at runtime.
+    session.execute_graph("""
+        schema.vertexLabel('test_tuple').partitionBy('id', Int).property('t', tupleOf(Text, Bigint)).create()
+    """)
+
+    session.execute_graph("""
+        g.addV('test_tuple').property('id', 0).property('t', t)
+        """,
+        {'t': ('Test', 99)}
+    )
+
+    # error: [Invalid query] message="Value component 1 is of type int, not bigint"
+
+This is because the server requires the client to include a GraphSON schema definition
+with every UDT or tuple query. In the general case, the driver can't determine what Graph type
+is meant by, e.g., an int value, and so it can't serialize the value with the correct type in the schema.
+The driver provides some numerical type-wrapper factories that you can use to specify types:
+
+* :func:`~.to_int`
+* :func:`~.to_bigint`
+* :func:`~.to_smallint`
+* :func:`~.to_float`
+* :func:`~.to_double`
+
+Here's the working example of the case above::
 
-It is generally recommended to switch to GraphSON2 as it brings more
-consistent support for complex data types in the Graph driver and will
-be activated by default in the next major version (Python dse-driver
-driver 3.0). 
+
+    from cassandra.graph import to_bigint
+
+    session.execute_graph("""
+        g.addV('test_tuple').property('id', 0).property('t', t)
+        """,
+        {'t': ('Test', to_bigint(99))}
+    )
+
+Continuous Paging
+~~~~~~~~~~~~~~~~~
+
+This is another nice feature that comes with the core engine: continuous paging with
+graph queries. If all nodes of the cluster are >= DSE 6.8.0, it is automatically
+enabled under the hood to get the best performance. If you want to explicitly
+enable/disable it, you can do it through the execution profile::
+
+    # Disable it
+    ep = GraphExecutionProfile(..., continuous_paging_options=None)
+    cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep})
+
+    # Enable with a custom max_pages option
+    ep = GraphExecutionProfile(...,
+        continuous_paging_options=ContinuousPagingOptions(max_pages=10))
+    cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep})
diff --git a/docs/graph_fluent.rst b/docs/graph_fluent.rst
index c79aa1ecf4..fbe0ef57df 100644
--- a/docs/graph_fluent.rst
+++ b/docs/graph_fluent.rst
@@ -71,6 +71,27 @@ If you want to change execution property defaults, please see the :doc:`Executio
 for a more generalized discussion of the API. Graph traversal queries use the same execution
 profile defined for DSE graph. If you need to change the default properties, please refer to the
 :doc:`DSE Graph query documentation page `
 
+Configuring a Traversal Execution Profile for the Core graph engine
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To execute a traversal query with graphs that use the core engine, you need to configure
+a graphson3 execution profile:
+
+.. code-block:: python
+
+    from cassandra.cluster import Cluster, EXEC_PROFILE_GRAPH_DEFAULT
+    from cassandra.datastax.graph import GraphProtocol
+    from cassandra.datastax.graph.fluent import DseGraph
+
+    ep_graphson3 = DseGraph.create_execution_profile(
+        'my_core_graph_name',
+        graph_protocol=GraphProtocol.GRAPHSON_3_0
+    )
+    cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep_graphson3})
+    session = cluster.connect()
+
+    g = DseGraph.traversal_source(session)
+    print(g.V().toList())
+
+
 Explicit Graph Traversal Execution with a DSE Session
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -99,6 +120,21 @@ Below is an example of explicit execution. For this example, assume the schema h
 
     for result in session.execute_graph(v_query):
         pprint(result.value)
 
+Converting a traversal to a bytecode query for core graphs requires some more work, because we
+need the cluster context for UDT and tuple types:
+
+.. 
code-block:: python
+
+    g = DseGraph.traversal_source(session=session)
+    context = {
+        'cluster': cluster,
+        'graph_name': 'the_graph_for_the_query'
+    }
+    addV_query = DseGraph.query_from_traversal(
+        g.addV('genre').property('genreId', 1).property('name', 'Action'),
+        graph_protocol=GraphProtocol.GRAPHSON_3_0,
+        context=context
+    )
+
 Implicit Graph Traversal Execution with TinkerPop
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/docs/index.rst b/docs/index.rst
index d18cf6acd2..4cdd637e0a 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -54,7 +54,10 @@ Contents
     Working with DSE geometry types
 
 :doc:`graph`
-    Graph queries
+    Graph queries with the Core engine
+
+:doc:`classic_graph`
+    Graph queries with the Classic engine
 
 :doc:`graph_fluent`
     DataStax Graph Fluent API

From c036a640ccf045a5dcb007d2a1f9582ffdd0ba73 Mon Sep 17 00:00:00 2001
From: Alan Boudreault
Date: Tue, 31 Mar 2020 11:25:45 -0400
Subject: [PATCH 047/211] build 3.22 docs for core graph

---
 docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs.yaml b/docs.yaml
index fe5bbf548e..be1740f086 100644
--- a/docs.yaml
+++ b/docs.yaml
@@ -23,7 +23,7 @@ sections:
       CASS_DRIVER_NO_CYTHON=1 python setup.py build_ext --inplace --force
 versions:
   - name: '3.22'
-    ref: a1f8e102
+    ref: 1ccd5b99
   - name: '3.21'
     ref: 5589d96b
   - name: '3.20'

From df2533d0be19536819c7dc384937c9f100b4165f Mon Sep 17 00:00:00 2001
From: Alan Boudreault
Date: Tue, 31 Mar 2020 12:23:50 -0400
Subject: [PATCH 048/211] docs: better graph_fluent TOC position

---
 docs/.nav | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/.nav b/docs/.nav
index d5b54c4e13..375f058817 100644
--- a/docs/.nav
+++ b/docs/.nav
@@ -5,6 +5,7 @@ lwt
 object_mapper
 geo_types
 graph
+graph_fluent
 classic_graph
 performance
 query_paging
 security

From b06d524a26f586b30f7d3eea668a6486f0f778c8 Mon Sep 17 00:00:00 2001
From: Alan Boudreault
Date: Thu, 26 Mar 2020 16:01:32 -0400
Subject: [PATCH 049/211] PYTHON-1207: transient replication support (phase 2)

---
 cassandra/metadata.py                          | 120 +++++++++++-----
 docs/api/cassandra/metadata.rst                |   4 +
 .../long/test_loadbalancingpolicies.py         |  85 +++++++----
 tests/unit/test_metadata.py                    |  44 +++++-
 4 files changed, 185 insertions(+), 68 deletions(-)

diff --git a/cassandra/metadata.py b/cassandra/metadata.py
index 5fb228934d..df38fc6670 100644
--- a/cassandra/metadata.py
+++ b/cassandra/metadata.py
@@ -386,6 +386,7 @@ def __new__(metacls, name, bases, dct):
 
         return cls
 
+
 @six.add_metaclass(ReplicationStrategyTypeType)
 class _ReplicationStrategy(object):
     options_map = None
@@ -453,37 +454,82 @@ def make_token_replica_map(self, token_to_host_owner, ring):
         return {}
 
 
-def parse_replication_factor(input_rf):
+class ReplicationFactor(object):
+    """
+    Represents the replication factor of a keyspace.
+    """
+
+    all_replicas = None
+    """
+    The total number of replicas.
     """
-    Given the inputted replication factor, returns a tuple containing number of total replicas
-    and number of transient replicas
+
+    full_replicas = None
     """
-    transient_replicas = None
+    The number of replicas that own a full copy of the data. This is the same
+    as `all_replicas` when transient replication is not enabled.
+    """
+
+    transient_replicas = None
+    """
+    The number of transient replicas.
+
+    Only set if the keyspace has transient replication enabled.
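+
+    A quick sketch of how these attributes relate, assuming a ``'3/1'``
+    replication factor string (3 replicas in total, 1 of them transient)::
+
+        rf = ReplicationFactor.create('3/1')
+        rf.all_replicas        # 3
+        rf.transient_replicas  # 1
+        rf.full_replicas       # 2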
+ """ + + def __init__(self, all_replicas, transient_replicas=None): + self.all_replicas = all_replicas + self.transient_replicas = transient_replicas + self.full_replicas = (all_replicas - transient_replicas) if transient_replicas else all_replicas + + @staticmethod + def create(rf): + """ + Given the inputted replication factor string, parse and return the ReplicationFactor instance. + """ + transient_replicas = None try: - rf = input_rf.split('/') - total_replicas, transient_replicas = int(rf[0]), int(rf[1]) - except Exception: - raise ValueError("Unable to determine replication factor from: {}".format(input_rf)) - return total_replicas, transient_replicas + all_replicas = int(rf) + except ValueError: + try: + rf = rf.split('/') + all_replicas, transient_replicas = int(rf[0]), int(rf[1]) + except Exception: + raise ValueError("Unable to determine replication factor from: {}".format(rf)) + + return ReplicationFactor(all_replicas, transient_replicas) + + def __str__(self): + return ("%d/%d" % (self.all_replicas, self.transient_replicas) if self.transient_replicas + else "%d" % self.all_replicas) + + def __eq__(self, other): + if not isinstance(other, ReplicationFactor): + return False + + return self.all_replicas == other.all_replicas and self.full_replicas == other.full_replicas class SimpleStrategy(ReplicationStrategy): - replication_factor = None - """ - The replication factor for this keyspace. + replication_factor_info = None """ - transient_replicas = None - """ - The number of transient replicas for this keyspace. + A :class:`cassandra.metadata.ReplicationFactor` instance. """ + @property + def replication_factor(self): + """ + The replication factor for this keyspace. + + For backward compatibility, this returns the + :attr:`cassandra.metadata.ReplicationFactor.full_replicas` value of + :attr:`cassandra.metadata.SimpleStrategy.replication_factor_info`. + """ + return self.replication_factor_info.full_replicas + def __init__(self, options_map): - self._raw_replication_factor = options_map['replication_factor'] - self.replication_factor, self.transient_replicas = parse_replication_factor(self._raw_replication_factor) + self.replication_factor_info = ReplicationFactor.create(options_map['replication_factor']) def make_token_replica_map(self, token_to_host_owner, ring): replica_map = {} @@ -505,36 +551,40 @@ def export_for_schema(self): suitable for use in a CREATE KEYSPACE statement. """ return "{'class': 'SimpleStrategy', 'replication_factor': '%s'}" \ - % (self._raw_replication_factor,) + % (str(self.replication_factor_info),) def __eq__(self, other): if not isinstance(other, SimpleStrategy): return False - return str(self._raw_replication_factor) == str(other._raw_replication_factor) + return str(self.replication_factor_info) == str(other.replication_factor_info) class NetworkTopologyStrategy(ReplicationStrategy): + dc_replication_factors_info = None + """ + A map of datacenter names to the :class:`cassandra.metadata.ReplicationFactor` instance for that DC. + """ + dc_replication_factors = None """ A map of datacenter names to the replication factor for that DC. + + For backward compatibility, this maps to the :attr:`cassandra.metadata.ReplicationFactor.full_replicas` + value of the :attr:`cassandra.metadata.NetworkTopologyStrategy.dc_replication_factors_info` dict. 
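+
+    A minimal sketch, with assumed datacenter names and factor strings::
+
+        nts = NetworkTopologyStrategy({'dc1': '3', 'dc2': '3/1'})
+        nts.dc_replication_factors  # {'dc1': 3, 'dc2': 2} (full replicas only)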
""" def __init__(self, dc_replication_factors): - try: - self.dc_replication_factors = dict( - (str(k), int(v)) for k, v in dc_replication_factors.items()) - except ValueError: - self.dc_replication_factors = dict( - (str(k), str(v)) for k, v in dc_replication_factors.items()) + self.dc_replication_factors_info = dict( + (str(k), ReplicationFactor.create(v)) for k, v in dc_replication_factors.items()) + self.dc_replication_factors = dict( + (dc, rf.full_replicas) for dc, rf in self.dc_replication_factors_info.items()) def make_token_replica_map(self, token_to_host_owner, ring): - dc_rf_map = {} - for dc, rf in self.dc_replication_factors.items(): - total_rf = parse_replication_factor(rf)[0] - if total_rf > 0: - dc_rf_map[dc] = total_rf + dc_rf_map = dict( + (dc, full_replicas) for dc, full_replicas in self.dc_replication_factors.items() + if full_replicas > 0) # build a map of DCs to lists of indexes into `ring` for tokens that # belong to that DC @@ -614,15 +664,15 @@ def export_for_schema(self): suitable for use in a CREATE KEYSPACE statement. """ ret = "{'class': 'NetworkTopologyStrategy'" - for dc, repl_factor in sorted(self.dc_replication_factors.items()): - ret += ", '%s': '%s'" % (dc, repl_factor) + for dc, rf in sorted(self.dc_replication_factors_info.items()): + ret += ", '%s': '%s'" % (dc, str(rf)) return ret + "}" def __eq__(self, other): if not isinstance(other, NetworkTopologyStrategy): return False - return self.dc_replication_factors == other.dc_replication_factors + return self.dc_replication_factors_info == other.dc_replication_factors_info class LocalStrategy(ReplicationStrategy): diff --git a/docs/api/cassandra/metadata.rst b/docs/api/cassandra/metadata.rst index ca33e34739..91fe39fd99 100644 --- a/docs/api/cassandra/metadata.rst +++ b/docs/api/cassandra/metadata.rst @@ -76,6 +76,10 @@ Tokens and Ring Topology .. autoclass:: ReplicationStrategy :members: +.. autoclass:: ReplicationFactor + :members: + :exclude-members: create + .. 
autoclass:: SimpleStrategy :members: diff --git a/tests/integration/long/test_loadbalancingpolicies.py b/tests/integration/long/test_loadbalancingpolicies.py index c3bf911ed0..f245569a80 100644 --- a/tests/integration/long/test_loadbalancingpolicies.py +++ b/tests/integration/long/test_loadbalancingpolicies.py @@ -16,6 +16,7 @@ import struct import sys import traceback +from cassandra import cqltypes from cassandra import ConsistencyLevel, Unavailable, OperationTimedOut, ReadTimeout, ReadFailure, \ WriteTimeout, WriteFailure @@ -29,7 +30,7 @@ ) from cassandra.query import SimpleStatement -from tests.integration import use_singledc, use_multidc, remove_cluster, TestCluster +from tests.integration import use_singledc, use_multidc, remove_cluster, TestCluster, greaterthanorequalcass40, notdse from tests.integration.long.utils import (wait_for_up, create_schema, CoordinatorStats, force_stop, wait_for_down, decommission, start, @@ -184,18 +185,19 @@ def test_token_aware_is_used_by_default(self): """ cluster = TestCluster() + self.addCleanup(cluster.shutdown) if murmur3 is not None: self.assertTrue(isinstance(cluster.profile_manager.default.load_balancing_policy, TokenAwarePolicy)) else: self.assertTrue(isinstance(cluster.profile_manager.default.load_balancing_policy, DCAwareRoundRobinPolicy)) - cluster.shutdown() - def test_roundrobin(self): use_singledc() keyspace = 'test_roundrobin' cluster, session = self._cluster_session_with_lbp(RoundRobinPolicy()) + self.addCleanup(cluster.shutdown) + self._wait_for_nodes_up(range(1, 4), cluster) create_schema(cluster, session, keyspace, replication_factor=3) self._insert(session, keyspace) @@ -226,12 +228,12 @@ def test_roundrobin(self): self.coordinator_stats.assert_query_count_equals(self, 1, 0) self.coordinator_stats.assert_query_count_equals(self, 2, 6) self.coordinator_stats.assert_query_count_equals(self, 3, 6) - cluster.shutdown() def test_roundrobin_two_dcs(self): use_multidc([2, 2]) keyspace = 'test_roundrobin_two_dcs' cluster, session = self._cluster_session_with_lbp(RoundRobinPolicy()) + self.addCleanup(cluster.shutdown) self._wait_for_nodes_up(range(1, 5), cluster) create_schema(cluster, session, keyspace, replication_strategy=[2, 2]) @@ -260,12 +262,11 @@ def test_roundrobin_two_dcs(self): self.coordinator_stats.assert_query_count_equals(self, 4, 3) self.coordinator_stats.assert_query_count_equals(self, 5, 3) - cluster.shutdown() - def test_roundrobin_two_dcs_2(self): use_multidc([2, 2]) keyspace = 'test_roundrobin_two_dcs_2' cluster, session = self._cluster_session_with_lbp(RoundRobinPolicy()) + self.addCleanup(cluster.shutdown) self._wait_for_nodes_up(range(1, 5), cluster) create_schema(cluster, session, keyspace, replication_strategy=[2, 2]) @@ -294,12 +295,11 @@ def test_roundrobin_two_dcs_2(self): self.coordinator_stats.assert_query_count_equals(self, 4, 3) self.coordinator_stats.assert_query_count_equals(self, 5, 3) - cluster.shutdown() - def test_dc_aware_roundrobin_two_dcs(self): use_multidc([3, 2]) keyspace = 'test_dc_aware_roundrobin_two_dcs' cluster, session = self._cluster_session_with_lbp(DCAwareRoundRobinPolicy('dc1')) + self.addCleanup(cluster.shutdown) self._wait_for_nodes_up(range(1, 6)) create_schema(cluster, session, keyspace, replication_strategy=[2, 2]) @@ -312,12 +312,11 @@ def test_dc_aware_roundrobin_two_dcs(self): self.coordinator_stats.assert_query_count_equals(self, 4, 0) self.coordinator_stats.assert_query_count_equals(self, 5, 0) - cluster.shutdown() - def test_dc_aware_roundrobin_two_dcs_2(self): use_multidc([3, 
2]) keyspace = 'test_dc_aware_roundrobin_two_dcs_2' cluster, session = self._cluster_session_with_lbp(DCAwareRoundRobinPolicy('dc2')) + self.addCleanup(cluster.shutdown) self._wait_for_nodes_up(range(1, 6)) create_schema(cluster, session, keyspace, replication_strategy=[2, 2]) @@ -330,12 +329,11 @@ def test_dc_aware_roundrobin_two_dcs_2(self): self.coordinator_stats.assert_query_count_equals(self, 4, 6) self.coordinator_stats.assert_query_count_equals(self, 5, 6) - cluster.shutdown() - def test_dc_aware_roundrobin_one_remote_host(self): use_multidc([2, 2]) keyspace = 'test_dc_aware_roundrobin_one_remote_host' cluster, session = self._cluster_session_with_lbp(DCAwareRoundRobinPolicy('dc2', used_hosts_per_remote_dc=1)) + self.addCleanup(cluster.shutdown) self._wait_for_nodes_up(range(1, 5)) create_schema(cluster, session, keyspace, replication_strategy=[2, 2]) @@ -408,8 +406,6 @@ def test_dc_aware_roundrobin_one_remote_host(self): except NoHostAvailable: pass - cluster.shutdown() - def test_token_aware(self): keyspace = 'test_token_aware' self.token_aware(keyspace) @@ -421,6 +417,7 @@ def test_token_aware_prepared(self): def token_aware(self, keyspace, use_prepared=False): use_singledc() cluster, session = self._cluster_session_with_lbp(TokenAwarePolicy(RoundRobinPolicy())) + self.addCleanup(cluster.shutdown) self._wait_for_nodes_up(range(1, 4), cluster) create_schema(cluster, session, keyspace, replication_factor=1) @@ -485,13 +482,12 @@ def token_aware(self, keyspace, use_prepared=False): self.assertEqual(results, set([0, 12])) self.coordinator_stats.assert_query_count_equals(self, 2, 0) - cluster.shutdown() - def test_token_aware_composite_key(self): use_singledc() keyspace = 'test_token_aware_composite_key' table = 'composite' cluster, session = self._cluster_session_with_lbp(TokenAwarePolicy(RoundRobinPolicy())) + self.addCleanup(cluster.shutdown) self._wait_for_nodes_up(range(1, 4), cluster) create_schema(cluster, session, keyspace, replication_factor=2) @@ -520,12 +516,11 @@ def test_token_aware_composite_key(self): self.assertTrue(results[0].i) - cluster.shutdown() - def test_token_aware_with_rf_2(self, use_prepared=False): use_singledc() keyspace = 'test_token_aware_with_rf_2' cluster, session = self._cluster_session_with_lbp(TokenAwarePolicy(RoundRobinPolicy())) + self.addCleanup(cluster.shutdown) self._wait_for_nodes_up(range(1, 4), cluster) create_schema(cluster, session, keyspace, replication_factor=2) @@ -546,11 +541,10 @@ def test_token_aware_with_rf_2(self, use_prepared=False): self.coordinator_stats.assert_query_count_equals(self, 2, 0) self.coordinator_stats.assert_query_count_equals(self, 3, 12) - cluster.shutdown() - def test_token_aware_with_local_table(self): use_singledc() cluster, session = self._cluster_session_with_lbp(TokenAwarePolicy(RoundRobinPolicy())) + self.addCleanup(cluster.shutdown) self._wait_for_nodes_up(range(1, 4), cluster) p = session.prepare("SELECT * FROM system.local WHERE key=?") @@ -558,8 +552,6 @@ def test_token_aware_with_local_table(self): r = session.execute(p, ('local',)) self.assertEqual(r[0].key, 'local') - cluster.shutdown() - def test_token_aware_with_shuffle_rf2(self): """ Test to validate the hosts are shuffled when the `shuffle_replicas` is truthy @@ -572,6 +564,7 @@ def test_token_aware_with_shuffle_rf2(self): """ keyspace = 'test_token_aware_with_rf_2' cluster, session = self._set_up_shuffle_test(keyspace, replication_factor=2) + self.addCleanup(cluster.shutdown) self._check_query_order_changes(session=session, keyspace=keyspace) @@ 
-586,8 +579,6 @@ def test_token_aware_with_shuffle_rf2(self): self.coordinator_stats.assert_query_count_equals(self, 2, 0) self.coordinator_stats.assert_query_count_equals(self, 3, 12) - cluster.shutdown() - def test_token_aware_with_shuffle_rf3(self): """ Test to validate the hosts are shuffled when the `shuffle_replicas` is truthy @@ -600,6 +591,7 @@ def test_token_aware_with_shuffle_rf3(self): """ keyspace = 'test_token_aware_with_rf_3' cluster, session = self._set_up_shuffle_test(keyspace, replication_factor=3) + self.addCleanup(cluster.shutdown) self._check_query_order_changes(session=session, keyspace=keyspace) @@ -625,7 +617,47 @@ def test_token_aware_with_shuffle_rf3(self): self.coordinator_stats.assert_query_count_equals(self, 2, 0) self.coordinator_stats.assert_query_count_equals(self, 3, 12) - cluster.shutdown() + @notdse + @greaterthanorequalcass40 + def test_token_aware_with_transient_replication(self): + """ + Test to validate that the token aware policy doesn't route any request to a transient node. + + @since 3.23 + @jira_ticket PYTHON-1207 + @expected_result the requests are spread across the 2 full replicas and + no other nodes are queried by the coordinator. + + @test_category policy + """ + # We can test this with a single dc when CASSANDRA-15670 is fixed + use_multidc([3, 3]) + + cluster, session = self._cluster_session_with_lbp( + TokenAwarePolicy(DCAwareRoundRobinPolicy(), shuffle_replicas=True) + ) + self.addCleanup(cluster.shutdown) + + session.execute("CREATE KEYSPACE test_tr WITH replication = {'class': 'NetworkTopologyStrategy', 'dc1': '3/1', 'dc2': '3/1'};") + session.execute("CREATE TABLE test_tr.users (id int PRIMARY KEY, username text) WITH read_repair ='NONE';") + for i in range(100): + session.execute("INSERT INTO test_tr.users (id, username) VALUES (%d, 'user');" % (i,)) + + query = session.prepare("SELECT * FROM test_tr.users WHERE id = ?") + for i in range(100): + f = session.execute_async(query, (i,), trace=True) + full_dc1_replicas = [h for h in cluster.metadata.get_replicas('test_tr', cqltypes.Int32Type.serialize(i, cluster.protocol_version)) + if h.datacenter == 'dc1'] + self.assertEqual(len(full_dc1_replicas), 2) + + f.result() + trace_hosts = [cluster.metadata.get_host(e.source) for e in f.get_query_trace().events] + + for h in f.attempted_hosts: + self.assertIn(h, full_dc1_replicas) + for h in trace_hosts: + self.assertIn(h, full_dc1_replicas) + def _set_up_shuffle_test(self, keyspace, replication_factor): use_singledc() @@ -670,6 +702,7 @@ def test_white_list(self): ) } ) + self.addCleanup(cluster.shutdown) session = cluster.connect() self._wait_for_nodes_up([1, 2, 3]) @@ -695,8 +728,6 @@ def test_white_list(self): self.fail() except NoHostAvailable: pass - finally: - cluster.shutdown() def test_black_list_with_host_filter_policy(self): """ diff --git a/tests/unit/test_metadata.py b/tests/unit/test_metadata.py index 174ac1493b..b2143f8c20 100644 --- a/tests/unit/test_metadata.py +++ b/tests/unit/test_metadata.py @@ -34,7 +34,7 @@ UserType, KeyspaceMetadata, get_schema_parser, _UnknownStrategy, ColumnMetadata, TableMetadata, IndexMetadata, Function, Aggregate, - Metadata, TokenMap) + Metadata, TokenMap, ReplicationFactor) from cassandra.policies import SimpleConvictionPolicy from cassandra.pool import Host @@ -42,6 +42,34 @@ log = logging.getLogger(__name__) +class ReplicationFactorTest(unittest.TestCase): + + def test_replication_factor_parsing(self): + rf = ReplicationFactor.create('3') + self.assertEqual(rf.all_replicas, 3) + 
self.assertEqual(rf.full_replicas, 3) + self.assertEqual(rf.transient_replicas, None) + self.assertEqual(str(rf), '3') + + rf = ReplicationFactor.create('3/1') + self.assertEqual(rf.all_replicas, 3) + self.assertEqual(rf.full_replicas, 2) + self.assertEqual(rf.transient_replicas, 1) + self.assertEqual(str(rf), '3/1') + + self.assertRaises(ValueError, ReplicationFactor.create, '3/') + self.assertRaises(ValueError, ReplicationFactor.create, 'a/1') + self.assertRaises(ValueError, ReplicationFactor.create, 'a') + self.assertRaises(ValueError, ReplicationFactor.create, '3/a') + + def test_replication_factor_equality(self): + self.assertEqual(ReplicationFactor.create('3/1'), ReplicationFactor.create('3/1')) + self.assertEqual(ReplicationFactor.create('3'), ReplicationFactor.create('3')) + self.assertNotEqual(ReplicationFactor.create('3'), ReplicationFactor.create('3/1')) + self.assertNotEqual(ReplicationFactor.create('3'), ReplicationFactor.create('3/1')) + + + class StrategiesTest(unittest.TestCase): @classmethod @@ -109,11 +137,11 @@ def test_transient_replication_parsing(self): rs = ReplicationStrategy() simple_transient = rs.create('SimpleStrategy', {'replication_factor': '3/1'}) - self.assertEqual(simple_transient.replication_factor, 3) - self.assertEqual(simple_transient.transient_replicas, 1) + self.assertEqual(simple_transient.replication_factor_info, ReplicationFactor(3, 1)) + self.assertEqual(simple_transient.replication_factor, 2) self.assertIn("'replication_factor': '3/1'", simple_transient.export_for_schema()) - simple_str = rs.create('SimpleStrategy', {'replication_factor': '3'}) + simple_str = rs.create('SimpleStrategy', {'replication_factor': '2'}) self.assertNotEqual(simple_transient, simple_str) # make token replica map @@ -134,6 +162,8 @@ def test_nts_replication_parsing(self): self.assertEqual(nts_int.dc_replication_factors['dc1'], 3) self.assertEqual(nts_str.dc_replication_factors['dc1'], 3) + self.assertEqual(nts_int.dc_replication_factors_info['dc1'], ReplicationFactor(3)) + self.assertEqual(nts_str.dc_replication_factors_info['dc1'], ReplicationFactor(3)) self.assertEqual(nts_int.export_for_schema(), nts_str.export_for_schema()) self.assertEqual(nts_int, nts_str) @@ -152,8 +182,10 @@ def test_nts_transient_parsing(self): rs = ReplicationStrategy() nts_transient = rs.create('NetworkTopologyStrategy', {'dc1': '3/1', 'dc2': '5/1'}) - self.assertEqual(nts_transient.dc_replication_factors['dc1'], '3/1') - self.assertEqual(nts_transient.dc_replication_factors['dc2'], '5/1') + self.assertEqual(nts_transient.dc_replication_factors_info['dc1'], ReplicationFactor(3, 1)) + self.assertEqual(nts_transient.dc_replication_factors_info['dc2'], ReplicationFactor(5, 1)) + self.assertEqual(nts_transient.dc_replication_factors['dc1'], 2) + self.assertEqual(nts_transient.dc_replication_factors['dc2'], 4) self.assertIn("'dc1': '3/1', 'dc2': '5/1'", nts_transient.export_for_schema()) nts_str = rs.create('NetworkTopologyStrategy', {'dc1': '3', 'dc2': '5'}) From a40a2af79da1eb5f10484046de646711f8d139ba Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Fri, 3 Apr 2020 10:46:57 -0400 Subject: [PATCH 050/211] release 3.23: changelog & version --- CHANGELOG.rst | 2 +- cassandra/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 9f5deaabae..db2d7cb468 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,6 @@ 3.23.0 ====== -Unreleased +April 6, 2020 Features -------- diff --git a/cassandra/__init__.py b/cassandra/__init__.py 
index c4479464c3..161499fab2 100644 --- a/cassandra/__init__.py +++ b/cassandra/__init__.py @@ -22,7 +22,7 @@ def emit(self, record): logging.getLogger('cassandra').addHandler(NullHandler()) -__version_info__ = (3, 22, 0) +__version_info__ = (3, 23, 0) __version__ = '.'.join(map(str, __version_info__)) From a8865a7e54b00ffa49b9c6c3492c2fdd9dced786 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Fri, 3 Apr 2020 10:50:13 -0400 Subject: [PATCH 051/211] release 3.23: docs --- docs.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs.yaml b/docs.yaml index be1740f086..2298db2588 100644 --- a/docs.yaml +++ b/docs.yaml @@ -22,6 +22,8 @@ sections: # build extensions like libev CASS_DRIVER_NO_CYTHON=1 python setup.py build_ext --inplace --force versions: + - name: '3.23' + ref: a40a2af7 - name: '3.22' ref: 1ccd5b99 - name: '3.21' From 94478d7083285cf18e74ed48ac70ab164fa1646e Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Fri, 3 Apr 2020 11:03:18 -0400 Subject: [PATCH 052/211] appveyor: use the tmp native ports branch --- appveyor.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/appveyor.yml b/appveyor.yml index d1daaa6ec6..c687ab8f73 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -16,6 +16,7 @@ build_script: - cmd: | "%VS140COMNTOOLS%\..\..\VC\vcvarsall.bat" x86_amd64 python setup.py install --no-cython + pip install git+ssh://git@github.com/riptano/ccm-private.git@cassandra-7544-native-ports-with-dse-fix test_script: - ps: .\appveyor\run_test.ps1 cache: From ff8b3b2a87e396b7035feadcd7ad87f34dc05a17 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Fri, 3 Apr 2020 11:14:04 -0400 Subject: [PATCH 053/211] appveyor: remove use the tmp native ports branch --- appveyor.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index c687ab8f73..d1daaa6ec6 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -16,7 +16,6 @@ build_script: - cmd: | "%VS140COMNTOOLS%\..\..\VC\vcvarsall.bat" x86_amd64 python setup.py install --no-cython - pip install git+ssh://git@github.com/riptano/ccm-private.git@cassandra-7544-native-ports-with-dse-fix test_script: - ps: .\appveyor\run_test.ps1 cache: From 1a184b99a4fd0ed265b8037c636a8adeb5ebe046 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Mon, 6 Apr 2020 09:21:43 -0400 Subject: [PATCH 054/211] post version --- cassandra/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cassandra/__init__.py b/cassandra/__init__.py index 161499fab2..fd4e516f16 100644 --- a/cassandra/__init__.py +++ b/cassandra/__init__.py @@ -22,7 +22,7 @@ def emit(self, record): logging.getLogger('cassandra').addHandler(NullHandler()) -__version_info__ = (3, 23, 0) +__version_info__ = (3, 23, 0, 'post0') __version__ = '.'.join(map(str, __version_info__)) From 02e56bf68033487a9f5f5c5cf10534fb70d56afa Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Thu, 16 Apr 2020 10:19:29 -0400 Subject: [PATCH 055/211] add initial Jenkinsfile --- Jenkinsfile | 865 +++++++++++++++++++++++++++++++++++ build.yaml => build.yaml.bak | 0 2 files changed, 865 insertions(+) create mode 100644 Jenkinsfile rename build.yaml => build.yaml.bak (100%) diff --git a/Jenkinsfile b/Jenkinsfile new file mode 100644 index 0000000000..0b26bf00d0 --- /dev/null +++ b/Jenkinsfile @@ -0,0 +1,865 @@ +#!groovy + +def initializeEnvironment() { + env.DRIVER_DISPLAY_NAME = 'Cassandra Python Driver' + env.DRIVER_METRIC_TYPE = 'oss' + if (env.GIT_URL.contains('riptano/python-driver')) { + env.DRIVER_DISPLAY_NAME = 'private ' + 
env.DRIVER_DISPLAY_NAME
+    env.DRIVER_METRIC_TYPE = 'oss-private'
+  } else if (env.GIT_URL.contains('python-dse-driver')) {
+    env.DRIVER_DISPLAY_NAME = 'DSE Python Driver'
+    env.DRIVER_METRIC_TYPE = 'dse'
+  }
+
+  env.GIT_SHA = "${env.GIT_COMMIT.take(7)}"
+  env.GITHUB_PROJECT_URL = "https://${GIT_URL.replaceFirst(/(git@|http:\/\/|https:\/\/)/, '').replace(':', '/').replace('.git', '')}"
+  env.GITHUB_BRANCH_URL = "${GITHUB_PROJECT_URL}/tree/${env.BRANCH_NAME}"
+  env.GITHUB_COMMIT_URL = "${GITHUB_PROJECT_URL}/commit/${env.GIT_COMMIT}"
+
+  sh label: 'Assign Python global environment', script: '''#!/bin/bash -lex
+    pyenv global ${PYTHON_VERSION}
+  '''
+
+  sh label: 'Install socat; required for unix socket tests', script: '''#!/bin/bash -lex
+    sudo apt-get install socat
+  '''
+
+  sh label: 'Install the latest setuptools', script: '''#!/bin/bash -lex
+    pip install --upgrade pip
+    pip install -U setuptools
+  '''
+
+  sh label: 'Install CCM', script: '''#!/bin/bash -lex
+    pip install ${HOME}/ccm
+  '''
+
+  // Determine if server version is Apache CassandraⓇ or DataStax Enterprise
+  if (env.CASSANDRA_VERSION.split('-')[0] == 'dse') {
+    sh label: 'Install DataStax Enterprise requirements', script: '''#!/bin/bash -lex
+      pip install -r test-datastax-requirements.txt
+    '''
+  } else {
+    sh label: 'Install Apache CassandraⓇ requirements', script: '''#!/bin/bash -lex
+      pip install -r test-requirements.txt
+    '''
+  }
+
+  sh label: 'Install unit test modules', script: '''#!/bin/bash -lex
+    pip install nose-ignore-docstring nose-exclude service_identity
+  '''
+
+  if (params.CYTHON) {
+    sh label: 'Install cython modules', script: '''#!/bin/bash -lex
+      pip install cython numpy
+    '''
+  }
+
+  sh label: 'Download Apache CassandraⓇ or DataStax Enterprise', script: '''#!/bin/bash -lex
+    . ${CCM_ENVIRONMENT_SHELL} ${CASSANDRA_VERSION}
+  '''
+
+  sh label: 'Display Python and environment information', script: '''#!/bin/bash -le
+    # Load CCM environment variables
+    set -o allexport
+    . ${HOME}/environment.txt
+    set +o allexport
+
+    python --version
+    pip --version
+    printenv | sort
+  '''
+}
+
+def installDriverAndCompileExtensions() {
+  if (params.CYTHON) {
+    sh label: 'Install the driver and compile with C extensions with Cython', script: '''#!/bin/bash -lex
+      python setup.py build_ext --inplace
+    '''
+  } else {
+    sh label: 'Install the driver and compile with C extensions without Cython', script: '''#!/bin/bash -lex
+      python setup.py build_ext --inplace --no-cython
+    '''
+  }
+}
+
+def executeStandardTests() {
+  /*
+   * Run the cython unit tests; this is not done in travis because it takes too much time for the
+   * whole matrix to build with cython
+   */
+  if (params.CYTHON) {
+    sh label: 'Execute Cython unit tests', script: '''#!/bin/bash -lex
+      # Load CCM environment variables
+      set -o allexport
+      . 
${HOME}/environment.txt + set +o allexport + + EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} VERIFY_CYTHON=True nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_results.xml tests/unit/ || true + EVENT_LOOP_MANAGER=eventlet VERIFY_CYTHON=True nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_eventlet_results.xml tests/unit/io/test_eventletreactor.py || true + EVENT_LOOP_MANAGER=gevent VERIFY_CYTHON=True nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_gevent_results.xml tests/unit/io/test_geventreactor.py || true + ''' + } + + sh label: 'Execute Simulacron integration tests', script: '''#!/bin/bash -lex + # Load CCM environment variables + set -o allexport + . ${HOME}/environment.txt + set +o allexport + + SIMULACRON_JAR="${HOME}/simulacron.jar" + #SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=simulacron_results.xml tests/integration/simulacron/ || true + ''' + + sh label: 'Execute CQL engine integration tests', script: '''#!/bin/bash -lex + # Load CCM environment variables + set -o allexport + . ${HOME}/environment.txt + set +o allexport + + #EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=cqle_results.xml tests/integration/cqlengine/ || true + ''' + + sh label: 'Execute Apache CassandraⓇ integration tests', script: '''#!/bin/bash -lex + # Load CCM environment variables + set -o allexport + . ${HOME}/environment.txt + set +o allexport + + #EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml tests/integration/standard/test_cluster.py || true + ''' + + if (env.CASSANDRA_VERSION.split('-')[0] == 'dse' && env.CASSANDRA_VERSION.split('-')[1] != '4.8') { + sh label: 'Execute DataStax Enterprise integration tests', script: '''#!/bin/bash -lex + # Load CCM environment variable + set -o allexport + . 
${HOME}/environment.txt + set +o allexport + + #EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} DSE_VERSION=${DSE_VERSION} ADS_HOME="${HOME}/" VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=dse_results.xml tests/integration/advanced/ || true + ''' + } + + sh label: 'Execute DataStax Constellation integration tests', script: '''#!/bin/bash -lex + # Load CCM environment variable + set -o allexport + . ${HOME}/environment.txt + set +o allexport + + #EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CLOUD_PROXY_PATH="${HOME}/proxy/" CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=advanced_results.xml tests/integration/cloud/ || true + ''' + + if (env.EXECUTE_LONG_TESTS == 'True') { + sh label: 'Execute long running integration tests', script: '''#!/bin/bash -lex + # Load CCM environment variable + set -o allexport + . ${HOME}/environment.txt + set +o allexport + + #EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --exclude-dir=tests/integration/long/upgrade --with-ignore-docstrings --with-xunit --xunit-file=long_results.xml tests/integration/long/ || true + ''' + } +} + +def executeDseSmokeTests() { + sh label: 'Execute profile DataStax Enterprise smoke test integration tests', script: '''#!/bin/bash -lex + # Load CCM environment variable + set -o allexport + . ${HOME}/environment.txt + set +o allexport + + EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} DSE_VERSION=${DSE_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml tests/integration/standard/test_dse.py || true + ''' +} + +def executeEventLoopTests() { + sh label: 'Execute profile event loop manager integration tests', script: '''#!/bin/bash -lex + # Load CCM environment variable + set -o allexport + . 
${HOME}/environment.txt + set +o allexport + + EVENT_LOOP_TESTS=( + "tests/integration/standard/test_cluster.py" + "tests/integration/standard/test_concurrent.py" + "tests/integration/standard/test_connection.py" + "tests/integration/standard/test_control_connection.py" + "tests/integration/standard/test_metrics.py" + "tests/integration/standard/test_query.py" + "tests/integration/simulacron/test_endpoint.py" + "tests/integration/long/test_ssl.py" + ) + EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml ${EVENT_LOOP_TESTS[@]} || true + ''' +} + +def executeUpgradeTests() { + sh label: 'Execute profile upgrade integration tests', script: '''#!/bin/bash -lex + # Load CCM environment variable + set -o allexport + . ${HOME}/environment.txt + set +o allexport + + EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=upgrade_results.xml tests/integration/upgrade || true + ''' +} + +def executeTests() { + switch(params.PROFILE) { + case 'DSE-SMOKE-TEST': + executeDseSmokeTests() + break + case 'EVENT-LOOP': + executeEventLoopTests() + break + case 'UPGRADE': + executeUpgradeTests() + break + default: + executeStandardTests() + break + } +} + +def notifySlack(status = 'started') { + // Set the global pipeline scoped environment (this is above each matrix) + env.BUILD_STATED_SLACK_NOTIFIED = 'true' + + def buildType = 'Commit' + if (params.CI_SCHEDULE != 'DO-NOT-CHANGE-THIS-SELECTION') { + buildType = "${params.CI_SCHEDULE.toLowerCase().capitalize()}" + } + + def color = 'good' // Green + if (status.equalsIgnoreCase('aborted')) { + color = '808080' // Grey + } else if (status.equalsIgnoreCase('unstable')) { + color = 'warning' // Orange + } else if (status.equalsIgnoreCase('failed')) { + color = 'danger' // Red + } + + def message = """Build ${status} for ${env.DRIVER_DISPLAY_NAME} [${buildType}] +<${env.GITHUB_BRANCH_URL}|${env.BRANCH_NAME}> - <${env.RUN_DISPLAY_URL}|#${env.BUILD_NUMBER}> - <${env.GITHUB_COMMIT_URL}|${env.GIT_SHA}>""" + if (params.CI_SCHEDULE != 'DO-NOT-CHANGE-THIS-SELECTION') { + message += " - ${params.CI_SCHEDULE_PYTHON_VERSION} - ${params.EVENT_LOOP_MANAGER}" + } + if (!status.equalsIgnoreCase('Started')) { + message += """ +${status} after ${currentBuild.durationString - ' and counting'}""" + } + + slackSend color: "${color}", + channel: "#python-driver-dev-bots", + message: "${message}" +} + +def submitCIMetrics(buildType) { + long durationMs = currentBuild.duration + long durationSec = durationMs / 1000 + long nowSec = (currentBuild.startTimeInMillis + durationMs) / 1000 + def branchNameNoPeriods = env.BRANCH_NAME.replaceAll('\\.', '_') + def durationMetric = "okr.ci.python.${env.DRIVER_METRIC_TYPE}.${buildType}.${branchNameNoPeriods} ${durationSec} ${nowSec}" + + timeout(time: 1, unit: 'MINUTES') { + withCredentials([string(credentialsId: 'lab-grafana-address', variable: 'LAB_GRAFANA_ADDRESS'), + string(credentialsId: 'lab-grafana-port', variable: 'LAB_GRAFANA_PORT')]) { + withEnv(["DURATION_METRIC=${durationMetric}"]) { + sh label: 'Send runtime metrics to labgrafana', script: '''#!/bin/bash -lex 
+ echo "${DURATION_METRIC}" | nc -q 5 ${LAB_GRAFANA_ADDRESS} ${LAB_GRAFANA_PORT} + ''' + } + } + } +} + +def describePerCommitStage() { + script { + def type = 'standard' + def serverDescription = 'current Apache CassandaraⓇ and supported DataStax Enterprise versions' + if (env.BRANCH_NAME ==~ /long-python.*/) { + type = 'long' + } else if (env.BRANCH_NAME ==~ /dev-python.*/) { + type = 'dev' + } + + currentBuild.displayName = "Per-Commit (${env.EVENT_LOOP_MANAGER} | ${type.capitalize()})" + currentBuild.description = "Per-Commit build and ${type} testing of ${serverDescription} against Python v2.7.14 and v3.5.6 using ${env.EVENT_LOOP_MANAGER} event loop manager" + } +} + +def describeScheduledTestingStage() { + script { + def type = params.CI_SCHEDULE.toLowerCase().capitalize() + def displayName = "${type} schedule (${env.EVENT_LOOP_MANAGER}" + if (params.CYTHON) { + displayName += " | Cython" + } + if (params.PROFILE != 'NONE') { + displayName += " | ${params.PROFILE}" + } + displayName += ")" + currentBuild.displayName = displayName + + def serverVersionDescription = "${params.CI_SCHEDULE_SERVER_VERSION.replaceAll(' ', ', ')} server version(s) in the matrix" + def pythonVersionDescription = "${params.CI_SCHEDULE_PYTHON_VERSION.replaceAll(' ', ', ')} Python version(s) in the matrix" + def description = "${type} scheduled testing using ${env.EVENT_LOOP_MANAGER} event loop manager" + if (params.CYTHON) { + description += ", with Cython enabled" + } + if (params.PROFILE != 'NONE') { + description += ", ${params.PROFILE} profile" + } + description += ", ${serverVersionDescription}, and ${pythonVersionDescription}" + currentBuild.description = description + } +} + +def describeAdhocTestingStage() { + script { + def serverType = params.ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION.split('-')[0] + def serverDisplayName = 'Apache CassandaraⓇ' + def serverVersion = " v${serverType}" + if (serverType == 'ALL') { + serverDisplayName = "all ${serverDisplayName} and DataStax Enterprise server versions" + serverVersion = '' + } else { + try { + serverVersion = " v${env.ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION.split('-')[1]}" + } catch (e) { + ;; // no-op + } + if (serverType == 'dse') { + serverDisplayName = 'DataStax Enterprise' + } + } + def displayName = "${params.ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION} for v${params.ADHOC_BUILD_AND_EXECUTE_TESTS_PYTHON_VERSION} (${env.EVENT_LOOP_MANAGER}" + if (params.CYTHON) { + displayName += " | Cython" + } + if (params.PROFILE != 'NONE') { + displayName += " | ${params.PROFILE}" + } + displayName += ")" + currentBuild.displayName = displayName + + def description = "Testing ${serverDisplayName} ${serverVersion} using ${env.EVENT_LOOP_MANAGER} against Python ${params.ADHOC_BUILD_AND_EXECUTE_TESTS_PYTHON_VERSION}" + if (params.CYTHON) { + description += ", with Cython" + } + if (params.PROFILE == 'NONE') { + if (params.EXECUTE_LONG_TESTS) { + description += ", with" + } else { + description += ", without" + } + description += " long tests executed" + } else { + description += ", ${params.PROFILE} profile" + } + currentBuild.description = description + } +} + +def branchPatternCron = ~"(master)" + +pipeline { + agent none + + // Global pipeline timeout + options { + timeout(time: 10, unit: 'HOURS') + buildDiscarder(logRotator(artifactNumToKeepStr: '10', // Keep only the last 10 artifacts + numToKeepStr: '50')) // Keep only the last 50 build records + } + + parameters { + choice( + name: 'ADHOC_BUILD_TYPE', + choices: ['BUILD', 'BUILD-AND-EXECUTE-TESTS'], 
+      description: '''Perform an adhoc build operation
+                      <table style="width:100%">
+                        <tr><th align="left">Choice</th><th align="left">Description</th></tr>
+                        <tr><td><strong>BUILD</strong></td><td>Performs a Per-Commit build</td></tr>
+                        <tr><td><strong>BUILD-AND-EXECUTE-TESTS</strong></td><td>Performs a build and executes the integration and unit tests</td></tr>
+                      </table>''')
+    choice(
+      name: 'ADHOC_BUILD_AND_EXECUTE_TESTS_PYTHON_VERSION',
+      choices: ['2.7.14', '3.4.9', '3.5.6', '3.6.6', '3.7.4', '3.8.0'],
+      description: 'Python version to use for adhoc BUILD-AND-EXECUTE-TESTS ONLY!')
+    choice(
+      name: 'ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION',
+      choices: ['2.1',       // Legacy Apache CassandraⓇ
+                '2.2',       // Legacy Apache CassandraⓇ
+                '3.0',       // Previous Apache CassandraⓇ
+                '3.11',      // Current Apache CassandraⓇ
+                '4.0',       // Development Apache CassandraⓇ
+                'dse-5.0',   // Long Term Support DataStax Enterprise
+                'dse-5.1',   // Legacy DataStax Enterprise
+                'dse-6.0',   // Previous DataStax Enterprise
+                'dse-6.7',   // Previous DataStax Enterprise
+                'dse-6.8.0', // Current DataStax Enterprise
+                'dse-6.8',   // Development DataStax Enterprise
+                'ALL'],
+      description: '''Apache CassandraⓇ and DataStax Enterprise server version to use for adhoc BUILD-AND-EXECUTE-TESTS ONLY!
+                      <table style="width:100%">
+                        <tr><th align="left">Choice</th><th align="left">Description</th></tr>
+                        <tr><td><strong>2.1</strong></td><td>Apache CassandraⓇ v2.1.x</td></tr>
+                        <tr><td><strong>2.2</strong></td><td>Apache CassandraⓇ v2.2.x</td></tr>
+                        <tr><td><strong>3.0</strong></td><td>Apache CassandraⓇ v3.0.x</td></tr>
+                        <tr><td><strong>3.11</strong></td><td>Apache CassandraⓇ v3.11.x</td></tr>
+                        <tr><td><strong>4.0</strong></td><td>Apache CassandraⓇ v4.x (CURRENTLY UNDER DEVELOPMENT)</td></tr>
+                        <tr><td><strong>dse-5.0</strong></td><td>DataStax Enterprise v5.0.x (Long Term Support)</td></tr>
+                        <tr><td><strong>dse-5.1</strong></td><td>DataStax Enterprise v5.1.x</td></tr>
+                        <tr><td><strong>dse-6.0</strong></td><td>DataStax Enterprise v6.0.x</td></tr>
+                        <tr><td><strong>dse-6.7</strong></td><td>DataStax Enterprise v6.7.x</td></tr>
+                        <tr><td><strong>dse-6.8.0</strong></td><td>DataStax Enterprise v6.8.0</td></tr>
+                        <tr><td><strong>dse-6.8</strong></td><td>DataStax Enterprise v6.8.x (CURRENTLY UNDER DEVELOPMENT)</td></tr>
+                      </table>
''') + booleanParam( + name: 'CYTHON', + defaultValue: false, + description: 'Flag to determine if Cython should be enabled for scheduled or adhoc builds') + booleanParam( + name: 'EXECUTE_LONG_TESTS', + defaultValue: false, + description: 'Flag to determine if long integration tests should be executed for scheduled or adhoc builds') + choice( + name: 'EVENT_LOOP_MANAGER', + choices: ['LIBEV', 'GEVENT', 'EVENTLET', 'ASYNCIO', 'ASYNCORE', 'TWISTED'], + description: '''

+                      Event loop manager to utilize for scheduled or adhoc builds
+                      <table style="width:100%">
+                        <tr><th align="left">Choice</th><th align="left">Description</th></tr>
+                        <tr><td><strong>LIBEV</strong></td><td>A full-featured and high-performance event loop that is loosely modeled after libevent, but without its limitations and bugs</td></tr>
+                        <tr><td><strong>GEVENT</strong></td><td>A coroutine-based Python networking library that uses greenlet to provide a high-level synchronous API on top of the libev or libuv event loop</td></tr>
+                        <tr><td><strong>EVENTLET</strong></td><td>A concurrent networking library for Python that allows you to change how you run your code, not how you write it</td></tr>
+                        <tr><td><strong>ASYNCIO</strong></td><td>A library to write concurrent code using the async/await syntax</td></tr>
+                        <tr><td><strong>ASYNCORE</strong></td><td>A module that provides the basic infrastructure for writing asynchronous socket service clients and servers</td></tr>
+                        <tr><td><strong>TWISTED</strong></td><td>An event-driven networking engine written in Python and licensed under the open source MIT license</td></tr>
+                      </table>
''') + choice( + name: 'PROFILE', + choices: ['NONE', 'DSE-SMOKE-TEST', 'EVENT-LOOP', 'UPGRADE'], + description: '''

+                      Profile to utilize for scheduled or adhoc builds
+                      <table style="width:100%">
+                        <tr><th align="left">Choice</th><th align="left">Description</th></tr>
+                        <tr><td><strong>NONE</strong></td><td>Execute the standard tests for the driver</td></tr>
+                        <tr><td><strong>DSE-SMOKE-TEST</strong></td><td>Execute only the DataStax Enterprise smoke tests</td></tr>
+                        <tr><td><strong>EVENT-LOOP</strong></td><td>Execute only the event loop tests for the specified event loop manager (see: EVENT_LOOP_MANAGER)</td></tr>
+                        <tr><td><strong>UPGRADE</strong></td><td>Execute only the upgrade tests</td></tr>
+                      </table>
''') + choice( + name: 'CI_SCHEDULE', + choices: ['DO-NOT-CHANGE-THIS-SELECTION', 'WEEKNIGHTS', 'WEEKENDS'], + description: 'CI testing schedule to execute periodically scheduled builds and tests of the driver (DO NOT CHANGE THIS SELECTION)') + string( + name: 'CI_SCHEDULE_PYTHON_VERSION', + defaultValue: 'DO-NOT-CHANGE-THIS-SELECTION', + description: 'CI testing python version to utilize for scheduled test runs of the driver (DO NOT CHANGE THIS SELECTION)') + string( + name: 'CI_SCHEDULE_SERVER_VERSION', + defaultValue: 'DO-NOT-CHANGE-THIS-SELECTION', + description: 'CI testing server version to utilize for scheduled test runs of the driver (DO NOT CHANGE THIS SELECTION)') + } + + triggers { + parameterizedCron(branchPatternCron.matcher(env.BRANCH_NAME).matches() ? """ + # Every weeknight (Monday - Friday) around 4:00 AM + # These schedules will run with and without Cython enabled for Python v2.7.14 and v3.5.6 + H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=2.7.14;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1 dse-6.0 dse-6.7 + H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.5.6;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1 dse-6.0 dse-6.7 + + # Every Saturday around 12:00, 4:00 and 8:00 PM + # These schedules are for weekly libev event manager runs with and without Cython for most of the Python versions (excludes v3.5.6.x) + H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=2.7.14;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 + H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.4.9;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 + H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.6.6;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 + H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.7.4;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 + H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.8.0;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 + # These schedules are for weekly gevent event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.9.x) + H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.14;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.6;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.6;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.4;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.0;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + # These schedules are for weekly eventlet event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.9.x) + H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.14;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 
3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.6;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.6;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.4;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.0;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + + # Every Sunday around 12:00 and 4:00 AM + # These schedules are for weekly asyncore event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.9.x) + H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.14;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.6;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.6;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.4;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.0;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + # These schedules are for weekly twisted event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.9.x) + H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.14;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.6;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.6;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.4;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.0;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + """ : "") + } + + environment { + OS_VERSION = 'ubuntu/bionic64/python-driver' + CYTHON_ENABLED = "${params.CYTHON ? 'True' : 'False'}" + EVENT_LOOP_MANAGER = "${params.EVENT_LOOP_MANAGER.toLowerCase()}" + EXECUTE_LONG_TESTS = "${params.EXECUTE_LONG_TESTS ? 
'True' : 'False'}" + CCM_ENVIRONMENT_SHELL = '/usr/local/bin/ccm_environment.sh' + } + + stages { + stage ('Per-Commit') { + options { + timeout(time: 2, unit: 'HOURS') + } + when { + beforeAgent true + branch pattern: '((dev|long)-)+python.*', comparator: 'REGEXP' + allOf { + expression { params.ADHOC_BUILD_TYPE == 'BUILD' } + expression { params.CI_SCHEDULE == 'DO-NOT-CHANGE-THIS-SELECTION' } + not { buildingTag() } + } + } + + matrix { + axes { + axis { + name 'CASSANDRA_VERSION' + values '3.11', // Current Apache Cassandra + 'dse-6.8.0' // Current DataStax Enterprise + } + axis { + name 'PYTHON_VERSION' + values '2.7.14', '3.5.6' + } + axis { + name 'CYTHON_ENABLED' + values 'False' + } + } + + agent { + label "${OS_VERSION}" + } + + stages { + stage('Initialize-Environment') { + steps { + initializeEnvironment() + script { + if (env.BUILD_STATED_SLACK_NOTIFIED != 'true') { + notifySlack() + } + } + } + } + stage('Describe-Build') { + steps { + describePerCommitStage() + } + } + stage('Install-Driver-And-Compile-Extensions') { + steps { + installDriverAndCompileExtensions() + } + } + stage('Execute-Tests') { + steps { + + script { + if (env.BRANCH_NAME ==~ /long-python.*/) { + withEnv(["EXECUTE_LONG_TESTS=True"]) { + executeTests() + } + } + else { + executeTests() + } + } + } + post { + always { + junit testResults: '*_results.xml' + } + } + } + } + } + post { + always { + node('master') { + submitCIMetrics('commit') + } + } + aborted { + notifySlack('aborted') + } + success { + notifySlack('completed') + } + unstable { + notifySlack('unstable') + } + failure { + notifySlack('FAILED') + } + } + } + + stage ('Scheduled-Testing') { + when { + beforeAgent true + allOf { + expression { params.ADHOC_BUILD_TYPE == 'BUILD' } + expression { params.CI_SCHEDULE != 'DO-NOT-CHANGE-THIS-SELECTION' } + not { buildingTag() } + } + } + matrix { + axes { + axis { + name 'CASSANDRA_VERSION' + values '2.1', // Legacy Apache Cassandra + '2.2', // Legacy Apache Cassandra + '3.0', // Previous Apache Cassandra + '3.11', // Current Apache Cassandra + 'dse-5.1', // Legacy DataStax Enterprise + 'dse-6.0', // Previous DataStax Enterprise + 'dse-6.7' // Current DataStax Enterprise + } + axis { + name 'CYTHON_ENABLED' + values 'True', 'False' + } + } + when { + beforeAgent true + allOf { + expression { return params.CI_SCHEDULE_SERVER_VERSION.split(' ').any { it =~ /(ALL|${env.CASSANDRA_VERSION})/ } } + } + } + + environment { + PYTHON_VERSION = "${params.CI_SCHEDULE_PYTHON_VERSION}" + } + agent { + label "${OS_VERSION}" + } + + stages { + stage('Initialize-Environment') { + steps { + initializeEnvironment() + script { + if (env.BUILD_STATED_SLACK_NOTIFIED != 'true') { + notifySlack() + } + } + } + } + stage('Describe-Build') { + steps { + describeScheduledTestingStage() + } + } + stage('Install-Driver-And-Compile-Extensions') { + steps { + installDriverAndCompileExtensions() + } + } + stage('Execute-Tests') { + steps { + executeTests() + } + post { + always { + junit testResults: '*_results.xml' + } + } + } + } + } + post { + aborted { + notifySlack('aborted') + } + success { + notifySlack('completed') + } + unstable { + notifySlack('unstable') + } + failure { + notifySlack('FAILED') + } + } + } + + + stage('Adhoc-Testing') { + when { + beforeAgent true + allOf { + expression { params.ADHOC_BUILD_TYPE == 'BUILD-AND-EXECUTE-TESTS' } + not { buildingTag() } + } + } + + environment { + CYTHON_ENABLED = "${params.CYTHON ? 
'True' : 'False'}" + PYTHON_VERSION = "${params.ADHOC_BUILD_AND_EXECUTE_TESTS_PYTHON_VERSION}" + } + + matrix { + axes { + axis { + name 'CASSANDRA_VERSION' + values '2.1', // Legacy Apache Cassandra + '2.2', // Legacy Apache Cassandra + '3.0', // Previous Apache Cassandra + '3.11', // Current Apache Cassandra + '4.0', // Development Apache Cassandra + 'dse-5.0', // Long Term Support DataStax Enterprise + 'dse-5.1', // Legacy DataStax Enterprise + 'dse-6.0', // Previous DataStax Enterprise + 'dse-6.7', // Current DataStax Enterprise + 'dse-6.8' // Development DataStax Enterprise + } + } + when { + beforeAgent true + allOf { + expression { params.ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION ==~ /(ALL|${env.CASSANDRA_VERSION})/ } + } + } + + agent { + label "${OS_VERSION}" + } + + stages { + stage('Describe-Build') { + steps { + describeAdhocTestingStage() + } + } + stage('Initialize-Environment') { + steps { + initializeEnvironment() + } + } + stage('Install-Driver-And-Compile-Extensions') { + steps { + installDriverAndCompileExtensions() + } + } + stage('Execute-Tests') { + steps { + executeTests() + } + post { + always { + junit testResults: '*_results.xml' + } + } + } + } + } + } + } +} diff --git a/build.yaml b/build.yaml.bak similarity index 100% rename from build.yaml rename to build.yaml.bak From 4704453d52d65fa4b02ddbdcffb1425695483022 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Fri, 8 May 2020 13:40:38 -0400 Subject: [PATCH 056/211] paging_state typo --- docs/query_paging.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/query_paging.rst b/docs/query_paging.rst index 2c4a4995ca..23ee2c1129 100644 --- a/docs/query_paging.rst +++ b/docs/query_paging.rst @@ -86,7 +86,7 @@ You can resume the pagination when executing a new query by using the :attr:`.Re results = session.execute(statement) # save the paging_state somewhere and return current results - web_session['paging_stage'] = results.paging_state + web_session['paging_state'] = results.paging_state # resume the pagination sometime later... From 448a879c883d8d0e3fab32cebd7d20a0b329ed64 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Mon, 11 May 2020 12:00:16 -0400 Subject: [PATCH 057/211] Do not trigger a schedule for the riptano repo --- Jenkinsfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 0b26bf00d0..a0d1730a61 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -361,6 +361,7 @@ def describeAdhocTestingStage() { } def branchPatternCron = ~"(master)" +def repoPatternCron = ~"^(riptano)" pipeline { agent none @@ -551,7 +552,7 @@ pipeline { } triggers { - parameterizedCron(branchPatternCron.matcher(env.BRANCH_NAME).matches() ? """ + parameterizedCron(branchPatternCron.matcher(env.BRANCH_NAME).matches() && repoPatternCron.matcher(env.GIT_URL).matches() ? 
""" # Every weeknight (Monday - Friday) around 4:00 AM # These schedules will run with and without Cython enabled for Python v2.7.14 and v3.5.6 H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=2.7.14;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1 dse-6.0 dse-6.7 From 78c898275b4de284f9173d45dbf8c12952985099 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 12 May 2020 10:31:57 -0400 Subject: [PATCH 058/211] Change the riptano repo pattern match string --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index a0d1730a61..e7a53f12cb 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -361,7 +361,7 @@ def describeAdhocTestingStage() { } def branchPatternCron = ~"(master)" -def repoPatternCron = ~"^(riptano)" +def riptanoPatternCron = ~"(riptano)" pipeline { agent none @@ -552,7 +552,7 @@ pipeline { } triggers { - parameterizedCron(branchPatternCron.matcher(env.BRANCH_NAME).matches() && repoPatternCron.matcher(env.GIT_URL).matches() ? """ + parameterizedCron(branchPatternCron.matcher(env.BRANCH_NAME).matches() && !riptanoPatternCron.matcher(env.GIT_URL).find() ? """ # Every weeknight (Monday - Friday) around 4:00 AM # These schedules will run with and without Cython enabled for Python v2.7.14 and v3.5.6 H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=2.7.14;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1 dse-6.0 dse-6.7 From f2347735aa414bcbef333fa9f7723562624b4394 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Wed, 13 May 2020 09:09:13 -0400 Subject: [PATCH 059/211] Refactored graph fluent tests to avoid ram issue --- .../advanced/graph/fluent/__init__.py | 709 +++++++++++++++ .../advanced/graph/fluent/test_graph.py | 860 +----------------- .../fluent/test_graph_explicit_execution.py | 96 ++ .../fluent/test_graph_implicit_execution.py | 108 +++ 4 files changed, 923 insertions(+), 850 deletions(-) create mode 100644 tests/integration/advanced/graph/fluent/test_graph_explicit_execution.py create mode 100644 tests/integration/advanced/graph/fluent/test_graph_implicit_execution.py diff --git a/tests/integration/advanced/graph/fluent/__init__.py b/tests/integration/advanced/graph/fluent/__init__.py index 2c9ca172f8..3bb81e78e3 100644 --- a/tests/integration/advanced/graph/fluent/__init__.py +++ b/tests/integration/advanced/graph/fluent/__init__.py @@ -11,3 +11,712 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+
+import sys
+import datetime
+import six
+import time
+from collections import namedtuple
+from packaging.version import Version
+
+from cassandra.datastax.graph.fluent import DseGraph
+from cassandra.graph import VertexProperty, GraphProtocol
+from cassandra.util import Point, Polygon, LineString
+
+from gremlin_python.process.graph_traversal import GraphTraversal, GraphTraversalSource
+from gremlin_python.process.traversal import P
+from gremlin_python.structure.graph import Edge as TravEdge
+from gremlin_python.structure.graph import Vertex as TravVertex, VertexProperty as TravVertexProperty
+
+from tests.util import wait_until_not_raised
+from tests.integration import DSE_VERSION
+from tests.integration.advanced.graph import (
+    GraphUnitTestCase, ClassicGraphSchema, CoreGraphSchema,
+    VertexLabel)
+from tests.integration import requiredse
+
+try:
+    import unittest2 as unittest
+except ImportError:
+    import unittest  # noqa
+
+
+import ipaddress
+
+
+def check_equality_base(testcase, original, read_value):
+    if isinstance(original, float):
+        testcase.assertAlmostEqual(original, read_value, delta=.01)
+    elif isinstance(original, ipaddress.IPv4Address):
+        testcase.assertAlmostEqual(original, ipaddress.IPv4Address(read_value))
+    elif isinstance(original, ipaddress.IPv6Address):
+        testcase.assertAlmostEqual(original, ipaddress.IPv6Address(read_value))
+    else:
+        testcase.assertEqual(original, read_value)
+
+
+def create_traversal_profiles(cluster, graph_name):
+    ep_graphson2 = DseGraph().create_execution_profile(
+        graph_name, graph_protocol=GraphProtocol.GRAPHSON_2_0)
+    ep_graphson3 = DseGraph().create_execution_profile(
+        graph_name, graph_protocol=GraphProtocol.GRAPHSON_3_0)
+
+    cluster.add_execution_profile('traversal_graphson2', ep_graphson2)
+    cluster.add_execution_profile('traversal_graphson3', ep_graphson3)
+
+    return ep_graphson2, ep_graphson3
+
+
+class _AbstractTraversalTest(GraphUnitTestCase):
+
+    def setUp(self):
+        super(_AbstractTraversalTest, self).setUp()
+        self.ep_graphson2, self.ep_graphson3 = create_traversal_profiles(self.cluster, self.graph_name)
+
+    def _test_basic_query(self, schema, graphson):
+        """
+        Test to validate that basic graph queries work
+
+        Creates a simple classic TinkerPop graph, and attempts to perform a basic query
+        using TinkerPop's GLV with both explicit and implicit execution,
+        ensuring that each one is correct. See reference graph here
+        http://www.tinkerpop.com/docs/3.0.0.M1/
+
+        @since 1.0.0
+        @jira_ticket PYTHON-641
+        @expected_result graph should generate and all vertex and edge results should be correct
+
+        @test_category dse graph
+        """
+
+        g = self.fetch_traversal_source(graphson)
+        self.execute_graph(schema.fixtures.classic(), graphson)
+        traversal = g.V().has('name', 'marko').out('knows').values('name')
+        results_list = self.execute_traversal(traversal, graphson)
+        self.assertEqual(len(results_list), 2)
+        self.assertIn('vadas', results_list)
+        self.assertIn('josh', results_list)
+
+    def _test_classic_graph(self, schema, graphson):
+        """
+        Test to validate that basic graph generation works, and that vertices and edges are surfaced correctly
+
+        Creates a simple classic TinkerPop graph, and iterates over the vertices and edges
+        using TinkerPop's GLV with both explicit and implicit execution,
+        ensuring that each one is correct. See reference graph here
+        http://www.tinkerpop.com/docs/3.0.0.M1/
+
+        @since 1.0.0
+        @jira_ticket PYTHON-641
+        @expected_result graph should generate and all vertex and edge results should be correct
+
+        @test_category dse graph
+        """
+
+        self.execute_graph(schema.fixtures.classic(), graphson)
+        ep = self.get_execution_profile(graphson)
+        g = self.fetch_traversal_source(graphson)
+        traversal = g.V()
+        vert_list = self.execute_traversal(traversal, graphson)
+
+        for vertex in vert_list:
+            schema.ensure_properties(self.session, vertex, execution_profile=ep)
+            self._validate_classic_vertex(g, vertex)
+        traversal = g.E()
+        edge_list = self.execute_traversal(traversal, graphson)
+        for edge in edge_list:
+            schema.ensure_properties(self.session, edge, execution_profile=ep)
+            self._validate_classic_edge(g, edge)
+
+    def _test_graph_classic_path(self, schema, graphson):
+        """
+        Test to validate that the path version of the result type is generated correctly. It also
+        tests basic path results, as that is not covered elsewhere.
+
+        @since 1.0.0
+        @jira_ticket PYTHON-641
+        @expected_result path object should be unpacked correctly including all nested edges and vertices
+        @test_category dse graph
+        """
+        self.execute_graph(schema.fixtures.classic(), graphson)
+        g = self.fetch_traversal_source(graphson)
+        traversal = g.V().hasLabel('person').has('name', 'marko').as_('a').outE('knows').inV().as_('c', 'd').outE('created').as_('e', 'f', 'g').inV().path()
+        path_list = self.execute_traversal(traversal, graphson)
+        self.assertEqual(len(path_list), 2)
+        for path in path_list:
+            self._validate_path_result_type(g, path)
+
+    def _test_range_query(self, schema, graphson):
+        """
+        Test to validate that range queries are handled correctly.
+
+        Creates a very large line graph script and executes it. Then proceeds to do a
+        range-limited query against it, and ensures that the results are formatted correctly
+        and that the result set is properly sized.
+
+        @since 1.0.0
+        @jira_ticket PYTHON-641
+        @expected_result result set should be properly formatted and properly sized
+
+        @test_category dse graph
+        """
+
+        self.execute_graph(schema.fixtures.line(150), graphson)
+        ep = self.get_execution_profile(graphson)
+        g = self.fetch_traversal_source(graphson)
+
+        traversal = g.E().range(0, 10)
+        edges = self.execute_traversal(traversal, graphson)
+        self.assertEqual(len(edges), 10)
+        for edge in edges:
+            schema.ensure_properties(self.session, edge, execution_profile=ep)
+            self._validate_line_edge(g, edge)
+
+    def _test_result_types(self, schema, graphson):
+        """
+        Test to validate that the edge and vertex versions of results are constructed correctly.
+
+        @since 1.0.0
+        @jira_ticket PYTHON-641
+        @expected_result edge/vertex result types should be unpacked correctly.
+        @test_category dse graph
+        """
+        self.execute_graph(schema.fixtures.line(150), graphson)
+        g = self.fetch_traversal_source(graphson)
+        traversal = g.V()
+        vertices = self.execute_traversal(traversal, graphson)
+        for vertex in vertices:
+            self._validate_type(g, vertex)
+
+    def _test_large_result_set(self, schema, graphson):
+        """
+        Test to validate that large result sets return correctly.
+
+        Creates a very large graph. Ensures that large result sets are handled appropriately.
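(As a companion to the helper-driven tests above: a sketch of the explicit execution style this refactoring splits out into test_graph_explicit_execution.py, where the traversal is serialized with query_from_traversal and run through the session. The session variable and profile/graph names are placeholders, and the graph_protocol keyword is an assumption based on how this patch passes protocols around.)

    from cassandra.datastax.graph.fluent import DseGraph
    from cassandra.graph import GraphProtocol

    g = DseGraph.traversal_source()  # build-only source; not bound to a session
    traversal = g.V().has('name', 'marko').out('knows').values('name')

    # Explicit execution: convert the GLV traversal to a query string and
    # execute it with a graph execution profile.
    query = DseGraph.query_from_traversal(
        traversal, graph_protocol=GraphProtocol.GRAPHSON_3_0)
    result_set = session.execute_graph(query, execution_profile='traversal_graphson3')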
+
+        @since 1.0.0
+        @jira_ticket PYTHON-641
+        @expected_result when limits of result sets are hit, errors should be surfaced appropriately
+
+        @test_category dse graph
+        """
+        self.execute_graph(schema.fixtures.large(), graphson)
+        g = self.fetch_traversal_source(graphson)
+        traversal = g.V()
+        vertices = self.execute_traversal(traversal, graphson)
+        for vertex in vertices:
+            self._validate_generic_vertex_result_type(g, vertex)
+
+    def _test_vertex_meta_properties(self, schema, graphson):
+        """
+        Test verifying meta-properties (properties of vertex properties)
+
+        @since 1.0.0
+        @jira_ticket PYTHON-641
+
+        @test_category dse graph
+        """
+        if schema is not ClassicGraphSchema:
+            raise unittest.SkipTest('skipped because meta-properties are only supported with classic graphs')
+
+        s = self.session
+        s.execute_graph("schema.propertyKey('k0').Text().ifNotExists().create();")
+        s.execute_graph("schema.propertyKey('k1').Text().ifNotExists().create();")
+        s.execute_graph("schema.propertyKey('key').Text().properties('k0', 'k1').ifNotExists().create();")
+        s.execute_graph("schema.vertexLabel('MLP').properties('key').ifNotExists().create();")
+        s.execute_graph("schema.config().option('graph.allow_scan').set('true');")
+        v = s.execute_graph('''v = graph.addVertex('MLP')
+                               v.property('key', 'meta_prop', 'k0', 'v0', 'k1', 'v1')
+                               v''')[0]
+
+        g = self.fetch_traversal_source(graphson)
+
+        traversal = g.V()
+        # This should contain key and value, where value is a property
+        # This should be a vertex property and should contain sub properties
+        results = self.execute_traversal(traversal, graphson)
+        self._validate_meta_property(g, results[0])
+
+    def _test_vertex_multiple_properties(self, schema, graphson):
+        """
+        Test verifying vertex property form for various cardinalities
+
+        All key types are encoded as a list, regardless of cardinality
+
+        Single-cardinality properties have only one value -- the last one added
+
+        Default is single (this is config dependent)
+
+        @since 1.0.0
+        @jira_ticket PYTHON-641
+
+        @test_category dse graph
+        """
+        if schema is not ClassicGraphSchema:
+            raise unittest.SkipTest('skipped because multiple properties are only supported with classic graphs')
+
+        s = self.session
+        s.execute_graph('''Schema schema = graph.schema();
+                           schema.propertyKey('mult_key').Text().multiple().ifNotExists().create();
+                           schema.propertyKey('single_key').Text().single().ifNotExists().create();
+                           schema.vertexLabel('MPW1').properties('mult_key').ifNotExists().create();
+                           schema.vertexLabel('MPW2').properties('mult_key').ifNotExists().create();
+                           schema.vertexLabel('SW1').properties('single_key').ifNotExists().create();''')
+
+        mpw1v = s.execute_graph('''v = graph.addVertex('MPW1')
+                                   v.property('mult_key', 'value')
+                                   v''')[0]
+
+        mpw2v = s.execute_graph('''g.addV('MPW2').property('mult_key', 'value0').property('mult_key', 'value1')''')[0]
+
+        g = self.fetch_traversal_source(graphson)
+        traversal = g.V(mpw1v.id).properties()
+
+        vertex_props = self.execute_traversal(traversal, graphson)
+
+        self.assertEqual(len(vertex_props), 1)
+
+        self.assertEqual(self.fetch_key_from_prop(vertex_props[0]), "mult_key")
+        self.assertEqual(vertex_props[0].value, "value")
+
+        # multiple_with_two_values
+        traversal = g.V(mpw2v.id).properties()
+
+        vertex_props = self.execute_traversal(traversal, graphson)
+
+        self.assertEqual(len(vertex_props), 2)
+        self.assertEqual(self.fetch_key_from_prop(vertex_props[0]), 'mult_key')
+        self.assertEqual(self.fetch_key_from_prop(vertex_props[1]), 'mult_key')
+        self.assertEqual(vertex_props[0].value, 'value0')
+        self.assertEqual(vertex_props[1].value, 'value1')
+
+        # single_with_one_value
+        v = s.execute_graph('''v = graph.addVertex('SW1')
+                               v.property('single_key', 'value')
+                               v''')[0]
+        traversal = g.V(v.id).properties()
+        vertex_props = self.execute_traversal(traversal, graphson)
+        self.assertEqual(len(vertex_props), 1)
+        self.assertEqual(self.fetch_key_from_prop(vertex_props[0]), "single_key")
+        self.assertEqual(vertex_props[0].value, "value")
+
+    def should_parse_meta_properties(self):
+        g = self.fetch_traversal_source()
+        g.addV("meta_v").property("meta_prop", "hello", "sub_prop", "hi", "sub_prop2", "hi2")
+
+    def _test_all_graph_types_with_schema(self, schema, graphson):
+        """
+        Exhaustively goes through each type that is supported by dse_graph and
+        creates a vertex for each type using a dse-tinkerpop traversal.
+        It then attempts to fetch it from the server and compares it to what was inserted.
+        Primes the graph with the correct schema first.
+
+        @since 1.0.0
+        @jira_ticket PYTHON-641
+        @expected_result inserted objects are equivalent to those retrieved
+
+        @test_category dse graph
+        """
+        self._write_and_read_data_types(schema, graphson)
+
+    def _test_all_graph_types_without_schema(self, schema, graphson):
+        """
+        Exhaustively goes through each type that is supported by dse_graph and
+        creates a vertex for each type using a dse-tinkerpop traversal.
+        It then attempts to fetch it from the server and compares it to what was inserted.
+        Does not prime the graph with the correct schema first.
+        @since 1.0.0
+        @jira_ticket PYTHON-641
+        @expected_result inserted objects are equivalent to those retrieved
+        @test_category dse graph
+        """
+        if schema is not ClassicGraphSchema:
+            raise unittest.SkipTest('schema-less is only for classic graphs')
+        self._write_and_read_data_types(schema, graphson, use_schema=False)
+
+    def _test_dsl(self, schema, graphson):
+        """
+        The test creates a SocialTraversal and a SocialTraversalSource as part of
+        a DSL. Then calls its methods and checks the results to verify
+        we have the expected results
+
+        @since 1.1.0a1
+        @jira_ticket PYTHON-790
+        @expected_result only the vertex corresponding to marko is in the result
+
+        @test_category dse graph
+        """
+        class SocialTraversal(GraphTraversal):
+            def knows(self, person_name):
+                return self.out("knows").hasLabel("person").has("name", person_name).in_()
+
+        class SocialTraversalSource(GraphTraversalSource):
+            def __init__(self, *args, **kwargs):
+                super(SocialTraversalSource, self).__init__(*args, **kwargs)
+                self.graph_traversal = SocialTraversal
+
+            def people(self, *names):
+                return self.get_graph_traversal().V().has("name", P.within(*names))
+
+        self.execute_graph(schema.fixtures.classic(), graphson)
+        if schema is CoreGraphSchema:
+            self.execute_graph("""
+                schema.edgeLabel('knows').from('person').to('person').materializedView('person__knows__person_by_in_name').
+ ifNotExists().partitionBy('in_name').clusterBy('out_name', Asc).create() + """, graphson) + time.sleep(1) # give some time to the MV to be populated + g = self.fetch_traversal_source(graphson, traversal_class=SocialTraversalSource) + + traversal = g.people("marko", "albert").knows("vadas") + results = self.execute_traversal(traversal, graphson) + self.assertEqual(len(results), 1) + only_vertex = results[0] + schema.ensure_properties(self.session, only_vertex, + execution_profile=self.get_execution_profile(graphson)) + self._validate_classic_vertex(g, only_vertex) + + def _test_bulked_results(self, schema, graphson): + """ + Send a query expecting a bulked result and the driver "undoes" + the bulk and returns the expected list + + @since 1.1.0a1 + @jira_ticket PYTHON-771 + @expected_result the expanded list + + @test_category dse graph + """ + self.execute_graph(schema.fixtures.classic(), graphson) + g = self.fetch_traversal_source(graphson) + barrier_traversal = g.E().label().barrier() + results = self.execute_traversal(barrier_traversal, graphson) + self.assertEqual(sorted(["created", "created", "created", "created", "knows", "knows"]), sorted(results)) + + def _test_udt_with_classes(self, schema, graphson): + class Address(object): + + def __init__(self, address, city, state): + self.address = address + self.city = city + self.state = state + + def __eq__(self, other): + return self.address == other.address and self.city == other.city and self.state == other.state + + class AddressWithTags(object): + + def __init__(self, address, city, state, tags): + self.address = address + self.city = city + self.state = state + self.tags = tags + + def __eq__(self, other): + return (self.address == other.address and self.city == other.city + and self.state == other.state and self.tags == other.tags) + + class ComplexAddress(object): + + def __init__(self, address, address_tags, city, state, props): + self.address = address + self.address_tags = address_tags + self.city = city + self.state = state + self.props = props + + def __eq__(self, other): + return (self.address == other.address and self.address_tags == other.address_tags + and self.city == other.city and self.state == other.state + and self.props == other.props) + + class ComplexAddressWithOwners(object): + + def __init__(self, address, address_tags, city, state, props, owners): + self.address = address + self.address_tags = address_tags + self.city = city + self.state = state + self.props = props + self.owners = owners + + def __eq__(self, other): + return (self.address == other.address and self.address_tags == other.address_tags + and self.city == other.city and self.state == other.state + and self.props == other.props and self.owners == other.owners) + + self.__test_udt(schema, graphson, Address, AddressWithTags, ComplexAddress, ComplexAddressWithOwners) + + def _test_udt_with_namedtuples(self, schema, graphson): + AddressTuple = namedtuple('Address', ('address', 'city', 'state')) + AddressWithTagsTuple = namedtuple('AddressWithTags', ('address', 'city', 'state', 'tags')) + ComplexAddressTuple = namedtuple('ComplexAddress', ('address', 'address_tags', 'city', 'state', 'props')) + ComplexAddressWithOwnersTuple = namedtuple('ComplexAddressWithOwners', ('address', 'address_tags', 'city', + 'state', 'props', 'owners')) + + self.__test_udt(schema, graphson, AddressTuple, AddressWithTagsTuple, + ComplexAddressTuple, ComplexAddressWithOwnersTuple) + + def _write_and_read_data_types(self, schema, graphson, use_schema=True): + g = 
self.fetch_traversal_source(graphson) + ep = self.get_execution_profile(graphson) + for data in six.itervalues(schema.fixtures.datatypes()): + typ, value, deserializer = data + vertex_label = VertexLabel([typ]) + property_name = next(six.iterkeys(vertex_label.non_pk_properties)) + if use_schema or schema is CoreGraphSchema: + schema.create_vertex_label(self.session, vertex_label, execution_profile=ep) + + write_traversal = g.addV(str(vertex_label.label)).property('pkid', vertex_label.id).\ + property(property_name, value) + self.execute_traversal(write_traversal, graphson) + + read_traversal = g.V().hasLabel(str(vertex_label.label)).has(property_name).properties() + results = self.execute_traversal(read_traversal, graphson) + + for result in results: + if result.label == 'pkid': + continue + self._check_equality(g, value, result.value) + + def __test_udt(self, schema, graphson, address_class, address_with_tags_class, + complex_address_class, complex_address_with_owners_class): + if schema is not CoreGraphSchema or DSE_VERSION < Version('6.8'): + raise unittest.SkipTest("Graph UDT is only supported with DSE 6.8+ and Core graphs.") + + ep = self.get_execution_profile(graphson) + + Address = address_class + AddressWithTags = address_with_tags_class + ComplexAddress = complex_address_class + ComplexAddressWithOwners = complex_address_with_owners_class + + # setup udt + self.session.execute_graph(""" + schema.type('address').property('address', Text).property('city', Text).property('state', Text).create(); + schema.type('addressTags').property('address', Text).property('city', Text).property('state', Text). + property('tags', setOf(Text)).create(); + schema.type('complexAddress').property('address', Text).property('address_tags', frozen(typeOf('addressTags'))). + property('city', Text).property('state', Text).property('props', mapOf(Text, Int)).create(); + schema.type('complexAddressWithOwners').property('address', Text). + property('address_tags', frozen(typeOf('addressTags'))). + property('city', Text).property('state', Text).property('props', mapOf(Text, Int)). + property('owners', frozen(listOf(tupleOf(Text, Int)))).create(); + """, execution_profile=ep) + + # wait max 10 seconds to get the UDT discovered. 
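+        # (added note) register_user_type() raises until the newly created UDT shows up in the
+        # cluster's schema metadata, so each call below is retried once per second, up to 10 seconds.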
+ wait_until_not_raised( + lambda: self.session.cluster.register_user_type(self.graph_name, 'address', Address), + 1, 10) + wait_until_not_raised( + lambda: self.session.cluster.register_user_type(self.graph_name, 'addressTags', AddressWithTags), + 1, 10) + wait_until_not_raised( + lambda: self.session.cluster.register_user_type(self.graph_name, 'complexAddress', ComplexAddress), + 1, 10) + wait_until_not_raised( + lambda: self.session.cluster.register_user_type(self.graph_name, 'complexAddressWithOwners', ComplexAddressWithOwners), + 1, 10) + + data = { + "udt1": ["typeOf('address')", Address('1440 Rd Smith', 'Quebec', 'QC')], + "udt2": ["tupleOf(typeOf('address'), Text)", (Address('1440 Rd Smith', 'Quebec', 'QC'), 'hello')], + "udt3": ["tupleOf(frozen(typeOf('address')), Text)", (Address('1440 Rd Smith', 'Quebec', 'QC'), 'hello')], + "udt4": ["tupleOf(tupleOf(Int, typeOf('address')), Text)", + ((42, Address('1440 Rd Smith', 'Quebec', 'QC')), 'hello')], + "udt5": ["tupleOf(tupleOf(Int, typeOf('addressTags')), Text)", + ((42, AddressWithTags('1440 Rd Smith', 'Quebec', 'QC', {'t1', 't2'})), 'hello')], + "udt6": ["tupleOf(tupleOf(Int, typeOf('complexAddress')), Text)", + ((42, ComplexAddress('1440 Rd Smith', + AddressWithTags('1440 Rd Smith', 'Quebec', 'QC', {'t1', 't2'}), + 'Quebec', 'QC', {'p1': 42, 'p2': 33})), 'hello')], + "udt7": ["tupleOf(tupleOf(Int, frozen(typeOf('complexAddressWithOwners'))), Text)", + ((42, ComplexAddressWithOwners( + '1440 Rd Smith', + AddressWithTags('1440 CRd Smith', 'Quebec', 'QC', {'t1', 't2'}), + 'Quebec', 'QC', {'p1': 42, 'p2': 33}, [('Mike', 43), ('Gina', 39)]) + ), 'hello')] + } + + g = self.fetch_traversal_source(graphson) + for typ, value in six.itervalues(data): + vertex_label = VertexLabel([typ]) + property_name = next(six.iterkeys(vertex_label.non_pk_properties)) + schema.create_vertex_label(self.session, vertex_label, execution_profile=ep) + + write_traversal = g.addV(str(vertex_label.label)).property('pkid', vertex_label.id). 
\
+                property(property_name, value)
+            self.execute_traversal(write_traversal, graphson)
+
+            read_traversal = g.V().hasLabel(str(vertex_label.label)).has(property_name).properties()
+            vertex_properties = self.execute_traversal(read_traversal, graphson)
+
+            self.assertEqual(len(vertex_properties), 2)  # includes the pkid property
+            for vp in vertex_properties:
+                if vp.label == 'pkid':
+                    continue
+
+                self.assertIsInstance(vp, (VertexProperty, TravVertexProperty))
+                self.assertEqual(vp.label, property_name)
+                self.assertEqual(vp.value, value)
+
+    @staticmethod
+    def fetch_edge_props(g, edge):
+        edge_props = g.E(edge.id).properties().toList()
+        return edge_props
+
+    @staticmethod
+    def fetch_vertex_props(g, vertex):
+        vertex_props = g.V(vertex.id).properties().toList()
+        return vertex_props
+
+    def _check_equality(self, g, original, read_value):
+        return check_equality_base(self, original, read_value)
+
+
+def _validate_prop(key, value, unittest):
+    if key == 'index':
+        return
+
+    if any(key.startswith(t) for t in ('int', 'short')):
+        typ = int
+    elif any(key.startswith(t) for t in ('long',)):
+        if sys.version_info >= (3, 0):
+            typ = int
+        else:
+            typ = long
+    elif any(key.startswith(t) for t in ('float', 'double')):
+        typ = float
+    elif any(key.startswith(t) for t in ('polygon',)):
+        typ = Polygon
+    elif any(key.startswith(t) for t in ('point',)):
+        typ = Point
+    elif any(key.startswith(t) for t in ('Linestring',)):
+        typ = LineString
+    elif any(key.startswith(t) for t in ('neg',)):
+        typ = six.string_types
+    elif any(key.startswith(t) for t in ('date',)):
+        typ = datetime.date
+    elif any(key.startswith(t) for t in ('time',)):
+        typ = datetime.time
+    else:
+        unittest.fail("Received unexpected type: %s" % key)
+    unittest.assertIsInstance(value, typ)
+
+
+@requiredse
+class BaseImplicitExecutionTest(GraphUnitTestCase):
+    """
+    This test class will execute all tests of the _AbstractTraversalTest class using implicit execution.
+    All traversals will be run directly using toList()
+    """
+    def setUp(self):
+        super(BaseImplicitExecutionTest, self).setUp()
+        if DSE_VERSION:
+            self.ep = DseGraph().create_execution_profile(self.graph_name)
+            self.cluster.add_execution_profile(self.graph_name, self.ep)
+
+    @staticmethod
+    def fetch_key_from_prop(property):
+        return property.key
+
+    def fetch_traversal_source(self, graphson, **kwargs):
+        ep = self.get_execution_profile(graphson, traversal=True)
+        return DseGraph().traversal_source(self.session, self.graph_name, execution_profile=ep, **kwargs)
+
+    def execute_traversal(self, traversal, graphson=None):
+        return traversal.toList()
+
+    def _validate_classic_vertex(self, g, vertex):
+        # Checks the properties on a classic vertex for correctness
+        vertex_props = self.fetch_vertex_props(g, vertex)
+        vertex_prop_keys = [vp.key for vp in vertex_props]
+        self.assertEqual(len(vertex_prop_keys), 2)
+        self.assertIn('name', vertex_prop_keys)
+        self.assertTrue('lang' in vertex_prop_keys or 'age' in vertex_prop_keys)
+
+    def _validate_generic_vertex_result_type(self, g, vertex):
+        # Checks a vertex object for its generic properties
+        properties = self.fetch_vertex_props(g, vertex)
+        for attr in ('id', 'label'):
+            self.assertIsNotNone(getattr(vertex, attr))
+        self.assertTrue(len(properties) > 2)
+
+    def _validate_classic_edge_properties(self, g, edge):
+        # Checks the properties on a classic edge for correctness
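+        # (added note) GLV results come back as reference elements without their properties
+        # (see the TinkerPop note in _validate_path_result_type), so fetch them with a second traversal.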
+        edge_props = self.fetch_edge_props(g, edge)
+        edge_prop_keys = [ep.key for ep in edge_props]
+        self.assertEqual(len(edge_prop_keys), 1)
+        self.assertIn('weight', edge_prop_keys)
+
+    def _validate_classic_edge(self, g, edge):
+        self._validate_generic_edge_result_type(edge)
+        self._validate_classic_edge_properties(g, edge)
+
+    def _validate_line_edge(self, g, edge):
+        self._validate_generic_edge_result_type(edge)
+        edge_props = self.fetch_edge_props(g, edge)
+        edge_prop_keys = [ep.key for ep in edge_props]
+        self.assertEqual(len(edge_prop_keys), 1)
+        self.assertIn('distance', edge_prop_keys)
+
+    def _validate_generic_edge_result_type(self, edge):
+        self.assertIsInstance(edge, TravEdge)
+
+        for attr in ('outV', 'inV', 'label', 'id'):
+            self.assertIsNotNone(getattr(edge, attr))
+
+    def _validate_path_result_type(self, g, objects_path):
+        for obj in objects_path:
+            if isinstance(obj, TravEdge):
+                self._validate_classic_edge(g, obj)
+            elif isinstance(obj, TravVertex):
+                self._validate_classic_vertex(g, obj)
+            else:
+                self.fail("Invalid object found in path " + str(obj.type))
+
+    def _validate_meta_property(self, g, vertex):
+        meta_props = g.V(vertex.id).properties().toList()
+        self.assertEqual(len(meta_props), 1)
+        meta_prop = meta_props[0]
+        self.assertEqual(meta_prop.value, "meta_prop")
+        self.assertEqual(meta_prop.key, "key")
+
+        nested_props = g.V(vertex.id).properties().properties().toList()
+        self.assertEqual(len(nested_props), 2)
+        for nested_prop in nested_props:
+            self.assertTrue(nested_prop.key in ['k0', 'k1'])
+            self.assertTrue(nested_prop.value in ['v0', 'v1'])
+
+    def _validate_type(self, g, vertex):
+        props = self.fetch_vertex_props(g, vertex)
+        for prop in props:
+            value = prop.value
+            key = prop.key
+            _validate_prop(key, value, self)
+
+
+class BaseExplicitExecutionTest(GraphUnitTestCase):
+
+    def fetch_traversal_source(self, graphson, **kwargs):
+        ep = self.get_execution_profile(graphson, traversal=True)
+        return DseGraph().traversal_source(self.session, self.graph_name, execution_profile=ep, **kwargs)
+
+    def execute_traversal(self, traversal, graphson):
+        ep = self.get_execution_profile(graphson, traversal=True)
+        ep = self.session.get_execution_profile(ep)
+        context = None
+        if graphson == GraphProtocol.GRAPHSON_3_0:
+            context = {
+                'cluster': self.cluster,
+                'graph_name': ep.graph_options.graph_name.decode('utf-8') if ep.graph_options.graph_name else None
+            }
+        query = DseGraph.query_from_traversal(traversal, graphson, context=context)
+        # Use an ep that is configured with the correct row factory, and bytecode-json language flag set
+        result_set = self.execute_graph(query, graphson, traversal=True)
+        return list(result_set)
diff --git a/tests/integration/advanced/graph/fluent/test_graph.py b/tests/integration/advanced/graph/fluent/test_graph.py
index 4ebb0b6109..02611c12c0 100644
--- a/tests/integration/advanced/graph/fluent/test_graph.py
+++ b/tests/integration/advanced/graph/fluent/test_graph.py
@@ -12,34 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import sys -import datetime import six -import time -from collections import namedtuple -from concurrent.futures import Future -from packaging.version import Version from cassandra import cluster from cassandra.cluster import ContinuousPagingOptions from cassandra.datastax.graph.fluent import DseGraph -from cassandra.graph import Vertex, Edge, VertexProperty, GraphProtocol -from cassandra.util import Point, Polygon, LineString +from cassandra.graph import VertexProperty -from gremlin_python.process.graph_traversal import GraphTraversal, GraphTraversalSource -from gremlin_python.process.traversal import P -from gremlin_python.structure.graph import Edge as TravEdge -from gremlin_python.structure.graph import Vertex as TravVertex, VertexProperty as TravVertexProperty - -from tests.util import wait_until_not_raised -from tests.integration import DSE_VERSION, greaterthanorequaldse68 -from tests.integration.advanced.graph import GraphUnitTestCase, \ - ClassicGraphSchema, CoreGraphSchema, \ - validate_classic_vertex, validate_classic_edge, validate_generic_vertex_result_type, \ - validate_classic_edge_properties, validate_line_edge, \ - validate_generic_edge_result_type, validate_path_result_type, VertexLabel, \ - GraphTestConfiguration, BasicGraphUnitTestCase -from tests.integration import greaterthanorequaldse60, requiredse +from tests.integration import greaterthanorequaldse68 +from tests.integration.advanced.graph import ( + GraphUnitTestCase, ClassicGraphSchema, CoreGraphSchema, + VertexLabel, GraphTestConfiguration +) +from tests.integration import greaterthanorequaldse60 +from tests.integration.advanced.graph.fluent import ( + BaseExplicitExecutionTest, create_traversal_profiles, check_equality_base) try: import unittest2 as unittest @@ -47,836 +34,9 @@ import unittest # noqa -import ipaddress - - -def check_equality_base(testcase, original, read_value): - if isinstance(original, float): - testcase.assertAlmostEqual(original, read_value, delta=.01) - elif isinstance(original, ipaddress.IPv4Address): - testcase.assertAlmostEqual(original, ipaddress.IPv4Address(read_value)) - elif isinstance(original, ipaddress.IPv6Address): - testcase.assertAlmostEqual(original, ipaddress.IPv6Address(read_value)) - else: - testcase.assertEqual(original, read_value) - - -def create_traversal_profiles(cluster, graph_name): - ep_graphson2 = DseGraph().create_execution_profile(graph_name, - graph_protocol=GraphProtocol.GRAPHSON_2_0) - ep_graphson3 = DseGraph().create_execution_profile(graph_name, - graph_protocol=GraphProtocol.GRAPHSON_3_0) - - cluster.add_execution_profile('traversal_graphson2', ep_graphson2) - cluster.add_execution_profile('traversal_graphson3', ep_graphson3) - - return ep_graphson2, ep_graphson3 - - -class _AbstractTraversalTest(GraphUnitTestCase): - - def setUp(self): - super(_AbstractTraversalTest, self).setUp() - self.ep_graphson2, self.ep_graphson3 = create_traversal_profiles(self.cluster, self.graph_name) - - def _test_basic_query(self, schema, graphson): - """ - Test to validate that basic graph queries works - - Creates a simple classic tinkerpot graph, and attempts to preform a basic query - using Tinkerpop's GLV with both explicit and implicit execution - ensuring that each one is correct. 
See reference graph here - http://www.tinkerpop.com/docs/3.0.0.M1/ - - @since 1.0.0 - @jira_ticket PYTHON-641 - @expected_result graph should generate and all vertices and edge results should be - - @test_category dse graph - """ - - g = self.fetch_traversal_source(graphson) - self.execute_graph(schema.fixtures.classic(), graphson) - traversal = g.V().has('name', 'marko').out('knows').values('name') - results_list = self.execute_traversal(traversal, graphson) - self.assertEqual(len(results_list), 2) - self.assertIn('vadas', results_list) - self.assertIn('josh', results_list) - - def _test_classic_graph(self, schema, graphson): - """ - Test to validate that basic graph generation, and vertex and edges are surfaced correctly - - Creates a simple classic tinkerpot graph, and iterates over the the vertices and edges - using Tinkerpop's GLV with both explicit and implicit execution - ensuring that each one iscorrect. See reference graph here - http://www.tinkerpop.com/docs/3.0.0.M1/ - - @since 1.0.0 - @jira_ticket PYTHON-641 - @expected_result graph should generate and all vertices and edge results should be - - @test_category dse graph - """ - - self.execute_graph(schema.fixtures.classic(), graphson) - ep = self.get_execution_profile(graphson) - g = self.fetch_traversal_source(graphson) - traversal = g.V() - vert_list = self.execute_traversal(traversal, graphson) - - for vertex in vert_list: - schema.ensure_properties(self.session, vertex, execution_profile=ep) - self._validate_classic_vertex(g, vertex) - traversal = g.E() - edge_list = self.execute_traversal(traversal, graphson) - for edge in edge_list: - schema.ensure_properties(self.session, edge, execution_profile=ep) - self._validate_classic_edge(g, edge) - - def _test_graph_classic_path(self, schema, graphson): - """ - Test to validate that the path version of the result type is generated correctly. It also - tests basic path results as that is not covered elsewhere - - @since 1.0.0 - @jira_ticket PYTHON-641 - @expected_result path object should be unpacked correctly including all nested edges and vertices - @test_category dse graph - """ - self.execute_graph(schema.fixtures.classic(), graphson) - g = self.fetch_traversal_source(graphson) - traversal = g.V().hasLabel('person').has('name', 'marko').as_('a').outE('knows').inV().as_('c', 'd').outE('created').as_('e', 'f', 'g').inV().path() - path_list = self.execute_traversal(traversal, graphson) - self.assertEqual(len(path_list), 2) - for path in path_list: - self._validate_path_result_type(g, path) - - def _test_range_query(self, schema, graphson): - """ - Test to validate range queries are handled correctly. - - Creates a very large line graph script and executes it. Then proceeds to to a range - limited query against it, and ensure that the results are formated correctly and that - the result set is properly sized. 
- - @since 1.0.0 - @jira_ticket PYTHON-641 - @expected_result result set should be properly formated and properly sized - - @test_category dse graph - """ - - self.execute_graph(schema.fixtures.line(150), graphson) - ep = self.get_execution_profile(graphson) - g = self.fetch_traversal_source(graphson) - - traversal = g.E().range(0, 10) - edges = self.execute_traversal(traversal, graphson) - self.assertEqual(len(edges), 10) - for edge in edges: - schema.ensure_properties(self.session, edge, execution_profile=ep) - self._validate_line_edge(g, edge) - - def _test_result_types(self, schema, graphson): - """ - Test to validate that the edge and vertex version of results are constructed correctly. - - @since 1.0.0 - @jira_ticket PYTHON-641 - @expected_result edge/vertex result types should be unpacked correctly. - @test_category dse graph - """ - self.execute_graph(schema.fixtures.line(150), graphson) - g = self.fetch_traversal_source(graphson) - traversal = g.V() - vertices = self.execute_traversal(traversal, graphson) - for vertex in vertices: - self._validate_type(g, vertex) - - def _test_large_result_set(self, schema, graphson): - """ - Test to validate that large result sets return correctly. - - Creates a very large graph. Ensures that large result sets are handled appropriately. - - @since 1.0.0 - @jira_ticket PYTHON-641 - @expected_result when limits of result sets are hit errors should be surfaced appropriately - - @test_category dse graph - """ - self.execute_graph(schema.fixtures.large(), graphson) - g = self.fetch_traversal_source(graphson) - traversal = g.V() - vertices = self.execute_traversal(traversal, graphson) - for vertex in vertices: - self._validate_generic_vertex_result_type(g, vertex) - - def _test_vertex_meta_properties(self, schema, graphson): - """ - Test verifying vertex property properties - - @since 1.0.0 - @jira_ticket PYTHON-641 - - @test_category dse graph - """ - if schema is not ClassicGraphSchema: - raise unittest.SkipTest('skipped because multiple properties are only supported with classic graphs') - - s = self.session - s.execute_graph("schema.propertyKey('k0').Text().ifNotExists().create();") - s.execute_graph("schema.propertyKey('k1').Text().ifNotExists().create();") - s.execute_graph("schema.propertyKey('key').Text().properties('k0', 'k1').ifNotExists().create();") - s.execute_graph("schema.vertexLabel('MLP').properties('key').ifNotExists().create();") - s.execute_graph("schema.config().option('graph.allow_scan').set('true');") - v = s.execute_graph('''v = graph.addVertex('MLP') - v.property('key', 'meta_prop', 'k0', 'v0', 'k1', 'v1') - v''')[0] - - g = self.fetch_traversal_source(graphson) - - traversal = g.V() - # This should contain key, and value where value is a property - # This should be a vertex property and should contain sub properties - results = self.execute_traversal(traversal, graphson) - self._validate_meta_property(g, results[0]) - - def _test_vertex_multiple_properties(self, schema, graphson): - """ - Test verifying vertex property form for various Cardinality - - All key types are encoded as a list, regardless of cardinality - - Single cardinality properties have only one value -- the last one added - - Default is single (this is config dependent) - - @since 1.0.0 - @jira_ticket PYTHON-641 - - @test_category dse graph - """ - if schema is not ClassicGraphSchema: - raise unittest.SkipTest('skipped because multiple properties are only supported with classic graphs') - - s = self.session - s.execute_graph('''Schema schema = graph.schema(); - 
schema.propertyKey('mult_key').Text().multiple().ifNotExists().create(); - schema.propertyKey('single_key').Text().single().ifNotExists().create(); - schema.vertexLabel('MPW1').properties('mult_key').ifNotExists().create(); - schema.vertexLabel('MPW2').properties('mult_key').ifNotExists().create(); - schema.vertexLabel('SW1').properties('single_key').ifNotExists().create();''') - - mpw1v = s.execute_graph('''v = graph.addVertex('MPW1') - v.property('mult_key', 'value') - v''')[0] - - mpw2v = s.execute_graph('''g.addV('MPW2').property('mult_key', 'value0').property('mult_key', 'value1')''')[0] - - g = self.fetch_traversal_source(graphson) - traversal = g.V(mpw1v.id).properties() - - vertex_props = self.execute_traversal(traversal, graphson) - - self.assertEqual(len(vertex_props), 1) - - self.assertEqual(self.fetch_key_from_prop(vertex_props[0]), "mult_key") - self.assertEqual(vertex_props[0].value, "value") - - # multiple_with_two_values - #v = s.execute_graph('''g.addV(label, 'MPW2', 'mult_key', 'value0', 'mult_key', 'value1')''')[0] - traversal = g.V(mpw2v.id).properties() - - vertex_props = self.execute_traversal(traversal, graphson) - - self.assertEqual(len(vertex_props), 2) - self.assertEqual(self.fetch_key_from_prop(vertex_props[0]), 'mult_key') - self.assertEqual(self.fetch_key_from_prop(vertex_props[1]), 'mult_key') - self.assertEqual(vertex_props[0].value, 'value0') - self.assertEqual(vertex_props[1].value, 'value1') - - # single_with_one_value - v = s.execute_graph('''v = graph.addVertex('SW1') - v.property('single_key', 'value') - v''')[0] - traversal = g.V(v.id).properties() - vertex_props = self.execute_traversal(traversal, graphson) - self.assertEqual(len(vertex_props), 1) - self.assertEqual(self.fetch_key_from_prop(vertex_props[0]), "single_key") - self.assertEqual(vertex_props[0].value, "value") - - def should_parse_meta_properties(self): - g = self.fetch_traversal_source() - g.addV("meta_v").property("meta_prop", "hello", "sub_prop", "hi", "sub_prop2", "hi2") - - def _test_all_graph_types_with_schema(self, schema, graphson): - """ - Exhaustively goes through each type that is supported by dse_graph. - creates a vertex for each type using a dse-tinkerpop traversal, - It then attempts to fetch it from the server and compares it to what was inserted - Prime the graph with the correct schema first - - @since 1.0.0 - @jira_ticket PYTHON-641 - @expected_result inserted objects are equivalent to those retrieved - - @test_category dse graph - """ - self._write_and_read_data_types(schema, graphson) - - def _test_all_graph_types_without_schema(self, schema, graphson): - """ - Exhaustively goes through each type that is supported by dse_graph. - creates a vertex for each type using a dse-tinkerpop traversal, - It then attempts to fetch it from the server and compares it to what was inserted - Do not prime the graph with the correct schema first - @since 1.0.0 - @jira_ticket PYTHON-641 - @expected_result inserted objects are equivalent to those retrieved - @test_category dse graph - """ - if schema is not ClassicGraphSchema: - raise unittest.SkipTest('schema-less is only for classic graphs') - self._write_and_read_data_types(schema, graphson, use_schema=False) - - def _test_dsl(self, schema, graphson): - """ - The test creates a SocialTraversal and a SocialTraversalSource as part of - a DSL. 
Then calls it's method and checks the results to verify - we have the expected results - - @since @since 1.1.0a1 - @jira_ticket PYTHON-790 - @expected_result only the vertex corresponding to marko is in the result - - @test_category dse graph - """ - class SocialTraversal(GraphTraversal): - def knows(self, person_name): - return self.out("knows").hasLabel("person").has("name", person_name).in_() - - class SocialTraversalSource(GraphTraversalSource): - def __init__(self, *args, **kwargs): - super(SocialTraversalSource, self).__init__(*args, **kwargs) - self.graph_traversal = SocialTraversal - - def people(self, *names): - return self.get_graph_traversal().V().has("name", P.within(*names)) - - self.execute_graph(schema.fixtures.classic(), graphson) - if schema is CoreGraphSchema: - self.execute_graph(""" - schema.edgeLabel('knows').from('person').to('person').materializedView('person__knows__person_by_in_name'). - ifNotExists().partitionBy('in_name').clusterBy('out_name', Asc).create() - """, graphson) - time.sleep(1) # give some time to the MV to be populated - g = self.fetch_traversal_source(graphson, traversal_class=SocialTraversalSource) - - traversal = g.people("marko", "albert").knows("vadas") - results = self.execute_traversal(traversal, graphson) - self.assertEqual(len(results), 1) - only_vertex = results[0] - schema.ensure_properties(self.session, only_vertex, - execution_profile=self.get_execution_profile(graphson)) - self._validate_classic_vertex(g, only_vertex) - - def _test_bulked_results(self, schema, graphson): - """ - Send a query expecting a bulked result and the driver "undoes" - the bulk and returns the expected list - - @since 1.1.0a1 - @jira_ticket PYTHON-771 - @expected_result the expanded list - - @test_category dse graph - """ - self.execute_graph(schema.fixtures.classic(), graphson) - g = self.fetch_traversal_source(graphson) - barrier_traversal = g.E().label().barrier() - results = self.execute_traversal(barrier_traversal, graphson) - self.assertEqual(sorted(["created", "created", "created", "created", "knows", "knows"]), sorted(results)) - - def _test_udt_with_classes(self, schema, graphson): - class Address(object): - - def __init__(self, address, city, state): - self.address = address - self.city = city - self.state = state - - def __eq__(self, other): - return self.address == other.address and self.city == other.city and self.state == other.state - - class AddressWithTags(object): - - def __init__(self, address, city, state, tags): - self.address = address - self.city = city - self.state = state - self.tags = tags - - def __eq__(self, other): - return (self.address == other.address and self.city == other.city - and self.state == other.state and self.tags == other.tags) - - class ComplexAddress(object): - - def __init__(self, address, address_tags, city, state, props): - self.address = address - self.address_tags = address_tags - self.city = city - self.state = state - self.props = props - - def __eq__(self, other): - return (self.address == other.address and self.address_tags == other.address_tags - and self.city == other.city and self.state == other.state - and self.props == other.props) - - class ComplexAddressWithOwners(object): - - def __init__(self, address, address_tags, city, state, props, owners): - self.address = address - self.address_tags = address_tags - self.city = city - self.state = state - self.props = props - self.owners = owners - - def __eq__(self, other): - return (self.address == other.address and self.address_tags == other.address_tags - 
and self.city == other.city and self.state == other.state - and self.props == other.props and self.owners == other.owners) - - self.__test_udt(schema, graphson, Address, AddressWithTags, ComplexAddress, ComplexAddressWithOwners) - - def _test_udt_with_namedtuples(self, schema, graphson): - AddressTuple = namedtuple('Address', ('address', 'city', 'state')) - AddressWithTagsTuple = namedtuple('AddressWithTags', ('address', 'city', 'state', 'tags')) - ComplexAddressTuple = namedtuple('ComplexAddress', ('address', 'address_tags', 'city', 'state', 'props')) - ComplexAddressWithOwnersTuple = namedtuple('ComplexAddressWithOwners', ('address', 'address_tags', 'city', - 'state', 'props', 'owners')) - - self.__test_udt(schema, graphson, AddressTuple, AddressWithTagsTuple, - ComplexAddressTuple, ComplexAddressWithOwnersTuple) - - def _write_and_read_data_types(self, schema, graphson, use_schema=True): - g = self.fetch_traversal_source(graphson) - ep = self.get_execution_profile(graphson) - for data in six.itervalues(schema.fixtures.datatypes()): - typ, value, deserializer = data - vertex_label = VertexLabel([typ]) - property_name = next(six.iterkeys(vertex_label.non_pk_properties)) - if use_schema or schema is CoreGraphSchema: - schema.create_vertex_label(self.session, vertex_label, execution_profile=ep) - - write_traversal = g.addV(str(vertex_label.label)).property('pkid', vertex_label.id).\ - property(property_name, value) - self.execute_traversal(write_traversal, graphson) - - read_traversal = g.V().hasLabel(str(vertex_label.label)).has(property_name).properties() - results = self.execute_traversal(read_traversal, graphson) - - for result in results: - if result.label == 'pkid': - continue - self._check_equality(g, value, result.value) - - def __test_udt(self, schema, graphson, address_class, address_with_tags_class, - complex_address_class, complex_address_with_owners_class): - if schema is not CoreGraphSchema or DSE_VERSION < Version('6.8'): - raise unittest.SkipTest("Graph UDT is only supported with DSE 6.8+ and Core graphs.") - - ep = self.get_execution_profile(graphson) - - Address = address_class - AddressWithTags = address_with_tags_class - ComplexAddress = complex_address_class - ComplexAddressWithOwners = complex_address_with_owners_class - - # setup udt - self.session.execute_graph(""" - schema.type('address').property('address', Text).property('city', Text).property('state', Text).create(); - schema.type('addressTags').property('address', Text).property('city', Text).property('state', Text). - property('tags', setOf(Text)).create(); - schema.type('complexAddress').property('address', Text).property('address_tags', frozen(typeOf('addressTags'))). - property('city', Text).property('state', Text).property('props', mapOf(Text, Int)).create(); - schema.type('complexAddressWithOwners').property('address', Text). - property('address_tags', frozen(typeOf('addressTags'))). - property('city', Text).property('state', Text).property('props', mapOf(Text, Int)). - property('owners', frozen(listOf(tupleOf(Text, Int)))).create(); - """, execution_profile=ep) - - # wait max 10 seconds to get the UDT discovered. 
- wait_until_not_raised( - lambda: self.session.cluster.register_user_type(self.graph_name, 'address', Address), - 1, 10) - wait_until_not_raised( - lambda: self.session.cluster.register_user_type(self.graph_name, 'addressTags', AddressWithTags), - 1, 10) - wait_until_not_raised( - lambda: self.session.cluster.register_user_type(self.graph_name, 'complexAddress', ComplexAddress), - 1, 10) - wait_until_not_raised( - lambda: self.session.cluster.register_user_type(self.graph_name, 'complexAddressWithOwners', ComplexAddressWithOwners), - 1, 10) - - data = { - "udt1": ["typeOf('address')", Address('1440 Rd Smith', 'Quebec', 'QC')], - "udt2": ["tupleOf(typeOf('address'), Text)", (Address('1440 Rd Smith', 'Quebec', 'QC'), 'hello')], - "udt3": ["tupleOf(frozen(typeOf('address')), Text)", (Address('1440 Rd Smith', 'Quebec', 'QC'), 'hello')], - "udt4": ["tupleOf(tupleOf(Int, typeOf('address')), Text)", - ((42, Address('1440 Rd Smith', 'Quebec', 'QC')), 'hello')], - "udt5": ["tupleOf(tupleOf(Int, typeOf('addressTags')), Text)", - ((42, AddressWithTags('1440 Rd Smith', 'Quebec', 'QC', {'t1', 't2'})), 'hello')], - "udt6": ["tupleOf(tupleOf(Int, typeOf('complexAddress')), Text)", - ((42, ComplexAddress('1440 Rd Smith', - AddressWithTags('1440 Rd Smith', 'Quebec', 'QC', {'t1', 't2'}), - 'Quebec', 'QC', {'p1': 42, 'p2': 33})), 'hello')], - "udt7": ["tupleOf(tupleOf(Int, frozen(typeOf('complexAddressWithOwners'))), Text)", - ((42, ComplexAddressWithOwners( - '1440 Rd Smith', - AddressWithTags('1440 CRd Smith', 'Quebec', 'QC', {'t1', 't2'}), - 'Quebec', 'QC', {'p1': 42, 'p2': 33}, [('Mike', 43), ('Gina', 39)]) - ), 'hello')] - } - - g = self.fetch_traversal_source(graphson) - for typ, value in six.itervalues(data): - vertex_label = VertexLabel([typ]) - property_name = next(six.iterkeys(vertex_label.non_pk_properties)) - schema.create_vertex_label(self.session, vertex_label, execution_profile=ep) - - write_traversal = g.addV(str(vertex_label.label)).property('pkid', vertex_label.id). 
\ - property(property_name, value) - self.execute_traversal(write_traversal, graphson) - - #vertex = list(schema.add_vertex(self.session, vertex_label, property_name, value, execution_profile=ep))[0] - #vertex_properties = list(schema.get_vertex_properties( - # self.session, vertex, execution_profile=ep)) - - read_traversal = g.V().hasLabel(str(vertex_label.label)).has(property_name).properties() - vertex_properties = self.execute_traversal(read_traversal, graphson) - - self.assertEqual(len(vertex_properties), 2) # include pkid - for vp in vertex_properties: - if vp.label == 'pkid': - continue - - self.assertIsInstance(vp, (VertexProperty, TravVertexProperty)) - self.assertEqual(vp.label, property_name) - self.assertEqual(vp.value, value) - - @staticmethod - def fetch_edge_props(g, edge): - edge_props = g.E(edge.id).properties().toList() - return edge_props - - @staticmethod - def fetch_vertex_props(g, vertex): - - vertex_props = g.V(vertex.id).properties().toList() - return vertex_props - - def _check_equality(self, g, original, read_value): - return check_equality_base(self, original, read_value) - - -@requiredse -class BaseImplicitExecutionTest(GraphUnitTestCase): - """ - This test class will execute all tests of the AbstractTraversalTestClass using implicit execution - This all traversal will be run directly using toList() - """ - def setUp(self): - super(BaseImplicitExecutionTest, self).setUp() - if DSE_VERSION: - self.ep = DseGraph().create_execution_profile(self.graph_name) - self.cluster.add_execution_profile(self.graph_name, self.ep) - - @staticmethod - def fetch_key_from_prop(property): - return property.key - - def fetch_traversal_source(self, graphson, **kwargs): - ep = self.get_execution_profile(graphson, traversal=True) - return DseGraph().traversal_source(self.session, self.graph_name, execution_profile=ep, **kwargs) - - def execute_traversal(self, traversal, graphson=None): - return traversal.toList() - - def _validate_classic_vertex(self, g, vertex): - # Checks the properties on a classic vertex for correctness - vertex_props = self.fetch_vertex_props(g, vertex) - vertex_prop_keys = [vp.key for vp in vertex_props] - self.assertEqual(len(vertex_prop_keys), 2) - self.assertIn('name', vertex_prop_keys) - self.assertTrue('lang' in vertex_prop_keys or 'age' in vertex_prop_keys) - - def _validate_generic_vertex_result_type(self, g, vertex): - # Checks a vertex object for it's generic properties - properties = self.fetch_vertex_props(g, vertex) - for attr in ('id', 'label'): - self.assertIsNotNone(getattr(vertex, attr)) - self.assertTrue(len(properties) > 2) - - def _validate_classic_edge_properties(self, g, edge): - # Checks the properties on a classic edge for correctness - edge_props = self.fetch_edge_props(g, edge) - edge_prop_keys = [ep.key for ep in edge_props] - self.assertEqual(len(edge_prop_keys), 1) - self.assertIn('weight', edge_prop_keys) - - def _validate_classic_edge(self, g, edge): - self._validate_generic_edge_result_type(edge) - self._validate_classic_edge_properties(g, edge) - - def _validate_line_edge(self, g, edge): - self._validate_generic_edge_result_type(edge) - edge_props = self.fetch_edge_props(g, edge) - edge_prop_keys = [ep.key for ep in edge_props] - self.assertEqual(len(edge_prop_keys), 1) - self.assertIn('distance', edge_prop_keys) - - def _validate_generic_edge_result_type(self, edge): - self.assertIsInstance(edge, TravEdge) - - for attr in ('outV', 'inV', 'label', 'id'): - self.assertIsNotNone(getattr(edge, attr)) - - def 
_validate_path_result_type(self, g, objects_path): - for obj in objects_path: - if isinstance(obj, TravEdge): - self._validate_classic_edge(g, obj) - elif isinstance(obj, TravVertex): - self._validate_classic_vertex(g, obj) - else: - self.fail("Invalid object found in path " + str(obj.type)) - - def _validate_meta_property(self, g, vertex): - meta_props = g.V(vertex.id).properties().toList() - self.assertEqual(len(meta_props), 1) - meta_prop = meta_props[0] - self.assertEqual(meta_prop.value, "meta_prop") - self.assertEqual(meta_prop.key, "key") - - nested_props = g.V(vertex.id).properties().properties().toList() - self.assertEqual(len(nested_props), 2) - for nested_prop in nested_props: - self.assertTrue(nested_prop.key in ['k0', 'k1']) - self.assertTrue(nested_prop.value in ['v0', 'v1']) - - def _validate_type(self, g, vertex): - props = self.fetch_vertex_props(g, vertex) - for prop in props: - value = prop.value - key = prop.key - _validate_prop(key, value, self) - - -@requiredse -@GraphTestConfiguration.generate_tests(traversal=True) -class ImplicitExecutionTest(BaseImplicitExecutionTest, _AbstractTraversalTest): - def _test_iterate_step(self, schema, graphson): - """ - Test to validate that the iterate() step work on all dse versions. - @jira_ticket PYTHON-1155 - @expected_result iterate step works - @test_category dse graph - """ - - g = self.fetch_traversal_source(graphson) - self.execute_graph(schema.fixtures.classic(), graphson) - g.addV('person').property('name', 'Person1').iterate() - - -@requiredse -@GraphTestConfiguration.generate_tests(traversal=True) -class ImplicitAsyncExecutionTest(BaseImplicitExecutionTest): - """ - Test to validate that the traversal async execution works properly. - - @since 3.21.0 - @jira_ticket PYTHON-1129 - - @test_category dse graph - """ - - def setUp(self): - super(ImplicitAsyncExecutionTest, self).setUp() - self.ep_graphson2, self.ep_graphson3 = create_traversal_profiles(self.cluster, self.graph_name) - - - def _validate_results(self, results): - results = list(results) - self.assertEqual(len(results), 2) - self.assertIn('vadas', results) - self.assertIn('josh', results) - - def _test_promise(self, schema, graphson): - self.execute_graph(schema.fixtures.classic(), graphson) - g = self.fetch_traversal_source(graphson) - traversal_future = g.V().has('name', 'marko').out('knows').values('name').promise() - self._validate_results(traversal_future.result()) - - def _test_promise_error_is_propagated(self, schema, graphson): - self.execute_graph(schema.fixtures.classic(), graphson) - g = DseGraph().traversal_source(self.session, 'wrong_graph', execution_profile=self.ep) - traversal_future = g.V().has('name', 'marko').out('knows').values('name').promise() - with self.assertRaises(Exception): - traversal_future.result() - - def _test_promise_callback(self, schema, graphson): - self.execute_graph(schema.fixtures.classic(), graphson) - g = self.fetch_traversal_source(graphson) - future = Future() - - def cb(f): - future.set_result(f.result()) - - traversal_future = g.V().has('name', 'marko').out('knows').values('name').promise() - traversal_future.add_done_callback(cb) - self._validate_results(future.result()) - - def _test_promise_callback_on_error(self, schema, graphson): - self.execute_graph(schema.fixtures.classic(), graphson) - g = DseGraph().traversal_source(self.session, 'wrong_graph', execution_profile=self.ep) - future = Future() - - def cb(f): - try: - f.result() - except Exception as e: - future.set_exception(e) - - traversal_future = 
g.V().has('name', 'marko').out('knows').values('name').promise() - traversal_future.add_done_callback(cb) - with self.assertRaises(Exception): - future.result() - - -class ExplicitExecutionBase(GraphUnitTestCase): - - def fetch_traversal_source(self, graphson, **kwargs): - ep = self.get_execution_profile(graphson, traversal=True) - return DseGraph().traversal_source(self.session, self.graph_name, execution_profile=ep, **kwargs) - - def execute_traversal(self, traversal, graphson): - ep = self.get_execution_profile(graphson, traversal=True) - ep = self.session.get_execution_profile(ep) - context = None - if graphson == GraphProtocol.GRAPHSON_3_0: - context = { - 'cluster': self.cluster, - 'graph_name': ep.graph_options.graph_name.decode('utf-8') if ep.graph_options.graph_name else None - } - query = DseGraph.query_from_traversal(traversal, graphson, context=context) - # Use an ep that is configured with the correct row factory, and bytecode-json language flat set - result_set = self.execute_graph(query, graphson, traversal=True) - return list(result_set) - - -@requiredse -@GraphTestConfiguration.generate_tests(traversal=True) -class ExplicitExecutionTest(ExplicitExecutionBase, _AbstractTraversalTest): - """ - This test class will execute all tests of the AbstractTraversalTestClass using Explicit execution - All queries will be run by converting them to byte code, and calling execute graph explicitly with a generated ep. - """ - @staticmethod - def fetch_key_from_prop(property): - return property.label - - def _validate_classic_vertex(self, g, vertex): - validate_classic_vertex(self, vertex) - - def _validate_generic_vertex_result_type(self, g, vertex): - validate_generic_vertex_result_type(self, vertex) - - def _validate_classic_edge_properties(self, g, edge): - validate_classic_edge_properties(self, edge) - - def _validate_classic_edge(self, g, edge): - validate_classic_edge(self, edge) - - def _validate_line_edge(self, g, edge): - validate_line_edge(self, edge) - - def _validate_generic_edge_result_type(self, edge): - validate_generic_edge_result_type(self, edge) - - def _validate_type(self, g, vertex): - for key in vertex.properties: - value = vertex.properties[key][0].value - _validate_prop(key, value, self) - - def _validate_path_result_type(self, g, path_obj): - # This pre-processing is due to a change in TinkerPop - # properties are not returned automatically anymore - # with some queries. 
- for obj in path_obj.objects: - if not obj.properties: - props = [] - if isinstance(obj, Edge): - obj.properties = { - p.key: p.value - for p in self.fetch_edge_props(g, obj) - } - elif isinstance(obj, Vertex): - obj.properties = { - p.label: p.value - for p in self.fetch_vertex_props(g, obj) - } - - validate_path_result_type(self, path_obj) - - def _validate_meta_property(self, g, vertex): - - self.assertEqual(len(vertex.properties), 1) - self.assertEqual(len(vertex.properties['key']), 1) - p = vertex.properties['key'][0] - self.assertEqual(p.label, 'key') - self.assertEqual(p.value, 'meta_prop') - self.assertEqual(p.properties, {'k0': 'v0', 'k1': 'v1'}) - - -def _validate_prop(key, value, unittest): - if key == 'index': - return - - if any(key.startswith(t) for t in ('int', 'short')): - typ = int - - elif any(key.startswith(t) for t in ('long',)): - if sys.version_info >= (3, 0): - typ = int - else: - typ = long - elif any(key.startswith(t) for t in ('float', 'double')): - typ = float - elif any(key.startswith(t) for t in ('polygon',)): - typ = Polygon - elif any(key.startswith(t) for t in ('point',)): - typ = Point - elif any(key.startswith(t) for t in ('Linestring',)): - typ = LineString - elif any(key.startswith(t) for t in ('neg',)): - typ = six.string_types - elif any(key.startswith(t) for t in ('date',)): - typ = datetime.date - elif any(key.startswith(t) for t in ('time',)): - typ = datetime.time - else: - unittest.fail("Received unexpected type: %s" % key) - unittest.assertIsInstance(value, typ) - - @greaterthanorequaldse60 @GraphTestConfiguration.generate_tests(traversal=True) -class BatchStatementTests(ExplicitExecutionBase): +class BatchStatementTests(BaseExplicitExecutionTest): def setUp(self): super(BatchStatementTests, self).setUp() diff --git a/tests/integration/advanced/graph/fluent/test_graph_explicit_execution.py b/tests/integration/advanced/graph/fluent/test_graph_explicit_execution.py new file mode 100644 index 0000000000..1a5846203d --- /dev/null +++ b/tests/integration/advanced/graph/fluent/test_graph_explicit_execution.py @@ -0,0 +1,96 @@ +# Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from cassandra.graph import Vertex, Edge + +from tests.integration.advanced.graph import ( + validate_classic_vertex, validate_classic_edge, validate_generic_vertex_result_type, + validate_classic_edge_properties, validate_line_edge, + validate_generic_edge_result_type, validate_path_result_type) + +from tests.integration import requiredse, DSE_VERSION +from tests.integration.advanced import use_single_node_with_graph +from tests.integration.advanced.graph import GraphTestConfiguration +from tests.integration.advanced.graph.fluent import ( + BaseExplicitExecutionTest, _AbstractTraversalTest, _validate_prop) + + +def setup_module(): + if DSE_VERSION: + dse_options = {'graph': {'realtime_evaluation_timeout_in_seconds': 60}} + use_single_node_with_graph(dse_options=dse_options) + + +@requiredse +@GraphTestConfiguration.generate_tests(traversal=True) +class ExplicitExecutionTest(BaseExplicitExecutionTest, _AbstractTraversalTest): + """ + This test class will execute all tests of the AbstractTraversalTestClass using Explicit execution + All queries will be run by converting them to byte code, and calling execute graph explicitly with a generated ep. + """ + @staticmethod + def fetch_key_from_prop(property): + return property.label + + def _validate_classic_vertex(self, g, vertex): + validate_classic_vertex(self, vertex) + + def _validate_generic_vertex_result_type(self, g, vertex): + validate_generic_vertex_result_type(self, vertex) + + def _validate_classic_edge_properties(self, g, edge): + validate_classic_edge_properties(self, edge) + + def _validate_classic_edge(self, g, edge): + validate_classic_edge(self, edge) + + def _validate_line_edge(self, g, edge): + validate_line_edge(self, edge) + + def _validate_generic_edge_result_type(self, edge): + validate_generic_edge_result_type(self, edge) + + def _validate_type(self, g, vertex): + for key in vertex.properties: + value = vertex.properties[key][0].value + _validate_prop(key, value, self) + + def _validate_path_result_type(self, g, path_obj): + # This pre-processing is due to a change in TinkerPop + # properties are not returned automatically anymore + # with some queries. + for obj in path_obj.objects: + if not obj.properties: + props = [] + if isinstance(obj, Edge): + obj.properties = { + p.key: p.value + for p in self.fetch_edge_props(g, obj) + } + elif isinstance(obj, Vertex): + obj.properties = { + p.label: p.value + for p in self.fetch_vertex_props(g, obj) + } + + validate_path_result_type(self, path_obj) + + def _validate_meta_property(self, g, vertex): + + self.assertEqual(len(vertex.properties), 1) + self.assertEqual(len(vertex.properties['key']), 1) + p = vertex.properties['key'][0] + self.assertEqual(p.label, 'key') + self.assertEqual(p.value, 'meta_prop') + self.assertEqual(p.properties, {'k0': 'v0', 'k1': 'v1'}) diff --git a/tests/integration/advanced/graph/fluent/test_graph_implicit_execution.py b/tests/integration/advanced/graph/fluent/test_graph_implicit_execution.py new file mode 100644 index 0000000000..50e6795867 --- /dev/null +++ b/tests/integration/advanced/graph/fluent/test_graph_implicit_execution.py @@ -0,0 +1,108 @@ +# Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from concurrent.futures import Future
+from cassandra.datastax.graph.fluent import DseGraph
+
+from tests.integration import requiredse, DSE_VERSION
+from tests.integration.advanced import use_single_node_with_graph
+from tests.integration.advanced.graph import GraphTestConfiguration
+from tests.integration.advanced.graph.fluent import (
+    BaseImplicitExecutionTest, create_traversal_profiles, _AbstractTraversalTest)
+
+
+def setup_module():
+    if DSE_VERSION:
+        dse_options = {'graph': {'realtime_evaluation_timeout_in_seconds': 60}}
+        use_single_node_with_graph(dse_options=dse_options)
+
+
+@requiredse
+@GraphTestConfiguration.generate_tests(traversal=True)
+class ImplicitExecutionTest(BaseImplicitExecutionTest, _AbstractTraversalTest):
+    def _test_iterate_step(self, schema, graphson):
+        """
+        Test to validate that the iterate() step works on all DSE versions.
+        @jira_ticket PYTHON-1155
+        @expected_result iterate step works
+        @test_category dse graph
+        """
+
+        g = self.fetch_traversal_source(graphson)
+        self.execute_graph(schema.fixtures.classic(), graphson)
+        g.addV('person').property('name', 'Person1').iterate()
+
+
+@requiredse
+@GraphTestConfiguration.generate_tests(traversal=True)
+class ImplicitAsyncExecutionTest(BaseImplicitExecutionTest):
+    """
+    Test to validate that the traversal async execution works properly.
+
+    @since 3.21.0
+    @jira_ticket PYTHON-1129
+
+    @test_category dse graph
+    """
+
+    def setUp(self):
+        super(ImplicitAsyncExecutionTest, self).setUp()
+        self.ep_graphson2, self.ep_graphson3 = create_traversal_profiles(self.cluster, self.graph_name)
+
+    def _validate_results(self, results):
+        results = list(results)
+        self.assertEqual(len(results), 2)
+        self.assertIn('vadas', results)
+        self.assertIn('josh', results)
+
+    def _test_promise(self, schema, graphson):
+        self.execute_graph(schema.fixtures.classic(), graphson)
+        g = self.fetch_traversal_source(graphson)
+        traversal_future = g.V().has('name', 'marko').out('knows').values('name').promise()
+        self._validate_results(traversal_future.result())
+
+    def _test_promise_error_is_propagated(self, schema, graphson):
+        self.execute_graph(schema.fixtures.classic(), graphson)
+        g = DseGraph().traversal_source(self.session, 'wrong_graph', execution_profile=self.ep)
+        traversal_future = g.V().has('name', 'marko').out('knows').values('name').promise()
+        with self.assertRaises(Exception):
+            traversal_future.result()
+
+    def _test_promise_callback(self, schema, graphson):
+        self.execute_graph(schema.fixtures.classic(), graphson)
+        g = self.fetch_traversal_source(graphson)
+        future = Future()
+
+        def cb(f):
+            future.set_result(f.result())
+
+        traversal_future = g.V().has('name', 'marko').out('knows').values('name').promise()
+        traversal_future.add_done_callback(cb)
+        self._validate_results(future.result())
+
+    def _test_promise_callback_on_error(self, schema, graphson):
+        self.execute_graph(schema.fixtures.classic(), graphson)
+        g = DseGraph().traversal_source(self.session, 'wrong_graph', execution_profile=self.ep)
+        future = Future()
+
+        def cb(f):
+            try:
+                f.result()
+            except Exception as e:
+                future.set_exception(e)
+
+        
traversal_future = g.V().has('name', 'marko').out('knows').values('name').promise() + traversal_future.add_done_callback(cb) + with self.assertRaises(Exception): + future.result() From f39467790db9220e56656de49fcddfa3f2601f05 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 12 May 2020 11:26:32 -0400 Subject: [PATCH 060/211] Use PROTOCOL_TLS to select the higher protocol version --- cassandra/datastax/cloud/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cassandra/datastax/cloud/__init__.py b/cassandra/datastax/cloud/__init__.py index f79d72a7a2..1459b28d8c 100644 --- a/cassandra/datastax/cloud/__init__.py +++ b/cassandra/datastax/cloud/__init__.py @@ -23,7 +23,7 @@ _HAS_SSL = True try: - from ssl import SSLContext, PROTOCOL_TLSv1, CERT_REQUIRED + from ssl import SSLContext, PROTOCOL_TLS, CERT_REQUIRED except: _HAS_SSL = False @@ -169,7 +169,7 @@ def parse_metadata_info(config, http_data): def _ssl_context_from_cert(ca_cert_location, cert_location, key_location): - ssl_context = SSLContext(PROTOCOL_TLSv1) + ssl_context = SSLContext(PROTOCOL_TLS) ssl_context.load_verify_locations(ca_cert_location) ssl_context.verify_mode = CERT_REQUIRED ssl_context.load_cert_chain(certfile=cert_location, keyfile=key_location) From ea250fbef856d1f26b89e94d8fc3c6bed9f8386c Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 12 May 2020 16:11:37 -0400 Subject: [PATCH 061/211] Add python-* branches and Cython to Per Commit stage --- Jenkinsfile | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index e7a53f12cb..22c0e27de2 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -107,7 +107,7 @@ def executeStandardTests() { set +o allexport SIMULACRON_JAR="${HOME}/simulacron.jar" - #SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=simulacron_results.xml tests/integration/simulacron/ || true + SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=simulacron_results.xml tests/integration/simulacron/ || true ''' sh label: 'Execute CQL engine integration tests', script: '''#!/bin/bash -lex @@ -116,7 +116,7 @@ def executeStandardTests() { . 
${HOME}/environment.txt set +o allexport - #EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=cqle_results.xml tests/integration/cqlengine/ || true + EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=cqle_results.xml tests/integration/cqlengine/ || true ''' sh label: 'Execute Apache CassandraⓇ integration tests', script: '''#!/bin/bash -lex @@ -125,7 +125,7 @@ def executeStandardTests() { . ${HOME}/environment.txt set +o allexport - #EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml tests/integration/standard/test_cluster.py || true + EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml tests/integration/standard/ || true ''' if (env.CASSANDRA_VERSION.split('-')[0] == 'dse' && env.CASSANDRA_VERSION.split('-')[1] != '4.8') { @@ -135,7 +135,7 @@ def executeStandardTests() { . ${HOME}/environment.txt set +o allexport - #EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} DSE_VERSION=${DSE_VERSION} ADS_HOME="${HOME}/" VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=dse_results.xml tests/integration/advanced/ || true + EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} DSE_VERSION=${DSE_VERSION} ADS_HOME="${HOME}/" VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=dse_results.xml tests/integration/advanced/ || true ''' } @@ -145,7 +145,7 @@ def executeStandardTests() { . 
${HOME}/environment.txt set +o allexport - #EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CLOUD_PROXY_PATH="${HOME}/proxy/" CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=advanced_results.xml tests/integration/cloud/ || true + EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CLOUD_PROXY_PATH="${HOME}/proxy/" CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=advanced_results.xml tests/integration/cloud/ || true ''' if (env.EXECUTE_LONG_TESTS == 'True') { @@ -155,7 +155,7 @@ def executeStandardTests() { . ${HOME}/environment.txt set +o allexport - #EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --exclude-dir=tests/integration/long/upgrade --with-ignore-docstrings --with-xunit --xunit-file=long_results.xml tests/integration/long/ || true + EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --exclude-dir=tests/integration/long/upgrade --with-ignore-docstrings --with-xunit --xunit-file=long_results.xml tests/integration/long/ || true ''' } } @@ -609,7 +609,7 @@ pipeline { } when { beforeAgent true - branch pattern: '((dev|long)-)+python.*', comparator: 'REGEXP' + branch pattern: '((dev|long)-)?python-.*', comparator: 'REGEXP' allOf { expression { params.ADHOC_BUILD_TYPE == 'BUILD' } expression { params.CI_SCHEDULE == 'DO-NOT-CHANGE-THIS-SELECTION' } @@ -630,7 +630,7 @@ pipeline { } axis { name 'CYTHON_ENABLED' - values 'False' + values 'False', 'True' } } From 0419d587f9c670821ab861e921ef1bb2c7dbd471 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 12 May 2020 20:30:33 -0400 Subject: [PATCH 062/211] Use CYTHON_ENABLED to determine the driver should cythonize modules --- Jenkinsfile | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 22c0e27de2..c168189858 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -48,7 +48,7 @@ def initializeEnvironment() { pip install nose-ignore-docstring nose-exclude service_identity ''' - if (params.CYTHON) { + if (env.CYTHON_ENABLED == 'True') { sh label: 'Install cython modules', script: '''#!/bin/bash -lex pip install cython numpy ''' @@ -71,7 +71,7 @@ def initializeEnvironment() { } def installDriverAndCompileExtensions() { - if (params.CYTHON) { + if (env.CYTHON_ENABLED == 'True') { sh label: 'Install the driver and compile with C extensions with Cython', script: '''#!/bin/bash -lex python setup.py build_ext --inplace ''' @@ -87,7 +87,7 @@ def executeStandardTests() { * Run the cython unit tests, this is not done in travis because it takes too much time for the * whole matrix to build with cython */ - if (params.CYTHON) { + if (env.CYTHON_ENABLED == 'True') { sh label: 
'Execute Cython unit tests', script: '''#!/bin/bash -lex # Load CCM environment variables set -o allexport @@ -291,7 +291,7 @@ def describeScheduledTestingStage() { script { def type = params.CI_SCHEDULE.toLowerCase().capitalize() def displayName = "${type} schedule (${env.EVENT_LOOP_MANAGER}" - if (params.CYTHON) { + if (env.CYTHON_ENABLED == 'True') { displayName += " | Cython" } if (params.PROFILE != 'NONE') { @@ -303,7 +303,7 @@ def describeScheduledTestingStage() { def serverVersionDescription = "${params.CI_SCHEDULE_SERVER_VERSION.replaceAll(' ', ', ')} server version(s) in the matrix" def pythonVersionDescription = "${params.CI_SCHEDULE_PYTHON_VERSION.replaceAll(' ', ', ')} Python version(s) in the matrix" def description = "${type} scheduled testing using ${env.EVENT_LOOP_MANAGER} event loop manager" - if (params.CYTHON) { + if (env.CYTHON_ENABLED == 'True') { description += ", with Cython enabled" } if (params.PROFILE != 'NONE') { @@ -333,7 +333,7 @@ def describeAdhocTestingStage() { } } def displayName = "${params.ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION} for v${params.ADHOC_BUILD_AND_EXECUTE_TESTS_PYTHON_VERSION} (${env.EVENT_LOOP_MANAGER}" - if (params.CYTHON) { + if (env.CYTHON_ENABLED == 'True') { displayName += " | Cython" } if (params.PROFILE != 'NONE') { @@ -343,7 +343,7 @@ def describeAdhocTestingStage() { currentBuild.displayName = displayName def description = "Testing ${serverDisplayName} ${serverVersion} using ${env.EVENT_LOOP_MANAGER} against Python ${params.ADHOC_BUILD_AND_EXECUTE_TESTS_PYTHON_VERSION}" - if (params.CYTHON) { + if (env.CYTHON_ENABLED == 'True') { description += ", with Cython" } if (params.PROFILE == 'NONE') { @@ -409,8 +409,7 @@ pipeline { 'dse-5.1', // Legacy DataStax Enterprise 'dse-6.0', // Previous DataStax Enterprise 'dse-6.7', // Previous DataStax Enterprise - 'dse-6.8.0', // Current DataStax Enterprise - 'dse-6.8', // Development DataStax Enterprise + 'dse-6.8', // Current DataStax Enterprise 'ALL'], description: '''Apache CassandraⓇ and DataStax Enterprise server version to use for adhoc BUILD-AND-EXECUTE-TESTS ONLY! @@ -456,10 +455,6 @@ pipeline { - - - - @@ -600,6 +595,7 @@ pipeline { EVENT_LOOP_MANAGER = "${params.EVENT_LOOP_MANAGER.toLowerCase()}" EXECUTE_LONG_TESTS = "${params.EXECUTE_LONG_TESTS ? 'True' : 'False'}" CCM_ENVIRONMENT_SHELL = '/usr/local/bin/ccm_environment.sh' + CCM_MAX_HEAP_SIZE = '1024M' } stages { From d6a5e4dd5ee06081eb55caebbf8a61417cafce37 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Wed, 13 May 2020 14:44:22 -0400 Subject: [PATCH 063/211] Add python-1238 changelog entry --- CHANGELOG.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index db2d7cb468..4aab9e19e0 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,11 @@ +3.24.0 +====== +Not released + +Bug Fixes +--------- +* Unable to connect to a cloud cluster using Ubuntu 20.04 (PYTHON-1238) + 3.23.0 ====== April 6, 2020 From 0b63a242ae03b3c6700ccc9ec3b5d2b6e5cc0a3f Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Wed, 13 May 2020 15:27:03 -0400 Subject: [PATCH 064/211] jenkins try to fix the parameterizedCron condition --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index c168189858..1bcb78226b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -547,7 +547,7 @@ pipeline { } triggers { - parameterizedCron(branchPatternCron.matcher(env.BRANCH_NAME).matches() && !riptanoPatternCron.matcher(env.GIT_URL).find() ? 
""" + parameterizedCron((branchPatternCron.matcher(env.BRANCH_NAME).matches() && !riptanoPatternCron.matcher(env.GIT_URL).find()) ? """ # Every weeknight (Monday - Friday) around 4:00 AM # These schedules will run with and without Cython enabled for Python v2.7.14 and v3.5.6 H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=2.7.14;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1 dse-6.0 dse-6.7 From 892340fad6438f270ff8183ec72603340aacce2c Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Thu, 14 May 2020 11:41:19 -0400 Subject: [PATCH 065/211] Bump geomet dependency to <0.3 --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 42bc6d0e9e..f784fba1b9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -geomet>=0.1,<0.2 +geomet>=0.1,<0.3 six >=1.9 futures <=2.2.0 # Futures is not required for Python 3, but it works up through 2.2.0 (after which it introduced breaking syntax). diff --git a/setup.py b/setup.py index e157228f56..bee1052e90 100644 --- a/setup.py +++ b/setup.py @@ -404,7 +404,7 @@ def run_setup(extensions): sys.stderr.write("Bypassing Cython setup requirement\n") dependencies = ['six >=1.9', - 'geomet>=0.1,<0.2'] + 'geomet>=0.1,<0.3'] if not PY3: dependencies.append('futures') From 932b535b2ddcbe2bddbdc9456676919cfb92c57f Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Thu, 14 May 2020 11:41:44 -0400 Subject: [PATCH 066/211] Uninstall geomet when testing Cassandra --- Jenkinsfile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index 1bcb78226b..fe5e60652a 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -42,6 +42,11 @@ def initializeEnvironment() { sh label: 'Install Apache CassandraⓇ requirements', script: '''#!/bin/bash -lex pip install -r test-requirements.txt ''' + + sh label: 'Uninstall the geomet dependency since it is not required for Cassandra', script: '''#!/bin/bash -lex + pip uninstall -y geomet + ''' + } sh label: 'Install unit test modules', script: '''#!/bin/bash -lex From ffc9e372f9a971edb68365c6ec717309995ee4fa Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Thu, 14 May 2020 15:02:39 -0400 Subject: [PATCH 067/211] Make geomet an optional dependency at runtime --- CHANGELOG.rst | 8 ++++++++ Jenkinsfile | 4 ++-- cassandra/util.py | 20 +++++++++++++++++++- tests/unit/advanced/test_geometry.py | 4 ++-- 4 files changed, 31 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4aab9e19e0..0df3dd6a20 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,10 +2,18 @@ ====== Not released +Features +-------- +* Make geomet an optional dependency at runtime (PYTHON-1237) + Bug Fixes --------- * Unable to connect to a cloud cluster using Ubuntu 20.04 (PYTHON-1238) +Others +------ +* Bump geomet dependency version to 0.2 (PYTHON-1243) + 3.23.0 ====== April 6, 2020 diff --git a/Jenkinsfile b/Jenkinsfile index fe5e60652a..36151926f9 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -552,7 +552,7 @@ pipeline { } triggers { - parameterizedCron((branchPatternCron.matcher(env.BRANCH_NAME).matches() && !riptanoPatternCron.matcher(env.GIT_URL).find()) ? """ + parameterizedCron((branchPatternCron.matcher(env.BRANCH_NAME).matches() && !riptanoPatternCron.matcher(GIT_URL).find()) ? 
""" # Every weeknight (Monday - Friday) around 4:00 AM # These schedules will run with and without Cython enabled for Python v2.7.14 and v3.5.6 H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=2.7.14;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1 dse-6.0 dse-6.7 @@ -600,7 +600,7 @@ pipeline { EVENT_LOOP_MANAGER = "${params.EVENT_LOOP_MANAGER.toLowerCase()}" EXECUTE_LONG_TESTS = "${params.EXECUTE_LONG_TESTS ? 'True' : 'False'}" CCM_ENVIRONMENT_SHELL = '/usr/local/bin/ccm_environment.sh' - CCM_MAX_HEAP_SIZE = '1024M' + CCM_MAX_HEAP_SIZE = '1536M' } stages { diff --git a/cassandra/util.py b/cassandra/util.py index 0a00533b33..ead58c82f6 100644 --- a/cassandra/util.py +++ b/cassandra/util.py @@ -17,7 +17,6 @@ import datetime from functools import total_ordering import logging -from geomet import wkt from itertools import chain import random import re @@ -25,6 +24,15 @@ import uuid import sys +_HAS_GEOMET = True +try: + from geomet import wkt +except: + _HAS_GEOMET = False + + +from cassandra import DriverException + DATETIME_EPOC = datetime.datetime(1970, 1, 1) UTC_DATETIME_EPOC = datetime.datetime.utcfromtimestamp(0) @@ -35,6 +43,7 @@ assert sys.byteorder in ('little', 'big') is_little_endian = sys.byteorder == 'little' + def datetime_from_timestamp(timestamp): """ Creates a timezone-agnostic datetime from timestamp (in seconds) in a consistent manner. @@ -1308,6 +1317,9 @@ def from_wkt(s): """ Parse a Point geometry from a wkt string and return a new Point object. """ + if not _HAS_GEOMET: + raise DriverException("Geomet is required to deserialize a wkt geometry.") + try: geom = wkt.loads(s) except ValueError: @@ -1363,6 +1375,9 @@ def from_wkt(s): """ Parse a LineString geometry from a wkt string and return a new LineString object. """ + if not _HAS_GEOMET: + raise DriverException("Geomet is required to deserialize a wkt geometry.") + try: geom = wkt.loads(s) except ValueError: @@ -1444,6 +1459,9 @@ def from_wkt(s): """ Parse a Polygon geometry from a wkt string and return a new Polygon object. 
""" + if not _HAS_GEOMET: + raise DriverException("Geomet is required to deserialize a wkt geometry.") + try: geom = wkt.loads(s) except ValueError: diff --git a/tests/unit/advanced/test_geometry.py b/tests/unit/advanced/test_geometry.py index 7cd8f666e6..4fa2644ff2 100644 --- a/tests/unit/advanced/test_geometry.py +++ b/tests/unit/advanced/test_geometry.py @@ -22,7 +22,7 @@ from cassandra.cqltypes import lookup_casstype from cassandra.protocol import ProtocolVersion from cassandra.cqltypes import PointType, LineStringType, PolygonType, WKBGeometryType -from cassandra.util import Point, LineString, Polygon, _LinearRing, Distance +from cassandra.util import Point, LineString, Polygon, _LinearRing, Distance, _HAS_GEOMET wkb_be = 0 wkb_le = 1 @@ -104,7 +104,7 @@ def test_eq(self): # specifically use assertFalse(eq) to make sure we're using the geo __eq__ operator self.assertFalse(geo == object()) - +@unittest.skipUnless(_HAS_GEOMET, "Skip wkt geometry tests when geomet is not installed") class WKTTest(unittest.TestCase): def test_line_parse(self): From 696b047e90cd3ffc3ddceb9fb5ee84c11e5c9174 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 19 May 2020 13:53:24 -0400 Subject: [PATCH 068/211] Fix writing data in a Boolean field using the Fluent API --- CHANGELOG.rst | 1 + cassandra/datastax/graph/graphson.py | 4 +-- tests/integration/advanced/graph/__init__.py | 2 ++ .../advanced/graph/test_graph_datatype.py | 26 +++++++++++++------ 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 0df3dd6a20..6c40a5b8a0 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,6 +9,7 @@ Features Bug Fixes --------- * Unable to connect to a cloud cluster using Ubuntu 20.04 (PYTHON-1238) +* [GRAPH] Can't write data in a Boolean field using the Fluent API (PYTHON-1239) Others ------ diff --git a/cassandra/datastax/graph/graphson.py b/cassandra/datastax/graph/graphson.py index 8419c7992b..956d7d7f18 100644 --- a/cassandra/datastax/graph/graphson.py +++ b/cassandra/datastax/graph/graphson.py @@ -52,7 +52,7 @@ DSE Graph GraphSON 2.0 GraphSON 3.0 | Python Driver ------------ | -------------- | -------------- | ------------ text | string | string | str -boolean | g:Boolean | g:Boolean | bool +boolean | | | bool bigint | g:Int64 | g:Int64 | long int | g:Int32 | g:Int32 | int double | g:Double | g:Double | float @@ -125,7 +125,7 @@ class TextTypeIO(GraphSONTypeIO): class BooleanTypeIO(GraphSONTypeIO): - graphson_base_type = 'Boolean' + graphson_base_type = None cql_type = 'boolean' @classmethod diff --git a/tests/integration/advanced/graph/__init__.py b/tests/integration/advanced/graph/__init__.py index 0573cf2557..6c9458dd02 100644 --- a/tests/integration/advanced/graph/__init__.py +++ b/tests/integration/advanced/graph/__init__.py @@ -418,6 +418,8 @@ class ClassicGraphFixtures(GraphFixtures): @staticmethod def datatypes(): data = { + "boolean1": ["Boolean()", True, None], + "boolean2": ["Boolean()", False, None], "point1": ["Point()", Point(.5, .13), GraphSON1Deserializer.deserialize_point], "point2": ["Point()", Point(-5, .0), GraphSON1Deserializer.deserialize_point], diff --git a/tests/integration/advanced/graph/test_graph_datatype.py b/tests/integration/advanced/graph/test_graph_datatype.py index d4d28b80df..222b1f5ace 100644 --- a/tests/integration/advanced/graph/test_graph_datatype.py +++ b/tests/integration/advanced/graph/test_graph_datatype.py @@ -28,6 +28,7 @@ from cassandra.graph.query import GraphProtocol from cassandra.graph.types import VertexProperty 
+from tests.util import wait_until from tests.integration.advanced.graph import BasicGraphUnitTestCase, ClassicGraphFixtures, \ ClassicGraphSchema, CoreGraphSchema from tests.integration.advanced.graph import VertexLabel, GraphTestConfiguration, GraphUnitTestCase @@ -94,14 +95,18 @@ def _test_all_datatypes(self, schema, graphson): schema.create_vertex_label(self.session, vertex_label, execution_profile=ep) vertex = list(schema.add_vertex(self.session, vertex_label, property_name, value, execution_profile=ep))[0] - vertex_properties = list(schema.get_vertex_properties( - self.session, vertex, execution_profile=ep)) + def get_vertex_properties(): + return list(schema.get_vertex_properties( + self.session, vertex, execution_profile=ep)) + prop_returned = 1 if DSE_VERSION < Version('5.1') else 2 # include pkid >=5.1 + wait_until( + lambda: len(get_vertex_properties()) == prop_returned, 0.2, 15) + + vertex_properties = get_vertex_properties() if graphson == GraphProtocol.GRAPHSON_1_0: vertex_properties = [vp.as_vertex_property() for vp in vertex_properties] - prop_returned = 1 if DSE_VERSION < Version('5.1') else 2 # include pkid >=5.1 - self.assertEqual(len(vertex_properties), prop_returned) for vp in vertex_properties: if vp.label == 'pkid': continue @@ -109,7 +114,7 @@ def _test_all_datatypes(self, schema, graphson): self.assertIsInstance(vp, VertexProperty) self.assertEqual(vp.label, property_name) if graphson == GraphProtocol.GRAPHSON_1_0: - deserialized_value = deserializer(vp.value) + deserialized_value = deserializer(vp.value) if deserializer else vp.value self.assertEqual(deserialized_value, value) else: self.assertEqual(vp.value, value) @@ -171,10 +176,15 @@ def __test_udt(self, schema, graphson, address_class, address_with_tags_class, schema.create_vertex_label(self.session, vertex_label, execution_profile=ep) vertex = list(schema.add_vertex(self.session, vertex_label, property_name, value, execution_profile=ep))[0] - vertex_properties = list(schema.get_vertex_properties( - self.session, vertex, execution_profile=ep)) - self.assertEqual(len(vertex_properties), 2) # include pkid + def get_vertex_properties(): + return list(schema.get_vertex_properties( + self.session, vertex, execution_profile=ep)) + + wait_until( + lambda: len(get_vertex_properties()) == 2, 0.2, 15) + + vertex_properties = get_vertex_properties() for vp in vertex_properties: if vp.label == 'pkid': continue From e513c54e5f9fa3cdfd030c82a446ac1421441c0f Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Wed, 20 May 2020 09:20:49 -0400 Subject: [PATCH 069/211] Bump gremlinpython dependency to 3.4.6 --- CHANGELOG.rst | 1 + Jenkinsfile | 8 ++++++-- setup.py | 2 +- test-datastax-requirements.txt | 2 +- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 0df3dd6a20..a32679e13b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -13,6 +13,7 @@ Bug Fixes Others ------ * Bump geomet dependency version to 0.2 (PYTHON-1243) +* Bump gremlinpython dependency version to 3.4.6 (PYTHON-1212) 3.23.0 ====== diff --git a/Jenkinsfile b/Jenkinsfile index 36151926f9..774d357c29 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -623,7 +623,11 @@ pipeline { axis { name 'CASSANDRA_VERSION' values '3.11', // Current Apache Cassandra - 'dse-6.8.0' // Current DataStax Enterprise + 'dse-5.0', // Long Term Support DataStax Enterprise + 'dse-5.1', // Legacy DataStax Enterprise + 'dse-6.0', // Previous DataStax Enterprise + 'dse-6.7', // Previous DataStax Enterprise + 'dse-6.8' // Current DataStax Enterprise 
} axis { name 'PYTHON_VERSION' @@ -631,7 +635,7 @@ pipeline { } axis { name 'CYTHON_ENABLED' - values 'False', 'True' + values 'False' } } diff --git a/setup.py b/setup.py index bee1052e90..745d05dfb3 100644 --- a/setup.py +++ b/setup.py @@ -410,7 +410,7 @@ def run_setup(extensions): dependencies.append('futures') _EXTRAS_REQUIRE = { - 'graph': ['gremlinpython==3.3.4'] + 'graph': ['gremlinpython==3.4.6'] } setup( diff --git a/test-datastax-requirements.txt b/test-datastax-requirements.txt index 69cc3a9484..3a47b8de16 100644 --- a/test-datastax-requirements.txt +++ b/test-datastax-requirements.txt @@ -1,3 +1,3 @@ -r test-requirements.txt kerberos -gremlinpython==3.3.4 +gremlinpython==3.4.6 From 7dcda6da6331e1b9c7628cd69b5254d4c8791a81 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Wed, 20 May 2020 18:42:13 -0400 Subject: [PATCH 070/211] Wait the simulacron http server is ready before running tests --- Jenkinsfile | 9 +++++---- tests/integration/simulacron/utils.py | 4 +++- tests/util.py | 14 ++++++++------ 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 774d357c29..9ba138cf4a 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -290,6 +290,11 @@ def describePerCommitStage() { currentBuild.displayName = "Per-Commit (${env.EVENT_LOOP_MANAGER} | ${type.capitalize()})" currentBuild.description = "Per-Commit build and ${type} testing of ${serverDescription} against Python v2.7.14 and v3.5.6 using ${env.EVENT_LOOP_MANAGER} event loop manager" } + + sh label: 'Describe the python environment', script: '''#!/bin/bash -lex + python -V + pip freeze + ''' } def describeScheduledTestingStage() { @@ -623,10 +628,6 @@ pipeline { axis { name 'CASSANDRA_VERSION' values '3.11', // Current Apache Cassandra - 'dse-5.0', // Long Term Support DataStax Enterprise - 'dse-5.1', // Legacy DataStax Enterprise - 'dse-6.0', // Previous DataStax Enterprise - 'dse-6.7', // Previous DataStax Enterprise 'dse-6.8' // Current DataStax Enterprise } axis { diff --git a/tests/integration/simulacron/utils.py b/tests/integration/simulacron/utils.py index 870b60bd46..b1d9debaf3 100644 --- a/tests/integration/simulacron/utils.py +++ b/tests/integration/simulacron/utils.py @@ -19,6 +19,7 @@ from cassandra.metadata import SchemaParserV4, SchemaParserDSE68 +from tests.util import wait_until_not_raised from tests.integration import CASSANDRA_VERSION, SIMULACRON_JAR, DSE_VERSION DEFAULT_CLUSTER = "python_simulacron_cluster" @@ -110,7 +111,8 @@ def submit_request(self, query): request.add_header("Content-Type", 'application/json') request.add_header("Content-Length", len(data)) - connection = opener.open(request) + # wait that simulacron is ready and listening + connection = wait_until_not_raised(lambda: opener.open(request), 1, 10) return connection.read().decode('utf-8') def prime_server_versions(self): diff --git a/tests/util.py b/tests/util.py index c5dfd8a387..5c7ac2416f 100644 --- a/tests/util.py +++ b/tests/util.py @@ -15,6 +15,7 @@ import time from functools import wraps + def wait_until(condition, delay, max_attempts): """ Executes a function at regular intervals while the condition @@ -44,22 +45,23 @@ def wait_until_not_raised(condition, delay, max_attempts): """ def wrapped_condition(): try: - condition() + result = condition() except: - return False + return False, None - return True + return True, result attempt = 0 while attempt < (max_attempts-1): attempt += 1 - if wrapped_condition(): - return + success, result = wrapped_condition() + if success: + return result 
time.sleep(delay)
 
     # last attempt, let the exception raise
-    condition()
+    return condition()
 
 
 def late(seconds=1):

From c2bf6255cb8c6af4a2d34edd914078db8159811d Mon Sep 17 00:00:00 2001
From: Alan Boudreault
Date: Thu, 21 May 2020 11:09:58 -0400
Subject: [PATCH 071/211] Add use_default_tempdir cloud config options

---
 CHANGELOG.rst                           |  1 +
 cassandra/datastax/cloud/__init__.py    |  3 ++-
 tests/unit/advanced/cloud/test_cloud.py | 42 +++++++++++++++++++++++--
 3 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index d6b1971059..7e1033e566 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -5,6 +5,7 @@ Not released
 Features
 --------
 * Make geomet an optional dependency at runtime (PYTHON-1237)
+* Add use_default_tempdir cloud config options (PYTHON-1245)
 
 Bug Fixes
 ---------
diff --git a/cassandra/datastax/cloud/__init__.py b/cassandra/datastax/cloud/__init__.py
index 1459b28d8c..ecb4a73fd4 100644
--- a/cassandra/datastax/cloud/__init__.py
+++ b/cassandra/datastax/cloud/__init__.py
@@ -97,8 +97,9 @@ def get_cloud_config(cloud_config, create_pyopenssl_context=False):
 
 def read_cloud_config_from_zip(cloud_config, create_pyopenssl_context):
     secure_bundle = cloud_config['secure_connect_bundle']
+    use_default_tempdir = cloud_config.get('use_default_tempdir', None)
     with ZipFile(secure_bundle) as zipfile:
-        base_dir = os.path.dirname(secure_bundle)
+        base_dir = tempfile.gettempdir() if use_default_tempdir else os.path.dirname(secure_bundle)
         tmp_dir = tempfile.mkdtemp(dir=base_dir)
         try:
             zipfile.extractall(path=tmp_dir)
diff --git a/tests/unit/advanced/cloud/test_cloud.py b/tests/unit/advanced/cloud/test_cloud.py
index e6001fb474..ab18f0af72 100644
--- a/tests/unit/advanced/cloud/test_cloud.py
+++ b/tests/unit/advanced/cloud/test_cloud.py
@@ -6,23 +6,29 @@
 # You may obtain a copy of the License at
 #
 # http://www.datastax.com/terms/datastax-dse-driver-license-terms
+import tempfile
+import os
+import shutil
+import six
+
 try:
     import unittest2 as unittest
 except ImportError:
     import unittest  # noqa
 
-import os
-
+from cassandra import DriverException
 from cassandra.datastax import cloud
 from mock import patch
 
+from tests import notwindows
+
 
 class CloudTests(unittest.TestCase):
     current_path = os.path.dirname(os.path.abspath(__file__))
+    creds_path = os.path.join(current_path, './creds.zip')
     config_zip = {
-        'secure_connect_bundle': os.path.join(current_path, './creds.zip')
+        'secure_connect_bundle': creds_path
    }
     metadata_json = """
     {"region":"local",
@@ -75,3 +81,33 @@ def test_parse_metadata_info(self):
         ]
         for host_id in host_ids:
             self.assertIn(host_id, config.host_ids)
+
+    @notwindows
+    def test_use_default_tempdir(self):
+        tmpdir = tempfile.mkdtemp()
+
+        def clean_tmp_dir():
+            os.chmod(tmpdir, 0o777)
+            shutil.rmtree(tmpdir)
+        self.addCleanup(clean_tmp_dir)
+
+        tmp_creds_path = os.path.join(tmpdir, 'creds.zip')
+        shutil.copyfile(self.creds_path, tmp_creds_path)
+        os.chmod(tmpdir, 0o544)
+        config = {
+            'secure_connect_bundle': tmp_creds_path
+        }
+
+        # The directory is not writable; we expect a permission error
+        exc = PermissionError if six.PY3 else OSError
+        with self.assertRaises(exc):
+            cloud.get_cloud_config(config)
+
+        # With use_default_tempdir, we expect a connection-refused error
+        # since the cluster doesn't exist
+        with self.assertRaises(DriverException):
+            config = {
+                'secure_connect_bundle': tmp_creds_path,
+                'use_default_tempdir': True
+            }
+            cloud.get_cloud_config(config)

From 63525402596b59128e2ed8e7c0bf231976fad2f5 Mon Sep 17 00:00:00 2001
From: Alan Boudreault
Date: Thu, 21 May 2020 12:26:33 -0400
Subject: [PATCH 072/211] Set the jenkins config to always run the unit tests

---
 Jenkinsfile | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 9ba138cf4a..97c9ad676e 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -88,22 +88,17 @@ def installDriverAndCompileExtensions() {
 }
 
 def executeStandardTests() {
-  /*
-   * Run the cython unit tests, this is not done in travis because it takes too much time for the
-   * whole matrix to build with cython
-   */
-  if (env.CYTHON_ENABLED == 'True') {
-    sh label: 'Execute Cython unit tests', script: '''#!/bin/bash -lex
-      # Load CCM environment variables
-      set -o allexport
-      . ${HOME}/environment.txt
-      set +o allexport
-
-      EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} VERIFY_CYTHON=True nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_results.xml tests/unit/ || true
-      EVENT_LOOP_MANAGER=eventlet VERIFY_CYTHON=True nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_eventlet_results.xml tests/unit/io/test_eventletreactor.py || true
-      EVENT_LOOP_MANAGER=gevent VERIFY_CYTHON=True nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_gevent_results.xml tests/unit/io/test_geventreactor.py || true
-    '''
-  }
+  sh label: 'Execute unit tests', script: '''#!/bin/bash -lex
+    # Load CCM environment variables
+    set -o allexport
+    . ${HOME}/environment.txt
+    set +o allexport
+
+    EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_results.xml tests/unit/ || true
+    EVENT_LOOP_MANAGER=eventlet VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_eventlet_results.xml tests/unit/io/test_eventletreactor.py || true
+    EVENT_LOOP_MANAGER=gevent VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_gevent_results.xml tests/unit/io/test_geventreactor.py || true
+  '''
 
   sh label: 'Execute Simulacron integration tests', script: '''#!/bin/bash -lex
     # Load CCM environment variables

From dfd06e890c459b4eb28216f6479dd3e97e90f3c5 Mon Sep 17 00:00:00 2001
From: Alan Boudreault
Date: Thu, 21 May 2020 15:49:02 -0400
Subject: [PATCH 073/211] Document cloud use_default_tempdir

---
 cassandra/cluster.py |  5 ++++-
 docs/cloud.rst       | 19 +++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/cassandra/cluster.py b/cassandra/cluster.py
index 7120bdb9e1..f69625b1be 100644
--- a/cassandra/cluster.py
+++ b/cassandra/cluster.py
@@ -993,7 +993,10 @@ def default_retry_policy(self, policy):
 
         {
             # path to the secure connect bundle
-            'secure_connect_bundle': '/path/to/secure-connect-dbname.zip'
+            'secure_connect_bundle': '/path/to/secure-connect-dbname.zip',
+
+            # optional config options
+            'use_default_tempdir': True  # use the system temp dir for the zip extraction
         }
 
         The zip file will be temporarily extracted in the same directory to
diff --git a/docs/cloud.rst b/docs/cloud.rst
index e8cc2fa750..7ff7693736 100644
--- a/docs/cloud.rst
+++ b/docs/cloud.rst
@@ -19,6 +19,25 @@ To connect to a DataStax Astra cluster:
     cluster = Cluster(cloud=cloud_config, auth_provider=auth_provider)
     session = cluster.connect()
 
+Cloud Config Options
+====================
+
+use_default_tempdir
++++++++++++++++++++
+
+The secure connect bundle needs to be extracted to load the certificates into the SSLContext.
+By default, the zip location is used as the base dir for the extraction. In some environments,
+the zip location file system is read-only (e.g. Azure Functions). With *use_default_tempdir* set to *True*,
+the system's default temporary directory will be used as the base dir instead.
+
+.. code:: python
+
+    cloud_config = {
+        'secure_connect_bundle': '/path/to/secure-connect-dbname.zip',
+        'use_default_tempdir': True
+    }
+    ...
+
 Astra Differences
 ==================
 In most circumstances, the client code for interacting with an Astra cluster will be the same as interacting with any other Cassandra cluster.
The exceptions being: From ed7c0eebb0227cd84242d0be07978882d4d7bc1c Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Mon, 1 Jun 2020 15:44:36 -0400 Subject: [PATCH 074/211] Fix tox utf8 issue and travis lz4 --- .travis.yml | 3 ++- tox.ini | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f1fff4bb63..b485e21227 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,7 +24,8 @@ addons: - libev-dev install: - - pip install tox-travis lz4 + - pip install tox-travis + - if [[ $TRAVIS_PYTHON_VERSION != pypy3.5 ]]; then pip install lz4; fi script: - tox diff --git a/tox.ini b/tox.ini index 9f0d510045..e71f6294a0 100644 --- a/tox.ini +++ b/tox.ini @@ -18,6 +18,7 @@ deps = {[base]deps} setenv = LIBEV_EMBED=0 CARES_EMBED=0 + LC_ALL=en_US.UTF-8 changedir = {envtmpdir} commands = nosetests --verbosity=2 --no-path-adjustment {toxinidir}/tests/unit/ From 9dab6c2d5015e8f1e3387637c3a0503653dfcbfc Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Mon, 1 Jun 2020 15:46:51 -0400 Subject: [PATCH 075/211] Fix travis build image link --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 6bd3e45749..358f588d32 100644 --- a/README.rst +++ b/README.rst @@ -1,8 +1,8 @@ DataStax Driver for Apache Cassandra ==================================== -.. image:: https://travis-ci.org/datastax/python-driver.png?branch=master - :target: https://travis-ci.org/datastax/python-driver +.. image:: https://travis-ci.com/datastax/python-driver.png?branch=master + :target: https://travis-ci.com/github/datastax/python-driver A modern, `feature-rich `_ and highly-tunable Python client library for Apache Cassandra (2.1+) and DataStax Enterprise (4.7+) using exclusively Cassandra's binary protocol and Cassandra Query Language v3. 
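
A minimal usage sketch of the driver described in the README above, assuming a
Cassandra node listening on 127.0.0.1 with default settings::

    from cassandra.cluster import Cluster

    cluster = Cluster(['127.0.0.1'])    # initial contact points; peers are discovered
    session = cluster.connect()         # opens the control connection and builds metadata
    row = session.execute("SELECT release_version FROM system.local").one()
    print(row.release_version)
    cluster.shutdown()                  # releases connection pools and background threads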
From 3317f42c2cc5d81083795edfa0986903d818e04c Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Mon, 1 Jun 2020 16:40:53 -0400 Subject: [PATCH 076/211] Use gevent 1.4 for travis --- tox.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tox.ini b/tox.ini index e71f6294a0..fd50a6c1d6 100644 --- a/tox.ini +++ b/tox.ini @@ -25,7 +25,7 @@ commands = nosetests --verbosity=2 --no-path-adjustment {toxinidir}/tests/unit/ [testenv:gevent_loop] deps = {[base]deps} - gevent + gevent>=1.4,<1.5 setenv = LIBEV_EMBED=0 CARES_EMBED=0 @@ -37,7 +37,7 @@ commands = [testenv:eventlet_loop] deps = {[base]deps} - gevent + gevent>=1.4,<1.5 setenv = LIBEV_EMBED=0 CARES_EMBED=0 From a6a66cac2931c4f66d248ed7091d68f6932de5b0 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 2 Jun 2020 09:46:04 -0400 Subject: [PATCH 077/211] ensure the connection max request id's is respected --- cassandra/connection.py | 2 +- cassandra/pool.py | 2 +- .../integration/simulacron/test_connection.py | 40 +++++- tests/unit/test_host_connection_pool.py | 126 ++++++++++-------- 4 files changed, 112 insertions(+), 58 deletions(-) diff --git a/cassandra/connection.py b/cassandra/connection.py index f30be682a6..3d154de033 100644 --- a/cassandra/connection.py +++ b/cassandra/connection.py @@ -1443,7 +1443,7 @@ def __init__(self, connection, owner): log.debug("Sending options message heartbeat on idle connection (%s) %s", id(connection), connection.endpoint) with connection.lock: - if connection.in_flight <= connection.max_request_id: + if connection.in_flight < connection.max_request_id: connection.in_flight += 1 connection.send_msg(OptionsMessage(), connection.get_request_id(), self._options_callback) else: diff --git a/cassandra/pool.py b/cassandra/pool.py index 87e8f03716..cd27656046 100644 --- a/cassandra/pool.py +++ b/cassandra/pool.py @@ -418,7 +418,7 @@ def borrow_connection(self, timeout): remaining = timeout while True: with conn.lock: - if conn.in_flight <= conn.max_request_id: + if conn.in_flight < conn.max_request_id: conn.in_flight += 1 return conn, conn.get_request_id() if timeout is not None: diff --git a/tests/integration/simulacron/test_connection.py b/tests/integration/simulacron/test_connection.py index afe2685dbf..11bfef7fb7 100644 --- a/tests/integration/simulacron/test_connection.py +++ b/tests/integration/simulacron/test_connection.py @@ -24,7 +24,7 @@ from cassandra import OperationTimedOut from cassandra.cluster import (EXEC_PROFILE_DEFAULT, Cluster, ExecutionProfile, _Scheduler, NoHostAvailable) -from cassandra.policies import HostStateListener, RoundRobinPolicy +from cassandra.policies import HostStateListener, RoundRobinPolicy, WhiteListRoundRobinPolicy from tests import connection_class, thread_pool_executor_class from tests.util import late @@ -32,7 +32,7 @@ from tests.integration.util import assert_quiescent_pool_state # important to import the patch PROTOCOL_VERSION from the simulacron module from tests.integration.simulacron import SimulacronBase, PROTOCOL_VERSION -from cassandra.connection import DEFAULT_CQL_VERSION +from cassandra.connection import DEFAULT_CQL_VERSION, Connection from tests.unit.cython.utils import cythontest from tests.integration.simulacron.utils import (NO_THEN, PrimeOptions, prime_query, prime_request, @@ -475,3 +475,39 @@ def test_driver_recovers_nework_isolation(self): time.sleep(idle_heartbeat_timeout + idle_heartbeat_interval + 2) self.assertIsNotNone(session.execute("SELECT * from system.local")) + + def test_max_in_flight(self): + """ Verify we 
don't exceed max_in_flight when borrowing connections or sending heartbeats """
+        Connection.max_in_flight = 50
+        start_and_prime_singledc()
+        profile = ExecutionProfile(request_timeout=1, load_balancing_policy=WhiteListRoundRobinPolicy(['127.0.0.1']))
+        cluster = Cluster(
+            protocol_version=PROTOCOL_VERSION,
+            compression=False,
+            execution_profiles={EXEC_PROFILE_DEFAULT: profile},
+            idle_heartbeat_interval=.1,
+            idle_heartbeat_timeout=.1,
+        )
+        session = cluster.connect(wait_for_all_pools=True)
+        self.addCleanup(cluster.shutdown)
+
+        query = session.prepare("INSERT INTO table1 (id) VALUES (?)")
+
+        prime_request(PauseReads())
+
+        futures = []
+        # + 50 because simulacron doesn't immediately block all queries
+        for i in range(Connection.max_in_flight + 50):
+            futures.append(session.execute_async(query, ['a']))
+
+        prime_request(ResumeReads())
+
+        for future in futures:
+            # We're verifying we don't get an assertion error from Connection.get_request_id,
+            # so skip any expected errors
+            try:
+                future.result()
+            except OperationTimedOut:
+                pass
+            except NoHostAvailable:
+                pass
diff --git a/tests/unit/test_host_connection_pool.py b/tests/unit/test_host_connection_pool.py
index 78af47651b..e62488b400 100644
--- a/tests/unit/test_host_connection_pool.py
+++ b/tests/unit/test_host_connection_pool.py
@@ -22,11 +22,14 @@
 
 from cassandra.cluster import Session
 from cassandra.connection import Connection
-from cassandra.pool import Host, HostConnectionPool, NoConnectionsAvailable
+from cassandra.pool import HostConnection, HostConnectionPool
+from cassandra.pool import Host, NoConnectionsAvailable
 from cassandra.policies import HostDistance, SimpleConvictionPolicy
 
 
-class HostConnectionPoolTests(unittest.TestCase):
+class _PoolTests(unittest.TestCase):
+    PoolImpl = None
+    uses_single_connection = None
 
     def make_session(self):
         session = NonCallableMagicMock(spec=Session, keyspace='foobarkeyspace')
@@ -41,7 +44,7 @@ def test_borrow_and_return(self):
         conn = NonCallableMagicMock(spec=Connection, in_flight=0, is_defunct=False, is_closed=False, max_request_id=100)
         session.cluster.connection_factory.return_value = conn
 
-        pool = HostConnectionPool(host, HostDistance.LOCAL, session)
+        pool = self.PoolImpl(host, HostDistance.LOCAL, session)
session.cluster.connection_factory.assert_called_once_with(host.endpoint) pool.borrow_connection(timeout=0.01) @@ -95,48 +99,6 @@ def get_second_conn(): t.join() self.assertEqual(0, conn.in_flight) - def test_all_connections_trashed(self): - host = Mock(spec=Host, address='ip1') - session = self.make_session() - conn = NonCallableMagicMock(spec=Connection, in_flight=0, is_defunct=False, is_closed=False, max_request_id=100, lock=Lock()) - session.cluster.connection_factory.return_value = conn - session.cluster.get_core_connections_per_host.return_value = 1 - - # manipulate the core connection setting so that we can - # trash the only connection - pool = HostConnectionPool(host, HostDistance.LOCAL, session) - session.cluster.get_core_connections_per_host.return_value = 0 - pool._maybe_trash_connection(conn) - session.cluster.get_core_connections_per_host.return_value = 1 - - submit_called = Event() - - def fire_event(*args, **kwargs): - submit_called.set() - - session.submit.side_effect = fire_event - - def get_conn(): - conn.reset_mock() - c, request_id = pool.borrow_connection(1.0) - self.assertIs(conn, c) - self.assertEqual(1, conn.in_flight) - conn.set_keyspace_blocking.assert_called_once_with('foobarkeyspace') - pool.return_connection(c) - - t = Thread(target=get_conn) - t.start() - - submit_called.wait() - self.assertEqual(1, pool._scheduled_for_creation) - session.submit.assert_called_once_with(pool._create_new_connection) - - # now run the create_new_connection call - pool._create_new_connection() - - t.join() - self.assertEqual(0, conn.in_flight) - def test_spawn_when_at_max(self): host = Mock(spec=Host, address='ip1') session = self.make_session() @@ -147,7 +109,7 @@ def test_spawn_when_at_max(self): # core conns = 1, max conns = 2 session.cluster.get_max_connections_per_host.return_value = 2 - pool = HostConnectionPool(host, HostDistance.LOCAL, session) + pool = self.PoolImpl(host, HostDistance.LOCAL, session) session.cluster.connection_factory.assert_called_once_with(host.endpoint) pool.borrow_connection(timeout=0.01) @@ -160,7 +122,8 @@ def test_spawn_when_at_max(self): # purposes of this test, as long as it results in a new connection # creation being scheduled self.assertRaises(NoConnectionsAvailable, pool.borrow_connection, 0) - session.submit.assert_called_once_with(pool._create_new_connection) + if not self.uses_single_connection: + session.submit.assert_called_once_with(pool._create_new_connection) def test_return_defunct_connection(self): host = Mock(spec=Host, address='ip1') @@ -169,7 +132,7 @@ def test_return_defunct_connection(self): max_request_id=100, signaled_error=False) session.cluster.connection_factory.return_value = conn - pool = HostConnectionPool(host, HostDistance.LOCAL, session) + pool = self.PoolImpl(host, HostDistance.LOCAL, session) session.cluster.connection_factory.assert_called_once_with(host.endpoint) pool.borrow_connection(timeout=0.01) @@ -188,7 +151,7 @@ def test_return_defunct_connection_on_down_host(self): max_request_id=100, signaled_error=False) session.cluster.connection_factory.return_value = conn - pool = HostConnectionPool(host, HostDistance.LOCAL, session) + pool = self.PoolImpl(host, HostDistance.LOCAL, session) session.cluster.connection_factory.assert_called_once_with(host.endpoint) pool.borrow_connection(timeout=0.01) @@ -205,10 +168,11 @@ def test_return_defunct_connection_on_down_host(self): def test_return_closed_connection(self): host = Mock(spec=Host, address='ip1') session = self.make_session() - conn = 
NonCallableMagicMock(spec=Connection, in_flight=0, is_defunct=False, is_closed=True, max_request_id=100, signaled_error=False) + conn = NonCallableMagicMock(spec=Connection, in_flight=0, is_defunct=False, is_closed=True, max_request_id=100, + signaled_error=False) session.cluster.connection_factory.return_value = conn - pool = HostConnectionPool(host, HostDistance.LOCAL, session) + pool = self.PoolImpl(host, HostDistance.LOCAL, session) session.cluster.connection_factory.assert_called_once_with(host.endpoint) pool.borrow_connection(timeout=0.01) @@ -241,3 +205,57 @@ def test_host_equality(self): self.assertEqual(a, b, 'Two Host instances should be equal when sharing.') self.assertNotEqual(a, c, 'Two Host instances should NOT be equal when using two different addresses.') self.assertNotEqual(b, c, 'Two Host instances should NOT be equal when using two different addresses.') + + +class HostConnectionPoolTests(_PoolTests): + PoolImpl = HostConnectionPool + uses_single_connection = False + + def test_all_connections_trashed(self): + host = Mock(spec=Host, address='ip1') + session = self.make_session() + conn = NonCallableMagicMock(spec=Connection, in_flight=0, is_defunct=False, is_closed=False, max_request_id=100, + lock=Lock()) + session.cluster.connection_factory.return_value = conn + session.cluster.get_core_connections_per_host.return_value = 1 + + # manipulate the core connection setting so that we can + # trash the only connection + pool = self.PoolImpl(host, HostDistance.LOCAL, session) + session.cluster.get_core_connections_per_host.return_value = 0 + pool._maybe_trash_connection(conn) + session.cluster.get_core_connections_per_host.return_value = 1 + + submit_called = Event() + + def fire_event(*args, **kwargs): + submit_called.set() + + session.submit.side_effect = fire_event + + def get_conn(): + conn.reset_mock() + c, request_id = pool.borrow_connection(1.0) + self.assertIs(conn, c) + self.assertEqual(1, conn.in_flight) + conn.set_keyspace_blocking.assert_called_once_with('foobarkeyspace') + pool.return_connection(c) + + t = Thread(target=get_conn) + t.start() + + submit_called.wait() + self.assertEqual(1, pool._scheduled_for_creation) + session.submit.assert_called_once_with(pool._create_new_connection) + + # now run the create_new_connection call + pool._create_new_connection() + + t.join() + self.assertEqual(0, conn.in_flight) + + +class HostConnectionTests(_PoolTests): + PoolImpl = HostConnection + uses_single_connection = True + From f9d69f37c4855ba941e945b0cb237c8a9ca0ffbc Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 2 Jun 2020 09:59:17 -0400 Subject: [PATCH 078/211] PYTHON-1196: Add test to verify we can handle TCP backpressure --- Jenkinsfile | 1 + .../simulacron/test_backpressure.py | 179 ++++++++++++++++++ .../integration/simulacron/test_connection.py | 2 +- tests/integration/simulacron/utils.py | 27 +++ 4 files changed, 208 insertions(+), 1 deletion(-) create mode 100644 tests/integration/simulacron/test_backpressure.py diff --git a/Jenkinsfile b/Jenkinsfile index 97c9ad676e..b8a116c6cf 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -184,6 +184,7 @@ def executeEventLoopTests() { "tests/integration/standard/test_connection.py" "tests/integration/standard/test_control_connection.py" "tests/integration/standard/test_metrics.py" + "tests/integration/simulacron/test_backpressure.py" "tests/integration/standard/test_query.py" "tests/integration/simulacron/test_endpoint.py" "tests/integration/long/test_ssl.py" diff --git 
a/tests/integration/simulacron/test_backpressure.py b/tests/integration/simulacron/test_backpressure.py new file mode 100644 index 0000000000..b7b428f64f --- /dev/null +++ b/tests/integration/simulacron/test_backpressure.py @@ -0,0 +1,179 @@ +# Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import time + +from cassandra import OperationTimedOut +from cassandra.cluster import Cluster, ExecutionProfile, EXEC_PROFILE_DEFAULT, NoHostAvailable +from cassandra.policies import RoundRobinPolicy, WhiteListRoundRobinPolicy +from tests.integration import requiressimulacron, libevtest +from tests.integration.simulacron import SimulacronBase, PROTOCOL_VERSION +from tests.integration.simulacron.utils import ResumeReads, PauseReads, prime_request, start_and_prime_singledc + + +@requiressimulacron +@libevtest +class TCPBackpressureTests(SimulacronBase): + def setUp(self): + self.callback_successes = 0 + self.callback_errors = 0 + + def callback_success(self, results): + self.callback_successes += 1 + + def callback_error(self, results): + self.callback_errors += 1 + + def _fill_buffers(self, session, query, expected_blocked=3, **execute_kwargs): + futures = [] + buffer = '1' * 50000 + for _ in range(100000): + future = session.execute_async(query, [buffer], **execute_kwargs) + futures.append(future) + + total_blocked = 0 + for pool in session.get_pools(): + if not pool._connection._socket_writable: + total_blocked += 1 + if total_blocked == expected_blocked: + break + else: + raise Exception("Unable to fill TCP send buffer on expected number of nodes") + return futures + + def test_paused_connections(self): + """ Verify all requests come back as expected if node resumes within query timeout """ + start_and_prime_singledc() + profile = ExecutionProfile(request_timeout=500, load_balancing_policy=RoundRobinPolicy()) + cluster = Cluster( + protocol_version=PROTOCOL_VERSION, + compression=False, + execution_profiles={EXEC_PROFILE_DEFAULT: profile}, + ) + session = cluster.connect(wait_for_all_pools=True) + self.addCleanup(cluster.shutdown) + + query = session.prepare("INSERT INTO table1 (id) VALUES (?)") + + prime_request(PauseReads()) + futures = self._fill_buffers(session, query) + + # Make sure we actually have some stuck in-flight requests + for in_flight in [pool._connection.in_flight for pool in session.get_pools()]: + self.assertGreater(in_flight, 100) + time.sleep(.5) + for in_flight in [pool._connection.in_flight for pool in session.get_pools()]: + self.assertGreater(in_flight, 100) + + prime_request(ResumeReads()) + + for future in futures: + try: + future.result() + except NoHostAvailable as e: + # We shouldn't have any timeouts here, but all of the queries beyond what can fit + # in the tcp buffer will have returned with a ConnectionBusy exception + self.assertIn("ConnectionBusy", str(e)) + + # Verify that we can continue sending queries without any problems + for host in session.cluster.metadata.all_hosts(): + session.execute(query, ["a"], host=host) + + def 
test_queued_requests_timeout(self): + """ Verify that queued requests timeout as expected """ + start_and_prime_singledc() + profile = ExecutionProfile(request_timeout=.1, load_balancing_policy=RoundRobinPolicy()) + cluster = Cluster( + protocol_version=PROTOCOL_VERSION, + compression=False, + execution_profiles={EXEC_PROFILE_DEFAULT: profile}, + ) + session = cluster.connect(wait_for_all_pools=True) + self.addCleanup(cluster.shutdown) + + query = session.prepare("INSERT INTO table1 (id) VALUES (?)") + + prime_request(PauseReads()) + + futures = [] + for i in range(1000): + future = session.execute_async(query, [str(i)]) + future.add_callbacks(callback=self.callback_success, errback=self.callback_error) + futures.append(future) + + successes = 0 + for future in futures: + try: + future.result() + successes += 1 + except OperationTimedOut: + pass + + # Simulacron will respond to a couple queries before cutting off reads, so we'll just verify + # that only "a few" successes happened here + self.assertLess(successes, 50) + self.assertLess(self.callback_successes, 50) + self.assertEqual(self.callback_errors, len(futures) - self.callback_successes) + + def test_cluster_busy(self): + """ Verify that once TCP buffer is full we get busy exceptions rather than timeouts """ + start_and_prime_singledc() + profile = ExecutionProfile(load_balancing_policy=RoundRobinPolicy()) + cluster = Cluster( + protocol_version=PROTOCOL_VERSION, + compression=False, + execution_profiles={EXEC_PROFILE_DEFAULT: profile}, + ) + session = cluster.connect(wait_for_all_pools=True) + self.addCleanup(cluster.shutdown) + + query = session.prepare("INSERT INTO table1 (id) VALUES (?)") + + prime_request(PauseReads()) + + # These requests will get stuck in the TCP buffer and we have no choice but to let them time out + self._fill_buffers(session, query, expected_blocked=3) + + # Now that our send buffer is completely full, verify we immediately get busy exceptions rather than timing out + for i in range(1000): + with self.assertRaises(NoHostAvailable) as e: + session.execute(query, [str(i)]) + self.assertIn("ConnectionBusy", str(e.exception)) + + def test_node_busy(self): + """ Verify that once TCP buffer is full, queries continue to get re-routed to other nodes """ + start_and_prime_singledc() + profile = ExecutionProfile(load_balancing_policy=RoundRobinPolicy()) + cluster = Cluster( + protocol_version=PROTOCOL_VERSION, + compression=False, + execution_profiles={EXEC_PROFILE_DEFAULT: profile}, + ) + session = cluster.connect(wait_for_all_pools=True) + self.addCleanup(cluster.shutdown) + + query = session.prepare("INSERT INTO table1 (id) VALUES (?)") + + prime_request(PauseReads(dc_id=0, node_id=0)) + + blocked_profile = ExecutionProfile(load_balancing_policy=WhiteListRoundRobinPolicy(["127.0.0.1"])) + cluster.add_execution_profile('blocked_profile', blocked_profile) + + # Fill our blocked node's tcp buffer until we get a busy exception + self._fill_buffers(session, query, expected_blocked=1, execution_profile='blocked_profile') + + # Now that our send buffer is completely full on one node, + # verify queries get re-routed to other nodes and queries complete successfully + for i in range(1000): + session.execute(query, [str(i)]) + diff --git a/tests/integration/simulacron/test_connection.py b/tests/integration/simulacron/test_connection.py index 11bfef7fb7..4ef97247a6 100644 --- a/tests/integration/simulacron/test_connection.py +++ b/tests/integration/simulacron/test_connection.py @@ -39,7 +39,7 @@ 
start_and_prime_cluster_defaults, start_and_prime_singledc, clear_queries, RejectConnections, - RejectType, AcceptConnections) + RejectType, AcceptConnections, PauseReads, ResumeReads) class TrackDownListener(HostStateListener): diff --git a/tests/integration/simulacron/utils.py b/tests/integration/simulacron/utils.py index b1d9debaf3..ba9573fd23 100644 --- a/tests/integration/simulacron/utils.py +++ b/tests/integration/simulacron/utils.py @@ -338,6 +338,33 @@ def method(self): return "DELETE" +class _PauseOrResumeReads(SimulacronRequest): + def __init__(self, cluster_name=DEFAULT_CLUSTER, dc_id=None, node_id=None): + self.path = "pause-reads/{}".format(cluster_name) + if dc_id is not None: + self.path += "/{}".format(dc_id) + if node_id is not None: + self.path += "/{}".format(node_id) + elif node_id: + raise Exception("Can't set node_id without dc_id") + + @property + def method(self): + raise NotImplementedError() + + +class PauseReads(_PauseOrResumeReads): + @property + def method(self): + return "PUT" + + +class ResumeReads(_PauseOrResumeReads): + @property + def method(self): + return "DELETE" + + def prime_driver_defaults(): """ Function to prime the necessary queries so the test harness can run From e14a7a097fb651b2c1875f7173cfe04fd500889b Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 2 Jun 2020 11:02:51 -0400 Subject: [PATCH 079/211] PYTHON-1248: Libevreactor: Raise ConnectionBusy if tcp send buffer is full --- CHANGELOG.rst | 1 + Jenkinsfile | 2 +- cassandra/cluster.py | 9 ++++++--- cassandra/connection.py | 3 +++ cassandra/io/libevreactor.py | 4 ++++ tests/integration/simulacron/test_backpressure.py | 2 +- 6 files changed, 16 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 7e1033e566..da1b5dfd0f 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,7 @@ Features -------- * Make geomet an optional dependency at runtime (PYTHON-1237) * Add use_default_tempdir cloud config options (PYTHON-1245) +* Tcp flow control for libevreactor (PYTHON-1248) Bug Fixes --------- diff --git a/Jenkinsfile b/Jenkinsfile index b8a116c6cf..e133beb5db 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -601,7 +601,7 @@ pipeline { EVENT_LOOP_MANAGER = "${params.EVENT_LOOP_MANAGER.toLowerCase()}" EXECUTE_LONG_TESTS = "${params.EXECUTE_LONG_TESTS ? 
'True' : 'False'}" CCM_ENVIRONMENT_SHELL = '/usr/local/bin/ccm_environment.sh' - CCM_MAX_HEAP_SIZE = '1536M' + CCM_MAX_HEAP_SIZE = '1280M' } stages { diff --git a/cassandra/cluster.py b/cassandra/cluster.py index f69625b1be..616fb7f147 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -48,7 +48,7 @@ from cassandra.connection import (ConnectionException, ConnectionShutdown, ConnectionHeartbeat, ProtocolVersionUnsupported, EndPoint, DefaultEndPoint, DefaultEndPointFactory, - ContinuousPagingState, SniEndPointFactory) + ContinuousPagingState, SniEndPointFactory, ConnectionBusy) from cassandra.cqltypes import UserType from cassandra.encoder import Encoder from cassandra.protocol import (QueryMessage, ResultMessage, @@ -4445,7 +4445,9 @@ def _query(self, host, message=None, cb=None): except NoConnectionsAvailable as exc: log.debug("All connections for host %s are at capacity, moving to the next host", host) self._errors[host] = exc - return None + except ConnectionBusy as exc: + log.debug("Connection for host %s is busy, moving to the next host", host) + self._errors[host] = exc except Exception as exc: log.debug("Error querying host %s", host, exc_info=True) self._errors[host] = exc @@ -4453,7 +4455,8 @@ def _query(self, host, message=None, cb=None): self._metrics.on_connection_error() if connection: pool.return_connection(connection) - return None + + return None @property def has_more_pages(self): diff --git a/cassandra/connection.py b/cassandra/connection.py index 3d154de033..6ce3e44a30 100644 --- a/cassandra/connection.py +++ b/cassandra/connection.py @@ -692,6 +692,7 @@ def __init__(self, host='127.0.0.1', port=9042, authenticator=None, self._requests = {} self._iobuf = io.BytesIO() self._continuous_paging_sessions = {} + self._socket_writable = True if ssl_options: self._check_hostname = bool(self.ssl_options.pop('check_hostname', False)) @@ -926,6 +927,8 @@ def send_msg(self, msg, request_id, cb, encoder=ProtocolHandler.encode_message, raise ConnectionShutdown("Connection to %s is defunct" % self.endpoint) elif self.is_closed: raise ConnectionShutdown("Connection to %s is closed" % self.endpoint) + elif not self._socket_writable: + raise ConnectionBusy("Connection %s is overloaded" % self.endpoint) # queue the decoder function with the request # this allows us to inject custom functions per request to encode, decode messages diff --git a/cassandra/io/libevreactor.py b/cassandra/io/libevreactor.py index 2487419784..917e16aea8 100644 --- a/cassandra/io/libevreactor.py +++ b/cassandra/io/libevreactor.py @@ -310,6 +310,8 @@ def handle_write(self, watcher, revents, errno=None): with self._deque_lock: next_msg = self.deque.popleft() except IndexError: + if not self._socket_writable: + self._socket_writable = True return try: @@ -317,6 +319,8 @@ def handle_write(self, watcher, revents, errno=None): except socket.error as err: if (err.args[0] in NONBLOCKING or err.args[0] in (ssl.SSL_ERROR_WANT_READ, ssl.SSL_ERROR_WANT_WRITE)): + if err.args[0] in NONBLOCKING: + self._socket_writable = False with self._deque_lock: self.deque.appendleft(next_msg) else: diff --git a/tests/integration/simulacron/test_backpressure.py b/tests/integration/simulacron/test_backpressure.py index b7b428f64f..69c38da8fe 100644 --- a/tests/integration/simulacron/test_backpressure.py +++ b/tests/integration/simulacron/test_backpressure.py @@ -45,7 +45,7 @@ def _fill_buffers(self, session, query, expected_blocked=3, **execute_kwargs): for pool in session.get_pools(): if not pool._connection._socket_writable: 
total_blocked += 1 - if total_blocked == expected_blocked: + if total_blocked >= expected_blocked: break else: raise Exception("Unable to fill TCP send buffer on expected number of nodes") From 748a5a4b91790f61046f875be269ce44ffb4c7d1 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 2 Jun 2020 14:19:30 -0400 Subject: [PATCH 080/211] Update all python patch versions --- Jenkinsfile | 72 ++++++++++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index e133beb5db..152ad629b1 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -284,7 +284,7 @@ def describePerCommitStage() { } currentBuild.displayName = "Per-Commit (${env.EVENT_LOOP_MANAGER} | ${type.capitalize()})" - currentBuild.description = "Per-Commit build and ${type} testing of ${serverDescription} against Python v2.7.14 and v3.5.6 using ${env.EVENT_LOOP_MANAGER} event loop manager" + currentBuild.description = "Per-Commit build and ${type} testing of ${serverDescription} against Python v2.7.18 and v3.5.9 using ${env.EVENT_LOOP_MANAGER} event loop manager" } sh label: 'Describe the python environment', script: '''#!/bin/bash -lex @@ -402,7 +402,7 @@ pipeline {
dse-6.7 DataStax Enterprise v6.7.x
dse-6.8.0 DataStax Enterprise v6.8.0
dse-6.8 DataStax Enterprise v6.8.x (CURRENTLY UNDER DEVELOPMENT)
''') choice( name: 'ADHOC_BUILD_AND_EXECUTE_TESTS_PYTHON_VERSION', - choices: ['2.7.14', '3.4.9', '3.5.6', '3.6.6', '3.7.4', '3.8.0'], + choices: ['2.7.18', '3.4.10', '3.5.9', '3.6.10', '3.7.7', '3.8.3'], description: 'Python version to use for adhoc BUILD-AND-EXECUTE-TESTS ONLY!') choice( name: 'ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION', @@ -555,43 +555,43 @@ pipeline { triggers { parameterizedCron((branchPatternCron.matcher(env.BRANCH_NAME).matches() && !riptanoPatternCron.matcher(GIT_URL).find()) ? """ # Every weeknight (Monday - Friday) around 4:00 AM - # These schedules will run with and without Cython enabled for Python v2.7.14 and v3.5.6 - H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=2.7.14;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1 dse-6.0 dse-6.7 - H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.5.6;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1 dse-6.0 dse-6.7 + # These schedules will run with and without Cython enabled for Python v2.7.18 and v3.5.9 + H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1 dse-6.0 dse-6.7 + H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.5.9;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1 dse-6.0 dse-6.7 # Every Saturday around 12:00, 4:00 and 8:00 PM - # These schedules are for weekly libev event manager runs with and without Cython for most of the Python versions (excludes v3.5.6.x) - H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=2.7.14;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 - H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.4.9;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 - H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.6.6;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 - H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.7.4;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 - H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.8.0;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 - # These schedules are for weekly gevent event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.9.x) - H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.14;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.6;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.6;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.4;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.0;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - # These schedules are for weekly eventlet event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.9.x) - H 20 * * 6 
%CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.14;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.6;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.6;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.4;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.0;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + # These schedules are for weekly libev event manager runs with and without Cython for most of the Python versions (excludes v3.5.9.x) + H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 + H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.4.10;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 + H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.6.10;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 + H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.7.7;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 + H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.8.3;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 + # These schedules are for weekly gevent event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.10.x) + H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.9;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.10;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.7;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.3;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + # These schedules are for weekly eventlet event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.10.x) + H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.9;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.10;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 
dse-6.0 dse-6.7 + H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.7;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.3;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 # Every Sunday around 12:00 and 4:00 AM - # These schedules are for weekly asyncore event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.9.x) - H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.14;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.6;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.6;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.4;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.0;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - # These schedules are for weekly twisted event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.9.x) - H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.14;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.6;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.6;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.4;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.0;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + # These schedules are for weekly asyncore event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.10.x) + H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.9;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.10;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.7;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 0 * * 7 
%CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.3;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + # These schedules are for weekly twisted event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.10.x) + H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.9;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.10;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.7;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.3;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 """ : "") } @@ -628,7 +628,7 @@ pipeline { } axis { name 'PYTHON_VERSION' - values '2.7.14', '3.5.6' + values '2.7.18', '3.5.9' } axis { name 'CYTHON_ENABLED' From bbdefc7e15f599c9179ea26f4fc31ce117e76b09 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Thu, 4 Jun 2020 15:05:51 -0400 Subject: [PATCH 081/211] Avoid memory issue by running backpressure tests separately --- Jenkinsfile | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 152ad629b1..49b5277b64 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -107,7 +107,13 @@ def executeStandardTests() { set +o allexport SIMULACRON_JAR="${HOME}/simulacron.jar" - SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=simulacron_results.xml tests/integration/simulacron/ || true + SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_results.xml tests/integration/simulacron/ || true + + # Run backpressure tests separately to avoid memory issue + SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_1_results.xml 
tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_paused_connections || true + SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_2_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_queued_requests_timeout || true + SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_3_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_cluster_busy || true + SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_4_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_node_busy || true ''' sh label: 'Execute CQL engine integration tests', script: '''#!/bin/bash -lex @@ -184,7 +190,6 @@ def executeEventLoopTests() { "tests/integration/standard/test_connection.py" "tests/integration/standard/test_control_connection.py" "tests/integration/standard/test_metrics.py" - "tests/integration/simulacron/test_backpressure.py" "tests/integration/standard/test_query.py" "tests/integration/simulacron/test_endpoint.py" "tests/integration/long/test_ssl.py" From 64c302ee3b6d55174dedae15abc6222a921668a0 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Fri, 5 Jun 2020 14:16:00 -0400 Subject: [PATCH 082/211] Fix graph elementMap() result deserialization --- CHANGELOG.rst | 1 + Jenkinsfile | 2 +- cassandra/datastax/graph/__init__.py | 2 +- cassandra/datastax/graph/graphson.py | 14 ++++- cassandra/datastax/graph/types.py | 51 ++++++++++++++++++- docs/api/cassandra/datastax/graph/index.rst | 3 ++ .../advanced/graph/test_graph_query.py | 23 ++++++++- 7 files changed, 90 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index da1b5dfd0f..d0d6d6d695 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -12,6 +12,7 @@ Bug Fixes --------- * Unable to connect to a cloud cluster using Ubuntu 20.04 (PYTHON-1238) * [GRAPH] Can't write data in a Boolean field using the Fluent API (PYTHON-1239) +* [GRAPH] Fix elementMap() result deserialization (PYTHON-1233) Others ------ diff --git a/Jenkinsfile b/Jenkinsfile index 49b5277b64..87b20804ca 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -606,7 +606,7 @@ pipeline { EVENT_LOOP_MANAGER = "${params.EVENT_LOOP_MANAGER.toLowerCase()}" EXECUTE_LONG_TESTS = "${params.EXECUTE_LONG_TESTS ? 
'True' : 'False'}" CCM_ENVIRONMENT_SHELL = '/usr/local/bin/ccm_environment.sh' - CCM_MAX_HEAP_SIZE = '1280M' + CCM_MAX_HEAP_SIZE = '1536M' } stages { diff --git a/cassandra/datastax/graph/__init__.py b/cassandra/datastax/graph/__init__.py index d828c7f707..11785c84f6 100644 --- a/cassandra/datastax/graph/__init__.py +++ b/cassandra/datastax/graph/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. -from cassandra.datastax.graph.types import Element, Vertex, VertexProperty, Edge, Path +from cassandra.datastax.graph.types import Element, Vertex, VertexProperty, Edge, Path, T from cassandra.datastax.graph.query import ( GraphOptions, GraphProtocol, GraphStatement, SimpleGraphStatement, Result, graph_object_row_factory, single_object_row_factory, diff --git a/cassandra/datastax/graph/graphson.py b/cassandra/datastax/graph/graphson.py index 956d7d7f18..4b333eb1bf 100644 --- a/cassandra/datastax/graph/graphson.py +++ b/cassandra/datastax/graph/graphson.py @@ -34,7 +34,7 @@ from cassandra.cqltypes import cql_types_from_string from cassandra.metadata import UserType from cassandra.util import Polygon, Point, LineString, Duration -from cassandra.datastax.graph.types import Vertex, VertexProperty, Edge, Path +from cassandra.datastax.graph.types import Vertex, VertexProperty, Edge, Path, T __all__ = ['GraphSON1Serializer', 'GraphSON1Deserializer', 'GraphSON1TypeDeserializer', 'GraphSON2Serializer', 'GraphSON2Deserializer', 'GraphSON2Reader', @@ -745,6 +745,15 @@ def deserialize(cls, value, reader=None): return udt_class(**dict(kwargs)) +class TTypeIO(GraphSONTypeIO): + prefix = 'g' + graphson_base_type = 'T' + + @classmethod + def deserialize(cls, value, reader=None): + return T.name_to_value[value] + + class _BaseGraphSONSerializer(object): _serializers = OrderedDict() @@ -1120,7 +1129,8 @@ def get_serializer(self, value): class GraphSON3Deserializer(GraphSON2Deserializer): _TYPES = GraphSON2Deserializer._TYPES + [MapTypeIO, ListTypeIO, SetTypeIO, TupleTypeIO, - UserTypeIO, DseDurationTypeIO, BulkSetTypeIO] + UserTypeIO, DseDurationTypeIO, + TTypeIO, BulkSetTypeIO] _deserializers = {t.graphson_type: t for t in _TYPES} diff --git a/cassandra/datastax/graph/types.py b/cassandra/datastax/graph/types.py index ae22cd4bfe..9817c99d7d 100644 --- a/cassandra/datastax/graph/types.py +++ b/cassandra/datastax/graph/types.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -__all__ = ['Element', 'Vertex', 'Edge', 'VertexProperty', 'Path'] +__all__ = ['Element', 'Vertex', 'Edge', 'VertexProperty', 'Path', 'T'] class Element(object): @@ -159,3 +159,52 @@ def __str__(self): def __repr__(self): return "%s(%r, %r)" % (self.__class__.__name__, self.labels, [o.value for o in self.objects]) + + +class T(object): + """ + Represents a collection of tokens for more concise Traversal definitions. 
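+
+    These tokens mirror TinkerPop's ``T`` tokens and appear as the keys of
+    ``elementMap()`` results; see ``T.id``, ``T.key``, ``T.label`` and
+    ``T.value`` below.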
+ """ + + name = None + val = None + + # class attributes + id = None + """ + """ + + key = None + """ + """ + label = None + """ + """ + value = None + """ + """ + + def __init__(self, name, val): + self.name = name + self.val = val + + def __str__(self): + return self.name + + def __repr__(self): + return "T.%s" % (self.name, ) + + +T.id = T("id", 1) +T.id_ = T("id_", 2) +T.key = T("key", 3) +T.label = T("label", 4) +T.value = T("value", 5) + +T.name_to_value = { + 'id': T.id, + 'id_': T.id_, + 'key': T.key, + 'label': T.label, + 'value': T.value +} diff --git a/docs/api/cassandra/datastax/graph/index.rst b/docs/api/cassandra/datastax/graph/index.rst index 18a0e7c511..dafd5f65fd 100644 --- a/docs/api/cassandra/datastax/graph/index.rst +++ b/docs/api/cassandra/datastax/graph/index.rst @@ -81,6 +81,9 @@ .. autoclass:: Path :members: +.. autoclass:: T + :members: + .. autoclass:: GraphSON1Serializer :members: diff --git a/tests/integration/advanced/graph/test_graph_query.py b/tests/integration/advanced/graph/test_graph_query.py index 1ccfc4a90c..0eda67894d 100644 --- a/tests/integration/advanced/graph/test_graph_query.py +++ b/tests/integration/advanced/graph/test_graph_query.py @@ -35,8 +35,9 @@ from cassandra.graph import (SimpleGraphStatement, single_object_row_factory, Result, GraphOptions, GraphProtocol, to_bigint) from cassandra.datastax.graph.query import _graph_options +from cassandra.datastax.graph.types import T -from tests.integration import DSE_VERSION, requiredse +from tests.integration import DSE_VERSION, requiredse, greaterthanorequaldse68 from tests.integration.advanced.graph import BasicGraphUnitTestCase, GraphTestConfiguration, \ validate_classic_vertex, GraphUnitTestCase, validate_classic_edge, validate_path_result_type, \ validate_line_edge, validate_generic_vertex_result_type, \ @@ -542,6 +543,26 @@ def _test_query_bulkset(self, schema, graphson): self.assertEqual(len(results), 5) self.assertEqual(results.count(35), 2) + @greaterthanorequaldse68 + def _test_elementMap_query(self, schema, graphson): + """ + Test to validate that an elementMap can be serialized properly. 
+ """ + self.execute_graph(schema.fixtures.classic(), graphson) + rs = self.execute_graph('''g.V().has('name','marko').elementMap()''', graphson) + results_list = self.resultset_to_list(rs) + self.assertEqual(len(results_list), 1) + row = results_list[0] + if graphson == GraphProtocol.GRAPHSON_3_0: + self.assertIn(T.id, row) + self.assertIn(T.label, row) + if schema is CoreGraphSchema: + self.assertEqual(row[T.id], 'dseg:/person/marko') + self.assertEqual(row[T.label], 'person') + else: + self.assertIn('id', row) + self.assertIn('label', row) + @GraphTestConfiguration.generate_tests(schema=ClassicGraphSchema) class ClassicGraphQueryTest(GenericGraphQueryTest): From 86168e03869394a4b3d285aba98b5624d63ddbd2 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Mon, 8 Jun 2020 15:03:26 -0400 Subject: [PATCH 083/211] Fix PlainTextAuthProvider fails with unicode chars and Python3 --- CHANGELOG.rst | 1 + cassandra/auth.py | 3 ++- tests/unit/test_auth.py | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 tests/unit/test_auth.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index d0d6d6d695..5549ac50af 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -11,6 +11,7 @@ Features Bug Fixes --------- * Unable to connect to a cloud cluster using Ubuntu 20.04 (PYTHON-1238) +* PlainTextAuthProvider fails with unicode chars and Python3 (PYTHON-1241) * [GRAPH] Can't write data in a Boolean field using the Fluent API (PYTHON-1239) * [GRAPH] Fix elementMap() result deserialization (PYTHON-1233) diff --git a/cassandra/auth.py b/cassandra/auth.py index 910592f7ac..3d2f751ac0 100644 --- a/cassandra/auth.py +++ b/cassandra/auth.py @@ -277,7 +277,8 @@ def get_initial_challenge(self): def evaluate_challenge(self, challenge): if challenge == six.b('PLAIN-START'): - return six.b("\x00%s\x00%s" % (self.username, self.password)) + data = "\x00%s\x00%s" % (self.username, self.password) + return data if six.PY2 else data.encode() raise Exception('Did not receive a valid challenge response from server') diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py new file mode 100644 index 0000000000..7b4196f831 --- /dev/null +++ b/tests/unit/test_auth.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +# # Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import six +from cassandra.auth import PlainTextAuthenticator + +try: + import unittest2 as unittest +except ImportError: + import unittest # noqa + + +class TestPlainTextAuthenticator(unittest.TestCase): + + def test_evaluate_challenge_with_unicode_data(self): + authenticator = PlainTextAuthenticator("johnӁ", "doeӁ") + self.assertEqual( + authenticator.evaluate_challenge(six.ensure_binary('PLAIN-START')), + six.ensure_binary("\x00johnӁ\x00doeӁ") + ) From e3d5515687c15ab9d6026353436a32c485bbdaf9 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 9 Jun 2020 13:18:04 -0400 Subject: [PATCH 084/211] Fix Graph execution profiles consistency level are not set to LOCAL_QUORUM for a cloud cluster --- CHANGELOG.rst | 1 + cassandra/cluster.py | 4 ++-- tests/integration/cloud/test_cloud.py | 13 +++++++++++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 5549ac50af..dde4c316dd 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -12,6 +12,7 @@ Bug Fixes --------- * Unable to connect to a cloud cluster using Ubuntu 20.04 (PYTHON-1238) * PlainTextAuthProvider fails with unicode chars and Python3 (PYTHON-1241) +* Graph execution profiles consistency level are not set to LOCAL_QUORUM with a cloud cluster (PYTHON-1240) * [GRAPH] Can't write data in a Boolean field using the Fluent API (PYTHON-1239) * [GRAPH] Fix elementMap() result deserialization (PYTHON-1233) diff --git a/cassandra/cluster.py b/cassandra/cluster.py index 616fb7f147..c4d6de124d 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -418,7 +418,7 @@ class GraphExecutionProfile(ExecutionProfile): """ def __init__(self, load_balancing_policy=_NOT_SET, retry_policy=None, - consistency_level=ConsistencyLevel.LOCAL_ONE, serial_consistency_level=None, + consistency_level=_NOT_SET, serial_consistency_level=None, request_timeout=30.0, row_factory=None, graph_options=None, continuous_paging_options=_NOT_SET): """ @@ -443,7 +443,7 @@ def __init__(self, load_balancing_policy=_NOT_SET, retry_policy=None, class GraphAnalyticsExecutionProfile(GraphExecutionProfile): def __init__(self, load_balancing_policy=None, retry_policy=None, - consistency_level=ConsistencyLevel.LOCAL_ONE, serial_consistency_level=None, + consistency_level=_NOT_SET, serial_consistency_level=None, request_timeout=3600. * 24. 
* 7., row_factory=None, graph_options=None): """ diff --git a/tests/integration/cloud/test_cloud.py b/tests/integration/cloud/test_cloud.py index ef76b71303..5b9b268f5c 100644 --- a/tests/integration/cloud/test_cloud.py +++ b/tests/integration/cloud/test_cloud.py @@ -23,7 +23,7 @@ from ssl import SSLContext, PROTOCOL_TLSv1 from cassandra import DriverException, ConsistencyLevel, InvalidRequest -from cassandra.cluster import NoHostAvailable, ExecutionProfile, Cluster +from cassandra.cluster import NoHostAvailable, ExecutionProfile, Cluster, _execution_profile_to_string from cassandra.connection import SniEndPoint from cassandra.auth import PlainTextAuthProvider from cassandra.policies import TokenAwarePolicy, DCAwareRoundRobinPolicy, ConstantReconnectionPolicy @@ -160,7 +160,16 @@ def test_metadata_ssl_error(self): def test_default_consistency(self): self.connect(self.creds) self.assertEqual(self.session.default_consistency_level, ConsistencyLevel.LOCAL_QUORUM) - self.assertEqual(self.cluster.profile_manager.default.consistency_level, ConsistencyLevel.LOCAL_QUORUM) + # Verify EXEC_PROFILE_DEFAULT, EXEC_PROFILE_GRAPH_DEFAULT, + # EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT, EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT + for ep_key in six.iterkeys(self.cluster.profile_manager.profiles): + ep = self.cluster.profile_manager.profiles[ep_key] + self.assertEqual( + ep.consistency_level, + ConsistencyLevel.LOCAL_QUORUM, + "Expecting LOCAL QUORUM for profile {}, but got {} instead".format( + _execution_profile_to_string(ep_key), ConsistencyLevel.value_to_name[ep.consistency_level] + )) def test_default_consistency_of_execution_profiles(self): cloud_config = {'secure_connect_bundle': self.creds} From 9f444b415cf5e9e4e48f5f620772cbfed9f36e5b Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Mon, 15 Jun 2020 08:25:48 -0400 Subject: [PATCH 085/211] In some cases, socket.write() return 0 as sent instead of raising NONBLOCKING --- cassandra/io/libevreactor.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cassandra/io/libevreactor.py b/cassandra/io/libevreactor.py index 917e16aea8..54e2d0de03 100644 --- a/cassandra/io/libevreactor.py +++ b/cassandra/io/libevreactor.py @@ -330,6 +330,11 @@ def handle_write(self, watcher, revents, errno=None): if sent < len(next_msg): with self._deque_lock: self.deque.appendleft(next_msg[sent:]) + # we've seen some cases that 0 is returned instead of NONBLOCKING. But usually, + # we don't expect this to happen. 
https://bugs.python.org/issue20951 + if sent == 0: + self._socket_writable = False + return def handle_read(self, watcher, revents, errno=None): if revents & libev.EV_ERROR: From 5cbbd1abd3bf9a84994f7b5ca495e80e586118a9 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Mon, 15 Jun 2020 14:50:07 -0400 Subject: [PATCH 086/211] Tests: Set MAX HEAP to 1500M when create a cluster with graph workload --- tests/integration/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index 52b1286ebb..1c40f9bd46 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -566,7 +566,9 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, # This will enable the Mirroring query handler which will echo our custom payload k,v pairs back - if 'graph' not in workloads: + if 'graph' in workloads: + jvm_args += ['-Xms1500M', '-Xmx1500M'] + else: if PROTOCOL_VERSION >= 4: jvm_args = [" -Dcassandra.custom_query_handler_class=org.apache.cassandra.cql3.CustomPayloadMirroringQueryHandler"] if len(workloads) > 0: From f58ff459199ec6a2cc539acdf1613a808b6f317d Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Mon, 15 Jun 2020 16:15:00 -0400 Subject: [PATCH 087/211] Improve graph documentation for the core engine --- docs/graph_fluent.rst | 92 ++++++++++++++++++++++++------------------- 1 file changed, 52 insertions(+), 40 deletions(-) diff --git a/docs/graph_fluent.rst b/docs/graph_fluent.rst index fbe0ef57df..03cf8d36c0 100644 --- a/docs/graph_fluent.rst +++ b/docs/graph_fluent.rst @@ -27,7 +27,19 @@ hard to maintain. This fluent API allows you to build Gremlin traversals and wri queries directly in Python. These native traversal queries can be executed explicitly, with a `Session` object, or implicitly:: - g = DseGraph.traversal_source(session=dse_session) + from cassandra.cluster import Cluster, EXEC_PROFILE_GRAPH_DEFAULT + from cassandra.datastax.graph import GraphProtocol + from cassandra.datastax.graph.fluent import DseGraph + + # Create an execution profile, using GraphSON3 for Core graphs + ep_graphson3 = DseGraph.create_execution_profile( + 'my_core_graph_name', + graph_protocol=GraphProtocol.GRAPHSON_3_0) + cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep_graphson3}) + session = cluster.connect() + + # Execute a fluent graph query + g = DseGraph.traversal_source(session=session) g.addV('genre').property('genreId', 1).property('name', 'Action').next() # implicit execution caused by iterating over results @@ -50,15 +62,24 @@ Configuring a Traversal Execution Profile The fluent api takes advantage of *configuration profiles* to allow different execution configurations for the various query handlers. Graph traversal execution requires a custom execution profile to enable Gremlin-bytecode as -query language. Here is how to accomplish this configuration: +query language. With Core graphs, it is important to use GraphSON3. Here is how +to accomplish this configuration: .. code-block:: python from cassandra.cluster import Cluster, EXEC_PROFILE_GRAPH_DEFAULT + from cassandra.datastax.graph import GraphProtocol from cassandra.datastax.graph.fluent import DseGraph - ep = DseGraph.create_execution_profile('graph_name') - cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep}) + # Using GraphSON3 as graph protocol is a requirement with Core graphs. 
+ ep = DseGraph.create_execution_profile( + 'graph_name', + graph_protocol=GraphProtocol.GRAPHSON_3_0) + + # For Classic graphs, GraphSON1, GraphSON2 and GraphSON3 (DSE 6.8+) are supported. + ep_classic = DseGraph.create_execution_profile('classic_graph_name') # default is GraphSON2 + + cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep, 'classic': ep_classic}) session = cluster.connect() g = DseGraph.traversal_source(session) # Build the GraphTraversalSource @@ -71,27 +92,6 @@ If you want to change execution property defaults, please see the :doc:`Executio for a more generalized discussion of the API. Graph traversal queries use the same execution profile defined for DSE graph. If you need to change the default properties, please refer to the :doc:`DSE Graph query documentation page ` -Configuring a Traversal Execution Profile for the Core graph engine -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -To execute a traversal query with graphs that use the core engine, you need to configure -a graphson3 execution profile: - -.. code-block:: python - from cassandra.cluster import Cluster, EXEC_PROFILE_GRAPH_DEFAULT - from cassandra.datastax.graph import GraphProtocol - from cassandra.datastax.graph.fluent import DseGraph - - ep_graphson3 = DseGraph.create_execution_profile( - 'my_core_graph_name', - graph_protocol=GraphProtocol.GRAPHSON_3_0 - ) - cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep_graphson3}) - - g = DseGraph.traversal_source(session) - print g.V().toList() - - Explicit Graph Traversal Execution with a DSE Session ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -101,19 +101,28 @@ Below is an example of explicit execution. For this example, assume the schema h .. code-block:: python + from cassandra.cluster import Cluster, EXEC_PROFILE_GRAPH_DEFAULT + from cassandra.datastax.graph import GraphProtocol from cassandra.datastax.graph.fluent import DseGraph from pprint import pprint - # create a tinkerpop graphson2 ExecutionProfile - ep = DseGraph.create_execution_profile('graph_name') + ep = DseGraph.create_execution_profile( + 'graph_name', + graph_protocol=GraphProtocol.GRAPHSON_3_0) cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep}) session = cluster.connect() g = DseGraph.traversal_source(session=session) + +Convert a traversal to a bytecode query for classic graphs:: + addV_query = DseGraph.query_from_traversal( - g.addV('genre').property('genreId', 1).property('name', 'Action') + g.addV('genre').property('genreId', 1).property('name', 'Action'), + graph_protocol=GraphProtocol.GRAPHSON_3_0 ) - v_query = DseGraph.query_from_traversal(g.V()) + v_query = DseGraph.query_from_traversal( + g.V(), + graph_protocol=GraphProtocol.GRAPHSON_3_0) for result in session.execute_graph(addV_query): pprint(result.value) @@ -124,7 +133,6 @@ Converting a traversal to a bytecode query for core graphs require some more wor need the cluster context for UDT and tuple types: .. 
code-block:: python - g = DseGraph.traversal_source(session=session) context = { 'cluster': cluster, 'graph_name': 'the_graph_for_the_query' @@ -135,6 +143,9 @@ need the cluster context for UDT and tuple types: context=context ) + for result in session.execute_graph(addV_query): + pprint(result.value) + Implicit Graph Traversal Execution with TinkerPop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -185,19 +196,18 @@ python `Future `, you need to bound the batch to a DSE session:: - batch = DseGraph.batch(session, 'graphson2') # bound the session and execution profile + batch = DseGraph.batch(session, 'graphson3') # bound the session and execution profile batch.add( g.addV('genre').property('genreId', 1).property('name', 'Action')) From 1592728ae4e70ffb16b90b3675268f0186a5e9f7 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Mon, 15 Jun 2020 19:46:53 -0400 Subject: [PATCH 088/211] Set resource_manager_options.worker_options.cores_total for DSE >=6.8 --- tests/integration/__init__.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index 1c40f9bd46..207b48b098 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -534,7 +534,17 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, } }) if 'spark' in workloads: - config_options = {"initial_spark_worker_resources": 0.1} + if Version(dse_version) >= Version('6.8'): + config_options = { + "resource_manager_options": { + "worker_options": { + "cores_total": 0.1 + } + } + } + else: + config_options = {"initial_spark_worker_resources": 0.1} + if Version(dse_version) >= Version('6.7'): log.debug("Disabling AlwaysON SQL for a DSE 6.7 Cluster") config_options['alwayson_sql_options'] = {'enabled': False} From d42ac88e25d2e051d35a38e89f9be7a527cff761 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 16 Jun 2020 11:46:58 -0400 Subject: [PATCH 089/211] Set resource_manager_options.worker_options.memory_total for DSE >=6.8 --- tests/integration/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index 207b48b098..1e1f582804 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -538,7 +538,8 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, config_options = { "resource_manager_options": { "worker_options": { - "cores_total": 0.1 + "cores_total": 0.1, + "memory_total": "64M" } } } From a1ba1f3c2905a625b4c1b05861ab687b2b4f46e9 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 16 Jun 2020 14:16:05 -0400 Subject: [PATCH 090/211] Use ccm cassandra-test branch for Windows to get use_single_interface support --- appveyor/appveyor.ps1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor/appveyor.ps1 b/appveyor/appveyor.ps1 index cc1e6aa76f..5f6840e4e1 100644 --- a/appveyor/appveyor.ps1 +++ b/appveyor/appveyor.ps1 @@ -54,7 +54,7 @@ Start-Process python -ArgumentList "-m pip install psutil pyYaml six numpy" -Wai # Clone ccm from git and use master. 
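+# Pin to the cassandra-test branch to get use_single_interface support.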
If (!(Test-Path $env:CCM_PATH)) { - Start-Process git -ArgumentList "clone https://github.com/pcmanus/ccm.git $($env:CCM_PATH)" -Wait -NoNewWindow + Start-Process git -ArgumentList "clone -b cassandra-test https://github.com/pcmanus/ccm.git $($env:CCM_PATH)" -Wait -NoNewWindow } From e0b7e73c805a3581cc773cb49ff7a6933fa309c5 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Wed, 17 Jun 2020 13:43:48 -0400 Subject: [PATCH 091/211] release 3.24: changelog & version --- CHANGELOG.rst | 5 +++-- cassandra/__init__.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index dde4c316dd..53a5e22436 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,6 @@ 3.24.0 ====== -Not released +June 18, 2020 Features -------- @@ -12,7 +12,7 @@ Bug Fixes --------- * Unable to connect to a cloud cluster using Ubuntu 20.04 (PYTHON-1238) * PlainTextAuthProvider fails with unicode chars and Python3 (PYTHON-1241) -* Graph execution profiles consistency level are not set to LOCAL_QUORUM with a cloud cluster (PYTHON-1240) +* [GRAPH] Graph execution profiles consistency level are not set to LOCAL_QUORUM with a cloud cluster (PYTHON-1240) * [GRAPH] Can't write data in a Boolean field using the Fluent API (PYTHON-1239) * [GRAPH] Fix elementMap() result deserialization (PYTHON-1233) @@ -20,6 +20,7 @@ Others ------ * Bump geomet dependency version to 0.2 (PYTHON-1243) * Bump gremlinpython dependency version to 3.4.6 (PYTHON-1212) +* Improve fluent graph documentation for core graphs (PYTHON-1244) 3.23.0 ====== diff --git a/cassandra/__init__.py b/cassandra/__init__.py index fd4e516f16..f2bf696035 100644 --- a/cassandra/__init__.py +++ b/cassandra/__init__.py @@ -22,7 +22,7 @@ def emit(self, record): logging.getLogger('cassandra').addHandler(NullHandler()) -__version_info__ = (3, 23, 0, 'post0') +__version_info__ = (3, 24, 0) __version__ = '.'.join(map(str, __version_info__)) From c88255f202a21bbbae35f16e603b0f10f2f2cf36 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Wed, 17 Jun 2020 13:45:15 -0400 Subject: [PATCH 092/211] release 3.24: docs --- docs.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs.yaml b/docs.yaml index 2298db2588..3a33e5a4e8 100644 --- a/docs.yaml +++ b/docs.yaml @@ -22,6 +22,8 @@ sections: # build extensions like libev CASS_DRIVER_NO_CYTHON=1 python setup.py build_ext --inplace --force versions: + - name: '3.24' + ref: e0b7e73c - name: '3.23' ref: a40a2af7 - name: '3.22' From 21cac12b2ca68b1d2abdda97db1b73cf5f3ea450 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Thu, 9 Jul 2020 15:27:36 -0400 Subject: [PATCH 093/211] PYTHON-1254: Update Getting Started guide to include Astra connection example --- docs/getting_started.rst | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/docs/getting_started.rst b/docs/getting_started.rst index 8cb86a5504..ce31ca5d6f 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -3,16 +3,42 @@ Getting Started First, make sure you have the driver properly :doc:`installed `. -Connecting to Cassandra +Connecting to a Cluster ----------------------- Before we can start executing any queries against a Cassandra cluster we need to setup an instance of :class:`~.Cluster`. As the name suggests, you will typically have one instance of :class:`~.Cluster` for each Cassandra cluster you want to interact with. 
-The simplest way to create a :class:`~.Cluster` is like this: First, make sure you have the Cassandra driver properly :doc:`installed `. +Connecting to Astra ++++++++++++++++++++ + +If you are a DataStax `Astra `_ user, +here is how to connect to your cluster: + +1. Download the secure connect bundle from your Astra account. +2. Connect to your cluster with + +.. code-block:: python + + from cassandra.cluster import Cluster + from cassandra.auth import PlainTextAuthProvider + + cloud_config = { + 'secure_connect_bundle': '/path/to/secure-connect-dbname.zip' + } + auth_provider = PlainTextAuthProvider(username='user', password='pass') + cluster = Cluster(cloud=cloud_config, auth_provider=auth_provider) + session = cluster.connect() + +See `Astra `_ and :doc:`cloud` for more details. + +Connecting to Cassandra ++++++++++++++++++++++++ +The simplest way to create a :class:`~.Cluster` is like this: + .. code-block:: python from cassandra.cluster import Cluster @@ -52,6 +78,8 @@ To establish connections and begin executing queries we need a cluster = Cluster() session = cluster.connect() +Session Keyspace +---------------- The :meth:`~.Cluster.connect()` method takes an optional ``keyspace`` argument which sets the default keyspace for all queries made through that :class:`~.Session`: @@ -60,7 +88,6 @@ which sets the default keyspace for all queries made through that :class:`~.Sess cluster = Cluster() session = cluster.connect('mykeyspace') - You can always change a Session's keyspace using :meth:`~.Session.set_keyspace` or by executing a ``USE `` query: @@ -70,6 +97,8 @@ by executing a ``USE `` query: # or you can do this instead session.execute('USE users') +Execution Profiles +------------------ Profiles are passed in by ``execution_profiles`` dict. In this case we can construct the base ``ExecutionProfile`` passing all attributes: From 3b50d0554c65b1030547145aef6061e4a854cb27 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Thu, 9 Jul 2020 15:29:10 -0400 Subject: [PATCH 094/211] update docs hash for 3.24 --- docs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs.yaml b/docs.yaml index 3a33e5a4e8..eeccbe16b6 100644 --- a/docs.yaml +++ b/docs.yaml @@ -23,7 +23,7 @@ sections: CASS_DRIVER_NO_CYTHON=1 python setup.py build_ext --inplace --force versions: - name: '3.24' - ref: e0b7e73c + ref: 21cac12b - name: '3.23' ref: a40a2af7 - name: '3.22' From 4c582f24c62579f8ea5d2a83e14f9255a8a531b3 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Mon, 27 Jul 2020 14:26:06 -0400 Subject: [PATCH 095/211] Initial Jenkinsfile with a scripted pipeline --- Jenkinsfile | 816 +++++++++++++++++--------------------------- Jenkinsfile.bak | 873 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1182 insertions(+), 507 deletions(-) create mode 100644 Jenkinsfile.bak diff --git a/Jenkinsfile b/Jenkinsfile index 87b20804ca..61b2f3ad5c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,39 +1,172 @@ #!groovy - -def initializeEnvironment() { - env.DRIVER_DISPLAY_NAME = 'Cassandra Python Driver' - env.DRIVER_METRIC_TYPE = 'oss' +/* + +There are multiple combinations to test the python driver. + +Test Profiles: + + Full: Execute all unit and integration tests, including long tests. + Standard: Execute unit and integration tests. + Smoke Tests: Execute a small subset of tests. + EVENT_LOOP: Execute a small subset of tests selected to test EVENT_LOOPs. + +Matrix Types: + + Full: All server versions, python runtimes tested with and without Cython. 
+  Develop: Smaller matrix for dev purposes.
+  Cassandra: All Cassandra server versions.
+  Dse: All DSE server versions.
+
+Parameters:
+
+  EVENT_LOOP: 'LIBEV' (Default), 'GEVENT', 'EVENTLET', 'ASYNCIO', 'ASYNCORE', 'TWISTED'
+  CYTHON: Default, 'True', 'False'
+
+*/
+
+@Library('dsdrivers-pipeline-lib@develop')
+import com.datastax.jenkins.drivers.python.Slack
+
+slack = new Slack()
+
+// Define our predefined matrices
+matrices = [
+  "FULL": [
+    "SERVER": ['2.1', '2.2', '3.0', '3.11', '4.0', 'dse-5.0', 'dse-5.1', 'dse-6.0', 'dse-6.7', 'dse-6.8'],
+    "RUNTIME": ['2.7.18', '3.4.10', '3.5.9', '3.6.10', '3.7.7', '3.8.3'],
+    "CYTHON": ["True", "False"]
+  ],
+  "DEVELOP": [
+    "SERVER": ['2.1', '3.11', 'dse-6.8'],
+    "RUNTIME": ['2.7.18', '3.6.10'],
+    "CYTHON": ["True", "False"]
+  ],
+  "CASSANDRA": [
+    "SERVER": ['2.1', '2.2', '3.0', '3.11', '4.0'],
+    "RUNTIME": ['2.7.18', '3.4.10', '3.5.9', '3.6.10', '3.7.7', '3.8.3'],
+    "CYTHON": ["True", "False"]
+  ],
+  "DSE": [
+    "SERVER": ['dse-5.0', 'dse-5.1', 'dse-6.0', 'dse-6.7', 'dse-6.8'],
+    "RUNTIME": ['2.7.18', '3.4.10', '3.5.9', '3.6.10', '3.7.7', '3.8.3'],
+    "CYTHON": ["True", "False"]
+  ]
+]
+
+def getBuildContext() {
+  /*
+  Based on schedule, parameters and branch name, configure the build context and env vars.
+  */
+
+  def driver_display_name = 'Cassandra Python Driver'
   if (env.GIT_URL.contains('riptano/python-driver')) {
-    env.DRIVER_DISPLAY_NAME = 'private ' + env.DRIVER_DISPLAY_NAME
-    env.DRIVER_METRIC_TYPE = 'oss-private'
+    driver_display_name = 'private ' + driver_display_name
   } else if (env.GIT_URL.contains('python-dse-driver')) {
-    env.DRIVER_DISPLAY_NAME = 'DSE Python Driver'
-    env.DRIVER_METRIC_TYPE = 'dse'
+    driver_display_name = 'DSE Python Driver'
   }
 
-  env.GIT_SHA = "${env.GIT_COMMIT.take(7)}"
-  env.GITHUB_PROJECT_URL = "https://${GIT_URL.replaceFirst(/(git@|http:\/\/|https:\/\/)/, '').replace(':', '/').replace('.git', '')}"
-  env.GITHUB_BRANCH_URL = "${GITHUB_PROJECT_URL}/tree/${env.BRANCH_NAME}"
-  env.GITHUB_COMMIT_URL = "${GITHUB_PROJECT_URL}/commit/${env.GIT_COMMIT}"
+  def git_sha = "${env.GIT_COMMIT.take(7)}"
+  def github_project_url = "https://${GIT_URL.replaceFirst(/(git@|http:\/\/|https:\/\/)/, '').replace(':', '/').replace('.git', '')}"
+  def github_branch_url = "${github_project_url}/tree/${env.BRANCH_NAME}"
+  def github_commit_url = "${github_project_url}/commit/${env.GIT_COMMIT}"
 
-  sh label: 'Assign Python global environment', script: '''#!/bin/bash -lex
-    pyenv global ${PYTHON_VERSION}
-  '''
+  def profile = "${params.PROFILE}"
+  def EVENT_LOOP = "${params.EVENT_LOOP.toLowerCase()}"
+  matrixType = "FULL"
+  developBranchPattern = ~"((dev|long)-)?python-.*"
 
-  sh label: 'Install socat; required for unix socket tests', script: '''#!/bin/bash -lex
-    sudo apt-get install socat
-  '''
+  if (developBranchPattern.matcher(env.BRANCH_NAME).matches()) {
+    matrixType = "DEVELOP"
+    if (env.BRANCH_NAME.contains("long")) {
+      profile = "FULL"
+    }
+  }
+
+  // Check if parameters were set explicitly
+  if (params.MATRIX != "DEFAULT") {
+    matrixType = params.MATRIX
+  }
+
+  matrix = matrices[matrixType].clone()
+  if (params.CYTHON != "DEFAULT") {
+    matrix["CYTHON"] = [params.CYTHON]
+  }
+
+  if (params.SERVER_VERSION != "DEFAULT") {
+    matrix["SERVER"] = [params.SERVER_VERSION]
+  }
+
+  if (params.PYTHON_VERSION != "DEFAULT") {
+    matrix["RUNTIME"] = [params.PYTHON_VERSION]
+  }
+
+  if (params.CI_SCHEDULE == "WEEKNIGHTS") {
+    matrix["SERVER"] = params.CI_SCHEDULE_SERVER_VERSION.split(' ')
+    matrix["RUNTIME"] = params.CI_SCHEDULE_PYTHON_VERSION.split(' ')
+  }
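+
+  // For illustration (derived from the matrices above, not a value computed here):
+  // with every parameter left at DEFAULT on a "dev-python-*" branch, the DEVELOP
+  // matrix resolves to SERVER x RUNTIME x CYTHON = 3 x 2 x 2 = 12 parallel builds;
+  // getMatrixBuilds() below creates one task per combination.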
+
+  context = [
+    vars: [
+      "PROFILE=${profile}",
+      "EVENT_LOOP=${EVENT_LOOP}",
+      "DRIVER_DISPLAY_NAME=${driver_display_name}", "GIT_SHA=${git_sha}", "GITHUB_PROJECT_URL=${github_project_url}",
+      "GITHUB_BRANCH_URL=${github_branch_url}", "GITHUB_COMMIT_URL=${github_commit_url}"
+    ],
+    matrix: matrix
+  ]
+
+  return context
+}
+
+def buildAndTest(context) {
+  initializeEnvironment()
+  installDriverAndCompileExtensions()
+
+  try {
+    executeTests()
+  } finally {
+    junit testResults: '*_results.xml'
+  }
+}
+
+def getMatrixBuilds(buildContext) {
+  def tasks = [:]
+  matrix = buildContext.matrix
+
+  matrix["SERVER"].each { serverVersion ->
+    matrix["RUNTIME"].each { runtimeVersion ->
+      matrix["CYTHON"].each { cythonFlag ->
+        def taskVars = [
+          "CASSANDRA_VERSION=${serverVersion}",
+          "PYTHON_VERSION=${runtimeVersion}",
+          "CYTHON_ENABLED=${cythonFlag}"
+        ]
+        def cythonDesc = cythonFlag == "True" ? ", Cython": ""
+        tasks["${serverVersion}, py${runtimeVersion}${cythonDesc}"] = {
+          node("${OS_VERSION}") {
+            checkout scm
+
+            withEnv(taskVars) {
+              buildAndTest(context)
+            }
+          }
+        }
+      }
+    }
+  }
+  return tasks
+}
 
-  sh label: 'Install the latest setuptools', script: '''#!/bin/bash -lex
+def initializeEnvironment() {
+  sh label: 'Initialize the environment', script: '''#!/bin/bash -lex
+    pyenv global ${PYTHON_VERSION}
+    sudo apt-get install socat
     pip install --upgrade pip
     pip install -U setuptools
-  '''
-
-  sh label: 'Install CCM', script: '''#!/bin/bash -lex
     pip install ${HOME}/ccm
   '''
 
-  // Determine if server version is Apache Cassandra� or DataStax Enterprise
+  // Determine if server version is Apache CassandraⓇ or DataStax Enterprise
   if (env.CASSANDRA_VERSION.split('-')[0] == 'dse') {
     sh label: 'Install DataStax Enterprise requirements', script: '''#!/bin/bash -lex
       pip install -r test-datastax-requirements.txt
@@ -46,7 +179,6 @@ def initializeEnvironment() {
     sh label: 'Uninstall the geomet dependency since it is not required for Cassandra', script: '''#!/bin/bash -lex
       pip uninstall -y geomet
     '''
-
   }
 
   sh label: 'Install unit test modules', script: '''#!/bin/bash -lex
@@ -71,6 +203,7 @@ def initializeEnvironment() {
 
     python --version
     pip --version
+    pip freeze
    printenv | sort
   '''
 }
@@ -95,9 +228,9 @@ def executeStandardTests() {
    . 
${HOME}/environment.txt set +o allexport - EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_results.xml tests/unit/ || true - EVENT_LOOP_MANAGER=eventlet VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_eventlet_results.xml tests/unit/io/test_eventletreactor.py || true - EVENT_LOOP_MANAGER=gevent VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_gevent_results.xml tests/unit/io/test_geventreactor.py || true + EVENT_LOOP=${EVENT_LOOP} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_results.xml tests/unit/ || true + EVENT_LOOP=eventlet VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_eventlet_results.xml tests/unit/io/test_eventletreactor.py || true + EVENT_LOOP=gevent VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_gevent_results.xml tests/unit/io/test_geventreactor.py || true ''' sh label: 'Execute Simulacron integration tests', script: '''#!/bin/bash -lex @@ -107,13 +240,13 @@ def executeStandardTests() { set +o allexport SIMULACRON_JAR="${HOME}/simulacron.jar" - SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_results.xml tests/integration/simulacron/ || true + SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP=${EVENT_LOOP} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_results.xml tests/integration/simulacron/ || true # Run backpressure tests separately to avoid memory issue - SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_1_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_paused_connections || true - SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} 
CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_2_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_queued_requests_timeout || true - SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_3_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_cluster_busy || true - SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_4_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_node_busy || true + SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP=${EVENT_LOOP} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_1_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_paused_connections || true + SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP=${EVENT_LOOP} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_2_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_queued_requests_timeout || true + SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP=${EVENT_LOOP} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_3_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_cluster_busy || true + SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP=${EVENT_LOOP} CASSANDRA_DIR=${CCM_INSTALL_DIR} 
CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_4_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_node_busy || true ''' sh label: 'Execute CQL engine integration tests', script: '''#!/bin/bash -lex @@ -122,7 +255,7 @@ def executeStandardTests() { . ${HOME}/environment.txt set +o allexport - EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=cqle_results.xml tests/integration/cqlengine/ || true + EVENT_LOOP=${EVENT_LOOP} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=cqle_results.xml tests/integration/cqlengine/ || true ''' sh label: 'Execute Apache CassandraⓇ integration tests', script: '''#!/bin/bash -lex @@ -131,7 +264,7 @@ def executeStandardTests() { . ${HOME}/environment.txt set +o allexport - EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml tests/integration/standard/ || true + EVENT_LOOP=${EVENT_LOOP} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml tests/integration/standard/ || true ''' if (env.CASSANDRA_VERSION.split('-')[0] == 'dse' && env.CASSANDRA_VERSION.split('-')[1] != '4.8') { @@ -141,7 +274,7 @@ def executeStandardTests() { . ${HOME}/environment.txt set +o allexport - EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} DSE_VERSION=${DSE_VERSION} ADS_HOME="${HOME}/" VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=dse_results.xml tests/integration/advanced/ || true + EVENT_LOOP=${EVENT_LOOP} CASSANDRA_DIR=${CCM_INSTALL_DIR} DSE_VERSION=${DSE_VERSION} ADS_HOME="${HOME}/" VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=dse_results.xml tests/integration/advanced/ || true ''' } @@ -151,17 +284,17 @@ def executeStandardTests() { . 
${HOME}/environment.txt set +o allexport - EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CLOUD_PROXY_PATH="${HOME}/proxy/" CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=advanced_results.xml tests/integration/cloud/ || true + EVENT_LOOP=${EVENT_LOOP} CLOUD_PROXY_PATH="${HOME}/proxy/" CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=advanced_results.xml tests/integration/cloud/ || true ''' - if (env.EXECUTE_LONG_TESTS == 'True') { + if (env.PROFILE == 'FULL') { sh label: 'Execute long running integration tests', script: '''#!/bin/bash -lex # Load CCM environment variable set -o allexport . ${HOME}/environment.txt set +o allexport - EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --exclude-dir=tests/integration/long/upgrade --with-ignore-docstrings --with-xunit --xunit-file=long_results.xml tests/integration/long/ || true + EVENT_LOOP=${EVENT_LOOP} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --exclude-dir=tests/integration/long/upgrade --with-ignore-docstrings --with-xunit --xunit-file=long_results.xml tests/integration/long/ || true ''' } } @@ -173,7 +306,7 @@ def executeDseSmokeTests() { . 
${HOME}/environment.txt set +o allexport - EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} DSE_VERSION=${DSE_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml tests/integration/standard/test_dse.py || true + EVENT_LOOP=${EVENT_LOOP} CCM_ARGS="${CCM_ARGS}" CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} DSE_VERSION=${DSE_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml tests/integration/standard/test_dse.py || true ''' } @@ -194,69 +327,34 @@ def executeEventLoopTests() { "tests/integration/simulacron/test_endpoint.py" "tests/integration/long/test_ssl.py" ) - EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml ${EVENT_LOOP_TESTS[@]} || true - ''' -} - -def executeUpgradeTests() { - sh label: 'Execute profile upgrade integration tests', script: '''#!/bin/bash -lex - # Load CCM environment variable - set -o allexport - . ${HOME}/environment.txt - set +o allexport - - EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=upgrade_results.xml tests/integration/upgrade || true + EVENT_LOOP=${EVENT_LOOP} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml ${EVENT_LOOP_TESTS[@]} || true ''' } def executeTests() { - switch(params.PROFILE) { + switch(env.PROFILE) { case 'DSE-SMOKE-TEST': executeDseSmokeTests() break - case 'EVENT-LOOP': + case 'EVENT_LOOP': executeEventLoopTests() break - case 'UPGRADE': - executeUpgradeTests() - break default: executeStandardTests() break } } -def notifySlack(status = 'started') { - // Set the global pipeline scoped environment (this is above each matrix) - env.BUILD_STATED_SLACK_NOTIFIED = 'true' - def buildType = 'Commit' - if (params.CI_SCHEDULE != 'DO-NOT-CHANGE-THIS-SELECTION') { - buildType = "${params.CI_SCHEDULE.toLowerCase().capitalize()}" - } - - def color = 'good' // Green - if (status.equalsIgnoreCase('aborted')) { - color = '808080' // Grey - } else if (status.equalsIgnoreCase('unstable')) { - color = 'warning' // Orange - } else if (status.equalsIgnoreCase('failed')) { - color = 'danger' // Red - } - - def message = """Build ${status} for ${env.DRIVER_DISPLAY_NAME} [${buildType}] -<${env.GITHUB_BRANCH_URL}|${env.BRANCH_NAME}> - <${env.RUN_DISPLAY_URL}|#${env.BUILD_NUMBER}> - <${env.GITHUB_COMMIT_URL}|${env.GIT_SHA}>""" - if (params.CI_SCHEDULE != 'DO-NOT-CHANGE-THIS-SELECTION') { - message += " - ${params.CI_SCHEDULE_PYTHON_VERSION} - 
${params.EVENT_LOOP_MANAGER}" - } - if (!status.equalsIgnoreCase('Started')) { - message += """ -${status} after ${currentBuild.durationString - ' and counting'}""" +// TODO move this in the shared lib +def getDriverMetricType() { + metric_type = 'oss' + if (env.GIT_URL.contains('riptano/python-driver')) { + metric_type = 'oss-private' + } else if (env.GIT_URL.contains('python-dse-driver')) { + metric_type = 'dse' } - - slackSend color: "${color}", - channel: "#python-driver-dev-bots", - message: "${message}" + return metric_type } def submitCIMetrics(buildType) { @@ -264,7 +362,8 @@ def submitCIMetrics(buildType) { long durationSec = durationMs / 1000 long nowSec = (currentBuild.startTimeInMillis + durationMs) / 1000 def branchNameNoPeriods = env.BRANCH_NAME.replaceAll('\\.', '_') - def durationMetric = "okr.ci.python.${env.DRIVER_METRIC_TYPE}.${buildType}.${branchNameNoPeriods} ${durationSec} ${nowSec}" + metric_type = getDriverMetricType() + def durationMetric = "okr.ci.python.${metric_type}.${buildType}.${branchNameNoPeriods} ${durationSec} ${nowSec}" timeout(time: 1, unit: 'MINUTES') { withCredentials([string(credentialsId: 'lab-grafana-address', variable: 'LAB_GRAFANA_ADDRESS'), @@ -278,108 +377,24 @@ def submitCIMetrics(buildType) { } } -def describePerCommitStage() { +def describeBuild(buildContext) { script { - def type = 'standard' - def serverDescription = 'current Apache CassandaraⓇ and supported DataStax Enterprise versions' - if (env.BRANCH_NAME ==~ /long-python.*/) { - type = 'long' - } else if (env.BRANCH_NAME ==~ /dev-python.*/) { - type = 'dev' - } - - currentBuild.displayName = "Per-Commit (${env.EVENT_LOOP_MANAGER} | ${type.capitalize()})" - currentBuild.description = "Per-Commit build and ${type} testing of ${serverDescription} against Python v2.7.18 and v3.5.9 using ${env.EVENT_LOOP_MANAGER} event loop manager" + def runtimes = buildContext.matrix["RUNTIME"] + def serverVersions = buildContext.matrix["SERVER"] + def numBuilds = runtimes.size() * serverVersions.size() * buildContext.matrix["CYTHON"].size() + currentBuild.displayName = "${env.PROFILE} (${env.EVENT_LOOP} | ${numBuilds} builds)" + currentBuild.description = "${env.PROFILE} build testing servers (${serverVersions.join(', ')}) against Python (${runtimes.join(', ')}) using ${env.EVENT_LOOP} event loop manager" } - - sh label: 'Describe the python environment', script: '''#!/bin/bash -lex - python -V - pip freeze - ''' } -def describeScheduledTestingStage() { - script { - def type = params.CI_SCHEDULE.toLowerCase().capitalize() - def displayName = "${type} schedule (${env.EVENT_LOOP_MANAGER}" - if (env.CYTHON_ENABLED == 'True') { - displayName += " | Cython" - } - if (params.PROFILE != 'NONE') { - displayName += " | ${params.PROFILE}" - } - displayName += ")" - currentBuild.displayName = displayName - - def serverVersionDescription = "${params.CI_SCHEDULE_SERVER_VERSION.replaceAll(' ', ', ')} server version(s) in the matrix" - def pythonVersionDescription = "${params.CI_SCHEDULE_PYTHON_VERSION.replaceAll(' ', ', ')} Python version(s) in the matrix" - def description = "${type} scheduled testing using ${env.EVENT_LOOP_MANAGER} event loop manager" - if (env.CYTHON_ENABLED == 'True') { - description += ", with Cython enabled" - } - if (params.PROFILE != 'NONE') { - description += ", ${params.PROFILE} profile" - } - description += ", ${serverVersionDescription}, and ${pythonVersionDescription}" - currentBuild.description = description - } -} - -def describeAdhocTestingStage() { - script { - def serverType = 
params.ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION.split('-')[0] - def serverDisplayName = 'Apache CassandaraⓇ' - def serverVersion = " v${serverType}" - if (serverType == 'ALL') { - serverDisplayName = "all ${serverDisplayName} and DataStax Enterprise server versions" - serverVersion = '' - } else { - try { - serverVersion = " v${env.ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION.split('-')[1]}" - } catch (e) { - ;; // no-op - } - if (serverType == 'dse') { - serverDisplayName = 'DataStax Enterprise' - } - } - def displayName = "${params.ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION} for v${params.ADHOC_BUILD_AND_EXECUTE_TESTS_PYTHON_VERSION} (${env.EVENT_LOOP_MANAGER}" - if (env.CYTHON_ENABLED == 'True') { - displayName += " | Cython" - } - if (params.PROFILE != 'NONE') { - displayName += " | ${params.PROFILE}" - } - displayName += ")" - currentBuild.displayName = displayName - - def description = "Testing ${serverDisplayName} ${serverVersion} using ${env.EVENT_LOOP_MANAGER} against Python ${params.ADHOC_BUILD_AND_EXECUTE_TESTS_PYTHON_VERSION}" - if (env.CYTHON_ENABLED == 'True') { - description += ", with Cython" - } - if (params.PROFILE == 'NONE') { - if (params.EXECUTE_LONG_TESTS) { - description += ", with" - } else { - description += ", without" - } - description += " long tests executed" - } else { - description += ", ${params.PROFILE} profile" - } - currentBuild.description = description - } -} - -def branchPatternCron = ~"(master)" -def riptanoPatternCron = ~"(riptano)" +def scheduleTriggerJobName = "drivers/python/oss/master" pipeline { agent none // Global pipeline timeout options { - timeout(time: 10, unit: 'HOURS') + timeout(time: 10, unit: 'HOURS') // TODO timeout should be per build buildDiscarder(logRotator(artifactNumToKeepStr: '10', // Keep only the last 10 artifacts numToKeepStr: '50')) // Keep only the last 50 build records } @@ -406,12 +421,73 @@ pipeline { ''') choice( - name: 'ADHOC_BUILD_AND_EXECUTE_TESTS_PYTHON_VERSION', - choices: ['2.7.18', '3.4.10', '3.5.9', '3.6.10', '3.7.7', '3.8.3'], - description: 'Python version to use for adhoc BUILD-AND-EXECUTE-TESTS ONLY!') + name: 'PROFILE', + choices: ['STANDARD', 'FULL', 'DSE-SMOKE-TEST', 'EVENT_LOOP'], + description: '''

+Profile to utilize for scheduled or adhoc builds
+
+  Choice         | Description
+  STANDARD       | Execute the standard tests for the driver
+  FULL           | Execute all tests for the driver, including long tests.
+  DSE-SMOKE-TEST | Execute only the DataStax Enterprise smoke tests
+  EVENT_LOOP     | Execute only the event loop tests for the specified event loop manager (see: EVENT_LOOP)
''') + choice( + name: 'MATRIX', + choices: ['DEFAULT', 'FULL', 'DEVELOP', 'CASSANDRA', 'DSE'], + description: '''

+The matrix for the build.
+
+  Choice    | Description
+  DEFAULT   | Default to the build context.
+  FULL      | All server versions, python runtimes tested with and without Cython.
+  DEVELOP   | Smaller matrix for dev purposes.
+  CASSANDRA | All Cassandra server versions.
+  DSE       | All DSE server versions.
''') choice( - name: 'ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION', - choices: ['2.1', // Legacy Apache CassandraⓇ + name: 'PYTHON_VERSION', + choices: ['DEFAULT', '2.7.18', '3.4.10', '3.5.9', '3.6.10', '3.7.7', '3.8.3'], + description: 'Python runtime version. Default to the build context.') + choice( + name: 'SERVER_VERSION', + choices: ['DEFAULT', + '2.1', // Legacy Apache CassandraⓇ '2.2', // Legacy Apache CassandraⓇ '3.0', // Previous Apache CassandraⓇ '3.11', // Current Apache CassandraⓇ @@ -421,7 +497,7 @@ pipeline { 'dse-6.0', // Previous DataStax Enterprise 'dse-6.7', // Previous DataStax Enterprise 'dse-6.8', // Current DataStax Enterprise - 'ALL'], + ], description: '''Apache CassandraⓇ and DataStax Enterprise server version to use for adhoc BUILD-AND-EXECUTE-TESTS ONLY! @@ -429,10 +505,14 @@ pipeline { + + + + - + @@ -440,15 +520,15 @@ pipeline { - + - + - + @@ -471,16 +551,32 @@ pipeline {
   Choice  | Description
+  DEFAULT | Default to the build context.
-  2.1     | Apache CassandaraⓇ; v2.1.x
+  2.1     | Apache CassandraⓇ; v2.1.x
   2.2     |
-  3.0     | Apache CassandaraⓇ v3.0.x
+  3.0     | Apache CassandraⓇ v3.0.x
-  3.11    | Apache CassandaraⓇ v3.11.x
+  3.11    | Apache CassandraⓇ v3.11.x
-  4.0     | Apache CassandaraⓇ v4.x (CURRENTLY UNDER DEVELOPMENT)
+  4.0     | Apache CassandraⓇ v4.x (CURRENTLY UNDER DEVELOPMENT)
   dse-5.0 |
   dse-6.8 | DataStax Enterprise v6.8.x (CURRENTLY UNDER DEVELOPMENT)
''') - booleanParam( + choice( name: 'CYTHON', - defaultValue: false, - description: 'Flag to determine if Cython should be enabled for scheduled or adhoc builds') - booleanParam( - name: 'EXECUTE_LONG_TESTS', - defaultValue: false, - description: 'Flag to determine if long integration tests should be executed for scheduled or adhoc builds') + choices: ['DEFAULT', 'True', 'False'], + description: '''

+Flag to determine if Cython should be enabled
+
+  Choice  | Description
+  DEFAULT | Default to the build context.
+  True    | Enable Cython
+  False   | Disable Cython
''') choice( - name: 'EVENT_LOOP_MANAGER', + name: 'EVENT_LOOP', choices: ['LIBEV', 'GEVENT', 'EVENTLET', 'ASYNCIO', 'ASYNCORE', 'TWISTED'], description: '''

+Event loop manager to utilize for scheduled or adhoc builds
+
+  TWISTED | An event-driven networking engine written in Python and licensed under the open source MIT license
''') - choice( - name: 'PROFILE', - choices: ['NONE', 'DSE-SMOKE-TEST', 'EVENT-LOOP', 'UPGRADE'], - description: '''

-Profile to utilize for scheduled or adhoc builds
-
-  Choice         | Description
-  NONE           | Execute the standard tests for the driver
-  DSE-SMOKE-TEST | Execute only the DataStax Enterprise smoke tests
-  EVENT-LOOP     | Execute only the event loop tests for the specified event loop manager (see: EVENT_LOOP_MANAGER)
-  UPGRADE        | Execute only the upgrade tests
''') choice( name: 'CI_SCHEDULE', choices: ['DO-NOT-CHANGE-THIS-SELECTION', 'WEEKNIGHTS', 'WEEKENDS'], @@ -558,316 +626,50 @@ pipeline { } triggers { - parameterizedCron((branchPatternCron.matcher(env.BRANCH_NAME).matches() && !riptanoPatternCron.matcher(GIT_URL).find()) ? """ + parameterizedCron((scheduleTriggerJobName == env.JOB_NAME) ? """ # Every weeknight (Monday - Friday) around 4:00 AM # These schedules will run with and without Cython enabled for Python v2.7.18 and v3.5.9 - H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1 dse-6.0 dse-6.7 - H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.5.9;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1 dse-6.0 dse-6.7 - - # Every Saturday around 12:00, 4:00 and 8:00 PM - # These schedules are for weekly libev event manager runs with and without Cython for most of the Python versions (excludes v3.5.9.x) - H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 - H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.4.10;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 - H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.6.10;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 - H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.7.7;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 - H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.8.3;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 - # These schedules are for weekly gevent event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.10.x) - H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.9;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.10;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.7;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.3;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - # These schedules are for weekly eventlet event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.10.x) - H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.9;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.10;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 20 * * 6 
%CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.7;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.3;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - - # Every Sunday around 12:00 and 4:00 AM - # These schedules are for weekly asyncore event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.10.x) - H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.9;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.10;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.7;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.3;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - # These schedules are for weekly twisted event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.10.x) - H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.9;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.10;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.7;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.3;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP=LIBEV;CI_SCHEDULE_PYTHON_VERSION=2.7.18 3.5.9;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1 dse-6.0 dse-6.7 """ : "") } environment { OS_VERSION = 'ubuntu/bionic64/python-driver' - CYTHON_ENABLED = "${params.CYTHON ? 'True' : 'False'}" - EVENT_LOOP_MANAGER = "${params.EVENT_LOOP_MANAGER.toLowerCase()}" - EXECUTE_LONG_TESTS = "${params.EXECUTE_LONG_TESTS ? 'True' : 'False'}" CCM_ENVIRONMENT_SHELL = '/usr/local/bin/ccm_environment.sh' CCM_MAX_HEAP_SIZE = '1536M' } stages { - stage ('Per-Commit') { - options { - timeout(time: 2, unit: 'HOURS') + stage ('Build and Test') { + agent { + // If I removed this agent block, GIT_URL and GIT_COMMIT aren't set. 
+ // However, this trigger an additional checkout + label "master" } when { beforeAgent true - branch pattern: '((dev|long)-)?python-.*', comparator: 'REGEXP' allOf { - expression { params.ADHOC_BUILD_TYPE == 'BUILD' } - expression { params.CI_SCHEDULE == 'DO-NOT-CHANGE-THIS-SELECTION' } not { buildingTag() } } } - matrix { - axes { - axis { - name 'CASSANDRA_VERSION' - values '3.11', // Current Apache Cassandra - 'dse-6.8' // Current DataStax Enterprise - } - axis { - name 'PYTHON_VERSION' - values '2.7.18', '3.5.9' - } - axis { - name 'CYTHON_ENABLED' - values 'False' - } - } + steps { + script { + context = getBuildContext() + withEnv(context.vars) { + describeBuild(context) + slack.notifyChannel() - agent { - label "${OS_VERSION}" - } + // build and test all builds + parallel getMatrixBuilds(context) - stages { - stage('Initialize-Environment') { - steps { - initializeEnvironment() - script { - if (env.BUILD_STATED_SLACK_NOTIFIED != 'true') { - notifySlack() - } - } - } - } - stage('Describe-Build') { - steps { - describePerCommitStage() - } - } - stage('Install-Driver-And-Compile-Extensions') { - steps { - installDriverAndCompileExtensions() - } - } - stage('Execute-Tests') { - steps { - - script { - if (env.BRANCH_NAME ==~ /long-python.*/) { - withEnv(["EXECUTE_LONG_TESTS=True"]) { - executeTests() - } - } - else { - executeTests() - } - } - } - post { - always { - junit testResults: '*_results.xml' - } - } - } - } - } - post { - always { - node('master') { + // send the metrics submitCIMetrics('commit') + slack.notifyChannel(currentBuild.currentResult) } } - aborted { - notifySlack('aborted') - } - success { - notifySlack('completed') - } - unstable { - notifySlack('unstable') - } - failure { - notifySlack('FAILED') - } - } - } - - stage ('Scheduled-Testing') { - when { - beforeAgent true - allOf { - expression { params.ADHOC_BUILD_TYPE == 'BUILD' } - expression { params.CI_SCHEDULE != 'DO-NOT-CHANGE-THIS-SELECTION' } - not { buildingTag() } - } - } - matrix { - axes { - axis { - name 'CASSANDRA_VERSION' - values '2.1', // Legacy Apache Cassandra - '2.2', // Legacy Apache Cassandra - '3.0', // Previous Apache Cassandra - '3.11', // Current Apache Cassandra - 'dse-5.1', // Legacy DataStax Enterprise - 'dse-6.0', // Previous DataStax Enterprise - 'dse-6.7' // Current DataStax Enterprise - } - axis { - name 'CYTHON_ENABLED' - values 'True', 'False' - } - } - when { - beforeAgent true - allOf { - expression { return params.CI_SCHEDULE_SERVER_VERSION.split(' ').any { it =~ /(ALL|${env.CASSANDRA_VERSION})/ } } - } - } - - environment { - PYTHON_VERSION = "${params.CI_SCHEDULE_PYTHON_VERSION}" - } - agent { - label "${OS_VERSION}" - } - - stages { - stage('Initialize-Environment') { - steps { - initializeEnvironment() - script { - if (env.BUILD_STATED_SLACK_NOTIFIED != 'true') { - notifySlack() - } - } - } - } - stage('Describe-Build') { - steps { - describeScheduledTestingStage() - } - } - stage('Install-Driver-And-Compile-Extensions') { - steps { - installDriverAndCompileExtensions() - } - } - stage('Execute-Tests') { - steps { - executeTests() - } - post { - always { - junit testResults: '*_results.xml' - } - } - } - } - } - post { - aborted { - notifySlack('aborted') - } - success { - notifySlack('completed') - } - unstable { - notifySlack('unstable') - } - failure { - notifySlack('FAILED') - } } } - - stage('Adhoc-Testing') { - when { - beforeAgent true - allOf { - expression { params.ADHOC_BUILD_TYPE == 'BUILD-AND-EXECUTE-TESTS' } - not { buildingTag() } - } - } - - environment { - 
CYTHON_ENABLED = "${params.CYTHON ? 'True' : 'False'}" - PYTHON_VERSION = "${params.ADHOC_BUILD_AND_EXECUTE_TESTS_PYTHON_VERSION}" - } - - matrix { - axes { - axis { - name 'CASSANDRA_VERSION' - values '2.1', // Legacy Apache Cassandra - '2.2', // Legacy Apache Cassandra - '3.0', // Previous Apache Cassandra - '3.11', // Current Apache Cassandra - '4.0', // Development Apache Cassandra - 'dse-5.0', // Long Term Support DataStax Enterprise - 'dse-5.1', // Legacy DataStax Enterprise - 'dse-6.0', // Previous DataStax Enterprise - 'dse-6.7', // Current DataStax Enterprise - 'dse-6.8' // Development DataStax Enterprise - } - } - when { - beforeAgent true - allOf { - expression { params.ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION ==~ /(ALL|${env.CASSANDRA_VERSION})/ } - } - } - - agent { - label "${OS_VERSION}" - } - - stages { - stage('Describe-Build') { - steps { - describeAdhocTestingStage() - } - } - stage('Initialize-Environment') { - steps { - initializeEnvironment() - } - } - stage('Install-Driver-And-Compile-Extensions') { - steps { - installDriverAndCompileExtensions() - } - } - stage('Execute-Tests') { - steps { - executeTests() - } - post { - always { - junit testResults: '*_results.xml' - } - } - } - } - } - } } } diff --git a/Jenkinsfile.bak b/Jenkinsfile.bak new file mode 100644 index 0000000000..87b20804ca --- /dev/null +++ b/Jenkinsfile.bak @@ -0,0 +1,873 @@ +#!groovy + +def initializeEnvironment() { + env.DRIVER_DISPLAY_NAME = 'Cassandra Python Driver' + env.DRIVER_METRIC_TYPE = 'oss' + if (env.GIT_URL.contains('riptano/python-driver')) { + env.DRIVER_DISPLAY_NAME = 'private ' + env.DRIVER_DISPLAY_NAME + env.DRIVER_METRIC_TYPE = 'oss-private' + } else if (env.GIT_URL.contains('python-dse-driver')) { + env.DRIVER_DISPLAY_NAME = 'DSE Python Driver' + env.DRIVER_METRIC_TYPE = 'dse' + } + + env.GIT_SHA = "${env.GIT_COMMIT.take(7)}" + env.GITHUB_PROJECT_URL = "https://${GIT_URL.replaceFirst(/(git@|http:\/\/|https:\/\/)/, '').replace(':', '/').replace('.git', '')}" + env.GITHUB_BRANCH_URL = "${GITHUB_PROJECT_URL}/tree/${env.BRANCH_NAME}" + env.GITHUB_COMMIT_URL = "${GITHUB_PROJECT_URL}/commit/${env.GIT_COMMIT}" + + sh label: 'Assign Python global environment', script: '''#!/bin/bash -lex + pyenv global ${PYTHON_VERSION} + ''' + + sh label: 'Install socat; required for unix socket tests', script: '''#!/bin/bash -lex + sudo apt-get install socat + ''' + + sh label: 'Install the latest setuptools', script: '''#!/bin/bash -lex + pip install --upgrade pip + pip install -U setuptools + ''' + + sh label: 'Install CCM', script: '''#!/bin/bash -lex + pip install ${HOME}/ccm + ''' + + // Determine if server version is Apache Cassandra� or DataStax Enterprise + if (env.CASSANDRA_VERSION.split('-')[0] == 'dse') { + sh label: 'Install DataStax Enterprise requirements', script: '''#!/bin/bash -lex + pip install -r test-datastax-requirements.txt + ''' + } else { + sh label: 'Install Apache CassandraⓇ requirements', script: '''#!/bin/bash -lex + pip install -r test-requirements.txt + ''' + + sh label: 'Uninstall the geomet dependency since it is not required for Cassandra', script: '''#!/bin/bash -lex + pip uninstall -y geomet + ''' + + } + + sh label: 'Install unit test modules', script: '''#!/bin/bash -lex + pip install nose-ignore-docstring nose-exclude service_identity + ''' + + if (env.CYTHON_ENABLED == 'True') { + sh label: 'Install cython modules', script: '''#!/bin/bash -lex + pip install cython numpy + ''' + } + + sh label: 'Download Apache CassandraⓇ or DataStax Enterprise', script: 
'''#!/bin/bash -lex + . ${CCM_ENVIRONMENT_SHELL} ${CASSANDRA_VERSION} + ''' + + sh label: 'Display Python and environment information', script: '''#!/bin/bash -le + # Load CCM environment variables + set -o allexport + . ${HOME}/environment.txt + set +o allexport + + python --version + pip --version + printenv | sort + ''' +} + +def installDriverAndCompileExtensions() { + if (env.CYTHON_ENABLED == 'True') { + sh label: 'Install the driver and compile with C extensions with Cython', script: '''#!/bin/bash -lex + python setup.py build_ext --inplace + ''' + } else { + sh label: 'Install the driver and compile with C extensions without Cython', script: '''#!/bin/bash -lex + python setup.py build_ext --inplace --no-cython + ''' + } +} + +def executeStandardTests() { + + sh label: 'Execute unit tests', script: '''#!/bin/bash -lex + # Load CCM environment variables + set -o allexport + . ${HOME}/environment.txt + set +o allexport + + EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_results.xml tests/unit/ || true + EVENT_LOOP_MANAGER=eventlet VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_eventlet_results.xml tests/unit/io/test_eventletreactor.py || true + EVENT_LOOP_MANAGER=gevent VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_gevent_results.xml tests/unit/io/test_geventreactor.py || true + ''' + + sh label: 'Execute Simulacron integration tests', script: '''#!/bin/bash -lex + # Load CCM environment variables + set -o allexport + . 
${HOME}/environment.txt + set +o allexport + + SIMULACRON_JAR="${HOME}/simulacron.jar" + SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_results.xml tests/integration/simulacron/ || true + + # Run backpressure tests separately to avoid memory issue + SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_1_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_paused_connections || true + SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_2_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_queued_requests_timeout || true + SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_3_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_cluster_busy || true + SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_4_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_node_busy || true + ''' + + sh label: 'Execute CQL engine integration tests', script: '''#!/bin/bash -lex + # Load CCM environment variables + set -o allexport + . 
${HOME}/environment.txt + set +o allexport + + EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=cqle_results.xml tests/integration/cqlengine/ || true + ''' + + sh label: 'Execute Apache CassandraⓇ integration tests', script: '''#!/bin/bash -lex + # Load CCM environment variables + set -o allexport + . ${HOME}/environment.txt + set +o allexport + + EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml tests/integration/standard/ || true + ''' + + if (env.CASSANDRA_VERSION.split('-')[0] == 'dse' && env.CASSANDRA_VERSION.split('-')[1] != '4.8') { + sh label: 'Execute DataStax Enterprise integration tests', script: '''#!/bin/bash -lex + # Load CCM environment variable + set -o allexport + . ${HOME}/environment.txt + set +o allexport + + EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} DSE_VERSION=${DSE_VERSION} ADS_HOME="${HOME}/" VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=dse_results.xml tests/integration/advanced/ || true + ''' + } + + sh label: 'Execute DataStax Constellation integration tests', script: '''#!/bin/bash -lex + # Load CCM environment variable + set -o allexport + . ${HOME}/environment.txt + set +o allexport + + EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CLOUD_PROXY_PATH="${HOME}/proxy/" CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=advanced_results.xml tests/integration/cloud/ || true + ''' + + if (env.EXECUTE_LONG_TESTS == 'True') { + sh label: 'Execute long running integration tests', script: '''#!/bin/bash -lex + # Load CCM environment variable + set -o allexport + . ${HOME}/environment.txt + set +o allexport + + EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --exclude-dir=tests/integration/long/upgrade --with-ignore-docstrings --with-xunit --xunit-file=long_results.xml tests/integration/long/ || true + ''' + } +} + +def executeDseSmokeTests() { + sh label: 'Execute profile DataStax Enterprise smoke test integration tests', script: '''#!/bin/bash -lex + # Load CCM environment variable + set -o allexport + . 
${HOME}/environment.txt + set +o allexport + + EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} DSE_VERSION=${DSE_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml tests/integration/standard/test_dse.py || true + ''' +} + +def executeEventLoopTests() { + sh label: 'Execute profile event loop manager integration tests', script: '''#!/bin/bash -lex + # Load CCM environment variable + set -o allexport + . ${HOME}/environment.txt + set +o allexport + + EVENT_LOOP_TESTS=( + "tests/integration/standard/test_cluster.py" + "tests/integration/standard/test_concurrent.py" + "tests/integration/standard/test_connection.py" + "tests/integration/standard/test_control_connection.py" + "tests/integration/standard/test_metrics.py" + "tests/integration/standard/test_query.py" + "tests/integration/simulacron/test_endpoint.py" + "tests/integration/long/test_ssl.py" + ) + EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml ${EVENT_LOOP_TESTS[@]} || true + ''' +} + +def executeUpgradeTests() { + sh label: 'Execute profile upgrade integration tests', script: '''#!/bin/bash -lex + # Load CCM environment variable + set -o allexport + . ${HOME}/environment.txt + set +o allexport + + EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=upgrade_results.xml tests/integration/upgrade || true + ''' +} + +def executeTests() { + switch(params.PROFILE) { + case 'DSE-SMOKE-TEST': + executeDseSmokeTests() + break + case 'EVENT-LOOP': + executeEventLoopTests() + break + case 'UPGRADE': + executeUpgradeTests() + break + default: + executeStandardTests() + break + } +} + +def notifySlack(status = 'started') { + // Set the global pipeline scoped environment (this is above each matrix) + env.BUILD_STATED_SLACK_NOTIFIED = 'true' + + def buildType = 'Commit' + if (params.CI_SCHEDULE != 'DO-NOT-CHANGE-THIS-SELECTION') { + buildType = "${params.CI_SCHEDULE.toLowerCase().capitalize()}" + } + + def color = 'good' // Green + if (status.equalsIgnoreCase('aborted')) { + color = '808080' // Grey + } else if (status.equalsIgnoreCase('unstable')) { + color = 'warning' // Orange + } else if (status.equalsIgnoreCase('failed')) { + color = 'danger' // Red + } + + def message = """Build ${status} for ${env.DRIVER_DISPLAY_NAME} [${buildType}] +<${env.GITHUB_BRANCH_URL}|${env.BRANCH_NAME}> - <${env.RUN_DISPLAY_URL}|#${env.BUILD_NUMBER}> - <${env.GITHUB_COMMIT_URL}|${env.GIT_SHA}>""" + if (params.CI_SCHEDULE != 'DO-NOT-CHANGE-THIS-SELECTION') { + message += " - ${params.CI_SCHEDULE_PYTHON_VERSION} - ${params.EVENT_LOOP_MANAGER}" + } + if (!status.equalsIgnoreCase('Started')) { + message += """ +${status} after ${currentBuild.durationString - ' and counting'}""" + } + + slackSend color: "${color}", + channel: "#python-driver-dev-bots", + message: "${message}" +} + +def submitCIMetrics(buildType) { + long durationMs = 
currentBuild.duration +  long durationSec = durationMs / 1000 +  long nowSec = (currentBuild.startTimeInMillis + durationMs) / 1000 +  def branchNameNoPeriods = env.BRANCH_NAME.replaceAll('\\.', '_') +  def durationMetric = "okr.ci.python.${env.DRIVER_METRIC_TYPE}.${buildType}.${branchNameNoPeriods} ${durationSec} ${nowSec}" + +  timeout(time: 1, unit: 'MINUTES') { +    withCredentials([string(credentialsId: 'lab-grafana-address', variable: 'LAB_GRAFANA_ADDRESS'), +                     string(credentialsId: 'lab-grafana-port', variable: 'LAB_GRAFANA_PORT')]) { +      withEnv(["DURATION_METRIC=${durationMetric}"]) { +        sh label: 'Send runtime metrics to labgrafana', script: '''#!/bin/bash -lex +          echo "${DURATION_METRIC}" | nc -q 5 ${LAB_GRAFANA_ADDRESS} ${LAB_GRAFANA_PORT} +        ''' +      } +    } +  } +} + +def describePerCommitStage() { +  script { +    def type = 'standard' +    def serverDescription = 'current Apache CassandraⓇ and supported DataStax Enterprise versions' +    if (env.BRANCH_NAME ==~ /long-python.*/) { +      type = 'long' +    } else if (env.BRANCH_NAME ==~ /dev-python.*/) { +      type = 'dev' +    } + +    currentBuild.displayName = "Per-Commit (${env.EVENT_LOOP_MANAGER} | ${type.capitalize()})" +    currentBuild.description = "Per-Commit build and ${type} testing of ${serverDescription} against Python v2.7.18 and v3.5.9 using ${env.EVENT_LOOP_MANAGER} event loop manager" +  } + +  sh label: 'Describe the python environment', script: '''#!/bin/bash -lex +    python -V +    pip freeze +  ''' +} + +def describeScheduledTestingStage() { +  script { +    def type = params.CI_SCHEDULE.toLowerCase().capitalize() +    def displayName = "${type} schedule (${env.EVENT_LOOP_MANAGER}" +    if (env.CYTHON_ENABLED == 'True') { +      displayName += " | Cython" +    } +    if (params.PROFILE != 'NONE') { +      displayName += " | ${params.PROFILE}" +    } +    displayName += ")" +    currentBuild.displayName = displayName + +    def serverVersionDescription = "${params.CI_SCHEDULE_SERVER_VERSION.replaceAll(' ', ', ')} server version(s) in the matrix" +    def pythonVersionDescription = "${params.CI_SCHEDULE_PYTHON_VERSION.replaceAll(' ', ', ')} Python version(s) in the matrix" +    def description = "${type} scheduled testing using ${env.EVENT_LOOP_MANAGER} event loop manager" +    if (env.CYTHON_ENABLED == 'True') { +      description += ", with Cython enabled" +    } +    if (params.PROFILE != 'NONE') { +      description += ", ${params.PROFILE} profile" +    } +    description += ", ${serverVersionDescription}, and ${pythonVersionDescription}" +    currentBuild.description = description +  } +} + +def describeAdhocTestingStage() { +  script { +    def serverType = params.ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION.split('-')[0] +    def serverDisplayName = 'Apache CassandraⓇ' +    def serverVersion = " v${serverType}" +    if (serverType == 'ALL') { +      serverDisplayName = "all ${serverDisplayName} and DataStax Enterprise server versions" +      serverVersion = '' +    } else { +      try { +        serverVersion = " v${env.ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION.split('-')[1]}" +      } catch (e) { +        ;; // no-op +      } +      if (serverType == 'dse') { +        serverDisplayName = 'DataStax Enterprise' +      } +    } +    def displayName = "${params.ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION} for v${params.ADHOC_BUILD_AND_EXECUTE_TESTS_PYTHON_VERSION} (${env.EVENT_LOOP_MANAGER}" +    if (env.CYTHON_ENABLED == 'True') { +      displayName += " | Cython" +    } +    if (params.PROFILE != 'NONE') { +      displayName += " | ${params.PROFILE}" +    } +    displayName += ")" +    currentBuild.displayName = displayName + +    def description = "Testing ${serverDisplayName} ${serverVersion} using ${env.EVENT_LOOP_MANAGER} 
against Python ${params.ADHOC_BUILD_AND_EXECUTE_TESTS_PYTHON_VERSION}" + if (env.CYTHON_ENABLED == 'True') { + description += ", with Cython" + } + if (params.PROFILE == 'NONE') { + if (params.EXECUTE_LONG_TESTS) { + description += ", with" + } else { + description += ", without" + } + description += " long tests executed" + } else { + description += ", ${params.PROFILE} profile" + } + currentBuild.description = description + } +} + +def branchPatternCron = ~"(master)" +def riptanoPatternCron = ~"(riptano)" + +pipeline { + agent none + + // Global pipeline timeout + options { + timeout(time: 10, unit: 'HOURS') + buildDiscarder(logRotator(artifactNumToKeepStr: '10', // Keep only the last 10 artifacts + numToKeepStr: '50')) // Keep only the last 50 build records + } + + parameters { + choice( + name: 'ADHOC_BUILD_TYPE', + choices: ['BUILD', 'BUILD-AND-EXECUTE-TESTS'], + description: '''

Perform an adhoc build operation

Choice                     Description
BUILD                      Performs a Per-Commit build
BUILD-AND-EXECUTE-TESTS    Performs a build and executes the integration and unit tests
''') +    choice( +      name: 'ADHOC_BUILD_AND_EXECUTE_TESTS_PYTHON_VERSION', +      choices: ['2.7.18', '3.4.10', '3.5.9', '3.6.10', '3.7.7', '3.8.3'], +      description: 'Python version to use for adhoc BUILD-AND-EXECUTE-TESTS ONLY!') +    choice( +      name: 'ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION', +      choices: ['2.1',       // Legacy Apache CassandraⓇ +                '2.2',       // Legacy Apache CassandraⓇ +                '3.0',       // Previous Apache CassandraⓇ +                '3.11',      // Current Apache CassandraⓇ +                '4.0',       // Development Apache CassandraⓇ +                'dse-5.0',   // Long Term Support DataStax Enterprise +                'dse-5.1',   // Legacy DataStax Enterprise +                'dse-6.0',   // Previous DataStax Enterprise +                'dse-6.7',   // Previous DataStax Enterprise +                'dse-6.8',   // Current DataStax Enterprise +                'ALL'], +      description: '''Apache CassandraⓇ and DataStax Enterprise server version to use for adhoc BUILD-AND-EXECUTE-TESTS ONLY!

Choice     Description
2.1        Apache CassandraⓇ v2.1.x
2.2        Apache CassandraⓇ v2.2.x
3.0        Apache CassandraⓇ v3.0.x
3.11       Apache CassandraⓇ v3.11.x
4.0        Apache CassandraⓇ v4.x (CURRENTLY UNDER DEVELOPMENT)
dse-5.0    DataStax Enterprise v5.0.x (Long Term Support)
dse-5.1    DataStax Enterprise v5.1.x
dse-6.0    DataStax Enterprise v6.0.x
dse-6.7    DataStax Enterprise v6.7.x
dse-6.8    DataStax Enterprise v6.8.x (CURRENTLY UNDER DEVELOPMENT)
''') + booleanParam( + name: 'CYTHON', + defaultValue: false, + description: 'Flag to determine if Cython should be enabled for scheduled or adhoc builds') + booleanParam( + name: 'EXECUTE_LONG_TESTS', + defaultValue: false, + description: 'Flag to determine if long integration tests should be executed for scheduled or adhoc builds') + choice( + name: 'EVENT_LOOP_MANAGER', + choices: ['LIBEV', 'GEVENT', 'EVENTLET', 'ASYNCIO', 'ASYNCORE', 'TWISTED'], + description: '''

Event loop manager to utilize for scheduled or adhoc builds

Choice      Description
LIBEV       A full-featured and high-performance event loop that is loosely modeled after libevent, but without its limitations and bugs
GEVENT      A coroutine-based Python networking library that uses greenlet to provide a high-level synchronous API on top of the libev or libuv event loop
EVENTLET    A concurrent networking library for Python that allows you to change how you run your code, not how you write it
ASYNCIO     A library to write concurrent code using the async/await syntax
ASYNCORE    A module that provides the basic infrastructure for writing asynchronous socket service clients and servers
TWISTED     An event-driven networking engine written in Python and licensed under the open source MIT license
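For illustration (a sketch, not part of the original description): each choice maps to one of the driver's pluggable connection classes. A GEVENT run, for instance, corresponds to an application-side setup roughly like the following, assuming gevent is installed and monkey-patching happens before the driver import:

    from gevent import monkey; monkey.patch_all()

    from cassandra.cluster import Cluster
    from cassandra.io.geventreactor import GeventConnection

    # force the gevent-based event loop instead of the default reactor
    cluster = Cluster(connection_class=GeventConnection)
    session = cluster.connect()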
''') + choice( + name: 'PROFILE', + choices: ['NONE', 'DSE-SMOKE-TEST', 'EVENT-LOOP', 'UPGRADE'], + description: '''

Profile to utilize for scheduled or adhoc builds

Choice            Description
NONE              Execute the standard tests for the driver
DSE-SMOKE-TEST    Execute only the DataStax Enterprise smoke tests
EVENT-LOOP        Execute only the event loop tests for the specified event loop manager (see: EVENT_LOOP_MANAGER)
UPGRADE           Execute only the upgrade tests
''') + choice( + name: 'CI_SCHEDULE', + choices: ['DO-NOT-CHANGE-THIS-SELECTION', 'WEEKNIGHTS', 'WEEKENDS'], + description: 'CI testing schedule to execute periodically scheduled builds and tests of the driver (DO NOT CHANGE THIS SELECTION)') + string( + name: 'CI_SCHEDULE_PYTHON_VERSION', + defaultValue: 'DO-NOT-CHANGE-THIS-SELECTION', + description: 'CI testing python version to utilize for scheduled test runs of the driver (DO NOT CHANGE THIS SELECTION)') + string( + name: 'CI_SCHEDULE_SERVER_VERSION', + defaultValue: 'DO-NOT-CHANGE-THIS-SELECTION', + description: 'CI testing server version to utilize for scheduled test runs of the driver (DO NOT CHANGE THIS SELECTION)') + } + + triggers { + parameterizedCron((branchPatternCron.matcher(env.BRANCH_NAME).matches() && !riptanoPatternCron.matcher(GIT_URL).find()) ? """ + # Every weeknight (Monday - Friday) around 4:00 AM + # These schedules will run with and without Cython enabled for Python v2.7.18 and v3.5.9 + H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1 dse-6.0 dse-6.7 + H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.5.9;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1 dse-6.0 dse-6.7 + + # Every Saturday around 12:00, 4:00 and 8:00 PM + # These schedules are for weekly libev event manager runs with and without Cython for most of the Python versions (excludes v3.5.9.x) + H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 + H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.4.10;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 + H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.6.10;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 + H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.7.7;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 + H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.8.3;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 + # These schedules are for weekly gevent event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.10.x) + H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.9;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.10;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.7;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.3;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + # These schedules are for weekly eventlet event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.10.x) + H 20 * * 6 
%CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.9;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.10;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.7;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.3;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + + # Every Sunday around 12:00 and 4:00 AM + # These schedules are for weekly asyncore event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.10.x) + H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.9;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.10;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.7;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.3;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + # These schedules are for weekly twisted event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.10.x) + H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.9;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.10;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.7;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.3;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 + """ : "") + } + + environment { + OS_VERSION = 'ubuntu/bionic64/python-driver' + CYTHON_ENABLED = "${params.CYTHON ? 'True' : 'False'}" + EVENT_LOOP_MANAGER = "${params.EVENT_LOOP_MANAGER.toLowerCase()}" + EXECUTE_LONG_TESTS = "${params.EXECUTE_LONG_TESTS ? 
'True' : 'False'}" + CCM_ENVIRONMENT_SHELL = '/usr/local/bin/ccm_environment.sh' + CCM_MAX_HEAP_SIZE = '1536M' + } + + stages { + stage ('Per-Commit') { + options { + timeout(time: 2, unit: 'HOURS') + } + when { + beforeAgent true + branch pattern: '((dev|long)-)?python-.*', comparator: 'REGEXP' + allOf { + expression { params.ADHOC_BUILD_TYPE == 'BUILD' } + expression { params.CI_SCHEDULE == 'DO-NOT-CHANGE-THIS-SELECTION' } + not { buildingTag() } + } + } + + matrix { + axes { + axis { + name 'CASSANDRA_VERSION' + values '3.11', // Current Apache Cassandra + 'dse-6.8' // Current DataStax Enterprise + } + axis { + name 'PYTHON_VERSION' + values '2.7.18', '3.5.9' + } + axis { + name 'CYTHON_ENABLED' + values 'False' + } + } + + agent { + label "${OS_VERSION}" + } + + stages { + stage('Initialize-Environment') { + steps { + initializeEnvironment() + script { + if (env.BUILD_STATED_SLACK_NOTIFIED != 'true') { + notifySlack() + } + } + } + } + stage('Describe-Build') { + steps { + describePerCommitStage() + } + } + stage('Install-Driver-And-Compile-Extensions') { + steps { + installDriverAndCompileExtensions() + } + } + stage('Execute-Tests') { + steps { + + script { + if (env.BRANCH_NAME ==~ /long-python.*/) { + withEnv(["EXECUTE_LONG_TESTS=True"]) { + executeTests() + } + } + else { + executeTests() + } + } + } + post { + always { + junit testResults: '*_results.xml' + } + } + } + } + } + post { + always { + node('master') { + submitCIMetrics('commit') + } + } + aborted { + notifySlack('aborted') + } + success { + notifySlack('completed') + } + unstable { + notifySlack('unstable') + } + failure { + notifySlack('FAILED') + } + } + } + + stage ('Scheduled-Testing') { + when { + beforeAgent true + allOf { + expression { params.ADHOC_BUILD_TYPE == 'BUILD' } + expression { params.CI_SCHEDULE != 'DO-NOT-CHANGE-THIS-SELECTION' } + not { buildingTag() } + } + } + matrix { + axes { + axis { + name 'CASSANDRA_VERSION' + values '2.1', // Legacy Apache Cassandra + '2.2', // Legacy Apache Cassandra + '3.0', // Previous Apache Cassandra + '3.11', // Current Apache Cassandra + 'dse-5.1', // Legacy DataStax Enterprise + 'dse-6.0', // Previous DataStax Enterprise + 'dse-6.7' // Current DataStax Enterprise + } + axis { + name 'CYTHON_ENABLED' + values 'True', 'False' + } + } + when { + beforeAgent true + allOf { + expression { return params.CI_SCHEDULE_SERVER_VERSION.split(' ').any { it =~ /(ALL|${env.CASSANDRA_VERSION})/ } } + } + } + + environment { + PYTHON_VERSION = "${params.CI_SCHEDULE_PYTHON_VERSION}" + } + agent { + label "${OS_VERSION}" + } + + stages { + stage('Initialize-Environment') { + steps { + initializeEnvironment() + script { + if (env.BUILD_STATED_SLACK_NOTIFIED != 'true') { + notifySlack() + } + } + } + } + stage('Describe-Build') { + steps { + describeScheduledTestingStage() + } + } + stage('Install-Driver-And-Compile-Extensions') { + steps { + installDriverAndCompileExtensions() + } + } + stage('Execute-Tests') { + steps { + executeTests() + } + post { + always { + junit testResults: '*_results.xml' + } + } + } + } + } + post { + aborted { + notifySlack('aborted') + } + success { + notifySlack('completed') + } + unstable { + notifySlack('unstable') + } + failure { + notifySlack('FAILED') + } + } + } + + + stage('Adhoc-Testing') { + when { + beforeAgent true + allOf { + expression { params.ADHOC_BUILD_TYPE == 'BUILD-AND-EXECUTE-TESTS' } + not { buildingTag() } + } + } + + environment { + CYTHON_ENABLED = "${params.CYTHON ? 
'True' : 'False'}" + PYTHON_VERSION = "${params.ADHOC_BUILD_AND_EXECUTE_TESTS_PYTHON_VERSION}" + } + + matrix { + axes { + axis { + name 'CASSANDRA_VERSION' + values '2.1', // Legacy Apache Cassandra + '2.2', // Legacy Apache Cassandra + '3.0', // Previous Apache Cassandra + '3.11', // Current Apache Cassandra + '4.0', // Development Apache Cassandra + 'dse-5.0', // Long Term Support DataStax Enterprise + 'dse-5.1', // Legacy DataStax Enterprise + 'dse-6.0', // Previous DataStax Enterprise + 'dse-6.7', // Current DataStax Enterprise + 'dse-6.8' // Development DataStax Enterprise + } + } + when { + beforeAgent true + allOf { + expression { params.ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION ==~ /(ALL|${env.CASSANDRA_VERSION})/ } + } + } + + agent { + label "${OS_VERSION}" + } + + stages { + stage('Describe-Build') { + steps { + describeAdhocTestingStage() + } + } + stage('Initialize-Environment') { + steps { + initializeEnvironment() + } + } + stage('Install-Driver-And-Compile-Extensions') { + steps { + installDriverAndCompileExtensions() + } + } + stage('Execute-Tests') { + steps { + executeTests() + } + post { + always { + junit testResults: '*_results.xml' + } + } + } + } + } + } + } +} From 04d1f30c292c70c6052a299ba477add9c69c1d76 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 4 Aug 2020 08:59:39 -0400 Subject: [PATCH 096/211] Add Duration type attributes in docs --- cassandra/util.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cassandra/util.py b/cassandra/util.py index ead58c82f6..f896ff4f86 100644 --- a/cassandra/util.py +++ b/cassandra/util.py @@ -1541,8 +1541,11 @@ class Duration(object): """ months = 0 + "" days = 0 + "" nanoseconds = 0 + "" def __init__(self, months=0, days=0, nanoseconds=0): self.months = months From 1d6ac40696b7eb522e116144c830cf8384b8547c Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Tue, 4 Aug 2020 14:25:05 -0400 Subject: [PATCH 097/211] disabling schedules temporarily --- Jenkinsfile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 61b2f3ad5c..890cc425a6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -387,7 +387,7 @@ def describeBuild(buildContext) { } } -def scheduleTriggerJobName = "drivers/python/oss/master" +def scheduleTriggerJobName = "drivers/python/oss/master/disabled" pipeline { agent none @@ -641,11 +641,11 @@ pipeline { stages { stage ('Build and Test') { - agent { - // If I removed this agent block, GIT_URL and GIT_COMMIT aren't set. - // However, this trigger an additional checkout - label "master" - } + agent none + // // If I removed this agent block, GIT_URL and GIT_COMMIT aren't set. 
+ // // However, this trigger an additional checkout + // label none + // } when { beforeAgent true allOf { From 01583bb1727016c13bb897f1a79211906b41e0d8 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Mon, 14 Sep 2020 14:27:00 -0400 Subject: [PATCH 098/211] Drop python 3.4 support --- CHANGELOG.rst | 8 ++++++++ Jenkinsfile | 14 +++++++------- README-dev.rst | 2 +- README.rst | 2 +- build.yaml.bak | 25 +++++-------------------- docs/index.rst | 2 +- docs/installation.rst | 2 +- setup.py | 1 - test-requirements.txt | 2 +- tox.ini | 2 +- 10 files changed, 26 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 53a5e22436..1579148446 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,11 @@ +3.25.0 +====== +Not released + +Others +------ +* Drop Python 3.4 support (PYTHON-1220) + 3.24.0 ====== June 18, 2020 diff --git a/Jenkinsfile b/Jenkinsfile index 890cc425a6..abb6092758 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -33,7 +33,7 @@ slack = new Slack() matrices = [ "FULL": [ "SERVER": ['2.1', '2.2', '3.0', '3.11', '4.0', 'dse-5.0', 'dse-5.1', 'dse-6.0', 'dse-6.7', 'dse-6.8'], - "RUNTIME": ['2.7.18', '3.4.10', '3.5.9', '3.6.10', '3.7.7', '3.8.3'], + "RUNTIME": ['2.7.18', '3.5.9', '3.6.10', '3.7.7', '3.8.3'], "CYTHON": ["True", "False"] ], "DEVELOP": [ @@ -43,12 +43,12 @@ matrices = [ ], "CASSANDRA": [ "SERVER": ['2.1', '2.2', '3.0', '3.11', '4.0'], - "RUNTIME": ['2.7.18', '3.4.10', '3.5.9', '3.6.10', '3.7.7', '3.8.3'], + "RUNTIME": ['2.7.18', '3.5.9', '3.6.10', '3.7.7', '3.8.3'], "CYTHON": ["True", "False"] ], "DSE": [ "SERVER": ['dse-5.0', 'dse-5.1', 'dse-6.0', 'dse-6.7', 'dse-6.8'], - "RUNTIME": ['2.7.18', '3.4.10', '3.5.9', '3.6.10', '3.7.7', '3.8.3'], + "RUNTIME": ['2.7.18', '3.5.9', '3.6.10', '3.7.7', '3.8.3'], "CYTHON": ["True", "False"] ] ] @@ -482,7 +482,7 @@ pipeline { ''') choice( name: 'PYTHON_VERSION', - choices: ['DEFAULT', '2.7.18', '3.4.10', '3.5.9', '3.6.10', '3.7.7', '3.8.3'], + choices: ['DEFAULT', '2.7.18', '3.5.9', '3.6.10', '3.7.7', '3.8.3'], description: 'Python runtime version. Default to the build context.') choice( name: 'SERVER_VERSION', @@ -641,11 +641,11 @@ pipeline { stages { stage ('Build and Test') { - agent none + agent { // // If I removed this agent block, GIT_URL and GIT_COMMIT aren't set. // // However, this trigger an additional checkout - // label none - // } + label "master" + } when { beforeAgent true allOf { diff --git a/README-dev.rst b/README-dev.rst index 8294d4efb8..85a722c3b7 100644 --- a/README-dev.rst +++ b/README-dev.rst @@ -176,7 +176,7 @@ Use tee to capture logs and see them on your terminal:: Testing Multiple Python Versions -------------------------------- -If you want to test all of python 2.7, 3.4, 3.5, 3.6, 3.7, and pypy, use tox (this is what +If you want to test all of python 2.7, 3.5, 3.6, 3.7, and pypy, use tox (this is what TravisCI runs):: tox diff --git a/README.rst b/README.rst index 358f588d32..7c5bf1eee1 100644 --- a/README.rst +++ b/README.rst @@ -7,7 +7,7 @@ DataStax Driver for Apache Cassandra A modern, `feature-rich `_ and highly-tunable Python client library for Apache Cassandra (2.1+) and DataStax Enterprise (4.7+) using exclusively Cassandra's binary protocol and Cassandra Query Language v3. -The driver supports Python 2.7, 3.4, 3.5, 3.6, 3.7 and 3.8. +The driver supports Python 2.7, 3.5, 3.6, 3.7 and 3.8. **Note:** DataStax products do not support big-endian systems. 
diff --git a/build.yaml.bak b/build.yaml.bak index bd40809ef3..100c86558a 100644 --- a/build.yaml.bak +++ b/build.yaml.bak @@ -8,7 +8,7 @@ schedules: EVENT_LOOP_MANAGER='libev' matrix: exclude: - - python: [3.4, 3.6, 3.7, 3.8] + - python: [3.6, 3.7, 3.8] - cassandra: ['2.1', '3.0', '4.0', 'test-dse'] commit_long_test: @@ -20,7 +20,7 @@ schedules: EVENT_LOOP_MANAGER='libev' matrix: exclude: - - python: [3.4, 3.6, 3.7, 3.8] + - python: [3.6, 3.7, 3.8] - cassandra: ['2.1', '3.0', 'test-dse'] commit_branches: @@ -33,7 +33,7 @@ schedules: EXCLUDE_LONG=1 matrix: exclude: - - python: [3.4, 3.6, 3.7, 3.8] + - python: [3.6, 3.7, 3.8] - cassandra: ['2.1', '3.0', 'test-dse'] commit_branches_dev: @@ -46,7 +46,7 @@ schedules: EXCLUDE_LONG=1 matrix: exclude: - - python: [2.7, 3.4, 3.7, 3.6, 3.8] + - python: [2.7, 3.7, 3.6, 3.8] - cassandra: ['2.0', '2.1', '2.2', '3.0', '4.0', 'test-dse', 'dse-4.8', 'dse-5.0', 'dse-6.0', 'dse-6.8'] release_test: @@ -77,9 +77,6 @@ schedules: env_vars: | EVENT_LOOP_MANAGER='gevent' JUST_EVENT_LOOP=1 - matrix: - exclude: - - python: [3.4] weekly_eventlet: schedule: 0 18 * * 6 @@ -89,9 +86,6 @@ schedules: env_vars: | EVENT_LOOP_MANAGER='eventlet' JUST_EVENT_LOOP=1 - matrix: - exclude: - - python: [3.4] weekly_asyncio: schedule: 0 22 * * 6 @@ -113,9 +107,6 @@ schedules: env_vars: | EVENT_LOOP_MANAGER='asyncore' JUST_EVENT_LOOP=1 - matrix: - exclude: - - python: [3.4] weekly_twister: schedule: 0 14 * * 7 @@ -125,9 +116,6 @@ schedules: env_vars: | EVENT_LOOP_MANAGER='twisted' JUST_EVENT_LOOP=1 - matrix: - exclude: - - python: [3.4] upgrade_tests: schedule: adhoc @@ -138,12 +126,11 @@ schedules: JUST_UPGRADE=True matrix: exclude: - - python: [3.4, 3.6, 3.7, 3.8] + - python: [3.6, 3.7, 3.8] - cassandra: ['2.0', '2.1', '2.2', '3.0', '4.0', 'test-dse'] python: - 2.7 - - 3.4 - 3.5 - 3.6 - 3.7 @@ -186,8 +173,6 @@ build: pip install git+ssh://git@github.com/riptano/ccm-private.git@cassandra-7544-native-ports-with-dse-fix - # Remove this pyyaml installation when removing Python 3.4 support - pip install PyYAML==5.2 #pip install $HOME/ccm if [ -n "$CCM_IS_DSE" ]; then diff --git a/docs/index.rst b/docs/index.rst index 4cdd637e0a..978faa17c6 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -4,7 +4,7 @@ A Python client driver for `Apache Cassandra® `_. This driver works exclusively with the Cassandra Query Language v3 (CQL3) and Cassandra's native protocol. Cassandra 2.1+ is supported, including DSE 4.7+. -The driver supports Python 2.7, 3.4, 3.5, 3.6, 3.7 and 3.8. +The driver supports Python 2.7, 3.5, 3.6, 3.7 and 3.8. This driver is open source under the `Apache v2 License `_. diff --git a/docs/installation.rst b/docs/installation.rst index b381425302..3855383602 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -3,7 +3,7 @@ Installation Supported Platforms ------------------- -Python 2.7, 3.4, 3.5, 3.6, 3.7 and 3.8 are supported. Both CPython (the standard Python +Python 2.7, 3.5, 3.6, 3.7 and 3.8 are supported. Both CPython (the standard Python implementation) and `PyPy `_ are supported and tested. Linux, OSX, and Windows are supported. 
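As a quick illustration of the new support floor (a hypothetical guard, not code the driver ships), an install-time check matching the updated matrix could look like:

    import sys

    # hypothetical check: Python 2.7 or 3.5+ once 3.4 support is dropped
    if sys.version_info[:2] < (2, 7) or (3, 0) <= sys.version_info[:2] < (3, 5):
        sys.exit("cassandra-driver requires Python 2.7 or 3.5+")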
diff --git a/setup.py b/setup.py index 745d05dfb3..aaaa1b4d2d 100644 --- a/setup.py +++ b/setup.py @@ -443,7 +443,6 @@ def run_setup(extensions): 'Operating System :: OS Independent', 'Programming Language :: Python', 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', diff --git a/test-requirements.txt b/test-requirements.txt index f150a73247..9e62bfdee8 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -15,5 +15,5 @@ cython>=0.20,<0.30 packaging backports.ssl_match_hostname; python_version < '2.7.9' futurist; python_version >= '3.7' -asynctest; python_version > '3.4' +asynctest; python_version >= '3.5' ipaddress; python_version < '3.3.0' diff --git a/tox.ini b/tox.ini index fd50a6c1d6..efb610ac09 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{27,34,35,36,37,38},pypy +envlist = py{27,35,36,37,38},pypy [base] deps = nose From d96a44b8efd48f7da6fb86941a50eac1b2ab26fc Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Fri, 11 Sep 2020 15:49:26 -0400 Subject: [PATCH 099/211] Ensure the driver can connect when invalid peer hosts are in system.peers --- CHANGELOG.rst | 4 ++ cassandra/cluster.py | 17 +++++-- tests/unit/test_control_connection.py | 73 ++++++++++++++++++++------- 3 files changed, 71 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 1579148446..f88278a22e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,10 @@ ====== Not released +Features +-------- +* Ensure the driver can connect when invalid peer hosts are in system.peers (PYTHON-1260) + Others ------ * Drop Python 3.4 support (PYTHON-1220) diff --git a/cassandra/cluster.py b/cassandra/cluster.py index c4d6de124d..ec91ce257a 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -3788,12 +3788,14 @@ def _refresh_node_list_and_token_map(self, connection, preloaded_results=None, # any new nodes, so we need this additional check. (See PYTHON-90) should_rebuild_token_map = force_token_rebuild or self._cluster.metadata.partitioner is None for row in peers_result: + if not self._is_valid_peer(row): + log.warning( + "Found an invalid row for peer (%s). Ignoring host." % + _NodeInfo.get_broadcast_rpc_address(row)) + continue + endpoint = self._cluster.endpoint_factory.create(row) - tokens = row.get("tokens", None) - if 'tokens' in row and not tokens: # it was selected, but empty - log.warning("Excluding host (%s) with no tokens in system.peers table of %s." % (endpoint, connection.endpoint)) - continue if endpoint in found_hosts: log.warning("Found multiple hosts with the same endpoint (%s). 
Excluding peer %s", endpoint, row.get("peer")) continue @@ -3820,6 +3822,7 @@ def _refresh_node_list_and_token_map(self, connection, preloaded_results=None, host.dse_workload = row.get("workload") host.dse_workloads = row.get("workloads") + tokens = row.get("tokens", None) if partitioner and tokens and self._token_meta_enabled: token_map[host] = tokens @@ -3834,6 +3837,12 @@ def _refresh_node_list_and_token_map(self, connection, preloaded_results=None, log.debug("[control connection] Rebuilding token map due to topology changes") self._cluster.metadata.rebuild_token_map(partitioner, token_map) + @staticmethod + def _is_valid_peer(row): + return bool(_NodeInfo.get_broadcast_rpc_address(row) and row.get("host_id") and + row.get("data_center") and row.get("rack") and + ('tokens' not in row or row.get('tokens'))) + def _update_location_info(self, host, datacenter, rack): if host.datacenter == datacenter and host.rack == rack: return False diff --git a/tests/unit/test_control_connection.py b/tests/unit/test_control_connection.py index 3e75a0af27..efad1ca5c9 100644 --- a/tests/unit/test_control_connection.py +++ b/tests/unit/test_control_connection.py @@ -127,15 +127,15 @@ def __init__(self): ] self.peer_results = [ - ["rpc_address", "peer", "schema_version", "data_center", "rack", "tokens"], - [["192.168.1.1", "10.0.0.1", "a", "dc1", "rack1", ["1", "101", "201"]], - ["192.168.1.2", "10.0.0.2", "a", "dc1", "rack1", ["2", "102", "202"]]] + ["rpc_address", "peer", "schema_version", "data_center", "rack", "tokens", "host_id"], + [["192.168.1.1", "10.0.0.1", "a", "dc1", "rack1", ["1", "101", "201"], "uuid1"], + ["192.168.1.2", "10.0.0.2", "a", "dc1", "rack1", ["2", "102", "202"], "uuid2"]] ] self.peer_results_v2 = [ - ["native_address", "native_port", "peer", "peer_port", "schema_version", "data_center", "rack", "tokens"], - [["192.168.1.1", 9042, "10.0.0.1", 7042, "a", "dc1", "rack1", ["1", "101", "201"]], - ["192.168.1.2", 9042, "10.0.0.2", 7040, "a", "dc1", "rack1", ["2", "102", "202"]]] + ["native_address", "native_port", "peer", "peer_port", "schema_version", "data_center", "rack", "tokens", "host_id"], + [["192.168.1.1", 9042, "10.0.0.1", 7042, "a", "dc1", "rack1", ["1", "101", "201"], "uuid1"], + ["192.168.1.2", 9042, "10.0.0.2", 7040, "a", "dc1", "rack1", ["2", "102", "202"], "uuid2"]] ] self.wait_for_responses = Mock(return_value=_node_meta_results(self.local_results, self.peer_results)) @@ -155,18 +155,18 @@ def sleep(self, amount): class ControlConnectionTest(unittest.TestCase): _matching_schema_preloaded_results = _node_meta_results( - local_results=(["schema_version", "cluster_name", "data_center", "rack", "partitioner", "release_version", "tokens"], - [["a", "foocluster", "dc1", "rack1", "Murmur3Partitioner", "2.2.0", ["0", "100", "200"]]]), - peer_results=(["rpc_address", "peer", "schema_version", "data_center", "rack", "tokens"], - [["192.168.1.1", "10.0.0.1", "a", "dc1", "rack1", ["1", "101", "201"]], - ["192.168.1.2", "10.0.0.2", "a", "dc1", "rack1", ["2", "102", "202"]]])) + local_results=(["schema_version", "cluster_name", "data_center", "rack", "partitioner", "release_version", "tokens", "host_id"], + [["a", "foocluster", "dc1", "rack1", "Murmur3Partitioner", "2.2.0", ["0", "100", "200"], "uuid1"]]), + peer_results=(["rpc_address", "peer", "schema_version", "data_center", "rack", "tokens", "host_id"], + [["192.168.1.1", "10.0.0.1", "a", "dc1", "rack1", ["1", "101", "201"], "uuid2"], + ["192.168.1.2", "10.0.0.2", "a", "dc1", "rack1", ["2", "102", "202"], "uuid3"]])) 
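# For orientation, a standalone sketch (not code from the patch) of the rule the
# new fixtures exercise: a peers row is only considered valid when the broadcast
# RPC address, host_id, data_center and rack are all present and, if the tokens
# column was selected, it is non-empty. Simplified, with the _NodeInfo address
# resolution reduced to a dict lookup:
#
#     def is_valid_peer(row):
#         addr = row.get("rpc_address") or row.get("native_address")
#         return bool(addr and row.get("host_id") and row.get("data_center") and
#                     row.get("rack") and ("tokens" not in row or row.get("tokens")))
#
#     assert is_valid_peer({"rpc_address": "192.168.1.3", "host_id": "uuid5",
#                           "data_center": "dc1", "rack": "rack1", "tokens": ["1"]})
#     assert not is_valid_peer({"rpc_address": "192.168.1.4", "host_id": None,
#                               "data_center": "dc1", "rack": "rack1", "tokens": ["1"]})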
_nonmatching_schema_preloaded_results = _node_meta_results( - local_results=(["schema_version", "cluster_name", "data_center", "rack", "partitioner", "release_version", "tokens"], - [["a", "foocluster", "dc1", "rack1", "Murmur3Partitioner", "2.2.0", ["0", "100", "200"]]]), - peer_results=(["rpc_address", "peer", "schema_version", "data_center", "rack", "tokens"], - [["192.168.1.1", "10.0.0.1", "a", "dc1", "rack1", ["1", "101", "201"]], - ["192.168.1.2", "10.0.0.2", "b", "dc1", "rack1", ["2", "102", "202"]]])) + local_results=(["schema_version", "cluster_name", "data_center", "rack", "partitioner", "release_version", "tokens", "host_id"], + [["a", "foocluster", "dc1", "rack1", "Murmur3Partitioner", "2.2.0", ["0", "100", "200"], "uuid1"]]), + peer_results=(["rpc_address", "peer", "schema_version", "data_center", "rack", "tokens", "host_id"], + [["192.168.1.1", "10.0.0.1", "a", "dc1", "rack1", ["1", "101", "201"], "uuid2"], + ["192.168.1.2", "10.0.0.2", "b", "dc1", "rack1", ["2", "102", "202"], "uuid3"]])) def setUp(self): self.cluster = MockCluster() @@ -275,6 +275,40 @@ def test_refresh_nodes_and_tokens(self): self.assertEqual(self.connection.wait_for_responses.call_count, 1) + def test_refresh_nodes_and_tokens_with_invalid_peers(self): + def refresh_and_validate_added_hosts(): + self.connection.wait_for_responses = Mock(return_value=_node_meta_results( + self.connection.local_results, self.connection.peer_results)) + self.control_connection.refresh_node_list_and_token_map() + self.assertEqual(1, len(self.cluster.added_hosts)) # only one valid peer found + + # peersV1 + del self.connection.peer_results[:] + self.connection.peer_results.extend([ + ["rpc_address", "peer", "schema_version", "data_center", "rack", "tokens", "host_id"], + [["192.168.1.3", "10.0.0.1", "a", "dc1", "rack1", ["1", "101", "201"], 'uuid5'], + # all others are invalid + [None, None, "a", "dc1", "rack1", ["1", "101", "201"], 'uuid1'], + ["192.168.1.7", "10.0.0.1", "a", None, "rack1", ["1", "101", "201"], 'uuid2'], + ["192.168.1.6", "10.0.0.1", "a", "dc1", None, ["1", "101", "201"], 'uuid3'], + ["192.168.1.5", "10.0.0.1", "a", "dc1", "rack1", None, 'uuid4'], + ["192.168.1.4", "10.0.0.1", "a", "dc1", "rack1", ["1", "101", "201"], None]]]) + refresh_and_validate_added_hosts() + + # peersV2 + del self.cluster.added_hosts[:] + del self.connection.peer_results[:] + self.connection.peer_results.extend([ + ["native_address", "native_port", "peer", "peer_port", "schema_version", "data_center", "rack", "tokens", "host_id"], + [["192.168.1.4", 9042, "10.0.0.1", 7042, "a", "dc1", "rack1", ["1", "101", "201"], "uuid1"], + # all others are invalid + [None, 9042, None, 7040, "a", "dc1", "rack1", ["2", "102", "202"], "uuid2"], + ["192.168.1.5", 9042, "10.0.0.2", 7040, "a", None, "rack1", ["2", "102", "202"], "uuid2"], + ["192.168.1.5", 9042, "10.0.0.2", 7040, "a", "dc1", None, ["2", "102", "202"], "uuid2"], + ["192.168.1.5", 9042, "10.0.0.2", 7040, "a", "dc1", "rack1", None, "uuid2"], + ["192.168.1.5", 9042, "10.0.0.2", 7040, "a", "dc1", "rack1", ["2", "102", "202"], None]]]) + refresh_and_validate_added_hosts() + def test_refresh_nodes_and_tokens_uses_preloaded_results_if_given(self): """ refresh_nodes_and_tokens uses preloaded results if given for shared table queries @@ -311,7 +345,7 @@ def test_refresh_nodes_and_tokens_no_partitioner(self): def test_refresh_nodes_and_tokens_add_host(self): self.connection.peer_results[1].append( - ["192.168.1.3", "10.0.0.3", "a", "dc1", "rack1", ["3", "103", "203"]] + ["192.168.1.3", "10.0.0.3", 
"a", "dc1", "rack1", ["3", "103", "203"], "uuid3"] ) self.cluster.scheduler.schedule = lambda delay, f, *args, **kwargs: f(*args, **kwargs) self.control_connection.refresh_node_list_and_token_map() @@ -319,6 +353,7 @@ def test_refresh_nodes_and_tokens_add_host(self): self.assertEqual(self.cluster.added_hosts[0].address, "192.168.1.3") self.assertEqual(self.cluster.added_hosts[0].datacenter, "dc1") self.assertEqual(self.cluster.added_hosts[0].rack, "rack1") + self.assertEqual(self.cluster.added_hosts[0].host_id, "uuid3") def test_refresh_nodes_and_tokens_remove_host(self): del self.connection.peer_results[1][1] @@ -482,7 +517,7 @@ def test_refresh_nodes_and_tokens_add_host_detects_port(self): del self.connection.peer_results[:] self.connection.peer_results.extend(self.connection.peer_results_v2) self.connection.peer_results[1].append( - ["192.168.1.3", 555, "10.0.0.3", 666, "a", "dc1", "rack1", ["3", "103", "203"]] + ["192.168.1.3", 555, "10.0.0.3", 666, "a", "dc1", "rack1", ["3", "103", "203"], "uuid3"] ) self.connection.wait_for_responses = Mock(return_value=_node_meta_results( self.connection.local_results, self.connection.peer_results)) @@ -502,7 +537,7 @@ def test_refresh_nodes_and_tokens_add_host_detects_invalid_port(self): del self.connection.peer_results[:] self.connection.peer_results.extend(self.connection.peer_results_v2) self.connection.peer_results[1].append( - ["192.168.1.3", -1, "10.0.0.3", 0, "a", "dc1", "rack1", ["3", "103", "203"]] + ["192.168.1.3", -1, "10.0.0.3", 0, "a", "dc1", "rack1", ["3", "103", "203"], "uuid3"] ) self.connection.wait_for_responses = Mock(return_value=_node_meta_results( self.connection.local_results, self.connection.peer_results)) From dc3f2f8074ee08bd44bafd5604c7fab06566e9c6 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Wed, 14 Oct 2020 14:26:15 -0400 Subject: [PATCH 100/211] Fix asyncore race condition cause logging exception on shutdown --- CHANGELOG.rst | 4 ++++ cassandra/io/asyncorereactor.py | 23 ++++++++--------------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f88278a22e..a3217d6cda 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,10 @@ Features -------- * Ensure the driver can connect when invalid peer hosts are in system.peers (PYTHON-1260) +Bug Fixes +--------- +* Asyncore race condition cause logging exception on shutdown (PYTHON-1266) + Others ------ * Drop Python 3.4 support (PYTHON-1220) diff --git a/cassandra/io/asyncorereactor.py b/cassandra/io/asyncorereactor.py index e07aab4697..681552e589 100644 --- a/cassandra/io/asyncorereactor.py +++ b/cassandra/io/asyncorereactor.py @@ -36,20 +36,7 @@ from cassandra.connection import Connection, ConnectionShutdown, NONBLOCKING, Timer, TimerManager -# TODO: Remove when Python 2 is removed -class LogWrapper(object): - """ PYTHON-1228. 
If our logger has disappeared, there's nothing we can do, so just execute nothing """ - def __init__(self): - self._log = logging.getLogger(__name__) - - def __getattr__(self, name): - try: - return getattr(self._log, name) - except: - return lambda *args, **kwargs: None - - -log = LogWrapper() +log = logging.getLogger(__name__) _dispatcher_map = {} @@ -262,7 +249,13 @@ def _run_loop(self): self._loop_dispatcher.loop(self.timer_resolution) self._timers.service_timeouts() except Exception: - log.debug("Asyncore event loop stopped unexepectedly", exc_info=True) + try: + log.debug("Asyncore event loop stopped unexpectedly", exc_info=True) + except Exception: + # TODO: Remove when Python 2 support is removed + # PYTHON-1266. If our logger has disappeared, there's nothing we + # can do, so just log nothing. + pass break self._started = False From 501c134b4b94df47ff8588c79d98a4d3b3c25244 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Thu, 6 Aug 2020 10:38:12 -0400 Subject: [PATCH 101/211] initial support of protocol v5 checksumming --- cassandra/__init__.py | 8 ++ cassandra/connection.py | 91 +++++++++++++--- cassandra/marshal.py | 1 + cassandra/protocol.py | 35 ++++++- cassandra/segment.py | 222 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 342 insertions(+), 15 deletions(-) create mode 100644 cassandra/segment.py diff --git a/cassandra/__init__.py b/cassandra/__init__.py index f2bf696035..47fd2f6bec 100644 --- a/cassandra/__init__.py +++ b/cassandra/__init__.py @@ -235,6 +235,14 @@ def has_continuous_paging_support(cls, version): def has_continuous_paging_next_pages(cls, version): return version >= cls.DSE_V2 + @classmethod + def has_checksumming_support(cls, version): + return cls.V5 <= version < cls.DSE_V1 + + @classmethod + def has_cql_frame_compression_support(cls, version): + return version != cls.V5 + class WriteType(object): """ diff --git a/cassandra/connection.py b/cassandra/connection.py index 6ce3e44a30..13ffa5ecde 100644 --- a/cassandra/connection.py +++ b/cassandra/connection.py @@ -42,11 +42,15 @@ AuthResponseMessage, AuthChallengeMessage, AuthSuccessMessage, ProtocolException, RegisterMessage, ReviseRequestMessage) +from cassandra.segment import SegmentCodec, CrcException from cassandra.util import OrderedDict log = logging.getLogger(__name__) +segment_codec_no_compression = SegmentCodec() +segment_codec_lz4 = None + # We use an ordered dictionary and specifically add lz4 before # snappy so that lz4 will be preferred. Changing the order of this # will change the compression preferences for the driver. @@ -88,6 +92,7 @@ def lz4_decompress(byts): return lz4_block.decompress(byts[3::-1] + byts[4:]) locally_supported_compressions['lz4'] = (lz4_compress, lz4_decompress) + segment_codec_lz4 = SegmentCodec(lz4_compress, lz4_decompress) try: import snappy @@ -426,6 +431,10 @@ class ProtocolError(Exception): pass +class CrcMismatchException(ConnectionException): + pass + + class ContinuousPagingState(object): """ A class for specifying continuous paging state, only supported starting with DSE_V2. 
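To make the new framing concrete, here is a round-trip sketch against the API introduced in this patch (an illustration, not code from the patch; it assumes cassandra/segment.py as added here, Python 3, and an arbitrary byte string standing in for an encoded cql frame):

    import io
    from cassandra.segment import SegmentCodec

    codec = SegmentCodec()   # no compressor/decompressor: 3-byte header + CRC24
    payload = b"arbitrary cql frame bytes"

    buf = io.BytesIO()
    codec.encode(buf, payload)           # writes header, CRC24, payload, CRC32

    header = codec.decode_header(buf)    # verifies the header CRC24
    segment = codec.decode(buf, header)  # verifies the payload CRC32
    assert segment.is_self_contained and segment.payload == payload

Note that decode_header expects the buffer cursor at the end of the received bytes (it uses tell() to see how much is readable before seeking back to the start), which matches how the connection's incoming io buffer is positioned.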
@@ -657,6 +666,7 @@ class Connection(object): allow_beta_protocol_version = False _iobuf = None + _frame_iobuf = None _current_frame = None _socket = None @@ -667,6 +677,8 @@ class Connection(object): _check_hostname = False _product_type = None + _is_checksumming_enabled = False + def __init__(self, host='127.0.0.1', port=9042, authenticator=None, ssl_options=None, sockopts=None, compression=True, cql_version=None, protocol_version=ProtocolVersion.MAX_SUPPORTED, is_control_connection=False, @@ -691,6 +703,7 @@ def __init__(self, host='127.0.0.1', port=9042, authenticator=None, self._push_watchers = defaultdict(set) self._requests = {} self._iobuf = io.BytesIO() + self._frame_iobuf = io.BytesIO() self._continuous_paging_sessions = {} self._socket_writable = True @@ -933,7 +946,14 @@ def send_msg(self, msg, request_id, cb, encoder=ProtocolHandler.encode_message, # queue the decoder function with the request # this allows us to inject custom functions per request to encode, decode messages self._requests[request_id] = (cb, decoder, result_metadata) - msg = encoder(msg, request_id, self.protocol_version, compressor=self.compressor, allow_beta_protocol_version=self.allow_beta_protocol_version) + msg = encoder(msg, request_id, self.protocol_version, compressor=self.compressor, + allow_beta_protocol_version=self.allow_beta_protocol_version) + + if self._is_checksumming_enabled: + buffer = io.BytesIO() + self._segment_codec.encode(buffer, msg) + msg = buffer.getvalue() + self.push(msg) return len(msg) @@ -1012,7 +1032,7 @@ def control_conn_disposed(self): @defunct_on_error def _read_frame_header(self): - buf = self._iobuf.getvalue() + buf = self._frame_iobuf.getvalue() pos = len(buf) if pos: version = int_from_buf_item(buf[0]) & PROTOCOL_VERSION_MASK @@ -1029,26 +1049,57 @@ def _read_frame_header(self): return pos def _reset_frame(self): - self._iobuf = io.BytesIO(self._iobuf.read()) - self._iobuf.seek(0, 2) # io.SEEK_END == 2 (constant not present in 2.6) + self._frame_iobuf = io.BytesIO(self._frame_iobuf.read()) + self._frame_iobuf.seek(0, 2) # 2 == SEEK_END self._current_frame = None + def _reset_io_buffer(self): + self._iobuf = io.BytesIO(self._iobuf.read()) + self._iobuf.seek(0, 2) # 2 == SEEK_END + + @defunct_on_error + def _process_segment_buffer(self): + if self._iobuf.tell(): + try: + segment_header = self._segment_codec.decode_header(self._iobuf) + if segment_header: + segment = self._segment_codec.decode(self._iobuf, segment_header) + if segment: + self._frame_iobuf.write(segment.payload) + except CrcException as exc: + # re-raise an exception that inherits from ConnectionException + raise CrcMismatchException(str(exc), self.endpoint) + def process_io_buffer(self): while True: + if self._is_checksumming_enabled: + self._process_segment_buffer() + else: + # TODO, try to avoid having 2 io buffers when protocol != V5 + self._frame_iobuf.write(self._iobuf.getvalue()) + + self._reset_io_buffer() + if not self._current_frame: pos = self._read_frame_header() else: - pos = self._iobuf.tell() + pos = self._frame_iobuf.tell() if not self._current_frame or pos < self._current_frame.end_pos: + if self._is_checksumming_enabled and self._iobuf.tell(): + # TODO keep the current segment frame? 
+ # We have a multi-segments message and we need to read more data to complete + # the current cql frame + continue + # we don't have a complete header yet or we # already saw a header, but we don't have a # complete message yet return else: frame = self._current_frame - self._iobuf.seek(frame.body_offset) - msg = self._iobuf.read(frame.end_pos - frame.body_offset) + self._frame_iobuf.seek(frame.body_offset) + msg = self._frame_iobuf.read(frame.end_pos - frame.body_offset) self.process_msg(frame, msg) self._reset_frame() @@ -1185,11 +1236,17 @@ def _handle_options_response(self, options_response): compression_type = k break - # set the decompressor here, but set the compressor only after - # a successful Ready message - self._compression_type = compression_type - self._compressor, self.decompressor = \ - locally_supported_compressions[compression_type] + if (compression_type == 'snappy' and + ProtocolVersion.has_checksumming_support(self.protocol_version)): + log.debug("Snappy compression is not supported with protocol version %s and checksumming.", + self.protocol_version) + compression_type = None + else: + # set the decompressor here, but set the compressor only after + # a successful Ready message + self._compression_type = compression_type + self._compressor, self.decompressor = \ + locally_supported_compressions[compression_type] self._send_startup_message(compression_type, no_compact=self.no_compact) @@ -1210,6 +1267,7 @@ def _send_startup_message(self, compression=None, no_compact=False): def _handle_startup_response(self, startup_response, did_authenticate=False): if self.is_defunct: return + if isinstance(startup_response, ReadyMessage): if self.authenticator: log.warning("An authentication challenge was not sent, " @@ -1220,6 +1278,15 @@ def _handle_startup_response(self, startup_response, did_authenticate=False): log.debug("Got ReadyMessage on new connection (%s) from %s", id(self), self.endpoint) if self._compressor: self.compressor = self._compressor + + if ProtocolVersion.has_checksumming_support(self.protocol_version): + self._is_checksumming_enabled = True + if self.compressor: + self._segment_codec = segment_codec_lz4 + else: + self._segment_codec = segment_codec_no_compression + log.debug("Enabling protocol checksumming on connection (%s).", id(self)) + self.connected_event.set() elif isinstance(startup_response, AuthenticateMessage): log.debug("Got AuthenticateMessage on new connection (%s) from %s: %s", diff --git a/cassandra/marshal.py b/cassandra/marshal.py index 7533ebd307..43cb627b08 100644 --- a/cassandra/marshal.py +++ b/cassandra/marshal.py @@ -28,6 +28,7 @@ def _make_packer(format_string): int8_pack, int8_unpack = _make_packer('>b') uint64_pack, uint64_unpack = _make_packer('>Q') uint32_pack, uint32_unpack = _make_packer('>I') +uint32_le_pack, uint32_le_unpack = _make_packer('H') uint8_pack, uint8_unpack = _make_packer('>B') float_pack, float_unpack = _make_packer('>f') diff --git a/cassandra/protocol.py b/cassandra/protocol.py index eac9ebb8b5..f3b5189f13 100644 --- a/cassandra/protocol.py +++ b/cassandra/protocol.py @@ -31,7 +31,7 @@ UserAggregateDescriptor, SchemaTargetType) from cassandra.marshal import (int32_pack, int32_unpack, uint16_pack, uint16_unpack, uint8_pack, int8_unpack, uint64_pack, header_pack, - v3_header_pack, uint32_pack) + v3_header_pack, uint32_pack, uint32_le_unpack, uint32_le_pack) from cassandra.cqltypes import (AsciiType, BytesType, BooleanType, CounterColumnType, DateType, DecimalType, DoubleType, FloatType, Int32Type, @@ -1115,7 
+1115,8 @@ def encode_message(cls, msg, stream_id, protocol_version, compressor, allow_beta msg.send_body(body, protocol_version) body = body.getvalue() - if compressor and len(body) > 0: + if (ProtocolVersion.has_cql_frame_compression_support(protocol_version) + and compressor and len(body) > 0): body = compressor(body) flags |= COMPRESSED_FLAG @@ -1155,7 +1156,8 @@ def decode_message(cls, protocol_version, user_type_map, stream_id, flags, opcod :param decompressor: optional decompression function to inflate the body :return: a message decoded from the body and frame attributes """ - if flags & COMPRESSED_FLAG: + if (ProtocolVersion.has_cql_frame_compression_support(protocol_version) and + flags & COMPRESSED_FLAG): if decompressor is None: raise RuntimeError("No de-compressor available for compressed frame!") body = decompressor(body) @@ -1271,6 +1273,33 @@ def read_int(f): return int32_unpack(f.read(4)) +def read_uint_le(f, size=4): + """ + Read a sequence of little endian bytes and return an unsigned integer. + """ + + if size == 4: + value = uint32_le_unpack(f.read(4)) + else: + value = 0 + for i in range(size): + value |= (read_byte(f) & 0xFF) << 8 * i + + return value + + +def write_uint_le(f, i, size=4): + """ + Write an unsigned integer on a sequence of little endian bytes. + """ + if size == 4: + f.write(uint32_le_pack(i)) + else: + for j in range(size): + shift = j * 8 + write_byte(f, i >> shift & 0xFF) + + def write_int(f, i): f.write(int32_pack(i)) diff --git a/cassandra/segment.py b/cassandra/segment.py new file mode 100644 index 0000000000..b62099473b --- /dev/null +++ b/cassandra/segment.py @@ -0,0 +1,222 @@ +# Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import zlib +import six + +from cassandra import DriverException +from cassandra.marshal import int32_pack +from cassandra.protocol import write_uint_le, read_uint_le + +CRC24_INIT = 0x875060 +CRC24_POLY = 0x1974F0B +CRC24_LENGTH = 3 +CRC32_LENGTH = 4 +CRC32_INITIAL = zlib.crc32(b"\xfa\x2d\x55\xca") + + +class CrcException(Exception): + """ + CRC mismatch error. This can happen when decoding a segment. + + TODO: here to avoid import cycles. In the next major, the exceptions + should be declared in a separated exceptions.py file. 
+ """ + pass + + +def compute_crc24(data, length): + crc = CRC24_INIT + + for _ in range(length): + crc ^= (data & 0xff) << 16 + data >>= 8 + + for i in range(8): + crc <<= 1 + if crc & 0x1000000 != 0: + crc ^= CRC24_POLY + + return crc + + +def compute_crc32(data, value): + crc32 = zlib.crc32(data, value) + if six.PY2: + crc32 &= 0xffffffff + + return crc32 + + +class SegmentHeader(object): + + payload_length = None + uncompressed_payload_length = None + is_self_contained = None + + def __init__(self, payload_length, uncompressed_payload_length, is_self_contained): + self.payload_length = payload_length + self.uncompressed_payload_length = uncompressed_payload_length + self.is_self_contained = is_self_contained + + def length(self): + return SegmentCodec.UNCOMPRESSED_HEADER_LENGTH if self.uncompressed_payload_length < 0 \ + else SegmentCodec.COMPRESSED_HEADER_LENGTH + + +class Segment(object): + + MAX_PAYLOAD_LENGTH = 128 * 1024 - 1 + + payload = None + is_self_contained = None + + def __init__(self, payload, is_self_contained): + self.payload = payload + self.is_self_contained = is_self_contained + + +class SegmentCodec(object): + + COMPRESSED_HEADER_LENGTH = 5 + UNCOMPRESSED_HEADER_LENGTH = 3 + FLAG_OFFSET = 17 + + compressor = None + decompressor = None + + def __init__(self, compressor=None, decompressor=None): + self.compressor = compressor + self.decompressor = decompressor + + @property + def header_length(self): + return self.COMPRESSED_HEADER_LENGTH if self.compression \ + else self.UNCOMPRESSED_HEADER_LENGTH + + @property + def header_length_with_crc(self): + return self.header_length + CRC24_LENGTH + + @property + def compression(self): + return self.compressor and self.decompressor + + def compress(self, data): + # the uncompressed length is already encoded in the header, so + # we remove it here + return self.compressor(data)[4:] + + def decompress(self, encoded_data, uncompressed_length): + return self.decompressor(int32_pack(uncompressed_length) + encoded_data) + + def encode_header(self, buffer, payload_length, uncompressed_length, is_self_contained): + if payload_length > Segment.MAX_PAYLOAD_LENGTH: + raise DriverException('Payload length exceed Segment.MAX_PAYLOAD_LENGTH') + + header_data = payload_length + + flag_offset = self.FLAG_OFFSET + if self.compression: + header_data |= uncompressed_length << flag_offset + flag_offset += 17 + + if is_self_contained: + header_data |= 1 << flag_offset + + write_uint_le(buffer, header_data, self.header_length) + header_crc = compute_crc24(header_data, self.header_length) + write_uint_le(buffer, header_crc, CRC24_LENGTH) + + def _encode_segment(self, buffer, payload, is_self_contained): + uncompressed_payload = payload + uncompressed_payload_length = len(payload) + + if self.compression: + compressed_payload = self.compress(uncompressed_payload) + if len(compressed_payload) >= uncompressed_payload_length: + encoded_payload = uncompressed_payload + uncompressed_payload_length = 0 + else: + encoded_payload = compressed_payload + else: + encoded_payload = uncompressed_payload + + payload_length = len(encoded_payload) + self.encode_header(buffer, payload_length, uncompressed_payload_length, is_self_contained) + + payload_crc = compute_crc32(encoded_payload, CRC32_INITIAL) + buffer.write(encoded_payload) + write_uint_le(buffer, payload_crc) + + def encode(self, buffer, msg): + msg_length = len(msg) + + if msg_length > Segment.MAX_PAYLOAD_LENGTH: + payloads = [] + for i in range(0, msg_length, Segment.MAX_PAYLOAD_LENGTH): + 
payloads.append(msg[i:i + Segment.MAX_PAYLOAD_LENGTH]) + else: + payloads = [msg] + + is_self_contained = len(payloads) == 1 + for payload in payloads: + self._encode_segment(buffer, payload, is_self_contained) + + def decode_header(self, buffer): + if buffer.tell() < self.header_length_with_crc: + return + + buffer.seek(0) + header_data = read_uint_le(buffer, self.header_length) + + expected_header_crc = read_uint_le(buffer, CRC24_LENGTH) + actual_header_crc = compute_crc24(header_data, self.header_length) + if actual_header_crc != expected_header_crc: + raise CrcException('CRC mismatch on header {:x}. Received {:x}", computed {:x}.'.format( + header_data, expected_header_crc, actual_header_crc)) + + payload_length = header_data & Segment.MAX_PAYLOAD_LENGTH + header_data >>= 17 + + if self.compression: + uncompressed_payload_length = header_data & Segment.MAX_PAYLOAD_LENGTH + header_data >>= 17 + else: + uncompressed_payload_length = -1 + + is_self_contained = (header_data & 1) == 1 + + return SegmentHeader(payload_length, uncompressed_payload_length, is_self_contained) + + def decode(self, buffer, header): + marker = buffer.tell() + buffer_size = buffer.seek(0, 2) # 2 == SEEK_END + buffer.seek(marker) + if buffer_size < self.header_length_with_crc + header.payload_length + CRC32_LENGTH: + return + + encoded_payload = buffer.read(header.payload_length) + expected_payload_crc = read_uint_le(buffer) + + actual_payload_crc = compute_crc32(encoded_payload, CRC32_INITIAL) + if actual_payload_crc != expected_payload_crc: + raise CrcException('CRC mismatch on payload. Received {:x}", computed {:x}.'.format( + expected_payload_crc, actual_payload_crc)) + + payload = encoded_payload + if self.compression and header.uncompressed_payload_length > 0: + payload = self.decompress(encoded_payload, header.uncompressed_payload_length) + + return Segment(payload, header.is_self_contained) From 66de36063c49f748b6b13b24d8c441dededd32da Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Mon, 31 Aug 2020 14:29:24 -0400 Subject: [PATCH 102/211] Add test_segment.py + some cleanup --- cassandra/__init__.py | 4 - cassandra/connection.py | 32 +++--- cassandra/protocol.py | 5 +- cassandra/segment.py | 40 +++---- tests/unit/test_segment.py | 206 +++++++++++++++++++++++++++++++++++++ 5 files changed, 248 insertions(+), 39 deletions(-) create mode 100644 tests/unit/test_segment.py diff --git a/cassandra/__init__.py b/cassandra/__init__.py index 47fd2f6bec..100df2df17 100644 --- a/cassandra/__init__.py +++ b/cassandra/__init__.py @@ -239,10 +239,6 @@ def has_continuous_paging_next_pages(cls, version): def has_checksumming_support(cls, version): return cls.V5 <= version < cls.DSE_V1 - @classmethod - def has_cql_frame_compression_support(cls, version): - return version != cls.V5 - class WriteType(object): """ diff --git a/cassandra/connection.py b/cassandra/connection.py index 13ffa5ecde..7054a389d0 100644 --- a/cassandra/connection.py +++ b/cassandra/connection.py @@ -1059,13 +1059,17 @@ def _reset_io_buffer(self): @defunct_on_error def _process_segment_buffer(self): - if self._iobuf.tell(): + readable_bytes = self._iobuf.tell() + if readable_bytes >= self._segment_codec.header_length_with_crc: try: + self._iobuf.seek(0) segment_header = self._segment_codec.decode_header(self._iobuf) - if segment_header: + if readable_bytes >= segment_header.segment_length: segment = self._segment_codec.decode(self._iobuf, segment_header) - if segment: - self._frame_iobuf.write(segment.payload) + 
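+                    # at this point decode() has verified the payload CRC32 and, with a
+                    # compressed codec, already inflated the payload, so _frame_iobuf only
+                    # ever accumulates plain cql frame bytes (clarifying comment)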
self._frame_iobuf.write(segment.payload) + else: + # not enough data to read the segment + self._iobuf.seek(0, 2) except CrcException as exc: # re-raise an exception that inherits from ConnectionException raise CrcMismatchException(str(exc), self.endpoint) @@ -1075,7 +1079,9 @@ def process_io_buffer(self): if self._is_checksumming_enabled: self._process_segment_buffer() else: - # TODO, try to avoid having 2 io buffers when protocol != V5 + # We should probably refactor the IO buffering stuff out of the Connection + # class to handle this in a better way. That would make the segment and frame + # decoding code clearer. self._frame_iobuf.write(self._iobuf.getvalue()) self._reset_io_buffer() @@ -1087,9 +1093,8 @@ def process_io_buffer(self): if not self._current_frame or pos < self._current_frame.end_pos: if self._is_checksumming_enabled and self._iobuf.tell(): - # TODO keep the current segment frame? - # We have a multi-segments message and we need to read more data to complete - # the current cql frame + # We have a multi-segments message and we need to read more + # data to complete the current cql frame continue # we don't have a complete header yet or we @@ -1236,10 +1241,12 @@ def _handle_options_response(self, options_response): compression_type = k break + # If snappy compression is selected with v5+checksumming, the connection + # will fail with OTO. Only lz4 is supported if (compression_type == 'snappy' and ProtocolVersion.has_checksumming_support(self.protocol_version)): - log.debug("Snappy compression is not supported with protocol version %s and checksumming.", - self.protocol_version) + log.debug("Snappy compression is not supported with protocol version %s and " + "checksumming. Consider installing lz4. Disabling compression.", self.protocol_version) compression_type = None else: # set the decompressor here, but set the compressor only after @@ -1281,10 +1288,7 @@ def _handle_startup_response(self, startup_response, did_authenticate=False): if ProtocolVersion.has_checksumming_support(self.protocol_version): self._is_checksumming_enabled = True - if self.compressor: - self._segment_codec = segment_codec_lz4 - else: - self._segment_codec = segment_codec_no_compression + self._segment_codec = segment_codec_lz4 if self.compressor else segment_codec_no_compression log.debug("Enabling protocol checksumming on connection (%s).", id(self)) self.connected_event.set() diff --git a/cassandra/protocol.py b/cassandra/protocol.py index f3b5189f13..c454824637 100644 --- a/cassandra/protocol.py +++ b/cassandra/protocol.py @@ -1115,7 +1115,8 @@ def encode_message(cls, msg, stream_id, protocol_version, compressor, allow_beta msg.send_body(body, protocol_version) body = body.getvalue() - if (ProtocolVersion.has_cql_frame_compression_support(protocol_version) + # With checksumming, the compression is done at the segment frame encoding + if (not ProtocolVersion.has_checksumming_support(protocol_version) and compressor and len(body) > 0): body = compressor(body) flags |= COMPRESSED_FLAG @@ -1156,7 +1157,7 @@ def decode_message(cls, protocol_version, user_type_map, stream_id, flags, opcod :param decompressor: optional decompression function to inflate the body :return: a message decoded from the body and frame attributes """ - if (ProtocolVersion.has_cql_frame_compression_support(protocol_version) and + if (not ProtocolVersion.has_checksumming_support(protocol_version) and flags & COMPRESSED_FLAG): if decompressor is None: raise RuntimeError("No de-compressor available for compressed frame!") diff 
--git a/cassandra/segment.py b/cassandra/segment.py index b62099473b..e3881c4402 100644 --- a/cassandra/segment.py +++ b/cassandra/segment.py @@ -28,10 +28,11 @@ class CrcException(Exception): """ - CRC mismatch error. This can happen when decoding a segment. + CRC mismatch error. - TODO: here to avoid import cycles. In the next major, the exceptions - should be declared in a separated exceptions.py file. + TODO: here to avoid import cycles with cassandra.connection. In the next + major, the exceptions should be declared in a separate exceptions.py + file. """ pass @@ -70,9 +71,14 @@ def __init__(self, payload_length, uncompressed_payload_length, is_self_containe self.uncompressed_payload_length = uncompressed_payload_length self.is_self_contained = is_self_contained - def length(self): - return SegmentCodec.UNCOMPRESSED_HEADER_LENGTH if self.uncompressed_payload_length < 0 \ + @property + def segment_length(self): + """ + Return the total length of the segment, including the header and CRCs. + """ + hl = SegmentCodec.UNCOMPRESSED_HEADER_LENGTH if self.uncompressed_payload_length < 1 \ else SegmentCodec.COMPRESSED_HEADER_LENGTH + return hl + CRC24_LENGTH + self.payload_length + CRC32_LENGTH class Segment(object): @@ -107,7 +113,8 @@ def header_length(self): @property def header_length_with_crc(self): - return self.header_length + CRC24_LENGTH + return (self.COMPRESSED_HEADER_LENGTH if self.compression + else self.UNCOMPRESSED_HEADER_LENGTH) + CRC24_LENGTH @property def compression(self): @@ -135,11 +142,14 @@ def encode_header(self, buffer, payload_length, uncompressed_length, is_self_con if is_self_contained: header_data |= 1 << flag_offset - write_uint_le(buffer, header_data, self.header_length) + write_uint_le(buffer, header_data, size=self.header_length) header_crc = compute_crc24(header_data, self.header_length) - write_uint_le(buffer, header_crc, CRC24_LENGTH) + write_uint_le(buffer, header_crc, size=CRC24_LENGTH) def _encode_segment(self, buffer, payload, is_self_contained): + """ + Encode a message to a single segment. + """ uncompressed_payload = payload uncompressed_payload_length = len(payload) @@ -155,12 +165,14 @@ def _encode_segment(self, buffer, payload, is_self_contained): payload_length = len(encoded_payload) self.encode_header(buffer, payload_length, uncompressed_payload_length, is_self_contained) - payload_crc = compute_crc32(encoded_payload, CRC32_INITIAL) buffer.write(encoded_payload) write_uint_le(buffer, payload_crc) def encode(self, buffer, msg): + """ + Encode a message to one or more segments. 
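Editor's note: a small worked example of the segment_length arithmetic above, assuming CRC24_LENGTH == 3 and CRC32_LENGTH == 4 (the sizes the codec writes): an uncompressed 50-byte payload occupies 3 + 3 + 50 + 4 = 60 bytes on the wire.

    UNCOMPRESSED_HEADER_LENGTH, CRC24_LENGTH, CRC32_LENGTH = 3, 3, 4
    payload_length = 50
    segment_length = (UNCOMPRESSED_HEADER_LENGTH + CRC24_LENGTH
                      + payload_length + CRC32_LENGTH)
    assert segment_length == 60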
+ """ msg_length = len(msg) if msg_length > Segment.MAX_PAYLOAD_LENGTH: @@ -175,10 +187,6 @@ def encode(self, buffer, msg): self._encode_segment(buffer, payload, is_self_contained) def decode_header(self, buffer): - if buffer.tell() < self.header_length_with_crc: - return - - buffer.seek(0) header_data = read_uint_le(buffer, self.header_length) expected_header_crc = read_uint_le(buffer, CRC24_LENGTH) @@ -201,12 +209,6 @@ def decode_header(self, buffer): return SegmentHeader(payload_length, uncompressed_payload_length, is_self_contained) def decode(self, buffer, header): - marker = buffer.tell() - buffer_size = buffer.seek(0, 2) # 2 == SEEK_END - buffer.seek(marker) - if buffer_size < self.header_length_with_crc + header.payload_length + CRC32_LENGTH: - return - encoded_payload = buffer.read(header.payload_length) expected_payload_crc = read_uint_le(buffer) diff --git a/tests/unit/test_segment.py b/tests/unit/test_segment.py new file mode 100644 index 0000000000..bf661bb961 --- /dev/null +++ b/tests/unit/test_segment.py @@ -0,0 +1,206 @@ +# Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + import unittest2 as unittest +except ImportError: + import unittest # noqa + +import io + +from cassandra import DriverException +from cassandra.segment import Segment, CrcException +from cassandra.connection import segment_codec_no_compression, segment_codec_lz4 + + +class SegmentCodecTest(unittest.TestCase): + + small_msg = b'b' * 50 + max_msg = b'b' * Segment.MAX_PAYLOAD_LENGTH + large_msg = b'b' * (Segment.MAX_PAYLOAD_LENGTH + 1) + + @staticmethod + def _header_to_bits(data): + # unpack a header to bits + # data should be the little endian bytes sequence + if len(data) > 6: # compressed + data = data[:5] + bits = ''.join(['{:08b}'.format(b) for b in reversed(data)]) + # return the compressed payload length, the uncompressed payload length, + # the self contained flag and the padding as bits + return bits[23:40] + bits[6:23] + bits[5:6] + bits[:5] + else: # uncompressed + data = data[:3] + bits = ''.join(['{:08b}'.format(b) for b in reversed(data)]) + # return the payload length, the self contained flag and + # the padding as bits + return bits[7:24] + bits[6:7] + bits[:6] + + def test_encode_uncompressed_header(self): + buffer = io.BytesIO() + segment_codec_no_compression.encode_header(buffer, len(self.small_msg), -1, True) + self.assertEqual(buffer.tell(), 6) + self.assertEqual( + self._header_to_bits(buffer.getvalue()), + "00000000000110010" + "1" + "000000") + + def test_encode_compressed_header(self): + buffer = io.BytesIO() + compressed_length = len(segment_codec_lz4.compress(self.small_msg)) + segment_codec_lz4.encode_header(buffer, compressed_length, len(self.small_msg), True) + + self.assertEqual(buffer.tell(), 8) + self.assertEqual( + self._header_to_bits(buffer.getvalue()), + "{:017b}".format(compressed_length) + "00000000000110010" + "1" + "00000") + + def test_encode_uncompressed_header_with_max_payload(self): + buffer = io.BytesIO() + 
segment_codec_no_compression.encode_header(buffer, len(self.max_msg), -1, True) + self.assertEqual(buffer.tell(), 6) + self.assertEqual( + self._header_to_bits(buffer.getvalue()), + "11111111111111111" + "1" + "000000") + + def test_encode_header_fails_if_payload_too_big(self): + buffer = io.BytesIO() + for codec in [segment_codec_no_compression, segment_codec_lz4]: + with self.assertRaises(DriverException): + codec.encode_header(buffer, len(self.large_msg), -1, False) + + def test_encode_uncompressed_header_not_self_contained_msg(self): + buffer = io.BytesIO() + # simulate the first chunk with the max size + segment_codec_no_compression.encode_header(buffer, len(self.max_msg), -1, False) + self.assertEqual(buffer.tell(), 6) + self.assertEqual( + self._header_to_bits(buffer.getvalue()), + ("11111111111111111" + "0" # not self contained + "000000")) + + def test_encode_compressed_header_with_max_payload(self): + buffer = io.BytesIO() + compressed_length = len(segment_codec_lz4.compress(self.max_msg)) + segment_codec_lz4.encode_header(buffer, compressed_length, len(self.max_msg), True) + self.assertEqual(buffer.tell(), 8) + self.assertEqual( + self._header_to_bits(buffer.getvalue()), + "{:017b}".format(compressed_length) + "11111111111111111" + "1" + "00000") + + def test_encode_compressed_header_not_self_contained_msg(self): + buffer = io.BytesIO() + # simulate the first chunk with the max size + compressed_length = len(segment_codec_lz4.compress(self.max_msg)) + segment_codec_lz4.encode_header(buffer, compressed_length, len(self.max_msg), False) + self.assertEqual(buffer.tell(), 8) + self.assertEqual( + self._header_to_bits(buffer.getvalue()), + ("{:017b}".format(compressed_length) + + "11111111111111111" + "0" # not self contained + "00000")) + + def test_decode_uncompressed_header(self): + buffer = io.BytesIO() + segment_codec_no_compression.encode_header(buffer, len(self.small_msg), -1, True) + buffer.seek(0) + header = segment_codec_no_compression.decode_header(buffer) + self.assertEqual(header.uncompressed_payload_length, -1) + self.assertEqual(header.payload_length, len(self.small_msg)) + self.assertEqual(header.is_self_contained, True) + + def test_decode_compressed_header(self): + buffer = io.BytesIO() + compressed_length = len(segment_codec_lz4.compress(self.small_msg)) + segment_codec_lz4.encode_header(buffer, compressed_length, len(self.small_msg), True) + buffer.seek(0) + header = segment_codec_lz4.decode_header(buffer) + self.assertEqual(header.uncompressed_payload_length, len(self.small_msg)) + self.assertEqual(header.payload_length, compressed_length) + self.assertEqual(header.is_self_contained, True) + + def test_decode_header_fails_if_corrupted(self): + buffer = io.BytesIO() + segment_codec_no_compression.encode_header(buffer, len(self.small_msg), -1, True) + # corrupt one byte + buffer.seek(buffer.tell()-1) + buffer.write(b'0') + buffer.seek(0) + + with self.assertRaises(CrcException): + segment_codec_no_compression.decode_header(buffer) + + def test_decode_uncompressed_self_contained_segment(self): + buffer = io.BytesIO() + segment_codec_no_compression.encode(buffer, self.small_msg) + + buffer.seek(0) + header = segment_codec_no_compression.decode_header(buffer) + segment = segment_codec_no_compression.decode(buffer, header) + + self.assertEqual(header.is_self_contained, True) + self.assertEqual(header.uncompressed_payload_length, -1) + self.assertEqual(header.payload_length, len(self.small_msg)) + self.assertEqual(segment.payload, self.small_msg) + + def 
test_decode_compressed_self_contained_segment(self): + buffer = io.BytesIO() + segment_codec_lz4.encode(buffer, self.small_msg) + + buffer.seek(0) + header = segment_codec_lz4.decode_header(buffer) + segment = segment_codec_lz4.decode(buffer, header) + + self.assertEqual(header.is_self_contained, True) + self.assertEqual(header.uncompressed_payload_length, len(self.small_msg)) + self.assertGreater(header.uncompressed_payload_length, header.payload_length) + self.assertEqual(segment.payload, self.small_msg) + + def test_decode_multi_segments(self): + buffer = io.BytesIO() + segment_codec_no_compression.encode(buffer, self.large_msg) + + buffer.seek(0) + # We should have 2 segments to read + headers = [] + segments = [] + headers.append(segment_codec_no_compression.decode_header(buffer)) + segments.append(segment_codec_no_compression.decode(buffer, headers[0])) + headers.append(segment_codec_no_compression.decode_header(buffer)) + segments.append(segment_codec_no_compression.decode(buffer, headers[1])) + + self.assertTrue(all([h.is_self_contained is False for h in headers])) + decoded_msg = segments[0].payload + segments[1].payload + self.assertEqual(decoded_msg, self.large_msg) + + def test_decode_fails_if_corrupted(self): + buffer = io.BytesIO() + segment_codec_lz4.encode(buffer, self.small_msg) + buffer.seek(buffer.tell()-1) + buffer.write(b'0') + buffer.seek(0) + header = segment_codec_lz4.decode_header(buffer) + with self.assertRaises(CrcException): + segment_codec_lz4.decode(buffer, header) + + def test_decode_tiny_msg_not_compressed(self): + buffer = io.BytesIO() + segment_codec_lz4.encode(buffer, b'b') + buffer.seek(0) + header = segment_codec_lz4.decode_header(buffer) + segment = segment_codec_lz4.decode(buffer, header) + self.assertEqual(header.uncompressed_payload_length, 0) + self.assertEqual(header.payload_length, 1) + self.assertEqual(segment.payload, b'b') From 31ac39cd852cbc4e99b4a8049c9333b8c98f1b27 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Mon, 31 Aug 2020 14:47:31 -0400 Subject: [PATCH 103/211] Fix some unit tests to read the proper connection cql frame buffer --- tests/unit/io/test_twistedreactor.py | 6 +++--- tests/unit/io/utils.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/unit/io/test_twistedreactor.py b/tests/unit/io/test_twistedreactor.py index f3a2f05328..ef79943d7d 100644 --- a/tests/unit/io/test_twistedreactor.py +++ b/tests/unit/io/test_twistedreactor.py @@ -148,12 +148,12 @@ def test_handle_read__incomplete(self): # incomplete header self.obj_ut._iobuf.write(b'\x84\x00\x00\x00\x00') self.obj_ut.handle_read() - self.assertEqual(self.obj_ut._iobuf.getvalue(), b'\x84\x00\x00\x00\x00') + self.assertEqual(self.obj_ut._frame_iobuf.getvalue(), b'\x84\x00\x00\x00\x00') # full header, but incomplete body self.obj_ut._iobuf.write(b'\x00\x00\x00\x15') self.obj_ut.handle_read() - self.assertEqual(self.obj_ut._iobuf.getvalue(), + self.assertEqual(self.obj_ut._frame_iobuf.getvalue(), b'\x84\x00\x00\x00\x00\x00\x00\x00\x15') self.assertEqual(self.obj_ut._current_frame.end_pos, 30) @@ -174,7 +174,7 @@ def test_handle_read__fullmessage(self): self.obj_ut._iobuf.write( b'\x84\x01\x00\x02\x03\x00\x00\x00\x15' + body + extra) self.obj_ut.handle_read() - self.assertEqual(self.obj_ut._iobuf.getvalue(), extra) + self.assertEqual(self.obj_ut._frame_iobuf.getvalue(), extra) self.obj_ut.process_msg.assert_called_with( _Frame(version=4, flags=1, stream=2, opcode=3, body_offset=9, end_pos=9 + len(body)), body) diff --git a/tests/unit/io/utils.py 
b/tests/unit/io/utils.py index 168a5e8b76..b587c24bfd 100644 --- a/tests/unit/io/utils.py +++ b/tests/unit/io/utils.py @@ -435,7 +435,7 @@ def test_partial_header_read(self): self.get_socket(c).recv.return_value = message[0:1] c.handle_read(*self.null_handle_function_args) - self.assertEqual(c._iobuf.getvalue(), message[0:1]) + self.assertEqual(c._frame_iobuf.getvalue(), message[0:1]) self.get_socket(c).recv.return_value = message[1:] c.handle_read(*self.null_handle_function_args) @@ -461,7 +461,7 @@ def test_partial_message_read(self): # read in the first nine bytes self.get_socket(c).recv.return_value = message[:9] c.handle_read(*self.null_handle_function_args) - self.assertEqual(c._iobuf.getvalue(), message[:9]) + self.assertEqual(c._frame_iobuf.getvalue(), message[:9]) # ... then read in the rest self.get_socket(c).recv.return_value = message[9:] From bd05fe6aeec9df67445a720067d7f8434e1b1e8b Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Mon, 31 Aug 2020 15:04:36 -0400 Subject: [PATCH 104/211] Fix python2 bits formatting and missing lz4 deps with tox --- tests/unit/test_segment.py | 41 +++++++++++++++++++++----------------- tox.ini | 1 + 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/tests/unit/test_segment.py b/tests/unit/test_segment.py index bf661bb961..85d4f38cbd 100644 --- a/tests/unit/test_segment.py +++ b/tests/unit/test_segment.py @@ -17,13 +17,18 @@ except ImportError: import unittest # noqa -import io +import six from cassandra import DriverException from cassandra.segment import Segment, CrcException from cassandra.connection import segment_codec_no_compression, segment_codec_lz4 +def to_bits(b): + if six.PY2: + b = six.byte2int(b) + return '{:08b}'.format(b) + class SegmentCodecTest(unittest.TestCase): small_msg = b'b' * 50 @@ -36,19 +41,19 @@ def _header_to_bits(data): # data should be the little endian bytes sequence if len(data) > 6: # compressed data = data[:5] - bits = ''.join(['{:08b}'.format(b) for b in reversed(data)]) + bits = ''.join([to_bits(b) for b in reversed(data)]) # return the compressed payload length, the uncompressed payload length, # the self contained flag and the padding as bits return bits[23:40] + bits[6:23] + bits[5:6] + bits[:5] else: # uncompressed data = data[:3] - bits = ''.join(['{:08b}'.format(b) for b in reversed(data)]) + bits = ''.join([to_bits(b) for b in reversed(data)]) # return the payload length, the self contained flag and # the padding as bits return bits[7:24] + bits[6:7] + bits[:6] def test_encode_uncompressed_header(self): - buffer = io.BytesIO() + buffer = six.BytesIO() segment_codec_no_compression.encode_header(buffer, len(self.small_msg), -1, True) self.assertEqual(buffer.tell(), 6) self.assertEqual( @@ -56,7 +61,7 @@ def test_encode_uncompressed_header(self): "00000000000110010" + "1" + "000000") def test_encode_compressed_header(self): - buffer = io.BytesIO() + buffer = six.BytesIO() compressed_length = len(segment_codec_lz4.compress(self.small_msg)) segment_codec_lz4.encode_header(buffer, compressed_length, len(self.small_msg), True) @@ -66,7 +71,7 @@ def test_encode_compressed_header(self): "{:017b}".format(compressed_length) + "00000000000110010" + "1" + "00000") def test_encode_uncompressed_header_with_max_payload(self): - buffer = io.BytesIO() + buffer = six.BytesIO() segment_codec_no_compression.encode_header(buffer, len(self.max_msg), -1, True) self.assertEqual(buffer.tell(), 6) self.assertEqual( @@ -74,13 +79,13 @@ def test_encode_uncompressed_header_with_max_payload(self): "11111111111111111" 
+ "1" + "000000") def test_encode_header_fails_if_payload_too_big(self): - buffer = io.BytesIO() + buffer = six.BytesIO() for codec in [segment_codec_no_compression, segment_codec_lz4]: with self.assertRaises(DriverException): codec.encode_header(buffer, len(self.large_msg), -1, False) def test_encode_uncompressed_header_not_self_contained_msg(self): - buffer = io.BytesIO() + buffer = six.BytesIO() # simulate the first chunk with the max size segment_codec_no_compression.encode_header(buffer, len(self.max_msg), -1, False) self.assertEqual(buffer.tell(), 6) @@ -91,7 +96,7 @@ def test_encode_uncompressed_header_not_self_contained_msg(self): "000000")) def test_encode_compressed_header_with_max_payload(self): - buffer = io.BytesIO() + buffer = six.BytesIO() compressed_length = len(segment_codec_lz4.compress(self.max_msg)) segment_codec_lz4.encode_header(buffer, compressed_length, len(self.max_msg), True) self.assertEqual(buffer.tell(), 8) @@ -100,7 +105,7 @@ def test_encode_compressed_header_with_max_payload(self): "{:017b}".format(compressed_length) + "11111111111111111" + "1" + "00000") def test_encode_compressed_header_not_self_contained_msg(self): - buffer = io.BytesIO() + buffer = six.BytesIO() # simulate the first chunk with the max size compressed_length = len(segment_codec_lz4.compress(self.max_msg)) segment_codec_lz4.encode_header(buffer, compressed_length, len(self.max_msg), False) @@ -113,7 +118,7 @@ def test_encode_compressed_header_not_self_contained_msg(self): "00000")) def test_decode_uncompressed_header(self): - buffer = io.BytesIO() + buffer = six.BytesIO() segment_codec_no_compression.encode_header(buffer, len(self.small_msg), -1, True) buffer.seek(0) header = segment_codec_no_compression.decode_header(buffer) @@ -122,7 +127,7 @@ def test_decode_uncompressed_header(self): self.assertEqual(header.is_self_contained, True) def test_decode_compressed_header(self): - buffer = io.BytesIO() + buffer = six.BytesIO() compressed_length = len(segment_codec_lz4.compress(self.small_msg)) segment_codec_lz4.encode_header(buffer, compressed_length, len(self.small_msg), True) buffer.seek(0) @@ -132,7 +137,7 @@ def test_decode_compressed_header(self): self.assertEqual(header.is_self_contained, True) def test_decode_header_fails_if_corrupted(self): - buffer = io.BytesIO() + buffer = six.BytesIO() segment_codec_no_compression.encode_header(buffer, len(self.small_msg), -1, True) # corrupt one byte buffer.seek(buffer.tell()-1) @@ -143,7 +148,7 @@ def test_decode_header_fails_if_corrupted(self): segment_codec_no_compression.decode_header(buffer) def test_decode_uncompressed_self_contained_segment(self): - buffer = io.BytesIO() + buffer = six.BytesIO() segment_codec_no_compression.encode(buffer, self.small_msg) buffer.seek(0) @@ -156,7 +161,7 @@ def test_decode_uncompressed_self_contained_segment(self): self.assertEqual(segment.payload, self.small_msg) def test_decode_compressed_self_contained_segment(self): - buffer = io.BytesIO() + buffer = six.BytesIO() segment_codec_lz4.encode(buffer, self.small_msg) buffer.seek(0) @@ -169,7 +174,7 @@ def test_decode_compressed_self_contained_segment(self): self.assertEqual(segment.payload, self.small_msg) def test_decode_multi_segments(self): - buffer = io.BytesIO() + buffer = six.BytesIO() segment_codec_no_compression.encode(buffer, self.large_msg) buffer.seek(0) @@ -186,7 +191,7 @@ def test_decode_multi_segments(self): self.assertEqual(decoded_msg, self.large_msg) def test_decode_fails_if_corrupted(self): - buffer = io.BytesIO() + buffer = six.BytesIO() 
segment_codec_lz4.encode(buffer, self.small_msg) buffer.seek(buffer.tell()-1) buffer.write(b'0') @@ -196,7 +201,7 @@ def test_decode_fails_if_corrupted(self): segment_codec_lz4.decode(buffer, header) def test_decode_tiny_msg_not_compressed(self): - buffer = io.BytesIO() + buffer = six.BytesIO() segment_codec_lz4.encode(buffer, b'b') buffer.seek(0) header = segment_codec_lz4.decode_header(buffer) diff --git a/tox.ini b/tox.ini index efb610ac09..d013a480f0 100644 --- a/tox.ini +++ b/tox.ini @@ -12,6 +12,7 @@ deps = nose pure-sasl kerberos futurist + lz4 [testenv] deps = {[base]deps} From d992e818edf93cc886100e5e4007e10ea8e4d4bc Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Wed, 9 Sep 2020 15:26:54 -0400 Subject: [PATCH 105/211] Avoid using 2 io buffers when checksumming is not used --- CHANGELOG.rst | 1 + cassandra/connection.py | 111 ++++++++++++------ .../standard/test_custom_protocol_handler.py | 11 +- tests/integration/standard/test_query.py | 4 +- tests/unit/io/test_twistedreactor.py | 6 +- tests/unit/io/utils.py | 18 +-- tests/unit/test_connection.py | 4 +- tests/unit/test_segment.py | 9 +- tox.ini | 5 +- 9 files changed, 114 insertions(+), 55 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a3217d6cda..9aaf719542 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,6 +5,7 @@ Not released Features -------- * Ensure the driver can connect when invalid peer hosts are in system.peers (PYTHON-1260) +* Implement protocol v5 checksumming (PYTHON-1258) Bug Fixes --------- diff --git a/cassandra/connection.py b/cassandra/connection.py index 7054a389d0..477eaf2f28 100644 --- a/cassandra/connection.py +++ b/cassandra/connection.py @@ -27,6 +27,8 @@ from threading import Thread, Event, RLock, Condition import time import ssl +import weakref + if 'gevent.monkey' in sys.modules: from gevent.queue import Queue, Empty @@ -610,6 +612,55 @@ def int_from_buf_item(i): int_from_buf_item = ord +class _ConnectionIOBuffer(object): + """ + Abstraction class to ease the use of the different connection io buffers. With + protocol V5 and checksumming, the data is read, validated and copied to another + cql frame buffer. 
+ """ + _io_buffer = None + _cql_frame_buffer = None + _connection = None + + def __init__(self, connection): + self._io_buffer = io.BytesIO() + self._connection = weakref.proxy(connection) + + @property + def io_buffer(self): + return self._io_buffer + + @property + def cql_frame_buffer(self): + return self._cql_frame_buffer if self.is_checksumming_enabled else \ + self._io_buffer + + def set_checksumming_buffer(self): + self.reset_io_buffer() + self._cql_frame_buffer = io.BytesIO() + + @property + def is_checksumming_enabled(self): + return self._connection._is_checksumming_enabled + + def readable_io_bytes(self): + return self.io_buffer.tell() + + def readable_cql_frame_bytes(self): + return self.cql_frame_buffer.tell() + + def reset_io_buffer(self): + self._io_buffer = io.BytesIO(self._io_buffer.read()) + self._io_buffer.seek(0, 2) # 2 == SEEK_END + + def reset_cql_frame_buffer(self): + if self.is_checksumming_enabled: + self._cql_frame_buffer = io.BytesIO(self._cql_frame_buffer.read()) + self._cql_frame_buffer.seek(0, 2) # 2 == SEEK_END + else: + self.reset_io_buffer() + + class Connection(object): CALLBACK_ERR_THREAD_THRESHOLD = 100 @@ -665,8 +716,6 @@ class Connection(object): allow_beta_protocol_version = False - _iobuf = None - _frame_iobuf = None _current_frame = None _socket = None @@ -679,6 +728,11 @@ class Connection(object): _is_checksumming_enabled = False + @property + def _iobuf(self): + # backward compatibility, to avoid any change in the reactors + return self._io_buffer.io_buffer + def __init__(self, host='127.0.0.1', port=9042, authenticator=None, ssl_options=None, sockopts=None, compression=True, cql_version=None, protocol_version=ProtocolVersion.MAX_SUPPORTED, is_control_connection=False, @@ -702,8 +756,7 @@ def __init__(self, host='127.0.0.1', port=9042, authenticator=None, self.no_compact = no_compact self._push_watchers = defaultdict(set) self._requests = {} - self._iobuf = io.BytesIO() - self._frame_iobuf = io.BytesIO() + self._io_buffer = _ConnectionIOBuffer(self) self._continuous_paging_sessions = {} self._socket_writable = True @@ -844,6 +897,12 @@ def _connect_socket(self): for args in self.sockopts: self._socket.setsockopt(*args) + def _enable_checksumming(self): + self._io_buffer.set_checksumming_buffer() + self._is_checksumming_enabled = True + self._segment_codec = segment_codec_lz4 if self.compressor else segment_codec_no_compression + log.debug("Enabling protocol checksumming on connection (%s).", id(self)) + def close(self): raise NotImplementedError() @@ -1032,7 +1091,7 @@ def control_conn_disposed(self): @defunct_on_error def _read_frame_header(self): - buf = self._frame_iobuf.getvalue() + buf = self._io_buffer.cql_frame_buffer.getvalue() pos = len(buf) if pos: version = int_from_buf_item(buf[0]) & PROTOCOL_VERSION_MASK @@ -1048,28 +1107,19 @@ def _read_frame_header(self): self._current_frame = _Frame(version, flags, stream, op, header_size, body_len + header_size) return pos - def _reset_frame(self): - self._frame_iobuf = io.BytesIO(self._frame_iobuf.read()) - self._frame_iobuf.seek(0, 2) # 2 == SEEK_END - self._current_frame = None - - def _reset_io_buffer(self): - self._iobuf = io.BytesIO(self._iobuf.read()) - self._iobuf.seek(0, 2) # 2 == SEEK_END - @defunct_on_error def _process_segment_buffer(self): - readable_bytes = self._iobuf.tell() + readable_bytes = self._io_buffer.readable_io_bytes() if readable_bytes >= self._segment_codec.header_length_with_crc: try: - self._iobuf.seek(0) - segment_header = 
self._segment_codec.decode_header(self._iobuf) + self._io_buffer.io_buffer.seek(0) + segment_header = self._segment_codec.decode_header(self._io_buffer.io_buffer) if readable_bytes >= segment_header.segment_length: segment = self._segment_codec.decode(self._iobuf, segment_header) - self._frame_iobuf.write(segment.payload) + self._io_buffer.cql_frame_buffer.write(segment.payload) else: # not enough data to read the segment - self._iobuf.seek(0, 2) + self._io_buffer.io_buffer.seek(0, 2) except CrcException as exc: # re-raise an exception that inherits from ConnectionException raise CrcMismatchException(str(exc), self.endpoint) @@ -1078,21 +1128,15 @@ def process_io_buffer(self): while True: if self._is_checksumming_enabled: self._process_segment_buffer() - else: - # We should probably refactor the IO buffering stuff out of the Connection - # class to handle this in a better way. That would make the segment and frame - # decoding code clearer. - self._frame_iobuf.write(self._iobuf.getvalue()) - - self._reset_io_buffer() + self._io_buffer.reset_io_buffer() if not self._current_frame: pos = self._read_frame_header() else: - pos = self._frame_iobuf.tell() + pos = self._io_buffer.readable_cql_frame_bytes() if not self._current_frame or pos < self._current_frame.end_pos: - if self._is_checksumming_enabled and self._iobuf.tell(): + if self._is_checksumming_enabled and self._io_buffer.readable_io_bytes(): # We have a multi-segments message and we need to read more # data to complete the current cql frame continue @@ -1103,10 +1147,11 @@ def process_io_buffer(self): return else: frame = self._current_frame - self._frame_iobuf.seek(frame.body_offset) - msg = self._frame_iobuf.read(frame.end_pos - frame.body_offset) + self._io_buffer.cql_frame_buffer.seek(frame.body_offset) + msg = self._io_buffer.cql_frame_buffer.read(frame.end_pos - frame.body_offset) self.process_msg(frame, msg) - self._reset_frame() + self._io_buffer.reset_cql_frame_buffer() + self._current_frame = None @defunct_on_error def process_msg(self, header, body): @@ -1287,9 +1332,7 @@ def _handle_startup_response(self, startup_response, did_authenticate=False): self.compressor = self._compressor if ProtocolVersion.has_checksumming_support(self.protocol_version): - self._is_checksumming_enabled = True - self._segment_codec = segment_codec_lz4 if self.compressor else segment_codec_no_compression - log.debug("Enabling protocol checksumming on connection (%s).", id(self)) + self._enable_checksumming() self.connected_event.set() elif isinstance(startup_response, AuthenticateMessage): diff --git a/tests/integration/standard/test_custom_protocol_handler.py b/tests/integration/standard/test_custom_protocol_handler.py index d5108ed47b..bf549511c8 100644 --- a/tests/integration/standard/test_custom_protocol_handler.py +++ b/tests/integration/standard/test_custom_protocol_handler.py @@ -25,7 +25,7 @@ from tests.integration import use_singledc, drop_keyspace_shutdown_cluster, \ greaterthanorequalcass30, execute_with_long_wait_retry, greaterthanorequaldse51, greaterthanorequalcass3_10, \ - greaterthanorequalcass31, TestCluster + TestCluster, greaterthanorequalcass40, requirecassandra from tests.integration.datatype_utils import update_datatypes, PRIMITIVE_DATATYPES from tests.integration.standard.utils import create_table_with_all_types, get_all_primitive_params from six import binary_type @@ -124,7 +124,8 @@ def test_custom_raw_row_results_all_types(self): self.assertEqual(len(CustomResultMessageTracked.checked_rev_row_set), 
len(PRIMITIVE_DATATYPES)-1) cluster.shutdown() - @greaterthanorequalcass31 + @requirecassandra + @greaterthanorequalcass40 def test_protocol_divergence_v5_fail_by_continuous_paging(self): """ Test to validate that V5 and DSE_V1 diverge. ContinuousPagingOptions is not supported by V5 @@ -170,7 +171,8 @@ def test_protocol_divergence_v4_fail_by_flag_uses_int(self): self._protocol_divergence_fail_by_flag_uses_int(ProtocolVersion.V4, uses_int_query_flag=False, int_flag=True) - @greaterthanorequalcass3_10 + @requirecassandra + @greaterthanorequalcass40 def test_protocol_v5_uses_flag_int(self): """ Test to validate that the _PAGE_SIZE_FLAG is treated correctly using write_uint for V5 @@ -196,7 +198,8 @@ def test_protocol_dsev1_uses_flag_int(self): self._protocol_divergence_fail_by_flag_uses_int(ProtocolVersion.DSE_V1, uses_int_query_flag=True, int_flag=True) - @greaterthanorequalcass3_10 + @requirecassandra + @greaterthanorequalcass40 def test_protocol_divergence_v5_fail_by_flag_uses_int(self): """ Test to validate that the _PAGE_SIZE_FLAG is treated correctly using write_uint for V5 diff --git a/tests/integration/standard/test_query.py b/tests/integration/standard/test_query.py index d6401a987e..ea0e326ff5 100644 --- a/tests/integration/standard/test_query.py +++ b/tests/integration/standard/test_query.py @@ -28,7 +28,7 @@ from cassandra.policies import HostDistance, RoundRobinPolicy, WhiteListRoundRobinPolicy from tests.integration import use_singledc, PROTOCOL_VERSION, BasicSharedKeyspaceUnitTestCase, \ greaterthanprotocolv3, MockLoggingHandler, get_supported_protocol_versions, local, get_cluster, setup_keyspace, \ - USE_CASS_EXTERNAL, greaterthanorequalcass40, DSE_VERSION, TestCluster + USE_CASS_EXTERNAL, greaterthanorequalcass40, DSE_VERSION, TestCluster, requirecassandra from tests import notwindows from tests.integration import greaterthanorequalcass30, get_node @@ -1408,6 +1408,8 @@ def test_setting_keyspace(self): """ self._check_set_keyspace_in_statement(self.session) + @requirecassandra + @greaterthanorequalcass40 def test_setting_keyspace_and_session(self): """ Test we can still send the keyspace independently even the session diff --git a/tests/unit/io/test_twistedreactor.py b/tests/unit/io/test_twistedreactor.py index ef79943d7d..e7c34cb4b5 100644 --- a/tests/unit/io/test_twistedreactor.py +++ b/tests/unit/io/test_twistedreactor.py @@ -148,12 +148,12 @@ def test_handle_read__incomplete(self): # incomplete header self.obj_ut._iobuf.write(b'\x84\x00\x00\x00\x00') self.obj_ut.handle_read() - self.assertEqual(self.obj_ut._frame_iobuf.getvalue(), b'\x84\x00\x00\x00\x00') + self.assertEqual(self.obj_ut._io_buffer.cql_frame_buffer.getvalue(), b'\x84\x00\x00\x00\x00') # full header, but incomplete body self.obj_ut._iobuf.write(b'\x00\x00\x00\x15') self.obj_ut.handle_read() - self.assertEqual(self.obj_ut._frame_iobuf.getvalue(), + self.assertEqual(self.obj_ut._io_buffer.cql_frame_buffer.getvalue(), b'\x84\x00\x00\x00\x00\x00\x00\x00\x15') self.assertEqual(self.obj_ut._current_frame.end_pos, 30) @@ -174,7 +174,7 @@ def test_handle_read__fullmessage(self): self.obj_ut._iobuf.write( b'\x84\x01\x00\x02\x03\x00\x00\x00\x15' + body + extra) self.obj_ut.handle_read() - self.assertEqual(self.obj_ut._frame_iobuf.getvalue(), extra) + self.assertEqual(self.obj_ut._io_buffer.cql_frame_buffer.getvalue(), extra) self.obj_ut.process_msg.assert_called_with( _Frame(version=4, flags=1, stream=2, opcode=3, body_offset=9, end_pos=9 + len(body)), body) diff --git a/tests/unit/io/utils.py b/tests/unit/io/utils.py 
index b587c24bfd..848513f031 100644 --- a/tests/unit/io/utils.py +++ b/tests/unit/io/utils.py @@ -309,14 +309,14 @@ def chunk(size): for message, expected_size in messages: message_chunks = message - c._iobuf = io.BytesIO() + c._io_buffer._io_buffer = io.BytesIO() c.process_io_buffer.reset_mock() c.handle_read(*self.null_handle_function_args) - c._iobuf.seek(0, os.SEEK_END) + c._io_buffer.io_buffer.seek(0, os.SEEK_END) # Ensure the message size is the good one and that the # message has been processed if it is non-empty - self.assertEqual(c._iobuf.tell(), expected_size) + self.assertEqual(c._io_buffer.io_buffer.tell(), expected_size) if expected_size == 0: c.process_io_buffer.assert_not_called() else: @@ -435,11 +435,11 @@ def test_partial_header_read(self): self.get_socket(c).recv.return_value = message[0:1] c.handle_read(*self.null_handle_function_args) - self.assertEqual(c._frame_iobuf.getvalue(), message[0:1]) + self.assertEqual(c._io_buffer.cql_frame_buffer.getvalue(), message[0:1]) self.get_socket(c).recv.return_value = message[1:] c.handle_read(*self.null_handle_function_args) - self.assertEqual(six.binary_type(), c._iobuf.getvalue()) + self.assertEqual(six.binary_type(), c._io_buffer.io_buffer.getvalue()) # let it write out a StartupMessage c.handle_write(*self.null_handle_function_args) @@ -461,12 +461,12 @@ def test_partial_message_read(self): # read in the first nine bytes self.get_socket(c).recv.return_value = message[:9] c.handle_read(*self.null_handle_function_args) - self.assertEqual(c._frame_iobuf.getvalue(), message[:9]) + self.assertEqual(c._io_buffer.cql_frame_buffer.getvalue(), message[:9]) # ... then read in the rest self.get_socket(c).recv.return_value = message[9:] c.handle_read(*self.null_handle_function_args) - self.assertEqual(six.binary_type(), c._iobuf.getvalue()) + self.assertEqual(six.binary_type(), c._io_buffer.io_buffer.getvalue()) # let it write out a StartupMessage c.handle_write(*self.null_handle_function_args) @@ -501,7 +501,7 @@ def test_mixed_message_and_buffer_sizes(self): for i in range(1, 15): c.process_io_buffer.reset_mock() - c._iobuf = io.BytesIO() + c._io_buffer._io_buffer = io.BytesIO() message = io.BytesIO(six.b('a') * (2**i)) def recv_side_effect(*args): @@ -511,7 +511,7 @@ def recv_side_effect(*args): self.get_socket(c).recv.side_effect = recv_side_effect c.handle_read(*self.null_handle_function_args) - if c._iobuf.tell(): + if c._io_buffer.io_buffer.tell(): c.process_io_buffer.assert_called_once() else: c.process_io_buffer.assert_not_called() diff --git a/tests/unit/test_connection.py b/tests/unit/test_connection.py index 68577a396e..21b8862772 100644 --- a/tests/unit/test_connection.py +++ b/tests/unit/test_connection.py @@ -100,7 +100,7 @@ def test_bad_protocol_version(self, *args): header = self.make_header_prefix(SupportedMessage, version=0x7f) options = self.make_options_body() message = self.make_msg(header, options) - c._iobuf = BytesIO() + c._iobuf._io_buffer = BytesIO() c._iobuf.write(message) c.process_io_buffer() @@ -117,7 +117,7 @@ def test_negative_body_length(self, *args): # read in a SupportedMessage response header = self.make_header_prefix(SupportedMessage) message = header + int32_pack(-13) - c._iobuf = BytesIO() + c._iobuf._io_buffer = BytesIO() c._iobuf.write(message) c.process_io_buffer() diff --git a/tests/unit/test_segment.py b/tests/unit/test_segment.py index 85d4f38cbd..fc49339d68 100644 --- a/tests/unit/test_segment.py +++ b/tests/unit/test_segment.py @@ -60,6 +60,7 @@ def test_encode_uncompressed_header(self): 
self._header_to_bits(buffer.getvalue()), "00000000000110010" + "1" + "000000") + @unittest.skipUnless(segment_codec_lz4, ' lz4 not installed') def test_encode_compressed_header(self): buffer = six.BytesIO() compressed_length = len(segment_codec_lz4.compress(self.small_msg)) @@ -80,7 +81,7 @@ def test_encode_uncompressed_header_with_max_payload(self): def test_encode_header_fails_if_payload_too_big(self): buffer = six.BytesIO() - for codec in [segment_codec_no_compression, segment_codec_lz4]: + for codec in [c for c in [segment_codec_no_compression, segment_codec_lz4] if c is not None]: with self.assertRaises(DriverException): codec.encode_header(buffer, len(self.large_msg), -1, False) @@ -95,6 +96,7 @@ def test_encode_uncompressed_header_not_self_contained_msg(self): "0" # not self contained "000000")) + @unittest.skipUnless(segment_codec_lz4, ' lz4 not installed') def test_encode_compressed_header_with_max_payload(self): buffer = six.BytesIO() compressed_length = len(segment_codec_lz4.compress(self.max_msg)) @@ -104,6 +106,7 @@ def test_encode_compressed_header_with_max_payload(self): self._header_to_bits(buffer.getvalue()), "{:017b}".format(compressed_length) + "11111111111111111" + "1" + "00000") + @unittest.skipUnless(segment_codec_lz4, ' lz4 not installed') def test_encode_compressed_header_not_self_contained_msg(self): buffer = six.BytesIO() # simulate the first chunk with the max size @@ -126,6 +129,7 @@ def test_decode_uncompressed_header(self): self.assertEqual(header.payload_length, len(self.small_msg)) self.assertEqual(header.is_self_contained, True) + @unittest.skipUnless(segment_codec_lz4, ' lz4 not installed') def test_decode_compressed_header(self): buffer = six.BytesIO() compressed_length = len(segment_codec_lz4.compress(self.small_msg)) @@ -160,6 +164,7 @@ def test_decode_uncompressed_self_contained_segment(self): self.assertEqual(header.payload_length, len(self.small_msg)) self.assertEqual(segment.payload, self.small_msg) + @unittest.skipUnless(segment_codec_lz4, ' lz4 not installed') def test_decode_compressed_self_contained_segment(self): buffer = six.BytesIO() segment_codec_lz4.encode(buffer, self.small_msg) @@ -190,6 +195,7 @@ def test_decode_multi_segments(self): decoded_msg = segments[0].payload + segments[1].payload self.assertEqual(decoded_msg, self.large_msg) + @unittest.skipUnless(segment_codec_lz4, ' lz4 not installed') def test_decode_fails_if_corrupted(self): buffer = six.BytesIO() segment_codec_lz4.encode(buffer, self.small_msg) @@ -200,6 +206,7 @@ def test_decode_fails_if_corrupted(self): with self.assertRaises(CrcException): segment_codec_lz4.decode(buffer, header) + @unittest.skipUnless(segment_codec_lz4, ' lz4 not installed') def test_decode_tiny_msg_not_compressed(self): buffer = six.BytesIO() segment_codec_lz4.encode(buffer, b'b') diff --git a/tox.ini b/tox.ini index d013a480f0..d883a1f973 100644 --- a/tox.ini +++ b/tox.ini @@ -12,10 +12,11 @@ deps = nose pure-sasl kerberos futurist - lz4 +lz4_dependency = py27,py35,py36,py37,py38: lz4 [testenv] deps = {[base]deps} + {[base]lz4_dependency} setenv = LIBEV_EMBED=0 CARES_EMBED=0 @@ -26,6 +27,7 @@ commands = nosetests --verbosity=2 --no-path-adjustment {toxinidir}/tests/unit/ [testenv:gevent_loop] deps = {[base]deps} + {[base]lz4_dependency} gevent>=1.4,<1.5 setenv = LIBEV_EMBED=0 @@ -38,6 +40,7 @@ commands = [testenv:eventlet_loop] deps = {[base]deps} + {[base]lz4_dependency} gevent>=1.4,<1.5 setenv = LIBEV_EMBED=0 From ed76b47a8b8fc735dfae28049c4fd5a60557bc98 Mon Sep 17 00:00:00 2001 From: Radovan Date: 
Thu, 24 Sep 2020 20:52:35 +0300 Subject: [PATCH 106/211] Don't create Cluster with hosts when a cloud config is provided We set out to build a Flask app that talks to Astra. The CQL plugin we used turned out to crash if we didn't give it hosts to connect to (https://github.com/thegeorgeous/flask-cqlalchemy/blob/master/flask_cqlalchemy/__init__.py#L48). But we also had to use the `cloud` key to pass in Astra credentials (via `setup_kwargs` near the link above). This exploded on us in cqlengine.connection#setup() where the hosts passed into Cluster() made that constructor blow up because: `ValueError: contact_points, endpoint_factory, ssl_context, and ssl_options cannot be specified with a cloud configuration` We're not sure this is the right place to patch, but it helped us at least. We figured coming out with a patch directly, instead of an issue, might save some time. --- cassandra/cqlengine/connection.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cassandra/cqlengine/connection.py b/cassandra/cqlengine/connection.py index 884e04ed74..c844ec7675 100644 --- a/cassandra/cqlengine/connection.py +++ b/cassandra/cqlengine/connection.py @@ -98,7 +98,11 @@ def setup(self): if self.lazy_connect: return - self.cluster = Cluster(self.hosts, **self.cluster_options) + if 'cloud' in self.cluster_options: + self.cluster = Cluster(**self.cluster_options) + else: + self.cluster = Cluster(self.hosts, **self.cluster_options) + try: self.session = self.cluster.connect() log.debug(format_log_context("connection initialized with internally created session", connection=self.name)) From d3769c36766856d71256a0f7617f1275a4679004 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Fri, 23 Oct 2020 15:59:04 -0400 Subject: [PATCH 107/211] Add a test and docs --- CHANGELOG.rst | 1 + cassandra/cqlengine/connection.py | 4 ++++ docs/cloud.rst | 33 +++++++++++++++++++++++++++ tests/integration/cloud/test_cloud.py | 19 +++++++++++++-- 4 files changed, 55 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 9aaf719542..1e1d265bfd 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,7 @@ Features -------- * Ensure the driver can connect when invalid peer hosts are in system.peers (PYTHON-1260) * Implement protocol v5 checksumming (PYTHON-1258) +* Fix the default cqlengine connection mechanism to work with Astra (PYTHON-1265) Bug Fixes --------- diff --git a/cassandra/cqlengine/connection.py b/cassandra/cqlengine/connection.py index c844ec7675..90e6d90317 100644 --- a/cassandra/cqlengine/connection.py +++ b/cassandra/cqlengine/connection.py @@ -99,6 +99,8 @@ def setup(self): return if 'cloud' in self.cluster_options: + if self.hosts: + log.warning("Ignoring hosts %s because a cloud config was provided.", self.hosts) self.cluster = Cluster(**self.cluster_options) else: self.cluster = Cluster(self.hosts, **self.cluster_options) @@ -305,6 +307,8 @@ def set_session(s): log.debug("cqlengine default connection initialized with %s", s) +# TODO next major: if a cloud config is specified in kwargs, hosts will be ignored. +# This function should be refactored to reflect this change. PYTHON-1265 def setup( hosts, default_keyspace, diff --git a/docs/cloud.rst b/docs/cloud.rst index 7ff7693736..3607ef8b4e 100644 --- a/docs/cloud.rst +++ b/docs/cloud.rst @@ -54,3 +54,36 @@ Limitations Event loops ^^^^^^^^^^^ Eventlet isn't yet supported for python 3.7+ due to an `issue in Eventlet `_. 
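Editor's note: for reference, a sketch of the combination the patch above guards against (the bundle path is a placeholder):

    from cassandra.cluster import Cluster

    try:
        # contact points combined with a cloud config are rejected by Cluster
        Cluster(['127.0.0.1'],
                cloud={'secure_connect_bundle': '/path/to/secure-connect-test.zip'})
    except ValueError as exc:
        assert 'cannot be specified with a cloud configuration' in str(exc)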
+ + + CqlEngine + ========= + + When using the object mapper, you can configure cqlengine with :func:`~.cqlengine.connection.set_session`: + + .. code:: python + + from cassandra.cqlengine import connection + ... + + c = Cluster(cloud={'secure_connect_bundle':'/path/to/secure-connect-test.zip'}, + auth_provider=PlainTextAuthProvider('user', 'pass')) + s = c.connect('myastrakeyspace') + connection.set_session(s) + ... + + If you are using some third-party libraries (flask, django, etc.), you might not be able to change the + configuration mechanism. For this reason, the `hosts` argument of the default + :func:`~.cqlengine.connection.setup` function will be ignored if a `cloud` config is provided: + + .. code:: python + + from cassandra.cqlengine import connection + ... + + connection.setup( + None, # or anything else + "myastrakeyspace", cloud={ + 'secure_connect_bundle':'/path/to/secure-connect-test.zip' + }, + auth_provider=PlainTextAuthProvider('user', 'pass')) diff --git a/tests/integration/cloud/test_cloud.py b/tests/integration/cloud/test_cloud.py index 5b9b268f5c..cd41b8f0e0 100644 --- a/tests/integration/cloud/test_cloud.py +++ b/tests/integration/cloud/test_cloud.py @@ -13,6 +13,10 @@ # limitations under the License from cassandra.datastax.cloud import parse_metadata_info from cassandra.query import SimpleStatement +from cassandra.cqlengine import connection +from cassandra.cqlengine.management import sync_table, create_keyspace_simple +from cassandra.cqlengine.models import Model +from cassandra.cqlengine import columns try: import unittest2 as unittest except ImportError: import unittest # noqa @@ -30,7 +34,7 @@ from mock import patch -from tests.integration import requirescloudproxy, TestCluster +from tests.integration import requirescloudproxy from tests.util import wait_until_not_raised from tests.integration.cloud import CloudProxyCluster, CLOUD_PROXY_SERVER @@ -143,7 +147,7 @@ def test_resolve_and_reconnect_on_node_down(self): wait_until_not_raised( lambda: self.assertEqual(len(self.hosts_up()), 3), 0.02, 250) - mocked_resolve.assert_called_once() + mocked_resolve.assert_called() def test_metadata_unreachable(self): with self.assertRaises(DriverException) as cm: @@ -234,3 +238,14 @@ def test_consistency_guardrails(self): self.session.execute(statement) except InvalidRequest: self.fail("InvalidRequest was incorrectly raised for write query at LOCAL QUORUM!") + + def test_cqlengine_can_connect(self): + class TestModel(Model): + id = columns.Integer(primary_key=True) + val = columns.Text() + + connection.setup(None, "test", cloud={'secure_connect_bundle': self.creds}) + create_keyspace_simple('test', 1) + sync_table(TestModel) + TestModel.objects.create(id=42, val='test') + self.assertEqual(len(TestModel.objects.all()), 1) From 8331eca6cc96d8bd3af2e37bc64693747515c2b6 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Mon, 26 Oct 2020 13:52:27 -0400 Subject: [PATCH 108/211] Update security documentation and examples to use PROTOCOL_TLS --- CHANGELOG.rst | 1 + cassandra/cluster.py | 2 +- docs/security.rst | 28 +++++++++++++-------------- tests/integration/cloud/test_cloud.py | 4 ++-- tests/integration/long/test_ssl.py | 12 ++++++------ 5 files changed, 24 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 1e1d265bfd..a01a7ee49a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -15,6 +15,7 @@ Bug Fixes Others ------ * Drop Python 3.4 support (PYTHON-1220) +* Update security documentation and examples to use PROTOCOL_TLS (PYTHON-1264) 3.24.0 ====== diff --git a/cassandra/cluster.py 
b/cassandra/cluster.py index ec91ce257a..cedcf8207b 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -785,7 +785,7 @@ def default_retry_policy(self, policy): By default, a ``ca_certs`` value should be supplied (the value should be a string pointing to the location of the CA certs file), and you probably - want to specify ``ssl_version`` as ``ssl.PROTOCOL_TLSv1`` to match + want to specify ``ssl_version`` as ``ssl.PROTOCOL_TLS`` to match Cassandra's default protocol. .. versionchanged:: 3.3.0 diff --git a/docs/security.rst b/docs/security.rst index 4cf3163fb0..6dd2624c24 100644 --- a/docs/security.rst +++ b/docs/security.rst @@ -119,9 +119,9 @@ The driver configuration: .. code-block:: python from cassandra.cluster import Cluster, Session - from ssl import SSLContext, PROTOCOL_TLSv1 + from ssl import SSLContext, PROTOCOL_TLS - ssl_context = SSLContext(PROTOCOL_TLSv1) + ssl_context = SSLContext(PROTOCOL_TLS) cluster = Cluster(['127.0.0.1'], ssl_context=ssl_context) session = cluster.connect() @@ -147,9 +147,9 @@ to `CERT_REQUIRED`. Otherwise, the loaded verify certificate will have no effect .. code-block:: python from cassandra.cluster import Cluster, Session - from ssl import SSLContext, PROTOCOL_TLSv1, CERT_REQUIRED + from ssl import SSLContext, PROTOCOL_TLS, CERT_REQUIRED - ssl_context = SSLContext(PROTOCOL_TLSv1) + ssl_context = SSLContext(PROTOCOL_TLS) ssl_context.load_verify_locations('/path/to/rootca.crt') ssl_context.verify_mode = CERT_REQUIRED @@ -161,9 +161,9 @@ Additionally, you can also force the driver to verify the `hostname` of the serv .. code-block:: python from cassandra.cluster import Cluster, Session - from ssl import SSLContext, PROTOCOL_TLSv1, CERT_REQUIRED + from ssl import SSLContext, PROTOCOL_TLS, CERT_REQUIRED - ssl_context = SSLContext(PROTOCOL_TLSv1) + ssl_context = SSLContext(PROTOCOL_TLS) ssl_context.load_verify_locations('/path/to/rootca.crt') ssl_context.verify_mode = CERT_REQUIRED ssl_context.check_hostname = True @@ -228,9 +228,9 @@ Finally, you can use that configuration with the following driver code: .. code-block:: python from cassandra.cluster import Cluster, Session - from ssl import SSLContext, PROTOCOL_TLSv1 + from ssl import SSLContext, PROTOCOL_TLS - ssl_context = SSLContext(PROTOCOL_TLSv1) + ssl_context = SSLContext(PROTOCOL_TLS) ssl_context.load_cert_chain( certfile='/path/to/client.crt_signed', keyfile='/path/to/client.key') @@ -251,9 +251,9 @@ The following driver code specifies that the connection should use two-way verif .. code-block:: python from cassandra.cluster import Cluster, Session - from ssl import SSLContext, PROTOCOL_TLSv1, CERT_REQUIRED + from ssl import SSLContext, PROTOCOL_TLS, CERT_REQUIRED - ssl_context = SSLContext(PROTOCOL_TLSv1) + ssl_context = SSLContext(PROTOCOL_TLS) ssl_context.load_verify_locations('/path/to/rootca.crt') ssl_context.verify_mode = CERT_REQUIRED ssl_context.load_cert_chain( @@ -275,7 +275,7 @@ for more details about ``SSLContext`` configuration. from cassandra.cluster import Cluster from cassandra.io.twistedreactor import TwistedConnection - ssl_context = SSL.Context(SSL.TLSv1_METHOD) + ssl_context = SSL.Context(SSL.TLSv1_2_METHOD) ssl_context.set_verify(SSL.VERIFY_PEER, callback=lambda _1, _2, _3, _4, ok: ok) ssl_context.use_certificate_file('/path/to/client.crt_signed') ssl_context.use_privatekey_file('/path/to/client.key') @@ -303,7 +303,7 @@ deprecated in the next major release. 
By default, a ``ca_certs`` value should be supplied (the value should be a string pointing to the location of the CA certs file), and you probably -want to specify ``ssl_version`` as ``ssl.PROTOCOL_TLSv1`` to match +want to specify ``ssl_version`` as ``ssl.PROTOCOL_TLS`` to match Cassandra's default protocol. For example: @@ -311,11 +311,11 @@ For example: .. code-block:: python from cassandra.cluster import Cluster - from ssl import PROTOCOL_TLSv1, CERT_REQUIRED + from ssl import PROTOCOL_TLS, CERT_REQUIRED ssl_opts = { 'ca_certs': '/path/to/my/ca.certs', - 'ssl_version': PROTOCOL_TLSv1, + 'ssl_version': PROTOCOL_TLS, 'cert_reqs': CERT_REQUIRED # Certificates are required and validated } cluster = Cluster(ssl_options=ssl_opts) diff --git a/tests/integration/cloud/test_cloud.py b/tests/integration/cloud/test_cloud.py index cd41b8f0e0..e0b9e2d382 100644 --- a/tests/integration/cloud/test_cloud.py +++ b/tests/integration/cloud/test_cloud.py @@ -24,7 +24,7 @@ import unittest # noqa import six -from ssl import SSLContext, PROTOCOL_TLSv1 +from ssl import SSLContext, PROTOCOL_TLS from cassandra import DriverException, ConsistencyLevel, InvalidRequest from cassandra.cluster import NoHostAvailable, ExecutionProfile, Cluster, _execution_profile_to_string @@ -92,7 +92,7 @@ def test_support_overriding_auth_provider(self): def test_error_overriding_ssl_context(self): with self.assertRaises(ValueError) as cm: - self.connect(self.creds, ssl_context=SSLContext(PROTOCOL_TLSv1)) + self.connect(self.creds, ssl_context=SSLContext(PROTOCOL_TLS)) self.assertIn('cannot be specified with a cloud configuration', str(cm.exception)) diff --git a/tests/integration/long/test_ssl.py b/tests/integration/long/test_ssl.py index 7698849945..4de46f4649 100644 --- a/tests/integration/long/test_ssl.py +++ b/tests/integration/long/test_ssl.py @@ -54,11 +54,11 @@ USES_PYOPENSSL = "twisted" in EVENT_LOOP_MANAGER or "eventlet" in EVENT_LOOP_MANAGER if "twisted" in EVENT_LOOP_MANAGER: import OpenSSL - ssl_version = OpenSSL.SSL.TLSv1_METHOD + ssl_version = OpenSSL.SSL.TLSv1_2_METHOD verify_certs = {'cert_reqs': SSL.VERIFY_PEER, 'check_hostname': True} else: - ssl_version = ssl.PROTOCOL_TLSv1 + ssl_version = ssl.PROTOCOL_TLS verify_certs = {'cert_reqs': ssl.CERT_REQUIRED, 'check_hostname': True} @@ -404,7 +404,7 @@ def test_can_connect_with_sslcontext_certificate(self): @test_category connection:ssl """ if USES_PYOPENSSL: - ssl_context = SSL.Context(SSL.TLSv1_METHOD) + ssl_context = SSL.Context(SSL.TLSv1_2_METHOD) ssl_context.load_verify_locations(CLIENT_CA_CERTS) else: ssl_context = ssl.SSLContext(ssl_version) @@ -428,7 +428,7 @@ def test_can_connect_with_ssl_client_auth_password_private_key(self): ssl_options = {} if USES_PYOPENSSL: - ssl_context = SSL.Context(SSL.TLSv1_METHOD) + ssl_context = SSL.Context(SSL.TLSv1_2_METHOD) ssl_context.use_certificate_file(abs_driver_certfile) with open(abs_driver_keyfile) as keyfile: key = crypto.load_privatekey(crypto.FILETYPE_PEM, keyfile.read(), b'cassandra') @@ -449,7 +449,7 @@ def test_can_connect_with_ssl_context_ca_host_match(self): """ ssl_options = {} if USES_PYOPENSSL: - ssl_context = SSL.Context(SSL.TLSv1_METHOD) + ssl_context = SSL.Context(SSL.TLSv1_2_METHOD) ssl_context.use_certificate_file(DRIVER_CERTFILE) with open(DRIVER_KEYFILE_ENCRYPTED) as keyfile: key = crypto.load_privatekey(crypto.FILETYPE_PEM, keyfile.read(), b'cassandra') @@ -472,7 +472,7 @@ def test_can_connect_with_ssl_context_ca_host_match(self): def test_cannot_connect_ssl_context_with_invalid_hostname(self): 
ssl_options = {} if USES_PYOPENSSL: - ssl_context = SSL.Context(SSL.TLSv1_METHOD) + ssl_context = SSL.Context(SSL.TLSv1_2_METHOD) ssl_context.use_certificate_file(DRIVER_CERTFILE) with open(DRIVER_KEYFILE_ENCRYPTED) as keyfile: key = crypto.load_privatekey(crypto.FILETYPE_PEM, keyfile.read(), b"cassandra") From 5c058524fbddabc6840d3189bf6df608aff77503 Mon Sep 17 00:00:00 2001 From: Aleksandr Sorokoumov Date: Tue, 10 Nov 2020 15:15:13 +0100 Subject: [PATCH 109/211] PYTHON-1269 Update list of CQL keywords * Add MBEAN and MBEANS to reserved words (CASSANDRA-15663) * Move UNSET outside of DSE specific (CASSANDRA-15663) --- CHANGELOG.rst | 1 + cassandra/metadata.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a01a7ee49a..97e3f2e8f9 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -11,6 +11,7 @@ Features Bug Fixes --------- * Asyncore race condition cause logging exception on shutdown (PYTHON-1266) +* Update list of reserved keywords (PYTHON-1269) Others ------ diff --git a/cassandra/metadata.py b/cassandra/metadata.py index df38fc6670..a82fbe48e3 100644 --- a/cassandra/metadata.py +++ b/cassandra/metadata.py @@ -52,18 +52,18 @@ 'counter', 'create', 'custom', 'date', 'decimal', 'default', 'delete', 'desc', 'describe', 'deterministic', 'distinct', 'double', 'drop', 'entries', 'execute', 'exists', 'filtering', 'finalfunc', 'float', 'from', 'frozen', 'full', 'function', 'functions', 'grant', 'if', 'in', 'index', 'inet', 'infinity', 'initcond', 'input', 'insert', 'int', 'into', 'is', 'json', - 'key', 'keys', 'keyspace', 'keyspaces', 'language', 'limit', 'list', 'login', 'map', 'materialized', 'modify', 'monotonic', 'nan', 'nologin', - 'norecursive', 'nosuperuser', 'not', 'null', 'of', 'on', 'options', 'or', 'order', 'password', 'permission', + 'key', 'keys', 'keyspace', 'keyspaces', 'language', 'limit', 'list', 'login', 'map', 'materialized', 'mbean', 'mbeans', 'modify', 'monotonic', + 'nan', 'nologin', 'norecursive', 'nosuperuser', 'not', 'null', 'of', 'on', 'options', 'or', 'order', 'password', 'permission', 'permissions', 'primary', 'rename', 'replace', 'returns', 'revoke', 'role', 'roles', 'schema', 'select', 'set', 'sfunc', 'smallint', 'static', 'storage', 'stype', 'superuser', 'table', 'text', 'time', 'timestamp', 'timeuuid', - 'tinyint', 'to', 'token', 'trigger', 'truncate', 'ttl', 'tuple', 'type', 'unlogged', 'update', 'use', 'user', + 'tinyint', 'to', 'token', 'trigger', 'truncate', 'ttl', 'tuple', 'type', 'unlogged', 'unset', 'update', 'use', 'user', 'users', 'using', 'uuid', 'values', 'varchar', 'varint', 'view', 'where', 'with', 'writetime', # DSE specifics "node", "nodes", "plan", "active", "application", "applications", "java", "executor", "executors", "std_out", "std_err", "renew", "delegation", "no", "redact", "token", "lowercasestring", "cluster", "authentication", "schemes", "scheme", "internal", "ldap", "kerberos", "remote", "object", "method", "call", "calls", "search", "schema", "config", "rows", - "columns", "profiles", "commit", "reload", "unset", "rebuild", "field", "workpool", "any", "submission", "indices", + "columns", "profiles", "commit", "reload", "rebuild", "field", "workpool", "any", "submission", "indices", "restrict", "unrestrict" )) """ From 95fae2fe82c6a2cff2be926d7ea6f9d930e81b22 Mon Sep 17 00:00:00 2001 From: Adam Holmberg Date: Mon, 1 Feb 2021 12:42:19 -0600 Subject: [PATCH 110/211] typos --- cassandra/cluster.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/cassandra/cluster.py b/cassandra/cluster.py index cedcf8207b..45e1fb410b 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -1570,7 +1570,7 @@ def set_core_connections_per_host(self, host_distance, core_connections): If :attr:`~.Cluster.protocol_version` is set to 3 or higher, this is not supported (there is always one connection per host, unless the host is remote and :attr:`connect_to_remote_hosts` is :const:`False`) - and using this will result in an :exc:`~.UnsupporteOperation`. + and using this will result in an :exc:`~.UnsupportedOperation`. """ if self.protocol_version >= 3: raise UnsupportedOperation( @@ -1603,7 +1603,7 @@ def set_max_connections_per_host(self, host_distance, max_connections): If :attr:`~.Cluster.protocol_version` is set to 3 or higher, this is not supported (there is always one connection per host, unless the host is remote and :attr:`connect_to_remote_hosts` is :const:`False`) - and using this will result in an :exc:`~.UnsupporteOperation`. + and using this will result in an :exc:`~.UnsupportedOperation`. """ if self.protocol_version >= 3: raise UnsupportedOperation( From a3a8cd098c0786df09cc885fef8f3bd65afe48cf Mon Sep 17 00:00:00 2001 From: Sam Tunnicliffe Date: Tue, 8 Dec 2020 10:21:41 +0000 Subject: [PATCH 111/211] PYTHON-1232 Bring protocol v5 out of beta --- cassandra/__init__.py | 11 +++++-- tests/integration/__init__.py | 9 +++--- tests/integration/standard/test_cluster.py | 36 ++++++++++++++-------- 3 files changed, 37 insertions(+), 19 deletions(-) diff --git a/cassandra/__init__.py b/cassandra/__init__.py index 100df2df17..4a273daff7 100644 --- a/cassandra/__init__.py +++ b/cassandra/__init__.py @@ -161,7 +161,12 @@ class ProtocolVersion(object): V5 = 5 """ - v5, in beta from 3.x+ + v5, in beta from 3.x+. 
Finalised in 4.0-beta4 + """ + + V6 = 6 + """ + v6, in beta from 4.0-beta4 """ DSE_V1 = 0x41 @@ -174,12 +179,12 @@ class ProtocolVersion(object): DSE private protocol v2, supported in DSE 6.0+ """ - SUPPORTED_VERSIONS = (DSE_V2, DSE_V1, V5, V4, V3, V2, V1) + SUPPORTED_VERSIONS = (DSE_V2, DSE_V1, V6, V5, V4, V3, V2, V1) """ A tuple of all supported protocol versions """ - BETA_VERSIONS = (V5,) + BETA_VERSIONS = (V6,) """ A tuple of all beta protocol versions """ diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index 1e1f582804..77923853f1 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -207,8 +207,6 @@ def get_default_protocol(): if DSE_VERSION: return ProtocolVersion.DSE_V2 else: - global ALLOW_BETA_PROTOCOL - ALLOW_BETA_PROTOCOL = True return ProtocolVersion.V5 if CASSANDRA_VERSION >= Version('3.10'): if DSE_VERSION: @@ -234,9 +232,12 @@ def get_supported_protocol_versions(): 3.X -> 4, 3 3.10(C*) -> 5(beta),4,3 3.10(DSE) -> DSE_V1,4,3 - 4.0(C*) -> 5(beta),4,3 + 4.0(C*) -> 6(beta),5,4,3 4.0(DSE) -> DSE_v2, DSE_V1,4,3 ` """ + if CASSANDRA_VERSION >= Version('4.0-beta4'): + if not DSE_VERSION: + return (3, 4, 5, 6) if CASSANDRA_VERSION >= Version('4.0-a'): if DSE_VERSION: return (3, 4, ProtocolVersion.DSE_V1, ProtocolVersion.DSE_V2) @@ -316,7 +317,7 @@ def _id_and_mark(f): notprotocolv1 = unittest.skipUnless(PROTOCOL_VERSION > 1, 'Protocol v1 not supported') lessthenprotocolv4 = unittest.skipUnless(PROTOCOL_VERSION < 4, 'Protocol versions 4 or greater not supported') greaterthanprotocolv3 = unittest.skipUnless(PROTOCOL_VERSION >= 4, 'Protocol versions less than 4 are not supported') -protocolv5 = unittest.skipUnless(5 in get_supported_protocol_versions(), 'Protocol versions less than 5 are not supported') +protocolv6 = unittest.skipUnless(6 in get_supported_protocol_versions(), 'Protocol versions less than 6 are not supported') greaterthancass20 = unittest.skipUnless(CASSANDRA_VERSION >= Version('2.1'), 'Cassandra version 2.1 or greater required') greaterthancass21 = unittest.skipUnless(CASSANDRA_VERSION >= Version('2.2'), 'Cassandra version 2.2 or greater required') greaterthanorequalcass30 = unittest.skipUnless(CASSANDRA_VERSION >= Version('3.0'), 'Cassandra version 3.0 or greater required') diff --git a/tests/integration/standard/test_cluster.py b/tests/integration/standard/test_cluster.py index cdb6f1f3b7..a99076ebaa 100644 --- a/tests/integration/standard/test_cluster.py +++ b/tests/integration/standard/test_cluster.py @@ -42,7 +42,7 @@ from tests import notwindows from tests.integration import use_singledc, get_server_versions, CASSANDRA_VERSION, \ execute_until_pass, execute_with_long_wait_retry, get_node, MockLoggingHandler, get_unsupported_lower_protocol, \ - get_unsupported_upper_protocol, protocolv5, local, CASSANDRA_IP, greaterthanorequalcass30, lessthanorequalcass40, \ + get_unsupported_upper_protocol, protocolv6, local, CASSANDRA_IP, greaterthanorequalcass30, lessthanorequalcass40, \ DSE_VERSION, TestCluster, PROTOCOL_VERSION from tests.integration.util import assert_quiescent_pool_state import sys @@ -261,6 +261,18 @@ def test_protocol_negotiation(self): elif DSE_VERSION and DSE_VERSION >= Version("5.1"): self.assertEqual(updated_protocol_version, cassandra.ProtocolVersion.DSE_V1) self.assertEqual(updated_cluster_version, cassandra.ProtocolVersion.DSE_V1) + elif CASSANDRA_VERSION >= Version('4.0-beta4'): + self.assertEqual(updated_protocol_version, cassandra.ProtocolVersion.V5) + 
self.assertEqual(updated_cluster_version, cassandra.ProtocolVersion.V5) + elif CASSANDRA_VERSION >= Version('4.0-a'): + self.assertEqual(updated_protocol_version, cassandra.ProtocolVersion.V4) + self.assertEqual(updated_cluster_version, cassandra.ProtocolVersion.V4) + elif CASSANDRA_VERSION >= Version('3.11'): + self.assertEqual(updated_protocol_version, cassandra.ProtocolVersion.V4) + self.assertEqual(updated_cluster_version, cassandra.ProtocolVersion.V4) + elif CASSANDRA_VERSION >= Version('3.0'): + self.assertEqual(updated_protocol_version, cassandra.ProtocolVersion.V4) + self.assertEqual(updated_cluster_version, cassandra.ProtocolVersion.V4) elif CASSANDRA_VERSION >= Version('2.2'): self.assertEqual(updated_protocol_version, 4) self.assertEqual(updated_cluster_version, 4) @@ -1473,42 +1485,42 @@ def test_prepare_on_ignored_hosts(self): cluster.shutdown() -@protocolv5 +@protocolv6 class BetaProtocolTest(unittest.TestCase): - @protocolv5 + @protocolv6 def test_invalid_protocol_version_beta_option(self): """ - Test cluster connection with protocol v5 and beta flag not set + Test cluster connection with protocol v6 and beta flag not set @since 3.7.0 - @jira_ticket PYTHON-614 - @expected_result client shouldn't connect with V5 and no beta flag set + @jira_ticket PYTHON-614, PYTHON-1232 + @expected_result client shouldn't connect with V6 and no beta flag set @test_category connection """ - cluster = TestCluster(protocol_version=cassandra.ProtocolVersion.V5, allow_beta_protocol_version=False) + cluster = TestCluster(protocol_version=cassandra.ProtocolVersion.V6, allow_beta_protocol_version=False) try: with self.assertRaises(NoHostAvailable): cluster.connect() except Exception as e: self.fail("Unexpected error encountered {0}".format(e.message)) - @protocolv5 + @protocolv6 def test_valid_protocol_version_beta_options_connect(self): """ Test cluster connection with protocol version 5 and beta flag set @since 3.7.0 - @jira_ticket PYTHON-614 - @expected_result client should connect with protocol v5 and beta flag set. + @jira_ticket PYTHON-614, PYTHON-1232 + @expected_result client should connect with protocol v6 and beta flag set. 
        @test_category connection
        """
-        cluster = Cluster(protocol_version=cassandra.ProtocolVersion.V5, allow_beta_protocol_version=True)
+        cluster = Cluster(protocol_version=cassandra.ProtocolVersion.V6, allow_beta_protocol_version=True)
         session = cluster.connect()
-        self.assertEqual(cluster.protocol_version, cassandra.ProtocolVersion.V5)
+        self.assertEqual(cluster.protocol_version, cassandra.ProtocolVersion.V6)
         self.assertTrue(session.execute("select release_version from system.local")[0])
         cluster.shutdown()

From 64d47649a16a2862724d20082d2f5e947fe1fed6 Mon Sep 17 00:00:00 2001
From: Alan Boudreault
Date: Sat, 6 Feb 2021 14:09:29 -0500
Subject: [PATCH 112/211] Make sure protocol v5 is downgraded for C* <4 if it
 is not explicitly specified

---
 cassandra/cluster.py  | 10 +++++++++-
 cassandra/protocol.py |  4 ++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/cassandra/cluster.py b/cassandra/cluster.py
index 45e1fb410b..7e101afba8 100644
--- a/cassandra/cluster.py
+++ b/cassandra/cluster.py
@@ -63,7 +63,7 @@
                        BatchMessage, RESULT_KIND_PREPARED,
                        RESULT_KIND_SET_KEYSPACE, RESULT_KIND_ROWS,
                        RESULT_KIND_SCHEMA_CHANGE, ProtocolHandler,
-                       RESULT_KIND_VOID)
+                       RESULT_KIND_VOID, ProtocolException)
 from cassandra.metadata import Metadata, protect_name, murmur3, _NodeInfo
 from cassandra.policies import (TokenAwarePolicy, DCAwareRoundRobinPolicy, SimpleConvictionPolicy,
                                 ExponentialReconnectionPolicy, HostDistance,
@@ -3548,6 +3548,14 @@ def _try_connect(self, host):
                 break
             except ProtocolVersionUnsupported as e:
                 self._cluster.protocol_downgrade(host.endpoint, e.startup_version)
+            except ProtocolException as e:
+                # protocol v5 is out of beta in C* >=4.0-beta5 and is now the default driver
+                # protocol version. If the protocol version was not explicitly specified
+                # and the server raises a beta protocol error, we should downgrade.
+                if not self._cluster._protocol_version_explicit and e.is_beta_protocol_error:
+                    self._cluster.protocol_downgrade(host.endpoint, self._cluster.protocol_version)
+                else:
+                    raise

         log.debug("[control connection] Established new connection %r, "
                   "registering watchers and refreshing schema and topology",

diff --git a/cassandra/protocol.py b/cassandra/protocol.py
index c454824637..ed92a76679 100644
--- a/cassandra/protocol.py
+++ b/cassandra/protocol.py
@@ -180,6 +180,10 @@ class ProtocolException(ErrorMessageSub):
     summary = 'Protocol error'
     error_code = 0x000A

+    @property
+    def is_beta_protocol_error(self):
+        return 'USE_BETA flag is unset' in str(self)
+

 class BadCredentials(ErrorMessageSub):
     summary = 'Bad credentials'

From 942ec529fade2782169903bdc9d063f94170affb Mon Sep 17 00:00:00 2001
From: Alan Boudreault
Date: Sat, 6 Feb 2021 14:09:52 -0500
Subject: [PATCH 113/211] some test fixes

---
 cassandra/__init__.py                      | 4 ++--
 tests/integration/__init__.py              | 2 +-
 tests/integration/standard/test_cluster.py | 2 +-
 tests/unit/test_cluster.py                 | 2 ++
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/cassandra/__init__.py b/cassandra/__init__.py
index 4a273daff7..1e16bca287 100644
--- a/cassandra/__init__.py
+++ b/cassandra/__init__.py
@@ -161,12 +161,12 @@ class ProtocolVersion(object):

     V5 = 5
     """
-    v5, in beta from 3.x+. Finalised in 4.0-beta4
+    v5, in beta from 3.x+.
Finalised in 4.0-beta5 """ V6 = 6 """ - v6, in beta from 4.0-beta4 + v6, in beta from 4.0-beta5 """ DSE_V1 = 0x41 diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index 77923853f1..9d350af707 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -235,7 +235,7 @@ def get_supported_protocol_versions(): 4.0(C*) -> 6(beta),5,4,3 4.0(DSE) -> DSE_v2, DSE_V1,4,3 ` """ - if CASSANDRA_VERSION >= Version('4.0-beta4'): + if CASSANDRA_VERSION >= Version('4.0-beta5'): if not DSE_VERSION: return (3, 4, 5, 6) if CASSANDRA_VERSION >= Version('4.0-a'): diff --git a/tests/integration/standard/test_cluster.py b/tests/integration/standard/test_cluster.py index a99076ebaa..c7d8266fd9 100644 --- a/tests/integration/standard/test_cluster.py +++ b/tests/integration/standard/test_cluster.py @@ -261,7 +261,7 @@ def test_protocol_negotiation(self): elif DSE_VERSION and DSE_VERSION >= Version("5.1"): self.assertEqual(updated_protocol_version, cassandra.ProtocolVersion.DSE_V1) self.assertEqual(updated_cluster_version, cassandra.ProtocolVersion.DSE_V1) - elif CASSANDRA_VERSION >= Version('4.0-beta4'): + elif CASSANDRA_VERSION >= Version('4.0-beta5'): self.assertEqual(updated_protocol_version, cassandra.ProtocolVersion.V5) self.assertEqual(updated_cluster_version, cassandra.ProtocolVersion.V5) elif CASSANDRA_VERSION >= Version('4.0-a'): diff --git a/tests/unit/test_cluster.py b/tests/unit/test_cluster.py index 249c0a17cc..620f642084 100644 --- a/tests/unit/test_cluster.py +++ b/tests/unit/test_cluster.py @@ -209,6 +209,8 @@ def test_protocol_downgrade_test(self): lower = ProtocolVersion.get_lower_supported(ProtocolVersion.DSE_V2) self.assertEqual(ProtocolVersion.DSE_V1, lower) lower = ProtocolVersion.get_lower_supported(ProtocolVersion.DSE_V1) + self.assertEqual(ProtocolVersion.V5,lower) + lower = ProtocolVersion.get_lower_supported(ProtocolVersion.V5) self.assertEqual(ProtocolVersion.V4,lower) lower = ProtocolVersion.get_lower_supported(ProtocolVersion.V4) self.assertEqual(ProtocolVersion.V3,lower) From 8d62b28fae3d54520c460ab1fd49cd36f24d8d50 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Wed, 10 Feb 2021 08:24:13 -0500 Subject: [PATCH 114/211] make sure simulacron/test_empty_column uses protocolv4 --- tests/integration/simulacron/test_empty_column.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/simulacron/test_empty_column.py b/tests/integration/simulacron/test_empty_column.py index bd7fe6ead0..91c76985e1 100644 --- a/tests/integration/simulacron/test_empty_column.py +++ b/tests/integration/simulacron/test_empty_column.py @@ -27,8 +27,8 @@ from cassandra.cqlengine.connection import set_session from cassandra.cqlengine.models import Model -from tests.integration import PROTOCOL_VERSION, requiressimulacron -from tests.integration.simulacron import SimulacronCluster +from tests.integration import requiressimulacron +from tests.integration.simulacron import PROTOCOL_VERSION, SimulacronCluster from tests.integration.simulacron.utils import PrimeQuery, prime_request From 56053664652ac99f646e3472ce2854aa858fa483 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Wed, 10 Feb 2021 08:26:21 -0500 Subject: [PATCH 115/211] Enable checksumming before sending the auth message --- cassandra/connection.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cassandra/connection.py b/cassandra/connection.py index 477eaf2f28..ca1e7531db 100644 --- a/cassandra/connection.py +++ b/cassandra/connection.py @@ -1345,6 +1345,9 @@ def 
_handle_startup_response(self, startup_response, did_authenticate=False): "if DSE authentication is configured with transitional mode" % (self.host,)) raise AuthenticationFailed('Remote end requires authentication') + if ProtocolVersion.has_checksumming_support(self.protocol_version): + self._enable_checksumming() + if isinstance(self.authenticator, dict): log.debug("Sending credentials-based auth response on %s", self) cm = CredentialsMessage(creds=self.authenticator) From 4a00c2233a0e93c493383cd789071731721f1dc8 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Wed, 10 Feb 2021 12:08:06 -0500 Subject: [PATCH 116/211] make sure to enable compression before sending the auth creds --- cassandra/connection.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cassandra/connection.py b/cassandra/connection.py index ca1e7531db..48b3caefed 100644 --- a/cassandra/connection.py +++ b/cassandra/connection.py @@ -897,6 +897,10 @@ def _connect_socket(self): for args in self.sockopts: self._socket.setsockopt(*args) + def _enable_compression(self): + if self._compressor: + self.compressor = self._compressor + def _enable_checksumming(self): self._io_buffer.set_checksumming_buffer() self._is_checksumming_enabled = True @@ -1328,8 +1332,7 @@ def _handle_startup_response(self, startup_response, did_authenticate=False): self.authenticator.__class__.__name__) log.debug("Got ReadyMessage on new connection (%s) from %s", id(self), self.endpoint) - if self._compressor: - self.compressor = self._compressor + self._enable_compression() if ProtocolVersion.has_checksumming_support(self.protocol_version): self._enable_checksumming() @@ -1345,6 +1348,7 @@ def _handle_startup_response(self, startup_response, did_authenticate=False): "if DSE authentication is configured with transitional mode" % (self.host,)) raise AuthenticationFailed('Remote end requires authentication') + self._enable_compression() if ProtocolVersion.has_checksumming_support(self.protocol_version): self._enable_checksumming() From 3ab8baedd2b07d713534641563169c049b3476c8 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Sat, 27 Feb 2021 13:42:05 -0500 Subject: [PATCH 117/211] Make sure no data is lost when there is not enough data in the buffer to read a segment --- cassandra/connection.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cassandra/connection.py b/cassandra/connection.py index 48b3caefed..77e12fa714 100644 --- a/cassandra/connection.py +++ b/cassandra/connection.py @@ -1122,8 +1122,9 @@ def _process_segment_buffer(self): segment = self._segment_codec.decode(self._iobuf, segment_header) self._io_buffer.cql_frame_buffer.write(segment.payload) else: - # not enough data to read the segment - self._io_buffer.io_buffer.seek(0, 2) + # not enough data to read the segment. reset the buffer pointer at the + # beginning to not lose what we previously read (header). 
+ self._io_buffer.io_buffer.seek(0) except CrcException as exc: # re-raise an exception that inherits from ConnectionException raise CrcMismatchException(str(exc), self.endpoint) From eba143ab84bc42654cffd341310ef92c1ff8c072 Mon Sep 17 00:00:00 2001 From: Sam Tunnicliffe Date: Mon, 1 Mar 2021 16:15:45 +0000 Subject: [PATCH 118/211] Yield from process_io_buffer when containing an incomplete segment --- cassandra/connection.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/cassandra/connection.py b/cassandra/connection.py index 77e12fa714..0d8a50e76f 100644 --- a/cassandra/connection.py +++ b/cassandra/connection.py @@ -621,6 +621,7 @@ class _ConnectionIOBuffer(object): _io_buffer = None _cql_frame_buffer = None _connection = None + _segment_consumed = False def __init__(self, connection): self._io_buffer = io.BytesIO() @@ -643,6 +644,10 @@ def set_checksumming_buffer(self): def is_checksumming_enabled(self): return self._connection._is_checksumming_enabled + @property + def has_consumed_segment(self): + return self._segment_consumed; + def readable_io_bytes(self): return self.io_buffer.tell() @@ -1118,23 +1123,33 @@ def _process_segment_buffer(self): try: self._io_buffer.io_buffer.seek(0) segment_header = self._segment_codec.decode_header(self._io_buffer.io_buffer) + if readable_bytes >= segment_header.segment_length: segment = self._segment_codec.decode(self._iobuf, segment_header) + self._io_buffer._segment_consumed = True self._io_buffer.cql_frame_buffer.write(segment.payload) else: # not enough data to read the segment. reset the buffer pointer at the # beginning to not lose what we previously read (header). + self._io_buffer._segment_consumed = False self._io_buffer.io_buffer.seek(0) except CrcException as exc: # re-raise an exception that inherits from ConnectionException raise CrcMismatchException(str(exc), self.endpoint) + else: + self._io_buffer._segment_consumed = False def process_io_buffer(self): while True: - if self._is_checksumming_enabled: + if self._is_checksumming_enabled and self._io_buffer.readable_io_bytes(): self._process_segment_buffer() self._io_buffer.reset_io_buffer() + if self._is_checksumming_enabled and not self._io_buffer.has_consumed_segment: + # We couldn't read an entire segment from the io buffer, so return + # control to allow more bytes to be read off the wire + return + if not self._current_frame: pos = self._read_frame_header() else: From 457a4e11d61b71bf44c38ce45113bbbb6841c113 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Wed, 17 Mar 2021 09:23:35 -0400 Subject: [PATCH 119/211] Fix simulacron.test_policies.test_delay_can_be_0 to not use protocol v5 --- tests/integration/simulacron/test_policies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/simulacron/test_policies.py b/tests/integration/simulacron/test_policies.py index 855a4de3ca..da093be43c 100644 --- a/tests/integration/simulacron/test_policies.py +++ b/tests/integration/simulacron/test_policies.py @@ -184,7 +184,7 @@ def test_delay_can_be_0(self): spec = ExecutionProfile(load_balancing_policy=RoundRobinPolicy(), speculative_execution_policy=ConstantSpeculativeExecutionPolicy(0, number_of_requests)) - cluster = Cluster(compression=False) + cluster = Cluster(protocol_version=PROTOCOL_VERSION, compression=False) cluster.add_execution_profile("spec", spec) session = cluster.connect(wait_for_all_pools=True) self.addCleanup(cluster.shutdown) From 6b963f6e1b2da11eeb96eab5b9023c05a6f76d34 Mon Sep 17 00:00:00 2001 From: 
Alan Boudreault
Date: Wed, 17 Mar 2021 13:42:04 -0400
Subject: [PATCH 120/211] ssl tests: update all SSL certs

---
 tests/integration/long/ssl/127.0.0.1.keystore |  Bin 3775 -> 4317 bytes
 .../integration/long/ssl/cassandra.truststore |  Bin 846 -> 1074 bytes
 tests/integration/long/ssl/client.crt_signed  |  28 +++++-----
 tests/integration/long/ssl/client.key         |  52 +++++++++---------
 .../integration/long/ssl/client_encrypted.key |  52 +++++++++---------
 tests/integration/long/ssl/rootCa.crt         |  28 +++++-----
 6 files changed, 80 insertions(+), 80 deletions(-)

diff --git a/tests/integration/long/ssl/127.0.0.1.keystore b/tests/integration/long/ssl/127.0.0.1.keystore
index 3855f00a1ae458fb330deaaf11d623e203ef3ccc..98193ab54e271fc4d241442371204f3a63457262 100644
GIT binary patch
[binary keystore patch data omitted]

diff --git a/tests/integration/long/ssl/cassandra.truststore b/tests/integration/long/ssl/cassandra.truststore
index 4de3d319197650252f27bea858bd279736f31241..b31e34b8aaad16a24f56d19d3f30f097727816a1 100644
GIT binary patch
[binary truststore patch data omitted]

Date: Thu, 18 Mar 2021 10:47:17 -0400
Subject: [PATCH 121/211] release 3.25: changelog & version

---
 CHANGELOG.rst         | 2 +-
 cassandra/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 97e3f2e8f9..d2d577c957 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,6 +1,6 @@
 3.25.0
 ======
-Not released
+March 18, 2021

 Features
 --------

diff --git a/cassandra/__init__.py b/cassandra/__init__.py
index 1e16bca287..5739d5d98e 100644
--- a/cassandra/__init__.py
+++ b/cassandra/__init__.py
@@ -22,7 +22,7 @@ def emit(self, record):
 logging.getLogger('cassandra').addHandler(NullHandler())

-__version_info__ = (3, 24, 0)
+__version_info__ = (3, 25, 0)

 __version__ = '.'.join(map(str, __version_info__))

From 15d715f4e686032b02ce785eca1d176d2b25e32b Mon Sep 17 00:00:00 2001
From: Alan Boudreault
Date: Thu, 18 Mar 2021 10:48:29 -0400
Subject: [PATCH 122/211] release 3.25: docs

---
 docs.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs.yaml b/docs.yaml
index eeccbe16b6..8e29b942e3 100644
--- a/docs.yaml
+++ b/docs.yaml
@@ -22,6 +22,8 @@ sections:
       # build extensions like libev
       CASS_DRIVER_NO_CYTHON=1 python setup.py build_ext --inplace --force
 versions:
+  - name: '3.25'
+    ref: a83c36a5
   - name: '3.24'
     ref: 21cac12b
   - name: '3.23'

From 3ad2d1ecc1704aeef27214151111918e055ca13c Mon Sep 17 00:00:00 2001
From: Bret McGuire
Date: Tue, 14 Sep 2021 23:43:29 -0500
Subject: [PATCH 123/211] Fixes to the Travis build. (#1111)

These fixes were originally implemented by user tbbharaj in
https://github.com/datastax/python-driver/pull/1108. Extracting them into
their own PR since 1108 is still being worked and I'd very much like to
benefit from this work across _all_ PRs against python-driver. Major thanks
to tbbharaj for the original work here.
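The tox.ini hunk below pins greenlet to a narrow version window. As an illustrative aside (not part of the patch), an installed greenlet can be checked against that same specifier with pkg_resources, which ships with the setuptools package the .travis.yml hunk upgrades:

    import pkg_resources

    # Mirrors the greenlet>=0.4.14,<0.4.17 pin added in the tox.ini hunk below.
    requirement = pkg_resources.Requirement.parse('greenlet>=0.4.14,<0.4.17')
    installed = pkg_resources.get_distribution('greenlet')
    assert installed.version in requirement, installed.version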
---
 .travis.yml | 1 +
 tox.ini     | 1 +
 2 files changed, 2 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index b485e21227..7e1e374822 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,6 +24,7 @@ addons:
       - libev-dev

 install:
+  - pip install --upgrade setuptools
   - pip install tox-travis
   - if [[ $TRAVIS_PYTHON_VERSION != pypy3.5 ]]; then pip install lz4; fi

diff --git a/tox.ini b/tox.ini
index d883a1f973..6d94e11247 100644
--- a/tox.ini
+++ b/tox.ini
@@ -12,6 +12,7 @@ deps = nose
        pure-sasl
        kerberos
        futurist
+       greenlet>=0.4.14,<0.4.17
 lz4_dependency = py27,py35,py36,py37,py38: lz4

 [testenv]

From a51ed116471a63a65c63db6356a3ade9efdd1b85 Mon Sep 17 00:00:00 2001
From: Piotr Sarna
Date: Wed, 15 Sep 2021 07:06:54 +0200
Subject: [PATCH 124/211] Merge pull request #1103 from psarna/fix_deprecation_in_tracing

Tracing code uses a deprecated mechanism for fetching the first row when
populating traces. The behavior is now fixed.

---
 cassandra/query.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cassandra/query.py b/cassandra/query.py
index 0e7a41dc2d..f7a5b8fdf5 100644
--- a/cassandra/query.py
+++ b/cassandra/query.py
@@ -996,7 +996,8 @@ def populate(self, max_wait=2.0, wait_for_complete=True, query_cl=None):
                 SimpleStatement(self._SELECT_SESSIONS_FORMAT, consistency_level=query_cl), (self.trace_id,),
                 time_spent, max_wait)

             # PYTHON-730: There is race condition that the duration mutation is written before started_at the for fast queries
-            is_complete = session_results and session_results[0].duration is not None and session_results[0].started_at is not None
+            session_row = session_results.one() if session_results else None
+            is_complete = session_row is not None and session_row.duration is not None and session_row.started_at is not None
             if not session_results or (wait_for_complete and not is_complete):
                 time.sleep(self._BASE_RETRY_SLEEP * (2 ** attempt))
                 attempt += 1
@@ -1006,7 +1007,6 @@ def populate(self, max_wait=2.0, wait_for_complete=True, query_cl=None):
             else:
                 log.debug("Fetching parital trace info for trace ID: %s", self.trace_id)

-            session_row = session_results[0]
             self.request_type = session_row.request
             self.duration = timedelta(microseconds=session_row.duration) if is_complete else None
             self.started_at = session_row.started_at

From 1d9077d3f4c937929acc14f45c7693e76dde39a9 Mon Sep 17 00:00:00 2001
From: Ultrabug
Date: Fri, 17 Sep 2021 19:40:42 +0200
Subject: [PATCH 125/211] Merge pull request #1103 from numberly/fix_empty_paging

This commit fixes a bug where, when iterating over a ResultSet, the driver
aborted the iteration if the server returned an empty page even though more
pages were available.

The Python driver is affected by the same problem as JAVA-2934
Python driver is affected by the same problem as JAVA-2934 This fix is similar to https://github.com/datastax/java-driver/pull/1544 --- cassandra/cluster.py | 1 + tests/unit/test_resultset.py | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/cassandra/cluster.py b/cassandra/cluster.py index 7e101afba8..c2d2e719ac 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -5141,6 +5141,7 @@ def next(self): if not self.response_future._continuous_paging_session: self.fetch_next_page() self._page_iter = iter(self._current_rows) + return self.next() return next(self._page_iter) diff --git a/tests/unit/test_resultset.py b/tests/unit/test_resultset.py index 1af3e849b6..b37c3a2594 100644 --- a/tests/unit/test_resultset.py +++ b/tests/unit/test_resultset.py @@ -41,6 +41,19 @@ def test_iter_paged(self): type(response_future).has_more_pages = PropertyMock(side_effect=(True, True, False)) # after init to avoid side effects being consumed by init self.assertListEqual(list(itr), expected) + def test_iter_paged_with_empty_pages(self): + expected = list(range(10)) + response_future = Mock(has_more_pages=True, _continuous_paging_session=None) + response_future.result.side_effect = [ + ResultSet(Mock(), []), + ResultSet(Mock(), [0, 1, 2, 3, 4]), + ResultSet(Mock(), []), + ResultSet(Mock(), [5, 6, 7, 8, 9]), + ] + rs = ResultSet(response_future, []) + itr = iter(rs) + self.assertListEqual(list(itr), expected) + def test_list_non_paged(self): # list access on RS for backwards-compatibility expected = list(range(10)) From 12a8adce943fe37a05ad6580e8bd302b65c2d93a Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Fri, 17 Sep 2021 12:47:48 -0500 Subject: [PATCH 126/211] Comment update following off of https://github.com/datastax/python-driver/pull/1110 --- cassandra/cluster.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cassandra/cluster.py b/cassandra/cluster.py index c2d2e719ac..dc850ae809 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -5141,6 +5141,11 @@ def next(self): if not self.response_future._continuous_paging_session: self.fetch_next_page() self._page_iter = iter(self._current_rows) + + # Some servers can return empty pages in this case; Scylla is known to do + # so in some circumstances. Guard against this by recursing to handle + # the next(iter) call. If we have an empty page in that case it will + # get handled by the StopIteration handler when we recurse. 
             return self.next()

         return next(self._page_iter)

From 175942852bcfc97bac823834a0b170b0faa4adb0 Mon Sep 17 00:00:00 2001
From: Oren Efraimov
Date: Tue, 23 Nov 2021 19:10:50 +0200
Subject: [PATCH 127/211] Merge pull request #1116 from Orenef11/fix_default_argument_value

Removing Python mutable defaults from methods in tests/integration/__init__.py

Co-authored-by: Efraimov Oren

---
 tests/integration/__init__.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
index 9d350af707..70ec11c213 100644
--- a/tests/integration/__init__.py
+++ b/tests/integration/__init__.py
@@ -383,15 +383,15 @@ def get_node(node_id):
     return CCM_CLUSTER.nodes['node%s' % node_id]

-def use_multidc(dc_list, workloads=[]):
+def use_multidc(dc_list, workloads=None):
     use_cluster(MULTIDC_CLUSTER_NAME, dc_list, start=True, workloads=workloads)

-def use_singledc(start=True, workloads=[], use_single_interface=USE_SINGLE_INTERFACE):
+def use_singledc(start=True, workloads=None, use_single_interface=USE_SINGLE_INTERFACE):
     use_cluster(CLUSTER_NAME, [3], start=start, workloads=workloads,
                 use_single_interface=use_single_interface)

-def use_single_node(start=True, workloads=[], configuration_options={}, dse_options={}):
+def use_single_node(start=True, workloads=None, configuration_options=None, dse_options=None):
     use_cluster(SINGLE_NODE_CLUSTER_NAME, [1], start=start, workloads=workloads,
                 configuration_options=configuration_options, dse_options=dse_options)

@@ -453,10 +453,11 @@ def start_cluster_wait_for_up(cluster):

 def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, set_keyspace=True, ccm_options=None,
-                configuration_options={}, dse_options={}, use_single_interface=USE_SINGLE_INTERFACE):
+                configuration_options=None, dse_options=None, use_single_interface=USE_SINGLE_INTERFACE):
+    configuration_options = configuration_options or {}
+    dse_options = dse_options or {}
+    workloads = workloads or []
     dse_cluster = True if DSE_VERSION else False
-    if not workloads:
-        workloads = []
     if ccm_options is None and DSE_VERSION:
         ccm_options = {"version": CCM_VERSION}

From 387150acc365b6cf1daaee58c62db13e4929099a Mon Sep 17 00:00:00 2001
From: Piotr Jastrzębski
Date: Tue, 23 Nov 2021 18:18:33 +0100
Subject: [PATCH 128/211] Merge pull request #1114 from haaawk/stream_ids_fix

Stop reusing stream ids of requests that have timed out due to
client-side timeout (#1114)

* ResponseFuture: do not return the stream ID on client timeout

When a timeout occurs, the ResponseFuture associated with the query
returns its stream ID to the associated connection's free stream ID pool -
so that the stream ID can be immediately reused by another query. However,
that is incorrect and dangerous. If query A times out before it receives a
response from the cluster, a different query B might be issued on the same
connection and stream. If the response for query A arrives earlier than
the response for query B, the first one might be misinterpreted as the
response for query B.

This commit changes the logic so that stream IDs are not returned on
timeout - now, they are only returned after receiving a response.

* Connection: fix tracking of in_flight requests

This commit fixes tracking of in_flight requests. Before it, in case of a
client-side timeout, the response ID was not returned to the pool, but the
in_flight counter was decremented anyway.
This counter is used to determine if there is a need to wait for stream
IDs to be freed - without this patch, it could happen that the driver
thought it could initiate another request because the in_flight counter
was low, but there weren't any free stream IDs to allocate, so an
assertion was triggered and the connection was defuncted and opened again.

Now, requests timed out on the client side are tracked in the
orphaned_request_ids field, and the in_flight counter is decremented only
after the response is received.

* Connection: notify owning pool about released orphaned streams

Before this patch, the following situation could occur:

1. On a single connection, multiple requests are spawned up to the
   maximum concurrency,
2. We want to issue more requests but we need to wait on a condition
   variable because requests spawned in 1. took all stream IDs and we
   need to wait until some of them are freed,
3. All requests from point 1. time out on the client side - we cannot
   free their stream IDs until the database node responds,
4. Responses for requests issued in point 1. arrive, but the Connection
   class has no access to the condition variable mentioned in point 2.,
   so no requests from point 2. are admitted,
5. Requests from point 2. waiting on the condition variable time out even
   though stream IDs are available.

This commit adds an _on_orphaned_stream_released field to the Connection
class; it now notifies the owning pool, via the
_on_orphaned_stream_released callback, when a timed-out request receives
a late response and a stream ID is freed.

* HostConnection: implement replacing overloaded connections

In a situation of very high overload or poor networking conditions, it
might happen that there is a large number of outstanding requests on a
single connection. Each request reserves a stream ID which cannot be
reused until a response for it arrives, even if the request already timed
out on the client side. Because the pool of available stream IDs for a
single connection is limited, such a situation might cause the set of
free stream IDs to shrink to a very small size (including zero), which
will drastically reduce the available concurrency on the connection, or
even render it unusable for some time.

In order to prevent this, the following strategy is adopted: when the
number of orphaned stream IDs reaches a certain threshold (e.g. 75% of
all available stream IDs), the connection becomes marked as overloaded.
Meanwhile, a new connection is opened - when it becomes available, it
replaces the old one, and the old connection is moved to "trash" where it
waits until all its outstanding requests either respond or time out.

This feature is implemented for HostConnection but not for
HostConnectionPool, which means that it will only work for clusters which
use protocol v3 or newer.

This fix is heavily inspired by the fix for JAVA-1519.
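In outline, the accounting described above works as in the following simplified sketch. This is an illustrative model, not the driver's actual Connection class; only the field names (in_flight, orphaned_request_ids, orphaned_threshold, orphaned_threshold_reached) mirror the ones added by this patch, the 75% ratio matches the threshold mentioned above, and the capacity value is a placeholder.

    class ConnectionModel(object):
        """Illustrative model of the orphaned-stream accounting; not driver code."""

        max_in_flight = 128                          # placeholder capacity
        orphaned_threshold = 3 * max_in_flight // 4  # the 75% mark described above

        def __init__(self):
            self.in_flight = 0                 # includes orphaned requests
            self.orphaned_request_ids = set()
            self.orphaned_threshold_reached = False

        def on_client_timeout(self, stream_id):
            # The stream ID is NOT freed: the node may still respond on it.
            self.orphaned_request_ids.add(stream_id)
            if len(self.orphaned_request_ids) >= self.orphaned_threshold:
                # The owning pool sees this flag and replaces the connection.
                self.orphaned_threshold_reached = True

        def on_late_response(self, stream_id):
            # Only a response releases the stream: decrement in_flight and
            # let the pool know a stream ID became available again.
            self.orphaned_request_ids.discard(stream_id)
            self.in_flight -= 1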
Co-authored-by: Piotr Dulikowski --- cassandra/cluster.py | 11 ++- cassandra/connection.py | 32 +++++++- cassandra/pool.py | 97 +++++++++++++++++++----- tests/unit/.noseids | Bin 0 -> 30098 bytes tests/unit/test_host_connection_pool.py | 20 ++--- tests/unit/test_response_future.py | 28 +++++++ 6 files changed, 158 insertions(+), 30 deletions(-) create mode 100644 tests/unit/.noseids diff --git a/cassandra/cluster.py b/cassandra/cluster.py index dc850ae809..cf78725f17 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -4361,10 +4361,17 @@ def _on_timeout(self, _attempts=0): pool = self.session._pools.get(self._current_host) if pool and not pool.is_shutdown: + # Do not return the stream ID to the pool yet. We cannot reuse it + # because the node might still be processing the query and will + # return a late response to that query - if we used such stream + # before the response to the previous query has arrived, the new + # query could get a response from the old query with self._connection.lock: - self._connection.request_ids.append(self._req_id) + self._connection.orphaned_request_ids.add(self._req_id) + if len(self._connection.orphaned_request_ids) >= self._connection.orphaned_threshold: + self._connection.orphaned_threshold_reached = True - pool.return_connection(self._connection) + pool.return_connection(self._connection, stream_was_orphaned=True) errors = self._errors if not errors: diff --git a/cassandra/connection.py b/cassandra/connection.py index 0d8a50e76f..0869584663 100644 --- a/cassandra/connection.py +++ b/cassandra/connection.py @@ -690,6 +690,7 @@ class Connection(object): # The current number of operations that are in flight. More precisely, # the number of request IDs that are currently in use. + # This includes orphaned requests. in_flight = 0 # Max concurrent requests allowed per connection. This is set optimistically high, allowing @@ -707,6 +708,20 @@ class Connection(object): # request_ids set highest_request_id = 0 + # Tracks the request IDs which are no longer waited on (timed out), but + # cannot be reused yet because the node might still send a response + # on this stream + orphaned_request_ids = None + + # Set to true if the orphaned stream ID count cross configured threshold + # and the connection will be replaced + orphaned_threshold_reached = False + + # If the number of orphaned streams reaches this threshold, this connection + # will become marked and will be replaced with a new connection by the + # owning pool (currently, only HostConnection supports this) + orphaned_threshold = 3 * max_in_flight // 4 + is_defunct = False is_closed = False lock = None @@ -733,6 +748,8 @@ class Connection(object): _is_checksumming_enabled = False + _on_orphaned_stream_released = None + @property def _iobuf(self): # backward compatibility, to avoid any change in the reactors @@ -742,7 +759,7 @@ def __init__(self, host='127.0.0.1', port=9042, authenticator=None, ssl_options=None, sockopts=None, compression=True, cql_version=None, protocol_version=ProtocolVersion.MAX_SUPPORTED, is_control_connection=False, user_type_map=None, connect_timeout=None, allow_beta_protocol_version=False, no_compact=False, - ssl_context=None): + ssl_context=None, on_orphaned_stream_released=None): # TODO next major rename host to endpoint and remove port kwarg. 
self.endpoint = host if isinstance(host, EndPoint) else DefaultEndPoint(host, port) @@ -764,6 +781,8 @@ def __init__(self, host='127.0.0.1', port=9042, authenticator=None, self._io_buffer = _ConnectionIOBuffer(self) self._continuous_paging_sessions = {} self._socket_writable = True + self.orphaned_request_ids = set() + self._on_orphaned_stream_released = on_orphaned_stream_released if ssl_options: self._check_hostname = bool(self.ssl_options.pop('check_hostname', False)) @@ -1188,11 +1207,22 @@ def process_msg(self, header, body): decoder = paging_session.decoder result_metadata = None else: + need_notify_of_release = False + with self.lock: + if stream_id in self.orphaned_request_ids: + self.in_flight -= 1 + self.orphaned_request_ids.remove(stream_id) + need_notify_of_release = True + if need_notify_of_release and self._on_orphaned_stream_released: + self._on_orphaned_stream_released() + try: callback, decoder, result_metadata = self._requests.pop(stream_id) # This can only happen if the stream_id was # removed due to an OperationTimedOut except KeyError: + with self.lock: + self.request_ids.append(stream_id) return try: diff --git a/cassandra/pool.py b/cassandra/pool.py index cd27656046..c82dfe9a6b 100644 --- a/cassandra/pool.py +++ b/cassandra/pool.py @@ -390,6 +390,10 @@ def __init__(self, host, host_distance, session): # this is used in conjunction with the connection streams. Not using the connection lock because the connection can be replaced in the lifetime of the pool. self._stream_available_condition = Condition(self._lock) self._is_replacing = False + # Contains connections which shouldn't be used anymore + # and are waiting until all requests time out or complete + # so that we can dispose of them. + self._trash = set() if host_distance == HostDistance.IGNORED: log.debug("Not opening connection to ignored host %s", self.host) @@ -399,13 +403,13 @@ def __init__(self, host, host_distance, session): return log.debug("Initializing connection for host %s", self.host) - self._connection = session.cluster.connection_factory(host.endpoint) + self._connection = session.cluster.connection_factory(host.endpoint, on_orphaned_stream_released=self.on_orphaned_stream_released) self._keyspace = session.keyspace if self._keyspace: self._connection.set_keyspace_blocking(self._keyspace) log.debug("Finished initializing connection for host %s", self.host) - def borrow_connection(self, timeout): + def _get_connection(self): if self.is_shutdown: raise ConnectionException( "Pool for %s is shutdown" % (self.host,), self.host) @@ -413,12 +417,25 @@ def borrow_connection(self, timeout): conn = self._connection if not conn: raise NoConnectionsAvailable() + return conn + + def borrow_connection(self, timeout): + conn = self._get_connection() + if conn.orphaned_threshold_reached: + with self._lock: + if not self._is_replacing: + self._is_replacing = True + self._session.submit(self._replace, conn) + log.debug( + "Connection to host %s reached orphaned stream limit, replacing...", + self.host + ) start = time.time() remaining = timeout while True: with conn.lock: - if conn.in_flight < conn.max_request_id: + if not (conn.orphaned_threshold_reached and conn.is_closed) and conn.in_flight < conn.max_request_id: conn.in_flight += 1 return conn, conn.get_request_id() if timeout is not None: @@ -426,15 +443,19 @@ def borrow_connection(self, timeout): if remaining < 0: break with self._stream_available_condition: - self._stream_available_condition.wait(remaining) + if conn.orphaned_threshold_reached and 
conn.is_closed: + conn = self._get_connection() + else: + self._stream_available_condition.wait(remaining) raise NoConnectionsAvailable("All request IDs are currently in use") - def return_connection(self, connection): - with connection.lock: - connection.in_flight -= 1 - with self._stream_available_condition: - self._stream_available_condition.notify() + def return_connection(self, connection, stream_was_orphaned=False): + if not stream_was_orphaned: + with connection.lock: + connection.in_flight -= 1 + with self._stream_available_condition: + self._stream_available_condition.notify() if connection.is_defunct or connection.is_closed: if connection.signaled_error and not self.shutdown_on_error: @@ -461,6 +482,24 @@ def return_connection(self, connection): return self._is_replacing = True self._session.submit(self._replace, connection) + else: + if connection in self._trash: + with connection.lock: + if connection.in_flight == len(connection.orphaned_request_ids): + with self._lock: + if connection in self._trash: + self._trash.remove(connection) + log.debug("Closing trashed connection (%s) to %s", id(connection), self.host) + connection.close() + return + + def on_orphaned_stream_released(self): + """ + Called when a response for an orphaned stream (timed out on the client + side) was received. + """ + with self._stream_available_condition: + self._stream_available_condition.notify() def _replace(self, connection): with self._lock: @@ -469,7 +508,7 @@ def _replace(self, connection): log.debug("Replacing connection (%s) to %s", id(connection), self.host) try: - conn = self._session.cluster.connection_factory(self.host.endpoint) + conn = self._session.cluster.connection_factory(self.host.endpoint, on_orphaned_stream_released=self.on_orphaned_stream_released) if self._keyspace: conn.set_keyspace_blocking(self._keyspace) self._connection = conn @@ -477,9 +516,15 @@ def _replace(self, connection): log.warning("Failed reconnecting %s. Retrying." 
% (self.host.endpoint,)) self._session.submit(self._replace, connection) else: - with self._lock: - self._is_replacing = False - self._stream_available_condition.notify() + with connection.lock: + with self._lock: + if connection.orphaned_threshold_reached: + if connection.in_flight == len(connection.orphaned_request_ids): + connection.close() + else: + self._trash.add(connection) + self._is_replacing = False + self._stream_available_condition.notify() def shutdown(self): with self._lock: @@ -493,6 +538,16 @@ def shutdown(self): self._connection.close() self._connection = None + trash_conns = None + with self._lock: + if self._trash: + trash_conns = self._trash + self._trash = set() + + if trash_conns is not None: + for conn in self._trash: + conn.close() + def _set_keyspace_for_all_conns(self, keyspace, callback): if self.is_shutdown or not self._connection: return @@ -548,7 +603,7 @@ def __init__(self, host, host_distance, session): log.debug("Initializing new connection pool for host %s", self.host) core_conns = session.cluster.get_core_connections_per_host(host_distance) - self._connections = [session.cluster.connection_factory(host.endpoint) + self._connections = [session.cluster.connection_factory(host.endpoint, on_orphaned_stream_released=self.on_orphaned_stream_released) for i in range(core_conns)] self._keyspace = session.keyspace @@ -652,7 +707,7 @@ def _add_conn_if_under_max(self): log.debug("Going to open new connection to host %s", self.host) try: - conn = self._session.cluster.connection_factory(self.host.endpoint) + conn = self._session.cluster.connection_factory(self.host.endpoint, on_orphaned_stream_released=self.on_orphaned_stream_released) if self._keyspace: conn.set_keyspace_blocking(self._session.keyspace) self._next_trash_allowed_at = time.time() + _MIN_TRASH_INTERVAL @@ -712,9 +767,10 @@ def _wait_for_conn(self, timeout): raise NoConnectionsAvailable() - def return_connection(self, connection): + def return_connection(self, connection, stream_was_orphaned=False): with connection.lock: - connection.in_flight -= 1 + if not stream_was_orphaned: + connection.in_flight -= 1 in_flight = connection.in_flight if connection.is_defunct or connection.is_closed: @@ -750,6 +806,13 @@ def return_connection(self, connection): else: self._signal_available_conn() + def on_orphaned_stream_released(self): + """ + Called when a response for an orphaned stream (timed out on the client + side) was received. 
+ """ + self._signal_available_conn() + def _maybe_trash_connection(self, connection): core_conns = self._session.cluster.get_core_connections_per_host(self.host_distance) did_trash = False diff --git a/tests/unit/.noseids b/tests/unit/.noseids new file mode 100644 index 0000000000000000000000000000000000000000..1c956146fc04cc0b92d287530f5167fb38737082 GIT binary patch literal 30098 zcmcg#cX%At5r;rX140PI*noSJkR=PtHrO;7Ng z{r3iE(SLU~wua}JMXv#09638YJXUrpVyvv|dTlZKU$yR+9eZ2RHJ5}t<_pjF#sb^) z$MoWoZX2RFW>`*8q)#;ZaQ>r}MUW)VaP-Wn77W_1cut9MTBa%)#deXJ_}#9F|%rJD?(0 zc#akLrehN@kLC5YE>Ca;e1jd1p$t5?g@(GQxDKL?ph&L#X< z^PIpngl6crRuGyee642sWletA`BPzH=kZUhd~tY3KWt48#3DT5E{P;v-PWzTZyKJK zO8jx*fn{ijWFutvD@pm65uRiPGY!2z)fyJ+fU9*<`uQ(hUaXoZ6H z;McX&37kL|wE>+(geP($T-=IK*1a-fJc$!ybt^^%TxeOhw5p~1^Nw3Vyz@En z+UDenDmkkQI5945os;LgU;sW&G`;)X|xL=*poTIRCC84jE)FM`!tzk5Pvv1%-W#q?hb;(aqWu*W$xahlwvYQh+Iu8>?31^@~ z991mQ9IrS7SXJGpt^P_jtTmbQf}@!)PN5W6v!Tgl*CEAdw_n2sB>h?n5bCeF0@j`( zJQI#zD9CkecTH-jh1G?cmZauNqO0-F+ zDY{Ngo0mh7z(zI#BiRI;!h$g14BHC5i49u%m{}j3W<}UZHxunko7u4B0>QIaLc-|E zGz^QQ1-a$X#Y1=2g140oZv&~ZZGO_FwIB{HD{?g<6fBh?GQ{*$_JKDV)(FQ@+LM$F z+UHG~hRQQcO=>y>0Rn6*u#50#!Gws{u}q_m);-KdvV3=VbvgwMYvG`RDH{0F21AFi zH^Jd5P7Y&TICxcIP=X>sV1$#vmUIHCIgxFgz&o>mtBwt8-h?xDh5|-8Y3xp?Azf^j z9MKpjk!^WIQq{4Y6MjP;eBuN&&I!6T9h4HJkjM0en=t#UIg!_=BfFTiOJMHc#N3>Y znYP3yIB_$i>^qCXR^B%Dusb=CGdygq3~`_Zse=qJzl)Q@#&oIIAcm^8*w=1O)OCn@ zP^cYF=Fm+W!cnuR$><5UvuijZX`*eAUSJr)^X3C|rhed3Hu>6Rp+?oCE<1fRtP$>^ z0OXh|I?XwgCyt_UR1%3ItqR2IX4Qhqf)zkQ$u;egwkYb93E0ENw)}8-s1GiXru0qP z`*ny!Qkd=KWG3@Ly-6u%776J(PAU`Kg=9Nv0=b@(#o_KOJg^fmnig?royx+Mb^|B1 zow?LdCsiF%b|9n@oz#R~f^;;}xRH~_?%rr5GD0ztn>dN&8;q|PKuI7Bb?9*1W~OK# zCkuIA&00Z{1<4}HQ|;$OAI()p0zJ+qO`bV?C3>2^r56O{1gDmTPa%T|1$OCWQ7H-h!y(z94Q^|v+=2^ zHmTsC=#yrXjZe)XQnN6m5$?WaDpCH&%q=8!%+or>)Lkgf^qK!OsF6vnEU<8sX+!aAfO*eX}BD2O@q2n{S2s5O3+zyPA%)H{Gj4JmY}fV4zPaLt8$5?a(V`6w=2iV_+p z31v_adP#@s1P~|$^YfrB&pb^up2baLB9{g_5NPW4JROQwzy=K3^#d~LXLB>zmCK~& z1XhtWlOmPpaC6y{$E9MP62+!eFe#8w5(QJw<>oV%$A^*>P}3;G?gFNQn+#g)?c5}` z_d+5vmWah2+$^^C!oq_FSCoOqjXs=WF(VGMreAH6@FV+`1YXGWyRmr6+a3`JyzT;*|;5nsLzU=E%El?55q4E z!~<6RY%kP~-wZyNSP5i%fc&tt#>!$M(*U{zoTpf6$c`K$Wys<{CjojZO@o#uOYIoQ zrd zV!!*0VM8ylFqbgli%%CI_C3tlBMH-^(@eKh-iw(DP@kp}e1bswS>`3-wSApseDEq- z^UZ=w@UJTW-u$EyPN4{9EXZoliBtYQc;Ns$u?-!|8Z=Dcg&Hgw8E^+0OWo>9J-kF= zK-Wf7H!lsZ>z|#f^ilz3MLR+DVIv%wls$q-Rg#MOD#1DXP=bN>UA&Sj3tnI+FIy&; zg@@2BZ|i_{#mNAr2+wF}^A_HI(EKu7$|*Y*?Oyk?;0|@*LTVqpAlyC2-6)_`k z*RgOr(i1l*iL??>Dgv&`Tq>~Bpu}F!!s^&^SvA!e@p=Obubc%N*)Ez+e8siF0(*V7 zIN%8#yp;voX?^ygtxMs479M+YctrO?<$mC44z#zr*uxU9<2SO9*gFUbr5&`;2Uu9- ztm(D|jV`sS(s!dSex>uxLrYg@xAmj5f{FlLN^(TLv4g1u=LaxBHb64YuGMe~@`fcI4NI`$jE zmVkI~W5Ju4l^v3Cm>xxPnQlZ-%gWnX;IHW)K6V}thBppydU0$ca9xln9bi_W`jRIs zuzU46G^I^Oe=+D@zf=btaRagoXoIgnM-!bQ-m!cx3;&e|m~>2hCjnfj3tcG~6TrCw zmIy(=lM8xx#&px6wUV%fi)T)>#&>a%xM@HVnrAt+s44#4Tx@pr$0o_)JzN~N^~S-4 z`Yt>hiOP5{7j%ynp`cqW;vp^)Jz7Kui581^9~YY*Eh1u*w21c)$U*hnXZkDkIn?;U zQq*4#31}`X2%R3~kbVn}j2`sbfqxHdgSM;>ET7QA1&_f>b}q1dy3lBHqpZLOxv+fFpc`{RF$D?t32>Sp;X>S=ix}AA6g+(a zn;=ZrgC`-R1NtZz`qmgdj!+6vSZD@f==4K-ta3pMSL>QF3Cr{KFNi;Ax*2c<7+0CXz(c^8B{&Y z1(@&=u|h$h*bQ!-o`AD*i+Df9g^T-V$89NHAiBuiq zHKc?^8QLI$5w&^mI=UKwll?3gY+?(HH67SqP^~)fJRHdUq=h$}qm}eI zE?|t(I#i7>TBoWT=lppt=!q%$X!f*SoWYt(Jn`r(7db3+oA8>35BGtQ!56p~3}-TE zB107hF3I@xJk%GtaEH3%g4cAcC0bEm;(}dGWxrU=s`Ter@K}o4i@w}vYpD;%P+X9{ z>rm=WnG(f($?n&TagA^c><_;=%?wo~+=J9YMaP;T6a-QKU*RLUa17(XUP#D0)G>*# za+4VEE(za(`|eJuo{+`YxM`p#%2EjIJXnF$)b>ut_^)#l*wF(4y2DRzanLhEA~vhY_5hd!kq<|)3>;39%i8#(fc+xz5RpGgM$$@ zvf_0`L8+tfaPzux5MHjRKz*WVa^K}9g_ps*xr6rANViMXf$n?Ugk}dJ1OW~Gng$CV 
Date: Tue, 23 Nov 2021 11:12:58 -0600
Subject: [PATCH 129/211] Removing file unexpectedly included in previous PR

---
 tests/unit/.noseids | Bin 30098 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 tests/unit/.noseids

diff --git a/tests/unit/.noseids b/tests/unit/.noseids
deleted file mode 100644
index 1c956146fc04cc0b92d287530f5167fb38737082..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 30098
[base85-encoded binary data elided]
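The next patch in the series is a mechanical sweep: with Python 2 support gone, the unittest2 backport no longer earns its keep, so it is dropped from test-requirements.txt and every test module loses the same three-line import guard. One representative before/after (a summary, not part of the patch itself) covers all of the files listed below:

    # Before: Python 2-era fallback, repeated at the top of each test module
    try:
        import unittest2 as unittest
    except ImportError:
        import unittest  # noqa

    # After: the stdlib module alone suffices on Python 3
    import unittest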
From 2aa108b1ea2b869a72f38ea21c48e61ef7e75392 Mon Sep 17 00:00:00 2001
From: Bret McGuire <bret.mcguire@datastax.com>
Date: Mon, 24 Jan 2022 14:42:08 -0600
Subject: [PATCH 130/211] Merge pull request #1117 from datastax/remove_unittest2

PYTHON-1289 Removing unittest2 from the dependency list
---
 test-requirements.txt | 1 -
 tests/__init__.py | 5 +----
 tests/integration/__init__.py | 5 +----
 tests/integration/advanced/__init__.py | 5 +----
 tests/integration/advanced/graph/fluent/__init__.py | 5 +----
 tests/integration/advanced/graph/fluent/test_graph.py | 5 +----
 tests/integration/advanced/graph/test_graph_datatype.py | 5 +----
 tests/integration/advanced/graph/test_graph_query.py | 5 +----
 tests/integration/advanced/test_adv_metadata.py | 5 +----
 tests/integration/advanced/test_auth.py | 5 +----
 tests/integration/advanced/test_cont_paging.py | 5 +----
 tests/integration/advanced/test_cqlengine_where_operators.py | 5 +----
 tests/integration/advanced/test_geometry.py | 5 +----
 tests/integration/advanced/test_unixsocketendpoint.py | 5 +----
 tests/integration/cloud/__init__.py | 5 +----
 tests/integration/cloud/test_cloud.py | 5 +----
 tests/integration/cqlengine/__init__.py | 5 +----
 tests/integration/cqlengine/advanced/test_cont_paging.py | 5 +----
 tests/integration/cqlengine/base.py | 5 +----
 tests/integration/cqlengine/columns/test_static_column.py | 5 +----
 tests/integration/cqlengine/columns/test_validation.py | 5 +----
 tests/integration/cqlengine/columns/test_value_io.py | 5 +----
 tests/integration/cqlengine/connections/test_connection.py | 5 +----
 tests/integration/cqlengine/management/test_management.py | 5 +----
 tests/integration/cqlengine/model/test_model.py | 5 +----
 tests/integration/cqlengine/model/test_model_io.py | 5 +----
 tests/integration/cqlengine/model/test_udts.py | 5 +----
 tests/integration/cqlengine/operators/test_where_operators.py | 5 +----
 tests/integration/cqlengine/query/test_named.py | 5 +----
 tests/integration/cqlengine/query/test_queryset.py | 5 +----
 tests/integration/cqlengine/statements/test_assignment_clauses.py | 5 +----
 tests/integration/cqlengine/statements/test_base_statement.py | 5 +----
 tests/integration/cqlengine/statements/test_insert_statement.py | 5 +----
 tests/integration/cqlengine/statements/test_select_statement.py | 5 +----
 tests/integration/cqlengine/statements/test_update_statement.py | 5 +----
 tests/integration/cqlengine/statements/test_where_clause.py | 5 +----
 tests/integration/cqlengine/test_ifexists.py | 5 +----
 tests/integration/cqlengine/test_ifnotexists.py | 5 +----
tests/integration/cqlengine/test_lwt_conditional.py | 5 +---- tests/integration/cqlengine/test_ttl.py | 5 +---- tests/integration/long/__init__.py | 5 +---- tests/integration/long/test_consistency.py | 5 +---- tests/integration/long/test_failure_types.py | 5 +---- tests/integration/long/test_ipv6.py | 5 +---- tests/integration/long/test_large_data.py | 5 +---- tests/integration/long/test_loadbalancingpolicies.py | 5 +---- tests/integration/long/test_policies.py | 5 +---- tests/integration/long/test_schema.py | 5 +---- tests/integration/long/test_ssl.py | 5 +---- tests/integration/simulacron/__init__.py | 5 +---- tests/integration/simulacron/advanced/test_insights.py | 5 +---- tests/integration/simulacron/test_cluster.py | 5 +---- tests/integration/simulacron/test_connection.py | 5 +---- tests/integration/simulacron/test_empty_column.py | 5 +---- tests/integration/simulacron/test_endpoint.py | 5 +---- tests/integration/simulacron/test_policies.py | 5 +---- tests/integration/standard/__init__.py | 5 +---- tests/integration/standard/test_authentication.py | 5 +---- tests/integration/standard/test_client_warnings.py | 5 +---- tests/integration/standard/test_cluster.py | 5 +---- tests/integration/standard/test_concurrent.py | 5 +---- tests/integration/standard/test_connection.py | 5 +---- tests/integration/standard/test_control_connection.py | 5 +---- tests/integration/standard/test_custom_cluster.py | 5 +---- tests/integration/standard/test_custom_payload.py | 5 +---- tests/integration/standard/test_custom_protocol_handler.py | 5 +---- .../integration/standard/test_cython_protocol_handlers.py | 5 +---- tests/integration/standard/test_dse.py | 5 +---- tests/integration/standard/test_metadata.py | 5 +---- tests/integration/standard/test_metrics.py | 5 +---- tests/integration/standard/test_policies.py | 5 +---- tests/integration/standard/test_prepared_statements.py | 5 +---- tests/integration/standard/test_query.py | 5 +---- tests/integration/standard/test_query_paging.py | 5 +---- tests/integration/standard/test_routing.py | 5 +---- tests/integration/standard/test_row_factories.py | 5 +---- tests/integration/standard/test_single_interface.py | 5 +---- tests/integration/standard/test_types.py | 5 +---- tests/integration/standard/test_udts.py | 5 +---- tests/integration/upgrade/__init__.py | 5 +---- tests/integration/upgrade/test_upgrade.py | 5 +---- tests/stress_tests/test_load.py | 5 +---- tests/stress_tests/test_multi_inserts.py | 5 +---- tests/unit/advanced/cloud/test_cloud.py | 5 +---- tests/unit/advanced/test_auth.py | 5 +---- tests/unit/advanced/test_execution_profile.py | 5 +---- tests/unit/advanced/test_geometry.py | 5 +---- tests/unit/advanced/test_graph.py | 5 +---- tests/unit/advanced/test_insights.py | 5 +---- tests/unit/advanced/test_metadata.py | 5 +---- tests/unit/advanced/test_policies.py | 5 +---- tests/unit/cqlengine/test_columns.py | 5 +---- tests/unit/cqlengine/test_connection.py | 5 +---- tests/unit/cqlengine/test_udt.py | 5 +---- tests/unit/cython/test_bytesio.py | 5 +---- tests/unit/cython/test_types.py | 5 +---- tests/unit/cython/test_utils.py | 7 ++----- tests/unit/cython/utils.py | 5 +---- tests/unit/io/test_asyncorereactor.py | 5 +---- tests/unit/io/test_eventletreactor.py | 5 +---- tests/unit/io/test_geventreactor.py | 5 +---- tests/unit/io/test_libevreactor.py | 5 +---- tests/unit/io/test_twistedreactor.py | 5 +---- tests/unit/io/utils.py | 5 +---- tests/unit/test_auth.py | 5 +---- tests/unit/test_cluster.py | 5 +---- tests/unit/test_concurrent.py | 5 +---- 
tests/unit/test_connection.py | 5 +---- tests/unit/test_control_connection.py | 5 +---- tests/unit/test_endpoints.py | 5 +---- tests/unit/test_exception.py | 5 +---- tests/unit/test_host_connection_pool.py | 5 +---- tests/unit/test_marshalling.py | 5 +---- tests/unit/test_metadata.py | 5 +---- tests/unit/test_orderedmap.py | 5 +---- tests/unit/test_parameter_binding.py | 5 +---- tests/unit/test_policies.py | 5 +---- tests/unit/test_protocol.py | 5 +---- tests/unit/test_query.py | 5 +---- tests/unit/test_response_future.py | 5 +---- tests/unit/test_resultset.py | 5 +---- tests/unit/test_row_factories.py | 5 +---- tests/unit/test_segment.py | 5 +---- tests/unit/test_sortedset.py | 5 +---- tests/unit/test_time_util.py | 5 +---- tests/unit/test_timestamps.py | 5 +---- tests/unit/test_types.py | 5 +---- tests/unit/test_util_types.py | 5 +---- 128 files changed, 128 insertions(+), 510 deletions(-) diff --git a/test-requirements.txt b/test-requirements.txt index 9e62bfdee8..996cf4341f 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -3,7 +3,6 @@ scales nose mock>1.1 ccm>=2.1.2 -unittest2 pytz sure pure-sasl diff --git a/tests/__init__.py b/tests/__init__.py index cea5a872c6..48c589c424 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import logging import sys import socket diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index 70ec11c213..d3c3332649 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -18,10 +18,7 @@ from tests import connection_class, EVENT_LOOP_MANAGER Cluster.connection_class = connection_class -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from packaging.version import Version import logging diff --git a/tests/integration/advanced/__init__.py b/tests/integration/advanced/__init__.py index b2820e037b..e2fa1a4a4a 100644 --- a/tests/integration/advanced/__init__.py +++ b/tests/integration/advanced/__init__.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from six.moves.urllib.request import build_opener, Request, HTTPHandler import re diff --git a/tests/integration/advanced/graph/fluent/__init__.py b/tests/integration/advanced/graph/fluent/__init__.py index 3bb81e78e3..3962029f45 100644 --- a/tests/integration/advanced/graph/fluent/__init__.py +++ b/tests/integration/advanced/graph/fluent/__init__.py @@ -35,10 +35,7 @@ VertexLabel) from tests.integration import requiredse -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import ipaddress diff --git a/tests/integration/advanced/graph/fluent/test_graph.py b/tests/integration/advanced/graph/fluent/test_graph.py index 02611c12c0..d46a74a146 100644 --- a/tests/integration/advanced/graph/fluent/test_graph.py +++ b/tests/integration/advanced/graph/fluent/test_graph.py @@ -28,10 +28,7 @@ from tests.integration.advanced.graph.fluent import ( BaseExplicitExecutionTest, create_traversal_profiles, check_equality_base) -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest @greaterthanorequaldse60 diff --git a/tests/integration/advanced/graph/test_graph_datatype.py b/tests/integration/advanced/graph/test_graph_datatype.py index 222b1f5ace..0445ce8030 100644 --- a/tests/integration/advanced/graph/test_graph_datatype.py +++ b/tests/integration/advanced/graph/test_graph_datatype.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import time import six diff --git a/tests/integration/advanced/graph/test_graph_query.py b/tests/integration/advanced/graph/test_graph_query.py index 0eda67894d..9bc23e611a 100644 --- a/tests/integration/advanced/graph/test_graph_query.py +++ b/tests/integration/advanced/graph/test_graph_query.py @@ -22,10 +22,7 @@ import json import time -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra import OperationTimedOut, ConsistencyLevel, InvalidRequest from cassandra.cluster import EXEC_PROFILE_GRAPH_DEFAULT, NoHostAvailable diff --git a/tests/integration/advanced/test_adv_metadata.py b/tests/integration/advanced/test_adv_metadata.py index b3af6fa5d1..8228bfe220 100644 --- a/tests/integration/advanced/test_adv_metadata.py +++ b/tests/integration/advanced/test_adv_metadata.py @@ -20,10 +20,7 @@ greaterthanorequaldse68, use_single_node, DSE_VERSION, requiredse, TestCluster) -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import logging import time diff --git a/tests/integration/advanced/test_auth.py b/tests/integration/advanced/test_auth.py index 7e9aa8c23e..3443419ab4 100644 --- a/tests/integration/advanced/test_auth.py +++ b/tests/integration/advanced/test_auth.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import logging import os import subprocess diff --git a/tests/integration/advanced/test_cont_paging.py b/tests/integration/advanced/test_cont_paging.py index c5f1cbfff3..2e75d7061d 100644 --- a/tests/integration/advanced/test_cont_paging.py +++ b/tests/integration/advanced/test_cont_paging.py @@ -18,10 +18,7 @@ import logging log = logging.getLogger(__name__) -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from itertools import cycle, count from six.moves import range diff --git a/tests/integration/advanced/test_cqlengine_where_operators.py b/tests/integration/advanced/test_cqlengine_where_operators.py index 8ade3db09d..b2e4d4ba9e 100644 --- a/tests/integration/advanced/test_cqlengine_where_operators.py +++ b/tests/integration/advanced/test_cqlengine_where_operators.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import os import time diff --git a/tests/integration/advanced/test_geometry.py b/tests/integration/advanced/test_geometry.py index 8bee144d19..6a6737bd50 100644 --- a/tests/integration/advanced/test_geometry.py +++ b/tests/integration/advanced/test_geometry.py @@ -18,10 +18,7 @@ from cassandra.util import OrderedMap, sortedset from collections import namedtuple -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from uuid import uuid1 from cassandra.util import Point, LineString, Polygon from cassandra.cqltypes import LineStringType, PointType, PolygonType diff --git a/tests/integration/advanced/test_unixsocketendpoint.py b/tests/integration/advanced/test_unixsocketendpoint.py index 10cbc1b362..f2795d1a68 100644 --- a/tests/integration/advanced/test_unixsocketendpoint.py +++ b/tests/integration/advanced/test_unixsocketendpoint.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import time import subprocess diff --git a/tests/integration/cloud/__init__.py b/tests/integration/cloud/__init__.py index ca05ae4ce5..a6a4ab7a5d 100644 --- a/tests/integration/cloud/__init__.py +++ b/tests/integration/cloud/__init__.py @@ -13,10 +13,7 @@ # limitations under the License from cassandra.cluster import Cluster -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import os import subprocess diff --git a/tests/integration/cloud/test_cloud.py b/tests/integration/cloud/test_cloud.py index e0b9e2d382..ef4909a257 100644 --- a/tests/integration/cloud/test_cloud.py +++ b/tests/integration/cloud/test_cloud.py @@ -18,10 +18,7 @@ from cassandra.cqlengine.models import Model from cassandra.cqlengine import columns -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import six from ssl import SSLContext, PROTOCOL_TLS diff --git a/tests/integration/cqlengine/__init__.py b/tests/integration/cqlengine/__init__.py index e68baaabf1..cd8f031ed1 100644 --- a/tests/integration/cqlengine/__init__.py +++ b/tests/integration/cqlengine/__init__.py @@ -14,10 +14,7 @@ import os import warnings -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra import ConsistencyLevel from cassandra.cqlengine import connection diff --git a/tests/integration/cqlengine/advanced/test_cont_paging.py b/tests/integration/cqlengine/advanced/test_cont_paging.py index 38b4355312..89e05950e3 100644 --- a/tests/integration/cqlengine/advanced/test_cont_paging.py +++ b/tests/integration/cqlengine/advanced/test_cont_paging.py @@ -14,10 +14,7 @@ -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from packaging.version import Version diff --git a/tests/integration/cqlengine/base.py b/tests/integration/cqlengine/base.py index 8a6903350f..bdb62aa2a3 100644 --- a/tests/integration/cqlengine/base.py +++ b/tests/integration/cqlengine/base.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import sys diff --git a/tests/integration/cqlengine/columns/test_static_column.py b/tests/integration/cqlengine/columns/test_static_column.py index 69e222d2b9..0e8ace8c8f 100644 --- a/tests/integration/cqlengine/columns/test_static_column.py +++ b/tests/integration/cqlengine/columns/test_static_column.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from uuid import uuid4 diff --git a/tests/integration/cqlengine/columns/test_validation.py b/tests/integration/cqlengine/columns/test_validation.py index 69682fd68d..21fe1581ff 100644 --- a/tests/integration/cqlengine/columns/test_validation.py +++ b/tests/integration/cqlengine/columns/test_validation.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import sys from datetime import datetime, timedelta, date, tzinfo, time diff --git a/tests/integration/cqlengine/columns/test_value_io.py b/tests/integration/cqlengine/columns/test_value_io.py index 243c2b0fdb..2c82fe16f7 100644 --- a/tests/integration/cqlengine/columns/test_value_io.py +++ b/tests/integration/cqlengine/columns/test_value_io.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from datetime import datetime, timedelta, time from decimal import Decimal diff --git a/tests/integration/cqlengine/connections/test_connection.py b/tests/integration/cqlengine/connections/test_connection.py index c46df31280..92b6992573 100644 --- a/tests/integration/cqlengine/connections/test_connection.py +++ b/tests/integration/cqlengine/connections/test_connection.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra import ConsistencyLevel diff --git a/tests/integration/cqlengine/management/test_management.py b/tests/integration/cqlengine/management/test_management.py index 7edb3e71dd..f37db5e51f 100644 --- a/tests/integration/cqlengine/management/test_management.py +++ b/tests/integration/cqlengine/management/test_management.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import mock import logging diff --git a/tests/integration/cqlengine/model/test_model.py b/tests/integration/cqlengine/model/test_model.py index bbd9e0cbb6..859facf0e1 100644 --- a/tests/integration/cqlengine/model/test_model.py +++ b/tests/integration/cqlengine/model/test_model.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from mock import patch diff --git a/tests/integration/cqlengine/model/test_model_io.py b/tests/integration/cqlengine/model/test_model_io.py index 32ace5363f..3c4088cc83 100644 --- a/tests/integration/cqlengine/model/test_model_io.py +++ b/tests/integration/cqlengine/model/test_model_io.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from uuid import uuid4, UUID import random diff --git a/tests/integration/cqlengine/model/test_udts.py b/tests/integration/cqlengine/model/test_udts.py index 82973436ac..1e3adf9a71 100644 --- a/tests/integration/cqlengine/model/test_udts.py +++ b/tests/integration/cqlengine/model/test_udts.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from datetime import datetime, date, time from decimal import Decimal diff --git a/tests/integration/cqlengine/operators/test_where_operators.py b/tests/integration/cqlengine/operators/test_where_operators.py index fdfce1f0b8..555af11025 100644 --- a/tests/integration/cqlengine/operators/test_where_operators.py +++ b/tests/integration/cqlengine/operators/test_where_operators.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra.cqlengine.operators import * diff --git a/tests/integration/cqlengine/query/test_named.py b/tests/integration/cqlengine/query/test_named.py index 3a6f83b32e..eb85bbbb85 100644 --- a/tests/integration/cqlengine/query/test_named.py +++ b/tests/integration/cqlengine/query/test_named.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra import ConsistencyLevel from cassandra.cqlengine import operators diff --git a/tests/integration/cqlengine/query/test_queryset.py b/tests/integration/cqlengine/query/test_queryset.py index 6bc9d701b8..ec5044b707 100644 --- a/tests/integration/cqlengine/query/test_queryset.py +++ b/tests/integration/cqlengine/query/test_queryset.py @@ -13,10 +13,7 @@ # limitations under the License. from __future__ import absolute_import -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from datetime import datetime from uuid import uuid4 diff --git a/tests/integration/cqlengine/statements/test_assignment_clauses.py b/tests/integration/cqlengine/statements/test_assignment_clauses.py index 594224d72d..82bf067cb4 100644 --- a/tests/integration/cqlengine/statements/test_assignment_clauses.py +++ b/tests/integration/cqlengine/statements/test_assignment_clauses.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra.cqlengine.statements import AssignmentClause, SetUpdateClause, ListUpdateClause, MapUpdateClause, MapDeleteClause, FieldDeleteClause, CounterUpdateClause diff --git a/tests/integration/cqlengine/statements/test_base_statement.py b/tests/integration/cqlengine/statements/test_base_statement.py index 474c45d02b..3b5be60520 100644 --- a/tests/integration/cqlengine/statements/test_base_statement.py +++ b/tests/integration/cqlengine/statements/test_base_statement.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from uuid import uuid4 import six diff --git a/tests/integration/cqlengine/statements/test_insert_statement.py b/tests/integration/cqlengine/statements/test_insert_statement.py index 3bf90ec313..a1dcd08968 100644 --- a/tests/integration/cqlengine/statements/test_insert_statement.py +++ b/tests/integration/cqlengine/statements/test_insert_statement.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import six diff --git a/tests/integration/cqlengine/statements/test_select_statement.py b/tests/integration/cqlengine/statements/test_select_statement.py index 90c14bcfb6..c6d1ac69f4 100644 --- a/tests/integration/cqlengine/statements/test_select_statement.py +++ b/tests/integration/cqlengine/statements/test_select_statement.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra.cqlengine.columns import Column from cassandra.cqlengine.statements import SelectStatement, WhereClause diff --git a/tests/integration/cqlengine/statements/test_update_statement.py b/tests/integration/cqlengine/statements/test_update_statement.py index c6ed228d91..99105069dd 100644 --- a/tests/integration/cqlengine/statements/test_update_statement.py +++ b/tests/integration/cqlengine/statements/test_update_statement.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra.cqlengine.columns import Column, Set, List, Text from cassandra.cqlengine.operators import * diff --git a/tests/integration/cqlengine/statements/test_where_clause.py b/tests/integration/cqlengine/statements/test_where_clause.py index 3173320f7c..21671be086 100644 --- a/tests/integration/cqlengine/statements/test_where_clause.py +++ b/tests/integration/cqlengine/statements/test_where_clause.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import six from cassandra.cqlengine.operators import EqualsOperator diff --git a/tests/integration/cqlengine/test_ifexists.py b/tests/integration/cqlengine/test_ifexists.py index 2797edd846..1189bc0ff5 100644 --- a/tests/integration/cqlengine/test_ifexists.py +++ b/tests/integration/cqlengine/test_ifexists.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import mock from uuid import uuid4 diff --git a/tests/integration/cqlengine/test_ifnotexists.py b/tests/integration/cqlengine/test_ifnotexists.py index 206101f1b2..260e132731 100644 --- a/tests/integration/cqlengine/test_ifnotexists.py +++ b/tests/integration/cqlengine/test_ifnotexists.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import mock from uuid import uuid4 diff --git a/tests/integration/cqlengine/test_lwt_conditional.py b/tests/integration/cqlengine/test_lwt_conditional.py index 1c418ae6d8..f8459a95ad 100644 --- a/tests/integration/cqlengine/test_lwt_conditional.py +++ b/tests/integration/cqlengine/test_lwt_conditional.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import mock import six diff --git a/tests/integration/cqlengine/test_ttl.py b/tests/integration/cqlengine/test_ttl.py index a9aa32db94..55457ff56a 100644 --- a/tests/integration/cqlengine/test_ttl.py +++ b/tests/integration/cqlengine/test_ttl.py @@ -13,10 +13,7 @@ # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from packaging.version import Version diff --git a/tests/integration/long/__init__.py b/tests/integration/long/__init__.py index 447f4885cc..19e7ed2c64 100644 --- a/tests/integration/long/__init__.py +++ b/tests/integration/long/__init__.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest try: from ccmlib import common diff --git a/tests/integration/long/test_consistency.py b/tests/integration/long/test_consistency.py index bbf446861a..0b9ebab3ed 100644 --- a/tests/integration/long/test_consistency.py +++ b/tests/integration/long/test_consistency.py @@ -28,10 +28,7 @@ force_stop, create_schema, wait_for_down, wait_for_up, start, CoordinatorStats ) -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest ALL_CONSISTENCY_LEVELS = { ConsistencyLevel.ANY, ConsistencyLevel.ONE, ConsistencyLevel.TWO, ConsistencyLevel.QUORUM, diff --git a/tests/integration/long/test_failure_types.py b/tests/integration/long/test_failure_types.py index 6bdff8d15d..2ca01066b0 100644 --- a/tests/integration/long/test_failure_types.py +++ b/tests/integration/long/test_failure_types.py @@ -34,10 +34,7 @@ local, CASSANDRA_VERSION, TestCluster) -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest log = logging.getLogger(__name__) diff --git a/tests/integration/long/test_ipv6.py b/tests/integration/long/test_ipv6.py index a49c1677e8..b63fdebcf3 100644 --- a/tests/integration/long/test_ipv6.py +++ b/tests/integration/long/test_ipv6.py @@ -30,10 +30,7 @@ except ImportError: LibevConnection = None -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest # If more modules do IPV6 testing, this can be moved down to integration.__init__. diff --git a/tests/integration/long/test_large_data.py b/tests/integration/long/test_large_data.py index ce7e4398da..59873204a4 100644 --- a/tests/integration/long/test_large_data.py +++ b/tests/integration/long/test_large_data.py @@ -27,10 +27,7 @@ from tests.integration import use_singledc, PROTOCOL_VERSION, TestCluster from tests.integration.long.utils import create_schema -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest log = logging.getLogger(__name__) diff --git a/tests/integration/long/test_loadbalancingpolicies.py b/tests/integration/long/test_loadbalancingpolicies.py index f245569a80..7848a21b1d 100644 --- a/tests/integration/long/test_loadbalancingpolicies.py +++ b/tests/integration/long/test_loadbalancingpolicies.py @@ -36,10 +36,7 @@ wait_for_down, decommission, start, bootstrap, stop, IP_FORMAT) -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest log = logging.getLogger(__name__) diff --git a/tests/integration/long/test_policies.py b/tests/integration/long/test_policies.py index 0648e6cc93..680d0d7980 100644 --- a/tests/integration/long/test_policies.py +++ b/tests/integration/long/test_policies.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra import ConsistencyLevel, Unavailable from cassandra.cluster import ExecutionProfile, EXEC_PROFILE_DEFAULT diff --git a/tests/integration/long/test_schema.py b/tests/integration/long/test_schema.py index e2945a117b..f1cc80a17a 100644 --- a/tests/integration/long/test_schema.py +++ b/tests/integration/long/test_schema.py @@ -21,10 +21,7 @@ import time -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest log = logging.getLogger(__name__) diff --git a/tests/integration/long/test_ssl.py b/tests/integration/long/test_ssl.py index 4de46f4649..69285001f8 100644 --- a/tests/integration/long/test_ssl.py +++ b/tests/integration/long/test_ssl.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest import os, sys, traceback, logging, ssl, time, math, uuid from cassandra.cluster import NoHostAvailable diff --git a/tests/integration/simulacron/__init__.py b/tests/integration/simulacron/__init__.py index 6543265db2..c959fd6e08 100644 --- a/tests/integration/simulacron/__init__.py +++ b/tests/integration/simulacron/__init__.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from tests.integration import requiredse, CASSANDRA_VERSION, DSE_VERSION, SIMULACRON_JAR, PROTOCOL_VERSION from tests.integration.simulacron.utils import ( diff --git a/tests/integration/simulacron/advanced/test_insights.py b/tests/integration/simulacron/advanced/test_insights.py index 3da14659af..5ddae4ec7c 100644 --- a/tests/integration/simulacron/advanced/test_insights.py +++ b/tests/integration/simulacron/advanced/test_insights.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import time import json diff --git a/tests/integration/simulacron/test_cluster.py b/tests/integration/simulacron/test_cluster.py index b89f564f08..f859a5dd05 100644 --- a/tests/integration/simulacron/test_cluster.py +++ b/tests/integration/simulacron/test_cluster.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import logging from packaging.version import Version diff --git a/tests/integration/simulacron/test_connection.py b/tests/integration/simulacron/test_connection.py index 4ef97247a6..0c70d0a1e9 100644 --- a/tests/integration/simulacron/test_connection.py +++ b/tests/integration/simulacron/test_connection.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import logging import time diff --git a/tests/integration/simulacron/test_empty_column.py b/tests/integration/simulacron/test_empty_column.py index 91c76985e1..046aaacf79 100644 --- a/tests/integration/simulacron/test_empty_column.py +++ b/tests/integration/simulacron/test_empty_column.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from collections import namedtuple, OrderedDict diff --git a/tests/integration/simulacron/test_endpoint.py b/tests/integration/simulacron/test_endpoint.py index 691fcc8718..9e2d91b6d3 100644 --- a/tests/integration/simulacron/test_endpoint.py +++ b/tests/integration/simulacron/test_endpoint.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from functools import total_ordering diff --git a/tests/integration/simulacron/test_policies.py b/tests/integration/simulacron/test_policies.py index da093be43c..6d0d081889 100644 --- a/tests/integration/simulacron/test_policies.py +++ b/tests/integration/simulacron/test_policies.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra import OperationTimedOut, WriteTimeout from cassandra.cluster import Cluster, ExecutionProfile, ResponseFuture, EXEC_PROFILE_DEFAULT, NoHostAvailable diff --git a/tests/integration/standard/__init__.py b/tests/integration/standard/__init__.py index e54b6fd6bd..1f14bd6ec4 100644 --- a/tests/integration/standard/__init__.py +++ b/tests/integration/standard/__init__.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest try: from ccmlib import common diff --git a/tests/integration/standard/test_authentication.py b/tests/integration/standard/test_authentication.py index 9755c5098b..189da45c94 100644 --- a/tests/integration/standard/test_authentication.py +++ b/tests/integration/standard/test_authentication.py @@ -22,10 +22,7 @@ USE_CASS_EXTERNAL, start_cluster_wait_for_up, TestCluster from tests.integration.util import assert_quiescent_pool_state -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest log = logging.getLogger(__name__) diff --git a/tests/integration/standard/test_client_warnings.py b/tests/integration/standard/test_client_warnings.py index c5ce5dc726..5f63b5265a 100644 --- a/tests/integration/standard/test_client_warnings.py +++ b/tests/integration/standard/test_client_warnings.py @@ -13,10 +13,7 @@ # limitations under the License. 
-try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from cassandra.query import BatchStatement diff --git a/tests/integration/standard/test_cluster.py b/tests/integration/standard/test_cluster.py index c7d8266fd9..a15c7f32e2 100644 --- a/tests/integration/standard/test_cluster.py +++ b/tests/integration/standard/test_cluster.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from collections import deque from copy import copy diff --git a/tests/integration/standard/test_concurrent.py b/tests/integration/standard/test_concurrent.py index 8bd65c7f6f..ad4ef47473 100644 --- a/tests/integration/standard/test_concurrent.py +++ b/tests/integration/standard/test_concurrent.py @@ -26,10 +26,7 @@ from six import next -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest log = logging.getLogger(__name__) diff --git a/tests/integration/standard/test_connection.py b/tests/integration/standard/test_connection.py index aaa5a27dfd..76c8216d41 100644 --- a/tests/integration/standard/test_connection.py +++ b/tests/integration/standard/test_connection.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from functools import partial from mock import patch diff --git a/tests/integration/standard/test_control_connection.py b/tests/integration/standard/test_control_connection.py index db7cff8506..9d579476d2 100644 --- a/tests/integration/standard/test_control_connection.py +++ b/tests/integration/standard/test_control_connection.py @@ -16,10 +16,7 @@ # from cassandra import InvalidRequest -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra.protocol import ConfigurationException diff --git a/tests/integration/standard/test_custom_cluster.py b/tests/integration/standard/test_custom_cluster.py index 84e0737086..d0f10d51db 100644 --- a/tests/integration/standard/test_custom_cluster.py +++ b/tests/integration/standard/test_custom_cluster.py @@ -16,10 +16,7 @@ from tests.integration import use_singledc, get_cluster, remove_cluster, local, TestCluster from tests.util import wait_until, wait_until_not_raised -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest def setup_module(): diff --git a/tests/integration/standard/test_custom_payload.py b/tests/integration/standard/test_custom_payload.py index 9906a8243e..3290852862 100644 --- a/tests/integration/standard/test_custom_payload.py +++ b/tests/integration/standard/test_custom_payload.py @@ -13,10 +13,7 @@ # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest import six diff --git a/tests/integration/standard/test_custom_protocol_handler.py b/tests/integration/standard/test_custom_protocol_handler.py index bf549511c8..7443ce0748 100644 --- a/tests/integration/standard/test_custom_protocol_handler.py +++ b/tests/integration/standard/test_custom_protocol_handler.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra.protocol import ProtocolHandler, ResultMessage, QueryMessage, UUIDType, read_int from cassandra.query import tuple_factory, SimpleStatement diff --git a/tests/integration/standard/test_cython_protocol_handlers.py b/tests/integration/standard/test_cython_protocol_handlers.py index 4e45553be2..9cb5914f16 100644 --- a/tests/integration/standard/test_cython_protocol_handlers.py +++ b/tests/integration/standard/test_cython_protocol_handlers.py @@ -2,10 +2,7 @@ # Based on test_custom_protocol_handler.py -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from itertools import count diff --git a/tests/integration/standard/test_dse.py b/tests/integration/standard/test_dse.py index 1b9b5bef84..7b96094b3f 100644 --- a/tests/integration/standard/test_dse.py +++ b/tests/integration/standard/test_dse.py @@ -21,10 +21,7 @@ from tests.integration import (execute_until_pass, execute_with_long_wait_retry, use_cluster, TestCluster) -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest CCM_IS_DSE = (os.environ.get('CCM_IS_DSE', None) == 'true') diff --git a/tests/integration/standard/test_metadata.py b/tests/integration/standard/test_metadata.py index bd556f357d..e20f1f0640 100644 --- a/tests/integration/standard/test_metadata.py +++ b/tests/integration/standard/test_metadata.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from collections import defaultdict import difflib diff --git a/tests/integration/standard/test_metrics.py b/tests/integration/standard/test_metrics.py index 676a5340ef..ddc1091dc6 100644 --- a/tests/integration/standard/test_metrics.py +++ b/tests/integration/standard/test_metrics.py @@ -17,10 +17,7 @@ from cassandra.connection import ConnectionShutdown from cassandra.policies import HostFilterPolicy, RoundRobinPolicy, FallthroughRetryPolicy -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra.query import SimpleStatement from cassandra import ConsistencyLevel, WriteTimeout, Unavailable, ReadTimeout diff --git a/tests/integration/standard/test_policies.py b/tests/integration/standard/test_policies.py index 24facf42a0..46e91918ac 100644 --- a/tests/integration/standard/test_policies.py +++ b/tests/integration/standard/test_policies.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra.cluster import ExecutionProfile, EXEC_PROFILE_DEFAULT from cassandra.policies import HostFilterPolicy, RoundRobinPolicy, SimpleConvictionPolicy, \ diff --git a/tests/integration/standard/test_prepared_statements.py b/tests/integration/standard/test_prepared_statements.py index 5c79f27346..1ed48d2964 100644 --- a/tests/integration/standard/test_prepared_statements.py +++ b/tests/integration/standard/test_prepared_statements.py @@ -15,10 +15,7 @@ from tests.integration import use_singledc, PROTOCOL_VERSION, TestCluster -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra import InvalidRequest, DriverException from cassandra import ConsistencyLevel, ProtocolVersion diff --git a/tests/integration/standard/test_query.py b/tests/integration/standard/test_query.py index ea0e326ff5..8d2a3d74e2 100644 --- a/tests/integration/standard/test_query.py +++ b/tests/integration/standard/test_query.py @@ -15,10 +15,7 @@ from cassandra.concurrent import execute_concurrent from cassandra import DriverException -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import logging from cassandra import ProtocolVersion from cassandra import ConsistencyLevel, Unavailable, InvalidRequest, cluster diff --git a/tests/integration/standard/test_query_paging.py b/tests/integration/standard/test_query_paging.py index dac4ec5ce3..8e0ca8becc 100644 --- a/tests/integration/standard/test_query_paging.py +++ b/tests/integration/standard/test_query_paging.py @@ -16,10 +16,7 @@ import logging log = logging.getLogger(__name__) -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from itertools import cycle, count from six.moves import range diff --git a/tests/integration/standard/test_routing.py b/tests/integration/standard/test_routing.py index e1dabba49a..47697ee9c8 100644 --- a/tests/integration/standard/test_routing.py +++ b/tests/integration/standard/test_routing.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from uuid import uuid1 diff --git a/tests/integration/standard/test_row_factories.py b/tests/integration/standard/test_row_factories.py index 93f25d9276..6855e8a410 100644 --- a/tests/integration/standard/test_row_factories.py +++ b/tests/integration/standard/test_row_factories.py @@ -15,10 +15,7 @@ from tests.integration import get_server_versions, use_singledc, \ BasicSharedKeyspaceUnitTestCaseWFunctionTable, BasicSharedKeyspaceUnitTestCase, execute_until_pass, TestCluster -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra.cluster import ResultSet, ExecutionProfile, EXEC_PROFILE_DEFAULT from cassandra.query import tuple_factory, named_tuple_factory, dict_factory, ordered_dict_factory diff --git a/tests/integration/standard/test_single_interface.py b/tests/integration/standard/test_single_interface.py index 91451a52a0..4677eff641 100644 --- a/tests/integration/standard/test_single_interface.py +++ b/tests/integration/standard/test_single_interface.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import six diff --git a/tests/integration/standard/test_types.py b/tests/integration/standard/test_types.py index 0592b7d737..f69e88c64f 100644 --- a/tests/integration/standard/test_types.py +++ b/tests/integration/standard/test_types.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from datetime import datetime import math diff --git a/tests/integration/standard/test_udts.py b/tests/integration/standard/test_udts.py index 6d9676f25e..4c7826fb98 100644 --- a/tests/integration/standard/test_udts.py +++ b/tests/integration/standard/test_udts.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from collections import namedtuple from functools import partial diff --git a/tests/integration/upgrade/__init__.py b/tests/integration/upgrade/__init__.py index d2b9076bc2..e307a3e3cc 100644 --- a/tests/integration/upgrade/__init__.py +++ b/tests/integration/upgrade/__init__.py @@ -27,10 +27,7 @@ import time import logging -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest def setup_module(): diff --git a/tests/integration/upgrade/test_upgrade.py b/tests/integration/upgrade/test_upgrade.py index 31df55c02c..63e1a64b9d 100644 --- a/tests/integration/upgrade/test_upgrade.py +++ b/tests/integration/upgrade/test_upgrade.py @@ -20,10 +20,7 @@ from cassandra.policies import ConstantSpeculativeExecutionPolicy from tests.integration.upgrade import UpgradeBase, UpgradeBaseAuth, UpgradePath, upgrade_paths -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest # Previous Cassandra upgrade diff --git a/tests/stress_tests/test_load.py b/tests/stress_tests/test_load.py index a9771147ce..3492ff2923 100644 --- a/tests/stress_tests/test_load.py +++ b/tests/stress_tests/test_load.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import gc diff --git a/tests/stress_tests/test_multi_inserts.py b/tests/stress_tests/test_multi_inserts.py index 65bbe2a4e4..84dfc5e6f7 100644 --- a/tests/stress_tests/test_multi_inserts.py +++ b/tests/stress_tests/test_multi_inserts.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest import os from cassandra.cluster import Cluster diff --git a/tests/unit/advanced/cloud/test_cloud.py b/tests/unit/advanced/cloud/test_cloud.py index ab18f0af72..a7cd83a8ce 100644 --- a/tests/unit/advanced/cloud/test_cloud.py +++ b/tests/unit/advanced/cloud/test_cloud.py @@ -11,10 +11,7 @@ import shutil import six -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra import DriverException from cassandra.datastax import cloud diff --git a/tests/unit/advanced/test_auth.py b/tests/unit/advanced/test_auth.py index bb411afe2b..840073e9e1 100644 --- a/tests/unit/advanced/test_auth.py +++ b/tests/unit/advanced/test_auth.py @@ -15,10 +15,7 @@ import os from puresasl import QOP -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra.auth import DSEGSSAPIAuthProvider diff --git a/tests/unit/advanced/test_execution_profile.py b/tests/unit/advanced/test_execution_profile.py index 8592f56a44..478322f95b 100644 --- a/tests/unit/advanced/test_execution_profile.py +++ b/tests/unit/advanced/test_execution_profile.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra.cluster import GraphExecutionProfile, GraphAnalyticsExecutionProfile from cassandra.graph import GraphOptions diff --git a/tests/unit/advanced/test_geometry.py b/tests/unit/advanced/test_geometry.py index 4fa2644ff2..d85f1bc293 100644 --- a/tests/unit/advanced/test_geometry.py +++ b/tests/unit/advanced/test_geometry.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import struct import math diff --git a/tests/unit/advanced/test_graph.py b/tests/unit/advanced/test_graph.py index f25a229f42..25dd289dba 100644 --- a/tests/unit/advanced/test_graph.py +++ b/tests/unit/advanced/test_graph.py @@ -15,10 +15,7 @@ import warnings import json -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import six diff --git a/tests/unit/advanced/test_insights.py b/tests/unit/advanced/test_insights.py index 2cc170e485..4f1dd7ac12 100644 --- a/tests/unit/advanced/test_insights.py +++ b/tests/unit/advanced/test_insights.py @@ -13,10 +13,7 @@ # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import logging from mock import sentinel diff --git a/tests/unit/advanced/test_metadata.py b/tests/unit/advanced/test_metadata.py index addd514169..cf730ebec5 100644 --- a/tests/unit/advanced/test_metadata.py +++ b/tests/unit/advanced/test_metadata.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra.metadata import ( KeyspaceMetadata, TableMetadataDSE68, diff --git a/tests/unit/advanced/test_policies.py b/tests/unit/advanced/test_policies.py index 79e7410799..b8e4a4e757 100644 --- a/tests/unit/advanced/test_policies.py +++ b/tests/unit/advanced/test_policies.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from mock import Mock diff --git a/tests/unit/cqlengine/test_columns.py b/tests/unit/cqlengine/test_columns.py index bcb174a8c0..a7bf74ec23 100644 --- a/tests/unit/cqlengine/test_columns.py +++ b/tests/unit/cqlengine/test_columns.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra.cqlengine.columns import Column diff --git a/tests/unit/cqlengine/test_connection.py b/tests/unit/cqlengine/test_connection.py index 9f8e500c6b..8e3a0b75bd 100644 --- a/tests/unit/cqlengine/test_connection.py +++ b/tests/unit/cqlengine/test_connection.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra.cluster import _ConfigMode from cassandra.cqlengine import connection diff --git a/tests/unit/cqlengine/test_udt.py b/tests/unit/cqlengine/test_udt.py index ebe1139fd0..0a126513d5 100644 --- a/tests/unit/cqlengine/test_udt.py +++ b/tests/unit/cqlengine/test_udt.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra.cqlengine import columns from cassandra.cqlengine.models import Model diff --git a/tests/unit/cython/test_bytesio.py b/tests/unit/cython/test_bytesio.py index a156fc1272..cd4ea86f52 100644 --- a/tests/unit/cython/test_bytesio.py +++ b/tests/unit/cython/test_bytesio.py @@ -15,10 +15,7 @@ from tests.unit.cython.utils import cyimport, cythontest bytesio_testhelper = cyimport('tests.unit.cython.bytesio_testhelper') -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest class BytesIOTest(unittest.TestCase): diff --git a/tests/unit/cython/test_types.py b/tests/unit/cython/test_types.py index a0d2138c6d..545b82fc11 100644 --- a/tests/unit/cython/test_types.py +++ b/tests/unit/cython/test_types.py @@ -15,10 +15,7 @@ from tests.unit.cython.utils import cyimport, cythontest types_testhelper = cyimport('tests.unit.cython.types_testhelper') -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest class TypesTest(unittest.TestCase): diff --git a/tests/unit/cython/test_utils.py b/tests/unit/cython/test_utils.py index dc8745e471..0e79c235d8 100644 --- a/tests/unit/cython/test_utils.py +++ b/tests/unit/cython/test_utils.py @@ -15,10 +15,7 @@ from tests.unit.cython.utils import cyimport, cythontest utils_testhelper = cyimport('tests.unit.cython.utils_testhelper') -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest class UtilsTest(unittest.TestCase): @@ -26,4 +23,4 @@ class UtilsTest(unittest.TestCase): @cythontest def test_datetime_from_timestamp(self): - utils_testhelper.test_datetime_from_timestamp(self.assertEqual) \ No newline at end of file + utils_testhelper.test_datetime_from_timestamp(self.assertEqual) diff --git a/tests/unit/cython/utils.py b/tests/unit/cython/utils.py index 7f8be22ce0..fc21597c7d 100644 --- a/tests/unit/cython/utils.py +++ b/tests/unit/cython/utils.py @@ -18,10 +18,7 @@ except ImportError: VERIFY_CYTHON = False -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest def cyimport(import_path): """ diff --git a/tests/unit/io/test_asyncorereactor.py b/tests/unit/io/test_asyncorereactor.py index 4e0e540327..6f493896d0 100644 --- a/tests/unit/io/test_asyncorereactor.py +++ b/tests/unit/io/test_asyncorereactor.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from mock import patch import socket diff --git a/tests/unit/io/test_eventletreactor.py b/tests/unit/io/test_eventletreactor.py index ce828cd6d8..e2b6a533a8 100644 --- a/tests/unit/io/test_eventletreactor.py +++ b/tests/unit/io/test_eventletreactor.py @@ -13,10 +13,7 @@ # limitations under the License. 
-try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from tests.unit.io.utils import TimerTestMixin from tests import notpypy, EVENT_LOOP_MANAGER diff --git a/tests/unit/io/test_geventreactor.py b/tests/unit/io/test_geventreactor.py index ec64ce34c1..466b9ae5d5 100644 --- a/tests/unit/io/test_geventreactor.py +++ b/tests/unit/io/test_geventreactor.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from tests.unit.io.utils import TimerTestMixin diff --git a/tests/unit/io/test_libevreactor.py b/tests/unit/io/test_libevreactor.py index a02458edc8..67ab5fc7d6 100644 --- a/tests/unit/io/test_libevreactor.py +++ b/tests/unit/io/test_libevreactor.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from mock import patch, Mock import weakref diff --git a/tests/unit/io/test_twistedreactor.py b/tests/unit/io/test_twistedreactor.py index e7c34cb4b5..b426a820c4 100644 --- a/tests/unit/io/test_twistedreactor.py +++ b/tests/unit/io/test_twistedreactor.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from mock import Mock, patch from cassandra.connection import DefaultEndPoint diff --git a/tests/unit/io/utils.py b/tests/unit/io/utils.py index 848513f031..ddfa2c3198 100644 --- a/tests/unit/io/utils.py +++ b/tests/unit/io/utils.py @@ -37,10 +37,7 @@ from socket import error as socket_error import ssl -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import time diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py index 7b4196f831..68cce526e7 100644 --- a/tests/unit/test_auth.py +++ b/tests/unit/test_auth.py @@ -16,10 +16,7 @@ import six from cassandra.auth import PlainTextAuthenticator -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest class TestPlainTextAuthenticator(unittest.TestCase): diff --git a/tests/unit/test_cluster.py b/tests/unit/test_cluster.py index 620f642084..6755f118fd 100644 --- a/tests/unit/test_cluster.py +++ b/tests/unit/test_cluster.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import logging import six diff --git a/tests/unit/test_concurrent.py b/tests/unit/test_concurrent.py index cc6c12cdaa..9f67531a3c 100644 --- a/tests/unit/test_concurrent.py +++ b/tests/unit/test_concurrent.py @@ -13,10 +13,7 @@ # limitations under the License. 
-try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from itertools import cycle from mock import Mock diff --git a/tests/unit/test_connection.py b/tests/unit/test_connection.py index 21b8862772..f06b67ebe0 100644 --- a/tests/unit/test_connection.py +++ b/tests/unit/test_connection.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from mock import Mock, ANY, call, patch import six diff --git a/tests/unit/test_control_connection.py b/tests/unit/test_control_connection.py index efad1ca5c9..276b2849ca 100644 --- a/tests/unit/test_control_connection.py +++ b/tests/unit/test_control_connection.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import six diff --git a/tests/unit/test_endpoints.py b/tests/unit/test_endpoints.py index 2452e267ba..18f245e64b 100644 --- a/tests/unit/test_endpoints.py +++ b/tests/unit/test_endpoints.py @@ -6,10 +6,7 @@ # You may obtain a copy of the License at # # http://www.datastax.com/terms/datastax-dse-driver-license-terms -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import itertools diff --git a/tests/unit/test_exception.py b/tests/unit/test_exception.py index 3a082f7363..b39b22239c 100644 --- a/tests/unit/test_exception.py +++ b/tests/unit/test_exception.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from cassandra import Unavailable, Timeout, ConsistencyLevel import re diff --git a/tests/unit/test_host_connection_pool.py b/tests/unit/test_host_connection_pool.py index bda48dc76b..86d4bf9843 100644 --- a/tests/unit/test_host_connection_pool.py +++ b/tests/unit/test_host_connection_pool.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from mock import Mock, NonCallableMagicMock from threading import Thread, Event, Lock diff --git a/tests/unit/test_marshalling.py b/tests/unit/test_marshalling.py index c2363e0adc..1fdbfa6a4b 100644 --- a/tests/unit/test_marshalling.py +++ b/tests/unit/test_marshalling.py @@ -15,10 +15,7 @@ from cassandra import ProtocolVersion -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import platform from datetime import datetime, date diff --git a/tests/unit/test_metadata.py b/tests/unit/test_metadata.py index b2143f8c20..b0a8b63b16 100644 --- a/tests/unit/test_metadata.py +++ b/tests/unit/test_metadata.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from binascii import unhexlify import logging diff --git a/tests/unit/test_orderedmap.py b/tests/unit/test_orderedmap.py index f2baab40f0..9ca5699204 100644 --- a/tests/unit/test_orderedmap.py +++ b/tests/unit/test_orderedmap.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra.util import OrderedMap, OrderedMapSerializedKey from cassandra.cqltypes import EMPTY, UTF8Type, lookup_casstype diff --git a/tests/unit/test_parameter_binding.py b/tests/unit/test_parameter_binding.py index 228f3f4432..8820114dc3 100644 --- a/tests/unit/test_parameter_binding.py +++ b/tests/unit/test_parameter_binding.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra.encoder import Encoder from cassandra.protocol import ColumnMetadata diff --git a/tests/unit/test_policies.py b/tests/unit/test_policies.py index 5c0c11281b..a31b4f4c1b 100644 --- a/tests/unit/test_policies.py +++ b/tests/unit/test_policies.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from itertools import islice, cycle from mock import Mock, patch, call diff --git a/tests/unit/test_protocol.py b/tests/unit/test_protocol.py index b43b21eeff..95a7a12b11 100644 --- a/tests/unit/test_protocol.py +++ b/tests/unit/test_protocol.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from mock import Mock diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 7c2bfc0d14..2a2901aaff 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import six diff --git a/tests/unit/test_response_future.py b/tests/unit/test_response_future.py index f76a2d677b..dbd8764ad9 100644 --- a/tests/unit/test_response_future.py +++ b/tests/unit/test_response_future.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from collections import deque from threading import RLock diff --git a/tests/unit/test_resultset.py b/tests/unit/test_resultset.py index b37c3a2594..97002d90d7 100644 --- a/tests/unit/test_resultset.py +++ b/tests/unit/test_resultset.py @@ -13,10 +13,7 @@ # limitations under the License. 
from cassandra.query import named_tuple_factory, dict_factory, tuple_factory -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from mock import Mock, PropertyMock, patch diff --git a/tests/unit/test_row_factories.py b/tests/unit/test_row_factories.py index 13049ba034..70691ad8fd 100644 --- a/tests/unit/test_row_factories.py +++ b/tests/unit/test_row_factories.py @@ -20,10 +20,7 @@ import sys -try: - from unittest import TestCase -except ImportError: - from unittest2 import TestCase +from unittest import TestCase log = logging.getLogger(__name__) diff --git a/tests/unit/test_segment.py b/tests/unit/test_segment.py index fc49339d68..f794b38b1d 100644 --- a/tests/unit/test_segment.py +++ b/tests/unit/test_segment.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import six diff --git a/tests/unit/test_sortedset.py b/tests/unit/test_sortedset.py index 3845c2c31c..49c3658df8 100644 --- a/tests/unit/test_sortedset.py +++ b/tests/unit/test_sortedset.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra.util import sortedset from cassandra.cqltypes import EMPTY diff --git a/tests/unit/test_time_util.py b/tests/unit/test_time_util.py index 7025f151d6..2605992d1c 100644 --- a/tests/unit/test_time_util.py +++ b/tests/unit/test_time_util.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest from cassandra import marshal from cassandra import util diff --git a/tests/unit/test_timestamps.py b/tests/unit/test_timestamps.py index 8903fbc99b..58958cff03 100644 --- a/tests/unit/test_timestamps.py +++ b/tests/unit/test_timestamps.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import mock diff --git a/tests/unit/test_types.py b/tests/unit/test_types.py index 562fd2c899..af3b327ef8 100644 --- a/tests/unit/test_types.py +++ b/tests/unit/test_types.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -try: - import unittest2 as unittest -except ImportError: - import unittest # noqa +import unittest import datetime import tempfile diff --git a/tests/unit/test_util_types.py b/tests/unit/test_util_types.py index b7dc837249..5d6058b394 100644 --- a/tests/unit/test_util_types.py +++ b/tests/unit/test_util_types.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-try:
-    import unittest2 as unittest
-except ImportError:
-    import unittest  # noqa
+import unittest

 import datetime


From e4e290fe2dc5f15f677f33bbb40ed115fce2406d Mon Sep 17 00:00:00 2001
From: Bret McGuire
Date: Wed, 2 Feb 2022 10:21:02 -0600
Subject: [PATCH 131/211] Merge pull request #1119 from datastax/python-1290

PYTHON-1290 Convert asyncio reactor away from @asyncio.coroutine
---
 cassandra/io/asyncioreactor.py | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/cassandra/io/asyncioreactor.py b/cassandra/io/asyncioreactor.py
index 7cb0444a32..ab0e90ae09 100644
--- a/cassandra/io/asyncioreactor.py
+++ b/cassandra/io/asyncioreactor.py
@@ -46,9 +46,8 @@ def __init__(self, timeout, callback, loop):
         self._handle = asyncio.run_coroutine_threadsafe(delayed, loop=loop)

     @staticmethod
-    @asyncio.coroutine
-    def _call_delayed_coro(timeout, callback, loop):
-        yield from asyncio.sleep(timeout, loop=loop)
+    async def _call_delayed_coro(timeout, callback, loop):
+        await asyncio.sleep(timeout, loop=loop)
         return callback()

     def __lt__(self, other):
@@ -136,8 +135,7 @@ def close(self):
             self._close(), loop=self._loop
         )

-    @asyncio.coroutine
-    def _close(self):
+    async def _close(self):
         log.debug("Closing connection (%s) to %s" % (id(self), self.endpoint))
         if self._write_watcher:
             self._write_watcher.cancel()
@@ -174,21 +172,19 @@ def push(self, data):
             # avoid races/hangs by just scheduling this, not using threadsafe
             self._loop.create_task(self._push_msg(chunks))

-    @asyncio.coroutine
-    def _push_msg(self, chunks):
+    async def _push_msg(self, chunks):
         # This lock ensures all chunks of a message are sequential in the Queue
-        with (yield from self._write_queue_lock):
+        with await self._write_queue_lock:
             for chunk in chunks:
                 self._write_queue.put_nowait(chunk)

-    @asyncio.coroutine
-    def handle_write(self):
+    async def handle_write(self):
         while True:
             try:
-                next_msg = yield from self._write_queue.get()
+                next_msg = await self._write_queue.get()
                 if next_msg:
-                    yield from self._loop.sock_sendall(self._socket, next_msg)
+                    await self._loop.sock_sendall(self._socket, next_msg)
             except socket.error as err:
                 log.debug("Exception in send for %s: %s", self, err)
                 self.defunct(err)
@@ -196,18 +192,19 @@ def handle_write(self):
             except asyncio.CancelledError:
                 return

-    @asyncio.coroutine
-    def handle_read(self):
+    async def handle_read(self):
         while True:
             try:
-                buf = yield from self._loop.sock_recv(self._socket, self.in_buffer_size)
+                buf = await self._loop.sock_recv(self._socket, self.in_buffer_size)
                 self._iobuf.write(buf)
             # sock_recv expects EWOULDBLOCK if socket provides no data, but
             # nonblocking ssl sockets raise these instead, so we handle them
             # ourselves by yielding to the event loop, where the socket will
             # get the reading/writing it "wants" before retrying
             except (ssl.SSLWantWriteError, ssl.SSLWantReadError):
-                yield
+                # Apparently the preferred way to yield to the event loop from within
+                # a native coroutine based on https://github.com/python/asyncio/issues/284
+                await asyncio.sleep(0)
                 continue
             except socket.error as err:
                 log.debug("Exception during socket recv for %s: %s",

From 8c4c6536d4728e052771651120df6cbb2e730773 Mon Sep 17 00:00:00 2001
From: Andy Salnikov
Date: Wed, 16 Mar 2022 13:19:03 -0700
Subject: [PATCH 132/211] Merge pull request #1122 from andy-slac/concurrent-execution-profiles

Adds one more keyword argument `execution_profile` to the
`execute_concurrent` method to pass an execution profile. It is
forwarded to the `Session.execute_async` call.
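
A minimal usage sketch of the new argument (the 'dict_rows' profile name
is illustrative; test3rf.test is the keyspace/table the driver's own
integration tests use, and a reachable cluster is assumed):

    from cassandra.cluster import Cluster, ExecutionProfile
    from cassandra.concurrent import execute_concurrent
    from cassandra.query import dict_factory

    # Register a named profile whose row_factory differs from the default.
    cluster = Cluster()
    cluster.add_execution_profile('dict_rows',
                                  ExecutionProfile(row_factory=dict_factory))
    session = cluster.connect()

    statements_and_params = [
        ("SELECT v FROM test3rf.test WHERE k=%s", (i,)) for i in range(10)
    ]

    # The named profile is forwarded to Session.execute_async for every
    # statement, so each returned row is a dict instead of a named tuple.
    results = execute_concurrent(session, statements_and_params,
                                 execution_profile='dict_rows')
    for success, rows in results:
        print(success, rows)

Statements executed without the argument continue to use
EXEC_PROFILE_DEFAULT, so existing callers are unaffected.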
---
 cassandra/concurrent.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/cassandra/concurrent.py b/cassandra/concurrent.py
index a8bddcbdab..0228f297fe 100644
--- a/cassandra/concurrent.py
+++ b/cassandra/concurrent.py
@@ -21,7 +21,7 @@
 from threading import Condition
 import sys

-from cassandra.cluster import ResultSet
+from cassandra.cluster import ResultSet, EXEC_PROFILE_DEFAULT

 import logging
 log = logging.getLogger(__name__)
@@ -29,7 +29,7 @@
 ExecutionResult = namedtuple('ExecutionResult', ['success', 'result_or_exc'])


-def execute_concurrent(session, statements_and_parameters, concurrency=100, raise_on_first_error=True, results_generator=False):
+def execute_concurrent(session, statements_and_parameters, concurrency=100, raise_on_first_error=True, results_generator=False, execution_profile=EXEC_PROFILE_DEFAULT):
     """
     Executes a sequence of (statement, parameters) tuples concurrently.  Each
     ``parameters`` item must be a sequence or :const:`None`.
@@ -56,6 +56,9 @@ def execute_concurrent(session, statements_and_parameters, concurrency=100, rais
     footprint is marginal CPU overhead (more thread coordination and sorting
     out-of-order results on-the-fly).

+    The `execution_profile` argument is the execution profile to use for this
+    request; it is passed directly to :meth:`Session.execute_async`.
+
     A sequence of ``ExecutionResult(success, result_or_exc)`` namedtuples is returned
     in the same order that the statements were passed in.  If ``success`` is :const:`False`,
     there was an error executing the statement, and ``result_or_exc`` will be
@@ -90,7 +93,8 @@ def execute_concurrent(session, statements_and_parameters, concurrency=100, rais
     if not statements_and_parameters:
         return []

-    executor = ConcurrentExecutorGenResults(session, statements_and_parameters) if results_generator else ConcurrentExecutorListResults(session, statements_and_parameters)
+    executor = ConcurrentExecutorGenResults(session, statements_and_parameters, execution_profile) \
+        if results_generator else ConcurrentExecutorListResults(session, statements_and_parameters, execution_profile)
     return executor.execute(concurrency, raise_on_first_error)


@@ -98,9 +102,10 @@ class _ConcurrentExecutor(object):

     max_error_recursion = 100

-    def __init__(self, session, statements_and_params):
+    def __init__(self, session, statements_and_params, execution_profile):
         self.session = session
         self._enum_statements = enumerate(iter(statements_and_params))
+        self._execution_profile = execution_profile
         self._condition = Condition()
         self._fail_fast = False
         self._results_queue = []
@@ -132,7 +137,7 @@ def _execute_next(self):

     def _execute(self, idx, statement, params):
         self._exec_depth += 1
         try:
-            future = self.session.execute_async(statement, params, timeout=None)
+            future = self.session.execute_async(statement, params, timeout=None, execution_profile=self._execution_profile)
             args = (future, idx)
             future.add_callbacks(
                 callback=self._on_success, callback_args=args,

From 9e4904917dcc895c96daafbfe7cc215f101e8f8c Mon Sep 17 00:00:00 2001
From: Bret McGuire
Date: Wed, 16 Mar 2022 15:24:32 -0500
Subject: [PATCH 133/211] Add tests for recent addition of execution profile
 support to cassandra.concurrent

---
 tests/integration/standard/test_concurrent.py | 66 +++++++++++--------
 1 file changed, 38 insertions(+), 28 deletions(-)

diff --git a/tests/integration/standard/test_concurrent.py b/tests/integration/standard/test_concurrent.py
index ad4ef47473..15da526bde 100644
--- a/tests/integration/standard/test_concurrent.py
+++ 
b/tests/integration/standard/test_concurrent.py @@ -20,7 +20,7 @@ from cassandra.cluster import ExecutionProfile, EXEC_PROFILE_DEFAULT from cassandra.concurrent import execute_concurrent, execute_concurrent_with_args, ExecutionResult from cassandra.policies import HostDistance -from cassandra.query import tuple_factory, SimpleStatement +from cassandra.query import dict_factory, tuple_factory, SimpleStatement from tests.integration import use_singledc, PROTOCOL_VERSION, TestCluster @@ -35,13 +35,16 @@ def setup_module(): use_singledc() +EXEC_PROFILE_DICT = "dict" + class ClusterTests(unittest.TestCase): @classmethod def setUpClass(cls): cls.cluster = TestCluster( execution_profiles = { - EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=tuple_factory) + EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=tuple_factory), + EXEC_PROFILE_DICT: ExecutionProfile(row_factory=dict_factory) } ) if PROTOCOL_VERSION < 3: @@ -52,11 +55,11 @@ def setUpClass(cls): def tearDownClass(cls): cls.cluster.shutdown() - def execute_concurrent_helper(self, session, query, results_generator=False): + def execute_concurrent_helper(self, session, query, **kwargs): count = 0 while count < 100: try: - return execute_concurrent(session, query, results_generator=False) + return execute_concurrent(session, query, results_generator=False, **kwargs) except (ReadTimeout, WriteTimeout, OperationTimedOut, ReadFailure, WriteFailure): ex_type, ex, tb = sys.exc_info() log.warning("{0}: {1} Backtrace: {2}".format(ex_type.__name__, ex, traceback.extract_tb(tb))) @@ -65,11 +68,11 @@ def execute_concurrent_helper(self, session, query, results_generator=False): raise RuntimeError("Failed to execute query after 100 attempts: {0}".format(query)) - def execute_concurrent_args_helper(self, session, query, params, results_generator=False): + def execute_concurrent_args_helper(self, session, query, params, results_generator=False, **kwargs): count = 0 while count < 100: try: - return execute_concurrent_with_args(session, query, params, results_generator=results_generator) + return execute_concurrent_with_args(session, query, params, results_generator=results_generator, **kwargs) except (ReadTimeout, WriteTimeout, OperationTimedOut, ReadFailure, WriteFailure): ex_type, ex, tb = sys.exc_info() log.warning("{0}: {1} Backtrace: {2}".format(ex_type.__name__, ex, traceback.extract_tb(tb))) @@ -77,7 +80,7 @@ def execute_concurrent_args_helper(self, session, query, params, results_generat raise RuntimeError("Failed to execute query after 100 attempts: {0}".format(query)) - def test_execute_concurrent(self): + def execute_concurrent_base(self, test_fn, validate_fn, zip_args=True): for num_statements in (0, 1, 2, 7, 10, 99, 100, 101, 199, 200, 201): # write statement = SimpleStatement( @@ -86,7 +89,9 @@ def test_execute_concurrent(self): statements = cycle((statement, )) parameters = [(i, i) for i in range(num_statements)] - results = self.execute_concurrent_helper(self.session, list(zip(statements, parameters))) + results = \ + test_fn(self.session, list(zip(statements, parameters))) if zip_args else \ + test_fn(self.session, statement, parameters) self.assertEqual(num_statements, len(results)) for success, result in results: self.assertTrue(success) @@ -99,32 +104,37 @@ def test_execute_concurrent(self): statements = cycle((statement, )) parameters = [(i, ) for i in range(num_statements)] - results = self.execute_concurrent_helper(self.session, list(zip(statements, parameters))) + results = \ + test_fn(self.session, list(zip(statements, 
parameters))) if zip_args else \ + test_fn(self.session, statement, parameters) + validate_fn(num_statements, results) + + def execute_concurrent_valiate_tuple(self, num_statements, results): self.assertEqual(num_statements, len(results)) self.assertEqual([(True, [(i,)]) for i in range(num_statements)], results) - def test_execute_concurrent_with_args(self): - for num_statements in (0, 1, 2, 7, 10, 99, 100, 101, 199, 200, 201): - statement = SimpleStatement( - "INSERT INTO test3rf.test (k, v) VALUES (%s, %s)", - consistency_level=ConsistencyLevel.QUORUM) - parameters = [(i, i) for i in range(num_statements)] - - results = self.execute_concurrent_args_helper(self.session, statement, parameters) + def execute_concurrent_valiate_dict(self, num_statements, results): self.assertEqual(num_statements, len(results)) - for success, result in results: - self.assertTrue(success) - self.assertFalse(result) + self.assertEqual([(True, [{"v":i}]) for i in range(num_statements)], results) - # read - statement = SimpleStatement( - "SELECT v FROM test3rf.test WHERE k=%s", - consistency_level=ConsistencyLevel.QUORUM) - parameters = [(i, ) for i in range(num_statements)] + def test_execute_concurrent(self): + self.execute_concurrent_base(self.execute_concurrent_helper, \ + self.execute_concurrent_valiate_tuple) - results = self.execute_concurrent_args_helper(self.session, statement, parameters) - self.assertEqual(num_statements, len(results)) - self.assertEqual([(True, [(i,)]) for i in range(num_statements)], results) + def test_execute_concurrent_with_args(self): + self.execute_concurrent_base(self.execute_concurrent_args_helper, \ + self.execute_concurrent_valiate_tuple, \ + zip_args=False) + + def test_execute_concurrent_with_execution_profile(self): + def run_fn(*args, **kwargs): + return self.execute_concurrent_helper(*args, execution_profile=EXEC_PROFILE_DICT, **kwargs) + self.execute_concurrent_base(run_fn, self.execute_concurrent_valiate_dict) + + def test_execute_concurrent_with_args_and_execution_profile(self): + def run_fn(*args, **kwargs): + return self.execute_concurrent_args_helper(*args, execution_profile=EXEC_PROFILE_DICT, **kwargs) + self.execute_concurrent_base(run_fn, self.execute_concurrent_valiate_dict, zip_args=False) def test_execute_concurrent_with_args_generator(self): """ From a7295e103023e12152fc0940906071b18356def3 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Thu, 17 Mar 2022 14:06:08 -0500 Subject: [PATCH 134/211] PYTHON-1294: Upgrade importlib-metadata to a much newer version --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 7e1e374822..906775e90c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,7 @@ python: - "3.5" - "3.6" - "3.7" + - "3.8" - "pypy2.7-6.0" - "pypy3.5" @@ -24,7 +25,7 @@ addons: - libev-dev install: - - pip install --upgrade setuptools + - pip install --upgrade setuptools importlib-metadata - pip install tox-travis - if [[ $TRAVIS_PYTHON_VERSION != pypy3.5 ]]; then pip install lz4; fi From 9a645c58ca0ec57f775251f94e55c30aa837b2ad Mon Sep 17 00:00:00 2001 From: Emmanuel Arias Date: Tue, 17 May 2022 15:24:12 -0300 Subject: [PATCH 135/211] Merge pull request #1126 from eamanu/fix-typos Fix typos detected by Lintian during the packaging In Debian --- cassandra/cluster.py | 2 +- cassandra/util.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cassandra/cluster.py b/cassandra/cluster.py index cf78725f17..c836fb4302 100644 --- a/cassandra/cluster.py +++ 
b/cassandra/cluster.py @@ -2391,7 +2391,7 @@ def default_consistency_level(self, cl): *Deprecated:* use execution profiles instead """ warn("Setting the consistency level at the session level will be removed in 4.0. Consider using " - "execution profiles and setting the desired consitency level to the EXEC_PROFILE_DEFAULT profile." + "execution profiles and setting the desired consistency level to the EXEC_PROFILE_DEFAULT profile." , DeprecationWarning) self._validate_set_legacy_config('default_consistency_level', cl) diff --git a/cassandra/util.py b/cassandra/util.py index f896ff4f86..dd5c58b01d 100644 --- a/cassandra/util.py +++ b/cassandra/util.py @@ -797,7 +797,7 @@ class OrderedMap(Mapping): ''' An ordered map that accepts non-hashable types for keys. It also maintains the insertion order of items, behaving as OrderedDict in that regard. These maps - are constructed and read just as normal mapping types, exept that they may + are constructed and read just as normal mapping types, except that they may contain arbitrary collections and other non-hashable items as keys:: >>> od = OrderedMap([({'one': 1, 'two': 2}, 'value'), From e4e34846d548cd2dc8e070d5c3ec43f28b980c03 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Sat, 3 Sep 2022 04:32:59 -0500 Subject: [PATCH 136/211] Ninja fix for quorum docstring --- cassandra/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cassandra/__init__.py b/cassandra/__init__.py index 5739d5d98e..e6cb5c55bb 100644 --- a/cassandra/__init__.py +++ b/cassandra/__init__.py @@ -55,7 +55,7 @@ class ConsistencyLevel(object): QUORUM = 4 """ - ``ceil(RF/2)`` replicas must respond to consider the operation a success + ``ceil(RF/2) + 1`` replicas must respond to consider the operation a success """ ALL = 5 From b01372a879eacfc525d79fa299da1288d8342291 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Fri, 16 Dec 2022 15:17:51 -0600 Subject: [PATCH 137/211] Applying fixes to Jenkinsfile to get build working in AWS --- Jenkinsfile | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index abb6092758..58f189ebee 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -357,26 +357,6 @@ def getDriverMetricType() { return metric_type } -def submitCIMetrics(buildType) { - long durationMs = currentBuild.duration - long durationSec = durationMs / 1000 - long nowSec = (currentBuild.startTimeInMillis + durationMs) / 1000 - def branchNameNoPeriods = env.BRANCH_NAME.replaceAll('\\.', '_') - metric_type = getDriverMetricType() - def durationMetric = "okr.ci.python.${metric_type}.${buildType}.${branchNameNoPeriods} ${durationSec} ${nowSec}" - - timeout(time: 1, unit: 'MINUTES') { - withCredentials([string(credentialsId: 'lab-grafana-address', variable: 'LAB_GRAFANA_ADDRESS'), - string(credentialsId: 'lab-grafana-port', variable: 'LAB_GRAFANA_PORT')]) { - withEnv(["DURATION_METRIC=${durationMetric}"]) { - sh label: 'Send runtime metrics to labgrafana', script: '''#!/bin/bash -lex - echo "${DURATION_METRIC}" | nc -q 5 ${LAB_GRAFANA_ADDRESS} ${LAB_GRAFANA_PORT} - ''' - } - } - } -} - def describeBuild(buildContext) { script { def runtimes = buildContext.matrix["RUNTIME"] @@ -387,7 +367,9 @@ def describeBuild(buildContext) { } } -def scheduleTriggerJobName = "drivers/python/oss/master/disabled" +def scheduleTriggerJobName() { + "drivers/python/oss/master/disabled" +} pipeline { agent none @@ -663,8 +645,6 @@ pipeline { // build and test all builds parallel getMatrixBuilds(context) - // send the 
metrics - submitCIMetrics('commit') slack.notifyChannel(currentBuild.currentResult) } } From 5503bab7688edbd378b424eb917d056c690c871c Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Fri, 16 Dec 2022 15:47:10 -0600 Subject: [PATCH 138/211] Fix to prior fix --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 58f189ebee..f39a79a8b4 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -608,7 +608,7 @@ pipeline { } triggers { - parameterizedCron((scheduleTriggerJobName == env.JOB_NAME) ? """ + parameterizedCron((scheduleTriggerJobName() == env.JOB_NAME) ? """ # Every weeknight (Monday - Friday) around 4:00 AM # These schedules will run with and without Cython enabled for Python v2.7.18 and v3.5.9 H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP=LIBEV;CI_SCHEDULE_PYTHON_VERSION=2.7.18 3.5.9;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1 dse-6.0 dse-6.7 From 3dbe62012389250e11d0f0e6cf731a5278af35d7 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Wed, 21 Dec 2022 14:48:08 -0600 Subject: [PATCH 139/211] Smaller smoke test configuration to avoid explosion of test builds in AWS --- Jenkinsfile | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index f39a79a8b4..283220e57d 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -50,6 +50,15 @@ matrices = [ "SERVER": ['dse-5.0', 'dse-5.1', 'dse-6.0', 'dse-6.7', 'dse-6.8'], "RUNTIME": ['2.7.18', '3.5.9', '3.6.10', '3.7.7', '3.8.3'], "CYTHON": ["True", "False"] + ], + /* + CI-friendly test configuration. Currently-supported Python version + modern C*/DSE instances. + We also avoid cython since it's tested as part of the nightlies. + */ + "SMOKE": [ + "SERVER": ['3.11', '4.0', 'dse-6.8'], + "RUNTIME": ['3.7.7', '3.8.3'], + "CYTHON": ["False"] ] ] @@ -72,7 +81,7 @@ def getBuildContext() { def profile = "${params.PROFILE}" def EVENT_LOOP = "${params.EVENT_LOOP.toLowerCase()}" - matrixType = "FULL" + matrixType = "SMOKE" developBranchPattern = ~"((dev|long)-)?python-.*" if (developBranchPattern.matcher(env.BRANCH_NAME).matches()) { @@ -404,7 +413,7 @@ pipeline { ''') choice( name: 'PROFILE', - choices: ['STANDARD', 'FULL', 'DSE-SMOKE-TEST', 'EVENT_LOOP'], + choices: ['SMOKE', 'STANDARD', 'FULL', 'DSE-SMOKE-TEST', 'EVENT_LOOP'], description: '''

Profile to utilize for scheduled or adhoc builds

@@ -413,6 +422,10 @@ pipeline { + + + + From 51416d9597d991eab22163a06a4b88cdd0939b8a Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Wed, 21 Dec 2022 14:52:21 -0600 Subject: [PATCH 140/211] Hey, let's actually update the right things, shall we? --- Jenkinsfile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 283220e57d..e8281a15b5 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -413,7 +413,7 @@ pipeline {
Choice Description
SMOKEBasic smoke tests for current Python runtimes + C*/DSE versions
STANDARD Execute the standard tests for the driver
''') choice( name: 'PROFILE', - choices: ['SMOKE', 'STANDARD', 'FULL', 'DSE-SMOKE-TEST', 'EVENT_LOOP'], + choices: ['STANDARD', 'FULL', 'DSE-SMOKE-TEST', 'EVENT_LOOP'], description: '''

Profile to utilize for scheduled or adhoc builds

@@ -422,10 +422,6 @@ pipeline { - - - - @@ -445,7 +441,7 @@ pipeline {
Choice Description
SMOKEBasic smoke tests for current Python runtimes + C*/DSE versions
STANDARD Execute the standard tests for the driver
''') choice( name: 'MATRIX', - choices: ['DEFAULT', 'FULL', 'DEVELOP', 'CASSANDRA', 'DSE'], + choices: ['DEFAULT', 'SMOKE', 'FULL', 'DEVELOP', 'CASSANDRA', 'DSE'], description: '''

The matrix for the build.

@@ -458,6 +454,10 @@ pipeline { + + + + From 1c9b4bd34da54d2ad2fccdf8479b62704615bd50 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Wed, 21 Dec 2022 14:56:05 -0600 Subject: [PATCH 141/211] Groovy fixes --- Jenkinsfile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index e8281a15b5..9536f52aa7 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -30,6 +30,9 @@ import com.datastax.jenkins.drivers.python.Slack slack = new Slack() // Define our predefined matrices +// +// Smoke tests are CI-friendly test configuration. Currently-supported Python version + modern C*/DSE instances. +// We also avoid cython since it's tested as part of the nightlies. matrices = [ "FULL": [ "SERVER": ['2.1', '2.2', '3.0', '3.11', '4.0', 'dse-5.0', 'dse-5.1', 'dse-6.0', 'dse-6.7', 'dse-6.8'], @@ -51,10 +54,6 @@ matrices = [ "RUNTIME": ['2.7.18', '3.5.9', '3.6.10', '3.7.7', '3.8.3'], "CYTHON": ["True", "False"] ], - /* - CI-friendly test configuration. Currently-supported Python version + modern C*/DSE instances. - We also avoid cython since it's tested as part of the nightlies. - */ "SMOKE": [ "SERVER": ['3.11', '4.0', 'dse-6.8'], "RUNTIME": ['3.7.7', '3.8.3'], From dee95953b070b2f72ab564f48236466a3564aaba Mon Sep 17 00:00:00 2001 From: Stefano Rivera Date: Wed, 4 Jan 2023 21:55:21 -0800 Subject: [PATCH 142/211] HostFilterPolicyInitTest fix for Python 3.11 (#1131) The AttributeError message for a missing property setter changed in bpo-46730 (https://bugs.python.org/issue46730) --- tests/unit/test_policies.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_policies.py b/tests/unit/test_policies.py index a31b4f4c1b..88db23daba 100644 --- a/tests/unit/test_policies.py +++ b/tests/unit/test_policies.py @@ -1295,7 +1295,10 @@ def test_init_kwargs(self): )) def test_immutable_predicate(self): - expected_message_regex = "can't set attribute" + if sys.version_info >= (3, 11): + expected_message_regex = "has no setter" + else: + expected_message_regex = "can't set attribute" hfp = HostFilterPolicy(child_policy=Mock(name='child_policy'), predicate=Mock(name='predicate')) with self.assertRaisesRegexp(AttributeError, expected_message_regex): From da026e78361963f363c28638547179e24c89935d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomek=20=C5=81asica?= Date: Tue, 24 Jan 2023 21:11:52 +0100 Subject: [PATCH 143/211] Handle "log gone" case in the end of _run_loop (#1133) If log is somehow gone and file exception due to the race mention in PYTHON-1266 it will also inevitably fail for the same reason after the loop so we need to catch the exception there as well. --- cassandra/io/asyncorereactor.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cassandra/io/asyncorereactor.py b/cassandra/io/asyncorereactor.py index 681552e589..074c62f690 100644 --- a/cassandra/io/asyncorereactor.py +++ b/cassandra/io/asyncorereactor.py @@ -259,7 +259,13 @@ def _run_loop(self): break self._started = False - log.debug("Asyncore event loop ended") + try: + log.debug("Asyncore event loop ended") + except Exception: + # TODO: Remove when Python 2 support is removed + # PYTHON-1266. If our logger has disappeared, there's nothing we + # can do, so just log nothing. 
+ pass def add_timer(self, timer): self._timers.add_timer(timer) From 3638de4ae878109c691e61742d6e8ba5d0a98ebf Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Tue, 24 Jan 2023 14:25:14 -0600 Subject: [PATCH 144/211] Minor refactor of prior commit --- cassandra/io/asyncorereactor.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/cassandra/io/asyncorereactor.py b/cassandra/io/asyncorereactor.py index 074c62f690..0abdbbfe0a 100644 --- a/cassandra/io/asyncorereactor.py +++ b/cassandra/io/asyncorereactor.py @@ -248,24 +248,21 @@ def _run_loop(self): try: self._loop_dispatcher.loop(self.timer_resolution) self._timers.service_timeouts() - except Exception: - try: - log.debug("Asyncore event loop stopped unexpectedly", exc_info=True) - except Exception: - # TODO: Remove when Python 2 support is removed - # PYTHON-1266. If our logger has disappeared, there's nothing we - # can do, so just log nothing. - pass + except Exception as exc: + self._maybe_log_debug("Asyncore event loop stopped unexpectedly", exc_info=exc) break self._started = False + self._maybe_log_debug("Asyncore event loop ended") + + def _maybe_log_debug(self, *args, **kwargs): try: - log.debug("Asyncore event loop ended") + log.debug(*args, **kwargs) except Exception: # TODO: Remove when Python 2 support is removed # PYTHON-1266. If our logger has disappeared, there's nothing we # can do, so just log nothing. - pass + pass def add_timer(self, timer): self._timers.add_timer(timer) From 728c7126e1c7a09c6cd470cad2cba55fbbe49cb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Reis?= Date: Thu, 23 Feb 2023 16:43:39 +0000 Subject: [PATCH 145/211] Fix jenkins builds (#1134) * remove master node dependency * set git env vars "manually" * fix branch_name * disable concurrent builds * workaround dse versions --- Jenkinsfile | 85 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 32 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 9536f52aa7..0fdafb17d2 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -35,12 +35,12 @@ slack = new Slack() // We also avoid cython since it's tested as part of the nightlies. matrices = [ "FULL": [ - "SERVER": ['2.1', '2.2', '3.0', '3.11', '4.0', 'dse-5.0', 'dse-5.1', 'dse-6.0', 'dse-6.7', 'dse-6.8'], + "SERVER": ['2.1', '2.2', '3.0', '3.11', '4.0', 'dse-5.0.15', 'dse-5.1.35', 'dse-6.0.18', 'dse-6.7.17', 'dse-6.8.30'], "RUNTIME": ['2.7.18', '3.5.9', '3.6.10', '3.7.7', '3.8.3'], "CYTHON": ["True", "False"] ], "DEVELOP": [ - "SERVER": ['2.1', '3.11', 'dse-6.8'], + "SERVER": ['2.1', '3.11', 'dse-6.8.30'], "RUNTIME": ['2.7.18', '3.6.10'], "CYTHON": ["True", "False"] ], @@ -50,20 +50,20 @@ matrices = [ "CYTHON": ["True", "False"] ], "DSE": [ - "SERVER": ['dse-5.0', 'dse-5.1', 'dse-6.0', 'dse-6.7', 'dse-6.8'], + "SERVER": ['dse-5.0.15', 'dse-5.1.35', 'dse-6.0.18', 'dse-6.7.17', 'dse-6.8.30'], "RUNTIME": ['2.7.18', '3.5.9', '3.6.10', '3.7.7', '3.8.3'], "CYTHON": ["True", "False"] ], "SMOKE": [ - "SERVER": ['3.11', '4.0', 'dse-6.8'], + "SERVER": ['3.11', '4.0', 'dse-6.8.30'], "RUNTIME": ['3.7.7', '3.8.3'], "CYTHON": ["False"] ] ] -def getBuildContext() { +def initializeSlackContext() { /* - Based on schedule, parameters and branch name, configure the build context and env vars. + Based on git branch/commit, configure the build context and env vars. 
*/ def driver_display_name = 'Cassandra Python Driver' @@ -72,11 +72,17 @@ def getBuildContext() { } else if (env.GIT_URL.contains('python-dse-driver')) { driver_display_name = 'DSE Python Driver' } + env.DRIVER_DISPLAY_NAME = driver_display_name + env.GIT_SHA = "${env.GIT_COMMIT.take(7)}" + env.GITHUB_PROJECT_URL = "https://${GIT_URL.replaceFirst(/(git@|http:\/\/|https:\/\/)/, '').replace(':', '/').replace('.git', '')}" + env.GITHUB_BRANCH_URL = "${env.GITHUB_PROJECT_URL}/tree/${env.BRANCH_NAME}" + env.GITHUB_COMMIT_URL = "${env.GITHUB_PROJECT_URL}/commit/${env.GIT_COMMIT}" +} - def git_sha = "${env.GIT_COMMIT.take(7)}" - def github_project_url = "https://${GIT_URL.replaceFirst(/(git@|http:\/\/|https:\/\/)/, '').replace(':', '/').replace('.git', '')}" - def github_branch_url = "${github_project_url}/tree/${env.BRANCH_NAME}" - def github.amrom.workers.devmit_url = "${github_project_url}/commit/${env.GIT_COMMIT}" +def getBuildContext() { + /* + Based on schedule and parameters, configure the build context and env vars. + */ def profile = "${params.PROFILE}" def EVENT_LOOP = "${params.EVENT_LOOP.toLowerCase()}" @@ -116,9 +122,7 @@ def getBuildContext() { context = [ vars: [ "PROFILE=${profile}", - "EVENT_LOOP=${EVENT_LOOP}", - "DRIVER_DISPLAY_NAME=${driver_display_name}", "GIT_SHA=${git_sha}", "GITHUB_PROJECT_URL=${github_project_url}", - "GITHUB_BRANCH_URL=${github_branch_url}", "GITHUB_COMMIT_URL=${github.amrom.workers.devmit_url}" + "EVENT_LOOP=${EVENT_LOOP}" ], matrix: matrix ] @@ -152,7 +156,14 @@ def getMatrixBuilds(buildContext) { def cythonDesc = cythonFlag == "True" ? ", Cython": "" tasks["${serverVersion}, py${runtimeVersion}${cythonDesc}"] = { node("${OS_VERSION}") { - checkout scm + scm_variables = checkout scm + env.GIT_COMMIT = scm_variables.get('GIT_COMMIT') + env.GIT_URL = scm_variables.get('GIT_URL') + initializeSlackContext() + + if (env.BUILD_STATED_SLACK_NOTIFIED != 'true') { + slack.notifyChannel() + } withEnv(taskVars) { buildAndTest(context) @@ -203,6 +214,21 @@ def initializeEnvironment() { . 
${CCM_ENVIRONMENT_SHELL} ${CASSANDRA_VERSION} ''' + if (env.CASSANDRA_VERSION.split('-')[0] == 'dse') { + env.DSE_FIXED_VERSION = env.CASSANDRA_VERSION.split('-')[1] + sh label: 'Update environment for DataStax Enterprise', script: '''#!/bin/bash -le + cat >> ${HOME}/environment.txt << ENVIRONMENT_EOF +CCM_CASSANDRA_VERSION=${DSE_FIXED_VERSION} # maintain for backwards compatibility +CCM_VERSION=${DSE_FIXED_VERSION} +CCM_SERVER_TYPE=dse +DSE_VERSION=${DSE_FIXED_VERSION} +CCM_IS_DSE=true +CCM_BRANCH=${DSE_FIXED_VERSION} +DSE_BRANCH=${DSE_FIXED_VERSION} +ENVIRONMENT_EOF + ''' + } + sh label: 'Display Python and environment information', script: '''#!/bin/bash -le # Load CCM environment variables set -o allexport @@ -384,6 +410,7 @@ pipeline { // Global pipeline timeout options { + disableConcurrentBuilds() timeout(time: 10, unit: 'HOURS') // TODO timeout should be per build buildDiscarder(logRotator(artifactNumToKeepStr: '10', // Keep only the last 10 artifacts numToKeepStr: '50')) // Keep only the last 50 build records @@ -486,11 +513,11 @@ pipeline { '3.0', // Previous Apache CassandraⓇ '3.11', // Current Apache CassandraⓇ '4.0', // Development Apache CassandraⓇ - 'dse-5.0', // Long Term Support DataStax Enterprise - 'dse-5.1', // Legacy DataStax Enterprise - 'dse-6.0', // Previous DataStax Enterprise - 'dse-6.7', // Previous DataStax Enterprise - 'dse-6.8', // Current DataStax Enterprise + 'dse-5.0.15', // Long Term Support DataStax Enterprise + 'dse-5.1.35', // Legacy DataStax Enterprise + 'dse-6.0.18', // Previous DataStax Enterprise + 'dse-6.7.17', // Previous DataStax Enterprise + 'dse-6.8.30', // Current DataStax Enterprise ], description: '''Apache CassandraⓇ and DataStax Enterprise server version to use for adhoc BUILD-AND-EXECUTE-TESTS ONLY!
DEFAULT Default to the build context.
SMOKEBasic smoke tests for current Python runtimes + C*/DSE versions, no Cython
FULL All server versions, python runtimes tested with and without Cython.
@@ -525,23 +552,23 @@ pipeline { - + - + - + - + - +
Apache CassandraⓇ v4.x (CURRENTLY UNDER DEVELOPMENT)
dse-5.0dse-5.0.15 DataStax Enterprise v5.0.x (Long Term Support)
dse-5.1dse-5.1.35 DataStax Enterprise v5.1.x
dse-6.0dse-6.0.18 DataStax Enterprise v6.0.x
dse-6.7dse-6.7.17 DataStax Enterprise v6.7.x
dse-6.8dse-6.8.30 DataStax Enterprise v6.8.x (CURRENTLY UNDER DEVELOPMENT)
''') @@ -623,7 +650,7 @@ pipeline { parameterizedCron((scheduleTriggerJobName() == env.JOB_NAME) ? """ # Every weeknight (Monday - Friday) around 4:00 AM # These schedules will run with and without Cython enabled for Python v2.7.18 and v3.5.9 - H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP=LIBEV;CI_SCHEDULE_PYTHON_VERSION=2.7.18 3.5.9;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1 dse-6.0 dse-6.7 + H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP=LIBEV;CI_SCHEDULE_PYTHON_VERSION=2.7.18 3.5.9;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1.35 dse-6.0.18 dse-6.7.17 """ : "") } @@ -635,11 +662,6 @@ pipeline { stages { stage ('Build and Test') { - agent { - // // If I removed this agent block, GIT_URL and GIT_COMMIT aren't set. - // // However, this trigger an additional checkout - label "master" - } when { beforeAgent true allOf { @@ -651,8 +673,7 @@ pipeline { script { context = getBuildContext() withEnv(context.vars) { - describeBuild(context) - slack.notifyChannel() + describeBuild(context) // build and test all builds parallel getMatrixBuilds(context) From 02aa886946d3e308f0e646cba8b61bed7a85ea11 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Tue, 28 Feb 2023 14:22:05 -0600 Subject: [PATCH 146/211] Merge pull request #1128 from python-driver/python-1304 Contains fixes for PYTHON-1304 and PYTHON-1287, both of which describe test failures caused by recent changes to driver code --- cassandra/pool.py | 8 ++++++-- tests/integration/standard/test_cluster.py | 15 +++++++++++++-- tests/integration/util.py | 12 +++++++++--- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/cassandra/pool.py b/cassandra/pool.py index c82dfe9a6b..d61e81cd0d 100644 --- a/cassandra/pool.py +++ b/cassandra/pool.py @@ -568,7 +568,9 @@ def get_state(self): connection = self._connection open_count = 1 if connection and not (connection.is_closed or connection.is_defunct) else 0 in_flights = [connection.in_flight] if connection else [] - return {'shutdown': self.is_shutdown, 'open_count': open_count, 'in_flights': in_flights} + orphan_requests = [connection.orphaned_request_ids] if connection else [] + return {'shutdown': self.is_shutdown, 'open_count': open_count, \ + 'in_flights': in_flights, 'orphan_requests': orphan_requests} @property def open_count(self): @@ -926,4 +928,6 @@ def get_connections(self): def get_state(self): in_flights = [c.in_flight for c in self._connections] - return {'shutdown': self.is_shutdown, 'open_count': self.open_count, 'in_flights': in_flights} + orphan_requests = [c.orphaned_request_ids for c in self._connections] + return {'shutdown': self.is_shutdown, 'open_count': self.open_count, \ + 'in_flights': in_flights, 'orphan_requests': orphan_requests} diff --git a/tests/integration/standard/test_cluster.py b/tests/integration/standard/test_cluster.py index a15c7f32e2..deceed58fd 100644 --- a/tests/integration/standard/test_cluster.py +++ b/tests/integration/standard/test_cluster.py @@ -16,7 +16,7 @@ from collections import deque from copy import copy -from mock import Mock, call, patch +from mock import Mock, call, patch, ANY import time from uuid import uuid4 import logging @@ -1478,7 +1478,18 @@ def test_prepare_on_ignored_hosts(self): # the length of mock_calls will vary, but all should use the unignored # address for c in cluster.connection_factory.mock_calls: - self.assertEqual(call(DefaultEndPoint(unignored_address)), c) + # PYTHON-1287 + # + # Cluster._prepare_all_queries() will call connection_factory _without_ the + # on_orphaned_stream_released arg introduced in commit + # 
387150acc365b6cf1daaee58c62db13e4929099a. The reconnect handler for the
+            # downed node _will_ add this arg when it tries to rebuild its conn pool, and
+            # whether this occurs while running this test amounts to a race condition. So
+            # to cover this case we assert one of two call styles here... the key is that
+            # the _only_ address we should see is the unignored_address.
+            self.assertTrue( \
+                c == call(DefaultEndPoint(unignored_address)) or \
+                c == call(DefaultEndPoint(unignored_address), on_orphaned_stream_released=ANY))

         cluster.shutdown()

diff --git a/tests/integration/util.py b/tests/integration/util.py
index 6215449d1f..bcc4cb829b 100644
--- a/tests/integration/util.py
+++ b/tests/integration/util.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from itertools import chain
+
 from tests.integration import PROTOCOL_VERSION

 import time
@@ -38,14 +40,18 @@ def assert_quiescent_pool_state(test_case, cluster, wait=None):
     for state in pool_states:
         test_case.assertFalse(state['shutdown'])
         test_case.assertGreater(state['open_count'], 0)
-        test_case.assertTrue(all((i == 0 for i in state['in_flights'])))
+        no_in_flight = all((i == 0 for i in state['in_flights']))
+        orphans_and_inflights = zip(state['orphan_requests'],state['in_flights'])
+        all_orphaned = all((len(orphans) == inflight for (orphans,inflight) in orphans_and_inflights))
+        test_case.assertTrue(no_in_flight or all_orphaned)

     for holder in cluster.get_connection_holders():
         for connection in holder.get_connections():
             # all ids are unique
             req_ids = connection.request_ids
+            orphan_ids = connection.orphaned_request_ids
             test_case.assertEqual(len(req_ids), len(set(req_ids)))
-            test_case.assertEqual(connection.highest_request_id, len(req_ids) - 1)
-            test_case.assertEqual(connection.highest_request_id, max(req_ids))
+            test_case.assertEqual(connection.highest_request_id, len(req_ids) + len(orphan_ids) - 1)
+            test_case.assertEqual(connection.highest_request_id, max(chain(req_ids, orphan_ids)))

     if PROTOCOL_VERSION < 3:
         test_case.assertEqual(connection.highest_request_id, connection.max_request_id)

From b0030194e3e3c5c2a932315b2282dab75ec23f67 Mon Sep 17 00:00:00 2001
From: Bret McGuire
Date: Wed, 8 Mar 2023 09:55:53 -0600
Subject: [PATCH 147/211] Merge pull request #1137 from python-driver/python-1329

PYTHON-1329 Change expected port numbers if use_single_interface is used
---
 tests/integration/__init__.py                 | 19 +++++++++++++++----
 .../standard/test_single_interface.py         |  4 ++--
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
index d3c3332649..a344931a4e 100644
--- a/tests/integration/__init__.py
+++ b/tests/integration/__init__.py
@@ -52,6 +52,14 @@
 SINGLE_NODE_CLUSTER_NAME = 'single_node'
 MULTIDC_CLUSTER_NAME = 'multidc_test_cluster'

+# When use_single_interface is specified ccm will assign distinct port numbers to each
+# node in the cluster. This value specifies the default port value used for the first
+# node that comes up.
+#
+# TODO: In the future we may want to make this configurable, but this should only apply
+# if a non-standard port were specified when starting up the cluster.
+DEFAULT_SINGLE_INTERFACE_PORT=9046 + CCM_CLUSTER = None path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'ccm') @@ -593,7 +601,10 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None, wait_for_node_socket(node, 300) log.debug("Binary ports are open") if set_keyspace: - setup_keyspace(ipformat=ipformat) + args = {"ipformat": ipformat} + if use_single_interface: + args["port"] = DEFAULT_SINGLE_INTERFACE_PORT + setup_keyspace(**args) except Exception: log.exception("Failed to start CCM cluster; removing cluster.") @@ -692,7 +703,7 @@ def drop_keyspace_shutdown_cluster(keyspace_name, session, cluster): cluster.shutdown() -def setup_keyspace(ipformat=None, wait=True, protocol_version=None): +def setup_keyspace(ipformat=None, wait=True, protocol_version=None, port=9042): # wait for nodes to startup if wait: time.sleep(10) @@ -703,9 +714,9 @@ def setup_keyspace(ipformat=None, wait=True, protocol_version=None): _protocol_version = PROTOCOL_VERSION if not ipformat: - cluster = TestCluster(protocol_version=_protocol_version) + cluster = TestCluster(protocol_version=_protocol_version, port=port) else: - cluster = TestCluster(contact_points=["::1"], protocol_version=_protocol_version) + cluster = TestCluster(contact_points=["::1"], protocol_version=_protocol_version, port=port) session = cluster.connect() try: diff --git a/tests/integration/standard/test_single_interface.py b/tests/integration/standard/test_single_interface.py index 4677eff641..ffd2bbe9c4 100644 --- a/tests/integration/standard/test_single_interface.py +++ b/tests/integration/standard/test_single_interface.py @@ -22,7 +22,7 @@ from packaging.version import Version from tests.integration import use_singledc, PROTOCOL_VERSION, \ remove_cluster, greaterthanorequalcass40, notdse, \ - CASSANDRA_VERSION, DSE_VERSION, TestCluster + CASSANDRA_VERSION, DSE_VERSION, TestCluster, DEFAULT_SINGLE_INTERFACE_PORT def setup_module(): @@ -39,7 +39,7 @@ def teardown_module(): class SingleInterfaceTest(unittest.TestCase): def setUp(self): - self.cluster = TestCluster() + self.cluster = TestCluster(port=DEFAULT_SINGLE_INTERFACE_PORT) self.session = self.cluster.connect() def tearDown(self): From ff704d6a225c10de270c47bf68d1c1559ba7839e Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Thu, 9 Mar 2023 15:46:56 -0600 Subject: [PATCH 148/211] Merge pull request #1139 from python-driver/python-1328 PYTHON-1328: Add explicit wait to give cluster time to get initialized --- tests/integration/standard/test_authentication.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/integration/standard/test_authentication.py b/tests/integration/standard/test_authentication.py index 189da45c94..b055bc75ec 100644 --- a/tests/integration/standard/test_authentication.py +++ b/tests/integration/standard/test_authentication.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from packaging.version import Version import logging import time from cassandra.cluster import NoHostAvailable from cassandra.auth import PlainTextAuthProvider, SASLClient, SaslAuthProvider -from tests.integration import use_singledc, get_cluster, remove_cluster, PROTOCOL_VERSION, CASSANDRA_IP, \ - USE_CASS_EXTERNAL, start_cluster_wait_for_up, TestCluster +from tests.integration import use_singledc, get_cluster, remove_cluster, PROTOCOL_VERSION, \ + CASSANDRA_IP, CASSANDRA_VERSION, USE_CASS_EXTERNAL, start_cluster_wait_for_up, TestCluster from tests.integration.util import assert_quiescent_pool_state import unittest @@ -42,12 +43,19 @@ def setup_module(): log.debug("Starting ccm test cluster with %s", config_options) start_cluster_wait_for_up(ccm_cluster) + # PYTHON-1328 + # + # Give the cluster enough time to startup (and perform necessary initialization) + # before executing the test. + if CASSANDRA_VERSION > Version('4.0-a'): + time.sleep(10) def teardown_module(): remove_cluster() # this test messes with config class AuthenticationTests(unittest.TestCase): + """ Tests to cover basic authentication functionality """ @@ -86,6 +94,7 @@ def cluster_as(self, usr, pwd): raise Exception('Unable to connect with creds: {}/{}'.format(usr, pwd)) def test_auth_connect(self): + user = 'u' passwd = 'password' From ffe0097505b96b61c5fd2ec626aa934caf397873 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Thu, 9 Mar 2023 15:48:35 -0600 Subject: [PATCH 149/211] Merge pull request #1140 from python-driver/python-1327 PYTHON-1327: Add annotation to note server-side fix for certain C* versions --- tests/integration/standard/test_prepared_statements.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/integration/standard/test_prepared_statements.py b/tests/integration/standard/test_prepared_statements.py index 1ed48d2964..a643b19c07 100644 --- a/tests/integration/standard/test_prepared_statements.py +++ b/tests/integration/standard/test_prepared_statements.py @@ -13,9 +13,12 @@ # limitations under the License. -from tests.integration import use_singledc, PROTOCOL_VERSION, TestCluster +from tests.integration import use_singledc, PROTOCOL_VERSION, TestCluster, CASSANDRA_VERSION import unittest + +from packaging.version import Version + from cassandra import InvalidRequest, DriverException from cassandra import ConsistencyLevel, ProtocolVersion @@ -392,6 +395,9 @@ def test_raise_error_on_prepared_statement_execution_dropped_table(self): with self.assertRaises(InvalidRequest): self.session.execute(prepared, [0]) + @unittest.skipIf((CASSANDRA_VERSION >= Version('3.11.12') and CASSANDRA_VERSION < Version('4.0')) or \ + CASSANDRA_VERSION >= Version('4.0.2'), + "Fixed server-side in Cassandra 3.11.12, 4.0.2") def test_fail_if_different_query_id_on_reprepare(self): """ PYTHON-1124 and CASSANDRA-15252 """ keyspace = "test_fail_if_different_query_id_on_reprepare" From 4da7001b38e65e8d578d0b71b37ef7be3a618c2e Mon Sep 17 00:00:00 2001 From: Karthikeyan Singaravelan Date: Fri, 10 Mar 2023 03:39:01 +0530 Subject: [PATCH 150/211] Refactor deprecated unittest aliases for Python 3.11 compatibility. 
(#1112) --- .../cqlengine/management/test_compaction_settings.py | 6 +++--- .../integration/cqlengine/management/test_management.py | 3 ++- .../cqlengine/model/test_class_construction.py | 3 ++- tests/integration/cqlengine/test_batch_query.py | 3 ++- tests/integration/long/test_ipv6.py | 5 +++-- tests/integration/simulacron/test_connection.py | 3 ++- tests/integration/standard/test_authentication.py | 9 +++++---- tests/integration/standard/test_client_warnings.py | 9 +++++---- tests/integration/standard/test_cluster.py | 7 ++++--- tests/integration/standard/test_metadata.py | 6 +++--- tests/integration/standard/test_single_interface.py | 2 +- tests/integration/standard/test_types.py | 6 +++--- tests/unit/advanced/test_graph.py | 2 +- tests/unit/cqlengine/test_connection.py | 6 ++++-- tests/unit/test_connection.py | 2 +- tests/unit/test_control_connection.py | 4 ++-- tests/unit/test_policies.py | 2 +- tests/unit/test_protocol.py | 3 ++- tests/unit/test_response_future.py | 4 +++- tests/unit/test_timestamps.py | 3 ++- 20 files changed, 51 insertions(+), 37 deletions(-) diff --git a/tests/integration/cqlengine/management/test_compaction_settings.py b/tests/integration/cqlengine/management/test_compaction_settings.py index d5dea12744..673bda29a7 100644 --- a/tests/integration/cqlengine/management/test_compaction_settings.py +++ b/tests/integration/cqlengine/management/test_compaction_settings.py @@ -83,7 +83,7 @@ def test_alter_actually_alters(self): table_meta = _get_table_metadata(tmp) - self.assertRegexpMatches(table_meta.export_as_string(), '.*SizeTieredCompactionStrategy.*') + six.assertRegex(self, table_meta.export_as_string(), '.*SizeTieredCompactionStrategy.*') def test_alter_options(self): @@ -97,11 +97,11 @@ class AlterTable(Model): drop_table(AlterTable) sync_table(AlterTable) table_meta = _get_table_metadata(AlterTable) - self.assertRegexpMatches(table_meta.export_as_string(), ".*'sstable_size_in_mb': '64'.*") + six.assertRegex(self, table_meta.export_as_string(), ".*'sstable_size_in_mb': '64'.*") AlterTable.__options__['compaction']['sstable_size_in_mb'] = '128' sync_table(AlterTable) table_meta = _get_table_metadata(AlterTable) - self.assertRegexpMatches(table_meta.export_as_string(), ".*'sstable_size_in_mb': '128'.*") + six.assertRegex(self, table_meta.export_as_string(), ".*'sstable_size_in_mb': '128'.*") class OptionsTest(BaseCassEngTestCase): diff --git a/tests/integration/cqlengine/management/test_management.py b/tests/integration/cqlengine/management/test_management.py index f37db5e51f..67f87b10e4 100644 --- a/tests/integration/cqlengine/management/test_management.py +++ b/tests/integration/cqlengine/management/test_management.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest +import six import mock import logging from packaging.version import Version @@ -261,7 +262,7 @@ def test_bogus_option_update(self): option = 'no way will this ever be an option' try: ModelWithTableProperties.__options__[option] = 'what was I thinking?' 
- self.assertRaisesRegexp(KeyError, "Invalid table option.*%s.*" % option, sync_table, ModelWithTableProperties) + six.assertRaisesRegex(self, KeyError, "Invalid table option.*%s.*" % option, sync_table, ModelWithTableProperties) finally: ModelWithTableProperties.__options__.pop(option, None) diff --git a/tests/integration/cqlengine/model/test_class_construction.py b/tests/integration/cqlengine/model/test_class_construction.py index 9c5afecbfc..95ba1f49bd 100644 --- a/tests/integration/cqlengine/model/test_class_construction.py +++ b/tests/integration/cqlengine/model/test_class_construction.py @@ -15,6 +15,7 @@ from uuid import uuid4 import warnings +import six from cassandra.cqlengine import columns, CQLEngineException from cassandra.cqlengine.models import Model, ModelException, ModelDefinitionException, ColumnQueryEvaluator from cassandra.cqlengine.query import ModelQuerySet, DMLQuery @@ -91,7 +92,7 @@ def test_attempting_to_make_duplicate_column_names_fails(self): Tests that trying to create conflicting db column names will fail """ - with self.assertRaisesRegexp(ModelException, r".*more than once$"): + with six.assertRaisesRegex(self, ModelException, r".*more than once$"): class BadNames(Model): words = columns.Text(primary_key=True) content = columns.Text(db_field='words') diff --git a/tests/integration/cqlengine/test_batch_query.py b/tests/integration/cqlengine/test_batch_query.py index 7b78fa9979..07ee2e13bf 100644 --- a/tests/integration/cqlengine/test_batch_query.py +++ b/tests/integration/cqlengine/test_batch_query.py @@ -13,6 +13,7 @@ # limitations under the License. import warnings +import six import sure from cassandra.cqlengine import columns @@ -223,7 +224,7 @@ def my_callback(*args, **kwargs): batch.execute() batch.execute() self.assertEqual(len(w), 2) # package filter setup to warn always - self.assertRegexpMatches(str(w[0].message), r"^Batch.*multiple.*") + six.assertRegex(self, str(w[0].message), r"^Batch.*multiple.*") def test_disable_multiple_callback_warning(self): """ diff --git a/tests/integration/long/test_ipv6.py b/tests/integration/long/test_ipv6.py index b63fdebcf3..6c7d447dfb 100644 --- a/tests/integration/long/test_ipv6.py +++ b/tests/integration/long/test_ipv6.py @@ -13,6 +13,7 @@ # limitations under the License. 
import os, socket, errno +import six from ccmlib import common from cassandra.cluster import NoHostAvailable @@ -82,7 +83,7 @@ def test_connect(self): def test_error(self): cluster = TestCluster(connection_class=self.connection_class, contact_points=['::1'], port=9043, connect_timeout=10) - self.assertRaisesRegexp(NoHostAvailable, '\(\'Unable to connect.*%s.*::1\', 9043.*Connection refused.*' + six.assertRaisesRegex(self, NoHostAvailable, '\(\'Unable to connect.*%s.*::1\', 9043.*Connection refused.*' % errno.ECONNREFUSED, cluster.connect) def test_error_multiple(self): @@ -90,7 +91,7 @@ def test_error_multiple(self): raise unittest.SkipTest('localhost only resolves one address') cluster = TestCluster(connection_class=self.connection_class, contact_points=['localhost'], port=9043, connect_timeout=10) - self.assertRaisesRegexp(NoHostAvailable, '\(\'Unable to connect.*Tried connecting to \[\(.*\(.*\].*Last error', + six.assertRaisesRegex(self, NoHostAvailable, '\(\'Unable to connect.*Tried connecting to \[\(.*\(.*\].*Last error', cluster.connect) diff --git a/tests/integration/simulacron/test_connection.py b/tests/integration/simulacron/test_connection.py index 0c70d0a1e9..e34e69f458 100644 --- a/tests/integration/simulacron/test_connection.py +++ b/tests/integration/simulacron/test_connection.py @@ -14,6 +14,7 @@ import unittest import logging +import six import time from mock import Mock, patch @@ -262,7 +263,7 @@ def connection_factory(self, *args, **kwargs): prime_request(PrimeOptions(then={"result": "no_result", "delay_in_ms": never})) prime_request(RejectConnections("unbind")) - self.assertRaisesRegexp(OperationTimedOut, "Connection defunct by heartbeat", future.result) + six.assertRaisesRegex(self, OperationTimedOut, "Connection defunct by heartbeat", future.result) def test_close_when_query(self): """ diff --git a/tests/integration/standard/test_authentication.py b/tests/integration/standard/test_authentication.py index b055bc75ec..c23c9eedf2 100644 --- a/tests/integration/standard/test_authentication.py +++ b/tests/integration/standard/test_authentication.py @@ -16,6 +16,7 @@ import logging import time +import six from cassandra.cluster import NoHostAvailable from cassandra.auth import PlainTextAuthProvider, SASLClient, SaslAuthProvider @@ -121,7 +122,7 @@ def test_auth_connect(self): def test_connect_wrong_pwd(self): cluster = self.cluster_as('cassandra', 'wrong_pass') try: - self.assertRaisesRegexp(NoHostAvailable, + six.assertRaisesRegex(self, NoHostAvailable, '.*AuthenticationFailed.', cluster.connect) assert_quiescent_pool_state(self, cluster) @@ -131,7 +132,7 @@ def test_connect_wrong_pwd(self): def test_connect_wrong_username(self): cluster = self.cluster_as('wrong_user', 'cassandra') try: - self.assertRaisesRegexp(NoHostAvailable, + six.assertRaisesRegex(self, NoHostAvailable, '.*AuthenticationFailed.*', cluster.connect) assert_quiescent_pool_state(self, cluster) @@ -141,7 +142,7 @@ def test_connect_wrong_username(self): def test_connect_empty_pwd(self): cluster = self.cluster_as('Cassandra', '') try: - self.assertRaisesRegexp(NoHostAvailable, + six.assertRaisesRegex(self, NoHostAvailable, '.*AuthenticationFailed.*', cluster.connect) assert_quiescent_pool_state(self, cluster) @@ -151,7 +152,7 @@ def test_connect_empty_pwd(self): def test_connect_no_auth_provider(self): cluster = TestCluster() try: - self.assertRaisesRegexp(NoHostAvailable, + six.assertRaisesRegex(self, NoHostAvailable, '.*AuthenticationFailed.*', cluster.connect) assert_quiescent_pool_state(self, cluster) 
diff --git a/tests/integration/standard/test_client_warnings.py b/tests/integration/standard/test_client_warnings.py index 5f63b5265a..166f172a16 100644 --- a/tests/integration/standard/test_client_warnings.py +++ b/tests/integration/standard/test_client_warnings.py @@ -15,6 +15,7 @@ import unittest +import six from cassandra.query import BatchStatement from tests.integration import use_singledc, PROTOCOL_VERSION, local, TestCluster @@ -70,7 +71,7 @@ def test_warning_basic(self): future = self.session.execute_async(self.warn_batch) future.result() self.assertEqual(len(future.warnings), 1) - self.assertRegexpMatches(future.warnings[0], 'Batch.*exceeding.*') + six.assertRegex(self, future.warnings[0], 'Batch.*exceeding.*') def test_warning_with_trace(self): """ @@ -86,7 +87,7 @@ def test_warning_with_trace(self): future = self.session.execute_async(self.warn_batch, trace=True) future.result() self.assertEqual(len(future.warnings), 1) - self.assertRegexpMatches(future.warnings[0], 'Batch.*exceeding.*') + six.assertRegex(self, future.warnings[0], 'Batch.*exceeding.*') self.assertIsNotNone(future.get_query_trace()) @local @@ -105,7 +106,7 @@ def test_warning_with_custom_payload(self): future = self.session.execute_async(self.warn_batch, custom_payload=payload) future.result() self.assertEqual(len(future.warnings), 1) - self.assertRegexpMatches(future.warnings[0], 'Batch.*exceeding.*') + six.assertRegex(self, future.warnings[0], 'Batch.*exceeding.*') self.assertDictEqual(future.custom_payload, payload) @local @@ -124,6 +125,6 @@ def test_warning_with_trace_and_custom_payload(self): future = self.session.execute_async(self.warn_batch, trace=True, custom_payload=payload) future.result() self.assertEqual(len(future.warnings), 1) - self.assertRegexpMatches(future.warnings[0], 'Batch.*exceeding.*') + six.assertRegex(self, future.warnings[0], 'Batch.*exceeding.*') self.assertIsNotNone(future.get_query_trace()) self.assertDictEqual(future.custom_payload, payload) diff --git a/tests/integration/standard/test_cluster.py b/tests/integration/standard/test_cluster.py index deceed58fd..c5f64f6c28 100644 --- a/tests/integration/standard/test_cluster.py +++ b/tests/integration/standard/test_cluster.py @@ -23,6 +23,7 @@ import warnings from packaging.version import Version +import six import cassandra from cassandra.cluster import NoHostAvailable, ExecutionProfile, EXEC_PROFILE_DEFAULT, ControlConnection, Cluster from cassandra.concurrent import execute_concurrent @@ -147,7 +148,7 @@ def test_raise_error_on_control_connection_timeout(self): get_node(1).pause() cluster = TestCluster(contact_points=['127.0.0.1'], connect_timeout=1) - with self.assertRaisesRegexp(NoHostAvailable, "OperationTimedOut\('errors=Timed out creating connection \(1 seconds\)"): + with six.assertRaisesRegex(self, NoHostAvailable, "OperationTimedOut\('errors=Timed out creating connection \(1 seconds\)"): cluster.connect() cluster.shutdown() @@ -535,7 +536,7 @@ def patched_wait_for_responses(*args, **kwargs): # cluster agreement wait used for refresh original_meta = c.metadata.keyspaces start_time = time.time() - self.assertRaisesRegexp(Exception, r"Schema metadata was not refreshed.*", c.refresh_schema_metadata) + six.assertRaisesRegex(self, Exception, r"Schema metadata was not refreshed.*", c.refresh_schema_metadata) end_time = time.time() self.assertGreaterEqual(end_time - start_time, agreement_timeout) self.assertIs(original_meta, c.metadata.keyspaces) @@ -572,7 +573,7 @@ def patched_wait_for_responses(*args, **kwargs): # refresh wait 
overrides cluster value original_meta = c.metadata.keyspaces start_time = time.time() - self.assertRaisesRegexp(Exception, r"Schema metadata was not refreshed.*", c.refresh_schema_metadata, + six.assertRaisesRegex(self, Exception, r"Schema metadata was not refreshed.*", c.refresh_schema_metadata, max_schema_agreement_wait=agreement_timeout) end_time = time.time() self.assertGreaterEqual(end_time - start_time, agreement_timeout) diff --git a/tests/integration/standard/test_metadata.py b/tests/integration/standard/test_metadata.py index e20f1f0640..6f76c2a9b0 100644 --- a/tests/integration/standard/test_metadata.py +++ b/tests/integration/standard/test_metadata.py @@ -1590,7 +1590,7 @@ def test_function_no_parameters(self): with self.VerifiedFunction(self, **kwargs) as vf: fn_meta = self.keyspace_function_meta[vf.signature] - self.assertRegexpMatches(fn_meta.as_cql_query(), "CREATE FUNCTION.*%s\(\) .*" % kwargs['name']) + six.assertRegex(self, fn_meta.as_cql_query(), "CREATE FUNCTION.*%s\(\) .*" % kwargs['name']) def test_functions_follow_keyspace_alter(self): """ @@ -1638,12 +1638,12 @@ def test_function_cql_called_on_null(self): kwargs['called_on_null_input'] = True with self.VerifiedFunction(self, **kwargs) as vf: fn_meta = self.keyspace_function_meta[vf.signature] - self.assertRegexpMatches(fn_meta.as_cql_query(), "CREATE FUNCTION.*\) CALLED ON NULL INPUT RETURNS .*") + six.assertRegex(self, fn_meta.as_cql_query(), "CREATE FUNCTION.*\) CALLED ON NULL INPUT RETURNS .*") kwargs['called_on_null_input'] = False with self.VerifiedFunction(self, **kwargs) as vf: fn_meta = self.keyspace_function_meta[vf.signature] - self.assertRegexpMatches(fn_meta.as_cql_query(), "CREATE FUNCTION.*\) RETURNS NULL ON NULL INPUT RETURNS .*") + six.assertRegex(self, fn_meta.as_cql_query(), "CREATE FUNCTION.*\) RETURNS NULL ON NULL INPUT RETURNS .*") class AggregateMetadata(FunctionTest): diff --git a/tests/integration/standard/test_single_interface.py b/tests/integration/standard/test_single_interface.py index ffd2bbe9c4..8d407be958 100644 --- a/tests/integration/standard/test_single_interface.py +++ b/tests/integration/standard/test_single_interface.py @@ -71,4 +71,4 @@ def test_single_interface(self): consistency_level=ConsistencyLevel.ALL)) for pool in self.session.get_pools(): - self.assertEquals(1, pool.get_state()['open_count']) + self.assertEqual(1, pool.get_state()['open_count']) diff --git a/tests/integration/standard/test_types.py b/tests/integration/standard/test_types.py index f69e88c64f..828f10b5e2 100644 --- a/tests/integration/standard/test_types.py +++ b/tests/integration/standard/test_types.py @@ -69,7 +69,7 @@ def test_can_insert_blob_type_as_string(self): msg = r'.*Invalid STRING constant \(.*?\) for "b" of type blob.*' else: msg = r'.*Invalid STRING constant \(.*?\) for b of type blob.*' - self.assertRaisesRegexp(InvalidRequest, msg, s.execute, query, params) + six.assertRaisesRegex(self, InvalidRequest, msg, s.execute, query, params) return # In python2, with Cassandra < 2.0, we can manually encode the 'byte str' type as hex for insertion in a blob. 
@@ -1060,7 +1060,7 @@ def _daterange_round_trip(self, to_insert, expected=None): results = self.session.execute(prep_sel) dr = results[0].dr - # sometimes this is truncated in the assertEquals output on failure; + # sometimes this is truncated in the assertEqual output on failure; if isinstance(expected, six.string_types): self.assertEqual(str(dr), expected) else: @@ -1114,7 +1114,7 @@ def _daterange_round_trip(self, to_insert, expected=None): results= self.session.execute("SELECT * FROM tab WHERE dr = '{0}' ".format(to_insert)) dr = results[0].dr - # sometimes this is truncated in the assertEquals output on failure; + # sometimes this is truncated in the assertEqual output on failure; if isinstance(expected, six.string_types): self.assertEqual(str(dr), expected) else: diff --git a/tests/unit/advanced/test_graph.py b/tests/unit/advanced/test_graph.py index 25dd289dba..77a920a3bf 100644 --- a/tests/unit/advanced/test_graph.py +++ b/tests/unit/advanced/test_graph.py @@ -259,7 +259,7 @@ def test_init_unknown_kwargs(self): with warnings.catch_warnings(record=True) as w: GraphOptions(unknown_param=42) self.assertEqual(len(w), 1) - self.assertRegexpMatches(str(w[0].message), r"^Unknown keyword.*GraphOptions.*") + six.assertRegex(self, str(w[0].message), r"^Unknown keyword.*GraphOptions.*") def test_update(self): opts = GraphOptions(**self.api_params) diff --git a/tests/unit/cqlengine/test_connection.py b/tests/unit/cqlengine/test_connection.py index 8e3a0b75bd..9c3454796a 100644 --- a/tests/unit/cqlengine/test_connection.py +++ b/tests/unit/cqlengine/test_connection.py @@ -14,6 +14,8 @@ import unittest +import six + from cassandra.cluster import _ConfigMode from cassandra.cqlengine import connection from cassandra.query import dict_factory @@ -50,12 +52,12 @@ def test_get_session_fails_without_existing_connection(self): """ Users can't get the default session without having a default connection set. """ - with self.assertRaisesRegexp(connection.CQLEngineException, self.no_registered_connection_msg): + with six.assertRaisesRegex(self, connection.CQLEngineException, self.no_registered_connection_msg): connection.get_session(connection=None) def test_get_cluster_fails_without_existing_connection(self): """ Users can't get the default cluster without having a default connection set. 
""" - with self.assertRaisesRegexp(connection.CQLEngineException, self.no_registered_connection_msg): + with six.assertRaisesRegex(self, connection.CQLEngineException, self.no_registered_connection_msg): connection.get_cluster(connection=None) diff --git a/tests/unit/test_connection.py b/tests/unit/test_connection.py index f06b67ebe0..97faa5e7fc 100644 --- a/tests/unit/test_connection.py +++ b/tests/unit/test_connection.py @@ -392,7 +392,7 @@ def send_msg(msg, req_id, msg_callback): connection.defunct.assert_has_calls([call(ANY)] * get_holders.call_count) exc = connection.defunct.call_args_list[0][0][0] self.assertIsInstance(exc, ConnectionException) - self.assertRegexpMatches(exc.args[0], r'^Received unexpected response to OptionsMessage.*') + six.assertRegex(self, exc.args[0], r'^Received unexpected response to OptionsMessage.*') holder.return_connection.assert_has_calls( [call(connection)] * get_holders.call_count) diff --git a/tests/unit/test_control_connection.py b/tests/unit/test_control_connection.py index 276b2849ca..53a5d6affc 100644 --- a/tests/unit/test_control_connection.py +++ b/tests/unit/test_control_connection.py @@ -526,7 +526,7 @@ def test_refresh_nodes_and_tokens_add_host_detects_port(self): self.assertEqual(self.cluster.added_hosts[0].broadcast_rpc_address, "192.168.1.3") self.assertEqual(self.cluster.added_hosts[0].broadcast_rpc_port, 555) self.assertEqual(self.cluster.added_hosts[0].broadcast_address, "10.0.0.3") - self.assertEquals(self.cluster.added_hosts[0].broadcast_port, 666) + self.assertEqual(self.cluster.added_hosts[0].broadcast_port, 666) self.assertEqual(self.cluster.added_hosts[0].datacenter, "dc1") self.assertEqual(self.cluster.added_hosts[0].rack, "rack1") @@ -546,7 +546,7 @@ def test_refresh_nodes_and_tokens_add_host_detects_invalid_port(self): self.assertEqual(self.cluster.added_hosts[0].broadcast_rpc_address, "192.168.1.3") self.assertEqual(self.cluster.added_hosts[0].broadcast_rpc_port, None) self.assertEqual(self.cluster.added_hosts[0].broadcast_address, "10.0.0.3") - self.assertEquals(self.cluster.added_hosts[0].broadcast_port, None) + self.assertEqual(self.cluster.added_hosts[0].broadcast_port, None) self.assertEqual(self.cluster.added_hosts[0].datacenter, "dc1") self.assertEqual(self.cluster.added_hosts[0].rack, "rack1") diff --git a/tests/unit/test_policies.py b/tests/unit/test_policies.py index 88db23daba..edafb7cb01 100644 --- a/tests/unit/test_policies.py +++ b/tests/unit/test_policies.py @@ -1301,7 +1301,7 @@ def test_immutable_predicate(self): expected_message_regex = "can't set attribute" hfp = HostFilterPolicy(child_policy=Mock(name='child_policy'), predicate=Mock(name='predicate')) - with self.assertRaisesRegexp(AttributeError, expected_message_regex): + with six.assertRaisesRegex(self, AttributeError, expected_message_regex): hfp.predicate = object() diff --git a/tests/unit/test_protocol.py b/tests/unit/test_protocol.py index 95a7a12b11..3d6828bdc5 100644 --- a/tests/unit/test_protocol.py +++ b/tests/unit/test_protocol.py @@ -14,6 +14,7 @@ import unittest +import six from mock import Mock from cassandra import ProtocolVersion, UnsupportedOperation @@ -172,7 +173,7 @@ def test_keyspace_flag_raises_before_v5(self): keyspace_message = QueryMessage('a', consistency_level=3, keyspace='ks') io = Mock(name='io') - with self.assertRaisesRegexp(UnsupportedOperation, 'Keyspaces.*set'): + with six.assertRaisesRegex(self, UnsupportedOperation, 'Keyspaces.*set'): keyspace_message.send_body(io, protocol_version=4) io.assert_not_called() diff --git 
a/tests/unit/test_response_future.py b/tests/unit/test_response_future.py index dbd8764ad9..a9c05976e0 100644 --- a/tests/unit/test_response_future.py +++ b/tests/unit/test_response_future.py @@ -16,6 +16,8 @@ from collections import deque from threading import RLock + +import six from mock import Mock, MagicMock, ANY from cassandra import ConsistencyLevel, Unavailable, SchemaTargetType, SchemaChangeType, OperationTimedOut @@ -158,7 +160,7 @@ def test_heartbeat_defunct_deadlock(self): # Simulate ResponseFuture timing out rf._on_timeout() - self.assertRaisesRegexp(OperationTimedOut, "Connection defunct by heartbeat", rf.result) + six.assertRaisesRegex(self, OperationTimedOut, "Connection defunct by heartbeat", rf.result) def test_read_timeout_error_message(self): session = self.make_session() diff --git a/tests/unit/test_timestamps.py b/tests/unit/test_timestamps.py index 58958cff03..fc1be071ad 100644 --- a/tests/unit/test_timestamps.py +++ b/tests/unit/test_timestamps.py @@ -15,6 +15,7 @@ import unittest import mock +import six from cassandra import timestamps from threading import Thread, Lock @@ -105,7 +106,7 @@ def assertLastCallArgRegex(self, call, pattern): last_warn_args, last_warn_kwargs = call self.assertEqual(len(last_warn_args), 1) self.assertEqual(len(last_warn_kwargs), 0) - self.assertRegexpMatches( + six.assertRegex(self, last_warn_args[0], pattern, ) From 6111c0c2d850433552ba87aa61cdda3c88d139eb Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Thu, 9 Mar 2023 16:24:58 -0600 Subject: [PATCH 151/211] Remove references to unsupported Python versions from setup.py --- setup.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/setup.py b/setup.py index aaaa1b4d2d..056469aca6 100644 --- a/setup.py +++ b/setup.py @@ -37,8 +37,6 @@ DistutilsExecError) from distutils.cmd import Command -PY3 = sys.version_info[0] == 3 - try: import subprocess has_subprocess = True @@ -406,9 +404,6 @@ def run_setup(extensions): dependencies = ['six >=1.9', 'geomet>=0.1,<0.3'] - if not PY3: - dependencies.append('futures') - _EXTRAS_REQUIRE = { 'graph': ['gremlinpython==3.4.6'] } @@ -442,9 +437,6 @@ def run_setup(extensions): 'Natural Language :: English', 'Operating System :: OS Independent', 'Programming Language :: Python', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: Implementation :: CPython', From 922d7ad565b4d3b3e0aafd6898c2639d968c6534 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Thu, 9 Mar 2023 17:09:13 -0600 Subject: [PATCH 152/211] Minor refactor of prior commit: now that we're dropping 2.7.x support we don't really need to leverage six for unit test functions. 
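
To make the mechanical substitution applied across the files below concrete, here is a minimal before/after sketch. The test class, exception, and regex are illustrative only (not taken from this patch); the point is the move from the six shims to the native unittest methods that have existed since Python 3.2:

    import unittest

    class ExampleTest(unittest.TestCase):
        def test_bad_option_is_rejected(self):
            # Before (Python 2/3 shim, removed by this patch):
            #     six.assertRaisesRegex(self, KeyError, r"Invalid table option.*")
            # After (native Python 3 unittest):
            with self.assertRaisesRegex(KeyError, r"Invalid table option.*"):
                raise KeyError("Invalid table option 'foo'")

    if __name__ == "__main__":
        unittest.main()

The same substitution covers six.assertRegex(self, ...) -> self.assertRegex(...) and the deprecated assertEquals/assertRegexpMatches aliases touched in the previous patch.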
--- tests/integration/advanced/graph/test_graph.py | 2 +- .../cqlengine/management/test_compaction_settings.py | 6 +++--- tests/integration/cqlengine/management/test_management.py | 2 +- .../cqlengine/model/test_class_construction.py | 2 +- tests/integration/cqlengine/test_batch_query.py | 2 +- tests/integration/long/test_ipv6.py | 4 ++-- tests/integration/simulacron/test_connection.py | 2 +- tests/integration/standard/test_authentication.py | 8 ++++---- tests/integration/standard/test_client_warnings.py | 8 ++++---- tests/integration/standard/test_cluster.py | 6 +++--- tests/integration/standard/test_metadata.py | 6 +++--- tests/integration/standard/test_types.py | 2 +- tests/unit/advanced/test_graph.py | 2 +- tests/unit/cqlengine/test_connection.py | 4 ++-- tests/unit/test_connection.py | 2 +- tests/unit/test_policies.py | 2 +- tests/unit/test_protocol.py | 2 +- tests/unit/test_response_future.py | 2 +- 18 files changed, 32 insertions(+), 32 deletions(-) diff --git a/tests/integration/advanced/graph/test_graph.py b/tests/integration/advanced/graph/test_graph.py index a0b6534c34..277283ea5a 100644 --- a/tests/integration/advanced/graph/test_graph.py +++ b/tests/integration/advanced/graph/test_graph.py @@ -266,6 +266,6 @@ def test_graph_protocol_default_for_core_fallback_to_graphson1_if_no_graph_name( self.assertEqual(ep.row_factory, graph_object_row_factory) regex = re.compile(".*Variable.*is unknown.*", re.S) - with six.assertRaisesRegex(self, SyntaxException, regex): + with self.assertRaisesRegex(SyntaxException, regex): self.execute_graph_queries(CoreGraphSchema.fixtures.classic(), execution_profile=ep, verify_graphson=GraphProtocol.GRAPHSON_1_0) diff --git a/tests/integration/cqlengine/management/test_compaction_settings.py b/tests/integration/cqlengine/management/test_compaction_settings.py index 673bda29a7..604e225586 100644 --- a/tests/integration/cqlengine/management/test_compaction_settings.py +++ b/tests/integration/cqlengine/management/test_compaction_settings.py @@ -83,7 +83,7 @@ def test_alter_actually_alters(self): table_meta = _get_table_metadata(tmp) - six.assertRegex(self, table_meta.export_as_string(), '.*SizeTieredCompactionStrategy.*') + self.assertRegex(table_meta.export_as_string(), '.*SizeTieredCompactionStrategy.*') def test_alter_options(self): @@ -97,11 +97,11 @@ class AlterTable(Model): drop_table(AlterTable) sync_table(AlterTable) table_meta = _get_table_metadata(AlterTable) - six.assertRegex(self, table_meta.export_as_string(), ".*'sstable_size_in_mb': '64'.*") + self.assertRegex(table_meta.export_as_string(), ".*'sstable_size_in_mb': '64'.*") AlterTable.__options__['compaction']['sstable_size_in_mb'] = '128' sync_table(AlterTable) table_meta = _get_table_metadata(AlterTable) - six.assertRegex(self, table_meta.export_as_string(), ".*'sstable_size_in_mb': '128'.*") + self.assertRegex(table_meta.export_as_string(), ".*'sstable_size_in_mb': '128'.*") class OptionsTest(BaseCassEngTestCase): diff --git a/tests/integration/cqlengine/management/test_management.py b/tests/integration/cqlengine/management/test_management.py index 67f87b10e4..2fd35b865e 100644 --- a/tests/integration/cqlengine/management/test_management.py +++ b/tests/integration/cqlengine/management/test_management.py @@ -262,7 +262,7 @@ def test_bogus_option_update(self): option = 'no way will this ever be an option' try: ModelWithTableProperties.__options__[option] = 'what was I thinking?' 
- six.assertRaisesRegex(self, KeyError, "Invalid table option.*%s.*" % option, sync_table, ModelWithTableProperties) + self.assertRaisesRegex(KeyError, "Invalid table option.*%s.*" % option, sync_table, ModelWithTableProperties) finally: ModelWithTableProperties.__options__.pop(option, None) diff --git a/tests/integration/cqlengine/model/test_class_construction.py b/tests/integration/cqlengine/model/test_class_construction.py index 95ba1f49bd..f764e78e5c 100644 --- a/tests/integration/cqlengine/model/test_class_construction.py +++ b/tests/integration/cqlengine/model/test_class_construction.py @@ -92,7 +92,7 @@ def test_attempting_to_make_duplicate_column_names_fails(self): Tests that trying to create conflicting db column names will fail """ - with six.assertRaisesRegex(self, ModelException, r".*more than once$"): + with self.assertRaisesRegex(ModelException, r".*more than once$"): class BadNames(Model): words = columns.Text(primary_key=True) content = columns.Text(db_field='words') diff --git a/tests/integration/cqlengine/test_batch_query.py b/tests/integration/cqlengine/test_batch_query.py index 07ee2e13bf..94496727a7 100644 --- a/tests/integration/cqlengine/test_batch_query.py +++ b/tests/integration/cqlengine/test_batch_query.py @@ -224,7 +224,7 @@ def my_callback(*args, **kwargs): batch.execute() batch.execute() self.assertEqual(len(w), 2) # package filter setup to warn always - six.assertRegex(self, str(w[0].message), r"^Batch.*multiple.*") + self.assertRegex(str(w[0].message), r"^Batch.*multiple.*") def test_disable_multiple_callback_warning(self): """ diff --git a/tests/integration/long/test_ipv6.py b/tests/integration/long/test_ipv6.py index 6c7d447dfb..3e2f2ffc5e 100644 --- a/tests/integration/long/test_ipv6.py +++ b/tests/integration/long/test_ipv6.py @@ -83,7 +83,7 @@ def test_connect(self): def test_error(self): cluster = TestCluster(connection_class=self.connection_class, contact_points=['::1'], port=9043, connect_timeout=10) - six.assertRaisesRegex(self, NoHostAvailable, '\(\'Unable to connect.*%s.*::1\', 9043.*Connection refused.*' + self.assertRaisesRegex(NoHostAvailable, '\(\'Unable to connect.*%s.*::1\', 9043.*Connection refused.*' % errno.ECONNREFUSED, cluster.connect) def test_error_multiple(self): @@ -91,7 +91,7 @@ def test_error_multiple(self): raise unittest.SkipTest('localhost only resolves one address') cluster = TestCluster(connection_class=self.connection_class, contact_points=['localhost'], port=9043, connect_timeout=10) - six.assertRaisesRegex(self, NoHostAvailable, '\(\'Unable to connect.*Tried connecting to \[\(.*\(.*\].*Last error', + self.assertRaisesRegex(NoHostAvailable, '\(\'Unable to connect.*Tried connecting to \[\(.*\(.*\].*Last error', cluster.connect) diff --git a/tests/integration/simulacron/test_connection.py b/tests/integration/simulacron/test_connection.py index e34e69f458..1def601d2e 100644 --- a/tests/integration/simulacron/test_connection.py +++ b/tests/integration/simulacron/test_connection.py @@ -263,7 +263,7 @@ def connection_factory(self, *args, **kwargs): prime_request(PrimeOptions(then={"result": "no_result", "delay_in_ms": never})) prime_request(RejectConnections("unbind")) - six.assertRaisesRegex(self, OperationTimedOut, "Connection defunct by heartbeat", future.result) + self.assertRaisesRegex(OperationTimedOut, "Connection defunct by heartbeat", future.result) def test_close_when_query(self): """ diff --git a/tests/integration/standard/test_authentication.py b/tests/integration/standard/test_authentication.py index 
c23c9eedf2..2f8ffbb068 100644 --- a/tests/integration/standard/test_authentication.py +++ b/tests/integration/standard/test_authentication.py @@ -122,7 +122,7 @@ def test_auth_connect(self): def test_connect_wrong_pwd(self): cluster = self.cluster_as('cassandra', 'wrong_pass') try: - six.assertRaisesRegex(self, NoHostAvailable, + self.assertRaisesRegex(NoHostAvailable, '.*AuthenticationFailed.', cluster.connect) assert_quiescent_pool_state(self, cluster) @@ -132,7 +132,7 @@ def test_connect_wrong_pwd(self): def test_connect_wrong_username(self): cluster = self.cluster_as('wrong_user', 'cassandra') try: - six.assertRaisesRegex(self, NoHostAvailable, + self.assertRaisesRegex(NoHostAvailable, '.*AuthenticationFailed.*', cluster.connect) assert_quiescent_pool_state(self, cluster) @@ -142,7 +142,7 @@ def test_connect_wrong_username(self): def test_connect_empty_pwd(self): cluster = self.cluster_as('Cassandra', '') try: - six.assertRaisesRegex(self, NoHostAvailable, + self.assertRaisesRegex(NoHostAvailable, '.*AuthenticationFailed.*', cluster.connect) assert_quiescent_pool_state(self, cluster) @@ -152,7 +152,7 @@ def test_connect_empty_pwd(self): def test_connect_no_auth_provider(self): cluster = TestCluster() try: - six.assertRaisesRegex(self, NoHostAvailable, + self.assertRaisesRegex(NoHostAvailable, '.*AuthenticationFailed.*', cluster.connect) assert_quiescent_pool_state(self, cluster) diff --git a/tests/integration/standard/test_client_warnings.py b/tests/integration/standard/test_client_warnings.py index 166f172a16..37003d5213 100644 --- a/tests/integration/standard/test_client_warnings.py +++ b/tests/integration/standard/test_client_warnings.py @@ -71,7 +71,7 @@ def test_warning_basic(self): future = self.session.execute_async(self.warn_batch) future.result() self.assertEqual(len(future.warnings), 1) - six.assertRegex(self, future.warnings[0], 'Batch.*exceeding.*') + self.assertRegex(future.warnings[0], 'Batch.*exceeding.*') def test_warning_with_trace(self): """ @@ -87,7 +87,7 @@ def test_warning_with_trace(self): future = self.session.execute_async(self.warn_batch, trace=True) future.result() self.assertEqual(len(future.warnings), 1) - six.assertRegex(self, future.warnings[0], 'Batch.*exceeding.*') + self.assertRegex(future.warnings[0], 'Batch.*exceeding.*') self.assertIsNotNone(future.get_query_trace()) @local @@ -106,7 +106,7 @@ def test_warning_with_custom_payload(self): future = self.session.execute_async(self.warn_batch, custom_payload=payload) future.result() self.assertEqual(len(future.warnings), 1) - six.assertRegex(self, future.warnings[0], 'Batch.*exceeding.*') + self.assertRegex(future.warnings[0], 'Batch.*exceeding.*') self.assertDictEqual(future.custom_payload, payload) @local @@ -125,6 +125,6 @@ def test_warning_with_trace_and_custom_payload(self): future = self.session.execute_async(self.warn_batch, trace=True, custom_payload=payload) future.result() self.assertEqual(len(future.warnings), 1) - six.assertRegex(self, future.warnings[0], 'Batch.*exceeding.*') + self.assertRegex(future.warnings[0], 'Batch.*exceeding.*') self.assertIsNotNone(future.get_query_trace()) self.assertDictEqual(future.custom_payload, payload) diff --git a/tests/integration/standard/test_cluster.py b/tests/integration/standard/test_cluster.py index c5f64f6c28..ae6e3e5a4e 100644 --- a/tests/integration/standard/test_cluster.py +++ b/tests/integration/standard/test_cluster.py @@ -148,7 +148,7 @@ def test_raise_error_on_control_connection_timeout(self): get_node(1).pause() cluster = 
TestCluster(contact_points=['127.0.0.1'], connect_timeout=1) - with six.assertRaisesRegex(self, NoHostAvailable, "OperationTimedOut\('errors=Timed out creating connection \(1 seconds\)"): + with self.assertRaisesRegex(NoHostAvailable, "OperationTimedOut\('errors=Timed out creating connection \(1 seconds\)"): cluster.connect() cluster.shutdown() @@ -536,7 +536,7 @@ def patched_wait_for_responses(*args, **kwargs): # cluster agreement wait used for refresh original_meta = c.metadata.keyspaces start_time = time.time() - six.assertRaisesRegex(self, Exception, r"Schema metadata was not refreshed.*", c.refresh_schema_metadata) + self.assertRaisesRegex(Exception, r"Schema metadata was not refreshed.*", c.refresh_schema_metadata) end_time = time.time() self.assertGreaterEqual(end_time - start_time, agreement_timeout) self.assertIs(original_meta, c.metadata.keyspaces) @@ -573,7 +573,7 @@ def patched_wait_for_responses(*args, **kwargs): # refresh wait overrides cluster value original_meta = c.metadata.keyspaces start_time = time.time() - six.assertRaisesRegex(self, Exception, r"Schema metadata was not refreshed.*", c.refresh_schema_metadata, + self.assertRaisesRegex(Exception, r"Schema metadata was not refreshed.*", c.refresh_schema_metadata, max_schema_agreement_wait=agreement_timeout) end_time = time.time() self.assertGreaterEqual(end_time - start_time, agreement_timeout) diff --git a/tests/integration/standard/test_metadata.py b/tests/integration/standard/test_metadata.py index 6f76c2a9b0..b83df22032 100644 --- a/tests/integration/standard/test_metadata.py +++ b/tests/integration/standard/test_metadata.py @@ -1590,7 +1590,7 @@ def test_function_no_parameters(self): with self.VerifiedFunction(self, **kwargs) as vf: fn_meta = self.keyspace_function_meta[vf.signature] - six.assertRegex(self, fn_meta.as_cql_query(), "CREATE FUNCTION.*%s\(\) .*" % kwargs['name']) + self.assertRegex(fn_meta.as_cql_query(), "CREATE FUNCTION.*%s\(\) .*" % kwargs['name']) def test_functions_follow_keyspace_alter(self): """ @@ -1638,12 +1638,12 @@ def test_function_cql_called_on_null(self): kwargs['called_on_null_input'] = True with self.VerifiedFunction(self, **kwargs) as vf: fn_meta = self.keyspace_function_meta[vf.signature] - six.assertRegex(self, fn_meta.as_cql_query(), "CREATE FUNCTION.*\) CALLED ON NULL INPUT RETURNS .*") + self.assertRegex(fn_meta.as_cql_query(), "CREATE FUNCTION.*\) CALLED ON NULL INPUT RETURNS .*") kwargs['called_on_null_input'] = False with self.VerifiedFunction(self, **kwargs) as vf: fn_meta = self.keyspace_function_meta[vf.signature] - six.assertRegex(self, fn_meta.as_cql_query(), "CREATE FUNCTION.*\) RETURNS NULL ON NULL INPUT RETURNS .*") + self.assertRegex(fn_meta.as_cql_query(), "CREATE FUNCTION.*\) RETURNS NULL ON NULL INPUT RETURNS .*") class AggregateMetadata(FunctionTest): diff --git a/tests/integration/standard/test_types.py b/tests/integration/standard/test_types.py index 828f10b5e2..6e2e9f7328 100644 --- a/tests/integration/standard/test_types.py +++ b/tests/integration/standard/test_types.py @@ -69,7 +69,7 @@ def test_can_insert_blob_type_as_string(self): msg = r'.*Invalid STRING constant \(.*?\) for "b" of type blob.*' else: msg = r'.*Invalid STRING constant \(.*?\) for b of type blob.*' - six.assertRaisesRegex(self, InvalidRequest, msg, s.execute, query, params) + self.assertRaisesRegex(InvalidRequest, msg, s.execute, query, params) return # In python2, with Cassandra < 2.0, we can manually encode the 'byte str' type as hex for insertion in a blob. 
diff --git a/tests/unit/advanced/test_graph.py b/tests/unit/advanced/test_graph.py index 77a920a3bf..a98a48c82f 100644 --- a/tests/unit/advanced/test_graph.py +++ b/tests/unit/advanced/test_graph.py @@ -259,7 +259,7 @@ def test_init_unknown_kwargs(self): with warnings.catch_warnings(record=True) as w: GraphOptions(unknown_param=42) self.assertEqual(len(w), 1) - six.assertRegex(self, str(w[0].message), r"^Unknown keyword.*GraphOptions.*") + self.assertRegex(str(w[0].message), r"^Unknown keyword.*GraphOptions.*") def test_update(self): opts = GraphOptions(**self.api_params) diff --git a/tests/unit/cqlengine/test_connection.py b/tests/unit/cqlengine/test_connection.py index 9c3454796a..962ee06b52 100644 --- a/tests/unit/cqlengine/test_connection.py +++ b/tests/unit/cqlengine/test_connection.py @@ -52,12 +52,12 @@ def test_get_session_fails_without_existing_connection(self): """ Users can't get the default session without having a default connection set. """ - with six.assertRaisesRegex(self, connection.CQLEngineException, self.no_registered_connection_msg): + with self.assertRaisesRegex(connection.CQLEngineException, self.no_registered_connection_msg): connection.get_session(connection=None) def test_get_cluster_fails_without_existing_connection(self): """ Users can't get the default cluster without having a default connection set. """ - with six.assertRaisesRegex(self, connection.CQLEngineException, self.no_registered_connection_msg): + with self.assertRaisesRegex(connection.CQLEngineException, self.no_registered_connection_msg): connection.get_cluster(connection=None) diff --git a/tests/unit/test_connection.py b/tests/unit/test_connection.py index 97faa5e7fc..bc6749a477 100644 --- a/tests/unit/test_connection.py +++ b/tests/unit/test_connection.py @@ -392,7 +392,7 @@ def send_msg(msg, req_id, msg_callback): connection.defunct.assert_has_calls([call(ANY)] * get_holders.call_count) exc = connection.defunct.call_args_list[0][0][0] self.assertIsInstance(exc, ConnectionException) - six.assertRegex(self, exc.args[0], r'^Received unexpected response to OptionsMessage.*') + self.assertRegex(exc.args[0], r'^Received unexpected response to OptionsMessage.*') holder.return_connection.assert_has_calls( [call(connection)] * get_holders.call_count) diff --git a/tests/unit/test_policies.py b/tests/unit/test_policies.py index edafb7cb01..a6c63dcfdc 100644 --- a/tests/unit/test_policies.py +++ b/tests/unit/test_policies.py @@ -1301,7 +1301,7 @@ def test_immutable_predicate(self): expected_message_regex = "can't set attribute" hfp = HostFilterPolicy(child_policy=Mock(name='child_policy'), predicate=Mock(name='predicate')) - with six.assertRaisesRegex(self, AttributeError, expected_message_regex): + with self.assertRaisesRegex(AttributeError, expected_message_regex): hfp.predicate = object() diff --git a/tests/unit/test_protocol.py b/tests/unit/test_protocol.py index 3d6828bdc5..0f251ffc0e 100644 --- a/tests/unit/test_protocol.py +++ b/tests/unit/test_protocol.py @@ -173,7 +173,7 @@ def test_keyspace_flag_raises_before_v5(self): keyspace_message = QueryMessage('a', consistency_level=3, keyspace='ks') io = Mock(name='io') - with six.assertRaisesRegex(self, UnsupportedOperation, 'Keyspaces.*set'): + with self.assertRaisesRegex(UnsupportedOperation, 'Keyspaces.*set'): keyspace_message.send_body(io, protocol_version=4) io.assert_not_called() diff --git a/tests/unit/test_response_future.py b/tests/unit/test_response_future.py index a9c05976e0..273490072f 100644 --- a/tests/unit/test_response_future.py +++ 
b/tests/unit/test_response_future.py @@ -160,7 +160,7 @@ def test_heartbeat_defunct_deadlock(self): # Simulate ResponseFuture timing out rf._on_timeout() - six.assertRaisesRegex(self, OperationTimedOut, "Connection defunct by heartbeat", rf.result) + self.assertRaisesRegex(OperationTimedOut, "Connection defunct by heartbeat", rf.result) def test_read_timeout_error_message(self): session = self.make_session() From bf7abff6e541dd720f739733f897279f29f5b7cd Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Mon, 13 Mar 2023 14:19:21 -0500 Subject: [PATCH 153/211] Update Travis config to only run versions that will be supported going forward --- .travis.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 906775e90c..7e59fa486d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,13 +3,9 @@ sudo: false language: python python: - - "2.7" - - "3.5" - - "3.6" - "3.7" - "3.8" - - "pypy2.7-6.0" - - "pypy3.5" + - "pypy3.7" env: - CASS_DRIVER_NO_CYTHON=1 From ee3f3af95cdd66075aa4da2da71a92caad74165a Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Mon, 13 Mar 2023 14:45:55 -0500 Subject: [PATCH 154/211] Trying to get to a maximal working Pypy version. Have to go back to 3.6 which isn't ideal... --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 7e59fa486d..9f9c450a77 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,7 +5,7 @@ language: python python: - "3.7" - "3.8" - - "pypy3.7" + - "pypy3.6" env: - CASS_DRIVER_NO_CYTHON=1 From fa9b7af85bf07c381ff955790faa3ec7d13c7713 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Mon, 13 Mar 2023 14:48:45 -0500 Subject: [PATCH 155/211] Forgot to add complete extension --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 9f9c450a77..54d3a6c89c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,7 +5,7 @@ language: python python: - "3.7" - "3.8" - - "pypy3.6" + - "pypy3.6-7.0.0" env: - CASS_DRIVER_NO_CYTHON=1 From fdac31e5c93f867166861e1573ee41b00f3e78f7 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Mon, 13 Mar 2023 15:52:34 -0500 Subject: [PATCH 156/211] Going back to known good non-2.7 PyPy target. PYTHON-1333 has more detail. 
---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 54d3a6c89c..4d94d86087 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,7 +5,7 @@ language: python
 python:
   - "3.7"
   - "3.8"
-  - "pypy3.6-7.0.0"
+  - "pypy3.5"

 env:
   - CASS_DRIVER_NO_CYTHON=1

From f5001b8759d34896a1899b791a5ca57db8ba8069 Mon Sep 17 00:00:00 2001
From: Bret McGuire
Date: Mon, 13 Mar 2023 16:57:23 -0500
Subject: [PATCH 157/211] Release 3.26: changelog & version

---
 CHANGELOG.rst         | 33 +++++++++++++++++++++++++++++++++
 cassandra/__init__.py |  2 +-
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index d2d577c957..fc7a702534 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,3 +1,36 @@
+3.26.0
+======
+March 13, 2023
+
+Features
+--------
+* Add support for execution profiles in execute_concurrent (PR 1122)
+
+Bug Fixes
+---------
+* Handle empty non-final result pages (PR 1110)
+* Do not re-use stream IDs for in-flight requests (PR 1114)
+* Asyncore race condition causes logging exception on shutdown (PYTHON-1266)
+
+Others
+------
+* Fix deprecation warning in query tracing (PR 1103)
+* Remove mutable default values from some tests (PR 1116)
+* Remove dependency on unittest2 (PYTHON-1289)
+* Fix deprecation warnings for asyncio.coroutine annotation in asyncioreactor (PYTTHON-1290)
+* Fix typos in source files (PR 1126)
+* HostFilterPolicyInitTest fix for Python 3.11 (PR 1131)
+* Fix for DontPrepareOnIgnoredHostsTest (PYTHON-1287)
+* tests.integration.simulacron.test_connection failures (PYTHON-1304)
+* tests.integration.standard.test_single_interface.py appears to be failing for C* 4.0 (PYTHON-1329)
+* Authentication tests appear to be failing fraudulently (PYTHON-1328)
+* PreparedStatementTests.test_fail_if_different_query_id_on_reprepare() failing unexpectedly (PYTHON-1327)
+* Refactor deprecated unittest aliases for Python 3.11 compatibility (PR 1112)
+
+Deprecations
+------------
+* This release removes support for Python 2.7.x as well as Python 3.5.x and 3.6.x
+
 3.25.0
 ======
 March 18, 2021
diff --git a/cassandra/__init__.py b/cassandra/__init__.py
index e6cb5c55bb..e14f20c6ed 100644
--- a/cassandra/__init__.py
+++ b/cassandra/__init__.py
@@ -22,7 +22,7 @@ def emit(self, record):

 logging.getLogger('cassandra').addHandler(NullHandler())

-__version_info__ = (3, 25, 0)
+__version_info__ = (3, 26, 0)
 __version__ = '.'.join(map(str, __version_info__))


From 54ef7d3483a0772ba0d907306bddff6885f9fe7f Mon Sep 17 00:00:00 2001
From: Bret McGuire
Date: Mon, 27 Mar 2023 20:22:03 -0500
Subject: [PATCH 158/211] Doc changes following on from 3.26.0 release

---
 README-dev.rst | 5 ++---
 README.rst     | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/README-dev.rst b/README-dev.rst
index 85a722c3b7..5c0555f3a7 100644
--- a/README-dev.rst
+++ b/README-dev.rst
@@ -241,11 +241,10 @@ Adding a New Python Runtime Support
 * Add the new python version to our jenkins image:
   https://github.com/riptano/openstack-jenkins-drivers/

-* Add the new python version in job-creator:
-  https://github.com/riptano/job-creator/
+* Add the new python version in the Jenkinsfile and TravisCI configs as appropriate

 * Run the tests and ensure they all pass
   * also test all event loops

 * Update the wheels building repo to support that version:
-  https://github.com/riptano/python-dse-driver-wheels
+  https://github.com/datastax/python-driver-wheels
diff --git a/README.rst b/README.rst
index
7c5bf1eee1..197b698aa0 100644 --- a/README.rst +++ b/README.rst @@ -7,7 +7,7 @@ DataStax Driver for Apache Cassandra A modern, `feature-rich `_ and highly-tunable Python client library for Apache Cassandra (2.1+) and DataStax Enterprise (4.7+) using exclusively Cassandra's binary protocol and Cassandra Query Language v3. -The driver supports Python 2.7, 3.5, 3.6, 3.7 and 3.8. +The driver supports Python 3.7 and 3.8. **Note:** DataStax products do not support big-endian systems. From 17adb81817b353ec0aad92f46b936478f8f57514 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Sat, 8 Apr 2023 10:59:51 -0500 Subject: [PATCH 159/211] Fix typo --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index fc7a702534..544585e1ce 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -17,7 +17,7 @@ Others * Fix deprecation warning in query tracing (PR 1103) * Remove mutable default values from some tests (PR 1116) * Remove dependency on unittest2 (PYTHON-1289) -* Fix deprecation warnings for asyncio.coroutine annotation in asyncioreactor (PYTTHON-1290) +* Fix deprecation warnings for asyncio.coroutine annotation in asyncioreactor (PYTHON-1290) * Fix typos in source files (PR 1126) * HostFilterPolicyInitTest fix for Python 3.11 (PR 1131) * Fix for DontPrepareOnIgnoredHostsTest (PYTHON-1287) From 32a2613db39cb8ca14f8b8cb65bfd5ebf2992083 Mon Sep 17 00:00:00 2001 From: Jamie Gillenwater Date: Fri, 21 Apr 2023 10:41:15 -0400 Subject: [PATCH 160/211] Add 3.26 for docs (#1147) --- docs.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs.yaml b/docs.yaml index 8e29b942e3..11e9d2d5ac 100644 --- a/docs.yaml +++ b/docs.yaml @@ -22,6 +22,8 @@ sections: # build extensions like libev CASS_DRIVER_NO_CYTHON=1 python setup.py build_ext --inplace --force versions: + - name: '3.26' + ref: f5001b8 - name: '3.25' ref: a83c36a5 - name: '3.24' From f1e9126fc4db56c2f0dc9fb5c10c4c06f0892fc7 Mon Sep 17 00:00:00 2001 From: Jamie Gillenwater Date: Fri, 21 Apr 2023 11:02:16 -0400 Subject: [PATCH 161/211] Update driver version supported (#1148) --- docs/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index 978faa17c6..005d02bf84 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -4,7 +4,7 @@ A Python client driver for `Apache Cassandra® `_. This driver works exclusively with the Cassandra Query Language v3 (CQL3) and Cassandra's native protocol. Cassandra 2.1+ is supported, including DSE 4.7+. -The driver supports Python 2.7, 3.5, 3.6, 3.7 and 3.8. +The driver supports Python 3.7 and 3.8. This driver is open source under the `Apache v2 License `_. 
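As a quick illustration of how the release bump above surfaces at runtime, a minimal sketch (only `__version_info__`, `__version__`, and the dropped-runtime list come from the patches above; the guard itself is this example's own):

.. code-block:: python

    import sys

    import cassandra

    # __version__ is '.'-joined from __version_info__, per cassandra/__init__.py above.
    print(cassandra.__version_info__)   # (3, 26, 0) with this release installed
    print(cassandra.__version__)        # '3.26.0'

    # 3.26.0 removes support for Python 2.7.x as well as 3.5.x and 3.6.x (see the
    # CHANGELOG entry above), so an application can guard its runtime accordingly.
    assert sys.version_info >= (3, 7), "cassandra-driver 3.26+ requires Python 3.7+"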
From 9fa742a78a0b242b5f00d30e8a09df9c7467dfa6 Mon Sep 17 00:00:00 2001 From: Jamie Gillenwater Date: Fri, 21 Apr 2023 11:04:13 -0400 Subject: [PATCH 162/211] Update ref to include latest version supported (#1149) --- docs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs.yaml b/docs.yaml index 11e9d2d5ac..c95a63d5a9 100644 --- a/docs.yaml +++ b/docs.yaml @@ -23,7 +23,7 @@ sections: CASS_DRIVER_NO_CYTHON=1 python setup.py build_ext --inplace --force versions: - name: '3.26' - ref: f5001b8 + ref: f1e9126 - name: '3.25' ref: a83c36a5 - name: '3.24' From 058be1e995a71dbf6afc0cbdf3a2b0a429cd5e28 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Mon, 1 May 2023 10:56:12 -0500 Subject: [PATCH 163/211] PYTHON-1341 Impl of client-side column-level encryption/decryption (#1150) --- .travis.yml | 1 - Jenkinsfile | 2 +- cassandra/cluster.py | 20 +- cassandra/obj_parser.pyx | 16 +- cassandra/parsing.pxd | 2 + cassandra/parsing.pyx | 4 +- cassandra/policies.py | 181 +++++++++++++++++- cassandra/protocol.py | 46 +++-- cassandra/query.py | 23 ++- cassandra/row_parser.pyx | 12 +- docs/column_encryption.rst | 92 +++++++++ docs/index.rst | 3 + requirements.txt | 1 + .../standard/test_custom_protocol_handler.py | 4 +- tests/integration/standard/test_policies.py | 73 ++++++- tests/unit/test_policies.py | 136 ++++++++++++- tox.ini | 5 +- 17 files changed, 573 insertions(+), 48 deletions(-) create mode 100644 docs/column_encryption.rst diff --git a/.travis.yml b/.travis.yml index 4d94d86087..dbabf61378 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,7 +5,6 @@ language: python python: - "3.7" - "3.8" - - "pypy3.5" env: - CASS_DRIVER_NO_CYTHON=1 diff --git a/Jenkinsfile b/Jenkinsfile index 0fdafb17d2..f3b5d0718b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -57,7 +57,7 @@ matrices = [ "SMOKE": [ "SERVER": ['3.11', '4.0', 'dse-6.8.30'], "RUNTIME": ['3.7.7', '3.8.3'], - "CYTHON": ["False"] + "CYTHON": ["True", "False"] ] ] diff --git a/cassandra/cluster.py b/cassandra/cluster.py index c836fb4302..6b0d495013 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -1003,6 +1003,12 @@ def default_retry_policy(self, policy): load the configuration and certificates. """ + column_encryption_policy = None + """ + An instance of :class:`cassandra.policies.ColumnEncryptionPolicy` specifying encryption materials to be + used for columns in this cluster. + """ + @property def schema_metadata_enabled(self): """ @@ -1104,7 +1110,8 @@ def __init__(self, monitor_reporting_enabled=True, monitor_reporting_interval=30, client_id=None, - cloud=None): + cloud=None, + column_encryption_policy=None): """ ``executor_threads`` defines the number of threads in a pool for handling asynchronous tasks such as extablishing connection pools or refreshing metadata. 
@@ -1152,6 +1159,9 @@ def __init__(self, self.port = port + if column_encryption_policy is not None: + self.column_encryption_policy = column_encryption_policy + self.endpoint_factory = endpoint_factory or DefaultEndPointFactory(port=self.port) self.endpoint_factory.configure(self) @@ -2535,6 +2545,12 @@ def __init__(self, cluster, hosts, keyspace=None): self.encoder = Encoder() + if self.cluster.column_encryption_policy is not None: + try: + self.client_protocol_handler.column_encryption_policy = self.cluster.column_encryption_policy + except AttributeError: + log.info("Unable to set column encryption policy for session") + # create connection pools in parallel self._initial_connect_futures = set() for host in hosts: @@ -3074,7 +3090,7 @@ def prepare(self, query, custom_payload=None, keyspace=None): prepared_keyspace = keyspace if keyspace else None prepared_statement = PreparedStatement.from_message( response.query_id, response.bind_metadata, response.pk_indexes, self.cluster.metadata, query, prepared_keyspace, - self._protocol_version, response.column_metadata, response.result_metadata_id) + self._protocol_version, response.column_metadata, response.result_metadata_id, self.cluster.column_encryption_policy) prepared_statement.custom_payload = future.custom_payload self.cluster.add_prepared(response.query_id, prepared_statement) diff --git a/cassandra/obj_parser.pyx b/cassandra/obj_parser.pyx index a0b5316a33..cf43771dd7 100644 --- a/cassandra/obj_parser.pyx +++ b/cassandra/obj_parser.pyx @@ -17,9 +17,12 @@ include "ioutils.pyx" from cassandra import DriverException from cassandra.bytesio cimport BytesIOReader from cassandra.deserializers cimport Deserializer, from_binary +from cassandra.deserializers import find_deserializer from cassandra.parsing cimport ParseDesc, ColumnParser, RowParser from cassandra.tuple cimport tuple_new, tuple_set +from cpython.bytes cimport PyBytes_AsStringAndSize + cdef class ListParser(ColumnParser): """Decode a ResultMessage into a list of tuples (or other objects)""" @@ -58,18 +61,29 @@ cdef class TupleRowParser(RowParser): assert desc.rowsize >= 0 cdef Buffer buf + cdef Buffer newbuf cdef Py_ssize_t i, rowsize = desc.rowsize cdef Deserializer deserializer cdef tuple res = tuple_new(desc.rowsize) + ce_policy = desc.column_encryption_policy for i in range(rowsize): # Read the next few bytes get_buf(reader, &buf) # Deserialize bytes to python object deserializer = desc.deserializers[i] + coldesc = desc.coldescs[i] + uses_ce = ce_policy and ce_policy.contains_column(coldesc) try: - val = from_binary(deserializer, &buf, desc.protocol_version) + if uses_ce: + col_type = ce_policy.column_type(coldesc) + decrypted_bytes = ce_policy.decrypt(coldesc, to_bytes(&buf)) + PyBytes_AsStringAndSize(decrypted_bytes, &newbuf.ptr, &newbuf.size) + deserializer = find_deserializer(ce_policy.column_type(coldesc)) + val = from_binary(deserializer, &newbuf, desc.protocol_version) + else: + val = from_binary(deserializer, &buf, desc.protocol_version) except Exception as e: raise DriverException('Failed decoding result column "%s" of type %s: %s' % (desc.colnames[i], desc.coltypes[i].cql_parameterized_type(), diff --git a/cassandra/parsing.pxd b/cassandra/parsing.pxd index aa9478cd14..27dc368b07 100644 --- a/cassandra/parsing.pxd +++ b/cassandra/parsing.pxd @@ -18,6 +18,8 @@ from cassandra.deserializers cimport Deserializer cdef class ParseDesc: cdef public object colnames cdef public object coltypes + cdef public object column_encryption_policy + cdef public list coldescs cdef 
Deserializer[::1] deserializers cdef public int protocol_version cdef Py_ssize_t rowsize diff --git a/cassandra/parsing.pyx b/cassandra/parsing.pyx index d2bc0a3abe..954767d227 100644 --- a/cassandra/parsing.pyx +++ b/cassandra/parsing.pyx @@ -19,9 +19,11 @@ Module containing the definitions and declarations (parsing.pxd) for parsers. cdef class ParseDesc: """Description of what structure to parse""" - def __init__(self, colnames, coltypes, deserializers, protocol_version): + def __init__(self, colnames, coltypes, column_encryption_policy, coldescs, deserializers, protocol_version): self.colnames = colnames self.coltypes = coltypes + self.column_encryption_policy = column_encryption_policy + self.coldescs = coldescs self.deserializers = deserializers self.protocol_version = protocol_version self.rowsize = len(colnames) diff --git a/cassandra/policies.py b/cassandra/policies.py index fa1e8cf385..26b9aa4c5a 100644 --- a/cassandra/policies.py +++ b/cassandra/policies.py @@ -12,13 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +from collections import namedtuple +from functools import lru_cache from itertools import islice, cycle, groupby, repeat import logging +import os from random import randint, shuffle from threading import Lock import socket import warnings + +from cryptography.hazmat.primitives import padding +from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes + from cassandra import WriteType as WT +from cassandra.cqltypes import _cqltypes # This is done this way because WriteType was originally @@ -455,7 +463,7 @@ class HostFilterPolicy(LoadBalancingPolicy): A :class:`.LoadBalancingPolicy` subclass configured with a child policy, and a single-argument predicate. This policy defers to the child policy for hosts where ``predicate(host)`` is truthy. Hosts for which - ``predicate(host)`` is falsey will be considered :attr:`.IGNORED`, and will + ``predicate(host)`` is falsy will be considered :attr:`.IGNORED`, and will not be used in a query plan. This can be used in the cases where you need a whitelist or blacklist @@ -491,7 +499,7 @@ def __init__(self, child_policy, predicate): :param child_policy: an instantiated :class:`.LoadBalancingPolicy` that this one will defer to. :param predicate: a one-parameter function that takes a :class:`.Host`. - If it returns a falsey value, the :class:`.Host` will + If it returns a falsy value, the :class:`.Host` will be :attr:`.IGNORED` and not returned in query plans. """ super(HostFilterPolicy, self).__init__() @@ -527,7 +535,7 @@ def predicate(self): def distance(self, host): """ Checks if ``predicate(host)``, then returns - :attr:`~HostDistance.IGNORED` if falsey, and defers to the child policy + :attr:`~HostDistance.IGNORED` if falsy, and defers to the child policy otherwise. """ if self.predicate(host): @@ -616,7 +624,7 @@ class ReconnectionPolicy(object): def new_schedule(self): """ This should return a finite or infinite iterable of delays (each as a - floating point number of seconds) inbetween each failed reconnection + floating point number of seconds) in-between each failed reconnection attempt. Note that if the iterable is finite, reconnection attempts will cease once the iterable is exhausted. """ @@ -626,12 +634,12 @@ def new_schedule(self): class ConstantReconnectionPolicy(ReconnectionPolicy): """ A :class:`.ReconnectionPolicy` subclass which sleeps for a fixed delay - inbetween each reconnection attempt. + in-between each reconnection attempt. 
""" def __init__(self, delay, max_attempts=64): """ - `delay` should be a floating point number of seconds to wait inbetween + `delay` should be a floating point number of seconds to wait in-between each attempt. `max_attempts` should be a total number of attempts to be made before @@ -655,7 +663,7 @@ def new_schedule(self): class ExponentialReconnectionPolicy(ReconnectionPolicy): """ A :class:`.ReconnectionPolicy` subclass which exponentially increases - the length of the delay inbetween each reconnection attempt up to + the length of the delay in-between each reconnection attempt up to a set maximum delay. A random amount of jitter (+/- 15%) will be added to the pure exponential @@ -715,7 +723,7 @@ class RetryPolicy(object): timeout and unavailable failures. These are failures reported from the server side. Timeouts are configured by `settings in cassandra.yaml `_. - Unavailable failures occur when the coordinator cannot acheive the consistency + Unavailable failures occur when the coordinator cannot achieve the consistency level for a request. For further information see the method descriptions below. @@ -1181,3 +1189,160 @@ def _rethrow(self, *args, **kwargs): on_read_timeout = _rethrow on_write_timeout = _rethrow on_unavailable = _rethrow + + +ColDesc = namedtuple('ColDesc', ['ks', 'table', 'col']) +ColData = namedtuple('ColData', ['key','type']) + +class ColumnEncryptionPolicy(object): + """ + A policy enabling (mostly) transparent encryption and decryption of data before it is + sent to the cluster. + + Key materials and other configurations are specified on a per-column basis. This policy can + then be used by driver structures which are aware of the underlying columns involved in their + work. In practice this includes the following cases: + + * Prepared statements - data for columns specified by the cluster's policy will be transparently + encrypted before they are sent + * Rows returned from any query - data for columns specified by the cluster's policy will be + transparently decrypted before they are returned to the user + + To enable this functionality, create an instance of this class (or more likely a subclass) + before creating a cluster. This policy should then be configured and supplied to the Cluster + at creation time via the :attr:`.Cluster.column_encryption_policy` attribute. + """ + + def encrypt(self, coldesc, obj_bytes): + """ + Encrypt the specified bytes using the cryptography materials for the specified column. + Largely used internally, although this could also be used to encrypt values supplied + to non-prepared statements in a way that is consistent with this policy. + """ + raise NotImplementedError() + + def decrypt(self, coldesc, encrypted_bytes): + """ + Decrypt the specified (encrypted) bytes using the cryptography materials for the + specified column. Used internally; could be used externally as well but there's + not currently an obvious use case. + """ + raise NotImplementedError() + + def add_column(self, coldesc, key): + """ + Provide cryptography materials to be used when encrypted and/or decrypting data + for the specified column. + """ + raise NotImplementedError() + + def contains_column(self, coldesc): + """ + Predicate to determine if a specific column is supported by this policy. + Currently only used internally. + """ + raise NotImplementedError() + + def encode_and_encrypt(self, coldesc, obj): + """ + Helper function to enable use of this policy on simple (i.e. non-prepared) + statements. 
+ """ + raise NotImplementedError() + +AES256_BLOCK_SIZE = 128 +AES256_BLOCK_SIZE_BYTES = int(AES256_BLOCK_SIZE / 8) +AES256_KEY_SIZE = 256 +AES256_KEY_SIZE_BYTES = int(AES256_KEY_SIZE / 8) + +class AES256ColumnEncryptionPolicy(ColumnEncryptionPolicy): + + # CBC uses an IV that's the same size as the block size + # + # TODO: Need to find some way to expose mode options + # (CBC etc.) without leaking classes from the underlying + # impl here + def __init__(self, mode = modes.CBC, iv = os.urandom(AES256_BLOCK_SIZE_BYTES)): + + self.mode = mode + self.iv = iv + + # ColData for a given ColDesc is always preserved. We only create a Cipher + # when there's an actual need to for a given ColDesc + self.coldata = {} + self.ciphers = {} + + def encrypt(self, coldesc, obj_bytes): + + # AES256 has a 128-bit block size so if the input bytes don't align perfectly on + # those blocks we have to pad them. There's plenty of room for optimization here: + # + # * Instances of the PKCS7 padder should be managed in a bounded pool + # * It would be nice if we could get a flag from encrypted data to indicate + # whether it was padded or not + # * Might be able to make this happen with a leading block of flags in encrypted data + padder = padding.PKCS7(AES256_BLOCK_SIZE).padder() + padded_bytes = padder.update(obj_bytes) + padder.finalize() + + cipher = self._get_cipher(coldesc) + encryptor = cipher.encryptor() + return encryptor.update(padded_bytes) + encryptor.finalize() + + def decrypt(self, coldesc, encrypted_bytes): + + cipher = self._get_cipher(coldesc) + decryptor = cipher.decryptor() + padded_bytes = decryptor.update(encrypted_bytes) + decryptor.finalize() + + unpadder = padding.PKCS7(AES256_BLOCK_SIZE).unpadder() + return unpadder.update(padded_bytes) + unpadder.finalize() + + def add_column(self, coldesc, key, type): + + if not coldesc: + raise ValueError("ColDesc supplied to add_column cannot be None") + if not key: + raise ValueError("Key supplied to add_column cannot be None") + if not type: + raise ValueError("Type supplied to add_column cannot be None") + if type not in _cqltypes.keys(): + raise ValueError("Type %s is not a supported type".format(type)) + if not len(key) == AES256_KEY_SIZE_BYTES: + raise ValueError("AES256 column encryption policy expects a 256-bit encryption key") + self.coldata[coldesc] = ColData(key, _cqltypes[type]) + + def contains_column(self, coldesc): + return coldesc in self.coldata + + def encode_and_encrypt(self, coldesc, obj): + if not coldesc: + raise ValueError("ColDesc supplied to encode_and_encrypt cannot be None") + if not obj: + raise ValueError("Object supplied to encode_and_encrypt cannot be None") + coldata = self.coldata.get(coldesc) + if not coldata: + raise ValueError("Could not find ColData for ColDesc %s".format(coldesc)) + return self.encrypt(coldesc, coldata.type.serialize(obj, None)) + + def cache_info(self): + return AES256ColumnEncryptionPolicy._build_cipher.cache_info() + + def column_type(self, coldesc): + return self.coldata[coldesc].type + + def _get_cipher(self, coldesc): + """ + Access relevant state from this instance necessary to create a Cipher and then get one, + hopefully returning a cached instance if we've already done so (and it hasn't been evicted) + """ + + try: + coldata = self.coldata[coldesc] + return AES256ColumnEncryptionPolicy._build_cipher(coldata.key, self.mode, self.iv) + except KeyError: + raise ValueError("Could not find column {}".format(coldesc)) + + # Explicitly use a class method here to avoid caching self + 
@lru_cache(maxsize=128) + def _build_cipher(key, mode, iv): + return Cipher(algorithms.AES256(key), mode(iv)) diff --git a/cassandra/protocol.py b/cassandra/protocol.py index ed92a76679..5e3610811e 100644 --- a/cassandra/protocol.py +++ b/cassandra/protocol.py @@ -29,9 +29,6 @@ AlreadyExists, InvalidRequest, Unauthorized, UnsupportedOperation, UserFunctionDescriptor, UserAggregateDescriptor, SchemaTargetType) -from cassandra.marshal import (int32_pack, int32_unpack, uint16_pack, uint16_unpack, - uint8_pack, int8_unpack, uint64_pack, header_pack, - v3_header_pack, uint32_pack, uint32_le_unpack, uint32_le_pack) from cassandra.cqltypes import (AsciiType, BytesType, BooleanType, CounterColumnType, DateType, DecimalType, DoubleType, FloatType, Int32Type, @@ -40,6 +37,10 @@ UTF8Type, VarcharType, UUIDType, UserType, TupleType, lookup_casstype, SimpleDateType, TimeType, ByteType, ShortType, DurationType) +from cassandra.marshal import (int32_pack, int32_unpack, uint16_pack, uint16_unpack, + uint8_pack, int8_unpack, uint64_pack, header_pack, + v3_header_pack, uint32_pack, uint32_le_unpack, uint32_le_pack) +from cassandra.policies import ColDesc from cassandra import WriteType from cassandra.cython_deps import HAVE_CYTHON, HAVE_NUMPY from cassandra import util @@ -723,11 +724,11 @@ class ResultMessage(_MessageType): def __init__(self, kind): self.kind = kind - def recv(self, f, protocol_version, user_type_map, result_metadata): + def recv(self, f, protocol_version, user_type_map, result_metadata, column_encryption_policy): if self.kind == RESULT_KIND_VOID: return elif self.kind == RESULT_KIND_ROWS: - self.recv_results_rows(f, protocol_version, user_type_map, result_metadata) + self.recv_results_rows(f, protocol_version, user_type_map, result_metadata, column_encryption_policy) elif self.kind == RESULT_KIND_SET_KEYSPACE: self.new_keyspace = read_string(f) elif self.kind == RESULT_KIND_PREPARED: @@ -738,32 +739,40 @@ def recv(self, f, protocol_version, user_type_map, result_metadata): raise DriverException("Unknown RESULT kind: %d" % self.kind) @classmethod - def recv_body(cls, f, protocol_version, user_type_map, result_metadata): + def recv_body(cls, f, protocol_version, user_type_map, result_metadata, column_encryption_policy): kind = read_int(f) msg = cls(kind) - msg.recv(f, protocol_version, user_type_map, result_metadata) + msg.recv(f, protocol_version, user_type_map, result_metadata, column_encryption_policy) return msg - def recv_results_rows(self, f, protocol_version, user_type_map, result_metadata): + def recv_results_rows(self, f, protocol_version, user_type_map, result_metadata, column_encryption_policy): self.recv_results_metadata(f, user_type_map) column_metadata = self.column_metadata or result_metadata rowcount = read_int(f) rows = [self.recv_row(f, len(column_metadata)) for _ in range(rowcount)] self.column_names = [c[2] for c in column_metadata] self.column_types = [c[3] for c in column_metadata] + col_descs = [ColDesc(md[0], md[1], md[2]) for md in column_metadata] + + def decode_val(val, col_md, col_desc): + uses_ce = column_encryption_policy and column_encryption_policy.contains_column(col_desc) + col_type = column_encryption_policy.column_type(col_desc) if uses_ce else col_md[3] + raw_bytes = column_encryption_policy.decrypt(col_desc, val) if uses_ce else val + return col_type.from_binary(raw_bytes, protocol_version) + + def decode_row(row): + return tuple(decode_val(val, col_md, col_desc) for val, col_md, col_desc in zip(row, column_metadata, col_descs)) + try: - 
self.parsed_rows = [ - tuple(ctype.from_binary(val, protocol_version) - for ctype, val in zip(self.column_types, row)) - for row in rows] + self.parsed_rows = [decode_row(row) for row in rows] except Exception: for row in rows: - for i in range(len(row)): + for val, col_md, col_desc in zip(row, column_metadata, col_descs): try: - self.column_types[i].from_binary(row[i], protocol_version) + decode_val(val, col_md, col_desc) except Exception as e: - raise DriverException('Failed decoding result column "%s" of type %s: %s' % (self.column_names[i], - self.column_types[i].cql_parameterized_type(), + raise DriverException('Failed decoding result column "%s" of type %s: %s' % (col_md[2], + col_md[3].cql_parameterized_type(), str(e))) def recv_results_prepared(self, f, protocol_version, user_type_map): @@ -1099,6 +1108,9 @@ class _ProtocolHandler(object): result decoding implementations. """ + column_encryption_policy = None + """Instance of :class:`cassandra.policies.ColumnEncryptionPolicy` in use by this handler""" + @classmethod def encode_message(cls, msg, stream_id, protocol_version, compressor, allow_beta_protocol_version): """ @@ -1193,7 +1205,7 @@ def decode_message(cls, protocol_version, user_type_map, stream_id, flags, opcod log.warning("Unknown protocol flags set: %02x. May cause problems.", flags) msg_class = cls.message_types_by_opcode[opcode] - msg = msg_class.recv_body(body, protocol_version, user_type_map, result_metadata) + msg = msg_class.recv_body(body, protocol_version, user_type_map, result_metadata, cls.column_encryption_policy) msg.stream_id = stream_id msg.trace_id = trace_id msg.custom_payload = custom_payload diff --git a/cassandra/query.py b/cassandra/query.py index f7a5b8fdf5..dc72d7cbcd 100644 --- a/cassandra/query.py +++ b/cassandra/query.py @@ -31,6 +31,7 @@ from cassandra.util import unix_time_from_uuid1 from cassandra.encoder import Encoder import cassandra.encoder +from cassandra.policies import ColDesc from cassandra.protocol import _UNSET_VALUE from cassandra.util import OrderedDict, _sanitize_identifiers @@ -442,12 +443,14 @@ class PreparedStatement(object): query_string = None result_metadata = None result_metadata_id = None + column_encryption_policy = None routing_key_indexes = None _routing_key_index_set = None serial_consistency_level = None # TODO never used? 
def __init__(self, column_metadata, query_id, routing_key_indexes, query, - keyspace, protocol_version, result_metadata, result_metadata_id): + keyspace, protocol_version, result_metadata, result_metadata_id, + column_encryption_policy=None): self.column_metadata = column_metadata self.query_id = query_id self.routing_key_indexes = routing_key_indexes @@ -456,14 +459,17 @@ def __init__(self, column_metadata, query_id, routing_key_indexes, query, self.protocol_version = protocol_version self.result_metadata = result_metadata self.result_metadata_id = result_metadata_id + self.column_encryption_policy = column_encryption_policy self.is_idempotent = False @classmethod def from_message(cls, query_id, column_metadata, pk_indexes, cluster_metadata, query, prepared_keyspace, protocol_version, result_metadata, - result_metadata_id): + result_metadata_id, column_encryption_policy=None): if not column_metadata: - return PreparedStatement(column_metadata, query_id, None, query, prepared_keyspace, protocol_version, result_metadata, result_metadata_id) + return PreparedStatement(column_metadata, query_id, None, + query, prepared_keyspace, protocol_version, result_metadata, + result_metadata_id, column_encryption_policy) if pk_indexes: routing_key_indexes = pk_indexes @@ -489,7 +495,7 @@ def from_message(cls, query_id, column_metadata, pk_indexes, cluster_metadata, return PreparedStatement(column_metadata, query_id, routing_key_indexes, query, prepared_keyspace, protocol_version, result_metadata, - result_metadata_id) + result_metadata_id, column_encryption_policy) def bind(self, values): """ @@ -577,6 +583,7 @@ def bind(self, values): values = () proto_version = self.prepared_statement.protocol_version col_meta = self.prepared_statement.column_metadata + ce_policy = self.prepared_statement.column_encryption_policy # special case for binding dicts if isinstance(values, dict): @@ -623,7 +630,13 @@ def bind(self, values): raise ValueError("Attempt to bind UNSET_VALUE while using unsuitable protocol version (%d < 4)" % proto_version) else: try: - self.values.append(col_spec.type.serialize(value, proto_version)) + col_desc = ColDesc(col_spec.keyspace_name, col_spec.table_name, col_spec.name) + uses_ce = ce_policy and ce_policy.contains_column(col_desc) + col_type = ce_policy.column_type(col_desc) if uses_ce else col_spec.type + col_bytes = col_type.serialize(value, proto_version) + if uses_ce: + col_bytes = ce_policy.encrypt(col_desc, col_bytes) + self.values.append(col_bytes) except (TypeError, struct.error) as exc: actual_type = type(value) message = ('Received an argument of invalid type for column "%s". ' diff --git a/cassandra/row_parser.pyx b/cassandra/row_parser.pyx index 3a4b2f4604..88277a4593 100644 --- a/cassandra/row_parser.pyx +++ b/cassandra/row_parser.pyx @@ -13,13 +13,14 @@ # limitations under the License. from cassandra.parsing cimport ParseDesc, ColumnParser +from cassandra.policies import ColDesc from cassandra.obj_parser import TupleRowParser from cassandra.deserializers import make_deserializers include "ioutils.pyx" def make_recv_results_rows(ColumnParser colparser): - def recv_results_rows(self, f, int protocol_version, user_type_map, result_metadata): + def recv_results_rows(self, f, int protocol_version, user_type_map, result_metadata, column_encryption_policy): """ Parse protocol data given as a BytesIO f into a set of columns (e.g. 
list of tuples) This is used as the recv_results_rows method of (Fast)ResultMessage @@ -28,11 +29,12 @@ def make_recv_results_rows(ColumnParser colparser): column_metadata = self.column_metadata or result_metadata - self.column_names = [c[2] for c in column_metadata] - self.column_types = [c[3] for c in column_metadata] + self.column_names = [md[2] for md in column_metadata] + self.column_types = [md[3] for md in column_metadata] - desc = ParseDesc(self.column_names, self.column_types, make_deserializers(self.column_types), - protocol_version) + desc = ParseDesc(self.column_names, self.column_types, column_encryption_policy, + [ColDesc(md[0], md[1], md[2]) for md in column_metadata], + make_deserializers(self.column_types), protocol_version) reader = BytesIOReader(f.read()) try: self.parsed_rows = colparser.parse_rows(reader, desc) diff --git a/docs/column_encryption.rst b/docs/column_encryption.rst new file mode 100644 index 0000000000..4d2a6c2d91 --- /dev/null +++ b/docs/column_encryption.rst @@ -0,0 +1,92 @@
+Column Encryption
+=================
+
+Overview
+--------
+Support for client-side encryption of data was added in version 3.27.0 of the Python driver. When using
+this feature, data will be encrypted on the fly according to a specified :class:`~.ColumnEncryptionPolicy`
+instance. This policy is also used to decrypt data in returned rows. If a prepared statement is used,
+this decryption is transparent to the user; retrieved data will be decrypted and converted into the original
+type (according to definitions in the encryption policy). Support for simple (i.e. non-prepared) queries is
+also available, although in this case values must be manually encrypted and/or decrypted. The
+:class:`~.ColumnEncryptionPolicy` instance provides methods to assist with these operations.
+
+Client-side encryption and decryption should work against all versions of Cassandra and DSE. It does not
+utilize any server-side functionality to do its work.
+
+Configuration
+-------------
+Client-side encryption is enabled by creating an instance of a subclass of :class:`~.ColumnEncryptionPolicy`
+and adding information about columns to be encrypted to it. This policy is then supplied to :class:`~.Cluster`
+when it's created.
+
+.. code-block:: python
+    import os
+
+    from cassandra.cluster import Cluster
+    from cassandra.policies import ColDesc, AES256ColumnEncryptionPolicy, AES256_KEY_SIZE_BYTES
+
+    key = os.urandom(AES256_KEY_SIZE_BYTES)
+    cl_policy = AES256ColumnEncryptionPolicy()
+    col_desc = ColDesc('ks1','table1','column1')
+    cql_type = "int"
+    cl_policy.add_column(col_desc, key, cql_type)
+    cluster = Cluster(column_encryption_policy=cl_policy)
+
+:class:`~.AES256ColumnEncryptionPolicy` is a subclass of :class:`~.ColumnEncryptionPolicy` which provides
+encryption and decryption via AES-256. This class is currently the only available column encryption policy
+implementation, although users can certainly implement their own by subclassing :class:`~.ColumnEncryptionPolicy`.
+
+:class:`~.ColDesc` is a named tuple which uniquely identifies a column in a given keyspace and table. When we
+have this tuple, the encryption key, and the CQL type contained by this column, we can add the column to the policy
+using :func:`~.ColumnEncryptionPolicy.add_column`. Once we have added all column definitions to the policy, we
+pass it along to the cluster.
+
+The CQL type for the column only has meaning at the client; it is never sent to Cassandra.
The encryption key
+is also never sent to the server; all the server ever sees are random bytes reflecting the encrypted data. As a
+result, all columns containing client-side encrypted values should be declared with the CQL type "blob" at the
+Cassandra server.
+
+Usage
+-----
+
+Encryption
+^^^^^^^^^^
+Client-side encryption shines most when used with prepared statements. A prepared statement is aware of information
+about the columns in the query it was built from, and we can use this information to transparently encrypt any
+supplied parameters. For example, we can create a prepared statement to insert a value into column1 (as defined above)
+by executing the following code after creating a :class:`~.Cluster` in the manner described above:
+
+.. code-block:: python
+    session = cluster.connect()
+    prepared = session.prepare("insert into ks1.table1 (column1) values (?)")
+    session.execute(prepared, (1000,))
+
+Our encryption policy will detect that "column1" is an encrypted column and take appropriate action.
+
+As mentioned above, client-side encryption can also be used with simple queries, although such use cases are
+certainly not transparent. :class:`~.ColumnEncryptionPolicy` provides a helper named
+:func:`~.ColumnEncryptionPolicy.encode_and_encrypt` which will convert an input value into bytes using the
+standard serialization methods employed by the driver. The result is then encrypted according to the configuration
+of the policy. Using this approach, the example above could be implemented along the lines of the following:
+
+.. code-block:: python
+    session = cluster.connect()
+    session.execute("insert into ks1.table1 (column1) values (%s)",(cl_policy.encode_and_encrypt(col_desc, 1000),))
+
+Decryption
+^^^^^^^^^^
+Decryption of values returned from the server is always transparent. Whether we're executing a simple or prepared
+statement, encrypted columns will be decrypted automatically and made available via rows just like any other
+result.
+
+Limitations
+-----------
+:class:`~.AES256ColumnEncryptionPolicy` uses the implementation of AES-256 provided by the
+`cryptography `_ module. Any limitations of this module should be considered
+when deploying client-side encryption. Note specifically that a Rust compiler is required for modern versions
+of the cryptography package, although wheels exist for many common platforms.
+
+Client-side encryption has been implemented for both the default Cython and pure Python row processing logic.
+This functionality has not yet been ported to the NumPy Cython implementation. We have reason to believe the
+NumPy processing works reasonably well on Python 3.7 but fails for Python 3.8. We hope to address this discrepancy
+in a future release.
\ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index 005d02bf84..6f34f249fb 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -50,6 +50,9 @@ Contents :doc:`cloud` A guide to connecting to Datastax Astra +:doc:`column_encryption` + Transparent client-side per-column encryption and decryption + :doc:`geo_types` Working with DSE geometry types diff --git a/requirements.txt b/requirements.txt index f784fba1b9..44356365ce 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +cryptography >= 35.0 geomet>=0.1,<0.3 six >=1.9 futures <=2.2.0 diff --git a/tests/integration/standard/test_custom_protocol_handler.py b/tests/integration/standard/test_custom_protocol_handler.py index 7443ce0748..60c5fb8969 100644 --- a/tests/integration/standard/test_custom_protocol_handler.py +++ b/tests/integration/standard/test_custom_protocol_handler.py @@ -261,7 +261,7 @@ class CustomResultMessageRaw(ResultMessage): my_type_codes[0xc] = UUIDType type_codes = my_type_codes - def recv_results_rows(self, f, protocol_version, user_type_map, result_metadata): + def recv_results_rows(self, f, protocol_version, user_type_map, result_metadata, column_encryption_policy): self.recv_results_metadata(f, user_type_map) column_metadata = self.column_metadata or result_metadata rowcount = read_int(f) @@ -290,7 +290,7 @@ class CustomResultMessageTracked(ResultMessage): type_codes = my_type_codes checked_rev_row_set = set() - def recv_results_rows(self, f, protocol_version, user_type_map, result_metadata): + def recv_results_rows(self, f, protocol_version, user_type_map, result_metadata, column_encryption_policy): self.recv_results_metadata(f, user_type_map) column_metadata = self.column_metadata or result_metadata rowcount = read_int(f) diff --git a/tests/integration/standard/test_policies.py b/tests/integration/standard/test_policies.py index 46e91918ac..8f46306236 100644 --- a/tests/integration/standard/test_policies.py +++ b/tests/integration/standard/test_policies.py @@ -12,11 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +from decimal import Decimal +import os +import random import unittest from cassandra.cluster import ExecutionProfile, EXEC_PROFILE_DEFAULT from cassandra.policies import HostFilterPolicy, RoundRobinPolicy, SimpleConvictionPolicy, \ - WhiteListRoundRobinPolicy + WhiteListRoundRobinPolicy, ColDesc, AES256ColumnEncryptionPolicy, AES256_KEY_SIZE_BYTES from cassandra.pool import Host from cassandra.connection import DefaultEndPoint @@ -90,3 +93,71 @@ def test_only_connects_to_subset(self): queried_hosts.update(response.response_future.attempted_hosts) queried_hosts = set(host.address for host in queried_hosts) self.assertEqual(queried_hosts, only_connect_hosts)
+
+class ColumnEncryptionPolicyTest(unittest.TestCase):
+
+    def _recreate_keyspace(self, session):
+        session.execute("drop keyspace if exists foo")
+        session.execute("CREATE KEYSPACE foo WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}")
+        session.execute("CREATE TABLE foo.bar(encrypted blob, unencrypted int, primary key(unencrypted))")
+
+    def test_end_to_end_prepared(self):
+
+        # We currently only perform testing on a single type/expected value pair since CLE functionality is essentially
+        # independent of the underlying type. We intercept data after it's been encoded when it's going out and before it's
+        # decoded when coming back; the actual types of the data involved don't impact us.
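+        # (Put differently: values are encrypted after serialization on the way out and decrypted before
+        # deserialization on the way back in, so every CQL type exercises the same interception path.)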
+        expected = 12345
+        expected_type = "int"
+
+        key = os.urandom(AES256_KEY_SIZE_BYTES)
+        cl_policy = AES256ColumnEncryptionPolicy()
+        col_desc = ColDesc('foo','bar','encrypted')
+        cl_policy.add_column(col_desc, key, expected_type)
+
+        cluster = TestCluster(column_encryption_policy=cl_policy)
+        session = cluster.connect()
+        self._recreate_keyspace(session)
+
+        prepared = session.prepare("insert into foo.bar (encrypted, unencrypted) values (?,?)")
+        session.execute(prepared, (expected,expected))
+
+        # A straight select from the database will now return the decrypted bits. We select both encrypted and unencrypted
+        # values here to confirm that we don't interfere with regular processing of unencrypted vals.
+        (encrypted,unencrypted) = session.execute("select encrypted, unencrypted from foo.bar where unencrypted = %s allow filtering", (expected,)).one()
+        self.assertEquals(expected, encrypted)
+        self.assertEquals(expected, unencrypted)
+
+        # Confirm the same behaviour from a subsequent prepared statement as well
+        prepared = session.prepare("select encrypted, unencrypted from foo.bar where unencrypted = ? allow filtering")
+        (encrypted,unencrypted) = session.execute(prepared, [expected]).one()
+        self.assertEquals(expected, encrypted)
+        self.assertEquals(expected, unencrypted)
+
+    def test_end_to_end_simple(self):
+
+        expected = 67890
+        expected_type = "int"
+
+        key = os.urandom(AES256_KEY_SIZE_BYTES)
+        cl_policy = AES256ColumnEncryptionPolicy()
+        col_desc = ColDesc('foo','bar','encrypted')
+        cl_policy.add_column(col_desc, key, expected_type)
+
+        cluster = TestCluster(column_encryption_policy=cl_policy)
+        session = cluster.connect()
+        self._recreate_keyspace(session)
+
+        # Use encode_and_encrypt helper function to populate data
+        session.execute("insert into foo.bar (encrypted, unencrypted) values (%s,%s)",(cl_policy.encode_and_encrypt(col_desc, expected), expected))
+
+        # A straight select from the database will now return the decrypted bits. We select both encrypted and unencrypted
+        # values here to confirm that we don't interfere with regular processing of unencrypted vals.
+        (encrypted,unencrypted) = session.execute("select encrypted, unencrypted from foo.bar where unencrypted = %s allow filtering", (expected,)).one()
+        self.assertEquals(expected, encrypted)
+        self.assertEquals(expected, unencrypted)
+
+        # Confirm the same behaviour from a subsequent prepared statement as well
+        prepared = session.prepare("select encrypted, unencrypted from foo.bar where unencrypted = ?
allow filtering") + (encrypted,unencrypted) = session.execute(prepared, [expected]).one() + self.assertEquals(expected, encrypted) + self.assertEquals(expected, unencrypted) diff --git a/tests/unit/test_policies.py b/tests/unit/test_policies.py index a6c63dcfdc..451d5c50c9 100644 --- a/tests/unit/test_policies.py +++ b/tests/unit/test_policies.py @@ -16,6 +16,7 @@ from itertools import islice, cycle from mock import Mock, patch, call +import os from random import randint import six from six.moves._thread import LockType @@ -25,6 +26,8 @@ from cassandra import ConsistencyLevel from cassandra.cluster import Cluster +from cassandra.connection import DefaultEndPoint +from cassandra.cqltypes import BytesType from cassandra.metadata import Metadata from cassandra.policies import (RoundRobinPolicy, WhiteListRoundRobinPolicy, DCAwareRoundRobinPolicy, TokenAwarePolicy, SimpleConvictionPolicy, @@ -32,9 +35,10 @@ RetryPolicy, WriteType, DowngradingConsistencyRetryPolicy, ConstantReconnectionPolicy, LoadBalancingPolicy, ConvictionPolicy, ReconnectionPolicy, FallthroughRetryPolicy, - IdentityTranslator, EC2MultiRegionTranslator, HostFilterPolicy) + IdentityTranslator, EC2MultiRegionTranslator, HostFilterPolicy, + AES256ColumnEncryptionPolicy, ColDesc, + AES256_BLOCK_SIZE_BYTES, AES256_KEY_SIZE_BYTES) from cassandra.pool import Host -from cassandra.connection import DefaultEndPoint from cassandra.query import Statement from six.moves import xrange @@ -1500,3 +1504,131 @@ def test_create_whitelist(self): self.assertEqual(set(query_plan), {Host(DefaultEndPoint("127.0.0.1"), SimpleConvictionPolicy), Host(DefaultEndPoint("127.0.0.4"), SimpleConvictionPolicy)}) +class AES256ColumnEncryptionPolicyTest(unittest.TestCase): + + def _random_block(self): + return os.urandom(AES256_BLOCK_SIZE_BYTES) + + def _random_key(self): + return os.urandom(AES256_KEY_SIZE_BYTES) + + def _test_round_trip(self, bytes): + coldesc = ColDesc('ks1','table1','col1') + policy = AES256ColumnEncryptionPolicy() + policy.add_column(coldesc, self._random_key(), "blob") + encrypted_bytes = policy.encrypt(coldesc, bytes) + self.assertEqual(bytes, policy.decrypt(coldesc, encrypted_bytes)) + + def test_no_padding_necessary(self): + self._test_round_trip(self._random_block()) + + def test_some_padding_required(self): + for byte_size in range(1,AES256_BLOCK_SIZE_BYTES - 1): + bytes = os.urandom(byte_size) + self._test_round_trip(bytes) + for byte_size in range(AES256_BLOCK_SIZE_BYTES + 1,(2 * AES256_BLOCK_SIZE_BYTES) - 1): + bytes = os.urandom(byte_size) + self._test_round_trip(bytes) + + def test_add_column_invalid_key_size_raises(self): + coldesc = ColDesc('ks1','table1','col1') + policy = AES256ColumnEncryptionPolicy() + for key_size in range(1,AES256_KEY_SIZE_BYTES - 1): + with self.assertRaises(ValueError): + policy.add_column(coldesc, os.urandom(key_size), "blob") + for key_size in range(AES256_KEY_SIZE_BYTES + 1,(2 * AES256_KEY_SIZE_BYTES) - 1): + with self.assertRaises(ValueError): + policy.add_column(coldesc, os.urandom(key_size), "blob") + + def test_add_column_null_coldesc_raises(self): + with self.assertRaises(ValueError): + policy = AES256ColumnEncryptionPolicy() + policy.add_column(None, self._random_block(), "blob") + + def test_add_column_null_key_raises(self): + with self.assertRaises(ValueError): + policy = AES256ColumnEncryptionPolicy() + coldesc = ColDesc('ks1','table1','col1') + policy.add_column(coldesc, None, "blob") + + def test_add_column_null_type_raises(self): + with self.assertRaises(ValueError): + policy = 
AES256ColumnEncryptionPolicy() + coldesc = ColDesc('ks1','table1','col1') + policy.add_column(coldesc, self._random_block(), None) + + def test_add_column_unknown_type_raises(self): + with self.assertRaises(ValueError): + policy = AES256ColumnEncryptionPolicy() + coldesc = ColDesc('ks1','table1','col1') + policy.add_column(coldesc, self._random_block(), "foobar") + + def test_encode_and_encrypt_null_coldesc_raises(self): + with self.assertRaises(ValueError): + policy = AES256ColumnEncryptionPolicy() + coldesc = ColDesc('ks1','table1','col1') + policy.add_column(coldesc, self._random_key(), "blob") + policy.encode_and_encrypt(None, self._random_block()) + + def test_encode_and_encrypt_null_obj_raises(self): + with self.assertRaises(ValueError): + policy = AES256ColumnEncryptionPolicy() + coldesc = ColDesc('ks1','table1','col1') + policy.add_column(coldesc, self._random_key(), "blob") + policy.encode_and_encrypt(coldesc, None) + + def test_encode_and_encrypt_unknown_coldesc_raises(self): + with self.assertRaises(ValueError): + policy = AES256ColumnEncryptionPolicy() + coldesc = ColDesc('ks1','table1','col1') + policy.add_column(coldesc, self._random_key(), "blob") + policy.encode_and_encrypt(ColDesc('ks2','table2','col2'), self._random_block()) + + def test_contains_column(self): + coldesc = ColDesc('ks1','table1','col1') + policy = AES256ColumnEncryptionPolicy() + policy.add_column(coldesc, self._random_key(), "blob") + self.assertTrue(policy.contains_column(coldesc)) + self.assertFalse(policy.contains_column(ColDesc('ks2','table1','col1'))) + self.assertFalse(policy.contains_column(ColDesc('ks1','table2','col1'))) + self.assertFalse(policy.contains_column(ColDesc('ks1','table1','col2'))) + self.assertFalse(policy.contains_column(ColDesc('ks2','table2','col2'))) + + def test_encrypt_unknown_column(self): + with self.assertRaises(ValueError): + policy = AES256ColumnEncryptionPolicy() + coldesc = ColDesc('ks1','table1','col1') + policy.add_column(coldesc, self._random_key(), "blob") + policy.encrypt(ColDesc('ks2','table2','col2'), self._random_block()) + + def test_decrypt_unknown_column(self): + policy = AES256ColumnEncryptionPolicy() + coldesc = ColDesc('ks1','table1','col1') + policy.add_column(coldesc, self._random_key(), "blob") + encrypted_bytes = policy.encrypt(coldesc, self._random_block()) + with self.assertRaises(ValueError): + policy.decrypt(ColDesc('ks2','table2','col2'), encrypted_bytes) + + def test_cache_info(self): + coldesc1 = ColDesc('ks1','table1','col1') + coldesc2 = ColDesc('ks2','table2','col2') + coldesc3 = ColDesc('ks3','table3','col3') + policy = AES256ColumnEncryptionPolicy() + for coldesc in [coldesc1, coldesc2, coldesc3]: + policy.add_column(coldesc, self._random_key(), "blob") + + # First run for this coldesc should be a miss, everything else should be a cache hit + for _ in range(10): + policy.encrypt(coldesc1, self._random_block()) + cache_info = policy.cache_info() + self.assertEqual(cache_info.hits, 9) + self.assertEqual(cache_info.misses, 1) + self.assertEqual(cache_info.maxsize, 128) + + # Important note: we're measuring the size of the cache of ciphers, NOT stored + # keys. 
We won't have a cipher here until we actually encrypt something + self.assertEqual(cache_info.currsize, 1) + policy.encrypt(coldesc2, self._random_block()) + self.assertEqual(policy.cache_info().currsize, 2) + policy.encrypt(coldesc3, self._random_block()) + self.assertEqual(policy.cache_info().currsize, 3) diff --git a/tox.ini b/tox.ini index 6d94e11247..4aae7a0140 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{27,35,36,37,38},pypy +envlist = py{37,38},pypy [base] deps = nose @@ -13,7 +13,8 @@ deps = nose kerberos futurist greenlet>=0.4.14,<0.4.17 -lz4_dependency = py27,py35,py36,py37,py38: lz4 + cryptography>=35.0 +lz4_dependency = py37,py38: lz4 [testenv] deps = {[base]deps} From ad400562efe6ffde9d6b8d4e7a4255e7751015f2 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Mon, 1 May 2023 11:01:35 -0500 Subject: [PATCH 164/211] Release 3.27: changelog & version --- CHANGELOG.rst | 8 ++++++++ cassandra/__init__.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 544585e1ce..432998869e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,11 @@ +3.27.0 +====== +May 1, 2023 + +Features +-------- +* Add support for client-side encryption (PYTHON-1341) + 3.26.0 ====== March 13, 2023 diff --git a/cassandra/__init__.py b/cassandra/__init__.py index e14f20c6ed..1573abdf00 100644 --- a/cassandra/__init__.py +++ b/cassandra/__init__.py @@ -22,7 +22,7 @@ def emit(self, record): logging.getLogger('cassandra').addHandler(NullHandler()) -__version_info__ = (3, 26, 0) +__version_info__ = (3, 27, 0) __version__ = '.'.join(map(str, __version_info__)) From 5646fb0ff5e95c2eea7998f96a1679d996ca88ac Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Mon, 1 May 2023 15:08:25 -0500 Subject: [PATCH 165/211] Include docs for 3.27.0 (#1153) --- docs.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs.yaml b/docs.yaml index c95a63d5a9..92d68b3013 100644 --- a/docs.yaml +++ b/docs.yaml @@ -22,6 +22,8 @@ sections: # build extensions like libev CASS_DRIVER_NO_CYTHON=1 python setup.py build_ext --inplace --force versions: + - name: '3.27' + ref: ad40056 - name: '3.26' ref: f1e9126 - name: '3.25' From 1767fdbfefc97fa2474461c391bc387c7cf0fdc2 Mon Sep 17 00:00:00 2001 From: Jamie Gillenwater Date: Mon, 1 May 2023 16:45:09 -0400 Subject: [PATCH 166/211] update RH nav order (#1154) * update RH nav order * add line break * add api --- docs/.nav | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/.nav b/docs/.nav index 375f058817..79f3029073 100644 --- a/docs/.nav +++ b/docs/.nav @@ -3,10 +3,6 @@ getting_started execution_profiles lwt object_mapper -geo_types -graph -graph_fluent -classic_graph performance query_paging security @@ -14,5 +10,12 @@ upgrading user_defined_types dates_and_times cloud +column_encryption +geo_types +graph +classic_graph +graph_fluent +CHANGELOG faq api + From 7556a2da46aa48cbdac7ea266970e7dfc426caa6 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Mon, 1 May 2023 15:46:22 -0500 Subject: [PATCH 167/211] Update docs.yaml to point to latest 3.27.0 docs --- docs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs.yaml b/docs.yaml index 92d68b3013..0529caddbe 100644 --- a/docs.yaml +++ b/docs.yaml @@ -23,7 +23,7 @@ sections: CASS_DRIVER_NO_CYTHON=1 python setup.py build_ext --inplace --force versions: - name: '3.27' - ref: ad40056 + ref: 1767fdb - name: '3.26' ref: f1e9126 - name: '3.25' From 78b1e2bde47fdef8ffe967fa8b06371872ba7c2a Mon Sep 17 00:00:00 2001 
From: Jamie Gillenwater Date: Mon, 1 May 2023 17:04:53 -0400 Subject: [PATCH 168/211] remove future plans (#1155) --- docs/column_encryption.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/column_encryption.rst b/docs/column_encryption.rst index 4d2a6c2d91..289f9cd62b 100644 --- a/docs/column_encryption.rst +++ b/docs/column_encryption.rst @@ -87,6 +87,5 @@ when deploying client-side encryption. Note specifically that a Rust compiler i of the cryptography package, although wheels exist for many common platforms. Client-side encryption has been implemented for both the default Cython and pure Python row processing logic. -This functionality has not yet been ported to the NumPy Cython implementation. We have reason to believe the -NumPy processing works reasonably well on Python 3.7 but fails for Python 3.8. We hope to address this discrepancy -in a future release. \ No newline at end of file +This functionality has not yet been ported to the NumPy Cython implementation. During testing, +the NumPy processing works on Python 3.7 but fails for Python 3.8. \ No newline at end of file From e98c26eb959a8bdbd14a0ecdf40bc05e4f047781 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Mon, 1 May 2023 16:12:33 -0500 Subject: [PATCH 169/211] Update docs.yaml to point to latest 3.27.0 docs, take two --- docs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs.yaml b/docs.yaml index 0529caddbe..d7226d5e69 100644 --- a/docs.yaml +++ b/docs.yaml @@ -23,7 +23,7 @@ sections: CASS_DRIVER_NO_CYTHON=1 python setup.py build_ext --inplace --force versions: - name: '3.27' - ref: 1767fdb + ref: 78b1e2b - name: '3.26' ref: f1e9126 - name: '3.25' From d911621d365baa6c637dedb70eed487a8a18f78c Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Mon, 1 May 2023 21:18:34 -0500 Subject: [PATCH 170/211] Missed dependency on cryptography in setup.py --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 056469aca6..ba7cd92f20 100644 --- a/setup.py +++ b/setup.py @@ -402,7 +402,8 @@ def run_setup(extensions): sys.stderr.write("Bypassing Cython setup requirement\n") dependencies = ['six >=1.9', - 'geomet>=0.1,<0.3'] + 'geomet>=0.1,<0.3', + 'cryptography>=35.0'] _EXTRAS_REQUIRE = { 'graph': ['gremlinpython==3.4.6'] From 078e2f616176c2a666852179c9e07b10a488ca9c Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Tue, 9 May 2023 09:34:28 -0500 Subject: [PATCH 171/211] Remove different build matrix selection for develop branches (#1138) --- Jenkinsfile | 68 ++++++++++++++++++++--------------------------------- 1 file changed, 25 insertions(+), 43 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index f3b5d0718b..bd04b0fa29 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -33,31 +33,35 @@ slack = new Slack() // // Smoke tests are CI-friendly test configuration. Currently-supported Python version + modern C*/DSE instances. // We also avoid cython since it's tested as part of the nightlies. 
+DEFAULT_CASSANDRA = ['2.1', '2.2', '3.0', '3.11', '4.0'] +DEFAULT_DSE = ['dse-5.0.15', 'dse-5.1.35', 'dse-6.0.18', 'dse-6.7.17', 'dse-6.8.30'] +DEFAULT_RUNTIME = ['2.7.18', '3.5.9', '3.6.10', '3.7.7', '3.8.3'] +DEFAULT_CYTHON = ["True", "False"] matrices = [ "FULL": [ - "SERVER": ['2.1', '2.2', '3.0', '3.11', '4.0', 'dse-5.0.15', 'dse-5.1.35', 'dse-6.0.18', 'dse-6.7.17', 'dse-6.8.30'], - "RUNTIME": ['2.7.18', '3.5.9', '3.6.10', '3.7.7', '3.8.3'], - "CYTHON": ["True", "False"] + "SERVER": DEFAULT_CASSANDRA + DEFAULT_DSE, + "RUNTIME": DEFAULT_RUNTIME, + "CYTHON": DEFAULT_CYTHON ], "DEVELOP": [ "SERVER": ['2.1', '3.11', 'dse-6.8.30'], "RUNTIME": ['2.7.18', '3.6.10'], - "CYTHON": ["True", "False"] + "CYTHON": DEFAULT_CYTHON ], "CASSANDRA": [ - "SERVER": ['2.1', '2.2', '3.0', '3.11', '4.0'], - "RUNTIME": ['2.7.18', '3.5.9', '3.6.10', '3.7.7', '3.8.3'], - "CYTHON": ["True", "False"] + "SERVER": DEFAULT_CASSANDRA, + "RUNTIME": DEFAULT_RUNTIME, + "CYTHON": DEFAULT_CYTHON ], "DSE": [ - "SERVER": ['dse-5.0.15', 'dse-5.1.35', 'dse-6.0.18', 'dse-6.7.17', 'dse-6.8.30'], - "RUNTIME": ['2.7.18', '3.5.9', '3.6.10', '3.7.7', '3.8.3'], - "CYTHON": ["True", "False"] + "SERVER": DEFAULT_DSE, + "RUNTIME": DEFAULT_RUNTIME, + "CYTHON": DEFAULT_CYTHON ], "SMOKE": [ - "SERVER": ['3.11', '4.0', 'dse-6.8.30'], - "RUNTIME": ['3.7.7', '3.8.3'], - "CYTHON": ["True", "False"] + "SERVER": DEFAULT_CASSANDRA.takeRight(2) + DEFAULT_DSE.takeRight(1), + "RUNTIME": DEFAULT_RUNTIME.takeRight(2), + "CYTHON": ["False"] ] ] @@ -84,24 +88,13 @@ def getBuildContext() { Based on schedule and parameters, configure the build context and env vars. */ - def profile = "${params.PROFILE}" + def PROFILE = "${params.PROFILE}" def EVENT_LOOP = "${params.EVENT_LOOP.toLowerCase()}" - matrixType = "SMOKE" - developBranchPattern = ~"((dev|long)-)?python-.*" - if (developBranchPattern.matcher(env.BRANCH_NAME).matches()) { - matrixType = "DEVELOP" - if (env.BRANCH_NAME.contains("long")) { - profile = "FULL" - } - } + matrixType = params.MATRIX != "DEFAULT" ? params.MATRIX : "SMOKE" + matrix = matrices[matrixType].clone() // Check if parameters were set explicitly - if (params.MATRIX != "DEFAULT") { - matrixType = params.MATRIX - } - - matrix = matrices[matrixType].clone() if (params.CYTHON != "DEFAULT") { matrix["CYTHON"] = [params.CYTHON] } @@ -121,7 +114,7 @@ def getBuildContext() { context = [ vars: [ - "PROFILE=${profile}", + "PROFILE=${PROFILE}", "EVENT_LOOP=${EVENT_LOOP}" ], matrix: matrix @@ -503,22 +496,11 @@ pipeline { ''') choice( name: 'PYTHON_VERSION', - choices: ['DEFAULT', '2.7.18', '3.5.9', '3.6.10', '3.7.7', '3.8.3'], + choices: ['DEFAULT'] + DEFAULT_RUNTIME, description: 'Python runtime version. Default to the build context.') choice( name: 'SERVER_VERSION', - choices: ['DEFAULT', - '2.1', // Legacy Apache CassandraⓇ - '2.2', // Legacy Apache CassandraⓇ - '3.0', // Previous Apache CassandraⓇ - '3.11', // Current Apache CassandraⓇ - '4.0', // Development Apache CassandraⓇ - 'dse-5.0.15', // Long Term Support DataStax Enterprise - 'dse-5.1.35', // Legacy DataStax Enterprise - 'dse-6.0.18', // Previous DataStax Enterprise - 'dse-6.7.17', // Previous DataStax Enterprise - 'dse-6.8.30', // Current DataStax Enterprise - ], + choices: ['DEFAULT'] + DEFAULT_CASSANDRA + DEFAULT_DSE, description: '''Apache CassandraⓇ and DataStax Enterprise server version to use for adhoc BUILD-AND-EXECUTE-TESTS ONLY! @@ -549,7 +531,7 @@ pipeline { - + @@ -574,7 +556,7 @@ pipeline {
4.0
-          Apache CassandraⓇ v4.x (CURRENTLY UNDER DEVELOPMENT)
+          Apache CassandraⓇ v4.0.x
dse-5.0.15
''')
        choice(
          name: 'CYTHON',
-         choices: ['DEFAULT', 'True', 'False'],
+         choices: ['DEFAULT'] + DEFAULT_CYTHON,
          description: '''

Flag to determine if Cython should be enabled

From 9ed9894a4cd9b1c2338a512ce13658b8675b0638 Mon Sep 17 00:00:00 2001 From: Emelia <105240296+emeliawilkinson24@users.noreply.github.com> Date: Tue, 9 May 2023 10:38:02 -0400 Subject: [PATCH 172/211] DOC-2813 (#1145) Added error handling blog reference. --- README.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.rst b/README.rst index 197b698aa0..e140371f07 100644 --- a/README.rst +++ b/README.rst @@ -57,6 +57,10 @@ Contributing ------------ See `CONTRIBUTING.md `_. +Error Handling +------------ +While originally written for the Java driver, users may reference the `Cassandra error handling done right blog `_ for resolving error handling scenarios with Apache Cassandra. + Reporting Problems ------------------ Please report any bugs and make any feature requests on the From 1092fbd8812415deae0845ae40f0936f326c588c Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Fri, 12 May 2023 09:54:04 -0500 Subject: [PATCH 173/211] DOC-3278 Update comment for retry policy (#1158) --- cassandra/policies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cassandra/policies.py b/cassandra/policies.py index 26b9aa4c5a..36063abafe 100644 --- a/cassandra/policies.py +++ b/cassandra/policies.py @@ -873,7 +873,7 @@ def on_request_error(self, query, consistency, error, retry_num): `retry_num` counts how many times the operation has been retried, so the first time this method is called, `retry_num` will be 0. - The default, it triggers a retry on the next host in the query plan + By default, it triggers a retry on the next host in the query plan with the same consistency level. """ # TODO revisit this for the next major From 0002e9126f66363220421b85219ae5183f22dd12 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Fri, 19 May 2023 12:25:32 -0500 Subject: [PATCH 174/211] Fix for rendering of code blocks in CLE documentation (#1159) --- docs/column_encryption.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/column_encryption.rst b/docs/column_encryption.rst index 289f9cd62b..1392972fa6 100644 --- a/docs/column_encryption.rst +++ b/docs/column_encryption.rst @@ -21,6 +21,7 @@ and adding information about columns to be encrypted to it. This policy is then when it's created. .. code-block:: python + import os from cassandra.policies import ColDesc, AES256ColumnEncryptionPolicy, AES256_KEY_SIZE_BYTES @@ -57,6 +58,7 @@ supplied parameters. For example, we can create a prepared statement to insert by executing the following code after creating a :class:`~.Cluster` in the manner described above: .. code-block:: python + session = cluster.connect() prepared = session.prepare("insert into ks1.table1 (column1) values (?)") session.execute(prepared, (1000,)) @@ -70,6 +72,7 @@ standard serialization methods employed by the driver. The result is then encry of the policy. Using this approach the example above could be implemented along the lines of the following: .. code-block:: python + session = cluster.connect() session.execute("insert into ks1.table1 (column1) values (%s)",(cl_policy.encode_and_encrypt(col_desc, 1000),)) @@ -88,4 +91,4 @@ of the cryptography package, although wheels exist for many common platforms. Client-side encryption has been implemented for both the default Cython and pure Python row processing logic. This functionality has not yet been ported to the NumPy Cython implementation. During testing, -the NumPy processing works on Python 3.7 but fails for Python 3.8. 
\ No newline at end of file +the NumPy processing works on Python 3.7 but fails for Python 3.8. From 7ba87616b8de2110f0a678d7b3d93476391b9208 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Thu, 18 May 2023 04:40:50 -0500 Subject: [PATCH 175/211] Update docs.yaml to include recent fixes to CLE docs --- docs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs.yaml b/docs.yaml index d7226d5e69..77c738b4f4 100644 --- a/docs.yaml +++ b/docs.yaml @@ -23,7 +23,7 @@ sections: CASS_DRIVER_NO_CYTHON=1 python setup.py build_ext --inplace --force versions: - name: '3.27' - ref: 78b1e2b + ref: 0002e912 - name: '3.26' ref: f1e9126 - name: '3.25' From 6894b028ffda01a63fad1deebe0b5300349d1611 Mon Sep 17 00:00:00 2001 From: Brad Schoening <5796692+bschoening@users.noreply.github.com> Date: Tue, 23 May 2023 17:30:35 -0400 Subject: [PATCH 176/211] remove unnecessary import __future__ (#1156) --- cassandra/cluster.py | 1 - cassandra/connection.py | 1 - cassandra/cqlengine/functions.py | 1 - cassandra/cqltypes.py | 1 - cassandra/protocol.py | 1 - cassandra/util.py | 1 - 6 files changed, 6 deletions(-) diff --git a/cassandra/cluster.py b/cassandra/cluster.py index 6b0d495013..13af920e87 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -16,7 +16,6 @@ This module houses the main classes you will interact with, :class:`.Cluster` and :class:`.Session`. """ -from __future__ import absolute_import import atexit from binascii import hexlify diff --git a/cassandra/connection.py b/cassandra/connection.py index 0869584663..2532342d67 100644 --- a/cassandra/connection.py +++ b/cassandra/connection.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import # to enable import io from stdlib from collections import defaultdict, deque import errno from functools import wraps, partial, total_ordering diff --git a/cassandra/cqlengine/functions.py b/cassandra/cqlengine/functions.py index 5cb0f673d1..a2495c010d 100644 --- a/cassandra/cqlengine/functions.py +++ b/cassandra/cqlengine/functions.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import division from datetime import datetime from cassandra.cqlengine import UnicodeMixin, ValidationError diff --git a/cassandra/cqltypes.py b/cassandra/cqltypes.py index 7946a63af8..65a863b6b5 100644 --- a/cassandra/cqltypes.py +++ b/cassandra/cqltypes.py @@ -27,7 +27,6 @@ # for example), these classes would be a good place to tack on # .from_cql_literal() and .as_cql_literal() classmethods (or whatever). -from __future__ import absolute_import # to enable import io from stdlib import ast from binascii import unhexlify import calendar diff --git a/cassandra/protocol.py b/cassandra/protocol.py index 5e3610811e..39c91a0e5b 100644 --- a/cassandra/protocol.py +++ b/cassandra/protocol.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import # to enable import io from stdlib from collections import namedtuple import logging import socket diff --git a/cassandra/util.py b/cassandra/util.py index dd5c58b01d..60490e5460 100644 --- a/cassandra/util.py +++ b/cassandra/util.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from __future__ import with_statement import calendar import datetime from functools import total_ordering From 64aa5ba16bb2d86cf70925112b4c458434981ea1 Mon Sep 17 00:00:00 2001 From: Lukas Elmer Date: Tue, 23 May 2023 23:34:41 +0200 Subject: [PATCH 177/211] docs: convert print statement to function in docs (#1157) --- cassandra/cluster.py | 4 ++-- cassandra/cqlengine/query.py | 12 ++++++------ cassandra/datastax/graph/fluent/__init__.py | 2 +- cassandra/query.py | 10 +++++----- docs/api/cassandra/cqlengine/models.rst | 4 ++-- docs/cqlengine/connections.rst | 2 +- docs/cqlengine/models.rst | 2 +- docs/execution_profiles.rst | 14 +++++++------- docs/faq.rst | 4 ++-- docs/getting_started.rst | 12 ++++++------ docs/graph_fluent.rst | 10 +++++----- docs/object_mapper.rst | 4 ++-- 12 files changed, 40 insertions(+), 40 deletions(-) diff --git a/cassandra/cluster.py b/cassandra/cluster.py index 13af920e87..d55547d559 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -989,7 +989,7 @@ def default_retry_policy(self, policy): cloud = None """ A dict of the cloud configuration. Example:: - + { # path to the secure connect bundle 'secure_connect_bundle': '/path/to/secure-connect-dbname.zip', @@ -1450,7 +1450,7 @@ def __init__(self, street, zipcode): # results will include Address instances results = session.execute("SELECT * FROM users") row = results[0] - print row.id, row.location.street, row.location.zipcode + print(row.id, row.location.street, row.location.zipcode) """ if self.protocol_version < 3: diff --git a/cassandra/cqlengine/query.py b/cassandra/cqlengine/query.py index 11f664ec02..1978d319f4 100644 --- a/cassandra/cqlengine/query.py +++ b/cassandra/cqlengine/query.py @@ -286,15 +286,15 @@ class ContextQuery(object): with ContextQuery(Automobile, keyspace='test2') as A: A.objects.create(manufacturer='honda', year=2008, model='civic') - print len(A.objects.all()) # 1 result + print(len(A.objects.all())) # 1 result with ContextQuery(Automobile, keyspace='test4') as A: - print len(A.objects.all()) # 0 result + print(len(A.objects.all())) # 0 result # Multiple models with ContextQuery(Automobile, Automobile2, connection='cluster2') as (A, A2): - print len(A.objects.all()) - print len(A2.objects.all()) + print(len(A.objects.all())) + print(len(A2.objects.all())) """ @@ -809,11 +809,11 @@ class Comment(Model): print("Normal") for comment in Comment.objects(photo_id=u): - print comment.comment_id + print(comment.comment_id) print("Reversed") for comment in Comment.objects(photo_id=u).order_by("-comment_id"): - print comment.comment_id + print(comment.comment_id) """ if len(colnames) == 0: clone = copy.deepcopy(self) diff --git a/cassandra/datastax/graph/fluent/__init__.py b/cassandra/datastax/graph/fluent/__init__.py index 44a0d136e0..92f148721e 100644 --- a/cassandra/datastax/graph/fluent/__init__.py +++ b/cassandra/datastax/graph/fluent/__init__.py @@ -257,7 +257,7 @@ def traversal_source(session=None, graph_name=None, execution_profile=EXEC_PROFI session = c.connect() g = DseGraph.traversal_source(session, 'my_graph') - print g.V().valueMap().toList() + print(g.V().valueMap().toList()) """ diff --git a/cassandra/query.py b/cassandra/query.py index dc72d7cbcd..7e4efc2511 100644 --- a/cassandra/query.py +++ b/cassandra/query.py @@ -77,7 +77,7 @@ def tuple_factory(colnames, rows): >>> session = cluster.connect('mykeyspace') >>> session.row_factory = tuple_factory >>> rows = session.execute("SELECT name, age FROM users LIMIT 1") - >>> print rows[0] + >>> print(rows[0]) ('Bob', 42) .. 
versionchanged:: 2.0.0 @@ -133,16 +133,16 @@ def named_tuple_factory(colnames, rows): >>> user = rows[0] >>> # you can access field by their name: - >>> print "name: %s, age: %d" % (user.name, user.age) + >>> print("name: %s, age: %d" % (user.name, user.age)) name: Bob, age: 42 >>> # or you can access fields by their position (like a tuple) >>> name, age = user - >>> print "name: %s, age: %d" % (name, age) + >>> print("name: %s, age: %d" % (name, age)) name: Bob, age: 42 >>> name = user[0] >>> age = user[1] - >>> print "name: %s, age: %d" % (name, age) + >>> print("name: %s, age: %d" % (name, age)) name: Bob, age: 42 .. versionchanged:: 2.0.0 @@ -188,7 +188,7 @@ def dict_factory(colnames, rows): >>> session = cluster.connect('mykeyspace') >>> session.row_factory = dict_factory >>> rows = session.execute("SELECT name, age FROM users LIMIT 1") - >>> print rows[0] + >>> print(rows[0]) {u'age': 42, u'name': u'Bob'} .. versionchanged:: 2.0.0 diff --git a/docs/api/cassandra/cqlengine/models.rst b/docs/api/cassandra/cqlengine/models.rst index 60b1471184..ee689a2b48 100644 --- a/docs/api/cassandra/cqlengine/models.rst +++ b/docs/api/cassandra/cqlengine/models.rst @@ -103,7 +103,7 @@ Model TestIfNotExistsModel.if_not_exists().create(id=id, count=9, text='111111111111') except LWTException as e: # handle failure case - print e.existing # dict containing LWT result fields + print(e.existing # dict containing LWT result fields) This method is supported on Cassandra 2.0 or later. @@ -144,7 +144,7 @@ Model t.iff(count=5).update('other text') except LWTException as e: # handle failure case - print e.existing # existing object + print(e.existing # existing object) .. automethod:: get diff --git a/docs/cqlengine/connections.rst b/docs/cqlengine/connections.rst index 03ade27521..fd44303514 100644 --- a/docs/cqlengine/connections.rst +++ b/docs/cqlengine/connections.rst @@ -99,7 +99,7 @@ You can specify a default connection per model: year = columns.Integer(primary_key=True) model = columns.Text(primary_key=True) - print len(Automobile.objects.all()) # executed on the connection 'cluster2' + print(len(Automobile.objects.all())) # executed on the connection 'cluster2' QuerySet and model instance --------------------------- diff --git a/docs/cqlengine/models.rst b/docs/cqlengine/models.rst index c0ba390119..719513f4a9 100644 --- a/docs/cqlengine/models.rst +++ b/docs/cqlengine/models.rst @@ -201,7 +201,7 @@ are only created, presisted, and queried via table Models. A short example to in users.create(name="Joe", addr=address(street="Easy St.", zipcode=99999)) user = users.objects(name="Joe")[0] - print user.name, user.addr + print(user.name, user.addr) # Joe address(street=u'Easy St.', zipcode=99999) UDTs are modeled by inheriting :class:`~.usertype.UserType`, and setting column type attributes. Types are then used in defining diff --git a/docs/execution_profiles.rst b/docs/execution_profiles.rst index 7be1a85e3f..0965d77f3d 100644 --- a/docs/execution_profiles.rst +++ b/docs/execution_profiles.rst @@ -43,7 +43,7 @@ Default session = cluster.connect() local_query = 'SELECT rpc_address FROM system.local' for _ in cluster.metadata.all_hosts(): - print session.execute(local_query)[0] + print(session.execute(local_query)[0]) .. 
parsed-literal:: @@ -69,7 +69,7 @@ Initializing cluster with profiles profiles = {'node1': node1_profile, 'node2': node2_profile} session = Cluster(execution_profiles=profiles).connect() for _ in cluster.metadata.all_hosts(): - print session.execute(local_query, execution_profile='node1')[0] + print(session.execute(local_query, execution_profile='node1')[0]) .. parsed-literal:: @@ -81,7 +81,7 @@ Initializing cluster with profiles .. code:: python for _ in cluster.metadata.all_hosts(): - print session.execute(local_query, execution_profile='node2')[0] + print(session.execute(local_query, execution_profile='node2')[0]) .. parsed-literal:: @@ -93,7 +93,7 @@ Initializing cluster with profiles .. code:: python for _ in cluster.metadata.all_hosts(): - print session.execute(local_query)[0] + print(session.execute(local_query)[0]) .. parsed-literal:: @@ -123,7 +123,7 @@ New profiles can be added constructing from scratch, or deriving from default: cluster.add_execution_profile(node1_profile, locked_execution) for _ in cluster.metadata.all_hosts(): - print session.execute(local_query, execution_profile=node1_profile)[0] + print(session.execute(local_query, execution_profile=node1_profile)[0]) .. parsed-literal:: @@ -144,8 +144,8 @@ We also have the ability to pass profile instances to be used for execution, but tmp = session.execution_profile_clone_update('node1', request_timeout=100, row_factory=tuple_factory) - print session.execute(local_query, execution_profile=tmp)[0] - print session.execute(local_query, execution_profile='node1')[0] + print(session.execute(local_query, execution_profile=tmp)[0]) + print(session.execute(local_query, execution_profile='node1')[0]) .. parsed-literal:: diff --git a/docs/faq.rst b/docs/faq.rst index 56cb648a24..194d5520e8 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -44,7 +44,7 @@ Since tracing is done asynchronously to the request, this method polls until the >>> result = future.result() >>> trace = future.get_query_trace() >>> for e in trace.events: - >>> print e.source_elapsed, e.description + >>> print(e.source_elapsed, e.description) 0:00:00.000077 Parsing select * from system.local 0:00:00.000153 Preparing statement @@ -67,7 +67,7 @@ With prepared statements, the replicas are obtained by ``routing_key``, based on >>> bound = prepared.bind((1,)) >>> replicas = cluster.metadata.get_replicas(bound.keyspace, bound.routing_key) >>> for h in replicas: - >>> print h.address + >>> print(h.address) 127.0.0.1 127.0.0.2 diff --git a/docs/getting_started.rst b/docs/getting_started.rst index ce31ca5d6f..432e42ec4f 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -142,7 +142,7 @@ way to execute a query is to use :meth:`~.Session.execute()`: rows = session.execute('SELECT name, age, email FROM users') for user_row in rows: - print user_row.name, user_row.age, user_row.email + print(user_row.name, user_row.age, user_row.email) This will transparently pick a Cassandra node to execute the query against and handle any retries that are necessary if the operation fails. @@ -158,19 +158,19 @@ examples are equivalent: rows = session.execute('SELECT name, age, email FROM users') for row in rows: - print row.name, row.age, row.email + print(row.name, row.age, row.email) .. code-block:: python rows = session.execute('SELECT name, age, email FROM users') for (name, age, email) in rows: - print name, age, email + print(name, age, email) .. 
code-block:: python rows = session.execute('SELECT name, age, email FROM users') for row in rows: - print row[0], row[1], row[2] + print(row[0], row[1], row[2]) If you prefer another result format, such as a ``dict`` per row, you can change the :attr:`~.Session.row_factory` attribute. @@ -358,7 +358,7 @@ For example: try: rows = future.result() user = rows[0] - print user.name, user.age + print(user.name, user.age) except ReadTimeout: log.exception("Query timed out:") @@ -375,7 +375,7 @@ This works well for executing many queries concurrently: # wait for them to complete and use the results for future in futures: rows = future.result() - print rows[0].name + print(rows[0].name) Alternatively, instead of calling :meth:`~.ResponseFuture.result()`, you can attach callback and errback functions through the diff --git a/docs/graph_fluent.rst b/docs/graph_fluent.rst index 03cf8d36c0..8d5ad5377d 100644 --- a/docs/graph_fluent.rst +++ b/docs/graph_fluent.rst @@ -83,7 +83,7 @@ to accomplish this configuration: session = cluster.connect() g = DseGraph.traversal_source(session) # Build the GraphTraversalSource - print g.V().toList() # Traverse the Graph + print(g.V().toList()) # Traverse the Graph Note that the execution profile created with :meth:`DseGraph.create_execution_profile <.datastax.graph.fluent.DseGraph.create_execution_profile>` cannot be used for any groovy string queries. @@ -231,11 +231,11 @@ Batch Queries DSE Graph supports batch queries using a :class:`TraversalBatch <.datastax.graph.fluent.query.TraversalBatch>` object instantiated with :meth:`DseGraph.batch <.datastax.graph.fluent.DseGraph.batch>`. A :class:`TraversalBatch <.datastax.graph.fluent.query.TraversalBatch>` allows -you to execute multiple graph traversals in a single atomic transaction. A -traversal batch is executed with :meth:`.Session.execute_graph` or using -:meth:`TraversalBatch.execute <.datastax.graph.fluent.query.TraversalBatch.execute>` if bounded to a DSE session. +you to execute multiple graph traversals in a single atomic transaction. A +traversal batch is executed with :meth:`.Session.execute_graph` or using +:meth:`TraversalBatch.execute <.datastax.graph.fluent.query.TraversalBatch.execute>` if bounded to a DSE session. -Either way you choose to execute the traversal batch, you need to configure +Either way you choose to execute the traversal batch, you need to configure the execution profile accordingly. Here is a example:: from cassandra.cluster import Cluster diff --git a/docs/object_mapper.rst b/docs/object_mapper.rst index 50d3cbf320..4366c0ad52 100644 --- a/docs/object_mapper.rst +++ b/docs/object_mapper.rst @@ -87,7 +87,7 @@ Getting Started >>> q.count() 4 >>> for instance in q: - >>> print instance.description + >>> print(instance.description) example5 example6 example7 @@ -101,5 +101,5 @@ Getting Started >>> q2.count() 1 >>> for instance in q2: - >>> print instance.description + >>> print(instance.description) example5 From f46581ebc7c98f80beb297522df4e2a07a67383a Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Wed, 24 May 2023 12:06:18 -0500 Subject: [PATCH 178/211] Revert "remove unnecessary import __future__ (#1156)" This reverts commit 6894b028ffda01a63fad1deebe0b5300349d1611. 
--- cassandra/cluster.py | 1 + cassandra/connection.py | 1 + cassandra/cqlengine/functions.py | 1 + cassandra/cqltypes.py | 1 + cassandra/protocol.py | 1 + cassandra/util.py | 1 + 6 files changed, 6 insertions(+) diff --git a/cassandra/cluster.py b/cassandra/cluster.py index d55547d559..12b00c42db 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -16,6 +16,7 @@ This module houses the main classes you will interact with, :class:`.Cluster` and :class:`.Session`. """ +from __future__ import absolute_import import atexit from binascii import hexlify diff --git a/cassandra/connection.py b/cassandra/connection.py index 2532342d67..0869584663 100644 --- a/cassandra/connection.py +++ b/cassandra/connection.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import absolute_import # to enable import io from stdlib from collections import defaultdict, deque import errno from functools import wraps, partial, total_ordering diff --git a/cassandra/cqlengine/functions.py b/cassandra/cqlengine/functions.py index a2495c010d..5cb0f673d1 100644 --- a/cassandra/cqlengine/functions.py +++ b/cassandra/cqlengine/functions.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import division from datetime import datetime from cassandra.cqlengine import UnicodeMixin, ValidationError diff --git a/cassandra/cqltypes.py b/cassandra/cqltypes.py index 65a863b6b5..7946a63af8 100644 --- a/cassandra/cqltypes.py +++ b/cassandra/cqltypes.py @@ -27,6 +27,7 @@ # for example), these classes would be a good place to tack on # .from_cql_literal() and .as_cql_literal() classmethods (or whatever). +from __future__ import absolute_import # to enable import io from stdlib import ast from binascii import unhexlify import calendar diff --git a/cassandra/protocol.py b/cassandra/protocol.py index 39c91a0e5b..5e3610811e 100644 --- a/cassandra/protocol.py +++ b/cassandra/protocol.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import absolute_import # to enable import io from stdlib from collections import namedtuple import logging import socket diff --git a/cassandra/util.py b/cassandra/util.py index 60490e5460..dd5c58b01d 100644 --- a/cassandra/util.py +++ b/cassandra/util.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from __future__ import with_statement import calendar import datetime from functools import total_ordering From d8431d4bf1b19742ca6552a0ddd006038d497e62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Mathieu?= Date: Wed, 24 May 2023 22:43:06 +0200 Subject: [PATCH 179/211] Don't fail when inserting UDTs with prepared queries with some missing fields (#1151) --- cassandra/cqltypes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cassandra/cqltypes.py b/cassandra/cqltypes.py index 7946a63af8..8167b3b894 100644 --- a/cassandra/cqltypes.py +++ b/cassandra/cqltypes.py @@ -1026,7 +1026,9 @@ def serialize_safe(cls, val, protocol_version): try: item = val[i] except TypeError: - item = getattr(val, fieldname) + item = getattr(val, fieldname, None) + if item is None and not hasattr(val, fieldname): + log.warning(f"field {fieldname} is part of the UDT {cls.typename} but is not present in the value {val}") if item is not None: packed_item = subtype.to_binary(item, proto_version) From a1e2d3d16ce9baf4eb4055404e1f4129495cff57 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Wed, 24 May 2023 15:48:05 -0500 Subject: [PATCH 180/211] PYTHON-1343 Use Cython for smoke builds (#1162) --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index bd04b0fa29..4d872f6927 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -61,7 +61,7 @@ matrices = [ "SMOKE": [ "SERVER": DEFAULT_CASSANDRA.takeRight(2) + DEFAULT_DSE.takeRight(1), "RUNTIME": DEFAULT_RUNTIME.takeRight(2), - "CYTHON": ["False"] + "CYTHON": ["True"] ] ] From 863e690ae256a4b774590ff77180af02b4fe02ba Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Wed, 31 May 2023 12:56:14 -0500 Subject: [PATCH 181/211] Jenkinsfile cleanup (#1163) --- Jenkinsfile | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 4d872f6927..e6c2d9700f 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -13,9 +13,10 @@ Test Profiles: Matrix Types: Full: All server versions, python runtimes tested with and without Cython. - Develop: Smaller matrix for dev purpose. Cassandra: All cassandra server versions. Dse: All dse server versions. + Smoke: CI-friendly configurations. Currently-supported Python version + modern Cassandra/DSE instances. + We also avoid cython since it's tested as part of the nightlies Parameters: @@ -29,13 +30,9 @@ import com.datastax.jenkins.drivers.python.Slack slack = new Slack() -// Define our predefined matrices -// -// Smoke tests are CI-friendly test configuration. Currently-supported Python version + modern C*/DSE instances. -// We also avoid cython since it's tested as part of the nightlies. DEFAULT_CASSANDRA = ['2.1', '2.2', '3.0', '3.11', '4.0'] DEFAULT_DSE = ['dse-5.0.15', 'dse-5.1.35', 'dse-6.0.18', 'dse-6.7.17', 'dse-6.8.30'] -DEFAULT_RUNTIME = ['2.7.18', '3.5.9', '3.6.10', '3.7.7', '3.8.3'] +DEFAULT_RUNTIME = ['3.7.7', '3.8.3'] DEFAULT_CYTHON = ["True", "False"] matrices = [ "FULL": [ @@ -43,11 +40,6 @@ matrices = [ "RUNTIME": DEFAULT_RUNTIME, "CYTHON": DEFAULT_CYTHON ], - "DEVELOP": [ - "SERVER": ['2.1', '3.11', 'dse-6.8.30'], - "RUNTIME": ['2.7.18', '3.6.10'], - "CYTHON": DEFAULT_CYTHON - ], "CASSANDRA": [ "SERVER": DEFAULT_CASSANDRA, "RUNTIME": DEFAULT_RUNTIME, @@ -394,8 +386,9 @@ def describeBuild(buildContext) { } } -def scheduleTriggerJobName() { - "drivers/python/oss/master/disabled" +// branch pattern for cron +def branchPatternCron() { + ~"(master)" } pipeline { @@ -460,7 +453,7 @@ pipeline {
''')
        choice(
          name: 'MATRIX',
-         choices: ['DEFAULT', 'SMOKE', 'FULL', 'DEVELOP', 'CASSANDRA', 'DSE'],
+         choices: ['DEFAULT', 'SMOKE', 'FULL', 'CASSANDRA', 'DSE'],
          description: '''

The matrix for the build.

@@ -481,10 +474,6 @@ pipeline { - - - - @@ -629,10 +618,10 @@ pipeline { } triggers { - parameterizedCron((scheduleTriggerJobName() == env.JOB_NAME) ? """ + parameterizedCron(branchPatternCron().matcher(env.BRANCH_NAME).matches() ? """ # Every weeknight (Monday - Friday) around 4:00 AM - # These schedules will run with and without Cython enabled for Python v2.7.18 and v3.5.9 - H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP=LIBEV;CI_SCHEDULE_PYTHON_VERSION=2.7.18 3.5.9;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1.35 dse-6.0.18 dse-6.7.17 + # These schedules will run with and without Cython enabled for Python 3.7.7 and 3.8.3 + H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.7.7 3.8.3;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1.35 dse-6.0.18 dse-6.7.17 """ : "") } From 5ea3f4b2b6258118312bec6c43f10729165ed7d5 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Thu, 1 Jun 2023 15:34:15 -0500 Subject: [PATCH 182/211] PYTHON-1351 Convert cryptography to an optional dependency (#1164) --- cassandra/column_encryption/_policies.py | 126 +++++++++++++++ cassandra/column_encryption/policies.py | 20 +++ cassandra/policies.py | 108 +------------ docs/column_encryption.rst | 3 +- docs/installation.rst | 27 +++- requirements.txt | 1 - setup.py | 9 +- test-datastax-requirements.txt | 1 + .../column_encryption/test_policies.py | 94 +++++++++++ tests/integration/standard/test_policies.py | 75 +-------- tests/unit/column_encryption/test_policies.py | 149 ++++++++++++++++++ tests/unit/test_policies.py | 135 +--------------- 12 files changed, 424 insertions(+), 324 deletions(-) create mode 100644 cassandra/column_encryption/_policies.py create mode 100644 cassandra/column_encryption/policies.py create mode 100644 tests/integration/standard/column_encryption/test_policies.py create mode 100644 tests/unit/column_encryption/test_policies.py diff --git a/cassandra/column_encryption/_policies.py b/cassandra/column_encryption/_policies.py new file mode 100644 index 0000000000..e049ba2d22 --- /dev/null +++ b/cassandra/column_encryption/_policies.py @@ -0,0 +1,126 @@ +# Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import namedtuple +from functools import lru_cache + +import logging +import os + +log = logging.getLogger(__name__) + +from cassandra.cqltypes import _cqltypes +from cassandra.policies import ColumnEncryptionPolicy + +from cryptography.hazmat.primitives import padding +from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes + +AES256_BLOCK_SIZE = 128 +AES256_BLOCK_SIZE_BYTES = int(AES256_BLOCK_SIZE / 8) +AES256_KEY_SIZE = 256 +AES256_KEY_SIZE_BYTES = int(AES256_KEY_SIZE / 8) + +ColData = namedtuple('ColData', ['key','type']) + +class AES256ColumnEncryptionPolicy(ColumnEncryptionPolicy): + + # CBC uses an IV that's the same size as the block size + # + # TODO: Need to find some way to expose mode options + # (CBC etc.) 
without leaking classes from the underlying
+    # impl here
+    def __init__(self, mode = modes.CBC, iv = os.urandom(AES256_BLOCK_SIZE_BYTES)):
+
+        self.mode = mode
+        self.iv = iv
+
+        # ColData for a given ColDesc is always preserved. We only create a Cipher
+        # when there's an actual need to do so for a given ColDesc
+        self.coldata = {}
+        self.ciphers = {}
+
+    def encrypt(self, coldesc, obj_bytes):
+
+        # AES256 has a 128-bit block size so if the input bytes don't align perfectly on
+        # those blocks we have to pad them. There's plenty of room for optimization here:
+        #
+        # * Instances of the PKCS7 padder should be managed in a bounded pool
+        # * It would be nice if we could get a flag from encrypted data to indicate
+        #   whether it was padded or not
+        # * Might be able to make this happen with a leading block of flags in encrypted data
+        padder = padding.PKCS7(AES256_BLOCK_SIZE).padder()
+        padded_bytes = padder.update(obj_bytes) + padder.finalize()
+
+        cipher = self._get_cipher(coldesc)
+        encryptor = cipher.encryptor()
+        return encryptor.update(padded_bytes) + encryptor.finalize()
+
+    def decrypt(self, coldesc, encrypted_bytes):
+
+        cipher = self._get_cipher(coldesc)
+        decryptor = cipher.decryptor()
+        padded_bytes = decryptor.update(encrypted_bytes) + decryptor.finalize()
+
+        unpadder = padding.PKCS7(AES256_BLOCK_SIZE).unpadder()
+        return unpadder.update(padded_bytes) + unpadder.finalize()
+
+    def add_column(self, coldesc, key, type):
+
+        if not coldesc:
+            raise ValueError("ColDesc supplied to add_column cannot be None")
+        if not key:
+            raise ValueError("Key supplied to add_column cannot be None")
+        if not type:
+            raise ValueError("Type supplied to add_column cannot be None")
+        if type not in _cqltypes.keys():
+            raise ValueError("Type {} is not a supported type".format(type))
+        if not len(key) == AES256_KEY_SIZE_BYTES:
+            raise ValueError("AES256 column encryption policy expects a 256-bit encryption key")
+        self.coldata[coldesc] = ColData(key, _cqltypes[type])
+
+    def contains_column(self, coldesc):
+        return coldesc in self.coldata
+
+    def encode_and_encrypt(self, coldesc, obj):
+        if not coldesc:
+            raise ValueError("ColDesc supplied to encode_and_encrypt cannot be None")
+        if not obj:
+            raise ValueError("Object supplied to encode_and_encrypt cannot be None")
+        coldata = self.coldata.get(coldesc)
+        if not coldata:
+            raise ValueError("Could not find ColData for ColDesc {}".format(coldesc))
+        return self.encrypt(coldesc, coldata.type.serialize(obj, None))
+
+    def cache_info(self):
+        return AES256ColumnEncryptionPolicy._build_cipher.cache_info()
+
+    def column_type(self, coldesc):
+        return self.coldata[coldesc].type
+
+    def _get_cipher(self, coldesc):
+        """
+        Access relevant state from this instance necessary to create a Cipher and then get one,
+        hopefully returning a cached instance if we've already done so (and it hasn't been evicted)
+        """
+
+        try:
+            coldata = self.coldata[coldesc]
+            return AES256ColumnEncryptionPolicy._build_cipher(coldata.key, self.mode, self.iv)
+        except KeyError:
+            raise ValueError("Could not find column {}".format(coldesc))
+
+    # Explicitly use a class method here to avoid caching self
+    @lru_cache(maxsize=128)
+    def _build_cipher(key, mode, iv):
+        return Cipher(algorithms.AES256(key), mode(iv))
diff --git a/cassandra/column_encryption/policies.py b/cassandra/column_encryption/policies.py
new file mode 100644
index 0000000000..770084bd48
--- /dev/null
+++ b/cassandra/column_encryption/policies.py
@@ -0,0 +1,20 @@
+# Copyright DataStax, Inc.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + import cryptography + from cassandra.column_encryption._policies import * +except ImportError: + # Cryptography is not installed + pass diff --git a/cassandra/policies.py b/cassandra/policies.py index 36063abafe..c60e558465 100644 --- a/cassandra/policies.py +++ b/cassandra/policies.py @@ -16,30 +16,22 @@ from functools import lru_cache from itertools import islice, cycle, groupby, repeat import logging -import os from random import randint, shuffle from threading import Lock import socket import warnings -from cryptography.hazmat.primitives import padding -from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes +log = logging.getLogger(__name__) from cassandra import WriteType as WT -from cassandra.cqltypes import _cqltypes - # This is done this way because WriteType was originally # defined here and in order not to break the API. # It may removed in the next mayor. WriteType = WT - from cassandra import ConsistencyLevel, OperationTimedOut -log = logging.getLogger(__name__) - - class HostDistance(object): """ A measure of how "distant" a node is from the client, which @@ -1192,7 +1184,6 @@ def _rethrow(self, *args, **kwargs): ColDesc = namedtuple('ColDesc', ['ks', 'table', 'col']) -ColData = namedtuple('ColData', ['key','type']) class ColumnEncryptionPolicy(object): """ @@ -1249,100 +1240,3 @@ def encode_and_encrypt(self, coldesc, obj): statements. """ raise NotImplementedError() - -AES256_BLOCK_SIZE = 128 -AES256_BLOCK_SIZE_BYTES = int(AES256_BLOCK_SIZE / 8) -AES256_KEY_SIZE = 256 -AES256_KEY_SIZE_BYTES = int(AES256_KEY_SIZE / 8) - -class AES256ColumnEncryptionPolicy(ColumnEncryptionPolicy): - - # CBC uses an IV that's the same size as the block size - # - # TODO: Need to find some way to expose mode options - # (CBC etc.) without leaking classes from the underlying - # impl here - def __init__(self, mode = modes.CBC, iv = os.urandom(AES256_BLOCK_SIZE_BYTES)): - - self.mode = mode - self.iv = iv - - # ColData for a given ColDesc is always preserved. We only create a Cipher - # when there's an actual need to for a given ColDesc - self.coldata = {} - self.ciphers = {} - - def encrypt(self, coldesc, obj_bytes): - - # AES256 has a 128-bit block size so if the input bytes don't align perfectly on - # those blocks we have to pad them. 
There's plenty of room for optimization here: - # - # * Instances of the PKCS7 padder should be managed in a bounded pool - # * It would be nice if we could get a flag from encrypted data to indicate - # whether it was padded or not - # * Might be able to make this happen with a leading block of flags in encrypted data - padder = padding.PKCS7(AES256_BLOCK_SIZE).padder() - padded_bytes = padder.update(obj_bytes) + padder.finalize() - - cipher = self._get_cipher(coldesc) - encryptor = cipher.encryptor() - return encryptor.update(padded_bytes) + encryptor.finalize() - - def decrypt(self, coldesc, encrypted_bytes): - - cipher = self._get_cipher(coldesc) - decryptor = cipher.decryptor() - padded_bytes = decryptor.update(encrypted_bytes) + decryptor.finalize() - - unpadder = padding.PKCS7(AES256_BLOCK_SIZE).unpadder() - return unpadder.update(padded_bytes) + unpadder.finalize() - - def add_column(self, coldesc, key, type): - - if not coldesc: - raise ValueError("ColDesc supplied to add_column cannot be None") - if not key: - raise ValueError("Key supplied to add_column cannot be None") - if not type: - raise ValueError("Type supplied to add_column cannot be None") - if type not in _cqltypes.keys(): - raise ValueError("Type %s is not a supported type".format(type)) - if not len(key) == AES256_KEY_SIZE_BYTES: - raise ValueError("AES256 column encryption policy expects a 256-bit encryption key") - self.coldata[coldesc] = ColData(key, _cqltypes[type]) - - def contains_column(self, coldesc): - return coldesc in self.coldata - - def encode_and_encrypt(self, coldesc, obj): - if not coldesc: - raise ValueError("ColDesc supplied to encode_and_encrypt cannot be None") - if not obj: - raise ValueError("Object supplied to encode_and_encrypt cannot be None") - coldata = self.coldata.get(coldesc) - if not coldata: - raise ValueError("Could not find ColData for ColDesc %s".format(coldesc)) - return self.encrypt(coldesc, coldata.type.serialize(obj, None)) - - def cache_info(self): - return AES256ColumnEncryptionPolicy._build_cipher.cache_info() - - def column_type(self, coldesc): - return self.coldata[coldesc].type - - def _get_cipher(self, coldesc): - """ - Access relevant state from this instance necessary to create a Cipher and then get one, - hopefully returning a cached instance if we've already done so (and it hasn't been evicted) - """ - - try: - coldata = self.coldata[coldesc] - return AES256ColumnEncryptionPolicy._build_cipher(coldata.key, self.mode, self.iv) - except KeyError: - raise ValueError("Could not find column {}".format(coldesc)) - - # Explicitly use a class method here to avoid caching self - @lru_cache(maxsize=128) - def _build_cipher(key, mode, iv): - return Cipher(algorithms.AES256(key), mode(iv)) diff --git a/docs/column_encryption.rst b/docs/column_encryption.rst index 1392972fa6..5cfb736c1f 100644 --- a/docs/column_encryption.rst +++ b/docs/column_encryption.rst @@ -24,7 +24,8 @@ when it's created. 
import os - from cassandra.policies import ColDesc, AES256ColumnEncryptionPolicy, AES256_KEY_SIZE_BYTES + from cassandra.policies import ColDesc + from cassandra.column_encryption.policies import AES256ColumnEncryptionPolicy, AES256_KEY_SIZE_BYTES key = os.urandom(AES256_KEY_SIZE_BYTES) cl_policy = AES256ColumnEncryptionPolicy() diff --git a/docs/installation.rst b/docs/installation.rst index 3855383602..bea6d6ece5 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -3,7 +3,7 @@ Installation Supported Platforms ------------------- -Python 2.7, 3.5, 3.6, 3.7 and 3.8 are supported. Both CPython (the standard Python +Python 3.7 and 3.8 are supported. Both CPython (the standard Python implementation) and `PyPy `_ are supported and tested. Linux, OSX, and Windows are supported. @@ -26,7 +26,7 @@ To check if the installation was successful, you can run:: python -c 'import cassandra; print cassandra.__version__' -It should print something like "3.22.0". +It should print something like "3.27.0". .. _installation-datastax-graph: @@ -34,7 +34,7 @@ It should print something like "3.22.0". --------------------------- The driver provides an optional fluent graph API that depends on Apache TinkerPop (gremlinpython). It is not installed by default. To be able to build Gremlin traversals, you need to install -the `graph` requirements:: +the `graph` extra:: pip install cassandra-driver[graph] @@ -67,6 +67,27 @@ support this:: pip install scales +*Optional:* Column-Level Encryption (CLE) Support +-------------------------------------------------- +The driver has built-in support for client-side encryption and +decryption of data. For more, see :doc:`column_encryption`. + +CLE depends on the Python `cryptography `_ module. +When installing Python driver 3.27.0. the `cryptography` module is +also downloaded and installed. +If you are using Python driver 3.28.0 or later and want to use CLE, you must +install the `cryptography `_ module. + +You can install this module along with the driver by specifying the `cle` extra:: + + pip install cassandra-driver[cle] + +Alternatively, you can also install the module directly via `pip`:: + + pip install cryptography + +Any version of cryptography >= 35.0 will work for the CLE feature. 
You can find additional +details at `PYTHON-1351 `_ Speeding Up Installation ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/requirements.txt b/requirements.txt index 44356365ce..f784fba1b9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -cryptography >= 35.0 geomet>=0.1,<0.3 six >=1.9 futures <=2.2.0 diff --git a/setup.py b/setup.py index ba7cd92f20..30ce602c3e 100644 --- a/setup.py +++ b/setup.py @@ -402,11 +402,11 @@ def run_setup(extensions): sys.stderr.write("Bypassing Cython setup requirement\n") dependencies = ['six >=1.9', - 'geomet>=0.1,<0.3', - 'cryptography>=35.0'] + 'geomet>=0.1,<0.3'] _EXTRAS_REQUIRE = { - 'graph': ['gremlinpython==3.4.6'] + 'graph': ['gremlinpython==3.4.6'], + 'cle': ['cryptography>=35.0'] } setup( @@ -424,7 +424,8 @@ def run_setup(extensions): packages=[ 'cassandra', 'cassandra.io', 'cassandra.cqlengine', 'cassandra.graph', 'cassandra.datastax', 'cassandra.datastax.insights', 'cassandra.datastax.graph', - 'cassandra.datastax.graph.fluent', 'cassandra.datastax.cloud' + 'cassandra.datastax.graph.fluent', 'cassandra.datastax.cloud', + "cassandra.column_encryption" ], keywords='cassandra,cql,orm,dse,graph', include_package_data=True, diff --git a/test-datastax-requirements.txt b/test-datastax-requirements.txt index 3a47b8de16..038a8b571d 100644 --- a/test-datastax-requirements.txt +++ b/test-datastax-requirements.txt @@ -1,3 +1,4 @@ -r test-requirements.txt kerberos gremlinpython==3.4.6 +cryptography >= 35.0 diff --git a/tests/integration/standard/column_encryption/test_policies.py b/tests/integration/standard/column_encryption/test_policies.py new file mode 100644 index 0000000000..87bfde3c31 --- /dev/null +++ b/tests/integration/standard/column_encryption/test_policies.py @@ -0,0 +1,94 @@ +# Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import unittest + +from tests.integration import use_singledc, TestCluster + +from cassandra.policies import ColDesc + +from cassandra.column_encryption.policies import AES256ColumnEncryptionPolicy, \ + AES256_KEY_SIZE_BYTES + +def setup_module(): + use_singledc() + +class ColumnEncryptionPolicyTest(unittest.TestCase): + + def _recreate_keyspace(self, session): + session.execute("drop keyspace if exists foo") + session.execute("CREATE KEYSPACE foo WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}") + session.execute("CREATE TABLE foo.bar(encrypted blob, unencrypted int, primary key(unencrypted))") + + def test_end_to_end_prepared(self): + + # We only currently perform testing on a single type/expected value pair since CLE functionality is essentially + # independent of the underlying type. We intercept data after it's been encoded when it's going out and before it's + # encoded when coming back; the actual types of the data involved don't impact us. 
+ expected = 12345 + expected_type = "int" + + key = os.urandom(AES256_KEY_SIZE_BYTES) + cl_policy = AES256ColumnEncryptionPolicy() + col_desc = ColDesc('foo','bar','encrypted') + cl_policy.add_column(col_desc, key, expected_type) + + cluster = TestCluster(column_encryption_policy=cl_policy) + session = cluster.connect() + self._recreate_keyspace(session) + + prepared = session.prepare("insert into foo.bar (encrypted, unencrypted) values (?,?)") + session.execute(prepared, (expected,expected)) + + # A straight select from the database will now return the decrypted bits. We select both encrypted and unencrypted + # values here to confirm that we don't interfere with regular processing of unencrypted vals. + (encrypted,unencrypted) = session.execute("select encrypted, unencrypted from foo.bar where unencrypted = %s allow filtering", (expected,)).one() + self.assertEquals(expected, encrypted) + self.assertEquals(expected, unencrypted) + + # Confirm the same behaviour from a subsequent prepared statement as well + prepared = session.prepare("select encrypted, unencrypted from foo.bar where unencrypted = ? allow filtering") + (encrypted,unencrypted) = session.execute(prepared, [expected]).one() + self.assertEquals(expected, encrypted) + self.assertEquals(expected, unencrypted) + + def test_end_to_end_simple(self): + + expected = 67890 + expected_type = "int" + + key = os.urandom(AES256_KEY_SIZE_BYTES) + cl_policy = AES256ColumnEncryptionPolicy() + col_desc = ColDesc('foo','bar','encrypted') + cl_policy.add_column(col_desc, key, expected_type) + + cluster = TestCluster(column_encryption_policy=cl_policy) + session = cluster.connect() + self._recreate_keyspace(session) + + # Use encode_and_encrypt helper function to populate date + session.execute("insert into foo.bar (encrypted, unencrypted) values (%s,%s)",(cl_policy.encode_and_encrypt(col_desc, expected), expected)) + + # A straight select from the database will now return the decrypted bits. We select both encrypted and unencrypted + # values here to confirm that we don't interfere with regular processing of unencrypted vals. + (encrypted,unencrypted) = session.execute("select encrypted, unencrypted from foo.bar where unencrypted = %s allow filtering", (expected,)).one() + self.assertEquals(expected, encrypted) + self.assertEquals(expected, unencrypted) + + # Confirm the same behaviour from a subsequent prepared statement as well + prepared = session.prepare("select encrypted, unencrypted from foo.bar where unencrypted = ? allow filtering") + (encrypted,unencrypted) = session.execute(prepared, [expected]).one() + self.assertEquals(expected, encrypted) + self.assertEquals(expected, unencrypted) diff --git a/tests/integration/standard/test_policies.py b/tests/integration/standard/test_policies.py index 8f46306236..696bc46856 100644 --- a/tests/integration/standard/test_policies.py +++ b/tests/integration/standard/test_policies.py @@ -12,14 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from decimal import Decimal -import os -import random import unittest from cassandra.cluster import ExecutionProfile, EXEC_PROFILE_DEFAULT from cassandra.policies import HostFilterPolicy, RoundRobinPolicy, SimpleConvictionPolicy, \ - WhiteListRoundRobinPolicy, ColDesc, AES256ColumnEncryptionPolicy, AES256_KEY_SIZE_BYTES + WhiteListRoundRobinPolicy from cassandra.pool import Host from cassandra.connection import DefaultEndPoint @@ -27,11 +24,9 @@ from concurrent.futures import wait as wait_futures - def setup_module(): use_singledc() - class HostFilterPolicyTests(unittest.TestCase): def test_predicate_changes(self): @@ -93,71 +88,3 @@ def test_only_connects_to_subset(self): queried_hosts.update(response.response_future.attempted_hosts) queried_hosts = set(host.address for host in queried_hosts) self.assertEqual(queried_hosts, only_connect_hosts) - -class ColumnEncryptionPolicyTest(unittest.TestCase): - - def _recreate_keyspace(self, session): - session.execute("drop keyspace if exists foo") - session.execute("CREATE KEYSPACE foo WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}") - session.execute("CREATE TABLE foo.bar(encrypted blob, unencrypted int, primary key(unencrypted))") - - def test_end_to_end_prepared(self): - - # We only currently perform testing on a single type/expected value pair since CLE functionality is essentially - # independent of the underlying type. We intercept data after it's been encoded when it's going out and before it's - # encoded when coming back; the actual types of the data involved don't impact us. - expected = 12345 - expected_type = "int" - - key = os.urandom(AES256_KEY_SIZE_BYTES) - cl_policy = AES256ColumnEncryptionPolicy() - col_desc = ColDesc('foo','bar','encrypted') - cl_policy.add_column(col_desc, key, expected_type) - - cluster = TestCluster(column_encryption_policy=cl_policy) - session = cluster.connect() - self._recreate_keyspace(session) - - prepared = session.prepare("insert into foo.bar (encrypted, unencrypted) values (?,?)") - session.execute(prepared, (expected,expected)) - - # A straight select from the database will now return the decrypted bits. We select both encrypted and unencrypted - # values here to confirm that we don't interfere with regular processing of unencrypted vals. - (encrypted,unencrypted) = session.execute("select encrypted, unencrypted from foo.bar where unencrypted = %s allow filtering", (expected,)).one() - self.assertEquals(expected, encrypted) - self.assertEquals(expected, unencrypted) - - # Confirm the same behaviour from a subsequent prepared statement as well - prepared = session.prepare("select encrypted, unencrypted from foo.bar where unencrypted = ? allow filtering") - (encrypted,unencrypted) = session.execute(prepared, [expected]).one() - self.assertEquals(expected, encrypted) - self.assertEquals(expected, unencrypted) - - def test_end_to_end_simple(self): - - expected = 67890 - expected_type = "int" - - key = os.urandom(AES256_KEY_SIZE_BYTES) - cl_policy = AES256ColumnEncryptionPolicy() - col_desc = ColDesc('foo','bar','encrypted') - cl_policy.add_column(col_desc, key, expected_type) - - cluster = TestCluster(column_encryption_policy=cl_policy) - session = cluster.connect() - self._recreate_keyspace(session) - - # Use encode_and_encrypt helper function to populate date - session.execute("insert into foo.bar (encrypted, unencrypted) values (%s,%s)",(cl_policy.encode_and_encrypt(col_desc, expected), expected)) - - # A straight select from the database will now return the decrypted bits. 
We select both encrypted and unencrypted - # values here to confirm that we don't interfere with regular processing of unencrypted vals. - (encrypted,unencrypted) = session.execute("select encrypted, unencrypted from foo.bar where unencrypted = %s allow filtering", (expected,)).one() - self.assertEquals(expected, encrypted) - self.assertEquals(expected, unencrypted) - - # Confirm the same behaviour from a subsequent prepared statement as well - prepared = session.prepare("select encrypted, unencrypted from foo.bar where unencrypted = ? allow filtering") - (encrypted,unencrypted) = session.execute(prepared, [expected]).one() - self.assertEquals(expected, encrypted) - self.assertEquals(expected, unencrypted) diff --git a/tests/unit/column_encryption/test_policies.py b/tests/unit/column_encryption/test_policies.py new file mode 100644 index 0000000000..f6b06a3ade --- /dev/null +++ b/tests/unit/column_encryption/test_policies.py @@ -0,0 +1,149 @@ +# Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import unittest + +from cassandra.policies import ColDesc +from cassandra.column_encryption.policies import AES256ColumnEncryptionPolicy, \ + AES256_BLOCK_SIZE_BYTES, AES256_KEY_SIZE_BYTES + +class AES256ColumnEncryptionPolicyTest(unittest.TestCase): + + def _random_block(self): + return os.urandom(AES256_BLOCK_SIZE_BYTES) + + def _random_key(self): + return os.urandom(AES256_KEY_SIZE_BYTES) + + def _test_round_trip(self, bytes): + coldesc = ColDesc('ks1','table1','col1') + policy = AES256ColumnEncryptionPolicy() + policy.add_column(coldesc, self._random_key(), "blob") + encrypted_bytes = policy.encrypt(coldesc, bytes) + self.assertEqual(bytes, policy.decrypt(coldesc, encrypted_bytes)) + + def test_no_padding_necessary(self): + self._test_round_trip(self._random_block()) + + def test_some_padding_required(self): + for byte_size in range(1,AES256_BLOCK_SIZE_BYTES - 1): + bytes = os.urandom(byte_size) + self._test_round_trip(bytes) + for byte_size in range(AES256_BLOCK_SIZE_BYTES + 1,(2 * AES256_BLOCK_SIZE_BYTES) - 1): + bytes = os.urandom(byte_size) + self._test_round_trip(bytes) + + def test_add_column_invalid_key_size_raises(self): + coldesc = ColDesc('ks1','table1','col1') + policy = AES256ColumnEncryptionPolicy() + for key_size in range(1,AES256_KEY_SIZE_BYTES - 1): + with self.assertRaises(ValueError): + policy.add_column(coldesc, os.urandom(key_size), "blob") + for key_size in range(AES256_KEY_SIZE_BYTES + 1,(2 * AES256_KEY_SIZE_BYTES) - 1): + with self.assertRaises(ValueError): + policy.add_column(coldesc, os.urandom(key_size), "blob") + + def test_add_column_null_coldesc_raises(self): + with self.assertRaises(ValueError): + policy = AES256ColumnEncryptionPolicy() + policy.add_column(None, self._random_block(), "blob") + + def test_add_column_null_key_raises(self): + with self.assertRaises(ValueError): + policy = AES256ColumnEncryptionPolicy() + coldesc = ColDesc('ks1','table1','col1') + policy.add_column(coldesc, None, "blob") + + def 
test_add_column_null_type_raises(self): + with self.assertRaises(ValueError): + policy = AES256ColumnEncryptionPolicy() + coldesc = ColDesc('ks1','table1','col1') + policy.add_column(coldesc, self._random_block(), None) + + def test_add_column_unknown_type_raises(self): + with self.assertRaises(ValueError): + policy = AES256ColumnEncryptionPolicy() + coldesc = ColDesc('ks1','table1','col1') + policy.add_column(coldesc, self._random_block(), "foobar") + + def test_encode_and_encrypt_null_coldesc_raises(self): + with self.assertRaises(ValueError): + policy = AES256ColumnEncryptionPolicy() + coldesc = ColDesc('ks1','table1','col1') + policy.add_column(coldesc, self._random_key(), "blob") + policy.encode_and_encrypt(None, self._random_block()) + + def test_encode_and_encrypt_null_obj_raises(self): + with self.assertRaises(ValueError): + policy = AES256ColumnEncryptionPolicy() + coldesc = ColDesc('ks1','table1','col1') + policy.add_column(coldesc, self._random_key(), "blob") + policy.encode_and_encrypt(coldesc, None) + + def test_encode_and_encrypt_unknown_coldesc_raises(self): + with self.assertRaises(ValueError): + policy = AES256ColumnEncryptionPolicy() + coldesc = ColDesc('ks1','table1','col1') + policy.add_column(coldesc, self._random_key(), "blob") + policy.encode_and_encrypt(ColDesc('ks2','table2','col2'), self._random_block()) + + def test_contains_column(self): + coldesc = ColDesc('ks1','table1','col1') + policy = AES256ColumnEncryptionPolicy() + policy.add_column(coldesc, self._random_key(), "blob") + self.assertTrue(policy.contains_column(coldesc)) + self.assertFalse(policy.contains_column(ColDesc('ks2','table1','col1'))) + self.assertFalse(policy.contains_column(ColDesc('ks1','table2','col1'))) + self.assertFalse(policy.contains_column(ColDesc('ks1','table1','col2'))) + self.assertFalse(policy.contains_column(ColDesc('ks2','table2','col2'))) + + def test_encrypt_unknown_column(self): + with self.assertRaises(ValueError): + policy = AES256ColumnEncryptionPolicy() + coldesc = ColDesc('ks1','table1','col1') + policy.add_column(coldesc, self._random_key(), "blob") + policy.encrypt(ColDesc('ks2','table2','col2'), self._random_block()) + + def test_decrypt_unknown_column(self): + policy = AES256ColumnEncryptionPolicy() + coldesc = ColDesc('ks1','table1','col1') + policy.add_column(coldesc, self._random_key(), "blob") + encrypted_bytes = policy.encrypt(coldesc, self._random_block()) + with self.assertRaises(ValueError): + policy.decrypt(ColDesc('ks2','table2','col2'), encrypted_bytes) + + def test_cache_info(self): + coldesc1 = ColDesc('ks1','table1','col1') + coldesc2 = ColDesc('ks2','table2','col2') + coldesc3 = ColDesc('ks3','table3','col3') + policy = AES256ColumnEncryptionPolicy() + for coldesc in [coldesc1, coldesc2, coldesc3]: + policy.add_column(coldesc, self._random_key(), "blob") + + # First run for this coldesc should be a miss, everything else should be a cache hit + for _ in range(10): + policy.encrypt(coldesc1, self._random_block()) + cache_info = policy.cache_info() + self.assertEqual(cache_info.hits, 9) + self.assertEqual(cache_info.misses, 1) + self.assertEqual(cache_info.maxsize, 128) + + # Important note: we're measuring the size of the cache of ciphers, NOT stored + # keys. 
We won't have a cipher here until we actually encrypt something + self.assertEqual(cache_info.currsize, 1) + policy.encrypt(coldesc2, self._random_block()) + self.assertEqual(policy.cache_info().currsize, 2) + policy.encrypt(coldesc3, self._random_block()) + self.assertEqual(policy.cache_info().currsize, 3) diff --git a/tests/unit/test_policies.py b/tests/unit/test_policies.py index 451d5c50c9..ec004ca9fe 100644 --- a/tests/unit/test_policies.py +++ b/tests/unit/test_policies.py @@ -16,7 +16,6 @@ from itertools import islice, cycle from mock import Mock, patch, call -import os from random import randint import six from six.moves._thread import LockType @@ -27,7 +26,6 @@ from cassandra import ConsistencyLevel from cassandra.cluster import Cluster from cassandra.connection import DefaultEndPoint -from cassandra.cqltypes import BytesType from cassandra.metadata import Metadata from cassandra.policies import (RoundRobinPolicy, WhiteListRoundRobinPolicy, DCAwareRoundRobinPolicy, TokenAwarePolicy, SimpleConvictionPolicy, @@ -35,9 +33,7 @@ RetryPolicy, WriteType, DowngradingConsistencyRetryPolicy, ConstantReconnectionPolicy, LoadBalancingPolicy, ConvictionPolicy, ReconnectionPolicy, FallthroughRetryPolicy, - IdentityTranslator, EC2MultiRegionTranslator, HostFilterPolicy, - AES256ColumnEncryptionPolicy, ColDesc, - AES256_BLOCK_SIZE_BYTES, AES256_KEY_SIZE_BYTES) + IdentityTranslator, EC2MultiRegionTranslator, HostFilterPolicy) from cassandra.pool import Host from cassandra.query import Statement @@ -1503,132 +1499,3 @@ def test_create_whitelist(self): # Only the filtered replicas should be allowed self.assertEqual(set(query_plan), {Host(DefaultEndPoint("127.0.0.1"), SimpleConvictionPolicy), Host(DefaultEndPoint("127.0.0.4"), SimpleConvictionPolicy)}) - -class AES256ColumnEncryptionPolicyTest(unittest.TestCase): - - def _random_block(self): - return os.urandom(AES256_BLOCK_SIZE_BYTES) - - def _random_key(self): - return os.urandom(AES256_KEY_SIZE_BYTES) - - def _test_round_trip(self, bytes): - coldesc = ColDesc('ks1','table1','col1') - policy = AES256ColumnEncryptionPolicy() - policy.add_column(coldesc, self._random_key(), "blob") - encrypted_bytes = policy.encrypt(coldesc, bytes) - self.assertEqual(bytes, policy.decrypt(coldesc, encrypted_bytes)) - - def test_no_padding_necessary(self): - self._test_round_trip(self._random_block()) - - def test_some_padding_required(self): - for byte_size in range(1,AES256_BLOCK_SIZE_BYTES - 1): - bytes = os.urandom(byte_size) - self._test_round_trip(bytes) - for byte_size in range(AES256_BLOCK_SIZE_BYTES + 1,(2 * AES256_BLOCK_SIZE_BYTES) - 1): - bytes = os.urandom(byte_size) - self._test_round_trip(bytes) - - def test_add_column_invalid_key_size_raises(self): - coldesc = ColDesc('ks1','table1','col1') - policy = AES256ColumnEncryptionPolicy() - for key_size in range(1,AES256_KEY_SIZE_BYTES - 1): - with self.assertRaises(ValueError): - policy.add_column(coldesc, os.urandom(key_size), "blob") - for key_size in range(AES256_KEY_SIZE_BYTES + 1,(2 * AES256_KEY_SIZE_BYTES) - 1): - with self.assertRaises(ValueError): - policy.add_column(coldesc, os.urandom(key_size), "blob") - - def test_add_column_null_coldesc_raises(self): - with self.assertRaises(ValueError): - policy = AES256ColumnEncryptionPolicy() - policy.add_column(None, self._random_block(), "blob") - - def test_add_column_null_key_raises(self): - with self.assertRaises(ValueError): - policy = AES256ColumnEncryptionPolicy() - coldesc = ColDesc('ks1','table1','col1') - policy.add_column(coldesc, None, "blob") - - 
def test_add_column_null_type_raises(self): - with self.assertRaises(ValueError): - policy = AES256ColumnEncryptionPolicy() - coldesc = ColDesc('ks1','table1','col1') - policy.add_column(coldesc, self._random_block(), None) - - def test_add_column_unknown_type_raises(self): - with self.assertRaises(ValueError): - policy = AES256ColumnEncryptionPolicy() - coldesc = ColDesc('ks1','table1','col1') - policy.add_column(coldesc, self._random_block(), "foobar") - - def test_encode_and_encrypt_null_coldesc_raises(self): - with self.assertRaises(ValueError): - policy = AES256ColumnEncryptionPolicy() - coldesc = ColDesc('ks1','table1','col1') - policy.add_column(coldesc, self._random_key(), "blob") - policy.encode_and_encrypt(None, self._random_block()) - - def test_encode_and_encrypt_null_obj_raises(self): - with self.assertRaises(ValueError): - policy = AES256ColumnEncryptionPolicy() - coldesc = ColDesc('ks1','table1','col1') - policy.add_column(coldesc, self._random_key(), "blob") - policy.encode_and_encrypt(coldesc, None) - - def test_encode_and_encrypt_unknown_coldesc_raises(self): - with self.assertRaises(ValueError): - policy = AES256ColumnEncryptionPolicy() - coldesc = ColDesc('ks1','table1','col1') - policy.add_column(coldesc, self._random_key(), "blob") - policy.encode_and_encrypt(ColDesc('ks2','table2','col2'), self._random_block()) - - def test_contains_column(self): - coldesc = ColDesc('ks1','table1','col1') - policy = AES256ColumnEncryptionPolicy() - policy.add_column(coldesc, self._random_key(), "blob") - self.assertTrue(policy.contains_column(coldesc)) - self.assertFalse(policy.contains_column(ColDesc('ks2','table1','col1'))) - self.assertFalse(policy.contains_column(ColDesc('ks1','table2','col1'))) - self.assertFalse(policy.contains_column(ColDesc('ks1','table1','col2'))) - self.assertFalse(policy.contains_column(ColDesc('ks2','table2','col2'))) - - def test_encrypt_unknown_column(self): - with self.assertRaises(ValueError): - policy = AES256ColumnEncryptionPolicy() - coldesc = ColDesc('ks1','table1','col1') - policy.add_column(coldesc, self._random_key(), "blob") - policy.encrypt(ColDesc('ks2','table2','col2'), self._random_block()) - - def test_decrypt_unknown_column(self): - policy = AES256ColumnEncryptionPolicy() - coldesc = ColDesc('ks1','table1','col1') - policy.add_column(coldesc, self._random_key(), "blob") - encrypted_bytes = policy.encrypt(coldesc, self._random_block()) - with self.assertRaises(ValueError): - policy.decrypt(ColDesc('ks2','table2','col2'), encrypted_bytes) - - def test_cache_info(self): - coldesc1 = ColDesc('ks1','table1','col1') - coldesc2 = ColDesc('ks2','table2','col2') - coldesc3 = ColDesc('ks3','table3','col3') - policy = AES256ColumnEncryptionPolicy() - for coldesc in [coldesc1, coldesc2, coldesc3]: - policy.add_column(coldesc, self._random_key(), "blob") - - # First run for this coldesc should be a miss, everything else should be a cache hit - for _ in range(10): - policy.encrypt(coldesc1, self._random_block()) - cache_info = policy.cache_info() - self.assertEqual(cache_info.hits, 9) - self.assertEqual(cache_info.misses, 1) - self.assertEqual(cache_info.maxsize, 128) - - # Important note: we're measuring the size of the cache of ciphers, NOT stored - # keys. 
We won't have a cipher here until we actually encrypt something - self.assertEqual(cache_info.currsize, 1) - policy.encrypt(coldesc2, self._random_block()) - self.assertEqual(policy.cache_info().currsize, 2) - policy.encrypt(coldesc3, self._random_block()) - self.assertEqual(policy.cache_info().currsize, 3) From 531a6c767aaaeb612ad9412ac3ff268e117aad2f Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Fri, 2 Jun 2023 10:52:01 -0500 Subject: [PATCH 183/211] PYTHON-1350 Store IV along with encrypted text when using column-level encryption (#1160) --- cassandra/column_encryption/_policies.py | 45 +++++++----- .../column_encryption/test_policies.py | 71 +++++++++++++++---- tests/unit/column_encryption/test_policies.py | 20 ++++++ 3 files changed, 105 insertions(+), 31 deletions(-) diff --git a/cassandra/column_encryption/_policies.py b/cassandra/column_encryption/_policies.py index e049ba2d22..ef8097bfbd 100644 --- a/cassandra/column_encryption/_policies.py +++ b/cassandra/column_encryption/_policies.py @@ -35,15 +35,27 @@ class AES256ColumnEncryptionPolicy(ColumnEncryptionPolicy): - # CBC uses an IV that's the same size as the block size - # - # TODO: Need to find some way to expose mode options - # (CBC etc.) without leaking classes from the underlying - # impl here - def __init__(self, mode = modes.CBC, iv = os.urandom(AES256_BLOCK_SIZE_BYTES)): - - self.mode = mode + # Fix block cipher mode for now. IV size is a function of block cipher used + # so fixing this avoids (possibly unnecessary) validation logic here. + mode = modes.CBC + + # "iv" param here expects a bytearray that's the same size as the block + # size for AES-256 (128 bits or 16 bytes). If none is provided a new one + # will be randomly generated, but in this case the IV should be recorded and + # preserved or else you will not be able to decrypt any data encrypted by this + # policy. + def __init__(self, iv=None): + + # CBC uses an IV that's the same size as the block size + # + # Avoid defining IV with a default arg in order to stay away from + # any issues around the caching of default args self.iv = iv + if self.iv: + if not len(self.iv) == AES256_BLOCK_SIZE_BYTES: + raise ValueError("This policy uses AES-256 with CBC mode and therefore expects a 128-bit initialization vector") + else: + self.iv = os.urandom(AES256_BLOCK_SIZE_BYTES) # ColData for a given ColDesc is always preserved. 
We only create a Cipher
        # when there's an actual need to do so for a given ColDesc

@@ -64,11 +76,13 @@ def encrypt(self, coldesc, obj_bytes):
 
         cipher = self._get_cipher(coldesc)
         encryptor = cipher.encryptor()
-        return encryptor.update(padded_bytes) + encryptor.finalize()
+        return self.iv + encryptor.update(padded_bytes) + encryptor.finalize()
 
-    def decrypt(self, coldesc, encrypted_bytes):
+    def decrypt(self, coldesc, bytes):
 
-        cipher = self._get_cipher(coldesc)
+        iv = bytes[:AES256_BLOCK_SIZE_BYTES]
+        encrypted_bytes = bytes[AES256_BLOCK_SIZE_BYTES:]
+        cipher = self._get_cipher(coldesc, iv=iv)
         decryptor = cipher.decryptor()
         padded_bytes = decryptor.update(encrypted_bytes) + decryptor.finalize()
 
@@ -108,19 +122,18 @@ def cache_info(self):
     def column_type(self, coldesc):
         return self.coldata[coldesc].type
 
-    def _get_cipher(self, coldesc):
+    def _get_cipher(self, coldesc, iv=None):
         """
         Access relevant state from this instance necessary to create a Cipher
         and then get one, hopefully returning a cached instance if we've already done so
         (and it hasn't been evicted)
         """
-
         try:
             coldata = self.coldata[coldesc]
-            return AES256ColumnEncryptionPolicy._build_cipher(coldata.key, self.mode, self.iv)
+            return AES256ColumnEncryptionPolicy._build_cipher(coldata.key, iv or self.iv)
         except KeyError:
             raise ValueError("Could not find column {}".format(coldesc))
 
     # Explicitly use a class method here to avoid caching self
     @lru_cache(maxsize=128)
-    def _build_cipher(key, mode, iv):
-        return Cipher(algorithms.AES256(key), mode(iv))
+    def _build_cipher(key, iv):
+        return Cipher(algorithms.AES256(key), AES256ColumnEncryptionPolicy.mode(iv))
diff --git a/tests/integration/standard/column_encryption/test_policies.py b/tests/integration/standard/column_encryption/test_policies.py
index 87bfde3c31..bb84c0352c 100644
--- a/tests/integration/standard/column_encryption/test_policies.py
+++ b/tests/integration/standard/column_encryption/test_policies.py
@@ -20,7 +20,7 @@
 
 from cassandra.policies import ColDesc
 from cassandra.column_encryption.policies import AES256ColumnEncryptionPolicy, \
-    AES256_KEY_SIZE_BYTES
+    AES256_KEY_SIZE_BYTES, AES256_BLOCK_SIZE_BYTES
 
 def setup_module():
     use_singledc()
@@ -32,25 +32,28 @@ def _recreate_keyspace(self, session):
        session.execute("CREATE KEYSPACE foo WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}")
        session.execute("CREATE TABLE foo.bar(encrypted blob, unencrypted int, primary key(unencrypted))")

+    def _create_policy(self, key, iv = None):
+        # Pass the caller's IV through so tests that supply one actually exercise it;
+        # iv=None falls back to the policy's own randomly generated IV
+        cl_policy = AES256ColumnEncryptionPolicy(iv=iv)
+        col_desc = ColDesc('foo','bar','encrypted')
+        cl_policy.add_column(col_desc, key, "int")
+        return (col_desc, cl_policy)
+
    def test_end_to_end_prepared(self):

        # We only currently perform testing on a single type/expected value pair since CLE functionality is essentially
        # independent of the underlying type.  We intercept data after it's been encoded when it's going out and before it's
        # encoded when coming back; the actual types of the data involved don't impact us.
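        #
        # (Illustrative sketch only -- the literal and variable names below are
        # hypothetical, not part of this test.  Because we operate on the encoded
        # bytes, the manual path looks the same for any supported type:
        #
        #     blob = cl_policy.encode_and_encrypt(col_desc, 12345)  # IV-prefixed ciphertext bytes
        #     raw = cl_policy.decrypt(col_desc, blob)               # the encoded (serialized) bytes back
        # )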
-        expected = 12345
-        expected_type = "int"
+        expected = 0
 
         key = os.urandom(AES256_KEY_SIZE_BYTES)
-        cl_policy = AES256ColumnEncryptionPolicy()
-        col_desc = ColDesc('foo','bar','encrypted')
-        cl_policy.add_column(col_desc, key, expected_type)
-
+        (_, cl_policy) = self._create_policy(key)
         cluster = TestCluster(column_encryption_policy=cl_policy)
         session = cluster.connect()
         self._recreate_keyspace(session)
 
         prepared = session.prepare("insert into foo.bar (encrypted, unencrypted) values (?,?)")
-        session.execute(prepared, (expected,expected))
+        for i in range(100):
+            session.execute(prepared, (i, i))
 
         # A straight select from the database will now return the decrypted bits.  We select both encrypted and unencrypted
@@ -66,20 +69,19 @@ def test_end_to_end_prepared(self):
 
     def test_end_to_end_simple(self):
 
-        expected = 67890
-        expected_type = "int"
+        expected = 1
 
         key = os.urandom(AES256_KEY_SIZE_BYTES)
-        cl_policy = AES256ColumnEncryptionPolicy()
-        col_desc = ColDesc('foo','bar','encrypted')
-        cl_policy.add_column(col_desc, key, expected_type)
-
+        (col_desc, cl_policy) = self._create_policy(key)
         cluster = TestCluster(column_encryption_policy=cl_policy)
         session = cluster.connect()
         self._recreate_keyspace(session)
 
         # Use encode_and_encrypt helper function to populate data
-        session.execute("insert into foo.bar (encrypted, unencrypted) values (%s,%s)",(cl_policy.encode_and_encrypt(col_desc, expected), expected))
+        for i in range(1,100):
+            self.assertIsNotNone(i)
+            encrypted = cl_policy.encode_and_encrypt(col_desc, i)
+            session.execute("insert into foo.bar (encrypted, unencrypted) values (%s,%s)", (encrypted, i))
 
         # A straight select from the database will now return the decrypted bits.  We select both encrypted and unencrypted
         # values here to confirm that we don't interfere with regular processing of unencrypted vals.
@@ -92,3 +94,42 @@ def test_end_to_end_simple(self):
         (encrypted,unencrypted) = session.execute(prepared, [expected]).one()
         self.assertEquals(expected, encrypted)
         self.assertEquals(expected, unencrypted)
+
+    def test_end_to_end_different_cle_contexts(self):
+
+        expected = 2
+
+        key = os.urandom(AES256_KEY_SIZE_BYTES)
+
+        # Simulate the creation of two AES256 policies at two different times.  Python evaluates
+        # default param args once, at function definition time, so a single value will be used any time
+        # the default val is used.  The upshot is that within the same test we'll always have the same
+        # IV if we rely on the default args, so manually introduce some variation here to simulate
+        # what actually happens if you have two distinct sessions created at two different times.
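+        # (Hypothetical illustration of that pitfall, not part of the test: a
+        # signature like
+        #
+        #     def __init__(self, iv=os.urandom(AES256_BLOCK_SIZE_BYTES)): ...
+        #
+        # evaluates os.urandom() once, at definition time, so every policy built
+        # with the default would silently share one IV -- hence the explicit
+        # iv1/iv2 values below.)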
+        iv1 = os.urandom(AES256_BLOCK_SIZE_BYTES)
+        (col_desc1, cl_policy1) = self._create_policy(key, iv=iv1)
+        cluster1 = TestCluster(column_encryption_policy=cl_policy1)
+        session1 = cluster1.connect()
+        self._recreate_keyspace(session1)
+
+        # Use encode_and_encrypt helper function to populate data
+        for i in range(1,100):
+            self.assertIsNotNone(i)
+            encrypted = cl_policy1.encode_and_encrypt(col_desc1, i)
+            session1.execute("insert into foo.bar (encrypted, unencrypted) values (%s,%s)", (encrypted, i))
+        session1.shutdown()
+        cluster1.shutdown()
+
+        # Explicitly clear the class-level cache here; we're trying to simulate a second connection from a completely new process and
+        # that would entail not re-using any cached ciphers
+        AES256ColumnEncryptionPolicy._build_cipher.cache_clear()
+        cache_info = cl_policy1.cache_info()
+        self.assertEqual(cache_info.currsize, 0)
+
+        iv2 = os.urandom(AES256_BLOCK_SIZE_BYTES)
+        (_, cl_policy2) = self._create_policy(key, iv=iv2)
+        cluster2 = TestCluster(column_encryption_policy=cl_policy2)
+        session2 = cluster2.connect()
+        (encrypted,unencrypted) = session2.execute("select encrypted, unencrypted from foo.bar where unencrypted = %s allow filtering", (expected,)).one()
+        self.assertEquals(expected, encrypted)
+        self.assertEquals(expected, unencrypted)
diff --git a/tests/unit/column_encryption/test_policies.py b/tests/unit/column_encryption/test_policies.py
index f6b06a3ade..38136c69d4 100644
--- a/tests/unit/column_encryption/test_policies.py
+++ b/tests/unit/column_encryption/test_policies.py
@@ -55,6 +55,23 @@ def test_add_column_invalid_key_size_raises(self):
             with self.assertRaises(ValueError):
                 policy.add_column(coldesc, os.urandom(key_size), "blob")
 
+    def test_add_column_invalid_iv_size_raises(self):
+        def test_iv_size(iv_size):
+            policy = AES256ColumnEncryptionPolicy(iv = os.urandom(iv_size))
+            policy.add_column(coldesc, os.urandom(AES256_KEY_SIZE_BYTES), "blob")
+            policy.encrypt(coldesc, os.urandom(128))
+
+        coldesc = ColDesc('ks1','table1','col1')
+        for iv_size in range(1,AES256_BLOCK_SIZE_BYTES - 1):
+            with self.assertRaises(ValueError):
+                test_iv_size(iv_size)
+        for iv_size in range(AES256_BLOCK_SIZE_BYTES + 1,(2 * AES256_BLOCK_SIZE_BYTES) - 1):
+            with self.assertRaises(ValueError):
+                test_iv_size(iv_size)
+
+        # Finally, confirm that the expected IV size has no issue
+        test_iv_size(AES256_BLOCK_SIZE_BYTES)
+
     def test_add_column_null_coldesc_raises(self):
         with self.assertRaises(ValueError):
             policy = AES256ColumnEncryptionPolicy()
@@ -125,6 +142,9 @@ def test_decrypt_unknown_column(self):
             policy.decrypt(ColDesc('ks2','table2','col2'), encrypted_bytes)
 
     def test_cache_info(self):
+        # Exclude any interference from tests above
+        AES256ColumnEncryptionPolicy._build_cipher.cache_clear()
+
         coldesc1 = ColDesc('ks1','table1','col1')
         coldesc2 = ColDesc('ks2','table2','col2')
         coldesc3 = ColDesc('ks3','table3','col3')
From 356d150358723607be1d9956a9bef684cd9cff0e Mon Sep 17 00:00:00 2001
From: Bret McGuire
Date: Fri, 2 Jun 2023 14:57:09 -0500
Subject: [PATCH 184/211] PYTHON-1356 Create session-specific protocol
 handlers to contain session-specific CLE policies (#1165)

---
 cassandra/cluster.py                          | 15 +++++---
 .../column_encryption/test_policies.py        | 37 ++++++++++++++++++-
 2 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/cassandra/cluster.py b/cassandra/cluster.py
index 12b00c42db..57fc2d4e8c 100644
--- a/cassandra/cluster.py
+++ b/cassandra/cluster.py
@@ -2545,12 +2545,6 @@ def __init__(self, cluster, hosts, keyspace=None):
 
         self.encoder = Encoder()
 
-        if
self.cluster.column_encryption_policy is not None:
-            try:
-                self.client_protocol_handler.column_encryption_policy = self.cluster.column_encryption_policy
-            except AttributeError:
-                log.info("Unable to set column encryption policy for session")
-
         # create connection pools in parallel
         self._initial_connect_futures = set()
         for host in hosts:
@@ -2571,6 +2565,15 @@ def __init__(self, cluster, hosts, keyspace=None):
         self.session_id = uuid.uuid4()
         self._graph_paging_available = self._check_graph_paging_available()
 
+        if self.cluster.column_encryption_policy is not None:
+            try:
+                self.client_protocol_handler = type(
+                    str(self.session_id) + "-ProtocolHandler",
+                    (ProtocolHandler,),
+                    {"column_encryption_policy": self.cluster.column_encryption_policy})
+            except AttributeError:
+                log.info("Unable to set column encryption policy for session")
+
         if self.cluster.monitor_reporting_enabled:
             cc_host = self.cluster.get_control_connection_host()
             valid_insights_version = (cc_host and version_supports_insights(cc_host.dse_version))
diff --git a/tests/integration/standard/column_encryption/test_policies.py b/tests/integration/standard/column_encryption/test_policies.py
index bb84c0352c..dea6b6d39e 100644
--- a/tests/integration/standard/column_encryption/test_policies.py
+++ b/tests/integration/standard/column_encryption/test_policies.py
@@ -95,7 +95,11 @@ def test_end_to_end_simple(self):
         self.assertEquals(expected, encrypted)
         self.assertEquals(expected, unencrypted)
 
-    def test_end_to_end_different_cle_contexts(self):
+    def test_end_to_end_different_cle_contexts_different_ivs(self):
+        """
+        Test to validate PYTHON-1350. We should be able to decode the data from two different contexts (with two different IVs)
+        since the IV used to decrypt the data is actually now stored with the data.
+        """
 
         expected = 2
 
@@ -133,3 +137,34 @@ def test_end_to_end_different_cle_contexts(self):
         (encrypted,unencrypted) = session2.execute("select encrypted, unencrypted from foo.bar where unencrypted = %s allow filtering", (expected,)).one()
         self.assertEquals(expected, encrypted)
         self.assertEquals(expected, unencrypted)
+
+    def test_end_to_end_different_cle_contexts_different_policies(self):
+        """
+        Test to validate PYTHON-1356. Class variables used to pass CLE policy down to protocol handler shouldn't persist.
+        """
+
+        expected = 3
+
+        key = os.urandom(AES256_KEY_SIZE_BYTES)
+        (col_desc, cl_policy) = self._create_policy(key)
+        cluster = TestCluster(column_encryption_policy=cl_policy)
+        session = cluster.connect()
+        self._recreate_keyspace(session)
+
+        # Use encode_and_encrypt helper function to populate data
+        session.execute("insert into foo.bar (encrypted, unencrypted) values (%s,%s)",(cl_policy.encode_and_encrypt(col_desc, expected), expected))
+
+        # We now open a new session _without_ the CLE policy specified.  We should _not_ be able to read decrypted bits from this session.
+        cluster2 = TestCluster()
+        session2 = cluster2.connect()
+
+        # A straight select from this session will return the stored (still encrypted) bits.  We select both encrypted and unencrypted
+        # values here to confirm that we don't interfere with regular processing of unencrypted vals.
+ (encrypted,unencrypted) = session2.execute("select encrypted, unencrypted from foo.bar where unencrypted = %s allow filtering", (expected,)).one() + self.assertEquals(cl_policy.encode_and_encrypt(col_desc, expected), encrypted) + self.assertEquals(expected, unencrypted) + + # Confirm the same behaviour from a subsequent prepared statement as well + prepared = session2.prepare("select encrypted, unencrypted from foo.bar where unencrypted = ? allow filtering") + (encrypted,unencrypted) = session2.execute(prepared, [expected]).one() + self.assertEquals(cl_policy.encode_and_encrypt(col_desc, expected), encrypted) From 910f02820c7a9e0e7b46fc9fbda1ee9a0e051909 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Mon, 5 Jun 2023 09:23:25 -0500 Subject: [PATCH 185/211] CONN-38 Notes for 3.27.0 on PYTHON-1350 (#1166) --- docs/column_encryption.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/column_encryption.rst b/docs/column_encryption.rst index 5cfb736c1f..e18b9286ed 100644 --- a/docs/column_encryption.rst +++ b/docs/column_encryption.rst @@ -14,6 +14,20 @@ also available, although in this case values must be manually encrypted and/or d Client-side encryption and decryption should work against all versions of Cassandra and DSE. It does not utilize any server-side functionality to do its work. +WARNING: Consider upgrading to 3.28.0 or later +------------------------------------------------ +There is a significant issue with the column encryption functionality in Python driver 3.27.0. +To be able to decrypt your data, you must preserve the cipher initialization vector (IV) used by +the :class:`~.AES256ColumnEncryptionPolicy` when your data was written. +To decrypt your data, you must supply this IV when creating a policy to read this data. +If you do not supply this IV in the policy to read this data, you will **NOT BE ABLE TO DECRYPT YOUR DATA**. +See +`PYTHON-1350 `_ for more detail. + +DataStax recommends upgrading to Python driver 3.28.0 or later to avoid this issue. 3.28.0 or later manages the IV automatically. +Because of this change in functionality, any encrypted data written in 3.27.0 will **NOT** be readable by 3.28.0 or later. +After upgrading to Python driver 3.28.0 or later, it is critical that you re-encrypt your data with the new driver version. 
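+
+For example, a 3.27.0 policy must be handed the preserved IV explicitly in order to
+read previously written data.  A minimal sketch (``load_key`` and ``load_iv`` are
+hypothetical, application-specific helpers)::
+
+    from cassandra.policies import ColDesc
+    from cassandra.column_encryption.policies import AES256ColumnEncryptionPolicy
+
+    key = load_key()  # the preserved 256-bit (32 byte) key
+    iv = load_iv()    # the preserved 128-bit (16 byte) IV
+
+    cl_policy = AES256ColumnEncryptionPolicy(iv=iv)
+    cl_policy.add_column(ColDesc('ks', 'table', 'column'), key, "int")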
+ Configuration ------------- Client-side encryption is enabled by creating an instance of a subclass of :class:`~.ColumnEncryptionPolicy` From 643d3a631b3f15603942af42b47228e0e2d7f193 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Mon, 5 Jun 2023 09:25:27 -0500 Subject: [PATCH 186/211] Update docs.yaml to point to most recent 3.27.0 docs changes --- docs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs.yaml b/docs.yaml index 77c738b4f4..eb94f74590 100644 --- a/docs.yaml +++ b/docs.yaml @@ -23,7 +23,7 @@ sections: CASS_DRIVER_NO_CYTHON=1 python setup.py build_ext --inplace --force versions: - name: '3.27' - ref: 0002e912 + ref: 910f0282 - name: '3.26' ref: f1e9126 - name: '3.25' From 2db3728a4791ef08e41bdd2631b34ade0cd2dd8b Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Mon, 5 Jun 2023 09:36:50 -0500 Subject: [PATCH 187/211] PYTHON-1352 Add vector type, codec + support for parsing CQL type (#1161) --- cassandra/__init__.py | 2 +- cassandra/cqltypes.py | 37 ++++++++++++++++++++++++++++++++++--- tests/unit/test_types.py | 22 +++++++++++++++++++++- 3 files changed, 56 insertions(+), 5 deletions(-) diff --git a/cassandra/__init__.py b/cassandra/__init__.py index 1573abdf00..ca15e93602 100644 --- a/cassandra/__init__.py +++ b/cassandra/__init__.py @@ -22,7 +22,7 @@ def emit(self, record): logging.getLogger('cassandra').addHandler(NullHandler()) -__version_info__ = (3, 27, 0) +__version_info__ = (3, 28, 0b1) __version__ = '.'.join(map(str, __version_info__)) diff --git a/cassandra/cqltypes.py b/cassandra/cqltypes.py index 8167b3b894..6cc89aafbb 100644 --- a/cassandra/cqltypes.py +++ b/cassandra/cqltypes.py @@ -235,13 +235,15 @@ def parse_casstype_args(typestring): else: names.append(None) - ctype = lookup_casstype_simple(tok) + try: + ctype = int(tok) + except ValueError: + ctype = lookup_casstype_simple(tok) types.append(ctype) # return the first (outer) type, which will have all parameters applied return args[0][0][0] - def lookup_casstype(casstype): """ Given a Cassandra type as a string (possibly including parameters), hand @@ -259,6 +261,7 @@ def lookup_casstype(casstype): try: return parse_casstype_args(casstype) except (ValueError, AssertionError, IndexError) as e: + log.debug("Exception in parse_casstype_args: %s" % e) raise ValueError("Don't know how to parse type string %r: %s" % (casstype, e)) @@ -296,7 +299,7 @@ class _CassandraType(object): """ def __repr__(self): - return '<%s( %r )>' % (self.cql_parameterized_type(), self.val) + return '<%s>' % (self.cql_parameterized_type()) @classmethod def from_binary(cls, byts, protocol_version): @@ -1423,3 +1426,31 @@ def serialize(cls, v, protocol_version): buf.write(int8_pack(cls._encode_precision(bound.precision))) return buf.getvalue() + +class VectorType(_CassandraType): + typename = 'org.apache.cassandra.db.marshal.VectorType' + vector_size = 0 + subtype = None + + @classmethod + def apply_parameters(cls, params, names): + assert len(params) == 2 + subtype = lookup_casstype(params[0]) + vsize = params[1] + return type('%s(%s)' % (cls.cass_parameterized_type_with([]), vsize), (cls,), {'vector_size': vsize, 'subtype': subtype}) + + @classmethod + def deserialize(cls, byts, protocol_version): + indexes = (4 * x for x in range(0, cls.vector_size)) + return [cls.subtype.deserialize(byts[idx:idx + 4], protocol_version) for idx in indexes] + + @classmethod + def serialize(cls, v, protocol_version): + buf = io.BytesIO() + for item in v: + buf.write(cls.subtype.serialize(item, protocol_version)) + return 
buf.getvalue() + + @classmethod + def cql_parameterized_type(cls): + return "%s<%s, %s>" % (cls.typename, cls.subtype.typename, cls.vector_size) diff --git a/tests/unit/test_types.py b/tests/unit/test_types.py index af3b327ef8..e85f5dbe67 100644 --- a/tests/unit/test_types.py +++ b/tests/unit/test_types.py @@ -27,7 +27,8 @@ EmptyValue, LongType, SetType, UTF8Type, cql_typename, int8_pack, int64_pack, lookup_casstype, lookup_casstype_simple, parse_casstype_args, - int32_pack, Int32Type, ListType, MapType + int32_pack, Int32Type, ListType, MapType, VectorType, + FloatType ) from cassandra.encoder import cql_quote from cassandra.pool import Host @@ -190,6 +191,12 @@ class BarType(FooType): self.assertEqual(UTF8Type, ctype.subtypes[2]) self.assertEqual([b'city', None, b'zip'], ctype.names) + def test_parse_casstype_vector(self): + ctype = parse_casstype_args("org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.FloatType, 3)") + self.assertTrue(issubclass(ctype, VectorType)) + self.assertEqual(3, ctype.vector_size) + self.assertEqual(FloatType, ctype.subtype) + def test_empty_value(self): self.assertEqual(str(EmptyValue()), 'EMPTY') @@ -303,6 +310,19 @@ def test_cql_quote(self): self.assertEqual(cql_quote('test'), "'test'") self.assertEqual(cql_quote(0), '0') + def test_vector_round_trip(self): + base = [3.4, 2.9, 41.6, 12.0] + ctype = parse_casstype_args("org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.FloatType, 4)") + base_bytes = ctype.serialize(base, 0) + self.assertEqual(16, len(base_bytes)) + result = ctype.deserialize(base_bytes, 0) + self.assertEqual(len(base), len(result)) + for idx in range(0,len(base)): + self.assertAlmostEqual(base[idx], result[idx], places=5) + + def test_vector_cql_parameterized_type(self): + ctype = parse_casstype_args("org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.FloatType, 4)") + self.assertEqual(ctype.cql_parameterized_type(), "org.apache.cassandra.db.marshal.VectorType") ZERO = datetime.timedelta(0) From e3400a1e1eeb34fc1162fd601bf667cf802761ab Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Mon, 5 Jun 2023 10:08:34 -0500 Subject: [PATCH 188/211] Release 3.28.0: changelog & version --- CHANGELOG.rst | 22 ++++++++++++++++++++++ cassandra/__init__.py | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 432998869e..472881dbc5 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,25 @@ +3.28.0 +====== +June 5, 2023 + +Features +-------- +* Add support for vector type (PYTHON-1352) +* Cryptography module is now an optional dependency (PYTHON-1351) + +Bug Fixes +--------- +* Store IV along with encrypted text when using column-level encryption (PYTHON-1350) +* Create session-specific protocol handlers to contain session-specific CLE policies (PYTHON-1356) + +Others +------ +* Use Cython for smoke builds (PYTHON-1343) +* Don't fail when inserting UDTs with prepared queries with some missing fields (PR 1151) +* Convert print statement to function in docs (PR 1157) +* Update comment for retry policy (DOC-3278) +* Added error handling blog reference (DOC-2813) + 3.27.0 ====== May 1, 2023 diff --git a/cassandra/__init__.py b/cassandra/__init__.py index ca15e93602..b048bd9358 100644 --- a/cassandra/__init__.py +++ b/cassandra/__init__.py @@ -22,7 +22,7 @@ def emit(self, record): logging.getLogger('cassandra').addHandler(NullHandler()) -__version_info__ = (3, 28, 0b1) +__version_info__ = (3, 28, 0) __version__ = '.'.join(map(str, 
__version_info__)) From 7ccf5026d870072af548f29cbfc15873f51a2935 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Mon, 5 Jun 2023 23:55:41 -0500 Subject: [PATCH 189/211] Fixed non-valid rst in README --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index e140371f07..47483f3881 100644 --- a/README.rst +++ b/README.rst @@ -58,7 +58,7 @@ Contributing See `CONTRIBUTING.md `_. Error Handling ------------- +-------------- While originally written for the Java driver, users may reference the `Cassandra error handling done right blog `_ for resolving error handling scenarios with Apache Cassandra. Reporting Problems From 50c93e907ae53a26e105c5203fa5aa04741d116c Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Tue, 6 Jun 2023 10:04:22 -0500 Subject: [PATCH 190/211] Include docs for 3.28.0 --- docs.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs.yaml b/docs.yaml index eb94f74590..2daac0608b 100644 --- a/docs.yaml +++ b/docs.yaml @@ -22,6 +22,8 @@ sections: # build extensions like libev CASS_DRIVER_NO_CYTHON=1 python setup.py build_ext --inplace --force versions: + - name: '3.28' + ref: 7ccf5026 - name: '3.27' ref: 910f0282 - name: '3.26' From 4325afb6c652fc1ccbf09f1e1aa893da2b0f370c Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Tue, 6 Jun 2023 16:21:40 -0500 Subject: [PATCH 191/211] CONN-38 Notes for 3.28.0 on PYTHON-1350 (#1167) Co-authored-by: Jamie Gillenwater --- docs/column_encryption.rst | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/docs/column_encryption.rst b/docs/column_encryption.rst index e18b9286ed..ab67ef16d0 100644 --- a/docs/column_encryption.rst +++ b/docs/column_encryption.rst @@ -14,19 +14,11 @@ also available, although in this case values must be manually encrypted and/or d Client-side encryption and decryption should work against all versions of Cassandra and DSE. It does not utilize any server-side functionality to do its work. -WARNING: Consider upgrading to 3.28.0 or later +WARNING: Encryption format changes in 3.28.0 ------------------------------------------------ -There is a significant issue with the column encryption functionality in Python driver 3.27.0. -To be able to decrypt your data, you must preserve the cipher initialization vector (IV) used by -the :class:`~.AES256ColumnEncryptionPolicy` when your data was written. -To decrypt your data, you must supply this IV when creating a policy to read this data. -If you do not supply this IV in the policy to read this data, you will **NOT BE ABLE TO DECRYPT YOUR DATA**. -See -`PYTHON-1350 `_ for more detail. - -DataStax recommends upgrading to Python driver 3.28.0 or later to avoid this issue. 3.28.0 or later manages the IV automatically. -Because of this change in functionality, any encrypted data written in 3.27.0 will **NOT** be readable by 3.28.0 or later. -After upgrading to Python driver 3.28.0 or later, it is critical that you re-encrypt your data with the new driver version. +Python driver 3.28.0 introduces a new encryption format for data written by :class:`~.AES256ColumnEncryptionPolicy`. +As a result, any encrypted data written by Python driver 3.27.0 will **NOT** be readable. +If you upgraded from 3.27.0, you should re-encrypt your data with 3.28.0. 
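+
+In 3.28.0 and later the policy generates an IV itself and stores it with each
+encrypted value, so there is no IV to manage.  A minimal sketch (key handling
+is application-specific)::
+
+    import os
+
+    from cassandra.policies import ColDesc
+    from cassandra.column_encryption.policies import AES256ColumnEncryptionPolicy, \
+        AES256_KEY_SIZE_BYTES
+
+    cl_policy = AES256ColumnEncryptionPolicy()
+    cl_policy.add_column(ColDesc('ks', 'table', 'column'), os.urandom(AES256_KEY_SIZE_BYTES), "int")
+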
Configuration ------------- From 8c41066330eb04c34eff57153ab2eda810844d5f Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Tue, 6 Jun 2023 16:24:28 -0500 Subject: [PATCH 192/211] Update docs.yaml to point to most recent 3.28.0 docs changes --- docs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs.yaml b/docs.yaml index 2daac0608b..c34324e170 100644 --- a/docs.yaml +++ b/docs.yaml @@ -23,7 +23,7 @@ sections: CASS_DRIVER_NO_CYTHON=1 python setup.py build_ext --inplace --force versions: - name: '3.28' - ref: 7ccf5026 + ref: 4325afb6 - name: '3.27' ref: 910f0282 - name: '3.26' From 8ba0a5ccd71b66c77ee58994ed9da9ea34ff4cbe Mon Sep 17 00:00:00 2001 From: weideng1 Date: Mon, 28 Aug 2023 15:42:30 -0600 Subject: [PATCH 193/211] Jenkins using new python versions in the matrix (#1174) * update Jenkins matrix to use available python versions in the latest runner image * Use earliest and latest supported python runtimes for smoke test * Remove C* and DSE versions that are EOL'ed * make ccm-private work with python 3.10+ * use pynose to replace nosetests so that it can run under Python 3.10+, before PYTHON-1297 gets merged --- Jenkinsfile | 69 +++++++++++++++++++++-------------------------------- 1 file changed, 27 insertions(+), 42 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index e6c2d9700f..a6d19fb5ef 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -30,9 +30,9 @@ import com.datastax.jenkins.drivers.python.Slack slack = new Slack() -DEFAULT_CASSANDRA = ['2.1', '2.2', '3.0', '3.11', '4.0'] -DEFAULT_DSE = ['dse-5.0.15', 'dse-5.1.35', 'dse-6.0.18', 'dse-6.7.17', 'dse-6.8.30'] -DEFAULT_RUNTIME = ['3.7.7', '3.8.3'] +DEFAULT_CASSANDRA = ['3.0', '3.11', '4.0'] +DEFAULT_DSE = ['dse-5.1.35', 'dse-6.8.30'] +DEFAULT_RUNTIME = ['3.8.16', '3.9.16', '3.10.11', '3.11.3'] DEFAULT_CYTHON = ["True", "False"] matrices = [ "FULL": [ @@ -52,7 +52,7 @@ matrices = [ ], "SMOKE": [ "SERVER": DEFAULT_CASSANDRA.takeRight(2) + DEFAULT_DSE.takeRight(1), - "RUNTIME": DEFAULT_RUNTIME.takeRight(2), + "RUNTIME": DEFAULT_RUNTIME.take(1) + DEFAULT_RUNTIME.takeRight(1), "CYTHON": ["True"] ] ] @@ -167,6 +167,11 @@ def initializeEnvironment() { sudo apt-get install socat pip install --upgrade pip pip install -U setuptools + + # install a version of pyyaml<6.0 compatible with ccm-3.1.5 as of Aug 2023 + # this works around the python-3.10+ compatibility problem as described in DSP-23524 + pip install wheel + pip install "Cython<3.0" "pyyaml<6.0" --no-build-isolation pip install ${HOME}/ccm ''' @@ -186,7 +191,7 @@ def initializeEnvironment() { } sh label: 'Install unit test modules', script: '''#!/bin/bash -lex - pip install nose-ignore-docstring nose-exclude service_identity + pip install pynose nose-ignore-docstring nose-exclude service_identity ''' if (env.CYTHON_ENABLED == 'True') { @@ -247,9 +252,9 @@ def executeStandardTests() { . 
${HOME}/environment.txt set +o allexport - EVENT_LOOP=${EVENT_LOOP} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_results.xml tests/unit/ || true - EVENT_LOOP=eventlet VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_eventlet_results.xml tests/unit/io/test_eventletreactor.py || true - EVENT_LOOP=gevent VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_gevent_results.xml tests/unit/io/test_geventreactor.py || true + EVENT_LOOP=${EVENT_LOOP} VERIFY_CYTHON=${CYTHON_ENABLED} pynose -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_results.xml tests/unit/ || true + EVENT_LOOP=eventlet VERIFY_CYTHON=${CYTHON_ENABLED} pynose -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_eventlet_results.xml tests/unit/io/test_eventletreactor.py || true + EVENT_LOOP=gevent VERIFY_CYTHON=${CYTHON_ENABLED} pynose -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_gevent_results.xml tests/unit/io/test_geventreactor.py || true ''' sh label: 'Execute Simulacron integration tests', script: '''#!/bin/bash -lex @@ -259,13 +264,13 @@ def executeStandardTests() { set +o allexport SIMULACRON_JAR="${HOME}/simulacron.jar" - SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP=${EVENT_LOOP} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_results.xml tests/integration/simulacron/ || true + SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP=${EVENT_LOOP} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} pynose -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_results.xml tests/integration/simulacron/ || true # Run backpressure tests separately to avoid memory issue - SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP=${EVENT_LOOP} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_1_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_paused_connections || true - SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP=${EVENT_LOOP} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} 
CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_2_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_queued_requests_timeout || true - SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP=${EVENT_LOOP} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_3_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_cluster_busy || true - SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP=${EVENT_LOOP} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_4_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_node_busy || true + SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP=${EVENT_LOOP} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} pynose -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_1_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_paused_connections || true + SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP=${EVENT_LOOP} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} pynose -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_2_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_queued_requests_timeout || true + SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP=${EVENT_LOOP} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} pynose -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_3_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_cluster_busy || true + SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP=${EVENT_LOOP} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} 
MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} pynose -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_4_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_node_busy || true ''' sh label: 'Execute CQL engine integration tests', script: '''#!/bin/bash -lex @@ -274,7 +279,7 @@ def executeStandardTests() { . ${HOME}/environment.txt set +o allexport - EVENT_LOOP=${EVENT_LOOP} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=cqle_results.xml tests/integration/cqlengine/ || true + EVENT_LOOP=${EVENT_LOOP} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} pynose -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=cqle_results.xml tests/integration/cqlengine/ || true ''' sh label: 'Execute Apache CassandraⓇ integration tests', script: '''#!/bin/bash -lex @@ -283,7 +288,7 @@ def executeStandardTests() { . ${HOME}/environment.txt set +o allexport - EVENT_LOOP=${EVENT_LOOP} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml tests/integration/standard/ || true + EVENT_LOOP=${EVENT_LOOP} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} pynose -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml tests/integration/standard/ || true ''' if (env.CASSANDRA_VERSION.split('-')[0] == 'dse' && env.CASSANDRA_VERSION.split('-')[1] != '4.8') { @@ -293,7 +298,7 @@ def executeStandardTests() { . ${HOME}/environment.txt set +o allexport - EVENT_LOOP=${EVENT_LOOP} CASSANDRA_DIR=${CCM_INSTALL_DIR} DSE_VERSION=${DSE_VERSION} ADS_HOME="${HOME}/" VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=dse_results.xml tests/integration/advanced/ || true + EVENT_LOOP=${EVENT_LOOP} CASSANDRA_DIR=${CCM_INSTALL_DIR} DSE_VERSION=${DSE_VERSION} ADS_HOME="${HOME}/" VERIFY_CYTHON=${CYTHON_ENABLED} pynose -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=dse_results.xml tests/integration/advanced/ || true ''' } @@ -303,7 +308,7 @@ def executeStandardTests() { . 
${HOME}/environment.txt set +o allexport - EVENT_LOOP=${EVENT_LOOP} CLOUD_PROXY_PATH="${HOME}/proxy/" CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=advanced_results.xml tests/integration/cloud/ || true + EVENT_LOOP=${EVENT_LOOP} CLOUD_PROXY_PATH="${HOME}/proxy/" CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} pynose -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=advanced_results.xml tests/integration/cloud/ || true ''' if (env.PROFILE == 'FULL') { @@ -313,7 +318,7 @@ def executeStandardTests() { . ${HOME}/environment.txt set +o allexport - EVENT_LOOP=${EVENT_LOOP} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --exclude-dir=tests/integration/long/upgrade --with-ignore-docstrings --with-xunit --xunit-file=long_results.xml tests/integration/long/ || true + EVENT_LOOP=${EVENT_LOOP} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} pynose -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --exclude-dir=tests/integration/long/upgrade --with-ignore-docstrings --with-xunit --xunit-file=long_results.xml tests/integration/long/ || true ''' } } @@ -325,7 +330,7 @@ def executeDseSmokeTests() { . 
${HOME}/environment.txt set +o allexport - EVENT_LOOP=${EVENT_LOOP} CCM_ARGS="${CCM_ARGS}" CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} DSE_VERSION=${DSE_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml tests/integration/standard/test_dse.py || true + EVENT_LOOP=${EVENT_LOOP} CCM_ARGS="${CCM_ARGS}" CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} DSE_VERSION=${DSE_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} pynose -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml tests/integration/standard/test_dse.py || true ''' } @@ -346,7 +351,7 @@ def executeEventLoopTests() { "tests/integration/simulacron/test_endpoint.py" "tests/integration/long/test_ssl.py" ) - EVENT_LOOP=${EVENT_LOOP} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml ${EVENT_LOOP_TESTS[@]} || true + EVENT_LOOP=${EVENT_LOOP} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} pynose -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml ${EVENT_LOOP_TESTS[@]} || true ''' } @@ -502,14 +507,6 @@ pipeline { - - - - - - - - @@ -522,22 +519,10 @@ pipeline { - - - - - - - - - - - - @@ -620,8 +605,8 @@ pipeline { triggers { parameterizedCron(branchPatternCron().matcher(env.BRANCH_NAME).matches() ? 
""" # Every weeknight (Monday - Friday) around 4:00 AM - # These schedules will run with and without Cython enabled for Python 3.7.7 and 3.8.3 - H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.7.7 3.8.3;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1.35 dse-6.0.18 dse-6.7.17 + # These schedules will run with and without Cython enabled for Python 3.8.16 and 3.11.3 + H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.8.16 3.11.3;CI_SCHEDULE_SERVER_VERSION=3.11 4.0 dse-5.1.35 dse-6.8.30 """ : "") } From e24ffe451dcdb67266d986c46d492f759961be89 Mon Sep 17 00:00:00 2001 From: Emelia <105240296+emeliawilkinson24@users.noreply.github.com> Date: Fri, 22 Sep 2023 12:28:50 -0400 Subject: [PATCH 194/211] Update redirects in docs.yaml (#1178) --- docs.yaml | 46 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/docs.yaml b/docs.yaml index c34324e170..7dde5a0299 100644 --- a/docs.yaml +++ b/docs.yaml @@ -73,9 +73,47 @@ versions: redirects: - \A\/(.*)/\Z: /\1.html rewrites: - - search: cassandra.apache.org/doc/cql3/CQL.html - replace: cassandra.apache.org/doc/cql3/CQL-3.0.html - - search: http://www.datastax.com/documentation/cql/3.1/ - replace: https://docs.datastax.com/en/archived/cql/3.1/ - search: http://www.datastax.com/docs/1.2/cql_cli/cql/BATCH replace: https://docs.datastax.com/en/dse/6.7/cql/cql/cql_reference/cql_commands/cqlBatch.html + - search: http://www.datastax.com/documentation/cql/3.1/ + replace: https://docs.datastax.com/en/archived/cql/3.1/ + - search: 'https://community.datastax.com' + replace: 'https://www.datastax.com/dev/community' + - search: 'https://docs.datastax.com/en/astra/aws/doc/index.html' + replace: 'https://docs.datastax.com/en/astra-serverless/docs/connect/drivers/connect-python.html' + - search: 'http://cassandra.apache.org/doc/cql3/CQL.html#timeuuidFun' + replace: 'https://cassandra.apache.org/doc/3.11/cassandra/cql/functions.html#timeuuid-functions' + - search: 'http://cassandra.apache.org/doc/cql3/CQL.html#tokenFun' + replace: 'https://cassandra.apache.org/doc/3.11/cassandra/cql/functions.html#token' + - search: 'http://cassandra.apache.org/doc/cql3/CQL.html#collections' + replace: 'https://cassandra.apache.org/doc/3.11/cassandra/cql/types.html#collections' + - search: 'http://cassandra.apache.org/doc/cql3/CQL.html#batchStmt' + replace: 'https://cassandra.apache.org/doc/3.11/cassandra/cql/dml.html#batch_statement' + - search: 'http://cassandra.apache.org/doc/cql3/CQL-3.0.html#timeuuidFun' + replace: 'https://cassandra.apache.org/doc/3.11/cassandra/cql/functions.html#timeuuid-functions' + - search: 'http://cassandra.apache.org/doc/cql3/CQL-3.0.html#tokenFun' + replace: 'https://cassandra.apache.org/doc/3.11/cassandra/cql/functions.html#token' + - search: 'http://cassandra.apache.org/doc/cql3/CQL-3.0.html#collections' + replace: 'https://cassandra.apache.org/doc/3.11/cassandra/cql/types.html#collections' + - search: 'http://cassandra.apache.org/doc/cql3/CQL-3.0.html#batchStmt' + replace: 'https://cassandra.apache.org/doc/3.11/cassandra/cql/dml.html#batch_statement' +checks: + external_links: + exclude: + - 'https://twitter.com/dsJavaDriver' + - 'https://twitter.com/datastaxeng' + - 'https://twitter.com/datastax' + - 'https://projectreactor.io' + - 'https://docs.datastax.com/en/drivers/java/4.[0-9]+/com/datastax/oss/driver/internal/' + - 'http://www.planetcassandra.org/blog/user-defined-functions-in-cassandra-3-0/' + - 
'http://www.planetcassandra.org/making-the-change-from-thrift-to-cql/'
      - 'https://academy.datastax.com/slack'
      - 'https://community.datastax.com/index.html'
      - 'https://micrometer.io/docs'
      - 'http://datastax.github.io/java-driver/features/shaded_jar/'
      - 'http://aka.ms/vcpython27'
  internal_links:
    exclude:
      - 'netty_pipeline/'
      - '../core/'
      - '%5Bguava%20eviction%5D'

From 375da0002651a012bee8aac1d069ad0af3b4c35e Mon Sep 17 00:00:00 2001
From: Bret McGuire
Date: Wed, 11 Oct 2023 15:27:53 -0500
Subject: [PATCH 195/211] Add Jenkins support for Python 3.12.0 (#1180)

---
 Jenkinsfile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index a6d19fb5ef..7e4a3c4761 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -32,7 +32,7 @@ slack = new Slack()
 
 DEFAULT_CASSANDRA = ['3.0', '3.11', '4.0']
 DEFAULT_DSE = ['dse-5.1.35', 'dse-6.8.30']
-DEFAULT_RUNTIME = ['3.8.16', '3.9.16', '3.10.11', '3.11.3']
+DEFAULT_RUNTIME = ['3.8.16', '3.9.16', '3.10.11', '3.11.3', '3.12.0']
 DEFAULT_CYTHON = ["True", "False"]
 matrices = [
     "FULL": [
@@ -605,8 +605,8 @@ pipeline {
     triggers {
         parameterizedCron(branchPatternCron().matcher(env.BRANCH_NAME).matches() ? """
             # Every weeknight (Monday - Friday) around 4:00 AM
-            # These schedules will run with and without Cython enabled for Python 3.8.16 and 3.11.3
-            H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.8.16 3.11.3;CI_SCHEDULE_SERVER_VERSION=3.11 4.0 dse-5.1.35 dse-6.8.30
+            # These schedules will run with and without Cython enabled for Python 3.8.16 and 3.12.0
+            H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.8.16 3.12.0;CI_SCHEDULE_SERVER_VERSION=3.11 4.0 dse-5.1.35 dse-6.8.30
 """ : "")
     }
 
From a7ab8cc76f41b511aeada7147fd423833c04e9be Mon Sep 17 00:00:00 2001
From: Bret McGuire
Date: Wed, 11 Oct 2023 23:10:51 -0500
Subject: [PATCH 196/211] PYTHON-1364 Fix ssl.wrap_socket errors (from
 eventlet) for Python 3.12 (#1181)

---
 cassandra/cluster.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/cassandra/cluster.py b/cassandra/cluster.py
index 57fc2d4e8c..00240186c0 100644
--- a/cassandra/cluster.py
+++ b/cassandra/cluster.py
@@ -99,7 +99,11 @@
 
 try:
     from cassandra.io.eventletreactor import EventletConnection
-except ImportError:
+# PYTHON-1364
+#
+# At the moment eventlet initialization is throwing AttributeErrors due to its dependence on pyOpenSSL
+# and some changes in Python 3.12 which have some knock-on effects there.
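+#
+# Catching AttributeError alongside ImportError below treats such a failure the
+# same as eventlet simply not being installed (EventletConnection is set to None).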
+except (ImportError, AttributeError): EventletConnection = None try: @@ -113,8 +117,12 @@ def _is_eventlet_monkey_patched(): if 'eventlet.patcher' not in sys.modules: return False - import eventlet.patcher - return eventlet.patcher.is_monkey_patched('socket') + try: + import eventlet.patcher + return eventlet.patcher.is_monkey_patched('socket') + # Another case related to PYTHON-1364 + except AttributeError: + return False def _is_gevent_monkey_patched(): From e9136f41db04a5347877b948837b4c4172b51e14 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Fri, 13 Oct 2023 17:15:03 -0500 Subject: [PATCH 197/211] PYTHON-1313 Fix asyncio removals in Python 3.10 (#1179) --- cassandra/io/asyncioreactor.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/cassandra/io/asyncioreactor.py b/cassandra/io/asyncioreactor.py index ab0e90ae09..95f92e26e0 100644 --- a/cassandra/io/asyncioreactor.py +++ b/cassandra/io/asyncioreactor.py @@ -41,13 +41,12 @@ def end(self): def __init__(self, timeout, callback, loop): delayed = self._call_delayed_coro(timeout=timeout, - callback=callback, - loop=loop) + callback=callback) self._handle = asyncio.run_coroutine_threadsafe(delayed, loop=loop) @staticmethod - async def _call_delayed_coro(timeout, callback, loop): - await asyncio.sleep(timeout, loop=loop) + async def _call_delayed_coro(timeout, callback): + await asyncio.sleep(timeout) return callback() def __lt__(self, other): @@ -90,8 +89,8 @@ def __init__(self, *args, **kwargs): self._connect_socket() self._socket.setblocking(0) - self._write_queue = asyncio.Queue(loop=self._loop) - self._write_queue_lock = asyncio.Lock(loop=self._loop) + self._write_queue = asyncio.Queue() + self._write_queue_lock = asyncio.Lock() # see initialize_reactor -- loop is running in a separate thread, so we # have to use a threadsafe call From d0e639b0e5c6e4d763169a18bf2972ce55cec385 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Fri, 27 Oct 2023 21:22:57 +0200 Subject: [PATCH 198/211] Remove some remnants of Python2 (#1172) Co-authored-by: vgali7 --- .gitignore | 3 + CONTRIBUTING.rst | 1 - appveyor.yml | 5 +- benchmarks/callback_full_pipeline.py | 1 - benchmarks/future_batches.py | 2 +- benchmarks/future_full_pipeline.py | 2 +- benchmarks/sync.py | 1 - cassandra/auth.py | 16 +- cassandra/cluster.py | 33 +- cassandra/compat.py | 20 -- cassandra/concurrent.py | 26 +- cassandra/connection.py | 14 +- cassandra/cqlengine/__init__.py | 8 +- cassandra/cqlengine/columns.py | 7 +- cassandra/cqlengine/connection.py | 3 +- cassandra/cqlengine/management.py | 5 +- cassandra/cqlengine/models.py | 8 +- cassandra/cqlengine/operators.py | 4 +- cassandra/cqlengine/query.py | 25 +- cassandra/cqlengine/statements.py | 25 +- cassandra/cqlengine/usertype.py | 10 +- cassandra/cqltypes.py | 100 +++--- cassandra/cython_marshal.pyx | 16 +- cassandra/datastax/cloud/__init__.py | 11 +- cassandra/datastax/graph/fluent/_query.py | 3 +- .../datastax/graph/fluent/_serializers.py | 12 +- cassandra/datastax/graph/graphson.py | 53 ++-- cassandra/datastax/graph/query.py | 12 +- cassandra/datastax/insights/registry.py | 3 +- cassandra/datastax/insights/reporter.py | 7 +- cassandra/datastax/insights/serializers.py | 6 +- cassandra/deserializers.pyx | 4 - cassandra/encoder.py | 68 ++--- cassandra/io/asyncorereactor.py | 1 - cassandra/io/eventletreactor.py | 4 +- cassandra/io/geventreactor.py | 1 - cassandra/io/libevreactor.py | 1 - cassandra/marshal.py | 42 +-- cassandra/metadata.py | 32 +- cassandra/murmur3.py | 1 - 
cassandra/protocol.py | 14 +- cassandra/query.py | 8 +- cassandra/scylla/cloud.py | 0 cassandra/segment.py | 4 - cassandra/util.py | 288 ++---------------- docs/installation.rst | 8 +- .../execute_async_with_queue.py | 2 +- requirements.txt | 6 - setup.py | 3 +- tests/integration/__init__.py | 4 +- tests/integration/advanced/__init__.py | 2 +- tests/integration/advanced/graph/__init__.py | 23 +- .../advanced/graph/fluent/__init__.py | 11 +- .../advanced/graph/fluent/test_graph.py | 6 +- .../integration/advanced/graph/test_graph.py | 1 - .../advanced/graph/test_graph_datatype.py | 13 +- .../advanced/graph/test_graph_query.py | 7 +- .../integration/advanced/test_cont_paging.py | 1 - tests/integration/cloud/test_cloud.py | 8 +- .../columns/test_container_columns.py | 3 +- .../cqlengine/columns/test_value_io.py | 9 +- .../management/test_compaction_settings.py | 3 +- .../cqlengine/management/test_management.py | 1 - .../model/test_class_construction.py | 1 - .../operators/test_where_operators.py | 20 +- .../statements/test_base_statement.py | 3 +- .../statements/test_delete_statement.py | 21 +- .../statements/test_insert_statement.py | 8 +- .../statements/test_select_statement.py | 29 +- .../statements/test_update_statement.py | 13 +- .../cqlengine/statements/test_where_clause.py | 3 +- .../integration/cqlengine/test_batch_query.py | 3 - .../cqlengine/test_lwt_conditional.py | 3 +- tests/integration/datatype_utils.py | 11 +- tests/integration/long/test_ipv6.py | 1 - .../integration/simulacron/test_connection.py | 1 - tests/integration/simulacron/utils.py | 2 +- .../standard/test_authentication.py | 1 - .../standard/test_client_warnings.py | 1 - tests/integration/standard/test_concurrent.py | 2 - tests/integration/standard/test_connection.py | 1 - .../standard/test_custom_payload.py | 8 +- .../standard/test_custom_protocol_handler.py | 3 +- tests/integration/standard/test_metadata.py | 17 +- tests/integration/standard/test_query.py | 5 +- .../integration/standard/test_query_paging.py | 1 - .../standard/test_single_interface.py | 4 +- tests/integration/standard/test_types.py | 33 +- tests/integration/standard/test_udts.py | 7 +- tests/unit/advanced/cloud/test_cloud.py | 4 +- tests/unit/advanced/test_graph.py | 22 +- tests/unit/cqlengine/test_connection.py | 2 - tests/unit/io/utils.py | 15 +- tests/unit/test_auth.py | 5 +- tests/unit/test_cluster.py | 3 +- tests/unit/test_concurrent.py | 2 +- tests/unit/test_connection.py | 7 +- tests/unit/test_control_connection.py | 4 +- tests/unit/test_metadata.py | 23 +- tests/unit/test_orderedmap.py | 7 +- tests/unit/test_parameter_binding.py | 7 +- tests/unit/test_policies.py | 15 +- tests/unit/test_protocol.py | 1 - tests/unit/test_query.py | 4 +- tests/unit/test_response_future.py | 1 - tests/unit/test_segment.py | 34 +-- tests/unit/test_timestamps.py | 6 +- tests/unit/test_types.py | 8 +- tox.ini | 1 - 109 files changed, 410 insertions(+), 984 deletions(-) delete mode 100644 cassandra/compat.py create mode 100644 cassandra/scylla/cloud.py diff --git a/.gitignore b/.gitignore index 5c9cbec957..30ff731f85 100644 --- a/.gitignore +++ b/.gitignore @@ -42,3 +42,6 @@ tests/unit/cython/bytesio_testhelper.c #iPython *.ipynb +venv +docs/venv +.eggs \ No newline at end of file diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index cdd742c063..e5da81d74f 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -26,7 +26,6 @@ To protect the community, all contributors are required to `sign the DataStax Co Design and Implementation Guidelines 
------------------------------------ -- We support Python 2.7+, so any changes must work in any of these runtimes (we use ``six``, ``futures``, and some internal backports for compatability) - We have integrations (notably Cassandra cqlsh) that require pure Python and minimal external dependencies. We try to avoid new external dependencies. Where compiled extensions are concerned, there should always be a pure Python fallback implementation. - This project follows `semantic versioning `_, so breaking API changes will only be introduced in major versions. - Legacy ``cqlengine`` has varying degrees of overreaching client-side validation. Going forward, we will avoid client validation where server feedback is adequate and not overly expensive. diff --git a/appveyor.yml b/appveyor.yml index d1daaa6ec6..f8a3fd7660 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,9 +1,6 @@ environment: matrix: - - PYTHON: "C:\\Python27-x64" - cassandra_version: 3.11.2 - ci_type: standard - - PYTHON: "C:\\Python35-x64" + - PYTHON: "C:\\Python37-x64" cassandra_version: 3.11.2 ci_type: standard os: Visual Studio 2015 diff --git a/benchmarks/callback_full_pipeline.py b/benchmarks/callback_full_pipeline.py index e3ecfe3be5..a4a4c33315 100644 --- a/benchmarks/callback_full_pipeline.py +++ b/benchmarks/callback_full_pipeline.py @@ -18,7 +18,6 @@ from threading import Event from base import benchmark, BenchmarkThread -from six.moves import range log = logging.getLogger(__name__) diff --git a/benchmarks/future_batches.py b/benchmarks/future_batches.py index 8cd915ebab..de4484e617 100644 --- a/benchmarks/future_batches.py +++ b/benchmarks/future_batches.py @@ -14,7 +14,7 @@ import logging from base import benchmark, BenchmarkThread -from six.moves import queue +import queue log = logging.getLogger(__name__) diff --git a/benchmarks/future_full_pipeline.py b/benchmarks/future_full_pipeline.py index 9a9fcfcd50..901573c18e 100644 --- a/benchmarks/future_full_pipeline.py +++ b/benchmarks/future_full_pipeline.py @@ -14,7 +14,7 @@ import logging from base import benchmark, BenchmarkThread -from six.moves import queue +import queue log = logging.getLogger(__name__) diff --git a/benchmarks/sync.py b/benchmarks/sync.py index f2a45fcd7d..96e744f700 100644 --- a/benchmarks/sync.py +++ b/benchmarks/sync.py @@ -13,7 +13,6 @@ # limitations under the License. 
from base import benchmark, BenchmarkThread -from six.moves import range class Runner(BenchmarkThread): diff --git a/cassandra/auth.py b/cassandra/auth.py index 3d2f751ac0..10200aa387 100644 --- a/cassandra/auth.py +++ b/cassandra/auth.py @@ -32,8 +32,6 @@ except ImportError: SASLClient = None -import six - log = logging.getLogger(__name__) # Custom payload keys related to DSE Unified Auth @@ -270,15 +268,15 @@ def __init__(self, username, password): self.password = password def get_mechanism(self): - return six.b("PLAIN") + return b"PLAIN" def get_initial_challenge(self): - return six.b("PLAIN-START") + return b"PLAIN-START" def evaluate_challenge(self, challenge): - if challenge == six.b('PLAIN-START'): + if challenge == b'PLAIN-START': data = "\x00%s\x00%s" % (self.username, self.password) - return data if six.PY2 else data.encode() + return data.encode() raise Exception('Did not receive a valid challenge response from server') @@ -297,13 +295,13 @@ def __init__(self, host, service, qops, properties): self.sasl = SASLClient(host, service, 'GSSAPI', qops=qops, **properties) def get_mechanism(self): - return six.b("GSSAPI") + return b"GSSAPI" def get_initial_challenge(self): - return six.b("GSSAPI-START") + return b"GSSAPI-START" def evaluate_challenge(self, challenge): - if challenge == six.b('GSSAPI-START'): + if challenge == b'GSSAPI-START': return self.sasl.process() else: return self.sasl.process(challenge) diff --git a/cassandra/cluster.py b/cassandra/cluster.py index 00240186c0..6514838050 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -21,6 +21,7 @@ import atexit from binascii import hexlify from collections import defaultdict +from collections.abc import Mapping from concurrent.futures import ThreadPoolExecutor, FIRST_COMPLETED, wait as wait_futures from copy import copy from functools import partial, wraps @@ -29,8 +30,8 @@ import logging from warnings import warn from random import random -import six -from six.moves import filter, range, queue as Queue +import re +import queue import socket import sys import time @@ -79,7 +80,6 @@ HostTargetingStatement) from cassandra.marshal import int64_pack from cassandra.timestamps import MonotonicTimestampGenerator -from cassandra.compat import Mapping from cassandra.util import _resolve_contact_points_to_string_map, Version from cassandra.datastax.insights.reporter import MonitorReporter @@ -111,9 +111,6 @@ except ImportError: from cassandra.util import WeakSet # NOQA -if six.PY3: - long = int - def _is_eventlet_monkey_patched(): if 'eventlet.patcher' not in sys.modules: return False @@ -1158,7 +1155,7 @@ def __init__(self, else: self._contact_points_explicit = True - if isinstance(contact_points, six.string_types): + if isinstance(contact_points, str): raise TypeError("contact_points should not be a string, it should be a sequence (e.g. 
list) of strings") if None in contact_points: @@ -1793,8 +1790,8 @@ def _new_session(self, keyspace): return session def _session_register_user_types(self, session): - for keyspace, type_map in six.iteritems(self._user_types): - for udt_name, klass in six.iteritems(type_map): + for keyspace, type_map in self._user_types.items(): + for udt_name, klass in type_map.items(): session.user_type_registered(keyspace, udt_name, klass) def _cleanup_failed_on_up_handling(self, host): @@ -2683,7 +2680,7 @@ def execute_async(self, query, parameters=None, trace=False, custom_payload=None """ custom_payload = custom_payload if custom_payload else {} if execute_as: - custom_payload[_proxy_execute_key] = six.b(execute_as) + custom_payload[_proxy_execute_key] = execute_as.encode() future = self._create_response_future( query, parameters, trace, custom_payload, timeout, @@ -2747,8 +2744,8 @@ def execute_graph_async(self, query, parameters=None, trace=False, execution_pro custom_payload = execution_profile.graph_options.get_options_map() if execute_as: - custom_payload[_proxy_execute_key] = six.b(execute_as) - custom_payload[_request_timeout_key] = int64_pack(long(execution_profile.request_timeout * 1000)) + custom_payload[_proxy_execute_key] = execute_as.encode() + custom_payload[_request_timeout_key] = int64_pack(int(execution_profile.request_timeout * 1000)) future = self._create_response_future(query, parameters=None, trace=trace, custom_payload=custom_payload, timeout=_NOT_SET, execution_profile=execution_profile) @@ -2885,7 +2882,7 @@ def _create_response_future(self, query, parameters, trace, custom_payload, prepared_statement = None - if isinstance(query, six.string_types): + if isinstance(query, str): query = SimpleStatement(query) elif isinstance(query, PreparedStatement): query = query.bind(parameters) @@ -3353,10 +3350,6 @@ def user_type_registered(self, keyspace, user_type, klass): 'User type %s does not exist in keyspace %s' % (user_type, keyspace)) field_names = type_meta.field_names - if six.PY2: - # go from unicode to string to avoid decode errors from implicit - # decode when formatting non-ascii values - field_names = [fn.encode('utf-8') for fn in field_names] def encode(val): return '{ %s }' % ' , '.join('%s : %s' % ( @@ -4035,7 +4028,7 @@ def _get_schema_mismatches(self, peers_result, local_result, local_address): log.debug("[control connection] Schemas match") return None - return dict((version, list(nodes)) for version, nodes in six.iteritems(versions)) + return dict((version, list(nodes)) for version, nodes in versions.items()) def _get_peers_query(self, peers_query_type, connection=None): """ @@ -4155,7 +4148,7 @@ class _Scheduler(Thread): is_shutdown = False def __init__(self, executor): - self._queue = Queue.PriorityQueue() + self._queue = queue.PriorityQueue() self._scheduled_tasks = set() self._count = count() self._executor = executor @@ -4213,7 +4206,7 @@ def run(self): else: self._queue.put_nowait((run_at, i, task)) break - except Queue.Empty: + except queue.Empty: pass time.sleep(0.1) diff --git a/cassandra/compat.py b/cassandra/compat.py deleted file mode 100644 index 83c1b104e5..0000000000 --- a/cassandra/compat.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright DataStax, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import six - -if six.PY2: - from collections import Mapping -elif six.PY3: - from collections.abc import Mapping diff --git a/cassandra/concurrent.py b/cassandra/concurrent.py index 0228f297fe..fb8f26e1cc 100644 --- a/cassandra/concurrent.py +++ b/cassandra/concurrent.py @@ -16,8 +16,6 @@ from collections import namedtuple from heapq import heappush, heappop from itertools import cycle -import six -from six.moves import xrange, zip from threading import Condition import sys @@ -119,7 +117,7 @@ def execute(self, concurrency, fail_fast): self._current = 0 self._exec_count = 0 with self._condition: - for n in xrange(concurrency): + for n in range(concurrency): if not self._execute_next(): break return self._results() @@ -143,17 +141,13 @@ def _execute(self, idx, statement, params): callback=self._on_success, callback_args=args, errback=self._on_error, errback_args=args) except Exception as exc: - # exc_info with fail_fast to preserve stack trace info when raising on the client thread - # (matches previous behavior -- not sure why we wouldn't want stack trace in the other case) - e = sys.exc_info() if self._fail_fast and six.PY2 else exc - # If we're not failing fast and all executions are raising, there is a chance of recursing # here as subsequent requests are attempted. If we hit this threshold, schedule this result/retry # and let the event loop thread return. 
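
A compact sketch of the depth-guard hand-off that comment describes and the
next lines implement; MAX_DEPTH and the executor here are stand-ins (the real
code uses self._exec_depth, max_error_recursion, and session.submit):

    from concurrent.futures import ThreadPoolExecutor

    MAX_DEPTH = 100  # stand-in for max_error_recursion

    def handle_failure(depth, record, executor):
        # While the synchronous retry recursion is shallow, record the
        # failure inline; past the threshold, bounce the call to a worker
        # thread so the current stack can unwind instead of growing with
        # every synchronously-failing attempt.
        if depth < MAX_DEPTH:
            record()
        else:
            executor.submit(record)

    handle_failure(5, lambda: None, ThreadPoolExecutor(max_workers=1))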
if self._exec_depth < self.max_error_recursion: - self._put_result(e, idx, False) + self._put_result(exc, idx, False) else: - self.session.submit(self._put_result, e, idx, False) + self.session.submit(self._put_result, exc, idx, False) self._exec_depth -= 1 def _on_success(self, result, future, idx): @@ -163,14 +157,6 @@ def _on_success(self, result, future, idx): def _on_error(self, result, future, idx): self._put_result(result, idx, False) - @staticmethod - def _raise(exc): - if six.PY2 and isinstance(exc, tuple): - (exc_type, value, traceback) = exc - six.reraise(exc_type, value, traceback) - else: - raise exc - class ConcurrentExecutorGenResults(_ConcurrentExecutor): @@ -190,7 +176,7 @@ def _results(self): try: self._condition.release() if self._fail_fast and not res[0]: - self._raise(res[1]) + raise res[1] yield res finally: self._condition.acquire() @@ -221,9 +207,9 @@ def _results(self): while self._current < self._exec_count: self._condition.wait() if self._exception and self._fail_fast: - self._raise(self._exception) + raise self._exception if self._exception and self._fail_fast: # raise the exception even if there was no wait - self._raise(self._exception) + raise self._exception return [r[1] for r in sorted(self._results_queue)] diff --git a/cassandra/connection.py b/cassandra/connection.py index 0869584663..195c93c889 100644 --- a/cassandra/connection.py +++ b/cassandra/connection.py @@ -19,8 +19,6 @@ from heapq import heappush, heappop import io import logging -import six -from six.moves import range import socket import struct import sys @@ -33,7 +31,7 @@ if 'gevent.monkey' in sys.modules: from gevent.queue import Queue, Empty else: - from six.moves.queue import Queue, Empty # noqa + from queue import Queue, Empty # noqa from cassandra import ConsistencyLevel, AuthenticationFailed, OperationTimedOut, ProtocolVersion from cassandra.marshal import int32_pack @@ -605,12 +603,6 @@ def wrapper(self, *args, **kwargs): DEFAULT_CQL_VERSION = '3.0.0' -if six.PY3: - def int_from_buf_item(i): - return i -else: - int_from_buf_item = ord - class _ConnectionIOBuffer(object): """ @@ -1122,7 +1114,7 @@ def _read_frame_header(self): buf = self._io_buffer.cql_frame_buffer.getvalue() pos = len(buf) if pos: - version = int_from_buf_item(buf[0]) & PROTOCOL_VERSION_MASK + version = buf[0] & PROTOCOL_VERSION_MASK if version not in ProtocolVersion.SUPPORTED_VERSIONS: raise ProtocolError("This version of the driver does not support protocol version %d" % version) frame_header = frame_header_v3 if version >= 3 else frame_header_v1_v2 @@ -1321,7 +1313,7 @@ def _handle_options_response(self, options_response): remote_supported_compressions) else: compression_type = None - if isinstance(self.compression, six.string_types): + if isinstance(self.compression, str): # the user picked a specific compression type ('snappy' or 'lz4') if self.compression not in remote_supported_compressions: raise ProtocolError( diff --git a/cassandra/cqlengine/__init__.py b/cassandra/cqlengine/__init__.py index e2a952d682..b9466e961b 100644 --- a/cassandra/cqlengine/__init__.py +++ b/cassandra/cqlengine/__init__.py @@ -12,9 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import six - - # Caching constants. 
CACHING_ALL = "ALL" CACHING_KEYS_ONLY = "KEYS_ONLY" @@ -31,7 +28,4 @@ class ValidationError(CQLEngineException): class UnicodeMixin(object): - if six.PY3: - __str__ = lambda x: x.__unicode__() - else: - __str__ = lambda x: six.text_type(x).encode('utf-8') + __str__ = lambda x: x.__unicode__() diff --git a/cassandra/cqlengine/columns.py b/cassandra/cqlengine/columns.py index 49116129fc..7c20ec6642 100644 --- a/cassandra/cqlengine/columns.py +++ b/cassandra/cqlengine/columns.py @@ -15,7 +15,6 @@ from copy import deepcopy, copy from datetime import date, datetime, timedelta import logging -import six from uuid import UUID as _UUID from cassandra import util @@ -327,7 +326,7 @@ class Blob(Column): def to_database(self, value): - if not isinstance(value, (six.binary_type, bytearray)): + if not isinstance(value, (bytes, bytearray)): raise Exception("expecting a binary, got a %s" % type(value)) val = super(Bytes, self).to_database(value) @@ -381,7 +380,7 @@ def __init__(self, min_length=None, max_length=None, **kwargs): def validate(self, value): value = super(Text, self).validate(value) - if not isinstance(value, (six.string_types, bytearray)) and value is not None: + if not isinstance(value, (str, bytearray)) and value is not None: raise ValidationError('{0} {1} is not a string'.format(self.column_name, type(value))) if self.max_length is not None: if value and len(value) > self.max_length: @@ -655,7 +654,7 @@ def validate(self, value): return if isinstance(val, _UUID): return val - if isinstance(val, six.string_types): + if isinstance(val, str): try: return _UUID(val) except ValueError: diff --git a/cassandra/cqlengine/connection.py b/cassandra/cqlengine/connection.py index 90e6d90317..588e512a2d 100644 --- a/cassandra/cqlengine/connection.py +++ b/cassandra/cqlengine/connection.py @@ -14,7 +14,6 @@ from collections import defaultdict import logging -import six import threading from cassandra.cluster import Cluster, _ConfigMode, _NOT_SET, NoHostAvailable, UserTypeDoesNotExist, ConsistencyLevel @@ -346,7 +345,7 @@ def execute(query, params=None, consistency_level=None, timeout=NOT_SET, connect elif isinstance(query, BaseCQLStatement): params = query.get_context() query = SimpleStatement(str(query), consistency_level=consistency_level, fetch_size=query.fetch_size) - elif isinstance(query, six.string_types): + elif isinstance(query, str): query = SimpleStatement(query, consistency_level=consistency_level) log.debug(format_log_context('Query: {}, Params: {}'.format(query.query_string, params), connection=connection)) diff --git a/cassandra/cqlengine/management.py b/cassandra/cqlengine/management.py index 536bde6349..6790a117c7 100644 --- a/cassandra/cqlengine/management.py +++ b/cassandra/cqlengine/management.py @@ -16,7 +16,6 @@ import json import logging import os -import six import warnings from itertools import product @@ -232,7 +231,7 @@ def _sync_table(model, connection=None): except CQLEngineException as ex: # 1.2 doesn't return cf names, so we have to examine the exception # and ignore if it says the column family already exists - if "Cannot add already existing column family" not in six.text_type(ex): + if "Cannot add already existing column family" not in str(ex): raise else: log.debug(format_log_context("sync_table checking existing table %s", keyspace=ks_name, connection=connection), cf_name) @@ -477,7 +476,7 @@ def _update_options(model, connection=None): except KeyError: msg = format_log_context("Invalid table option: '%s'; known options: %s", keyspace=ks_name, connection=connection) 
raise KeyError(msg % (name, existing_options.keys())) - if isinstance(existing_value, six.string_types): + if isinstance(existing_value, str): if value != existing_value: update_options[name] = value else: diff --git a/cassandra/cqlengine/models.py b/cassandra/cqlengine/models.py index b3c7c9e37f..bc00001666 100644 --- a/cassandra/cqlengine/models.py +++ b/cassandra/cqlengine/models.py @@ -14,7 +14,6 @@ import logging import re -import six from warnings import warn from cassandra.cqlengine import CQLEngineException, ValidationError @@ -614,7 +613,7 @@ def __iter__(self): def __getitem__(self, key): """ Returns column's value. """ - if not isinstance(key, six.string_types): + if not isinstance(key, str): raise TypeError if key not in self._columns.keys(): raise KeyError @@ -622,7 +621,7 @@ def __getitem__(self, key): def __setitem__(self, key, val): """ Sets a column's value. """ - if not isinstance(key, six.string_types): + if not isinstance(key, str): raise TypeError if key not in self._columns.keys(): raise KeyError @@ -1042,8 +1041,7 @@ def _transform_column(col_name, col_obj): return klass -@six.add_metaclass(ModelMetaClass) -class Model(BaseModel): +class Model(BaseModel, metaclass=ModelMetaClass): __abstract__ = True """ *Optional.* Indicates that this model is only intended to be used as a base class for other models. diff --git a/cassandra/cqlengine/operators.py b/cassandra/cqlengine/operators.py index bba505583c..2adf51758d 100644 --- a/cassandra/cqlengine/operators.py +++ b/cassandra/cqlengine/operators.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import six from cassandra.cqlengine import UnicodeMixin @@ -44,8 +43,7 @@ def __init__(cls, name, bases, dct): super(OpMapMeta, cls).__init__(name, bases, dct) -@six.add_metaclass(OpMapMeta) -class BaseWhereOperator(BaseQueryOperator): +class BaseWhereOperator(BaseQueryOperator, metaclass=OpMapMeta): """ base operator used for where clauses """ @classmethod def get_operator(cls, symbol): diff --git a/cassandra/cqlengine/query.py b/cassandra/cqlengine/query.py index 1978d319f4..73f48a5928 100644 --- a/cassandra/cqlengine/query.py +++ b/cassandra/cqlengine/query.py @@ -16,7 +16,6 @@ from datetime import datetime, timedelta from functools import partial import time -import six from warnings import warn from cassandra.query import SimpleStatement, BatchType as CBatchType, BatchStatement @@ -103,29 +102,29 @@ def in_(self, item): used where you'd typically want to use python's `in` operator """ - return WhereClause(six.text_type(self), InOperator(), item) + return WhereClause(str(self), InOperator(), item) def contains_(self, item): """ Returns a CONTAINS operator """ - return WhereClause(six.text_type(self), ContainsOperator(), item) + return WhereClause(str(self), ContainsOperator(), item) def __eq__(self, other): - return WhereClause(six.text_type(self), EqualsOperator(), self._to_database(other)) + return WhereClause(str(self), EqualsOperator(), self._to_database(other)) def __gt__(self, other): - return WhereClause(six.text_type(self), GreaterThanOperator(), self._to_database(other)) + return WhereClause(str(self), GreaterThanOperator(), self._to_database(other)) def __ge__(self, other): - return WhereClause(six.text_type(self), GreaterThanOrEqualOperator(), self._to_database(other)) + return WhereClause(str(self), GreaterThanOrEqualOperator(), self._to_database(other)) def 
__lt__(self, other): - return WhereClause(six.text_type(self), LessThanOperator(), self._to_database(other)) + return WhereClause(str(self), LessThanOperator(), self._to_database(other)) def __le__(self, other): - return WhereClause(six.text_type(self), LessThanOrEqualOperator(), self._to_database(other)) + return WhereClause(str(self), LessThanOrEqualOperator(), self._to_database(other)) class BatchType(object): @@ -231,7 +230,7 @@ def execute(self): opener = 'BEGIN ' + (str(batch_type) + ' ' if batch_type else '') + ' BATCH' if self.timestamp: - if isinstance(self.timestamp, six.integer_types): + if isinstance(self.timestamp, int): ts = self.timestamp elif isinstance(self.timestamp, (datetime, timedelta)): ts = self.timestamp @@ -407,7 +406,7 @@ def _execute(self, statement): return result def __unicode__(self): - return six.text_type(self._select_query()) + return str(self._select_query()) def __str__(self): return str(self.__unicode__()) @@ -604,7 +603,7 @@ def batch(self, batch_obj): def first(self): try: - return six.next(iter(self)) + return next(iter(self)) except StopIteration: return None @@ -901,7 +900,7 @@ def limit(self, v): if v is None: v = 0 - if not isinstance(v, six.integer_types): + if not isinstance(v, int): raise TypeError if v == self._limit: return self @@ -925,7 +924,7 @@ def fetch_size(self, v): print(user) """ - if not isinstance(v, six.integer_types): + if not isinstance(v, int): raise TypeError if v == self._fetch_size: return self diff --git a/cassandra/cqlengine/statements.py b/cassandra/cqlengine/statements.py index c6ceb16607..d92d0b2452 100644 --- a/cassandra/cqlengine/statements.py +++ b/cassandra/cqlengine/statements.py @@ -14,8 +14,6 @@ from datetime import datetime, timedelta import time -import six -from six.moves import filter from cassandra.query import FETCH_SIZE_UNSET from cassandra.cqlengine import columns @@ -114,7 +112,7 @@ def __init__(self, field, operator, value, quote_field=True): def __unicode__(self): field = ('"{0}"' if self.quote_field else '{0}').format(self.field) - return u'{0} {1} {2}'.format(field, self.operator, six.text_type(self.query_value)) + return u'{0} {1} {2}'.format(field, self.operator, str(self.query_value)) def __hash__(self): return super(WhereClause, self).__hash__() ^ hash(self.operator) @@ -186,8 +184,7 @@ def __init__(cls, name, bases, dct): super(ContainerUpdateTypeMapMeta, cls).__init__(name, bases, dct) -@six.add_metaclass(ContainerUpdateTypeMapMeta) -class ContainerUpdateClause(AssignmentClause): +class ContainerUpdateClause(AssignmentClause, metaclass=ContainerUpdateTypeMapMeta): def __init__(self, field, value, operation=None, previous=None): super(ContainerUpdateClause, self).__init__(field, value) @@ -563,7 +560,7 @@ def add_conditional_clause(self, clause): self.conditionals.append(clause) def _get_conditionals(self): - return 'IF {0}'.format(' AND '.join([six.text_type(c) for c in self.conditionals])) + return 'IF {0}'.format(' AND '.join([str(c) for c in self.conditionals])) def get_context_size(self): return len(self.get_context()) @@ -584,7 +581,7 @@ def timestamp_normalized(self): if not self.timestamp: return None - if isinstance(self.timestamp, six.integer_types): + if isinstance(self.timestamp, int): return self.timestamp if isinstance(self.timestamp, timedelta): @@ -602,7 +599,7 @@ def __repr__(self): @property def _where(self): - return 'WHERE {0}'.format(' AND '.join([six.text_type(c) for c in self.where_clauses])) + return 'WHERE {0}'.format(' AND '.join([str(c) for c in self.where_clauses])) 
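
One migration recurs throughout this commit: @six.add_metaclass(Meta) becomes
the class-statement metaclass keyword, as in the operator and
ContainerUpdateClause hunks above. A runnable toy illustration (OpMeta and
BaseOp are made-up names in the spirit of OpMapMeta):

    class OpMeta(type):
        """Registering metaclass, analogous to OpMapMeta above."""
        registry = {}

        def __init__(cls, name, bases, dct):
            super().__init__(name, bases, dct)
            OpMeta.registry[name] = cls

    # Spelling removed by this commit:
    #     @six.add_metaclass(OpMeta)
    #     class BaseOp(object): ...
    # Python 3-only spelling now used:
    class BaseOp(object, metaclass=OpMeta):
        pass

    assert OpMeta.registry['BaseOp'] is BaseOp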
class SelectStatement(BaseCQLStatement): @@ -629,10 +626,10 @@ def __init__(self, fetch_size=fetch_size ) - self.fields = [fields] if isinstance(fields, six.string_types) else (fields or []) + self.fields = [fields] if isinstance(fields, str) else (fields or []) self.distinct_fields = distinct_fields self.count = count - self.order_by = [order_by] if isinstance(order_by, six.string_types) else order_by + self.order_by = [order_by] if isinstance(order_by, str) else order_by self.limit = limit self.allow_filtering = allow_filtering @@ -653,7 +650,7 @@ def __unicode__(self): qs += [self._where] if self.order_by and not self.count: - qs += ['ORDER BY {0}'.format(', '.join(six.text_type(o) for o in self.order_by))] + qs += ['ORDER BY {0}'.format(', '.join(str(o) for o in self.order_by))] if self.limit: qs += ['LIMIT {0}'.format(self.limit)] @@ -798,7 +795,7 @@ def __unicode__(self): qs += ["USING {0}".format(" AND ".join(using_options))] qs += ['SET'] - qs += [', '.join([six.text_type(c) for c in self.assignments])] + qs += [', '.join([str(c) for c in self.assignments])] if self.where_clauses: qs += [self._where] @@ -849,7 +846,7 @@ def __init__(self, table, fields=None, where=None, timestamp=None, conditionals= conditionals=conditionals ) self.fields = [] - if isinstance(fields, six.string_types): + if isinstance(fields, str): fields = [fields] for field in fields or []: self.add_field(field) @@ -874,7 +871,7 @@ def get_context(self): return ctx def add_field(self, field): - if isinstance(field, six.string_types): + if isinstance(field, str): field = FieldDeleteClause(field) if not isinstance(field, BaseClause): raise StatementException("only instances of AssignmentClause can be added to statements") diff --git a/cassandra/cqlengine/usertype.py b/cassandra/cqlengine/usertype.py index 155068d99e..7fa85f1919 100644 --- a/cassandra/cqlengine/usertype.py +++ b/cassandra/cqlengine/usertype.py @@ -13,7 +13,6 @@ # limitations under the License. import re -import six from cassandra.util import OrderedDict from cassandra.cqlengine import CQLEngineException @@ -72,7 +71,7 @@ def __ne__(self, other): return not self.__eq__(other) def __str__(self): - return "{{{0}}}".format(', '.join("'{0}': {1}".format(k, getattr(self, k)) for k, v in six.iteritems(self._values))) + return "{{{0}}}".format(', '.join("'{0}': {1}".format(k, getattr(self, k)) for k, v in self._values.items())) def has_changed_fields(self): return any(v.changed for v in self._values.values()) @@ -93,14 +92,14 @@ def __getattr__(self, attr): raise AttributeError(attr) def __getitem__(self, key): - if not isinstance(key, six.string_types): + if not isinstance(key, str): raise TypeError if key not in self._fields.keys(): raise KeyError return getattr(self, key) def __setitem__(self, key, val): - if not isinstance(key, six.string_types): + if not isinstance(key, str): raise TypeError if key not in self._fields.keys(): raise KeyError @@ -198,8 +197,7 @@ def _transform_column(field_name, field_obj): return klass -@six.add_metaclass(UserTypeMetaClass) -class UserType(BaseUserType): +class UserType(BaseUserType, metaclass=UserTypeMetaClass): """ This class is used to model User Defined Types. 
To define a type, declare a class inheriting from this, and assign field types as class attributes: diff --git a/cassandra/cqltypes.py b/cassandra/cqltypes.py index 6cc89aafbb..d1d7e888f9 100644 --- a/cassandra/cqltypes.py +++ b/cassandra/cqltypes.py @@ -39,8 +39,6 @@ import re import socket import time -import six -from six.moves import range import struct import sys from uuid import UUID @@ -54,10 +52,7 @@ from cassandra import util _little_endian_flag = 1 # we always serialize LE -if six.PY3: - import ipaddress - -_ord = ord if six.PY2 else lambda x: x +import ipaddress apache_cassandra_type_prefix = 'org.apache.cassandra.db.marshal.' @@ -66,16 +61,12 @@ log = logging.getLogger(__name__) -if six.PY3: - _number_types = frozenset((int, float)) - long = int +_number_types = frozenset((int, float)) + - def _name_from_hex_string(encoded_name): - bin_str = unhexlify(encoded_name) - return bin_str.decode('ascii') -else: - _number_types = frozenset((int, long, float)) - _name_from_hex_string = unhexlify +def _name_from_hex_string(encoded_name): + bin_str = unhexlify(encoded_name) + return bin_str.decode('ascii') def trim_if_startswith(s, prefix): @@ -279,8 +270,7 @@ def __str__(self): EMPTY = EmptyValue() -@six.add_metaclass(CassandraTypeType) -class _CassandraType(object): +class _CassandraType(object, metaclass=CassandraTypeType): subtypes = () num_subtypes = 0 empty_binary_ok = False @@ -383,8 +373,6 @@ def apply_parameters(cls, subtypes, names=None): raise ValueError("%s types require %d subtypes (%d given)" % (cls.typename, cls.num_subtypes, len(subtypes))) newname = cls.cass_parameterized_type_with(subtypes) - if six.PY2 and isinstance(newname, unicode): - newname = newname.encode('utf-8') return type(newname, (cls,), {'subtypes': subtypes, 'cassname': cls.cassname, 'fieldnames': names}) @classmethod @@ -415,16 +403,10 @@ class _UnrecognizedType(_CassandraType): num_subtypes = 'UNKNOWN' -if six.PY3: - def mkUnrecognizedType(casstypename): - return CassandraTypeType(casstypename, - (_UnrecognizedType,), - {'typename': "'%s'" % casstypename}) -else: - def mkUnrecognizedType(casstypename): # noqa - return CassandraTypeType(casstypename.encode('utf8'), - (_UnrecognizedType,), - {'typename': "'%s'" % casstypename}) +def mkUnrecognizedType(casstypename): + return CassandraTypeType(casstypename, + (_UnrecognizedType,), + {'typename': "'%s'" % casstypename}) class BytesType(_CassandraType): @@ -433,7 +415,7 @@ class BytesType(_CassandraType): @staticmethod def serialize(val, protocol_version): - return six.binary_type(val) + return bytes(val) class DecimalType(_CassandraType): @@ -500,25 +482,20 @@ def serialize(byts, protocol_version): return int8_pack(byts) -if six.PY2: - class AsciiType(_CassandraType): - typename = 'ascii' - empty_binary_ok = True -else: - class AsciiType(_CassandraType): - typename = 'ascii' - empty_binary_ok = True +class AsciiType(_CassandraType): + typename = 'ascii' + empty_binary_ok = True - @staticmethod - def deserialize(byts, protocol_version): - return byts.decode('ascii') + @staticmethod + def deserialize(byts, protocol_version): + return byts.decode('ascii') - @staticmethod - def serialize(var, protocol_version): - try: - return var.encode('ascii') - except UnicodeDecodeError: - return var + @staticmethod + def serialize(var, protocol_version): + try: + return var.encode('ascii') + except UnicodeDecodeError: + return var class FloatType(_CassandraType): @@ -603,7 +580,7 @@ def serialize(addr, protocol_version): # since we've already determined the AF return 
socket.inet_aton(addr) except: - if six.PY3 and isinstance(addr, (ipaddress.IPv4Address, ipaddress.IPv6Address)): + if isinstance(addr, (ipaddress.IPv4Address, ipaddress.IPv6Address)): return addr.packed raise ValueError("can't interpret %r as an inet address" % (addr,)) @@ -662,7 +639,7 @@ def serialize(v, protocol_version): raise TypeError('DateType arguments must be a datetime, date, or timestamp') timestamp = v - return int64_pack(long(timestamp)) + return int64_pack(int(timestamp)) class TimestampType(DateType): @@ -706,7 +683,7 @@ def serialize(val, protocol_version): try: days = val.days_from_epoch except AttributeError: - if isinstance(val, six.integer_types): + if isinstance(val, int): # the DB wants offset int values, but util.Date init takes days from epoch # here we assume int values are offset, as they would appear in CQL # short circuit to avoid subtracting just to add offset @@ -826,7 +803,7 @@ def deserialize_safe(cls, byts, protocol_version): @classmethod def serialize_safe(cls, items, protocol_version): - if isinstance(items, six.string_types): + if isinstance(items, str): raise TypeError("Received a string for a type that expects a sequence") subtype, = cls.subtypes @@ -900,7 +877,7 @@ def serialize_safe(cls, themap, protocol_version): buf = io.BytesIO() buf.write(pack(len(themap))) try: - items = six.iteritems(themap) + items = themap.items() except AttributeError: raise TypeError("Got a non-map object for a map value") inner_proto = max(3, protocol_version) @@ -975,9 +952,6 @@ class UserType(TupleType): def make_udt_class(cls, keyspace, udt_name, field_names, field_types): assert len(field_names) == len(field_types) - if six.PY2 and isinstance(udt_name, unicode): - udt_name = udt_name.encode('utf-8') - instance = cls._cache.get((keyspace, udt_name)) if not instance or instance.fieldnames != field_names or instance.subtypes != field_types: instance = type(udt_name, (cls,), {'subtypes': field_types, @@ -992,8 +966,6 @@ def make_udt_class(cls, keyspace, udt_name, field_names, field_types): @classmethod def evict_udt_class(cls, keyspace, udt_name): - if six.PY2 and isinstance(udt_name, unicode): - udt_name = udt_name.encode('utf-8') try: del cls._cache[(keyspace, udt_name)] except KeyError: @@ -1150,7 +1122,7 @@ def serialize_safe(cls, val, protocol_version): def is_counter_type(t): - if isinstance(t, six.string_types): + if isinstance(t, str): t = lookup_casstype(t) return issubclass(t, CounterColumnType) @@ -1186,7 +1158,7 @@ def serialize(val, protocol_version): @staticmethod def deserialize(byts, protocol_version): - is_little_endian = bool(_ord(byts[0])) + is_little_endian = bool(byts[0]) point = point_le if is_little_endian else point_be return util.Point(*point.unpack_from(byts, 5)) # ofs = endian byte + int type @@ -1203,7 +1175,7 @@ def serialize(val, protocol_version): @staticmethod def deserialize(byts, protocol_version): - is_little_endian = bool(_ord(byts[0])) + is_little_endian = bool(byts[0]) point = point_le if is_little_endian else point_be coords = ((point.unpack_from(byts, offset) for offset in range(1 + 4 + 4, len(byts), point.size))) # start = endian + int type + int count return util.LineString(coords) @@ -1232,7 +1204,7 @@ def serialize(val, protocol_version): @staticmethod def deserialize(byts, protocol_version): - is_little_endian = bool(_ord(byts[0])) + is_little_endian = bool(byts[0]) if is_little_endian: int_fmt = ' MAX_INT32 or value < MIN_INT32): + if type(value) is int and (value > MAX_INT32 or value < MIN_INT32): return Int64TypeIO return 
Int32TypeIO @@ -164,9 +158,7 @@ class Int64TypeIO(IntegerTypeIO): @classmethod def deserialize(cls, value, reader=None): - if six.PY3: - return value - return long(value) + return value class FloatTypeIO(GraphSONTypeIO): @@ -274,8 +266,7 @@ class BlobTypeIO(GraphSONTypeIO): @classmethod def serialize(cls, value, writer=None): value = base64.b64encode(value) - if six.PY3: - value = value.decode('utf-8') + value = value.decode('utf-8') return value @classmethod @@ -343,7 +334,7 @@ def deserialize(cls, value, reader=None): raise ValueError('Invalid duration: {0}'.format(value)) duration = {k: float(v) if v is not None else 0 - for k, v in six.iteritems(duration.groupdict())} + for k, v in duration.groupdict().items()} return datetime.timedelta(days=duration['days'], hours=duration['hours'], minutes=duration['minutes'], seconds=duration['seconds']) @@ -512,7 +503,7 @@ class JsonMapTypeIO(GraphSONTypeIO): @classmethod def serialize(cls, value, writer=None): out = {} - for k, v in six.iteritems(value): + for k, v in value.items(): out[k] = writer.serialize(v, writer) return out @@ -528,7 +519,7 @@ class MapTypeIO(GraphSONTypeIO): def definition(cls, value, writer=None): out = OrderedDict([('cqlType', cls.cql_type)]) out['definition'] = [] - for k, v in six.iteritems(value): + for k, v in value.items(): # we just need the first pair to write the def out['definition'].append(writer.definition(k)) out['definition'].append(writer.definition(v)) @@ -538,7 +529,7 @@ def definition(cls, value, writer=None): @classmethod def serialize(cls, value, writer=None): out = [] - for k, v in six.iteritems(value): + for k, v in value.items(): out.append(writer.serialize(k, writer)) out.append(writer.serialize(v, writer)) @@ -841,16 +832,10 @@ class GraphSON1Serializer(_BaseGraphSONSerializer): ]) -if ipaddress: - GraphSON1Serializer.register(ipaddress.IPv4Address, InetTypeIO) - GraphSON1Serializer.register(ipaddress.IPv6Address, InetTypeIO) - -if six.PY2: - GraphSON1Serializer.register(buffer, ByteBufferTypeIO) - GraphSON1Serializer.register(unicode, TextTypeIO) -else: - GraphSON1Serializer.register(memoryview, ByteBufferTypeIO) - GraphSON1Serializer.register(bytes, ByteBufferTypeIO) +GraphSON1Serializer.register(ipaddress.IPv4Address, InetTypeIO) +GraphSON1Serializer.register(ipaddress.IPv6Address, InetTypeIO) +GraphSON1Serializer.register(memoryview, ByteBufferTypeIO) +GraphSON1Serializer.register(bytes, ByteBufferTypeIO) class _BaseGraphSONDeserializer(object): @@ -922,9 +907,7 @@ def deserialize_int(cls, value): @classmethod def deserialize_bigint(cls, value): - if six.PY3: - return cls.deserialize_int(value) - return long(value) + return cls.deserialize_int(value) @classmethod def deserialize_double(cls, value): @@ -1007,8 +990,6 @@ def serialize(self, value, writer=None): GraphSON2Serializer.register(int, IntegerTypeIO) -if six.PY2: - GraphSON2Serializer.register(long, IntegerTypeIO) class GraphSON2Deserializer(_BaseGraphSONDeserializer): @@ -1055,7 +1036,7 @@ def deserialize(self, obj): except KeyError: pass # list and map are treated as normal json objs (could be isolated deserializers) - return {self.deserialize(k): self.deserialize(v) for k, v in six.iteritems(obj)} + return {self.deserialize(k): self.deserialize(v) for k, v in obj.items()} elif isinstance(obj, list): return [self.deserialize(o) for o in obj] else: @@ -1109,7 +1090,7 @@ def get_serializer(self, value): if self.user_types is None: try: user_types = self.context['cluster']._user_types[self.context['graph_name']] - self.user_types = 
dict(map(reversed, six.iteritems(user_types))) + self.user_types = dict(map(reversed, user_types.items())) except KeyError: self.user_types = {} diff --git a/cassandra/datastax/graph/query.py b/cassandra/datastax/graph/query.py index 7c0e265dbf..866df7a94c 100644 --- a/cassandra/datastax/graph/query.py +++ b/cassandra/datastax/graph/query.py @@ -15,8 +15,6 @@ import json from warnings import warn -import six - from cassandra import ConsistencyLevel from cassandra.query import Statement, SimpleStatement from cassandra.datastax.graph.types import Vertex, Edge, Path, VertexProperty @@ -77,7 +75,7 @@ def __init__(self, **kwargs): self._graph_options = {} kwargs.setdefault('graph_source', 'g') kwargs.setdefault('graph_language', GraphOptions.DEFAULT_GRAPH_LANGUAGE) - for attr, value in six.iteritems(kwargs): + for attr, value in kwargs.items(): if attr not in _graph_option_names: warn("Unknown keyword argument received for GraphOptions: {0}".format(attr)) setattr(self, attr, value) @@ -103,7 +101,7 @@ def get_options_map(self, other_options=None): for cl in ('graph-write-consistency', 'graph-read-consistency'): cl_enum = options.get(cl) if cl_enum is not None: - options[cl] = six.b(ConsistencyLevel.value_to_name[cl_enum]) + options[cl] = ConsistencyLevel.value_to_name[cl_enum].encode() return options def set_source_default(self): @@ -157,8 +155,8 @@ def get(self, key=opt[2]): def set(self, value, key=opt[2]): if value is not None: # normalize text here so it doesn't have to be done every time we get options map - if isinstance(value, six.text_type) and not isinstance(value, six.binary_type): - value = six.b(value) + if isinstance(value, str): + value = value.encode() self._graph_options[key] = value else: self._graph_options.pop(key, None) @@ -278,7 +276,7 @@ def __getattr__(self, attr): raise AttributeError("Result has no top-level attribute %r" % (attr,)) def __getitem__(self, item): - if isinstance(self.value, dict) and isinstance(item, six.string_types): + if isinstance(self.value, dict) and isinstance(item, str): return self.value[item] elif isinstance(self.value, list) and isinstance(item, int): return self.value[item] diff --git a/cassandra/datastax/insights/registry.py b/cassandra/datastax/insights/registry.py index 3dd1d255ae..03daebd86e 100644 --- a/cassandra/datastax/insights/registry.py +++ b/cassandra/datastax/insights/registry.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import six from collections import OrderedDict from warnings import warn @@ -59,7 +58,7 @@ def _get_serializer(self, cls): try: return self._mapping_dict[cls] except KeyError: - for registered_cls, serializer in six.iteritems(self._mapping_dict): + for registered_cls, serializer in self._mapping_dict.items(): if issubclass(cls, registered_cls): return self._mapping_dict[registered_cls] raise ValueError diff --git a/cassandra/datastax/insights/reporter.py b/cassandra/datastax/insights/reporter.py index b05a88deb0..83205fc458 100644 --- a/cassandra/datastax/insights/reporter.py +++ b/cassandra/datastax/insights/reporter.py @@ -24,7 +24,6 @@ import sys from threading import Event, Thread import time -import six from cassandra.policies import HostDistance from cassandra.util import ms_timestamp_from_datetime @@ -199,9 +198,9 @@ def _get_startup_data(self): }, 'platformInfo': { 'os': { - 'name': uname_info.system if six.PY3 else uname_info[0], - 'version': uname_info.release if six.PY3 else uname_info[2], - 'arch': uname_info.machine if six.PY3 else uname_info[4] + 'name': uname_info.system, + 'version': uname_info.release, + 'arch': uname_info.machine }, 'cpus': { 'length': multiprocessing.cpu_count(), diff --git a/cassandra/datastax/insights/serializers.py b/cassandra/datastax/insights/serializers.py index aec4467a6a..289c165e8a 100644 --- a/cassandra/datastax/insights/serializers.py +++ b/cassandra/datastax/insights/serializers.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import six - def initialize_registry(insights_registry): # This will be called from the cluster module, so we put all this behavior @@ -203,8 +201,8 @@ def graph_options_insights_serializer(options): 'language': options.graph_language, 'graphProtocol': options.graph_protocol } - updates = {k: v.decode('utf-8') for k, v in six.iteritems(rv) - if isinstance(v, six.binary_type)} + updates = {k: v.decode('utf-8') for k, v in rv.items() + if isinstance(v, bytes)} rv.update(updates) return rv diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index 7de6949099..7c256674b0 100644 --- a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -29,8 +29,6 @@ from uuid import UUID from cassandra import cqltypes from cassandra import util -cdef bint PY2 = six.PY2 - cdef class Deserializer: """Cython-based deserializer class for a cqltype""" @@ -90,8 +88,6 @@ cdef class DesAsciiType(Deserializer): cdef deserialize(self, Buffer *buf, int protocol_version): if buf.size == 0: return "" - if PY2: - return to_bytes(buf) return to_bytes(buf).decode('ascii') diff --git a/cassandra/encoder.py b/cassandra/encoder.py index f2c3f8dfed..31d90549f4 100644 --- a/cassandra/encoder.py +++ b/cassandra/encoder.py @@ -27,28 +27,15 @@ import sys import types from uuid import UUID -import six +import ipaddress from cassandra.util import (OrderedDict, OrderedMap, OrderedMapSerializedKey, sortedset, Time, Date, Point, LineString, Polygon) -if six.PY3: - import ipaddress - -if six.PY3: - long = int - def cql_quote(term): - # The ordering of this method is important for the result of this method to - # be a native str type (for both Python 2 and 3) - if isinstance(term, str): return "'%s'" % str(term).replace("'", "''") - # This branch of the if statement will only be used by Python 2 to catch - # unicode strings, text_type is used to prevent type errors with Python 3. 
- elif isinstance(term, six.text_type): - return "'%s'" % term.encode('utf8').replace("'", "''") else: return str(term) @@ -97,21 +84,13 @@ def __init__(self): Polygon: self.cql_encode_str_quoted } - if six.PY2: - self.mapping.update({ - unicode: self.cql_encode_unicode, - buffer: self.cql_encode_bytes, - long: self.cql_encode_object, - types.NoneType: self.cql_encode_none, - }) - else: - self.mapping.update({ - memoryview: self.cql_encode_bytes, - bytes: self.cql_encode_bytes, - type(None): self.cql_encode_none, - ipaddress.IPv4Address: self.cql_encode_ipaddress, - ipaddress.IPv6Address: self.cql_encode_ipaddress - }) + self.mapping.update({ + memoryview: self.cql_encode_bytes, + bytes: self.cql_encode_bytes, + type(None): self.cql_encode_none, + ipaddress.IPv4Address: self.cql_encode_ipaddress, + ipaddress.IPv6Address: self.cql_encode_ipaddress + }) def cql_encode_none(self, val): """ @@ -134,16 +113,8 @@ def cql_encode_str(self, val): def cql_encode_str_quoted(self, val): return "'%s'" % val - if six.PY3: - def cql_encode_bytes(self, val): - return (b'0x' + hexlify(val)).decode('utf-8') - elif sys.version_info >= (2, 7): - def cql_encode_bytes(self, val): # noqa - return b'0x' + hexlify(val) - else: - # python 2.6 requires string or read-only buffer for hexlify - def cql_encode_bytes(self, val): # noqa - return b'0x' + hexlify(buffer(val)) + def cql_encode_bytes(self, val): + return (b'0x' + hexlify(val)).decode('utf-8') def cql_encode_object(self, val): """ @@ -169,7 +140,7 @@ def cql_encode_datetime(self, val): with millisecond precision. """ timestamp = calendar.timegm(val.utctimetuple()) - return str(long(timestamp * 1e3 + getattr(val, 'microsecond', 0) / 1e3)) + return str(int(timestamp * 1e3 + getattr(val, 'microsecond', 0) / 1e3)) def cql_encode_date(self, val): """ @@ -214,7 +185,7 @@ def cql_encode_map_collection(self, val): return '{%s}' % ', '.join('%s: %s' % ( self.mapping.get(type(k), self.cql_encode_object)(k), self.mapping.get(type(v), self.cql_encode_object)(v) - ) for k, v in six.iteritems(val)) + ) for k, v in val.items()) def cql_encode_list_collection(self, val): """ @@ -236,14 +207,13 @@ def cql_encode_all_types(self, val, as_text_type=False): if :attr:`~Encoder.mapping` does not contain an entry for the type. """ encoded = self.mapping.get(type(val), self.cql_encode_object)(val) - if as_text_type and not isinstance(encoded, six.text_type): + if as_text_type and not isinstance(encoded, str): return encoded.decode('utf-8') return encoded - if six.PY3: - def cql_encode_ipaddress(self, val): - """ - Converts an ipaddress (IPV4Address, IPV6Address) to a CQL string. This - is suitable for ``inet`` type columns. - """ - return "'%s'" % val.compressed + def cql_encode_ipaddress(self, val): + """ + Converts an ipaddress (IPV4Address, IPV6Address) to a CQL string. This + is suitable for ``inet`` type columns. 
+ """ + return "'%s'" % val.compressed diff --git a/cassandra/io/asyncorereactor.py b/cassandra/io/asyncorereactor.py index 0abdbbfe0a..a45d657828 100644 --- a/cassandra/io/asyncorereactor.py +++ b/cassandra/io/asyncorereactor.py @@ -24,7 +24,6 @@ import sys import ssl -from six.moves import range try: from weakref import WeakSet diff --git a/cassandra/io/eventletreactor.py b/cassandra/io/eventletreactor.py index 162661f468..42874036d5 100644 --- a/cassandra/io/eventletreactor.py +++ b/cassandra/io/eventletreactor.py @@ -23,8 +23,6 @@ from threading import Event import time -from six.moves import xrange - from cassandra.connection import Connection, ConnectionShutdown, Timer, TimerManager try: from eventlet.green.OpenSSL import SSL @@ -190,5 +188,5 @@ def handle_read(self): def push(self, data): chunk_size = self.out_buffer_size - for i in xrange(0, len(data), chunk_size): + for i in range(0, len(data), chunk_size): self._write_queue.put(data[i:i + chunk_size]) diff --git a/cassandra/io/geventreactor.py b/cassandra/io/geventreactor.py index ebc664d485..4f1f158aa7 100644 --- a/cassandra/io/geventreactor.py +++ b/cassandra/io/geventreactor.py @@ -20,7 +20,6 @@ import logging import time -from six.moves import range from cassandra.connection import Connection, ConnectionShutdown, Timer, TimerManager diff --git a/cassandra/io/libevreactor.py b/cassandra/io/libevreactor.py index 54e2d0de03..484690da89 100644 --- a/cassandra/io/libevreactor.py +++ b/cassandra/io/libevreactor.py @@ -21,7 +21,6 @@ from threading import Lock, Thread import time -from six.moves import range from cassandra.connection import (Connection, ConnectionShutdown, NONBLOCKING, Timer, TimerManager) diff --git a/cassandra/marshal.py b/cassandra/marshal.py index 43cb627b08..726f0819eb 100644 --- a/cassandra/marshal.py +++ b/cassandra/marshal.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import six import struct @@ -45,35 +44,16 @@ def _make_packer(format_string): v3_header_unpack = v3_header_struct.unpack -if six.PY3: - def byte2int(b): - return b - - - def varint_unpack(term): - val = int(''.join("%02x" % i for i in term), 16) - if (term[0] & 128) != 0: - len_term = len(term) # pulling this out of the expression to avoid overflow in cython optimized code - val -= 1 << (len_term * 8) - return val -else: - def byte2int(b): - return ord(b) - - - def varint_unpack(term): # noqa - val = int(term.encode('hex'), 16) - if (ord(term[0]) & 128) != 0: - len_term = len(term) # pulling this out of the expression to avoid overflow in cython optimized code - val = val - (1 << (len_term * 8)) - return val +def varint_unpack(term): + val = int(''.join("%02x" % i for i in term), 16) + if (term[0] & 128) != 0: + len_term = len(term) # pulling this out of the expression to avoid overflow in cython optimized code + val -= 1 << (len_term * 8) + return val def bit_length(n): - if six.PY3 or isinstance(n, int): - return int.bit_length(n) - else: - return long.bit_length(n) + return int.bit_length(n) def varint_pack(big): @@ -91,7 +71,7 @@ def varint_pack(big): if pos and revbytes[-1] & 0x80: revbytes.append(0) revbytes.reverse() - return six.binary_type(revbytes) + return bytes(revbytes) point_be = struct.Struct('>dd') @@ -113,7 +93,7 @@ def vints_unpack(term): # noqa values = [] n = 0 while n < len(term): - first_byte = byte2int(term[n]) + first_byte = term[n] if (first_byte & 128) == 0: val = first_byte @@ -124,7 +104,7 @@ def vints_unpack(term): # noqa while n < end: n += 1 val <<= 8 - val |= byte2int(term[n]) & 0xff + val |= term[n] & 0xff n += 1 values.append(decode_zig_zag(val)) @@ -162,4 +142,4 @@ def vints_pack(values): revbytes.append(abs(v)) revbytes.reverse() - return six.binary_type(revbytes) + return bytes(revbytes) diff --git a/cassandra/metadata.py b/cassandra/metadata.py index a82fbe48e3..f52bfd9317 100644 --- a/cassandra/metadata.py +++ b/cassandra/metadata.py @@ -15,13 +15,12 @@ from binascii import unhexlify from bisect import bisect_left from collections import defaultdict +from collections.abc import Mapping from functools import total_ordering from hashlib import md5 import json import logging import re -import six -from six.moves import zip import sys from threading import RLock import struct @@ -42,7 +41,6 @@ from cassandra.util import OrderedDict, Version from cassandra.pool import HostDistance from cassandra.connection import EndPoint -from cassandra.compat import Mapping log = logging.getLogger(__name__) @@ -292,7 +290,7 @@ def rebuild_token_map(self, partitioner, token_map): token_to_host_owner = {} ring = [] - for host, token_strings in six.iteritems(token_map): + for host, token_strings in token_map.items(): for token_string in token_strings: token = token_class.from_string(token_string) ring.append(token) @@ -350,7 +348,7 @@ def get_host(self, endpoint_or_address, port=None): return self._hosts.get(endpoint_or_address) def _get_host_by_address(self, address, port=None): - for host in six.itervalues(self._hosts): + for host in self._hosts.values(): if (host.broadcast_rpc_address == address and (port is None or host.broadcast_rpc_port is None or host.broadcast_rpc_port == port)): return host @@ -387,8 +385,7 @@ def __new__(metacls, name, bases, dct): -@six.add_metaclass(ReplicationStrategyTypeType) -class _ReplicationStrategy(object): +class _ReplicationStrategy(object, metaclass=ReplicationStrategyTypeType): options_map = None @classmethod @@ -627,7 +624,7 @@ def 
make_token_replica_map(self, token_to_host_owner, ring): racks_this_dc = dc_racks[dc] hosts_this_dc = len(hosts_per_dc[dc]) - for token_offset_index in six.moves.range(index, index+num_tokens): + for token_offset_index in range(index, index+num_tokens): if token_offset_index >= len(token_offsets): token_offset_index = token_offset_index - len(token_offsets) @@ -854,7 +851,7 @@ def _add_table_metadata(self, table_metadata): # note the intentional order of add before remove # this makes sure the maps are never absent something that existed before this update - for index_name, index_metadata in six.iteritems(table_metadata.indexes): + for index_name, index_metadata in table_metadata.indexes.items(): self.indexes[index_name] = index_metadata for index_name in (n for n in old_indexes if n not in table_metadata.indexes): @@ -1341,7 +1338,7 @@ def _all_as_cql(self): if self.extensions: registry = _RegisteredExtensionType._extension_registry - for k in six.viewkeys(registry) & self.extensions: # no viewkeys on OrderedMapSerializeKey + for k in registry.keys() & self.extensions: # no viewkeys on OrderedMapSerializeKey ext = registry[k] cql = ext.after_table_cql(self, k, self.extensions[k]) if cql: @@ -1557,8 +1554,7 @@ def __new__(mcs, name, bases, dct): return cls -@six.add_metaclass(_RegisteredExtensionType) -class RegisteredTableExtension(TableExtensionInterface): +class RegisteredTableExtension(TableExtensionInterface, metaclass=_RegisteredExtensionType): """ Extending this class registers it by name (associated by key in the `system_schema.tables.extensions` map). """ @@ -1864,7 +1860,7 @@ class MD5Token(HashToken): @classmethod def hash_fn(cls, key): - if isinstance(key, six.text_type): + if isinstance(key, str): key = key.encode('UTF-8') return abs(varint_unpack(md5(key).digest())) @@ -1878,7 +1874,7 @@ class BytesToken(Token): def from_string(cls, token_string): """ `token_string` should be the string representation from the server. """ # unhexlify works fine with unicode input in everythin but pypy3, where it Raises "TypeError: 'str' does not support the buffer interface" - if isinstance(token_string, six.text_type): + if isinstance(token_string, str): token_string = token_string.encode('ascii') # The BOP stores a hex string return cls(unhexlify(token_string)) @@ -2970,17 +2966,17 @@ def _build_table_graph_metadata(table_meta): try: # Make sure we process vertices before edges - for table_meta in [t for t in six.itervalues(keyspace_meta.tables) + for table_meta in [t for t in keyspace_meta.tables.values() if t.name in self.keyspace_table_vertex_rows[keyspace_meta.name]]: _build_table_graph_metadata(table_meta) # all other tables... 
- for table_meta in [t for t in six.itervalues(keyspace_meta.tables) + for table_meta in [t for t in keyspace_meta.tables.values() if t.name not in self.keyspace_table_vertex_rows[keyspace_meta.name]]: _build_table_graph_metadata(table_meta) except Exception: # schema error, remove all graph metadata for this keyspace - for t in six.itervalues(keyspace_meta.tables): + for t in keyspace_meta.tables.values(): t.edge = t.vertex = None keyspace_meta._exc_info = sys.exc_info() log.exception("Error while parsing graph metadata for keyspace %s", keyspace_meta.name) @@ -3194,7 +3190,7 @@ def as_cql_query(self, formatted=False): if self.extensions: registry = _RegisteredExtensionType._extension_registry - for k in six.viewkeys(registry) & self.extensions: # no viewkeys on OrderedMapSerializeKey + for k in registry.keys() & self.extensions: # no viewkeys on OrderedMapSerializeKey ext = registry[k] cql = ext.after_table_cql(self, k, self.extensions[k]) if cql: diff --git a/cassandra/murmur3.py b/cassandra/murmur3.py index 7c8d641b32..282c43578d 100644 --- a/cassandra/murmur3.py +++ b/cassandra/murmur3.py @@ -1,4 +1,3 @@ -from six.moves import range import struct diff --git a/cassandra/protocol.py b/cassandra/protocol.py index 5e3610811e..3e4e984410 100644 --- a/cassandra/protocol.py +++ b/cassandra/protocol.py @@ -18,8 +18,6 @@ import socket from uuid import UUID -import six -from six.moves import range import io from cassandra import ProtocolVersion @@ -86,8 +84,7 @@ def __init__(cls, name, bases, dct): register_class(cls) -@six.add_metaclass(_RegisterMessageType) -class _MessageType(object): +class _MessageType(object, metaclass=_RegisterMessageType): tracing = False custom_payload = None @@ -137,8 +134,6 @@ def recv_body(cls, f, protocol_version, *args): def summary_msg(self): msg = 'Error from server: code=%04x [%s] message="%s"' \ % (self.code, self.summary, self.message) - if six.PY2 and isinstance(msg, six.text_type): - msg = msg.encode('utf-8') return msg def __str__(self): @@ -159,8 +154,7 @@ def __init__(cls, name, bases, dct): error_classes[cls.error_code] = cls -@six.add_metaclass(ErrorMessageSubclass) -class ErrorMessageSub(ErrorMessage): +class ErrorMessageSub(ErrorMessage, metaclass=ErrorMessageSubclass): error_code = None @@ -1358,7 +1352,7 @@ def read_binary_string(f): def write_string(f, s): - if isinstance(s, six.text_type): + if isinstance(s, str): s = s.encode('utf8') write_short(f, len(s)) f.write(s) @@ -1375,7 +1369,7 @@ def read_longstring(f): def write_longstring(f, s): - if isinstance(s, six.text_type): + if isinstance(s, str): s = s.encode('utf8') write_int(f, len(s)) f.write(s) diff --git a/cassandra/query.py b/cassandra/query.py index 7e4efc2511..e656124403 100644 --- a/cassandra/query.py +++ b/cassandra/query.py @@ -23,8 +23,6 @@ import re import struct import time -import six -from six.moves import range, zip import warnings from cassandra import ConsistencyLevel, OperationTimedOut @@ -817,7 +815,7 @@ def add(self, statement, parameters=None): Like with other statements, parameters must be a sequence, even if there is only one item. 
""" - if isinstance(statement, six.string_types): + if isinstance(statement, str): if parameters: encoder = Encoder() if self._session is None else self._session.encoder statement = bind_params(statement, parameters, encoder) @@ -901,10 +899,8 @@ def __str__(self): def bind_params(query, params, encoder): - if six.PY2 and isinstance(query, six.text_type): - query = query.encode('utf-8') if isinstance(params, dict): - return query % dict((k, encoder.cql_encode_all_types(v)) for k, v in six.iteritems(params)) + return query % dict((k, encoder.cql_encode_all_types(v)) for k, v in params.items()) else: return query % tuple(encoder.cql_encode_all_types(v) for v in params) diff --git a/cassandra/scylla/cloud.py b/cassandra/scylla/cloud.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cassandra/segment.py b/cassandra/segment.py index e3881c4402..78161fe520 100644 --- a/cassandra/segment.py +++ b/cassandra/segment.py @@ -13,7 +13,6 @@ # limitations under the License. import zlib -import six from cassandra import DriverException from cassandra.marshal import int32_pack @@ -54,9 +53,6 @@ def compute_crc24(data, length): def compute_crc32(data, value): crc32 = zlib.crc32(data, value) - if six.PY2: - crc32 &= 0xffffffff - return crc32 diff --git a/cassandra/util.py b/cassandra/util.py index dd5c58b01d..06d338f2e1 100644 --- a/cassandra/util.py +++ b/cassandra/util.py @@ -13,16 +13,22 @@ # limitations under the License. from __future__ import with_statement +from _weakref import ref import calendar +from collections import OrderedDict +from collections.abc import Mapping import datetime from functools import total_ordering -import logging from itertools import chain +import keyword +import logging +import pickle import random import re -import six -import uuid +import socket import sys +import time +import uuid _HAS_GEOMET = True try: @@ -212,147 +218,6 @@ def _resolve_contact_points_to_string_map(contact_points): ) -try: - from collections import OrderedDict -except ImportError: - # OrderedDict from Python 2.7+ - - # Copyright (c) 2009 Raymond Hettinger - # - # Permission is hereby granted, free of charge, to any person - # obtaining a copy of this software and associated documentation files - # (the "Software"), to deal in the Software without restriction, - # including without limitation the rights to use, copy, modify, merge, - # publish, distribute, sublicense, and/or sell copies of the Software, - # and to permit persons to whom the Software is furnished to do so, - # subject to the following conditions: - # - # The above copyright notice and this permission notice shall be - # included in all copies or substantial portions of the Software. - # - # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - # OTHER DEALINGS IN THE SOFTWARE. - from UserDict import DictMixin - - class OrderedDict(dict, DictMixin): # noqa - """ A dictionary which maintains the insertion order of keys. """ - - def __init__(self, *args, **kwds): - """ A dictionary which maintains the insertion order of keys. 
""" - - if len(args) > 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - try: - self.__end - except AttributeError: - self.clear() - self.update(*args, **kwds) - - def clear(self): - self.__end = end = [] - end += [None, end, end] # sentinel node for doubly linked list - self.__map = {} # key --> [key, prev, next] - dict.clear(self) - - def __setitem__(self, key, value): - if key not in self: - end = self.__end - curr = end[1] - curr[2] = end[1] = self.__map[key] = [key, curr, end] - dict.__setitem__(self, key, value) - - def __delitem__(self, key): - dict.__delitem__(self, key) - key, prev, next = self.__map.pop(key) - prev[2] = next - next[1] = prev - - def __iter__(self): - end = self.__end - curr = end[2] - while curr is not end: - yield curr[0] - curr = curr[2] - - def __reversed__(self): - end = self.__end - curr = end[1] - while curr is not end: - yield curr[0] - curr = curr[1] - - def popitem(self, last=True): - if not self: - raise KeyError('dictionary is empty') - if last: - key = next(reversed(self)) - else: - key = next(iter(self)) - value = self.pop(key) - return key, value - - def __reduce__(self): - items = [[k, self[k]] for k in self] - tmp = self.__map, self.__end - del self.__map, self.__end - inst_dict = vars(self).copy() - self.__map, self.__end = tmp - if inst_dict: - return (self.__class__, (items,), inst_dict) - return self.__class__, (items,) - - def keys(self): - return list(self) - - setdefault = DictMixin.setdefault - update = DictMixin.update - pop = DictMixin.pop - values = DictMixin.values - items = DictMixin.items - iterkeys = DictMixin.iterkeys - itervalues = DictMixin.itervalues - iteritems = DictMixin.iteritems - - def __repr__(self): - if not self: - return '%s()' % (self.__class__.__name__,) - return '%s(%r)' % (self.__class__.__name__, self.items()) - - def copy(self): - return self.__class__(self) - - @classmethod - def fromkeys(cls, iterable, value=None): - d = cls() - for key in iterable: - d[key] = value - return d - - def __eq__(self, other): - if isinstance(other, OrderedDict): - if len(self) != len(other): - return False - for p, q in zip(self.items(), other.items()): - if p != q: - return False - return True - return dict.__eq__(self, other) - - def __ne__(self, other): - return not self == other - - -# WeakSet from Python 2.7+ (https://code.google.com/p/weakrefset) - -from _weakref import ref - - class _IterationGuard(object): # This context manager registers itself in the current iterators of the # weak container, such as to delay all removals until the context manager @@ -789,10 +654,6 @@ def _find_insertion(self, x): sortedset = SortedSet # backwards-compatibility -from cassandra.compat import Mapping -from six.moves import cPickle - - class OrderedMap(Mapping): ''' An ordered map that accepts non-hashable types for keys. 
It also maintains the @@ -835,7 +696,7 @@ def __init__(self, *args, **kwargs): for k, v in e: self._insert(k, v) - for k, v in six.iteritems(kwargs): + for k, v in kwargs.items(): self._insert(k, v) def _insert(self, key, value): @@ -901,7 +762,7 @@ def popitem(self): raise KeyError() def _serialize_key(self, key): - return cPickle.dumps(key) + return pickle.dumps(key) class OrderedMapSerializedKey(OrderedMap): @@ -919,13 +780,6 @@ def _serialize_key(self, key): return self.cass_key_type.serialize(key, self.protocol_version) -import datetime -import time - -if six.PY3: - long = int - - @total_ordering class Time(object): ''' @@ -951,11 +805,11 @@ def __init__(self, value): - datetime.time: built-in time - string_type: a string time of the form "HH:MM:SS[.mmmuuunnn]" """ - if isinstance(value, six.integer_types): + if isinstance(value, int): self._from_timestamp(value) elif isinstance(value, datetime.time): self._from_time(value) - elif isinstance(value, six.string_types): + elif isinstance(value, str): self._from_timestring(value) else: raise TypeError('Time arguments must be a whole number, datetime.time, or string') @@ -1031,7 +885,7 @@ def __eq__(self, other): if isinstance(other, Time): return self.nanosecond_time == other.nanosecond_time - if isinstance(other, six.integer_types): + if isinstance(other, int): return self.nanosecond_time == other return self.nanosecond_time % Time.MICRO == 0 and \ @@ -1080,11 +934,11 @@ def __init__(self, value): - datetime.date: built-in date - string_type: a string time of the form "yyyy-mm-dd" """ - if isinstance(value, six.integer_types): + if isinstance(value, int): self.days_from_epoch = value elif isinstance(value, (datetime.date, datetime.datetime)): self._from_timetuple(value.timetuple()) - elif isinstance(value, six.string_types): + elif isinstance(value, str): self._from_datestring(value) else: raise TypeError('Date arguments must be a whole number, datetime.date, or string') @@ -1124,7 +978,7 @@ def __eq__(self, other): if isinstance(other, Date): return self.days_from_epoch == other.days_from_epoch - if isinstance(other, six.integer_types): + if isinstance(other, int): return self.days_from_epoch == other try: @@ -1151,97 +1005,9 @@ def __str__(self): # If we overflow datetime.[MIN|MAX] return str(self.days_from_epoch) -import socket -if hasattr(socket, 'inet_pton'): - inet_pton = socket.inet_pton - inet_ntop = socket.inet_ntop -else: - """ - Windows doesn't have socket.inet_pton and socket.inet_ntop until Python 3.4 - This is an alternative impl using ctypes, based on this win_inet_pton project: - https://github.com/hickeroar/win_inet_pton - """ - import ctypes - - class sockaddr(ctypes.Structure): - """ - Shared struct for ipv4 and ipv6. - - https://msdn.microsoft.com/en-us/library/windows/desktop/ms740496(v=vs.85).aspx - - ``__pad1`` always covers the port. - - When being used for ``sockaddr_in6``, ``ipv4_addr`` actually covers ``sin6_flowinfo``, resulting - in proper alignment for ``ipv6_addr``. - """ - _fields_ = [("sa_family", ctypes.c_short), - ("__pad1", ctypes.c_ushort), - ("ipv4_addr", ctypes.c_byte * 4), - ("ipv6_addr", ctypes.c_byte * 16), - ("__pad2", ctypes.c_ulong)] - - if hasattr(ctypes, 'windll'): - WSAStringToAddressA = ctypes.windll.ws2_32.WSAStringToAddressA - WSAAddressToStringA = ctypes.windll.ws2_32.WSAAddressToStringA - else: - def not_windows(*args): - raise OSError("IPv6 addresses cannot be handled on Windows. 
" - "Missing ctypes.windll") - WSAStringToAddressA = not_windows - WSAAddressToStringA = not_windows - - def inet_pton(address_family, ip_string): - if address_family == socket.AF_INET: - return socket.inet_aton(ip_string) - - addr = sockaddr() - addr.sa_family = address_family - addr_size = ctypes.c_int(ctypes.sizeof(addr)) - - if WSAStringToAddressA( - ip_string, - address_family, - None, - ctypes.byref(addr), - ctypes.byref(addr_size) - ) != 0: - raise socket.error(ctypes.FormatError()) - - if address_family == socket.AF_INET6: - return ctypes.string_at(addr.ipv6_addr, 16) - - raise socket.error('unknown address family') - - def inet_ntop(address_family, packed_ip): - if address_family == socket.AF_INET: - return socket.inet_ntoa(packed_ip) - - addr = sockaddr() - addr.sa_family = address_family - addr_size = ctypes.c_int(ctypes.sizeof(addr)) - ip_string = ctypes.create_string_buffer(128) - ip_string_size = ctypes.c_int(ctypes.sizeof(ip_string)) - - if address_family == socket.AF_INET6: - if len(packed_ip) != ctypes.sizeof(addr.ipv6_addr): - raise socket.error('packed IP wrong length for inet_ntoa') - ctypes.memmove(addr.ipv6_addr, packed_ip, 16) - else: - raise socket.error('unknown address family') - - if WSAAddressToStringA( - ctypes.byref(addr), - addr_size, - None, - ip_string, - ctypes.byref(ip_string_size) - ) != 0: - raise socket.error(ctypes.FormatError()) - - return ip_string[:ip_string_size.value - 1] - -import keyword +inet_pton = socket.inet_pton +inet_ntop = socket.inet_ntop # similar to collections.namedtuple, reproduced here because Python 2.6 did not have the rename logic @@ -1688,7 +1454,7 @@ def __init__(self, value, precision): if value is None: milliseconds = None - elif isinstance(value, six.integer_types): + elif isinstance(value, int): milliseconds = value elif isinstance(value, datetime.datetime): value = value.replace( @@ -1956,12 +1722,10 @@ def __init__(self, version): try: self.major = int(parts.pop()) - except ValueError: - six.reraise( - ValueError, - ValueError("Couldn't parse version {}. Version should start with a number".format(version)), - sys.exc_info()[2] - ) + except ValueError as e: + raise ValueError( + "Couldn't parse version {}. Version should start with a number".format(version))\ + .with_traceback(e.__traceback__) try: self.minor = int(parts.pop()) if parts else 0 self.patch = int(parts.pop()) if parts else 0 @@ -1994,8 +1758,8 @@ def __str__(self): @staticmethod def _compare_version_part(version, other_version, cmp): - if not (isinstance(version, six.integer_types) and - isinstance(other_version, six.integer_types)): + if not (isinstance(version, int) and + isinstance(other_version, int)): version = str(version) other_version = str(other_version) diff --git a/docs/installation.rst b/docs/installation.rst index bea6d6ece5..17a4e63324 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -70,10 +70,10 @@ support this:: *Optional:* Column-Level Encryption (CLE) Support -------------------------------------------------- The driver has built-in support for client-side encryption and -decryption of data. For more, see :doc:`column_encryption`. +decryption of data. For more, see :doc:`column_encryption`. -CLE depends on the Python `cryptography `_ module. -When installing Python driver 3.27.0. the `cryptography` module is +CLE depends on the Python `cryptography `_ module. +When installing Python driver 3.27.0. the `cryptography` module is also downloaded and installed. 
 If you are using Python driver 3.28.0 or later and want to use CLE, you must
 install the `cryptography `_ module.
@@ -211,7 +211,7 @@ If your sudo configuration does not allow SETENV, you must push the option flag
 applies these options to all dependencies (which break on the custom flag). Therefore, you must first install dependencies, then use install-option::
 
-    sudo pip install six futures
+    sudo pip install futures
     sudo pip install --install-option="--no-cython"
 
 
diff --git a/examples/concurrent_executions/execute_async_with_queue.py b/examples/concurrent_executions/execute_async_with_queue.py
index 60d2a69c3c..72d2c101cb 100644
--- a/examples/concurrent_executions/execute_async_with_queue.py
+++ b/examples/concurrent_executions/execute_async_with_queue.py
@@ -19,7 +19,7 @@
 import time
 import uuid
 
-from six.moves import queue
+import queue
 
 from cassandra.cluster import Cluster
 
diff --git a/requirements.txt b/requirements.txt
index f784fba1b9..100a12905a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1 @@
 geomet>=0.1,<0.3
-six >=1.9
-futures <=2.2.0
-# Futures is not required for Python 3, but it works up through 2.2.0 (after which it introduced breaking syntax).
-# This is left here to make sure install -r works with any runtime. When installing via setup.py, futures is omitted
-# for Python 3, in favor of the standard library implementation.
-# see PYTHON-393
diff --git a/setup.py b/setup.py
index 30ce602c3e..86e50e8b22 100644
--- a/setup.py
+++ b/setup.py
@@ -401,8 +401,7 @@ def run_setup(extensions):
     else:
         sys.stderr.write("Bypassing Cython setup requirement\n")
 
-    dependencies = ['six >=1.9',
-                    'geomet>=0.1,<0.3']
+    dependencies = ['geomet>=0.1,<0.3']
 
 _EXTRAS_REQUIRE = {
     'graph': ['gremlinpython==3.4.6'],
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
index a344931a4e..b158ed2bc0 100644
--- a/tests/integration/__init__.py
+++ b/tests/integration/__init__.py
@@ -30,7 +30,6 @@
 from threading import Event
 from subprocess import call
 from itertools import groupby
-import six
 import shutil
 
 from cassandra import OperationTimedOut, ReadTimeout, ReadFailure, WriteTimeout, WriteFailure, AlreadyExists,\
@@ -343,7 +342,6 @@ def _id_and_mark(f):
 lessthandse60 = unittest.skipUnless(DSE_VERSION and DSE_VERSION < Version('6.0'), "DSE version less than 6.0 required")
 
 pypy = unittest.skipUnless(platform.python_implementation() == "PyPy", "Test is skipped unless it's on PyPy")
-notpy3 = unittest.skipIf(sys.version_info >= (3, 0), "Test not applicable for Python 3.x runtime")
 requiresmallclockgranularity = unittest.skipIf("Windows" in platform.system() or "asyncore" in EVENT_LOOP_MANAGER,
                                                "This test is not suitible for environments with large clock granularity")
 requiressimulacron = unittest.skipIf(SIMULACRON_JAR is None or CASSANDRA_VERSION < Version("2.1"),
                                      "Simulacron jar hasn't been specified or C* version is 2.0")
@@ -610,7 +608,7 @@ def use_cluster(cluster_name, nodes, ipformat=None, start=True, workloads=None,
 
     if os.name == "nt":
         if CCM_CLUSTER:
-            for node in six.itervalues(CCM_CLUSTER.nodes):
+            for node in CCM_CLUSTER.nodes.values():
                 os.system("taskkill /F /PID " + str(node.pid))
     else:
         call(["pkill", "-9", "-f", ".ccm"])
diff --git a/tests/integration/advanced/__init__.py b/tests/integration/advanced/__init__.py
index e2fa1a4a4a..dffaccd190 100644
--- a/tests/integration/advanced/__init__.py
+++ b/tests/integration/advanced/__init__.py
@@ -14,7 +14,7 @@
 
 import unittest
 
-from six.moves.urllib.request import build_opener, Request, HTTPHandler
+from 
urllib.request import build_opener, Request, HTTPHandler import re import os import time diff --git a/tests/integration/advanced/graph/__init__.py b/tests/integration/advanced/graph/__init__.py index 6c9458dd02..91c9287e11 100644 --- a/tests/integration/advanced/graph/__init__.py +++ b/tests/integration/advanced/graph/__init__.py @@ -22,7 +22,6 @@ import datetime from cassandra.util import Point, LineString, Polygon, Duration -import six from cassandra.cluster import EXEC_PROFILE_GRAPH_DEFAULT, EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT from cassandra.cluster import GraphAnalyticsExecutionProfile, GraphExecutionProfile, EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT, \ @@ -457,15 +456,11 @@ def datatypes(): "duration1": ["Duration()", datetime.timedelta(1, 16, 0), GraphSON1Deserializer.deserialize_duration], "duration2": ["Duration()", datetime.timedelta(days=1, seconds=16, milliseconds=15), - GraphSON1Deserializer.deserialize_duration] + GraphSON1Deserializer.deserialize_duration], + "blob3": ["Blob()", bytes(b"Hello World Again"), GraphSON1Deserializer.deserialize_blob], + "blob4": ["Blob()", memoryview(b"And Again Hello World"), GraphSON1Deserializer.deserialize_blob] } - if six.PY2: - data["blob2"] = ["Blob()", buffer(b"Hello World"), GraphSON1Deserializer.deserialize_blob] - else: - data["blob3"] = ["Blob()", bytes(b"Hello World Again"), GraphSON1Deserializer.deserialize_blob] - data["blob4"] = ["Blob()", memoryview(b"And Again Hello World"), GraphSON1Deserializer.deserialize_blob] - if DSE_VERSION >= Version("5.1"): data["time1"] = ["Time()", datetime.time(12, 6, 12, 444), GraphSON1Deserializer.deserialize_time] data["time2"] = ["Time()", datetime.time(12, 6, 12), GraphSON1Deserializer.deserialize_time] @@ -965,7 +960,7 @@ def generate_tests(cls, schema=None, graphson=None, traversal=False): """Generate tests for a graph configuration""" def decorator(klass): if DSE_VERSION: - predicate = inspect.ismethod if six.PY2 else inspect.isfunction + predicate = inspect.isfunction for name, func in inspect.getmembers(klass, predicate=predicate): if not name.startswith('_test'): continue @@ -984,7 +979,7 @@ def generate_schema_tests(cls, schema=None): """Generate schema tests for a graph configuration""" def decorator(klass): if DSE_VERSION: - predicate = inspect.ismethod if six.PY2 else inspect.isfunction + predicate = inspect.isfunction for name, func in inspect.getmembers(klass, predicate=predicate): if not name.startswith('_test'): continue @@ -1026,7 +1021,7 @@ def __init__(self, properties): @property def non_pk_properties(self): - return {p: v for p, v in six.iteritems(self.properties) if p != 'pkid'} + return {p: v for p, v in self.properties.items() if p != 'pkid'} class GraphSchema(object): @@ -1134,7 +1129,7 @@ def clear(session): @classmethod def create_vertex_label(cls, session, vertex_label, execution_profile=EXEC_PROFILE_GRAPH_DEFAULT): statements = ["schema.propertyKey('pkid').Int().ifNotExists().create();"] - for k, v in six.iteritems(vertex_label.non_pk_properties): + for k, v in vertex_label.non_pk_properties.items(): typ = cls.sanitize_type(v) statements.append("schema.propertyKey('{name}').{type}.create();".format( name=k, type=typ @@ -1142,7 +1137,7 @@ def create_vertex_label(cls, session, vertex_label, execution_profile=EXEC_PROFI statements.append("schema.vertexLabel('{label}').partitionKey('pkid').properties(".format( label=vertex_label.label)) - property_names = [name for name in six.iterkeys(vertex_label.non_pk_properties)] + property_names = [name for name in 
vertex_label.non_pk_properties.keys()] statements.append(", ".join(["'{}'".format(p) for p in property_names])) statements.append(").create();") @@ -1189,7 +1184,7 @@ def create_vertex_label(cls, session, vertex_label, execution_profile=EXEC_PROFI statements = ["schema.vertexLabel('{label}').partitionBy('pkid', Int)".format( label=vertex_label.label)] - for name, typ in six.iteritems(vertex_label.non_pk_properties): + for name, typ in vertex_label.non_pk_properties.items(): typ = cls.sanitize_type(typ) statements.append(".property('{name}', {type})".format(name=name, type=typ)) statements.append(".create();") diff --git a/tests/integration/advanced/graph/fluent/__init__.py b/tests/integration/advanced/graph/fluent/__init__.py index 3962029f45..155de026c5 100644 --- a/tests/integration/advanced/graph/fluent/__init__.py +++ b/tests/integration/advanced/graph/fluent/__init__.py @@ -14,7 +14,6 @@ import sys import datetime -import six import time from collections import namedtuple from packaging.version import Version @@ -457,10 +456,10 @@ def _test_udt_with_namedtuples(self, schema, graphson): def _write_and_read_data_types(self, schema, graphson, use_schema=True): g = self.fetch_traversal_source(graphson) ep = self.get_execution_profile(graphson) - for data in six.itervalues(schema.fixtures.datatypes()): + for data in schema.fixtures.datatypes().values(): typ, value, deserializer = data vertex_label = VertexLabel([typ]) - property_name = next(six.iterkeys(vertex_label.non_pk_properties)) + property_name = next(iter(vertex_label.non_pk_properties.keys())) if use_schema or schema is CoreGraphSchema: schema.create_vertex_label(self.session, vertex_label, execution_profile=ep) @@ -536,9 +535,9 @@ def __test_udt(self, schema, graphson, address_class, address_with_tags_class, } g = self.fetch_traversal_source(graphson) - for typ, value in six.itervalues(data): + for typ, value in data.values(): vertex_label = VertexLabel([typ]) - property_name = next(six.iterkeys(vertex_label.non_pk_properties)) + property_name = next(iter(vertex_label.non_pk_properties.keys())) schema.create_vertex_label(self.session, vertex_label, execution_profile=ep) write_traversal = g.addV(str(vertex_label.label)).property('pkid', vertex_label.id). \ @@ -597,7 +596,7 @@ def _validate_prop(key, value, unittest): elif any(key.startswith(t) for t in ('Linestring',)): typ = LineString elif any(key.startswith(t) for t in ('neg',)): - typ = six.string_types + typ = str elif any(key.startswith(t) for t in ('date',)): typ = datetime.date elif any(key.startswith(t) for t in ('time',)): diff --git a/tests/integration/advanced/graph/fluent/test_graph.py b/tests/integration/advanced/graph/fluent/test_graph.py index d46a74a146..911e6d5d57 100644 --- a/tests/integration/advanced/graph/fluent/test_graph.py +++ b/tests/integration/advanced/graph/fluent/test_graph.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
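The fixture changes above repeatedly swap next(six.iterkeys(d)) for next(iter(d.keys())) and six.iteritems(d) for d.items(). A small illustrative sketch of both idioms; the property map here is invented for the example::

    # Illustrative only: dict iteration idioms replacing the removed six helpers.
    properties = {'name': 'Text()', 'age': 'Int()'}  # hypothetical property map

    first_key = next(iter(properties.keys()))   # was: next(six.iterkeys(properties))
    assert first_key == 'name'                  # dicts preserve insertion order in py3.7+

    for prop, typ in properties.items():        # was: six.iteritems(properties)
        assert isinstance(prop, str) and isinstance(typ, str)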
-import six - from cassandra import cluster from cassandra.cluster import ContinuousPagingOptions from cassandra.datastax.graph.fluent import DseGraph @@ -120,10 +118,10 @@ def _send_batch_and_read_results(self, schema, graphson, add_all=False, use_sche ep = self.get_execution_profile(graphson) batch = DseGraph.batch(session=self.session, execution_profile=self.get_execution_profile(graphson, traversal=True)) - for data in six.itervalues(datatypes): + for data in datatypes.values(): typ, value, deserializer = data vertex_label = VertexLabel([typ]) - property_name = next(six.iterkeys(vertex_label.non_pk_properties)) + property_name = next(iter(vertex_label.non_pk_properties.keys())) values[property_name] = value if use_schema or schema is CoreGraphSchema: schema.create_vertex_label(self.session, vertex_label, execution_profile=ep) diff --git a/tests/integration/advanced/graph/test_graph.py b/tests/integration/advanced/graph/test_graph.py index 277283ea5a..7f55229911 100644 --- a/tests/integration/advanced/graph/test_graph.py +++ b/tests/integration/advanced/graph/test_graph.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import six import re from cassandra import OperationTimedOut, InvalidRequest diff --git a/tests/integration/advanced/graph/test_graph_datatype.py b/tests/integration/advanced/graph/test_graph_datatype.py index 0445ce8030..8a261c94d9 100644 --- a/tests/integration/advanced/graph/test_graph_datatype.py +++ b/tests/integration/advanced/graph/test_graph_datatype.py @@ -15,7 +15,6 @@ import unittest import time -import six import logging from packaging.version import Version from collections import namedtuple @@ -67,13 +66,13 @@ def _validate_type(self, vertex): if any(type_indicator.startswith(t) for t in ('int', 'short', 'long', 'bigint', 'decimal', 'smallint', 'varint')): - typ = six.integer_types + typ = int elif any(type_indicator.startswith(t) for t in ('float', 'double')): typ = float elif any(type_indicator.startswith(t) for t in ('duration', 'date', 'negdate', 'time', 'blob', 'timestamp', 'point', 'linestring', 'polygon', 'inet', 'uuid')): - typ = six.text_type + typ = str else: pass self.fail("Received unexpected type: %s" % type_indicator) @@ -85,10 +84,10 @@ class GenericGraphDataTypeTest(GraphUnitTestCase): def _test_all_datatypes(self, schema, graphson): ep = self.get_execution_profile(graphson) - for data in six.itervalues(schema.fixtures.datatypes()): + for data in schema.fixtures.datatypes().values(): typ, value, deserializer = data vertex_label = VertexLabel([typ]) - property_name = next(six.iterkeys(vertex_label.non_pk_properties)) + property_name = next(iter(vertex_label.non_pk_properties.keys())) schema.create_vertex_label(self.session, vertex_label, execution_profile=ep) vertex = list(schema.add_vertex(self.session, vertex_label, property_name, value, execution_profile=ep))[0] @@ -167,9 +166,9 @@ def __test_udt(self, schema, graphson, address_class, address_with_tags_class, ), 'hello')] } - for typ, value in six.itervalues(data): + for typ, value in data.values(): vertex_label = VertexLabel([typ]) - property_name = next(six.iterkeys(vertex_label.non_pk_properties)) + property_name = next(iter(vertex_label.non_pk_properties.keys())) schema.create_vertex_label(self.session, vertex_label, execution_profile=ep) vertex = list(schema.add_vertex(self.session, vertex_label, property_name, value, execution_profile=ep))[0] diff --git a/tests/integration/advanced/graph/test_graph_query.py 
b/tests/integration/advanced/graph/test_graph_query.py index 9bc23e611a..0c889938d8 100644 --- a/tests/integration/advanced/graph/test_graph_query.py +++ b/tests/integration/advanced/graph/test_graph_query.py @@ -14,7 +14,6 @@ import sys -import six from packaging.version import Version from copy import copy @@ -83,7 +82,7 @@ def test_consistency_passing(self): res = s.execute_graph("null") for k, v in cl.items(): - self.assertEqual(res.response_future.message.custom_payload[graph_params[k]], six.b(ConsistencyLevel.value_to_name[v])) + self.assertEqual(res.response_future.message.custom_payload[graph_params[k]], ConsistencyLevel.value_to_name[v].encode()) # passed profile values override session defaults cl = {0: ConsistencyLevel.ALL, 1: ConsistencyLevel.QUORUM} @@ -97,7 +96,7 @@ def test_consistency_passing(self): res = s.execute_graph("null", execution_profile=tmp_profile) for k, v in cl.items(): - self.assertEqual(res.response_future.message.custom_payload[graph_params[k]], six.b(ConsistencyLevel.value_to_name[v])) + self.assertEqual(res.response_future.message.custom_payload[graph_params[k]], ConsistencyLevel.value_to_name[v].encode()) finally: default_profile.graph_options = default_graph_opts @@ -588,7 +587,7 @@ def _test_basic_query_with_type_wrapper(self, schema, graphson): vl = VertexLabel(['tupleOf(Int, Bigint)']) schema.create_vertex_label(self.session, vl, execution_profile=ep) - prop_name = next(six.iterkeys(vl.non_pk_properties)) + prop_name = next(iter(vl.non_pk_properties.keys())) with self.assertRaises(InvalidRequest): schema.add_vertex(self.session, vl, prop_name, (1, 42), execution_profile=ep) diff --git a/tests/integration/advanced/test_cont_paging.py b/tests/integration/advanced/test_cont_paging.py index 2e75d7061d..99de82647d 100644 --- a/tests/integration/advanced/test_cont_paging.py +++ b/tests/integration/advanced/test_cont_paging.py @@ -21,7 +21,6 @@ import unittest from itertools import cycle, count -from six.moves import range from packaging.version import Version import time diff --git a/tests/integration/cloud/test_cloud.py b/tests/integration/cloud/test_cloud.py index ef4909a257..03ff8237be 100644 --- a/tests/integration/cloud/test_cloud.py +++ b/tests/integration/cloud/test_cloud.py @@ -20,7 +20,6 @@ import unittest -import six from ssl import SSLContext, PROTOCOL_TLS from cassandra import DriverException, ConsistencyLevel, InvalidRequest @@ -114,10 +113,7 @@ def test_error_when_bundle_doesnt_exist(self): try: self.connect('/invalid/path/file.zip') except Exception as e: - if six.PY2: - self.assertIsInstance(e, IOError) - else: - self.assertIsInstance(e, FileNotFoundError) + self.assertIsInstance(e, FileNotFoundError) def test_load_balancing_policy_is_dcawaretokenlbp(self): self.connect(self.creds) @@ -163,7 +159,7 @@ def test_default_consistency(self): self.assertEqual(self.session.default_consistency_level, ConsistencyLevel.LOCAL_QUORUM) # Verify EXEC_PROFILE_DEFAULT, EXEC_PROFILE_GRAPH_DEFAULT, # EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT, EXEC_PROFILE_GRAPH_ANALYTICS_DEFAULT - for ep_key in six.iterkeys(self.cluster.profile_manager.profiles): + for ep_key in self.cluster.profile_manager.profiles.keys(): ep = self.cluster.profile_manager.profiles[ep_key] self.assertEqual( ep.consistency_level, diff --git a/tests/integration/cqlengine/columns/test_container_columns.py b/tests/integration/cqlengine/columns/test_container_columns.py index 2acf36457b..1f51770eac 100644 --- a/tests/integration/cqlengine/columns/test_container_columns.py +++ 
b/tests/integration/cqlengine/columns/test_container_columns.py @@ -15,7 +15,6 @@ from datetime import datetime, timedelta import json import logging -import six import sys import traceback from uuid import uuid4 @@ -48,7 +47,7 @@ class JsonTestColumn(columns.Column): def to_python(self, value): if value is None: return - if isinstance(value, six.string_types): + if isinstance(value, str): return json.loads(value) else: return value diff --git a/tests/integration/cqlengine/columns/test_value_io.py b/tests/integration/cqlengine/columns/test_value_io.py index 2c82fe16f7..758ca714a6 100644 --- a/tests/integration/cqlengine/columns/test_value_io.py +++ b/tests/integration/cqlengine/columns/test_value_io.py @@ -16,7 +16,6 @@ from datetime import datetime, timedelta, time from decimal import Decimal from uuid import uuid1, uuid4, UUID -import six from cassandra.cqlengine import columns from cassandra.cqlengine.management import sync_table @@ -101,15 +100,15 @@ def test_column_io(self): class TestBlobIO(BaseColumnIOTest): column = columns.Blob - pkey_val = six.b('blake'), uuid4().bytes - data_val = six.b('eggleston'), uuid4().bytes + pkey_val = b'blake', uuid4().bytes + data_val = b'eggleston', uuid4().bytes class TestBlobIO2(BaseColumnIOTest): column = columns.Blob - pkey_val = bytearray(six.b('blake')), uuid4().bytes - data_val = bytearray(six.b('eggleston')), uuid4().bytes + pkey_val = bytearray(b'blake'), uuid4().bytes + data_val = bytearray(b'eggleston'), uuid4().bytes class TestTextIO(BaseColumnIOTest): diff --git a/tests/integration/cqlengine/management/test_compaction_settings.py b/tests/integration/cqlengine/management/test_compaction_settings.py index 604e225586..554d941ecc 100644 --- a/tests/integration/cqlengine/management/test_compaction_settings.py +++ b/tests/integration/cqlengine/management/test_compaction_settings.py @@ -14,7 +14,6 @@ import copy from mock import patch -import six from cassandra.cqlengine import columns from cassandra.cqlengine.management import drop_table, sync_table, _get_table_metadata, _update_options @@ -110,7 +109,7 @@ def _verify_options(self, table_meta, expected_options): cql = table_meta.export_as_string() for name, value in expected_options.items(): - if isinstance(value, six.string_types): + if isinstance(value, str): self.assertIn("%s = '%s'" % (name, value), cql) else: start = cql.find("%s = {" % (name,)) diff --git a/tests/integration/cqlengine/management/test_management.py b/tests/integration/cqlengine/management/test_management.py index 2fd35b865e..e4febcc14b 100644 --- a/tests/integration/cqlengine/management/test_management.py +++ b/tests/integration/cqlengine/management/test_management.py @@ -13,7 +13,6 @@ # limitations under the License. 
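Most of the remaining churn in these test files is the bytes/text cleanup: six.b(s) becomes a bytes literal or str.encode(), while six.string_types and six.text_type both collapse to str. A quick sketch of the equivalences, with placeholder values::

    # Sketch of the six-to-Python-3 text/bytes mapping applied throughout this patch.
    assert 'blake'.encode() == b'blake'         # six.b('blake')
    assert isinstance('cql', str)               # isinstance(s, six.string_types)
    assert str(42) == '42'                      # six.text_type(42)
    assert bytearray(b'eggleston')[0] == 0x65   # bytearray accepts bytes; 'e' is 0x65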
import unittest -import six import mock import logging from packaging.version import Version diff --git a/tests/integration/cqlengine/model/test_class_construction.py b/tests/integration/cqlengine/model/test_class_construction.py index f764e78e5c..dae97c4438 100644 --- a/tests/integration/cqlengine/model/test_class_construction.py +++ b/tests/integration/cqlengine/model/test_class_construction.py @@ -15,7 +15,6 @@ from uuid import uuid4 import warnings -import six from cassandra.cqlengine import columns, CQLEngineException from cassandra.cqlengine.models import Model, ModelException, ModelDefinitionException, ColumnQueryEvaluator from cassandra.cqlengine.query import ModelQuerySet, DMLQuery diff --git a/tests/integration/cqlengine/operators/test_where_operators.py b/tests/integration/cqlengine/operators/test_where_operators.py index 555af11025..1e0134dbac 100644 --- a/tests/integration/cqlengine/operators/test_where_operators.py +++ b/tests/integration/cqlengine/operators/test_where_operators.py @@ -27,8 +27,6 @@ from tests.integration.cqlengine.operators import check_lookup from tests.integration import greaterthanorequalcass30 -import six - class TestWhereOperators(unittest.TestCase): @@ -47,15 +45,15 @@ def test_symbol_lookup(self): def test_operator_rendering(self): """ tests symbols are rendered properly """ - self.assertEqual("=", six.text_type(EqualsOperator())) - self.assertEqual("!=", six.text_type(NotEqualsOperator())) - self.assertEqual("IN", six.text_type(InOperator())) - self.assertEqual(">", six.text_type(GreaterThanOperator())) - self.assertEqual(">=", six.text_type(GreaterThanOrEqualOperator())) - self.assertEqual("<", six.text_type(LessThanOperator())) - self.assertEqual("<=", six.text_type(LessThanOrEqualOperator())) - self.assertEqual("CONTAINS", six.text_type(ContainsOperator())) - self.assertEqual("LIKE", six.text_type(LikeOperator())) + self.assertEqual("=", str(EqualsOperator())) + self.assertEqual("!=", str(NotEqualsOperator())) + self.assertEqual("IN", str(InOperator())) + self.assertEqual(">", str(GreaterThanOperator())) + self.assertEqual(">=", str(GreaterThanOrEqualOperator())) + self.assertEqual("<", str(LessThanOperator())) + self.assertEqual("<=", str(LessThanOrEqualOperator())) + self.assertEqual("CONTAINS", str(ContainsOperator())) + self.assertEqual("LIKE", str(LikeOperator())) class TestIsNotNull(BaseCassEngTestCase): diff --git a/tests/integration/cqlengine/statements/test_base_statement.py b/tests/integration/cqlengine/statements/test_base_statement.py index 3b5be60520..f245744352 100644 --- a/tests/integration/cqlengine/statements/test_base_statement.py +++ b/tests/integration/cqlengine/statements/test_base_statement.py @@ -14,7 +14,6 @@ import unittest from uuid import uuid4 -import six from cassandra.query import FETCH_SIZE_UNSET from cassandra.cqlengine.statements import BaseCQLStatement @@ -127,7 +126,7 @@ def test_like_operator(self): ss = SelectStatement(self.table_name) like_clause = "text_for_%" ss.add_where(Column(db_field='text'), LikeOperator(), like_clause) - self.assertEqual(six.text_type(ss), + self.assertEqual(str(ss), 'SELECT * FROM {} WHERE "text" LIKE %(0)s'.format(self.table_name)) result = execute(ss) diff --git a/tests/integration/cqlengine/statements/test_delete_statement.py b/tests/integration/cqlengine/statements/test_delete_statement.py index 5e2894a06b..745881f42f 100644 --- a/tests/integration/cqlengine/statements/test_delete_statement.py +++ b/tests/integration/cqlengine/statements/test_delete_statement.py @@ -17,7 +17,6 @@ 
from cassandra.cqlengine.columns import Column from cassandra.cqlengine.statements import DeleteStatement, WhereClause, MapDeleteClause, ConditionalClause from cassandra.cqlengine.operators import * -import six class DeleteStatementTests(TestCase): @@ -31,24 +30,24 @@ def test_single_field_is_listified(self): def test_field_rendering(self): """ tests that fields are properly added to the select statement """ ds = DeleteStatement('table', ['f1', 'f2']) - self.assertTrue(six.text_type(ds).startswith('DELETE "f1", "f2"'), six.text_type(ds)) + self.assertTrue(str(ds).startswith('DELETE "f1", "f2"'), str(ds)) self.assertTrue(str(ds).startswith('DELETE "f1", "f2"'), str(ds)) def test_none_fields_rendering(self): """ tests that a '*' is added if no fields are passed in """ ds = DeleteStatement('table', None) - self.assertTrue(six.text_type(ds).startswith('DELETE FROM'), six.text_type(ds)) + self.assertTrue(str(ds).startswith('DELETE FROM'), str(ds)) self.assertTrue(str(ds).startswith('DELETE FROM'), str(ds)) def test_table_rendering(self): ds = DeleteStatement('table', None) - self.assertTrue(six.text_type(ds).startswith('DELETE FROM table'), six.text_type(ds)) + self.assertTrue(str(ds).startswith('DELETE FROM table'), str(ds)) self.assertTrue(str(ds).startswith('DELETE FROM table'), str(ds)) def test_where_clause_rendering(self): ds = DeleteStatement('table', None) ds.add_where(Column(db_field='a'), EqualsOperator(), 'b') - self.assertEqual(six.text_type(ds), 'DELETE FROM table WHERE "a" = %(0)s', six.text_type(ds)) + self.assertEqual(str(ds), 'DELETE FROM table WHERE "a" = %(0)s', str(ds)) def test_context_update(self): ds = DeleteStatement('table', None) @@ -56,7 +55,7 @@ def test_context_update(self): ds.add_where(Column(db_field='a'), EqualsOperator(), 'b') ds.update_context_id(7) - self.assertEqual(six.text_type(ds), 'DELETE "d"[%(8)s] FROM table WHERE "a" = %(7)s') + self.assertEqual(str(ds), 'DELETE "d"[%(8)s] FROM table WHERE "a" = %(7)s') self.assertEqual(ds.get_context(), {'7': 'b', '8': 3}) def test_context(self): @@ -69,23 +68,23 @@ def test_range_deletion_rendering(self): ds.add_where(Column(db_field='a'), EqualsOperator(), 'b') ds.add_where(Column(db_field='created_at'), GreaterThanOrEqualOperator(), '0') ds.add_where(Column(db_field='created_at'), LessThanOrEqualOperator(), '10') - self.assertEqual(six.text_type(ds), 'DELETE FROM table WHERE "a" = %(0)s AND "created_at" >= %(1)s AND "created_at" <= %(2)s', six.text_type(ds)) + self.assertEqual(str(ds), 'DELETE FROM table WHERE "a" = %(0)s AND "created_at" >= %(1)s AND "created_at" <= %(2)s', str(ds)) ds = DeleteStatement('table', None) ds.add_where(Column(db_field='a'), EqualsOperator(), 'b') ds.add_where(Column(db_field='created_at'), InOperator(), ['0', '10', '20']) - self.assertEqual(six.text_type(ds), 'DELETE FROM table WHERE "a" = %(0)s AND "created_at" IN %(1)s', six.text_type(ds)) + self.assertEqual(str(ds), 'DELETE FROM table WHERE "a" = %(0)s AND "created_at" IN %(1)s', str(ds)) ds = DeleteStatement('table', None) ds.add_where(Column(db_field='a'), NotEqualsOperator(), 'b') - self.assertEqual(six.text_type(ds), 'DELETE FROM table WHERE "a" != %(0)s', six.text_type(ds)) + self.assertEqual(str(ds), 'DELETE FROM table WHERE "a" != %(0)s', str(ds)) def test_delete_conditional(self): where = [WhereClause('id', EqualsOperator(), 1)] conditionals = [ConditionalClause('f0', 'value0'), ConditionalClause('f1', 'value1')] ds = DeleteStatement('table', where=where, conditionals=conditionals) self.assertEqual(len(ds.conditionals), 
len(conditionals)) - self.assertEqual(six.text_type(ds), 'DELETE FROM table WHERE "id" = %(0)s IF "f0" = %(1)s AND "f1" = %(2)s', six.text_type(ds)) + self.assertEqual(str(ds), 'DELETE FROM table WHERE "id" = %(0)s IF "f0" = %(1)s AND "f1" = %(2)s', str(ds)) fields = ['one', 'two'] ds = DeleteStatement('table', fields=fields, where=where, conditionals=conditionals) - self.assertEqual(six.text_type(ds), 'DELETE "one", "two" FROM table WHERE "id" = %(0)s IF "f0" = %(1)s AND "f1" = %(2)s', six.text_type(ds)) + self.assertEqual(str(ds), 'DELETE "one", "two" FROM table WHERE "id" = %(0)s IF "f0" = %(1)s AND "f1" = %(2)s', str(ds)) diff --git a/tests/integration/cqlengine/statements/test_insert_statement.py b/tests/integration/cqlengine/statements/test_insert_statement.py index a1dcd08968..45485af912 100644 --- a/tests/integration/cqlengine/statements/test_insert_statement.py +++ b/tests/integration/cqlengine/statements/test_insert_statement.py @@ -13,8 +13,6 @@ # limitations under the License. import unittest -import six - from cassandra.cqlengine.columns import Column from cassandra.cqlengine.statements import InsertStatement @@ -27,7 +25,7 @@ def test_statement(self): ist.add_assignment(Column(db_field='c'), 'd') self.assertEqual( - six.text_type(ist), + str(ist), 'INSERT INTO table ("a", "c") VALUES (%(0)s, %(1)s)' ) @@ -38,7 +36,7 @@ def test_context_update(self): ist.update_context_id(4) self.assertEqual( - six.text_type(ist), + str(ist), 'INSERT INTO table ("a", "c") VALUES (%(4)s, %(5)s)' ) ctx = ist.get_context() @@ -48,4 +46,4 @@ def test_additional_rendering(self): ist = InsertStatement('table', ttl=60) ist.add_assignment(Column(db_field='a'), 'b') ist.add_assignment(Column(db_field='c'), 'd') - self.assertIn('USING TTL 60', six.text_type(ist)) + self.assertIn('USING TTL 60', str(ist)) diff --git a/tests/integration/cqlengine/statements/test_select_statement.py b/tests/integration/cqlengine/statements/test_select_statement.py index c6d1ac69f4..26c9c804cb 100644 --- a/tests/integration/cqlengine/statements/test_select_statement.py +++ b/tests/integration/cqlengine/statements/test_select_statement.py @@ -16,7 +16,6 @@ from cassandra.cqlengine.columns import Column from cassandra.cqlengine.statements import SelectStatement, WhereClause from cassandra.cqlengine.operators import * -import six class SelectStatementTests(unittest.TestCase): @@ -28,42 +27,42 @@ def test_single_field_is_listified(self): def test_field_rendering(self): """ tests that fields are properly added to the select statement """ ss = SelectStatement('table', ['f1', 'f2']) - self.assertTrue(six.text_type(ss).startswith('SELECT "f1", "f2"'), six.text_type(ss)) + self.assertTrue(str(ss).startswith('SELECT "f1", "f2"'), str(ss)) self.assertTrue(str(ss).startswith('SELECT "f1", "f2"'), str(ss)) def test_none_fields_rendering(self): """ tests that a '*' is added if no fields are passed in """ ss = SelectStatement('table') - self.assertTrue(six.text_type(ss).startswith('SELECT *'), six.text_type(ss)) + self.assertTrue(str(ss).startswith('SELECT *'), str(ss)) self.assertTrue(str(ss).startswith('SELECT *'), str(ss)) def test_table_rendering(self): ss = SelectStatement('table') - self.assertTrue(six.text_type(ss).startswith('SELECT * FROM table'), six.text_type(ss)) + self.assertTrue(str(ss).startswith('SELECT * FROM table'), str(ss)) self.assertTrue(str(ss).startswith('SELECT * FROM table'), str(ss)) def test_where_clause_rendering(self): ss = SelectStatement('table') ss.add_where(Column(db_field='a'), EqualsOperator(), 'b') - 
self.assertEqual(six.text_type(ss), 'SELECT * FROM table WHERE "a" = %(0)s', six.text_type(ss)) + self.assertEqual(str(ss), 'SELECT * FROM table WHERE "a" = %(0)s', str(ss)) def test_count(self): ss = SelectStatement('table', count=True, limit=10, order_by='d') ss.add_where(Column(db_field='a'), EqualsOperator(), 'b') - self.assertEqual(six.text_type(ss), 'SELECT COUNT(*) FROM table WHERE "a" = %(0)s LIMIT 10', six.text_type(ss)) - self.assertIn('LIMIT', six.text_type(ss)) - self.assertNotIn('ORDER', six.text_type(ss)) + self.assertEqual(str(ss), 'SELECT COUNT(*) FROM table WHERE "a" = %(0)s LIMIT 10', str(ss)) + self.assertIn('LIMIT', str(ss)) + self.assertNotIn('ORDER', str(ss)) def test_distinct(self): ss = SelectStatement('table', distinct_fields=['field2']) ss.add_where(Column(db_field='field1'), EqualsOperator(), 'b') - self.assertEqual(six.text_type(ss), 'SELECT DISTINCT "field2" FROM table WHERE "field1" = %(0)s', six.text_type(ss)) + self.assertEqual(str(ss), 'SELECT DISTINCT "field2" FROM table WHERE "field1" = %(0)s', str(ss)) ss = SelectStatement('table', distinct_fields=['field1', 'field2']) - self.assertEqual(six.text_type(ss), 'SELECT DISTINCT "field1", "field2" FROM table') + self.assertEqual(str(ss), 'SELECT DISTINCT "field1", "field2" FROM table') ss = SelectStatement('table', distinct_fields=['field1'], count=True) - self.assertEqual(six.text_type(ss), 'SELECT DISTINCT COUNT("field1") FROM table') + self.assertEqual(str(ss), 'SELECT DISTINCT COUNT("field1") FROM table') def test_context(self): ss = SelectStatement('table') @@ -89,20 +88,20 @@ def test_additional_rendering(self): limit=15, allow_filtering=True ) - qstr = six.text_type(ss) + qstr = str(ss) self.assertIn('LIMIT 15', qstr) self.assertIn('ORDER BY x, y', qstr) self.assertIn('ALLOW FILTERING', qstr) def test_limit_rendering(self): ss = SelectStatement('table', None, limit=10) - qstr = six.text_type(ss) + qstr = str(ss) self.assertIn('LIMIT 10', qstr) ss = SelectStatement('table', None, limit=0) - qstr = six.text_type(ss) + qstr = str(ss) self.assertNotIn('LIMIT', qstr) ss = SelectStatement('table', None, limit=None) - qstr = six.text_type(ss) + qstr = str(ss) self.assertNotIn('LIMIT', qstr) diff --git a/tests/integration/cqlengine/statements/test_update_statement.py b/tests/integration/cqlengine/statements/test_update_statement.py index 99105069dd..4429625bf4 100644 --- a/tests/integration/cqlengine/statements/test_update_statement.py +++ b/tests/integration/cqlengine/statements/test_update_statement.py @@ -18,7 +18,6 @@ from cassandra.cqlengine.statements import (UpdateStatement, WhereClause, AssignmentClause, SetUpdateClause, ListUpdateClause) -import six class UpdateStatementTests(unittest.TestCase): @@ -26,7 +25,7 @@ class UpdateStatementTests(unittest.TestCase): def test_table_rendering(self): """ tests that fields are properly added to the select statement """ us = UpdateStatement('table') - self.assertTrue(six.text_type(us).startswith('UPDATE table SET'), six.text_type(us)) + self.assertTrue(str(us).startswith('UPDATE table SET'), str(us)) self.assertTrue(str(us).startswith('UPDATE table SET'), str(us)) def test_rendering(self): @@ -34,10 +33,10 @@ def test_rendering(self): us.add_assignment(Column(db_field='a'), 'b') us.add_assignment(Column(db_field='c'), 'd') us.add_where(Column(db_field='a'), EqualsOperator(), 'x') - self.assertEqual(six.text_type(us), 'UPDATE table SET "a" = %(0)s, "c" = %(1)s WHERE "a" = %(2)s', six.text_type(us)) + self.assertEqual(str(us), 'UPDATE table SET "a" = %(0)s, "c" = 
%(1)s WHERE "a" = %(2)s', str(us)) us.add_where(Column(db_field='a'), NotEqualsOperator(), 'y') - self.assertEqual(six.text_type(us), 'UPDATE table SET "a" = %(0)s, "c" = %(1)s WHERE "a" = %(2)s AND "a" != %(3)s', six.text_type(us)) + self.assertEqual(str(us), 'UPDATE table SET "a" = %(0)s, "c" = %(1)s WHERE "a" = %(2)s AND "a" != %(3)s', str(us)) def test_context(self): us = UpdateStatement('table') @@ -52,19 +51,19 @@ def test_context_update(self): us.add_assignment(Column(db_field='c'), 'd') us.add_where(Column(db_field='a'), EqualsOperator(), 'x') us.update_context_id(3) - self.assertEqual(six.text_type(us), 'UPDATE table SET "a" = %(4)s, "c" = %(5)s WHERE "a" = %(3)s') + self.assertEqual(str(us), 'UPDATE table SET "a" = %(4)s, "c" = %(5)s WHERE "a" = %(3)s') self.assertEqual(us.get_context(), {'4': 'b', '5': 'd', '3': 'x'}) def test_additional_rendering(self): us = UpdateStatement('table', ttl=60) us.add_assignment(Column(db_field='a'), 'b') us.add_where(Column(db_field='a'), EqualsOperator(), 'x') - self.assertIn('USING TTL 60', six.text_type(us)) + self.assertIn('USING TTL 60', str(us)) def test_update_set_add(self): us = UpdateStatement('table') us.add_update(Set(Text, db_field='a'), set((1,)), 'add') - self.assertEqual(six.text_type(us), 'UPDATE table SET "a" = "a" + %(0)s') + self.assertEqual(str(us), 'UPDATE table SET "a" = "a" + %(0)s') def test_update_empty_set_add_does_not_assign(self): us = UpdateStatement('table') diff --git a/tests/integration/cqlengine/statements/test_where_clause.py b/tests/integration/cqlengine/statements/test_where_clause.py index 21671be086..0090fa0123 100644 --- a/tests/integration/cqlengine/statements/test_where_clause.py +++ b/tests/integration/cqlengine/statements/test_where_clause.py @@ -13,7 +13,6 @@ # limitations under the License. import unittest -import six from cassandra.cqlengine.operators import EqualsOperator from cassandra.cqlengine.statements import StatementException, WhereClause @@ -30,7 +29,7 @@ def test_where_clause_rendering(self): wc = WhereClause('a', EqualsOperator(), 'c') wc.set_context_id(5) - self.assertEqual('"a" = %(5)s', six.text_type(wc), six.text_type(wc)) + self.assertEqual('"a" = %(5)s', str(wc), str(wc)) self.assertEqual('"a" = %(5)s', str(wc), type(wc)) def test_equality_method(self): diff --git a/tests/integration/cqlengine/test_batch_query.py b/tests/integration/cqlengine/test_batch_query.py index 94496727a7..7887949bb8 100644 --- a/tests/integration/cqlengine/test_batch_query.py +++ b/tests/integration/cqlengine/test_batch_query.py @@ -13,9 +13,6 @@ # limitations under the License. 
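The statement-rendering assertions above now make a single str() call, since Python 3's str is the unicode type that six.text_type used to alias. A self-contained sketch mirroring the converted tests (it assumes cassandra.cqlengine is importable, as in the test suite)::

    # Mirrors the converted tests: one str() call exercises __str__, so the
    # parallel six.text_type assertion is redundant on Python 3.
    from cassandra.cqlengine.columns import Column
    from cassandra.cqlengine.operators import EqualsOperator
    from cassandra.cqlengine.statements import SelectStatement

    ss = SelectStatement('table')
    ss.add_where(Column(db_field='a'), EqualsOperator(), 'b')
    assert str(ss) == 'SELECT * FROM table WHERE "a" = %(0)s'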
import warnings -import six -import sure - from cassandra.cqlengine import columns from cassandra.cqlengine.management import drop_table, sync_table from cassandra.cqlengine.models import Model diff --git a/tests/integration/cqlengine/test_lwt_conditional.py b/tests/integration/cqlengine/test_lwt_conditional.py index f8459a95ad..45dbf86c68 100644 --- a/tests/integration/cqlengine/test_lwt_conditional.py +++ b/tests/integration/cqlengine/test_lwt_conditional.py @@ -14,7 +14,6 @@ import unittest import mock -import six from uuid import uuid4 from cassandra.cqlengine import columns @@ -113,7 +112,7 @@ def test_conditional_clause(self): tc = ConditionalClause('some_value', 23) tc.set_context_id(3) - self.assertEqual('"some_value" = %(3)s', six.text_type(tc)) + self.assertEqual('"some_value" = %(3)s', str(tc)) self.assertEqual('"some_value" = %(3)s', str(tc)) def test_batch_update_conditional(self): diff --git a/tests/integration/datatype_utils.py b/tests/integration/datatype_utils.py index 8a1c813baa..1f7fb50a05 100644 --- a/tests/integration/datatype_utils.py +++ b/tests/integration/datatype_utils.py @@ -14,8 +14,8 @@ from decimal import Decimal from datetime import datetime, date, time +import ipaddress from uuid import uuid1, uuid4 -import six from cassandra.util import OrderedMap, Date, Time, sortedset, Duration @@ -91,11 +91,10 @@ def get_sample_data(): sample_data[datatype] = 3.4028234663852886e+38 elif datatype == 'inet': - sample_data[datatype] = ('123.123.123.123', '2001:db8:85a3:8d3:1319:8a2e:370:7348') - if six.PY3: - import ipaddress - sample_data[datatype] += (ipaddress.IPv4Address("123.123.123.123"), - ipaddress.IPv6Address('2001:db8:85a3:8d3:1319:8a2e:370:7348')) + sample_data[datatype] = ('123.123.123.123', + '2001:db8:85a3:8d3:1319:8a2e:370:7348', + ipaddress.IPv4Address("123.123.123.123"), + ipaddress.IPv6Address('2001:db8:85a3:8d3:1319:8a2e:370:7348')) elif datatype == 'int': sample_data[datatype] = 2147483647 diff --git a/tests/integration/long/test_ipv6.py b/tests/integration/long/test_ipv6.py index 3e2f2ffc5e..4a741b70b3 100644 --- a/tests/integration/long/test_ipv6.py +++ b/tests/integration/long/test_ipv6.py @@ -13,7 +13,6 @@ # limitations under the License. 
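With the six.PY3 branch gone, datatype_utils imports ipaddress unconditionally, so the inet samples always carry both the string and object forms. A small sketch of the resulting shape, reusing the addresses from the fixture above::

    # Sketch: inet sample data now always includes stdlib ipaddress objects.
    import ipaddress

    inet_samples = ('123.123.123.123',
                    '2001:db8:85a3:8d3:1319:8a2e:370:7348',
                    ipaddress.IPv4Address('123.123.123.123'),
                    ipaddress.IPv6Address('2001:db8:85a3:8d3:1319:8a2e:370:7348'))
    assert str(inet_samples[2]) == inet_samples[0]
    assert str(inet_samples[3]) == inet_samples[1]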
import os, socket, errno -import six from ccmlib import common from cassandra.cluster import NoHostAvailable diff --git a/tests/integration/simulacron/test_connection.py b/tests/integration/simulacron/test_connection.py index 1def601d2e..d08676659f 100644 --- a/tests/integration/simulacron/test_connection.py +++ b/tests/integration/simulacron/test_connection.py @@ -14,7 +14,6 @@ import unittest import logging -import six import time from mock import Mock, patch diff --git a/tests/integration/simulacron/utils.py b/tests/integration/simulacron/utils.py index ba9573fd23..01d94fc539 100644 --- a/tests/integration/simulacron/utils.py +++ b/tests/integration/simulacron/utils.py @@ -15,7 +15,7 @@ import json import subprocess import time -from six.moves.urllib.request import build_opener, Request, HTTPHandler +from urllib.request import build_opener, Request, HTTPHandler from cassandra.metadata import SchemaParserV4, SchemaParserDSE68 diff --git a/tests/integration/standard/test_authentication.py b/tests/integration/standard/test_authentication.py index 2f8ffbb068..94f77a6916 100644 --- a/tests/integration/standard/test_authentication.py +++ b/tests/integration/standard/test_authentication.py @@ -16,7 +16,6 @@ import logging import time -import six from cassandra.cluster import NoHostAvailable from cassandra.auth import PlainTextAuthProvider, SASLClient, SaslAuthProvider diff --git a/tests/integration/standard/test_client_warnings.py b/tests/integration/standard/test_client_warnings.py index 37003d5213..8d6818c91a 100644 --- a/tests/integration/standard/test_client_warnings.py +++ b/tests/integration/standard/test_client_warnings.py @@ -15,7 +15,6 @@ import unittest -import six from cassandra.query import BatchStatement from tests.integration import use_singledc, PROTOCOL_VERSION, local, TestCluster diff --git a/tests/integration/standard/test_concurrent.py b/tests/integration/standard/test_concurrent.py index 15da526bde..ba891b4bd0 100644 --- a/tests/integration/standard/test_concurrent.py +++ b/tests/integration/standard/test_concurrent.py @@ -24,8 +24,6 @@ from tests.integration import use_singledc, PROTOCOL_VERSION, TestCluster -from six import next - import unittest log = logging.getLogger(__name__) diff --git a/tests/integration/standard/test_connection.py b/tests/integration/standard/test_connection.py index 76c8216d41..3323baf20b 100644 --- a/tests/integration/standard/test_connection.py +++ b/tests/integration/standard/test_connection.py @@ -17,7 +17,6 @@ from functools import partial from mock import patch import logging -from six.moves import range import sys import threading from threading import Thread, Event diff --git a/tests/integration/standard/test_custom_payload.py b/tests/integration/standard/test_custom_payload.py index 3290852862..f33ab4f04f 100644 --- a/tests/integration/standard/test_custom_payload.py +++ b/tests/integration/standard/test_custom_payload.py @@ -15,8 +15,6 @@ import unittest -import six - from cassandra.query import (SimpleStatement, BatchStatement, BatchType) from tests.integration import use_singledc, PROTOCOL_VERSION, local, TestCluster @@ -138,16 +136,16 @@ def validate_various_custom_payloads(self, statement): # Long key value pair key_value = "x" * 10 - custom_payload = {key_value: six.b(key_value)} + custom_payload = {key_value: key_value.encode()} self.execute_async_validate_custom_payload(statement=statement, custom_payload=custom_payload) # Max supported value key pairs according C* binary protocol v4 should be 65534 (unsigned short max value) for 
i in range(65534): - custom_payload[str(i)] = six.b('x') + custom_payload[str(i)] = b'x' self.execute_async_validate_custom_payload(statement=statement, custom_payload=custom_payload) # Add one custom payload to this is too many key value pairs and should fail - custom_payload[str(65535)] = six.b('x') + custom_payload[str(65535)] = b'x' with self.assertRaises(ValueError): self.execute_async_validate_custom_payload(statement=statement, custom_payload=custom_payload) diff --git a/tests/integration/standard/test_custom_protocol_handler.py b/tests/integration/standard/test_custom_protocol_handler.py index 60c5fb8969..aa74f18d1c 100644 --- a/tests/integration/standard/test_custom_protocol_handler.py +++ b/tests/integration/standard/test_custom_protocol_handler.py @@ -25,7 +25,6 @@ TestCluster, greaterthanorequalcass40, requirecassandra from tests.integration.datatype_utils import update_datatypes, PRIMITIVE_DATATYPES from tests.integration.standard.utils import create_table_with_all_types, get_all_primitive_params -from six import binary_type import uuid import mock @@ -78,7 +77,7 @@ def test_custom_raw_uuid_row_results(self): session.client_protocol_handler = CustomTestRawRowType result_set = session.execute("SELECT schema_version FROM system.local") raw_value = result_set[0][0] - self.assertTrue(isinstance(raw_value, binary_type)) + self.assertTrue(isinstance(raw_value, bytes)) self.assertEqual(len(raw_value), 16) # Ensure that we get normal uuid back when we re-connect diff --git a/tests/integration/standard/test_metadata.py b/tests/integration/standard/test_metadata.py index b83df22032..3534f29f9f 100644 --- a/tests/integration/standard/test_metadata.py +++ b/tests/integration/standard/test_metadata.py @@ -17,7 +17,6 @@ from collections import defaultdict import difflib import logging -import six import sys import time import os @@ -990,7 +989,7 @@ class Ext1(Ext0): update_v = s.prepare('UPDATE system_schema.views SET extensions=? WHERE keyspace_name=? 
AND view_name=?') # extensions registered, one present # -------------------------------------- - ext_map = {Ext0.name: six.b("THA VALUE")} + ext_map = {Ext0.name: b"THA VALUE"} [(s.execute(update_t, (ext_map, ks, t)), s.execute(update_v, (ext_map, ks, v))) for _ in self.cluster.metadata.all_hosts()] # we're manipulating metadata - do it on all hosts self.cluster.refresh_table_metadata(ks, t) @@ -1012,8 +1011,8 @@ class Ext1(Ext0): # extensions registered, one present # -------------------------------------- - ext_map = {Ext0.name: six.b("THA VALUE"), - Ext1.name: six.b("OTHA VALUE")} + ext_map = {Ext0.name: b"THA VALUE", + Ext1.name: b"OTHA VALUE"} [(s.execute(update_t, (ext_map, ks, t)), s.execute(update_v, (ext_map, ks, v))) for _ in self.cluster.metadata.all_hosts()] # we're manipulating metadata - do it on all hosts self.cluster.refresh_table_metadata(ks, t) @@ -1046,7 +1045,7 @@ def test_export_schema(self): cluster = TestCluster() cluster.connect() - self.assertIsInstance(cluster.metadata.export_schema_as_string(), six.string_types) + self.assertIsInstance(cluster.metadata.export_schema_as_string(), str) cluster.shutdown() def test_export_keyspace_schema(self): @@ -1059,8 +1058,8 @@ def test_export_keyspace_schema(self): for keyspace in cluster.metadata.keyspaces: keyspace_metadata = cluster.metadata.keyspaces[keyspace] - self.assertIsInstance(keyspace_metadata.export_as_string(), six.string_types) - self.assertIsInstance(keyspace_metadata.as_cql_query(), six.string_types) + self.assertIsInstance(keyspace_metadata.export_as_string(), str) + self.assertIsInstance(keyspace_metadata.as_cql_query(), str) cluster.shutdown() def assert_equal_diff(self, received, expected): @@ -1238,8 +1237,8 @@ def test_replicas(self): cluster.connect('test3rf') - self.assertNotEqual(list(cluster.metadata.get_replicas('test3rf', six.b('key'))), []) - host = list(cluster.metadata.get_replicas('test3rf', six.b('key')))[0] + self.assertNotEqual(list(cluster.metadata.get_replicas('test3rf', b'key')), []) + host = list(cluster.metadata.get_replicas('test3rf', b'key'))[0] self.assertEqual(host.datacenter, 'dc1') self.assertEqual(host.rack, 'r1') cluster.shutdown() diff --git a/tests/integration/standard/test_query.py b/tests/integration/standard/test_query.py index 8d2a3d74e2..8157e4c96a 100644 --- a/tests/integration/standard/test_query.py +++ b/tests/integration/standard/test_query.py @@ -34,7 +34,6 @@ import re import mock -import six log = logging.getLogger(__name__) @@ -460,10 +459,10 @@ def make_query_plan(self, working_keyspace=None, query=None): try: host = [live_hosts[self.host_index_to_use]] except IndexError as e: - six.raise_from(IndexError( + raise IndexError( 'You specified an index larger than the number of hosts. Total hosts: {}. 
Index specified: {}'.format( len(live_hosts), self.host_index_to_use - )), e) + )) from e return host diff --git a/tests/integration/standard/test_query_paging.py b/tests/integration/standard/test_query_paging.py index 8e0ca8becc..26c1ca0da6 100644 --- a/tests/integration/standard/test_query_paging.py +++ b/tests/integration/standard/test_query_paging.py @@ -19,7 +19,6 @@ import unittest from itertools import cycle, count -from six.moves import range from threading import Event from cassandra import ConsistencyLevel diff --git a/tests/integration/standard/test_single_interface.py b/tests/integration/standard/test_single_interface.py index 8d407be958..3a49541032 100644 --- a/tests/integration/standard/test_single_interface.py +++ b/tests/integration/standard/test_single_interface.py @@ -14,8 +14,6 @@ import unittest -import six - from cassandra import ConsistencyLevel from cassandra.query import SimpleStatement @@ -54,7 +52,7 @@ def test_single_interface(self): broadcast_rpc_ports = [] broadcast_ports = [] self.assertEqual(len(hosts), 3) - for endpoint, host in six.iteritems(hosts): + for endpoint, host in hosts.items(): self.assertEqual(endpoint.address, host.broadcast_rpc_address) self.assertEqual(endpoint.port, host.broadcast_rpc_port) diff --git a/tests/integration/standard/test_types.py b/tests/integration/standard/test_types.py index 6e2e9f7328..016c2b9785 100644 --- a/tests/integration/standard/test_types.py +++ b/tests/integration/standard/test_types.py @@ -15,9 +15,9 @@ import unittest from datetime import datetime +import ipaddress import math from packaging.version import Version -import six import cassandra from cassandra import InvalidRequest @@ -60,25 +60,7 @@ def test_can_insert_blob_type_as_string(self): params = ['key1', b'blobbyblob'] query = "INSERT INTO blobstring (a, b) VALUES (%s, %s)" - # In python2, with Cassandra > 2.0, we don't treat the 'byte str' type as a blob, so we'll encode it - # as a string literal and have the following failure. - if six.PY2 and self.cql_version >= (3, 1, 0): - # Blob values can't be specified using string notation in CQL 3.1.0 and - # above which is used by default in Cassandra 2.0. - if self.cass_version >= (2, 1, 0): - msg = r'.*Invalid STRING constant \(.*?\) for "b" of type blob.*' - else: - msg = r'.*Invalid STRING constant \(.*?\) for b of type blob.*' - self.assertRaisesRegex(InvalidRequest, msg, s.execute, query, params) - return - - # In python2, with Cassandra < 2.0, we can manually encode the 'byte str' type as hex for insertion in a blob. - if six.PY2: - cass_params = [params[0], params[1].encode('hex')] - s.execute(query, cass_params) - # In python 3, the 'bytes' type is treated as a blob, so we can correctly encode it with hex notation. 
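Note: six.raise_from(new_exc, cause), removed in the test_query.py hunk above, maps directly onto Python 3's chained-raise syntax. A standalone sketch of the same pattern (hypothetical helper, not driver API):

    def pick_host(live_hosts, index):
        try:
            return [live_hosts[index]]
        except IndexError as e:
            # 'raise ... from e' records the original exception as __cause__,
            # which is exactly what six.raise_from() emulated on Python 2.
            raise IndexError(
                'You specified an index larger than the number of hosts. '
                'Total hosts: {}. Index specified: {}'.format(len(live_hosts), index)
            ) from e
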
- else: - s.execute(query, params) + s.execute(query, params) results = s.execute("SELECT * FROM blobstring")[0] for expected, actual in zip(params, results): @@ -176,10 +158,9 @@ def test_can_insert_primitive_datatypes(self): # verify data result = s.execute("SELECT {0} FROM alltypes WHERE zz=%s".format(single_columns_string), (key,))[0][1] compare_value = data_sample - if six.PY3: - import ipaddress - if isinstance(data_sample, ipaddress.IPv4Address) or isinstance(data_sample, ipaddress.IPv6Address): - compare_value = str(data_sample) + + if isinstance(data_sample, ipaddress.IPv4Address) or isinstance(data_sample, ipaddress.IPv6Address): + compare_value = str(data_sample) self.assertEqual(result, compare_value) # try the same thing with a prepared statement @@ -1061,7 +1042,7 @@ def _daterange_round_trip(self, to_insert, expected=None): dr = results[0].dr # sometimes this is truncated in the assertEqual output on failure; - if isinstance(expected, six.string_types): + if isinstance(expected, str): self.assertEqual(str(dr), expected) else: self.assertEqual(dr, expected or to_insert) @@ -1115,7 +1096,7 @@ def _daterange_round_trip(self, to_insert, expected=None): dr = results[0].dr # sometimes this is truncated in the assertEqual output on failure; - if isinstance(expected, six.string_types): + if isinstance(expected, str): self.assertEqual(str(dr), expected) else: self.assertEqual(dr, expected or to_insert) diff --git a/tests/integration/standard/test_udts.py b/tests/integration/standard/test_udts.py index 4c7826fb98..ae056d7773 100644 --- a/tests/integration/standard/test_udts.py +++ b/tests/integration/standard/test_udts.py @@ -16,7 +16,6 @@ from collections import namedtuple from functools import partial -import six from cassandra import InvalidRequest from cassandra.cluster import UserTypeDoesNotExist, ExecutionProfile, EXEC_PROFILE_DEFAULT @@ -291,9 +290,9 @@ def test_can_insert_udts_with_nulls(self): self.assertEqual((None, None, None, None), s.execute(select)[0].b) # also test empty strings - s.execute(insert, [User('', None, None, six.binary_type())]) + s.execute(insert, [User('', None, None, bytes())]) results = s.execute("SELECT b FROM mytable WHERE a=0") - self.assertEqual(('', None, None, six.binary_type()), results[0].b) + self.assertEqual(('', None, None, bytes()), results[0].b) c.shutdown() @@ -718,7 +717,7 @@ def test_type_alteration(self): s.execute("INSERT INTO %s (k, v) VALUES (0, {v0 : 3, v1 : 0xdeadbeef})" % (self.table_name,)) val = s.execute('SELECT v FROM %s' % self.table_name)[0][0] self.assertEqual(val['v0'], 3) - self.assertEqual(val['v1'], six.b('\xde\xad\xbe\xef')) + self.assertEqual(val['v1'], b'\xde\xad\xbe\xef') @lessthancass30 def test_alter_udt(self): diff --git a/tests/unit/advanced/cloud/test_cloud.py b/tests/unit/advanced/cloud/test_cloud.py index a7cd83a8ce..f253e70454 100644 --- a/tests/unit/advanced/cloud/test_cloud.py +++ b/tests/unit/advanced/cloud/test_cloud.py @@ -9,7 +9,6 @@ import tempfile import os import shutil -import six import unittest @@ -96,8 +95,7 @@ def clean_tmp_dir(): } # The directory is not writtable.. 
we expect a permission error - exc = PermissionError if six.PY3 else OSError - with self.assertRaises(exc): + with self.assertRaises(PermissionError): cloud.get_cloud_config(config) # With use_default_tempdir, we expect an connection refused diff --git a/tests/unit/advanced/test_graph.py b/tests/unit/advanced/test_graph.py index a98a48c82f..2870b9b1ee 100644 --- a/tests/unit/advanced/test_graph.py +++ b/tests/unit/advanced/test_graph.py @@ -17,8 +17,6 @@ import unittest -import six - from cassandra import ConsistencyLevel from cassandra.policies import RetryPolicy from cassandra.graph import (SimpleGraphStatement, GraphOptions, GraphProtocol, Result, @@ -278,7 +276,7 @@ def test_get_options(self): other = GraphOptions(**kwargs) options = base.get_options_map(other) updated = self.opt_mapping['graph_name'] - self.assertEqual(options[updated], six.b('unit_test')) + self.assertEqual(options[updated], b'unit_test') for name in (n for n in self.opt_mapping.values() if n != updated): self.assertEqual(options[name], base._graph_options[name]) @@ -288,22 +286,22 @@ def test_get_options(self): def test_set_attr(self): expected = 'test@@@@' opts = GraphOptions(graph_name=expected) - self.assertEqual(opts.graph_name, six.b(expected)) + self.assertEqual(opts.graph_name, expected.encode()) expected = 'somethingelse####' opts.graph_name = expected - self.assertEqual(opts.graph_name, six.b(expected)) + self.assertEqual(opts.graph_name, expected.encode()) # will update options with set value another = GraphOptions() self.assertIsNone(another.graph_name) another.update(opts) - self.assertEqual(another.graph_name, six.b(expected)) + self.assertEqual(another.graph_name, expected.encode()) opts.graph_name = None self.assertIsNone(opts.graph_name) # will not update another with its set-->unset value another.update(opts) - self.assertEqual(another.graph_name, six.b(expected)) # remains unset + self.assertEqual(another.graph_name, expected.encode()) # remains unset opt_map = another.get_options_map(opts) self.assertEqual(opt_map, another._graph_options) @@ -318,7 +316,7 @@ def _verify_api_params(self, opts, api_params): self.assertEqual(len(opts._graph_options), len(api_params)) for name, value in api_params.items(): try: - value = six.b(value) + value = value.encode() except: pass # already bytes self.assertEqual(getattr(opts, name), value) @@ -335,8 +333,8 @@ def test_consistency_levels(self): # mapping from base opt_map = opts.get_options_map() - self.assertEqual(opt_map['graph-read-consistency'], six.b(ConsistencyLevel.value_to_name[read_cl])) - self.assertEqual(opt_map['graph-write-consistency'], six.b(ConsistencyLevel.value_to_name[write_cl])) + self.assertEqual(opt_map['graph-read-consistency'], ConsistencyLevel.value_to_name[read_cl].encode()) + self.assertEqual(opt_map['graph-write-consistency'], ConsistencyLevel.value_to_name[write_cl].encode()) # empty by default new_opts = GraphOptions() @@ -346,8 +344,8 @@ def test_consistency_levels(self): # set from other opt_map = new_opts.get_options_map(opts) - self.assertEqual(opt_map['graph-read-consistency'], six.b(ConsistencyLevel.value_to_name[read_cl])) - self.assertEqual(opt_map['graph-write-consistency'], six.b(ConsistencyLevel.value_to_name[write_cl])) + self.assertEqual(opt_map['graph-read-consistency'], ConsistencyLevel.value_to_name[read_cl].encode()) + self.assertEqual(opt_map['graph-write-consistency'], ConsistencyLevel.value_to_name[write_cl].encode()) def test_graph_source_convenience_attributes(self): opts = GraphOptions() diff --git 
a/tests/unit/cqlengine/test_connection.py b/tests/unit/cqlengine/test_connection.py index 962ee06b52..56136b6e8b 100644 --- a/tests/unit/cqlengine/test_connection.py +++ b/tests/unit/cqlengine/test_connection.py @@ -14,8 +14,6 @@ import unittest -import six - from cassandra.cluster import _ConfigMode from cassandra.cqlengine import connection from cassandra.query import dict_factory diff --git a/tests/unit/io/utils.py b/tests/unit/io/utils.py index ddfa2c3198..1475347ca6 100644 --- a/tests/unit/io/utils.py +++ b/tests/unit/io/utils.py @@ -26,8 +26,7 @@ import random from functools import wraps from itertools import cycle -import six -from six import binary_type, BytesIO +from io import BytesIO from mock import Mock import errno @@ -202,7 +201,7 @@ def set_socket(self, connection, obj): return setattr(connection, self.socket_attr_name, obj) def make_header_prefix(self, message_class, version=2, stream_id=0): - return binary_type().join(map(uint8_pack, [ + return bytes().join(map(uint8_pack, [ 0xff & (HEADER_DIRECTION_TO_CLIENT | version), 0, # flags (compression) stream_id, @@ -230,7 +229,7 @@ def make_error_body(self, code, msg): write_string(buf, msg) return buf.getvalue() - def make_msg(self, header, body=binary_type()): + def make_msg(self, header, body=bytes()): return header + uint32_pack(len(body)) + body def test_successful_connection(self): @@ -289,7 +288,7 @@ def recv_side_effect(*args): c.process_io_buffer = Mock() def chunk(size): - return six.b('a') * size + return b'a' * size buf_size = c.in_buffer_size @@ -436,7 +435,7 @@ def test_partial_header_read(self): self.get_socket(c).recv.return_value = message[1:] c.handle_read(*self.null_handle_function_args) - self.assertEqual(six.binary_type(), c._io_buffer.io_buffer.getvalue()) + self.assertEqual(bytes(), c._io_buffer.io_buffer.getvalue()) # let it write out a StartupMessage c.handle_write(*self.null_handle_function_args) @@ -463,7 +462,7 @@ def test_partial_message_read(self): # ... then read in the rest self.get_socket(c).recv.return_value = message[9:] c.handle_read(*self.null_handle_function_args) - self.assertEqual(six.binary_type(), c._io_buffer.io_buffer.getvalue()) + self.assertEqual(bytes(), c._io_buffer.io_buffer.getvalue()) # let it write out a StartupMessage c.handle_write(*self.null_handle_function_args) @@ -499,7 +498,7 @@ def test_mixed_message_and_buffer_sizes(self): for i in range(1, 15): c.process_io_buffer.reset_mock() c._io_buffer._io_buffer = io.BytesIO() - message = io.BytesIO(six.b('a') * (2**i)) + message = io.BytesIO(b'a' * (2**i)) def recv_side_effect(*args): if random.randint(1,10) % 3 == 0: diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py index 68cce526e7..0a2427c7ff 100644 --- a/tests/unit/test_auth.py +++ b/tests/unit/test_auth.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
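Note: the make_header_prefix() helper changed above assembles a CQL frame header one unsigned byte at a time; bytes() is the drop-in replacement for six.binary_type. A self-contained sketch of the idea (the constants and opcode below are assumptions for illustration, not taken from the patch):

    import struct

    def uint8_pack(value):                 # stand-in for cassandra.marshal.uint8_pack
        return struct.pack('>B', value)

    HEADER_DIRECTION_TO_CLIENT = 0x80      # assumed to match the driver constant
    version, stream_id, opcode = 2, 0, 8   # hypothetical RESULT frame

    header = bytes().join(map(uint8_pack, [
        0xff & (HEADER_DIRECTION_TO_CLIENT | version),
        0,           # flags (no compression)
        stream_id,
        opcode,
    ]))
    assert header == b'\x82\x00\x00\x08'
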
-import six from cassandra.auth import PlainTextAuthenticator import unittest @@ -24,6 +23,6 @@ class TestPlainTextAuthenticator(unittest.TestCase): def test_evaluate_challenge_with_unicode_data(self): authenticator = PlainTextAuthenticator("johnӁ", "doeӁ") self.assertEqual( - authenticator.evaluate_challenge(six.ensure_binary('PLAIN-START')), - six.ensure_binary("\x00johnӁ\x00doeӁ") + authenticator.evaluate_challenge(b'PLAIN-START'), + "\x00johnӁ\x00doeӁ".encode('utf-8') ) diff --git a/tests/unit/test_cluster.py b/tests/unit/test_cluster.py index 6755f118fd..c5f5def082 100644 --- a/tests/unit/test_cluster.py +++ b/tests/unit/test_cluster.py @@ -14,7 +14,6 @@ import unittest import logging -import six from mock import patch, Mock @@ -286,7 +285,7 @@ def test_default_profile(self): rf = session.execute_async("query", execution_profile='non-default') self._verify_response_future_profile(rf, non_default_profile) - for name, ep in six.iteritems(cluster.profile_manager.profiles): + for name, ep in cluster.profile_manager.profiles.items(): self.assertEqual(ep, session.get_execution_profile(name)) # invalid ep diff --git a/tests/unit/test_concurrent.py b/tests/unit/test_concurrent.py index 9f67531a3c..3e84220b27 100644 --- a/tests/unit/test_concurrent.py +++ b/tests/unit/test_concurrent.py @@ -19,7 +19,7 @@ from mock import Mock import time import threading -from six.moves.queue import PriorityQueue +from queue import PriorityQueue import sys import platform diff --git a/tests/unit/test_connection.py b/tests/unit/test_connection.py index bc6749a477..1d81376d4a 100644 --- a/tests/unit/test_connection.py +++ b/tests/unit/test_connection.py @@ -14,8 +14,7 @@ import unittest from mock import Mock, ANY, call, patch -import six -from six import BytesIO +from io import BytesIO import time from threading import Lock @@ -41,14 +40,14 @@ def make_connection(self): def make_header_prefix(self, message_class, version=Connection.protocol_version, stream_id=0): if Connection.protocol_version < 3: - return six.binary_type().join(map(uint8_pack, [ + return bytes().join(map(uint8_pack, [ 0xff & (HEADER_DIRECTION_TO_CLIENT | version), 0, # flags (compression) stream_id, message_class.opcode # opcode ])) else: - return six.binary_type().join(map(uint8_pack, [ + return bytes().join(map(uint8_pack, [ 0xff & (HEADER_DIRECTION_TO_CLIENT | version), 0, # flags (compression) 0, # MSB for v3+ stream diff --git a/tests/unit/test_control_connection.py b/tests/unit/test_control_connection.py index 53a5d6affc..cb863da0b1 100644 --- a/tests/unit/test_control_connection.py +++ b/tests/unit/test_control_connection.py @@ -14,8 +14,6 @@ import unittest -import six - from concurrent.futures import ThreadPoolExecutor from mock import Mock, ANY, call @@ -48,7 +46,7 @@ def __init__(self): def get_host(self, endpoint_or_address, port=None): if not isinstance(endpoint_or_address, EndPoint): - for host in six.itervalues(self.hosts): + for host in self.hosts.values(): if (host.address == endpoint_or_address and (port is None or host.broadcast_rpc_port is None or host.broadcast_rpc_port == port)): return host diff --git a/tests/unit/test_metadata.py b/tests/unit/test_metadata.py index b0a8b63b16..94fed13455 100644 --- a/tests/unit/test_metadata.py +++ b/tests/unit/test_metadata.py @@ -17,7 +17,6 @@ import logging from mock import Mock import os -import six import timeit import cassandra @@ -485,11 +484,11 @@ def test_murmur3_c(self): raise unittest.SkipTest('The cmurmur3 extension is not available') def _verify_hash(self, fn): - 
self.assertEqual(fn(six.b('123')), -7468325962851647638) + self.assertEqual(fn(b'123'), -7468325962851647638) self.assertEqual(fn(b'\x00\xff\x10\xfa\x99' * 10), 5837342703291459765) self.assertEqual(fn(b'\xfe' * 8), -8927430733708461935) self.assertEqual(fn(b'\x10' * 8), 1446172840243228796) - self.assertEqual(fn(six.b(str(cassandra.metadata.MAX_LONG))), 7162290910810015547) + self.assertEqual(fn(str(cassandra.metadata.MAX_LONG).encode()), 7162290910810015547) class MD5TokensTest(unittest.TestCase): @@ -504,28 +503,28 @@ def test_md5_tokens(self): class BytesTokensTest(unittest.TestCase): def test_bytes_tokens(self): - bytes_token = BytesToken(unhexlify(six.b('01'))) - self.assertEqual(bytes_token.value, six.b('\x01')) + bytes_token = BytesToken(unhexlify(b'01')) + self.assertEqual(bytes_token.value, b'\x01') self.assertEqual(str(bytes_token), "" % bytes_token.value) self.assertEqual(bytes_token.hash_fn('123'), '123') self.assertEqual(bytes_token.hash_fn(123), 123) self.assertEqual(bytes_token.hash_fn(str(cassandra.metadata.MAX_LONG)), str(cassandra.metadata.MAX_LONG)) def test_from_string(self): - from_unicode = BytesToken.from_string(six.text_type('0123456789abcdef')) - from_bin = BytesToken.from_string(six.b('0123456789abcdef')) + from_unicode = BytesToken.from_string('0123456789abcdef') + from_bin = BytesToken.from_string(b'0123456789abcdef') self.assertEqual(from_unicode, from_bin) - self.assertIsInstance(from_unicode.value, six.binary_type) - self.assertIsInstance(from_bin.value, six.binary_type) + self.assertIsInstance(from_unicode.value, bytes) + self.assertIsInstance(from_bin.value, bytes) def test_comparison(self): - tok = BytesToken.from_string(six.text_type('0123456789abcdef')) + tok = BytesToken.from_string('0123456789abcdef') token_high_order = uint16_unpack(tok.value[0:2]) self.assertLess(BytesToken(uint16_pack(token_high_order - 1)), tok) self.assertGreater(BytesToken(uint16_pack(token_high_order + 1)), tok) def test_comparison_unicode(self): - value = six.b('\'_-()"\xc2\xac') + value = b'\'_-()"\xc2\xac' t0 = BytesToken(value) t1 = BytesToken.from_string('00') self.assertGreater(t0, t1) @@ -642,7 +641,7 @@ class UnicodeIdentifiersTests(unittest.TestCase): Looking for encoding errors like PYTHON-447 """ - name = six.text_type(b'\'_-()"\xc2\xac'.decode('utf-8')) + name = b'\'_-()"\xc2\xac'.decode('utf-8') def test_keyspace_name(self): km = KeyspaceMetadata(self.name, False, 'SimpleStrategy', {'replication_factor': 1}) diff --git a/tests/unit/test_orderedmap.py b/tests/unit/test_orderedmap.py index 9ca5699204..5d99fc74a8 100644 --- a/tests/unit/test_orderedmap.py +++ b/tests/unit/test_orderedmap.py @@ -16,7 +16,6 @@ from cassandra.util import OrderedMap, OrderedMapSerializedKey from cassandra.cqltypes import EMPTY, UTF8Type, lookup_casstype -import six class OrderedMapTest(unittest.TestCase): def test_init(self): @@ -118,11 +117,11 @@ def test_iter(self): itr = iter(om) self.assertEqual(sum([1 for _ in itr]), len(keys)) - self.assertRaises(StopIteration, six.next, itr) + self.assertRaises(StopIteration, next, itr) self.assertEqual(list(iter(om)), keys) - self.assertEqual(list(six.iteritems(om)), items) - self.assertEqual(list(six.itervalues(om)), values) + self.assertEqual(list(om.items()), items) + self.assertEqual(list(om.values()), values) def test_len(self): self.assertEqual(len(OrderedMap()), 0) diff --git a/tests/unit/test_parameter_binding.py b/tests/unit/test_parameter_binding.py index 8820114dc3..78f3898e01 100644 --- a/tests/unit/test_parameter_binding.py +++ 
b/tests/unit/test_parameter_binding.py @@ -21,9 +21,6 @@ from cassandra.cqltypes import Int32Type from cassandra.util import OrderedDict -from six.moves import xrange -import six - class ParamBindingTest(unittest.TestCase): @@ -40,7 +37,7 @@ def test_sequence_param(self): self.assertEqual(result, "(1, 'a', 2.0)") def test_generator_param(self): - result = bind_params("%s", ((i for i in xrange(3)),), Encoder()) + result = bind_params("%s", ((i for i in range(3)),), Encoder()) self.assertEqual(result, "[0, 1, 2]") def test_none_param(self): @@ -149,7 +146,7 @@ def test_missing_value(self): def test_extra_value(self): self.bound.bind({'rk0': 0, 'rk1': 0, 'ck0': 0, 'v0': 0, 'should_not_be_here': 123}) # okay to have extra keys in dict - self.assertEqual(self.bound.values, [six.b('\x00') * 4] * 4) # four encoded zeros + self.assertEqual(self.bound.values, [b'\x00' * 4] * 4) # four encoded zeros self.assertRaises(ValueError, self.bound.bind, (0, 0, 0, 0, 123)) def test_values_none(self): diff --git a/tests/unit/test_policies.py b/tests/unit/test_policies.py index ec004ca9fe..f340f58634 100644 --- a/tests/unit/test_policies.py +++ b/tests/unit/test_policies.py @@ -17,8 +17,7 @@ from itertools import islice, cycle from mock import Mock, patch, call from random import randint -import six -from six.moves._thread import LockType +from _thread import LockType import sys import struct from threading import Thread @@ -37,8 +36,6 @@ from cassandra.pool import Host from cassandra.query import Statement -from six.moves import xrange - class LoadBalancingPolicyTest(unittest.TestCase): def test_non_implemented(self): @@ -75,7 +72,7 @@ def test_multiple_query_plans(self): hosts = [0, 1, 2, 3] policy = RoundRobinPolicy() policy.populate(None, hosts) - for i in xrange(20): + for i in range(20): qplan = list(policy.make_query_plan()) self.assertEqual(sorted(qplan), hosts) @@ -121,17 +118,17 @@ def test_thread_safety_during_modification(self): def check_query_plan(): try: - for i in xrange(100): + for i in range(100): list(policy.make_query_plan()) except Exception as exc: errors.append(exc) def host_up(): - for i in xrange(1000): + for i in range(1000): policy.on_up(randint(0, 99)) def host_down(): - for i in xrange(1000): + for i in range(1000): policy.on_down(randint(0, 99)) threads = [] @@ -142,7 +139,7 @@ def host_down(): # make the GIL switch after every instruction, maximizing # the chance of race conditions - check = six.PY2 or '__pypy__' in sys.builtin_module_names + check = '__pypy__' in sys.builtin_module_names if check: original_interval = sys.getcheckinterval() else: diff --git a/tests/unit/test_protocol.py b/tests/unit/test_protocol.py index 0f251ffc0e..eec9d73ca4 100644 --- a/tests/unit/test_protocol.py +++ b/tests/unit/test_protocol.py @@ -14,7 +14,6 @@ import unittest -import six from mock import Mock from cassandra import ProtocolVersion, UnsupportedOperation diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 2a2901aaff..8a3f00fa9d 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -14,8 +14,6 @@ import unittest -import six - from cassandra.query import BatchStatement, SimpleStatement @@ -25,7 +23,7 @@ class BatchStatementTest(unittest.TestCase): def test_clear(self): keyspace = 'keyspace' routing_key = 'routing_key' - custom_payload = {'key': six.b('value')} + custom_payload = {'key': b'value'} ss = SimpleStatement('whatever', keyspace=keyspace, routing_key=routing_key, custom_payload=custom_payload) diff --git a/tests/unit/test_response_future.py 
b/tests/unit/test_response_future.py index 273490072f..8d5850413e 100644 --- a/tests/unit/test_response_future.py +++ b/tests/unit/test_response_future.py @@ -17,7 +17,6 @@ from collections import deque from threading import RLock -import six from mock import Mock, MagicMock, ANY from cassandra import ConsistencyLevel, Unavailable, SchemaTargetType, SchemaChangeType, OperationTimedOut diff --git a/tests/unit/test_segment.py b/tests/unit/test_segment.py index f794b38b1d..0d0f146c16 100644 --- a/tests/unit/test_segment.py +++ b/tests/unit/test_segment.py @@ -14,7 +14,7 @@ import unittest -import six +from io import BytesIO from cassandra import DriverException from cassandra.segment import Segment, CrcException @@ -22,8 +22,6 @@ def to_bits(b): - if six.PY2: - b = six.byte2int(b) return '{:08b}'.format(b) class SegmentCodecTest(unittest.TestCase): @@ -50,7 +48,7 @@ def _header_to_bits(data): return bits[7:24] + bits[6:7] + bits[:6] def test_encode_uncompressed_header(self): - buffer = six.BytesIO() + buffer = BytesIO() segment_codec_no_compression.encode_header(buffer, len(self.small_msg), -1, True) self.assertEqual(buffer.tell(), 6) self.assertEqual( @@ -59,7 +57,7 @@ def test_encode_uncompressed_header(self): @unittest.skipUnless(segment_codec_lz4, ' lz4 not installed') def test_encode_compressed_header(self): - buffer = six.BytesIO() + buffer = BytesIO() compressed_length = len(segment_codec_lz4.compress(self.small_msg)) segment_codec_lz4.encode_header(buffer, compressed_length, len(self.small_msg), True) @@ -69,7 +67,7 @@ def test_encode_compressed_header(self): "{:017b}".format(compressed_length) + "00000000000110010" + "1" + "00000") def test_encode_uncompressed_header_with_max_payload(self): - buffer = six.BytesIO() + buffer = BytesIO() segment_codec_no_compression.encode_header(buffer, len(self.max_msg), -1, True) self.assertEqual(buffer.tell(), 6) self.assertEqual( @@ -77,13 +75,13 @@ def test_encode_uncompressed_header_with_max_payload(self): "11111111111111111" + "1" + "000000") def test_encode_header_fails_if_payload_too_big(self): - buffer = six.BytesIO() + buffer = BytesIO() for codec in [c for c in [segment_codec_no_compression, segment_codec_lz4] if c is not None]: with self.assertRaises(DriverException): codec.encode_header(buffer, len(self.large_msg), -1, False) def test_encode_uncompressed_header_not_self_contained_msg(self): - buffer = six.BytesIO() + buffer = BytesIO() # simulate the first chunk with the max size segment_codec_no_compression.encode_header(buffer, len(self.max_msg), -1, False) self.assertEqual(buffer.tell(), 6) @@ -95,7 +93,7 @@ def test_encode_uncompressed_header_not_self_contained_msg(self): @unittest.skipUnless(segment_codec_lz4, ' lz4 not installed') def test_encode_compressed_header_with_max_payload(self): - buffer = six.BytesIO() + buffer = BytesIO() compressed_length = len(segment_codec_lz4.compress(self.max_msg)) segment_codec_lz4.encode_header(buffer, compressed_length, len(self.max_msg), True) self.assertEqual(buffer.tell(), 8) @@ -105,7 +103,7 @@ def test_encode_compressed_header_with_max_payload(self): @unittest.skipUnless(segment_codec_lz4, ' lz4 not installed') def test_encode_compressed_header_not_self_contained_msg(self): - buffer = six.BytesIO() + buffer = BytesIO() # simulate the first chunk with the max size compressed_length = len(segment_codec_lz4.compress(self.max_msg)) segment_codec_lz4.encode_header(buffer, compressed_length, len(self.max_msg), False) @@ -118,7 +116,7 @@ def 
test_encode_compressed_header_not_self_contained_msg(self): "00000")) def test_decode_uncompressed_header(self): - buffer = six.BytesIO() + buffer = BytesIO() segment_codec_no_compression.encode_header(buffer, len(self.small_msg), -1, True) buffer.seek(0) header = segment_codec_no_compression.decode_header(buffer) @@ -128,7 +126,7 @@ def test_decode_uncompressed_header(self): @unittest.skipUnless(segment_codec_lz4, ' lz4 not installed') def test_decode_compressed_header(self): - buffer = six.BytesIO() + buffer = BytesIO() compressed_length = len(segment_codec_lz4.compress(self.small_msg)) segment_codec_lz4.encode_header(buffer, compressed_length, len(self.small_msg), True) buffer.seek(0) @@ -138,7 +136,7 @@ def test_decode_compressed_header(self): self.assertEqual(header.is_self_contained, True) def test_decode_header_fails_if_corrupted(self): - buffer = six.BytesIO() + buffer = BytesIO() segment_codec_no_compression.encode_header(buffer, len(self.small_msg), -1, True) # corrupt one byte buffer.seek(buffer.tell()-1) @@ -149,7 +147,7 @@ def test_decode_header_fails_if_corrupted(self): segment_codec_no_compression.decode_header(buffer) def test_decode_uncompressed_self_contained_segment(self): - buffer = six.BytesIO() + buffer = BytesIO() segment_codec_no_compression.encode(buffer, self.small_msg) buffer.seek(0) @@ -163,7 +161,7 @@ def test_decode_uncompressed_self_contained_segment(self): @unittest.skipUnless(segment_codec_lz4, ' lz4 not installed') def test_decode_compressed_self_contained_segment(self): - buffer = six.BytesIO() + buffer = BytesIO() segment_codec_lz4.encode(buffer, self.small_msg) buffer.seek(0) @@ -176,7 +174,7 @@ def test_decode_compressed_self_contained_segment(self): self.assertEqual(segment.payload, self.small_msg) def test_decode_multi_segments(self): - buffer = six.BytesIO() + buffer = BytesIO() segment_codec_no_compression.encode(buffer, self.large_msg) buffer.seek(0) @@ -194,7 +192,7 @@ def test_decode_multi_segments(self): @unittest.skipUnless(segment_codec_lz4, ' lz4 not installed') def test_decode_fails_if_corrupted(self): - buffer = six.BytesIO() + buffer = BytesIO() segment_codec_lz4.encode(buffer, self.small_msg) buffer.seek(buffer.tell()-1) buffer.write(b'0') @@ -205,7 +203,7 @@ def test_decode_fails_if_corrupted(self): @unittest.skipUnless(segment_codec_lz4, ' lz4 not installed') def test_decode_tiny_msg_not_compressed(self): - buffer = six.BytesIO() + buffer = BytesIO() segment_codec_lz4.encode(buffer, b'b') buffer.seek(0) header = segment_codec_lz4.decode_header(buffer) diff --git a/tests/unit/test_timestamps.py b/tests/unit/test_timestamps.py index fc1be071ad..ef8ac36f7b 100644 --- a/tests/unit/test_timestamps.py +++ b/tests/unit/test_timestamps.py @@ -15,7 +15,6 @@ import unittest import mock -import six from cassandra import timestamps from threading import Thread, Lock @@ -106,10 +105,7 @@ def assertLastCallArgRegex(self, call, pattern): last_warn_args, last_warn_kwargs = call self.assertEqual(len(last_warn_args), 1) self.assertEqual(len(last_warn_kwargs), 0) - six.assertRegex(self, - last_warn_args[0], - pattern, - ) + self.assertRegex(last_warn_args[0], pattern) def test_basic_log_content(self): """ diff --git a/tests/unit/test_types.py b/tests/unit/test_types.py index e85f5dbe67..a06bbd452d 100644 --- a/tests/unit/test_types.py +++ b/tests/unit/test_types.py @@ -18,8 +18,6 @@ import time from binascii import unhexlify -import six - import cassandra from cassandra import util from cassandra.cqltypes import ( @@ -167,7 +165,7 @@ def __init__(self, 
subtypes, names): @classmethod def apply_parameters(cls, subtypes, names): - return cls(subtypes, [unhexlify(six.b(name)) if name is not None else name for name in names]) + return cls(subtypes, [unhexlify(name.encode()) if name is not None else name for name in names]) class BarType(FooType): typename = 'org.apache.cassandra.db.marshal.BarType' @@ -556,8 +554,8 @@ class no_bounds_object(object): self.assertRaises(ValueError, DateRangeType.serialize, no_bounds_object, 5) def test_serialized_value_round_trip(self): - vals = [six.b('\x01\x00\x00\x01%\xe9a\xf9\xd1\x06\x00\x00\x01v\xbb>o\xff\x00'), - six.b('\x01\x00\x00\x00\xdcm\x03-\xd1\x06\x00\x00\x01v\xbb>o\xff\x00')] + vals = [b'\x01\x00\x00\x01%\xe9a\xf9\xd1\x06\x00\x00\x01v\xbb>o\xff\x00', + b'\x01\x00\x00\x00\xdcm\x03-\xd1\x06\x00\x00\x01v\xbb>o\xff\x00'] for serialized in vals: self.assertEqual( serialized, diff --git a/tox.ini b/tox.ini index 4aae7a0140..7d4dfe898e 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,6 @@ envlist = py{37,38},pypy [base] deps = nose mock<=1.0.1 - six packaging cython eventlet From 7845379b61fd50d4c3d6f20b215454f3d74d2c28 Mon Sep 17 00:00:00 2001 From: MohammadHossein Shahmohammadi Date: Mon, 30 Oct 2023 18:04:53 +0330 Subject: [PATCH 199/211] docs: Fix typo in add_callbacks (#1177) --- docs/api/cassandra/cluster.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/cassandra/cluster.rst b/docs/api/cassandra/cluster.rst index 2b3d7828a8..a9a9d378a4 100644 --- a/docs/api/cassandra/cluster.rst +++ b/docs/api/cassandra/cluster.rst @@ -215,7 +215,7 @@ .. automethod:: add_errback(fn, *args, **kwargs) - .. automethod:: add_callbacks(callback, errback, callback_args=(), callback_kwargs=None, errback_args=(), errback_args=None) + .. automethod:: add_callbacks(callback, errback, callback_args=(), callback_kwargs=None, errback_args=(), errback_kwargs=None) .. 
autoclass:: ResultSet () :members: From 0e25845502ce4fed12648c365189c7805dc28c1d Mon Sep 17 00:00:00 2001 From: Brad Schoening <5796692+bschoening@users.noreply.github.com> Date: Mon, 6 Nov 2023 13:54:06 -0500 Subject: [PATCH 200/211] Removed backup(.bak) files (#1185) --- Jenkinsfile.bak | 873 ------------------------------------------------ build.yaml.bak | 264 --------------- 2 files changed, 1137 deletions(-) delete mode 100644 Jenkinsfile.bak delete mode 100644 build.yaml.bak diff --git a/Jenkinsfile.bak b/Jenkinsfile.bak deleted file mode 100644 index 87b20804ca..0000000000 --- a/Jenkinsfile.bak +++ /dev/null @@ -1,873 +0,0 @@ -#!groovy - -def initializeEnvironment() { - env.DRIVER_DISPLAY_NAME = 'Cassandra Python Driver' - env.DRIVER_METRIC_TYPE = 'oss' - if (env.GIT_URL.contains('riptano/python-driver')) { - env.DRIVER_DISPLAY_NAME = 'private ' + env.DRIVER_DISPLAY_NAME - env.DRIVER_METRIC_TYPE = 'oss-private' - } else if (env.GIT_URL.contains('python-dse-driver')) { - env.DRIVER_DISPLAY_NAME = 'DSE Python Driver' - env.DRIVER_METRIC_TYPE = 'dse' - } - - env.GIT_SHA = "${env.GIT_COMMIT.take(7)}" - env.GITHUB_PROJECT_URL = "https://${GIT_URL.replaceFirst(/(git@|http:\/\/|https:\/\/)/, '').replace(':', '/').replace('.git', '')}" - env.GITHUB_BRANCH_URL = "${GITHUB_PROJECT_URL}/tree/${env.BRANCH_NAME}" - env.GITHUB_COMMIT_URL = "${GITHUB_PROJECT_URL}/commit/${env.GIT_COMMIT}" - - sh label: 'Assign Python global environment', script: '''#!/bin/bash -lex - pyenv global ${PYTHON_VERSION} - ''' - - sh label: 'Install socat; required for unix socket tests', script: '''#!/bin/bash -lex - sudo apt-get install socat - ''' - - sh label: 'Install the latest setuptools', script: '''#!/bin/bash -lex - pip install --upgrade pip - pip install -U setuptools - ''' - - sh label: 'Install CCM', script: '''#!/bin/bash -lex - pip install ${HOME}/ccm - ''' - - // Determine if server version is Apache Cassandra� or DataStax Enterprise - if (env.CASSANDRA_VERSION.split('-')[0] == 'dse') { - sh label: 'Install DataStax Enterprise requirements', script: '''#!/bin/bash -lex - pip install -r test-datastax-requirements.txt - ''' - } else { - sh label: 'Install Apache CassandraⓇ requirements', script: '''#!/bin/bash -lex - pip install -r test-requirements.txt - ''' - - sh label: 'Uninstall the geomet dependency since it is not required for Cassandra', script: '''#!/bin/bash -lex - pip uninstall -y geomet - ''' - - } - - sh label: 'Install unit test modules', script: '''#!/bin/bash -lex - pip install nose-ignore-docstring nose-exclude service_identity - ''' - - if (env.CYTHON_ENABLED == 'True') { - sh label: 'Install cython modules', script: '''#!/bin/bash -lex - pip install cython numpy - ''' - } - - sh label: 'Download Apache CassandraⓇ or DataStax Enterprise', script: '''#!/bin/bash -lex - . ${CCM_ENVIRONMENT_SHELL} ${CASSANDRA_VERSION} - ''' - - sh label: 'Display Python and environment information', script: '''#!/bin/bash -le - # Load CCM environment variables - set -o allexport - . 
${HOME}/environment.txt - set +o allexport - - python --version - pip --version - printenv | sort - ''' -} - -def installDriverAndCompileExtensions() { - if (env.CYTHON_ENABLED == 'True') { - sh label: 'Install the driver and compile with C extensions with Cython', script: '''#!/bin/bash -lex - python setup.py build_ext --inplace - ''' - } else { - sh label: 'Install the driver and compile with C extensions without Cython', script: '''#!/bin/bash -lex - python setup.py build_ext --inplace --no-cython - ''' - } -} - -def executeStandardTests() { - - sh label: 'Execute unit tests', script: '''#!/bin/bash -lex - # Load CCM environment variables - set -o allexport - . ${HOME}/environment.txt - set +o allexport - - EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_results.xml tests/unit/ || true - EVENT_LOOP_MANAGER=eventlet VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_eventlet_results.xml tests/unit/io/test_eventletreactor.py || true - EVENT_LOOP_MANAGER=gevent VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_gevent_results.xml tests/unit/io/test_geventreactor.py || true - ''' - - sh label: 'Execute Simulacron integration tests', script: '''#!/bin/bash -lex - # Load CCM environment variables - set -o allexport - . ${HOME}/environment.txt - set +o allexport - - SIMULACRON_JAR="${HOME}/simulacron.jar" - SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_results.xml tests/integration/simulacron/ || true - - # Run backpressure tests separately to avoid memory issue - SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_1_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_paused_connections || true - SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_2_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_queued_requests_timeout || true - SIMULACRON_JAR=${SIMULACRON_JAR} 
EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_3_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_cluster_busy || true - SIMULACRON_JAR=${SIMULACRON_JAR} EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --exclude test_backpressure.py --xunit-file=simulacron_backpressure_4_results.xml tests/integration/simulacron/test_backpressure.py:TCPBackpressureTests.test_node_busy || true - ''' - - sh label: 'Execute CQL engine integration tests', script: '''#!/bin/bash -lex - # Load CCM environment variables - set -o allexport - . ${HOME}/environment.txt - set +o allexport - - EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=cqle_results.xml tests/integration/cqlengine/ || true - ''' - - sh label: 'Execute Apache CassandraⓇ integration tests', script: '''#!/bin/bash -lex - # Load CCM environment variables - set -o allexport - . ${HOME}/environment.txt - set +o allexport - - EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml tests/integration/standard/ || true - ''' - - if (env.CASSANDRA_VERSION.split('-')[0] == 'dse' && env.CASSANDRA_VERSION.split('-')[1] != '4.8') { - sh label: 'Execute DataStax Enterprise integration tests', script: '''#!/bin/bash -lex - # Load CCM environment variable - set -o allexport - . ${HOME}/environment.txt - set +o allexport - - EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CASSANDRA_DIR=${CCM_INSTALL_DIR} DSE_VERSION=${DSE_VERSION} ADS_HOME="${HOME}/" VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=dse_results.xml tests/integration/advanced/ || true - ''' - } - - sh label: 'Execute DataStax Constellation integration tests', script: '''#!/bin/bash -lex - # Load CCM environment variable - set -o allexport - . 
${HOME}/environment.txt - set +o allexport - - EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CLOUD_PROXY_PATH="${HOME}/proxy/" CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=advanced_results.xml tests/integration/cloud/ || true - ''' - - if (env.EXECUTE_LONG_TESTS == 'True') { - sh label: 'Execute long running integration tests', script: '''#!/bin/bash -lex - # Load CCM environment variable - set -o allexport - . ${HOME}/environment.txt - set +o allexport - - EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --exclude-dir=tests/integration/long/upgrade --with-ignore-docstrings --with-xunit --xunit-file=long_results.xml tests/integration/long/ || true - ''' - } -} - -def executeDseSmokeTests() { - sh label: 'Execute profile DataStax Enterprise smoke test integration tests', script: '''#!/bin/bash -lex - # Load CCM environment variable - set -o allexport - . ${HOME}/environment.txt - set +o allexport - - EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} DSE_VERSION=${DSE_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml tests/integration/standard/test_dse.py || true - ''' -} - -def executeEventLoopTests() { - sh label: 'Execute profile event loop manager integration tests', script: '''#!/bin/bash -lex - # Load CCM environment variable - set -o allexport - . ${HOME}/environment.txt - set +o allexport - - EVENT_LOOP_TESTS=( - "tests/integration/standard/test_cluster.py" - "tests/integration/standard/test_concurrent.py" - "tests/integration/standard/test_connection.py" - "tests/integration/standard/test_control_connection.py" - "tests/integration/standard/test_metrics.py" - "tests/integration/standard/test_query.py" - "tests/integration/simulacron/test_endpoint.py" - "tests/integration/long/test_ssl.py" - ) - EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} CCM_ARGS="${CCM_ARGS}" DSE_VERSION=${DSE_VERSION} CASSANDRA_VERSION=${CCM_CASSANDRA_VERSION} MAPPED_CASSANDRA_VERSION=${MAPPED_CASSANDRA_VERSION} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml ${EVENT_LOOP_TESTS[@]} || true - ''' -} - -def executeUpgradeTests() { - sh label: 'Execute profile upgrade integration tests', script: '''#!/bin/bash -lex - # Load CCM environment variable - set -o allexport - . 
${HOME}/environment.txt - set +o allexport - - EVENT_LOOP_MANAGER=${EVENT_LOOP_MANAGER} VERIFY_CYTHON=${CYTHON_ENABLED} nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=upgrade_results.xml tests/integration/upgrade || true - ''' -} - -def executeTests() { - switch(params.PROFILE) { - case 'DSE-SMOKE-TEST': - executeDseSmokeTests() - break - case 'EVENT-LOOP': - executeEventLoopTests() - break - case 'UPGRADE': - executeUpgradeTests() - break - default: - executeStandardTests() - break - } -} - -def notifySlack(status = 'started') { - // Set the global pipeline scoped environment (this is above each matrix) - env.BUILD_STATED_SLACK_NOTIFIED = 'true' - - def buildType = 'Commit' - if (params.CI_SCHEDULE != 'DO-NOT-CHANGE-THIS-SELECTION') { - buildType = "${params.CI_SCHEDULE.toLowerCase().capitalize()}" - } - - def color = 'good' // Green - if (status.equalsIgnoreCase('aborted')) { - color = '808080' // Grey - } else if (status.equalsIgnoreCase('unstable')) { - color = 'warning' // Orange - } else if (status.equalsIgnoreCase('failed')) { - color = 'danger' // Red - } - - def message = """Build ${status} for ${env.DRIVER_DISPLAY_NAME} [${buildType}] -<${env.GITHUB_BRANCH_URL}|${env.BRANCH_NAME}> - <${env.RUN_DISPLAY_URL}|#${env.BUILD_NUMBER}> - <${env.GITHUB_COMMIT_URL}|${env.GIT_SHA}>""" - if (params.CI_SCHEDULE != 'DO-NOT-CHANGE-THIS-SELECTION') { - message += " - ${params.CI_SCHEDULE_PYTHON_VERSION} - ${params.EVENT_LOOP_MANAGER}" - } - if (!status.equalsIgnoreCase('Started')) { - message += """ -${status} after ${currentBuild.durationString - ' and counting'}""" - } - - slackSend color: "${color}", - channel: "#python-driver-dev-bots", - message: "${message}" -} - -def submitCIMetrics(buildType) { - long durationMs = currentBuild.duration - long durationSec = durationMs / 1000 - long nowSec = (currentBuild.startTimeInMillis + durationMs) / 1000 - def branchNameNoPeriods = env.BRANCH_NAME.replaceAll('\\.', '_') - def durationMetric = "okr.ci.python.${env.DRIVER_METRIC_TYPE}.${buildType}.${branchNameNoPeriods} ${durationSec} ${nowSec}" - - timeout(time: 1, unit: 'MINUTES') { - withCredentials([string(credentialsId: 'lab-grafana-address', variable: 'LAB_GRAFANA_ADDRESS'), - string(credentialsId: 'lab-grafana-port', variable: 'LAB_GRAFANA_PORT')]) { - withEnv(["DURATION_METRIC=${durationMetric}"]) { - sh label: 'Send runtime metrics to labgrafana', script: '''#!/bin/bash -lex - echo "${DURATION_METRIC}" | nc -q 5 ${LAB_GRAFANA_ADDRESS} ${LAB_GRAFANA_PORT} - ''' - } - } - } -} - -def describePerCommitStage() { - script { - def type = 'standard' - def serverDescription = 'current Apache CassandaraⓇ and supported DataStax Enterprise versions' - if (env.BRANCH_NAME ==~ /long-python.*/) { - type = 'long' - } else if (env.BRANCH_NAME ==~ /dev-python.*/) { - type = 'dev' - } - - currentBuild.displayName = "Per-Commit (${env.EVENT_LOOP_MANAGER} | ${type.capitalize()})" - currentBuild.description = "Per-Commit build and ${type} testing of ${serverDescription} against Python v2.7.18 and v3.5.9 using ${env.EVENT_LOOP_MANAGER} event loop manager" - } - - sh label: 'Describe the python environment', script: '''#!/bin/bash -lex - python -V - pip freeze - ''' -} - -def describeScheduledTestingStage() { - script { - def type = params.CI_SCHEDULE.toLowerCase().capitalize() - def displayName = "${type} schedule (${env.EVENT_LOOP_MANAGER}" - if (env.CYTHON_ENABLED == 'True') { - displayName += " | Cython" - } - if 
(params.PROFILE != 'NONE') { - displayName += " | ${params.PROFILE}" - } - displayName += ")" - currentBuild.displayName = displayName - - def serverVersionDescription = "${params.CI_SCHEDULE_SERVER_VERSION.replaceAll(' ', ', ')} server version(s) in the matrix" - def pythonVersionDescription = "${params.CI_SCHEDULE_PYTHON_VERSION.replaceAll(' ', ', ')} Python version(s) in the matrix" - def description = "${type} scheduled testing using ${env.EVENT_LOOP_MANAGER} event loop manager" - if (env.CYTHON_ENABLED == 'True') { - description += ", with Cython enabled" - } - if (params.PROFILE != 'NONE') { - description += ", ${params.PROFILE} profile" - } - description += ", ${serverVersionDescription}, and ${pythonVersionDescription}" - currentBuild.description = description - } -} - -def describeAdhocTestingStage() { - script { - def serverType = params.ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION.split('-')[0] - def serverDisplayName = 'Apache CassandaraⓇ' - def serverVersion = " v${serverType}" - if (serverType == 'ALL') { - serverDisplayName = "all ${serverDisplayName} and DataStax Enterprise server versions" - serverVersion = '' - } else { - try { - serverVersion = " v${env.ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION.split('-')[1]}" - } catch (e) { - ;; // no-op - } - if (serverType == 'dse') { - serverDisplayName = 'DataStax Enterprise' - } - } - def displayName = "${params.ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION} for v${params.ADHOC_BUILD_AND_EXECUTE_TESTS_PYTHON_VERSION} (${env.EVENT_LOOP_MANAGER}" - if (env.CYTHON_ENABLED == 'True') { - displayName += " | Cython" - } - if (params.PROFILE != 'NONE') { - displayName += " | ${params.PROFILE}" - } - displayName += ")" - currentBuild.displayName = displayName - - def description = "Testing ${serverDisplayName} ${serverVersion} using ${env.EVENT_LOOP_MANAGER} against Python ${params.ADHOC_BUILD_AND_EXECUTE_TESTS_PYTHON_VERSION}" - if (env.CYTHON_ENABLED == 'True') { - description += ", with Cython" - } - if (params.PROFILE == 'NONE') { - if (params.EXECUTE_LONG_TESTS) { - description += ", with" - } else { - description += ", without" - } - description += " long tests executed" - } else { - description += ", ${params.PROFILE} profile" - } - currentBuild.description = description - } -} - -def branchPatternCron = ~"(master)" -def riptanoPatternCron = ~"(riptano)" - -pipeline { - agent none - - // Global pipeline timeout - options { - timeout(time: 10, unit: 'HOURS') - buildDiscarder(logRotator(artifactNumToKeepStr: '10', // Keep only the last 10 artifacts - numToKeepStr: '50')) // Keep only the last 50 build records - } - - parameters { - choice( - name: 'ADHOC_BUILD_TYPE', - choices: ['BUILD', 'BUILD-AND-EXECUTE-TESTS'], - description: '''

Perform an adhoc build operation

Choice       Description
FULL         All server versions and Python runtimes, tested with and without Cython
DEVELOP      Smaller matrix for dev purposes
CASSANDRA    All Apache Cassandra server versions
DEFAULT      Default to the build context

Choice       Description
2.1          Apache CassandraⓇ v2.1.x
2.2          Apache CassandraⓇ v2.2.x
3.0          Apache CassandraⓇ v3.0.x
4.0          Apache CassandraⓇ v4.0.x
dse-5.0.15   DataStax Enterprise v5.0.x (Long Term Support)
dse-5.1.35   DataStax Enterprise v5.1.x
dse-6.0.18   DataStax Enterprise v6.0.x
dse-6.7.17   DataStax Enterprise v6.7.x
dse-6.8.30   DataStax Enterprise v6.8.x (CURRENTLY UNDER DEVELOPMENT)

Choice                     Description
BUILD                      Performs a Per-Commit build
BUILD-AND-EXECUTE-TESTS    Performs a build and executes the integration and unit tests
''') - choice( - name: 'ADHOC_BUILD_AND_EXECUTE_TESTS_PYTHON_VERSION', - choices: ['2.7.18', '3.4.10', '3.5.9', '3.6.10', '3.7.7', '3.8.3'], - description: 'Python version to use for adhoc BUILD-AND-EXECUTE-TESTS ONLY!') - choice( - name: 'ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION', - choices: ['2.1', // Legacy Apache CassandraⓇ - '2.2', // Legacy Apache CassandraⓇ - '3.0', // Previous Apache CassandraⓇ - '3.11', // Current Apache CassandraⓇ - '4.0', // Development Apache CassandraⓇ - 'dse-5.0', // Long Term Support DataStax Enterprise - 'dse-5.1', // Legacy DataStax Enterprise - 'dse-6.0', // Previous DataStax Enterprise - 'dse-6.7', // Previous DataStax Enterprise - 'dse-6.8', // Current DataStax Enterprise - 'ALL'], - description: '''Apache CassandraⓇ and DataStax Enterprise server version to use for adhoc BUILD-AND-EXECUTE-TESTS ONLY!
Choice     Description
2.1        Apache CassandraⓇ v2.1.x
2.2        Apache CassandraⓇ v2.2.x
3.0        Apache CassandraⓇ v3.0.x
3.11       Apache CassandraⓇ v3.11.x
4.0        Apache CassandraⓇ v4.x (CURRENTLY UNDER DEVELOPMENT)
dse-5.0    DataStax Enterprise v5.0.x (Long Term Support)
dse-5.1    DataStax Enterprise v5.1.x
dse-6.0    DataStax Enterprise v6.0.x
dse-6.7    DataStax Enterprise v6.7.x
dse-6.8    DataStax Enterprise v6.8.x (CURRENTLY UNDER DEVELOPMENT)
''') - booleanParam( - name: 'CYTHON', - defaultValue: false, - description: 'Flag to determine if Cython should be enabled for scheduled or adhoc builds') - booleanParam( - name: 'EXECUTE_LONG_TESTS', - defaultValue: false, - description: 'Flag to determine if long integration tests should be executed for scheduled or adhoc builds') - choice( - name: 'EVENT_LOOP_MANAGER', - choices: ['LIBEV', 'GEVENT', 'EVENTLET', 'ASYNCIO', 'ASYNCORE', 'TWISTED'], - description: '''

Event loop manager to utilize for scheduled or adhoc builds

Choice      Description
LIBEV       A full-featured and high-performance event loop that is loosely modeled after libevent, but without its limitations and bugs
GEVENT      A co-routine-based Python networking library that uses greenlet to provide a high-level synchronous API on top of the libev or libuv event loop
EVENTLET    A concurrent networking library for Python that allows you to change how you run your code, not how you write it
ASYNCIO     A library to write concurrent code using the async/await syntax
ASYNCORE    A module providing the basic infrastructure for writing asynchronous socket service clients and servers
TWISTED     An event-driven networking engine written in Python and licensed under the open source MIT license
''') - choice( - name: 'PROFILE', - choices: ['NONE', 'DSE-SMOKE-TEST', 'EVENT-LOOP', 'UPGRADE'], - description: '''

Profile to utilize for scheduled or adhoc builds

Choice            Description
NONE              Execute the standard tests for the driver
DSE-SMOKE-TEST    Execute only the DataStax Enterprise smoke tests
EVENT-LOOP        Execute only the event loop tests for the specified event loop manager (see: EVENT_LOOP_MANAGER)
UPGRADE           Execute only the upgrade tests
''') - choice( - name: 'CI_SCHEDULE', - choices: ['DO-NOT-CHANGE-THIS-SELECTION', 'WEEKNIGHTS', 'WEEKENDS'], - description: 'CI testing schedule to execute periodically scheduled builds and tests of the driver (DO NOT CHANGE THIS SELECTION)') - string( - name: 'CI_SCHEDULE_PYTHON_VERSION', - defaultValue: 'DO-NOT-CHANGE-THIS-SELECTION', - description: 'CI testing python version to utilize for scheduled test runs of the driver (DO NOT CHANGE THIS SELECTION)') - string( - name: 'CI_SCHEDULE_SERVER_VERSION', - defaultValue: 'DO-NOT-CHANGE-THIS-SELECTION', - description: 'CI testing server version to utilize for scheduled test runs of the driver (DO NOT CHANGE THIS SELECTION)') - } - - triggers { - parameterizedCron((branchPatternCron.matcher(env.BRANCH_NAME).matches() && !riptanoPatternCron.matcher(GIT_URL).find()) ? """ - # Every weeknight (Monday - Friday) around 4:00 AM - # These schedules will run with and without Cython enabled for Python v2.7.18 and v3.5.9 - H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1 dse-6.0 dse-6.7 - H 4 * * 1-5 %CI_SCHEDULE=WEEKNIGHTS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.5.9;CI_SCHEDULE_SERVER_VERSION=2.2 3.11 dse-5.1 dse-6.0 dse-6.7 - - # Every Saturday around 12:00, 4:00 and 8:00 PM - # These schedules are for weekly libev event manager runs with and without Cython for most of the Python versions (excludes v3.5.9.x) - H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 - H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.4.10;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 - H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.6.10;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 - H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.7.7;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 - H 12 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=LIBEV;CI_SCHEDULE_PYTHON_VERSION=3.8.3;CI_SCHEDULE_SERVER_VERSION=2.1 3.0 dse-5.1 dse-6.0 dse-6.7 - # These schedules are for weekly gevent event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.10.x) - H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.9;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.10;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.7;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 16 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=GEVENT;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.3;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - # These schedules are for weekly eventlet event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.10.x) - H 20 * * 6 
%CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.9;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.10;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.7;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 20 * * 6 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=EVENTLET;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.3;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - - # Every Sunday around 12:00 and 4:00 AM - # These schedules are for weekly asyncore event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.10.x) - H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.9;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.10;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.7;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 0 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=ASYNCORE;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.3;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - # These schedules are for weekly twisted event manager event loop only runs with and without Cython for most of the Python versions (excludes v3.4.10.x) - H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=2.7.18;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.5.9;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.6.10;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.7.7;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - H 4 * * 7 %CI_SCHEDULE=WEEKENDS;EVENT_LOOP_MANAGER=TWISTED;PROFILE=EVENT-LOOP;CI_SCHEDULE_PYTHON_VERSION=3.8.3;CI_SCHEDULE_SERVER_VERSION=2.1 2.2 3.0 3.11 dse-5.1 dse-6.0 dse-6.7 - """ : "") - } - - environment { - OS_VERSION = 'ubuntu/bionic64/python-driver' - CYTHON_ENABLED = "${params.CYTHON ? 'True' : 'False'}" - EVENT_LOOP_MANAGER = "${params.EVENT_LOOP_MANAGER.toLowerCase()}" - EXECUTE_LONG_TESTS = "${params.EXECUTE_LONG_TESTS ? 
'True' : 'False'}" - CCM_ENVIRONMENT_SHELL = '/usr/local/bin/ccm_environment.sh' - CCM_MAX_HEAP_SIZE = '1536M' - } - - stages { - stage ('Per-Commit') { - options { - timeout(time: 2, unit: 'HOURS') - } - when { - beforeAgent true - branch pattern: '((dev|long)-)?python-.*', comparator: 'REGEXP' - allOf { - expression { params.ADHOC_BUILD_TYPE == 'BUILD' } - expression { params.CI_SCHEDULE == 'DO-NOT-CHANGE-THIS-SELECTION' } - not { buildingTag() } - } - } - - matrix { - axes { - axis { - name 'CASSANDRA_VERSION' - values '3.11', // Current Apache Cassandra - 'dse-6.8' // Current DataStax Enterprise - } - axis { - name 'PYTHON_VERSION' - values '2.7.18', '3.5.9' - } - axis { - name 'CYTHON_ENABLED' - values 'False' - } - } - - agent { - label "${OS_VERSION}" - } - - stages { - stage('Initialize-Environment') { - steps { - initializeEnvironment() - script { - if (env.BUILD_STATED_SLACK_NOTIFIED != 'true') { - notifySlack() - } - } - } - } - stage('Describe-Build') { - steps { - describePerCommitStage() - } - } - stage('Install-Driver-And-Compile-Extensions') { - steps { - installDriverAndCompileExtensions() - } - } - stage('Execute-Tests') { - steps { - - script { - if (env.BRANCH_NAME ==~ /long-python.*/) { - withEnv(["EXECUTE_LONG_TESTS=True"]) { - executeTests() - } - } - else { - executeTests() - } - } - } - post { - always { - junit testResults: '*_results.xml' - } - } - } - } - } - post { - always { - node('master') { - submitCIMetrics('commit') - } - } - aborted { - notifySlack('aborted') - } - success { - notifySlack('completed') - } - unstable { - notifySlack('unstable') - } - failure { - notifySlack('FAILED') - } - } - } - - stage ('Scheduled-Testing') { - when { - beforeAgent true - allOf { - expression { params.ADHOC_BUILD_TYPE == 'BUILD' } - expression { params.CI_SCHEDULE != 'DO-NOT-CHANGE-THIS-SELECTION' } - not { buildingTag() } - } - } - matrix { - axes { - axis { - name 'CASSANDRA_VERSION' - values '2.1', // Legacy Apache Cassandra - '2.2', // Legacy Apache Cassandra - '3.0', // Previous Apache Cassandra - '3.11', // Current Apache Cassandra - 'dse-5.1', // Legacy DataStax Enterprise - 'dse-6.0', // Previous DataStax Enterprise - 'dse-6.7' // Current DataStax Enterprise - } - axis { - name 'CYTHON_ENABLED' - values 'True', 'False' - } - } - when { - beforeAgent true - allOf { - expression { return params.CI_SCHEDULE_SERVER_VERSION.split(' ').any { it =~ /(ALL|${env.CASSANDRA_VERSION})/ } } - } - } - - environment { - PYTHON_VERSION = "${params.CI_SCHEDULE_PYTHON_VERSION}" - } - agent { - label "${OS_VERSION}" - } - - stages { - stage('Initialize-Environment') { - steps { - initializeEnvironment() - script { - if (env.BUILD_STATED_SLACK_NOTIFIED != 'true') { - notifySlack() - } - } - } - } - stage('Describe-Build') { - steps { - describeScheduledTestingStage() - } - } - stage('Install-Driver-And-Compile-Extensions') { - steps { - installDriverAndCompileExtensions() - } - } - stage('Execute-Tests') { - steps { - executeTests() - } - post { - always { - junit testResults: '*_results.xml' - } - } - } - } - } - post { - aborted { - notifySlack('aborted') - } - success { - notifySlack('completed') - } - unstable { - notifySlack('unstable') - } - failure { - notifySlack('FAILED') - } - } - } - - - stage('Adhoc-Testing') { - when { - beforeAgent true - allOf { - expression { params.ADHOC_BUILD_TYPE == 'BUILD-AND-EXECUTE-TESTS' } - not { buildingTag() } - } - } - - environment { - CYTHON_ENABLED = "${params.CYTHON ? 
'True' : 'False'}" - PYTHON_VERSION = "${params.ADHOC_BUILD_AND_EXECUTE_TESTS_PYTHON_VERSION}" - } - - matrix { - axes { - axis { - name 'CASSANDRA_VERSION' - values '2.1', // Legacy Apache Cassandra - '2.2', // Legacy Apache Cassandra - '3.0', // Previous Apache Cassandra - '3.11', // Current Apache Cassandra - '4.0', // Development Apache Cassandra - 'dse-5.0', // Long Term Support DataStax Enterprise - 'dse-5.1', // Legacy DataStax Enterprise - 'dse-6.0', // Previous DataStax Enterprise - 'dse-6.7', // Current DataStax Enterprise - 'dse-6.8' // Development DataStax Enterprise - } - } - when { - beforeAgent true - allOf { - expression { params.ADHOC_BUILD_AND_EXECUTE_TESTS_SERVER_VERSION ==~ /(ALL|${env.CASSANDRA_VERSION})/ } - } - } - - agent { - label "${OS_VERSION}" - } - - stages { - stage('Describe-Build') { - steps { - describeAdhocTestingStage() - } - } - stage('Initialize-Environment') { - steps { - initializeEnvironment() - } - } - stage('Install-Driver-And-Compile-Extensions') { - steps { - installDriverAndCompileExtensions() - } - } - stage('Execute-Tests') { - steps { - executeTests() - } - post { - always { - junit testResults: '*_results.xml' - } - } - } - } - } - } - } -} diff --git a/build.yaml.bak b/build.yaml.bak deleted file mode 100644 index 100c86558a..0000000000 --- a/build.yaml.bak +++ /dev/null @@ -1,264 +0,0 @@ -schedules: - nightly_master: - schedule: nightly - disable_pull_requests: true - branches: - include: [master] - env_vars: | - EVENT_LOOP_MANAGER='libev' - matrix: - exclude: - - python: [3.6, 3.7, 3.8] - - cassandra: ['2.1', '3.0', '4.0', 'test-dse'] - - commit_long_test: - schedule: per_commit - disable_pull_requests: true - branches: - include: [/long-python.*/] - env_vars: | - EVENT_LOOP_MANAGER='libev' - matrix: - exclude: - - python: [3.6, 3.7, 3.8] - - cassandra: ['2.1', '3.0', 'test-dse'] - - commit_branches: - schedule: per_commit - disable_pull_requests: true - branches: - include: [/python.*/] - env_vars: | - EVENT_LOOP_MANAGER='libev' - EXCLUDE_LONG=1 - matrix: - exclude: - - python: [3.6, 3.7, 3.8] - - cassandra: ['2.1', '3.0', 'test-dse'] - - commit_branches_dev: - schedule: per_commit - disable_pull_requests: true - branches: - include: [/dev-python.*/] - env_vars: | - EVENT_LOOP_MANAGER='libev' - EXCLUDE_LONG=1 - matrix: - exclude: - - python: [2.7, 3.7, 3.6, 3.8] - - cassandra: ['2.0', '2.1', '2.2', '3.0', '4.0', 'test-dse', 'dse-4.8', 'dse-5.0', 'dse-6.0', 'dse-6.8'] - - release_test: - schedule: per_commit - disable_pull_requests: true - branches: - include: [/release-.+/] - env_vars: | - EVENT_LOOP_MANAGER='libev' - - weekly_master: - schedule: 0 10 * * 6 - disable_pull_requests: true - branches: - include: [master] - env_vars: | - EVENT_LOOP_MANAGER='libev' - matrix: - exclude: - - python: [3.5] - - cassandra: ['2.2', '3.1'] - - weekly_gevent: - schedule: 0 14 * * 6 - disable_pull_requests: true - branches: - include: [master] - env_vars: | - EVENT_LOOP_MANAGER='gevent' - JUST_EVENT_LOOP=1 - - weekly_eventlet: - schedule: 0 18 * * 6 - disable_pull_requests: true - branches: - include: [master] - env_vars: | - EVENT_LOOP_MANAGER='eventlet' - JUST_EVENT_LOOP=1 - - weekly_asyncio: - schedule: 0 22 * * 6 - disable_pull_requests: true - branches: - include: [master] - env_vars: | - EVENT_LOOP_MANAGER='asyncio' - JUST_EVENT_LOOP=1 - matrix: - exclude: - - python: [2.7] - - weekly_async: - schedule: 0 10 * * 7 - disable_pull_requests: true - branches: - include: [master] - env_vars: | - EVENT_LOOP_MANAGER='asyncore' - JUST_EVENT_LOOP=1 - - 
weekly_twister: - schedule: 0 14 * * 7 - disable_pull_requests: true - branches: - include: [master] - env_vars: | - EVENT_LOOP_MANAGER='twisted' - JUST_EVENT_LOOP=1 - - upgrade_tests: - schedule: adhoc - branches: - include: [master, python-546] - env_vars: | - EVENT_LOOP_MANAGER='libev' - JUST_UPGRADE=True - matrix: - exclude: - - python: [3.6, 3.7, 3.8] - - cassandra: ['2.0', '2.1', '2.2', '3.0', '4.0', 'test-dse'] - -python: - - 2.7 - - 3.5 - - 3.6 - - 3.7 - - 3.8 - -os: - - ubuntu/bionic64/python-driver - -cassandra: - - '2.1' - - '2.2' - - '3.0' - - '3.11' - - '4.0' - - 'dse-4.8' - - 'dse-5.0' - - 'dse-5.1' - - 'dse-6.0' - - 'dse-6.7' - - 'dse-6.8.0' - -env: - CYTHON: - - CYTHON - - NO_CYTHON - -build: - - script: | - export JAVA_HOME=$CCM_JAVA_HOME - export PATH=$JAVA_HOME/bin:$PATH - export PYTHONPATH="" - export CCM_MAX_HEAP_SIZE=1024M - - # Required for unix socket tests - sudo apt-get install socat - - # Install latest setuptools - pip install --upgrade pip - pip install -U setuptools - - pip install git+ssh://git@github.com/riptano/ccm-private.git@cassandra-7544-native-ports-with-dse-fix - - #pip install $HOME/ccm - - if [ -n "$CCM_IS_DSE" ]; then - pip install -r test-datastax-requirements.txt - else - pip install -r test-requirements.txt - fi - - pip install nose-ignore-docstring - pip install nose-exclude - pip install service_identity - - FORCE_CYTHON=False - if [[ $CYTHON == 'CYTHON' ]]; then - FORCE_CYTHON=True - pip install cython - pip install numpy - # Install the driver & compile C extensions - python setup.py build_ext --inplace - else - # Install the driver & compile C extensions with no cython - python setup.py build_ext --inplace --no-cython - fi - - echo "JUST_UPGRADE: $JUST_UPGRADE" - if [[ $JUST_UPGRADE == 'True' ]]; then - EVENT_LOOP_MANAGER=$EVENT_LOOP_MANAGER VERIFY_CYTHON=$FORCE_CYTHON nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=upgrade_results.xml tests/integration/upgrade || true - exit 0 - fi - - if [[ $JUST_SMOKE == 'true' ]]; then - # When we ONLY want to run the smoke tests - echo "JUST_SMOKE: $JUST_SMOKE" - echo "==========RUNNING SMOKE TESTS===========" - EVENT_LOOP_MANAGER=$EVENT_LOOP_MANAGER CCM_ARGS="$CCM_ARGS" CASSANDRA_VERSION=$CCM_CASSANDRA_VERSION DSE_VERSION='6.7.0' MAPPED_CASSANDRA_VERSION=$MAPPED_CASSANDRA_VERSION VERIFY_CYTHON=$FORCE_CYTHON nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml tests/integration/standard/test_dse.py || true - exit 0 - fi - - # Run the unit tests, this is not done in travis because - # it takes too much time for the whole matrix to build with cython - if [[ $CYTHON == 'CYTHON' ]]; then - EVENT_LOOP_MANAGER=$EVENT_LOOP_MANAGER VERIFY_CYTHON=1 nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_results.xml tests/unit/ || true - EVENT_LOOP_MANAGER=eventlet VERIFY_CYTHON=1 nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_eventlet_results.xml tests/unit/io/test_eventletreactor.py || true - EVENT_LOOP_MANAGER=gevent VERIFY_CYTHON=1 nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=unit_gevent_results.xml 
tests/unit/io/test_geventreactor.py || true - fi - - if [ -n "$JUST_EVENT_LOOP" ]; then - echo "Running integration event loop subset with $EVENT_LOOP_MANAGER" - EVENT_LOOP_TESTS=( - "tests/integration/standard/test_cluster.py" - "tests/integration/standard/test_concurrent.py" - "tests/integration/standard/test_connection.py" - "tests/integration/standard/test_control_connection.py" - "tests/integration/standard/test_metrics.py" - "tests/integration/standard/test_query.py" - "tests/integration/simulacron/test_endpoint.py" - "tests/integration/long/test_ssl.py" - ) - EVENT_LOOP_MANAGER=$EVENT_LOOP_MANAGER CCM_ARGS="$CCM_ARGS" DSE_VERSION=$DSE_VERSION CASSANDRA_VERSION=$CCM_CASSANDRA_VERSION MAPPED_CASSANDRA_VERSION=$MAPPED_CASSANDRA_VERSION VERIFY_CYTHON=$FORCE_CYTHON nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml ${EVENT_LOOP_TESTS[@]} || true - exit 0 - fi - - echo "Running with event loop manager: $EVENT_LOOP_MANAGER" - echo "==========RUNNING SIMULACRON TESTS==========" - SIMULACRON_JAR="$HOME/simulacron.jar" - SIMULACRON_JAR=$SIMULACRON_JAR EVENT_LOOP_MANAGER=$EVENT_LOOP_MANAGER CASSANDRA_DIR=$CCM_INSTALL_DIR CCM_ARGS="$CCM_ARGS" DSE_VERSION=$DSE_VERSION CASSANDRA_VERSION=$CCM_CASSANDRA_VERSION MAPPED_CASSANDRA_VERSION=$MAPPED_CASSANDRA_VERSION VERIFY_CYTHON=$FORCE_CYTHON nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=simulacron_results.xml tests/integration/simulacron/ || true - - echo "Running with event loop manager: $EVENT_LOOP_MANAGER" - echo "==========RUNNING CQLENGINE TESTS==========" - EVENT_LOOP_MANAGER=$EVENT_LOOP_MANAGER CCM_ARGS="$CCM_ARGS" DSE_VERSION=$DSE_VERSION CASSANDRA_VERSION=$CCM_CASSANDRA_VERSION MAPPED_CASSANDRA_VERSION=$MAPPED_CASSANDRA_VERSION VERIFY_CYTHON=$FORCE_CYTHON nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=cqle_results.xml tests/integration/cqlengine/ || true - - echo "==========RUNNING INTEGRATION TESTS==========" - EVENT_LOOP_MANAGER=$EVENT_LOOP_MANAGER CCM_ARGS="$CCM_ARGS" DSE_VERSION=$DSE_VERSION CASSANDRA_VERSION=$CCM_CASSANDRA_VERSION MAPPED_CASSANDRA_VERSION=$MAPPED_CASSANDRA_VERSION VERIFY_CYTHON=$FORCE_CYTHON nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=standard_results.xml tests/integration/standard/ || true - - if [ -n "$DSE_VERSION" ] && ! 
[[ $DSE_VERSION == "4.8"* ]]; then - echo "==========RUNNING DSE INTEGRATION TESTS==========" - EVENT_LOOP_MANAGER=$EVENT_LOOP_MANAGER CASSANDRA_DIR=$CCM_INSTALL_DIR DSE_VERSION=$DSE_VERSION ADS_HOME=$HOME/ VERIFY_CYTHON=$FORCE_CYTHON nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=dse_results.xml tests/integration/advanced/ || true - fi - - echo "==========RUNNING CLOUD TESTS==========" - EVENT_LOOP_MANAGER=$EVENT_LOOP_MANAGER CLOUD_PROXY_PATH="$HOME/proxy/" CASSANDRA_VERSION=$CCM_CASSANDRA_VERSION MAPPED_CASSANDRA_VERSION=$MAPPED_CASSANDRA_VERSION VERIFY_CYTHON=$FORCE_CYTHON nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=advanced_results.xml tests/integration/cloud/ || true - - if [ -z "$EXCLUDE_LONG" ]; then - echo "==========RUNNING LONG INTEGRATION TESTS==========" - EVENT_LOOP_MANAGER=$EVENT_LOOP_MANAGER CCM_ARGS="$CCM_ARGS" DSE_VERSION=$DSE_VERSION CASSANDRA_VERSION=$CCM_CASSANDRA_VERSION MAPPED_CASSANDRA_VERSION=$MAPPED_CASSANDRA_VERSION VERIFY_CYTHON=$FORCE_CYTHON nosetests -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --exclude-dir=tests/integration/long/upgrade --with-ignore-docstrings --with-xunit --xunit-file=long_results.xml tests/integration/long/ || true - fi - - - xunit: - - "*_results.xml" From 3ca41e2115f58277c80732eaa892a9b860126de7 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Fri, 10 Nov 2023 16:26:57 -0600 Subject: [PATCH 201/211] PYTHON-1366 Handle removal of asyncore in Python 3.12 (#1187) --- cassandra/__init__.py | 16 +++++ cassandra/cluster.py | 67 +++++++++++++------ cassandra/io/asyncorereactor.py | 10 ++- cassandra/io/libevreactor.py | 4 +- tests/__init__.py | 16 +++-- tests/integration/standard/test_connection.py | 15 +++-- 6 files changed, 96 insertions(+), 32 deletions(-) diff --git a/cassandra/__init__.py b/cassandra/__init__.py index b048bd9358..4398c86f69 100644 --- a/cassandra/__init__.py +++ b/cassandra/__init__.py @@ -728,3 +728,19 @@ class UnresolvableContactPoints(DriverException): contact points, only when lookup fails for all hosts """ pass + +class DependencyException(Exception): + """ + Specific exception class for handling issues with driver dependencies + """ + + excs = [] + """ + A sequence of child exceptions + """ + + def __init__(self, msg, excs=[]): + complete_msg = msg + if excs: + complete_msg += ("The following exceptions were observed: \n" + '\n'.join(str(e) for e in excs)) + Exception.__init__(self, complete_msg) \ No newline at end of file diff --git a/cassandra/cluster.py b/cassandra/cluster.py index 6514838050..e0c09ca64f 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -24,7 +24,7 @@ from collections.abc import Mapping from concurrent.futures import ThreadPoolExecutor, FIRST_COMPLETED, wait as wait_futures from copy import copy -from functools import partial, wraps +from functools import partial, reduce, wraps from itertools import groupby, count, chain import json import logging @@ -44,7 +44,7 @@ from cassandra import (ConsistencyLevel, AuthenticationFailed, OperationTimedOut, UnsupportedOperation, SchemaTargetType, DriverException, ProtocolVersion, - UnresolvableContactPoints) + UnresolvableContactPoints, DependencyException) from cassandra.auth import _proxy_execute_key, PlainTextAuthProvider from cassandra.connection import (ConnectionException, ConnectionShutdown, ConnectionHeartbeat, 
ProtocolVersionUnsupported, @@ -111,6 +111,19 @@ except ImportError: from cassandra.util import WeakSet # NOQA +def _is_gevent_monkey_patched(): + if 'gevent.monkey' not in sys.modules: + return False + import gevent.socket + return socket.socket is gevent.socket.socket + +def _try_gevent_import(): + if _is_gevent_monkey_patched(): + from cassandra.io.geventreactor import GeventConnection + return (GeventConnection,None) + else: + return (None,None) + def _is_eventlet_monkey_patched(): if 'eventlet.patcher' not in sys.modules: return False @@ -121,26 +134,42 @@ def _is_eventlet_monkey_patched(): except AttributeError: return False +def _try_eventlet_import(): + if _is_eventlet_monkey_patched(): + from cassandra.io.eventletreactor import EventletConnection + return (EventletConnection,None) + else: + return (None,None) -def _is_gevent_monkey_patched(): - if 'gevent.monkey' not in sys.modules: - return False - import gevent.socket - return socket.socket is gevent.socket.socket - +def _try_libev_import(): + try: + from cassandra.io.libevreactor import LibevConnection + return (LibevConnection,None) + except DependencyException as e: + return (None, e) -# default to gevent when we are monkey patched with gevent, eventlet when -# monkey patched with eventlet, otherwise if libev is available, use that as -# the default because it's fastest. Otherwise, use asyncore. -if _is_gevent_monkey_patched(): - from cassandra.io.geventreactor import GeventConnection as DefaultConnection -elif _is_eventlet_monkey_patched(): - from cassandra.io.eventletreactor import EventletConnection as DefaultConnection -else: +def _try_asyncore_import(): try: - from cassandra.io.libevreactor import LibevConnection as DefaultConnection # NOQA - except ImportError: - from cassandra.io.asyncorereactor import AsyncoreConnection as DefaultConnection # NOQA + from cassandra.io.asyncorereactor import AsyncoreConnection + return (AsyncoreConnection,None) + except DependencyException as e: + return (None, e) + +def _connection_reduce_fn(val,import_fn): + (rv, excs) = val + # If we've already found a workable Connection class return immediately + if rv: + return val + (import_result, exc) = import_fn() + if exc: + excs.append(exc) + return (rv or import_result, excs) + +conn_fns = (_try_gevent_import, _try_eventlet_import, _try_libev_import, _try_asyncore_import) +(conn_class, excs) = reduce(_connection_reduce_fn, conn_fns, (None,[])) +if excs: + raise DependencyException("Exception loading connection class dependencies", excs) +DefaultConnection = conn_class # Forces load of utf8 encoding module to avoid deadlock that occurs # if code that is being imported tries to import the module in a separate diff --git a/cassandra/io/asyncorereactor.py b/cassandra/io/asyncorereactor.py index a45d657828..a50b719c5d 100644 --- a/cassandra/io/asyncorereactor.py +++ b/cassandra/io/asyncorereactor.py @@ -30,7 +30,15 @@ except ImportError: from cassandra.util import WeakSet # noqa -import asyncore +from cassandra import DependencyException +try: + import asyncore +except ModuleNotFoundError: + raise DependencyException( + "Unable to import asyncore module. Note that this module has been removed in Python 3.12 " + "so when using the driver with this version (or anything newer) you will need to use one of the " + "other event loop implementations."
+ ) from cassandra.connection import Connection, ConnectionShutdown, NONBLOCKING, Timer, TimerManager diff --git a/cassandra/io/libevreactor.py b/cassandra/io/libevreactor.py index 484690da89..4d4098ca7b 100644 --- a/cassandra/io/libevreactor.py +++ b/cassandra/io/libevreactor.py @@ -21,13 +21,13 @@ from threading import Lock, Thread import time - +from cassandra import DependencyException from cassandra.connection import (Connection, ConnectionShutdown, NONBLOCKING, Timer, TimerManager) try: import cassandra.io.libevwrapper as libev except ImportError: - raise ImportError( + raise DependencyException( "The C extension needed to use libev was not found. This " "probably means that you didn't have the required build dependencies " "when installing the driver. See " diff --git a/tests/__init__.py b/tests/__init__.py index 48c589c424..4735bbd383 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -20,6 +20,8 @@ import os from concurrent.futures import ThreadPoolExecutor +from cassandra import DependencyException + log = logging.getLogger() log.setLevel('DEBUG') # if nose didn't already attach a log handler, add one here @@ -32,9 +34,12 @@ def is_eventlet_monkey_patched(): if 'eventlet.patcher' not in sys.modules: return False - import eventlet.patcher - return eventlet.patcher.is_monkey_patched('socket') - + try: + import eventlet.patcher + return eventlet.patcher.is_monkey_patched('socket') + # Yet another case related to PYTHON-1364 + except AttributeError: + return False def is_gevent_monkey_patched(): if 'gevent.monkey' not in sys.modules: @@ -86,17 +91,18 @@ def is_monkey_patched(): elif "asyncio" in EVENT_LOOP_MANAGER: from cassandra.io.asyncioreactor import AsyncioConnection connection_class = AsyncioConnection - else: + log.debug("Using default event loop (libev)") try: from cassandra.io.libevreactor import LibevConnection connection_class = LibevConnection - except ImportError as e: + except DependencyException as e: log.debug('Could not import LibevConnection, ' 'using connection_class=None; ' 'failed with error:\n {}'.format( repr(e) )) + log.debug("Will attempt to set connection class at cluster initialization") connection_class = None diff --git a/tests/integration/standard/test_connection.py b/tests/integration/standard/test_connection.py index 3323baf20b..463080fc32 100644 --- a/tests/integration/standard/test_connection.py +++ b/tests/integration/standard/test_connection.py @@ -23,12 +23,9 @@ import time from unittest import SkipTest -from cassandra import ConsistencyLevel, OperationTimedOut +from cassandra import ConsistencyLevel, OperationTimedOut, DependencyException from cassandra.cluster import NoHostAvailable, ConnectionShutdown, ExecutionProfile, EXEC_PROFILE_DEFAULT -import cassandra.io.asyncorereactor -from cassandra.io.asyncorereactor import AsyncoreConnection from cassandra.protocol import QueryMessage -from cassandra.connection import Connection from cassandra.policies import HostFilterPolicy, RoundRobinPolicy, HostStateListener from cassandra.pool import HostConnectionPool @@ -36,10 +33,16 @@ from tests.integration import use_singledc, get_node, CASSANDRA_IP, local, \ requiresmallclockgranularity, greaterthancass20, TestCluster +try: + import cassandra.io.asyncorereactor + from cassandra.io.asyncorereactor import AsyncoreConnection +except DependencyException: + AsyncoreConnection = None + try: from cassandra.io.libevreactor import LibevConnection import cassandra.io.libevreactor -except ImportError: +except DependencyException: LibevConnection = None @@ 
-440,6 +443,8 @@ class AsyncoreConnectionTests(ConnectionTests, unittest.TestCase): def setUp(self): if is_monkey_patched(): raise unittest.SkipTest("Can't test asyncore with monkey patching") + if AsyncoreConnection is None: + raise unittest.SkipTest('Unable to import asyncore module') ConnectionTests.setUp(self) def clean_global_loop(self): From cea6e006e63ee20b5558d1e355286301adafa2d2 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Mon, 13 Nov 2023 09:22:55 -0600 Subject: [PATCH 202/211] PYTHON-1368 Avoid installing DSE deps + executing DSE tests for Python 3.12 (#1188) --- Jenkinsfile | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 7e4a3c4761..d654558b8c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -177,9 +177,14 @@ def initializeEnvironment() { // Determine if server version is Apache CassandraⓇ or DataStax Enterprise if (env.CASSANDRA_VERSION.split('-')[0] == 'dse') { - sh label: 'Install DataStax Enterprise requirements', script: '''#!/bin/bash -lex - pip install -r test-datastax-requirements.txt - ''' + if (env.PYTHON_VERSION =~ /3\.12\.\d+/) { + echo "Cannot install DSE dependencies for Python 3.12.x. See PYTHON-1368 for more detail." + } + else { + sh label: 'Install DataStax Enterprise requirements', script: '''#!/bin/bash -lex + pip install -r test-datastax-requirements.txt + ''' + } } else { sh label: 'Install Apache CassandraⓇ requirements', script: '''#!/bin/bash -lex pip install -r test-requirements.txt @@ -292,17 +297,22 @@ def executeStandardTests() { ''' if (env.CASSANDRA_VERSION.split('-')[0] == 'dse' && env.CASSANDRA_VERSION.split('-')[1] != '4.8') { - sh label: 'Execute DataStax Enterprise integration tests', script: '''#!/bin/bash -lex - # Load CCM environment variable - set -o allexport - . ${HOME}/environment.txt - set +o allexport - - EVENT_LOOP=${EVENT_LOOP} CASSANDRA_DIR=${CCM_INSTALL_DIR} DSE_VERSION=${DSE_VERSION} ADS_HOME="${HOME}/" VERIFY_CYTHON=${CYTHON_ENABLED} pynose -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=dse_results.xml tests/integration/advanced/ || true - ''' + if (env.PYTHON_VERSION =~ /3\.12\.\d+/) { + echo "Cannot install DSE dependencies for Python 3.12.x. See PYTHON-1368 for more detail." + } + else { + sh label: 'Execute DataStax Enterprise integration tests', script: '''#!/bin/bash -lex + # Load CCM environment variable + set -o allexport + . ${HOME}/environment.txt + set +o allexport + + EVENT_LOOP=${EVENT_LOOP} CASSANDRA_DIR=${CCM_INSTALL_DIR} DSE_VERSION=${DSE_VERSION} ADS_HOME="${HOME}/" VERIFY_CYTHON=${CYTHON_ENABLED} pynose -s -v --logging-format="[%(levelname)s] %(asctime)s %(thread)d: %(message)s" --with-ignore-docstrings --with-xunit --xunit-file=dse_results.xml tests/integration/advanced/ || true + ''' + } } - sh label: 'Execute DataStax Constellation integration tests', script: '''#!/bin/bash -lex + sh label: 'Execute DataStax Astra integration tests', script: '''#!/bin/bash -lex # Load CCM environment variable set -o allexport . 
${HOME}/environment.txt From 120277da36f880ac6a5508480144fe436bf0d8c1 Mon Sep 17 00:00:00 2001 From: Brad Schoening <5796692+bschoening@users.noreply.github.com> Date: Mon, 13 Nov 2023 15:12:14 -0500 Subject: [PATCH 203/211] Remove outdated Python pre-3.7 references (#1186) --- README-dev.rst | 2 +- test-requirements.txt | 7 ++----- tests/integration/long/test_ssl.py | 2 +- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/README-dev.rst b/README-dev.rst index 5c0555f3a7..bcc1777ac8 100644 --- a/README-dev.rst +++ b/README-dev.rst @@ -176,7 +176,7 @@ Use tee to capture logs and see them on your terminal:: Testing Multiple Python Versions -------------------------------- -If you want to test all of python 2.7, 3.5, 3.6, 3.7, and pypy, use tox (this is what +If you want to test all of python 3.7, 3.8, and pypy, use tox (this is what TravisCI runs):: tox diff --git a/test-requirements.txt b/test-requirements.txt index 996cf4341f..7d3c021240 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -6,13 +6,10 @@ ccm>=2.1.2 pytz sure pure-sasl -twisted[tls]; python_version >= '3.5' -twisted[tls]==19.2.1; python_version < '3.5' +twisted[tls] gevent>=1.0 eventlet cython>=0.20,<0.30 packaging -backports.ssl_match_hostname; python_version < '2.7.9' futurist; python_version >= '3.7' -asynctest; python_version >= '3.5' -ipaddress; python_version < '3.3.0' +asynctest diff --git a/tests/integration/long/test_ssl.py b/tests/integration/long/test_ssl.py index 69285001f8..0e39cb21ad 100644 --- a/tests/integration/long/test_ssl.py +++ b/tests/integration/long/test_ssl.py @@ -28,7 +28,7 @@ if not hasattr(ssl, 'match_hostname'): try: - from backports.ssl_match_hostname import match_hostname + from ssl import match_hostname ssl.match_hostname = match_hostname except ImportError: pass # tests will fail From e90c0f5d71f4cac94ed80ed72c8789c0818e11d0 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Sun, 17 Dec 2023 23:54:16 -0600 Subject: [PATCH 204/211] PYTHON-1371 Add explicit exception type for serialization failures (#1193) --- cassandra/__init__.py | 8 ++++- cassandra/cqltypes.py | 17 ++++++++-- tests/unit/test_types.py | 73 ++++++++++++++++++++++++++++++++++------ 3 files changed, 84 insertions(+), 14 deletions(-) diff --git a/cassandra/__init__.py b/cassandra/__init__.py index 4398c86f69..8b4b6f1a1b 100644 --- a/cassandra/__init__.py +++ b/cassandra/__init__.py @@ -743,4 +743,10 @@ def __init__(self, msg, excs=[]): complete_msg = msg if excs: complete_msg += ("The following exceptions were observed: \n" + '\n'.join(str(e) for e in excs)) - Exception.__init__(self, complete_msg) \ No newline at end of file + Exception.__init__(self, complete_msg) + +class VectorDeserializationFailure(DriverException): + """ + The driver was unable to deserialize a given vector + """ + pass diff --git a/cassandra/cqltypes.py b/cassandra/cqltypes.py index d1d7e888f9..b413b1c9e5 100644 --- a/cassandra/cqltypes.py +++ b/cassandra/cqltypes.py @@ -49,7 +49,7 @@ float_pack, float_unpack, double_pack, double_unpack, varint_pack, varint_unpack, point_be, point_le, vints_pack, vints_unpack) -from cassandra import util +from cassandra import util, VectorDeserializationFailure _little_endian_flag = 1 # we always serialize LE import ipaddress @@ -461,6 +461,7 @@ def serialize(uuid, protocol_version): class BooleanType(_CassandraType): typename = 'boolean' + serial_size = 1 @staticmethod def deserialize(byts, protocol_version): @@ -500,6 +501,7 @@ def serialize(var, protocol_version): class 
FloatType(_CassandraType): typename = 'float' + serial_size = 4 @staticmethod def deserialize(byts, protocol_version): @@ -512,6 +514,7 @@ def serialize(byts, protocol_version): class DoubleType(_CassandraType): typename = 'double' + serial_size = 8 @staticmethod def deserialize(byts, protocol_version): @@ -524,6 +527,7 @@ def serialize(byts, protocol_version): class LongType(_CassandraType): typename = 'bigint' + serial_size = 8 @staticmethod def deserialize(byts, protocol_version): @@ -536,6 +540,7 @@ def serialize(byts, protocol_version): class Int32Type(_CassandraType): typename = 'int' + serial_size = 4 @staticmethod def deserialize(byts, protocol_version): @@ -648,6 +653,7 @@ class TimestampType(DateType): class TimeUUIDType(DateType): typename = 'timeuuid' + serial_size = 16 def my_timestamp(self): return util.unix_time_from_uuid1(self.val) @@ -694,6 +700,7 @@ def serialize(val, protocol_version): class ShortType(_CassandraType): typename = 'smallint' + serial_size = 2 @staticmethod def deserialize(byts, protocol_version): @@ -706,6 +713,7 @@ def serialize(byts, protocol_version): class TimeType(_CassandraType): typename = 'time' + serial_size = 8 @staticmethod def deserialize(byts, protocol_version): @@ -1411,8 +1419,11 @@ def apply_parameters(cls, params, names): @classmethod def deserialize(cls, byts, protocol_version): - indexes = (4 * x for x in range(0, cls.vector_size)) - return [cls.subtype.deserialize(byts[idx:idx + 4], protocol_version) for idx in indexes] + serialized_size = getattr(cls.subtype, "serial_size", None) + if not serialized_size: + raise VectorDeserializationFailure("Cannot determine serialized size for vector with subtype %s" % cls.subtype.__name__) + indexes = (serialized_size * x for x in range(0, cls.vector_size)) + return [cls.subtype.deserialize(byts[idx:idx + serialized_size], protocol_version) for idx in indexes] @classmethod def serialize(cls, v, protocol_version): diff --git a/tests/unit/test_types.py b/tests/unit/test_types.py index a06bbd452d..5db7f087b7 100644 --- a/tests/unit/test_types.py +++ b/tests/unit/test_types.py @@ -16,10 +16,11 @@ import datetime import tempfile import time +import uuid from binascii import unhexlify import cassandra -from cassandra import util +from cassandra import util, VectorDeserializationFailure from cassandra.cqltypes import ( CassandraType, DateRangeType, DateType, DecimalType, EmptyValue, LongType, SetType, UTF8Type, @@ -308,15 +309,67 @@ def test_cql_quote(self): self.assertEqual(cql_quote('test'), "'test'") self.assertEqual(cql_quote(0), '0') - def test_vector_round_trip(self): - base = [3.4, 2.9, 41.6, 12.0] - ctype = parse_casstype_args("org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.FloatType, 4)") - base_bytes = ctype.serialize(base, 0) - self.assertEqual(16, len(base_bytes)) - result = ctype.deserialize(base_bytes, 0) - self.assertEqual(len(base), len(result)) - for idx in range(0,len(base)): - self.assertAlmostEqual(base[idx], result[idx], places=5) + def test_vector_round_trip_types_with_serialized_size(self): + # Test all the types which specify a serialized size... 
see PYTHON-1371 for details + self._round_trip_test([True, False, False, True], \ + "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.BooleanType, 4)") + self._round_trip_test([3.4, 2.9, 41.6, 12.0], \ + "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.FloatType, 4)") + self._round_trip_test([3.4, 2.9, 41.6, 12.0], \ + "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.DoubleType, 4)") + self._round_trip_test([3, 2, 41, 12], \ + "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.LongType, 4)") + self._round_trip_test([3, 2, 41, 12], \ + "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.Int32Type, 4)") + self._round_trip_test([uuid.uuid1(), uuid.uuid1(), uuid.uuid1(), uuid.uuid1()], \ + "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.TimeUUIDType, 4)") + self._round_trip_test([3, 2, 41, 12], \ + "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.ShortType, 4)") + self._round_trip_test([datetime.time(1,1,1), datetime.time(2,2,2), datetime.time(3,3,3)], \ + "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.TimeType, 3)") + + def test_vector_round_trip_types_without_serialized_size(self): + # Test all the types which do not specify a serialized size... see PYTHON-1371 for details + # Varints + with self.assertRaises(VectorDeserializationFailure): + self._round_trip_test([3, 2, 41, 12], \ + "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.IntegerType, 4)") + # ASCII text + with self.assertRaises(VectorDeserializationFailure): + self._round_trip_test(["abc", "def", "ghi", "jkl"], \ + "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.AsciiType, 4)") + # UTF8 text + with self.assertRaises(VectorDeserializationFailure): + self._round_trip_test(["abc", "def", "ghi", "jkl"], \ + "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.UTF8Type, 4)") + # Duration (contains varints) + with self.assertRaises(VectorDeserializationFailure): + self._round_trip_test([util.Duration(1,1,1), util.Duration(2,2,2), util.Duration(3,3,3)], \ + "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.DurationType, 3)") + # List (of otherwise serializable type) + with self.assertRaises(VectorDeserializationFailure): + self._round_trip_test([[3.4], [2.9], [41.6], [12.0]], \ + "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.ListType(org.apache.cassandra.db.marshal.FloatType), 4)") + # Set (of otherwise serializable type) + with self.assertRaises(VectorDeserializationFailure): + self._round_trip_test([set([3.4]), set([2.9]), set([41.6]), set([12.0])], \ + "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.SetType(org.apache.cassandra.db.marshal.FloatType), 4)") + # Map (of otherwise serializable types) + with self.assertRaises(VectorDeserializationFailure): + self._round_trip_test([{1:3.4}, {2:2.9}, {3:41.6}, {4:12.0}], \ + "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.MapType \ + (org.apache.cassandra.db.marshal.Int32Type,org.apache.cassandra.db.marshal.FloatType), 4)") + + def _round_trip_test(self, data, ctype_str): + ctype = parse_casstype_args(ctype_str) + data_bytes = ctype.serialize(data, 0) + serialized_size = getattr(ctype.subtype, "serial_size", None) + if serialized_size: + self.assertEqual(serialized_size * len(data),
len(data_bytes)) + result = ctype.deserialize(data_bytes, 0) + self.assertEqual(len(data), len(result)) + for idx in range(0,len(data)): + self.assertAlmostEqual(data[idx], result[idx], places=5) def test_vector_cql_parameterized_type(self): ctype = parse_casstype_args("org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.FloatType, 4)") From 8ff0ba0db62512cdbd868b809ac15a16e01ef94b Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Mon, 18 Dec 2023 23:35:24 -0600 Subject: [PATCH 205/211] PYTHON-1331 ssl.match_hostname() is deprecated in 3.7 (#1191) --- Jenkinsfile | 8 +++- cassandra/__init__.py | 2 +- cassandra/cluster.py | 20 +++++--- cassandra/connection.py | 84 ++++++++++++++++++++++++--------- cassandra/io/eventletreactor.py | 15 +++--- test-requirements.txt | 2 +- 6 files changed, 91 insertions(+), 40 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index d654558b8c..fdc5e74269 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -178,7 +178,10 @@ def initializeEnvironment() { // Determine if server version is Apache CassandraⓇ or DataStax Enterprise if (env.CASSANDRA_VERSION.split('-')[0] == 'dse') { if (env.PYTHON_VERSION =~ /3\.12\.\d+/) { - echo "Cannot install DSE dependencies for Python 3.12.x. See PYTHON-1368 for more detail." + echo "Cannot install DSE dependencies for Python 3.12.x; installing Apache CassandraⓇ requirements only. See PYTHON-1368 for more detail." + sh label: 'Install Apache CassandraⓇ requirements', script: '''#!/bin/bash -lex + pip install -r test-requirements.txt + ''' } else { sh label: 'Install DataStax Enterprise requirements', script: '''#!/bin/bash -lex @@ -196,7 +199,8 @@ def initializeEnvironment() { } sh label: 'Install unit test modules', script: '''#!/bin/bash -lex - pip install pynose nose-ignore-docstring nose-exclude service_identity + pip install --no-deps nose-ignore-docstring nose-exclude + pip install service_identity ''' if (env.CYTHON_ENABLED == 'True') { diff --git a/cassandra/__init__.py b/cassandra/__init__.py index 8b4b6f1a1b..8d453f5975 100644 --- a/cassandra/__init__.py +++ b/cassandra/__init__.py @@ -742,7 +742,7 @@ class DependencyException(Exception): def __init__(self, msg, excs=[]): complete_msg = msg if excs: - complete_msg += ("The following exceptions were observed: \n" + '\n'.join(str(e) for e in excs)) + complete_msg += ("\nThe following exceptions were observed: \n - " + '\n - '.join(str(e) for e in excs)) Exception.__init__(self, complete_msg) class VectorDeserializationFailure(DriverException): diff --git a/cassandra/cluster.py b/cassandra/cluster.py index e0c09ca64f..d5f80290a9 100644 --- a/cassandra/cluster.py +++ b/cassandra/cluster.py @@ -165,10 +165,12 @@ def _connection_reduce_fn(val,import_fn): excs.append(exc) return (rv or import_result, excs) +log = logging.getLogger(__name__) + conn_fns = (_try_gevent_import, _try_eventlet_import, _try_libev_import, _try_asyncore_import) (conn_class, excs) = reduce(_connection_reduce_fn, conn_fns, (None,[])) -if excs: - raise DependencyException("Exception loading connection class dependencies", excs) +if not conn_class: + raise DependencyException("Unable to load a default connection class", excs) DefaultConnection = conn_class # Forces load of utf8 encoding module to avoid deadlock that occurs @@ -177,8 +179,6 @@ def _connection_reduce_fn(val,import_fn): # See http://bugs.python.org/issue10923 "".encode('utf8') -log = logging.getLogger(__name__) - DEFAULT_MIN_REQUESTS = 5 DEFAULT_MAX_REQUESTS = 100 @@ -811,9 +811,9 @@ def default_retry_policy(self, 
policy): Using ssl_options without ssl_context is deprecated and will be removed in the next major release. - An optional dict which will be used as kwargs for ``ssl.SSLContext.wrap_socket`` (or - ``ssl.wrap_socket()`` if used without ssl_context) when new sockets are created. - This should be used when client encryption is enabled in Cassandra. + An optional dict which will be used as kwargs for ``ssl.SSLContext.wrap_socket`` + when new sockets are created. This should be used when client encryption is enabled + in Cassandra. The following documentation only applies when ssl_options is used without ssl_context. @@ -829,6 +829,12 @@ def default_retry_policy(self, policy): should almost always require the option ``'cert_reqs': ssl.CERT_REQUIRED``. Note also that this functionality was not built into Python standard library until (2.7.9, 3.2). To enable this mechanism in earlier versions, patch ``ssl.match_hostname`` with a custom or `back-ported function `_. + + .. versionchanged:: 3.29.0 + + ``ssl.match_hostname`` has been deprecated since Python 3.7 (and removed in Python 3.12). This functionality is now implemented + via ``ssl.SSLContext.check_hostname``. All options specified above (including ``check_hostname``) should continue to behave in a + way that is consistent with prior implementations. """ ssl_context = None diff --git a/cassandra/connection.py b/cassandra/connection.py index 195c93c889..bfe38fc702 100644 --- a/cassandra/connection.py +++ b/cassandra/connection.py @@ -733,7 +733,6 @@ class Connection(object): _socket = None _socket_impl = socket - _ssl_impl = ssl _check_hostname = False _product_type = None @@ -757,7 +756,7 @@ def __init__(self, host='127.0.0.1', port=9042, authenticator=None, self.endpoint = host if isinstance(host, EndPoint) else DefaultEndPoint(host, port) self.authenticator = authenticator - self.ssl_options = ssl_options.copy() if ssl_options else None + self.ssl_options = ssl_options.copy() if ssl_options else {} self.ssl_context = ssl_context self.sockopts = sockopts self.compression = compression @@ -777,15 +776,20 @@ def __init__(self, host='127.0.0.1', port=9042, authenticator=None, self._on_orphaned_stream_released = on_orphaned_stream_released if ssl_options: - self._check_hostname = bool(self.ssl_options.pop('check_hostname', False)) - if self._check_hostname: - if not getattr(ssl, 'match_hostname', None): - raise RuntimeError("ssl_options specify 'check_hostname', but ssl.match_hostname is not provided. " - "Patch or upgrade Python to use this option.") self.ssl_options.update(self.endpoint.ssl_options or {}) elif self.endpoint.ssl_options: self.ssl_options = self.endpoint.ssl_options + # PYTHON-1331 + # + # We always use SSLContext.wrap_socket() now but legacy configs may have other params that were passed to ssl.wrap_socket()... + # and either could have 'check_hostname'. Remove these params into a separate map and use them to build an SSLContext if + # we need to do so. + # + # Note the use of pop() here; we are very deliberately removing these params from ssl_options if they're present. After this + # operation ssl_options should contain only args needed for the ssl_context.wrap_socket() call. 
+ if not self.ssl_context and self.ssl_options: + self.ssl_context = self._build_ssl_context_from_options() if protocol_version >= 3: self.max_request_id = min(self.max_in_flight - 1, (2 ** 15) - 1) @@ -852,21 +856,57 @@ def factory(cls, endpoint, timeout, *args, **kwargs): else: return conn + def _build_ssl_context_from_options(self): + + # Extract a subset of names from self.ssl_options which apply to SSLContext creation + ssl_context_opt_names = ['ssl_version', 'cert_reqs', 'check_hostname', 'keyfile', 'certfile', 'ca_certs', 'ciphers'] + opts = {k:self.ssl_options.get(k, None) for k in ssl_context_opt_names if k in self.ssl_options} + + # Python >= 3.10 requires either PROTOCOL_TLS_CLIENT or PROTOCOL_TLS_SERVER so we'll get ahead of things by always + # being explicit + ssl_version = opts.get('ssl_version', None) or ssl.PROTOCOL_TLS_CLIENT + cert_reqs = opts.get('cert_reqs', None) or ssl.CERT_REQUIRED + rv = ssl.SSLContext(protocol=int(ssl_version)) + rv.check_hostname = bool(opts.get('check_hostname', False)) + rv.options = int(cert_reqs) + + certfile = opts.get('certfile', None) + keyfile = opts.get('keyfile', None) + if certfile: + rv.load_cert_chain(certfile, keyfile) + ca_certs = opts.get('ca_certs', None) + if ca_certs: + rv.load_verify_locations(ca_certs) + ciphers = opts.get('ciphers', None) + if ciphers: + rv.set_ciphers(ciphers) + + return rv + def _wrap_socket_from_context(self): - ssl_options = self.ssl_options or {} + + # Extract a subset of names from self.ssl_options which apply to SSLContext.wrap_socket (or at least the parts + # of it that don't involve building an SSLContext under the covers) + wrap_socket_opt_names = ['server_side', 'do_handshake_on_connect', 'suppress_ragged_eofs', 'server_hostname'] + opts = {k:self.ssl_options.get(k, None) for k in wrap_socket_opt_names if k in self.ssl_options} + # PYTHON-1186: set the server_hostname only if the SSLContext has # check_hostname enabled and it is not already provided by the EndPoint ssl options - if (self.ssl_context.check_hostname and - 'server_hostname' not in ssl_options): - ssl_options = ssl_options.copy() - ssl_options['server_hostname'] = self.endpoint.address - self._socket = self.ssl_context.wrap_socket(self._socket, **ssl_options) + #opts['server_hostname'] = self.endpoint.address + if (self.ssl_context.check_hostname and 'server_hostname' not in opts): + server_hostname = self.endpoint.address + opts['server_hostname'] = server_hostname + + return self.ssl_context.wrap_socket(self._socket, **opts) def _initiate_connection(self, sockaddr): self._socket.connect(sockaddr) - def _match_hostname(self): - ssl.match_hostname(self._socket.getpeercert(), self.endpoint.address) + # PYTHON-1331 + # + # Allow implementations specific to an event loop to add additional behaviours + def _validate_hostname(self): + pass def _get_socket_addresses(self): address, port = self.endpoint.resolve() @@ -887,16 +927,18 @@ def _connect_socket(self): try: self._socket = self._socket_impl.socket(af, socktype, proto) if self.ssl_context: - self._wrap_socket_from_context() - elif self.ssl_options: - if not self._ssl_impl: - raise RuntimeError("This version of Python was not compiled with SSL support") - self._socket = self._ssl_impl.wrap_socket(self._socket, **self.ssl_options) + self._socket = self._wrap_socket_from_context() self._socket.settimeout(self.connect_timeout) self._initiate_connection(sockaddr) self._socket.settimeout(None) + + # PYTHON-1331 + # + # Most checking is done via the check_hostname param on the SSLContext. 
+            # Subclasses can add additional behaviours via _validate_hostname() so
+            # run that here.
             if self._check_hostname:
-                self._match_hostname()
+                self._validate_hostname()
             sockerr = None
             break
         except socket.error as err:

diff --git a/cassandra/io/eventletreactor.py b/cassandra/io/eventletreactor.py
index 42874036d5..c51bfd7591 100644
--- a/cassandra/io/eventletreactor.py
+++ b/cassandra/io/eventletreactor.py
@@ -103,11 +103,12 @@ def __init__(self, *args, **kwargs):

     def _wrap_socket_from_context(self):
         _check_pyopenssl()
-        self._socket = SSL.Connection(self.ssl_context, self._socket)
-        self._socket.set_connect_state()
+        rv = SSL.Connection(self.ssl_context, self._socket)
+        rv.set_connect_state()
         if self.ssl_options and 'server_hostname' in self.ssl_options:
             # This is necessary for SNI
-            self._socket.set_tlsext_host_name(self.ssl_options['server_hostname'].encode('ascii'))
+            rv.set_tlsext_host_name(self.ssl_options['server_hostname'].encode('ascii'))
+        return rv

     def _initiate_connection(self, sockaddr):
         if self.uses_legacy_ssl_options:
@@ -117,14 +118,12 @@ def _initiate_connection(self, sockaddr):
         if self.ssl_context or self.ssl_options:
             self._socket.do_handshake()

-    def _match_hostname(self):
-        if self.uses_legacy_ssl_options:
-            super(EventletConnection, self)._match_hostname()
-        else:
+    def _validate_hostname(self):
+        if not self.uses_legacy_ssl_options:
             cert_name = self._socket.get_peer_certificate().get_subject().commonName
             if cert_name != self.endpoint.address:
                 raise Exception("Hostname verification failed! Certificate name '{}' "
-                                "doesn't endpoint '{}'".format(cert_name, self.endpoint.address))
+                                "doesn't match endpoint '{}'".format(cert_name, self.endpoint.address))

     def close(self):
         with self.lock:

diff --git a/test-requirements.txt b/test-requirements.txt
index 7d3c021240..4ebb23df53 100644
--- a/test-requirements.txt
+++ b/test-requirements.txt
@@ -1,6 +1,6 @@
 -r requirements.txt
 scales
-nose
+pynose
 mock>1.1
 ccm>=2.1.2
 pytz

From 1a947f8437d10e52a66488c1eab45456d089a92d Mon Sep 17 00:00:00 2001
From: Bret McGuire
Date: Wed, 20 Dec 2023 10:47:23 -0600
Subject: [PATCH 206/211] Documentation (and other) updates for 3.29.0 (#1194)

---
 .travis.yml                |  7 ++++---
 CHANGELOG.rst              | 16 ++++++++++++++++
 README-dev.rst             |  2 +-
 README.rst                 |  2 +-
 cassandra/__init__.py      |  2 +-
 cassandra/scylla/cloud.py  |  0
 docs/index.rst             |  2 +-
 docs/installation.rst      | 15 +++++++++------
 setup.py                   |  6 +++++-
 test-requirements.txt      |  4 ++--
 tests/unit/test_cluster.py |  4 +++-
 tox.ini                    | 19 +++++++------------
 12 files changed, 50 insertions(+), 29 deletions(-)
 delete mode 100644 cassandra/scylla/cloud.py

diff --git a/.travis.yml b/.travis.yml
index dbabf61378..5a483f9a03 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,10 +1,11 @@
-dist: xenial
+dist: jammy
 sudo: false
 language: python

 python:
-  - "3.7"
   - "3.8"
+  - "3.9"
+  - "3.10"

 env:
   - CASS_DRIVER_NO_CYTHON=1

 addons:
   apt:
     packages:
       - build-essential
-      - python-dev
+      - python3-dev
       - pypy-dev
       - libc-ares-dev
       - libev4

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 472881dbc5..a7780b4ade 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,3 +1,19 @@
+3.29.0
+======
+December 19, 2023
+
+Features
+--------
+* Add support for Python 3.9 through 3.12, drop support for 3.7 (PYTHON-1283)
+* Removal of dependency on six module (PR 1172)
+* Raise explicit exception when deserializing a vector with a subtype that isn’t a constant size (PYTHON-1371)
+
+Others
+------
+* Remove outdated Python pre-3.7 references (PR 1186)
+* Remove backup(.bak) files (PR 1185)
+* Fix doc typo in add_callbacks (PR 1177)
+
 3.28.0
 ======
 June 5, 2023

diff --git a/README-dev.rst b/README-dev.rst
index bcc1777ac8..adca510412 100644
--- a/README-dev.rst
+++ b/README-dev.rst
@@ -176,7 +176,7 @@ Use tee to capture logs and see them on your terminal::

 Testing Multiple Python Versions
 --------------------------------
-If you want to test all of python 3.7, 3.8, and pypy, use tox (this is what
+Use tox to test all of Python 3.8 through 3.12 and pypy (this is what
 TravisCI runs)::

     tox

diff --git a/README.rst b/README.rst
index 47483f3881..98884008b0 100644
--- a/README.rst
+++ b/README.rst
@@ -7,7 +7,7 @@ DataStax Driver for Apache Cassandra
 A modern, `feature-rich `_ and highly-tunable Python client library for Apache Cassandra (2.1+) and
 DataStax Enterprise (4.7+) using exclusively Cassandra's binary protocol and Cassandra Query Language v3.

-The driver supports Python 3.7 and 3.8.
+The driver supports Python 3.8 through 3.12.

 **Note:** DataStax products do not support big-endian systems.

diff --git a/cassandra/__init__.py b/cassandra/__init__.py
index 8d453f5975..f4c88c1c91 100644
--- a/cassandra/__init__.py
+++ b/cassandra/__init__.py
@@ -22,7 +22,7 @@ def emit(self, record):

 logging.getLogger('cassandra').addHandler(NullHandler())

-__version_info__ = (3, 28, 0)
+__version_info__ = (3, 29, 0)
 __version__ = '.'.join(map(str, __version_info__))

diff --git a/cassandra/scylla/cloud.py b/cassandra/scylla/cloud.py
deleted file mode 100644
index e69de29bb2..0000000000

diff --git a/docs/index.rst b/docs/index.rst
index 6f34f249fb..2370ccefaf 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -4,7 +4,7 @@ A Python client driver for `Apache Cassandra® `_.
 This driver works exclusively with the Cassandra Query Language v3 (CQL3)
 and Cassandra's native protocol. Cassandra 2.1+ is supported, including DSE 4.7+.

-The driver supports Python 3.7 and 3.8.
+The driver supports Python 3.8 through 3.12.

 This driver is open source under the `Apache v2 License `_.

diff --git a/docs/installation.rst b/docs/installation.rst
index 17a4e63324..e235f398fe 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -3,7 +3,7 @@ Installation

 Supported Platforms
 -------------------
-Python 3.7 and 3.8 are supported. Both CPython (the standard Python
+Python 3.8 through 3.12 are supported. Both CPython (the standard Python
 implementation) and `PyPy `_ are supported and tested.

 Linux, OSX, and Windows are supported.

@@ -26,7 +26,7 @@ To check if the installation was successful, you can run::

     python -c 'import cassandra; print cassandra.__version__'

-It should print something like "3.27.0".
+It should print something like "3.29.0".

 .. _installation-datastax-graph:

@@ -215,12 +215,15 @@ dependencies, then use install-option::

     sudo pip install --install-option="--no-cython"

+Supported Event Loops
+^^^^^^^^^^^^^^^^^^^^^
+For Python versions before 3.12, the driver uses the ``asyncore`` module for its default
+event loop. Other event loops, such as ``libev``, ``gevent`` and ``eventlet``, are also
+available via Python modules or C extensions. Python 3.12 has removed ``asyncore`` entirely,
+so on that platform one of the other event loops must be used.
+
 libev support
 ^^^^^^^^^^^^^
-The driver currently uses Python's ``asyncore`` module for its default
-event loop. For better performance, ``libev`` is also supported through
-a C extension.
-
 If you're on Linux, you should be able to install libev through a package manager.
For example, on Debian/Ubuntu::

diff --git a/setup.py b/setup.py
index 86e50e8b22..386bdfb9af 100644
--- a/setup.py
+++ b/setup.py
@@ -413,6 +413,7 @@ def run_setup(extensions):
         version=__version__,
         description=' DataStax Driver for Apache Cassandra',
         long_description=long_description,
+        long_description_content_type='text/x-rst',
         url='http://github.com/datastax/python-driver',
         project_urls={
             'Documentation': 'https://docs.datastax.com/en/developer/python-driver/latest/',
@@ -438,8 +439,11 @@ def run_setup(extensions):
             'Natural Language :: English',
             'Operating System :: OS Independent',
             'Programming Language :: Python',
-            'Programming Language :: Python :: 3.7',
             'Programming Language :: Python :: 3.8',
+            'Programming Language :: Python :: 3.9',
+            'Programming Language :: Python :: 3.10',
+            'Programming Language :: Python :: 3.11',
+            'Programming Language :: Python :: 3.12',
             'Programming Language :: Python :: Implementation :: CPython',
             'Programming Language :: Python :: Implementation :: PyPy',
             'Topic :: Software Development :: Libraries :: Python Modules'

diff --git a/test-requirements.txt b/test-requirements.txt
index 4ebb23df53..e3f8e1cac6 100644
--- a/test-requirements.txt
+++ b/test-requirements.txt
@@ -7,9 +7,9 @@ pytz
 sure
 pure-sasl
 twisted[tls]
-gevent>=1.0
+gevent
 eventlet
 cython>=0.20,<0.30
 packaging
-futurist; python_version >= '3.7'
+futurist
 asynctest

diff --git a/tests/unit/test_cluster.py b/tests/unit/test_cluster.py
index c5f5def082..d6e00407f7 100644
--- a/tests/unit/test_cluster.py
+++ b/tests/unit/test_cluster.py
@@ -14,6 +14,7 @@

 import unittest
 import logging
+import socket

 from mock import patch, Mock

@@ -88,8 +89,9 @@ class ClusterTest(unittest.TestCase):

     def test_tuple_for_contact_points(self):
         cluster = Cluster(contact_points=[('localhost', 9045), ('127.0.0.2', 9046), '127.0.0.3'], port=9999)
+        # "localhost" may resolve to ::1, 127.0.0.1 or both depending on the platform
+        localhost_addr = {sockaddr[0] for (_, _, _, _, sockaddr) in socket.getaddrinfo("localhost", 80)}
         for cp in cluster.endpoints_resolved:
-            if cp.address in ('::1', '127.0.0.1'):
+            if cp.address in localhost_addr:
                 self.assertEqual(cp.port, 9045)
             elif cp.address == '127.0.0.2':
                 self.assertEqual(cp.port, 9046)

diff --git a/tox.ini b/tox.ini
index 7d4dfe898e..b4a01e53df 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,23 +1,22 @@
 [tox]
-envlist = py{37,38},pypy
+envlist = py{38,39,310,311,312},pypy

 [base]
-deps = nose
-       mock<=1.0.1
+deps = pynose
+       mock>1.1
        packaging
-       cython
+       cython>=0.20,<0.30
        eventlet
-       twisted <15.5.0
+       gevent
+       twisted[tls]
        pure-sasl
        kerberos
        futurist
-       greenlet>=0.4.14,<0.4.17
+       lz4
        cryptography>=35.0
-lz4_dependency = py37,py38: lz4

 [testenv]
 deps = {[base]deps}
-       {[base]lz4_dependency}
 setenv = LIBEV_EMBED=0
          CARES_EMBED=0
@@ -28,8 +27,6 @@ commands = nosetests --verbosity=2 --no-path-adjustment {toxinidir}/tests/unit/

 [testenv:gevent_loop]
 deps = {[base]deps}
-       {[base]lz4_dependency}
-       gevent>=1.4,<1.5
 setenv = LIBEV_EMBED=0
          CARES_EMBED=0
@@ -41,8 +38,6 @@ commands =

 [testenv:eventlet_loop]
 deps = {[base]deps}
-       {[base]lz4_dependency}
-       gevent>=1.4,<1.5
 setenv = LIBEV_EMBED=0
          CARES_EMBED=0

From 9941ddb5908229b7cdb32f6347c4574c31b49489 Mon Sep 17 00:00:00 2001
From: Bret McGuire
Date: Thu, 21 Dec 2023 17:24:17 -0600
Subject: [PATCH 207/211] Added 3.29.0 to docs.yaml

---
 docs.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs.yaml b/docs.yaml
index 7dde5a0299..07e2742637 100644
--- a/docs.yaml
+++ b/docs.yaml
@@ -22,6 +22,8 @@ sections:
       # build extensions like libev
       CASS_DRIVER_NO_CYTHON=1 python setup.py build_ext --inplace --force
versions:
+      - name: '3.29'
+        ref: 1a947f84
       - name: '3.28'
         ref: 4325afb6
       - name: '3.27'

From 2c61ab22792475445d21b77bd165cf330ee9b87b Mon Sep 17 00:00:00 2001
From: Yago Riveiro
Date: Tue, 27 Feb 2024 22:01:23 +0000
Subject: [PATCH 208/211] fix build from source on macos using homebrew (#1196)

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 386bdfb9af..e8842146fb 100644
--- a/setup.py
+++ b/setup.py
@@ -144,7 +144,7 @@ def __init__(self, ext):

 libev_ext = Extension('cassandra.io.libevwrapper',
                       sources=['cassandra/io/libevwrapper.c'],
-                      include_dirs=['/usr/include/libev', '/usr/local/include', '/opt/local/include'],
+                      include_dirs=['/usr/include/libev', '/usr/local/include', '/opt/local/include', '/opt/homebrew/include', os.path.expanduser('~/homebrew/include')],
                       libraries=['ev'],
                       library_dirs=['/usr/local/lib', '/opt/local/lib'])

From 7e0923a86e6b8d55f5a88698f4c1e6ded65a348b Mon Sep 17 00:00:00 2001
From: Alexandre Detiste
Date: Tue, 27 Feb 2024 23:54:42 +0100
Subject: [PATCH 209/211] clean up last "import six" (#1197)

---
 docs/upgrading.rst                         | 4 ----
 tests/integration/standard/test_cluster.py | 1 -
 2 files changed, 5 deletions(-)

diff --git a/docs/upgrading.rst b/docs/upgrading.rst
index 3a600e9ac0..3fd937d7bc 100644
--- a/docs/upgrading.rst
+++ b/docs/upgrading.rst
@@ -382,7 +382,3 @@ The following dependencies have officially been made optional:

 * ``scales``
 * ``blist``
-
-And one new dependency has been added (to enable Python 3 support):
-
-* ``six``

diff --git a/tests/integration/standard/test_cluster.py b/tests/integration/standard/test_cluster.py
index ae6e3e5a4e..11a9fba0ab 100644
--- a/tests/integration/standard/test_cluster.py
+++ b/tests/integration/standard/test_cluster.py
@@ -23,7 +23,6 @@
 import warnings

 from packaging.version import Version
-import six

 import cassandra
 from cassandra.cluster import NoHostAvailable, ExecutionProfile, EXEC_PROFILE_DEFAULT, ControlConnection, Cluster
 from cassandra.concurrent import execute_concurrent

From dbd4ea56cc0633e36a72d7abafd265d0da17d555 Mon Sep 17 00:00:00 2001
From: Bret McGuire
Date: Tue, 19 Mar 2024 12:15:40 -0500
Subject: [PATCH 210/211] PYTHON-1378 Expand search directories for includes
 (#1198)

---
 setup.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/setup.py b/setup.py
index e8842146fb..fa93fc5d8f 100644
--- a/setup.py
+++ b/setup.py
@@ -138,15 +138,22 @@ class BuildFailed(Exception):
     def __init__(self, ext):
         self.ext = ext

+is_windows = sys.platform.startswith('win32')
+is_macos = sys.platform.startswith('darwin')

 murmur3_ext = Extension('cassandra.cmurmur3',
                         sources=['cassandra/cmurmur3.c'])

+libev_includes = ['/usr/include/libev', '/usr/local/include', '/opt/local/include', '/usr/include']
+libev_libdirs = ['/usr/local/lib', '/opt/local/lib', '/usr/lib64']
+if is_macos:
+    libev_includes.extend(['/opt/homebrew/include', os.path.expanduser('~/homebrew/include')])
+    libev_libdirs.extend(['/opt/homebrew/lib'])
 libev_ext = Extension('cassandra.io.libevwrapper',
                       sources=['cassandra/io/libevwrapper.c'],
-                      include_dirs=['/usr/include/libev', '/usr/local/include', '/opt/local/include', '/opt/homebrew/include', os.path.expanduser('~/homebrew/include')],
+                      include_dirs=libev_includes,
                       libraries=['ev'],
-                      library_dirs=['/usr/local/lib', '/opt/local/lib'])
+                      library_dirs=libev_libdirs)

 platform_unsupported_msg = \
 """
 =================================================================================
 """
-is_windows = os.name == 'nt'
-
 is_pypy = "PyPy" in sys.version
 if is_pypy:
     sys.stderr.write(pypy_unsupported_msg)

From 9629c2ad4b828d0f719c45e2a3eade11152707a6 Mon Sep 17 00:00:00 2001
From: Bret McGuire
Date: Tue, 19 Mar 2024 12:44:21 -0500
Subject: [PATCH 211/211] Release 3.29.1: changelog & version (#1199)

---
 CHANGELOG.rst         | 9 +++++++++
 cassandra/__init__.py | 2 +-
 docs/installation.rst | 2 +-
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index a7780b4ade..9dce17dcb6 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,3 +1,12 @@
+3.29.1
+======
+March 19, 2024
+
+Bug Fixes
+---------
+* cassandra-driver for Python 3.12 Linux is compiled without libev support (PYTHON-1378)
+* Consider moving to native wheel builds for OS X and removing universal2 wheels (PYTHON-1379)
+
 3.29.0
 ======
 December 19, 2023

diff --git a/cassandra/__init__.py b/cassandra/__init__.py
index f4c88c1c91..4a5b8b29a3 100644
--- a/cassandra/__init__.py
+++ b/cassandra/__init__.py
@@ -22,7 +22,7 @@ def emit(self, record):

 logging.getLogger('cassandra').addHandler(NullHandler())

-__version_info__ = (3, 29, 0)
+__version_info__ = (3, 29, 1)
 __version__ = '.'.join(map(str, __version_info__))

diff --git a/docs/installation.rst b/docs/installation.rst
index e235f398fe..5a400387e5 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -26,7 +26,7 @@ To check if the installation was successful, you can run::

     python -c 'import cassandra; print cassandra.__version__'

-It should print something like "3.29.0".
+It should print something like "3.29.1".

 .. _installation-datastax-graph:
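
As a closing illustration (not part of any patch above), here is a minimal sketch of the
ssl_context-first configuration that the PYTHON-1331 changes in this series standardize on;
the contact point ``db.example.com`` and the CA bundle ``ca.pem`` are hypothetical::

    import ssl

    from cassandra.cluster import Cluster

    # PROTOCOL_TLS_CLIENT enables certificate and hostname verification by default,
    # matching the check_hostname behaviour described in the connection.py changes
    ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    ctx.load_verify_locations('ca.pem')  # hypothetical CA bundle path

    # Passing ssl_context directly avoids the deprecated legacy ssl_options path
    cluster = Cluster(['db.example.com'], ssl_context=ctx)
    session = cluster.connect()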