diff --git a/backend/entityservice/database/insertions.py b/backend/entityservice/database/insertions.py
index e04e1b25..6c123be7 100644
--- a/backend/entityservice/database/insertions.py
+++ b/backend/entityservice/database/insertions.py
@@ -213,6 +213,18 @@ def update_run_mark_failure(conn, run_id):
         cur.execute(sql_query, [run_id])
 
 
+def update_project_mark_all_runs_failed(conn, project_id):
+    with conn.cursor() as cur:
+        sql_query = """
+            UPDATE runs SET
+              state = 'error',
+              time_completed = now()
+            WHERE
+              project = %s
+            """
+        cur.execute(sql_query, [project_id])
+
+
 def mark_project_deleted(db, project_id):
     with db.cursor() as cur:
         sql_query = """
diff --git a/backend/entityservice/tasks/comparing.py b/backend/entityservice/tasks/comparing.py
index f13eba6a..e2f598c8 100644
--- a/backend/entityservice/tasks/comparing.py
+++ b/backend/entityservice/tasks/comparing.py
@@ -135,12 +135,16 @@ def compute_filter_similarity(chunk_info, project_id, run_id, threshold, encodin
     span.log_kv({'event': 'chunks are fetched and deserialized'})
     log.debug("Calculating filter similarity")
     span.log_kv({'size1': chunk_dp1_size, 'size2': chunk_dp2_size})
-    chunk_results = anonlink.concurrency.process_chunk(
-        chunk_info,
-        (chunk_dp1, chunk_dp2),
-        anonlink.similarities.dice_coefficient_accelerated,
-        threshold,
-        k=min(chunk_dp1_size, chunk_dp2_size))
+    try:
+        chunk_results = anonlink.concurrency.process_chunk(
+            chunk_info,
+            (chunk_dp1, chunk_dp2),
+            anonlink.similarities.dice_coefficient_accelerated,
+            threshold,
+            k=min(chunk_dp1_size, chunk_dp2_size))
+    except NotImplementedError as e:
+        log.warning("Encodings couldn't be compared using anonlink.")
+        return
     t3 = time.time()
     span.log_kv({'event': 'similarities calculated'})
 
diff --git a/backend/entityservice/tasks/pre_run_check.py b/backend/entityservice/tasks/pre_run_check.py
index 06e51226..aa231cd6 100644
--- a/backend/entityservice/tasks/pre_run_check.py
+++ b/backend/entityservice/tasks/pre_run_check.py
@@ -1,6 +1,7 @@
 from entityservice.async_worker import celery, logger
 from entityservice.database import DBConn, get_created_runs_and_queue, get_uploaded_encoding_sizes, \
-    get_project_schema_encoding_size, get_project_encoding_size, set_project_encoding_size
+    get_project_schema_encoding_size, get_project_encoding_size, set_project_encoding_size, \
+    update_project_mark_all_runs_failed
 from entityservice.models.run import progress_run_stage as progress_stage
 from entityservice.settings import Config as config
 from entityservice.tasks.base_task import TracedTask
@@ -25,7 +26,8 @@ def check_for_executable_runs(project_id, parent_span=None):
             check_and_set_project_encoding_size(project_id, conn)
         except ValueError as e:
             log.warning(e.args[0])
-            # todo make sure this can be exposed to user
+            # make sure this error can be exposed to user by marking the run/s as failed
+            update_project_mark_all_runs_failed(conn, project_id)
             return
 
         new_runs = get_created_runs_and_queue(conn, project_id)
@@ -72,4 +74,5 @@ def check_and_set_project_encoding_size(project_id, conn):
             handle_invalid_encoding_data(project_id, dp_id)
         raise ValueError("Encoding size out of configured bounds")
 
-
+    if encoding_size % 8:
+        raise ValueError("Encoding size must be multiple of 8 bytes (64 bits)")
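
The new pre-run validation mirrors the failure mode now caught in `comparing.py`: the patch's own error message indicates that the accelerated comparison only accepts encodings whose byte length is a whole number of 64-bit words. Below is a minimal, self-contained sketch of that check in isolation; the `validate_encoding_size` helper is hypothetical and only restates the condition added to `check_and_set_project_encoding_size`.

```python
# Hypothetical stand-alone sketch of the check added in this patch.
# Sizes are in bytes; 8 bytes == 64 bits, matching the new error message.
def validate_encoding_size(encoding_size: int) -> None:
    """Reject encoding sizes the accelerated comparison path cannot handle."""
    if encoding_size % 8:
        raise ValueError("Encoding size must be multiple of 8 bytes (64 bits)")


validate_encoding_size(128)      # 1024-bit encodings: accepted
try:
    validate_encoding_size(100)  # 800 bits is not a whole number of 64-bit words
except ValueError as e:
    print(e)
```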
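
A sketch of how the new `update_project_mark_all_runs_failed` helper could be unit tested against a mocked psycopg2-style connection. The test function, its assertions, and the use of `unittest.mock` are illustrative assumptions (they are not tests included in this patch), and the import assumes the backend package is on the path.

```python
from unittest.mock import MagicMock

from entityservice.database.insertions import update_project_mark_all_runs_failed


def test_marks_all_runs_failed_for_project():
    conn = MagicMock()
    # `with conn.cursor() as cur:` resolves to the mock returned by __enter__
    cur = conn.cursor.return_value.__enter__.return_value

    update_project_mark_all_runs_failed(conn, "project-abc123")

    # A single UPDATE should be issued, parameterised by the project id
    assert cur.execute.call_count == 1
    sql, params = cur.execute.call_args[0]
    assert "UPDATE runs" in sql
    assert params == ["project-abc123"]
```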