diff --git a/pyproject.toml b/pyproject.toml index 2c53b9b..565f58d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -108,6 +108,7 @@ ignore = [ [tool.mypy] strict = true +mypy_path = "src" plugins = [ "pydantic.mypy" ] diff --git a/tests/config_test.py b/tests/config_test.py index 287b128..3218f80 100644 --- a/tests/config_test.py +++ b/tests/config_test.py @@ -9,21 +9,21 @@ def test_apply_defaults_to_siblings_applies_defaults() -> None: input_ = {"defaults": {1: 1}, "other": {}} expected = {"other": {1: 1}} output = _apply_defaults_to_siblings(input_) - assert expected == output + assert output == expected def test_apply_defaults_to_siblings_does_not_override() -> None: input_ = {"defaults": {1: 1}, "other": {1: 2}} expected = {"other": {1: 2}} output = _apply_defaults_to_siblings(input_) - assert expected == output + assert output == expected def test_apply_defaults_to_siblings_ignores_nontables() -> None: input_ = {"defaults": {1: 1}, "other": {1: 2}, "not-a-table": 3} expected = {"other": {1: 2}, "not-a-table": 3} output = _apply_defaults_to_siblings(input_) - assert expected == output + assert output == expected def test_load_configuration_adds_environment_variables(default_configuration_file: Path) -> None: diff --git a/tests/dependencies/fetch_user_test.py b/tests/dependencies/fetch_user_test.py index faae3d5..116bbdd 100644 --- a/tests/dependencies/fetch_user_test.py +++ b/tests/dependencies/fetch_user_test.py @@ -21,8 +21,8 @@ async def test_fetch_user(api_key: str, user: User, user_test: AsyncConnection) async with aclosing(fetch_user(api_key, user_data=user_test)) as agen: db_user = await anext(agen) assert isinstance(db_user, User) - assert user.user_id == db_user.user_id - assert set(await user.get_groups()) == set(await db_user.get_groups()) + assert db_user.user_id == user.user_id + assert set(await db_user.get_groups()) == set(await user.get_groups()) async def test_fetch_user_no_key_no_user() -> None: diff --git a/tests/routers/openml/dataset_tag_test.py b/tests/routers/openml/dataset_tag_test.py index 41746f8..cddd0d8 100644 --- a/tests/routers/openml/dataset_tag_test.py +++ b/tests/routers/openml/dataset_tag_test.py @@ -36,13 +36,13 @@ async def test_dataset_tag_invalid_tag_is_rejected( tag: str, py_api: httpx.AsyncClient, ) -> None: - new = await py_api.post( + response = await py_api.post( f"/datasets/tag?api_key={ApiKey.ADMIN}", json={"data_id": 1, "tag": tag}, ) - assert new.status_code == HTTPStatus.UNPROCESSABLE_ENTITY - assert new.json()["detail"][0]["loc"] == ["body", "tag"] + assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY + assert response.json()["detail"][0]["loc"] == ["body", "tag"] # ── Direct call tests: tag_dataset ── diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py index d8fb573..be08927 100644 --- a/tests/routers/openml/datasets_list_datasets_test.py +++ b/tests/routers/openml/datasets_list_datasets_test.py @@ -74,39 +74,39 @@ async def test_list_data_identical( uri += f"/{'/'.join([str(v) for q in query for v in q])}" uri += api_key_query - new, original = await asyncio.gather( + py_response, php_response = await asyncio.gather( py_api.post(f"/datasets/list{api_key_query}", json=new_style), php_api.get(uri), ) # Note: RFC 9457 changed some status codes (PRECONDITION_FAILED -> NOT_FOUND for no results) # and the error response format, so we can't compare error responses directly. - php_is_error = original.status_code == HTTPStatus.PRECONDITION_FAILED - py_is_error = new.status_code == HTTPStatus.NOT_FOUND + php_is_error = php_response.status_code == HTTPStatus.PRECONDITION_FAILED + py_is_error = py_response.status_code == HTTPStatus.NOT_FOUND if php_is_error or py_is_error: # Both should be errors in the same cases assert php_is_error == py_is_error, ( - f"PHP status={original.status_code}, Python status={new.status_code}" + f"PHP status={php_response.status_code}, Python status={py_response.status_code}" ) # Verify Python API returns RFC 9457 format - assert new.headers["content-type"] == "application/problem+json" - error = new.json() + assert py_response.headers["content-type"] == "application/problem+json" + error = py_response.json() assert error["type"] == NoResultsError.uri assert error["code"] == "372" - assert original.json()["error"]["message"] == "No results" + assert php_response.json()["error"]["message"] == "No results" assert error["detail"] == "No datasets match the search criteria." return None - new_json = new.json() + py_json = py_response.json() # Qualities in new response are typed - for dataset in new_json: + for dataset in py_json: for quality in dataset["quality"]: quality["value"] = str(quality["value"]) # PHP API has a double nested dictionary that never has other entries - php_json = original.json()["data"]["dataset"] - assert len(php_json) == len(new_json) - assert php_json == new_json + php_json = php_response.json()["data"]["dataset"] + assert len(py_json) == len(php_json) + assert py_json == php_json return None diff --git a/tests/routers/openml/datasets_qualities_test.py b/tests/routers/openml/datasets_qualities_test.py index 841c320..fb3559c 100644 --- a/tests/routers/openml/datasets_qualities_test.py +++ b/tests/routers/openml/datasets_qualities_test.py @@ -119,7 +119,7 @@ async def test_get_quality(py_api: httpx.AsyncClient) -> None: {"name": "kNN1NErrRate", "value": 0.06347438752783964}, {"name": "kNN1NKappa", "value": 0.8261102938928316}, ] - difference = deepdiff.DeepDiff(expected, response.json(), ignore_order=True) + difference = deepdiff.DeepDiff(response.json(), expected, ignore_order=True) assert not difference @@ -130,28 +130,28 @@ async def test_get_quality(py_api: httpx.AsyncClient) -> None: async def test_get_quality_identical( data_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient ) -> None: - python_response, php_response = await asyncio.gather( + py_response, php_response = await asyncio.gather( py_api.get(f"/datasets/qualities/{data_id}"), php_api.get(f"/data/qualities/{data_id}"), ) if php_response.status_code == HTTPStatus.OK: - _assert_get_quality_success_equal(python_response, php_response) + _assert_get_quality_success_equal(py_response, php_response) return php_error_code = int(php_response.json()["error"]["code"]) if php_error_code == 361: # noqa: PLR2004 - _assert_get_quality_error_dataset_not_found(python_response, php_response) + _assert_get_quality_error_dataset_not_found(py_response, php_response) elif php_error_code == 364: # noqa: PLR2004 - _assert_get_quality_error_dataset_process_error(python_response, php_response) + _assert_get_quality_error_dataset_process_error(py_response, php_response) else: msg = f"Dataset {data_id} response not under test:", php_response.json() raise AssertionError(msg) def _assert_get_quality_success_equal( - python_response: httpx.Response, php_response: httpx.Response + py_response: httpx.Response, php_response: httpx.Response ) -> None: - assert python_response.status_code == php_response.status_code + assert py_response.status_code == php_response.status_code expected = [ { "name": quality["name"], @@ -159,34 +159,34 @@ def _assert_get_quality_success_equal( } for quality in php_response.json()["data_qualities"]["quality"] ] - assert python_response.json() == expected + assert py_response.json() == expected def _assert_get_quality_error_dataset_not_found( - python_response: httpx.Response, php_response: httpx.Response + py_response: httpx.Response, php_response: httpx.Response ) -> None: assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED - assert python_response.status_code == HTTPStatus.NOT_FOUND + assert py_response.status_code == HTTPStatus.NOT_FOUND php_error = php_response.json()["error"] - py_error = python_response.json() + py_error = py_response.json() - assert php_error["code"] == py_error["code"] + assert py_error["code"] == php_error["code"] assert php_error["message"] == "Unknown dataset" assert re.match(r"Dataset with id \d+ not found.", py_error["detail"]) def _assert_get_quality_error_dataset_process_error( - python_response: httpx.Response, php_response: httpx.Response + py_response: httpx.Response, php_response: httpx.Response ) -> None: - assert php_response.status_code == python_response.status_code + assert py_response.status_code == php_response.status_code php_error = php_response.json()["error"] - py_error = python_response.json() + py_error = py_response.json() - assert php_error["code"] == py_error["code"] + assert py_error["code"] == php_error["code"] assert php_error["message"] == "Dataset processed with error" assert py_error["title"] == "Dataset Processing Error" # The PHP can add some additional unnecessary escapes. - assert php_error["additional_information"][:30] == py_error["detail"][:30] - assert php_error["additional_information"][-30:] == py_error["detail"][-30:] + assert py_error["detail"][:30] == php_error["additional_information"][:30] + assert py_error["detail"][-30:] == php_error["additional_information"][-30:] diff --git a/tests/routers/openml/migration/datasets_migration_test.py b/tests/routers/openml/migration/datasets_migration_test.py index 73874a3..718c06f 100644 --- a/tests/routers/openml/migration/datasets_migration_test.py +++ b/tests/routers/openml/migration/datasets_migration_test.py @@ -20,39 +20,39 @@ async def test_dataset_response_is_identical( # noqa: C901, PLR0912 py_api: httpx.AsyncClient, php_api: httpx.AsyncClient, ) -> None: - new, original = await asyncio.gather( + py_response, php_response = await asyncio.gather( py_api.get(f"/datasets/{dataset_id}"), php_api.get(f"/data/{dataset_id}"), ) - if new.status_code == HTTPStatus.FORBIDDEN: - assert original.status_code == HTTPStatus.PRECONDITION_FAILED + if py_response.status_code == HTTPStatus.FORBIDDEN: + assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED else: - assert original.status_code == new.status_code + assert py_response.status_code == php_response.status_code - if new.status_code != HTTPStatus.OK: + if py_response.status_code != HTTPStatus.OK: # RFC 9457: Python API now returns problem+json format - assert new.headers["content-type"] == "application/problem+json" + assert py_response.headers["content-type"] == "application/problem+json" # Both APIs should return error responses in the same cases - assert original.json()["error"]["code"] == new.json()["code"] - old_error_message = original.json()["error"]["message"] - assert new.json()["detail"].startswith(old_error_message) + assert py_response.json()["code"] == php_response.json()["error"]["code"] + old_error_message = php_response.json()["error"]["message"] + assert py_response.json()["detail"].startswith(old_error_message) return try: - original_json = original.json()["data_set_description"] + php_json = php_response.json()["data_set_description"] except json.decoder.JSONDecodeError: pytest.skip("A PHP error occurred on the test server.") - if "div" in original_json: + if "div" in php_json: pytest.skip("A PHP error occurred on the test server.") # There are a few changes between the old API and the new API, so we convert here: # The new API has normalized `format` field: - original_json["format"] = original_json["format"].lower() + php_json["format"] = php_json["format"].lower() # Pydantic HttpURL serialization omits port 80 for HTTP urls. - original_json["url"] = original_json["url"].replace(":80", "") + php_json["url"] = php_json["url"].replace(":80", "") # There is odd behavior in the live server that I don't want to recreate: # when the creator is a list of csv names, it can either be a str or a list @@ -60,42 +60,42 @@ async def test_dataset_response_is_identical( # noqa: C901, PLR0912 # '"Alice", "Bob"' -> ["Alice", "Bob"] # 'Alice, Bob' -> 'Alice, Bob' if ( - "creator" in original_json - and isinstance(original_json["creator"], str) - and len(original_json["creator"].split(",")) > 1 + "creator" in php_json + and isinstance(php_json["creator"], str) + and len(php_json["creator"].split(",")) > 1 ): - original_json["creator"] = [name.strip() for name in original_json["creator"].split(",")] + php_json["creator"] = [name.strip() for name in php_json["creator"].split(",")] - new_body = new.json() - if processing_data := new_body.get("processing_date"): - new_body["processing_date"] = str(processing_data).replace("T", " ") + py_json = py_response.json() + if processing_data := py_json.get("processing_date"): + py_json["processing_date"] = str(processing_data).replace("T", " ") manual = [] # ref test.openml.org/d/33 (contributor) and d/34 (creator) # contributor/creator in database is '""' # json content is [] for field in ["contributor", "creator"]: - if new_body[field] == [""]: - new_body[field] = [] + if py_json[field] == [""]: + py_json[field] = [] manual.append(field) - if isinstance(new_body["original_data_url"], list): - new_body["original_data_url"] = ", ".join(str(url) for url in new_body["original_data_url"]) + if isinstance(py_json["original_data_url"], list): + py_json["original_data_url"] = ", ".join(str(url) for url in py_json["original_data_url"]) - for field, value in list(new_body.items()): + for field, value in list(py_json.items()): if field in manual: continue if isinstance(value, int): - new_body[field] = str(value) + py_json[field] = str(value) elif isinstance(value, list) and len(value) == 1: - new_body[field] = str(value[0]) - if not new_body[field]: - del new_body[field] + py_json[field] = str(value[0]) + if not py_json[field]: + del py_json[field] - if "description" not in new_body: - new_body["description"] = [] + if "description" not in py_json: + py_json["description"] = [] - assert original_json == new_body + assert py_json == php_json @pytest.mark.parametrize( @@ -141,13 +141,13 @@ async def test_private_dataset_owner_access( api_key: str, ) -> None: [private_dataset] = tests.constants.PRIVATE_DATASET_ID - new_response, old_response = await asyncio.gather( + py_response, php_response = await asyncio.gather( py_api.get(f"/datasets/{private_dataset}?api_key={api_key}"), php_api.get(f"/data/{private_dataset}?api_key={api_key}"), ) - assert old_response.status_code == HTTPStatus.OK - assert old_response.status_code == new_response.status_code - assert new_response.json()["id"] == private_dataset + assert php_response.status_code == HTTPStatus.OK + assert py_response.status_code == php_response.status_code + assert py_response.json()["id"] == private_dataset @pytest.mark.mut @@ -173,13 +173,13 @@ async def test_dataset_tag_response_is_identical( php_api: httpx.AsyncClient, ) -> None: # PHP request must happen first to check state, can't parallelize - original = await php_api.post( + php_response = await php_api.post( "/data/tag", data={"api_key": api_key, "tag": tag, "data_id": dataset_id}, ) already_tagged = ( - original.status_code == HTTPStatus.INTERNAL_SERVER_ERROR - and "already tagged" in original.json()["error"]["message"] + php_response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR + and "already tagged" in php_response.json()["error"]["message"] ) if not already_tagged: # undo the tag, because we don't want to persist this change to the database @@ -189,36 +189,36 @@ async def test_dataset_tag_response_is_identical( data={"api_key": api_key, "tag": tag, "data_id": dataset_id}, ) if ( - original.status_code != HTTPStatus.OK - and original.json()["error"]["message"] == "An Elastic Search Exception occured." + php_response.status_code != HTTPStatus.OK + and php_response.json()["error"]["message"] == "An Elastic Search Exception occured." ): pytest.skip("Encountered Elastic Search error.") - new = await py_api.post( + py_response = await py_api.post( f"/datasets/tag?api_key={api_key}", json={"data_id": dataset_id, "tag": tag}, ) # RFC 9457: Tag conflict now returns 409 instead of 500 - if original.status_code == HTTPStatus.INTERNAL_SERVER_ERROR and already_tagged: - assert new.status_code == HTTPStatus.CONFLICT - assert original.json()["error"]["code"] == new.json()["code"] - assert original.json()["error"]["message"] == "Entity already tagged by this tag." + if php_response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR and already_tagged: + assert py_response.status_code == HTTPStatus.CONFLICT + assert py_response.json()["code"] == php_response.json()["error"]["code"] + assert php_response.json()["error"]["message"] == "Entity already tagged by this tag." assert re.match( pattern=r"Dataset \d+ already tagged with " + f"'{tag}'.", - string=new.json()["detail"], + string=py_response.json()["detail"], ) return - assert original.status_code == new.status_code, original.json() - if new.status_code != HTTPStatus.OK: - assert original.json()["error"]["code"] == new.json()["code"] - assert original.json()["error"]["message"] == new.json()["detail"] + assert py_response.status_code == php_response.status_code, php_response.json() + if py_response.status_code != HTTPStatus.OK: + assert py_response.json()["code"] == php_response.json()["error"]["code"] + assert py_response.json()["detail"] == php_response.json()["error"]["message"] return - original = original.json() - new = new.json() - new = nested_remove_single_element_list(new) - assert original == new + php_json = php_response.json() + py_json = py_response.json() + py_json = nested_remove_single_element_list(py_json) + assert py_json == php_json @pytest.mark.parametrize( @@ -230,24 +230,24 @@ async def test_datasets_feature_is_identical( py_api: httpx.AsyncClient, php_api: httpx.AsyncClient, ) -> None: - new, original = await asyncio.gather( + py_response, php_response = await asyncio.gather( py_api.get(f"/datasets/features/{data_id}"), php_api.get(f"/data/features/{data_id}"), ) - assert new.status_code == original.status_code + assert py_response.status_code == php_response.status_code - if new.status_code != HTTPStatus.OK: - error = original.json()["error"] - assert error["code"] == new.json()["code"] + if py_response.status_code != HTTPStatus.OK: + error = php_response.json()["error"] + assert py_response.json()["code"] == error["code"] if error["message"] == "No features found. Additionally, dataset processed with error": pattern = r"No features found. Additionally, dataset \d+ processed with error\." - assert re.match(pattern, new.json()["detail"]) + assert re.match(pattern, py_response.json()["detail"]) else: - assert error["message"] == new.json()["detail"] + assert py_response.json()["detail"] == error["message"] return - python_body = new.json() - for feature in python_body: + py_json = py_response.json() + for feature in py_json: for key, value in list(feature.items()): if key == "nominal_values": # The old API uses `nominal_value` instead of `nominal_values` @@ -261,5 +261,5 @@ async def test_datasets_feature_is_identical( else: # The old API formats bool as string in lower-case feature[key] = str(value) if not isinstance(value, bool) else str(value).lower() - original_features = original.json()["data_features"]["feature"] - assert python_body == original_features + php_features = php_response.json()["data_features"]["feature"] + assert py_json == php_features diff --git a/tests/routers/openml/migration/evaluations_migration_test.py b/tests/routers/openml/migration/evaluations_migration_test.py index 074284f..08fb75f 100644 --- a/tests/routers/openml/migration/evaluations_migration_test.py +++ b/tests/routers/openml/migration/evaluations_migration_test.py @@ -7,29 +7,29 @@ async def test_evaluationmeasure_list( py_api: httpx.AsyncClient, php_api: httpx.AsyncClient ) -> None: - new, original = await asyncio.gather( + py_response, php_response = await asyncio.gather( py_api.get("/evaluationmeasure/list"), php_api.get("/evaluationmeasure/list"), ) - assert new.status_code == original.status_code - assert new.json() == original.json()["evaluation_measures"]["measures"]["measure"] + assert py_response.status_code == php_response.status_code + assert py_response.json() == php_response.json()["evaluation_measures"]["measures"]["measure"] async def test_estimation_procedure_list( py_api: httpx.AsyncClient, php_api: httpx.AsyncClient ) -> None: - new, original = await asyncio.gather( + py_response, php_response = await asyncio.gather( py_api.get("/estimationprocedure/list"), php_api.get("/estimationprocedure/list"), ) - assert new.status_code == original.status_code - expected = original.json()["estimationprocedures"]["estimationprocedure"] + assert py_response.status_code == php_response.status_code + expected = php_response.json()["estimationprocedures"]["estimationprocedure"] - def new_to_old(procedure: dict[str, Any]) -> dict[str, Any]: + def py_to_php(procedure: dict[str, Any]) -> dict[str, Any]: procedure = {k: str(v) for k, v in procedure.items()} if "stratified_sampling" in procedure: procedure["stratified_sampling"] = procedure["stratified_sampling"].lower() procedure["ttid"] = procedure.pop("task_type_id") return procedure - assert [new_to_old(procedure) for procedure in new.json()] == expected + assert [py_to_php(procedure) for procedure in py_response.json()] == expected diff --git a/tests/routers/openml/migration/flows_migration_test.py b/tests/routers/openml/migration/flows_migration_test.py index 0061c34..2ef7da1 100644 --- a/tests/routers/openml/migration/flows_migration_test.py +++ b/tests/routers/openml/migration/flows_migration_test.py @@ -62,13 +62,13 @@ async def test_flow_exists( async def test_get_flow_equal( flow_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient ) -> None: - response, php_response = await asyncio.gather( + py_response, php_response = await asyncio.gather( py_api.get(f"/flows/{flow_id}"), php_api.get(f"/flow/{flow_id}"), ) - assert response.status_code == HTTPStatus.OK + assert py_response.status_code == HTTPStatus.OK - new = response.json() + py_json = py_response.json() # PHP sets parameter default value to [], None is more appropriate, omission is considered # Similar for the default "identifier" of subflows. @@ -86,16 +86,16 @@ def convert_flow_naming_and_defaults(flow: dict[str, Any]) -> dict[str, Any]: flow.pop("component") return flow - new = convert_flow_naming_and_defaults(new) - new = nested_remove_single_element_list(new) + py_json = convert_flow_naming_and_defaults(py_json) + py_json = nested_remove_single_element_list(py_json) - expected = php_response.json()["flow"] - # The reason we don't transform "new" to str is that it becomes harder to ignore numeric type + php_json = php_response.json()["flow"] + # The reason we don't transform py_json to str is that it becomes harder to ignore numeric type # differences (e.g., '1.0' vs '1') - expected = nested_str_to_num(expected) + php_json = nested_str_to_num(php_json) difference = deepdiff.diff.DeepDiff( - expected, - new, + py_json, + php_json, ignore_order=True, ignore_numeric_type_changes=True, ) diff --git a/tests/routers/openml/migration/runs_migration_test.py b/tests/routers/openml/migration/runs_migration_test.py index dff9f60..826aa18 100644 --- a/tests/routers/openml/migration/runs_migration_test.py +++ b/tests/routers/openml/migration/runs_migration_test.py @@ -33,7 +33,7 @@ async def test_get_run_trace_equal( php_error = php_response.json()["error"] py_error = py_response.json() - assert php_error["code"] == py_error["code"] + assert py_error["code"] == php_error["code"] if php_error["code"] == "571": assert php_error["message"] == "Run not found." assert py_error["detail"] == f"Run {run_id} not found." @@ -51,16 +51,16 @@ def _assert_trace_response_success( assert py_response.status_code == HTTPStatus.OK assert php_response.status_code == HTTPStatus.OK - new_json = py_response.json() + py_json = py_response.json() # PHP nests response under "trace" key — match that structure - new_json = {"trace": new_json} + py_json = {"trace": py_json} # PHP uses "trace_iteration" key, Python uses "trace" - new_json["trace"]["trace_iteration"] = new_json["trace"].pop("trace") + py_json["trace"]["trace_iteration"] = py_json["trace"].pop("trace") # PHP returns all numeric values as strings — normalize Python response - new_json = nested_num_to_str(new_json) + py_json = nested_num_to_str(py_json) def _sort_trace(payload: dict[str, Any]) -> dict[str, Any]: """Sort trace iterations by (repeat, fold, iteration) for order-sensitive comparison.""" @@ -73,7 +73,7 @@ def _sort_trace(payload: dict[str, Any]) -> dict[str, Any]: return copied differences = deepdiff.diff.DeepDiff( - _sort_trace(new_json), + _sort_trace(py_json), _sort_trace(php_response.json()), ignore_order=False, ) diff --git a/tests/routers/openml/migration/setups_migration_test.py b/tests/routers/openml/migration/setups_migration_test.py index 37c8bc1..34613fc 100644 --- a/tests/routers/openml/migration/setups_migration_test.py +++ b/tests/routers/openml/migration/setups_migration_test.py @@ -71,7 +71,7 @@ async def test_setup_untag_response_is_identical_when_tag_exists( all_tags = [tag, *other_tags] async with temporary_tags(tags=all_tags, setup_id=setup_id, persist=True): - original = await php_api.post( + php_response = await php_api.post( "/setup/untag", data={"api_key": api_key, "tag": tag, "setup_id": setup_id}, ) @@ -79,33 +79,33 @@ async def test_setup_untag_response_is_identical_when_tag_exists( # expdb_test transaction shared with Python API, # no commit needed and rolled back at the end of the test async with temporary_tags(tags=all_tags, setup_id=setup_id): - new = await py_api.post( + py_response = await py_api.post( f"/setup/untag?api_key={api_key}", json={"setup_id": setup_id, "tag": tag}, ) - if new.status_code == HTTPStatus.OK: - assert original.status_code == new.status_code - original_untag = original.json()["setup_untag"] - new_untag = new.json()["setup_untag"] - assert original_untag["id"] == new_untag["id"] - if tags := original_untag.get("tag"): + if py_response.status_code == HTTPStatus.OK: + assert py_response.status_code == php_response.status_code + php_untag = php_response.json()["setup_untag"] + py_untag = py_response.json()["setup_untag"] + assert py_untag["id"] == php_untag["id"] + if tags := php_untag.get("tag"): if isinstance(tags, str): - assert tags == new_untag["tag"][0] + assert py_untag["tag"][0] == tags else: - assert tags == new_untag["tag"] + assert py_untag["tag"] == tags else: - assert new_untag["tag"] == [] + assert py_untag["tag"] == [] return - code, message = original.json()["error"].values() - assert original.status_code == HTTPStatus.PRECONDITION_FAILED - assert new.status_code == HTTPStatus.FORBIDDEN - assert code == new.json()["code"] + code, message = php_response.json()["error"].values() + assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED + assert py_response.status_code == HTTPStatus.FORBIDDEN + assert py_response.json()["code"] == code assert message == "Tag is not owned by you" assert re.match( r"You may not remove tag \S+ of setup \d+ because it was not created by you.", - new.json()["detail"], + py_response.json()["detail"], ) @@ -117,7 +117,7 @@ async def test_setup_untag_response_is_identical_setup_doesnt_exist( tag = "totally_new_tag_for_migration_testing" api_key = ApiKey.SOME_USER - original, new = await asyncio.gather( + php_response, py_response = await asyncio.gather( php_api.post( "/setup/untag", data={"api_key": api_key, "tag": tag, "setup_id": setup_id}, @@ -128,13 +128,13 @@ async def test_setup_untag_response_is_identical_setup_doesnt_exist( ), ) - assert original.status_code == HTTPStatus.PRECONDITION_FAILED - assert new.status_code == HTTPStatus.NOT_FOUND - assert original.json()["error"]["message"] == "Entity not found." - assert original.json()["error"]["code"] == new.json()["code"] + assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED + assert py_response.status_code == HTTPStatus.NOT_FOUND + assert php_response.json()["error"]["message"] == "Entity not found." + assert py_response.json()["code"] == php_response.json()["error"]["code"] assert re.match( r"Setup \d+ not found.", - new.json()["detail"], + py_response.json()["detail"], ) @@ -146,7 +146,7 @@ async def test_setup_untag_response_is_identical_tag_doesnt_exist( tag = "totally_new_tag_for_migration_testing" api_key = ApiKey.SOME_USER - original, new = await asyncio.gather( + php_response, py_response = await asyncio.gather( php_api.post( "/setup/untag", data={"api_key": api_key, "tag": tag, "setup_id": setup_id}, @@ -157,13 +157,13 @@ async def test_setup_untag_response_is_identical_tag_doesnt_exist( ), ) - assert original.status_code == HTTPStatus.PRECONDITION_FAILED - assert new.status_code == HTTPStatus.NOT_FOUND - assert original.json()["error"]["code"] == new.json()["code"] - assert original.json()["error"]["message"] == "Tag not found." + assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED + assert py_response.status_code == HTTPStatus.NOT_FOUND + assert py_response.json()["code"] == php_response.json()["error"]["code"] + assert php_response.json()["error"]["message"] == "Tag not found." assert re.match( r"Setup \d+ does not have tag '\S+'.", - new.json()["detail"], + py_response.json()["detail"], ) @@ -190,7 +190,7 @@ async def test_setup_tag_response_is_identical_when_tag_doesnt_exist( # noqa: P tag = "totally_new_tag_for_migration_testing" async with temporary_tags(tags=other_tags, setup_id=setup_id, persist=True): - original = await php_api.post( + php_response = await php_api.post( "/setup/tag", data={"api_key": api_key, "tag": tag, "setup_id": setup_id}, ) @@ -202,23 +202,23 @@ async def test_setup_tag_response_is_identical_when_tag_doesnt_exist( # noqa: P await expdb_test.commit() async with temporary_tags(tags=other_tags, setup_id=setup_id): - new = await py_api.post( + py_response = await py_api.post( f"/setup/tag?api_key={api_key}", json={"setup_id": setup_id, "tag": tag}, ) - assert new.status_code == HTTPStatus.OK - assert original.status_code == new.status_code - original_tag = original.json()["setup_tag"] - new_tag = new.json()["setup_tag"] - assert original_tag["id"] == new_tag["id"] - if tags := original_tag.get("tag"): + assert py_response.status_code == HTTPStatus.OK + assert py_response.status_code == php_response.status_code + php_tag = php_response.json()["setup_tag"] + py_tag = py_response.json()["setup_tag"] + assert py_tag["id"] == php_tag["id"] + if tags := php_tag.get("tag"): if isinstance(tags, str): - assert tags == new_tag["tag"][0] + assert py_tag["tag"][0] == tags else: - assert set(tags) == set(new_tag["tag"]) + assert set(py_tag["tag"]) == set(tags) else: - assert new_tag["tag"] == [] + assert py_tag["tag"] == [] async def test_setup_tag_response_is_identical_setup_doesnt_exist( @@ -229,7 +229,7 @@ async def test_setup_tag_response_is_identical_setup_doesnt_exist( tag = "totally_new_tag_for_migration_testing" api_key = ApiKey.SOME_USER - original, new = await asyncio.gather( + php_response, py_response = await asyncio.gather( php_api.post( "/setup/tag", data={"api_key": api_key, "tag": tag, "setup_id": setup_id}, @@ -240,13 +240,13 @@ async def test_setup_tag_response_is_identical_setup_doesnt_exist( ), ) - assert original.status_code == HTTPStatus.PRECONDITION_FAILED - assert new.status_code == HTTPStatus.NOT_FOUND - assert original.json()["error"]["message"] == "Entity not found." - assert original.json()["error"]["code"] == new.json()["code"] + assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED + assert py_response.status_code == HTTPStatus.NOT_FOUND + assert php_response.json()["error"]["message"] == "Entity not found." + assert py_response.json()["code"] == php_response.json()["error"]["code"] assert re.match( r"Setup \d+ not found.", - new.json()["detail"], + py_response.json()["detail"], ) @@ -262,7 +262,7 @@ async def test_setup_tag_response_is_identical_tag_already_exists( async with temporary_tags(tags=[tag], setup_id=setup_id, persist=True): # Both APIs can be tested in parallel since the tag is already persisted - original, new = await asyncio.gather( + php_response, py_response = await asyncio.gather( php_api.post( "/setup/tag", data={"api_key": api_key, "tag": tag, "setup_id": setup_id}, @@ -273,10 +273,10 @@ async def test_setup_tag_response_is_identical_tag_already_exists( ), ) - assert original.status_code == HTTPStatus.INTERNAL_SERVER_ERROR - assert new.status_code == HTTPStatus.CONFLICT - assert original.json()["error"]["message"] == "Entity already tagged by this tag." - assert new.json()["detail"] == f"Setup {setup_id} already has tag {tag!r}." + assert php_response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR + assert py_response.status_code == HTTPStatus.CONFLICT + assert php_response.json()["error"]["message"] == "Entity already tagged by this tag." + assert py_response.json()["detail"] == f"Setup {setup_id} already has tag {tag!r}." async def test_get_setup_response_is_identical_setup_doesnt_exist( @@ -285,16 +285,16 @@ async def test_get_setup_response_is_identical_setup_doesnt_exist( ) -> None: setup_id = 999999 - original, new = await asyncio.gather( + php_response, py_response = await asyncio.gather( php_api.get(f"/setup/{setup_id}"), py_api.get(f"/setup/{setup_id}"), ) - assert original.status_code == HTTPStatus.PRECONDITION_FAILED - assert new.status_code == HTTPStatus.NOT_FOUND - assert original.json()["error"]["message"] == "Unknown setup" - assert original.json()["error"]["code"] == new.json()["code"] - assert new.json()["detail"] == f"Setup {setup_id} not found." + assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED + assert py_response.status_code == HTTPStatus.NOT_FOUND + assert php_response.json()["error"]["message"] == "Unknown setup" + assert py_response.json()["code"] == php_response.json()["error"]["code"] + assert py_response.json()["detail"] == f"Setup {setup_id} not found." @pytest.mark.parametrize("setup_id", range(1, 125)) @@ -303,27 +303,27 @@ async def test_get_setup_response_is_identical( py_api: httpx.AsyncClient, php_api: httpx.AsyncClient, ) -> None: - original, new = await asyncio.gather( + php_response, py_response = await asyncio.gather( php_api.get(f"/setup/{setup_id}"), py_api.get(f"/setup/{setup_id}"), ) - if original.status_code == HTTPStatus.PRECONDITION_FAILED: - assert new.status_code == HTTPStatus.NOT_FOUND + if php_response.status_code == HTTPStatus.PRECONDITION_FAILED: + assert py_response.status_code == HTTPStatus.NOT_FOUND return - assert original.status_code == HTTPStatus.OK - assert new.status_code == HTTPStatus.OK + assert php_response.status_code == HTTPStatus.OK + assert py_response.status_code == HTTPStatus.OK - original_json = original.json() + php_json = php_response.json() # PHP returns integer fields as strings. To compare, we recursively convert string digits # to integers. # PHP also returns `[]` instead of null for empty string optional fields, which Python omits. - original_json = nested_str_to_num(original_json) - original_json = nested_remove_values(original_json, values=[[], None]) + php_json = nested_str_to_num(php_json) + php_json = nested_remove_values(php_json, values=[[], None]) - new_json = nested_str_to_num(new.json()) - new_json = nested_remove_values(new_json, values=[[], None]) + py_json = nested_str_to_num(py_response.json()) + py_json = nested_remove_values(py_json, values=[[], None]) - assert original_json == new_json + assert py_json == php_json diff --git a/tests/routers/openml/migration/studies_migration_test.py b/tests/routers/openml/migration/studies_migration_test.py index 07cdd0c..fc1340c 100644 --- a/tests/routers/openml/migration/studies_migration_test.py +++ b/tests/routers/openml/migration/studies_migration_test.py @@ -7,31 +7,31 @@ async def test_get_study_equal(py_api: httpx.AsyncClient, php_api: httpx.AsyncClient) -> None: - new, old = await asyncio.gather( + py_response, php_response = await asyncio.gather( py_api.get("/studies/1"), php_api.get("/study/1"), ) - assert new.status_code == old.status_code + assert py_response.status_code == php_response.status_code - new_json = new.json() + py_json = py_response.json() # New implementation is typed - new_json = nested_num_to_str(new_json) + py_json = nested_num_to_str(py_json) # New implementation has same fields even if empty - new_json = nested_remove_values(new_json, values=[None]) - new_json["tasks"] = {"task_id": new_json.pop("task_ids")} - new_json["data"] = {"data_id": new_json.pop("data_ids")} - if runs := new_json.pop("run_ids", None): - new_json["runs"] = {"run_id": runs} - if flows := new_json.pop("flow_ids", None): - new_json["flows"] = {"flow_id": flows} - if setups := new_json.pop("setup_ids", None): - new_json["setup"] = {"setup_id": setups} + py_json = nested_remove_values(py_json, values=[None]) + py_json["tasks"] = {"task_id": py_json.pop("task_ids")} + py_json["data"] = {"data_id": py_json.pop("data_ids")} + if runs := py_json.pop("run_ids", None): + py_json["runs"] = {"run_id": runs} + if flows := py_json.pop("flow_ids", None): + py_json["flows"] = {"flow_id": flows} + if setups := py_json.pop("setup_ids", None): + py_json["setup"] = {"setup_id": setups} # New implementation is not nested - new_json = {"study": new_json} + py_json = {"study": py_json} difference = deepdiff.diff.DeepDiff( - new_json, - old.json(), + py_json, + php_response.json(), ignore_order=True, ignore_numeric_type_changes=True, ) diff --git a/tests/routers/openml/migration/tasks_migration_test.py b/tests/routers/openml/migration/tasks_migration_test.py index 8957b78..a11f1a5 100644 --- a/tests/routers/openml/migration/tasks_migration_test.py +++ b/tests/routers/openml/migration/tasks_migration_test.py @@ -20,25 +20,25 @@ async def test_get_task_equal( task_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient ) -> None: - response, php_response = await asyncio.gather( + py_response, php_response = await asyncio.gather( py_api.get(f"/tasks/{task_id}"), php_api.get(f"/task/{task_id}"), ) - assert response.status_code == HTTPStatus.OK + assert py_response.status_code == HTTPStatus.OK assert php_response.status_code == HTTPStatus.OK - new_json = response.json() + py_json = py_response.json() # Some fields are renamed (old = tag, new = tags) - new_json["tag"] = new_json.pop("tags") - new_json["task_id"] = new_json.pop("id") - new_json["task_name"] = new_json.pop("name") + py_json["tag"] = py_json.pop("tags") + py_json["task_id"] = py_json.pop("id") + py_json["task_name"] = py_json.pop("name") # PHP is not typed *and* automatically removes None values - new_json = nested_remove_values(new_json, values=[None]) - new_json = nested_num_to_str(new_json) + py_json = nested_remove_values(py_json, values=[None]) + py_json = nested_num_to_str(py_json) # It also removes "value" entries for parameters if the list is empty, # it does not remove *all* empty lists, e.g., for cost_matrix input they are kept estimation_procedure = next( - v["estimation_procedure"] for v in new_json["input"] if "estimation_procedure" in v + v["estimation_procedure"] for v in py_json["input"] if "estimation_procedure" in v ) if "parameter" in estimation_procedure: estimation_procedure["parameter"] = [ @@ -46,16 +46,16 @@ async def test_get_task_equal( for parameter in estimation_procedure["parameter"] ] # Fields that may return in a list now always return a list - new_json = nested_remove_single_element_list(new_json) + py_json = nested_remove_single_element_list(py_json) # Tags are not returned if they are an empty list: - if new_json["tag"] == []: - new_json.pop("tag") + if py_json["tag"] == []: + py_json.pop("tag") # The response is no longer nested - new_json = {"task": new_json} + py_json = {"task": py_json} differences = deepdiff.diff.DeepDiff( - new_json, + py_json, php_response.json(), ignore_order=True, ) @@ -168,7 +168,7 @@ async def test_list_tasks_equal( php_ids = {int(t["task_id"]) for t in php_tasks} py_ids = {int(t["task_id"]) for t in py_tasks} - assert php_ids == py_ids, ( + assert py_ids == php_ids, ( f"PHP and Python must return the exact same task IDs: {php_ids ^ py_ids}" ) diff --git a/tests/routers/openml/qualities_list_test.py b/tests/routers/openml/qualities_list_test.py index 6ca21ec..8eb51a5 100644 --- a/tests/routers/openml/qualities_list_test.py +++ b/tests/routers/openml/qualities_list_test.py @@ -31,12 +31,12 @@ async def _remove_quality_from_database(quality_name: str, expdb_test: AsyncConn async def test_list_qualities_identical( py_api: httpx.AsyncClient, php_api: httpx.AsyncClient ) -> None: - new, original = await asyncio.gather( + py_response, php_response = await asyncio.gather( py_api.get("/datasets/qualities/list"), php_api.get("/data/qualities/list"), ) - assert original.status_code == new.status_code - assert original.json() == new.json() + assert py_response.status_code == php_response.status_code + assert py_response.json() == php_response.json() # To keep the test idempotent, we cannot test if reaction to database changes is identical @@ -157,11 +157,11 @@ async def test_list_qualities(py_api: httpx.AsyncClient, expdb_test: AsyncConnec ], }, } - assert expected == response.json() + assert response.json() == expected deleted = expected["data_qualities_list"]["quality"].pop() await _remove_quality_from_database(quality_name=deleted, expdb_test=expdb_test) response = await py_api.get("/datasets/qualities/list") assert response.status_code == HTTPStatus.OK - assert expected == response.json() + assert response.json() == expected diff --git a/tests/routers/openml/setups_tag_test.py b/tests/routers/openml/setups_tag_test.py index 12011a4..db629bc 100644 --- a/tests/routers/openml/setups_tag_test.py +++ b/tests/routers/openml/setups_tag_test.py @@ -29,7 +29,7 @@ async def test_setup_tag_api_success( assert response.status_code == HTTPStatus.OK expected = {"setup_tag": {"id": "1", "tag": ["setup_tag_via_http"]}} - assert expected == response.json() + assert response.json() == expected rows = await expdb_test.execute( text("SELECT * FROM setup_tag WHERE id = 1 AND tag = :tag"), diff --git a/tests/routers/openml/setups_untag_test.py b/tests/routers/openml/setups_untag_test.py index 3adc37f..b96671e 100644 --- a/tests/routers/openml/setups_untag_test.py +++ b/tests/routers/openml/setups_untag_test.py @@ -34,7 +34,7 @@ async def test_setup_untag_api_success( assert response.status_code == HTTPStatus.OK expected = {"setup_untag": {"id": "1", "tag": []}} - assert expected == response.json() + assert response.json() == expected rows = await expdb_test.execute( text("SELECT * FROM setup_tag WHERE id = 1 AND tag = :tag"), diff --git a/tests/routers/openml/study_post_test.py b/tests/routers/openml/study_post_test.py index df0e581..0cb00fd 100644 --- a/tests/routers/openml/study_post_test.py +++ b/tests/routers/openml/study_post_test.py @@ -21,9 +21,9 @@ async def test_create_task_study(py_api: httpx.AsyncClient) -> None: }, ) assert response.status_code == HTTPStatus.OK - new = response.json() - assert "study_id" in new - study_id = new["study_id"] + body = response.json() + assert "study_id" in body + study_id = body["study_id"] assert isinstance(study_id, int) study = await py_api.get(f"/studies/{study_id}") diff --git a/tests/routers/openml/task_list_test.py b/tests/routers/openml/task_list_test.py index 0136d20..4667967 100644 --- a/tests/routers/openml/task_list_test.py +++ b/tests/routers/openml/task_list_test.py @@ -299,9 +299,8 @@ async def test_list_tasks_quality_values_are_strings(expdb_test: AsyncConnection """Quality values must be strings (to match PHP API behaviour).""" tasks = await list_tasks(pagination=Pagination(limit=5, offset=0), expdb=expdb_test) assert any(task["quality"] for task in tasks), "Expected at least one task to have qualities" - for task in tasks: - for quality in task["quality"]: - assert isinstance(quality["value"], str) + qualities = [quality for task in tasks for quality in task["quality"]] + assert all(isinstance(quality["value"], str) for quality in qualities) @pytest.mark.parametrize( diff --git a/tests/routers/openml/task_type_get_test.py b/tests/routers/openml/task_type_get_test.py index ef8e554..61bd0c9 100644 --- a/tests/routers/openml/task_type_get_test.py +++ b/tests/routers/openml/task_type_get_test.py @@ -15,14 +15,14 @@ async def test_get_task_type( ttype_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient ) -> None: - response, original = await asyncio.gather( + py_response, php_response = await asyncio.gather( py_api.get(f"/tasktype/{ttype_id}"), php_api.get(f"/tasktype/{ttype_id}"), ) - assert response.status_code == original.status_code + assert py_response.status_code == php_response.status_code - py_json = response.json() - php_json = original.json() + py_json = py_response.json() + php_json = php_response.json() # The PHP types distinguish between single (str) or multiple (list) creator/contrib for field in ["contributor", "creator"]: diff --git a/tests/routers/openml/task_type_list_test.py b/tests/routers/openml/task_type_list_test.py index d562838..871def3 100644 --- a/tests/routers/openml/task_type_list_test.py +++ b/tests/routers/openml/task_type_list_test.py @@ -4,9 +4,9 @@ async def test_list_task_type(py_api: httpx.AsyncClient, php_api: httpx.AsyncClient) -> None: - response, original = await asyncio.gather( + py_response, php_response = await asyncio.gather( py_api.get("/tasktype/list"), php_api.get("/tasktype/list"), ) - assert response.status_code == original.status_code - assert response.json() == original.json() + assert py_response.status_code == php_response.status_code + assert py_response.json() == php_response.json()