diff --git a/pyproject.toml b/pyproject.toml index d3b013c7..d1383b43 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,19 +56,49 @@ line-length = 100 select = ["ALL"] ignore = [ "CPY", # we do not require copyright in every file - "D", # todo: docstring linting "D203", "D204", "D213", - "DTZ", # To add # Linter does not detect when types are used for Pydantic "TC001", "TC003", ] [tool.ruff.lint.per-file-ignores] -"tests/*" = [ "S101", "COM812", "D"] -"src/core/conversions.py" = ["ANN401"] +"tests/*" = ["S101", "COM812", "D"] +"src/config.py" = ["D100", "D101", "D102", "D103"] +"src/core/access.py" = ["D100", "D101", "D102", "D103"] +"src/core/conversions.py" = ["ANN401", "D100", "D101", "D102", "D103"] +"src/core/errors.py" = ["D100", "D101", "D102", "D103"] +"src/core/formatting.py" = ["D100", "D101", "D102", "D103"] +"src/database/datasets.py" = ["D100", "D101", "D102", "D103"] +"src/database/evaluations.py" = ["D100", "D101", "D102", "D103"] +"src/database/flows.py" = ["D100", "D101", "D102", "D103"] +"src/database/qualities.py" = ["D100", "D101", "D102", "D103"] +"src/database/setup.py" = ["D100", "D101", "D102", "D103"] +"src/database/studies.py" = ["D100", "D101", "D102", "D103"] +"src/database/tasks.py" = ["D100", "D101", "D102", "D103"] +"src/database/users.py" = ["D100", "D101", "D102", "D103"] +"src/main.py" = ["D100", "D101", "D102", "D103"] +"src/routers/dependencies.py" = ["D100", "D101", "D102", "D103"] +"src/routers/mldcat_ap/dataset.py" = ["D100", "D101", "D102", "D103"] +"src/routers/openml/datasets.py" = ["D100", "D101", "D102", "D103"] +"src/routers/openml/estimation_procedure.py" = ["D100", "D101", "D102", "D103"] +"src/routers/openml/evaluations.py" = ["D100", "D101", "D102", "D103"] +"src/routers/openml/flows.py" = ["D100", "D101", "D102", "D103"] +"src/routers/openml/qualities.py" = ["D100", "D101", "D102", "D103"] +"src/routers/openml/study.py" = ["D100", "D101", "D102", "D103"] +"src/routers/openml/tasks.py" = ["D100", "D101", 
"D102", "D103"] +"src/routers/openml/tasktype.py" = ["D100", "D101", "D102", "D103"] +"src/routers/types.py" = ["D100", "D101", "D102", "D103"] +"src/schemas/core.py" = ["D100", "D101", "D102", "D103"] +"src/schemas/datasets/__init__.py" = ["D100", "D101", "D102", "D103"] +"src/schemas/datasets/convertor.py" = ["D100", "D101", "D102", "D103"] +"src/schemas/datasets/dcat.py" = ["D100", "D101", "D102", "D103"] +"src/schemas/datasets/mldcat_ap.py" = ["D100", "D101", "D102", "D103"] +"src/schemas/datasets/openml.py" = ["D100", "D101", "D102", "D103"] +"src/schemas/flows.py" = ["D100", "D101", "D102", "D103"] +"src/schemas/study.py" = ["D100", "D101", "D102", "D103"] [tool.mypy] strict = true diff --git a/src/core/__init__.py b/src/core/__init__.py index e69de29b..4a99f102 100644 --- a/src/core/__init__.py +++ b/src/core/__init__.py @@ -0,0 +1 @@ +"""Core functionality for the OpenML server API.""" diff --git a/src/core/conversions.py b/src/core/conversions.py index 1e1fbe16..07ec71ef 100644 --- a/src/core/conversions.py +++ b/src/core/conversions.py @@ -3,7 +3,7 @@ def _str_to_num(string: str) -> int | float | str: - """Tries to convert the string to integer, otherwise float, otherwise returns the input.""" + """Try to convert the string to integer, otherwise float, otherwise returns the input.""" if string.isdigit(): return int(string) try: @@ -13,8 +13,10 @@ def _str_to_num(string: str) -> int | float | str: def nested_str_to_num(obj: Any) -> Any: - """Recursively tries to convert all strings in the object to numbers. - For dictionaries, only the values will be converted.""" + """Recursively try to convert all strings in the object to numbers. + + For dictionaries, only the values will be converted. + """ if isinstance(obj, str): return _str_to_num(obj) if isinstance(obj, Mapping): @@ -25,8 +27,10 @@ def nested_str_to_num(obj: Any) -> Any: def nested_num_to_str(obj: Any) -> Any: - """Recursively tries to convert all numbers in the object to strings. 
- For dictionaries, only the values will be converted.""" + """Recursively try to convert all numbers in the object to strings. + + For dictionaries, only the values will be converted. + """ if isinstance(obj, str): return obj if isinstance(obj, Mapping): diff --git a/src/database/__init__.py b/src/database/__init__.py index e69de29b..a47c12a6 100644 --- a/src/database/__init__.py +++ b/src/database/__init__.py @@ -0,0 +1 @@ +"""Database access for the OpenML server API.""" diff --git a/src/database/datasets.py b/src/database/datasets.py index f011a651..7478f41e 100644 --- a/src/database/datasets.py +++ b/src/database/datasets.py @@ -1,4 +1,4 @@ -"""Translation from https://github.com/openml/OpenML/blob/c19c9b99568c0fabb001e639ff6724b9a754bbc9/openml_OS/models/api/v1/Api_data.php#L707""" +"""Translation from https://github.com/openml/OpenML/blob/c19c9b99568c0fabb001e639ff6724b9a754bbc9/openml_OS/models/api/v1/Api_data.php#L707.""" import datetime @@ -162,7 +162,7 @@ def update_status( parameters={ "dataset": dataset_id, "status": status, - "date": datetime.datetime.now(), + "date": datetime.datetime.now(datetime.UTC), "user": user_id, }, ) diff --git a/src/database/flows.py b/src/database/flows.py index 3129e91e..b678a6d6 100644 --- a/src/database/flows.py +++ b/src/database/flows.py @@ -51,7 +51,7 @@ def get_parameters(flow_id: int, expdb: Connection) -> Sequence[Row]: def get_by_name(name: str, external_version: str, expdb: Connection) -> Row | None: - """Gets flow by name and external version.""" + """Get flow by name and external version.""" return expdb.execute( text( """ diff --git a/src/database/studies.py b/src/database/studies.py index 35c1b790..e45717a8 100644 --- a/src/database/studies.py +++ b/src/database/studies.py @@ -1,6 +1,6 @@ import re from collections.abc import Sequence -from datetime import datetime +from datetime import UTC, datetime from typing import cast from sqlalchemy import Connection, Row, text @@ -98,7 +98,7 @@ def create(study: 
CreateStudy, user: User, expdb: Connection) -> int: "main_entity_type": study.main_entity_type, "description": study.description, "creator": user.user_id, - "creation_date": datetime.now(), + "creation_date": datetime.now(UTC), "benchmark_suite": study.benchmark_suite, }, ) diff --git a/src/routers/__init__.py b/src/routers/__init__.py index e69de29b..21016d13 100644 --- a/src/routers/__init__.py +++ b/src/routers/__init__.py @@ -0,0 +1 @@ +"""API routers for the OpenML server API.""" diff --git a/src/routers/mldcat_ap/__init__.py b/src/routers/mldcat_ap/__init__.py index e69de29b..f78d8ba4 100644 --- a/src/routers/mldcat_ap/__init__.py +++ b/src/routers/mldcat_ap/__init__.py @@ -0,0 +1 @@ +"""Routers for the MLDCAT-AP API.""" diff --git a/src/routers/mldcat_ap/dataset.py b/src/routers/mldcat_ap/dataset.py index db34e5ce..7fb3bc8a 100644 --- a/src/routers/mldcat_ap/dataset.py +++ b/src/routers/mldcat_ap/dataset.py @@ -1,4 +1,4 @@ -"""Router for MLDCAT-AP endpoints: https://semiceu.github.io/MLDCAT-AP/releases/1.0.0/#examples +"""Router for MLDCAT-AP endpoints: https://semiceu.github.io/MLDCAT-AP/releases/1.0.0/#examples. Incredibly inefficient, but it's just a proof of concept. Specific queries could be written to fetch e.g., a single feature or quality. diff --git a/src/routers/openml/__init__.py b/src/routers/openml/__init__.py index e69de29b..5d924fcf 100644 --- a/src/routers/openml/__init__.py +++ b/src/routers/openml/__init__.py @@ -0,0 +1 @@ +"""Routers for the OpenML API.""" diff --git a/src/routers/openml/datasets.py b/src/routers/openml/datasets.py index dda25117..79ce8718 100644 --- a/src/routers/openml/datasets.py +++ b/src/routers/openml/datasets.py @@ -264,7 +264,7 @@ def _get_dataset_raise_otherwise( user: User | None, expdb: Connection, ) -> Row: - """Fetches the dataset from the database if it exists and the user has permissions. + """Fetch the dataset from the database if it exists and the user has permissions. 
Raises HTTPException if the dataset does not exist or the user can not access it. """ diff --git a/src/routers/openml/tasks.py b/src/routers/openml/tasks.py index 8397f1da..0fecc4c2 100644 --- a/src/routers/openml/tasks.py +++ b/src/routers/openml/tasks.py @@ -33,8 +33,9 @@ def fill_template( task_inputs: dict[str, str | int], connection: Connection, ) -> dict[str, JSON]: - """Fill in the XML template as used for task descriptions and return the result, - converted to JSON. + """Fill in the XML template as used for task descriptions and return the result. + + The result is converted to JSON. template, str: A string represent XML, as detailed below. diff --git a/src/schemas/__init__.py b/src/schemas/__init__.py index e69de29b..89fb0dba 100644 --- a/src/schemas/__init__.py +++ b/src/schemas/__init__.py @@ -0,0 +1 @@ +"""Pydantic schemas for the OpenML server API.""" diff --git a/src/schemas/datasets/__init__.py b/src/schemas/datasets/__init__.py index 913fea99..c34a2d8f 100644 --- a/src/schemas/datasets/__init__.py +++ b/src/schemas/datasets/__init__.py @@ -1,3 +1,5 @@ +"""Dataset schemas for the OpenML server API.""" + from enum import StrEnum diff --git a/src/schemas/datasets/dcat.py b/src/schemas/datasets/dcat.py index 06194814..848866d2 100644 --- a/src/schemas/datasets/dcat.py +++ b/src/schemas/datasets/dcat.py @@ -1,8 +1,9 @@ -""" +"""DCAT-AP schema definitions. + This file is copied over from an external source. Original Author: Jos van der Velde Source: https://github.com/aiondemand/AIOD-rest-api/blob/develop/src/converters/schema/dcat.py -License: MIT +License: MIT. 
Based on DCAT Application Profile for data portals in Europe Version 2.1.1 @@ -36,7 +37,7 @@ class DcatAPContext(BaseModel): class DcatAPObject(BaseModel, ABC): - """Base class for all DCAT-AP objects""" + """Base class for all DCAT-AP objects.""" id_: str = Field(serialization_alias="@id") @@ -198,7 +199,7 @@ class DcatAPDataset(DcatAPObject): class DcatApWrapper(BaseModel): - """The resulting class, containing a dataset and related entities in the graph""" + """The resulting class, containing a dataset and related entities in the graph.""" context_: DcatAPContext = Field( default=DcatAPContext(), diff --git a/src/schemas/datasets/mldcat_ap.py b/src/schemas/datasets/mldcat_ap.py index ffbe6443..1827270a 100644 --- a/src/schemas/datasets/mldcat_ap.py +++ b/src/schemas/datasets/mldcat_ap.py @@ -1,5 +1,6 @@ -""" -Based on MLDCAT-AP 1.0.0: https://semiceu.github.io/MLDCAT-AP/releases/1.0.0/ +"""MLDCAT-AP schema definitions based on MLDCAT-AP 1.0.0. + +See: https://semiceu.github.io/MLDCAT-AP/releases/1.0.0/ This is an application profile, aimed to extend the use of DCAT-AP, originally envisaged for the description of a machine learning process, @@ -18,7 +19,7 @@ class JsonLDQualifiedLiteral(BaseModel): - """Base class for all JSON-LD objects""" + """Base class for all JSON-LD objects.""" type_: str = Field(serialization_alias="@type") value: str = Field(serialization_alias="@value") @@ -30,7 +31,7 @@ class JsonLDQualifiedLiteral(BaseModel): class JsonLDObject(BaseModel, ABC): - """Base class for all JSON-LD objects""" + """Base class for all JSON-LD objects.""" id_: str = Field(serialization_alias="@id") type_: str = Field(serialization_alias="@type") @@ -48,7 +49,7 @@ class JsonLDObjectReference[T: JsonLDObject](BaseModel): @classmethod def to(cls, json_ld_object: T) -> JsonLDObjectReference[T]: - """Create a reference to `json_ld_object`""" + """Create a reference to `json_ld_object`.""" return cls(id_=json_ld_object.id_) @model_serializer @@ -57,7 +58,7 @@ def 
ser_model(self) -> str: } class AccessRights(StrEnum): - """Recommend values for 'access rights' within DCAT-AP context""" + """Recommended values for 'access rights' within DCAT-AP context.""" # https://op.europa.eu/en/web/eu-vocabularies/dataset/-/resource?uri=http://publications.europa.eu/resource/dataset/access-right PUBLIC = "PUBLIC" @@ -66,9 +67,10 @@ class Agent(JsonLDObject): - """Any entity carrying out actions with respect to the (Core) entities Catalogue, - Datasets, Data Services and Distributions. If the Agent is an organisation, - the use of the Organization Ontology is recommended. + """Any entity carrying out actions with respect to the (Core) entities. + + Catalogue, Datasets, Data Services and Distributions. If the Agent is an + organisation, the use of the Organization Ontology is recommended. """ type_: Literal["Agent"] = Field(default="Agent", serialization_alias="@type") @@ -81,6 +83,7 @@ class Agent(JsonLDObject): class MD5Checksum(JsonLDObject): """A value that allows the contents of a file to be authenticated. + This class allows the results of a variety of checksum and cryptographic message digest algorithms to be represented.
""" diff --git a/src/schemas/datasets/openml.py b/src/schemas/datasets/openml.py index 8edb373c..b1c23a73 100644 --- a/src/schemas/datasets/openml.py +++ b/src/schemas/datasets/openml.py @@ -1,6 +1,6 @@ from __future__ import annotations -from datetime import datetime +from datetime import UTC, datetime from enum import StrEnum from typing import Any @@ -91,10 +91,10 @@ class DatasetMetadata(BaseModel): }, ) upload_date: datetime = Field( - json_schema_extra={"example": str(datetime(2014, 4, 6, 23, 12, 20))}, + json_schema_extra={"example": str(datetime(2014, 4, 6, 23, 12, 20, tzinfo=UTC))}, ) processing_date: datetime | None = Field( - json_schema_extra={"example": str(datetime(2019, 7, 9, 15, 22, 3))}, + json_schema_extra={"example": str(datetime(2019, 7, 9, 15, 22, 3, tzinfo=UTC))}, ) processing_error: str | None = Field( json_schema_extra={"example": "Please provide description XML."}, diff --git a/tests/routers/openml/study_test.py b/tests/routers/openml/study_test.py index a9a8ed4a..d7e9eb15 100644 --- a/tests/routers/openml/study_test.py +++ b/tests/routers/openml/study_test.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import UTC, datetime from http import HTTPStatus import httpx @@ -494,11 +494,8 @@ def test_create_task_study(py_api: TestClient) -> None: } new_study = study.json() - creation_date = datetime.strptime( - new_study.pop("creation_date"), - "%Y-%m-%dT%H:%M:%S", - ) - assert creation_date.date() == datetime.now().date() + creation_date = datetime.fromisoformat(new_study.pop("creation_date")) + assert creation_date.date() == datetime.now(UTC).date() assert new_study == expected