38 changes: 34 additions & 4 deletions pyproject.toml
@@ -56,19 +56,49 @@ line-length = 100
select = ["ALL"]
ignore = [
"CPY", # we do not require copyright in every file
"D", # todo: docstring linting
"D203",
"D204",
"D213",
"DTZ", # To add
# Linter does not detect when types are used for Pydantic
"TC001",
"TC003",
]

[tool.ruff.lint.per-file-ignores]
"tests/*" = [ "S101", "COM812", "D"]
"src/core/conversions.py" = ["ANN401"]
"tests/*" = ["S101", "COM812", "D"]

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

fd --type f --extension py . tests/

Repository: openml/server-api

Length of output: 991


Change glob pattern to recursively match nested test files.

The pattern "tests/*" only matches files directly in the tests/ directory. However, the repository contains nested test files at multiple levels (e.g., tests/routers/openml/study_test.py, tests/database/flows_test.py, tests/routers/openml/migration/datasets_migration_test.py). The D (docstring) rules will incorrectly fire on these nested files instead of being suppressed. Use "tests/**" for recursive coverage.

🔧 Proposed fix
-"tests/*" = ["S101", "COM812", "D"]
+"tests/**" = ["S101", "COM812", "D"]
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
"tests/*" = ["S101", "COM812", "D"]
"tests/**" = ["S101", "COM812", "D"]
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@pyproject.toml` at line 68, update the glob used to suppress rules for test
files: replace the non-recursive pattern "tests/*" with the recursive pattern
"tests/**" in the pyproject TOML setting that maps test globs to
["S101","COM812","D"] so nested test files (e.g., tests/routers/... and
tests/database/...) are properly matched and the D rule is suppressed for all
test subdirectories.
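As a side note, the non-recursive vs recursive distinction can be sketched with Python's stdlib glob module — an analogy only, since Ruff implements its own matcher, but the `*` vs `**` idea is the same:

```python
import glob
import os
import tempfile

# Build a throwaway tree mirroring the repository layout described above.
with tempfile.TemporaryDirectory() as root:
    os.makedirs(os.path.join(root, "tests", "routers", "openml"))
    open(os.path.join(root, "tests", "conftest.py"), "w").close()
    open(os.path.join(root, "tests", "routers", "openml", "study_test.py"), "w").close()

    # "tests/*" analogue: matches only direct children of tests/.
    flat = glob.glob(os.path.join(root, "tests", "*.py"))
    # "tests/**" analogue: recursive=True lets ** span subdirectories.
    deep = glob.glob(os.path.join(root, "tests", "**", "*.py"), recursive=True)

print(len(flat))  # 1
print(len(deep))  # 2
```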

"src/config.py" = ["D100", "D101", "D102", "D103"]
"src/core/access.py" = ["D100", "D101", "D102", "D103"]
"src/core/conversions.py" = ["ANN401", "D100", "D101", "D102", "D103"]
"src/core/errors.py" = ["D100", "D101", "D102", "D103"]
"src/core/formatting.py" = ["D100", "D101", "D102", "D103"]
"src/database/datasets.py" = ["D100", "D101", "D102", "D103"]
"src/database/evaluations.py" = ["D100", "D101", "D102", "D103"]
"src/database/flows.py" = ["D100", "D101", "D102", "D103"]
"src/database/qualities.py" = ["D100", "D101", "D102", "D103"]
"src/database/setup.py" = ["D100", "D101", "D102", "D103"]
"src/database/studies.py" = ["D100", "D101", "D102", "D103"]
"src/database/tasks.py" = ["D100", "D101", "D102", "D103"]
"src/database/users.py" = ["D100", "D101", "D102", "D103"]
"src/main.py" = ["D100", "D101", "D102", "D103"]
"src/routers/dependencies.py" = ["D100", "D101", "D102", "D103"]
"src/routers/mldcat_ap/dataset.py" = ["D100", "D101", "D102", "D103"]
"src/routers/openml/datasets.py" = ["D100", "D101", "D102", "D103"]
"src/routers/openml/estimation_procedure.py" = ["D100", "D101", "D102", "D103"]
"src/routers/openml/evaluations.py" = ["D100", "D101", "D102", "D103"]
"src/routers/openml/flows.py" = ["D100", "D101", "D102", "D103"]
"src/routers/openml/qualities.py" = ["D100", "D101", "D102", "D103"]
"src/routers/openml/study.py" = ["D100", "D101", "D102", "D103"]
"src/routers/openml/tasks.py" = ["D100", "D101", "D102", "D103"]
"src/routers/openml/tasktype.py" = ["D100", "D101", "D102", "D103"]
"src/routers/types.py" = ["D100", "D101", "D102", "D103"]
"src/schemas/core.py" = ["D100", "D101", "D102", "D103"]
"src/schemas/datasets/__init__.py" = ["D100", "D101", "D102", "D103"]
"src/schemas/datasets/convertor.py" = ["D100", "D101", "D102", "D103"]
"src/schemas/datasets/dcat.py" = ["D100", "D101", "D102", "D103"]
"src/schemas/datasets/mldcat_ap.py" = ["D100", "D101", "D102", "D103"]
"src/schemas/datasets/openml.py" = ["D100", "D101", "D102", "D103"]
"src/schemas/flows.py" = ["D100", "D101", "D102", "D103"]
"src/schemas/study.py" = ["D100", "D101", "D102", "D103"]

[tool.mypy]
strict = true
1 change: 1 addition & 0 deletions src/core/__init__.py
@@ -0,0 +1 @@
"""Core functionality for the OpenML server API."""
14 changes: 9 additions & 5 deletions src/core/conversions.py
@@ -3,7 +3,7 @@


def _str_to_num(string: str) -> int | float | str:
"""Tries to convert the string to integer, otherwise float, otherwise returns the input."""
"""Try to convert the string to integer, otherwise float, otherwise returns the input."""
if string.isdigit():
return int(string)
try:
@@ -13,8 +13,10 @@ def _str_to_num(string: str) -> int | float | str:


def nested_str_to_num(obj: Any) -> Any:
"""Recursively tries to convert all strings in the object to numbers.
For dictionaries, only the values will be converted."""
"""Recursively try to convert all strings in the object to numbers.

For dictionaries, only the values will be converted.
"""
Comment on lines 15 to +19

suggestion: Docstring for nested_str_to_num under-describes behavior for non-dict containers.

Please also document how non-dict containers are handled (e.g., that lists/tuples/sets are fully traversed and all string elements are converted where possible) so the helper’s behavior is clear at call sites.

Suggested change
def nested_str_to_num(obj: Any) -> Any:
"""Recursively tries to convert all strings in the object to numbers.
For dictionaries, only the values will be converted."""
"""Recursively try to convert all strings in the object to numbers.
For dictionaries, only the values will be converted.
"""
def nested_str_to_num(obj: Any) -> Any:
"""Recursively try to convert all strings in the object to numbers.
For dictionaries, only the values will be converted.
For non-dict containers (e.g., lists, tuples, sets), all elements are
recursively traversed and any string elements are converted to numbers
where possible, preserving the original container type and structure.
"""

if isinstance(obj, str):
return _str_to_num(obj)
if isinstance(obj, Mapping):
@@ -25,8 +27,10 @@ def nested_str_to_num(obj: Any) -> Any:


def nested_num_to_str(obj: Any) -> Any:
"""Recursively tries to convert all numbers in the object to strings.
For dictionaries, only the values will be converted."""
"""Recursively try to convert all numbers in the object to strings.

For dictionaries, only the values will be converted.
"""
if isinstance(obj, str):
return obj
if isinstance(obj, Mapping):
1 change: 1 addition & 0 deletions src/database/__init__.py
@@ -0,0 +1 @@
"""Database access for the OpenML server API."""
4 changes: 2 additions & 2 deletions src/database/datasets.py
@@ -1,4 +1,4 @@
"""Translation from https://github.com/openml/OpenML/blob/c19c9b99568c0fabb001e639ff6724b9a754bbc9/openml_OS/models/api/v1/Api_data.php#L707"""
"""Translation from https://github.com/openml/OpenML/blob/c19c9b99568c0fabb001e639ff6724b9a754bbc9/openml_OS/models/api/v1/Api_data.php#L707."""

import datetime

@@ -162,7 +162,7 @@ def update_status(
parameters={
"dataset": dataset_id,
"status": status,
"date": datetime.datetime.now(),
"date": datetime.datetime.now(datetime.UTC),
"user": user_id,
},
)
2 changes: 1 addition & 1 deletion src/database/flows.py
@@ -51,7 +51,7 @@ def get_parameters(flow_id: int, expdb: Connection) -> Sequence[Row]:


def get_by_name(name: str, external_version: str, expdb: Connection) -> Row | None:
"""Gets flow by name and external version."""
"""Get flow by name and external version."""
return expdb.execute(
text(
"""
4 changes: 2 additions & 2 deletions src/database/studies.py
@@ -1,6 +1,6 @@
import re
from collections.abc import Sequence
from datetime import datetime
from datetime import UTC, datetime
from typing import cast

from sqlalchemy import Connection, Row, text
@@ -98,7 +98,7 @@ def create(study: CreateStudy, user: User, expdb: Connection) -> int:
"main_entity_type": study.main_entity_type,
"description": study.description,
"creator": user.user_id,
"creation_date": datetime.now(),
"creation_date": datetime.now(UTC),
"benchmark_suite": study.benchmark_suite,
},
)
1 change: 1 addition & 0 deletions src/routers/__init__.py
@@ -0,0 +1 @@
"""API routers for the OpenML server API."""
1 change: 1 addition & 0 deletions src/routers/mldcat_ap/__init__.py
@@ -0,0 +1 @@
"""Routers for the MLDCAT-AP API."""
2 changes: 1 addition & 1 deletion src/routers/mldcat_ap/dataset.py
@@ -1,4 +1,4 @@
"""Router for MLDCAT-AP endpoints: https://semiceu.github.io/MLDCAT-AP/releases/1.0.0/#examples
"""Router for MLDCAT-AP endpoints: https://semiceu.github.io/MLDCAT-AP/releases/1.0.0/#examples.

Incredibly inefficient, but it's just a proof of concept.
Specific queries could be written to fetch e.g., a single feature or quality.
1 change: 1 addition & 0 deletions src/routers/openml/__init__.py
@@ -0,0 +1 @@
"""Routers for the OpenML API."""
2 changes: 1 addition & 1 deletion src/routers/openml/datasets.py
@@ -264,7 +264,7 @@ def _get_dataset_raise_otherwise(
user: User | None,
expdb: Connection,
) -> Row:
"""Fetches the dataset from the database if it exists and the user has permissions.
"""Fetch the dataset from the database if it exists and the user has permissions.

Raises HTTPException if the dataset does not exist or the user can not access it.
"""
5 changes: 3 additions & 2 deletions src/routers/openml/tasks.py
@@ -33,8 +33,9 @@ def fill_template(
task_inputs: dict[str, str | int],
connection: Connection,
) -> dict[str, JSON]:
"""Fill in the XML template as used for task descriptions and return the result,
converted to JSON.
"""Fill in the XML template as used for task descriptions and return the result.

The result is converted to JSON.

template, str:
A string represent XML, as detailed below.
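To illustrate the template-to-JSON idea, here is a hypothetical minimal converter using the stdlib ElementTree. The name xml_to_json and its structure are illustrative only — not the actual fill_template implementation, which also performs input substitution and need not handle repeated tags this way:

```python
import xml.etree.ElementTree as ET
from typing import Any


def xml_to_json(xml: str) -> dict[str, Any]:
    """Convert an XML string to nested dicts, with leaf text as values.

    Note: repeated sibling tags would collapse into one key in this sketch.
    """
    def convert(element: ET.Element) -> Any:
        children = list(element)
        if not children:
            return element.text
        return {child.tag: convert(child) for child in children}

    root = ET.fromstring(xml)
    return {root.tag: convert(root)}


print(xml_to_json("<task><id>1</id><type>clf</type></task>"))
# {'task': {'id': '1', 'type': 'clf'}}
```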
1 change: 1 addition & 0 deletions src/schemas/__init__.py
@@ -0,0 +1 @@
"""Pydantic schemas for the OpenML server API."""
2 changes: 2 additions & 0 deletions src/schemas/datasets/__init__.py
@@ -1,3 +1,5 @@
"""Dataset schemas for the OpenML server API."""

from enum import StrEnum


9 changes: 5 additions & 4 deletions src/schemas/datasets/dcat.py
@@ -1,8 +1,9 @@
"""
"""DCAT-AP schema definitions.

This file is copied over from an external source.
Original Author: Jos van der Velde
Source: https://github.com/aiondemand/AIOD-rest-api/blob/develop/src/converters/schema/dcat.py
License: MIT
License: MIT.

Based on DCAT Application Profile for data portals in Europe Version 2.1.1

@@ -36,7 +37,7 @@ class DcatAPContext(BaseModel):


class DcatAPObject(BaseModel, ABC):
"""Base class for all DCAT-AP objects"""
"""Base class for all DCAT-AP objects."""

id_: str = Field(serialization_alias="@id")

@@ -198,7 +199,7 @@ class DcatAPDataset(DcatAPObject):


class DcatApWrapper(BaseModel):
"""The resulting class, containing a dataset and related entities in the graph"""
"""The resulting class, containing a dataset and related entities in the graph."""

context_: DcatAPContext = Field(
default=DcatAPContext(),
21 changes: 12 additions & 9 deletions src/schemas/datasets/mldcat_ap.py
@@ -1,5 +1,6 @@
"""
Based on MLDCAT-AP 1.0.0: https://semiceu.github.io/MLDCAT-AP/releases/1.0.0/
"""MLDCAT-AP schema definitions based on MLDCAT-AP 1.0.0.

See: https://semiceu.github.io/MLDCAT-AP/releases/1.0.0/

This is an application profile, aimed to extend the use of DCAT-AP,
originally envisaged for the description of a machine learning process,
@@ -18,7 +19,7 @@


class JsonLDQualifiedLiteral(BaseModel):
"""Base class for all JSON-LD objects"""
"""Base class for all JSON-LD objects."""

⚠️ Potential issue | 🟡 Minor

JsonLDQualifiedLiteral has the wrong docstring — copied from JsonLDObject.

JsonLDQualifiedLiteral is a concrete value model (two fields: type_ and value) and is not a base class. The docstring "Base class for all JSON-LD objects." belongs to JsonLDObject (the actual ABC at line 34), and appears to have been copy-pasted here.

📝 Proposed fix
-    """Base class for all JSON-LD objects."""
+    """A JSON-LD qualified literal pairing an explicit type with a string value."""
📝 Committable suggestion


Suggested change
"""Base class for all JSON-LD objects."""
"""A JSON-LD qualified literal pairing an explicit type with a string value."""
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/schemas/datasets/mldcat_ap.py` at line 22, the docstring on
JsonLDQualifiedLiteral is incorrect — it was copied from JsonLDObject; update
the docstring for the JsonLDQualifiedLiteral class to describe that it is a
concrete JSON-LD literal model with fields type_ and value (e.g., "Concrete
JSON-LD literal with fields `type_` and `value`"), and ensure JsonLDObject
retains the "Base class for all JSON-LD objects." description so the two classes
have accurate, distinct docstrings.



type_: str = Field(serialization_alias="@type")
value: str = Field(serialization_alias="@value")
@@ -30,7 +31,7 @@ class JsonLDQualifiedLiteral(BaseModel):


class JsonLDObject(BaseModel, ABC):
"""Base class for all JSON-LD objects"""
"""Base class for all JSON-LD objects."""

id_: str = Field(serialization_alias="@id")
type_: str = Field(serialization_alias="@type")
@@ -48,7 +49,7 @@ class JsonLDObjectReference[T: JsonLDObject](BaseModel):

@classmethod
def to(cls, json_ld_object: T) -> JsonLDObjectReference[T]:
"""Create a reference to `json_ld_object`"""
"""Create a reference to `json_ld_object`."""
return cls(id_=json_ld_object.id_)

@model_serializer
@@ -57,7 +58,7 @@ def ser_model(self) -> str:


class AccessRights(StrEnum):
"""Recommend values for 'access rights' within DCAT-AP context"""
"""Recommend values for 'access rights' within DCAT-AP context."""

# https://op.europa.eu/en/web/eu-vocabularies/dataset/-/resource?uri=http://publications.europa.eu/resource/dataset/access-right
PUBLIC = "PUBLIC"
@@ -66,9 +67,10 @@ class AccessRights(StrEnum):


class Agent(JsonLDObject):
"""Any entity carrying out actions with respect to the (Core) entities Catalogue,
Datasets, Data Services and Distributions. If the Agent is an organisation,
the use of the Organization Ontology is recommended.
"""Any entity carrying out actions with respect to the (Core) entities.

Catalogue, Datasets, Data Services and Distributions. If the Agent is an
organisation, the use of the Organization Ontology is recommended.
"""
Comment on lines 69 to 74

nitpick (typo): Agent docstring reads as two separate sentences where the second is a fragment.

Consider reflowing to something like: Any entity carrying out actions with respect to the (Core) entities Catalogue, Datasets, Data Services and Distributions. to keep the description grammatically consistent while still incorporating the additional detail.

Suggested change
class Agent(JsonLDObject):
"""Any entity carrying out actions with respect to the (Core) entities Catalogue,
Datasets, Data Services and Distributions. If the Agent is an organisation,
the use of the Organization Ontology is recommended.
"""Any entity carrying out actions with respect to the (Core) entities.
Catalogue, Datasets, Data Services and Distributions. If the Agent is an
organisation, the use of the Organization Ontology is recommended.
"""
class Agent(JsonLDObject):
+ """Any entity carrying out actions with respect to the (Core) entities Catalogue,
+ Datasets, Data Services and Distributions. If the Agent is an organisation, the
+ use of the Organization Ontology is recommended.
+ """


type_: Literal["Agent"] = Field(default="Agent", serialization_alias="@type")
@@ -81,6 +83,7 @@ class Agent(JsonLDObject):

class MD5Checksum(JsonLDObject):
"""A value that allows the contents of a file to be authenticated.

This class allows the results of a variety of checksum and cryptographic
message digest algorithms to be represented.
"""
6 changes: 3 additions & 3 deletions src/schemas/datasets/openml.py
@@ -1,6 +1,6 @@
from __future__ import annotations

from datetime import datetime
from datetime import UTC, datetime
from enum import StrEnum
from typing import Any

@@ -91,10 +91,10 @@ class DatasetMetadata(BaseModel):
},
)
upload_date: datetime = Field(
json_schema_extra={"example": str(datetime(2014, 4, 6, 23, 12, 20))},
json_schema_extra={"example": str(datetime(2014, 4, 6, 23, 12, 20, tzinfo=UTC))},
)
processing_date: datetime | None = Field(
json_schema_extra={"example": str(datetime(2019, 7, 9, 15, 22, 3))},
json_schema_extra={"example": str(datetime(2019, 7, 9, 15, 22, 3, tzinfo=UTC))},
)
processing_error: str | None = Field(
json_schema_extra={"example": "Please provide description XML."},
9 changes: 3 additions & 6 deletions tests/routers/openml/study_test.py
@@ -1,4 +1,4 @@
from datetime import datetime
from datetime import UTC, datetime
from http import HTTPStatus

import httpx
@@ -494,11 +494,8 @@ def test_create_task_study(py_api: TestClient) -> None:
}
new_study = study.json()

creation_date = datetime.strptime(
new_study.pop("creation_date"),
"%Y-%m-%dT%H:%M:%S",
)
assert creation_date.date() == datetime.now().date()
creation_date = datetime.fromisoformat(new_study.pop("creation_date"))
assert creation_date.date() == datetime.now(UTC).date()
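The parsing switch is not cosmetic: a fixed strptime format rejects offset-aware ISO strings, while fromisoformat accepts both naive and aware forms. A sketch:

```python
from datetime import datetime

stamp = "2014-04-06T23:12:20+00:00"

parsed = datetime.fromisoformat(stamp)
print(parsed.tzinfo is not None)  # True: the offset is preserved

try:
    datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S")
except ValueError:
    print("strptime rejects the trailing offset")
```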
Comment on lines +497 to +498

suggestion (testing): Add an explicit assertion that creation_date is timezone-aware and in UTC to prove the DTZ-related change.

Since this change is about enforcing timezone-aware datetimes, this regression test should also assert that the returned value is explicitly timezone-aware and in UTC, not just that the date matches. For example:

creation_date = datetime.fromisoformat(new_study.pop("creation_date"))
assert creation_date.tzinfo is UTC
assert creation_date.date() == datetime.now(UTC).date()

This ensures the API contract (UTC-aware datetime) is verified, rather than only the calendar date.

Suggested implementation:

from datetime import datetime, UTC

import httpx
    }
    new_study = study.json()

    creation_date = datetime.fromisoformat(new_study.pop("creation_date"))
    assert creation_date.tzinfo is UTC
    assert creation_date.date() == datetime.now(UTC).date()

Comment on lines +497 to +498

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

cat -n tests/routers/openml/study_test.py | sed -n '490,510p'

Repository: openml/server-api

Length of output: 795


🏁 Script executed:

# Find Pydantic models related to Study
rg -t py "class.*Study" --max-count=20

Repository: openml/server-api

Length of output: 252


🏁 Script executed:

# Look for any custom datetime serializers or Pydantic config
rg -t py "json_encoders|ConfigDict|model_config|datetime" tests/routers/openml/study_test.py -A 2 -B 2

Repository: openml/server-api

Length of output: 326


🌐 Web query:

Pydantic v2 default datetime serialization with timezone UTC

💡 Result:

In Pydantic v2, datetime values are serialized to JSON as strings, and the default temporal JSON format is ISO 8601 (ser_json_temporal='iso8601'). It does not automatically convert datetimes to UTC; whatever tzinfo is on the value is what gets serialized (as an offset like +02:30, +00:00, etc.). [1][2]

What you get by default (UTC-aware)

If your value is already UTC-aware (tzinfo=timezone.utc), the default JSON output will be ISO-8601 with an offset (typically +00:00). [2]

Enforce “UTC (and optionally Z)” on output

If you want to always output UTC, convert to UTC in a serializer; if you specifically want a trailing Z, format it yourself:

from datetime import datetime, timezone
from pydantic import BaseModel, field_serializer

class M(BaseModel):
    dt: datetime

    @field_serializer('dt')
    def ser_dt(self, v: datetime, _info):
        return v.astimezone(timezone.utc).isoformat().replace('+00:00', 'Z')

Pydantic v2 supports this via @field_serializer (and other serializer hooks). [3]
(Example pattern for Z formatting in v2 is also commonly done with PlainSerializer/Annotated.) [4]

Sources:
[1] Pydantic config: ser_json_temporal default and options. (docs.pydantic.dev)
[2] Pydantic standard library types: datetime JSON serialization and example with timezone offset. (docs.pydantic.dev)
[3] Pydantic serialization docs: custom serializers (@field_serializer, etc.). (docs.pydantic.dev)
[4] Example approach using v2 Annotated + PlainSerializer to emit Z. (stackoverflow.com)

Citations:


🏁 Script executed:

cat -n src/schemas/study.py | head -100

Repository: openml/server-api

Length of output: 3708


🏁 Script executed:

# Find the creation_date field in Study model
rg -t py "creation_date" src/schemas/study.py -B 5 -A 2

Repository: openml/server-api

Length of output: 255


🏁 Script executed:

# Check where creation_date is set in the codebase
rg -t py "creation_date" --max-count=30 -B 3 -A 3

Repository: openml/server-api

Length of output: 3908


🏁 Script executed:

# Look for the creation of Study objects, particularly in routers or models
rg -t py "Study\(" src/ -B 2 -A 5 | head -100

Repository: openml/server-api

Length of output: 1375


🏁 Script executed:

# Check if there are any custom serializers or model config in Study
cat -n src/schemas/study.py

Repository: openml/server-api

Length of output: 3708


🏁 Script executed:

# Check if there are any custom serializers in the Study model or BaseModel config
rg -t py "field_serializer|ConfigDict|model_config" src/schemas/study.py

Repository: openml/server-api

Length of output: 43


🏁 Script executed:

# Check the full router code to see how Study is instantiated
cat -n src/routers/openml/study.py | grep -A 20 "return Study("

Repository: openml/server-api

Length of output: 928


🏁 Script executed:

# Verify the imports and check if UTC is being used consistently
cat -n src/database/studies.py | head -50

Repository: openml/server-api

Length of output: 1782


🏁 Script executed:

# Check if there are any custom field serializers in any base model or parent
rg -t py "field_serializer" src/schemas/

Repository: openml/server-api

Length of output: 313


🏁 Script executed:

# Look at how the creation_date data flows - check the test more closely
cat -n tests/routers/openml/study_test.py | sed -n '470,510p'

Repository: openml/server-api

Length of output: 1516


🏁 Script executed:

# Check if there's a BaseModel with custom datetime serialization
cat -n src/schemas/core.py

Repository: openml/server-api

Length of output: 203


🏁 Script executed:

# Search for any Pydantic config that might affect serialization
rg -t py "class BaseModel|ConfigDict|model_config|ser_json_temporal" src/schemas/

Repository: openml/server-api

Length of output: 846


🏁 Script executed:

# Let me check the full test to understand the test data better
cat -n tests/routers/openml/study_test.py | sed -n '450,480p'

Repository: openml/server-api

Length of output: 1096


🏁 Script executed:

# Check if there's a custom datetime serializer in the fixtures or anywhere
rg -t py "json_encoders|serialize.*datetime" tests/

Repository: openml/server-api

Length of output: 43


🏁 Script executed:

# Check how the datetime is actually being serialized - look at actual test execution context
cat -n tests/routers/openml/study_test.py | sed -n '1,50p'

Repository: openml/server-api

Length of output: 1516


🏁 Script executed:

# Check if Study model uses any parent class or has config we missed
rg -t py "class Study" src/schemas/ -A 20

Repository: openml/server-api

Length of output: 1454


🏁 Script executed:

# Check what the actual datetime format looks like in the test fixture database
rg -t sql "2019-02-25" tests/ src/

Repository: openml/server-api

Length of output: 43


🏁 Script executed:

# Look for any datetime handling in the conftest or test setup
find tests -name "conftest.py" -o -name "fixtures*" | head -5

Repository: openml/server-api

Length of output: 78


🏁 Script executed:

# Check if there are database snapshots or fixtures with actual creation_date values
find tests -type f -name "*.py" -path "*fixture*" -o -name "conftest.py" | xargs grep -l "creation_date" 2>/dev/null

Repository: openml/server-api

Length of output: 45


🏁 Script executed:

# Let's check the test database setup to understand the actual stored values
cat -n tests/conftest.py | head -100

Repository: openml/server-api

Length of output: 3562


🏁 Script executed:

# Check how the Study model is being converted to JSON in the test
# Look for any custom JSON encoding or serialization
rg -t py "\.json()" tests/routers/openml/study_test.py -B 3 -A 3

Repository: openml/server-api

Length of output: 1507


🏁 Script executed:

# Check if TestClient.json() method behaves differently
# Let's look at actual test database to see what creation_date values exist
cat -n tests/resources/sql/ 2>/dev/null | head -20 || find tests/resources -name "*.sql" -o -name "*.json" | head -10

Repository: openml/server-api

Length of output: 43


🏁 Script executed:

# Check if there's database initialization with actual datetime values
find tests/resources -type f | head -20

Repository: openml/server-api

Length of output: 102


🏁 Script executed:

# Look for any database migration or initialization scripts
rg -t sql "creation_date" tests/ src/database/

Repository: openml/server-api

Length of output: 43


🏁 Script executed:

# Let me check the actual database schema for the study table
rg -t sql "CREATE TABLE.*study" src/database/

Repository: openml/server-api

Length of output: 43


🏁 Script executed:

# Check how the datetime is retrieved from the database
cat -n src/database/studies.py | sed -n '1,100p'

Repository: openml/server-api

Length of output: 3834


🏁 Script executed:

# Let's trace the actual data: check what the test fixture shows
cat -n tests/routers/openml/study_test.py | sed -n '10,80p'

Repository: openml/server-api

Length of output: 1914


🏁 Script executed:

# Check if there's a custom datetime deserializer or handler in the routers
cat -n src/routers/openml/study.py | head -50

Repository: openml/server-api

Length of output: 2186


🏁 Script executed:

# Check the actual type of creation_date coming from the database
# Look at the database schema definition
find src -name "*.py" -type f | xargs grep -l "CREATE TABLE\|alembic\|migration" | head -5

Repository: openml/server-api

Length of output: 43


🏁 Script executed:

# Check if SQLAlchemy has any type conversion for datetime
rg -t py "DateTime|DATETIME" src/database/ -B 2 -A 2

Repository: openml/server-api

Length of output: 43


🏁 Script executed:

# Let's look at what the test database actually contains
cat -n tests/resources/sql/ 2>/dev/null || find tests -name "*.sql" -type f | head -10

Repository: openml/server-api

Length of output: 43


🏁 Script executed:

# Check the database initialization/fixture setup
rg -t py "INSERT INTO study" tests/ src/database/ -B 3 -A 3

Repository: openml/server-api

Length of output: 447


🏁 Script executed:

# Critical check: let's see the actual test expectation and compare
# If the test expects "2019-02-25T17:15:01" without offset, but Pydantic defaults to adding it,
# then the first test (line 457) would fail. Let's check if that test actually passes
cat -n tests/routers/openml/study_test.py | sed -n '12,60p'

Repository: openml/server-api

Length of output: 1438


🏁 Script executed:

# Check if there's any datetime handling or type conversion happening
# Look at how SQLAlchemy is configured or if there's custom type handling
rg -t py "python_type|bind_processor|result_processor" src/

Repository: openml/server-api

Length of output: 43


🏁 Script executed:

```shell
# Let me check the actual database column definition for creation_date
# Look for schema or migration files
find src -name "*.toml" -o -name "*.yaml" -o -name "*.yml" | head -10
```

Repository: openml/server-api

Length of output: 76


🏁 Script executed:

```shell
# Check if there's a type stub or database schema definition
rg -t py "datetime|tzinfo" src/database/studies.py -B 1 -A 1 | head -30
```

Repository: openml/server-api

Length of output: 298


🏁 Script executed:

```shell
# Critical: check what the database actually stores and retrieves.
# Look at how SQLAlchemy would interpret the datetime from the database.
cat -n src/database/setup.py 2>/dev/null || find src -name "setup.py" -o -name "database.py" | head -5
```

Repository: openml/server-api

Length of output: 1145


🏁 Script executed:

```shell
# Check the database connection setup
# (rg uses Rust regex syntax: alternation is a bare |, not \| as in grep)
rg -t py "create_engine|Engine" src/database/ -B 2 -A 5
```

Repository: openml/server-api

Length of output: 43


🏁 Script executed:

```shell
# Let me verify: when creation_date is stored as datetime.now(UTC) in the database,
# what type does SQLAlchemy retrieve it as? Check the conftest to see the actual test.
cat -n tests/routers/openml/study_test.py | sed -n '495,500p'
```

Repository: openml/server-api

Length of output: 301


🏁 Script executed:

```shell
# The key question: does the test database fixture have timezone info?
# Let's check where the fixtures are populated.
rg -t py "datetime\.now\(UTC\)|timezone" tests/routers/openml/study_test.py
```

Repository: openml/server-api

Length of output: 120


🏁 Script executed:

```shell
# Key question: is the creation_date coming from the DB naive or aware?
# When SQLAlchemy retrieves a datetime from a MySQL DATETIME column, it's naive.
# Check if there's any tzinfo conversion happening
# (bare | for alternation: rg uses Rust regex, not grep BRE)
rg -t py "tzinfo|replace\(tzinfo|astimezone" src/routers/openml/study.py src/database/studies.py
```

Repository: openml/server-api

Length of output: 43


🏁 Script executed:

```shell
# Let's check: when the test calls study.json(), what does Pydantic actually produce?
# We need to understand if the database value is naive or aware when passed to Study.
cat -n src/routers/openml/study.py | sed -n '130,155p'
```

Repository: openml/server-api

Length of output: 1164


🏁 Script executed:

```shell
# Check: is there any custom serializer or configuration on the Study model?
# (bare | for alternation in rg)
rg -t py "model_config|ConfigDict|json_encoders" src/schemas/study.py
```

Repository: openml/server-api

Length of output: 43


🏁 Script executed:

```shell
# The critical piece: confirm that datetime.now(UTC) produces an aware datetime
# and check whether Pydantic serializes aware and naive values differently.
python3 << 'EOF'
from datetime import UTC, datetime
from pydantic import BaseModel

class TestModel(BaseModel):
    dt: datetime

# Test with an aware datetime
aware_dt = datetime.now(UTC)
m_aware = TestModel(dt=aware_dt)
print("Aware datetime JSON:", m_aware.model_dump_json())

# Test with a naive datetime
naive_dt = datetime.now()
m_naive = TestModel(dt=naive_dt)
print("Naive datetime JSON:", m_naive.model_dump_json())
EOF
```

Repository: openml/server-api

Length of output: 175


The test is safe from flakiness, but it surfaces a subtle semantic issue with timezone awareness.

Pydantic v2 does serialize timezone-aware datetimes with a timezone indicator (e.g., `Z` for UTC). The actual issue, however, is upstream: `creation_date` is stored in a naive `DATETIME` column. When SQLAlchemy retrieves it, it returns a naive Python `datetime` object, so Pydantic serializes it without any timezone offset (e.g., `"2019-02-25T17:15:01"`).

The comparison `creation_date.date() == datetime.now(UTC).date()` is safe because `.date()` discards the time-of-day and any timezone on both sides, leaving two plain `date` objects to compare. It will not be flaky even at midnight UTC.

However, the underlying code has a semantic gap: `datetime.now(UTC)` (an aware datetime) is written to a naive database column, losing its UTC context. The test works correctly because it only compares dates, but consider whether `creation_date` should preserve timezone information throughout the stack, either by using a timezone-aware database column or by explicitly normalizing to UTC on read and write.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@tests/routers/openml/study_test.py` around lines 497 - 498, The test reveals
a timezone-awareness gap: creation_date is stored as a naive DATETIME and
returned naive via SQLAlchemy, then parsed with datetime.fromisoformat in the
test; fix by preserving UTC context either at storage or on read—update the
SQLAlchemy column definition to use DateTime(timezone=True) (so retrieval yields
aware datetimes) or, if changing schema is infeasible, ensure the code path that
produces/serializes creation_date (the model/serializer that fills new_study and
the DB write path) normalizes naive datetimes to UTC on read/write (e.g., attach
UTC tzinfo before serialization) so datetime.fromisoformat/new_study contain
timezone-aware values consistently.

assert new_study == expected

