From 8d44b2500803516e0daad2863916c2ca69eba15d Mon Sep 17 00:00:00 2001 From: Oleksander Piskun Date: Thu, 22 May 2025 10:40:56 +0300 Subject: [PATCH 1/5] feat: high level wrapper around Docker API Signed-off-by: Oleksander Piskun --- README.md | 33 + development/debugging/Dockerfile | 49 + development/debugging/frpc.toml | 18 + development/debugging/frps.toml | 20 + development/debugging/haproxy.cfg | 94 ++ .../debugging/redeploy_haproxy_host.sh | 17 + haproxy.cfg.template | 39 +- haproxy_agent.py | 1166 ++++++++++++++++- 8 files changed, 1357 insertions(+), 79 deletions(-) create mode 100644 development/debugging/Dockerfile create mode 100644 development/debugging/frpc.toml create mode 100644 development/debugging/frps.toml create mode 100644 development/debugging/haproxy.cfg create mode 100755 development/debugging/redeploy_haproxy_host.sh diff --git a/README.md b/README.md index a8c4016..abda8cd 100644 --- a/README.md +++ b/README.md @@ -346,6 +346,39 @@ docker run \ -d nextcloud-appapi-harp:local ``` +#### Debugging HaRP + +##### One time initializing steps: + +1. Create virtual environment +2. Install `pydantic` (you can look at exact version in the **Dockerfile*) and `git+https://github.com/cloud-py-api/haproxy-python-spoa.git` +3. Set next environment variables for running `haproxy_agent.py` script: + ``` + HP_LOG_LEVEL=info;NC_INSTANCE_URL=http://nextcloud.local;HP_SHARED_KEY=some_very_secure_password;HP_FRP_DISABLE_TLS=true + ``` +4. Create folder `dev` at the root of repository, extract there content of the desired archive with the [FRP](https://github.com/fatedier/frp/releases/latest) archive which is located at `exapps_dev` folder of this repo. +5. 
Edit the `data/nginx/vhost.d/nextcloud.local_location` file from the `nextcloud-docker-dev` to point `/exapps/` web route to the host: + ``` + proxy_pass http://172.17.0.1:8780; + ``` + + > **Note:** my original content from my dev machine of file `nextcloud.local_location`: + > ```nginx + > location /exapps/ { + > proxy_pass http://172.17.0.1:8780; + > } + > ``` +6. Use `docker compose up -d --force-recreate proxy` command from Julius `nextcloud-docker-dev` to recreate the proxy container. +7. Register `HaRP` from the **Host** template. Replace `localhost` with `host.docker.internal` in `HaRP Host` field. + +##### Steps to run all parts of HaRP after initializing: + +1. Run FRP Server with `./dev/frps -c ./development/debugging/frps.toml` command. +2. Run the FRP Client to connect Docker Engine to the FRP Server with `./dev/frpc -c ./development/debugging/frpc.toml` command. +3. Run `./development/debugging/redeploy_haproxy_host.sh` command to redeploy `appapi-harp` container **with HaProxy only**. + +> **Note:** Existing `appapi-harp` container will be removed. + ## Contributing Contributions to HaRP are welcome. Feel free to open issues, discussions or submit pull requests with improvements, bug fixes, or new features. diff --git a/development/debugging/Dockerfile b/development/debugging/Dockerfile new file mode 100644 index 0000000..e255263 --- /dev/null +++ b/development/debugging/Dockerfile @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors +# SPDX-License-Identifier: AGPL-3.0-or-later + +FROM haproxy:3.1.2-alpine3.21 + +USER root + +# Bind addresses for 2 frontends (HTTP + HTTPS for exapps) and FRP Server. +# If /certs/cert.pem does not exist, EXAPPS HTTPS frontend are disabled automatically. 
+ENV HP_EXAPPS_ADDRESS="0.0.0.0:8780" \ + HP_EXAPPS_HTTPS_ADDRESS="0.0.0.0:8781" \ + HP_FRP_ADDRESS="0.0.0.0:8782" \ + HP_FRP_DISABLE_TLS="false" \ + HP_TIMEOUT_CONNECT="30s" \ + HP_TIMEOUT_CLIENT="30s" \ + HP_TIMEOUT_SERVER="1800s" \ + NC_INSTANCE_URL="" \ + HP_LOG_LEVEL="warning" + +RUN set -ex; \ + apk add --no-cache \ + git \ + ca-certificates \ + tzdata \ + bash \ + curl \ + openssl \ + bind-tools \ + nano \ + vim \ + envsubst \ + frp \ + python3 \ + py3-pip \ + py3-aiohttp \ + wget \ + tar \ + netcat-openbsd; \ + chmod -R 777 /tmp; + +# Main haproxy config template +COPY --chmod=664 ./development/debugging/haproxy.cfg /haproxy.cfg + +# SPOE config +COPY --chmod=664 spoe-agent.conf /etc/haproxy/spoe-agent.conf + +ENTRYPOINT ["haproxy", "-f", "/haproxy.cfg", "-W", "-db"] + +LABEL com.centurylinklabs.watchtower.enable="false" diff --git a/development/debugging/frpc.toml b/development/debugging/frpc.toml new file mode 100644 index 0000000..afb8016 --- /dev/null +++ b/development/debugging/frpc.toml @@ -0,0 +1,18 @@ +# SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors +# SPDX-License-Identifier: AGPL-3.0-or-later + +serverAddr = "127.0.0.1" # Replace with your HP_FRP_ADDRESS host +serverPort = 8782 # Default port for FRP or the port your reverse proxy listens on +loginFailExit = false # If the FRP (HaRP) server is unavailable, continue trying to log in. 
+ +metadatas.token = "some_very_secure_password" + +log.level = "info" + +[[proxies]] +remotePort = 24000 # we set it to 24000 as it is the basic Docker Engine +name = "bundled-deploy-daemon" # Unique name for each Docker Engine +type = "tcp" +[proxies.plugin] +type = "unix_domain_socket" +unixPath = "/var/run/docker.sock" diff --git a/development/debugging/frps.toml b/development/debugging/frps.toml new file mode 100644 index 0000000..45afcb1 --- /dev/null +++ b/development/debugging/frps.toml @@ -0,0 +1,20 @@ +# SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors +# SPDX-License-Identifier: AGPL-3.0-or-later + +bindAddr = "127.0.0.1" +bindPort = 8782 + +transport.tls.force = false + +log.level = "info" + +maxPortsPerClient = 1 +allowPorts = [ + { start = 23000, end = 23999 }, + { start = 24000, end = 24099 } +] + +[[httpPlugins]] +addr = "127.0.0.1:8200" +path = "/frp_handler" +ops = ["Login"] diff --git a/development/debugging/haproxy.cfg b/development/debugging/haproxy.cfg new file mode 100644 index 0000000..a7a9fa0 --- /dev/null +++ b/development/debugging/haproxy.cfg @@ -0,0 +1,94 @@ +# SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors +# SPDX-License-Identifier: AGPL-3.0-or-later + +global + log stdout local0 info + maxconn 8192 + ca-base /etc/ssl/certs + +defaults + log global + option httplog + option dontlognull + timeout connect 30s + timeout client 30s + timeout server 1800s + + +############################################################################### +# FRONTEND: ex_apps (HTTP) +############################################################################### +frontend ex_apps + mode http + bind 0.0.0.0:8780 + + filter spoe engine exapps-spoe config /etc/haproxy/spoe-agent.conf + http-request silent-drop if { var(txn.exapps.bad_request) -m int eq 1 } + http-request return status 401 content-type text/plain string "401 Unauthorized" if { var(txn.exapps.unauthorized) -m int eq 1 } + http-request return status 
403 content-type text/plain string "403 Forbidden" if { var(txn.exapps.forbidden) -m int eq 1 } + http-request return status 404 content-type text/plain string "404 Not Found" if { var(txn.exapps.not_found) -m int eq 1 } + use_backend %[var(txn.exapps.backend)] + +############################################################################### +# BACKENDS: ex_apps & ex_apps_backend_w_bruteforce +############################################################################### +backend ex_apps_backend + mode http + server frp_server 0.0.0.0 + http-request set-path %[var(txn.exapps.target_path)] + http-request set-dst var(txn.exapps.target_ip) + http-request set-dst-port var(txn.exapps.target_port) + http-request set-header EX-APP-ID %[var(txn.exapps.exapp_id)] + http-request set-header EX-APP-VERSION %[var(txn.exapps.exapp_version)] + http-request set-header AUTHORIZATION-APP-API %[var(txn.exapps.exapp_token)] + http-request set-header AA-VERSION "32" # TO-DO: temporary, remove it after we update all ExApps. + +backend ex_apps_backend_w_bruteforce + mode http + server frp_server 0.0.0.0 + http-request set-path %[var(txn.exapps.target_path)] + http-request set-dst var(txn.exapps.target_ip) + http-request set-dst-port var(txn.exapps.target_port) + http-request set-header EX-APP-ID %[var(txn.exapps.exapp_id)] + http-request set-header EX-APP-VERSION %[var(txn.exapps.exapp_version)] + http-request set-header AUTHORIZATION-APP-API %[var(txn.exapps.exapp_token)] + http-request set-header AA-VERSION "32" # TO-DO: temporary, remove it after we update all ExApps. 
+ filter spoe engine exapps-bruteforce-protection-spoe config /etc/haproxy/spoe-agent.conf + +############################################################################### +# BACKEND: nextcloud_control (HTTP) +############################################################################### +backend nextcloud_control_backend + mode http + server nextcloud_control 127.0.0.1:8200 + http-request set-path %[var(txn.exapps.target_path)] + +############################################################################### +# BACKEND: docker_engine (HTTP) +############################################################################### +backend docker_engine_backend + mode http + server frp_server 127.0.0.1 + http-request set-dst-port var(txn.exapps.target_port) + http-request set-path %[var(txn.exapps.target_path)] + + # docker system _ping + http-request allow if { path,url_dec -m reg -i ^(/v[\d\.]+)?/_ping$ } METH_GET + # docker inspect image + http-request allow if { path,url_dec -m reg -i ^(/v[\d\.]+)?/images/.*/json } METH_GET + # container inspect: GET containers/%s/json + http-request allow if { path,url_dec -m reg -i ^(/v[\d\.]+)?/containers/nc_app_[a-zA-Z0-9_.-]+/json } METH_GET + # container inspect: GET containers/%s/logs + http-request allow if { path,url_dec -m reg -i ^(/v[\d\.]+)?/containers/nc_app_[a-zA-Z0-9_.-]+/logs } METH_GET + + # image pull: POST images/create?fromImage=%s + http-request allow if { path,url_dec -m reg -i ^(/v[\d\.]+)?/images/create } METH_POST + http-request deny + + +backend agents + mode tcp + timeout connect 5s + timeout server 3m + option spop-check + server agent1 127.0.0.1:9600 check diff --git a/development/debugging/redeploy_haproxy_host.sh b/development/debugging/redeploy_haproxy_host.sh new file mode 100755 index 0000000..1f87b74 --- /dev/null +++ b/development/debugging/redeploy_haproxy_host.sh @@ -0,0 +1,17 @@ +#!/bin/sh +# SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors +# SPDX-License-Identifier: 
AGPL-3.0-or-later + +# This file can be used for development for the "manual install" deployment type when FRP is disabled. +# For Julius Docker-Dev, you need to additionally edit the `data/nginx/vhost.d/nextcloud.local_location` file, +# changing `appapi-harp` to `172.17.0.1` and restart the "proxy" container. + +docker container remove --force appapi-harp + +docker build -f development/debugging/Dockerfile -t nextcloud-appapi-harp:debug . + +docker run \ + --name appapi-harp -h appapi-harp \ + --restart unless-stopped \ + --network=host \ + -d nextcloud-appapi-harp:debug diff --git a/haproxy.cfg.template b/haproxy.cfg.template index 5566e9f..37c0b6b 100644 --- a/haproxy.cfg.template +++ b/haproxy.cfg.template @@ -108,44 +108,7 @@ backend docker_engine_backend http-request allow if { path,url_dec -m reg -i ^(/v[\d\.]+)?/containers/nc_app_[a-zA-Z0-9_.-]+/json } METH_GET # container inspect: GET containers/%s/logs http-request allow if { path,url_dec -m reg -i ^(/v[\d\.]+)?/containers/nc_app_[a-zA-Z0-9_.-]+/logs } METH_GET - # container start/stop: POST containers/%s/start containers/%s/stop - http-request allow if { path,url_dec -m reg -i ^(/v[\d\.]+)?/containers/nc_app_[a-zA-Z0-9_.-]+/((start)|(stop)) } METH_POST - # container rm: DELETE containers/%s - http-request allow if { path,url_dec -m reg -i ^(/v[\d\.]+)?/containers/nc_app_[a-zA-Z0-9_.-]+ } METH_DELETE - # container update/exec: POST containers/%s/update containers/%s/exec - http-request allow if { path,url_dec -m reg -i ^(/v[\d\.]+)?/containers/nc_app_[a-zA-Z0-9_.-]+/((update)|(exec)) } METH_POST - # container put: PUT containers/%s/archive - http-request allow if { path,url_dec -m reg -i ^(/v[\d\.]+)?/containers/nc_app_[a-zA-Z0-9_.-]+/archive } METH_PUT - # run exec instance: POST exec/%s - http-request allow if { path,url_dec -m reg -i ^(/v[\d\.]+)?/exec/[a-zA-Z0-9_.-]+/start } METH_POST - - # container create: POST containers/create?name=%s - # ACL to restrict container name to nc_app_[a-zA-Z0-9_.-]+ - 
acl nc_app_container_name url_param(name) -m reg -i "^nc_app_[a-zA-Z0-9_.-]+" - - # ACL to restrict the number of Mounts to 1 - acl one_mount_volume req.body -m reg -i "\"Mounts\"\s*:\s*\[\s*(?:(?!\"Mounts\"\s*:\s*\[)[^}]*)}[^}]*\]" - # ACL to deny if there are any binds - acl binds_present req.body -m reg -i "\"HostConfig\"\s*:.*\"Binds\"\s*:" - # ACL to restrict the type of Mounts to volume - acl type_not_volume req.body -m reg -i "\"Mounts\":\s*\[[^\]]*(\"Type\":\s*\"(?!volume\b)\w+\"[^\]]*)+\]" - http-request deny if { path,url_dec -m reg -i ^(/v[\d\.]+)?/containers/create } nc_app_container_name !one_mount_volume binds_present type_not_volume METH_POST - - # ACL to restrict container creation, that it has HostConfig.Privileged(by searching for "Privileged" word in all payload) not set - acl no_privileged_flag req.body -m reg -i "\"Privileged\"" - # ACL to allow mount volume with strict pattern for name: nc_app_[a-zA-Z0-9_.-]+_data - acl nc_app_volume_data_only req.body -m reg -i "\"Mounts\":\s?\[\s?{[^}]*\"Source\":\s?\"nc_app_[a-zA-Z0-9_.-]+_data\"" - http-request allow if { path,url_dec -m reg -i ^(/v[\d\.]+)?/containers/create } nc_app_container_name !no_privileged_flag nc_app_volume_data_only METH_POST - # end of container create - - # volume create: POST volumes/create - # restrict name - acl nc_app_volume_data req.body -m reg -i "\"Name\":\s?\"nc_app_[a-zA-Z0-9_.-]+_data\"" - # do not allow to use "device" word e.g., "--opt device=:/path/to/dir" - acl volume_no_device req.body -m reg -i "\"device\"" - http-request allow if { path,url_dec -m reg -i ^(/v[\d\.]+)?/volumes/create } nc_app_volume_data !volume_no_device METH_POST - # volume rm: DELETE volumes/%s - http-request allow if { path,url_dec -m reg -i ^(/v[\d\.]+)?/volumes/nc_app_[a-zA-Z0-9_.-]+_data } METH_DELETE + # image pull: POST images/create?fromImage=%s http-request allow if { path,url_dec -m reg -i ^(/v[\d\.]+)?/images/create } METH_POST http-request deny diff --git a/haproxy_agent.py 
b/haproxy_agent.py index 8a363e7..7ee348a 100644 --- a/haproxy_agent.py +++ b/haproxy_agent.py @@ -5,23 +5,25 @@ import asyncio import contextlib +import io import ipaddress import json import logging import os import re import socket +import tarfile import time from base64 import b64encode from enum import IntEnum from ipaddress import IPv4Address, IPv6Address, ip_address -from typing import Self +from typing import Literal, Self import aiohttp from aiohttp import web from haproxyspoa.payloads.ack import AckPayload from haproxyspoa.spoa_server import SpoaServer -from pydantic import BaseModel, Field, ValidationError, model_validator +from pydantic import BaseModel, Field, ValidationError, computed_field, model_validator APPID_PATTERN = re.compile(r"(?:^|/)exapps/([^/]+)") SHARED_KEY = os.environ.get("HP_SHARED_KEY") @@ -40,6 +42,7 @@ EXCLUDE_HEADERS_USER_INFO = {"host", "content-length"} SPOA_AGENT = SpoaServer() +DOCKER_API_HOST = "127.0.0.1" ############################################################################### # Definitions @@ -82,6 +85,45 @@ class NcUser(BaseModel): access_level: AccessLevel = Field(..., description="ADMIN(2), USER(1), or PUBLIC(0)") +class ExAppName(BaseModel): + name: str = Field(..., description="ExApp name.") + instance_id: str = Field("", description="Nextcloud instance ID.") + + @computed_field + @property + def exapp_container_name(self) -> str: + return f"nc_app_{self.instance_id}_{self.name}" if self.instance_id else f"nc_app_{self.name}" + + @computed_field + @property + def exapp_container_volume(self) -> str: + return f"{self.exapp_container_name}_data" + + +class CreateExAppMounts(BaseModel): + source: str = Field(...) + target: str = Field(...) 
+ mode: str = Field("rw") + + +class CreateExAppPayload(ExAppName): + image_id: str = Field(..., description="Docker image ID.") + network_mode: str = Field(..., description="Desired NetworkMode for the container.") + environment_variables: list[str] = Field([], description="ExApp environment variables.") + restart_policy: str = Field("unless-stopped", description="Desired RestartPolicy for the container.") + compute_device: Literal["cpu", "rocm", "cuda"] = Field("cpu", description="Possible values: 'cpu', 'rocm' or 'cuda'") + mount_points: list[CreateExAppMounts] = Field([], description="List of mount points for the container.") + + +class RemoveExAppPayload(ExAppName): + remove_data: bool = Field(False, description="Flag indicating whether the Docker ExApp volume should be deleted.") + + +class InstallCertificatesPayload(ExAppName): + system_certs_bundle: str | None = Field(None, description="Content of the system CA bundle (concatenated PEMs).") + install_frp_certs: bool = Field(True, description="Flag to control installation of FRP certificates.") + + ############################################################################### # In-memory caches ############################################################################### @@ -353,7 +395,7 @@ async def handle_app_api_request( await record_ip_failure(str_client_ip) return reply.set_txn_var("unauthorized", 1) docker_engine_port = request_headers.get("docker-engine-port") - if docker_engine_port: + if docker_engine_port and not target_path.startswith("/docker/"): reply = reply.set_txn_var("target_port", int(docker_engine_port)) return reply.set_txn_var("backend", "docker_engine_backend") return reply.set_txn_var("backend", "nextcloud_control_backend") @@ -444,41 +486,6 @@ async def delete_exapp(request: web.Request): return web.HTTPNoContent() -############################################################################### -# Special routes for AppAPI 
-############################################################################### - - -async def get_frp_certificates(request: web.Request): - """Returns the generated FRP TLS certificate files(client.crt, ca.crt, client.key) for the client. - - If any of these files do not exist, TLS is considered disabled. - """ - cert_dir = "/certs/frp" - client_crt_path = os.path.join(cert_dir, "client.crt") - ca_crt_path = os.path.join(cert_dir, "ca.crt") - client_key_path = os.path.join(cert_dir, "client.key") - - if not (os.path.exists(client_crt_path) and os.path.exists(ca_crt_path) and os.path.exists(client_key_path)): - return web.json_response({"tls_enabled": False}) - - with open(client_crt_path) as f: - client_crt = f.read() - with open(ca_crt_path) as f: - ca_crt = f.read() - with open(client_key_path) as f: - client_key = f.read() - - return web.json_response( - { - "tls_enabled": True, - "ca_crt": ca_crt, - "client_crt": client_crt, - "client_key": client_key, - } - ) - - ############################################################################### # FRP Plugin Authentication ############################################################################### @@ -504,6 +511,1077 @@ async def frp_auth(request: web.Request): raise web.HTTPBadRequest() +############################################################################### +# Endpoints for AppAPI to work with the Docker API +############################################################################### + + +def get_docker_engine_port(request: web.Request) -> int: + docker_engine_port_str = request.headers.get("docker-engine-port") + if not docker_engine_port_str: + LOGGER.error("Missing 'docker-engine-port' header.") + raise web.HTTPBadRequest(text="Missing 'docker-engine-port' header.") + + try: + docker_engine_port = int(docker_engine_port_str) + if not (0 < docker_engine_port < 65536): + raise ValueError("Port out of valid range") from None + return docker_engine_port + except ValueError: + 
LOGGER.error("Invalid 'docker-engine-port' header value: %s", docker_engine_port_str) + raise web.HTTPBadRequest(text=f"Invalid 'docker-engine-port' header value: {docker_engine_port_str}") from None + + +async def docker_exapp_exists(request: web.Request): + docker_engine_port = get_docker_engine_port(request) + try: + payload_dict = await request.json() + except json.JSONDecodeError: + raise web.HTTPBadRequest(text="Invalid JSON body") from None + try: + payload = ExAppName.model_validate(payload_dict) + except ValidationError as e: + raise web.HTTPBadRequest(text=f"Payload validation error: {e}") from None + + container_name = payload.exapp_container_name + docker_api_url = f"http://{DOCKER_API_HOST}:{docker_engine_port}/containers/{container_name}/json" + LOGGER.debug("Checking for container '%s' via Docker API at %s", container_name, docker_api_url) + async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=15.0)) as session: + try: + async with session.get(docker_api_url) as resp: + if resp.status == 200: + LOGGER.info("Container '%s' exists.", container_name) + return web.json_response({"exists": True}) + if resp.status == 404: + LOGGER.info("Container '%s' does not exist.", container_name) + return web.json_response({"exists": False}) + error_text = await resp.text() + LOGGER.error( + "Error checking container '%s' with Docker API (status %s): %s", + container_name, + resp.status, + error_text, + ) + raise web.HTTPServiceUnavailable(text=f"Error communicating with Docker Engine: Status {resp.status}") + except aiohttp.ClientConnectorError as e: + LOGGER.error("Could not connect to Docker Engine at %s:%s: %s", DOCKER_API_HOST, docker_engine_port, e) + raise web.HTTPServiceUnavailable( + text=f"Could not connect to Docker Engine on port {docker_engine_port}" + ) from e + except TimeoutError as e: + LOGGER.error( + "Timeout while trying to communicate with Docker Engine at %s:%s for container '%s'", + DOCKER_API_HOST, + docker_engine_port, + 
container_name, + ) + raise web.HTTPGatewayTimeout( + text=f"Timeout communicating with Docker Engine on port {docker_engine_port}" + ) from e + except Exception as e: + LOGGER.exception("Unexpected error while checking container '%s' existence via Docker API.", container_name) + raise web.HTTPInternalServerError( + text="An unexpected error occurred while checking container status." + ) from e + + +async def docker_exapp_create(request: web.Request): + docker_engine_port = get_docker_engine_port(request) + try: + payload_dict = await request.json() + except json.JSONDecodeError: + LOGGER.warning("Invalid JSON body received for /docker/exapp/create") + raise web.HTTPBadRequest(text="Invalid JSON body") from None + try: + payload = CreateExAppPayload.model_validate(payload_dict) + except ValidationError as e: + LOGGER.warning("Payload validation error for /docker/exapp/create: %s", e) + raise web.HTTPBadRequest(text=f"Payload validation error: {e}") from None + + container_name = payload.exapp_container_name + volume_name = payload.exapp_container_volume + image_id = payload.image_id + + container_config = { + "Image": image_id, + "Hostname": payload.name, + "HostConfig": { + "NetworkMode": payload.network_mode, + "Mounts": [ + { + "Type": "volume", + "Source": volume_name, + "Target": f"/{volume_name}", + "ReadOnly": False, + } + ], + "RestartPolicy": { + "Name": payload.restart_policy, + }, + }, + "Env": payload.environment_variables, + } + if payload.network_mode not in ("host", "bridge"): + container_config["NetworkingConfig"] = {"EndpointsConfig": {payload.network_mode: {"Aliases": [payload.name]}}} + + if payload.compute_device == "cuda": + container_config["HostConfig"]["DeviceRequests"] = { + "Driver": "nvidia", + "Count": -1, + "Capabilities": [["compute", "utility"]], + } + elif payload.compute_device == "rocm": + devices = [] + for device in ("/dev/kfd", "/dev/dri"): + devices.append({"PathOnHost": device, "PathInContainer": device, "CgroupPermissions": 
"rwm"}) + container_config["HostConfig"]["Devices"] = devices + + for extra_mount in payload.mount_points: + container_config["HostConfig"]["Mounts"].append( + { + "Source": extra_mount.source, + "Target": extra_mount.target, + "Type": "bind", + "Readonly": extra_mount.mode == "ro", + } + ) + + async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=60.0)) as session: + create_volume_url = f"http://{DOCKER_API_HOST}:{docker_engine_port}/volumes/create" + inspect_volume_url = f"http://{DOCKER_API_HOST}:{docker_engine_port}/volumes/{volume_name}" + + LOGGER.debug("Checking/Creating volume '%s' via Docker API", volume_name) + try: + async with session.get(inspect_volume_url) as resp_inspect: + if resp_inspect.status == 200: + LOGGER.info("Volume '%s' already exists.", volume_name) + elif resp_inspect.status == 404: + LOGGER.info("Volume '%s' not found, attempting to create.", volume_name) + async with session.post(create_volume_url, json={"Name": volume_name}) as resp_create: + if resp_create.status == 201: + LOGGER.info("Volume '%s' created successfully.", volume_name) + else: + error_text = await resp_create.text() + LOGGER.error( + "Failed to create volume '%s' (status %s): %s", + volume_name, + resp_create.status, + error_text, + ) + raise web.HTTPServiceUnavailable( + text=f"Failed to create volume '{volume_name}': Status {resp_create.status}" + ) + else: + error_text = await resp_inspect.text() + LOGGER.error( + "Error inspecting volume '%s' (status %s): %s", volume_name, resp_inspect.status, error_text + ) + raise web.HTTPServiceUnavailable( + text=f"Error inspecting volume '{volume_name}': Status {resp_inspect.status}" + ) + except aiohttp.ClientConnectorError as e: + LOGGER.error("Could not connect to Docker Engine for volume operation: %s", e) + raise web.HTTPServiceUnavailable( + text=f"Could not connect to Docker Engine on port {docker_engine_port}" + ) from e + except TimeoutError as e: + LOGGER.error("Timeout during volume operation for 
'%s'", volume_name) + raise web.HTTPGatewayTimeout(text="Timeout communicating with Docker Engine for volume operation") from e + except web.HTTPServiceUnavailable: + raise + except Exception as e: + LOGGER.exception("Unexpected error during volume management for '%s'", volume_name) + raise web.HTTPInternalServerError(text="Unexpected error during volume management.") from e + + create_container_url = f"http://{DOCKER_API_HOST}:{docker_engine_port}/containers/create?name={container_name}" + LOGGER.debug( + "Attempting to create container '%s' with image '%s' via Docker API at %s", + container_name, + image_id, + create_container_url, + ) + try: + async with session.post(create_container_url, json=container_config) as resp: + if resp.status == 201: + container_data = await resp.json() + container_id = container_data.get("Id") + LOGGER.info("Container '%s' (ID: %s) created successfully.", container_name, container_id) + return web.json_response({"id": container_id, "name": container_name}, status=201) + if resp.status == 409: + error_text = await resp.text() + LOGGER.warning("Container '%s' already exists (status 409): %s.", container_name, error_text) + raise web.HTTPConflict(text=f"Container with name '{container_name}' already exists.") + error_text = await resp.text() + LOGGER.error( + "Error creating container '%s' with Docker API (status %s): %s", + container_name, + resp.status, + error_text, + ) + raise web.HTTPServiceUnavailable( + text=f"Error creating container '{container_name}': Status {resp.status}" + ) + except aiohttp.ClientConnectorError as e: + LOGGER.error("Could not connect to Docker Engine for container creation: %s", e) + raise web.HTTPServiceUnavailable( + text=f"Could not connect to Docker Engine on port {docker_engine_port}" + ) from e + except TimeoutError as e: + LOGGER.error("Timeout during container creation for '%s'", container_name) + raise web.HTTPGatewayTimeout(text="Timeout communicating with Docker Engine for container creation") 
async def docker_exapp_start(request: web.Request):
    """Start an ExApp container through the Docker Engine HTTP API.

    Expects a JSON body matching ``ExAppName``.

    Returns:
        204 when the container was started, 200 when it was already running.

    Raises:
        HTTPBadRequest: invalid JSON or payload validation failure.
        HTTPNotFound: the container does not exist.
        HTTPServiceUnavailable / HTTPGatewayTimeout / HTTPInternalServerError:
            Docker Engine communication problems.
    """
    docker_engine_port = get_docker_engine_port(request)
    try:
        payload_dict = await request.json()
    except json.JSONDecodeError:
        LOGGER.warning("Invalid JSON body received for /docker/exapp/start")
        raise web.HTTPBadRequest(text="Invalid JSON body") from None
    try:
        payload = ExAppName.model_validate(payload_dict)
    except ValidationError as e:
        LOGGER.warning("Payload validation error for /docker/exapp/start: %s", e)
        raise web.HTTPBadRequest(text=f"Payload validation error: {e}") from None

    container_name = payload.exapp_container_name
    start_container_url = f"http://{DOCKER_API_HOST}:{docker_engine_port}/containers/{container_name}/start"

    LOGGER.info("Attempting to start container '%s' via Docker API at %s", container_name, start_container_url)
    async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=30.0)) as session:
        try:
            async with session.post(start_container_url) as resp:
                if resp.status == 204:
                    LOGGER.info("Container '%s' started successfully.", container_name)
                    return web.HTTPNoContent()
                if resp.status == 304:
                    LOGGER.info("Container '%s' was already started.", container_name)
                    return web.HTTPOk(text="Container already started")
                if resp.status == 404:
                    LOGGER.warning("Container '%s' not found, cannot start.", container_name)
                    raise web.HTTPNotFound(text=f"Container '{container_name}' not found.")
                error_text = await resp.text()
                LOGGER.error(
                    "Error starting container '%s' with Docker API (status %s): %s",
                    container_name,
                    resp.status,
                    error_text,
                )
                raise web.HTTPServiceUnavailable(
                    text=f"Error starting container '{container_name}' via Docker Engine: Status {resp.status}"
                )
        except aiohttp.ClientConnectorError as e:
            LOGGER.error(
                "Could not connect to Docker Engine at %s:%s to start container: %s",
                DOCKER_API_HOST,
                docker_engine_port,
                e,
            )
            raise web.HTTPServiceUnavailable(
                text=f"Could not connect to Docker Engine on port {docker_engine_port}"
            ) from e
        except TimeoutError as e:
            # Fixed: arguments were previously (DOCKER_API_HOST, container_name, port),
            # swapping the container name and the host in the rendered log line.
            LOGGER.error(
                "Timeout while trying to start container '%s' via Docker Engine at %s:%s",
                container_name,
                DOCKER_API_HOST,
                docker_engine_port,
            )
            raise web.HTTPGatewayTimeout(text="Timeout communicating with Docker Engine for container start") from e
        except (web.HTTPNotFound, web.HTTPServiceUnavailable):
            # Our own HTTP errors from the try-body must propagate unchanged.
            raise
        except Exception as e:
            LOGGER.exception("Unexpected error while starting container '%s' via Docker API.", container_name)
            raise web.HTTPInternalServerError(text="An unexpected error occurred during container start.") from e
successfully.", container_name) + return web.HTTPNoContent() + if resp.status == 304: + LOGGER.info("Container '%s' was already stopped.", container_name) + return web.HTTPOk(text="Container already stopped") + if resp.status == 404: + LOGGER.warning("Container '%s' not found, cannot stop.", container_name) + raise web.HTTPNotFound(text=f"Container '{container_name}' not found.") + error_text = await resp.text() + LOGGER.error( + "Error stopping container '%s' with Docker API (status %s): %s", + container_name, + resp.status, + error_text, + ) + raise web.HTTPServiceUnavailable( + text=f"Error stopping container '{container_name}' via Docker Engine: Status {resp.status}" + ) + except aiohttp.ClientConnectorError as e: + LOGGER.error( + "Could not connect to Docker Engine at %s:%s to stop container: %s", + DOCKER_API_HOST, + docker_engine_port, + e, + ) + raise web.HTTPServiceUnavailable( + text=f"Could not connect to Docker Engine on port {docker_engine_port}" + ) from e + except TimeoutError as e: + LOGGER.error( + "Timeout while trying to stop container '%s' via Docker Engine at %s:%s", + DOCKER_API_HOST, + container_name, + docker_engine_port, + ) + raise web.HTTPGatewayTimeout(text="Timeout communicating with Docker Engine for container stop") from e + except (web.HTTPNotFound, web.HTTPServiceUnavailable): + raise + except Exception as e: + LOGGER.exception("Unexpected error while stopping container '%s' via Docker API.", container_name) + raise web.HTTPInternalServerError(text="An unexpected error occurred during container stop.") from e + + +async def docker_exapp_wait_for_start(request: web.Request): + docker_engine_port = get_docker_engine_port(request) + try: + payload_dict = await request.json() + except json.JSONDecodeError: + LOGGER.warning("Invalid JSON body received for /docker/exapp/wait_for_start") + raise web.HTTPBadRequest(text="Invalid JSON body") from None + try: + payload = ExAppName.model_validate(payload_dict) + except ValidationError as e: + 
async def docker_exapp_wait_for_start(request: web.Request):
    """Poll the Docker Engine until an ExApp container is running (and healthy, if it has a healthcheck).

    Expects a JSON body matching ``ExAppName``. Responds with JSON:
    ``{"started": bool, "status": str | None, "health": str | None, "reason": str}``.
    Note that ``started`` is ``True`` even for a running-but-unhealthy
    container; callers must inspect ``health``/``reason`` for that case.
    """
    docker_engine_port = get_docker_engine_port(request)
    try:
        payload_dict = await request.json()
    except json.JSONDecodeError:
        LOGGER.warning("Invalid JSON body received for /docker/exapp/wait_for_start")
        raise web.HTTPBadRequest(text="Invalid JSON body") from None
    try:
        payload = ExAppName.model_validate(payload_dict)
    except ValidationError as e:
        LOGGER.warning("Payload validation error for /docker/exapp/wait_for_start: %s", e)
        raise web.HTTPBadRequest(text=f"Payload validation error: {e}") from None

    container_name = payload.exapp_container_name
    inspect_url = f"http://{DOCKER_API_HOST}:{docker_engine_port}/containers/{container_name}/json"

    # 180 polls * 0.5s = 90s of waiting; the session timeout adds a safety margin on top.
    max_tries = 180
    sleep_interval = 0.5
    total_wait_time = max_tries * sleep_interval
    client_timeout = aiohttp.ClientTimeout(total=total_wait_time + 15.0)

    LOGGER.info(
        "Waiting for container '%s' to start (max %d tries, interval %.1fs, total wait %.1fs).",
        container_name,
        max_tries,
        sleep_interval,
        total_wait_time,
    )

    last_known_status: str | None = "unknown"
    last_known_health: str | None = None

    async with aiohttp.ClientSession(timeout=client_timeout) as session:
        for attempt in range(max_tries):
            try:
                async with session.get(inspect_url) as resp:
                    if resp.status == 200:
                        container_info = await resp.json()
                        state = container_info.get("State", {})
                        current_status = state.get("Status")
                        # Fixed: "Health" may be absent *or explicitly null* when the image
                        # defines no healthcheck; `.get("Health", {})` would crash on null.
                        current_health = (state.get("Health") or {}).get("Status")

                        last_known_status = current_status
                        last_known_health = current_health

                        LOGGER.debug(
                            "Container '%s' attempt %d/%d: Status='%s', Health='%s'",
                            container_name,
                            attempt + 1,
                            max_tries,
                            current_status,
                            current_health,
                        )
                        if current_status == "running":
                            if current_health is None or current_health == "healthy":
                                LOGGER.info(
                                    "Container '%s' is running and healthy (or no healthcheck).", container_name
                                )
                                return web.json_response(
                                    {"started": True, "status": current_status, "health": current_health}
                                )
                            if current_health == "unhealthy":
                                # Fixed: the old log claimed "not successfully started" while the
                                # response still reported started=True; message now matches behavior.
                                LOGGER.warning(
                                    "Container '%s' is running but unhealthy. Reporting started=True with 'unhealthy' health.",
                                    container_name,
                                )
                                return web.json_response(
                                    {
                                        "started": True,
                                        "status": current_status,
                                        "health": current_health,
                                        "reason": "unhealthy",
                                    }
                                )
                            if current_health == "starting":
                                LOGGER.info(
                                    "Container '%s' is running, health status is 'starting'. Continuing to wait.",
                                    container_name,
                                )
                        elif current_status in ("created", "restarting"):
                            LOGGER.info("Container '%s' is '%s'. Continuing to wait.", container_name, current_status)
                        elif current_status in ("paused", "exited", "dead"):
                            LOGGER.warning(
                                "Container '%s' is in a non-recoverable state (current state: %s). Stopping wait.",
                                container_name,
                                current_status,
                            )
                            return web.json_response(
                                {
                                    "started": False,
                                    "status": current_status,
                                    "health": current_health,
                                    "reason": f"non-recoverable state: {current_status}",
                                }
                            )
                    elif resp.status == 404:
                        LOGGER.warning(
                            "Container '%s' not found while waiting for start (attempt %d).",
                            container_name,
                            attempt + 1,
                        )
                        return web.json_response(
                            {"started": False, "status": "not_found", "reason": "container not found"}
                        )
                    else:
                        error_text = await resp.text()
                        LOGGER.error(
                            "Error inspecting container '%s' while waiting (status %s, attempt %d): %s",
                            container_name,
                            resp.status,
                            attempt + 1,
                            error_text,
                        )
                        raise web.HTTPServiceUnavailable(
                            text=f"Error inspecting container '{container_name}': Status {resp.status}"
                        )
            except aiohttp.ClientConnectorError as e:
                LOGGER.error(
                    "Could not connect to Docker Engine at %s:%s while waiting for container '%s' (attempt %d): %s",
                    DOCKER_API_HOST,
                    docker_engine_port,
                    container_name,
                    attempt + 1,
                    e,
                )
                raise web.HTTPServiceUnavailable(
                    text=f"Could not connect to Docker Engine on port {docker_engine_port}"
                ) from e
            except TimeoutError:
                # The overall session timeout fired; report the last observed state.
                LOGGER.warning(
                    "Overall timeout reached while waiting for container '%s' to start after %d attempts.",
                    container_name,
                    attempt + 1,
                )
                return web.json_response(
                    {
                        "started": False,
                        "status": last_known_status,
                        "health": last_known_health,
                        "reason": "overall timeout",
                    }
                )
            except web.HTTPServiceUnavailable:
                raise
            except Exception as e:
                LOGGER.exception(
                    "Unexpected error while waiting for container '%s' to start (attempt %d).",
                    container_name,
                    attempt + 1,
                )
                raise web.HTTPInternalServerError(
                    text="An unexpected error occurred while waiting for container start."
                ) from e
            if attempt < max_tries - 1:
                await asyncio.sleep(sleep_interval)
            else:
                LOGGER.info("Max tries reached for container '%s'. Reporting last known state.", container_name)

    LOGGER.warning(
        "Container '%s' did not reach desired 'running' and 'healthy' state within %d attempts.",
        container_name,
        max_tries,
    )
    return web.json_response(
        {
            "started": False,
            "status": last_known_status,
            "health": last_known_health,
            "reason": "timeout after max tries",
        }
    )
async def docker_exapp_remove(request: web.Request):
    """Force-remove an ExApp container and, optionally, its data volume.

    Expects a JSON body matching ``RemoveExAppPayload``. A missing container or
    volume (404) is treated as already removed. Returns 204 on success.

    Raises:
        HTTPBadRequest: invalid JSON or payload validation failure.
        HTTPConflict: the volume is still in use and cannot be removed.
        HTTPServiceUnavailable / HTTPGatewayTimeout / HTTPInternalServerError:
            Docker Engine communication problems.
    """
    docker_engine_port = get_docker_engine_port(request)
    try:
        payload_dict = await request.json()
    except json.JSONDecodeError:
        LOGGER.warning("Invalid JSON body received for /docker/exapp/remove")
        raise web.HTTPBadRequest(text="Invalid JSON body") from None
    try:
        payload = RemoveExAppPayload.model_validate(payload_dict)
    except ValidationError as e:
        LOGGER.warning("Payload validation error for /docker/exapp/remove: %s", e)
        raise web.HTTPBadRequest(text=f"Payload validation error: {e}") from None

    container_name = payload.exapp_container_name
    volume_name = payload.exapp_container_volume

    # force=true kills a still-running container before removal.
    remove_container_url = f"http://{DOCKER_API_HOST}:{docker_engine_port}/containers/{container_name}?force=true"
    remove_volume_url = f"http://{DOCKER_API_HOST}:{docker_engine_port}/volumes/{volume_name}"

    async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=30.0)) as session:
        LOGGER.info("Attempting to remove container '%s' via Docker API at %s", container_name, remove_container_url)
        try:
            async with session.delete(remove_container_url) as resp:
                if resp.status == 204:
                    LOGGER.info("Container '%s' removed successfully.", container_name)
                elif resp.status == 404:
                    LOGGER.info("Container '%s' not found, considering it removed.", container_name)
                elif resp.status == 409:
                    error_text = await resp.text()
                    LOGGER.warning(
                        "Conflict while removing container '%s' (status %s): %s. Assuming it might be gone or handled.",
                        container_name,
                        resp.status,
                        error_text,
                    )
                else:
                    error_text = await resp.text()
                    LOGGER.error(
                        "Error removing container '%s' with Docker API (status %s): %s",
                        container_name,
                        resp.status,
                        error_text,
                    )
                    raise web.HTTPServiceUnavailable(
                        text=f"Error removing container '{container_name}' via Docker Engine: Status {resp.status}"
                    )
        except aiohttp.ClientConnectorError as e:
            LOGGER.error(
                "Could not connect to Docker Engine at %s:%s to remove container: %s",
                DOCKER_API_HOST,
                docker_engine_port,
                e,
            )
            raise web.HTTPServiceUnavailable(
                text=f"Could not connect to Docker Engine on port {docker_engine_port}"
            ) from e
        except TimeoutError as e:
            # Fixed: arguments were previously (DOCKER_API_HOST, container_name, port),
            # swapping the container name and the host in the rendered log line.
            LOGGER.error(
                "Timeout while trying to remove container '%s' via Docker Engine at %s:%s",
                container_name,
                DOCKER_API_HOST,
                docker_engine_port,
            )
            raise web.HTTPGatewayTimeout(text="Timeout communicating with Docker Engine for container removal") from e
        except web.HTTPServiceUnavailable:
            raise
        except Exception as e:
            LOGGER.exception("Unexpected error while removing container '%s' via Docker API.", container_name)
            raise web.HTTPInternalServerError(text="An unexpected error occurred during container removal.") from e

        if payload.remove_data:
            LOGGER.info("Attempting to remove volume '%s' via Docker API at %s", volume_name, remove_volume_url)
            try:
                async with session.delete(remove_volume_url) as resp:
                    if resp.status == 204:
                        LOGGER.info("Volume '%s' removed successfully.", volume_name)
                    elif resp.status == 404:
                        LOGGER.info("Volume '%s' not found, considering it removed.", volume_name)
                    elif resp.status == 409:
                        error_text = await resp.text()
                        LOGGER.error(
                            "Cannot remove volume '%s' as it is in use (status %s): %s.",
                            volume_name,
                            resp.status,
                            error_text,
                        )
                        raise web.HTTPConflict(text=f"Volume '{volume_name}' is in use and could not be removed.")
                    else:
                        error_text = await resp.text()
                        LOGGER.error(
                            "Error removing volume '%s' with Docker API (status %s): %s",
                            volume_name,
                            resp.status,
                            error_text,
                        )
                        raise web.HTTPServiceUnavailable(
                            text=f"Error removing volume '{volume_name}' via Docker Engine: Status {resp.status}"
                        )
            except aiohttp.ClientConnectorError as e:
                LOGGER.error(
                    "Could not connect to Docker Engine at %s:%s to remove volume: %s",
                    DOCKER_API_HOST,
                    docker_engine_port,
                    e,
                )
                raise web.HTTPServiceUnavailable(
                    text=f"Could not connect to Docker Engine on port {docker_engine_port} for volume removal"
                ) from e
            except TimeoutError as e:
                # Fixed: arguments were previously (DOCKER_API_HOST, volume_name, port),
                # swapping the volume name and the host in the rendered log line.
                LOGGER.error(
                    "Timeout while trying to remove volume '%s' via Docker Engine at %s:%s",
                    volume_name,
                    DOCKER_API_HOST,
                    docker_engine_port,
                )
                raise web.HTTPGatewayTimeout(text="Timeout communicating with Docker Engine for volume removal") from e
            except (web.HTTPServiceUnavailable, web.HTTPConflict):
                raise
            except Exception as e:
                LOGGER.exception("Unexpected error while removing volume '%s' via Docker API.", volume_name)
                raise web.HTTPInternalServerError(text="An unexpected error occurred during volume removal.") from e

        LOGGER.info(
            "ExApp remove operation completed for container '%s' (remove_data=%s).",
            container_name,
            payload.remove_data,
        )
        return web.HTTPNoContent()
async def docker_exapp_install_certificates(request: web.Request):
    """Install system CA certificates and/or FRP client certificates into an ExApp container.

    Expects a JSON body matching ``InstallCertificatesPayload``. The container
    must exist; if it is not running it is started temporarily and stopped
    again in the ``finally`` block. System certificates are unpacked from the
    payload bundle into the OS-specific trust directory and the OS trust store
    is refreshed; FRP certificates are copied from the agent's local
    ``/certs/frp`` directory. Returns 204 on success.
    """
    docker_engine_port = get_docker_engine_port(request)
    try:
        payload_dict = await request.json()
    except json.JSONDecodeError:
        LOGGER.warning("Invalid JSON body received for /docker/exapp/install_certificates")
        raise web.HTTPBadRequest(text="Invalid JSON body") from None
    try:
        payload = InstallCertificatesPayload.model_validate(payload_dict)
    except ValidationError as e:
        LOGGER.warning("Payload validation error for /docker/exapp/install_certificates: %s", e)
        raise web.HTTPBadRequest(text=f"Payload validation error: {e}") from None

    container_name = payload.exapp_container_name
    # Generous timeout: cert installation execs several commands in the container.
    async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=120.0)) as session:
        # Remember whether the container was running before we touched it, so the
        # finally-block can restore the original state.
        original_state_running = False
        try:
            inspect_url = f"http://{DOCKER_API_HOST}:{docker_engine_port}/containers/{container_name}/json"
            async with session.get(inspect_url) as resp_inspect:
                if resp_inspect.status == 200:
                    inspect_data = await resp_inspect.json()
                    original_state_running = inspect_data.get("State", {}).get("Running", False)
                elif resp_inspect.status == 404:
                    LOGGER.error("Container '%s' not found for certificate installation.", container_name)
                    raise web.HTTPNotFound(text=f"Container '{container_name}' not found.")
                else:
                    error_text = await resp_inspect.text()
                    LOGGER.error(
                        "Failed to inspect container '%s' (status %s): %s",
                        container_name,
                        resp_inspect.status,
                        error_text,
                    )
                    raise web.HTTPServiceUnavailable(text=f"Failed to inspect container: Status {resp_inspect.status}")

            # Exec requires a running container; start it if needed (304 = already running).
            if not original_state_running:
                LOGGER.info(
                    "Container '%s' is not running. Attempting to start for certificate installation.", container_name
                )
                start_url = f"http://{DOCKER_API_HOST}:{docker_engine_port}/containers/{container_name}/start"
                async with session.post(start_url) as resp_start:
                    if resp_start.status not in (204, 304):
                        error_text = await resp_start.text()
                        LOGGER.error(
                            "Failed to start container '%s' for cert install (status %s): %s",
                            container_name,
                            resp_start.status,
                            error_text,
                        )
                        raise web.HTTPServiceUnavailable(text=f"Failed to start container: Status {resp_start.status}")
            LOGGER.info("Container '%s' started/is running for certificate installation.", container_name)

            # /etc/os-release tells us which distro family we are dealing with,
            # which determines the trust directory and the update command.
            exit_code, os_info_content = await _execute_command_in_container_simplified(
                session, docker_engine_port, container_name, ["cat", "/etc/os-release"]
            )
            if exit_code != 0:
                LOGGER.error(
                    "Failed to get OS info from container '%s'. Exit code: %s, Raw Output: %s",
                    container_name,
                    exit_code,
                    os_info_content,
                )
                raise web.HTTPInternalServerError(
                    text=f"Failed to get OS info. Exit: {exit_code}. Output: {os_info_content[:200]}"
                )
            LOGGER.info("OS Info for container '%s':\n%s", container_name, os_info_content.strip())

            if payload.system_certs_bundle:
                target_cert_dir = _get_target_cert_dir(os_info_content)
                if target_cert_dir:
                    LOGGER.info("Target system cert directory for container '%s': %s", container_name, target_cert_dir)
                    exit_code, raw_output = await _execute_command_in_container_simplified(
                        session, docker_engine_port, container_name, ["mkdir", "-p", target_cert_dir]
                    )
                    if exit_code != 0:
                        LOGGER.error(
                            "Failed to create cert dir '%s' in container '%s'. Exit: %s, Raw Output: %s",
                            target_cert_dir,
                            container_name,
                            exit_code,
                            raw_output,
                        )
                        raise web.HTTPInternalServerError(
                            text=f"Failed to create cert directory. Exit: {exit_code}. Output: {raw_output[:200]}"
                        )

                    # Split the PEM bundle into individual certs; each becomes its own
                    # file so the OS update tool picks them all up.
                    certs_to_install = {}
                    parsed_certs = _parse_certs_from_bundle(payload.system_certs_bundle)
                    for i, cert_content in enumerate(parsed_certs):
                        cert_filename = f"custom_ca_cert_{i}.crt"
                        # Tar member paths are relative, hence the lstrip("/").
                        certs_to_install[os.path.join(target_cert_dir.lstrip("/"), cert_filename)] = cert_content

                    if certs_to_install:
                        tar_bytes = _create_tar_archive_in_memory(certs_to_install)
                        await _put_archive_to_container(session, docker_engine_port, container_name, "/", tar_bytes)
                        LOGGER.info(
                            "Installed %d system CA certificates into '%s' in container '%s'.",
                            len(parsed_certs),
                            target_cert_dir,
                            container_name,
                        )

                        # Refresh the OS trust store (update-ca-certificates / update-ca-trust);
                        # a failure here is only logged, not fatal.
                        update_cmd_list = _get_certificate_update_command(os_info_content)
                        if update_cmd_list:
                            LOGGER.info("Running certificate update command: %s", " ".join(update_cmd_list))
                            exit_code, raw_output = await _execute_command_in_container_simplified(
                                session, docker_engine_port, container_name, update_cmd_list
                            )
                            if exit_code != 0:
                                LOGGER.error(
                                    "Certificate update command failed in container '%s'. Exit: %s, Raw Output: %s",
                                    container_name,
                                    exit_code,
                                    raw_output,
                                )
                            else:
                                LOGGER.info("Certificate update command successful. Raw Output: %s", raw_output.strip())
                        else:
                            LOGGER.warning(
                                "No certificate update command found for OS in container '%s'.", container_name
                            )
                    else:
                        LOGGER.info(
                            "No individual certificates parsed from system_certs_bundle for container '%s'.",
                            container_name,
                        )
                else:
                    LOGGER.warning(
                        "OS in container '%s' not supported for sys cert installation, or bundle empty. Skipping.",
                        container_name,
                    )
            else:
                LOGGER.info(
                    "No system_certs_bundle provided for container '%s'. Skipping system cert installation.",
                    container_name,
                )

            if payload.install_frp_certs:
                # FRP mTLS material lives on the agent itself and is mirrored into
                # the container at the same path.
                frp_cert_dir_on_harp = "/certs/frp"
                frp_target_dir_in_container = "/certs/frp"

                frp_files_to_read = {
                    "ca.crt": os.path.join(frp_cert_dir_on_harp, "ca.crt"),
                    "client.crt": os.path.join(frp_cert_dir_on_harp, "client.crt"),
                    "client.key": os.path.join(frp_cert_dir_on_harp, "client.key"),
                }
                frp_certs_content = {}
                # All three files are required; installation is all-or-nothing.
                all_frp_files_exist = True
                for name, path_on_harp in frp_files_to_read.items():
                    if os.path.exists(path_on_harp):
                        try:
                            with open(path_on_harp, encoding="utf-8") as f:
                                frp_certs_content[name] = f.read()
                        except Exception as e_read:
                            LOGGER.error("Failed to read FRP cert file '%s' from HaRP agent: %s", path_on_harp, e_read)
                            all_frp_files_exist = False
                            break
                    else:
                        LOGGER.warning(
                            "FRP certificate file '%s' not found on HaRP agent. Skipping FRP cert installation.",
                            path_on_harp,
                        )
                        all_frp_files_exist = False
                        break

                if all_frp_files_exist and frp_certs_content:
                    LOGGER.info(
                        "Installing FRP certificates from HaRP agent into '%s' in container '%s'.",
                        frp_target_dir_in_container,
                        container_name,
                    )
                    exit_code, raw_output = await _execute_command_in_container_simplified(
                        session, docker_engine_port, container_name, ["mkdir", "-p", frp_target_dir_in_container]
                    )
                    if exit_code != 0:
                        LOGGER.error(
                            "Failed to create FRP cert dir '%s' in container '%s'. Exit: %s, Raw Output: %s",
                            frp_target_dir_in_container,
                            container_name,
                            exit_code,
                            raw_output,
                        )
                        raise web.HTTPInternalServerError(
                            text=f"Failed to create FRP cert directory. Exit: {exit_code}. Output: {raw_output[:200]}"
                        )

                    frp_files_for_tar = {
                        os.path.join(frp_target_dir_in_container.lstrip("/"), "ca.crt"): frp_certs_content["ca.crt"],
                        os.path.join(frp_target_dir_in_container.lstrip("/"), "client.crt"): frp_certs_content[
                            "client.crt"
                        ],
                        os.path.join(frp_target_dir_in_container.lstrip("/"), "client.key"): frp_certs_content[
                            "client.key"
                        ],
                    }
                    tar_bytes_frp = _create_tar_archive_in_memory(frp_files_for_tar)
                    await _put_archive_to_container(session, docker_engine_port, container_name, "/", tar_bytes_frp)
                    LOGGER.info("FRP certificates installed successfully into '%s'.", frp_target_dir_in_container)
                elif not all_frp_files_exist:
                    LOGGER.info(
                        "One or more FRP cert files missing, skipping FRP installation for container '%s'",
                        container_name,
                    )
                else:
                    LOGGER.warning(
                        "FRP cert content is empty. Skipping FRP installation for '%s'",
                        container_name,
                    )
            else:
                LOGGER.info(
                    "install_frp_certs is false. Skipping FRP cert installation for container '%s'.", container_name
                )

            return web.HTTPNoContent()

        except (web.HTTPException, aiohttp.ClientError) as e:
            LOGGER.error("Error during certificate installation for '%s': %s", container_name, e)
            raise
        except Exception as e:
            LOGGER.exception("Unexpected fatal error during certificate installation for '%s'", container_name)
            raise web.HTTPInternalServerError(text=f"Unexpected error during certificate installation: {e}") from e
        finally:
            # Restore the original container state: stop it only if we were the
            # ones who (attempted to) start it. Runs on both success and error paths.
            if not original_state_running:
                LOGGER.info(
                    "Attempting to stop container '%s' as it was started for certificate installation.", container_name
                )
                stop_url = f"http://{DOCKER_API_HOST}:{docker_engine_port}/containers/{container_name}/stop"
                try:
                    async with session.post(stop_url) as resp_stop:
                        if resp_stop.status not in (204, 304, 404):
                            error_text = await resp_stop.text()
                            LOGGER.warning(
                                "Failed to stop container '%s' after cert install (status %s): %s",
                                container_name,
                                resp_stop.status,
                                error_text,
                            )
                        else:
                            LOGGER.info(
                                "Container '%s' stopped or was already stopped/gone after cert install.", container_name
                            )
                except Exception as e_stop:
                    LOGGER.error("Error stopping container '%s' in finally block: %s", container_name, e_stop)
+ else: + LOGGER.info( + "Container '%s' stopped or was already stopped/gone after cert install.", container_name + ) + except Exception as e_stop: + LOGGER.error("Error stopping container '%s' in finally block: %s", container_name, e_stop) + + +def _create_tar_archive_in_memory(files_to_add: dict[str, str]) -> bytes: + tar_stream = io.BytesIO() + with tarfile.open(fileobj=tar_stream, mode="w") as tar: + for path_in_archive, content_str in files_to_add.items(): + content_bytes = content_str.encode("utf-8") + tarinfo = tarfile.TarInfo(name=path_in_archive) + tarinfo.size = len(content_bytes) + tarinfo.mtime = int(time.time()) + tar.addfile(tarinfo, io.BytesIO(content_bytes)) + return tar_stream.getvalue() + + +async def _put_archive_to_container( + session: aiohttp.ClientSession, docker_engine_port: int, container_name: str, path: str, data: bytes +): + upload_url = f"http://{DOCKER_API_HOST}:{docker_engine_port}/containers/{container_name}/archive?path={path}" + headers = {"Content-Type": "application/x-tar"} + try: + async with session.put(upload_url, data=data, headers=headers) as resp: + if resp.status != 200: + error_text = await resp.text() + LOGGER.error( + "Failed to put archive to container '%s' at path '%s' (status %s): %s", + container_name, + path, + resp.status, + error_text, + ) + raise web.HTTPServiceUnavailable(text=f"Failed to upload archive: Status {resp.status}") + LOGGER.info("Successfully put archive to container '%s' at path '%s'", container_name, path) + except aiohttp.ClientError as e: + LOGGER.error("Client error putting archive to container '%s': %s", container_name, e) + raise web.HTTPServiceUnavailable(text=f"Client error uploading archive: {e}") from e + except Exception as e: + LOGGER.exception("Unexpected error putting archive to container '%s'", container_name) + raise web.HTTPInternalServerError(text=f"Unexpected error uploading archive: {e}") from e + + +def _parse_certs_from_bundle(bundle_content: str) -> list[str]: + """Parses 
individual PEM certificates from a bundle string.""" + return re.findall(r"-----BEGIN CERTIFICATE-----.+?-----END CERTIFICATE-----", bundle_content, re.DOTALL) + + +async def _execute_command_in_container_simplified( + session: aiohttp.ClientSession, docker_engine_port: int, container_id_or_name: str, cmd: list[str] +) -> tuple[int, str]: + """Executes a command in a running container and returns (exit_code, raw_output_str).""" + exec_create_url = f"http://{DOCKER_API_HOST}:{docker_engine_port}/containers/{container_id_or_name}/exec" + exec_create_payload = { + "AttachStdout": True, + "AttachStderr": True, + "Cmd": cmd, + } + exec_id = None + raw_output_str = "" + exit_code = -1 # Default to error + + try: + async with session.post(exec_create_url, json=exec_create_payload) as resp: + if resp.status != 201: + error_text = await resp.text() + LOGGER.error( + "Failed to create exec instance for command '%s' in container '%s' (status %s): %s", + " ".join(cmd), + container_id_or_name, + resp.status, + error_text, + ) + raise web.HTTPServiceUnavailable(text=f"Failed to create exec for command: {error_text}") + exec_id_data = await resp.json() + exec_id = exec_id_data.get("Id") + + if not exec_id: + raise web.HTTPInternalServerError(text="Exec ID not found after creation.") + + exec_start_url = f"http://{DOCKER_API_HOST}:{docker_engine_port}/exec/{exec_id}/start" + exec_start_payload = {"Detach": False, "Tty": False} # Tty=False gives raw stream + + async with session.post(exec_start_url, json=exec_start_payload) as resp: + if resp.status != 200: + error_text = await resp.text() + LOGGER.error( + "Failed to start exec instance '%s' for command '%s' in container '%s' (status %s): %s", + exec_id, + " ".join(cmd), + container_id_or_name, + resp.status, + error_text, + ) + raise web.HTTPServiceUnavailable(text=f"Failed to start exec: {error_text}") + raw_output_bytes = await resp.read() + raw_output_str = raw_output_bytes.decode(errors="ignore") + + exec_inspect_url = 
f"http://{DOCKER_API_HOST}:{docker_engine_port}/exec/{exec_id}/json" + async with session.get(exec_inspect_url) as resp_inspect: + if resp_inspect.status != 200: + error_text = await resp_inspect.text() + LOGGER.error( + "Failed to inspect exec instance '%s' (status %s): %s. Output was: %s", + exec_id, + resp_inspect.status, + error_text, + raw_output_str, + ) + else: + exec_info = await resp_inspect.json() + ret_code = exec_info.get("ExitCode") + if ret_code is None: # Should not happen if process exited + LOGGER.warning( + "Exec inspect for '%s' did not contain ExitCode. Output was: %s", exec_id, raw_output_str + ) + else: + exit_code = ret_code + + return exit_code, raw_output_str + + except aiohttp.ClientError as e: + LOGGER.error( + "Client error during exec command '%s' in container '%s': %s", " ".join(cmd), container_id_or_name, e + ) + raise web.HTTPServiceUnavailable(text=f"Client error during exec: {e}") from e + except Exception as e: + LOGGER.exception( + "Unexpected error during exec command '%s' in container '%s'", " ".join(cmd), container_id_or_name + ) + raise web.HTTPInternalServerError(text=f"Unexpected error during exec: {e}") from e + + +def _get_target_cert_dir(os_info_content: str | None) -> str | None: + if not os_info_content: + LOGGER.warning("OS info content is empty, cannot determine target cert directory.") + return None + os_info_lower = os_info_content.lower() + if "alpine" in os_info_lower: + return "/usr/local/share/ca-certificates" + if "debian" in os_info_lower or "ubuntu" in os_info_lower: + return "/usr/local/share/ca-certificates" + if ( + "centos" in os_info_lower + or "almalinux" in os_info_lower + or "rhel" in os_info_lower + or "fedora" in os_info_lower + ): + return "/etc/pki/ca-trust/source/anchors" + LOGGER.warning( + "Unsupported OS for SSL certificate installation: %s", + os_info_content.splitlines()[0] if os_info_content else "Unknown", + ) + return None + + +def _get_certificate_update_command(os_info_content: str | 
None) -> list[str] | None: + if not os_info_content: + LOGGER.warning("OS info content is empty, cannot determine certificate update command.") + return None + os_info_lower = os_info_content.lower() + if "alpine" in os_info_lower: + return ["update-ca-certificates"] + if "debian" in os_info_lower or "ubuntu" in os_info_lower: + return ["update-ca-certificates"] + if ( + "centos" in os_info_lower + or "almalinux" in os_info_lower + or "rhel" in os_info_lower + or "fedora" in os_info_lower + ): + return ["update-ca-trust", "extract"] + return None + + ############################################################################### # HTTP Server Setup ############################################################################### @@ -516,11 +1594,17 @@ def create_web_app() -> web.Application: app.router.add_post("/exapp_storage/{app_id}", add_exapp) app.router.add_delete("/exapp_storage/{app_id}", delete_exapp) - # FRP certificates - app.router.add_get("/frp_certificates", get_frp_certificates) - # FRP auth (FRP Server will call it) app.router.add_post("/frp_handler", frp_auth) + + # Docker Engine APIs wrappers + app.router.add_post("/docker/exapp/exists", docker_exapp_exists) + app.router.add_post("/docker/exapp/create", docker_exapp_create) + app.router.add_post("/docker/exapp/start", docker_exapp_start) + app.router.add_post("/docker/exapp/stop", docker_exapp_stop) + app.router.add_post("/docker/exapp/wait_for_start", docker_exapp_wait_for_start) + app.router.add_post("/docker/exapp/remove", docker_exapp_remove) + app.router.add_post("/docker/exapp/install_certificates", docker_exapp_install_certificates) return app From 36b9f41b4cf8f1f8b82d534394c5afb068dba628 Mon Sep 17 00:00:00 2001 From: Oleksander Piskun Date: Wed, 28 May 2025 16:50:34 +0300 Subject: [PATCH 2/5] refactor: created helper _install_frp_certificates function Signed-off-by: Oleksander Piskun --- haproxy_agent.py | 146 ++++++++++++++++++++++++----------------------- 1 file changed, 75 insertions(+), 
71 deletions(-) diff --git a/haproxy_agent.py b/haproxy_agent.py index 7ee348a..67b6339 100644 --- a/haproxy_agent.py +++ b/haproxy_agent.py @@ -111,7 +111,9 @@ class CreateExAppPayload(ExAppName): network_mode: str = Field(..., description="Desired NetworkMode for the container.") environment_variables: list[str] = Field([], description="ExApp environment variables.") restart_policy: str = Field("unless-stopped", description="Desired RestartPolicy for the container.") - compute_device: Literal["cpu", "rocm", "cuda"] = Field("cpu", description="Possible values: 'cpu', 'rocm' or 'cuda'") + compute_device: Literal["cpu", "rocm", "cuda"] = Field( + "cpu", description="Possible values: 'cpu', 'rocm' or 'cuda'" + ) mount_points: list[CreateExAppMounts] = Field([], description="List of mount points for the container.") @@ -1305,76 +1307,7 @@ async def docker_exapp_install_certificates(request: web.Request): ) if payload.install_frp_certs: - frp_cert_dir_on_harp = "/certs/frp" - frp_target_dir_in_container = "/certs/frp" - - frp_files_to_read = { - "ca.crt": os.path.join(frp_cert_dir_on_harp, "ca.crt"), - "client.crt": os.path.join(frp_cert_dir_on_harp, "client.crt"), - "client.key": os.path.join(frp_cert_dir_on_harp, "client.key"), - } - frp_certs_content = {} - all_frp_files_exist = True - for name, path_on_harp in frp_files_to_read.items(): - if os.path.exists(path_on_harp): - try: - with open(path_on_harp, encoding="utf-8") as f: - frp_certs_content[name] = f.read() - except Exception as e_read: - LOGGER.error("Failed to read FRP cert file '%s' from HaRP agent: %s", path_on_harp, e_read) - all_frp_files_exist = False - break - else: - LOGGER.warning( - "FRP certificate file '%s' not found on HaRP agent. 
Skipping FRP cert installation.", - path_on_harp, - ) - all_frp_files_exist = False - break - - if all_frp_files_exist and frp_certs_content: - LOGGER.info( - "Installing FRP certificates from HaRP agent into '%s' in container '%s'.", - frp_target_dir_in_container, - container_name, - ) - exit_code, raw_output = await _execute_command_in_container_simplified( - session, docker_engine_port, container_name, ["mkdir", "-p", frp_target_dir_in_container] - ) - if exit_code != 0: - LOGGER.error( - "Failed to create FRP cert dir '%s' in container '%s'. Exit: %s, Raw Output: %s", - frp_target_dir_in_container, - container_name, - exit_code, - raw_output, - ) - raise web.HTTPInternalServerError( - text=f"Failed to create FRP cert directory. Exit: {exit_code}. Output: {raw_output[:200]}" - ) - - frp_files_for_tar = { - os.path.join(frp_target_dir_in_container.lstrip("/"), "ca.crt"): frp_certs_content["ca.crt"], - os.path.join(frp_target_dir_in_container.lstrip("/"), "client.crt"): frp_certs_content[ - "client.crt" - ], - os.path.join(frp_target_dir_in_container.lstrip("/"), "client.key"): frp_certs_content[ - "client.key" - ], - } - tar_bytes_frp = _create_tar_archive_in_memory(frp_files_for_tar) - await _put_archive_to_container(session, docker_engine_port, container_name, "/", tar_bytes_frp) - LOGGER.info("FRP certificates installed successfully into '%s'.", frp_target_dir_in_container) - elif not all_frp_files_exist: - LOGGER.info( - "One or more FRP cert files missing, skipping FRP installation for container '%s'", - container_name, - ) - else: - LOGGER.warning( - "FRP cert content is empty. Skipping FRP installation for '%s'", - container_name, - ) + await _install_frp_certificates(session, docker_engine_port, container_name) else: LOGGER.info( "install_frp_certs is false. 
Skipping FRP cert installation for container '%s'.", container_name @@ -1412,6 +1345,77 @@ async def docker_exapp_install_certificates(request: web.Request): LOGGER.error("Error stopping container '%s' in finally block: %s", container_name, e_stop) +async def _install_frp_certificates( + session: aiohttp.ClientSession, docker_engine_port: int, container_name: str +) -> None: + frp_cert_dir_on_harp = "/certs/frp" + frp_target_dir_in_container = "/certs/frp" + + frp_files_to_read = { + "ca.crt": os.path.join(frp_cert_dir_on_harp, "ca.crt"), + "client.crt": os.path.join(frp_cert_dir_on_harp, "client.crt"), + "client.key": os.path.join(frp_cert_dir_on_harp, "client.key"), + } + frp_certs_content = {} + all_frp_files_exist = True + for name, path_on_harp in frp_files_to_read.items(): + if os.path.exists(path_on_harp): + try: + with open(path_on_harp, encoding="utf-8") as f: + frp_certs_content[name] = f.read() + except Exception as e_read: + LOGGER.error("Failed to read FRP cert file '%s' from HaRP agent: %s", path_on_harp, e_read) + all_frp_files_exist = False + break + else: + LOGGER.warning( + "FRP certificate file '%s' not found on HaRP agent. Skipping FRP cert installation.", + path_on_harp, + ) + all_frp_files_exist = False + break + + if all_frp_files_exist and frp_certs_content: + LOGGER.info( + "Installing FRP certificates from HaRP agent into '%s' in container '%s'.", + frp_target_dir_in_container, + container_name, + ) + exit_code, raw_output = await _execute_command_in_container_simplified( + session, docker_engine_port, container_name, ["mkdir", "-p", frp_target_dir_in_container] + ) + if exit_code != 0: + LOGGER.error( + "Failed to create FRP cert dir '%s' in container '%s'. Exit: %s, Raw Output: %s", + frp_target_dir_in_container, + container_name, + exit_code, + raw_output, + ) + raise web.HTTPInternalServerError( + text=f"Failed to create FRP cert directory. Exit: {exit_code}. 
Output: {raw_output[:200]}" + ) + + frp_files_for_tar = { + os.path.join(frp_target_dir_in_container.lstrip("/"), "ca.crt"): frp_certs_content["ca.crt"], + os.path.join(frp_target_dir_in_container.lstrip("/"), "client.crt"): frp_certs_content["client.crt"], + os.path.join(frp_target_dir_in_container.lstrip("/"), "client.key"): frp_certs_content["client.key"], + } + tar_bytes_frp = _create_tar_archive_in_memory(frp_files_for_tar) + await _put_archive_to_container(session, docker_engine_port, container_name, "/", tar_bytes_frp) + LOGGER.info("FRP certificates installed successfully into '%s'.", frp_target_dir_in_container) + elif not all_frp_files_exist: + LOGGER.info( + "One or more FRP cert files missing, skipping FRP installation for container '%s'", + container_name, + ) + else: + LOGGER.warning( + "FRP cert content is empty. Skipping FRP installation for '%s'", + container_name, + ) + + def _create_tar_archive_in_memory(files_to_add: dict[str, str]) -> bytes: tar_stream = io.BytesIO() with tarfile.open(fileobj=tar_stream, mode="w") as tar: From 77fc4643886da5dddba6112a14ad73397c189aaa Mon Sep 17 00:00:00 2001 From: Oleksander Piskun Date: Wed, 28 May 2025 19:16:16 +0300 Subject: [PATCH 3/5] fix: CUDA deploy Signed-off-by: Oleksander Piskun --- haproxy_agent.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/haproxy_agent.py b/haproxy_agent.py index 67b6339..8e7f472 100644 --- a/haproxy_agent.py +++ b/haproxy_agent.py @@ -627,11 +627,13 @@ async def docker_exapp_create(request: web.Request): container_config["NetworkingConfig"] = {"EndpointsConfig": {payload.network_mode: {"Aliases": [payload.name]}}} if payload.compute_device == "cuda": - container_config["HostConfig"]["DeviceRequests"] = { - "Driver": "nvidia", - "Count": -1, - "Capabilities": [["compute", "utility"]], - } + container_config["HostConfig"]["DeviceRequests"] = [ + { + "Driver": "nvidia", + "Count": -1, + "Capabilities": [["compute", "utility"]], + } + ] elif 
payload.compute_device == "rocm": devices = [] for device in ("/dev/kfd", "/dev/dri"): From fc658f7c892558e4ffdb1759205203ee44b8a6b0 Mon Sep 17 00:00:00 2001 From: Oleksander Piskun Date: Wed, 28 May 2025 20:41:36 +0300 Subject: [PATCH 4/5] refactor: new _install_system_certificates sub-function Signed-off-by: Oleksander Piskun --- haproxy_agent.py | 136 +++++++++++++++++++++++++---------------------- 1 file changed, 73 insertions(+), 63 deletions(-) diff --git a/haproxy_agent.py b/haproxy_agent.py index 8e7f472..b1aadb8 100644 --- a/haproxy_agent.py +++ b/haproxy_agent.py @@ -1239,69 +1239,9 @@ async def docker_exapp_install_certificates(request: web.Request): LOGGER.info("OS Info for container '%s':\n%s", container_name, os_info_content.strip()) if payload.system_certs_bundle: - target_cert_dir = _get_target_cert_dir(os_info_content) - if target_cert_dir: - LOGGER.info("Target system cert directory for container '%s': %s", container_name, target_cert_dir) - exit_code, raw_output = await _execute_command_in_container_simplified( - session, docker_engine_port, container_name, ["mkdir", "-p", target_cert_dir] - ) - if exit_code != 0: - LOGGER.error( - "Failed to create cert dir '%s' in container '%s'. Exit: %s, Raw Output: %s", - target_cert_dir, - container_name, - exit_code, - raw_output, - ) - raise web.HTTPInternalServerError( - text=f"Failed to create cert directory. Exit: {exit_code}. 
Output: {raw_output[:200]}" - ) - - certs_to_install = {} - parsed_certs = _parse_certs_from_bundle(payload.system_certs_bundle) - for i, cert_content in enumerate(parsed_certs): - cert_filename = f"custom_ca_cert_{i}.crt" - certs_to_install[os.path.join(target_cert_dir.lstrip("/"), cert_filename)] = cert_content - - if certs_to_install: - tar_bytes = _create_tar_archive_in_memory(certs_to_install) - await _put_archive_to_container(session, docker_engine_port, container_name, "/", tar_bytes) - LOGGER.info( - "Installed %d system CA certificates into '%s' in container '%s'.", - len(parsed_certs), - target_cert_dir, - container_name, - ) - - update_cmd_list = _get_certificate_update_command(os_info_content) - if update_cmd_list: - LOGGER.info("Running certificate update command: %s", " ".join(update_cmd_list)) - exit_code, raw_output = await _execute_command_in_container_simplified( - session, docker_engine_port, container_name, update_cmd_list - ) - if exit_code != 0: - LOGGER.error( - "Certificate update command failed in container '%s'. Exit: %s, Raw Output: %s", - container_name, - exit_code, - raw_output, - ) - else: - LOGGER.info("Certificate update command successful. Raw Output: %s", raw_output.strip()) - else: - LOGGER.warning( - "No certificate update command found for OS in container '%s'.", container_name - ) - else: - LOGGER.info( - "No individual certificates parsed from system_certs_bundle for container '%s'.", - container_name, - ) - else: - LOGGER.warning( - "OS in container '%s' not supported for sys cert installation, or bundle empty. Skipping.", - container_name, - ) + await _install_system_certificates( + session, docker_engine_port, container_name, payload.system_certs_bundle, os_info_content + ) else: LOGGER.info( "No system_certs_bundle provided for container '%s'. 
Skipping system cert installation.", @@ -1347,6 +1287,76 @@ async def docker_exapp_install_certificates(request: web.Request): LOGGER.error("Error stopping container '%s' in finally block: %s", container_name, e_stop) +async def _install_system_certificates( + session: aiohttp.ClientSession, + docker_engine_port: int, + container_name: str, + system_certs_bundle: str, + os_info_content: str, +) -> None: + target_cert_dir = _get_target_cert_dir(os_info_content) + if target_cert_dir: + LOGGER.info("Target system cert directory for container '%s': %s", container_name, target_cert_dir) + exit_code, raw_output = await _execute_command_in_container_simplified( + session, docker_engine_port, container_name, ["mkdir", "-p", target_cert_dir] + ) + if exit_code != 0: + LOGGER.error( + "Failed to create cert dir '%s' in container '%s'. Exit: %s, Raw Output: %s", + target_cert_dir, + container_name, + exit_code, + raw_output, + ) + raise web.HTTPInternalServerError( + text=f"Failed to create cert directory. Exit: {exit_code}. 
Output: {raw_output[:200]}" + ) + + certs_to_install = {} + parsed_certs = _parse_certs_from_bundle(system_certs_bundle) + for i, cert_content in enumerate(parsed_certs): + cert_filename = f"custom_ca_cert_{i}.crt" + certs_to_install[os.path.join(target_cert_dir.lstrip("/"), cert_filename)] = cert_content + + if certs_to_install: + tar_bytes = _create_tar_archive_in_memory(certs_to_install) + await _put_archive_to_container(session, docker_engine_port, container_name, "/", tar_bytes) + LOGGER.info( + "Installed %d system CA certificates into '%s' in container '%s'.", + len(parsed_certs), + target_cert_dir, + container_name, + ) + + update_cmd_list = _get_certificate_update_command(os_info_content) + if update_cmd_list: + LOGGER.info("Running certificate update command: %s", " ".join(update_cmd_list)) + exit_code, raw_output = await _execute_command_in_container_simplified( + session, docker_engine_port, container_name, update_cmd_list + ) + if exit_code != 0: + LOGGER.error( + "Certificate update command failed in container '%s'. Exit: %s, Raw Output: %s", + container_name, + exit_code, + raw_output, + ) + else: + LOGGER.info("Certificate update command successful. Raw Output: %s", raw_output.strip()) + else: + LOGGER.warning("No certificate update command found for OS in container '%s'.", container_name) + else: + LOGGER.info( + "No individual certificates parsed from system_certs_bundle for container '%s'.", + container_name, + ) + else: + LOGGER.warning( + "OS in container '%s' not supported for sys cert installation, or bundle empty. 
Skipping.", + container_name, + ) + + async def _install_frp_certificates( session: aiohttp.ClientSession, docker_engine_port: int, container_name: str ) -> None: From 49e23806ad33260ae0cebba4e60dc19b4aea3124 Mon Sep 17 00:00:00 2001 From: Oleksander Piskun Date: Thu, 29 May 2025 08:40:58 +0300 Subject: [PATCH 5/5] final small refactor Signed-off-by: Oleksander Piskun --- haproxy_agent.py | 102 ++++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 50 deletions(-) diff --git a/haproxy_agent.py b/haproxy_agent.py index b1aadb8..8fb93ce 100644 --- a/haproxy_agent.py +++ b/haproxy_agent.py @@ -1295,66 +1295,68 @@ async def _install_system_certificates( os_info_content: str, ) -> None: target_cert_dir = _get_target_cert_dir(os_info_content) - if target_cert_dir: - LOGGER.info("Target system cert directory for container '%s': %s", container_name, target_cert_dir) + if not target_cert_dir: + LOGGER.warning( + "OS in container '%s' not supported for sys cert installation, or bundle empty. Skipping.", + container_name, + ) + return + + LOGGER.info("Target system cert directory for container '%s': %s", container_name, target_cert_dir) + exit_code, raw_output = await _execute_command_in_container_simplified( + session, docker_engine_port, container_name, ["mkdir", "-p", target_cert_dir] + ) + if exit_code != 0: + LOGGER.error( + "Failed to create cert dir '%s' in container '%s'. Exit: %s, Raw Output: %s", + target_cert_dir, + container_name, + exit_code, + raw_output, + ) + raise web.HTTPInternalServerError( + text=f"Failed to create cert directory. Exit: {exit_code}. 
Output: {raw_output[:200]}" + ) + + certs_to_install = {} + parsed_certs = _parse_certs_from_bundle(system_certs_bundle) + for i, cert_content in enumerate(parsed_certs): + cert_filename = f"custom_ca_cert_{i}.crt" + certs_to_install[os.path.join(target_cert_dir.lstrip("/"), cert_filename)] = cert_content + + if not certs_to_install: + LOGGER.info( + "No individual certificates parsed from system_certs_bundle for container '%s'.", + container_name, + ) + return + + tar_bytes = _create_tar_archive_in_memory(certs_to_install) + await _put_archive_to_container(session, docker_engine_port, container_name, "/", tar_bytes) + LOGGER.info( + "Installed %d system CA certificates into '%s' in container '%s'.", + len(parsed_certs), + target_cert_dir, + container_name, + ) + + update_cmd_list = _get_certificate_update_command(os_info_content) + if update_cmd_list: + LOGGER.info("Running certificate update command: %s", " ".join(update_cmd_list)) exit_code, raw_output = await _execute_command_in_container_simplified( - session, docker_engine_port, container_name, ["mkdir", "-p", target_cert_dir] + session, docker_engine_port, container_name, update_cmd_list ) if exit_code != 0: LOGGER.error( - "Failed to create cert dir '%s' in container '%s'. Exit: %s, Raw Output: %s", - target_cert_dir, + "Certificate update command failed in container '%s'. Exit: %s, Raw Output: %s", container_name, exit_code, raw_output, ) - raise web.HTTPInternalServerError( - text=f"Failed to create cert directory. Exit: {exit_code}. 
Output: {raw_output[:200]}" - ) - - certs_to_install = {} - parsed_certs = _parse_certs_from_bundle(system_certs_bundle) - for i, cert_content in enumerate(parsed_certs): - cert_filename = f"custom_ca_cert_{i}.crt" - certs_to_install[os.path.join(target_cert_dir.lstrip("/"), cert_filename)] = cert_content - - if certs_to_install: - tar_bytes = _create_tar_archive_in_memory(certs_to_install) - await _put_archive_to_container(session, docker_engine_port, container_name, "/", tar_bytes) - LOGGER.info( - "Installed %d system CA certificates into '%s' in container '%s'.", - len(parsed_certs), - target_cert_dir, - container_name, - ) - - update_cmd_list = _get_certificate_update_command(os_info_content) - if update_cmd_list: - LOGGER.info("Running certificate update command: %s", " ".join(update_cmd_list)) - exit_code, raw_output = await _execute_command_in_container_simplified( - session, docker_engine_port, container_name, update_cmd_list - ) - if exit_code != 0: - LOGGER.error( - "Certificate update command failed in container '%s'. Exit: %s, Raw Output: %s", - container_name, - exit_code, - raw_output, - ) - else: - LOGGER.info("Certificate update command successful. Raw Output: %s", raw_output.strip()) - else: - LOGGER.warning("No certificate update command found for OS in container '%s'.", container_name) else: - LOGGER.info( - "No individual certificates parsed from system_certs_bundle for container '%s'.", - container_name, - ) + LOGGER.info("Certificate update command successful. Raw Output: %s", raw_output.strip()) else: - LOGGER.warning( - "OS in container '%s' not supported for sys cert installation, or bundle empty. Skipping.", - container_name, - ) + LOGGER.warning("No certificate update command found for OS in container '%s'.", container_name) async def _install_frp_certificates(