diff --git a/astrbot/core/astr_main_agent.py b/astrbot/core/astr_main_agent.py index 10b67253fe..2b4a04907e 100644 --- a/astrbot/core/astr_main_agent.py +++ b/astrbot/core/astr_main_agent.py @@ -69,6 +69,11 @@ ) from astrbot.core.utils.file_extract import extract_file_moonshotai from astrbot.core.utils.llm_metadata import LLM_METADATAS +from astrbot.core.utils.media_utils import ( + IMAGE_COMPRESS_DEFAULT_MAX_SIZE, + IMAGE_COMPRESS_DEFAULT_QUALITY, + compress_image, +) from astrbot.core.utils.quoted_message.settings import ( SETTINGS as DEFAULT_QUOTED_MESSAGE_SETTINGS, ) @@ -473,16 +478,23 @@ async def _request_img_caption( async def _ensure_img_caption( + event: AstrMessageEvent, req: ProviderRequest, cfg: dict, plugin_context: Context, image_caption_provider: str, ) -> None: try: + compressed_urls = [] + for url in req.image_urls: + compressed_url = await _compress_image_for_provider(url, cfg) + compressed_urls.append(compressed_url) + if _is_generated_compressed_image_path(url, compressed_url): + event.track_temporary_local_file(compressed_url) caption = await _request_img_caption( image_caption_provider, cfg, - req.image_urls, + compressed_urls, plugin_context, ) if caption: @@ -492,6 +504,9 @@ async def _ensure_img_caption( req.image_urls = [] except Exception as exc: # noqa: BLE001 logger.error("处理图片描述失败: %s", exc) + req.extra_user_content_parts.append(TextPart(text="[Image Captioning Failed]")) + finally: + req.image_urls = [] def _append_quoted_image_attachment(req: ProviderRequest, image_path: str) -> None: @@ -511,12 +526,64 @@ def _get_quoted_message_parser_settings( return DEFAULT_QUOTED_MESSAGE_SETTINGS.with_overrides(overrides) +def _get_image_compress_args( + provider_settings: dict[str, object] | None, +) -> tuple[bool, int, int]: + if not isinstance(provider_settings, dict): + return True, IMAGE_COMPRESS_DEFAULT_MAX_SIZE, IMAGE_COMPRESS_DEFAULT_QUALITY + + enabled = provider_settings.get("image_compress_enabled", True) + if not isinstance(enabled, 
bool): + enabled = True + + raw_options = provider_settings.get("image_compress_options", {}) + options = raw_options if isinstance(raw_options, dict) else {} + + max_size = options.get("max_size", IMAGE_COMPRESS_DEFAULT_MAX_SIZE) + if not isinstance(max_size, int): + max_size = IMAGE_COMPRESS_DEFAULT_MAX_SIZE + max_size = max(max_size, 1) + + quality = options.get("quality", IMAGE_COMPRESS_DEFAULT_QUALITY) + if not isinstance(quality, int): + quality = IMAGE_COMPRESS_DEFAULT_QUALITY + quality = min(max(quality, 1), 100) + + return enabled, max_size, quality + + +async def _compress_image_for_provider( + url_or_path: str, + provider_settings: dict[str, object] | None, +) -> str: + try: + enabled, max_size, quality = _get_image_compress_args(provider_settings) + if not enabled: + return url_or_path + return await compress_image(url_or_path, max_size=max_size, quality=quality) + except Exception as exc: # noqa: BLE001 + logger.error("Image compression failed: %s", exc) + return url_or_path + + +def _is_generated_compressed_image_path( + original_path: str, + compressed_path: str | None, +) -> bool: + if not compressed_path or compressed_path == original_path: + return False + if compressed_path.startswith("http") or compressed_path.startswith("data:image"): + return False + return os.path.exists(compressed_path) + + async def _process_quote_message( event: AstrMessageEvent, req: ProviderRequest, img_cap_prov_id: str, plugin_context: Context, quoted_message_settings: QuotedMessageParserSettings = DEFAULT_QUOTED_MESSAGE_SETTINGS, + config: MainAgentBuildConfig | None = None, ) -> None: quote = None for comp in event.message_obj.message: @@ -549,15 +616,24 @@ async def _process_quote_message( if image_seg: try: prov = None + path = None + compress_path = None if img_cap_prov_id: prov = plugin_context.get_provider_by_id(img_cap_prov_id) if prov is None: prov = plugin_context.get_using_provider(event.unified_msg_origin) if prov and isinstance(prov, Provider): + path = 
await image_seg.convert_to_file_path() + compress_path = await _compress_image_for_provider( + path, + config.provider_settings if config else None, + ) + if path and _is_generated_compressed_image_path(path, compress_path): + event.track_temporary_local_file(compress_path) llm_resp = await prov.text_chat( prompt="Please describe the image content.", - image_urls=[await image_seg.convert_to_file_path()], + image_urls=[compress_path], ) if llm_resp.completion_text: content_parts.append( @@ -567,6 +643,16 @@ async def _process_quote_message( logger.warning("No provider found for image captioning in quote.") except BaseException as exc: logger.error("处理引用图片失败: %s", exc) + finally: + if ( + compress_path + and compress_path != path + and os.path.exists(compress_path) + ): + try: + os.remove(compress_path) + except Exception as exc: # noqa: BLE001 + logger.warning("Fail to remove temporary compressed image: %s", exc) quoted_content = "\n".join(content_parts) quoted_text = f"\n{quoted_content}\n" @@ -635,6 +721,7 @@ async def _decorate_llm_request( img_cap_prov_id: str = cfg.get("default_image_caption_provider_id") or "" if img_cap_prov_id and req.image_urls: await _ensure_img_caption( + event, req, cfg, plugin_context, @@ -649,6 +736,7 @@ async def _decorate_llm_request( img_cap_prov_id, plugin_context, quoted_message_settings, + config, ) tz = config.timezone @@ -1025,7 +1113,13 @@ async def build_main_agent( # media files attachments for comp in event.message_obj.message: if isinstance(comp, Image): - image_path = await comp.convert_to_file_path() + path = await comp.convert_to_file_path() + image_path = await _compress_image_for_provider( + path, + config.provider_settings, + ) + if _is_generated_compressed_image_path(path, image_path): + event.track_temporary_local_file(image_path) req.image_urls.append(image_path) req.extra_user_content_parts.append( TextPart(text=f"[Image Attachment: path {image_path}]") @@ -1052,7 +1146,13 @@ async def build_main_agent( for 
reply_comp in comp.chain: if isinstance(reply_comp, Image): has_embedded_image = True - image_path = await reply_comp.convert_to_file_path() + path = await reply_comp.convert_to_file_path() + image_path = await _compress_image_for_provider( + path, + config.provider_settings, + ) + if _is_generated_compressed_image_path(path, image_path): + event.track_temporary_local_file(image_path) req.image_urls.append(image_path) _append_quoted_image_attachment(req, image_path) elif isinstance(reply_comp, File): diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py index feb5c0c769..cca5fcffe3 100644 --- a/astrbot/core/config/default.py +++ b/astrbot/core/config/default.py @@ -174,6 +174,11 @@ "shipyard_neo_profile": "python-default", "shipyard_neo_ttl": 3600, }, + "image_compress_enabled": True, + "image_compress_options": { + "max_size": 1280, + "quality": 95, + }, }, # SubAgent orchestrator mode: # - main_enable = False: disabled; main LLM mounts tools normally (persona selection). 
@@ -3452,6 +3457,29 @@ class ChatProviderTemplate(TypedDict): "type": "string", "hint": "可使用 {{prompt}} 作为用户输入的占位符。如果不输入占位符则代表添加在用户输入的前面。", }, + "provider_settings.image_compress_enabled": { + "description": "启用图片压缩", + "type": "bool", + "hint": "启用后,发送给多模态模型前会先压缩本地大图片。", + }, + "provider_settings.image_compress_options.max_size": { + "description": "最大边长", + "type": "int", + "hint": "压缩后图片的最长边,单位为像素。超过该尺寸时会按比例缩放。", + "condition": { + "provider_settings.image_compress_enabled": True, + }, + "slider": {"min": 256, "max": 4096, "step": 64}, + }, + "provider_settings.image_compress_options.quality": { + "description": "压缩质量", + "type": "int", + "hint": "JPEG 输出质量,范围为 1-100。值越高,画质越好,文件也越大。", + "condition": { + "provider_settings.image_compress_enabled": True, + }, + "slider": {"min": 1, "max": 100, "step": 1}, + }, "provider_tts_settings.dual_output": { "description": "开启 TTS 时同时输出语音和文字内容", "type": "bool", diff --git a/astrbot/core/pipeline/scheduler.py b/astrbot/core/pipeline/scheduler.py index ca69b8475a..243d03378c 100644 --- a/astrbot/core/pipeline/scheduler.py +++ b/astrbot/core/pipeline/scheduler.py @@ -92,4 +92,5 @@ async def execute(self, event: AstrMessageEvent) -> None: logger.debug("pipeline 执行完毕。") finally: + event.cleanup_temporary_local_files() active_event_registry.unregister(event) diff --git a/astrbot/core/platform/astr_message_event.py b/astrbot/core/platform/astr_message_event.py index 021a4bff7c..82c03dbb0d 100644 --- a/astrbot/core/platform/astr_message_event.py +++ b/astrbot/core/platform/astr_message_event.py @@ -1,6 +1,7 @@ import abc import asyncio import hashlib +import os import re import uuid from collections.abc import AsyncGenerator @@ -88,6 +89,8 @@ def __init__( """在此次事件中是否有过至少一次发送消息的操作""" self.call_llm = False """是否在此消息事件中禁止默认的 LLM 请求""" + self._temporary_local_files: list[str] = [] + """Temporary local files created during this event and safe to delete when it finishes.""" self.plugins_name: list[str] | None = None """该事件启用的插件名称列表。None 
def track_temporary_local_file(self, path: str) -> None:
    """Remember a temporary local file so it can be deleted later.

    Empty paths and paths that are already tracked are ignored.

    Args:
        path: Filesystem path of a file created while handling this event.
    """
    if path and path not in self._temporary_local_files:
        self._temporary_local_files.append(path)


def cleanup_temporary_local_files(self) -> None:
    """Delete every tracked temporary file and reset the tracking list.

    The list is cleared before any deletion is attempted, so a second call
    is a no-op. A file that is already gone is treated as cleaned up; any
    other ``OSError`` is logged as a warning and does not stop the loop.
    """
    paths = list(self._temporary_local_files)
    self._temporary_local_files.clear()
    for path in paths:
        try:
            # EAFP: checking os.path.exists() first would race with other
            # code removing the same file and produce a spurious warning.
            os.remove(path)
        except FileNotFoundError:
            continue
        except OSError as e:
            logger.warning(
                "Failed to remove temporary local file %s: %s",
                path,
                e,
            )
def _read_large_local_image(path: Path, min_file_size_bytes: int) -> bytes | None:
    """Return the bytes of ``path``, or ``None`` when it is missing or too small."""
    try:
        if path.stat().st_size < min_file_size_bytes:
            return None
    except FileNotFoundError:
        return None
    return path.read_bytes()


def _compress_image_sync(
    data: bytes,
    temp_dir: Path,
    max_size: int,
    quality: int,
    optimize: bool,
) -> str:
    """Re-encode ``data`` as a JPEG in ``temp_dir`` (blocking; run in a thread).

    Args:
        data: Raw bytes of the source image.
        temp_dir: Existing directory that receives the output file.
        max_size: Longest allowed edge in pixels; larger images are shrunk
            proportionally.
        quality: JPEG quality passed to Pillow.
        optimize: Pillow ``optimize`` flag for the JPEG encoder.

    Returns:
        Path of the newly written ``compressed_<uuid>.jpg`` file.

    Raises:
        Exception: Whatever Pillow raises when the data cannot be decoded or
            saved. A partially written output file is removed first.
    """
    save_path = temp_dir / f"compressed_{uuid.uuid4().hex}.jpg"
    with PILImage.open(io.BytesIO(data)) as opened_img:
        img = opened_img
        intermediates = []
        try:
            has_alpha = img.mode in ("RGBA", "LA") or (
                img.mode == "P" and "transparency" in img.info
            )
            if has_alpha:
                # JPEG has no alpha channel. A bare convert("RGB") just drops
                # it, so composite onto white instead.
                rgba = img.convert("RGBA")
                intermediates.append(rgba)
                flattened = PILImage.new("RGB", rgba.size, (255, 255, 255))
                intermediates.append(flattened)
                flattened.paste(rgba, mask=rgba.getchannel("A"))
                img = flattened
            elif img.mode != "RGB":
                img = img.convert("RGB")
                intermediates.append(img)

            if max(img.size) > max_size:
                img.thumbnail((max_size, max_size), PILImage.Resampling.LANCZOS)

            try:
                img.save(save_path, "JPEG", quality=quality, optimize=optimize)
            except Exception:
                # Do not leave a truncated file behind in the temp directory.
                save_path.unlink(missing_ok=True)
                raise
        finally:
            for intermediate in intermediates:
                intermediate.close()

    logger.debug(f"Image compressed successfully: {save_path}")
    return str(save_path)


async def compress_image(
    url_or_path: str,
    max_size: int = IMAGE_COMPRESS_DEFAULT_MAX_SIZE,
    quality: int = IMAGE_COMPRESS_DEFAULT_QUALITY,
) -> str:
    """Compress large user-uploaded images.

    Args:
        url_or_path: Local image path, ``data:image`` URI, or remote URL.
        max_size: Longest edge of the compressed image in pixels.
        quality: JPEG output quality in the range 1-100.

    Returns:
        The path of the compressed JPEG. Returns ``url_or_path`` unchanged
        when it starts with ``http``, when a local file is missing, when the
        payload is smaller than ``IMAGE_COMPRESS_DEFAULT_MIN_FILE_SIZE_MB``,
        or when decoding/compression fails.
    """
    max_size = max(int(max_size), 1)
    quality = min(max(int(quality), 1), 100)
    min_file_size_bytes = int(IMAGE_COMPRESS_DEFAULT_MIN_FILE_SIZE_MB * 1024 * 1024)

    # Skip compression for remote images and return the original value.
    if url_or_path.startswith("http"):
        return url_or_path

    try:
        if url_or_path.startswith("data:image"):
            _header, separator, encoded = url_or_path.partition(",")
            if not separator:
                # No payload section to decode; nothing to compress.
                return url_or_path
            data = base64.b64decode(encoded)
        else:
            # stat() and read() block, so keep them off the event loop.
            data = await asyncio.to_thread(
                _read_large_local_image,
                Path(url_or_path),
                min_file_size_bytes,
            )

        if not data or len(data) < min_file_size_bytes:
            return url_or_path

        temp_dir = Path(get_astrbot_temp_path())
        temp_dir.mkdir(parents=True, exist_ok=True)

        # Offload the blocking image processing task to a thread.
        return await asyncio.to_thread(
            _compress_image_sync,
            data,
            temp_dir,
            max_size,
            quality,
            IMAGE_COMPRESS_DEFAULT_OPTIMIZE,
        )
    except Exception as exc:  # noqa: BLE001
        # Honour the documented contract: fall back to the original image.
        # Only a prefix is logged because data URIs can be megabytes long.
        logger.warning(
            "Image compression failed for %s, using original: %s",
            url_or_path[:64],
            exc,
        )
        return url_or_path
Если заполнитель не указан, он будет добавлен перед текстом пользователя." }, + "image_compress_enabled": { + "description": "Включить сжатие изображений", + "hint": "Когда включено, большие локальные изображения сжимаются перед отправкой в мультимодальные модели." + }, + "image_compress_options": { + "description": "Настройки сжатия изображений", + "hint": "Управляет ограничением размера, качеством JPEG и минимальным порогом размера для сжатия.", + "max_size": { + "description": "Максимальная длина стороны", + "hint": "Максимальная длина стороны сжатого изображения в пикселях. Более крупные изображения пропорционально уменьшаются." + }, + "quality": { + "description": "Качество JPEG", + "hint": "Качество JPEG от 1 до 100. Более высокие значения сохраняют больше деталей, но увеличивают размер файла." + } + }, "reachability_check": { "description": "Проверка доступности провайдеров", "hint": "При выполнении команды /provider проверяет связь со всеми моделями. Это может расходовать токены." diff --git a/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json b/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json index 2c3cbfd16b..9cbbd38de6 100644 --- a/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json +++ b/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json @@ -337,6 +337,22 @@ "description": "用户提示词", "hint": "可使用 {{prompt}} 作为用户输入的占位符。如果不输入占位符则代表添加在用户输入的前面。" }, + "image_compress_enabled": { + "description": "启用图片压缩", + "hint": "启用后,发送给多模态模型前会先压缩本地大图片。" + }, + "image_compress_options": { + "description": "图片压缩配置", + "hint": "用于控制图片压缩的尺寸、质量和触发阈值。", + "max_size": { + "description": "最大边长", + "hint": "压缩后图片的最长边,单位为像素。超过该尺寸时会按比例缩放。" + }, + "quality": { + "description": "JPEG 质量", + "hint": "JPEG 输出质量,范围为 1-100。值越高,画质越好,文件也越大。" + } + }, "reachability_check": { "description": "提供商可达性检测", "hint": "/provider 命令列出模型时并发检测连通性。开启后会主动调用模型测试连通性,可能产生额外 token 消耗。"