PrunaAI · sdiazlor · Sep 17, 2025 · Aug 29, 2025 · Aug 29, 2025 · Aug 29, 2025
diff --git a/docs/tutorials/image_generation.ipynb b/docs/tutorials/image_generation.ipynb
@@ -324,11 +324,11 @@
     {
      "data": {
       "text/markdown": [
-       "| Metric | Base Model | Compressed Model | Relative Difference |\n",
+       "| Metric | Base Model | Smashed Model | Improvement % |\n",
        "|--------|----------|-----------|------------|\n",
        "| clip_score | 29.1295  | 29.0439  | -0.29% |\n",
-       "| latency | 3723.4453 ms/num_iterations | 1874.1925 ms/num_iterations | -49.67% |\n",
-       "| throughput | 0.0003 num_iterations/ms | 0.0005 num_iterations/ms | +98.67% |\n"
+       "| latency | 3723.4453 ms/num_iterations | 1874.1925 ms/num_iterations | 49.67% |\n",
+       "| throughput | 0.0003 num_iterations/ms | 0.0005 num_iterations/ms | 98.67% |\n"
       ],
       "text/plain": [
        "<IPython.core.display.Markdown object>"
@@ -342,36 +342,25 @@
     "from IPython.display import Markdown, display  # noqa\n",
     "\n",
     "\n",
-    "# Calculate percentage differences for each metric\n",
-    "def calculate_percentage_diff(original, optimized):  # noqa\n",
-    "    return ((optimized - original) / original) * 100\n",
-    "\n",
-    "\n",
-    "# Calculate differences and prepare table data\n",
-    "table_data = []\n",
-    "for base_metric_result in base_model_results:\n",
-    "    for smashed_metric_result in smashed_model_results:\n",
-    "        if base_metric_result.name == smashed_metric_result.name:\n",
-    "            diff = calculate_percentage_diff(base_metric_result.result, smashed_metric_result.result)\n",
-    "            table_data.append(\n",
-    "                {\n",
-    "                    \"Metric\": base_metric_result.name,\n",
-    "                    \"Base Model\": f\"{base_metric_result.result:.4f}\",\n",
-    "                    \"Compressed Model\": f\"{smashed_metric_result.result:.4f}\",\n",
-    "                    \"Relative Difference\": f\"{diff:+.2f}%\",\n",
-    "                }\n",
-    "            )\n",
-    "            break\n",
-    "\n",
-    "# Create and display markdown table manually\n",
-    "markdown_table = \"| Metric | Base Model | Compressed Model | Relative Difference |\\n\"\n",
-    "markdown_table += \"|--------|----------|-----------|------------|\\n\"\n",
-    "for row in table_data:\n",
-    "    metric = [m for m in metrics if m.metric_name == row[\"Metric\"]][0]\n",
-    "    unit = metric.metric_units if hasattr(metric, \"metric_units\") else \"\"\n",
-    "    markdown_table += f\"| {row['Metric']} | {row['Base Model']} {unit} | {row['Compressed Model']} {unit} | {row['Relative Difference']} |\\n\"  # noqa: E501\n",
-    "\n",
-    "display(Markdown(markdown_table))"
+    "def make_comparison_table(base_model_results, smashed_model_results):  # noqa\n",
+    "    header = \"| Metric | Base Model | Smashed Model | Improvement % |\\n\"\n",
+    "    header += \"|\" + \"-----|\" * 4 + \"\\n\"\n",
+    "    rows = []\n",
+    "\n",
+    "    for base, smashed in zip(base_model_results, smashed_model_results):\n",
+    "        base_result = base.result\n",
+    "        smashed_result = smashed.result\n",
+    "        if base.higher_is_better:\n",
+    "            diff = ((smashed_result - base_result) / base_result) * 100\n",
+    "        else:\n",
+    "            diff = ((base_result - smashed_result) / base_result) * 100\n",
+    "        row = f\"| {base.name} | {base_result:.4f} {base.metric_units or ''}\"\n",
+    "        row += f\"| {smashed_result:.4f} {smashed.metric_units or ''} | {diff:.2f}% |\"\n",
+    "        rows.append(row)\n",
+    "    return header + \"\\n\".join(rows)\n",
+    "\n",
+    "\n",
+    "display(Markdown(make_comparison_table(base_model_results, smashed_model_results)))"
    ]
   },
   {

diff --git a/docs/tutorials/llms.ipynb b/docs/tutorials/llms.ipynb
@@ -71,13 +71,7 @@
    "source": [
     "import torch\n",
     "\n",
-    "device = (\n",
-    "    \"cuda\"\n",
-    "    if torch.cuda.is_available()\n",
-    "    else \"mps\"\n",
-    "    if torch.backends.mps.is_available()\n",
-    "    else \"cpu\"\n",
-    ")"
+    "device = \"cuda\" if torch.cuda.is_available() else \"mps\" if torch.backends.mps.is_available() else \"cpu\""
    ]
   },
   {
@@ -353,10 +347,10 @@
       "text/markdown": [
        "| Metric | Base Model | Compressed Model | Relative Difference |\n",
        "|--------|----------|-----------|------------|\n",
-       "| perplexity | 41.8264 | 46.9769 | +12.31% |\n",
-       "| energy_consumed | 0.0050 kWh | 0.0012 kWh | -76.72% |\n",
-       "| throughput | 0.0009 num_iterations/ms | 0.0050 num_iterations/ms | +427.17% |\n",
-       "| total_time | 53180.1827 ms | 10087.8858 ms | -81.03% |\n"
+       "| perplexity | 41.8264 | 46.9769 | -12.31% |\n",
+       "| energy_consumed | 0.0050 kWh | 0.0012 kWh | 76.72% |\n",
+       "| throughput | 0.0009 num_iterations/ms | 0.0050 num_iterations/ms | 427.17% |\n",
+       "| total_time | 53180.1827 ms | 10087.8858 ms | 81.03% |\n"
       ],
       "text/plain": [
        "<IPython.core.display.Markdown object>"
@@ -370,39 +364,25 @@
     "from IPython.display import Markdown, display  # noqa\n",
     "\n",
     "\n",
-    "# Calculate percentage differences for each metric\n",
-    "def calculate_percentage_diff(original, optimized):  # noqa\n",
-    "    return ((optimized - original) / original) * 100\n",
-    "\n",
-    "\n",
-    "# Calculate differences and prepare table data\n",
-    "table_data = []\n",
-    "for base_metric_result, smashed_metric_result in zip(\n",
-    "    base_model_results, smashed_model_results\n",
-    "):\n",
-    "    diff = calculate_percentage_diff(\n",
-    "        base_metric_result.result, smashed_metric_result.result\n",
-    "    )\n",
-    "    table_data.append(\n",
-    "        {\n",
-    "            \"Metric\": base_metric_result.name,\n",
-    "            \"Base Model\": f\"{base_metric_result.result:.4f}\",\n",
-    "            \"Compressed Model\": f\"{smashed_metric_result.result:.4f}\",\n",
-    "            \"Relative Difference\": f\"{diff:+.2f}%\",\n",
-    "        }\n",
-    "    )\n",
-    "\n",
-    "# Create and display markdown table manually\n",
-    "markdown_table = \"| Metric | Base Model | Compressed Model | Relative Difference |\\n\"\n",
-    "markdown_table += \"|--------|----------|-----------|------------|\\n\"\n",
-    "for row in table_data:\n",
-    "    metric_obj = [metric for metric in metrics if metric.metric_name == row[\"Metric\"]][\n",
-    "        0\n",
-    "    ]\n",
-    "    unit = f\" {metric_obj.metric_units}\" if hasattr(metric_obj, \"metric_units\") else \"\"\n",
-    "    markdown_table += f\"| {row['Metric']} | {row['Base Model']} {unit} | {row['Compressed Model']} {unit} | {row['Relative Difference']} |\\n\"  # noqa: E501\n",
-    "\n",
-    "display(Markdown(markdown_table))"
+    "def make_comparison_table(base_model_results, smashed_model_results):  # noqa\n",
+    "    header = \"| Metric | Base Model | Smashed Model | Improvement % |\\n\"\n",
+    "    header += \"|\" + \"-----|\" * 4 + \"\\n\"\n",
+    "    rows = []\n",
+    "\n",
+    "    for base, smashed in zip(base_model_results, smashed_model_results):\n",
+    "        base_result = base.result\n",
+    "        smashed_result = smashed.result\n",
+    "        if base.higher_is_better:\n",
+    "            diff = ((smashed_result - base_result) / base_result) * 100\n",
+    "        else:\n",
+    "            diff = ((base_result - smashed_result) / base_result) * 100\n",
+    "        row = f\"| {base.name} | {base_result:.4f} {base.metric_units or ''}\"\n",
+    "        row += f\"| {smashed_result:.4f} {smashed.metric_units or ''} | {diff:.2f}% |\"\n",
+    "        rows.append(row)\n",
+    "    return header + \"\\n\".join(rows)\n",
+    "\n",
+    "\n",
+    "display(Markdown(make_comparison_table(base_model_results, smashed_model_results)))"
    ]
   },
   {

diff --git a/docs/tutorials/reasoning_llm.ipynb b/docs/tutorials/reasoning_llm.ipynb
@@ -386,12 +386,12 @@
     {
      "data": {
       "text/markdown": [
-       "| Metric | Base Model | Compressed Model | Relative Difference |\n",
-       "|--------|----------|-----------|------------|\n",
-       "| perplexity | 3.3330  | 2.8230  | -15.30% |\n",
-       "| total_time | 42390.9036  ms | 6869.6069  ms | -83.79% |\n",
-       "| throughput | 0.0189  num_iterations/ms | 0.1165  num_iterations/ms | +517.08% |\n",
-       "| energy_consumed | 0.0059  kWh | 0.0011  kWh | -81.92% |\n"
+       "| Metric | Base Model | Smashed Model | Improvement % |\n",
+       "|-----|-----|-----|-----|\n",
+       "| perplexity | 3.3330  | 2.8230  | 15.30% |\n",
+       "| total_time | 42390.9036 ms | 6869.6069 ms | 83.79% |\n",
+       "| throughput | 0.0189 num_iterations/ms | 0.1165 num_iterations/ms | 517.08% |\n",
+       "| energy_consumed | 0.0059 kWh | 0.0011 kWh | 81.92% |"
       ],
       "text/plain": [
        "<IPython.core.display.Markdown object>"
@@ -405,33 +405,25 @@
     "from IPython.display import Markdown, display  # noqa\n",
     "\n",
     "\n",
-    "# Calculate percentage differences for each metric\n",
-    "def calculate_percentage_diff(original, optimized):  # noqa\n",
-    "    return ((optimized - original) / original) * 100\n",
-    "\n",
-    "\n",
-    "# Calculate differences and prepare table data\n",
-    "table_data = []\n",
-    "for base_metric_result, smashed_metric_result in zip(base_model_results, smashed_model_results):\n",
-    "    diff = calculate_percentage_diff(base_metric_result.result, smashed_metric_result.result)\n",
-    "    table_data.append(\n",
-    "        {\n",
-    "            \"Metric\": base_metric_result.name,\n",
-    "            \"Base Model\": f\"{base_metric_result.result:.4f}\",\n",
-    "            \"Compressed Model\": f\"{smashed_metric_result.result:.4f}\",\n",
-    "            \"Relative Difference\": f\"{diff:+.2f}%\",\n",
-    "        }\n",
-    "    )\n",
-    "\n",
-    "# Create and display markdown table manually\n",
-    "markdown_table = \"| Metric | Base Model | Compressed Model | Relative Difference |\\n\"\n",
-    "markdown_table += \"|--------|----------|-----------|------------|\\n\"\n",
-    "for row in table_data:\n",
-    "    metric_obj = [metric for metric in metrics if metric.metric_name == row[\"Metric\"]][0]\n",
-    "    unit = f\" {metric_obj.metric_units}\" if hasattr(metric_obj, \"metric_units\") else \"\"\n",
-    "    markdown_table += f\"| {row['Metric']} | {row['Base Model']} {unit} | {row['Compressed Model']} {unit} | {row['Relative Difference']} |\\n\"  # noqa: E501\n",
-    "\n",
-    "display(Markdown(markdown_table))"
+    "def make_comparison_table(base_model_results, smashed_model_results):  # noqa\n",
+    "    header = \"| Metric | Base Model | Smashed Model | Improvement % |\\n\"\n",
+    "    header += \"|\" + \"-----|\" * 4 + \"\\n\"\n",
+    "    rows = []\n",
+    "\n",
+    "    for base, smashed in zip(base_model_results, smashed_model_results):\n",
+    "        base_result = base.result\n",
+    "        smashed_result = smashed.result\n",
+    "        if base.higher_is_better:\n",
+    "            diff = ((smashed_result - base_result) / base_result) * 100\n",
+    "        else:\n",
+    "            diff = ((base_result - smashed_result) / base_result) * 100\n",
+    "        row = f\"| {base.name} | {base_result:.4f} {base.metric_units or ''}\"\n",
+    "        row += f\"| {smashed_result:.4f} {smashed.metric_units or ''} | {diff:.2f}% |\"\n",
+    "        rows.append(row)\n",
+    "    return header + \"\\n\".join(rows)\n",
+    "\n",
+    "\n",
+    "display(Markdown(make_comparison_table(base_model_results, smashed_model_results)))"
    ]
   },
   {

diff --git a/docs/tutorials/video_generation.ipynb b/docs/tutorials/video_generation.ipynb
@@ -66,13 +66,7 @@
    "source": [
     "import torch\n",
     "\n",
-    "device = (\n",
-    "    \"cuda\"\n",
-    "    if torch.cuda.is_available()\n",
-    "    else \"mps\"\n",
-    "    if torch.backends.mps.is_available()\n",
-    "    else \"cpu\"\n",
-    ")"
+    "device = \"cuda\" if torch.cuda.is_available() else \"mps\" if torch.backends.mps.is_available() else \"cpu\""
    ]
   },
   {
@@ -96,13 +90,9 @@
     "\n",
     "model_id = \"Wan-AI/Wan2.1-T2V-1.3B-Diffusers\"\n",
     "\n",
-    "vae = AutoencoderKLWan.from_pretrained(\n",
-    "    model_id, subfolder=\"vae\", torch_dtype=torch.float32\n",
-    ")\n",
+    "vae = AutoencoderKLWan.from_pretrained(model_id, subfolder=\"vae\", torch_dtype=torch.float32)\n",
     "\n",
-    "pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16).to(\n",
-    "    device\n",
-    ")"
+    "pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16).to(device)"
    ]
   },
   {
@@ -362,13 +352,13 @@
     {
      "data": {
       "text/markdown": [
-       "| Metric | Base Model | Compressed Model | Relative Difference |\n",
-       "|--------|----------|-----------|------------|\n",
-       "| total_time | 460992.1875000 ms | 265793.1718750 ms | -42.34% |\n",
-       "| latency | 153664.0625000 ms/num_iterations | 88597.7239583 ms/num_iterations | -42.34% |\n",
-       "| throughput | 0.0000065 num_iterations/ms | 0.0000113 num_iterations/ms | +73.44% |\n",
-       "| co2_emissions | 0.0031181 kgCO2e | 0.0018072 kgCO2e | -42.04% |\n",
-       "| energy_consumed | 0.0556424 kWh | 0.0322483 kWh | -42.04% |\n"
+       "| Metric | Base Model | Smashed Model | Improvement % |\n",
+       "|-----|-----|-----|-----|\n",
+       "| total_time | 460992.1875000 ms | 265793.1718750 ms | 42.34% |\n",
+       "| latency | 153664.0625000 ms/num_iterations | 88597.7239583 ms/num_iterations | 42.34% |\n",
+       "| throughput | 0.0000065 num_iterations/ms | 0.0000113 num_iterations/ms | 73.44% |\n",
+       "| co2_emissions | 0.0031181 kgCO2e | 0.0018072 kgCO2e | 42.04% |\n",
+       "| energy_consumed | 0.0556424 kWh | 0.0322483 kWh | 42.04% |"
       ],
       "text/plain": [
        "<IPython.core.display.Markdown object>"
@@ -382,38 +372,25 @@
     "from IPython.display import Markdown, display  # noqa\n",
     "\n",
     "\n",
-    "# Calculate percentage differences for each metric\n",
-    "def calculate_percentage_diff(original, optimized):  # noqa\n",
-    "    return ((optimized - original) / original) * 100\n",
-    "\n",
-    "\n",
-    "# Calculate differences and prepare table data\n",
-    "table_data = []\n",
-    "for base_metric_result in base_model_results:\n",
-    "    for smashed_metric_result in smashed_model_results:\n",
-    "        if base_metric_result.name == smashed_metric_result.name:\n",
-    "            diff = calculate_percentage_diff(\n",
-    "                base_metric_result.result, smashed_metric_result.result\n",
-    "            )\n",
-    "            table_data.append(\n",
-    "                {\n",
-    "                    \"Metric\": base_metric_result.name,\n",
-    "                    \"Base Model\": f\"{base_metric_result.result:.7f}\",\n",
-    "                    \"Compressed Model\": f\"{smashed_metric_result.result:.7f}\",\n",
-    "                    \"Relative Difference\": f\"{diff:+.2f}%\",\n",
-    "                }\n",
-    "            )\n",
-    "            break\n",
-    "\n",
-    "# Create and display markdown table manually\n",
-    "markdown_table = \"| Metric | Base Model | Compressed Model | Relative Difference |\\n\"\n",
-    "markdown_table += \"|--------|----------|-----------|------------|\\n\"\n",
-    "for row in table_data:\n",
-    "    metric = [m for m in metrics if m.metric_name == row[\"Metric\"]][0]\n",
-    "    unit = metric.metric_units if hasattr(metric, \"metric_units\") else \"\"\n",
-    "    markdown_table += f\"| {row['Metric']} | {row['Base Model']} {unit} | {row['Compressed Model']} {unit} | {row['Relative Difference']} |\\n\"  # noqa: E501\n",
-    "\n",
-    "display(Markdown(markdown_table))"
+    "def make_comparison_table(base_model_results, smashed_model_results):  # noqa\n",
+    "    header = \"| Metric | Base Model | Smashed Model | Improvement % |\\n\"\n",
+    "    header += \"|\" + \"-----|\" * 4 + \"\\n\"\n",
+    "    rows = []\n",
+    "\n",
+    "    for base, smashed in zip(base_model_results, smashed_model_results):\n",
+    "        base_result = base.result\n",
+    "        smashed_result = smashed.result\n",
+    "        if base.higher_is_better:\n",
+    "            diff = ((smashed_result - base_result) / base_result) * 100\n",
+    "        else:\n",
+    "            diff = ((base_result - smashed_result) / base_result) * 100\n",
+    "        row = f\"| {base.name} | {base_result:.7f} {base.metric_units or ''}\"\n",
+    "        row += f\"| {smashed_result:.7f} {smashed.metric_units or ''} | {diff:.2f}% |\"\n",
+    "        rows.append(row)\n",
+    "    return header + \"\\n\".join(rows)\n",
+    "\n",
+    "\n",
+    "display(Markdown(make_comparison_table(base_model_results, smashed_model_results)))"
    ]
   },
   {

diff --git a/src/pruna/evaluation/metrics/result.py b/src/pruna/evaluation/metrics/result.py
@@ -14,7 +14,7 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from typing import Any, Dict
+from typing import Any, Dict, Optional
 
 
 @dataclass
@@ -30,11 +30,24 @@ class MetricResult:
         The parameters of the metric.
     result : float | int
         The result of the metric.
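+    metric_units : Optional[str]
+        The units of the metric; defaults to ``params["metric_units"]`` when not given.
+    higher_is_better : Optional[bool]
+        Whether larger values mean better performance; defaults to ``params["higher_is_better"]``.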
     """
 
     name: str
     params: Dict[str, Any]
     result: float | int
+    higher_is_better: Optional[bool] = None
+    metric_units: Optional[str] = None
+
+    def __post_init__(self):
+        """Fill metric_units and higher_is_better from params when they were not passed explicitly."""
+        if self.metric_units is None:
+            object.__setattr__(self, "metric_units", self.params.get("metric_units"))
+        if self.higher_is_better is None:
+            object.__setattr__(self, "higher_is_better", self.params.get("higher_is_better"))
 
     def __str__(self) -> str:
         """
@@ -45,7 +58,8 @@ def __str__(self) -> str:
         str
             A string representation of the MetricResult.
         """
-        return f"{self.name}: {self.result}"
+        units = f" {self.metric_units}" if self.metric_units else ""
+        return f"{self.name}: {self.result}{units}"
 
     @classmethod
     def from_results_dict(