diff --git a/modelopt/torch/_compress/compress.py b/modelopt/torch/_compress/compress.py index 265fd5eeb..455cf3f8e 100644 --- a/modelopt/torch/_compress/compress.py +++ b/modelopt/torch/_compress/compress.py @@ -28,8 +28,7 @@ from omegaconf import DictConfig from puzzle_tools.runtime import IRuntime -# TODO Move initialize_hydra_config_for_dir from tests to main -from tests.utils.test_utils import initialize_hydra_config_for_dir +from modelopt.torch._compress.hydra import initialize_hydra_config_for_dir def compress( diff --git a/modelopt/torch/_compress/decilm/converters/convert_llama3_to_decilm.py b/modelopt/torch/_compress/decilm/converters/convert_llama3_to_decilm.py index 4b65eeada..d17e7ef74 100644 --- a/modelopt/torch/_compress/decilm/converters/convert_llama3_to_decilm.py +++ b/modelopt/torch/_compress/decilm/converters/convert_llama3_to_decilm.py @@ -19,6 +19,7 @@ #!/usr/bin/env python3 from pathlib import Path +import torch from fire import Fire from puzzle_tools.checkpoint_utils import copy_tokenizer from puzzle_tools.checkpoint_utils_hf import copy_deci_lm_hf_code @@ -46,7 +47,7 @@ def convert_llama3_config_to_decilm_config(config: LlamaConfig) -> DeciLMConfig: dtype = getattr(config, "torch_dtype", None) # Convert torch.dtype to string if needed (for JSON serialization) - if dtype is not None and hasattr(dtype, "__module__") and "torch" in dtype.__module__: + if dtype is not None and isinstance(dtype, torch.dtype): dtype = str(dtype).replace("torch.", "") # Track which global values will be removed (moved to per-layer configs) diff --git a/modelopt/torch/_compress/hydra.py b/modelopt/torch/_compress/hydra.py new file mode 100644 index 000000000..8c36d309e --- /dev/null +++ b/modelopt/torch/_compress/hydra.py @@ -0,0 +1,54 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from hydra import compose, initialize, initialize_config_dir +from omegaconf import DictConfig, OmegaConf + +""" +Utilities for hydra config initialization. +""" + + +def initialize_hydra_config_for_dir( + config_dir: str, config_name: str, overrides: list[str] +) -> DictConfig: + """Initialize a hydra config from an absolute path for a config directory + + Args: + config_dir (str): + config_name (str): + overrides (List[str]): + + Returns: + DictConfig: + """ + + with initialize_config_dir(version_base=None, config_dir=config_dir): + args = compose(config_name, overrides) + args._set_flag("allow_objects", True) + OmegaConf.resolve(args) # resolve object attributes + OmegaConf.set_struct(args, False) + + return args + + +def initialize_hydra_config(config_path: str, config_name: str, overrides: list[str]) -> DictConfig: + with initialize(version_base=None, config_path=config_path): + args = compose(config_name, overrides) + args._set_flag("allow_objects", True) + OmegaConf.resolve(args) # resolve object attributes + OmegaConf.set_struct(args, False) + + return args diff --git a/modelopt/torch/_compress/nas/plugins/compress_nas_plugin.py b/modelopt/torch/_compress/nas/plugins/compress_nas_plugin.py new file mode 100644 index 000000000..9c4b5acad --- /dev/null +++ b/modelopt/torch/_compress/nas/plugins/compress_nas_plugin.py @@ -0,0 +1,167 @@ +# SPDX-FileCopyrightText: Copyright (c) 
2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Compress NAS plugin for the Modelopt framework (based on Puzzle algorithm: https://arxiv.org/abs/2411.19146). +""" + +import datetime +from pathlib import Path + +import pruning_ckpts +import score_pruning_activations +import torch +from scripts.convert_llama3_to_decilm import convert_llama3_to_decilm +from torch import nn + +from modelopt.torch._compress.hydra import initialize_hydra_config_for_dir +from modelopt.torch._compress.runtime import NativeDdpRuntime +from modelopt.torch.nas.conversion import NASModeRegistry +from modelopt.torch.opt.config import ModeloptBaseConfig, ModeloptField +from modelopt.torch.opt.mode import ( + ConvertEntrypoint, + ConvertReturnType, + MetadataDict, + ModeDescriptor, + RestoreEntrypoint, +) +from modelopt.torch.opt.searcher import BaseSearcher + + +class CompressModel(nn.Module): + pass # No model implementation is needed for the compress mode + + +class CompressConfig(ModeloptBaseConfig): + """Configuration for Compress NAS algorithm.""" + + # Input model path to compress in the HF format + input_model_path: str = ModeloptField( + default="", + title="", + description="", + ) + + # Hydra config directory containing the search space definition + hydra_config_dir: str = ModeloptField( + default="", + title="", + description="", + ) + + # Hydra config name containing the search space 
definition + hydra_config_name: str = ModeloptField( + default="", + title="", + description="", + ) + + # Directory to save the compressed model and intermediate results + puzzle_dir: str = ModeloptField( + default="", + title="", + description="", + ) + + # Dataset path to use for scoring in pruning and NAS search + dataset_path: str = ModeloptField( + default="", + title="", + description="", + ) + + +def convert_compress_model(model: nn.Module, config: CompressConfig) -> ConvertReturnType: + """1. Convert the model from HF format to DeciLM format. + 2. Score the pruning activations. + 3. Prune the model and save pruned checkpoints + + The output of this step will be used by mtn.search() to perform the NAS search. + """ + runtime = NativeDdpRuntime( + dtype=torch.bfloat16, torch_distributed_timeout=datetime.timedelta(10) + ) + + # Load hydra config + hydra_cfg = initialize_hydra_config_for_dir( + config_dir=config.hydra_config_dir, + config_name=config.hydra_config_name, + overrides=[ + f"puzzle_dir={config.puzzle_dir}", + f"dataset_path={config.dataset_path}", + ], + ) + + # Convert Llama3 model to DeciLM model + hf_ckpt_teacher_dir = "ckpts/teacher" # TODO: make it configurable + convert_llama3_to_decilm( + input_dir=config.input_model_path, + output_dir=Path(config.puzzle_dir) / hf_ckpt_teacher_dir, + ) + + # Score_pruning_activations (distributed processing) + score_pruning_activations.launch_score_activations(hydra_cfg, runtime) + + # Prune the model and save pruned checkpoints + if runtime.global_rank == 0: + pruning_ckpts.launch_prune_ckpt(hydra_cfg) + runtime.wait_for_everyone() + + return model, {} + + +def restore_compress_model( + model: nn.Module, config: CompressConfig, metadata: MetadataDict +) -> nn.Module: + """Restore is not needed for the compress mode as we are not saving any model state""" + return model + + +@NASModeRegistry.register_mode +class CompressDescriptor(ModeDescriptor): + """Descriptor for the Compress mode.""" + + @property + 
def name(self) -> str: + """String identifier for this mode.""" + return "compress" + + @property + def config_class(self) -> type[ModeloptBaseConfig]: + """Configuration class for this mode.""" + return CompressConfig + + @property + def search_algorithm(self) -> type[BaseSearcher]: + """Return the associated searcher implementation.""" + raise NotImplementedError("Compress mode does not have a search algorithm yet.") + + @property + def convert(self) -> ConvertEntrypoint: + """Entrypoint to convert a model.""" + return convert_compress_model + + @property + def restore(self) -> RestoreEntrypoint: + """Entrypoint to restore a model.""" + return restore_compress_model + + @property + def export_mode(self) -> str | None: + """The mode that corresponds to the export mode. + For now, this will be a no-op as there is no modelopt's concept of search space defined + for the compress algorithm. + """ + return "export_nas" diff --git a/setup.py b/setup.py index cfadd5170..568131f48 100644 --- a/setup.py +++ b/setup.py @@ -100,7 +100,11 @@ "setuptools-scm>=8", ], # Dependedencies for modelopt.torch._compress subpackage - "compress": ["fire"], + "compress": [ + "fire", + "hydra-core==1.3.2", + "omegaconf==2.3.0", + ], } # create "compound" optional dependencies diff --git a/tests/experimental/torch/_compress/compress_test_utils.py b/tests/experimental/torch/_compress/compress_test_utils.py new file mode 100644 index 000000000..21ca622da --- /dev/null +++ b/tests/experimental/torch/_compress/compress_test_utils.py @@ -0,0 +1,119 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import shutil +from pathlib import Path + +import torch +from datasets import Dataset, DatasetDict +from transformers import AutoTokenizer, LlamaConfig, LlamaForCausalLM, PreTrainedTokenizerBase + + +def create_and_save_small_llama_model( + output_path: str, vocab_size: int, tokenizer: PreTrainedTokenizerBase +): + """ + Create and save a small Llama model for testing the conversion pipeline. + This mimics having a real Llama checkpoint that needs to be converted. + """ + os.makedirs(output_path, exist_ok=True) + + # Create a minimal Llama config (small for testing) + # Note: intermediate_size must be divisible by 256 per DeciLM config requirements + # Note: hidden_size must give head_dim >= 8 for Flash Attention 2 compatibility + llama_config = LlamaConfig( + vocab_size=vocab_size, + hidden_size=256, # 32 heads times 8 head_dim = 256 (matches bypass config expectations) + intermediate_size=512, # Must be divisible by 256 + num_hidden_layers=2, + num_attention_heads=32, # Matches original test + num_key_value_heads=8, # GQA: 32÷4=8 (matches original n_heads_in_group=4) + max_position_embeddings=512, + rms_norm_eps=1e-5, + rope_theta=10000.0, + attention_bias=False, + hidden_act="silu", + tie_word_embeddings=False, + ) + + # Create and save the Llama model + model = LlamaForCausalLM(llama_config) + model.to(dtype=torch.bfloat16).save_pretrained(output_path) + + # Save tokenizer + tokenizer.save_pretrained(output_path) + + # Save config + llama_config.save_pretrained(output_path) + + +def create_tokenizer(project_root_path: Path) -> 
PreTrainedTokenizerBase: + """ + Create a tokenizer for the Llama model. + """ + tokenizer_path = project_root_path / "tests/experimental/torch/_compress/resources/tokenizer" + tokenizer = AutoTokenizer.from_pretrained(tokenizer_path) + return tokenizer + + +def setup_puzzle_dir(puzzle_dir: str): + """ + Setup puzzle directory by removing existing directory and creating a new one. + """ + if Path(puzzle_dir).exists(): + shutil.rmtree(puzzle_dir) + Path(puzzle_dir).mkdir(parents=True, exist_ok=True) + + +def save_dummy_dataset(dataset_path: str): + """ + Save a dummy dataset for testing purposes. + """ + # dummy sample + sample = [ + {"role": "user", "content": "please cite Lorem Ipsum?"}, + { + "role": "assistant", + "content": ( + "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed in blandit ante. " + "Sed tempus erat urna, ac elementum nisl facilisis quis. Aliquam consectetur mollis massa, " + "in elementum sem venenatis posuere. Fusce lorem arcu, egestas vel massa sollicitudin, " + "dictum mollis purus. Proin in ullamcorper elit. Nam tellus nisi, volutpat a mattis vel, " + "pretium in purus. Nunc at lectus facilisis risus scelerisque rhoncus eu nec ex. " + "Maecenas semper, tellus non placerat vulputate, urna felis facilisis diam, " + "sit amet vestibulum erat sapien nec libero. Praesent non massa velit. Donec faucibus mi eros. " + "Nam turpis nulla, congue sit amet mi at, porttitor scelerisque elit. Nunc id sodales lorem, " + "nec tincidunt leo. Quisque a neque nec ligula porttitor auctor. " + "Nunc accumsan nunc ac tellus congue vehicula. Praesent tellus eros, luctus non gravida dapibus, " + "faucibus eu ex. Quisque bibendum leo pharetra, tristique est vitae, hendrerit nunc. " + "Duis nec congue dolor. Donec commodo ipsum non efficitur volutpat. " + "Nulla risus nulla, efficitur et urna at, imperdiet sodales lorem. " + "Suspendisse erat est, sollicitudin at nisl tincidunt, vehicula hendrerit lectus. 
" + "Nam quis nisi ullamcorper, rhoncus massa vel, tempus purus. " + "Duis pulvinar eros vel nulla pellentesque, at dapibus justo laoreet. " + "Praesent tortor orci, vulputate fermentum dapibus nec, feugiat vitae tortor. " + "Donec mollis convallis massa quis iaculis." + ), + }, + ] + + # Prepare train and val splits with sample repeated, 2500 samples are for + # 128 samples with block-size 8192 and LLama3 tokenizer + data = [{"conversation": sample}] * 2500 + + # For train-val splits + data_dict = DatasetDict({"train": Dataset.from_list(data), "valid": Dataset.from_list(data)}) + data_dict.save_to_disk(dataset_path) diff --git a/tests/experimental/torch/_compress/conftest.py b/tests/experimental/torch/_compress/conftest.py index 4dedf5363..cae1bfbca 100644 --- a/tests/experimental/torch/_compress/conftest.py +++ b/tests/experimental/torch/_compress/conftest.py @@ -13,108 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -import shutil from pathlib import Path import pytest -import torch -from datasets import Dataset, DatasetDict -from transformers import AutoTokenizer, LlamaConfig, LlamaForCausalLM, PreTrainedTokenizerBase @pytest.fixture def project_root_path(request: pytest.FixtureRequest) -> Path: """Fixture providing the project root path for tests.""" return Path(request.config.rootpath) - - -def create_and_save_small_llama_model( - output_path: str, vocab_size: int, tokenizer: PreTrainedTokenizerBase -): - """ - Create and save a small Llama model for testing the conversion pipeline. - This mimics having a real Llama checkpoint that needs to be converted. 
- """ - os.makedirs(output_path, exist_ok=True) - - # Create a minimal Llama config (small for testing) - # Note: intermediate_size must be divisible by 256 per DeciLM config requirements - # Note: hidden_size must give head_dim >= 8 for Flash Attention 2 compatibility - llama_config = LlamaConfig( - vocab_size=vocab_size, - hidden_size=256, # 32 heads times 8 head_dim = 256 (matches bypass config expectations) - intermediate_size=512, # Must be divisible by 256 - num_hidden_layers=2, - num_attention_heads=32, # Matches original test - num_key_value_heads=8, # GQA: 32÷4=8 (matches original n_heads_in_group=4) - max_position_embeddings=512, - rms_norm_eps=1e-5, - rope_theta=10000.0, - attention_bias=False, - hidden_act="silu", - tie_word_embeddings=False, - ) - - # Create and save the Llama model - model = LlamaForCausalLM(llama_config) - model.to(dtype=torch.bfloat16).save_pretrained(output_path) - - # Save tokenizer - tokenizer.save_pretrained(output_path) - - # Save config - llama_config.save_pretrained(output_path) - - -def create_tokenizer(project_root_path: Path) -> PreTrainedTokenizerBase: - """ - Create a tokenizer for the Llama model. - """ - tokenizer_path = project_root_path / "tests/experimental/torch/_compress/resources/tokenizer" - tokenizer = AutoTokenizer.from_pretrained(tokenizer_path) - return tokenizer - - -def setup_puzzle_dir(puzzle_dir: str): - if Path(puzzle_dir).exists(): - shutil.rmtree(puzzle_dir) - Path(puzzle_dir).mkdir(parents=True, exist_ok=True) - - -def save_dummy_dataset(dataset_path: str): - # dummy sample - sample = [ - {"role": "user", "content": "please cite Lorem Ipsum?"}, - { - "role": "assistant", - "content": ( - "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed in blandit ante. " - "Sed tempus erat urna, ac elementum nisl facilisis quis. Aliquam consectetur mollis massa, " - "in elementum sem venenatis posuere. Fusce lorem arcu, egestas vel massa sollicitudin, " - "dictum mollis purus. Proin in ullamcorper elit. 
Nam tellus nisi, volutpat a mattis vel, " - "pretium in purus. Nunc at lectus facilisis risus scelerisque rhoncus eu nec ex. " - "Maecenas semper, tellus non placerat vulputate, urna felis facilisis diam, " - "sit amet vestibulum erat sapien nec libero. Praesent non massa velit. Donec faucibus mi eros. " - "Nam turpis nulla, congue sit amet mi at, porttitor scelerisque elit. Nunc id sodales lorem, " - "nec tincidunt leo. Quisque a neque nec ligula porttitor auctor. " - "Nunc accumsan nunc ac tellus congue vehicula. Praesent tellus eros, luctus non gravida dapibus, " - "faucibus eu ex. Quisque bibendum leo pharetra, tristique est vitae, hendrerit nunc. " - "Duis nec congue dolor. Donec commodo ipsum non efficitur volutpat. " - "Nulla risus nulla, efficitur et urna at, imperdiet sodales lorem. " - "Suspendisse erat est, sollicitudin at nisl tincidunt, vehicula hendrerit lectus. " - "Nam quis nisi ullamcorper, rhoncus massa vel, tempus purus. " - "Duis pulvinar eros vel nulla pellentesque, at dapibus justo laoreet. " - "Praesent tortor orci, vulputate fermentum dapibus nec, feugiat vitae tortor. " - "Donec mollis convallis massa quis iaculis." 
- ), - }, - ] - - # Prepare train and val splits with sample repeated, 2500 samples are for - # 128 samples with block-size 8192 and LLama3 tokenizer - data = [{"conversation": sample}] * 2500 - - # For train-val splits - data_dict = DatasetDict({"train": Dataset.from_list(data), "valid": Dataset.from_list(data)}) - data_dict.save_to_disk(dataset_path) diff --git a/tests/experimental/torch/_compress/decilm/converters/test_convert_llama3_config_to_decilm_config.py b/tests/experimental/torch/_compress/decilm/converters/test_convert_llama3_config_to_decilm_config.py index a1d897ceb..1f0283b3e 100644 --- a/tests/experimental/torch/_compress/decilm/converters/test_convert_llama3_config_to_decilm_config.py +++ b/tests/experimental/torch/_compress/decilm/converters/test_convert_llama3_config_to_decilm_config.py @@ -16,7 +16,7 @@ import json from pathlib import Path -from experimental.torch._compress.conftest import ( +from experimental.torch._compress.compress_test_utils import ( create_and_save_small_llama_model, create_tokenizer, ) diff --git a/tests/experimental/torch/_compress/nas/plugins/test_nas_convert.py b/tests/experimental/torch/_compress/nas/plugins/test_nas_convert.py new file mode 100644 index 000000000..ad8580467 --- /dev/null +++ b/tests/experimental/torch/_compress/nas/plugins/test_nas_convert.py @@ -0,0 +1,114 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import os +from functools import partial +from pathlib import Path + +import torch +from _test_utils.torch.distributed.utils import spawn_multiprocess_job +from experimental.torch._compress.compress_test_utils import ( + create_and_save_small_llama_model, + create_tokenizer, + save_dummy_dataset, + setup_puzzle_dir, +) +from puzzle_tools.hydra_utils import register_hydra_resolvers + +import modelopt.torch.nas as mtn +from modelopt.torch._compress.nas.plugins.compress_nas_plugin import CompressModel +from modelopt.torch._compress.runtime import NativeDdpRuntime + + +# +# See tests/experimental/torch/_compress/test_compress.py for instructions on how to run this test +# TODO: Remove those instructions once this test runs automatically on CI +# +def test_nas_convert(project_root_path: Path, tmp_path: Path): + spawn_multiprocess_job( + size=torch.cuda.device_count(), + job=partial(_test_nas_convert_multiprocess_job, project_root_path, tmp_path), + backend="nccl", + ) + + +def _test_nas_convert_multiprocess_job( + project_root_path: Path, tmp_path: Path, rank: int, size: int +): + # Register Hydra custom resolvers (needed for config resolution) + register_hydra_resolvers() + + # + # The inputs for the nas.convert() step. 
+ # + puzzle_dir = tmp_path + llama_checkpoint_path = puzzle_dir / "ckpts/llama" + dataset_path = puzzle_dir / "dummy_dataset" + hydra_config_dir = project_root_path / "tests/experimental/torch/_compress/resources/configs" + hydra_config_name = "Llama-3_1-8B" + + with NativeDdpRuntime( + dtype=torch.bfloat16, torch_distributed_timeout=datetime.timedelta(10) + ) as runtime: + if rank == 0: + # Setup puzzle_dir and dataset + setup_puzzle_dir(puzzle_dir) + save_dummy_dataset(dataset_path) + + # Create a small Llama model + tokenizer = create_tokenizer(project_root_path) + create_and_save_small_llama_model( + llama_checkpoint_path, vocab_size=tokenizer.vocab_size, tokenizer=tokenizer + ) + runtime.wait_for_everyone() + + # + # Run the mtn.convert() step + # + input_model = CompressModel() + mtn.convert( + input_model, + mode=[ + ( + "compress", + { + "puzzle_dir": str(puzzle_dir), + "input_model_path": str(llama_checkpoint_path), + "hydra_config_dir": str(hydra_config_dir), + "hydra_config_name": hydra_config_name, + "dataset_path": str(dataset_path), + }, + ) + ], + ) + + # + # Check assertions + # + if rank == 0: + # assertions for the score_pruning_activations step + rank = int(os.environ["RANK"]) + rank_filepath = ( + f"pruning/pruning_scores/ffn_iterative/100samples_diverse_mini/rank_{rank}.pth" + ) + assert (puzzle_dir / rank_filepath).is_file() + + # assertions for the pruning_ckpts step + assert (puzzle_dir / "ckpts/ffn_256_attn_no_op").exists() + + runtime.wait_for_everyone() + + print("PYTEST SUMMARY: test_nas_convert() test has finished successfully") diff --git a/tests/experimental/torch/_compress/test_compress.py b/tests/experimental/torch/_compress/test_compress.py index 018b78e1a..0bd116d16 100644 --- a/tests/experimental/torch/_compress/test_compress.py +++ b/tests/experimental/torch/_compress/test_compress.py @@ -20,7 +20,7 @@ import torch from _test_utils.torch.distributed.utils import spawn_multiprocess_job -from experimental.torch._compress.conftest 
import ( +from experimental.torch._compress.compress_test_utils import ( create_and_save_small_llama_model, create_tokenizer, save_dummy_dataset, @@ -66,16 +66,17 @@ def test_compress(project_root_path: Path, tmp_path: Path): def _test_compress_multiprocess_job(project_root_path: Path, tmp_path: Path, rank: int, size: int): register_hydra_resolvers() + # + # The inputs for the compress() algorithm. + # puzzle_dir = tmp_path dataset_path = puzzle_dir / "dummy_dataset" hydra_config_dir = project_root_path / "tests/experimental/torch/_compress/resources/configs" hydra_config_name = "Llama-3_1-8B" - _runtime = NativeDdpRuntime( + with NativeDdpRuntime( dtype=torch.bfloat16, torch_distributed_timeout=datetime.timedelta(10) - ) - - with _runtime as runtime: + ) as runtime: # # Test setup # @@ -91,8 +92,9 @@ def _test_compress_multiprocess_job(project_root_path: Path, tmp_path: Path, ran # Create a small Llama model (not DeciLM) to match the normal conversion pipeline tokenizer = create_tokenizer(project_root_path) - hf_ckpt_teacher_dir = "ckpts/teacher" - llama_checkpoint_path = puzzle_dir / hf_ckpt_teacher_dir + # TODO: change it to "ckpts/llama" once the conversion script is fixed + # Currently, the build replacement library step will fail with such a path. 
+ llama_checkpoint_path = puzzle_dir / "ckpts/teacher" create_and_save_small_llama_model( llama_checkpoint_path, vocab_size=tokenizer.vocab_size, tokenizer=tokenizer ) @@ -100,7 +102,7 @@ def _test_compress_multiprocess_job(project_root_path: Path, tmp_path: Path, ran # Use the full conversion pipeline (matches normal usage) convert_llama3_to_decilm( input_dir=llama_checkpoint_path, - output_dir=llama_checkpoint_path, + output_dir=puzzle_dir / "ckpts/teacher", ) runtime.wait_for_everyone() @@ -148,4 +150,4 @@ def _test_compress_multiprocess_job(project_root_path: Path, tmp_path: Path, ran runtime.wait_for_everyone() - print("PYTEST SUMMARY: test_compress_model() test has finished successfully") + print("PYTEST SUMMARY: test_compress_model() test has finished successfully")