diff --git a/tests/integ/sagemaker/serve/constants.py b/tests/integ/sagemaker/serve/constants.py
index 6b27ff2db6..cf4c6919aa 100644
--- a/tests/integ/sagemaker/serve/constants.py
+++ b/tests/integ/sagemaker/serve/constants.py
@@ -12,24 +12,23 @@
 # language governing permissions and limitations under the License.
 from __future__ import absolute_import
 
-# import os
+import os
 import platform
 
-# from tests.integ import DATA_DIR
+from tests.integ import DATA_DIR
 
-# SERVE_IN_PROCESS_TIMEOUT = 5
-# SERVE_MODEL_PACKAGE_TIMEOUT = 10
-# SERVE_LOCAL_CONTAINER_TIMEOUT = 10
+SERVE_IN_PROCESS_TIMEOUT = 5
+SERVE_MODEL_PACKAGE_TIMEOUT = 10
+SERVE_LOCAL_CONTAINER_TIMEOUT = 10
 SERVE_SAGEMAKER_ENDPOINT_TIMEOUT = 15
-# SERVE_SAVE_TIMEOUT = 2
+SERVE_SAVE_TIMEOUT = 2
 
-# NOT_RUNNING_ON_PY38 = platform.python_version_tuple()[1] != "8"
-NOT_RUNNING_ON_PY310 = platform.python_version_tuple()[1] != "10"
-# NOT_RUNNING_ON_INF_EXP_DEV_PIPELINE = os.getenv("TEST_OWNER") != "INF_EXP_DEV"
+PYTHON_VERSION_IS_NOT_38 = platform.python_version_tuple()[1] != "8"
+PYTHON_VERSION_IS_NOT_310 = platform.python_version_tuple()[1] != "10"
 
-# XGB_RESOURCE_DIR = os.path.join(DATA_DIR, "serve_resources", "xgboost")
-# PYTORCH_SQUEEZENET_RESOURCE_DIR = os.path.join(DATA_DIR, "serve_resources", "pytorch")
-# TF_EFFICIENT_RESOURCE_DIR = os.path.join(DATA_DIR, "serve_resources", "tensorflow")
-# HF_DIR = os.path.join(DATA_DIR, "serve_resources", "hf")
+XGB_RESOURCE_DIR = os.path.join(DATA_DIR, "serve_resources", "xgboost")
+PYTORCH_SQUEEZENET_RESOURCE_DIR = os.path.join(DATA_DIR, "serve_resources", "pytorch")
+TF_EFFICIENT_RESOURCE_DIR = os.path.join(DATA_DIR, "serve_resources", "tensorflow")
+HF_DIR = os.path.join(DATA_DIR, "serve_resources", "hf")
 
-# BYOC_IMAGE_URI_TEMPLATE = "661407751302.dkr.ecr.{}.amazonaws.com/byoc-integ-test-images:{}"
+BYOC_IMAGE_URI_TEMPLATE = "661407751302.dkr.ecr.{}.amazonaws.com/byoc-integ-test-images:{}"
diff --git a/tests/integ/sagemaker/serve/test_serve_js_happy.py b/tests/integ/sagemaker/serve/test_serve_js_happy.py
index 66341c471a..1050be3bb1 100644
--- a/tests/integ/sagemaker/serve/test_serve_js_happy.py
+++ b/tests/integ/sagemaker/serve/test_serve_js_happy.py
@@ -17,7 +17,7 @@ from sagemaker.serve.builder.schema_builder import SchemaBuilder
 
 from tests.integ.sagemaker.serve.constants import (
     SERVE_SAGEMAKER_ENDPOINT_TIMEOUT,
-    NOT_RUNNING_ON_PY310,
+    PYTHON_VERSION_IS_NOT_310,
 )
 from tests.integ.timeout import timeout
 
@@ -47,9 +47,10 @@ def happy_model_builder(sagemaker_session):
 
 
 @pytest.mark.skipif(
-    NOT_RUNNING_ON_PY310,
+    PYTHON_VERSION_IS_NOT_310,
     reason="The goal of these test are to test the serving components of our feature",
 )
+@pytest.mark.slow_test
 def test_happy_tgi_sagemaker_endpoint(happy_model_builder, gpu_instance_type):
     logger.info("Running in SAGEMAKER_ENDPOINT mode...")
     caught_ex = None
diff --git a/tests/integ/sagemaker/serve/test_serve_pt_happy.py b/tests/integ/sagemaker/serve/test_serve_pt_happy.py
index 37233da859..67ac9dc7fd 100644
--- a/tests/integ/sagemaker/serve/test_serve_pt_happy.py
+++ b/tests/integ/sagemaker/serve/test_serve_pt_happy.py
@@ -10,127 +10,159 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
-# from __future__ import absolute_import
-
-# import pytest
-# import torch
-# from PIL import Image
-# import os
-
-# from sagemaker.serve.builder.model_builder import ModelBuilder, Mode
-# from sagemaker.serve.builder.schema_builder import SchemaBuilder
-# from sagemaker.serve.spec.inference_spec import InferenceSpec
-# from torchvision.transforms import transforms
-# from torchvision.models.squeezenet import squeezenet1_1
-
-# from tests.integ.sagemaker.serve.constants import (
-#     PYTORCH_SQUEEZENET_RESOURCE_DIR,
-#     SERVE_SAGEMAKER_ENDPOINT_TIMEOUT,
-#     NOT_RUNNING_ON_INF_EXP_DEV_PIPELINE,
-#     NOT_RUNNING_ON_PY310,
-# )
-# from tests.integ.timeout import timeout
-# from tests.integ.utils import cleanup_model_resources
-# import logging
-
-# logger = logging.getLogger(__name__)
-
-# ROLE_NAME = "Admin"
-
-# GH_USER_NAME = os.getenv("GH_USER_NAME")
-# GH_ACCESS_TOKEN = os.getenv("GH_ACCESS_TOKEN")
-
-
-# @pytest.fixture
-# def pt_dependencies():
-#     return {
-#         "auto": True,
-#         "custom": [
-#             "boto3==1.26.*",
-#             "botocore==1.29.*",
-#             "s3transfer==0.6.*",
-#             (
-#                 f"git+https://{GH_USER_NAME}:{GH_ACCESS_TOKEN}@github.com"
-#                 "/aws/sagemaker-python-sdk-staging.git@inference-experience-dev"
-#             ),
-#         ],
-#     }
-
-
-# @pytest.fixture
-# def test_image():
-#     return Image.open(str(os.path.join(PYTORCH_SQUEEZENET_RESOURCE_DIR, "zidane.jpeg")))
-
-
-# @pytest.fixture
-# def squeezenet_inference_spec():
-#     class MySqueezeNetModel(InferenceSpec):
-#         def __init__(self) -> None:
-#             super().__init__()
-#             self.transform = transforms.Compose(
-#                 [
-#                     transforms.Resize(256),
-#                     transforms.CenterCrop(224),
-#                     transforms.ToTensor(),
-#                     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-#                 ]
-#             )
-
-#         def invoke(self, input_object: object, model: object):
-#             # transform
-#             image_tensor = self.transform(input_object)
-#             input_batch = image_tensor.unsqueeze(0)
-#             # invoke
-#             with torch.no_grad():
-#                 output = model(input_batch)
-#             return output
-
-#         def load(self, model_dir: str):
-#             model = squeezenet1_1()
-#             model.load_state_dict(torch.load(model_dir + "/model.pth"))
-#             model.eval()
-#             return model
-
-#     return MySqueezeNetModel()
-
-
-# @pytest.fixture
-# def squeezenet_schema():
-#     input_image = Image.open(os.path.join(PYTORCH_SQUEEZENET_RESOURCE_DIR, "zidane.jpeg"))
-#     output_tensor = torch.rand(3, 4)
-#     return SchemaBuilder(sample_input=input_image, sample_output=output_tensor)
-
+from __future__ import absolute_import
+
+import pytest
+import torch
+from PIL import Image
+import os
+import io
+import numpy as np
+
+from sagemaker.serve.builder.model_builder import ModelBuilder, Mode
+from sagemaker.serve.builder.schema_builder import SchemaBuilder, CustomPayloadTranslator
+from sagemaker.serve.spec.inference_spec import InferenceSpec
+from torchvision.transforms import transforms
+from torchvision.models.squeezenet import squeezenet1_1
+
+from tests.integ.sagemaker.serve.constants import (
+    PYTORCH_SQUEEZENET_RESOURCE_DIR,
+    SERVE_SAGEMAKER_ENDPOINT_TIMEOUT,
+    PYTHON_VERSION_IS_NOT_310,
+)
+from tests.integ.timeout import timeout
+from tests.integ.utils import cleanup_model_resources
+import logging
+
+logger = logging.getLogger(__name__)
+
+ROLE_NAME = "SageMakerRole"
+
+
+@pytest.fixture
+def test_image():
+    return Image.open(str(os.path.join(PYTORCH_SQUEEZENET_RESOURCE_DIR, "zidane.jpeg")))
+
+
+@pytest.fixture
+def squeezenet_inference_spec():
+    class MySqueezeNetModel(InferenceSpec):
+        def __init__(self) -> None:
+            super().__init__()
+
+        def invoke(self, input_object: object, model: object):
+            with torch.no_grad():
+                output = model(input_object)
+            return output
+
+        def load(self, model_dir: str):
+            model = squeezenet1_1()
+            model.load_state_dict(torch.load(model_dir + "/model.pth"))
+            model.eval()
+            return model
+
+    return MySqueezeNetModel()
+
+
+@pytest.fixture
+def custom_request_translator():
+    # request translator
+    class MyRequestTranslator(CustomPayloadTranslator):
+        def __init__(self):
+            super().__init__()
+            # Define image transformation
+            self.transform = transforms.Compose(
+                [
+                    transforms.Resize(256),
+                    transforms.CenterCrop(224),
+                    transforms.ToTensor(),
+                    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+                ]
+            )
+
+        # This function converts the payload to bytes - happens on client side
+        def serialize_payload_to_bytes(self, payload: object) -> bytes:
+            # converts an image to bytes
+            image_tensor = self.transform(payload)
+            input_batch = image_tensor.unsqueeze(0)
+            input_ndarray = input_batch.numpy()
+            return self._convert_numpy_to_bytes(input_ndarray)
+
+        # This function converts the bytes to payload - happens on server side
+        def deserialize_payload_from_stream(self, stream) -> torch.Tensor:
+            # convert payload back to torch.Tensor
+            np_array = np.load(io.BytesIO(stream.read()))
+            return torch.from_numpy(np_array)
+
+        def _convert_numpy_to_bytes(self, np_array: np.ndarray) -> bytes:
+            buffer = io.BytesIO()
+            np.save(buffer, np_array)
+            return buffer.getvalue()
+
+    return MyRequestTranslator()
+
+
+@pytest.fixture
+def custom_response_translator():
+    # response translator
+    class MyResponseTranslator(CustomPayloadTranslator):
+        # This function converts the payload to bytes - happens on server side
+        def serialize_payload_to_bytes(self, payload: torch.Tensor) -> bytes:
+            return self._convert_numpy_to_bytes(payload.numpy())
+
+        # This function converts the bytes to payload - happens on client side
+        def deserialize_payload_from_stream(self, stream) -> object:
+            return torch.from_numpy(np.load(io.BytesIO(stream.read())))
+
+        def _convert_numpy_to_bytes(self, np_array: np.ndarray) -> bytes:
+            buffer = io.BytesIO()
+            np.save(buffer, np_array)
+            return buffer.getvalue()
+
+    return MyResponseTranslator()
+
+
+@pytest.fixture
+def squeezenet_schema(custom_request_translator, custom_response_translator):
+    input_image = Image.open(os.path.join(PYTORCH_SQUEEZENET_RESOURCE_DIR, "zidane.jpeg"))
+    output_tensor = torch.rand(3, 4)
+    return SchemaBuilder(
+        sample_input=input_image,
+        sample_output=output_tensor,
+        input_translator=custom_request_translator,
+        output_translator=custom_response_translator,
+    )
+
 
-# @pytest.fixture
-# def model_builder_inference_spec_schema_builder(
-#     squeezenet_inference_spec, squeezenet_schema, pt_dependencies
-# ):
-#     return ModelBuilder(
-#         model_path=PYTORCH_SQUEEZENET_RESOURCE_DIR,
-#         inference_spec=squeezenet_inference_spec,
-#         schema_builder=squeezenet_schema,
-#         dependencies=pt_dependencies,
-#     )
-
-
-# @pytest.fixture
-# def model_builder(request):
-#     return request.getfixturevalue(request.param)
+@pytest.fixture
+def model_builder_inference_spec_schema_builder(squeezenet_inference_spec, squeezenet_schema):
+    return ModelBuilder(
+        model_path=PYTORCH_SQUEEZENET_RESOURCE_DIR,
+        inference_spec=squeezenet_inference_spec,
+        schema_builder=squeezenet_schema,
+    )
+
+
+@pytest.fixture
+def model_builder(request):
+    return request.getfixturevalue(request.param)
 
 
 # @pytest.mark.skipif(
-#     NOT_RUNNING_ON_INF_EXP_DEV_PIPELINE or NOT_RUNNING_ON_PY310,
+#     PYTHON_VERSION_IS_NOT_310,
 #     reason="The goal of these test are to test the serving components of our feature",
 # )
 # @pytest.mark.parametrize(
 #     "model_builder", ["model_builder_inference_spec_schema_builder"], indirect=True
 # )
+# @pytest.mark.slow_test
+# @pytest.mark.flaky(reruns=5, reruns_delay=2)
 # def test_happy_pytorch_local_container(sagemaker_session, model_builder, test_image):
 #     logger.info("Running in LOCAL_CONTAINER mode...")
 #     caught_ex = None
-
+#
 #     model = model_builder.build(mode=Mode.LOCAL_CONTAINER, sagemaker_session=sagemaker_session)
-
+#
 #     with timeout(minutes=SERVE_LOCAL_CONTAINER_TIMEOUT):
 #         try:
 #             logger.info("Deploying and predicting in LOCAL_CONTAINER mode...")
@@ -149,68 +181,70 @@
 #                 ), f"{caught_ex} was thrown when running pytorch squeezenet local container test"
 
 
-# @pytest.mark.skipif(
-#     NOT_RUNNING_ON_INF_EXP_DEV_PIPELINE or NOT_RUNNING_ON_PY310,
-#     reason="The goal of these test are to test the serving components of our feature",
-# )
-# @pytest.mark.parametrize(
-#     "model_builder", ["model_builder_inference_spec_schema_builder"], indirect=True
-# )
-# def test_happy_pytorch_sagemaker_endpoint(
-#     sagemaker_session, model_builder, cpu_instance_type, test_image
-# ):
-#     logger.info("Running in SAGEMAKER_ENDPOINT mode...")
-#     caught_ex = None
-
-#     iam_client = sagemaker_session.boto_session.client("iam")
-#     role_arn = iam_client.get_role(RoleName=ROLE_NAME)["Role"]["Arn"]
-
-#     model = model_builder.build(
-#         mode=Mode.SAGEMAKER_ENDPOINT, role_arn=role_arn, sagemaker_session=sagemaker_session
-#     )
-
-#     with timeout(minutes=SERVE_SAGEMAKER_ENDPOINT_TIMEOUT):
-#         try:
-#             logger.info("Deploying and predicting in SAGEMAKER_ENDPOINT mode...")
-#             predictor = model.deploy(instance_type=cpu_instance_type, initial_instance_count=1)
-#             logger.info("Endpoint successfully deployed.")
-#             predictor.predict(test_image)
-#         except Exception as e:
-#             caught_ex = e
-#         finally:
-#             cleanup_model_resources(
-#                 sagemaker_session=model_builder.sagemaker_session,
-#                 model_name=model.name,
-#                 endpoint_name=model.endpoint_name,
-#             )
-#             if caught_ex:
-#                 logger.exception(caught_ex)
-#                 assert (
-#                     False
-#                 ), f"{caught_ex} was thrown when running pytorch squeezenet sagemaker endpoint test"
+@pytest.mark.skipif(
+    PYTHON_VERSION_IS_NOT_310,  # or NOT_RUNNING_ON_INF_EXP_DEV_PIPELINE,
+    reason="The goal of these test are to test the serving components of our feature",
+)
+@pytest.mark.parametrize(
+    "model_builder", ["model_builder_inference_spec_schema_builder"], indirect=True
+)
+@pytest.mark.slow_test
+def test_happy_pytorch_sagemaker_endpoint(
+    sagemaker_session, model_builder, cpu_instance_type, test_image
+):
+    logger.info("Running in SAGEMAKER_ENDPOINT mode...")
+    caught_ex = None
+
+    iam_client = sagemaker_session.boto_session.client("iam")
+    role_arn = iam_client.get_role(RoleName=ROLE_NAME)["Role"]["Arn"]
+
+    model = model_builder.build(
+        mode=Mode.SAGEMAKER_ENDPOINT, role_arn=role_arn, sagemaker_session=sagemaker_session
+    )
+
+    with timeout(minutes=SERVE_SAGEMAKER_ENDPOINT_TIMEOUT):
+        try:
+            logger.info("Deploying and predicting in SAGEMAKER_ENDPOINT mode...")
+            predictor = model.deploy(instance_type=cpu_instance_type, initial_instance_count=1)
+            logger.info("Endpoint successfully deployed.")
+            predictor.predict(test_image)
+        except Exception as e:
+            caught_ex = e
+        finally:
+            cleanup_model_resources(
+                sagemaker_session=model_builder.sagemaker_session,
+                model_name=model.name,
+                endpoint_name=model.endpoint_name,
+            )
+            if caught_ex:
+                logger.exception(caught_ex)
+                assert (
+                    False
+                ), f"{caught_ex} was thrown when running pytorch squeezenet sagemaker endpoint test"
 
 
 # @pytest.mark.skipif(
-#     NOT_RUNNING_ON_INF_EXP_DEV_PIPELINE or NOT_RUNNING_ON_PY310,
+#     PYTHON_VERSION_IS_NOT_310,
 #     reason="The goal of these test are to test the serving components of our feature",
 # )
 # @pytest.mark.parametrize(
 #     "model_builder", ["model_builder_inference_spec_schema_builder"], indirect=True
 # )
+# @pytest.mark.slow_test
 # def test_happy_pytorch_local_container_overwrite_to_sagemaker_endpoint(
 #     sagemaker_session, model_builder, cpu_instance_type, test_image
 # ):
 #     logger.info("Building model in LOCAL_CONTAINER mode...")
 #     caught_ex = None
-
+#
 #     iam_client = sagemaker_session.boto_session.client("iam")
 #     role_arn = iam_client.get_role(RoleName=ROLE_NAME)["Role"]["Arn"]
 #     logger.debug("Role arn: %s", role_arn)
-
+#
 #     model = model_builder.build(
 #         mode=Mode.LOCAL_CONTAINER, role_arn=role_arn, sagemaker_session=sagemaker_session
 #     )
-
+#
 #     with timeout(minutes=SERVE_SAGEMAKER_ENDPOINT_TIMEOUT):
 #         try:
 #             logger.info("Deploying and predicting in SAGEMAKER_ENDPOINT mode...")
@@ -237,25 +271,26 @@
 
 
 # @pytest.mark.skipif(
-#     NOT_RUNNING_ON_INF_EXP_DEV_PIPELINE or NOT_RUNNING_ON_PY310,
+#     PYTHON_VERSION_IS_NOT_310,
 #     reason="The goal of these test are to test the serving components of our feature",
 # )
 # @pytest.mark.parametrize(
 #     "model_builder", ["model_builder_inference_spec_schema_builder"], indirect=True
 # )
+# @pytest.mark.slow_test
 # def test_happy_pytorch_sagemaker_endpoint_overwrite_to_local_container(
 #     sagemaker_session, model_builder, test_image
 # ):
 #     logger.info("Building model in SAGEMAKER_ENDPOINT mode...")
 #     caught_ex = None
-
+#
 #     iam_client = sagemaker_session.boto_session.client("iam")
 #     role_arn = iam_client.get_role(RoleName=ROLE_NAME)["Role"]["Arn"]
-
+#
 #     model = model_builder.build(
 #         mode=Mode.SAGEMAKER_ENDPOINT, role_arn=role_arn, sagemaker_session=sagemaker_session
 #     )
-
+#
 #     with timeout(minutes=SERVE_LOCAL_CONTAINER_TIMEOUT):
 #         try:
 #             logger.info("Deploying and predicting in LOCAL_CONTAINER mode...")