diff --git a/assets/optional/ai-model-serving/kserve/configmap/inferenceservice.yaml b/assets/optional/ai-model-serving/kserve/configmap/inferenceservice.yaml index 8bd149a34a..495e21dc4a 100644 --- a/assets/optional/ai-model-serving/kserve/configmap/inferenceservice.yaml +++ b/assets/optional/ai-model-serving/kserve/configmap/inferenceservice.yaml @@ -59,6 +59,7 @@ data: # revisions, which prevents the reconciliation loop to be triggered if the annotations is # configured here are used. # Default values are: + # "autoscaling.knative.dev/initial-scale", # "autoscaling.knative.dev/min-scale", # "autoscaling.knative.dev/max-scale", # "internal.serving.kserve.io/storage-initializer-sourceuri", diff --git a/assets/optional/ai-model-serving/kserve/crd/full/serving.kserve.io_clusterservingruntimes.yaml b/assets/optional/ai-model-serving/kserve/crd/full/serving.kserve.io_clusterservingruntimes.yaml index ae9e15eed3..556950f31b 100644 --- a/assets/optional/ai-model-serving/kserve/crd/full/serving.kserve.io_clusterservingruntimes.yaml +++ b/assets/optional/ai-model-serving/kserve/crd/full/serving.kserve.io_clusterservingruntimes.yaml @@ -1002,6 +1002,8 @@ spec: properties: name: type: string + request: + type: string required: - name type: object @@ -1336,10 +1338,12 @@ spec: diskURI: type: string fsType: + default: ext4 type: string kind: type: string readOnly: + default: false type: boolean required: - diskName @@ -1699,6 +1703,13 @@ spec: required: - path type: object + image: + properties: + pullPolicy: + type: string + reference: + type: string + type: object iscsi: properties: chapAuthDiscovery: @@ -1712,6 +1723,7 @@ spec: iqn: type: string iscsiInterface: + default: default type: string lun: format: int32 @@ -1960,6 +1972,7 @@ spec: image: type: string keyring: + default: /etc/ceph/keyring type: string monitors: items: @@ -1967,6 +1980,7 @@ spec: type: array x-kubernetes-list-type: atomic pool: + default: rbd type: string readOnly: type: boolean @@ -1978,6 +1992,7 @@ 
spec: type: object x-kubernetes-map-type: atomic user: + default: admin type: string required: - image @@ -1986,6 +2001,7 @@ spec: scaleIO: properties: fsType: + default: xfs type: string gateway: type: string @@ -2003,6 +2019,7 @@ spec: sslEnabled: type: boolean storageMode: + default: ThinProvisioned type: string storagePool: type: string @@ -2964,6 +2981,8 @@ spec: properties: name: type: string + request: + type: string required: - name type: object @@ -3264,10 +3283,12 @@ spec: diskURI: type: string fsType: + default: ext4 type: string kind: type: string readOnly: + default: false type: boolean required: - diskName @@ -3627,6 +3648,13 @@ spec: required: - path type: object + image: + properties: + pullPolicy: + type: string + reference: + type: string + type: object iscsi: properties: chapAuthDiscovery: @@ -3640,6 +3668,7 @@ spec: iqn: type: string iscsiInterface: + default: default type: string lun: format: int32 @@ -3888,6 +3917,7 @@ spec: image: type: string keyring: + default: /etc/ceph/keyring type: string monitors: items: @@ -3895,6 +3925,7 @@ spec: type: array x-kubernetes-list-type: atomic pool: + default: rbd type: string readOnly: type: boolean @@ -3906,6 +3937,7 @@ spec: type: object x-kubernetes-map-type: atomic user: + default: admin type: string required: - image @@ -3914,6 +3946,7 @@ spec: scaleIO: properties: fsType: + default: xfs type: string gateway: type: string @@ -3931,6 +3964,7 @@ spec: sslEnabled: type: boolean storageMode: + default: ThinProvisioned type: string storagePool: type: string diff --git a/assets/optional/ai-model-serving/kserve/crd/full/serving.kserve.io_clusterstoragecontainers.yaml b/assets/optional/ai-model-serving/kserve/crd/full/serving.kserve.io_clusterstoragecontainers.yaml index 2a55ab532f..e48c266037 100644 --- a/assets/optional/ai-model-serving/kserve/crd/full/serving.kserve.io_clusterstoragecontainers.yaml +++ b/assets/optional/ai-model-serving/kserve/crd/full/serving.kserve.io_clusterstoragecontainers.yaml @@ 
-470,6 +470,8 @@ spec: properties: name: type: string + request: + type: string required: - name type: object diff --git a/assets/optional/ai-model-serving/kserve/crd/full/serving.kserve.io_localmodelnodegroups.yaml b/assets/optional/ai-model-serving/kserve/crd/full/serving.kserve.io_localmodelnodegroups.yaml index f7b995c34b..51beae879b 100644 --- a/assets/optional/ai-model-serving/kserve/crd/full/serving.kserve.io_localmodelnodegroups.yaml +++ b/assets/optional/ai-model-serving/kserve/crd/full/serving.kserve.io_localmodelnodegroups.yaml @@ -144,10 +144,12 @@ spec: diskURI: type: string fsType: + default: ext4 type: string kind: type: string readOnly: + default: false type: boolean required: - diskName @@ -396,6 +398,7 @@ spec: iqn: type: string iscsiInterface: + default: default type: string lun: format: int32 @@ -547,6 +550,7 @@ spec: image: type: string keyring: + default: /etc/ceph/keyring type: string monitors: items: @@ -554,6 +558,7 @@ spec: type: array x-kubernetes-list-type: atomic pool: + default: rbd type: string readOnly: type: boolean @@ -566,6 +571,7 @@ spec: type: object x-kubernetes-map-type: atomic user: + default: admin type: string required: - image @@ -574,6 +580,7 @@ spec: scaleIO: properties: fsType: + default: xfs type: string gateway: type: string @@ -592,6 +599,7 @@ spec: sslEnabled: type: boolean storageMode: + default: ThinProvisioned type: string storagePool: type: string diff --git a/assets/optional/ai-model-serving/kserve/crd/full/serving.kserve.io_servingruntimes.yaml b/assets/optional/ai-model-serving/kserve/crd/full/serving.kserve.io_servingruntimes.yaml index c63bee0926..b5f5e6bc55 100644 --- a/assets/optional/ai-model-serving/kserve/crd/full/serving.kserve.io_servingruntimes.yaml +++ b/assets/optional/ai-model-serving/kserve/crd/full/serving.kserve.io_servingruntimes.yaml @@ -1002,6 +1002,8 @@ spec: properties: name: type: string + request: + type: string required: - name type: object @@ -1336,10 +1338,12 @@ spec: diskURI: type: 
string fsType: + default: ext4 type: string kind: type: string readOnly: + default: false type: boolean required: - diskName @@ -1699,6 +1703,13 @@ spec: required: - path type: object + image: + properties: + pullPolicy: + type: string + reference: + type: string + type: object iscsi: properties: chapAuthDiscovery: @@ -1712,6 +1723,7 @@ spec: iqn: type: string iscsiInterface: + default: default type: string lun: format: int32 @@ -1960,6 +1972,7 @@ spec: image: type: string keyring: + default: /etc/ceph/keyring type: string monitors: items: @@ -1967,6 +1980,7 @@ spec: type: array x-kubernetes-list-type: atomic pool: + default: rbd type: string readOnly: type: boolean @@ -1978,6 +1992,7 @@ spec: type: object x-kubernetes-map-type: atomic user: + default: admin type: string required: - image @@ -1986,6 +2001,7 @@ spec: scaleIO: properties: fsType: + default: xfs type: string gateway: type: string @@ -2003,6 +2019,7 @@ spec: sslEnabled: type: boolean storageMode: + default: ThinProvisioned type: string storagePool: type: string @@ -2964,6 +2981,8 @@ spec: properties: name: type: string + request: + type: string required: - name type: object @@ -3264,10 +3283,12 @@ spec: diskURI: type: string fsType: + default: ext4 type: string kind: type: string readOnly: + default: false type: boolean required: - diskName @@ -3627,6 +3648,13 @@ spec: required: - path type: object + image: + properties: + pullPolicy: + type: string + reference: + type: string + type: object iscsi: properties: chapAuthDiscovery: @@ -3640,6 +3668,7 @@ spec: iqn: type: string iscsiInterface: + default: default type: string lun: format: int32 @@ -3888,6 +3917,7 @@ spec: image: type: string keyring: + default: /etc/ceph/keyring type: string monitors: items: @@ -3895,6 +3925,7 @@ spec: type: array x-kubernetes-list-type: atomic pool: + default: rbd type: string readOnly: type: boolean @@ -3906,6 +3937,7 @@ spec: type: object x-kubernetes-map-type: atomic user: + default: admin type: string required: - image 
@@ -3914,6 +3946,7 @@ spec: scaleIO: properties: fsType: + default: xfs type: string gateway: type: string @@ -3931,6 +3964,7 @@ spec: sslEnabled: type: boolean storageMode: + default: ThinProvisioned type: string storagePool: type: string diff --git a/assets/optional/ai-model-serving/kserve/inferenceservice-config-microshift-patch.yaml b/assets/optional/ai-model-serving/kserve/inferenceservice-config-microshift-patch.yaml index 7ac806186a..e58d90c467 100644 --- a/assets/optional/ai-model-serving/kserve/inferenceservice-config-microshift-patch.yaml +++ b/assets/optional/ai-model-serving/kserve/inferenceservice-config-microshift-patch.yaml @@ -40,7 +40,7 @@ data: "localGatewayService" : "kserve-local-gateway.istio-system.svc.cluster.local", "ingressDomain" : "example.com", "ingressClassName" : "istio", - "domainTemplate": "{{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }}", + "domainTemplate": "example.com", "urlScheme": "https", "disableIstioVirtualHost": false, "disableIngressCreation": true @@ -96,10 +96,18 @@ data: inferenceService: |- { "serviceAnnotationDisallowedList": [ + "autoscaling.knative.dev/initial-scale", "autoscaling.knative.dev/min-scale", "autoscaling.knative.dev/max-scale", "internal.serving.kserve.io/storage-initializer-sourceuri", "kubectl.kubernetes.io/last-applied-configuration", - "security.opendatahub.io/enable-auth" + "security.opendatahub.io/enable-auth", + "networking.knative.dev/visibility", + "haproxy.router.openshift.io/timeout" ] } + + security: |- + { + "autoMountServiceAccountToken": false + } \ No newline at end of file diff --git a/assets/optional/ai-model-serving/kserve/overlays/odh/inferenceservice-config-patch.yaml b/assets/optional/ai-model-serving/kserve/overlays/odh/inferenceservice-config-patch.yaml index a37e7e0a96..6a232d7583 100644 --- a/assets/optional/ai-model-serving/kserve/overlays/odh/inferenceservice-config-patch.yaml +++ b/assets/optional/ai-model-serving/kserve/overlays/odh/inferenceservice-config-patch.yaml @@ 
-32,7 +32,7 @@ data: "localGatewayService" : "kserve-local-gateway.istio-system.svc.cluster.local", "ingressDomain" : "example.com", "ingressClassName" : "istio", - "domainTemplate": "{{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }}", + "domainTemplate": "example.com", "urlScheme": "https", "disableIstioVirtualHost": false, "disableIngressCreation": true @@ -88,10 +88,18 @@ data: inferenceService: |- { "serviceAnnotationDisallowedList": [ + "autoscaling.knative.dev/initial-scale", "autoscaling.knative.dev/min-scale", "autoscaling.knative.dev/max-scale", "internal.serving.kserve.io/storage-initializer-sourceuri", "kubectl.kubernetes.io/last-applied-configuration", - "security.opendatahub.io/enable-auth" + "security.opendatahub.io/enable-auth", + "networking.knative.dev/visibility", + "haproxy.router.openshift.io/timeout" ] } + + security: |- + { + "autoMountServiceAccountToken": false + } \ No newline at end of file diff --git a/assets/optional/ai-model-serving/kserve/overlays/odh/params.env b/assets/optional/ai-model-serving/kserve/overlays/odh/params.env index 2685e7968a..8231db7d23 100644 --- a/assets/optional/ai-model-serving/kserve/overlays/odh/params.env +++ b/assets/optional/ai-model-serving/kserve/overlays/odh/params.env @@ -1,5 +1,5 @@ -kserve-controller=quay.io/modh/kserve-controller@sha256:2a6881717d49e8b54032f429faa2b5bcb6917c992f1d6e95c5109762de197313 -kserve-agent=quay.io/modh/kserve-agent@sha256:9e4d959ecd76aeeb4c275628b0276c6123967e953e838a2b43bc9c750c6db844 -kserve-router=quay.io/modh/kserve-router@sha256:86bc4329cb89664e5ea42a09269843b534fd608437fe4592d115fbe35027ca8f -kserve-storage-initializer=quay.io/modh/kserve-storage-initializer@sha256:670a34b21ca339b0d27cfbb23bacfd86bd71118af1c47506070feb12078bd8c1 +kserve-controller=quay.io/modh/kserve-controller@sha256:67672b25f5e4c2ba5457d29f0c5ae5b7e0ec3b635bf58a79892306ddda30ac2a +kserve-agent=quay.io/modh/kserve-agent@sha256:26967e2807991a1fbd3f69dfb46ffe451edb610bc9ea24c64ca835e0135d5a96 
+kserve-router=quay.io/modh/kserve-router@sha256:8e73a4b58e1218115236f920ffe2671fb9a2166498ea17b885862df142be9036 +kserve-storage-initializer=quay.io/modh/kserve-storage-initializer@sha256:87272d0875618b3768f72cefc70169fea17be8f7513dab0ad74dd6205d0ce3f8 oauth-proxy=registry.redhat.io/openshift4/ose-oauth-proxy@sha256:bd49cfc8452b3d96467cc222db9487e120abc6cc5ba81349c6b3703706f36a08 diff --git a/assets/optional/ai-model-serving/kserve/rbac/role.yaml b/assets/optional/ai-model-serving/kserve/rbac/role.yaml index a282400787..1371ff8b70 100644 --- a/assets/optional/ai-model-serving/kserve/rbac/role.yaml +++ b/assets/optional/ai-model-serving/kserve/rbac/role.yaml @@ -38,9 +38,17 @@ rules: - "" resources: - secrets + verbs: + - get +- apiGroups: + - "" + resources: - serviceaccounts verbs: + - create + - delete - get + - patch - apiGroups: - admissionregistration.k8s.io resources: @@ -111,12 +119,32 @@ rules: - patch - update - watch +- apiGroups: + - operator.knative.dev + resources: + - knativeservings + verbs: + - get + - list + - watch +- apiGroups: + - rbac.authorization.k8s.io + resourceNames: + - kserve-inferencegraph-auth-verifiers + resources: + - clusterrolebindings + verbs: + - create + - get + - patch + - update - apiGroups: - route.openshift.io resources: - routes verbs: - create + - delete - get - list - patch diff --git a/assets/optional/ai-model-serving/release-ai-model-serving-x86_64.json b/assets/optional/ai-model-serving/release-ai-model-serving-x86_64.json index ecba53f5b5..ceacfb829a 100644 --- a/assets/optional/ai-model-serving/release-ai-model-serving-x86_64.json +++ b/assets/optional/ai-model-serving/release-ai-model-serving-x86_64.json @@ -1,20 +1,24 @@ { "release": { - "base": "2.19.2" + "base": "2.22.0" }, "images": { - "vllm-cpu-image": "quay.io/modh/vllm@sha256:4642b4337883ee6f768096829529e69e42689ef082f3b7e1439199fb20b7dc6a", - "vllm-gaudi-image": "quay.io/modh/vllm@sha256:f2d69a8c2ab85e5da0daa4ec42902599b7de0a24526ee2a970e23405718def8a", - 
"vllm-rocm-image": "quay.io/modh/vllm@sha256:51d79c688db3652143b954cca41f56ab775cf6456386681c60bcf4c2a1541e5c", - "caikit-tgis-image": "quay.io/modh/caikit-tgis-serving@sha256:2c7eef73708b5d73de33153459ac7238fdcf517cdc3544c775981f5814b4b6ed", - "caikit-standalone-image": "quay.io/modh/caikit-nlp@sha256:24d74898dc50ebdd3526f6cb8a028a521e79adf5bfe52559afd3734900da975e", - "tgis-image": "quay.io/modh/text-generation-inference@sha256:aebf545d8048a59174f70334dc90c6b97ead4602a39cb7598ea68c8d199168a2", - "ovms-image": "quay.io/modh/openvino_model_server@sha256:53b7fcf95de9b81e4c8652d0bf4e84e22d5b696827a5d951d863420c68b9cfe8", - "vllm-cuda-image": "quay.io/modh/vllm@sha256:79e1f24bba1d3e694f47f66ba9f8184e70310a10b77bf11c0febd0c926234950", - "kserve-controller": "quay.io/modh/kserve-controller@sha256:2a6881717d49e8b54032f429faa2b5bcb6917c992f1d6e95c5109762de197313", - "kserve-agent": "quay.io/modh/kserve-agent@sha256:9e4d959ecd76aeeb4c275628b0276c6123967e953e838a2b43bc9c750c6db844", - "kserve-router": "quay.io/modh/kserve-router@sha256:86bc4329cb89664e5ea42a09269843b534fd608437fe4592d115fbe35027ca8f", - "kserve-storage-initializer": "quay.io/modh/kserve-storage-initializer@sha256:670a34b21ca339b0d27cfbb23bacfd86bd71118af1c47506070feb12078bd8c1", + "caikit-tgis-image": "quay.io/modh/caikit-tgis-serving@sha256:d8294f446afefc6cbba0df882bba12cdad142f79b29bda46227a6328128d7fb6", + "caikit-standalone-image": "quay.io/modh/caikit-nlp@sha256:c867652a543eacd8288d08e9d38d8d25468c04fb838eebf450906293890b040d", + "tgis-image": "quay.io/modh/text-generation-inference@sha256:e923ebb8e6b8bbc28144d3f143c8453a4c526bc1852952209976cc27a311883b", + "ovms-image": "quay.io/modh/openvino_model_server@sha256:eee2857ddd5cc2df9ea7373ae7c6ae814d77bd631db5c52b91edad18ac6c73c8", + "vllm-cuda-image": "quay.io/modh/vllm@sha256:56aa86c6ed6ba6cc9557a8583ff9d4ee535193f6cda030bd1268064bc70120e3", + "guardrails-detector-huggingface-runtime-image": 
"quay.io/modh/odh-trustyai-hf-detector-runtime-rhel9@sha256:86316078a2d70ed2d754d683f0daedb76e632dd2b0588afa7713ce2a2a593e2f", + "vllm-cpu-image": "quay.io/modh/vllm@sha256:40e3b271035a750fb185cedb241a108ba0c11de90793d88983aaf805e5357f45", + "vllm-gaudi-image": "quay.io/modh/vllm@sha256:6025bfb31163b7db86d7f0a844da068c88babf4e970c5244492e82bdbaf8dcbe", + "vllm-ppc64le-image": "quay.io/opendatahub/vllm:fast-ppc64le", + "vllm-rocm-image": "quay.io/modh/vllm@sha256:2a90ded5cab4c03d4937c81cf0587cb0109c589e37058a50099139ab62a0666c", + "vllm-s390x-image": "quay.io/opendatahub/vllm:fast-s390x", + "ray-tls-generator-image": "registry.redhat.io/ubi9/ubi-minimal:latest", + "kserve-controller": "quay.io/modh/kserve-controller@sha256:67672b25f5e4c2ba5457d29f0c5ae5b7e0ec3b635bf58a79892306ddda30ac2a", + "kserve-agent": "quay.io/modh/kserve-agent@sha256:26967e2807991a1fbd3f69dfb46ffe451edb610bc9ea24c64ca835e0135d5a96", + "kserve-router": "quay.io/modh/kserve-router@sha256:8e73a4b58e1218115236f920ffe2671fb9a2166498ea17b885862df142be9036", + "kserve-storage-initializer": "quay.io/modh/kserve-storage-initializer@sha256:87272d0875618b3768f72cefc70169fea17be8f7513dab0ad74dd6205d0ce3f8", "oauth-proxy": "registry.redhat.io/openshift4/ose-oauth-proxy@sha256:bd49cfc8452b3d96467cc222db9487e120abc6cc5ba81349c6b3703706f36a08" } } diff --git a/assets/optional/ai-model-serving/runtimes/hf-detector.yaml b/assets/optional/ai-model-serving/runtimes/hf-detector.yaml new file mode 100644 index 0000000000..19b15ae912 --- /dev/null +++ b/assets/optional/ai-model-serving/runtimes/hf-detector.yaml @@ -0,0 +1,36 @@ +apiVersion: serving.kserve.io/v1alpha1 +kind: ServingRuntime +metadata: + name: guardrails-detector-huggingface-runtime + annotations: + openshift.io/display-name: Hugging Face Detector ServingRuntime for KServe + opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]' + labels: + opendatahub.io/dashboard: 'true' +spec: + annotations: + prometheus.io/port: '8080' + prometheus.io/path: 
'/metrics' + multiModel: false + supportedModelFormats: + - autoSelect: true + name: guardrails-detector-hf-runtime + containers: + - name: kserve-container + image: guardrails-detector-huggingface-runtime-image + command: + - uvicorn + - app:app + args: + - "--workers=1" + - "--host=0.0.0.0" + - "--port=8000" + - "--log-config=/common/log_conf.yaml" + env: + - name: MODEL_DIR + value: /mnt/models + - name: HF_HOME + value: /tmp/hf_home + ports: + - containerPort: 8000 + protocol: TCP diff --git a/assets/optional/ai-model-serving/runtimes/kustomization.x86_64.yaml b/assets/optional/ai-model-serving/runtimes/kustomization.x86_64.yaml index a142ca6974..d6d4165f75 100644 --- a/assets/optional/ai-model-serving/runtimes/kustomization.x86_64.yaml +++ b/assets/optional/ai-model-serving/runtimes/kustomization.x86_64.yaml @@ -1,26 +1,38 @@ images: - - name: vllm-cpu-image - newName: quay.io/modh/vllm - digest: sha256:4642b4337883ee6f768096829529e69e42689ef082f3b7e1439199fb20b7dc6a - - name: vllm-gaudi-image - newName: quay.io/modh/vllm - digest: sha256:f2d69a8c2ab85e5da0daa4ec42902599b7de0a24526ee2a970e23405718def8a - - name: vllm-rocm-image - newName: quay.io/modh/vllm - digest: sha256:51d79c688db3652143b954cca41f56ab775cf6456386681c60bcf4c2a1541e5c - name: caikit-tgis-image newName: quay.io/modh/caikit-tgis-serving - digest: sha256:2c7eef73708b5d73de33153459ac7238fdcf517cdc3544c775981f5814b4b6ed + digest: sha256:d8294f446afefc6cbba0df882bba12cdad142f79b29bda46227a6328128d7fb6 - name: caikit-standalone-image newName: quay.io/modh/caikit-nlp - digest: sha256:24d74898dc50ebdd3526f6cb8a028a521e79adf5bfe52559afd3734900da975e + digest: sha256:c867652a543eacd8288d08e9d38d8d25468c04fb838eebf450906293890b040d - name: tgis-image newName: quay.io/modh/text-generation-inference - digest: sha256:aebf545d8048a59174f70334dc90c6b97ead4602a39cb7598ea68c8d199168a2 + digest: sha256:e923ebb8e6b8bbc28144d3f143c8453a4c526bc1852952209976cc27a311883b - name: ovms-image newName: 
quay.io/modh/openvino_model_server - digest: sha256:53b7fcf95de9b81e4c8652d0bf4e84e22d5b696827a5d951d863420c68b9cfe8 + digest: sha256:eee2857ddd5cc2df9ea7373ae7c6ae814d77bd631db5c52b91edad18ac6c73c8 - name: vllm-cuda-image newName: quay.io/modh/vllm - digest: sha256:79e1f24bba1d3e694f47f66ba9f8184e70310a10b77bf11c0febd0c926234950 + digest: sha256:56aa86c6ed6ba6cc9557a8583ff9d4ee535193f6cda030bd1268064bc70120e3 + - name: guardrails-detector-huggingface-runtime-image + newName: quay.io/modh/odh-trustyai-hf-detector-runtime-rhel9 + digest: sha256:86316078a2d70ed2d754d683f0daedb76e632dd2b0588afa7713ce2a2a593e2f + - name: vllm-cpu-image + newName: quay.io/modh/vllm + digest: sha256:40e3b271035a750fb185cedb241a108ba0c11de90793d88983aaf805e5357f45 + - name: vllm-gaudi-image + newName: quay.io/modh/vllm + digest: sha256:6025bfb31163b7db86d7f0a844da068c88babf4e970c5244492e82bdbaf8dcbe + - name: vllm-ppc64le-image + newName: quay.io/opendatahub/vllm + newTag: fast-ppc64le + - name: vllm-rocm-image + newName: quay.io/modh/vllm + digest: sha256:2a90ded5cab4c03d4937c81cf0587cb0109c589e37058a50099139ab62a0666c + - name: vllm-s390x-image + newName: quay.io/opendatahub/vllm + newTag: fast-s390x + - name: ray-tls-generator-image + newName: registry.redhat.io/ubi9/ubi-minimal + newTag: latest diff --git a/assets/optional/ai-model-serving/runtimes/kustomization.yaml b/assets/optional/ai-model-serving/runtimes/kustomization.yaml index cbec4e4be9..069a0fedf7 100644 --- a/assets/optional/ai-model-serving/runtimes/kustomization.yaml +++ b/assets/optional/ai-model-serving/runtimes/kustomization.yaml @@ -6,8 +6,8 @@ namespace: redhat-ods-applications resources: - caikit-standalone.yaml - caikit-tgis.yaml +- hf-detector.yaml - ovms-kserve.yaml -- tgis.yaml +- vllm-cuda.yaml - vllm-gaudi.yaml - vllm-rocm.yaml -- vllm.yaml diff --git
a/assets/optional/ai-model-serving/runtimes/ovms-kserve.yaml b/assets/optional/ai-model-serving/runtimes/ovms-kserve.yaml index 80dbf3e8cf..278ed3e85d 100644 --- a/assets/optional/ai-model-serving/runtimes/ovms-kserve.yaml +++ b/assets/optional/ai-model-serving/runtimes/ovms-kserve.yaml @@ -4,6 +4,7 @@ metadata: annotations: openshift.io/display-name: OpenVINO Model Server opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]' + opendatahub.io/runtime-version: 'v2025.1' name: kserve-ovms labels: opendatahub.io/dashboard: 'true' diff --git a/assets/optional/ai-model-serving/runtimes/tgis.yaml b/assets/optional/ai-model-serving/runtimes/tgis.yaml deleted file mode 100644 index 6da02146cd..0000000000 --- a/assets/optional/ai-model-serving/runtimes/tgis.yaml +++ /dev/null @@ -1,32 +0,0 @@ -apiVersion: serving.kserve.io/v1alpha1 -kind: ServingRuntime -metadata: - name: tgis-grpc-runtime - annotations: - openshift.io/display-name: TGIS Standalone ServingRuntime for KServe - opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]' - labels: - opendatahub.io/dashboard: 'true' -spec: - annotations: - prometheus.io/port: '3000' - prometheus.io/path: '/metrics' - multiModel: false - supportedModelFormats: - - autoSelect: true - name: pytorch - containers: - - name: kserve-container - image: tgis-image - command: ['text-generation-launcher'] - args: - - '--model-name=/mnt/models/' - - '--port=3000' - - '--grpc-port=8033' - env: - - name: HF_HOME - value: /tmp/hf_home - ports: - - containerPort: 8033 - name: h2c - protocol: TCP diff --git a/assets/optional/ai-model-serving/runtimes/vllm.yaml b/assets/optional/ai-model-serving/runtimes/vllm-cuda.yaml similarity index 69% rename from assets/optional/ai-model-serving/runtimes/vllm.yaml rename to assets/optional/ai-model-serving/runtimes/vllm-cuda.yaml index 44917806e6..de0960f325 100644 --- a/assets/optional/ai-model-serving/runtimes/vllm.yaml +++ b/assets/optional/ai-model-serving/runtimes/vllm-cuda.yaml @@ -1,23 
+1,24 @@ apiVersion: serving.kserve.io/v1alpha1 kind: ServingRuntime metadata: - name: vllm-runtime + name: vllm-cuda-runtime annotations: - openshift.io/display-name: vLLM ServingRuntime for KServe + openshift.io/display-name: vLLM NVIDIA GPU ServingRuntime for KServe opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]' + opendatahub.io/runtime-version: 'v0.9.1.0' labels: - opendatahub.io/dashboard: 'true' + opendatahub.io/dashboard: "true" spec: annotations: - prometheus.io/port: '8080' - prometheus.io/path: '/metrics' + prometheus.io/port: "8080" + prometheus.io/path: "/metrics" multiModel: false supportedModelFormats: - autoSelect: true name: vLLM containers: - name: kserve-container - image: vllm-image + image: vllm-cuda-image command: - python - -m diff --git a/assets/optional/ai-model-serving/runtimes/vllm-gaudi.yaml b/assets/optional/ai-model-serving/runtimes/vllm-gaudi.yaml index b8192b3ff9..90081ba36e 100644 --- a/assets/optional/ai-model-serving/runtimes/vllm-gaudi.yaml +++ b/assets/optional/ai-model-serving/runtimes/vllm-gaudi.yaml @@ -5,6 +5,7 @@ metadata: annotations: openshift.io/display-name: vLLM Intel Gaudi Accelerator ServingRuntime for KServe opendatahub.io/recommended-accelerators: '["habana.ai/gaudi"]' + opendatahub.io/runtime-version: 'v0.7.2' labels: opendatahub.io/dashboard: 'true' spec: diff --git a/assets/optional/ai-model-serving/runtimes/vllm-rocm.yaml b/assets/optional/ai-model-serving/runtimes/vllm-rocm.yaml index 0c892f4e81..1c45c0f7dc 100644 --- a/assets/optional/ai-model-serving/runtimes/vllm-rocm.yaml +++ b/assets/optional/ai-model-serving/runtimes/vllm-rocm.yaml @@ -5,6 +5,7 @@ metadata: annotations: openshift.io/display-name: vLLM AMD GPU ServingRuntime for KServe opendatahub.io/recommended-accelerators: '["amd.com/gpu"]' + opendatahub.io/runtime-version: 'v0.8.4.3' labels: opendatahub.io/dashboard: 'true' spec: diff --git a/scripts/auto-rebase/assets_ai_model_serving.yaml 
b/scripts/auto-rebase/assets_ai_model_serving.yaml index df148b2540..b76ce580ac 100644 --- a/scripts/auto-rebase/assets_ai_model_serving.yaml +++ b/scripts/auto-rebase/assets_ai_model_serving.yaml @@ -120,13 +120,13 @@ assets: src: caikit-standalone-template.yaml - file: caikit-tgis.yaml src: caikit-tgis-template.yaml + - file: hf-detector.yaml + src: hf-detector-template.yaml - file: ovms-kserve.yaml src: ovms-kserve-template.yaml - - file: tgis.yaml - src: tgis-template.yaml + - file: vllm-cuda.yaml + src: vllm-cuda-template.yaml - file: vllm-gaudi.yaml src: vllm-gaudi-template.yaml - file: vllm-rocm.yaml src: vllm-rocm-template.yaml - - file: vllm.yaml - src: vllm-template.yaml diff --git a/scripts/auto-rebase/last_rebase_ai_model_serving.sh b/scripts/auto-rebase/last_rebase_ai_model_serving.sh index a32d330d77..6342e5d719 100755 --- a/scripts/auto-rebase/last_rebase_ai_model_serving.sh +++ b/scripts/auto-rebase/last_rebase_ai_model_serving.sh @@ -1,2 +1,2 @@ #!/bin/bash -x -./scripts/auto-rebase/rebase_ai_model_serving.sh to "registry.redhat.io/rhoai/odh-operator-bundle:v2.19" +./scripts/auto-rebase/rebase_ai_model_serving.sh to "registry.redhat.io/rhoai/odh-operator-bundle:v2.22" diff --git a/scripts/auto-rebase/rebase_ai_model_serving.sh b/scripts/auto-rebase/rebase_ai_model_serving.sh index 450a2d0108..f18199a10c 100755 --- a/scripts/auto-rebase/rebase_ai_model_serving.sh +++ b/scripts/auto-rebase/rebase_ai_model_serving.sh @@ -85,7 +85,7 @@ download_rhoai_manifests() { "${bundle_ref}" \ --filter-by-os amd64 || return 1 - local -r operator_ref=$(yq '.spec.relatedImages[] | select(.name == "odh-rhel8-operator-*") | .image' "${STAGING_BUNDLE}/${CSV_FILENAME}") + local -r operator_ref=$(yq '.spec.relatedImages[] | select(.name == "odh-rhel9-operator-*") | .image' "${STAGING_BUNDLE}/${CSV_FILENAME}") title "Fetching RHOAI manifests" # shellcheck disable=SC2086 oc image extract \ @@ -163,7 +163,8 @@ update_runtimes() { images: EOF - local -r images=$(cat 
"${STAGING_OPERATOR}"/modelcontroller/base/*.env | grep "\-image") + # shellcheck disable=SC2046 + local -r images=$(grep --no-filename "\-image" $(find "${STAGING_OPERATOR}"/modelcontroller/base -iname '*.env')) for image in ${images}; do local image_name="${image%=*}" local image_ref="${image#*=}" diff --git a/scripts/auto-rebase/rebase_job_entrypoint.sh b/scripts/auto-rebase/rebase_job_entrypoint.sh index 6195db4775..2f61903c16 100755 --- a/scripts/auto-rebase/rebase_job_entrypoint.sh +++ b/scripts/auto-rebase/rebase_job_entrypoint.sh @@ -92,7 +92,7 @@ fi # # New references can be obtained from: # https://catalog.redhat.com/software/containers/rhoai/odh-operator-bundle/659803ca929f3c931af06f28 -rhoai_release="registry.redhat.io/rhoai/odh-operator-bundle:v2.19" +rhoai_release="registry.redhat.io/rhoai/odh-operator-bundle:v2.22" APP_ID=$(cat /secrets/pr-creds/app_id) \ KEY=/secrets/pr-creds/key.pem \ diff --git a/scripts/ci-ai-model-serving/tests/06-test-vllm.sh b/scripts/ci-ai-model-serving/tests/06-test-vllm.sh index 44437bbb5c..0b712ef39b 100755 --- a/scripts/ci-ai-model-serving/tests/06-test-vllm.sh +++ b/scripts/ci-ai-model-serving/tests/06-test-vllm.sh @@ -28,9 +28,9 @@ pull_image "${VLLM_IMAGE}" pull_image quay.io/microshift/ai-testing-model:vllm-granite-3b-code-base-2k # Create ServingRuntime -cp /usr/lib/microshift/manifests.d/050-microshift-ai-model-serving-runtimes/vllm.yaml /tmp/vllm.yaml -sed -i "s,image: vllm-image,image: ${VLLM_IMAGE}," /tmp/vllm.yaml -oc apply -n test-vllm -f /tmp/vllm.yaml +cp /usr/lib/microshift/manifests.d/050-microshift-ai-model-serving-runtimes/vllm-cuda.yaml /tmp/vllm-cuda.yaml +sed -i "s,image: vllm-cuda-image,image: ${VLLM_IMAGE}," /tmp/vllm-cuda.yaml +oc apply -n test-vllm -f /tmp/vllm-cuda.yaml # Create InferenceService # --dtype=half will be passed through to the deployment and to the vLLM model server. 
@@ -123,7 +123,7 @@ fi oc delete -n test-vllm route granite oc delete -n test-vllm InferenceService granite-3b-code-base-2k -oc delete -n test-vllm ServingRuntime vllm-runtime +oc delete -n test-vllm ServingRuntime vllm-cuda-runtime oc delete ns test-vllm exit "${res}"