openshift · openshift-merge-bot · Dec 17, 2025 · Dec 11, 2025 · Dec 11, 2025 · Dec 11, 2025
diff --git a/assets/optional/ai-model-serving/kserve/certmanager/certificate.yaml b/assets/optional/ai-model-serving/kserve/certmanager/certificate.yaml
@@ -0,0 +1,24 @@
+# The following manifests contain a self-signed issuer CR and a certificate CR.
+# More documentation can be found at https://docs.cert-manager.io
+apiVersion: cert-manager.io/v1
+kind: Issuer
+metadata:
+  name: selfsigned-issuer
+  namespace: kserve
+spec:
+  selfSigned: {}
+---
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+  name: serving-cert  # this name should match the one that appears in kustomizeconfig.yaml
+  namespace: kserve
+spec:
+  # $(webhookServiceName) and $(kserveNamespace) will be substituted by kustomize
+  commonName: $(webhookServiceName).$(kserveNamespace).svc
+  dnsNames:
+    - $(webhookServiceName).$(kserveNamespace).svc
+  issuerRef:
+    kind: Issuer
+    name: selfsigned-issuer
+  secretName: kserve-webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize
diff --git a/assets/optional/ai-model-serving/kserve/certmanager/kustomization.yaml b/assets/optional/ai-model-serving/kserve/certmanager/kustomization.yaml
@@ -0,0 +1,6 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+- certificate.yaml
+
diff --git a/assets/optional/ai-model-serving/kserve/configmap/inferenceservice.yaml b/assets/optional/ai-model-serving/kserve/configmap/inferenceservice.yaml
@@ -42,24 +42,23 @@ data:
            }
        }
     # ====================================== ISVC CONFIGURATION ======================================
-    # Example   
+    # Example - setting custom annotation  
      inferenceService: |-
-      {
-        "serviceAnnotationDisallowedList": [
-          "my.custom.annotation/1"  
-        ],
-        "serviceLabelDisallowedList": [
-          "my.custom.label.1"  
-        ]
+       {
+         "serviceAnnotationDisallowedList": [
+            "my.custom.annotation/1"  
+         ],
+         "serviceLabelDisallowedList": [
+            "my.custom.label.1"  
+         ]
        }
-    # Example of isvc configuration
+    # Example - setting custom annotation
     inferenceService: |-
       {
         # ServiceAnnotationDisallowedList is a list of annotations that are not allowed to be propagated to Knative 
         # revisions, which prevents the reconciliation loop to be triggered if the annotations is 
         # configured here are used.
         # Default values are:
-        #  "autoscaling.knative.dev/initial-scale",
         #  "autoscaling.knative.dev/min-scale",
         #  "autoscaling.knative.dev/max-scale",
         #  "internal.serving.kserve.io/storage-initializer-sourceuri",
@@ -73,7 +72,68 @@ data:
         "serviceLabelDisallowedList": [
           "my.custom.label.1"  
         ]
+      } 
+    # Example - setting custom resource
+    inferenceService: |-
+      {
+        "resource": {
+          "cpuLimit": "1",
+          "memoryLimit": "2Gi",
+          "cpuRequest": "1",
+          "memoryRequest": "2Gi"
+        }
+      }
+    # Example - setting custom resource
+    inferenceService: |-
+      {
+        # resource contains the default resource configuration for the inference service.
+        # you can override this configuration by specifying the resources in the inference service yaml.
+        # To leave a resource limit or request unbounded, set its value to null or "",
+        # or simply remove that field from the config.
+        "resource": {
+           # cpuLimit is the limits.cpu to set for the inference service.
+           "cpuLimit": "1",
+
+           # memoryLimit is the limits.memory to set for the inference service.
+           "memoryLimit": "2Gi",
+
+           # cpuRequest is the requests.cpu to set for the inference service.
+           "cpuRequest": "1",
+
+           # memoryRequest is the requests.memory to set for the inference service.
+           "memoryRequest": "2Gi"
+        }
+      }
+    # ====================================== MultiNode CONFIGURATION ======================================
+    # Example   
+    multiNode: |-
+      {
+        "customGPUResourceTypeList": [
+          "custom.com/gpu"
+        ]
+      }
+    # Example of multinode configuration
+    multiNode: |-
+      {      
+        # CustomGPUResourceTypeList is a list of custom GPU resource types intended to identify the GPU type of a resource,
+        # not to restrict the user from using a specific GPU type.
+        # The MultiNode runtime pod will dynamically add GPU resources based on the registered GPU types.
+        "customGPUResourceTypeList": [
+          "custom.com/gpu"
+        ]
       }  
+     # ====================================== OTelCollector CONFIGURATION ======================================
+     # Example
+     opentelemetryCollector: |-
+       {
+         # scrapeInterval is the interval at which the OpenTelemetry Collector will scrape the metrics.
+         "scrapeInterval": "5s",
+         # metricScalerEndpoint is the endpoint from which the KEDA's ScaledObject will scrape the metrics.
+         "metricScalerEndpoint": "keda-otel-scaler.keda.svc:4318",
+         # metricReceiverEndpoint is the endpoint from which the OpenTelemetry Collector will scrape the metrics.
+         "metricReceiverEndpoint": "keda-otel-scaler.keda.svc:4317"
+       }
+
      # ====================================== STORAGE INITIALIZER CONFIGURATION ======================================
      # Example
      storageInitializer: |-
@@ -117,7 +177,7 @@ data:
            # if pvc volume is provided in storageuri then the pvc volume is directly mounted to /mnt/models in the user container.
            # rather than symlink it to a shared volume. For more info see https://github.com/kserve/kserve/issues/2737
            "enableDirectPvcVolumeMount": true,
-    
+
            # enableModelcar enabled allows you to directly access an OCI container image by
            # using a source URL with an "oci://" schema.
            "enableModelcar": false,
@@ -222,7 +282,9 @@ data:
      # ====================================== INGRESS CONFIGURATION ======================================
      # Example
      ingress: |-
-       {
+       {    
+           "enableGatewayApi": false,
+           "kserveIngressGateway": "kserve/kserve-ingress-gateway",
            "ingressGateway" : "knative-serving/knative-ingress-gateway",
            "localGateway" : "knative-serving/knative-local-gateway",
            "localGatewayService" : "knative-local-gateway.istio-system.svc.cluster.local",
@@ -235,7 +297,17 @@ data:
            "disableIngressCreation": false
        }
      ingress: |-
-       {
+       {   
+           # enableGatewayApi specifies whether to use Gateway API instead of Ingress to serve external traffic.
+           "enableGatewayApi": false,
+
+           # KServe implements [Gateway API](https://gateway-api.sigs.k8s.io/) to serve external traffic. 
+           # By default, KServe configures a default gateway to serve external traffic.
+           # But, KServe can be configured to use a custom gateway by modifying this configuration.
+           # The gateway should be specified in format <gateway namespace>/<gateway name>
+           # NOTE: This configuration only applicable for raw deployment.
+           "kserveIngressGateway": "kserve/kserve-ingress-gateway",
+
            # ingressGateway specifies the ingress gateway to serve external traffic.
            # The gateway should be specified in format <gateway namespace>/<gateway name>
            # NOTE: This configuration only applicable for serverless deployment with Istio configured as network layer.
@@ -414,7 +486,9 @@ data:
            "cpuLimit": "1",
            "headers": {
              "propagate": []
-           }
+           },
+           "imagePullPolicy": "IfNotPresent",
+           "imagePullSecrets": ["docker-secret"]
        }
      # router is the implementation of inference graph.
      router: |-
@@ -444,6 +518,13 @@ data:
                 "*Trace-Id*"
              ]
            }
+
+           # imagePullPolicy specifies when the router image should be pulled from registry.
+           "imagePullPolicy": "IfNotPresent",
+
+           # imagePullSecrets specifies the list of secrets to be used for pulling the router image from registry.
+           # https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
+           "imagePullSecrets": ["docker-secret"]
        }
 
      # ====================================== DEPLOYMENT CONFIGURATION ======================================
@@ -462,7 +543,7 @@ data:
          # ModelMesh https://kserve.github.io/website/master/admin/modelmesh/
          "defaultDeploymentMode": "Serverless"
        }
-    
+
      # ====================================== SERVICE CONFIGURATION ======================================
      # Example
      service: |-
@@ -507,7 +588,14 @@ data:
          # defaultJobImage specifies the default image used for the download job.
          "defaultJobImage" : "kserve/storage-initializer:latest",
          # Kubernetes modifies the filesystem group ID on the attached volume.
-         "FSGroup": 1000
+         "fsGroup": 1000,
+         # TTL for the download job after it is finished.
+         "jobTTLSecondsAfterFinished": 3600,
+         # The frequency at which the local model agent reconciles the local models
+         # This is to detect if models are missing from local disk
+         "reconcilationFrequencyInSecs": 60,
+         # This is to disable localmodel pv and pvc management for namespaces without isvcs
+         "disableVolumeManagement": false
        }
 
   explainers: |-
@@ -528,9 +616,10 @@ data:
         "caBundleConfigMapName": "",
         "caBundleVolumeMountPath": "/etc/ssl/custom-certs",
         "enableDirectPvcVolumeMount": true,
-        "enableModelcar": false,
+        "enableModelcar": true,
         "cpuModelcar": "10m",
-        "memoryModelcar": "15Mi"
+        "memoryModelcar": "15Mi",
+        "uidModelcar": 1010
     }
 
   credentials: |-
@@ -555,7 +644,9 @@ data:
     }
 
   ingress: |-
-    {
+    {   
+        "enableGatewayApi": false,
+        "kserveIngressGateway": "kserve/kserve-ingress-gateway",
         "ingressGateway" : "knative-serving/knative-ingress-gateway",
         "localGateway" : "knative-serving/knative-local-gateway",
         "localGatewayService" : "knative-local-gateway.istio-system.svc.cluster.local",
@@ -603,7 +694,8 @@ data:
         "memoryRequest": "100Mi",
         "memoryLimit": "1Gi",
         "cpuRequest": "100m",
-        "cpuLimit": "1"
+        "cpuLimit": "1",
+        "imagePullPolicy": "IfNotPresent"
     }
 
   deploy: |-
@@ -622,9 +714,9 @@ data:
       "enabled": false,
       "jobNamespace": "kserve-localmodel-jobs",
       "defaultJobImage" : "kserve/storage-initializer:latest",
-      "FSGroup": 1000
+      "fsGroup": 1000
     }
-  
+
   security: |-
     {
       "autoMountServiceAccountToken": true
@@ -633,4 +725,21 @@ data:
   service: |-
     {
         "serviceClusterIPNone": true
-    }
+    }
+
+  inferenceService: |-
+    {
+      "resource": {
+        "cpuLimit": "1",
+        "memoryLimit": "2Gi",
+        "cpuRequest": "1",
+        "memoryRequest": "2Gi"
+      }
+    }
+
+  opentelemetryCollector: |-
+    {
+      "scrapeInterval": "5s",
+      "metricReceiverEndpoint": "keda-otel-scaler.keda.svc:4317",
+      "metricScalerEndpoint": "keda-otel-scaler.keda.svc:4318"
+    }