Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# The following manifests contain a self-signed issuer CR and a certificate CR.
# More documentation can be found at https://docs.cert-manager.io
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
  name: selfsigned-issuer
  namespace: kserve
spec:
  selfSigned: {}
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  # This name should match the one that appears in kustomizeconfig.yaml.
  name: serving-cert
  namespace: kserve
spec:
  # $(webhookServiceName) and $(kserveNamespace) will be substituted by kustomize.
  commonName: $(webhookServiceName).$(kserveNamespace).svc
  dnsNames:
    - $(webhookServiceName).$(kserveNamespace).svc
  issuerRef:
    kind: Issuer
    name: selfsigned-issuer
  # This secret will not be prefixed, since it is not managed by kustomize.
  secretName: kserve-webhook-server-cert
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Kustomization whose only listed resource is certificate.yaml.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
  - certificate.yaml

155 changes: 132 additions & 23 deletions assets/optional/ai-model-serving/kserve/configmap/inferenceservice.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,24 +42,23 @@ data:
}
}
# ====================================== ISVC CONFIGURATION ======================================
# Example
# Example - setting custom annotation
inferenceService: |-
{
"serviceAnnotationDisallowedList": [
"my.custom.annotation/1"
],
"serviceLabelDisallowedList": [
"my.custom.label.1"
]
{
"serviceAnnotationDisallowedList": [
"my.custom.annotation/1"
],
"serviceLabelDisallowedList": [
"my.custom.label.1"
]
}
# Example of isvc configuration
# Example - setting custom annotation
inferenceService: |-
{
# ServiceAnnotationDisallowedList is a list of annotations that are not allowed to be propagated to Knative
# revisions, which prevents the reconciliation loop from being triggered if the annotations
# configured here are used.
# Default values are:
# "autoscaling.knative.dev/initial-scale",
# "autoscaling.knative.dev/min-scale",
# "autoscaling.knative.dev/max-scale",
# "internal.serving.kserve.io/storage-initializer-sourceuri",
Expand All @@ -73,7 +72,68 @@ data:
"serviceLabelDisallowedList": [
"my.custom.label.1"
]
}
# Example - setting custom resource
inferenceService: |-
{
"resource": {
"cpuLimit": "1",
"memoryLimit": "2Gi",
"cpuRequest": "1",
"memoryRequest": "2Gi"
}
}
# Example - setting custom resource
inferenceService: |-
{
# resource contains the default resource configuration for the inference service.
# you can override this configuration by specifying the resources in the inference service yaml.
# If you want to leave a resource (limit or request) unbounded, you can set its value to null or ""
# or just remove the specific field from the config.
"resource": {
# cpuLimit is the limits.cpu to set for the inference service.
"cpuLimit": "1",

# memoryLimit is the limits.memory to set for the inference service.
"memoryLimit": "2Gi",

# cpuRequest is the requests.cpu to set for the inference service.
"cpuRequest": "1",

# memoryRequest is the requests.memory to set for the inference service.
"memoryRequest": "2Gi"
}
}
# ====================================== MultiNode CONFIGURATION ======================================
# Example
multiNode: |-
{
"customGPUResourceTypeList": [
"custom.com/gpu"
]
}
# Example of multinode configuration
multiNode: |-
{
# CustomGPUResourceTypeList is a list of custom GPU resource types intended to identify the GPU type of a resource,
# not to restrict the user from using a specific GPU type.
# The MultiNode runtime pod will dynamically add GPU resources based on the registered GPU types.
"customGPUResourceTypeList": [
"custom.com/gpu"
]
}
# ====================================== OTelCollector CONFIGURATION ======================================
# Example
opentelemetryCollector: |-
{
# scrapeInterval is the interval at which the OpenTelemetry Collector will scrape the metrics.
"scrapeInterval": "5s",
# metricScalerEndpoint is the endpoint from which KEDA's ScaledObject will scrape the metrics.
"metricScalerEndpoint": "keda-otel-scaler.keda.svc:4318",
# metricReceiverEndpoint is the endpoint from which the OpenTelemetry Collector will scrape the metrics.
"metricReceiverEndpoint": "keda-otel-scaler.keda.svc:4317"
}

# ====================================== STORAGE INITIALIZER CONFIGURATION ======================================
# Example
storageInitializer: |-
Expand Down Expand Up @@ -117,7 +177,7 @@ data:
# If a PVC volume is provided in the storageUri, the PVC volume is mounted directly to /mnt/models in the user container
# rather than being symlinked to a shared volume. For more info see https://github.com/kserve/kserve/issues/2737
"enableDirectPvcVolumeMount": true,

# enableModelcar, when enabled, allows you to directly access an OCI container image by
# using a source URL with an "oci://" schema.
"enableModelcar": false,
Expand Down Expand Up @@ -222,7 +282,9 @@ data:
# ====================================== INGRESS CONFIGURATION ======================================
# Example
ingress: |-
{
{
"enableGatewayApi": false,
"kserveIngressGateway": "kserve/kserve-ingress-gateway",
"ingressGateway" : "knative-serving/knative-ingress-gateway",
"localGateway" : "knative-serving/knative-local-gateway",
"localGatewayService" : "knative-local-gateway.istio-system.svc.cluster.local",
Expand All @@ -235,7 +297,17 @@ data:
"disableIngressCreation": false
}
ingress: |-
{
{
# enableGatewayApi specifies whether to use Gateway API instead of Ingress to serve external traffic.
"enableGatewayApi": false,

# KServe implements [Gateway API](https://gateway-api.sigs.k8s.io/) to serve external traffic.
# By default, KServe configures a default gateway to serve external traffic.
# But, KServe can be configured to use a custom gateway by modifying this configuration.
# The gateway should be specified in format <gateway namespace>/<gateway name>
# NOTE: This configuration is only applicable for raw deployment.
"kserveIngressGateway": "kserve/kserve-ingress-gateway",

# ingressGateway specifies the ingress gateway to serve external traffic.
# The gateway should be specified in format <gateway namespace>/<gateway name>
# NOTE: This configuration is only applicable for serverless deployment with Istio configured as the network layer.
Expand Down Expand Up @@ -414,7 +486,9 @@ data:
"cpuLimit": "1",
"headers": {
"propagate": []
}
},
"imagePullPolicy": "IfNotPresent",
"imagePullSecrets": ["docker-secret"]
}
# router is the implementation of inference graph.
router: |-
Expand Down Expand Up @@ -444,6 +518,13 @@ data:
"*Trace-Id*"
]
}

# imagePullPolicy specifies when the router image should be pulled from registry.
"imagePullPolicy": "IfNotPresent",

# imagePullSecrets specifies the list of secrets to be used for pulling the router image from the registry.
# https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
"imagePullSecrets": ["docker-secret"]
}

# ====================================== DEPLOYMENT CONFIGURATION ======================================
Expand All @@ -462,7 +543,7 @@ data:
# ModelMesh https://kserve.github.io/website/master/admin/modelmesh/
"defaultDeploymentMode": "Serverless"
}

# ====================================== SERVICE CONFIGURATION ======================================
# Example
service: |-
Expand Down Expand Up @@ -507,7 +588,14 @@ data:
# defaultJobImage specifies the default image used for the download job.
"defaultJobImage" : "kserve/storage-initializer:latest",
# Kubernetes modifies the filesystem group ID on the attached volume.
"FSGroup": 1000
"fsGroup": 1000,
# TTL for the download job after it is finished.
"jobTTLSecondsAfterFinished": 3600,
# The frequency at which the local model agent reconciles the local models.
# This is used to detect whether models are missing from the local disk.
"reconcilationFrequencyInSecs": 60,
# This is to disable localmodel pv and pvc management for namespaces without isvcs
"disableVolumeManagement": false
}

explainers: |-
Expand All @@ -528,9 +616,10 @@ data:
"caBundleConfigMapName": "",
"caBundleVolumeMountPath": "/etc/ssl/custom-certs",
"enableDirectPvcVolumeMount": true,
"enableModelcar": false,
"enableModelcar": true,
"cpuModelcar": "10m",
"memoryModelcar": "15Mi"
"memoryModelcar": "15Mi",
"uidModelcar": 1010
}

credentials: |-
Expand All @@ -555,7 +644,9 @@ data:
}

ingress: |-
{
{
"enableGatewayApi": false,
"kserveIngressGateway": "kserve/kserve-ingress-gateway",
"ingressGateway" : "knative-serving/knative-ingress-gateway",
"localGateway" : "knative-serving/knative-local-gateway",
"localGatewayService" : "knative-local-gateway.istio-system.svc.cluster.local",
Expand Down Expand Up @@ -603,7 +694,8 @@ data:
"memoryRequest": "100Mi",
"memoryLimit": "1Gi",
"cpuRequest": "100m",
"cpuLimit": "1"
"cpuLimit": "1",
"imagePullPolicy": "IfNotPresent"
}

deploy: |-
Expand All @@ -622,9 +714,9 @@ data:
"enabled": false,
"jobNamespace": "kserve-localmodel-jobs",
"defaultJobImage" : "kserve/storage-initializer:latest",
"FSGroup": 1000
"fsGroup": 1000
}

security: |-
{
"autoMountServiceAccountToken": true
Expand All @@ -633,4 +725,21 @@ data:
service: |-
{
"serviceClusterIPNone": true
}
}

inferenceService: |-
{
"resource": {
"cpuLimit": "1",
"memoryLimit": "2Gi",
"cpuRequest": "1",
"memoryRequest": "2Gi"
}
}

opentelemetryCollector: |-
{
"scrapeInterval": "5s",
"metricReceiverEndpoint": "keda-otel-scaler.keda.svc:4317",
"metricScalerEndpoint": "keda-otel-scaler.keda.svc:4318"
}
Loading