diff --git a/.github/workflows/conformance.yml b/.github/workflows/conformance.yml index f8ce171f53..ad6ec8ff26 100644 --- a/.github/workflows/conformance.yml +++ b/.github/workflows/conformance.yml @@ -12,6 +12,9 @@ on: enable-experimental: required: true type: boolean + enable-inference-extension: + required: true + type: boolean production-release: required: false type: boolean @@ -28,6 +31,7 @@ defaults: env: PLUS_USAGE_ENDPOINT: ${{ secrets.JWT_PLUS_REPORTING_ENDPOINT }} ENABLE_EXPERIMENTAL: ${{ inputs.enable-experimental }} + ENABLE_INFERENCE_EXTENSION: ${{ inputs.enable-inference-extension }} permissions: contents: read diff --git a/.nvmrc b/.nvmrc index 2bd5a0a98a..a45fd52cc5 100644 --- a/.nvmrc +++ b/.nvmrc @@ -1 +1 @@ -22 +24 diff --git a/Makefile b/Makefile index c794b3a9e2..4236d49d14 100644 --- a/Makefile +++ b/Makefile @@ -15,6 +15,7 @@ TELEMETRY_ENDPOINT=# if empty, NGF will report telemetry in its logs at debug le TELEMETRY_ENDPOINT_INSECURE = false ENABLE_EXPERIMENTAL ?= false +ENABLE_INFERENCE_EXTENSION ?= false # go build flags - should not be overridden by the user GO_LINKER_FlAGS_VARS = -X main.version=${VERSION} -X main.telemetryReportPeriod=${TELEMETRY_REPORT_PERIOD} -X main.telemetryEndpoint=${TELEMETRY_ENDPOINT} -X main.telemetryEndpointInsecure=${TELEMETRY_ENDPOINT_INSECURE} @@ -33,7 +34,7 @@ GEN_CRD_API_REFERENCE_DOCS_VERSION = v0.3.0 # renovate: datasource=go depName=sigs.k8s.io/controller-tools CONTROLLER_TOOLS_VERSION = v0.19.0 # renovate: datasource=docker depName=node -NODE_VERSION = 22 +NODE_VERSION = 24 # renovate: datasource=docker depName=quay.io/helmpack/chart-testing CHART_TESTING_VERSION = v3.13.0 # renovate: datasource=github-tags depName=dadav/helm-schema @@ -136,6 +137,14 @@ install-gateway-crds: ## Install Gateway API CRDs uninstall-gateway-crds: ## Uninstall Gateway API CRDs kubectl kustomize $(SELF_DIR)config/crd/gateway-api/$(if $(filter true,$(ENABLE_EXPERIMENTAL)),experimental,standard) | kubectl delete -f - +.PHONY: 
install-inference-crds +install-inference-crds: ## Install Gateway API Inference Extension CRDs + kubectl kustomize $(SELF_DIR)config/crd/inference-extension | kubectl apply -f - + +.PHONY: uninstall-inference-crds +uninstall-inference-crds: ## Uninstall Gateway API Inference Extension CRDs + kubectl kustomize $(SELF_DIR)config/crd/inference-extension | kubectl delete -f - + .PHONY: generate-manifests generate-manifests: ## Generate manifests using Helm. ./scripts/generate-manifests.sh @@ -226,10 +235,16 @@ install-ngf-local-build-with-plus: check-for-plus-usage-endpoint build-images-wi .PHONY: helm-install-local helm-install-local: install-gateway-crds ## Helm install NGF on configured kind cluster with local images. To build, load, and install with helm run make install-ngf-local-build. + @if [ "$(ENABLE_INFERENCE_EXTENSION)" = "true" ]; then \ + $(MAKE) install-inference-crds; \ + fi helm install nginx-gateway $(CHART_DIR) --set nginx.image.repository=$(NGINX_PREFIX) --create-namespace --wait --set nginxGateway.image.pullPolicy=Never --set nginx.service.type=NodePort --set nginxGateway.image.repository=$(PREFIX) --set nginxGateway.image.tag=$(TAG) --set nginx.image.tag=$(TAG) --set nginx.image.pullPolicy=Never --set nginxGateway.gwAPIExperimentalFeatures.enable=$(ENABLE_EXPERIMENTAL) -n nginx-gateway $(HELM_PARAMETERS) .PHONY: helm-install-local-with-plus helm-install-local-with-plus: check-for-plus-usage-endpoint install-gateway-crds ## Helm install NGF with NGINX Plus on configured kind cluster with local images. To build, load, and install with helm run make install-ngf-local-build-with-plus. 
+ @if [ "$(ENABLE_INFERENCE_EXTENSION)" = "true" ]; then \ + $(MAKE) install-inference-crds; \ + fi kubectl create namespace nginx-gateway || true kubectl -n nginx-gateway create secret generic nplus-license --from-file $(PLUS_LICENSE_FILE) || true helm install nginx-gateway $(CHART_DIR) --set nginx.image.repository=$(NGINX_PLUS_PREFIX) --wait --set nginxGateway.image.pullPolicy=Never --set nginx.service.type=NodePort --set nginxGateway.image.repository=$(PREFIX) --set nginxGateway.image.tag=$(TAG) --set nginx.image.tag=$(TAG) --set nginx.image.pullPolicy=Never --set nginxGateway.gwAPIExperimentalFeatures.enable=$(ENABLE_EXPERIMENTAL) -n nginx-gateway --set nginx.plus=true --set nginx.usage.endpoint=$(PLUS_USAGE_ENDPOINT) $(HELM_PARAMETERS) diff --git a/build/Dockerfile.nginx b/build/Dockerfile.nginx index 378e8c909b..84696eea82 100644 --- a/build/Dockerfile.nginx +++ b/build/Dockerfile.nginx @@ -23,7 +23,7 @@ RUN apk add --no-cache bash \ && ln -sf /dev/stderr /var/log/nginx/error.log COPY build/entrypoint.sh /agent/entrypoint.sh -COPY ${NJS_DIR}/httpmatches.js /usr/lib/nginx/modules/njs/httpmatches.js +COPY ${NJS_DIR}/ /usr/lib/nginx/modules/njs/ COPY ${NGINX_CONF_DIR}/nginx.conf /etc/nginx/nginx.conf COPY ${NGINX_CONF_DIR}/grpc-error-locations.conf /etc/nginx/grpc-error-locations.conf COPY ${NGINX_CONF_DIR}/grpc-error-pages.conf /etc/nginx/grpc-error-pages.conf diff --git a/build/Dockerfile.nginxplus b/build/Dockerfile.nginxplus index b92dc19516..6fb5d49773 100644 --- a/build/Dockerfile.nginxplus +++ b/build/Dockerfile.nginxplus @@ -29,7 +29,7 @@ RUN apk add --no-cache bash \ && ln -sf /dev/stderr /var/log/nginx/error.log COPY build/entrypoint.sh /agent/entrypoint.sh -COPY ${NJS_DIR}/httpmatches.js /usr/lib/nginx/modules/njs/httpmatches.js +COPY ${NJS_DIR}/ /usr/lib/nginx/modules/njs/ COPY ${NGINX_CONF_DIR}/nginx-plus.conf /etc/nginx/nginx.conf COPY ${NGINX_CONF_DIR}/grpc-error-locations.conf /etc/nginx/grpc-error-locations.conf COPY 
${NGINX_CONF_DIR}/grpc-error-pages.conf /etc/nginx/grpc-error-pages.conf diff --git a/charts/nginx-gateway-fabric/README.md b/charts/nginx-gateway-fabric/README.md index eb7f3ce114..2c55f514f8 100644 --- a/charts/nginx-gateway-fabric/README.md +++ b/charts/nginx-gateway-fabric/README.md @@ -245,7 +245,7 @@ The following table lists the configurable parameters of the NGINX Gateway Fabri | `nginx.usage.resolver` | The nameserver used to resolve the NGINX Plus usage reporting endpoint. Used with NGINX Instance Manager. | string | `""` | | `nginx.usage.secretName` | The name of the Secret containing the JWT for NGINX Plus usage reporting. Must exist in the same namespace that the NGINX Gateway Fabric control plane is running in (default namespace: nginx-gateway). | string | `"nplus-license"` | | `nginx.usage.skipVerify` | Disable client verification of the NGINX Plus usage reporting server certificate. | bool | `false` | -| `nginxGateway` | The nginxGateway section contains configuration for the NGINX Gateway Fabric control plane deployment. 
| object | `{"affinity":{},"autoscaling":{"enable":false},"config":{"logging":{"level":"info"}},"configAnnotations":{},"extraVolumeMounts":[],"extraVolumes":[],"gatewayClassAnnotations":{},"gatewayClassName":"nginx","gatewayControllerName":"gateway.nginx.org/nginx-gateway-controller","gwAPIExperimentalFeatures":{"enable":false},"image":{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"},"kind":"deployment","labels":{},"leaderElection":{"enable":true,"lockName":""},"lifecycle":{},"metrics":{"enable":true,"port":9113,"secure":false},"name":"","nodeSelector":{},"podAnnotations":{},"productTelemetry":{"enable":true},"readinessProbe":{"enable":true,"initialDelaySeconds":3,"port":8081},"replicas":1,"resources":{},"service":{"annotations":{},"labels":{}},"serviceAccount":{"annotations":{},"imagePullSecret":"","imagePullSecrets":[],"name":""},"snippetsFilters":{"enable":false},"terminationGracePeriodSeconds":30,"tolerations":[],"topologySpreadConstraints":[]}` | +| `nginxGateway` | The nginxGateway section contains configuration for the NGINX Gateway Fabric control plane deployment. 
| object | `{"affinity":{},"autoscaling":{"enable":false},"config":{"logging":{"level":"info"}},"configAnnotations":{},"extraVolumeMounts":[],"extraVolumes":[],"gatewayClassAnnotations":{},"gatewayClassName":"nginx","gatewayControllerName":"gateway.nginx.org/nginx-gateway-controller","gwAPIExperimentalFeatures":{"enable":false},"gwAPIInferenceExtension":{"enable":false},"image":{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"},"kind":"deployment","labels":{},"leaderElection":{"enable":true,"lockName":""},"lifecycle":{},"metrics":{"enable":true,"port":9113,"secure":false},"name":"","nodeSelector":{},"podAnnotations":{},"productTelemetry":{"enable":true},"readinessProbe":{"enable":true,"initialDelaySeconds":3,"port":8081},"replicas":1,"resources":{},"service":{"annotations":{},"labels":{}},"serviceAccount":{"annotations":{},"imagePullSecret":"","imagePullSecrets":[],"name":""},"snippetsFilters":{"enable":false},"terminationGracePeriodSeconds":30,"tolerations":[],"topologySpreadConstraints":[]}` | | `nginxGateway.affinity` | The affinity of the NGINX Gateway Fabric control plane pod. | object | `{}` | | `nginxGateway.autoscaling` | Autoscaling configuration for the NGINX Gateway Fabric control plane. | object | `{"enable":false}` | | `nginxGateway.autoscaling.enable` | Enable or disable Horizontal Pod Autoscaler for the control plane. | bool | `false` | @@ -257,6 +257,7 @@ The following table lists the configurable parameters of the NGINX Gateway Fabri | `nginxGateway.gatewayClassName` | The name of the GatewayClass that will be created as part of this release. Every NGINX Gateway Fabric must have a unique corresponding GatewayClass resource. NGINX Gateway Fabric only processes resources that belong to its class - i.e. have the "gatewayClassName" field resource equal to the class. | string | `"nginx"` | | `nginxGateway.gatewayControllerName` | The name of the Gateway controller. The controller name must be of the form: DOMAIN/PATH. 
The controller's domain is gateway.nginx.org. | string | `"gateway.nginx.org/nginx-gateway-controller"` | | `nginxGateway.gwAPIExperimentalFeatures.enable` | Enable the experimental features of Gateway API which are supported by NGINX Gateway Fabric. Requires the Gateway APIs installed from the experimental channel. | bool | `false` | +| `nginxGateway.gwAPIInferenceExtension.enable` | Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads. | bool | `false` | | `nginxGateway.image` | The image configuration for the NGINX Gateway Fabric control plane. | object | `{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"}` | | `nginxGateway.image.repository` | The NGINX Gateway Fabric image to use | string | `"ghcr.io/nginx/nginx-gateway-fabric"` | | `nginxGateway.kind` | The kind of the NGINX Gateway Fabric installation - currently, only deployment is supported. | string | `"deployment"` | diff --git a/charts/nginx-gateway-fabric/templates/clusterrole.yaml b/charts/nginx-gateway-fabric/templates/clusterrole.yaml index 8fc4da400e..9be339c04a 100644 --- a/charts/nginx-gateway-fabric/templates/clusterrole.yaml +++ b/charts/nginx-gateway-fabric/templates/clusterrole.yaml @@ -129,6 +129,22 @@ rules: {{- end }} verbs: - update +{{- if .Values.nginxGateway.gwAPIInferenceExtension.enable }} +- apiGroups: + - inference.networking.k8s.io + resources: + - inferencepools + verbs: + - get + - list + - watch +- apiGroups: + - inference.networking.k8s.io + resources: + - inferencepools/status + verbs: + - update +{{- end }} {{- if .Values.nginxGateway.leaderElection.enable }} - apiGroups: - coordination.k8s.io diff --git a/charts/nginx-gateway-fabric/templates/deployment.yaml b/charts/nginx-gateway-fabric/templates/deployment.yaml index 5bc292bdb4..604acd768c 100644 --- a/charts/nginx-gateway-fabric/templates/deployment.yaml +++ b/charts/nginx-gateway-fabric/templates/deployment.yaml @@ 
-100,6 +100,9 @@ spec: {{- if .Values.nginxGateway.gwAPIExperimentalFeatures.enable }} - --gateway-api-experimental-features {{- end }} + {{- if .Values.nginxGateway.gwAPIInferenceExtension.enable }} + - --gateway-api-inference-extension + {{- end }} {{- if .Values.nginxGateway.snippetsFilters.enable }} - --snippets-filters {{- end }} diff --git a/charts/nginx-gateway-fabric/values.schema.json b/charts/nginx-gateway-fabric/values.schema.json index 9f44991db3..c1456d2503 100644 --- a/charts/nginx-gateway-fabric/values.schema.json +++ b/charts/nginx-gateway-fabric/values.schema.json @@ -838,6 +838,20 @@ "title": "gwAPIExperimentalFeatures", "type": "object" }, + "gwAPIInferenceExtension": { + "properties": { + "enable": { + "default": false, + "description": "Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads.", + "required": [], + "title": "enable", + "type": "boolean" + } + }, + "required": [], + "title": "gwAPIInferenceExtension", + "type": "object" + }, "image": { "description": "The image configuration for the NGINX Gateway Fabric control plane.", "properties": { diff --git a/charts/nginx-gateway-fabric/values.yaml b/charts/nginx-gateway-fabric/values.yaml index 52f1e03e55..4e3747a9d1 100644 --- a/charts/nginx-gateway-fabric/values.yaml +++ b/charts/nginx-gateway-fabric/values.yaml @@ -210,6 +210,10 @@ nginxGateway: # APIs installed from the experimental channel. enable: false + gwAPIInferenceExtension: + # -- Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads. + enable: false + snippetsFilters: # -- Enable SnippetsFilters feature. SnippetsFilters allow inserting NGINX configuration into the generated NGINX # config for HTTPRoute and GRPCRoute resources. 
diff --git a/cmd/gateway/commands.go b/cmd/gateway/commands.go
index f334d499a7..76b0ecbc96 100644
--- a/cmd/gateway/commands.go
+++ b/cmd/gateway/commands.go
@@ -85,6 +85,7 @@ func createControllerCommand() *cobra.Command {
 		leaderElectionLockNameFlag = "leader-election-lock-name"
 		productTelemetryDisableFlag = "product-telemetry-disable"
 		gwAPIExperimentalFlag = "gateway-api-experimental-features"
+		gwAPIInferenceExtensionFlag = "gateway-api-inference-extension"
 		nginxDockerSecretFlag = "nginx-docker-secret" //nolint:gosec // not credentials
 		usageReportSecretFlag = "usage-report-secret"
 		usageReportEndpointFlag = "usage-report-endpoint"
@@ -151,6 +152,7 @@ func createControllerCommand() *cobra.Command {
 		}
 
 		gwExperimentalFeatures bool
+		gwInferenceExtension bool
 
 		disableProductTelemetry bool
 
@@ -270,6 +272,7 @@ func createControllerCommand() *cobra.Command {
 				},
 				Plus: plus,
 				ExperimentalFeatures: gwExperimentalFeatures,
+				InferenceExtension: gwInferenceExtension,
 				ImageSource: imageSource,
 				Flags: config.Flags{
 					Names: flagKeys,
@@ -430,6 +433,14 @@ func createControllerCommand() *cobra.Command {
 			"Requires the Gateway APIs installed from the experimental channel.",
 	)
 
+	cmd.Flags().BoolVar(
+		&gwInferenceExtension,
+		gwAPIInferenceExtensionFlag,
+		false,
+		"Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route "+
+			"traffic to AI workloads.",
+	)
+
 	cmd.Flags().Var(
 		&nginxDockerSecrets,
 		nginxDockerSecretFlag,
@@ -746,6 +757,20 @@ func createSleepCommand() *cobra.Command {
 	return cmd
 }
 
+func createEndpointPickerCommand() *cobra.Command {
+	cmd := &cobra.Command{
+		Use: "endpoint-picker",
+		Short: "Shim server for communication between NGINX and the Gateway API Inference Extension Endpoint Picker",
+		RunE: func(_ *cobra.Command, _ []string) error {
+			logger := ctlrZap.New().WithName("endpoint-picker-shim")
+			handler := createEndpointPickerHandler(realExtProcClientFactory(), logger)
+			return endpointPickerServer(handler)
+		},
+	}
+
+	return cmd
+}
+
 func parseFlags(flags *pflag.FlagSet) ([]string, []string) {
 	var flagKeys, flagValues []string
 
diff --git a/cmd/gateway/endpoint_picker.go b/cmd/gateway/endpoint_picker.go
new file mode 100644
index 0000000000..118f95684b
--- /dev/null
+++ b/cmd/gateway/endpoint_picker.go
@@ -0,0 +1,210 @@
+package main
+
+import (
+	"crypto/tls"
+	"errors"
+	"fmt"
+	"io"
+	"net"
+	"net/http"
+	"time"
+
+	corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
+	extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
+	"github.com/go-logr/logr"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials"
+	eppMetadata "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metadata"
+
+	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/types"
+)
+
+// extProcClientFactory creates a new ExternalProcessorClient and returns a close function.
+type extProcClientFactory func(target string) (extprocv3.ExternalProcessorClient, func() error, error)
+
+// endpointPickerServer starts an HTTP server on 127.0.0.1:<types.GoShimPort> that serves the provided handler.
+// It blocks until the server stops and returns the error from ListenAndServe.
+func endpointPickerServer(handler http.Handler) error {
+	server := &http.Server{
+		Addr:              fmt.Sprintf("127.0.0.1:%d", types.GoShimPort),
+		Handler:           handler,
+		ReadHeaderTimeout: 10 * time.Second,
+	}
+	return server.ListenAndServe()
+}
+
+// realExtProcClientFactory returns a factory that creates a new gRPC connection and client per request.
+// The returned close function closes the underlying connection.
+func realExtProcClientFactory() extProcClientFactory {
+	return func(target string) (extprocv3.ExternalProcessorClient, func() error, error) {
+		creds := credentials.NewTLS(&tls.Config{
+			// NOTE(review): certificate verification is disabled, so the EndpointPicker's identity is not
+			// authenticated. Add RootCAs (or pin the self-signed server cert) before relying on this channel.
+			InsecureSkipVerify: true,
+		})
+		conn, err := grpc.NewClient(target, grpc.WithTransportCredentials(creds))
+		if err != nil {
+			return nil, nil, err
+		}
+		client := extprocv3.NewExternalProcessorClient(conn)
+		return client, conn.Close, nil
+	}
+}
+
+// immediateResponseStatus maps the status code of an ext_proc ImmediateResponse to the HTTP status code
+// written to the client. net/http panics on codes below 100 (including 0, which the generated getters
+// yield when the status is unset), and codes outside 200-599 are not usable as a final response status,
+// so any such code is reported as http.StatusBadGateway instead.
+func immediateResponseStatus(code int) int {
+	if code < http.StatusOK || code > 599 {
+		return http.StatusBadGateway
+	}
+	return code
+}
+
+// createEndpointPickerHandler returns an http.Handler that forwards requests to the EndpointPicker.
+// The EndpointPicker address is read from the types.EPPEndpointHostHeader and types.EPPEndpointPortHeader
+// request headers; the endpoint it selects is returned in the eppMetadata.DestinationEndpointKey response header.
+func createEndpointPickerHandler(factory extProcClientFactory, logger logr.Logger) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		host := r.Header.Get(types.EPPEndpointHostHeader)
+		port := r.Header.Get(types.EPPEndpointPortHeader)
+		if host == "" || port == "" {
+			msg := fmt.Sprintf(
+				"missing at least one of required headers: %s and %s",
+				types.EPPEndpointHostHeader,
+				types.EPPEndpointPortHeader,
+			)
+			logger.Error(errors.New(msg), "error contacting EndpointPicker")
+			http.Error(w, msg, http.StatusBadRequest)
+			return
+		}
+
+		target := net.JoinHostPort(host, port)
+		logger.Info("Getting inference workload endpoint from EndpointPicker", "endpointPicker", target)
+
+		client, closeConn, err := factory(target)
+		if err != nil {
+			logger.Error(err, "error creating gRPC client")
+			http.Error(w, fmt.Sprintf("error creating gRPC client: %v", err), http.StatusInternalServerError)
+			return
+		}
+		defer func() {
+			if err := closeConn(); err != nil {
+				logger.Error(err, "error closing gRPC connection")
+			}
+		}()
+
+		stream, err := client.Process(r.Context())
+		if err != nil {
+			logger.Error(err, "error opening ext_proc stream")
+			http.Error(w, fmt.Sprintf("error opening ext_proc stream: %v", err), http.StatusBadGateway)
+			return
+		}
+
+		if code, err := sendRequest(stream, r); err != nil {
+			logger.Error(err, "error sending request")
+			http.Error(w, err.Error(), code)
+			return
+		}
+
+		// Receive responses until the stream ends, copying the selected endpoint into the response headers.
+		for {
+			resp, err := stream.Recv()
+			if errors.Is(err, io.EOF) {
+				break // End of stream
+			} else if err != nil {
+				logger.Error(err, "error receiving from ext_proc")
+				http.Error(w, fmt.Sprintf("error receiving from ext_proc: %v", err), http.StatusBadGateway)
+				return
+			}
+
+			if ir := resp.GetImmediateResponse(); ir != nil {
+				code := int(ir.GetStatus().GetCode())
+				body := ir.GetBody()
+				logger.Error(fmt.Errorf("code: %d, body: %s", code, body), "received immediate response")
+				// Guard against an unset or out-of-range status; http.Error would panic on e.g. code 0.
+				http.Error(w, string(body), immediateResponseStatus(code))
+				return
+			}
+
+			headers := resp.GetRequestHeaders().GetResponse().GetHeaderMutation().GetSetHeaders()
+			for _, h := range headers {
+				if h.GetHeader().GetKey() == eppMetadata.DestinationEndpointKey {
+					endpoint := string(h.GetHeader().GetRawValue())
+					w.Header().Set(h.GetHeader().GetKey(), endpoint)
+					logger.Info("Found endpoint", "endpoint", endpoint)
+				}
+			}
+		}
+		w.WriteHeader(http.StatusOK)
+	})
+}
+
+// sendRequest sends the request headers followed by the request body on the ext_proc stream and then
+// closes the send side. On failure it returns the HTTP status code to report together with the error.
+func sendRequest(stream extprocv3.ExternalProcessor_ProcessClient, r *http.Request) (int, error) {
+	if err := stream.Send(buildHeaderRequest(r)); err != nil {
+		return http.StatusBadGateway, fmt.Errorf("error sending headers: %w", err)
+	}
+
+	bodyReq, err := buildBodyRequest(r)
+	if err != nil {
+		return http.StatusInternalServerError, fmt.Errorf("error building body request: %w", err)
+	}
+
+	if err := stream.Send(bodyReq); err != nil {
+		return http.StatusBadGateway, fmt.Errorf("error sending body: %w", err)
+	}
+
+	if err := stream.CloseSend(); err != nil {
+		return http.StatusInternalServerError, fmt.Errorf("error closing stream: %w", err)
+	}
+
+	return 0, nil
+}
+
+// buildHeaderRequest converts the HTTP request headers into an ext_proc request-headers message,
+// emitting one HeaderValue per header value.
+func buildHeaderRequest(r *http.Request) *extprocv3.ProcessingRequest {
+	headers := make([]*corev3.HeaderValue, 0, len(r.Header))
+	for key, values := range r.Header {
+		for _, value := range values {
+			headers = append(headers, &corev3.HeaderValue{
+				Key:   key,
+				Value: value,
+			})
+		}
+	}
+
+	return &extprocv3.ProcessingRequest{
+		Request: &extprocv3.ProcessingRequest_RequestHeaders{
+			RequestHeaders: &extprocv3.HttpHeaders{
+				Headers:     &corev3.HeaderMap{Headers: headers},
+				EndOfStream: false,
+			},
+		},
+	}
+}
+
+// buildBodyRequest reads the whole HTTP request body into an ext_proc request-body message that marks
+// the end of the stream. It returns an error when the request declares an empty body.
+func buildBodyRequest(r *http.Request) (*extprocv3.ProcessingRequest, error) {
+	if r.ContentLength == 0 {
+		return nil, errors.New("request body is empty")
+	}
+
+	body, err := io.ReadAll(r.Body)
+	if err != nil {
+		return nil, fmt.Errorf("error reading request body: %w", err)
+	}
+
+	return &extprocv3.ProcessingRequest{
+		Request: &extprocv3.ProcessingRequest_RequestBody{
+			RequestBody: &extprocv3.HttpBody{
+				Body:        body,
+				EndOfStream: true,
+			},
+		},
+	}, nil
+}
diff --git a/cmd/gateway/endpoint_picker_test.go b/cmd/gateway/endpoint_picker_test.go
new file mode 100644
index 0000000000..99fd95aa90
--- /dev/null
+++ b/cmd/gateway/endpoint_picker_test.go
@@ -0,0 +1,289 @@
+package main
+
+import (
+	"context"
+	"errors"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
+	extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
+	typev3 "github.com/envoyproxy/go-control-plane/envoy/type/v3"
+	"github.com/go-logr/logr"
+	.
"github.com/onsi/gomega" + "google.golang.org/grpc" + "google.golang.org/grpc/metadata" + eppMetadata "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metadata" + + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/types" +) + +type mockExtProcClient struct { + ProcessFunc func( + context.Context, + ...grpc.CallOption, + ) (extprocv3.ExternalProcessor_ProcessClient, error) +} + +func (m *mockExtProcClient) Process( + ctx context.Context, + opts ...grpc.CallOption, +) (extprocv3.ExternalProcessor_ProcessClient, error) { + if m.ProcessFunc != nil { + return m.ProcessFunc(ctx, opts...) + } + return nil, errors.New("not implemented") +} + +type mockProcessClient struct { + SendFunc func(*extprocv3.ProcessingRequest) error + RecvFunc func() (*extprocv3.ProcessingResponse, error) + CloseSendFunc func() error + Ctx context.Context +} + +func (m *mockProcessClient) Send(req *extprocv3.ProcessingRequest) error { + if m.SendFunc != nil { + return m.SendFunc(req) + } + return nil +} + +func (m *mockProcessClient) Recv() (*extprocv3.ProcessingResponse, error) { + if m.RecvFunc != nil { + return m.RecvFunc() + } + return nil, io.EOF +} + +func (*mockProcessClient) RecvMsg(any) error { return nil } +func (*mockProcessClient) SendMsg(any) error { return nil } + +func (m *mockProcessClient) CloseSend() error { + if m.CloseSendFunc != nil { + return m.CloseSendFunc() + } + return nil +} + +func (m *mockProcessClient) Context() context.Context { + if m.Ctx != nil { + return m.Ctx + } + return context.Background() +} + +func (*mockProcessClient) Header() (metadata.MD, error) { return nil, nil } //nolint:nilnil // interface satisfier +func (*mockProcessClient) Trailer() metadata.MD { return nil } + +func TestEndpointPickerHandler_Success(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + // Prepare mock client to simulate gRPC responses + callCount := 0 + client := &mockProcessClient{ + SendFunc: func(*extprocv3.ProcessingRequest) error { return nil }, + RecvFunc: 
func() (*extprocv3.ProcessingResponse, error) { + if callCount == 0 { + callCount++ + resp := &extprocv3.ProcessingResponse{ + Response: &extprocv3.ProcessingResponse_RequestHeaders{ + RequestHeaders: &extprocv3.HeadersResponse{ + Response: &extprocv3.CommonResponse{ + HeaderMutation: &extprocv3.HeaderMutation{ + SetHeaders: []*corev3.HeaderValueOption{{ + Header: &corev3.HeaderValue{ + Key: eppMetadata.DestinationEndpointKey, + RawValue: []byte("test-value"), + }, + }}, + }, + }, + }, + }, + } + return resp, nil + } + return nil, io.EOF + }, + } + + extProcClient := &mockExtProcClient{ + ProcessFunc: func(context.Context, ...grpc.CallOption) (extprocv3.ExternalProcessor_ProcessClient, error) { + return client, nil + }, + } + + factory := func(string) (extprocv3.ExternalProcessorClient, func() error, error) { + return extProcClient, func() error { return nil }, nil + } + + h := createEndpointPickerHandler(factory, logr.Discard()) + req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader("test body")) + req.Header.Set(types.EPPEndpointHostHeader, "test-host") + req.Header.Set(types.EPPEndpointPortHeader, "1234") + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + + h.ServeHTTP(w, req) + + resp := w.Result() + g.Expect(resp.StatusCode).To(Equal(http.StatusOK)) + g.Expect(resp.Header.Get(eppMetadata.DestinationEndpointKey)).To(Equal("test-value")) +} + +func TestEndpointPickerHandler_ImmediateResponse(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + client := &mockProcessClient{ + SendFunc: func(*extprocv3.ProcessingRequest) error { return nil }, + RecvFunc: func() (*extprocv3.ProcessingResponse, error) { + resp := &extprocv3.ProcessingResponse{ + Response: &extprocv3.ProcessingResponse_ImmediateResponse{ + ImmediateResponse: &extprocv3.ImmediateResponse{ + Status: &typev3.HttpStatus{Code: http.StatusInternalServerError}, + Body: []byte("some error"), + }, + }, + } + return resp, nil + }, + } + + extClient := 
&mockExtProcClient{ + ProcessFunc: func(context.Context, ...grpc.CallOption) (extprocv3.ExternalProcessor_ProcessClient, error) { + return client, nil + }, + } + + factory := func(string) (extprocv3.ExternalProcessorClient, func() error, error) { + return extClient, func() error { return nil }, nil + } + + h := createEndpointPickerHandler(factory, logr.Discard()) + req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader("test body")) + req.Header.Set(types.EPPEndpointHostHeader, "test-host") + req.Header.Set(types.EPPEndpointPortHeader, "1234") + w := httptest.NewRecorder() + + h.ServeHTTP(w, req) + + resp := w.Result() + + g.Expect(resp.StatusCode).To(Equal(http.StatusInternalServerError)) + body, _ := io.ReadAll(resp.Body) + g.Expect(string(body)).To(ContainSubstring("some error")) +} + +func TestEndpointPickerHandler_Errors(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + runErrorTestCase := func(factory func(string) (extprocv3.ExternalProcessorClient, func() error, error), + setHeaders bool, + expectedStatus int, + expectedBodySubstring string, + ) { + h := createEndpointPickerHandler(factory, logr.Discard()) + req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader("test body")) + if setHeaders { + req.Header.Set(types.EPPEndpointHostHeader, "test-host") + req.Header.Set(types.EPPEndpointPortHeader, "1234") + } + w := httptest.NewRecorder() + h.ServeHTTP(w, req) + resp := w.Result() + g.Expect(resp.StatusCode).To(Equal(expectedStatus)) + body, _ := io.ReadAll(resp.Body) + g.Expect(string(body)).To(ContainSubstring(expectedBodySubstring)) + } + + // 1. Error creating gRPC client + factoryErr := errors.New("factory error") + factory := func(string) (extprocv3.ExternalProcessorClient, func() error, error) { + return nil, nil, factoryErr + } + runErrorTestCase(factory, true, http.StatusInternalServerError, "error creating gRPC client") + + // 2. 
Error opening ext_proc stream + extProcClient := &mockExtProcClient{ + ProcessFunc: func(context.Context, ...grpc.CallOption) (extprocv3.ExternalProcessor_ProcessClient, error) { + return nil, errors.New("process error") + }, + } + factory = func(string) (extprocv3.ExternalProcessorClient, func() error, error) { + return extProcClient, func() error { return nil }, nil + } + runErrorTestCase(factory, true, http.StatusBadGateway, "error opening ext_proc stream") + + // 3. Error sending headers + client := &mockProcessClient{ + SendFunc: func(*extprocv3.ProcessingRequest) error { + return errors.New("send headers error") + }, + RecvFunc: func() (*extprocv3.ProcessingResponse, error) { return nil, io.EOF }, + } + extProcClient = &mockExtProcClient{ + ProcessFunc: func(context.Context, ...grpc.CallOption) (extprocv3.ExternalProcessor_ProcessClient, error) { + return client, nil + }, + } + factory = func(string) (extprocv3.ExternalProcessorClient, func() error, error) { + return extProcClient, func() error { return nil }, nil + } + runErrorTestCase(factory, true, http.StatusBadGateway, "error sending headers") + + // 4a. 
Error building body request (content length 0) + client = &mockProcessClient{ + SendFunc: func(*extprocv3.ProcessingRequest) error { + return nil + }, + RecvFunc: func() (*extprocv3.ProcessingResponse, error) { return nil, io.EOF }, + } + extProcClient = &mockExtProcClient{ + ProcessFunc: func(context.Context, ...grpc.CallOption) (extprocv3.ExternalProcessor_ProcessClient, error) { + return client, nil + }, + } + factory = func(string) (extprocv3.ExternalProcessorClient, func() error, error) { + return extProcClient, func() error { return nil }, nil + } + h := createEndpointPickerHandler(factory, logr.Discard()) + req := httptest.NewRequest(http.MethodPost, "/", nil) // nil body, ContentLength = 0 + req.Header.Set(types.EPPEndpointHostHeader, "test-host") + req.Header.Set(types.EPPEndpointPortHeader, "1234") + w := httptest.NewRecorder() + h.ServeHTTP(w, req) + resp := w.Result() + g.Expect(resp.StatusCode).To(Equal(http.StatusInternalServerError)) + body, _ := io.ReadAll(resp.Body) + g.Expect(string(body)).To(ContainSubstring("request body is empty")) + + // 4b. Error sending body + client = &mockProcessClient{ + SendFunc: func(req *extprocv3.ProcessingRequest) error { + if req.GetRequestBody() != nil { + return errors.New("send body error") + } + return nil + }, + RecvFunc: func() (*extprocv3.ProcessingResponse, error) { return nil, io.EOF }, + } + extProcClient = &mockExtProcClient{ + ProcessFunc: func(context.Context, ...grpc.CallOption) (extprocv3.ExternalProcessor_ProcessClient, error) { + return client, nil + }, + } + factory = func(string) (extprocv3.ExternalProcessorClient, func() error, error) { + return extProcClient, func() error { return nil }, nil + } + runErrorTestCase(factory, true, http.StatusBadGateway, "error sending body") + + // 5. 
Error with empty headers + runErrorTestCase(factory, false, http.StatusBadRequest, "missing at least one of required headers") +} diff --git a/cmd/gateway/main.go b/cmd/gateway/main.go index 515fcc3f16..c932a4ee4c 100644 --- a/cmd/gateway/main.go +++ b/cmd/gateway/main.go @@ -25,6 +25,7 @@ func main() { createGenerateCertsCommand(), createInitializeCommand(), createSleepCommand(), + createEndpointPickerCommand(), ) if err := rootCmd.Execute(); err != nil { diff --git a/config/crd/inference-extension/kustomization.yaml b/config/crd/inference-extension/kustomization.yaml new file mode 100644 index 0000000000..6b6e210cf5 --- /dev/null +++ b/config/crd/inference-extension/kustomization.yaml @@ -0,0 +1,4 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: +- https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd?timeout=120&ref=v1.0.0 diff --git a/deploy/inference-nginx-plus/deploy.yaml b/deploy/inference-nginx-plus/deploy.yaml new file mode 100644 index 0000000000..025cfeb410 --- /dev/null +++ b/deploy/inference-nginx-plus/deploy.yaml @@ -0,0 +1,442 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: nginx-gateway +--- +apiVersion: v1 +automountServiceAccountToken: false +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway + namespace: nginx-gateway +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +rules: +- apiGroups: + - "" + 
resources: + - secrets + verbs: + - create + - update + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway +rules: +- apiGroups: + - "" + - apps + - autoscaling + resources: + - secrets + - configmaps + - serviceaccounts + - services + - deployments + - daemonsets + - horizontalpodautoscalers + verbs: + - create + - update + - delete + - list + - get + - watch +- apiGroups: + - "" + resources: + - namespaces + - pods + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - replicasets + verbs: + - get + - list +- apiGroups: + - "" + resources: + - nodes + verbs: + - list +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - list + - watch +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - gateway.networking.k8s.io + resources: + - gatewayclasses + - gateways + - httproutes + - referencegrants + - grpcroutes + verbs: + - list + - watch +- apiGroups: + - gateway.networking.k8s.io + resources: + - httproutes/status + - gateways/status + - gatewayclasses/status + - grpcroutes/status + verbs: + - update +- apiGroups: + - gateway.nginx.org + resources: + - nginxgateways + verbs: + - get + - list + - watch +- apiGroups: + - gateway.nginx.org + resources: + - nginxproxies + - clientsettingspolicies + - observabilitypolicies + - upstreamsettingspolicies + verbs: + - list + - watch +- apiGroups: + - gateway.nginx.org + resources: + - nginxgateways/status + - clientsettingspolicies/status + - observabilitypolicies/status + - upstreamsettingspolicies/status + verbs: + - update +- apiGroups: + - inference.networking.k8s.io + resources: + - inferencepools + verbs: + - get + - list + - watch +- apiGroups: + - 
inference.networking.k8s.io + resources: + - inferencepools/status + verbs: + - update +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - update +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: nginx-gateway-cert-generator +subjects: +- kind: ServiceAccount + name: nginx-gateway-cert-generator + namespace: nginx-gateway +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: nginx-gateway +subjects: +- kind: ServiceAccount + name: nginx-gateway + namespace: nginx-gateway +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway + namespace: nginx-gateway +spec: + ports: + - name: agent-grpc + port: 443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway + namespace: nginx-gateway +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + template: + metadata: + annotations: + 
prometheus.io/port: "9113" + prometheus.io/scrape: "true" + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + spec: + automountServiceAccountToken: true + containers: + - args: + - controller + - --gateway-ctlr-name=gateway.nginx.org/nginx-gateway-controller + - --gatewayclass=nginx + - --config=nginx-gateway-config + - --service=nginx-gateway + - --agent-tls-secret=agent-tls + - --nginx-docker-secret=nginx-plus-registry-secret + - --nginx-plus + - --usage-report-secret=nplus-license + - --usage-report-enforce-initial-report=true + - --metrics-port=9113 + - --health-port=8081 + - --leader-election-lock-name=nginx-gateway-leader-election + - --gateway-api-inference-extension + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + - name: INSTANCE_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['app.kubernetes.io/instance'] + - name: IMAGE_NAME + value: ghcr.io/nginx/nginx-gateway-fabric:edge + image: ghcr.io/nginx/nginx-gateway-fabric:edge + imagePullPolicy: Always + name: nginx-gateway + ports: + - containerPort: 8443 + name: agent-grpc + - containerPort: 9113 + name: metrics + - containerPort: 8081 + name: health + readinessProbe: + httpGet: + path: /readyz + port: health + initialDelaySeconds: 3 + periodSeconds: 1 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsGroup: 1001 + runAsUser: 101 + seccompProfile: + type: RuntimeDefault + volumeMounts: + - mountPath: /var/run/secrets/ngf + name: nginx-agent-tls + securityContext: + fsGroup: 1001 + runAsNonRoot: true + serviceAccountName: nginx-gateway + terminationGracePeriodSeconds: 30 + volumes: + - name: nginx-agent-tls + secret: + secretName: server-tls +--- +apiVersion: batch/v1 +kind: Job +metadata: + labels: + 
app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +spec: + template: + metadata: + annotations: null + spec: + containers: + - args: + - generate-certs + - --service=nginx-gateway + - --cluster-domain=cluster.local + - --server-tls-secret=server-tls + - --agent-tls-secret=agent-tls + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + image: ghcr.io/nginx/nginx-gateway-fabric:edge + imagePullPolicy: Always + name: cert-generator + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsGroup: 1001 + runAsUser: 101 + seccompProfile: + type: RuntimeDefault + restartPolicy: Never + securityContext: + fsGroup: 1001 + runAsNonRoot: true + serviceAccountName: nginx-gateway-cert-generator + ttlSecondsAfterFinished: 30 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: GatewayClass +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx +spec: + controllerName: gateway.nginx.org/nginx-gateway-controller + parametersRef: + group: gateway.nginx.org + kind: NginxProxy + name: nginx-gateway-proxy-config + namespace: nginx-gateway +--- +apiVersion: gateway.nginx.org/v1alpha1 +kind: NginxGateway +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-config + namespace: nginx-gateway +spec: + logging: + level: info +--- +apiVersion: gateway.nginx.org/v1alpha2 +kind: NginxProxy +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-proxy-config + namespace: nginx-gateway +spec: + kubernetes: + deployment: + container: + image: + pullPolicy: Always 
+ repository: private-registry.nginx.com/nginx-gateway-fabric/nginx-plus + tag: edge + replicas: 1 + service: + externalTrafficPolicy: Local + type: LoadBalancer diff --git a/deploy/inference/deploy.yaml b/deploy/inference/deploy.yaml new file mode 100644 index 0000000000..49a8f85053 --- /dev/null +++ b/deploy/inference/deploy.yaml @@ -0,0 +1,438 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: nginx-gateway +--- +apiVersion: v1 +automountServiceAccountToken: false +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway + namespace: nginx-gateway +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +rules: +- apiGroups: + - "" + resources: + - secrets + verbs: + - create + - update + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway +rules: +- apiGroups: + - "" + - apps + - autoscaling + resources: + - secrets + - configmaps + - serviceaccounts + - services + - deployments + - daemonsets + - horizontalpodautoscalers + verbs: + - create + - update + - delete + - list + - get + - watch +- apiGroups: + - "" + resources: + - namespaces + - pods + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - replicasets + verbs: + - get + - list +- apiGroups: + - "" + resources: + - nodes + verbs: + - 
list +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - list + - watch +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - gateway.networking.k8s.io + resources: + - gatewayclasses + - gateways + - httproutes + - referencegrants + - grpcroutes + verbs: + - list + - watch +- apiGroups: + - gateway.networking.k8s.io + resources: + - httproutes/status + - gateways/status + - gatewayclasses/status + - grpcroutes/status + verbs: + - update +- apiGroups: + - gateway.nginx.org + resources: + - nginxgateways + verbs: + - get + - list + - watch +- apiGroups: + - gateway.nginx.org + resources: + - nginxproxies + - clientsettingspolicies + - observabilitypolicies + - upstreamsettingspolicies + verbs: + - list + - watch +- apiGroups: + - gateway.nginx.org + resources: + - nginxgateways/status + - clientsettingspolicies/status + - observabilitypolicies/status + - upstreamsettingspolicies/status + verbs: + - update +- apiGroups: + - inference.networking.k8s.io + resources: + - inferencepools + verbs: + - get + - list + - watch +- apiGroups: + - inference.networking.k8s.io + resources: + - inferencepools/status + verbs: + - update +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - update +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: nginx-gateway-cert-generator +subjects: +- kind: ServiceAccount + name: nginx-gateway-cert-generator + namespace: nginx-gateway +--- +apiVersion: 
rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: nginx-gateway +subjects: +- kind: ServiceAccount + name: nginx-gateway + namespace: nginx-gateway +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway + namespace: nginx-gateway +spec: + ports: + - name: agent-grpc + port: 443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway + namespace: nginx-gateway +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + template: + metadata: + annotations: + prometheus.io/port: "9113" + prometheus.io/scrape: "true" + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + spec: + automountServiceAccountToken: true + containers: + - args: + - controller + - --gateway-ctlr-name=gateway.nginx.org/nginx-gateway-controller + - --gatewayclass=nginx + - --config=nginx-gateway-config + - --service=nginx-gateway + - --agent-tls-secret=agent-tls + - --metrics-port=9113 + - --health-port=8081 + - --leader-election-lock-name=nginx-gateway-leader-election + - --gateway-api-inference-extension + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_UID + valueFrom: + fieldRef: + 
fieldPath: metadata.uid + - name: INSTANCE_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['app.kubernetes.io/instance'] + - name: IMAGE_NAME + value: ghcr.io/nginx/nginx-gateway-fabric:edge + image: ghcr.io/nginx/nginx-gateway-fabric:edge + imagePullPolicy: Always + name: nginx-gateway + ports: + - containerPort: 8443 + name: agent-grpc + - containerPort: 9113 + name: metrics + - containerPort: 8081 + name: health + readinessProbe: + httpGet: + path: /readyz + port: health + initialDelaySeconds: 3 + periodSeconds: 1 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsGroup: 1001 + runAsUser: 101 + seccompProfile: + type: RuntimeDefault + volumeMounts: + - mountPath: /var/run/secrets/ngf + name: nginx-agent-tls + securityContext: + fsGroup: 1001 + runAsNonRoot: true + serviceAccountName: nginx-gateway + terminationGracePeriodSeconds: 30 + volumes: + - name: nginx-agent-tls + secret: + secretName: server-tls +--- +apiVersion: batch/v1 +kind: Job +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +spec: + template: + metadata: + annotations: null + spec: + containers: + - args: + - generate-certs + - --service=nginx-gateway + - --cluster-domain=cluster.local + - --server-tls-secret=server-tls + - --agent-tls-secret=agent-tls + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + image: ghcr.io/nginx/nginx-gateway-fabric:edge + imagePullPolicy: Always + name: cert-generator + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsGroup: 1001 + runAsUser: 101 + seccompProfile: + type: RuntimeDefault + restartPolicy: Never + securityContext: + fsGroup: 1001 + runAsNonRoot: true + serviceAccountName: nginx-gateway-cert-generator + 
ttlSecondsAfterFinished: 30 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: GatewayClass +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx +spec: + controllerName: gateway.nginx.org/nginx-gateway-controller + parametersRef: + group: gateway.nginx.org + kind: NginxProxy + name: nginx-gateway-proxy-config + namespace: nginx-gateway +--- +apiVersion: gateway.nginx.org/v1alpha1 +kind: NginxGateway +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-config + namespace: nginx-gateway +spec: + logging: + level: info +--- +apiVersion: gateway.nginx.org/v1alpha2 +kind: NginxProxy +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-proxy-config + namespace: nginx-gateway +spec: + kubernetes: + deployment: + container: + image: + pullPolicy: Always + repository: ghcr.io/nginx/nginx-gateway-fabric/nginx + tag: edge + replicas: 1 + service: + externalTrafficPolicy: Local + type: LoadBalancer diff --git a/docs/proposals/gateway-inference-extension.md b/docs/proposals/gateway-inference-extension.md index fb6abe3ae2..0b549dc3f6 100644 --- a/docs/proposals/gateway-inference-extension.md +++ b/docs/proposals/gateway-inference-extension.md @@ -106,6 +106,14 @@ InferenceObjective represents the desired state of a specific model use case. As It is my impression that this API is purely for the EPP to handle, and does not need to be handled by NGINX Gateway Fabric. +### Inference Status + +Each InferencePool publishes two conditions that together describe its overall state. The first is the `Accepted` condition, which communicates whether the pool is referenced by an HTTPRoute that the Gateway has accepted. 
When the route is not accepted, this condition is explicitly set to `False` with the reason `InferencePoolReasonHTTPRouteNotAccepted`, making it clear that the Gateway rejected the route referencing the pool. + +The second is the `ResolvedRefs` condition, which reflects whether the `EndpointPickerRef` associated with the pool is valid. If it is misconfigured, for example by being an unsupported kind, being left undefined, or pointing to a non-existent Service, this condition is set to `False` with the reason `InferencePoolReasonInvalidExtensionRef`. + +The status of an InferencePool records the Gateway as its parent reference and associates it with the relevant conditions; when all conditions are `True`, the pool is valid and traffic can be directed to it. + ### Personas and Processes Two new personas are introduced, the `Inference Platform Owner/Admin` and `Inference Workload Owner`. @@ -126,7 +134,11 @@ For development purposes, the [Getting started guide](https://gateway-api-infere ## Security Considerations -If the Endpoint Picker (EPP) supports it, we should use a secure TLS connection. This ensures an encrypted and authenticated communication channel between the NGINX data plane and the EPP. For production environments, an integration with `cert-manager` is likely the best solution, as we recommend this for various other secure channels within the NGF ecosystem. Otherwise, our control plane may have to provision certificates in the default case (similar to NGF's startup `cert-generator` Job). +A secure TLS gRPC connection between the Endpoint Picker (EPP) and the Go Shim Server would be ideal. This would ensure an encrypted and authenticated communication channel between the NGINX data plane and the EPP. 
However, this is not possible with the current EPP implementation and is a [known issue](https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/582), with a separate issue currently open to [provide further support for TLS](https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/1556). + +Since NGF does not provision the EPP and is not in charge of modifying it, and the current [EPP Helm template](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/config/charts/inferencepool/templates/epp-deployment.yaml) does not support providing extra volume/volumeMounts, there is no way to mount a certificate on the EPP. Even if specifying extra volume/volumeMounts were allowed through a feature request, the implementation on our side would be hacky and unconventional. + +Given that the gateway inference project remains in `Alpha` and explicitly warns against production use, we will follow existing implementations and use an insecure gRPC connection to the EPP. For our goal of meeting the API’s core specifications with a basic solution, secure gRPC is not strictly required at this stage. At some point, there may be opportunities for attaching Policies (like a BackendTLSPolicy) to an InferenceModel to secure the NGINX -> AI workload connection, however that is not in scope for now. 
diff --git a/examples/cafe-example/cafe-routes.yaml b/examples/cafe-example/cafe-routes.yaml deleted file mode 100644 index 67927335cb..0000000000 --- a/examples/cafe-example/cafe-routes.yaml +++ /dev/null @@ -1,37 +0,0 @@ -apiVersion: gateway.networking.k8s.io/v1 -kind: HTTPRoute -metadata: - name: coffee -spec: - parentRefs: - - name: gateway - sectionName: http - hostnames: - - "cafe.example.com" - rules: - - matches: - - path: - type: PathPrefix - value: /coffee - backendRefs: - - name: coffee - port: 80 ---- -apiVersion: gateway.networking.k8s.io/v1 -kind: HTTPRoute -metadata: - name: tea -spec: - parentRefs: - - name: gateway - sectionName: http - hostnames: - - "cafe.example.com" - rules: - - matches: - - path: - type: Exact - value: /tea - backendRefs: - - name: tea - port: 80 diff --git a/examples/cafe-example/cafe.yaml b/examples/cafe-example/cafe.yaml deleted file mode 100644 index 2d03ae59ff..0000000000 --- a/examples/cafe-example/cafe.yaml +++ /dev/null @@ -1,65 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: coffee -spec: - replicas: 1 - selector: - matchLabels: - app: coffee - template: - metadata: - labels: - app: coffee - spec: - containers: - - name: coffee - image: nginxdemos/nginx-hello:plain-text - ports: - - containerPort: 8080 ---- -apiVersion: v1 -kind: Service -metadata: - name: coffee -spec: - ports: - - port: 80 - targetPort: 8080 - protocol: TCP - name: http - selector: - app: coffee ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: tea -spec: - replicas: 1 - selector: - matchLabels: - app: tea - template: - metadata: - labels: - app: tea - spec: - containers: - - name: tea - image: nginxdemos/nginx-hello:plain-text - ports: - - containerPort: 8080 ---- -apiVersion: v1 -kind: Service -metadata: - name: tea -spec: - ports: - - port: 80 - targetPort: 8080 - protocol: TCP - name: http - selector: - app: tea diff --git a/examples/cafe-example/eppinvalidservice.yaml b/examples/cafe-example/eppinvalidservice.yaml 
new file mode 100644 index 0000000000..f0a0e62ea9 --- /dev/null +++ b/examples/cafe-example/eppinvalidservice.yaml @@ -0,0 +1,35 @@ +apiVersion: inference.networking.k8s.io/v1 +kind: InferencePool +metadata: + name: pool-with-invalid-epp + namespace: gateway-conformance-app-backend +spec: + selector: + matchLabels: + app: primary-inference-model-server + targetPorts: + - number: 3000 + endpointPickerRef: + name: non-existent-epp-svc + kind: Service + port: + number: 9002 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: httproute-for-invalid-epp-pool + namespace: gateway-conformance-app-backend +spec: + parentRefs: + - name: conformance-primary + namespace: gateway-conformance-infra + rules: + - backendRefs: + - name: pool-with-invalid-epp + kind: InferencePool + group: inference.networking.k8s.io + matches: + - path: + type: PathPrefix + value: /invalid-epp-test \ No newline at end of file diff --git a/examples/cafe-example/epprouting.yaml b/examples/cafe-example/epprouting.yaml new file mode 100644 index 0000000000..dd199f1ce6 --- /dev/null +++ b/examples/cafe-example/epprouting.yaml @@ -0,0 +1,23 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: httproute-for-primary-gw + namespace: gateway-conformance-app-backend +spec: + parentRefs: + - group: gateway.networking.k8s.io + kind: Gateway + name: conformance-primary + namespace: gateway-conformance-infra + sectionName: http + hostnames: + - "primary.example.com" + rules: + - backendRefs: + - group: inference.networking.k8s.io + kind: InferencePool + name: primary-inference-pool + matches: + - path: + type: PathPrefix + value: /primary-gateway-test diff --git a/examples/cafe-example/eppunavailablefailopen.yaml b/examples/cafe-example/eppunavailablefailopen.yaml new file mode 100644 index 0000000000..6549cd9188 --- /dev/null +++ b/examples/cafe-example/eppunavailablefailopen.yaml @@ -0,0 +1,23 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: 
HTTPRoute +metadata: + name: httproute-for-failopen-pool-gw + namespace: gateway-conformance-app-backend +spec: + parentRefs: + - group: gateway.networking.k8s.io + kind: Gateway + name: conformance-secondary + namespace: gateway-conformance-infra + sectionName: http + hostnames: + - "secondary.example.com" + rules: + - backendRefs: + - group: inference.networking.k8s.io + kind: InferencePool + name: secondary-inference-pool # Use secondary-inference-pool because its failureMode is set to FailOpen + matches: + - path: + type: PathPrefix + value: /failopen-pool-test \ No newline at end of file diff --git a/examples/cafe-example/gateway.yaml b/examples/cafe-example/gateway.yaml index e6507f613b..61acb91d17 100644 --- a/examples/cafe-example/gateway.yaml +++ b/examples/cafe-example/gateway.yaml @@ -1,11 +1,394 @@ +# Base Kubernetes resources for the Gateway API Inference Extension conformance tests. +# This includes namespaces and a minimal set of resources (Gateway, Backend) +# required by many tests. More specific resources should be defined within +# individual test files or other resource directories (e.g., sample_backends). + +--- +apiVersion: v1 +kind: Namespace +metadata: + name: gateway-conformance-infra + labels: + gateway-conformance: infra +--- +apiVersion: v1 +kind: Namespace +metadata: + name: gateway-conformance-app-backend + labels: + gateway-conformance: backend +--- +# A basic Gateway resource that allows HTTPRoutes from all namespaces. +# Tests can use this as a parent reference for routes that target InferencePools. 
apiVersion: gateway.networking.k8s.io/v1 kind: Gateway metadata: - name: gateway + name: conformance-primary + namespace: gateway-conformance-infra spec: gatewayClassName: nginx listeners: - name: http port: 80 protocol: HTTP - hostname: "*.example.com" + allowedRoutes: + namespaces: + from: All + kinds: + - group: gateway.networking.k8s.io + kind: HTTPRoute +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: conformance-secondary + namespace: gateway-conformance-infra +spec: + gatewayClassName: nginx + listeners: + - name: http + port: 80 + protocol: HTTP + hostname: "secondary.example.com" + allowedRoutes: + namespaces: + from: All + +### The following defines the essential resources for the gateway conformance test. +### All resources are created in the 'gateway-conformance-app-backend' namespace. +--- +# Deploys a mock backend service to act as a model server. +apiVersion: apps/v1 +kind: Deployment +metadata: + name: primary-inference-model-server-deployment + namespace: gateway-conformance-app-backend + labels: + app: primary-inference-model-server +spec: + replicas: 3 + selector: + matchLabels: + app: primary-inference-model-server + template: + metadata: + labels: + app: primary-inference-model-server + spec: + containers: + - name: echoserver + image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd + ports: + - containerPort: 3000 + readinessProbe: + httpGet: + path: / + port: 3000 + initialDelaySeconds: 3 + periodSeconds: 5 + failureThreshold: 2 + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP +--- +# Deploys a secondary mock backend service to act as a model server. 
+apiVersion: apps/v1 +kind: Deployment +metadata: + name: secondary-inference-model-server-deployment + namespace: gateway-conformance-app-backend + labels: + app: secondary-inference-model-server +spec: + replicas: 3 + selector: + matchLabels: + app: secondary-inference-model-server + template: + metadata: + labels: + app: secondary-inference-model-server + spec: + containers: + - name: echoserver + image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd + ports: + - containerPort: 3000 + readinessProbe: + httpGet: + path: / + port: 3000 + initialDelaySeconds: 3 + periodSeconds: 5 + failureThreshold: 2 + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP +--- +# --- Primary InferencePool Definition --- +apiVersion: inference.networking.k8s.io/v1 +kind: InferencePool +metadata: + name: primary-inference-pool + namespace: gateway-conformance-app-backend +spec: + selector: + matchLabels: + app: primary-inference-model-server + targetPorts: + - number: 3000 + endpointPickerRef: + name: primary-endpoint-picker-svc + port: + number: 9002 +--- +# --- Primary Conformance EPP service Definition --- +apiVersion: v1 +kind: Service +metadata: + name: primary-endpoint-picker-svc + namespace: gateway-conformance-app-backend +spec: + selector: + app: primary-app-backend-epp + ports: + - protocol: TCP + port: 9002 + targetPort: 9002 + appProtocol: http2 + type: ClusterIP +--- +# --- Primary Conformance EPP Deployment --- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: primary-app-endpoint-picker + namespace: gateway-conformance-app-backend + labels: + app: primary-app-backend-epp +spec: + replicas: 1 + selector: + matchLabels: + app: primary-app-backend-epp + template: + metadata: + labels: + app: primary-app-backend-epp + spec: + # Conservatively, this timeout should mirror the 
longest grace period of the pods within the pool + terminationGracePeriodSeconds: 130 + containers: + - name: epp + image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v1.0.0 + imagePullPolicy: Always + args: + - --pool-name + - "primary-inference-pool" + - --pool-namespace + - "gateway-conformance-app-backend" + - --v + - "4" + - --zap-encoder + - "json" + - --grpc-port + - "9002" + - --grpc-health-port + - "9003" + - "--config-file" + - "/config/conformance-plugins.yaml" + env: + - name: ENABLE_REQ_HEADER_BASED_SCHEDULER_FOR_TESTING # Used for conformance test. + value: "true" + ports: + - containerPort: 9002 + - containerPort: 9003 + - name: metrics + containerPort: 9090 + livenessProbe: + grpc: + port: 9003 + service: inference-extension + initialDelaySeconds: 5 + periodSeconds: 10 + readinessProbe: + grpc: + port: 9003 + service: inference-extension + initialDelaySeconds: 5 + periodSeconds: 10 + volumeMounts: + - name: plugins-config-volume + mountPath: "/config" + volumes: + - name: plugins-config-volume + configMap: + name: plugins-config +--- +# --- Secondary InferencePool Definition --- +apiVersion: inference.networking.k8s.io/v1 +kind: InferencePool +metadata: + name: secondary-inference-pool + namespace: gateway-conformance-app-backend +spec: + selector: + matchLabels: + app: secondary-inference-model-server + targetPorts: + - number: 3000 + endpointPickerRef: + name: secondary-endpoint-picker-svc + failureMode: FailOpen + port: + number: 9002 +--- +# --- Secondary Conformance EPP service Definition --- +apiVersion: v1 +kind: Service +metadata: + name: secondary-endpoint-picker-svc + namespace: gateway-conformance-app-backend +spec: + selector: + app: secondary-app-backend-epp + ports: + - protocol: TCP + port: 9002 + targetPort: 9002 + appProtocol: http2 + type: ClusterIP +--- +# --- Secondary Conformance EPP Deployment --- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: secondary-app-endpoint-picker + 
namespace: gateway-conformance-app-backend + labels: + app: secondary-app-backend-epp +spec: + replicas: 1 + selector: + matchLabels: + app: secondary-app-backend-epp + template: + metadata: + labels: + app: secondary-app-backend-epp + spec: + # Conservatively, this timeout should mirror the longest grace period of the pods within the pool + terminationGracePeriodSeconds: 130 + containers: + - name: epp + image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v1.0.0 + imagePullPolicy: Always + args: + - --pool-name + - "secondary-inference-pool" + - --pool-namespace + - "gateway-conformance-app-backend" + - --v + - "4" + - --zap-encoder + - "json" + - --grpc-port + - "9002" + - --grpc-health-port + - "9003" + - "--config-file" + - "/config/conformance-plugins.yaml" + env: + - name: ENABLE_REQ_HEADER_BASED_SCHEDULER_FOR_TESTING # Used for conformance test. + value: "true" + ports: + - containerPort: 9002 + - containerPort: 9003 + - name: metrics + containerPort: 9090 + livenessProbe: + grpc: + port: 9003 + service: inference-extension + initialDelaySeconds: 5 + periodSeconds: 10 + readinessProbe: + grpc: + port: 9003 + service: inference-extension + initialDelaySeconds: 5 + periodSeconds: 10 + volumeMounts: + - name: plugins-config-volume + mountPath: "/config" + volumes: + - name: plugins-config-volume + configMap: + name: plugins-config +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: plugins-config + namespace: gateway-conformance-app-backend +data: + conformance-plugins.yaml: | + apiVersion: inference.networking.x-k8s.io/v1alpha1 + kind: EndpointPickerConfig + plugins: + - type: header-based-testing-filter + schedulingProfiles: + - name: conformance-profile + plugins: + - pluginRef: header-based-testing-filter +--- +# --- Required Role and RoleBinding for Conformance Test for EPP --- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: inference-model-reader + namespace: 
gateway-conformance-app-backend +rules: +- apiGroups: ["inference.networking.x-k8s.io"] + resources: ["inferenceobjectives", "inferencepools"] + verbs: ["get", "list", "watch"] +- apiGroups: ["inference.networking.k8s.io"] + resources: ["inferencepools"] + verbs: ["get", "list", "watch"] +- apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: epp-to-inference-model-reader + namespace: gateway-conformance-app-backend +subjects: +- kind: ServiceAccount + name: default + namespace: gateway-conformance-app-backend +roleRef: + kind: Role + name: inference-model-reader + apiGroup: rbac.authorization.k8s.io \ No newline at end of file diff --git a/examples/cafe-example/httproutemultiplerules.yaml b/examples/cafe-example/httproutemultiplerules.yaml new file mode 100644 index 0000000000..e506fec959 --- /dev/null +++ b/examples/cafe-example/httproutemultiplerules.yaml @@ -0,0 +1,26 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: httproute-multiple-rules-different-pools + namespace: gateway-conformance-app-backend +spec: + parentRefs: + - name: conformance-primary + namespace: gateway-conformance-infra + rules: + - matches: + - path: + type: PathPrefix + value: /primary + backendRefs: + - name: primary-inference-pool + kind: InferencePool + group: inference.networking.k8s.io + - matches: + - path: + type: PathPrefix + value: /secondary + backendRefs: + - name: secondary-inference-pool + kind: InferencePool + group: inference.networking.k8s.io diff --git a/examples/cafe-example/httproutesmultiplegateways.yaml b/examples/cafe-example/httproutesmultiplegateways.yaml new file mode 100644 index 0000000000..caded16d89 --- /dev/null +++ b/examples/cafe-example/httproutesmultiplegateways.yaml @@ -0,0 +1,44 @@ +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: route-for-primary-gateway + namespace: 
gateway-conformance-app-backend +spec: + parentRefs: + - kind: Gateway + name: conformance-primary + namespace: gateway-conformance-infra + hostnames: + - "primary.example.com" + rules: + - backendRefs: + - group: inference.networking.k8s.io + kind: InferencePool + name: primary-inference-pool + matches: + - path: + type: PathPrefix + value: /test-primary-gateway +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: route-for-secondary-gateway + namespace: gateway-conformance-app-backend +spec: + parentRefs: + - kind: Gateway + name: conformance-secondary + namespace: gateway-conformance-infra + hostnames: + - "secondary.example.com" + rules: + - backendRefs: + - group: inference.networking.k8s.io + kind: InferencePool + name: secondary-inference-pool + matches: + - path: + type: PathPrefix + value: /test-secondary-gateway diff --git a/examples/cafe-example/inference-pool-port-validation.yaml b/examples/cafe-example/inference-pool-port-validation.yaml new file mode 100644 index 0000000000..9c78117d97 --- /dev/null +++ b/examples/cafe-example/inference-pool-port-validation.yaml @@ -0,0 +1,79 @@ +# --- HTTPRoute Scenario 1: Port Unspecified --- +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: httproute-pool-port-unspecified + namespace: gateway-conformance-app-backend +spec: + parentRefs: + - group: gateway.networking.k8s.io + kind: Gateway + name: conformance-primary + namespace: gateway-conformance-infra + sectionName: http + hostnames: + - "port-unspecified.example.com" + rules: + - backendRefs: + - group: inference.networking.k8s.io + kind: InferencePool + name: primary-inference-pool + # Port is intentionally unspecified here + matches: + - path: + type: PathPrefix + value: /test-port-unspecified +--- +# --- HTTPRoute Scenario 2: Port Matching --- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: httproute-pool-port-matching + namespace: gateway-conformance-app-backend 
+spec: + parentRefs: + - group: gateway.networking.k8s.io + kind: Gateway + name: conformance-primary + namespace: gateway-conformance-infra + sectionName: http + hostnames: + - "port-matching.example.com" + rules: + - backendRefs: + - group: inference.networking.k8s.io + kind: InferencePool + name: primary-inference-pool + port: 3000 # Port matches InferencePool's targetPortNumber + matches: + - path: + type: PathPrefix + value: /test-port-matching +--- +# --- HTTPRoute Scenario 3: Port Non-Matching --- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: httproute-pool-port-non-matching + namespace: gateway-conformance-app-backend +spec: + parentRefs: + - group: gateway.networking.k8s.io + kind: Gateway + name: conformance-primary + namespace: gateway-conformance-infra + sectionName: http + hostnames: + - "port-non-matching.example.com" + rules: + - backendRefs: + - group: inference.networking.k8s.io + kind: InferencePool + name: primary-inference-pool + port: 8888 # Port does NOT match InferencePool's targetPortNumber + matches: + - path: + type: PathPrefix + value: /test-port-non-matching +--- diff --git a/examples/cafe-example/inferencepool-resolvedrefs.yaml b/examples/cafe-example/inferencepool-resolvedrefs.yaml new file mode 100644 index 0000000000..81a7091963 --- /dev/null +++ b/examples/cafe-example/inferencepool-resolvedrefs.yaml @@ -0,0 +1,54 @@ +# conformance/tests/basic/inferencepool_resolvedrefs_condition.yaml + +# This manifest defines the initial resources for the +# inferencepool_resolvedrefs_condition.go conformance test. 
+ +# --- HTTPRoute for Primary Gateway (conformance-primary) --- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: httproute-for-primary-gw + namespace: gateway-conformance-app-backend +spec: + parentRefs: + - group: gateway.networking.k8s.io + kind: Gateway + name: conformance-primary + namespace: gateway-conformance-infra + sectionName: http + hostnames: + - "primary.example.com" + rules: + - backendRefs: + - group: inference.networking.k8s.io + kind: InferencePool + name: primary-inference-pool + matches: + - path: + type: PathPrefix + value: /primary-gateway-test +--- +# --- HTTPRoute for Secondary Gateway (conformance-secondary) --- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: httproute-for-secondary-gw + namespace: gateway-conformance-app-backend +spec: + parentRefs: + - group: gateway.networking.k8s.io + kind: Gateway + name: conformance-secondary + namespace: gateway-conformance-infra + sectionName: http + hostnames: + - "secondary.example.com" + rules: + - backendRefs: + - group: inference.networking.k8s.io + kind: InferencePool + name: primary-inference-pool + matches: + - path: + type: PathPrefix + value: /secondary-gateway-test \ No newline at end of file diff --git a/examples/helm/inference-nginx-plus/values.yaml b/examples/helm/inference-nginx-plus/values.yaml new file mode 100644 index 0000000000..1d89293db2 --- /dev/null +++ b/examples/helm/inference-nginx-plus/values.yaml @@ -0,0 +1,10 @@ +nginxGateway: + name: nginx-gateway + gwAPIInferenceExtension: + enable: true + +nginx: + plus: true + image: + repository: private-registry.nginx.com/nginx-gateway-fabric/nginx-plus + imagePullSecret: nginx-plus-registry-secret diff --git a/examples/helm/inference/values.yaml b/examples/helm/inference/values.yaml new file mode 100644 index 0000000000..0bb54b57e9 --- /dev/null +++ b/examples/helm/inference/values.yaml @@ -0,0 +1,4 @@ +nginxGateway: + name: nginx-gateway + gwAPIInferenceExtension: + 
enable: true diff --git a/go.mod b/go.mod index 0b35914cbb..b568b83870 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/nginx/nginx-gateway-fabric/v2 go 1.24.2 require ( + github.com/envoyproxy/go-control-plane/envoy v1.32.4 github.com/fsnotify/fsnotify v1.9.0 github.com/go-logr/logr v1.4.3 github.com/google/go-cmp v0.7.0 @@ -28,6 +29,7 @@ require ( k8s.io/klog/v2 v2.130.1 sigs.k8s.io/controller-runtime v0.22.1 sigs.k8s.io/gateway-api v1.3.0 + sigs.k8s.io/gateway-api-inference-extension v1.0.0 ) require ( @@ -36,8 +38,10 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v5 v5.0.3 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/emicklei/go-restful/v3 v3.12.2 // indirect + github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect github.com/evanphx/json-patch/v5 v5.9.11 // indirect github.com/fxamacker/cbor/v2 v2.9.0 // indirect github.com/go-logr/stdr v1.2.2 // indirect @@ -59,6 +63,7 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/common v0.66.1 // indirect @@ -80,7 +85,7 @@ require ( golang.org/x/sync v0.17.0 // indirect golang.org/x/sys v0.35.0 // indirect golang.org/x/term v0.34.0 // indirect - golang.org/x/time v0.9.0 // indirect + golang.org/x/time v0.12.0 // indirect golang.org/x/tools v0.36.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/genproto/googleapis/api 
v0.0.0-20250825161204-c5933d9347a5 // indirect diff --git a/go.sum b/go.sum index b8716c1de8..2779075b54 100644 --- a/go.sum +++ b/go.sum @@ -16,6 +16,8 @@ github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1x github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 h1:aQ3y1lwWyqYPiWZThqv1aFbZMiM9vblcSArJRf2Irls= +github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A= @@ -39,6 +41,10 @@ github.com/ebitengine/purego v0.8.4 h1:CF7LEKg5FFOsASUj0+QwaXf8Ht6TlFxg09+S9wz0o github.com/ebitengine/purego v0.8.4/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU= github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/envoyproxy/go-control-plane/envoy v1.32.4 h1:jb83lalDRZSpPWW2Z7Mck/8kXZ5CQAFYVjQcdVIr83A= +github.com/envoyproxy/go-control-plane/envoy v1.32.4/go.mod h1:Gzjc5k8JcJswLjAx1Zm+wSYE20UrLtt7JZMWiWQXQEw= +github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8= +github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU= github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k= github.com/evanphx/json-patch v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ= github.com/evanphx/json-patch/v5 v5.9.11 
h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= @@ -66,8 +72,8 @@ github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+Gr github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= -github.com/goccy/go-yaml v1.17.1 h1:LI34wktB2xEE3ONG/2Ar54+/HJVBriAGJ55PHls4YuY= -github.com/goccy/go-yaml v1.17.1/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= +github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw= +github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= @@ -155,6 +161,8 @@ github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNH github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -218,8 +226,8 @@ github.com/yusufpapurcu/wmi v1.2.4 
h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 h1:sbiXRNDSWJOTobXh5HyQKjq6wUC5tNybqjIqDpAY4CU= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0/go.mod h1:69uWxva0WgAA/4bu2Yy70SLDBwZXuQ6PbBpbsa5iZrQ= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q= go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24= @@ -283,8 +291,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= -golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= -golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= +golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools 
v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= @@ -336,6 +344,8 @@ sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY= sigs.k8s.io/gateway-api v1.3.0 h1:q6okN+/UKDATola4JY7zXzx40WO4VISk7i9DIfOvr9M= sigs.k8s.io/gateway-api v1.3.0/go.mod h1:d8NV8nJbaRbEKem+5IuxkL8gJGOZ+FJ+NvOIltV8gDk= +sigs.k8s.io/gateway-api-inference-extension v1.0.0 h1:GsHvlu1Cn1t6+vrHoPdNNlpwKxf/y1HuQSlUjd58Ds8= +sigs.k8s.io/gateway-api-inference-extension v1.0.0/go.mod h1:qxSY10qt2+YnZJ43VfpMXa6wpiENPderI2BnNZ4Kxfc= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= diff --git a/internal/controller/config/config.go b/internal/controller/config/config.go index e23f73ca59..d1e77df07b 100644 --- a/internal/controller/config/config.go +++ b/internal/controller/config/config.go @@ -48,6 +48,8 @@ type Config struct { Plus bool // ExperimentalFeatures indicates if experimental features are enabled. ExperimentalFeatures bool + // InferenceExtension indicates if Gateway API Inference Extension support is enabled. + InferenceExtension bool // SnippetsFilters indicates if SnippetsFilters are enabled. 
SnippetsFilters bool } diff --git a/internal/controller/handler.go b/internal/controller/handler.go index f79e9dc268..5e3b91a8ee 100644 --- a/internal/controller/handler.go +++ b/internal/controller/handler.go @@ -13,9 +13,11 @@ import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" ngfAPI "github.com/nginx/nginx-gateway-fabric/v2/apis/v1alpha1" @@ -79,6 +81,8 @@ type eventHandlerConfig struct { controlConfigNSName types.NamespacedName // gatewayCtlrName is the name of the NGF controller. gatewayCtlrName string + // gatewayInstanceName is the name of the NGINX Gateway instance. + gatewayInstanceName string // gatewayClassName is the name of the GatewayClass. gatewayClassName string // plus is whether or not we are running NGINX Plus. @@ -116,8 +120,10 @@ type eventHandlerImpl struct { // objectFilters contains all created objectFilters, with the key being a filterKey objectFilters map[filterKey]objectFilter - cfg eventHandlerConfig - lock sync.Mutex + cfg eventHandlerConfig + lock sync.RWMutex + leaderLock sync.RWMutex + leader bool } // newEventHandlerImpl creates a new eventHandlerImpl. @@ -170,6 +176,10 @@ func (h *eventHandlerImpl) HandleEventBatch(ctx context.Context, logger logr.Log // enable is called when the pod becomes leader to ensure the provisioner has // the latest configuration. 
func (h *eventHandlerImpl) enable(ctx context.Context) { + h.leaderLock.Lock() + h.leader = true + h.leaderLock.Unlock() + h.sendNginxConfig(ctx, h.cfg.logger, h.cfg.processor.GetLatestGraph()) } @@ -187,6 +197,9 @@ func (h *eventHandlerImpl) sendNginxConfig(ctx context.Context, logger logr.Logg return } + // ensure headless "shadow" Services are created for any referenced InferencePools + h.ensureInferencePoolServices(ctx, gr.ReferencedInferencePools) + for _, gw := range gr.Gateways { go func() { if err := h.cfg.nginxProvisioner.RegisterGateway(ctx, gw, gw.DeploymentName.Name); err != nil { @@ -348,17 +361,19 @@ func (h *eventHandlerImpl) updateStatuses(ctx context.Context, gr *graph.Graph, transitionTime, h.cfg.gatewayCtlrName, ) + inferencePoolReqs := status.PrepareInferencePoolRequests(gr.ReferencedInferencePools, transitionTime) reqs := make( []status.UpdateRequest, 0, - len(gcReqs)+len(routeReqs)+len(polReqs)+len(ngfPolReqs)+len(snippetsFilterReqs), + len(gcReqs)+len(routeReqs)+len(polReqs)+len(ngfPolReqs)+len(snippetsFilterReqs)+len(inferencePoolReqs), ) reqs = append(reqs, gcReqs...) reqs = append(reqs, routeReqs...) reqs = append(reqs, polReqs...) reqs = append(reqs, ngfPolReqs...) reqs = append(reqs, snippetsFilterReqs...) + reqs = append(reqs, inferencePoolReqs...) h.cfg.statusUpdater.UpdateGroup(ctx, groupAllExceptGateways, reqs...) @@ -547,8 +562,8 @@ func (h *eventHandlerImpl) getDeploymentContext(ctx context.Context) (dataplane. // GetLatestConfiguration gets the latest configuration. 
func (h *eventHandlerImpl) GetLatestConfiguration() []*dataplane.Configuration { - h.lock.Lock() - defer h.lock.Unlock() + h.lock.RLock() + defer h.lock.RUnlock() configs := make([]*dataplane.Configuration, 0, len(h.latestConfigurations)) for _, cfg := range h.latestConfigurations { @@ -574,6 +589,111 @@ func objectFilterKey(obj client.Object, nsName types.NamespacedName) filterKey { return filterKey(fmt.Sprintf("%T_%s_%s", obj, nsName.Namespace, nsName.Name)) } +// ensureInferencePoolServices ensures a headless Service exists and is up to date for each InferencePool. +func (h *eventHandlerImpl) ensureInferencePoolServices( + ctx context.Context, + pools map[types.NamespacedName]*graph.ReferencedInferencePool, +) { + if !h.isLeader() { + return + } + + for _, pool := range pools { + if pool.Source == nil { + continue + } + + selectors := make(map[string]string) + for k, v := range pool.Source.Spec.Selector.MatchLabels { + selectors[string(k)] = string(v) + } + + // v1 of InferencePool only supports a single port right now + ports := []v1.ServicePort{ + { + Port: int32(pool.Source.Spec.TargetPorts[0].Number), + TargetPort: intstr.FromInt32(int32(pool.Source.Spec.TargetPorts[0].Number)), + }, + } + + labels := map[string]string{ + controller.AppManagedByLabel: controller.CreateNginxResourceName( + h.cfg.gatewayInstanceName, + h.cfg.gatewayClassName, + ), + } + + svc := &v1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: controller.CreateInferencePoolServiceName(pool.Source.Name), + Namespace: pool.Source.Namespace, + Labels: labels, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: pool.Source.APIVersion, + Kind: pool.Source.Kind, + Name: pool.Source.Name, + UID: pool.Source.UID, + }, + }, + }, + Spec: v1.ServiceSpec{ + ClusterIP: v1.ClusterIPNone, // headless + Selector: selectors, + Ports: ports, + }, + } + + svcCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + res, err := controllerutil.CreateOrUpdate( + svcCtx, + h.cfg.k8sClient, + svc, + 
serviceSpecSetter(svc, svc.Spec, svc.ObjectMeta), + ) + if err != nil { + cancel() + msg := "Failed to upsert headless Service for InferencePool" + h.cfg.logger.Error(err, msg, "Service", svc.Name, "InferencePool", pool.Source.Name) + h.cfg.eventRecorder.Eventf( + svc, + v1.EventTypeWarning, + "ServiceCreateOrUpdateFailed", + "%s %q: %v", msg, pool.Source.Name, err, + ) + continue + } + cancel() + + if res == controllerutil.OperationResultCreated || res == controllerutil.OperationResultUpdated { + h.cfg.logger.Info( + fmt.Sprintf("Successfully %s headless Service for InferencePool", res), + "Service", svc.Name, "InferencePool", pool.Source.Name, + ) + } + } +} + +func serviceSpecSetter( + service *v1.Service, + spec v1.ServiceSpec, + objectMeta metav1.ObjectMeta, +) controllerutil.MutateFn { + return func() error { + service.Labels = objectMeta.Labels + service.Spec = spec + return nil + } +} + +// isLeader returns whether or not this handler is the leader. +func (h *eventHandlerImpl) isLeader() bool { + h.leaderLock.RLock() + defer h.leaderLock.RUnlock() + + return h.leader +} + /* Handler Callback functions diff --git a/internal/controller/handler_test.go b/internal/controller/handler_test.go index ec9fe05848..4c23a12e55 100644 --- a/internal/controller/handler_test.go +++ b/internal/controller/handler_test.go @@ -12,11 +12,13 @@ import ( "go.uber.org/zap" v1 "k8s.io/api/core/v1" discoveryV1 "k8s.io/api/discovery/v1" + apiErrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" ngfAPI "github.com/nginx/nginx-gateway-fabric/v2/apis/v1alpha1" @@ -149,6 +151,7 @@ var _ = Describe("eventHandler", func() { metricsCollector: collectors.NewControllerNoopCollector(), }) 
Expect(handler.cfg.graphBuiltHealthChecker.ready).To(BeFalse()) + handler.leader = true }) AfterEach(func() { @@ -518,6 +521,115 @@ var _ = Describe("eventHandler", func() { Expect(handler.cfg.graphBuiltHealthChecker.readyCheck(nil)).To(Succeed()) }) + It("should create a headless Service for each referenced InferencePool", func() { + namespace := "test-ns" + poolName1 := "pool1" + poolName2 := "pool2" + poolUID1 := types.UID("uid1") + poolUID2 := types.UID("uid2") + + pool1 := &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolName1, + Namespace: namespace, + UID: poolUID1, + }, + Spec: inference.InferencePoolSpec{ + Selector: inference.LabelSelector{ + MatchLabels: map[inference.LabelKey]inference.LabelValue{"app": "foo"}, + }, + TargetPorts: []inference.Port{ + {Number: 8081}, + }, + }, + } + + g := &graph.Graph{ + Gateways: map[types.NamespacedName]*graph.Gateway{ + {}: { + Source: &gatewayv1.Gateway{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "gateway", + }, + }, + Valid: true, + }, + }, + ReferencedInferencePools: map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: namespace, Name: poolName1}: {Source: pool1}, + {Namespace: namespace, Name: poolName2}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolName2, + Namespace: namespace, + UID: poolUID2, + }, + Spec: inference.InferencePoolSpec{ + Selector: inference.LabelSelector{ + MatchLabels: map[inference.LabelKey]inference.LabelValue{"app": "bar"}, + }, + TargetPorts: []inference.Port{ + {Number: 9090}, + }, + }, + }, + }, + }, + } + + fakeProcessor.ProcessReturns(g) + + e := &events.UpsertEvent{Resource: &gatewayv1.HTTPRoute{}} + batch := []any{e} + + handler.HandleEventBatch(context.Background(), logr.Discard(), batch) + + // Check Service for pool1 + svc1 := &v1.Service{} + svcName1 := controller.CreateInferencePoolServiceName(poolName1) + err := fakeK8sClient.Get(context.Background(), types.NamespacedName{Name: 
svcName1, Namespace: namespace}, svc1) + Expect(err).ToNot(HaveOccurred()) + Expect(svc1.Spec.ClusterIP).To(Equal(v1.ClusterIPNone)) + Expect(svc1.Spec.Selector).To(HaveKeyWithValue("app", "foo")) + Expect(svc1.Spec.Ports).To(HaveLen(1)) + Expect(svc1.Spec.Ports[0].Port).To(Equal(int32(8081))) + Expect(svc1.OwnerReferences).To(HaveLen(1)) + Expect(svc1.OwnerReferences[0].Name).To(Equal(poolName1)) + Expect(svc1.OwnerReferences[0].UID).To(Equal(poolUID1)) + + // Check Service for pool2 + svc2 := &v1.Service{} + svcName2 := controller.CreateInferencePoolServiceName(poolName2) + err = fakeK8sClient.Get(context.Background(), types.NamespacedName{Name: svcName2, Namespace: namespace}, svc2) + Expect(err).ToNot(HaveOccurred()) + Expect(svc2.Spec.ClusterIP).To(Equal(v1.ClusterIPNone)) + Expect(svc2.Spec.Selector).To(HaveKeyWithValue("app", "bar")) + Expect(svc2.Spec.Ports).To(HaveLen(1)) + Expect(svc2.Spec.Ports[0].Port).To(Equal(int32(9090))) + Expect(svc2.OwnerReferences).To(HaveLen(1)) + Expect(svc2.OwnerReferences[0].Name).To(Equal(poolName2)) + Expect(svc2.OwnerReferences[0].UID).To(Equal(poolUID2)) + + // Now update pool1's selector and ensure the Service selector is updated + updatedSelector := map[inference.LabelKey]inference.LabelValue{"app": "baz"} + pool1.Spec.Selector.MatchLabels = updatedSelector + + // Simulate the updated pool in the graph + g.ReferencedInferencePools[types.NamespacedName{Namespace: namespace, Name: poolName1}].Source = pool1 + fakeProcessor.ProcessReturns(g) + + e = &events.UpsertEvent{Resource: &inference.InferencePool{}} + batch = []any{e} + handler.HandleEventBatch(context.Background(), logr.Discard(), batch) + + // Check that the Service selector was updated + svc1 = &v1.Service{} + err = fakeK8sClient.Get(context.Background(), types.NamespacedName{Name: svcName1, Namespace: namespace}, svc1) + Expect(err).ToNot(HaveOccurred()) + Expect(svc1.Spec.Selector).To(HaveKeyWithValue("app", "baz")) + }) + It("should panic for an unknown event 
type", func() { e := &struct{}{} @@ -688,3 +800,156 @@ var _ = Describe("getDeploymentContext", func() { }) }) }) + +var _ = Describe("ensureInferencePoolServices", func() { + var ( + handler *eventHandlerImpl + fakeK8sClient client.Client + fakeEventRecorder *record.FakeRecorder + namespace = "test-ns" + poolName = "my-inference-pool" + poolUID = types.UID("pool-uid") + ) + + BeforeEach(func() { + fakeK8sClient = fake.NewFakeClient() + fakeEventRecorder = record.NewFakeRecorder(1) + handler = newEventHandlerImpl(eventHandlerConfig{ + ctx: context.Background(), + k8sClient: fakeK8sClient, + statusQueue: status.NewQueue(), + eventRecorder: fakeEventRecorder, + logger: logr.Discard(), + }) + // Set as leader so ensureInferencePoolServices will run + handler.leader = true + }) + + It("creates a headless Service for a referenced InferencePool", func() { + pools := map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: namespace, Name: poolName}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolName, + Namespace: namespace, + UID: poolUID, + }, + Spec: inference.InferencePoolSpec{ + Selector: inference.LabelSelector{ + MatchLabels: map[inference.LabelKey]inference.LabelValue{"app": "foo"}, + }, + TargetPorts: []inference.Port{ + {Number: 8080}, + }, + }, + }, + }, + } + + handler.ensureInferencePoolServices(context.Background(), pools) + + // The Service should have been created + svc := &v1.Service{} + svcName := controller.CreateInferencePoolServiceName(poolName) + err := fakeK8sClient.Get(context.Background(), types.NamespacedName{Name: svcName, Namespace: namespace}, svc) + Expect(err).ToNot(HaveOccurred()) + Expect(svc.Spec.ClusterIP).To(Equal(v1.ClusterIPNone)) + Expect(svc.Spec.Selector).To(HaveKeyWithValue("app", "foo")) + Expect(svc.Spec.Ports).To(HaveLen(1)) + Expect(svc.Spec.Ports[0].Port).To(Equal(int32(8080))) + Expect(svc.OwnerReferences).To(HaveLen(1)) + 
Expect(svc.OwnerReferences[0].Name).To(Equal(poolName)) + Expect(svc.OwnerReferences[0].UID).To(Equal(poolUID)) + }) + + It("does nothing if not leader", func() { + handler.leader = false + pools := map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: namespace, Name: poolName}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolName, + Namespace: namespace, + UID: poolUID, + }, + Spec: inference.InferencePoolSpec{ + Selector: inference.LabelSelector{ + MatchLabels: map[inference.LabelKey]inference.LabelValue{"app": "foo"}, + }, + TargetPorts: []inference.Port{ + {Number: 8080}, + }, + }, + }, + }, + } + + handler.ensureInferencePoolServices(context.Background(), pools) + svc := &v1.Service{} + svcName := controller.CreateInferencePoolServiceName(poolName) + err := fakeK8sClient.Get(context.Background(), types.NamespacedName{Name: svcName, Namespace: namespace}, svc) + Expect(err).To(HaveOccurred()) + }) + + It("skips pools with nil Source", func() { + pools := map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: namespace, Name: poolName}: { + Source: nil, + }, + } + handler.ensureInferencePoolServices(context.Background(), pools) + // Should not panic or create anything + svc := &v1.Service{} + svcName := controller.CreateInferencePoolServiceName(poolName) + err := fakeK8sClient.Get(context.Background(), types.NamespacedName{Name: svcName, Namespace: namespace}, svc) + Expect(err).To(HaveOccurred()) + }) + + It("emits an event if Service creation fails", func() { + // Use a client that will fail on CreateOrUpdate + handler.cfg.k8sClient = &badFakeClient{} + handler.leader = true + + pools := map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: namespace, Name: poolName}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolName, + Namespace: namespace, + UID: poolUID, + }, + Spec: inference.InferencePoolSpec{ + Selector: inference.LabelSelector{ + 
MatchLabels: map[inference.LabelKey]inference.LabelValue{"app": "foo"}, + }, + TargetPorts: []inference.Port{ + {Number: 8080}, + }, + }, + }, + }, + } + + handler.ensureInferencePoolServices(context.Background(), pools) + Eventually(func() int { return len(fakeEventRecorder.Events) }).Should(BeNumerically(">=", 1)) + event := <-fakeEventRecorder.Events + Expect(event).To(ContainSubstring("ServiceCreateOrUpdateFailed")) + }) +}) + +// badFakeClient always returns an error on Create or Update. +type badFakeClient struct { + client.Client +} + +func (*badFakeClient) Get(context.Context, client.ObjectKey, client.Object, ...client.GetOption) error { + return apiErrors.NewNotFound(v1.Resource("service"), "not-found") +} + +func (*badFakeClient) Create(context.Context, client.Object, ...client.CreateOption) error { + return errors.New("create error") +} + +func (*badFakeClient) Update(context.Context, client.Object, ...client.UpdateOption) error { + return errors.New("update error") +} diff --git a/internal/controller/manager.go b/internal/controller/manager.go index a4e9fd9cf0..dc9c4835bd 100644 --- a/internal/controller/manager.go +++ b/internal/controller/manager.go @@ -32,6 +32,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/metrics" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" k8spredicate "sigs.k8s.io/controller-runtime/pkg/predicate" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" gatewayv1alpha2 "sigs.k8s.io/gateway-api/apis/v1alpha2" gatewayv1alpha3 "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -95,6 +96,7 @@ func init() { utilruntime.Must(autoscalingv2.AddToScheme(scheme)) utilruntime.Must(authv1.AddToScheme(scheme)) utilruntime.Must(rbacv1.AddToScheme(scheme)) + utilruntime.Must(inference.Install(scheme)) } func StartManager(cfg config.Config) error { @@ -218,6 +220,7 @@ func StartManager(cfg config.Config) error { NginxDockerSecretNames: cfg.NginxDockerSecretNames, 
PlusUsageConfig: &cfg.UsageReportConfig, NginxOneConsoleTelemetryConfig: cfg.NginxOneConsoleTelemetryConfig, + InferenceExtension: cfg.InferenceExtension, }, ) if err != nil { @@ -251,6 +254,7 @@ func StartManager(cfg config.Config) error { gatewayPodConfig: cfg.GatewayPodConfig, controlConfigNSName: controlConfigNSName, gatewayCtlrName: cfg.GatewayCtlrName, + gatewayInstanceName: cfg.GatewayPodConfig.InstanceName, gatewayClassName: cfg.GatewayClassName, plus: cfg.Plus, statusQueue: statusQueue, @@ -536,6 +540,18 @@ func registerControllers( controllerRegCfgs = append(controllerRegCfgs, gwExpFeatures...) } + if cfg.InferenceExtension { + inferenceExt := []ctlrCfg{ + { + objectType: &inference.InferencePool{}, + options: []controller.Option{ + controller.WithK8sPredicate(k8spredicate.GenerationChangedPredicate{}), + }, + }, + } + controllerRegCfgs = append(controllerRegCfgs, inferenceExt...) + } + if cfg.ConfigName != "" { controllerRegCfgs = append(controllerRegCfgs, ctlrCfg{ @@ -761,6 +777,10 @@ func prepareFirstEventBatchPreparerArgs(cfg config.Config) ([]client.Object, []c ) } + if cfg.InferenceExtension { + objectLists = append(objectLists, &inference.InferencePoolList{}) + } + if cfg.SnippetsFilters { objectLists = append( objectLists, diff --git a/internal/controller/manager_test.go b/internal/controller/manager_test.go index 60d7b0e5d5..76e613a1f6 100644 --- a/internal/controller/manager_test.go +++ b/internal/controller/manager_test.go @@ -14,6 +14,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" gatewayv1alpha2 "sigs.k8s.io/gateway-api/apis/v1alpha2" gatewayv1alpha3 "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -47,9 +48,7 @@ func TestPrepareFirstEventBatchPreparerArgs(t *testing.T) { { name: "base case", cfg: config.Config{ - 
GatewayClassName: gcName, - ExperimentalFeatures: false, - SnippetsFilters: false, + GatewayClassName: gcName, }, expectedObjects: []client.Object{ &gatewayv1.GatewayClass{ObjectMeta: metav1.ObjectMeta{Name: "nginx"}}, @@ -75,7 +74,6 @@ func TestPrepareFirstEventBatchPreparerArgs(t *testing.T) { cfg: config.Config{ GatewayClassName: gcName, ExperimentalFeatures: true, - SnippetsFilters: false, }, expectedObjects: []client.Object{ &gatewayv1.GatewayClass{ObjectMeta: metav1.ObjectMeta{Name: "nginx"}}, @@ -99,12 +97,37 @@ func TestPrepareFirstEventBatchPreparerArgs(t *testing.T) { &ngfAPIv1alpha1.UpstreamSettingsPolicyList{}, }, }, + { + name: "inference extension enabled", + cfg: config.Config{ + GatewayClassName: gcName, + InferenceExtension: true, + }, + expectedObjects: []client.Object{ + &gatewayv1.GatewayClass{ObjectMeta: metav1.ObjectMeta{Name: "nginx"}}, + }, + expectedObjectLists: []client.ObjectList{ + &apiv1.ServiceList{}, + &apiv1.SecretList{}, + &apiv1.NamespaceList{}, + &discoveryV1.EndpointSliceList{}, + &gatewayv1.HTTPRouteList{}, + &gatewayv1.GatewayList{}, + &gatewayv1beta1.ReferenceGrantList{}, + &ngfAPIv1alpha2.NginxProxyList{}, + &gatewayv1.GRPCRouteList{}, + partialObjectMetadataList, + &inference.InferencePoolList{}, + &ngfAPIv1alpha1.ClientSettingsPolicyList{}, + &ngfAPIv1alpha2.ObservabilityPolicyList{}, + &ngfAPIv1alpha1.UpstreamSettingsPolicyList{}, + }, + }, { name: "snippets filters enabled", cfg: config.Config{ - GatewayClassName: gcName, - ExperimentalFeatures: false, - SnippetsFilters: true, + GatewayClassName: gcName, + SnippetsFilters: true, }, expectedObjects: []client.Object{ &gatewayv1.GatewayClass{ObjectMeta: metav1.ObjectMeta{Name: "nginx"}}, @@ -127,10 +150,11 @@ func TestPrepareFirstEventBatchPreparerArgs(t *testing.T) { }, }, { - name: "experimental and snippets filters enabled", + name: "experimental, inference, and snippets filters enabled", cfg: config.Config{ GatewayClassName: gcName, ExperimentalFeatures: true, + 
InferenceExtension: true, SnippetsFilters: true, }, expectedObjects: []client.Object{ @@ -147,6 +171,7 @@ func TestPrepareFirstEventBatchPreparerArgs(t *testing.T) { &gatewayv1beta1.ReferenceGrantList{}, &ngfAPIv1alpha2.NginxProxyList{}, partialObjectMetadataList, + &inference.InferencePoolList{}, &gatewayv1alpha3.BackendTLSPolicyList{}, &gatewayv1alpha2.TLSRouteList{}, &gatewayv1.GRPCRouteList{}, diff --git a/internal/controller/nginx/conf/nginx-plus.conf b/internal/controller/nginx/conf/nginx-plus.conf index f2b0ec0dc8..50ba9f970c 100644 --- a/internal/controller/nginx/conf/nginx-plus.conf +++ b/internal/controller/nginx/conf/nginx-plus.conf @@ -13,6 +13,7 @@ http { include /etc/nginx/conf.d/*.conf; include /etc/nginx/mime.types; js_import /usr/lib/nginx/modules/njs/httpmatches.js; + js_import /usr/lib/nginx/modules/njs/epp.js; default_type application/octet-stream; diff --git a/internal/controller/nginx/conf/nginx.conf b/internal/controller/nginx/conf/nginx.conf index 791994fdf8..6c4f6be8d9 100644 --- a/internal/controller/nginx/conf/nginx.conf +++ b/internal/controller/nginx/conf/nginx.conf @@ -13,6 +13,7 @@ http { include /etc/nginx/conf.d/*.conf; include /etc/nginx/mime.types; js_import /usr/lib/nginx/modules/njs/httpmatches.js; + js_import /usr/lib/nginx/modules/njs/epp.js; default_type application/octet-stream; diff --git a/internal/controller/nginx/config/http/config.go b/internal/controller/nginx/config/http/config.go index 3a76ab30b4..dedfd04349 100644 --- a/internal/controller/nginx/config/http/config.go +++ b/internal/controller/nginx/config/http/config.go @@ -26,26 +26,58 @@ type Server struct { type LocationType string const ( + // InternalLocationType defines an internal location that is only accessible within NGINX. InternalLocationType LocationType = "internal" + // ExternalLocationType defines a normal external location that is accessible by clients. 
ExternalLocationType LocationType = "external" + // RedirectLocationType defines an external location that redirects to an internal location + // based on HTTP matching conditions. RedirectLocationType LocationType = "redirect" + // InferenceExternalLocationType defines an external location that is used for calling NJS + // to get the inference workload endpoint and redirects to the internal location that will proxy_pass + // to that endpoint. + InferenceExternalLocationType LocationType = "inference-external" + // InferenceInternalLocationType defines an internal location that is used for calling NJS + // to get the inference workload endpoint and redirects to the internal location that will proxy_pass + // to that endpoint. This is used when an HTTP redirect location is also defined that redirects + // to this internal inference location. + InferenceInternalLocationType LocationType = "inference-internal" ) // Location holds all configuration for an HTTP location. type Location struct { - Path string - ProxyPass string - HTTPMatchKey string + // Return specifies a return directive (e.g., HTTP status or redirect) for this location block. + Return *Return + // ProxySSLVerify controls SSL verification for upstreams when proxying requests. + ProxySSLVerify *ProxySSLVerify + // ProxyPass is the upstream backend (URL or name) to which requests are proxied. + ProxyPass string + // HTTPMatchKey is the key for associating HTTP match rules, used for routing and NJS module logic. + HTTPMatchKey string + // MirrorSplitClientsVariableName is the variable name for split_clients, used in traffic mirroring scenarios. MirrorSplitClientsVariableName string - Type LocationType - ProxySetHeaders []Header - ProxySSLVerify *ProxySSLVerify - Return *Return - ResponseHeaders ResponseHeaders - Rewrites []string - MirrorPaths []string - Includes []shared.Include - GRPC bool + // EPPInternalPath is the internal path for the inference NJS module to redirect to. 
+ EPPInternalPath string + // EPPHost is the host for the EndpointPicker, used for inference routing. + EPPHost string + // Type indicates the type of location (external, internal, redirect, etc). + Type LocationType + // Path is the NGINX location path. + Path string + // ResponseHeaders are custom response headers to be sent. + ResponseHeaders ResponseHeaders + // ProxySetHeaders are headers to set when proxying requests upstream. + ProxySetHeaders []Header + // Rewrites are rewrite rules for modifying request paths. + Rewrites []string + // MirrorPaths are paths to which requests are mirrored. + MirrorPaths []string + // Includes are additional NGINX config snippets or policies to include in this location. + Includes []shared.Include + // EPPPort is the port for the EndpointPicker, used for inference routing. + EPPPort int + // GRPC indicates if this location proxies gRPC traffic. + GRPC bool } // Header defines an HTTP header to be passed to the proxied server. diff --git a/internal/controller/nginx/config/maps.go b/internal/controller/nginx/config/maps.go index 5a5e5ff189..5cf941807c 100644 --- a/internal/controller/nginx/config/maps.go +++ b/internal/controller/nginx/config/maps.go @@ -1,9 +1,12 @@ package config import ( + "fmt" "strings" gotemplate "text/template" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" + "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/nginx/config/shared" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/dataplane" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers" @@ -26,6 +29,8 @@ const ( func executeMaps(conf dataplane.Configuration) []executeResult { maps := buildAddHeaderMaps(append(conf.HTTPServers, conf.SSLServers...)) + maps = append(maps, buildInferenceMaps(conf.BackendGroups)...) 
+ result := executeResult{ dest: httpConfigFile, data: helpers.MustExecuteTemplate(mapsTemplate, maps), @@ -177,3 +182,57 @@ func createAddHeadersMap(name string) shared.Map { Parameters: params, } } + +// buildInferenceMaps creates one map per backend that has an EndpointPickerConfig; backends without one are skipped. +func buildInferenceMaps(groups []dataplane.BackendGroup) []shared.Map { + inferenceMaps := make([]shared.Map, 0, len(groups)) + + for _, group := range groups { + for _, backend := range group.Backends { + if backend.EndpointPickerConfig == nil { + continue + } + + // Result of the map's default entry; starts as fail-close so an unset or unknown FailureMode never renders an empty value. + defaultResult := invalidBackendRef + switch backend.EndpointPickerConfig.FailureMode { + case inference.EndpointPickerFailClose: + defaultResult = invalidBackendRef + case inference.EndpointPickerFailOpen: + defaultResult = backend.UpstreamName + } + + // Build the ordered parameter list. + params := make([]shared.MapParameter, 0, 3) + + // no endpoint picked by the EPP (empty value): go to the inference pool directly. + params = append(params, shared.MapParameter{ + Value: `""`, + Result: backend.UpstreamName, + }) + + // endpoint picked by the EPP is stored in $inference_workload_endpoint. + params = append(params, shared.MapParameter{ + Value: `~.+`, + Result: `$inference_workload_endpoint`, + }) + + // this is set based on EPP failure mode, + // if EPP is failOpen, we set the default to the inference pool upstream, + // if EPP is failClose, we set the default to invalidBackendRef. 
+ params = append(params, shared.MapParameter{ + Value: "default", + Result: defaultResult, + }) + + backendVarName := strings.ReplaceAll(backend.UpstreamName, "-", "_") + + inferenceMaps = append(inferenceMaps, shared.Map{ + Source: `$inference_workload_endpoint`, + Variable: fmt.Sprintf("$inference_backend_%s", backendVarName), + Parameters: params, + }) + } + } + return inferenceMaps +} diff --git a/internal/controller/nginx/config/maps_test.go b/internal/controller/nginx/config/maps_test.go index d133882d7b..736d7808ec 100644 --- a/internal/controller/nginx/config/maps_test.go +++ b/internal/controller/nginx/config/maps_test.go @@ -5,6 +5,7 @@ import ( "testing" . "github.com/onsi/gomega" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/nginx/config/shared" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/dataplane" @@ -59,22 +60,24 @@ func TestExecuteMaps(t *testing.T) { conf := dataplane.Configuration{ HTTPServers: []dataplane.VirtualServer{ - { - PathRules: pathRules, - }, - { - PathRules: pathRules, - }, - { - IsDefault: true, - }, + {PathRules: pathRules}, + {PathRules: pathRules}, + {IsDefault: true}, }, SSLServers: []dataplane.VirtualServer{ + {PathRules: pathRules}, + {IsDefault: true}, + }, + BackendGroups: []dataplane.BackendGroup{ { - PathRules: pathRules, - }, - { - IsDefault: true, + Backends: []dataplane.Backend{ + { + UpstreamName: "upstream1", + EndpointPickerConfig: &inference.EndpointPickerRef{ + FailureMode: inference.EndpointPickerFailClose, + }, + }, + }, }, }, } @@ -86,6 +89,9 @@ func TestExecuteMaps(t *testing.T) { "map ${http_my_second_add_header} $my_second_add_header_header_var {": 1, "~.* ${http_my_second_add_header},;": 1, "map ${http_my_set_header} $my_set_header_header_var {": 0, + "$inference_workload_endpoint": 2, + "$inference_backend": 1, + "invalid-backend-ref": 1, } mapResult := executeMaps(conf) @@ -385,3 +391,36 @@ func 
TestCreateStreamMapsWithEmpty(t *testing.T) { g.Expect(maps).To(BeNil()) } + +func TestBuildInferenceMaps(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + group := dataplane.BackendGroup{ + Backends: []dataplane.Backend{ + { + UpstreamName: "upstream1", + EndpointPickerConfig: &inference.EndpointPickerRef{ + FailureMode: inference.EndpointPickerFailClose, + }, + }, + { + UpstreamName: "upstream2", + EndpointPickerConfig: &inference.EndpointPickerRef{ + FailureMode: inference.EndpointPickerFailOpen, + }, + }, + { + UpstreamName: "upstream3", + EndpointPickerConfig: nil, + }, + }, + } + + maps := buildInferenceMaps([]dataplane.BackendGroup{group}) + g.Expect(maps).To(HaveLen(2)) + g.Expect(maps[0].Source).To(Equal("$inference_workload_endpoint")) + g.Expect(maps[0].Variable).To(Equal("$inference_backend_upstream1")) + g.Expect(maps[0].Parameters[1].Result).To(Equal("invalid-backend-ref")) + g.Expect(maps[1].Parameters[1].Result).To(Equal("upstream2")) +} diff --git a/internal/controller/nginx/config/servers.go b/internal/controller/nginx/config/servers.go index 88ba4fa8ea..4e9259ba8a 100644 --- a/internal/controller/nginx/config/servers.go +++ b/internal/controller/nginx/config/servers.go @@ -16,7 +16,13 @@ import ( "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers" ) -var serversTemplate = gotemplate.Must(gotemplate.New("servers").Parse(serversTemplateText)) +var serversTemplate = gotemplate.Must( + gotemplate.New("servers").Funcs(gotemplate.FuncMap{ + "contains": func(str http.LocationType, substr string) bool { + return strings.Contains(string(str), substr) + }, + }).Parse(serversTemplateText), +) const ( // HeaderMatchSeparator is the separator for constructing header-based match for NJS. @@ -252,6 +258,78 @@ func extractMirrorTargetsWithPercentages(pathRules []dataplane.PathRule) map[str return mirrorTargets } +/* +There are several different flows of location blocks, depending on the user configuration. 
+The following describes them, with basic location examples. + +--------------- +Base case, no HTTP matching conditions or inference extension. + +External location proxies straight to backend. + +location /coffee { + proxy_pass http://backend; +} +--------------- +HTTP matching conditions. + +External location calls httpmatches NJS module. The module determines the HTTP request conditions that exist +and which backend to use, then redirects to the appropriate internal location. +The internal location proxies to the backend. + +location /coffee { + js_content httpmatches.redirect; // chooses backend1 or backend2, and redirects to appropriate internal location +} +location /_ngf-internal-rule0-route0 { + internal; + proxy_pass http://backend1; +} +location /_ngf-internal-rule0-route1 { + internal; + proxy_pass http://backend2; +} +--------------- +Inference extension, no HTTP matching conditions. + +External location calls inference NJS module. The module gets the AI endpoint to proxy to, +then redirects to the internal inference location that proxies to the backend. + +location /coffee { + set $epp_internal_path /_ngf-internal-rule0-route0-inference; + js_content epp.getEndpoint; // gets endpoint and redirects to /_ngf-internal-rule0-route0-inference +} +location /_ngf-internal-rule0-route0-inference { + internal; + proxy_pass http://$inference_backend_<upstream>; +} +--------------- +Inference extension with HTTP matching conditions. + +External location calls httpmatches NJS module. The module determines the HTTP request conditions that exist +and which backend to use, then redirects to the internal inference location. The internal inference +location calls the inference NJS module to get the AI endpoint to proxy to, then redirects to the +internal location that proxies to the backend. + +Note that the location path naming here is a little different than the previous example. 
+The final location that proxy_passes has the non-inference name to avoid too much refactoring +in the code, and the intermediate location has -inference in the name, whereas in the previous example +it was the final location that had -inference in the name. + +location /coffee { + js_content httpmatches.redirect; // chooses backend and redirects to appropriate internal inference location +} +location /_ngf-internal-rule0-route0-inference { + internal; + + set $epp_internal_path /_ngf-internal-rule0-route0; + js_content epp.getEndpoint; // redirects to /_ngf-internal-rule0-route0 +} +location /_ngf-internal-rule0-route0 { + internal; + proxy_pass http://$inference_backend_<upstream>; +} +*/ + type httpMatchPairs map[string][]routeMatch func createLocations( @@ -270,8 +348,6 @@ func createLocations( mirrorPathToPercentage := extractMirrorTargetsWithPercentages(server.PathRules) for pathRuleIdx, rule := range server.PathRules { - matches := make([]routeMatch, 0, len(rule.MatchRules)) - if rule.Path == rootPath { rootPathExists = true } @@ -281,7 +357,6 @@ func createLocations( } mirrorPercentage := mirrorPathToPercentage[rule.Path] - extLocations := initializeExternalLocations(rule, pathsAndTypes) for i := range extLocations { extLocations[i].Includes = createIncludesFromPolicyGenerateResult( @@ -289,54 +364,45 @@ func createLocations( ) } - if !needsInternalLocations(rule) { - for _, r := range rule.MatchRules { - extLocations = updateLocations( - r, - rule, - extLocations, - server.Port, - keepAliveCheck, - mirrorPercentage, - ) - } - - locs = append(locs, extLocations...) 
- continue - } - - internalLocations := make([]http.Location, 0, len(rule.MatchRules)) - - for matchRuleIdx, r := range rule.MatchRules { - intLocation, match := initializeInternalLocation(pathRuleIdx, matchRuleIdx, r.Match, rule.GRPC) - intLocation.Includes = createIncludesFromPolicyGenerateResult( - generator.GenerateForInternalLocation(rule.Policies), + switch { + case !needsInternalLocationsForMatches(rule) && !rule.HasInferenceBackends: + locs = append(locs, updateExternalLocationsForRule( + rule, + extLocations, + server.Port, + keepAliveCheck, + mirrorPercentage)..., ) - - intLocation = updateLocation( - r, + case needsInternalLocationsForMatches(rule): + internalLocations, matches := createInternalLocationsForRule( + pathRuleIdx, rule, - intLocation, + generator, server.Port, keepAliveCheck, mirrorPercentage, ) - - internalLocations = append(internalLocations, intLocation) - matches = append(matches, match) - } - - httpMatchKey := serverID + "_" + strconv.Itoa(pathRuleIdx) - for i := range extLocations { - // FIXME(sberman): De-dupe matches and associated locations - // so we don't need nginx/njs to perform unnecessary matching. - // https://github.com/nginx/nginx-gateway-fabric/issues/662 - extLocations[i].HTTPMatchKey = httpMatchKey - matchPairs[extLocations[i].HTTPMatchKey] = matches + httpMatchKey := serverID + "_" + strconv.Itoa(pathRuleIdx) + for i := range extLocations { + // FIXME(sberman): De-dupe matches and associated locations + // so we don't need nginx/njs to perform unnecessary matching. + // https://github.com/nginx/nginx-gateway-fabric/issues/662 + extLocations[i].HTTPMatchKey = httpMatchKey + matchPairs[extLocations[i].HTTPMatchKey] = matches + } + locs = append(locs, extLocations...) + locs = append(locs, internalLocations...) 
+ case rule.HasInferenceBackends: + locs = append(locs, createInferenceLocationsForRule( + pathRuleIdx, + rule, + extLocations, + generator, + server.Port, + keepAliveCheck, + mirrorPercentage)..., + ) } - - locs = append(locs, extLocations...) - locs = append(locs, internalLocations...) } if !rootPathExists { @@ -346,10 +412,128 @@ func createLocations( return locs, matchPairs, grpcServer } -func needsInternalLocations(rule dataplane.PathRule) bool { +func updateExternalLocationsForRule( + rule dataplane.PathRule, + extLocations []http.Location, + port int32, + keepAliveCheck keepAliveChecker, + mirrorPercentage *float64, +) []http.Location { + for _, r := range rule.MatchRules { + extLocations = updateLocations( + r, + rule, + extLocations, + port, + keepAliveCheck, + mirrorPercentage, + ) + } + + return extLocations +} + +func createInternalLocationsForRule( + pathRuleIdx int, + rule dataplane.PathRule, + generator policies.Generator, + port int32, + keepAliveCheck keepAliveChecker, + mirrorPercentage *float64, +) ([]http.Location, []routeMatch) { + internalLocations := make([]http.Location, 0, len(rule.MatchRules)) + matches := make([]routeMatch, 0, len(rule.MatchRules)) + for matchRuleIdx, r := range rule.MatchRules { + var intLocation http.Location + var match routeMatch + if !rule.HasInferenceBackends { + intLocation, match = initializeInternalMatchLocation(pathRuleIdx, matchRuleIdx, r.Match, rule.GRPC) + } else { + intLocation, match = initializeInternalMatchLocationWithInference(pathRuleIdx, matchRuleIdx, r.Match) + intInfLocation := initializeInternalInferenceRedirectLocation(pathRuleIdx, matchRuleIdx) + for _, b := range r.BackendGroup.Backends { + if b.EndpointPickerConfig != nil { + var portNum int + if b.EndpointPickerConfig.Port != nil { + portNum = int(b.EndpointPickerConfig.Port.Number) + } + intInfLocation.EPPInternalPath = intLocation.Path + if b.EndpointPickerNsName != "" { + intInfLocation.EPPHost = string(b.EndpointPickerConfig.Name) + "." 
+ b.EndpointPickerNsName + } else { + intInfLocation.EPPHost = string(b.EndpointPickerConfig.Name) + } + intInfLocation.EPPPort = portNum + } + } + internalLocations = append(internalLocations, intInfLocation) + } + intLocation.Includes = createIncludesFromPolicyGenerateResult( + generator.GenerateForInternalLocation(rule.Policies), + ) + intLocation = updateLocation( + r, + rule, + intLocation, + port, + keepAliveCheck, + mirrorPercentage, + ) + internalLocations = append(internalLocations, intLocation) + matches = append(matches, match) + } + + return internalLocations, matches +} + +// createInferenceLocationsForRule creates, for each match rule, the internal inference location that +// proxies to the backend, and points every external location at it together with the EndpointPicker +// host and port. The returned slice holds the internal locations followed by the external locations. +func createInferenceLocationsForRule( + pathRuleIdx int, + rule dataplane.PathRule, + extLocations []http.Location, + generator policies.Generator, + port int32, + keepAliveCheck keepAliveChecker, + mirrorPercentage *float64, +) []http.Location { + locs := make([]http.Location, 0, len(rule.MatchRules)+len(extLocations)) + for matchRuleIdx, r := range rule.MatchRules { + intLocation := initializeInternalInferenceLocation(pathRuleIdx, matchRuleIdx) + intLocation.Includes = createIncludesFromPolicyGenerateResult( + generator.GenerateForInternalLocation(rule.Policies), + ) + intLocation = updateLocation( + r, + rule, + intLocation, + port, + keepAliveCheck, + mirrorPercentage, + ) + for _, b := range r.BackendGroup.Backends { + if b.EndpointPickerConfig != nil { + for i := range extLocations { + var portNum int + if b.EndpointPickerConfig.Port != nil { + portNum = int(b.EndpointPickerConfig.Port.Number) + } + extLocations[i].EPPInternalPath = intLocation.Path + // Qualify the EPP host with its namespace when known, as createInternalLocationsForRule does. + extLocations[i].EPPHost = string(b.EndpointPickerConfig.Name) + if b.EndpointPickerNsName != "" { + extLocations[i].EPPHost += "." + b.EndpointPickerNsName + } + extLocations[i].EPPPort = portNum + } + } + } + locs = append(locs, intLocation) + } + locs = append(locs, extLocations...) 
+ + return locs +} + +func needsInternalLocationsForMatches(rule dataplane.PathRule) bool { if len(rule.MatchRules) > 1 { return true } + return len(rule.MatchRules) == 1 && !isPathOnlyMatch(rule.MatchRules[0].Match) } @@ -362,12 +546,13 @@ type pathAndTypeMap map[string]map[dataplane.PathType]struct{} // 2. Each path rule may have an additional location if it contains non-path-only matches. // 3. Each prefix path rule may have an additional location if it doesn't contain trailing slash. // 4. There may be an additional location for the default root path. +// 5. There may be an additional location per parent location for the inference extension. // We also return a map of all paths and their types. func getMaxLocationCountAndPathMap(pathRules []dataplane.PathRule) (int, pathAndTypeMap) { maxLocs := 1 pathsAndTypes := make(pathAndTypeMap) for _, rule := range pathRules { - maxLocs += len(rule.MatchRules) + 2 + maxLocs += (len(rule.MatchRules) * 2) + 2 if pathsAndTypes[rule.Path] == nil { pathsAndTypes[rule.Path] = map[dataplane.PathType]struct{}{ rule.PathType: {}, @@ -431,14 +616,20 @@ func initializeExternalLocations( } func getLocationTypeForPathRule(rule dataplane.PathRule) http.LocationType { - if needsInternalLocations(rule) { + if needsInternalLocationsForMatches(rule) { return http.RedirectLocationType } + if rule.HasInferenceBackends { + return http.InferenceExternalLocationType + } + return http.ExternalLocationType } -func initializeInternalLocation( +// initializeInternalMatchLocation initializes the internal location that is redirected to by an +// external location HTTP matching decision. This location will proxy_pass to the backend. 
+func initializeInternalMatchLocation( pathruleIdx, matchRuleIdx int, match dataplane.Match, @@ -448,6 +639,45 @@ func initializeInternalLocation( return createMatchLocation(path, grpc), createRouteMatch(match, path) } +// initializeInternalInferenceRedirectLocation initializes the internal inference location that is redirected to by +// an external HTTP matching location. This location then redirects to the final proxy_pass location. +func initializeInternalInferenceRedirectLocation(pathruleIdx, matchRuleIdx int) http.Location { + return http.Location{ + Path: inferencePath(pathruleIdx, matchRuleIdx), + Type: http.InferenceInternalLocationType, + } +} + +// initializeInternalMatchLocationWithInference initializes the internal location that is redirected to by +// an internal inference location, which was redirected to by the external HTTP matching location. +// This location will proxy_pass to the backend. +// The routeMatch is created with the inference internal location path, so that the HTTP match in the external +// location can redirect to the proper inference location, which then redirects to this location. +func initializeInternalMatchLocationWithInference( + pathruleIdx, + matchRuleIdx int, + match dataplane.Match, +) (http.Location, routeMatch) { + path := fmt.Sprintf("%s-rule%d-route%d", http.InternalRoutePathPrefix, pathruleIdx, matchRuleIdx) + grpc := false + + return createMatchLocation(path, grpc), createRouteMatch(match, inferencePath(pathruleIdx, matchRuleIdx)) +} + +// initializeInternalInferenceLocation initializes the internal inference location that does the final +// proxy_pass to the inference backend. +// This is used when the external location redirects directly here, without any HTTP matching. 
+func initializeInternalInferenceLocation(pathruleIdx, matchRuleIdx int) http.Location { + return http.Location{ + Path: inferencePath(pathruleIdx, matchRuleIdx), + Type: http.InternalLocationType, + } +} + +func inferencePath(pathruleIdx int, matchRuleIdx int) string { + return fmt.Sprintf("%s-rule%d-route%d-inference", http.InternalRoutePathPrefix, pathruleIdx, matchRuleIdx) +} + // updateLocation updates a location with any relevant configurations, like proxy_pass, filters, tls settings, etc. func updateLocation( matchRule dataplane.MatchRule, @@ -460,6 +690,7 @@ func updateLocation( filters := matchRule.Filters path := pathRule.Path grpc := pathRule.GRPC + inferenceBackend := pathRule.HasInferenceBackends if filters.InvalidFilter != nil { location.Return = &http.Return{Code: http.StatusInternalServerError} @@ -475,7 +706,7 @@ func updateLocation( location = updateLocationRewriteFilter(location, filters.RequestURLRewrite, path) location = updateLocationMirrorFilters(location, filters.RequestMirrors, path, mirrorPercentage) - location = updateLocationProxySettings(location, matchRule, grpc, keepAliveCheck) + location = updateLocationProxySettings(location, matchRule, grpc, inferenceBackend, keepAliveCheck) return location } @@ -555,6 +786,7 @@ func updateLocationProxySettings( location http.Location, matchRule dataplane.MatchRule, grpc bool, + inferenceBackend bool, keepAliveCheck keepAliveChecker, ) http.Location { extraHeaders := make([]http.Header, 0, 3) @@ -575,6 +807,7 @@ func updateLocationProxySettings( matchRule.Filters.RequestURLRewrite, generateProtocolString(location.ProxySSLVerify, grpc), grpc, + inferenceBackend, ) location.ResponseHeaders = responseHeaders @@ -853,6 +1086,7 @@ func createProxyPass( filter *dataplane.HTTPURLRewriteFilter, protocol string, grpc bool, + inferenceBackend bool, ) string { var requestURI string if !grpc { @@ -862,6 +1096,12 @@ func createProxyPass( } backendName := backendGroupName(backendGroup) + + if inferenceBackend { 
+ backendVarName := strings.ReplaceAll(backendName, "-", "_") + return "http://$inference_backend_" + backendVarName + requestURI + } + if backendGroupNeedsSplit(backendGroup) { return protocol + "://$" + convertStringToSafeVariableName(backendName) + requestURI } diff --git a/internal/controller/nginx/config/servers_template.go b/internal/controller/nginx/config/servers_template.go index 224e189a6e..82b692c88e 100644 --- a/internal/controller/nginx/config/servers_template.go +++ b/internal/controller/nginx/config/servers_template.go @@ -92,7 +92,7 @@ server { {{ range $l := $s.Locations }} location {{ $l.Path }} { - {{ if eq $l.Type "internal" -}} + {{ if contains $l.Type "internal" -}} internal; {{ end }} @@ -118,18 +118,31 @@ server { return {{ $l.Return.Code }} "{{ $l.Return.Body }}"; {{- end }} - {{- if eq $l.Type "redirect" }} + {{- if eq $l.Type "redirect" -}} set $match_key {{ $l.HTTPMatchKey }}; js_content httpmatches.redirect; {{- end }} + {{- if contains $l.Type "inference" -}} + if ($request_method = GET) { + set $inference_workload_endpoint ""; + rewrite ^ {{ $l.EPPInternalPath }} last; + } + + js_var $inference_workload_endpoint; + set $epp_internal_path {{ $l.EPPInternalPath }}; + set $epp_host {{ $l.EPPHost }}; + set $epp_port {{ $l.EPPPort }}; + js_content epp.getEndpoint; + break; + {{- end }} + {{ $proxyOrGRPC := "proxy" }}{{ if $l.GRPC }}{{ $proxyOrGRPC = "grpc" }}{{ end }} {{- if $l.GRPC }} include /etc/nginx/grpc-error-pages.conf; {{- end }} - proxy_http_version 1.1; {{- if $l.ProxyPass -}} {{ range $h := $l.ProxySetHeaders }} {{ $proxyOrGRPC }}_set_header {{ $h.Name }} "{{ $h.Value }}"; diff --git a/internal/controller/nginx/config/servers_test.go b/internal/controller/nginx/config/servers_test.go index 6b604d7bec..ab4fad31a5 100644 --- a/internal/controller/nginx/config/servers_test.go +++ b/internal/controller/nginx/config/servers_test.go @@ -9,6 +9,7 @@ import ( . 
"github.com/onsi/gomega" "github.com/onsi/gomega/format" "k8s.io/apimachinery/pkg/types" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/nginx/config/http" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/nginx/config/policies" @@ -1239,7 +1240,7 @@ func TestCreateServers(t *testing.T) { Filters: dataplane.HTTPFilters{ RequestRedirect: &dataplane.HTTPRequestRedirectFilter{ Hostname: helpers.GetPointer("redirect.example.com"), - StatusCode: helpers.GetPointer[int](301), + StatusCode: helpers.GetPointer(301), Port: helpers.GetPointer[int32](8080), Path: &dataplane.HTTPPathModifier{ Type: dataplane.ReplaceFullPath, @@ -2443,6 +2444,154 @@ func TestCreateLocations_Includes(t *testing.T) { } } +func TestCreateLocations_InferenceBackends(t *testing.T) { + t.Parallel() + + hrNsName := types.NamespacedName{Namespace: "test", Name: "route1"} + + fooGroup := dataplane.BackendGroup{ + Source: hrNsName, + RuleIdx: 0, + Backends: []dataplane.Backend{ + { + UpstreamName: "test_foo_80", + Valid: true, + Weight: 1, + EndpointPickerConfig: &inference.EndpointPickerRef{ + Name: "test-epp", + Port: &inference.Port{ + Number: 80, + }, + }, + }, + }, + } + + pathRuleInferenceOnly := dataplane.PathRule{ + Path: "/inference", + PathType: dataplane.PathTypeExact, + HasInferenceBackends: true, + MatchRules: []dataplane.MatchRule{ + { + Match: dataplane.Match{}, + BackendGroup: fooGroup, + }, + }, + } + + pathRuleInferenceWithMatch := dataplane.PathRule{ + Path: "/inference-match", + PathType: dataplane.PathTypeExact, + HasInferenceBackends: true, + MatchRules: []dataplane.MatchRule{ + { + Match: dataplane.Match{ + Method: helpers.GetPointer("POST"), + }, + BackendGroup: fooGroup, + }, + }, + } + + tests := []struct { + expMatches httpMatchPairs + name string + pathRules []dataplane.PathRule + expLocs []http.Location + }{ + { + name: "inference only, no internal locations for matches", + pathRules: 
[]dataplane.PathRule{pathRuleInferenceOnly}, + expLocs: []http.Location{ + { + Path: "/_ngf-internal-rule0-route0-inference", + Type: http.InternalLocationType, + ProxyPass: "http://$inference_backend_test_foo_80$request_uri", + ProxySetHeaders: []http.Header{ + {Name: "Host", Value: "$gw_api_compliant_host"}, + {Name: "X-Forwarded-For", Value: "$proxy_add_x_forwarded_for"}, + {Name: "X-Real-IP", Value: "$remote_addr"}, + {Name: "X-Forwarded-Proto", Value: "$scheme"}, + {Name: "X-Forwarded-Host", Value: "$host"}, + {Name: "X-Forwarded-Port", Value: "$server_port"}, + {Name: "Upgrade", Value: "$http_upgrade"}, + {Name: "Connection", Value: "$connection_upgrade"}, + }, + }, + { + Path: "= /inference", + Type: http.InferenceExternalLocationType, + EPPInternalPath: "/_ngf-internal-rule0-route0-inference", + EPPHost: "test-epp", + EPPPort: 80, + }, + createDefaultRootLocation(), + }, + expMatches: httpMatchPairs{}, + }, + { + name: "inference with match, needs internal locations for matches", + pathRules: []dataplane.PathRule{pathRuleInferenceWithMatch}, + expLocs: []http.Location{ + { + Path: "= /inference-match", + Type: http.RedirectLocationType, + HTTPMatchKey: "1_0", + }, + { + Path: "/_ngf-internal-rule0-route0-inference", + Type: http.InferenceInternalLocationType, + EPPInternalPath: "/_ngf-internal-rule0-route0", + EPPHost: "test-epp", + EPPPort: 80, + }, + { + Path: "/_ngf-internal-rule0-route0", + Type: http.InternalLocationType, + ProxyPass: "http://$inference_backend_test_foo_80$request_uri", + ProxySetHeaders: []http.Header{ + {Name: "Host", Value: "$gw_api_compliant_host"}, + {Name: "X-Forwarded-For", Value: "$proxy_add_x_forwarded_for"}, + {Name: "X-Real-IP", Value: "$remote_addr"}, + {Name: "X-Forwarded-Proto", Value: "$scheme"}, + {Name: "X-Forwarded-Host", Value: "$host"}, + {Name: "X-Forwarded-Port", Value: "$server_port"}, + {Name: "Upgrade", Value: "$http_upgrade"}, + {Name: "Connection", Value: "$connection_upgrade"}, + }, + }, + 
createDefaultRootLocation(), + }, + expMatches: httpMatchPairs{ + "1_0": { + {Method: "POST", RedirectPath: "/_ngf-internal-rule0-route0-inference"}, + }, + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + locs, matches, _ := createLocations( + &dataplane.VirtualServer{ + Hostname: "example.com", + PathRules: tc.pathRules, + Port: 80, + }, + "1", + &policiesfakes.FakeGenerator{}, + alwaysFalseKeepAliveChecker, + ) + + g.Expect(helpers.Diff(tc.expLocs, locs)).To(BeEmpty()) + g.Expect(matches).To(Equal(tc.expMatches)) + }) + } +} + func TestCreateLocationsRootPath(t *testing.T) { t.Parallel() hrNsName := types.NamespacedName{Namespace: "test", Name: "route1"} @@ -3332,10 +3481,11 @@ func TestCreateProxyPass(t *testing.T) { t.Parallel() tests := []struct { - rewrite *dataplane.HTTPURLRewriteFilter - expected string - grp dataplane.BackendGroup - GRPC bool + rewrite *dataplane.HTTPURLRewriteFilter + expected string + grp dataplane.BackendGroup + GRPC bool + inferenceBackend bool }{ { expected: "http://10.0.0.1:80$request_uri", @@ -3349,6 +3499,20 @@ func TestCreateProxyPass(t *testing.T) { }, }, }, + // Inference case + { + expected: "http://$inference_backend_upstream_inference$request_uri", + grp: dataplane.BackendGroup{ + Backends: []dataplane.Backend{ + { + UpstreamName: "upstream-inference", + Valid: true, + Weight: 1, + }, + }, + }, + inferenceBackend: true, + }, { expected: "http://$group_ns1__bg_rule0$request_uri", grp: dataplane.BackendGroup{ @@ -3401,7 +3565,13 @@ func TestCreateProxyPass(t *testing.T) { t.Run(tc.expected, func(t *testing.T) { t.Parallel() g := NewWithT(t) - result := createProxyPass(tc.grp, tc.rewrite, generateProtocolString(nil, tc.GRPC), tc.GRPC) + result := createProxyPass( + tc.grp, + tc.rewrite, + generateProtocolString(nil, tc.GRPC), + tc.GRPC, + tc.inferenceBackend, + ) g.Expect(result).To(Equal(tc.expected)) }) } diff --git 
a/internal/controller/nginx/modules/README.md b/internal/controller/nginx/modules/README.md index 9c7c805276..3313ea6604 100644 --- a/internal/controller/nginx/modules/README.md +++ b/internal/controller/nginx/modules/README.md @@ -22,6 +22,7 @@ dependencies. - [httpmatches](./src/httpmatches.js): a location handler for HTTP requests. It redirects requests to an internal location block based on the request's headers, arguments, and method. +- [epp](./src/epp.js): handles communication with the EndpointPicker (EPP) component. This is for acquiring a specific AI endpoint to route client traffic to when using the Gateway API Inference Extension. ### Helpful Resources for Module Development diff --git a/internal/controller/nginx/modules/src/epp.js b/internal/controller/nginx/modules/src/epp.js new file mode 100644 index 0000000000..262366a9db --- /dev/null +++ b/internal/controller/nginx/modules/src/epp.js @@ -0,0 +1,66 @@ +import qs from 'querystring'; + +const EPP_HOST_HEADER_VAR = 'epp_host'; +const EPP_PORT_HEADER_VAR = 'epp_port'; +const EPP_HOST_HEADER = 'X-EPP-Host'; +const EPP_PORT_HEADER = 'X-EPP-Port'; +const ENDPOINT_HEADER = 'X-Gateway-Destination-Endpoint'; +const EPP_INTERNAL_PATH_VAR = 'epp_internal_path'; +const WORKLOAD_ENDPOINT_VAR = 'inference_workload_endpoint'; +const SHIM_URI = 'http://127.0.0.1:54800'; + +async function getEndpoint(r) { + const headerEndpoint = r.headersIn['test-epp-endpoint-selection']; + if (headerEndpoint) { + // Header is provided: Use endpoints directly and bypass Shim server + const endpoints = headerEndpoint.split(',').map(e => e.trim()); + r.variables[WORKLOAD_ENDPOINT_VAR] = endpoints.join(','); + r.log(`Using header-specified endpoints: ${r.variables[WORKLOAD_ENDPOINT_VAR]}`); + } else { + if (!r.variables[EPP_HOST_HEADER_VAR] || !r.variables[EPP_PORT_HEADER_VAR]) { + throw Error( + `Missing required variables: ${EPP_HOST_HEADER_VAR} and/or ${EPP_PORT_HEADER_VAR}`, + ); + } + if (!r.variables[EPP_INTERNAL_PATH_VAR]) { + 
throw Error(`Missing required variable: ${EPP_INTERNAL_PATH_VAR}`); + } + + let headers = Object.assign({}, r.headersIn); + headers[EPP_HOST_HEADER] = r.variables[EPP_HOST_HEADER_VAR]; + headers[EPP_PORT_HEADER] = r.variables[EPP_PORT_HEADER_VAR]; + + try { + const response = await ngx.fetch(SHIM_URI, { + method: r.method, + headers: headers, + body: r.requestText, + }); + const endpointHeader = response.headers.get(ENDPOINT_HEADER); + if (response.status === 200 && endpointHeader) { + r.variables[WORKLOAD_ENDPOINT_VAR] = endpointHeader; + r.log( + `found inference endpoint from EndpointPicker: ${r.variables[WORKLOAD_ENDPOINT_VAR]}`, + ); + } else { + const body = await response.text(); + r.error( + `could not get specific inference endpoint from EndpointPicker; ` + + `status: ${response.status}; body: ${body}`, + ); + } + } catch (err) { + r.error(`Error in ngx.fetch: ${err}`); + } + } + + // If performing a rewrite, $request_uri won't be used, + // so we have to preserve args in the internal redirect. + let args = qs.stringify(r.args); + if (args) { + args = '?' 
+ args; + } + + r.internalRedirect(r.variables[EPP_INTERNAL_PATH_VAR] + args); +} +export default { getEndpoint }; diff --git a/internal/controller/nginx/modules/test/epp.test.js b/internal/controller/nginx/modules/test/epp.test.js new file mode 100644 index 0000000000..97b10f75b7 --- /dev/null +++ b/internal/controller/nginx/modules/test/epp.test.js @@ -0,0 +1,117 @@ +import { default as epp } from '../src/epp.js'; +import { expect, describe, it, beforeEach, afterEach, vi } from 'vitest'; + +function makeRequest({ + method = 'POST', + headersIn = {}, + args = {}, + requestText = '', + variables = {}, +} = {}) { + return { + method, + headersIn, + requestText, + variables, + args, + error: vi.fn(), + log: vi.fn(), + internalRedirect: vi.fn(), + }; +} + +describe('getEndpoint', () => { + let originalNgx; + beforeEach(() => { + originalNgx = globalThis.ngx; + }); + afterEach(() => { + globalThis.ngx = originalNgx; + }); + + it('throws if host or port is missing', async () => { + const r = makeRequest({ variables: { epp_internal_path: '/foo' } }); + await expect(epp.getEndpoint(r)).rejects.toThrow(/Missing required variables/); + }); + + it('throws if internal path is missing', async () => { + const r = makeRequest({ variables: { epp_host: 'host', epp_port: '1234' } }); + await expect(epp.getEndpoint(r)).rejects.toThrow(/Missing required variable/); + }); + + it('sets endpoint and logs on 200 with endpoint header', async () => { + const endpoint = 'http://endpoint'; + globalThis.ngx = { + fetch: vi.fn().mockResolvedValue({ + status: 200, + headers: { get: () => endpoint }, + text: vi.fn(), + }), + }; + const r = makeRequest({ + variables: { epp_host: 'host', epp_port: '1234', epp_internal_path: '/foo' }, + }); + await epp.getEndpoint(r); + expect(r.variables.inference_workload_endpoint).toBe(endpoint); + expect(r.log).toHaveBeenCalledWith(expect.stringContaining(endpoint)); + expect(r.internalRedirect).toHaveBeenCalledWith('/foo'); + }); + + it('calls error if 
response is not 200 or endpoint header missing', async () => { + globalThis.ngx = { + fetch: vi.fn().mockResolvedValue({ + status: 404, + headers: { get: () => null }, + text: vi.fn().mockResolvedValue('fail'), + }), + }; + const r = makeRequest({ + variables: { epp_host: 'host', epp_port: '1234', epp_internal_path: '/foo' }, + }); + await epp.getEndpoint(r); + expect(r.error).toHaveBeenCalledWith( + expect.stringContaining('could not get specific inference endpoint'), + ); + expect(r.internalRedirect).toHaveBeenCalledWith('/foo'); + }); + + it('calls error if fetch throws', async () => { + globalThis.ngx = { + fetch: vi.fn().mockRejectedValue(new Error('network fail')), + }; + const r = makeRequest({ + variables: { epp_host: 'host', epp_port: '1234', epp_internal_path: '/foo' }, + }); + await epp.getEndpoint(r); + expect(r.error).toHaveBeenCalledWith(expect.stringContaining('Error in ngx.fetch')); + expect(r.internalRedirect).toHaveBeenCalledWith('/foo'); + }); + + it('preserves args in internal redirect when args are present', async () => { + const endpoint = 'http://endpoint'; + globalThis.ngx = { + fetch: vi.fn().mockResolvedValue({ + status: 200, + headers: { get: () => endpoint }, + text: vi.fn(), + }), + }; + const r = makeRequest({ + variables: { epp_host: 'host', epp_port: '1234', epp_internal_path: '/foo' }, + args: { a: '1', b: '2' }, + }); + await epp.getEndpoint(r); + expect(r.internalRedirect).toHaveBeenCalledWith('/foo?a=1&b=2'); + }); + it('returns the header-specified endpoints if provided', async () => { + const r = makeRequest({ + variables: {}, + headersIn: { 'X-Endpoint-Selector': '10.1.2.3, 10.1.2.4' }, + }); + await epp.getEndpoint(r); + expect(r.variables.inference_workload_endpoint).toBe('10.1.2.3,10.1.2.4'); + expect(r.log).toHaveBeenCalledWith( + expect.stringContaining('Using header-specified endpoints'), + ); + }); +}); \ No newline at end of file diff --git a/internal/controller/provisioner/objects.go 
b/internal/controller/provisioner/objects.go index 475a3e7319..e2e3eb8517 100644 --- a/internal/controller/provisioner/objects.go +++ b/internal/controller/provisioner/objects.go @@ -899,6 +899,7 @@ func (p *NginxProvisioner) buildNginxPodTemplateSpec( {MountPath: "/etc/nginx/events-includes", Name: "nginx-events-includes"}, }, SecurityContext: &corev1.SecurityContext{ + AllowPrivilegeEscalation: helpers.GetPointer(false), Capabilities: &corev1.Capabilities{ Drop: []corev1.Capability{"ALL"}, }, @@ -1119,6 +1120,30 @@ func (p *NginxProvisioner) buildNginxPodTemplateSpec( spec.Spec.Containers[0].VolumeMounts = volumeMounts } + if p.cfg.InferenceExtension { + spec.Spec.Containers = append(spec.Spec.Containers, corev1.Container{ + Name: "endpoint-picker-shim", + Image: p.cfg.GatewayPodConfig.Image, + ImagePullPolicy: pullPolicy, + Command: []string{ + "/usr/bin/gateway", + "endpoint-picker", + }, + SecurityContext: &corev1.SecurityContext{ + AllowPrivilegeEscalation: helpers.GetPointer(false), + Capabilities: &corev1.Capabilities{ + Drop: []corev1.Capability{"ALL"}, + }, + ReadOnlyRootFilesystem: helpers.GetPointer(true), + RunAsGroup: helpers.GetPointer[int64](1001), + RunAsUser: helpers.GetPointer[int64](101), + SeccompProfile: &corev1.SeccompProfile{ + Type: corev1.SeccompProfileTypeRuntimeDefault, + }, + }, + }) + } + return spec } diff --git a/internal/controller/provisioner/objects_test.go b/internal/controller/provisioner/objects_test.go index 2327db259d..30403f85a7 100644 --- a/internal/controller/provisioner/objects_test.go +++ b/internal/controller/provisioner/objects_test.go @@ -1765,3 +1765,57 @@ func TestBuildNginxResourceObjects_Patches(t *testing.T) { g.Expect(svc.Labels).To(HaveKeyWithValue("app", "nginx")) g.Expect(dep.Labels).To(HaveKeyWithValue("app", "nginx")) } + +func TestBuildNginxResourceObjects_InferenceExtension(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + agentTLSSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: 
agentTLSTestSecretName, + Namespace: ngfNamespace, + }, + Data: map[string][]byte{"tls.crt": []byte("tls")}, + } + fakeClient := fake.NewFakeClient(agentTLSSecret) + + provisioner := &NginxProvisioner{ + cfg: Config{ + GatewayPodConfig: &config.GatewayPodConfig{ + Namespace: ngfNamespace, + }, + AgentTLSSecretName: agentTLSTestSecretName, + InferenceExtension: true, + }, + k8sClient: fakeClient, + baseLabelSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "nginx"}, + }, + } + + gateway := &gatewayv1.Gateway{ + ObjectMeta: metav1.ObjectMeta{ + Name: "gw", + Namespace: "default", + }, + Spec: gatewayv1.GatewaySpec{ + Listeners: []gatewayv1.Listener{{Port: 80}}, + }, + } + + objects, err := provisioner.buildNginxResourceObjects("gw-nginx", gateway, &graph.EffectiveNginxProxy{}) + g.Expect(err).ToNot(HaveOccurred()) + + // Find the deployment object + var deployment *appsv1.Deployment + for _, obj := range objects { + if d, ok := obj.(*appsv1.Deployment); ok { + deployment = d + break + } + } + g.Expect(deployment).ToNot(BeNil()) + containers := deployment.Spec.Template.Spec.Containers + g.Expect(containers).To(HaveLen(2)) + g.Expect(containers[1].Name).To(Equal("endpoint-picker-shim")) +} diff --git a/internal/controller/provisioner/provisioner.go b/internal/controller/provisioner/provisioner.go index fe59f5be1b..8a2abffd0a 100644 --- a/internal/controller/provisioner/provisioner.go +++ b/internal/controller/provisioner/provisioner.go @@ -58,6 +58,7 @@ type Config struct { NginxDockerSecretNames []string NginxOneConsoleTelemetryConfig config.NginxOneConsoleTelemetryConfig Plus bool + InferenceExtension bool } // NginxProvisioner handles provisioning nginx kubernetes resources. 
diff --git a/internal/controller/state/change_processor.go b/internal/controller/state/change_processor.go index f3184adde8..27a62bb0e5 100644 --- a/internal/controller/state/change_processor.go +++ b/internal/controller/state/change_processor.go @@ -11,6 +11,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" v1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -98,6 +99,7 @@ func NewChangeProcessorImpl(cfg ChangeProcessorConfig) *ChangeProcessorImpl { TLSRoutes: make(map[types.NamespacedName]*v1alpha2.TLSRoute), NGFPolicies: make(map[graph.PolicyKey]policies.Policy), SnippetsFilters: make(map[types.NamespacedName]*ngfAPIv1alpha1.SnippetsFilter), + InferencePools: make(map[types.NamespacedName]*inference.InferencePool), } processor := &ChangeProcessorImpl{ @@ -166,6 +168,11 @@ func NewChangeProcessorImpl(cfg ChangeProcessorConfig) *ChangeProcessorImpl { store: newObjectStoreMapAdapter(clusterStore.Services), predicate: funcPredicate{stateChanged: isReferenced}, }, + { + gvk: cfg.MustExtractGVK(&inference.InferencePool{}), + store: newObjectStoreMapAdapter(clusterStore.InferencePools), + predicate: funcPredicate{stateChanged: isReferenced}, + }, { gvk: cfg.MustExtractGVK(&discoveryV1.EndpointSlice{}), store: nil, diff --git a/internal/controller/state/change_processor_test.go b/internal/controller/state/change_processor_test.go index 2d17e6f6e9..44dbdb0613 100644 --- a/internal/controller/state/change_processor_test.go +++ b/internal/controller/state/change_processor_test.go @@ -14,6 +14,7 @@ import ( "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" v1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" 
"sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -317,6 +318,7 @@ func createScheme() *runtime.Scheme { utilruntime.Must(apiext.AddToScheme(scheme)) utilruntime.Must(ngfAPIv1alpha1.AddToScheme(scheme)) utilruntime.Must(ngfAPIv1alpha2.AddToScheme(scheme)) + utilruntime.Must(inference.Install(scheme)) return scheme } diff --git a/internal/controller/state/conditions/conditions.go b/internal/controller/state/conditions/conditions.go index ad5d00a0dc..1664aa85b6 100644 --- a/internal/controller/state/conditions/conditions.go +++ b/internal/controller/state/conditions/conditions.go @@ -4,6 +4,7 @@ import ( "fmt" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" v1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" @@ -1108,3 +1109,55 @@ func NewBackendTLSPolicyNoValidCACertificate(message string) Condition { Message: message, } } + +// NewInferencePoolAccepted returns a Condition that indicates that the InferencePool is accepted by the Gateway. +func NewInferencePoolAccepted() Condition { + return Condition{ + Type: string(inference.InferencePoolConditionAccepted), + Status: metav1.ConditionTrue, + Reason: string(inference.InferencePoolConditionAccepted), + Message: "InferencePool is accepted by the Gateway.", + } +} + +// NewInferencePoolResolvedRefs returns a Condition that +// indicates that all references in the InferencePool are resolved. +func NewInferencePoolResolvedRefs() Condition { + return Condition{ + Type: string(inference.InferencePoolConditionResolvedRefs), + Status: metav1.ConditionTrue, + Reason: string(inference.InferencePoolConditionResolvedRefs), + Message: "Inference pool references a valid ExtensionRef.", + } +} + +// NewDefaultInferenceConditions returns the default Conditions +// that must be present in the status of an InferencePool. 
+func NewDefaultInferenceConditions() []Condition { + return []Condition{ + NewInferencePoolAccepted(), + NewInferencePoolResolvedRefs(), + } +} + +// NewInferencePoolInvalidHTTPRouteNotAccepted returns a Condition that indicates that the InferencePool is not +// accepted because the associated HTTPRoute is not accepted by the Gateway. +func NewInferencePoolInvalidHTTPRouteNotAccepted(msg string) Condition { + return Condition{ + Type: string(inference.InferencePoolConditionAccepted), + Status: metav1.ConditionFalse, + Reason: string(inference.InferencePoolReasonHTTPRouteNotAccepted), + Message: msg, + } +} + +// NewInferencePoolInvalidExtensionref returns a Condition that indicates that the InferencePool is not +// accepted because the ExtensionRef is invalid. +func NewInferencePoolInvalidExtensionref(msg string) Condition { + return Condition{ + Type: string(inference.InferencePoolConditionResolvedRefs), + Status: metav1.ConditionFalse, + Reason: string(inference.InferencePoolReasonInvalidExtensionRef), + Message: msg, + } +} diff --git a/internal/controller/state/dataplane/configuration.go b/internal/controller/state/dataplane/configuration.go index 52306f4e0b..6c91b8d266 100644 --- a/internal/controller/state/dataplane/configuration.go +++ b/internal/controller/state/dataplane/configuration.go @@ -374,12 +374,13 @@ func newBackendGroup( gatewayName types.NamespacedName, sourceNsName types.NamespacedName, ruleIdx int, -) BackendGroup { +) (BackendGroup, bool) { var backends []Backend if len(refs) > 0 { backends = make([]Backend, 0, len(refs)) } + var inferencePoolBackendExists bool for _, ref := range refs { if ref.IsMirrorBackend { @@ -391,11 +392,15 @@ func newBackendGroup( valid = false } + inferencePoolBackendExists = inferencePoolBackendExists || ref.IsInferencePool + backends = append(backends, Backend{ - UpstreamName: ref.ServicePortReference(), - Weight: ref.Weight, - Valid: valid, - VerifyTLS: convertBackendTLS(ref.BackendTLSPolicy, gatewayName), + 
UpstreamName: ref.ServicePortReference(), + Weight: ref.Weight, + Valid: valid, + VerifyTLS: convertBackendTLS(ref.BackendTLSPolicy, gatewayName), + EndpointPickerConfig: ref.EndpointPickerConfig, + EndpointPickerNsName: ref.EndpointPickerNsName, }) } @@ -403,7 +408,7 @@ func newBackendGroup( Backends: backends, Source: sourceNsName, RuleIdx: ruleIdx, - } + }, inferencePoolBackendExists } func convertBackendTLS(btp *graph.BackendTLSPolicy, gwNsName types.NamespacedName) *VerifyTLS { @@ -595,10 +600,19 @@ func (hpr *hostPathRules) upsertRoute( } hostRule.GRPC = GRPC + backendGroup, inferencePoolBackendExists := newBackendGroup( + rule.BackendRefs, + listener.GatewayName, + routeNsName, + idx, + ) + if inferencePoolBackendExists { + hostRule.HasInferenceBackends = true + } hostRule.MatchRules = append(hostRule.MatchRules, MatchRule{ Source: objectSrc, - BackendGroup: newBackendGroup(rule.BackendRefs, listener.GatewayName, routeNsName, idx), + BackendGroup: backendGroup, Filters: filters, Match: convertMatch(m), }) diff --git a/internal/controller/state/dataplane/configuration_test.go b/internal/controller/state/dataplane/configuration_test.go index b329b9d46a..3e1697590d 100644 --- a/internal/controller/state/dataplane/configuration_test.go +++ b/internal/controller/state/dataplane/configuration_test.go @@ -2777,6 +2777,93 @@ func TestBuildConfiguration_Plus(t *testing.T) { } } +func TestUpsertRoute_PathRuleHasInferenceBackend(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + // Setup minimal route with one BackendRef marked as IsInferencePool + backendRef := graph.BackendRef{ + SvcNsName: types.NamespacedName{Name: "svc", Namespace: "test"}, + ServicePort: apiv1.ServicePort{Port: 80}, + Valid: true, + IsInferencePool: true, + } + + listenerName := "listener-80" + gwName := types.NamespacedName{Namespace: "test", Name: "gw"} + + route := &graph.L7Route{ + RouteType: graph.RouteTypeHTTP, + Source: &v1.HTTPRoute{ + ObjectMeta: metav1.ObjectMeta{ + Name: "hr", + 
Namespace: "test", + }, + }, + Spec: graph.L7RouteSpec{ + Rules: []graph.RouteRule{ + { + ValidMatches: true, + Filters: graph.RouteRuleFilters{Valid: true}, + BackendRefs: []graph.BackendRef{backendRef}, + Matches: []v1.HTTPRouteMatch{ + { + Path: &v1.HTTPPathMatch{ + Type: helpers.GetPointer(v1.PathMatchPathPrefix), + Value: helpers.GetPointer("/infer"), + }, + }, + }, + }, + }, + }, + ParentRefs: []graph.ParentRef{ + { + Attachment: &graph.ParentRefAttachmentStatus{ + AcceptedHostnames: map[string][]string{ + graph.CreateGatewayListenerKey(gwName, listenerName): {"*"}, + }, + }, + }, + }, + Valid: true, + } + + listener := &graph.Listener{ + Name: listenerName, + GatewayName: gwName, + Valid: true, + Routes: map[graph.RouteKey]*graph.L7Route{ + graph.CreateRouteKey(route.Source): route, + }, + } + + gateway := &graph.Gateway{ + Source: &v1.Gateway{ + ObjectMeta: metav1.ObjectMeta{ + Name: "gw", + Namespace: "test", + }, + }, + Listeners: []*graph.Listener{listener}, + } + + hpr := newHostPathRules() + hpr.upsertRoute(route, listener, gateway) + + // Find the PathRule for "/infer" + found := false + for _, rules := range hpr.rulesPerHost { + for _, pr := range rules { + if pr.Path == "/infer" { + found = true + g.Expect(pr.HasInferenceBackends).To(BeTrue()) + } + } + } + g.Expect(found).To(BeTrue(), "PathRule for '/infer' not found") +} + func TestNewBackendGroup_Mirror(t *testing.T) { t.Parallel() g := NewWithT(t) @@ -2788,7 +2875,7 @@ func TestNewBackendGroup_Mirror(t *testing.T) { IsMirrorBackend: true, } - group := newBackendGroup([]graph.BackendRef{backendRef}, types.NamespacedName{}, types.NamespacedName{}, 0) + group, _ := newBackendGroup([]graph.BackendRef{backendRef}, types.NamespacedName{}, types.NamespacedName{}, 0) g.Expect(group.Backends).To(BeEmpty()) } diff --git a/internal/controller/state/dataplane/types.go b/internal/controller/state/dataplane/types.go index 08e7e0867b..0866af636b 100644 --- a/internal/controller/state/dataplane/types.go +++ 
b/internal/controller/state/dataplane/types.go @@ -5,6 +5,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/nginx/config/policies" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/graph" @@ -137,6 +138,8 @@ type PathRule struct { Policies []policies.Policy // GRPC indicates if this is a gRPC rule GRPC bool + // HasInferenceBackends indicates whether the PathRule contains a backend for an inference workload. + HasInferenceBackends bool } // InvalidHTTPFilter is a special filter for handling the case when configured filters are invalid. @@ -323,6 +326,10 @@ func (bg *BackendGroup) Name() string { type Backend struct { // VerifyTLS holds the backend TLS verification configuration. VerifyTLS *VerifyTLS + // EndpointPickerConfig holds the configuration for the EndpointPicker for this backend. + // This is set if this backend is for an inference workload. + EndpointPickerConfig *inference.EndpointPickerRef + EndpointPickerNsName string // UpstreamName is the name of the upstream for this backend. UpstreamName string // Weight is the weight of the BackendRef. 
diff --git a/internal/controller/state/graph/backend_refs.go b/internal/controller/state/graph/backend_refs.go index d18a81cc43..97ffe61f4b 100644 --- a/internal/controller/state/graph/backend_refs.go +++ b/internal/controller/state/graph/backend_refs.go @@ -9,13 +9,16 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/validation/field" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha3" ngfAPIv1alpha2 "github.com/nginx/nginx-gateway-fabric/v2/apis/v1alpha2" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/sort" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" ) const ( @@ -28,6 +31,11 @@ const ( type BackendRef struct { // BackendTLSPolicy is the BackendTLSPolicy of the Service which is referenced by the backendRef. BackendTLSPolicy *BackendTLSPolicy + // EndpointPickerConfig is the configuration for the EndpointPicker, if this backendRef is for an InferencePool. + EndpointPickerConfig *inference.EndpointPickerRef + // EndpointPickerNsName is the namespace where the EndpointPicker is deployed, + // if this backendRef is for an InferencePool. + EndpointPickerNsName string // InvalidForGateways is a map of Gateways for which this BackendRef is invalid for, with the corresponding // condition. Certain NginxProxy configurations may result in a backend not being valid for some Gateways, // but not others. @@ -43,6 +51,8 @@ type BackendRef struct { Valid bool // IsMirrorBackend indicates whether the BackendGroup is for a mirrored backend. IsMirrorBackend bool + // IsInferencePool indicates whether the BackendRef is for an InferencePool. 
+ IsInferencePool bool } // ServicePortReference returns a string representation for the service and port that is referenced by the BackendRef. @@ -57,10 +67,11 @@ func addBackendRefsToRouteRules( routes map[RouteKey]*L7Route, refGrantResolver *referenceGrantResolver, services map[types.NamespacedName]*v1.Service, + referencedInferencePools map[types.NamespacedName]*ReferencedInferencePool, backendTLSPolicies map[types.NamespacedName]*BackendTLSPolicy, ) { for _, r := range routes { - addBackendRefsToRules(r, refGrantResolver, services, backendTLSPolicies) + addBackendRefsToRules(r, refGrantResolver, services, referencedInferencePools, backendTLSPolicies) } } @@ -70,6 +81,7 @@ func addBackendRefsToRules( route *L7Route, refGrantResolver *referenceGrantResolver, services map[types.NamespacedName]*v1.Service, + referencedInferencePools map[types.NamespacedName]*ReferencedInferencePool, backendTLSPolicies map[types.NamespacedName]*BackendTLSPolicy, ) { if !route.Valid { @@ -99,6 +111,35 @@ func addBackendRefsToRules( } routeNs := route.Source.GetNamespace() + // if we have an InferencePool backend disguised as a Service, set the port value + if ref.IsInferencePool { + namespace := routeNs + if ref.Namespace != nil { + namespace = string(*ref.Namespace) + } + + poolName := types.NamespacedName{ + Name: controller.GetInferencePoolName(string(ref.Name)), + Namespace: namespace, + } + + if pool, exists := referencedInferencePools[poolName]; exists { + if !pool.Valid { + route.Conditions = append(route.Conditions, conditions.NewRouteBackendRefUnsupportedValue( + fmt.Sprintf("Referenced InferencePool %s/%s is invalid", + poolName.Namespace, + poolName.Name, + ), + )) + continue + } + port := gatewayv1.PortNumber(pool.Source.Spec.TargetPorts[0].Number) + ref.Port = helpers.GetPointer(port) + ref.EndpointPickerConfig = &pool.Source.Spec.EndpointPickerRef + ref.EndpointPickerNsName = poolName.Namespace + } + } + ref, conds := createBackendRef( ref, route, @@ -149,13 +190,23 @@ 
func createBackendRef( } } - valid, cond := validateRouteBackendRef(ref, route.Source.GetNamespace(), refGrantResolver, refPath) + valid, cond := validateRouteBackendRef( + route.RouteType, + ref, + route.Source.GetNamespace(), + refGrantResolver, + refPath, + ) + if !valid { backendRef := BackendRef{ - Weight: weight, - Valid: false, - IsMirrorBackend: ref.MirrorBackendIdx != nil, - InvalidForGateways: make(map[types.NamespacedName]conditions.Condition), + Weight: weight, + Valid: false, + IsMirrorBackend: ref.MirrorBackendIdx != nil, + IsInferencePool: ref.IsInferencePool, + InvalidForGateways: make(map[types.NamespacedName]conditions.Condition), + EndpointPickerConfig: ref.EndpointPickerConfig, + EndpointPickerNsName: ref.EndpointPickerNsName, } return backendRef, []conditions.Condition{cond} @@ -169,12 +220,15 @@ func createBackendRef( svcIPFamily, svcPort, err := getIPFamilyAndPortFromRef(ref.BackendRef, svcNsName, services, refPath) if err != nil { backendRef := BackendRef{ - Weight: weight, - Valid: false, - SvcNsName: svcNsName, - ServicePort: v1.ServicePort{}, - IsMirrorBackend: ref.MirrorBackendIdx != nil, - InvalidForGateways: make(map[types.NamespacedName]conditions.Condition), + Weight: weight, + Valid: false, + SvcNsName: svcNsName, + ServicePort: v1.ServicePort{}, + IsMirrorBackend: ref.MirrorBackendIdx != nil, + IsInferencePool: ref.IsInferencePool, + InvalidForGateways: make(map[types.NamespacedName]conditions.Condition), + EndpointPickerConfig: ref.EndpointPickerConfig, + EndpointPickerNsName: ref.EndpointPickerNsName, } return backendRef, []conditions.Condition{conditions.NewRouteBackendRefRefBackendNotFound(err.Error())} @@ -191,12 +245,15 @@ func createBackendRef( // Check if externalName field is empty or whitespace-only if strings.TrimSpace(svc.Spec.ExternalName) == "" { backendRef := BackendRef{ - SvcNsName: svcNsName, - ServicePort: svcPort, - Weight: weight, - Valid: false, - IsMirrorBackend: ref.MirrorBackendIdx != nil, - 
InvalidForGateways: invalidForGateways, + SvcNsName: svcNsName, + ServicePort: svcPort, + Weight: weight, + Valid: false, + IsMirrorBackend: ref.MirrorBackendIdx != nil, + IsInferencePool: ref.IsInferencePool, + InvalidForGateways: invalidForGateways, + EndpointPickerConfig: ref.EndpointPickerConfig, + EndpointPickerNsName: ref.EndpointPickerNsName, } return backendRef, append(conds, conditions.NewRouteBackendRefUnsupportedValue( @@ -220,12 +277,15 @@ func createBackendRef( ) if err != nil { backendRef := BackendRef{ - SvcNsName: svcNsName, - ServicePort: svcPort, - Weight: weight, - Valid: false, - IsMirrorBackend: ref.MirrorBackendIdx != nil, - InvalidForGateways: invalidForGateways, + SvcNsName: svcNsName, + ServicePort: svcPort, + Weight: weight, + Valid: false, + IsMirrorBackend: ref.MirrorBackendIdx != nil, + IsInferencePool: ref.IsInferencePool, + InvalidForGateways: invalidForGateways, + EndpointPickerConfig: ref.EndpointPickerConfig, + EndpointPickerNsName: ref.EndpointPickerNsName, } return backendRef, append(conds, conditions.NewRouteBackendRefUnsupportedValue(err.Error())) @@ -235,13 +295,16 @@ func createBackendRef( err = validateRouteBackendRefAppProtocol(route.RouteType, *svcPort.AppProtocol, backendTLSPolicy) if err != nil { backendRef := BackendRef{ - SvcNsName: svcNsName, - BackendTLSPolicy: backendTLSPolicy, - ServicePort: svcPort, - Weight: weight, - Valid: false, - IsMirrorBackend: ref.MirrorBackendIdx != nil, - InvalidForGateways: invalidForGateways, + SvcNsName: svcNsName, + BackendTLSPolicy: backendTLSPolicy, + ServicePort: svcPort, + Weight: weight, + Valid: false, + IsMirrorBackend: ref.MirrorBackendIdx != nil, + IsInferencePool: ref.IsInferencePool, + InvalidForGateways: invalidForGateways, + EndpointPickerConfig: ref.EndpointPickerConfig, + EndpointPickerNsName: ref.EndpointPickerNsName, } return backendRef, append(conds, conditions.NewRouteBackendRefUnsupportedProtocol(err.Error())) @@ -249,13 +312,16 @@ func createBackendRef( } 
backendRef := BackendRef{ - SvcNsName: svcNsName, - BackendTLSPolicy: backendTLSPolicy, - ServicePort: svcPort, - Valid: true, - Weight: weight, - IsMirrorBackend: ref.MirrorBackendIdx != nil, - InvalidForGateways: invalidForGateways, + SvcNsName: svcNsName, + BackendTLSPolicy: backendTLSPolicy, + ServicePort: svcPort, + Valid: true, + Weight: weight, + IsMirrorBackend: ref.MirrorBackendIdx != nil, + IsInferencePool: ref.IsInferencePool, + InvalidForGateways: invalidForGateways, + EndpointPickerConfig: ref.EndpointPickerConfig, + EndpointPickerNsName: ref.EndpointPickerNsName, } return backendRef, conds @@ -440,6 +506,7 @@ func checkExternalNameValidForGateways( } func validateRouteBackendRef( + routeType RouteType, ref RouteBackendRef, routeNs string, refGrantResolver func(resource toResource) bool, @@ -451,6 +518,10 @@ func validateRouteBackendRef( return false, conditions.NewRouteBackendRefUnsupportedValue(valErr.Error()) } + if routeType == RouteTypeHTTP { + return validateBackendRefHTTPRoute(ref, routeNs, refGrantResolver, path) + } + return validateBackendRef(ref.BackendRef, routeNs, refGrantResolver, path) } @@ -502,6 +573,120 @@ func validateBackendRef( return true, conditions.Condition{} } +func validateBackendRefHTTPRoute( + ref RouteBackendRef, + routeNs string, + refGrantResolver func(toResource toResource) bool, + path *field.Path, +) (valid bool, cond conditions.Condition) { + // Because all errors cause same condition but different reasons, we return as soon as we find an error + + if valid, cond := validateBackendRefHTTPRouteGroupKind(ref.BackendRef, path); !valid { + return false, cond + } + + // no need to validate ref.Name + + if ref.Namespace != nil && string(*ref.Namespace) != routeNs { + var inferencePool bool + var inferencePoolName types.NamespacedName + + switch { + case ref.Kind != nil && *ref.Kind == kinds.InferencePool: + inferencePool = true + inferencePoolName = types.NamespacedName{ + Namespace: string(*ref.Namespace), + Name: 
string(ref.Name), + } + case ref.IsInferencePool: + // Case where RouteBackendRef has been updated with headless Service backend for the InferencePool + inferencePool = true + inferencePoolName = types.NamespacedName{ + Namespace: string(*ref.Namespace), + Name: controller.GetInferencePoolName(string(ref.Name)), + } + default: + refNsName := types.NamespacedName{Namespace: string(*ref.Namespace), Name: string(ref.Name)} + + if !refGrantResolver(toService(refNsName)) { + msg := fmt.Sprintf("Backend ref to Service %s not permitted by any ReferenceGrant", refNsName) + valErr := field.Forbidden(path.Child("namespace"), msg) + + return false, conditions.NewRouteBackendRefRefNotPermitted(valErr.Error()) + } + } + + if inferencePool { + if !refGrantResolver(toInferencePool(inferencePoolName)) { + msg := fmt.Sprintf( + "Backend ref to InferencePool %s not permitted by any ReferenceGrant", + inferencePoolName, + ) + valErr := field.Forbidden(path.Child("namespace"), msg) + return false, conditions.NewRouteBackendRefRefNotPermitted(valErr.Error()) + } + } + } + + if ref.Port == nil && (ref.Kind == nil || *ref.Kind == kinds.Service) { + valErr := field.Required(path.Child("port"), "port cannot be nil") + return false, conditions.NewRouteBackendRefUnsupportedValue(valErr.Error()) + } + + // any value of port is OK + + if ref.Weight != nil { + if err := validateWeight(*ref.Weight); err != nil { + valErr := field.Invalid(path.Child("weight"), *ref.Weight, err.Error()) + return false, conditions.NewRouteBackendRefUnsupportedValue(valErr.Error()) + } + } + + return true, conditions.Condition{} +} + +func validateBackendRefHTTPRouteGroupKind( + ref gatewayv1.BackendRef, + path *field.Path, +) (bool, conditions.Condition) { + if ref.Group != nil { + group := *ref.Group + if group != "core" && group != "" && group != inferenceAPIGroup { + valErr := field.NotSupported(path.Child("group"), group, []string{"core", "", inferenceAPIGroup}) + return false, 
conditions.NewRouteBackendRefInvalidKind(valErr.Error()) + } + if group == inferenceAPIGroup { + if ref.Kind == nil || *ref.Kind != kinds.InferencePool { + valErr := field.Invalid( + path.Child("kind"), + ref.Kind, + fmt.Sprintf("kind must be InferencePool when group is %s", inferenceAPIGroup), + ) + return false, conditions.NewRouteBackendRefInvalidKind(valErr.Error()) + } + } + } + + if ref.Kind != nil { + kind := *ref.Kind + if kind != kinds.Service && kind != kinds.InferencePool { + valErr := field.NotSupported(path.Child("kind"), kind, []string{kinds.Service, kinds.InferencePool}) + return false, conditions.NewRouteBackendRefInvalidKind(valErr.Error()) + } + if kind == kinds.InferencePool { + if ref.Group == nil || *ref.Group != inferenceAPIGroup { + valErr := field.Invalid( + path.Child("group"), + ref.Group, + fmt.Sprintf("group must be %s when kind is InferencePool", inferenceAPIGroup), + ) + return false, conditions.NewRouteBackendRefInvalidKind(valErr.Error()) + } + } + } + return true, conditions.Condition{} +} + // validateRouteBackendRefAppProtocol checks if a given RouteType supports sending traffic to a service AppProtocol. // Returns nil if true or AppProtocol is not a Kubernetes Standard Application Protocol. 
func validateRouteBackendRefAppProtocol( diff --git a/internal/controller/state/graph/backend_refs_test.go b/internal/controller/state/graph/backend_refs_test.go index 6e07bad538..b786daed9b 100644 --- a/internal/controller/state/graph/backend_refs_test.go +++ b/internal/controller/state/graph/backend_refs_test.go @@ -11,13 +11,16 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/validation/field" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" "sigs.k8s.io/gateway-api/apis/v1alpha3" ngfAPIv1alpha2 "github.com/nginx/nginx-gateway-fabric/v2/apis/v1alpha2" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" ) func getNormalRef() gatewayv1.BackendRef { @@ -36,16 +39,37 @@ func getModifiedRef(mod func(ref gatewayv1.BackendRef) gatewayv1.BackendRef) gat return mod(getNormalRef()) } +func getNormalRouteBackendRef() RouteBackendRef { + return RouteBackendRef{ + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Kind: helpers.GetPointer[gatewayv1.Kind]("Service"), + Name: "service1", + Namespace: helpers.GetPointer[gatewayv1.Namespace]("test"), + Port: helpers.GetPointer[gatewayv1.PortNumber](80), + }, + Weight: helpers.GetPointer[int32](5), + }, + } +} + +func getModifiedRouteBackendRef(mod func(ref RouteBackendRef) RouteBackendRef) RouteBackendRef { + return mod(getNormalRouteBackendRef()) +} + func TestValidateRouteBackendRef(t *testing.T) { t.Parallel() + tests := []struct { + routeType RouteType expectedCondition conditions.Condition name string ref RouteBackendRef expectedValid bool }{ { - name: "normal case", + name: 
"normal case", + routeType: RouteTypeHTTP, ref: RouteBackendRef{ BackendRef: getNormalRef(), Filters: nil, @@ -53,7 +77,44 @@ func TestValidateRouteBackendRef(t *testing.T) { expectedValid: true, }, { - name: "filters not supported", + name: "normal case grpc", + routeType: RouteTypeGRPC, + ref: RouteBackendRef{ + BackendRef: getNormalRef(), + Filters: nil, + }, + expectedValid: true, + }, + { + name: "normal case; inferencepool backend", + routeType: RouteTypeHTTP, + ref: RouteBackendRef{ + BackendRef: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef { + backend.BackendObjectReference = gatewayv1.BackendObjectReference{ + Group: helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup), + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool), + Name: "ipool", + } + return backend + }), + }, + expectedValid: true, + }, + { + name: "normal case; headless Service inferencepool backend", + routeType: RouteTypeHTTP, + ref: RouteBackendRef{ + BackendRef: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef { + backend.Name = gatewayv1.ObjectName(controller.CreateInferencePoolServiceName("ipool")) + return backend + }), + IsInferencePool: true, + }, + expectedValid: true, + }, + { + name: "filters not supported", + routeType: RouteTypeHTTP, ref: RouteBackendRef{ BackendRef: getNormalRef(), Filters: []any{ @@ -70,7 +131,8 @@ func TestValidateRouteBackendRef(t *testing.T) { ), }, { - name: "invalid base ref", + name: "invalid base ref", + routeType: RouteTypeHTTP, ref: RouteBackendRef{ BackendRef: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef { backend.Kind = helpers.GetPointer[gatewayv1.Kind]("NotService") @@ -79,7 +141,7 @@ func TestValidateRouteBackendRef(t *testing.T) { }, expectedValid: false, expectedCondition: conditions.NewRouteBackendRefInvalidKind( - `test.kind: Unsupported value: "NotService": supported values: "Service"`, + `test.kind: Unsupported value: "NotService": supported values: 
"Service", "InferencePool"`, ), }, } @@ -90,7 +152,13 @@ func TestValidateRouteBackendRef(t *testing.T) { g := NewWithT(t) alwaysTrueRefGrantResolver := func(_ toResource) bool { return true } - valid, cond := validateRouteBackendRef(test.ref, "test", alwaysTrueRefGrantResolver, field.NewPath("test")) + valid, cond := validateRouteBackendRef( + test.routeType, + test.ref, + "test", + alwaysTrueRefGrantResolver, + field.NewPath("test"), + ) g.Expect(valid).To(Equal(test.expectedValid)) g.Expect(cond).To(Equal(test.expectedCondition)) @@ -156,7 +224,7 @@ func TestValidateBackendRef(t *testing.T) { ), }, { - name: "not a service kind", + name: "invalid kind", ref: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef { backend.Kind = helpers.GetPointer[gatewayv1.Kind]("NotService") return backend @@ -218,6 +286,209 @@ func TestValidateBackendRef(t *testing.T) { } } +func TestValidateBackendRefHTTPRoute(t *testing.T) { + t.Parallel() + + alwaysFalseRefGrantResolver := func(_ toResource) bool { return false } + alwaysTrueRefGrantResolver := func(_ toResource) bool { return true } + + tests := []struct { + refGrantResolver func(resource toResource) bool + expectedCondition conditions.Condition + name string + ref RouteBackendRef + expectedValid bool + }{ + { + name: "normal case", + ref: getNormalRouteBackendRef(), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: true, + }, + { + name: "normal case with implicit namespace", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Namespace = nil + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: true, + }, + { + name: "normal case with implicit kind Service", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Kind = nil + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: true, + }, + { + name: "normal case with InferencePool", + ref: 
getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Group = helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup) + backend.Kind = helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool) + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: true, + }, + { + name: "group is inference group but kind is not InferencePool", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Group = helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup) + backend.Kind = helpers.GetPointer[gatewayv1.Kind](kinds.Service) + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefInvalidKind( + `test.kind: Invalid value: "Service": kind must be InferencePool when group is inference.networking.k8s.io`, + ), + }, + { + name: "kind is InferencePool but group is not inference", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Kind = helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool) + backend.Group = helpers.GetPointer[gatewayv1.Group]("core") + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefInvalidKind( + `test.group: Invalid value: "core": group must be inference.networking.k8s.io when kind is InferencePool`, + ), + }, + { + name: "normal case with backend ref allowed by reference grant", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Namespace = helpers.GetPointer[gatewayv1.Namespace]("cross-ns") + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: true, + }, + { + name: "inferencepool backend ref not allowed by reference grant", + ref: RouteBackendRef{ + BackendRef: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef { + backend.BackendObjectReference = 
gatewayv1.BackendObjectReference{ + Group: helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup), + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool), + Name: "ipool", + Namespace: helpers.GetPointer[gatewayv1.Namespace]("invalid"), + } + return backend + }), + }, + refGrantResolver: alwaysFalseRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefRefNotPermitted( + "test.namespace: Forbidden: Backend ref to InferencePool invalid/ipool not permitted by any ReferenceGrant", + ), + }, + { + name: "headless Service inferencepool backend ref not allowed by reference grant", + ref: RouteBackendRef{ + BackendRef: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef { + backend.Name = gatewayv1.ObjectName(controller.CreateInferencePoolServiceName("ipool")) + backend.Namespace = helpers.GetPointer[gatewayv1.Namespace]("invalid") + return backend + }), + IsInferencePool: true, + }, + refGrantResolver: alwaysFalseRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefRefNotPermitted( + "test.namespace: Forbidden: Backend ref to InferencePool invalid/ipool not permitted by any ReferenceGrant", + ), + }, + { + name: "invalid group", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Group = helpers.GetPointer[gatewayv1.Group]("invalid") + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefInvalidKind( + `test.group: Unsupported value: "invalid": supported values: "core", "", "inference.networking.k8s.io"`, + ), + }, + { + name: "invalid kind", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Kind = helpers.GetPointer[gatewayv1.Kind]("NotService") + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: false, + expectedCondition: 
conditions.NewRouteBackendRefInvalidKind( + `test.kind: Unsupported value: "NotService": supported values: "Service", "InferencePool"`, + ), + }, + { + name: "backend ref not allowed by reference grant", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Namespace = helpers.GetPointer[gatewayv1.Namespace]("invalid") + return backend + }), + refGrantResolver: alwaysFalseRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefRefNotPermitted( + "test.namespace: Forbidden: Backend ref to Service invalid/service1 not permitted by any ReferenceGrant", + ), + }, + { + name: "invalid weight", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Weight = helpers.GetPointer[int32](-1) + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefUnsupportedValue( + "test.weight: Invalid value: -1: must be in the range [0, 1000000]", + ), + }, + { + name: "nil port", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Port = nil + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefUnsupportedValue( + "test.port: Required value: port cannot be nil", + ), + }, + { + name: "nil port allowed for InferencePool kind", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Kind = helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool) + backend.Group = helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup) + backend.Port = nil + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: true, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + valid, cond := validateBackendRefHTTPRoute(test.ref, "test", test.refGrantResolver, 
field.NewPath("test")) + + g.Expect(valid).To(Equal(test.expectedValid)) + g.Expect(cond).To(Equal(test.expectedCondition)) + }) + } +} + func TestValidateWeight(t *testing.T) { t.Parallel() validWeights := []int32{0, 1, 1000000} @@ -523,13 +794,21 @@ func TestAddBackendRefsToRules(t *testing.T) { Name: "svcGRPC", } + svcInferenceName := controller.CreateInferencePoolServiceName("ipool") + svcInference := getSvc(svcInferenceName) + svcInferenceNsName := types.NamespacedName{ + Namespace: "test", + Name: svcInferenceName, + } + services := map[types.NamespacedName]*v1.Service{ - {Namespace: "test", Name: "svc1"}: svc1, - {Namespace: "test", Name: "svc2"}: svc2, - {Namespace: "test", Name: "svcH2c"}: svcH2c, - {Namespace: "test", Name: "svcWS"}: svcWS, - {Namespace: "test", Name: "svcWSS"}: svcWSS, - {Namespace: "test", Name: "svcGRPC"}: svcGRPC, + svc1NsName: svc1, + svc2NsName: svc2, + svcH2cNsName: svcH2c, + svcWSNsName: svcWS, + svcWSSNsName: svcWSS, + svcGRPCNsName: svcGRPC, + svcInferenceNsName: svcInference, } emptyPolicies := map[types.NamespacedName]*BackendTLSPolicy{} @@ -892,7 +1171,7 @@ func TestAddBackendRefsToRules(t *testing.T) { }, expectedConditions: []conditions.Condition{ conditions.NewRouteBackendRefInvalidKind( - `spec.rules[0].backendRefs[0].kind: Unsupported value: "NotService": supported values: "Service"`, + `spec.rules[0].backendRefs[0].kind: Unsupported value: "NotService": supported values: "Service", "InferencePool"`, ), }, policies: emptyPolicies, @@ -938,6 +1217,31 @@ func TestAddBackendRefsToRules(t *testing.T) { expectedConditions: nil, name: "zero backendRefs", }, + { + route: func() *L7Route { + route := createRoute("hr-inference", RouteTypeHTTP, "Service", 1, svcInferenceName) + // Mark the backend ref as IsInferencePool and set the port to nil (simulate InferencePool logic) + route.Spec.Rules[0].RouteBackendRefs[0].IsInferencePool = true + route.Spec.Rules[0].RouteBackendRefs[0].Port = nil + return route + }(), + 
expectedBackendRefs: []BackendRef{ + { + SvcNsName: types.NamespacedName{Namespace: "test", Name: svcInferenceName}, + ServicePort: v1.ServicePort{ + Port: 80, + }, + Valid: true, + Weight: 1, + InvalidForGateways: map[types.NamespacedName]conditions.Condition{}, + IsInferencePool: true, + EndpointPickerConfig: &inference.EndpointPickerRef{}, + }, + }, + expectedConditions: nil, + policies: emptyPolicies, + name: "headless Service for InferencePool gets port set correctly", + }, } for _, test := range tests { @@ -946,7 +1250,22 @@ func TestAddBackendRefsToRules(t *testing.T) { g := NewWithT(t) resolver := newReferenceGrantResolver(nil) - addBackendRefsToRules(test.route, resolver, services, test.policies) + + referencedInferencePools := map[types.NamespacedName]*ReferencedInferencePool{ + {Namespace: "test", Name: "ipool"}: { + Source: &inference.InferencePool{ + Spec: inference.InferencePoolSpec{ + TargetPorts: []inference.Port{ + { + Number: 80, + }, + }, + }, + }, + }, + } + + addBackendRefsToRules(test.route, resolver, services, referencedInferencePools, test.policies) var actual []BackendRef if test.route.Spec.Rules != nil { @@ -1169,7 +1488,7 @@ func TestCreateBackend(t *testing.T) { expectedServicePortReference: "", expectedConditions: []conditions.Condition{ conditions.NewRouteBackendRefInvalidKind( - `test.kind: Unsupported value: "NotService": supported values: "Service"`, + `test.kind: Unsupported value: "NotService": supported values: "Service", "InferencePool"`, ), }, name: "invalid kind", @@ -1403,11 +1722,13 @@ func TestCreateBackend(t *testing.T) { g := NewWithT(t) rbr := RouteBackendRef{ - nil, - test.ref.BackendRef, - []any{}, + MirrorBackendIdx: nil, + IsInferencePool: false, + BackendRef: test.ref.BackendRef, + Filters: []any{}, } route := &L7Route{ + RouteType: RouteTypeHTTP, Source: &gatewayv1.HTTPRoute{ ObjectMeta: metav1.ObjectMeta{ Namespace: "test", @@ -1467,12 +1788,14 @@ func TestCreateBackend(t *testing.T) { // test mirror backend case 
g := NewWithT(t) ref := RouteBackendRef{ - helpers.GetPointer(0), // mirrorFilterIdx - getNormalRef(), - []any{}, + MirrorBackendIdx: helpers.GetPointer(0), + IsInferencePool: false, + BackendRef: getNormalRef(), + Filters: []any{}, } route := &L7Route{ + RouteType: RouteTypeHTTP, Source: &gatewayv1.HTTPRoute{ ObjectMeta: metav1.ObjectMeta{ Namespace: "test", diff --git a/internal/controller/state/graph/graph.go b/internal/controller/state/graph/graph.go index e556c798ba..538a29a09d 100644 --- a/internal/controller/state/graph/graph.go +++ b/internal/controller/state/graph/graph.go @@ -9,6 +9,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -40,6 +41,7 @@ type ClusterState struct { GRPCRoutes map[types.NamespacedName]*gatewayv1.GRPCRoute NGFPolicies map[PolicyKey]policies.Policy SnippetsFilters map[types.NamespacedName]*ngfAPIv1alpha1.SnippetsFilter + InferencePools map[types.NamespacedName]*inference.InferencePool } // Graph is a Graph-like representation of Gateway API resources. @@ -65,6 +67,9 @@ type Graph struct { ReferencedNamespaces map[types.NamespacedName]*v1.Namespace // ReferencedServices includes the NamespacedNames of all the Services that are referenced by at least one Route. ReferencedServices map[types.NamespacedName]*ReferencedService + // ReferencedInferencePools includes the NamespacedNames of all the InferencePools + // that are referenced by at least one Route. + ReferencedInferencePools map[types.NamespacedName]*ReferencedInferencePool // ReferencedCaCertConfigMaps includes ConfigMaps that have been referenced by any BackendTLSPolicies. 
ReferencedCaCertConfigMaps map[types.NamespacedName]*CaCertConfigMap // ReferencedNginxProxies includes NginxProxies that have been referenced by a GatewayClass or a Gateway. @@ -115,11 +120,15 @@ func (g *Graph) IsReferenced(resourceType ngftypes.ObjectType, nsname types.Name _, existed := g.ReferencedNamespaces[nsname] exists := isNamespaceReferenced(obj, g.Gateways) return existed || exists - // Service reference exists if at least one HTTPRoute references it. + // Service reference exists if at least one Route references it. case *v1.Service: _, exists := g.ReferencedServices[nsname] return exists - // EndpointSlice reference exists if its Service owner is referenced by at least one HTTPRoute. + // InferencePool reference exists if at least one Route references it. + case *inference.InferencePool: + _, exists := g.ReferencedInferencePools[nsname] + return exists + // EndpointSlice reference exists if its Service owner is referenced by at least one Route. case *discoveryV1.EndpointSlice: svcName := index.GetServiceNameFromEndpointSlice(obj) @@ -249,8 +258,11 @@ func BuildGraph( state.GRPCRoutes, gws, processedSnippetsFilters, + state.InferencePools, ) + referencedInferencePools := buildReferencedInferencePools(routes, gws, state.InferencePools, state.Services) + l4routes := buildL4RoutesForGateways( state.TLSRoutes, state.Services, @@ -262,6 +274,7 @@ func BuildGraph( routes, refGrantResolver, state.Services, + referencedInferencePools, processedBackendTLSPolicies, ) bindRoutesToListeners(routes, l4routes, gws, state.Namespaces) @@ -295,6 +308,7 @@ func BuildGraph( ReferencedSecrets: secretResolver.getResolvedSecrets(), ReferencedNamespaces: referencedNamespaces, ReferencedServices: referencedServices, + ReferencedInferencePools: referencedInferencePools, ReferencedCaCertConfigMaps: configMapResolver.getResolvedConfigMaps(), ReferencedNginxProxies: processedNginxProxies, BackendTLSPolicies: processedBackendTLSPolicies, diff --git 
a/internal/controller/state/graph/graph_test.go b/internal/controller/state/graph/graph_test.go index ac5cfff3a2..a49202d96e 100644 --- a/internal/controller/state/graph/graph_test.go +++ b/internal/controller/state/graph/graph_test.go @@ -13,6 +13,7 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -25,6 +26,7 @@ import ( "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/validation" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/validation/validationfakes" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller/index" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" @@ -214,6 +216,49 @@ func TestBuildGraph(t *testing.T) { return rule } + createValidRuleWithInferencePoolBackendRef := func(matches []gatewayv1.HTTPRouteMatch) RouteRule { + refs := []BackendRef{ + { + SvcNsName: types.NamespacedName{ + Namespace: testNs, + Name: controller.CreateInferencePoolServiceName("ipool"), + }, + ServicePort: v1.ServicePort{Port: 80}, + Valid: true, + Weight: 1, + InvalidForGateways: map[types.NamespacedName]conditions.Condition{}, + IsInferencePool: true, + EndpointPickerConfig: &inference.EndpointPickerRef{ + Kind: kinds.Service, + Name: inference.ObjectName(controller.CreateInferencePoolServiceName("ipool")), + }, + }, + } + rbrs := []RouteBackendRef{ + { + IsInferencePool: true, + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Group: 
helpers.GetPointer[gatewayv1.Group](""), + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service), + Name: gatewayv1.ObjectName(controller.CreateInferencePoolServiceName("ipool")), + Namespace: helpers.GetPointer(gatewayv1.Namespace(testNs)), + }, + }, + }, + } + return RouteRule{ + ValidMatches: true, + Filters: RouteRuleFilters{ + Filters: []Filter{}, + Valid: true, + }, + BackendRefs: refs, + Matches: matches, + RouteBackendRefs: rbrs, + } + } + routeMatches := []gatewayv1.HTTPRouteMatch{ { Path: &gatewayv1.HTTPPathMatch{ @@ -338,6 +383,36 @@ func TestBuildGraph(t *testing.T) { }, } + inferencePool := &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNs, + Name: "ipool", + }, + Spec: inference.InferencePoolSpec{ + TargetPorts: []inference.Port{ + {Number: 80}, + }, + EndpointPickerRef: inference.EndpointPickerRef{ + Kind: kinds.Service, + Name: inference.ObjectName(controller.CreateInferencePoolServiceName("ipool")), + }, + }, + } + + ir := createRoute("ir", "gateway-1", "listener-80-1") + ir.Spec.Hostnames = []gatewayv1.Hostname{"inference.example.com"} + // Update the backend ref to point to the InferencePool instead of a Service + ir.Spec.Rules[0].BackendRefs[0] = gatewayv1.HTTPBackendRef{ + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool), + Group: helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup), + Name: gatewayv1.ObjectName(inferencePool.Name), + Namespace: helpers.GetPointer(gatewayv1.Namespace(inferencePool.Namespace)), + }, + }, + } + secret := &v1.Secret{ TypeMeta: metav1.TypeMeta{ Kind: "Secret", @@ -489,7 +564,20 @@ func TestBuildGraph(t *testing.T) { svc1 := &v1.Service{ ObjectMeta: metav1.ObjectMeta{ - Namespace: "test", Name: "foo2", + Namespace: testNs, Name: "foo2", + }, + Spec: v1.ServiceSpec{ + Ports: []v1.ServicePort{ + { + Port: 80, + }, + }, + }, + } + + inferenceSvc := &v1.Service{ + ObjectMeta: 
metav1.ObjectMeta{ + Namespace: testNs, Name: controller.CreateInferencePoolServiceName(inferencePool.Name), }, Spec: v1.ServiceSpec{ Ports: []v1.ServicePort{ @@ -691,6 +779,7 @@ func TestBuildGraph(t *testing.T) { client.ObjectKeyFromObject(hr1): hr1, client.ObjectKeyFromObject(hr2): hr2, client.ObjectKeyFromObject(hr3): hr3, + client.ObjectKeyFromObject(ir): ir, }, TLSRoutes: map[types.NamespacedName]*v1alpha2.TLSRoute{ client.ObjectKeyFromObject(tr): tr, @@ -700,8 +789,12 @@ func TestBuildGraph(t *testing.T) { client.ObjectKeyFromObject(gr): gr, }, Services: map[types.NamespacedName]*v1.Service{ - client.ObjectKeyFromObject(svc): svc, - client.ObjectKeyFromObject(svc1): svc1, + client.ObjectKeyFromObject(svc): svc, + client.ObjectKeyFromObject(svc1): svc1, + client.ObjectKeyFromObject(inferenceSvc): inferenceSvc, + }, + InferencePools: map[types.NamespacedName]*inference.InferencePool{ + client.ObjectKeyFromObject(inferencePool): inferencePool, }, Namespaces: map[types.NamespacedName]*v1.Namespace{ client.ObjectKeyFromObject(ns): ns, @@ -992,6 +1085,37 @@ func TestBuildGraph(t *testing.T) { }, } + inferenceRoute := &L7Route{ + RouteType: RouteTypeHTTP, + Valid: true, + Attachable: true, + Source: ir, + ParentRefs: []ParentRef{ + { + Idx: 0, + Gateway: &ParentRefGateway{ + NamespacedName: client.ObjectKeyFromObject(gw1.Source), + EffectiveNginxProxy: np1Effective, + }, + SectionName: ir.Spec.ParentRefs[0].SectionName, + Attachment: &ParentRefAttachmentStatus{ + Attached: true, + AcceptedHostnames: map[string][]string{ + CreateGatewayListenerKey( + client.ObjectKeyFromObject(gw1.Source), + "listener-80-1", + ): {"inference.example.com"}, + }, + ListenerPort: 80, + }, + }, + }, + Spec: L7RouteSpec{ + Hostnames: ir.Spec.Hostnames, + Rules: []RouteRule{createValidRuleWithInferencePoolBackendRef(routeMatches)}, + }, + } + supportedKindsForListeners := []gatewayv1.RouteGroupKind{ {Kind: gatewayv1.Kind(kinds.HTTPRoute), Group: 
helpers.GetPointer[gatewayv1.Group](gatewayv1.GroupName)}, {Kind: gatewayv1.Kind(kinds.GRPCRoute), Group: helpers.GetPointer[gatewayv1.Group](gatewayv1.GroupName)}, @@ -1021,6 +1145,7 @@ func TestBuildGraph(t *testing.T) { Routes: map[RouteKey]*L7Route{ CreateRouteKey(hr1): routeHR1, CreateRouteKey(gr): routeGR, + CreateRouteKey(ir): inferenceRoute, }, SupportedKinds: supportedKindsForListeners, L4Routes: map[L4RouteKey]*L4Route{}, @@ -1175,6 +1300,7 @@ func TestBuildGraph(t *testing.T) { CreateRouteKey(hr1): routeHR1, CreateRouteKey(hr3): routeHR3, CreateRouteKey(gr): routeGR, + CreateRouteKey(ir): inferenceRoute, }, L4Routes: map[L4RouteKey]*L4Route{ CreateRouteKeyL4(tr): routeTR, @@ -1199,6 +1325,21 @@ func TestBuildGraph(t *testing.T) { client.ObjectKeyFromObject(svc1): { GatewayNsNames: map[types.NamespacedName]struct{}{{Namespace: testNs, Name: "gateway-1"}: {}}, }, + client.ObjectKeyFromObject(inferenceSvc): { + GatewayNsNames: map[types.NamespacedName]struct{}{{Namespace: testNs, Name: "gateway-1"}: {}}, + }, + }, + ReferencedInferencePools: map[types.NamespacedName]*ReferencedInferencePool{ + client.ObjectKeyFromObject(inferencePool): { + Source: inferencePool, + Gateways: []*gatewayv1.Gateway{ + gw1.Source, + }, + HTTPRoutes: []*L7Route{ + inferenceRoute, + }, + Conditions: []conditions.Condition{}, + }, }, ReferencedCaCertConfigMaps: map[types.NamespacedName]*CaCertConfigMap{ client.ObjectKeyFromObject(cm): { @@ -1382,6 +1523,20 @@ func TestIsReferenced(t *testing.T) { } emptyService := &v1.Service{} + inferenceInGraph := &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "default", + Name: "inferenceInGraph", + }, + } + inferenceNotInGraph := &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "default", + Name: "inferenceNotInGraph", + }, + } + emptyInferencePool := &inference.InferencePool{} + createEndpointSlice := func(name string, svcName string) *discoveryV1.EndpointSlice { return 
&discoveryV1.EndpointSlice{ ObjectMeta: metav1.ObjectMeta{ @@ -1461,6 +1616,9 @@ func TestIsReferenced(t *testing.T) { ReferencedServices: map[types.NamespacedName]*ReferencedService{ client.ObjectKeyFromObject(serviceInGraph): {}, }, + ReferencedInferencePools: map[types.NamespacedName]*ReferencedInferencePool{ + client.ObjectKeyFromObject(inferenceInGraph): {}, + }, ReferencedCaCertConfigMaps: map[types.NamespacedName]*CaCertConfigMap{ client.ObjectKeyFromObject(baseConfigMap): { Source: baseConfigMap, @@ -1562,6 +1720,26 @@ func TestIsReferenced(t *testing.T) { expected: false, }, + // InferencePool tests + { + name: "InferencePool is referenced", + resource: inferenceInGraph, + graph: graph, + expected: true, + }, + { + name: "InferencePool is not referenced", + resource: inferenceNotInGraph, + graph: graph, + expected: false, + }, + { + name: "Empty InferencePool", + resource: emptyInferencePool, + graph: graph, + expected: false, + }, + // EndpointSlice tests { name: "EndpointSlice with Service owner in graph's ReferencedServices is referenced", diff --git a/internal/controller/state/graph/grpcroute_test.go b/internal/controller/state/graph/grpcroute_test.go index 8579c54627..71f87d58c0 100644 --- a/internal/controller/state/graph/grpcroute_test.go +++ b/internal/controller/state/graph/grpcroute_test.go @@ -230,6 +230,7 @@ func TestBuildGRPCRoutes(t *testing.T) { grRoutes, test.gateways, snippetsFilters, + nil, ) g.Expect(helpers.Diff(test.expected, routes)).To(BeEmpty()) }) diff --git a/internal/controller/state/graph/httproute.go b/internal/controller/state/graph/httproute.go index 48415d0573..ed8d46a664 100644 --- a/internal/controller/state/graph/httproute.go +++ b/internal/controller/state/graph/httproute.go @@ -7,13 +7,16 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/validation/field" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" v1 
"sigs.k8s.io/gateway-api/apis/v1" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/nginx/config/http" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/mirror" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/validation" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" ) var ( @@ -27,6 +30,7 @@ func buildHTTPRoute( ghr *v1.HTTPRoute, gws map[types.NamespacedName]*Gateway, snippetsFilters map[types.NamespacedName]*SnippetsFilter, + inferencePools map[types.NamespacedName]*inference.InferencePool, ) *L7Route { r := &L7Route{ Source: ghr, @@ -59,9 +63,10 @@ func buildHTTPRoute( r.Attachable = true rules, valid, conds := processHTTPRouteRules( - ghr.Spec.Rules, + ghr, validator, getSnippetsFilterResolverForNamespace(snippetsFilters, r.Source.GetNamespace()), + inferencePools, ) r.Spec.Rules = rules @@ -113,6 +118,7 @@ func buildHTTPMirrorRoutes( tmpMirrorRoute, gateways, snippetsFilters, + nil, ) if mirrorRoute != nil { @@ -163,9 +169,11 @@ func removeHTTPMirrorFilters(filters []v1.HTTPRouteFilter) []v1.HTTPRouteFilter func processHTTPRouteRule( specRule v1.HTTPRouteRule, + routeNamespace string, rulePath *field.Path, validator validation.HTTPFieldsValidator, resolveExtRefFunc resolveExtRefFilter, + inferencePools map[types.NamespacedName]*inference.InferencePool, ) (RouteRule, routeRuleErrors) { var errors routeRuleErrors @@ -201,10 +209,43 @@ func processHTTPRouteRule( interfaceFilters = append(interfaceFilters, filter) } } + rbr := RouteBackendRef{ BackendRef: b.BackendRef, - Filters: interfaceFilters, } + + // If route specifies an InferencePool backend, we need to convert it to its associated + // headless Service backend (that we created), so nginx config can be built 
properly. + // Only do this if the InferencePool actually exists. + if inferencePoolBackend(b, routeNamespace, inferencePools) { + // We don't support traffic splitting at the Route level for + // InferencePool backends, so if there's more than one backendRef, and one of them + // is an InferencePool, we mark the rule as invalid. + if len(specRule.BackendRefs) > 1 { + err := field.Forbidden( + rulePath.Child("backendRefs"), + "cannot use InferencePool backend when multiple backendRefs are specified in a single rule", + ) + errors.invalid = append(errors.invalid, err) + break + } + + svcName := controller.CreateInferencePoolServiceName(string(b.Name)) + rbr = RouteBackendRef{ + IsInferencePool: true, + BackendRef: v1.BackendRef{ + BackendObjectReference: v1.BackendObjectReference{ + Group: helpers.GetPointer[v1.Group](""), + Kind: helpers.GetPointer[v1.Kind](kinds.Service), + Name: v1.ObjectName(svcName), + Namespace: b.Namespace, + }, + Weight: b.Weight, + }, + } + } + + rbr.Filters = interfaceFilters backendRefs = append(backendRefs, rbr) } @@ -233,25 +274,28 @@ func processHTTPRouteRule( } func processHTTPRouteRules( - specRules []v1.HTTPRouteRule, + route *v1.HTTPRoute, validator validation.HTTPFieldsValidator, resolveExtRefFunc resolveExtRefFilter, + inferencePools map[types.NamespacedName]*inference.InferencePool, ) (rules []RouteRule, valid bool, conds []conditions.Condition) { - rules = make([]RouteRule, len(specRules)) + rules = make([]RouteRule, len(route.Spec.Rules)) var ( allRulesErrors routeRuleErrors atLeastOneValid bool ) - for i, rule := range specRules { + for i, rule := range route.Spec.Rules { rulePath := field.NewPath("spec").Child("rules").Index(i) rr, errors := processHTTPRouteRule( rule, + route.GetNamespace(), rulePath, validator, resolveExtRefFunc, + inferencePools, ) if rr.ValidMatches && rr.Filters.Valid { @@ -288,6 +332,32 @@ func processHTTPRouteRules( return rules, valid, conds } +// inferencePoolBackend returns if a Route references an 
InferencePool backend +// and that InferencePool exists. +func inferencePoolBackend( + backendRef v1.HTTPBackendRef, + routeNamespace string, + inferencePools map[types.NamespacedName]*inference.InferencePool, +) bool { + if backendRef.Group != nil && + *backendRef.Group == inferenceAPIGroup && + *backendRef.Kind == kinds.InferencePool { + namespace := routeNamespace + if backendRef.Namespace != nil { + namespace = string(*backendRef.Namespace) + } + key := types.NamespacedName{ + Name: string(backendRef.Name), + Namespace: namespace, + } + if _, exists := inferencePools[key]; exists { + return true + } + } + + return false +} + func validateMatch( validator validation.HTTPFieldsValidator, match v1.HTTPRouteMatch, diff --git a/internal/controller/state/graph/httproute_test.go b/internal/controller/state/graph/httproute_test.go index 3b90b0970f..d6d77c7296 100644 --- a/internal/controller/state/graph/httproute_test.go +++ b/internal/controller/state/graph/httproute_test.go @@ -9,6 +9,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/validation/field" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" ngfAPI "github.com/nginx/nginx-gateway-fabric/v2/apis/v1alpha1" @@ -117,6 +118,17 @@ var expRouteBackendRef = RouteBackendRef{ }, } +func createInferencePoolBackend(name, namespace string) gatewayv1.BackendRef { + return gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Group: helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup), + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool), + Name: gatewayv1.ObjectName(name), + Namespace: helpers.GetPointer(gatewayv1.Namespace(namespace)), + }, + } +} + func TestBuildHTTPRoutes(t *testing.T) { t.Parallel() @@ -263,6 +275,7 @@ func TestBuildHTTPRoutes(t *testing.T) { map[types.NamespacedName]*gatewayv1.GRPCRoute{}, test.gateways, snippetsFilters, + nil, ) 
g.Expect(helpers.Diff(test.expected, routes)).To(BeEmpty()) }) @@ -377,6 +390,21 @@ func TestBuildHTTPRoute(t *testing.T) { addFilterToPath(hrInvalidAndUnresolvableSnippetsFilter, "/filter", invalidSnippetsFilterExtRef) addFilterToPath(hrInvalidAndUnresolvableSnippetsFilter, "/filter", unresolvableSnippetsFilterExtRef) + // routes with an inference pool backend + hrInferencePool := createHTTPRoute("hr", gatewayNsName.Name, "example.com", "/") + hrInferencePool.Spec.Rules[0].BackendRefs = []gatewayv1.HTTPBackendRef{ + { + BackendRef: createInferencePoolBackend("ipool", gatewayNsName.Namespace), + }, + } + // route with an inference pool backend that does not exist + hrInferencePoolDoesNotExist := createHTTPRoute("hr", gatewayNsName.Name, "example.com", "/") + hrInferencePoolDoesNotExist.Spec.Rules[0].BackendRefs = []gatewayv1.HTTPBackendRef{ + { + BackendRef: createInferencePoolBackend("ipool-does-not-exist", gatewayNsName.Namespace), + }, + } + validatorInvalidFieldsInRule := &validationfakes.FakeHTTPFieldsValidator{ ValidatePathInMatchStub: func(path string) error { if path == invalidPath { @@ -943,6 +971,86 @@ func TestBuildHTTPRoute(t *testing.T) { }, name: "rule with one invalid and one unresolvable snippets filter extension ref filter", }, + { + validator: &validationfakes.FakeHTTPFieldsValidator{}, + hr: hrInferencePool, + expected: &L7Route{ + RouteType: RouteTypeHTTP, + Source: hrInferencePool, + ParentRefs: []ParentRef{ + { + Idx: 0, + Gateway: CreateParentRefGateway(gw), + SectionName: hrInferencePool.Spec.ParentRefs[0].SectionName, + }, + }, + Valid: true, + Attachable: true, + Spec: L7RouteSpec{ + Hostnames: hrInferencePool.Spec.Hostnames, + Rules: []RouteRule{ + { + ValidMatches: true, + Filters: RouteRuleFilters{ + Valid: true, + Filters: []Filter{}, + }, + Matches: hrInferencePool.Spec.Rules[0].Matches, + RouteBackendRefs: []RouteBackendRef{ + { + IsInferencePool: true, + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: 
gatewayv1.BackendObjectReference{ + Group: helpers.GetPointer[gatewayv1.Group](""), + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service), + Name: "ipool-pool-svc", + Namespace: helpers.GetPointer[gatewayv1.Namespace]("test"), + }, + }, + }, + }, + }, + }, + }, + }, + name: "route with an inference pool backend gets converted to service", + }, + { + validator: &validationfakes.FakeHTTPFieldsValidator{}, + hr: hrInferencePoolDoesNotExist, + expected: &L7Route{ + RouteType: RouteTypeHTTP, + Source: hrInferencePoolDoesNotExist, + ParentRefs: []ParentRef{ + { + Idx: 0, + Gateway: CreateParentRefGateway(gw), + SectionName: hrInferencePoolDoesNotExist.Spec.ParentRefs[0].SectionName, + }, + }, + Valid: true, + Attachable: true, + Spec: L7RouteSpec{ + Hostnames: hrInferencePoolDoesNotExist.Spec.Hostnames, + Rules: []RouteRule{ + { + ValidMatches: true, + Filters: RouteRuleFilters{ + Valid: true, + Filters: []Filter{}, + }, + Matches: hrInferencePoolDoesNotExist.Spec.Rules[0].Matches, + RouteBackendRefs: []RouteBackendRef{ + { + BackendRef: createInferencePoolBackend("ipool-does-not-exist", gatewayNsName.Namespace), + }, + }, + }, + }, + }, + }, + name: "route with an inference pool backend that doesn't exist", + }, } gws := map[types.NamespacedName]*Gateway{ @@ -957,8 +1065,11 @@ func TestBuildHTTPRoute(t *testing.T) { snippetsFilters := map[types.NamespacedName]*SnippetsFilter{ {Namespace: "test", Name: "sf"}: {Valid: true}, } + inferencePools := map[types.NamespacedName]*inference.InferencePool{ + {Namespace: "test", Name: "ipool"}: {}, + } - route := buildHTTPRoute(test.validator, test.hr, gws, snippetsFilters) + route := buildHTTPRoute(test.validator, test.hr, gws, snippetsFilters, inferencePools) g.Expect(helpers.Diff(test.expected, route)).To(BeEmpty()) }) } @@ -1090,7 +1201,7 @@ func TestBuildHTTPRouteWithMirrorRoutes(t *testing.T) { g := NewWithT(t) routes := map[RouteKey]*L7Route{} - l7route := buildHTTPRoute(validator, hr, gateways, snippetsFilters) + l7route 
:= buildHTTPRoute(validator, hr, gateways, snippetsFilters, nil) g.Expect(l7route).NotTo(BeNil()) buildHTTPMirrorRoutes(routes, l7route, hr, gateways, snippetsFilters) @@ -1102,6 +1213,67 @@ func TestBuildHTTPRouteWithMirrorRoutes(t *testing.T) { g.Expect(helpers.Diff(expectedMirrorRoute, routes[mirrorRouteKey])).To(BeEmpty()) } +func TestProcessHTTPRouteRule_InferencePoolWithMultipleBackendRefs(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + validator := &validationfakes.FakeHTTPFieldsValidator{} + inferencePoolName := "ipool" + routeNamespace := "test" + inferencePools := map[types.NamespacedName]*inference.InferencePool{ + {Namespace: routeNamespace, Name: inferencePoolName}: {}, + } + + // BackendRef 1: InferencePool + backendRef1 := gatewayv1.HTTPBackendRef{ + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Group: helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup), + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool), + Name: gatewayv1.ObjectName(inferencePoolName), + Namespace: helpers.GetPointer(gatewayv1.Namespace(routeNamespace)), + }, + }, + } + // BackendRef 2: Service + backendRef2 := gatewayv1.HTTPBackendRef{ + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service), + Name: "backend", + }, + }, + } + + specRule := gatewayv1.HTTPRouteRule{ + Matches: []gatewayv1.HTTPRouteMatch{ + { + Path: &gatewayv1.HTTPPathMatch{ + Type: helpers.GetPointer(gatewayv1.PathMatchPathPrefix), + Value: helpers.GetPointer("/"), + }, + }, + }, + BackendRefs: []gatewayv1.HTTPBackendRef{backendRef1, backendRef2}, + } + + rulePath := field.NewPath("spec").Child("rules").Index(0) + + routeRule, errs := processHTTPRouteRule( + specRule, + routeNamespace, + rulePath, + validator, + nil, + inferencePools, + ) + + g.Expect(routeRule.RouteBackendRefs).To(BeEmpty()) + g.Expect(errs.invalid).To(HaveLen(1)) + errMsg := 
"cannot use InferencePool backend when multiple backendRefs are specified in a single rule" + g.Expect(errs.invalid[0].Error()).To(ContainSubstring(errMsg)) +} + func TestValidateMatch(t *testing.T) { t.Parallel() createAllValidValidator := func() *validationfakes.FakeHTTPFieldsValidator { diff --git a/internal/controller/state/graph/inferencepools.go b/internal/controller/state/graph/inferencepools.go new file mode 100644 index 0000000000..7bf57a9f3d --- /dev/null +++ b/internal/controller/state/graph/inferencepools.go @@ -0,0 +1,180 @@ +package graph + +import ( + "fmt" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" + apiv1 "sigs.k8s.io/gateway-api/apis/v1" + + "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" +) + +// A ReferencedInferencePool represents an InferencePool that is referenced by a Route and the +// Gateways it belongs to. +type ReferencedInferencePool struct { + // Source is the original InferencePool that this ReferencedInferencePool is based on. + Source *inference.InferencePool + // Gateways are the Gateways that this ReferencedInferencePool is attached to. + Gateways []*apiv1.Gateway + // HTTPRoutes are the HTTPRoutes that reference this InferencePool. + HTTPRoutes []*L7Route + // Conditions contains the conditions that should be applied to the InferencePool. + Conditions []conditions.Condition + // Valid indicates whether the InferencePool is valid or not. + Valid bool +} + +// buildReferencedInferencePools builds a map of InferencePools that are referenced by HTTPRoutes +// per Gateway that we process. 
+func buildReferencedInferencePools( + routes map[RouteKey]*L7Route, + gws map[types.NamespacedName]*Gateway, + inferencePools map[types.NamespacedName]*inference.InferencePool, + services map[types.NamespacedName]*v1.Service, +) map[types.NamespacedName]*ReferencedInferencePool { + referencedInferencePools := make(map[types.NamespacedName]*ReferencedInferencePool, len(inferencePools)) + + for _, gw := range gws { + if gw == nil { + continue + } + + processInferencePoolsForGateway(routes, gw, referencedInferencePools, inferencePools) + } + + if len(referencedInferencePools) == 0 { + return nil + } + + // validate each referenced InferencePool and add conditions. + for _, refPool := range referencedInferencePools { + if routeCond := validateInferencePoolRoutesAcceptance(refPool.Source, refPool.HTTPRoutes); routeCond != nil { + refPool.Conditions = append(refPool.Conditions, *routeCond) + } + + if extensionRefCond := validateInferencePoolExtensionRef(refPool.Source, services); extensionRefCond != nil { + refPool.Conditions = append(refPool.Conditions, *extensionRefCond) + } + + refPool.Valid = len(refPool.Conditions) == 0 + } + + return referencedInferencePools +} + +// processInferencePoolsForGateway processes all InferencePools that belong to the given gateway. 
+func processInferencePoolsForGateway( + routes map[RouteKey]*L7Route, + gw *Gateway, + referencedInferencePools map[types.NamespacedName]*ReferencedInferencePool, + inferencePools map[types.NamespacedName]*inference.InferencePool, +) { + gwKey := client.ObjectKeyFromObject(gw.Source) + + for _, route := range routes { + if !routeBelongsToGateway(route.ParentRefs, gwKey) { + continue + } + + for _, rule := range route.Spec.Rules { + for _, ref := range rule.RouteBackendRefs { + if !ref.IsInferencePool && (ref.Kind == nil || *ref.Kind != kinds.InferencePool) { + continue + } + + namespace := route.Source.GetNamespace() + if ref.Namespace != nil { + namespace = string(*ref.Namespace) + } + + poolName := types.NamespacedName{ + Name: controller.GetInferencePoolName(string(ref.Name)), + Namespace: namespace, + } + + if _, referenced := referencedInferencePools[poolName]; !referenced { + referencedInferencePools[poolName] = &ReferencedInferencePool{ + Conditions: make([]conditions.Condition, 0, 2), + Gateways: make([]*apiv1.Gateway, 0), + HTTPRoutes: make([]*L7Route, 0), + } + } + + if pool, exists := inferencePools[poolName]; exists { + referencedInferencePools[poolName].Source = pool + referencedInferencePools[poolName].Gateways = append( + referencedInferencePools[poolName].Gateways, + gw.Source, + ) + referencedInferencePools[poolName].HTTPRoutes = append( + referencedInferencePools[poolName].HTTPRoutes, + route, + ) + } + } + } + } +} + +// validateInferencePoolExtensionRef validates the ExtensionRef of the InferencePool. 
+func validateInferencePoolExtensionRef( + ip *inference.InferencePool, + svc map[types.NamespacedName]*v1.Service, +) *conditions.Condition { + var failingCond conditions.Condition + if ip == nil { + return nil + } + + // if kind is empty, it defaults to Service + kind := string(ip.Spec.EndpointPickerRef.Kind) + if kind == "" { + kind = kinds.Service + } + + if kind != kinds.Service { + failingCond = conditions.NewInferencePoolInvalidExtensionref("Invalid ExtensionRef kind: " + kind) + return &failingCond + } + + eppNsName := types.NamespacedName{ + Name: string(ip.Spec.EndpointPickerRef.Name), + Namespace: ip.GetNamespace(), + } + + if _, ok := svc[eppNsName]; !ok { + failingCond = conditions.NewInferencePoolInvalidExtensionref("ExtensionRef Service not found: " + eppNsName.String()) + return &failingCond + } + + return nil +} + +// validateInferencePoolRoutesAcceptance checks if the routes that reference the InferencePool +// are accepted by the Gateway. +func validateInferencePoolRoutesAcceptance(ip *inference.InferencePool, routes []*L7Route) *conditions.Condition { + if ip == nil || len(routes) == 0 { + return nil + } + + // we do not need to validate that the route belongs to the gateway or not + // we only process routes that belong to the gateway in the first place + for _, route := range routes { + if !route.Valid { + cond := conditions.NewInferencePoolInvalidHTTPRouteNotAccepted( + fmt.Sprintf("Referenced HTTPRoute %s/%s is not accepted by the Gateway", + route.Source.GetNamespace(), + route.Source.GetName(), + ), + ) + return &cond + } + } + + return nil +} diff --git a/internal/controller/state/graph/inferencepools_test.go b/internal/controller/state/graph/inferencepools_test.go new file mode 100644 index 0000000000..f6ea66215a --- /dev/null +++ b/internal/controller/state/graph/inferencepools_test.go @@ -0,0 +1,574 @@ +package graph + +import ( + "testing" + + . 
"github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" + gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" + + "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" +) + +func TestBuildReferencedInferencePools(t *testing.T) { + t.Parallel() + + gwNsName := types.NamespacedName{Namespace: "test", Name: "gwNsname"} + gws := map[types.NamespacedName]*Gateway{ + gwNsName: { + Source: &gatewayv1.Gateway{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: gwNsName.Namespace, + Name: gwNsName.Name, + }, + }, + }, + } + + getNormalRoute := func() *L7Route { + return &L7Route{ + Source: &gatewayv1.HTTPRoute{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "valid-route", + }, + }, + ParentRefs: []ParentRef{ + { + Gateway: &ParentRefGateway{NamespacedName: gwNsName}, + }, + }, + Valid: true, + Spec: L7RouteSpec{ + Rules: []RouteRule{ + { + RouteBackendRefs: []RouteBackendRef{ + { + IsInferencePool: true, + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Namespace: helpers.GetPointer[gatewayv1.Namespace]("test"), + Name: "pool", + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool), + }, + }, + }, + }, + }, + }, + }, + } + } + + getModifiedRoute := func(mod func(route *L7Route) *L7Route) *L7Route { + return mod(getNormalRoute()) + } + + validRoute := getNormalRoute() + + endpointPickerConfig := inference.EndpointPickerRef{ + Kind: "Service", + Name: "valid-svc", + } + + validSvcMap := map[types.NamespacedName]*v1.Service{ + {Name: "valid-svc", Namespace: "test"}: { + ObjectMeta: metav1.ObjectMeta{ + Name: "valid-svc", + Namespace: "test", 
+ }, + }, + {Name: "regular-svc", Namespace: "test"}: { + ObjectMeta: metav1.ObjectMeta{ + Name: "regular-svc", + Namespace: "test", + }, + }, + } + + modifiedRouteWithServiceBackend := getModifiedRoute(func(route *L7Route) *L7Route { + route.Spec.Rules[0].RouteBackendRefs = append(route.Spec.Rules[0].RouteBackendRefs, + RouteBackendRef{ + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service), + Name: "regular-svc", + }, + }, + }, + ) + return route + }) + + routeWithInferencePoolHeadlessSvcBackend := getModifiedRoute(func(route *L7Route) *L7Route { + route.Spec.Rules = []RouteRule{ + { + RouteBackendRefs: []RouteBackendRef{ + { + IsInferencePool: true, + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service), + Name: gatewayv1.ObjectName(controller.CreateInferencePoolServiceName("pool")), + Namespace: helpers.GetPointer[gatewayv1.Namespace]("test"), + }, + }, + }, + }, + }, + } + return route + }) + + routeWithNoNamespaceBackend := getModifiedRoute(func(route *L7Route) *L7Route { + route.Spec.Rules[0].RouteBackendRefs[0].Namespace = nil + return route + }) + + invalidRoute := getModifiedRoute(func(route *L7Route) *L7Route { + route.Valid = false + return route + }) + + tests := []struct { + routes map[RouteKey]*L7Route + gws map[types.NamespacedName]*Gateway + services map[types.NamespacedName]*v1.Service + inferencePools map[types.NamespacedName]*inference.InferencePool + expPools map[types.NamespacedName]*ReferencedInferencePool + name string + }{ + { + name: "no gateways", + gws: nil, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): validRoute, + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + expPools: nil, + }, + { 
+ name: "valid route with referenced inferencepool", + gws: gws, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): validRoute, + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: { + ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: endpointPickerConfig, + }, + }, + }, + services: validSvcMap, + expPools: map[types.NamespacedName]*ReferencedInferencePool{ + {Name: "pool", Namespace: "test"}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: endpointPickerConfig, + }, + }, + Gateways: []*gatewayv1.Gateway{ + gws[gwNsName].Source, + }, + HTTPRoutes: []*L7Route{ + validRoute, + }, + Conditions: []conditions.Condition{}, + }, + }, + }, + { + name: "route with service backend", + gws: gws, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): getModifiedRoute(func(route *L7Route) *L7Route { + route.Spec.Rules = []RouteRule{ + { + RouteBackendRefs: []RouteBackendRef{ + { + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service), + }, + }, + }, + }, + }, + } + return route + }), + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + expPools: nil, + }, + { + name: "route with both inferencepool and service backends", + gws: gws, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): modifiedRouteWithServiceBackend, + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: { + ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: 
endpointPickerConfig, + }, + }, + }, + services: validSvcMap, + expPools: map[types.NamespacedName]*ReferencedInferencePool{ + {Name: "pool", Namespace: "test"}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: endpointPickerConfig, + }, + }, + Gateways: []*gatewayv1.Gateway{ + gws[gwNsName].Source, + }, + HTTPRoutes: []*L7Route{ + modifiedRouteWithServiceBackend, + }, + Conditions: []conditions.Condition{}, + }, + }, + }, + { + name: "route with headless InferencePool Service backend", + gws: gws, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): routeWithInferencePoolHeadlessSvcBackend, + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: { + ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: endpointPickerConfig, + }, + }, + }, + services: validSvcMap, + expPools: map[types.NamespacedName]*ReferencedInferencePool{ + {Name: "pool", Namespace: "test"}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: endpointPickerConfig, + }, + }, + Gateways: []*gatewayv1.Gateway{ + gws[gwNsName].Source, + }, + HTTPRoutes: []*L7Route{ + routeWithInferencePoolHeadlessSvcBackend, + }, + Conditions: []conditions.Condition{}, + }, + }, + }, + { + name: "inferencepool backend with no namespace uses route namespace", + gws: gws, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): routeWithNoNamespaceBackend, + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: { + ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: endpointPickerConfig, + }, + }, + }, + services: 
validSvcMap, + expPools: map[types.NamespacedName]*ReferencedInferencePool{ + {Name: "pool", Namespace: "test"}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: endpointPickerConfig, + }, + }, + Gateways: []*gatewayv1.Gateway{ + gws[gwNsName].Source, + }, + HTTPRoutes: []*L7Route{ + routeWithNoNamespaceBackend, + }, + Conditions: []conditions.Condition{}, + }, + }, + }, + { + name: "referenced inferencepool does not exist", + gws: gws, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): validRoute, + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{}, + expPools: map[types.NamespacedName]*ReferencedInferencePool{ + {Name: "pool", Namespace: "test"}: { + Source: nil, + Gateways: []*gatewayv1.Gateway{}, + HTTPRoutes: []*L7Route{}, + Conditions: []conditions.Condition{}, + }, + }, + }, + { + name: "inferencepool references invalid extensionRef and has invalid route", + gws: gws, + services: validSvcMap, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(invalidRoute.Source): invalidRoute, + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: { + ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: inference.EndpointPickerRef{ + Kind: "Service", + Name: "invalid-extension-ref", + }, + }, + }, + }, + expPools: map[types.NamespacedName]*ReferencedInferencePool{ + {Name: "pool", Namespace: "test"}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: inference.EndpointPickerRef{ + Kind: "Service", + Name: "invalid-extension-ref", + }, + }, + }, + Gateways: []*gatewayv1.Gateway{ + gws[gwNsName].Source, + }, + HTTPRoutes: []*L7Route{ + invalidRoute, + }, + Conditions: []conditions.Condition{ + 
conditions.NewInferencePoolInvalidHTTPRouteNotAccepted( + "Referenced HTTPRoute test/valid-route is not accepted by the Gateway", + ), + conditions.NewInferencePoolInvalidExtensionref( + "ExtensionRef Service not found: test/invalid-extension-ref", + ), + }, + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + pools := buildReferencedInferencePools(test.routes, test.gws, test.inferencePools, test.services) + + g.Expect(helpers.Diff(test.expPools, pools)).To(BeEmpty()) + }) + } +} + +func TestValidateInferencePoolExtensionRef(t *testing.T) { + t.Parallel() + + tests := []struct { + pool *inference.InferencePool + services map[types.NamespacedName]*v1.Service + expCond *conditions.Condition + name string + }{ + { + name: "inference pool has a valid extensionRef", + pool: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "pool", + }, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: inference.EndpointPickerRef{ + Kind: "Service", + Name: "valid-svc", + }, + }, + }, + services: map[types.NamespacedName]*v1.Service{ + {Name: "valid-svc", Namespace: "test"}: { + ObjectMeta: metav1.ObjectMeta{ + Name: "valid-svc", + Namespace: "test", + }, + Spec: v1.ServiceSpec{ + Ports: []v1.ServicePort{ + { + Port: 80, + }, + }, + }, + }, + }, + expCond: nil, + }, + { + name: "inference pool references a non-existent service", + pool: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "pool", + }, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: inference.EndpointPickerRef{ + Kind: "Service", + Name: "does-not-exist", + }, + }, + }, + services: map[types.NamespacedName]*v1.Service{}, + expCond: helpers.GetPointer( + conditions.NewInferencePoolInvalidExtensionref("ExtensionRef Service not found: test/does-not-exist"), + ), + }, + { + name: "inference pool references an extensionRef that is not a service", + pool: 
&inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "pool", + }, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: inference.EndpointPickerRef{ + Kind: "Invalid-Kind", + Name: "svc", + }, + }, + }, + services: map[types.NamespacedName]*v1.Service{ + {Name: "svc", Namespace: "test"}: { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc", + Namespace: "test", + }, + Spec: v1.ServiceSpec{ + Ports: []v1.ServicePort{ + { + Port: 80, + }, + }, + }, + }, + }, + expCond: helpers.GetPointer( + conditions.NewInferencePoolInvalidExtensionref("Invalid ExtensionRef kind: Invalid-Kind"), + ), + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + conds := validateInferencePoolExtensionRef(test.pool, test.services) + g.Expect(conds).To(Equal(test.expCond)) + }) + } +} + +func TestValidateInferencePoolRoutesAcceptance(t *testing.T) { + t.Parallel() + + tests := []struct { + pool *inference.InferencePool + expCond *conditions.Condition + name string + routes []*L7Route + }{ + { + name: "no routes referencing the pool", + pool: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "pool", + }, + }, + routes: []*L7Route{}, + expCond: nil, + }, + { + name: "one valid route referencing the pool", + pool: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "pool", + }, + }, + routes: []*L7Route{ + { + Valid: true, + Source: &gatewayv1.HTTPRoute{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "valid-route", + }, + }, + }, + }, + expCond: nil, + }, + { + name: "one invalid route referencing the pool", + pool: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "pool", + }, + }, + routes: []*L7Route{ + { + Valid: false, + Source: &gatewayv1.HTTPRoute{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "invalid-route", + }, + }, + }, + }, + expCond: 
helpers.GetPointer( + conditions.NewInferencePoolInvalidHTTPRouteNotAccepted( + "Referenced HTTPRoute test/invalid-route is not accepted by the Gateway", + ), + ), + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + conds := validateInferencePoolRoutesAcceptance(test.pool, test.routes) + g.Expect(conds).To(Equal(test.expCond)) + }) + } +} diff --git a/internal/controller/state/graph/reference_grant.go b/internal/controller/state/graph/reference_grant.go index b827d47024..3fa04ecc7a 100644 --- a/internal/controller/state/graph/reference_grant.go +++ b/internal/controller/state/graph/reference_grant.go @@ -51,7 +51,16 @@ func toSecret(nsname types.NamespacedName) toResource { func toService(nsname types.NamespacedName) toResource { return toResource{ - kind: "Service", + kind: kinds.Service, + name: nsname.Name, + namespace: nsname.Namespace, + } +} + +func toInferencePool(nsname types.NamespacedName) toResource { + return toResource{ + group: inferenceAPIGroup, + kind: kinds.InferencePool, name: nsname.Name, namespace: nsname.Namespace, } @@ -139,6 +148,7 @@ func (r *referenceGrantResolver) refAllowed(to toResource, from fromResource) bo // of the particular kind in the namespace allInNamespaceKey := allowedReference{ to: toResource{ + group: to.group, kind: to.kind, namespace: to.namespace, }, diff --git a/internal/controller/state/graph/reference_grant_test.go b/internal/controller/state/graph/reference_grant_test.go index 21fee614e1..bf97f22c25 100644 --- a/internal/controller/state/graph/reference_grant_test.go +++ b/internal/controller/state/graph/reference_grant_test.go @@ -189,7 +189,7 @@ func TestToService(t *testing.T) { ref := toService(types.NamespacedName{Namespace: "ns", Name: "service"}) exp := toResource{ - kind: "Service", + kind: kinds.Service, namespace: "ns", name: "service", } @@ -198,6 +198,21 @@ func TestToService(t *testing.T) { g.Expect(ref).To(Equal(exp)) } +func 
TestToInferencePool(t *testing.T) { + t.Parallel() + ref := toInferencePool(types.NamespacedName{Namespace: "ns", Name: "inference-pool"}) + + exp := toResource{ + group: inferenceAPIGroup, + kind: kinds.InferencePool, + namespace: "ns", + name: "inference-pool", + } + + g := NewWithT(t) + g.Expect(ref).To(Equal(exp)) +} + func TestFromGateway(t *testing.T) { t.Parallel() ref := fromGateway("ns") @@ -306,7 +321,24 @@ func TestRefAllowedFrom(t *testing.T) { }, To: []v1beta1.ReferenceGrantTo{ { - Kind: "Service", + Kind: kinds.Service, + }, + }, + }, + }, + {Namespace: allowedHTTPRouteNs, Name: "hr-2-ipool"}: { + Spec: v1beta1.ReferenceGrantSpec{ + From: []v1beta1.ReferenceGrantFrom{ + { + Group: v1beta1.GroupName, + Kind: kinds.HTTPRoute, + Namespace: v1beta1.Namespace(hrNs), + }, + }, + To: []v1beta1.ReferenceGrantTo{ + { + Group: inferenceAPIGroup, + Kind: kinds.InferencePool, }, }, }, @@ -322,7 +354,7 @@ func TestRefAllowedFrom(t *testing.T) { }, To: []v1beta1.ReferenceGrantTo{ { - Kind: "Service", + Kind: kinds.Service, }, }, }, @@ -338,7 +370,7 @@ func TestRefAllowedFrom(t *testing.T) { }, To: []v1beta1.ReferenceGrantTo{ { - Kind: "Service", + Kind: kinds.Service, }, }, }, @@ -375,6 +407,18 @@ func TestRefAllowedFrom(t *testing.T) { toResource: toService(notAllowedNsName), expAllowed: false, }, + { + name: "ref allowed from httproute to inferencepool", + refAllowedFrom: fromHTTPRoute(hrNs), + toResource: toInferencePool(allowedHTTPRouteNsName), + expAllowed: true, + }, + { + name: "ref not allowed from httproute to inferencepool", + refAllowedFrom: fromHTTPRoute(hrNs), + toResource: toInferencePool(notAllowedNsName), + expAllowed: false, + }, { name: "ref allowed from grpcroute to service", refAllowedFrom: fromGRPCRoute(grNs), diff --git a/internal/controller/state/graph/route_common.go b/internal/controller/state/graph/route_common.go index c156ca738a..4fb5178a2e 100644 --- a/internal/controller/state/graph/route_common.go +++ 
b/internal/controller/state/graph/route_common.go @@ -10,6 +10,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/validation/field" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" v1 "sigs.k8s.io/gateway-api/apis/v1" v1alpha "sigs.k8s.io/gateway-api/apis/v1alpha2" @@ -19,7 +20,10 @@ import ( "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" ) -const wildcardHostname = "~^" +const ( + wildcardHostname = "~^" + inferenceAPIGroup = "inference.networking.k8s.io" +) // ParentRef describes a reference to a parent in a Route. type ParentRef struct { @@ -157,11 +161,22 @@ type RouteRule struct { // RouteBackendRef is a wrapper for v1.BackendRef and any BackendRef filters from the HTTPRoute or GRPCRoute. type RouteBackendRef struct { + v1.BackendRef + // If this backend is defined in a RequestMirror filter, this value will indicate the filter's index. MirrorBackendIdx *int - v1.BackendRef + // EndpointPickerConfig is the configuration for the EndpointPicker, if this backendRef is for an InferencePool. + EndpointPickerConfig *inference.EndpointPickerRef + + // EndpointPickerNsName is the namespace where the EndpointPicker is deployed, + // if this backendRef is for an InferencePool. + EndpointPickerNsName string + Filters []any + + // IsInferencePool indicates if this backend is an InferencePool disguised as a Service. + IsInferencePool bool } // CreateRouteKey takes a client.Object and creates a RouteKey. 
@@ -242,6 +257,7 @@ func buildRoutesForGateways( grpcRoutes map[types.NamespacedName]*v1.GRPCRoute, gateways map[types.NamespacedName]*Gateway, snippetsFilters map[types.NamespacedName]*SnippetsFilter, + inferencePools map[types.NamespacedName]*inference.InferencePool, ) map[RouteKey]*L7Route { if len(gateways) == 0 { return nil @@ -250,7 +266,7 @@ func buildRoutesForGateways( routes := make(map[RouteKey]*L7Route) for _, route := range httpRoutes { - r := buildHTTPRoute(validator, route, gateways, snippetsFilters) + r := buildHTTPRoute(validator, route, gateways, snippetsFilters, inferencePools) if r == nil { continue } diff --git a/internal/controller/state/graph/service.go b/internal/controller/state/graph/service.go index d43ecacfd8..3a702facc9 100644 --- a/internal/controller/state/graph/service.go +++ b/internal/controller/state/graph/service.go @@ -34,7 +34,6 @@ func buildReferencedServices( } processL7RoutesForGateway(l7routes, gw, gwNsName, referencedServices, services) - processL4RoutesForGateway(l4Routes, gw, gwNsName, referencedServices, services) } diff --git a/internal/controller/status/prepare_requests.go b/internal/controller/status/prepare_requests.go index 87e3b441cc..e6f35822c4 100644 --- a/internal/controller/status/prepare_requests.go +++ b/internal/controller/status/prepare_requests.go @@ -8,6 +8,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" v1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -519,3 +520,56 @@ func PrepareNginxGatewayStatus( }), } } + +// PrepareInferencePoolRequests prepares status UpdateRequests for the given InferencePools. 
+func PrepareInferencePoolRequests( + inferencePools map[types.NamespacedName]*graph.ReferencedInferencePool, + transitionTime metav1.Time, +) []UpdateRequest { + reqs := make([]UpdateRequest, 0, len(inferencePools)) + + for nsname, pool := range inferencePools { + if pool.Source == nil { + continue + } + + defaultConds := conditions.NewDefaultInferenceConditions() + allConds := make([]conditions.Condition, 0, len(pool.Conditions)+2) + + allConds = append(allConds, defaultConds...) + + if len(pool.Conditions) != 0 { + allConds = append(allConds, pool.Conditions...) + } + + conds := conditions.DeduplicateConditions(allConds) + apiConds := conditions.ConvertConditions(conds, pool.Source.GetGeneration(), transitionTime) + + parents := make([]inference.ParentStatus, 0, len(pool.Gateways)) + for _, ref := range pool.Gateways { + parents = append(parents, inference.ParentStatus{ + ParentRef: inference.ParentReference{ + Name: inference.ObjectName(ref.GetName()), + Namespace: inference.Namespace(ref.GetNamespace()), + Group: helpers.GetPointer(inference.Group(ref.GroupVersionKind().Group)), + Kind: kinds.Gateway, + }, + Conditions: apiConds, + }) + } + + status := inference.InferencePoolStatus{ + Parents: parents, + } + + req := UpdateRequest{ + NsName: nsname, + ResourceType: pool.Source, + Setter: newInferencePoolStatusSetter(status), + } + + reqs = append(reqs, req) + } + + return reqs +} diff --git a/internal/controller/status/prepare_requests_test.go b/internal/controller/status/prepare_requests_test.go index 3cb629d3c2..f863d5cf97 100644 --- a/internal/controller/status/prepare_requests_test.go +++ b/internal/controller/status/prepare_requests_test.go @@ -15,6 +15,7 @@ import ( utilruntime "k8s.io/apimachinery/pkg/util/runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" v1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" 
"sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -35,6 +36,7 @@ func createK8sClientFor(resourceType ngftypes.ObjectType) client.Client { utilruntime.Must(v1alpha2.Install(scheme)) utilruntime.Must(v1alpha3.Install(scheme)) utilruntime.Must(ngfAPI.AddToScheme(scheme)) + utilruntime.Must(inference.Install(scheme)) k8sClient := fake.NewClientBuilder(). WithScheme(scheme). @@ -2157,3 +2159,239 @@ func TestBuildSnippetsFilterStatuses(t *testing.T) { }) } } + +func TestBuildInferencePoolStatuses(t *testing.T) { + t.Parallel() + transitionTime := helpers.PrepareTimeForFakeClient(metav1.Now()) + group := "" + + validAcceptedCondition := metav1.Condition{ + Type: string(inference.InferencePoolConditionAccepted), + Status: metav1.ConditionTrue, + ObservedGeneration: 1, + LastTransitionTime: transitionTime, + Reason: string(inference.InferencePoolReasonAccepted), + Message: "InferencePool is accepted by the Gateway.", + } + + validResolvedRefsCondition := metav1.Condition{ + Type: string(inference.InferencePoolConditionResolvedRefs), + Status: metav1.ConditionTrue, + ObservedGeneration: 1, + LastTransitionTime: transitionTime, + Reason: string(inference.InferencePoolConditionResolvedRefs), + Message: "Inference pool references a valid ExtensionRef.", + } + + tests := []struct { + inferencePool map[types.NamespacedName]*graph.ReferencedInferencePool + expectedPoolWithStatus map[types.NamespacedName]inference.InferencePoolStatus + name string + expectedReqs int + }{ + { + name: "no referenced inferencePools", + expectedReqs: 0, + }, + { + name: "an inference pool has valid status for multiple gateways", + inferencePool: map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: "test", Name: "valid-inference-pool"}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "valid-inference-pool", + Namespace: "test", + Generation: 1, + }, + }, + Gateways: []*v1.Gateway{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "gateway-1", + Namespace: "test", + 
}, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "gateway-2", + Namespace: "test", + }, + }, + }, + }, + }, + expectedReqs: 1, + expectedPoolWithStatus: map[types.NamespacedName]inference.InferencePoolStatus{ + {Namespace: "test", Name: "valid-inference-pool"}: { + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{ + validAcceptedCondition, + validResolvedRefsCondition, + }, + ParentRef: inference.ParentReference{ + Namespace: inference.Namespace("test"), + Name: "gateway-1", + Kind: kinds.Gateway, + Group: helpers.GetPointer(inference.Group(group)), + }, + }, + { + Conditions: []metav1.Condition{ + validAcceptedCondition, + validResolvedRefsCondition, + }, + ParentRef: inference.ParentReference{ + Namespace: inference.Namespace("test"), + Name: "gateway-2", + Kind: kinds.Gateway, + Group: helpers.GetPointer(inference.Group(group)), + }, + }, + }, + }, + }, + }, + { + name: "an inference pool has accepted valid status and is referenced by invalid extension ref", + inferencePool: map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: "test", Name: "valid-inference-pool"}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "valid-inference-pool", + Namespace: "test", + Generation: 1, + }, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: inference.EndpointPickerRef{ + Name: inference.ObjectName("invalid-extension-ref"), + }, + }, + }, + Gateways: []*v1.Gateway{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "gateway-1", + Namespace: "test", + }, + }, + }, + Conditions: []conditions.Condition{ + conditions.NewInferencePoolInvalidExtensionref("Invalid extension ref: test/invalid-extension-ref"), + }, + }, + }, + expectedReqs: 1, + expectedPoolWithStatus: map[types.NamespacedName]inference.InferencePoolStatus{ + {Namespace: "test", Name: "valid-inference-pool"}: { + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{ + validAcceptedCondition, + { + Type: 
string(inference.InferencePoolConditionResolvedRefs), + Status: metav1.ConditionFalse, + ObservedGeneration: 1, + LastTransitionTime: transitionTime, + Reason: string(inference.InferencePoolReasonInvalidExtensionRef), + Message: "Invalid extension ref: test/invalid-extension-ref", + }, + }, + ParentRef: inference.ParentReference{ + Namespace: inference.Namespace("test"), + Name: "gateway-1", + Kind: kinds.Gateway, + Group: helpers.GetPointer(inference.Group(group)), + }, + }, + }, + }, + }, + }, + { + name: "an inference pool is referencing an invalid route and is referenced by invalid extension ref", + inferencePool: map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: "test", Name: "valid-inference-pool"}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "valid-inference-pool", + Namespace: "test", + Generation: 1, + }, + }, + Gateways: []*v1.Gateway{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "gateway-1", + Namespace: "test", + }, + }, + }, + Conditions: []conditions.Condition{ + conditions.NewInferencePoolInvalidHTTPRouteNotAccepted("Invalid HTTPRoute: test/invalid-route not accepted"), + conditions.NewInferencePoolInvalidExtensionref("Invalid extension ref: test/invalid-extension-ref"), + }, + }, + }, + expectedReqs: 1, + expectedPoolWithStatus: map[types.NamespacedName]inference.InferencePoolStatus{ + {Namespace: "test", Name: "valid-inference-pool"}: { + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{ + { + Type: string(inference.InferencePoolConditionAccepted), + Status: metav1.ConditionFalse, + ObservedGeneration: 1, + LastTransitionTime: transitionTime, + Reason: string(inference.InferencePoolReasonHTTPRouteNotAccepted), + Message: "Invalid HTTPRoute: test/invalid-route not accepted", + }, + { + Type: string(inference.InferencePoolConditionResolvedRefs), + Status: metav1.ConditionFalse, + ObservedGeneration: 1, + LastTransitionTime: transitionTime, + Reason: 
string(inference.InferencePoolReasonInvalidExtensionRef), + Message: "Invalid extension ref: test/invalid-extension-ref", + }, + }, + ParentRef: inference.ParentReference{ + Namespace: inference.Namespace("test"), + Name: "gateway-1", + Kind: kinds.Gateway, + Group: helpers.GetPointer(inference.Group(group)), + }, + }, + }, + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + k8sClient := createK8sClientFor(&inference.InferencePool{}) + for _, ip := range test.inferencePool { + err := k8sClient.Create(context.Background(), ip.Source) + g.Expect(err).ToNot(HaveOccurred()) + } + + updater := NewUpdater(k8sClient, logr.Discard()) + reqs := PrepareInferencePoolRequests(test.inferencePool, transitionTime) + g.Expect(reqs).To(HaveLen(test.expectedReqs)) + updater.Update(context.Background(), reqs...) + + for nsname, expected := range test.expectedPoolWithStatus { + var inferencePool inference.InferencePool + + err := k8sClient.Get(context.Background(), nsname, &inferencePool) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(helpers.Diff(expected, inferencePool.Status)).To(BeEmpty()) + } + }) + } +} diff --git a/internal/controller/status/status_setters.go b/internal/controller/status/status_setters.go index c4fcc7c128..efb9f68413 100644 --- a/internal/controller/status/status_setters.go +++ b/internal/controller/status/status_setters.go @@ -4,6 +4,7 @@ import ( "slices" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -404,3 +405,65 @@ func snippetsStatusEqual(status1, status2 ngfAPI.ControllerStatus) bool { return ConditionsEqual(status1.Conditions, status2.Conditions) } + +func newInferencePoolStatusSetter(status inference.InferencePoolStatus) Setter { + return func(obj client.Object) (wasSet bool) { + ip := 
helpers.MustCastObject[*inference.InferencePool](obj) + + // we build all the parent statuses at once so we can directly + // compare the previous and current statuses + if inferencePoolStatusEqual(ip.Status.Parents, status.Parents) { + return false + } + + ip.Status = status + return true + } +} + +func inferencePoolStatusEqual(prevParents, curParents []inference.ParentStatus) bool { + // Compare the previous and current parent statuses, ignoring order + // Check if any previous parent status is missing in the current status + for _, prevParent := range prevParents { + exists := slices.ContainsFunc(curParents, func(curParent inference.ParentStatus) bool { + return parentStatusEqual(prevParent, curParent) + }) + + if !exists { + return false + } + } + + // Check if any current parent status is missing in the previous status + for _, curParent := range curParents { + exists := slices.ContainsFunc(prevParents, func(prevParent inference.ParentStatus) bool { + return parentStatusEqual(curParent, prevParent) + }) + + if !exists { + return false + } + } + + return true +} + +func parentStatusEqual(p1, p2 inference.ParentStatus) bool { + if p1.ParentRef.Name != p2.ParentRef.Name { + return false + } + + if !helpers.EqualPointers(&p1.ParentRef.Namespace, &p2.ParentRef.Namespace) { + return false + } + + if !helpers.EqualPointers(&p1.ParentRef.Kind, &p2.ParentRef.Kind) { + return false + } + + if !helpers.EqualPointers(&p1.ParentRef.Group, &p2.ParentRef.Group) { + return false + } + + return ConditionsEqual(p1.Conditions, p2.Conditions) +} diff --git a/internal/controller/status/status_setters_test.go b/internal/controller/status/status_setters_test.go index 61a34a4e9f..9f3c6f1521 100644 --- a/internal/controller/status/status_setters_test.go +++ b/internal/controller/status/status_setters_test.go @@ -5,6 +5,7 @@ import ( . 
"github.com/onsi/gomega" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -1726,3 +1727,303 @@ func TestNewSnippetsFilterStatusSetter(t *testing.T) { }) } } + +func TestInferencePoolStatusSetter(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + status, newStatus, expStatus inference.InferencePoolStatus + expStatusSet bool + }{ + { + name: "InferencePool has no status", + newStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + }, + }, + expStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + }, + }, + expStatusSet: true, + }, + { + name: "InferencePool updates condition of an existing parent status", + status: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "old condition"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + }, + }, + newStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + }, + }, + expStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + }, + }, + expStatusSet: 
true, + }, + { + name: "InferencePool has new parent statuses along with existing ones", + status: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + }, + }, + newStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + { + Conditions: []metav1.Condition{{Message: "gateway2 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway2", + Namespace: "test", + }, + }, + }, + }, + expStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + { + Conditions: []metav1.Condition{{Message: "gateway2 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway2", + Namespace: "test", + }, + }, + }, + }, + expStatusSet: true, + }, + { + name: "InferencePool has parent statuses and one is removed", + status: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + { + Conditions: []metav1.Condition{{Message: "gateway2 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway2", + Namespace: "test", + }, + }, + }, + }, + newStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, 
+ }, + }, + expStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + }, + }, + expStatusSet: true, + }, + { + name: "InferencePool has existing multiple parent statuses, one gets changed condition", + status: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "parent ref gateway1 is valid"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + { + Conditions: []metav1.Condition{{Message: "parent ref gateway2 is valid"}}, + ParentRef: inference.ParentReference{ + Name: "gateway2", + Namespace: "test", + }, + }, + { + Conditions: []metav1.Condition{{Message: "parent ref gateway3 is valid"}}, + ParentRef: inference.ParentReference{ + Name: "gateway3", + Namespace: "test", + }, + }, + }, + }, + newStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "parent ref gateway1 is valid"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + { + Conditions: []metav1.Condition{{Message: "parent ref gateway2 is invalid"}}, + ParentRef: inference.ParentReference{ + Name: "gateway2", + Namespace: "test", + }, + }, + { + Conditions: []metav1.Condition{{Message: "parent ref gateway3 is valid"}}, + ParentRef: inference.ParentReference{ + Name: "gateway3", + Namespace: "test", + }, + }, + }, + }, + expStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "parent ref gateway1 is valid"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + { + Conditions: []metav1.Condition{{Message: "parent ref gateway2 is invalid"}}, + ParentRef: inference.ParentReference{ + Name: 
"gateway2", + Namespace: "test", + }, + }, + { + Conditions: []metav1.Condition{{Message: "parent ref gateway3 is valid"}}, + ParentRef: inference.ParentReference{ + Name: "gateway3", + Namespace: "test", + }, + }, + }, + }, + expStatusSet: true, + }, + { + name: "InferencePool has same status", + status: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + }, + }, + newStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + }, + }, + expStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + }, + }, + expStatusSet: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + setter := newInferencePoolStatusSetter(test.newStatus) + obj := &inference.InferencePool{Status: test.status} + + statusSet := setter(obj) + + g.Expect(statusSet).To(Equal(test.expStatusSet)) + g.Expect(obj.Status).To(Equal(test.expStatus)) + }) + } +} diff --git a/internal/framework/controller/resource.go b/internal/framework/controller/resource.go index a0d49e3789..d17662169e 100644 --- a/internal/framework/controller/resource.go +++ b/internal/framework/controller/resource.go @@ -2,10 +2,31 @@ package controller import ( "fmt" + "strings" ) +// inferencePoolServiceSuffix is the suffix of the headless Service name for an InferencePool. 
+const inferencePoolServiceSuffix = "-pool-svc" + // CreateNginxResourceName creates the base resource name for all nginx resources // created by the control plane. func CreateNginxResourceName(prefix, suffix string) string { return fmt.Sprintf("%s-%s", prefix, suffix) } + +// CreateInferencePoolServiceName creates the name for a headless Service that +// we create for an InferencePool. +func CreateInferencePoolServiceName(name string) string { + svcName := fmt.Sprintf("%s%s", name, inferencePoolServiceSuffix) + // Service names are DNS labels capped at 63 characters; if the suffix would exceed that, just use the InferencePool name + if len(svcName) > 63 { + return name + } + + return svcName +} + +// GetInferencePoolName returns the name of the InferencePool for a given headless Service name. +func GetInferencePoolName(serviceName string) string { + return strings.TrimSuffix(serviceName, inferencePoolServiceSuffix) +} diff --git a/internal/framework/kinds/kinds.go b/internal/framework/kinds/kinds.go index 35ca8e2b00..b59b06df96 100644 --- a/internal/framework/kinds/kinds.go +++ b/internal/framework/kinds/kinds.go @@ -25,6 +25,12 @@ const ( BackendTLSPolicy = "BackendTLSPolicy" ) +// Gateway API Inference Extension kinds. +const ( + // InferencePool is the InferencePool kind. + InferencePool = "InferencePool" +) + // Core API Kinds. const ( // Service is the Service kind. diff --git a/internal/framework/types/types.go b/internal/framework/types/types.go index bf61bd23d7..0aeccd008d 100644 --- a/internal/framework/types/types.go +++ b/internal/framework/types/types.go @@ -5,3 +5,14 @@ import "sigs.k8s.io/controller-runtime/pkg/client" // ObjectType is used when we only care about the type of client.Object. // The fields of the client.Object may be empty. type ObjectType client.Object + +// Fields used for communication with the EndpointPicker service when using the Inference Extension. +const ( + // EPPEndpointHostHeader is the HTTP header used to specify the EPP endpoint host.
+ EPPEndpointHostHeader = "X-EPP-Host" + // EPPEndpointPortHeader is the HTTP header used to specify the EPP endpoint port. + EPPEndpointPortHeader = "X-EPP-Port" + // GoShimPort is the default port for the Go EPP shim server to listen on. If collisions become a problem, + // we can make this configurable via the NginxProxy resource. + GoShimPort = 54800 // why 54800? Sum "nginx" in ASCII and multiply by 100. +) diff --git a/tests/Makefile b/tests/Makefile index dcea49c341..90a15027e0 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -18,6 +18,8 @@ EXPERIMENTAL_CONFORMANCE_PROFILES = GATEWAY-TLS CONFORMANCE_PROFILES = $(STANDARD_CONFORMANCE_PROFILES) # by default we use the standard conformance profiles. If experimental is enabled we override this and add the experimental profiles. SKIP_TESTS = CEL_TEST_TARGET = +INFERENCE_SUPPORTED_FEATURES = GatewayFollowingEPPRouting +INFERENCE_SKIP_TESTS = InferencePoolResolvedRefsCondition,EppUnAvailableFailOpen,HTTPRouteInvalidInferencePoolRef,InferencePoolAccepted,HTTPRouteMultipleGatewaysDifferentPools,HTTPRouteMultipleRulesDifferentPools,InferencePoolHTTPRoutePortValidation,InferencePoolInvalidEPPService # Check if ENABLE_EXPERIMENTAL is true ifeq ($(ENABLE_EXPERIMENTAL),true) @@ -68,6 +70,22 @@ run-conformance-tests: ## Run conformance tests exit 2; \ fi +.PHONY: run-inference-conformance-tests +run-inference-conformance-tests: ## Run inference conformance tests + kind load docker-image $(CONFORMANCE_PREFIX):$(CONFORMANCE_TAG) --name $(CLUSTER_NAME) + kubectl apply -f conformance/conformance-rbac.yaml + kubectl run -i conformance \ + --image=$(CONFORMANCE_PREFIX):$(CONFORMANCE_TAG) --image-pull-policy=Never \ + --overrides='{ "spec": { "serviceAccountName": "conformance" } }' \ + --restart=Never -- sh -c "go test -v .
-tags conformance -args --gateway-class=$(GATEWAY_CLASS) \ + --version=$(NGF_VERSION) \ + --skip-tests=$(INFERENCE_SKIP_TESTS) \ + --supported-features=$(INFERENCE_SUPPORTED_FEATURES) \ + --report-output=output.txt; cat output.txt" | tee output.txt + ./scripts/check-pod-exit-code.sh + sed -e '1,/CONFORMANCE PROFILE/d' output.txt > conformance-profile-inference.yaml + rm output.txt + .PHONY: cleanup-conformance-tests cleanup-conformance-tests: ## Clean up conformance tests fixtures kubectl delete pod conformance @@ -170,7 +188,7 @@ add-local-ip-to-cluster: ## Add local IP to the GKE cluster master-authorized-ne update-firewall-with-local-ip: ## Update the firewall rule with local IP address ./scripts/update-firewall-with-local-ip.sh -HELM_PARAMETERS += --set nginxGateway.name=nginx-gateway --set nginx.service.type=ClusterIP --skip-schema-validation +HELM_PARAMETERS += --set nginxGateway.name=nginx-gateway --set nginx.service.type=ClusterIP --skip-schema-validation --set nginxGateway.gwAPIInferenceExtension.enable=$(ENABLE_INFERENCE_EXTENSION) --set nginxGateway.config.logging.level=debug # this target is used to install the gateway-api CRDs from the main branch (only used in the nightly CI job) # it overrides the target in the main Makefile when the GW_API_VERSION is set to main @@ -191,6 +209,9 @@ uninstall-ngf: ## Uninstall NGF on configured kind cluster -make uninstall-gateway-crds -kubectl delete namespace nginx-gateway -kubectl kustomize ../config/crd | kubectl delete -f - + @if [ "$(ENABLE_INFERENCE_EXTENSION)" = "true" ]; then \ + $(MAKE) uninstall-inference-crds; \ + fi # Run CEL validation integration tests against a real cluster .PHONY: test-cel-validation diff --git a/tests/README.md b/tests/README.md index 883bc595bd..55fecaaa79 100644 --- a/tests/README.md +++ b/tests/README.md @@ -20,7 +20,9 @@ This directory contains the tests for NGINX Gateway Fabric. 
The tests are divide - [Option 1 - Build and install NGINX Gateway Fabric from local to configured kind cluster](#option-1---build-and-install-nginx-gateway-fabric-from-local-to-configured-kind-cluster) - [Option 2 - Install NGINX Gateway Fabric from local already built image to configured kind cluster](#option-2---install-nginx-gateway-fabric-from-local-already-built-image-to-configured-kind-cluster) - [Step 2 - Build conformance test runner image](#step-2---build-conformance-test-runner-image) - - [Step 3 - Run Gateway conformance tests](#step-3---run-gateway-conformance-tests) + - [Step 3 - Run Conformance tests](#step-3---run-conformance-tests) + - [To run Gateway conformance tests](#to-run-gateway-conformance-tests) + - [To run Inference conformance tests](#to-run-inference-conformance-tests) - [Step 4 - Cleanup the conformance test fixtures and uninstall NGINX Gateway Fabric](#step-4---cleanup-the-conformance-test-fixtures-and-uninstall-nginx-gateway-fabric) - [Step 5 - Revert changes to Go modules](#step-5---revert-changes-to-go-modules) - [Step 6 - Delete kind cluster](#step-6---delete-kind-cluster) @@ -138,6 +140,12 @@ TELEMETRY_ENDPOINT=otel-collector-opentelemetry-collector.collector.svc.cluster. 
export ENABLE_EXPERIMENTAL=true ``` +> If you want to run the Inference conformance tests, set the following environment variable before deploying NGF: + +```bash +export ENABLE_INFERENCE_EXTENSION=true +``` + #### Option 1 - Build and install NGINX Gateway Fabric from local to configured kind cluster ```makefile @@ -188,12 +196,20 @@ go mod tidy make build-test-runner-image ``` -### Step 3 - Run Gateway conformance tests +### Step 3 - Run Conformance tests + +#### To run Gateway conformance tests ```makefile make run-conformance-tests ``` +#### To run Inference conformance tests + +```makefile +make run-inference-conformance-tests +``` + ### Step 4 - Cleanup the conformance test fixtures and uninstall NGINX Gateway Fabric ```makefile diff --git a/tests/conformance-profile-inference.yaml b/tests/conformance-profile-inference.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/conformance/conformance-rbac.yaml b/tests/conformance/conformance-rbac.yaml index 6cdf2d0a86..f063191f98 100644 --- a/tests/conformance/conformance-rbac.yaml +++ b/tests/conformance/conformance-rbac.yaml @@ -24,6 +24,7 @@ rules: - get - list - update + - watch - apiGroups: - apps resources: @@ -33,6 +34,7 @@ rules: - delete - get - list + - update - apiGroups: - gateway.networking.k8s.io resources: @@ -48,6 +50,7 @@ rules: - get - list - patch + - update - apiGroups: - apiextensions.k8s.io resources: @@ -64,6 +67,43 @@ rules: - get - list - patch +- apiGroups: + - inference.networking.k8s.io + resources: + - inferencepools + verbs: + - get + - list + - watch + - create + - delete + - update +- apiGroups: + - inference.networking.x-k8s.io + resources: + - inferencepools + - inferenceobjectives + verbs: + - get + - list + - watch +- apiGroups: + - inference.networking.k8s.io + resources: + - inferencepools/status + verbs: + - update +- apiGroups: + - rbac.authorization.k8s.io + resources: + - roles + - rolebindings + verbs: + - create + - delete + - get + - list + - update 
--- kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 diff --git a/tests/conformance/conformance_test.go b/tests/conformance/conformance_test.go index d792046e96..909a36e09f 100644 --- a/tests/conformance/conformance_test.go +++ b/tests/conformance/conformance_test.go @@ -18,10 +18,12 @@ limitations under the License. package conformance import ( + "fmt" "os" "testing" . "github.com/onsi/gomega" + "gopkg.in/yaml.v2" v1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1beta1" "sigs.k8s.io/gateway-api/conformance" @@ -29,12 +31,18 @@ import ( "sigs.k8s.io/gateway-api/conformance/tests" "sigs.k8s.io/gateway-api/conformance/utils/flags" "sigs.k8s.io/gateway-api/conformance/utils/suite" - "sigs.k8s.io/yaml" + + inference_conformance "sigs.k8s.io/gateway-api-inference-extension/conformance" ) -// unusableGatewayIPAddress 198.51.100.0 is a publicly reserved IP address specifically for documentation. -// This is needed to give the conformance tests an example valid ip unusable address. -const unusableGatewayIPAddress = "198.51.100.0" +const ( + // unusableGatewayIPAddress 198.51.100.0 is a publicly reserved IP address specifically for documentation. + // This is needed to give the conformance tests an example valid ip unusable address. + unusableGatewayIPAddress = "198.51.100.0" + + // inferenceBaseManifest is the base manifest used to deploy the resources needed for inference conformance tests. 
+ inferenceBaseManifest = "manifests/base.yaml" +) func TestConformance(t *testing.T) { g := NewWithT(t) @@ -86,3 +94,38 @@ func TestConformance(t *testing.T) { _, err = f.Write(yamlReport) g.Expect(err).ToNot(HaveOccurred()) } + +func TestInferenceExtensionConformance(t *testing.T) { + g := NewWithT(t) + + t.Logf(`Running inference conformance tests with %s GatewayClass\n cleanup: %t\n`+ + `debug: %t\n enable all features: %t \n supported extended features: [%v]\n exempt features: [%v]\n`+ + `skip tests: [%v]`, + *flags.GatewayClassName, *flags.CleanupBaseResources, *flags.ShowDebug, + *flags.EnableAllSupportedFeatures, *flags.SupportedFeatures, *flags.ExemptFeatures, *flags.SkipTests, + ) + + opts := inference_conformance.DefaultOptions(t) + ipaddressType := v1.IPAddressType + opts.UnusableNetworkAddresses = []v1beta1.GatewaySpecAddress{{Type: &ipaddressType, Value: unusableGatewayIPAddress}} + opts.UsableNetworkAddresses = []v1beta1.GatewaySpecAddress{{Type: &ipaddressType, Value: "192.0.2.1"}} + + opts.Implementation = conf_v1.Implementation{ + Organization: "nginx", + Project: "nginx-gateway-fabric", + URL: "https://github.com/nginx/nginx-gateway-fabric", + Version: *flags.ImplementationVersion, + Contact: []string{ + "https://github.com/nginx/nginx-gateway-fabric/discussions/new/choose", + }, + } + + _, err := os.Stat(inferenceBaseManifest) + g.Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("base manifest file %s not found", inferenceBaseManifest)) + + opts.ManifestFS = append(opts.ManifestFS, os.DirFS(".")) + opts.BaseManifests = inferenceBaseManifest + + opts.ConformanceProfiles.Insert(inference_conformance.GatewayLayerProfileName) + inference_conformance.RunConformanceWithOptions(t, opts) +} diff --git a/tests/conformance/manifests/base.yaml b/tests/conformance/manifests/base.yaml new file mode 100644 index 0000000000..fc868800e8 --- /dev/null +++ b/tests/conformance/manifests/base.yaml @@ -0,0 +1,394 @@ +# Base Kubernetes resources for the Gateway API 
Inference Extension conformance tests. +# This includes namespaces and a minimal set of resources (Gateway, Backend) +# required by many tests. More specific resources should be defined within +# individual test files or other resource directories (e.g., sample_backends). + +--- +apiVersion: v1 +kind: Namespace +metadata: + name: gateway-conformance-infra + labels: + gateway-conformance: infra +--- +apiVersion: v1 +kind: Namespace +metadata: + name: gateway-conformance-app-backend + labels: + gateway-conformance: backend +--- +# A basic Gateway resource that allows HTTPRoutes from the same namespace. +# Tests can use this as a parent reference for routes that target InferencePools. +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: conformance-primary + namespace: gateway-conformance-infra +spec: + gatewayClassName: "{GATEWAY_CLASS_NAME}" + listeners: + - name: http + port: 80 + protocol: HTTP + allowedRoutes: + namespaces: + from: All + kinds: + - group: gateway.networking.k8s.io + kind: HTTPRoute +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: conformance-secondary + namespace: gateway-conformance-infra +spec: + gatewayClassName: "{GATEWAY_CLASS_NAME}" + listeners: + - name: http + port: 80 + protocol: HTTP + hostname: "secondary.example.com" + allowedRoutes: + namespaces: + from: All + +### The following defines the essential resources for the gateway conformance test. +### All resources are created in the 'gateway-conformance-app-backend' namespace. +--- +# Deploys a mock backend service to act as a model server. 
+apiVersion: apps/v1 +kind: Deployment +metadata: + name: primary-inference-model-server-deployment + namespace: gateway-conformance-app-backend + labels: + app: primary-inference-model-server +spec: + replicas: 3 + selector: + matchLabels: + app: primary-inference-model-server + template: + metadata: + labels: + app: primary-inference-model-server + spec: + containers: + - name: echoserver + image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd + ports: + - containerPort: 3000 + readinessProbe: + httpGet: + path: / + port: 3000 + initialDelaySeconds: 3 + periodSeconds: 5 + failureThreshold: 2 + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP +--- +# Deploys a secondary mock backend service to act as a model server. +apiVersion: apps/v1 +kind: Deployment +metadata: + name: secondary-inference-model-server-deployment + namespace: gateway-conformance-app-backend + labels: + app: secondary-inference-model-server +spec: + replicas: 3 + selector: + matchLabels: + app: secondary-inference-model-server + template: + metadata: + labels: + app: secondary-inference-model-server + spec: + containers: + - name: echoserver + image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd + ports: + - containerPort: 3000 + readinessProbe: + httpGet: + path: / + port: 3000 + initialDelaySeconds: 3 + periodSeconds: 5 + failureThreshold: 2 + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP +--- +# --- Primary InferencePool Definition --- +apiVersion: inference.networking.k8s.io/v1 +kind: InferencePool +metadata: + name: primary-inference-pool + namespace: gateway-conformance-app-backend +spec: + 
selector: + matchLabels: + app: primary-inference-model-server + targetPorts: + - number: 3000 + endpointPickerRef: + name: primary-endpoint-picker-svc + port: + number: 9002 +--- +# --- Primary Conformance EPP service Definition --- +apiVersion: v1 +kind: Service +metadata: + name: primary-endpoint-picker-svc + namespace: gateway-conformance-app-backend +spec: + selector: + app: primary-app-backend-epp + ports: + - protocol: TCP + port: 9002 + targetPort: 9002 + appProtocol: http2 + type: ClusterIP +--- +# --- Primary Conformance EPP Deployment --- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: primary-app-endpoint-picker + namespace: gateway-conformance-app-backend + labels: + app: primary-app-backend-epp +spec: + replicas: 1 + selector: + matchLabels: + app: primary-app-backend-epp + template: + metadata: + labels: + app: primary-app-backend-epp + spec: + # Conservatively, this timeout should mirror the longest grace period of the pods within the pool + terminationGracePeriodSeconds: 130 + containers: + - name: epp + image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v1.0.0 + imagePullPolicy: Always + args: + - --pool-name + - "primary-inference-pool" + - --pool-namespace + - "gateway-conformance-app-backend" + - --v + - "4" + - --zap-encoder + - "json" + - --grpc-port + - "9002" + - --grpc-health-port + - "9003" + - "--config-file" + - "/config/conformance-plugins.yaml" + env: + - name: ENABLE_REQ_HEADER_BASED_SCHEDULER_FOR_TESTING # Used for conformance test. 
+ value: "true" + ports: + - containerPort: 9002 + - containerPort: 9003 + - name: metrics + containerPort: 9090 + livenessProbe: + grpc: + port: 9003 + service: inference-extension + initialDelaySeconds: 5 + periodSeconds: 10 + readinessProbe: + grpc: + port: 9003 + service: inference-extension + initialDelaySeconds: 5 + periodSeconds: 10 + volumeMounts: + - name: plugins-config-volume + mountPath: "/config" + volumes: + - name: plugins-config-volume + configMap: + name: plugins-config +--- +# --- Secondary InferencePool Definition --- +apiVersion: inference.networking.k8s.io/v1 +kind: InferencePool +metadata: + name: secondary-inference-pool + namespace: gateway-conformance-app-backend +spec: + selector: + matchLabels: + app: secondary-inference-model-server + targetPorts: + - number: 3000 + endpointPickerRef: + name: secondary-endpoint-picker-svc + failureMode: FailOpen + port: + number: 9002 +--- +# --- Secondary Conformance EPP service Definition --- +apiVersion: v1 +kind: Service +metadata: + name: secondary-endpoint-picker-svc + namespace: gateway-conformance-app-backend +spec: + selector: + app: secondary-app-backend-epp + ports: + - protocol: TCP + port: 9002 + targetPort: 9002 + appProtocol: http2 + type: ClusterIP +--- +# --- Secondary Conformance EPP Deployment --- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: secondary-app-endpoint-picker + namespace: gateway-conformance-app-backend + labels: + app: secondary-app-backend-epp +spec: + replicas: 1 + selector: + matchLabels: + app: secondary-app-backend-epp + template: + metadata: + labels: + app: secondary-app-backend-epp + spec: + # Conservatively, this timeout should mirror the longest grace period of the pods within the pool + terminationGracePeriodSeconds: 130 + containers: + - name: epp + image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v1.0.0 + imagePullPolicy: Always + args: + - --pool-name + - "secondary-inference-pool" + - --pool-namespace + - 
"gateway-conformance-app-backend" + - --v + - "4" + - --zap-encoder + - "json" + - --grpc-port + - "9002" + - --grpc-health-port + - "9003" + - "--config-file" + - "/config/conformance-plugins.yaml" + env: + - name: ENABLE_REQ_HEADER_BASED_SCHEDULER_FOR_TESTING # Used for conformance test. + value: "true" + ports: + - containerPort: 9002 + - containerPort: 9003 + - name: metrics + containerPort: 9090 + livenessProbe: + grpc: + port: 9003 + service: inference-extension + initialDelaySeconds: 5 + periodSeconds: 10 + readinessProbe: + grpc: + port: 9003 + service: inference-extension + initialDelaySeconds: 5 + periodSeconds: 10 + volumeMounts: + - name: plugins-config-volume + mountPath: "/config" + volumes: + - name: plugins-config-volume + configMap: + name: plugins-config +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: plugins-config + namespace: gateway-conformance-app-backend +data: + conformance-plugins.yaml: | + apiVersion: inference.networking.x-k8s.io/v1alpha1 + kind: EndpointPickerConfig + plugins: + - type: header-based-testing-filter + schedulingProfiles: + - name: conformance-profile + plugins: + - pluginRef: header-based-testing-filter +--- +# --- Required Role and RoleBinding for Conformance Test for EPP --- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: inference-model-reader + namespace: gateway-conformance-app-backend +rules: +- apiGroups: ["inference.networking.x-k8s.io"] + resources: ["inferenceobjectives", "inferencepools"] + verbs: ["get", "list", "watch"] +- apiGroups: ["inference.networking.k8s.io"] + resources: ["inferencepools"] + verbs: ["get", "list", "watch"] +- apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: epp-to-inference-model-reader + namespace: gateway-conformance-app-backend +subjects: +- kind: ServiceAccount + name: default + namespace: gateway-conformance-app-backend +roleRef: + kind: 
Role + name: inference-model-reader + apiGroup: rbac.authorization.k8s.io \ No newline at end of file diff --git a/tests/conformance/manifests/epp-routing.yaml b/tests/conformance/manifests/epp-routing.yaml new file mode 100644 index 0000000000..14be8eee65 --- /dev/null +++ b/tests/conformance/manifests/epp-routing.yaml @@ -0,0 +1,23 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: httproute-for-primary-gw + namespace: gateway-conformance-app-backend +spec: + parentRefs: + - group: gateway.networking.k8s.io + kind: Gateway + name: conformance-primary + namespace: gateway-conformance-infra + sectionName: http + hostnames: + - "primary.example.com" + rules: + - backendRefs: + - group: inference.networking.k8s.io + kind: InferencePool + name: primary-inference-pool + matches: + - path: + type: PathPrefix + value: /primary-gateway-test \ No newline at end of file diff --git a/tests/conformance/manifests/gateway.yaml b/tests/conformance/manifests/gateway.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/go.mod b/tests/go.mod index 92c39e24f2..0340cd7c6b 100644 --- a/tests/go.mod +++ b/tests/go.mod @@ -5,18 +5,20 @@ go 1.24.2 replace github.com/nginx/nginx-gateway-fabric/v2 => ../ require ( - github.com/nginx/nginx-gateway-fabric/v2 v2.1.1 + github.com/nginx/nginx-gateway-fabric/v2 v2.1.4 github.com/onsi/ginkgo/v2 v2.25.3 github.com/onsi/gomega v1.38.2 github.com/prometheus/client_golang v1.23.2 github.com/prometheus/common v0.66.1 github.com/tsenart/vegeta/v12 v12.12.0 + gopkg.in/yaml.v2 v2.4.0 k8s.io/api v0.34.1 k8s.io/apiextensions-apiserver v0.34.1 k8s.io/apimachinery v0.34.1 k8s.io/client-go v0.34.1 sigs.k8s.io/controller-runtime v0.22.1 sigs.k8s.io/gateway-api v1.3.0 + sigs.k8s.io/gateway-api-inference-extension v1.0.0 sigs.k8s.io/yaml v1.6.0 ) @@ -68,7 +70,7 @@ require ( golang.org/x/sys v0.35.0 // indirect golang.org/x/term v0.34.0 // indirect golang.org/x/text v0.29.0 // indirect - golang.org/x/time v0.9.0 
// indirect + golang.org/x/time v0.12.0 // indirect golang.org/x/tools v0.36.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 // indirect diff --git a/tests/go.sum b/tests/go.sum index 7d5375ca36..c9480b7020 100644 --- a/tests/go.sum +++ b/tests/go.sum @@ -161,8 +161,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= -golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= +golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8 h1:yqrTHse8TCMW1M1ZCP+VAR/l0kKxwaAIqN/il7x4voA= +golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8/go.mod h1:tujkw807nyEEAamNbDrEGzRav+ilXA7PCRAd6xsmwiU= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.27.0 h1:kb+q2PyFnEADO2IEF935ehFUXlWiNjJWtRNgBLSfbxQ= @@ -191,8 +191,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= -golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= -golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= 
+golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= @@ -223,6 +223,8 @@ gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnf gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= k8s.io/api v0.34.1 h1:jC+153630BMdlFukegoEL8E/yT7aLyQkIVuwhmwDgJM= @@ -245,6 +247,8 @@ sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY= sigs.k8s.io/gateway-api v1.3.0 h1:q6okN+/UKDATola4JY7zXzx40WO4VISk7i9DIfOvr9M= sigs.k8s.io/gateway-api v1.3.0/go.mod h1:d8NV8nJbaRbEKem+5IuxkL8gJGOZ+FJ+NvOIltV8gDk= +sigs.k8s.io/gateway-api-inference-extension v1.0.0 h1:GsHvlu1Cn1t6+vrHoPdNNlpwKxf/y1HuQSlUjd58Ds8= +sigs.k8s.io/gateway-api-inference-extension v1.0.0/go.mod h1:qxSY10qt2+YnZJ43VfpMXa6wpiENPderI2BnNZ4Kxfc= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= diff --git 
a/tests/output.txt b/tests/output.txt new file mode 100644 index 0000000000..b74e00397d --- /dev/null +++ b/tests/output.txt @@ -0,0 +1,539 @@ +=== RUN TestConformance + conformance_test.go:50: Running conformance tests with nginx GatewayClass\n cleanup: true\ndebug: false\n enable all features: false \n supported extended features: []\n exempt features: []\nconformance profiles: []\n skip tests: [InferencePoolResolvedRefsCondition,] + conformance_test.go:75: + Unexpected error: + <*errors.errorString | 0x400005e220>: + no conformance profile, supported features, explicit tests were provided so no tests could be selected + { + s: "no conformance profile, supported features, explicit tests were provided so no tests could be selected", + } + occurred +--- FAIL: TestConformance (0.00s) +=== RUN TestInferenceExtensionConformance + conformance_test.go:101: Running inference conformance tests with nginx GatewayClass\n cleanup: true\ndebug: false\n enable all features: false \n supported extended features: []\n exempt features: []\nskip tests: [InferencePoolResolvedRefsCondition,] + conformance_test.go:108: Registering API types with scheme... 
+ conformance_test.go:108: Attempting to install inferencev1alpha2 types into scheme from package: inference.networking.x-k8s.io + conformance_test.go:108: Attempting to install inferencev1 types into scheme from package: inference.networking.k8s.io + conformance_test.go:130: Running Inference Extension conformance tests with GatewayClass nginx + conformance.go:249: 2025-10-14T01:03:17.615900047Z: Test Setup: Ensuring GatewayClass has been accepted + conformance.go:255: 2025-10-14T01:03:17.624521922Z: Test Setup: Applying base manifests + apply.go:275: 2025-10-14T01:03:17.629990464Z: Creating gateway-conformance-infra Namespace + apply.go:275: 2025-10-14T01:03:17.668296422Z: Creating gateway-conformance-app-backend Namespace + apply.go:275: 2025-10-14T01:03:17.68502838Z: Creating conformance-primary Gateway + apply.go:275: 2025-10-14T01:03:17.700455672Z: Creating conformance-secondary Gateway + apply.go:275: 2025-10-14T01:03:17.720530089Z: Creating primary-inference-model-server-deployment Deployment + apply.go:275: 2025-10-14T01:03:17.734617255Z: Creating secondary-inference-model-server-deployment Deployment + apply.go:275: 2025-10-14T01:03:17.747788672Z: Creating primary-inference-pool InferencePool + apply.go:275: 2025-10-14T01:03:17.763545755Z: Creating primary-endpoint-picker-svc Service + apply.go:275: 2025-10-14T01:03:17.797173297Z: Creating primary-app-endpoint-picker Deployment + apply.go:275: 2025-10-14T01:03:17.855388922Z: Creating secondary-inference-pool InferencePool + apply.go:275: 2025-10-14T01:03:17.912740089Z: Creating secondary-endpoint-picker-svc Service + apply.go:275: 2025-10-14T01:03:17.965884089Z: Creating secondary-app-endpoint-picker Deployment + apply.go:275: 2025-10-14T01:03:17.988395839Z: Creating plugins-config ConfigMap + apply.go:275: 2025-10-14T01:03:18.008423881Z: Creating inference-model-reader Role + apply.go:275: 2025-10-14T01:03:18.026004839Z: Creating epp-to-inference-model-reader RoleBinding + conformance.go:258: 
2025-10-14T01:03:18.125679214Z: Test Setup: Ensuring Gateways and Pods from base manifests are ready + helpers.go:216: 2025-10-14T01:03:18.128281714Z: Gateway gateway-conformance-infra/conformance-primary expected observedGeneration to be updated to 1 for all conditions, only 0/2 were updated. stale conditions are: Accepted (generation 0), Programmed (generation 0) + helpers.go:240: 2025-10-14T01:03:19.138385881Z: Ready condition set to False, expected True + helpers.go:240: 2025-10-14T01:03:19.138570048Z: Ready was not in conditions list + helpers.go:243: 2025-10-14T01:03:19.138578256Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet + helpers.go:240: 2025-10-14T01:03:20.135867882Z: Ready condition set to False, expected True + helpers.go:240: 2025-10-14T01:03:20.135924132Z: Ready was not in conditions list + helpers.go:243: 2025-10-14T01:03:20.135938465Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet + helpers.go:240: 2025-10-14T01:03:21.135367007Z: Ready condition set to False, expected True + helpers.go:240: 2025-10-14T01:03:21.135453382Z: Ready was not in conditions list + helpers.go:243: 2025-10-14T01:03:21.135469924Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet + helpers.go:240: 2025-10-14T01:03:22.138447632Z: Ready condition set to False, expected True + helpers.go:240: 2025-10-14T01:03:22.138600674Z: Ready was not in conditions list + helpers.go:243: 2025-10-14T01:03:22.138612882Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet + helpers.go:240: 2025-10-14T01:03:23.140596008Z: Ready condition set to False, expected True + helpers.go:240: 2025-10-14T01:03:23.140670675Z: Ready was not in conditions list + helpers.go:243: 2025-10-14T01:03:23.140684591Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet + helpers.go:240: 2025-10-14T01:03:24.132289342Z: Ready condition 
set to False, expected True + helpers.go:240: 2025-10-14T01:03:24.132335592Z: Ready was not in conditions list + helpers.go:243: 2025-10-14T01:03:24.132342883Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet + helpers.go:240: 2025-10-14T01:03:25.145361926Z: Ready condition set to False, expected True + helpers.go:240: 2025-10-14T01:03:25.145474134Z: Ready was not in conditions list + helpers.go:243: 2025-10-14T01:03:25.145500634Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet + helpers.go:240: 2025-10-14T01:03:26.141522593Z: Ready condition set to False, expected True + helpers.go:240: 2025-10-14T01:03:26.141676259Z: Ready was not in conditions list + helpers.go:243: 2025-10-14T01:03:26.141727134Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet + helpers.go:240: 2025-10-14T01:03:27.141143468Z: Ready condition set to False, expected True + helpers.go:240: 2025-10-14T01:03:27.141202718Z: Ready was not in conditions list + helpers.go:243: 2025-10-14T01:03:27.141216177Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet + helpers.go:240: 2025-10-14T01:03:28.131819885Z: Ready condition set to False, expected True + helpers.go:240: 2025-10-14T01:03:28.131871677Z: Ready was not in conditions list + helpers.go:243: 2025-10-14T01:03:28.131878719Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet + helpers.go:240: 2025-10-14T01:03:29.133710844Z: Ready condition set to False, expected True + helpers.go:240: 2025-10-14T01:03:29.133776219Z: Ready was not in conditions list + helpers.go:243: 2025-10-14T01:03:29.133784011Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet + helpers.go:240: 2025-10-14T01:03:30.131837136Z: Ready condition set to False, expected True + helpers.go:240: 2025-10-14T01:03:30.131903303Z: Ready was not in conditions list + 
helpers.go:243: 2025-10-14T01:03:30.13191447Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet + helpers.go:240: 2025-10-14T01:03:31.134555012Z: Ready condition set to False, expected True + helpers.go:240: 2025-10-14T01:03:31.134599178Z: Ready was not in conditions list + helpers.go:243: 2025-10-14T01:03:31.134607595Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet + helpers.go:240: 2025-10-14T01:03:32.133003804Z: Ready condition set to False, expected True + helpers.go:240: 2025-10-14T01:03:32.133049721Z: Ready was not in conditions list + helpers.go:243: 2025-10-14T01:03:32.133060096Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet + helpers.go:240: 2025-10-14T01:03:33.136018554Z: Ready condition set to False, expected True + helpers.go:240: 2025-10-14T01:03:33.136180346Z: Ready was not in conditions list + helpers.go:243: 2025-10-14T01:03:33.136208554Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet + helpers.go:240: 2025-10-14T01:03:34.141493597Z: Ready condition set to False, expected True + helpers.go:240: 2025-10-14T01:03:34.141638847Z: Ready was not in conditions list + helpers.go:243: 2025-10-14T01:03:34.141660013Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet + helpers.go:240: 2025-10-14T01:03:35.140798972Z: Ready condition set to False, expected True + helpers.go:240: 2025-10-14T01:03:35.14086293Z: Ready was not in conditions list + helpers.go:243: 2025-10-14T01:03:35.14087168Z: Pod gateway-conformance-app-backend/primary-app-endpoint-picker-7579b47bc6-rtt47 not ready yet + helpers.go:248: 2025-10-14T01:03:36.135304833Z: Gateways and Pods in gateway-conformance-infra, gateway-conformance-app-backend namespaces ready + conformance.go:265: Attempting to fetch Gateway gateway-conformance-infra/conformance-primary. 
+ conformance.go:306: Successfully fetched Gateway gateway-conformance-infra/conformance-primary. Spec.GatewayClassName: nginx + conformance.go:265: Shared Gateway gateway-conformance-infra/conformance-primary is ready. + conformance.go:266: Attempting to fetch Gateway gateway-conformance-infra/conformance-secondary. + conformance.go:306: Successfully fetched Gateway gateway-conformance-infra/conformance-secondary. Spec.GatewayClassName: nginx + conformance.go:266: Shared Gateway gateway-conformance-infra/conformance-secondary is ready. + conformance_test.go:130: Running Inference Extension conformance tests against all registered tests +=== RUN TestInferenceExtensionConformance/EppUnAvailableFailOpen + conformance.go:72: 2025-10-14T01:03:36.164248292Z: Applying tests/epp_unavailable_fail_open.yaml + apply.go:275: 2025-10-14T01:03:36.175129875Z: Creating httproute-for-failopen-pool-gw HTTPRoute + conformance.go:77: 2025-10-14T01:03:36.182051833Z: Running EppUnAvailableFailOpen, relying on the following features: -, Gateway-standard + epp_unavailable_fail_open.go:60: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-for-failopen-pool-gw to be Accepted by Gateway gateway-conformance-infra/conformance-secondary + epp_unavailable_fail_open.go:60: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-for-failopen-pool-gw to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-secondary + epp_unavailable_fail_open.go:60: HTTPRoute gateway-conformance-app-backend/httproute-for-failopen-pool-gw is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-secondary + epp_unavailable_fail_open.go:61: Waiting for InferencePool gateway-conformance-app-backend/secondary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted) + epp_unavailable_fail_open.go:61: InferencePool gateway-conformance-app-backend/secondary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted' + 
epp_unavailable_fail_open.go:61: InferencePool gateway-conformance-app-backend/secondary-inference-pool is Accepted by a parent Gateway (Reason: Accepted) + epp_unavailable_fail_open.go:62: Waiting for Gateway gateway-conformance-infra/conformance-secondary to get an address... + epp_unavailable_fail_open.go:62: Gateway gateway-conformance-infra/conformance-secondary has address: 10.96.165.14:80 + epp_unavailable_fail_open.go:64: Searching for Pods with labels map[app:secondary-inference-model-server] in namespace gateway-conformance-app-backend +=== RUN TestInferenceExtensionConformance/EppUnAvailableFailOpen/Phase_1:_Verify_baseline_connectivity_with_EPP_available + epp_unavailable_fail_open.go:71: Sending request to ensure the Gateway and EPP are working correctly... + traffic.go:151: 2025-10-14T01:03:37.21868725Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:163: 2025-10-14T01:03:37.402427709Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 3.292µs) + traffic.go:151: 2025-10-14T01:03:38.403328918Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:151: 2025-10-14T01:03:38.426142084Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:163: 2025-10-14T01:03:38.446418001Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: 
secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 1.207650709s) + traffic.go:151: 2025-10-14T01:03:39.448027043Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:151: 2025-10-14T01:03:39.486648835Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:163: 2025-10-14T01:03:39.513615501Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 2.268142335s) + traffic.go:151: 2025-10-14T01:03:40.517491502Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:163: 2025-10-14T01:03:40.56746671Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 3.298903335s) + traffic.go:151: 
2025-10-14T01:03:41.569563961Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:163: 2025-10-14T01:03:41.606558586Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 4.350976127s) + traffic.go:151: 2025-10-14T01:03:42.60780142Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:163: 2025-10-14T01:03:42.626555045Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 5.389276795s) + traffic.go:151: 2025-10-14T01:03:43.627828253Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:151: 2025-10-14T01:03:43.69536142Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:163: 2025-10-14T01:03:43.723475462Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, 
Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 6.476841628s) + traffic.go:151: 2025-10-14T01:03:44.724507212Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:163: 2025-10-14T01:03:44.771940629Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 7.505984296s) + traffic.go:151: 2025-10-14T01:03:45.772548504Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:163: 2025-10-14T01:03:45.790925171Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 8.553996546s) + traffic.go:151: 2025-10-14T01:03:46.791878463Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:151: 2025-10-14T01:03:46.812035797Z: Making POST request 
to http://10.96.165.14/failopen-pool-test + traffic.go:163: 2025-10-14T01:03:46.833224713Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 9.593544797s) + traffic.go:151: 2025-10-14T01:03:47.83441863Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:163: 2025-10-14T01:03:47.857052464Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 10.615884797s) + traffic.go:151: 2025-10-14T01:03:48.859770131Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:151: 2025-10-14T01:03:48.897930881Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:163: 2025-10-14T01:03:48.913112131Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] 
test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 11.679430214s) + traffic.go:151: 2025-10-14T01:03:49.914456215Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:163: 2025-10-14T01:03:49.970198381Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 12.69590984s) + traffic.go:151: 2025-10-14T01:03:50.971954632Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:163: 2025-10-14T01:03:51.013194465Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 13.753366382s) + traffic.go:151: 2025-10-14T01:03:52.014606882Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:151: 2025-10-14T01:03:52.057845091Z: Making POST request to 
http://10.96.165.14/failopen-pool-test + traffic.go:163: 2025-10-14T01:03:52.084620507Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 14.839337549s) + traffic.go:151: 2025-10-14T01:03:53.085642425Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:151: 2025-10-14T01:03:53.1301423Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:151: 2025-10-14T01:03:53.168473466Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:163: 2025-10-14T01:03:53.205107341Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 15.949976216s) + traffic.go:151: 2025-10-14T01:03:54.20599155Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:151: 2025-10-14T01:03:54.241487258Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:151: 2025-10-14T01:03:54.261202842Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:163: 
2025-10-14T01:03:54.29185805Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 17.042710592s) + traffic.go:151: 2025-10-14T01:03:55.292574301Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:163: 2025-10-14T01:03:55.312207217Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 18.073995801s) + traffic.go:151: 2025-10-14T01:03:56.312476468Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:163: 2025-10-14T01:03:56.331264593Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with 
secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 19.093928343s) + traffic.go:151: 2025-10-14T01:03:57.331577593Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:151: 2025-10-14T01:03:57.348852843Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:151: 2025-10-14T01:03:57.371795468Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:163: 2025-10-14T01:03:57.386972552Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 20.15330376s) + traffic.go:151: 2025-10-14T01:03:58.38759601Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:163: 2025-10-14T01:03:58.422928094Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 21.169056844s) + traffic.go:151: 2025-10-14T01:03:59.423453969Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:163: 
2025-10-14T01:03:59.447749344Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 22.204941553s) + traffic.go:151: 2025-10-14T01:04:00.449553011Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:151: 2025-10-14T01:04:00.47849147Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:151: 2025-10-14T01:04:00.515535511Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:169: 2025-10-14T01:04:00.540372178Z: Request passed +=== RUN TestInferenceExtensionConformance/EppUnAvailableFailOpen/Phase_2:_Verify_fail-open_behavior_after_EPP_becomes_unavailable + epp_unavailable_fail_open.go:92: Making EPP service gateway-conformance-app-backend/primary-endpoint-picker-svc unavailable... + epp_unavailable_fail_open.go:94: Making Service gateway-conformance-app-backend/primary-endpoint-picker-svc unavailable by modifying its selector... + epp_unavailable_fail_open.go:94: Waiting for EndpointSlices of Service gateway-conformance-app-backend/primary-endpoint-picker-svc to become empty... + epp_unavailable_fail_open.go:94: Successfully modified selector for Service gateway-conformance-app-backend/primary-endpoint-picker-svc + epp_unavailable_fail_open.go:98: Sending request again, expecting success to verify fail-open... 
+ traffic.go:151: 2025-10-14T01:04:01.563349637Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:151: 2025-10-14T01:04:01.647647095Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:151: 2025-10-14T01:04:01.696854387Z: Making POST request to http://10.96.165.14/failopen-pool-test + traffic.go:169: 2025-10-14T01:04:01.720366262Z: Request passed + epp_unavailable_fail_open.go:95: Restoring original selector for Service gateway-conformance-app-backend/primary-endpoint-picker-svc... + epp_unavailable_fail_open.go:95: Waiting for EndpointSlices of Service gateway-conformance-app-backend/primary-endpoint-picker-svc to be restored... + epp_unavailable_fail_open.go:95: Successfully restored selector for Service gateway-conformance-app-backend/primary-endpoint-picker-svc +=== NAME TestInferenceExtensionConformance/EppUnAvailableFailOpen + apply.go:283: 2025-10-14T01:04:02.737540721Z: Deleting httproute-for-failopen-pool-gw HTTPRoute +=== RUN TestInferenceExtensionConformance/GatewayFollowingEPPRouting + conformance.go:72: 2025-10-14T01:04:02.776777596Z: Applying tests/gateway_following_epp_routing.yaml + apply.go:275: 2025-10-14T01:04:02.785686221Z: Creating httproute-for-primary-gw HTTPRoute + conformance.go:77: 2025-10-14T01:04:02.796671638Z: Running GatewayFollowingEPPRouting, relying on the following features: -, Gateway-standard + gateway_following_epp_routing.go:64: Verifying HTTPRoute and InferencePool are accepted and the Gateway has an address. 
+ gateway_following_epp_routing.go:65: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-for-primary-gw to be Accepted by Gateway gateway-conformance-infra/conformance-primary + gateway_following_epp_routing.go:65: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-for-primary-gw to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary + gateway_following_epp_routing.go:65: HTTPRoute gateway-conformance-app-backend/httproute-for-primary-gw is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary + gateway_following_epp_routing.go:66: Waiting for InferencePool gateway-conformance-app-backend/primary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted) + gateway_following_epp_routing.go:66: InferencePool gateway-conformance-app-backend/primary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted' + gateway_following_epp_routing.go:66: InferencePool gateway-conformance-app-backend/primary-inference-pool is Accepted by a parent Gateway (Reason: Accepted) + gateway_following_epp_routing.go:67: Waiting for Gateway gateway-conformance-infra/conformance-primary to get an address... 
+ gateway_following_epp_routing.go:67: Gateway gateway-conformance-infra/conformance-primary has address: 10.96.88.207:80 + gateway_following_epp_routing.go:69: Fetching backend pods with labels: map[app:primary-inference-model-server] + gateway_following_epp_routing.go:70: Searching for Pods with labels map[app:primary-inference-model-server] in namespace gateway-conformance-app-backend + traffic.go:151: 2025-10-14T01:04:03.813018971Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:04.039845221Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 2.084µs) + traffic.go:151: 2025-10-14T01:04:05.041253555Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:05.066757055Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:05.112700555Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 1.253732668s) + 
traffic.go:151: 2025-10-14T01:04:06.115200041Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:06.151358708Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:06.165424958Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:06.179929833Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 2.351770292s) + traffic.go:151: 2025-10-14T01:04:07.181082125Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:07.216399792Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 3.367397543s) + traffic.go:151: 2025-10-14T01:04:08.224429001Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:08.301275626Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test 
RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 4.410123418s) + traffic.go:151: 2025-10-14T01:04:09.303616793Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:09.326721918Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 5.48990646s) + traffic.go:151: 2025-10-14T01:04:10.330729919Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:10.377394335Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:10.394770752Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got 
primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 6.563737044s) + traffic.go:151: 2025-10-14T01:04:11.396055002Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:11.417175669Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:11.447515169Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 7.603522086s) + traffic.go:151: 2025-10-14T01:04:12.449247503Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:12.470789711Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:12.484345378Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:12.521570711Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 8.67069217s) + traffic.go:151: 2025-10-14T01:04:13.522841003Z: Making POST request to 
http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:13.56501492Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:13.582837795Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 9.751359046s) + traffic.go:151: 2025-10-14T01:04:14.585632671Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:14.627992629Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 10.771919296s) + traffic.go:151: 2025-10-14T01:04:15.629393879Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:15.659107754Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:15.694903963Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, 
Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 11.845439422s) + traffic.go:151: 2025-10-14T01:04:16.695987088Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:16.743231505Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 12.882275339s) + traffic.go:151: 2025-10-14T01:04:17.745030589Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:17.767151755Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 13.931277214s) + traffic.go:151: 2025-10-14T01:04:18.769121548Z: Making POST request to http://10.96.88.207/primary-gateway-test + 
traffic.go:163: 2025-10-14T01:04:18.790365589Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 14.955418298s) + traffic.go:151: 2025-10-14T01:04:19.791362131Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:19.810897965Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 15.97767709s) + traffic.go:151: 2025-10-14T01:04:20.812557174Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:20.83321659Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:20.863946507Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: 
false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 17.019561633s) + traffic.go:151: 2025-10-14T01:04:21.865297716Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:21.911503632Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 18.05145005s) + traffic.go:151: 2025-10-14T01:04:22.912107008Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:22.929975466Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 19.0983753s) + traffic.go:151: 2025-10-14T01:04:23.931500008Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:23.959583758Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test 
RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 20.117819676s) + traffic.go:151: 2025-10-14T01:04:24.962713175Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:25.073250092Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:25.090582551Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:25.106454801Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 21.276928135s) + traffic.go:151: 2025-10-14T01:04:26.107014259Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:26.146236176Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: 
expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 22.293346385s) + traffic.go:151: 2025-10-14T01:04:27.147186052Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:27.177865218Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 23.333471094s) + traffic.go:151: 2025-10-14T01:04:28.179942719Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:28.219684177Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 24.366152219s) + traffic.go:151: 2025-10-14T01:04:29.221286053Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:29.243604803Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: 
Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 25.407605928s) + traffic.go:151: 2025-10-14T01:04:30.24408472Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:30.289387886Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 26.43041297s) + traffic.go:151: 2025-10-14T01:04:31.290334345Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:31.341147804Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 27.476604596s) + traffic.go:151: 2025-10-14T01:04:32.342085846Z: Making POST request to 
http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:32.392231304Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:32.438599596Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:169: 2025-10-14T01:04:32.470273637Z: Request passed + traffic.go:151: 2025-10-14T01:04:32.470374096Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:32.484053054Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 2.375µs) + traffic.go:151: 2025-10-14T01:04:33.487824888Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:33.574704471Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 1.016638084s) + traffic.go:151: 2025-10-14T01:04:34.576959555Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:34.633173013Z: Response expectation 
failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 2.106494126s) + traffic.go:151: 2025-10-14T01:04:35.634988583Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:35.693747958Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 3.164699876s) + traffic.go:151: 2025-10-14T01:04:36.694937Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:36.750264125Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:36.769162667Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to 
start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 4.280011127s) + traffic.go:151: 2025-10-14T01:04:37.771181417Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:37.810051584Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 5.300907877s) + traffic.go:151: 2025-10-14T01:04:38.811659626Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:38.866971584Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 6.341297544s) + traffic.go:151: 2025-10-14T01:04:39.875429085Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:39.923348543Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:39.94242846Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 
2025-10-14T01:04:39.962341293Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 7.472177295s) + traffic.go:151: 2025-10-14T01:04:40.963085085Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:41.043185794Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 8.492794129s) + traffic.go:151: 2025-10-14T01:04:42.044456419Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:42.083978294Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with 
primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 9.574119338s) + traffic.go:151: 2025-10-14T01:04:43.088392128Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:43.149567628Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 10.618079171s) + traffic.go:151: 2025-10-14T01:04:44.151589712Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:44.208832087Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:44.225526295Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 11.738426089s) + traffic.go:151: 2025-10-14T01:04:45.227075713Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:45.281433338Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 
2025-10-14T01:04:45.300640921Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:169: 2025-10-14T01:04:45.325600254Z: Request passed + traffic.go:151: 2025-10-14T01:04:45.325758588Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:45.344870171Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 4.625µs) + traffic.go:151: 2025-10-14T01:04:46.347018421Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:46.378773546Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 1.021176001s) + traffic.go:151: 2025-10-14T01:04:47.379988464Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:47.40334288Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, 
Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 2.054202001s) + traffic.go:151: 2025-10-14T01:04:48.407456256Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:48.467759131Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:48.485551464Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 3.141919877s) + traffic.go:151: 2025-10-14T01:04:49.486317631Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:49.514445923Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 4.160485294s) + traffic.go:151: 
2025-10-14T01:04:50.521837465Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:50.609384007Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:50.64858034Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 5.283617628s) + traffic.go:151: 2025-10-14T01:04:51.651374757Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:51.695010841Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 6.325408795s) + traffic.go:151: 2025-10-14T01:04:52.699581091Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:52.755892383Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, 
Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 7.371361879s) + traffic.go:151: 2025-10-14T01:04:53.757197717Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:53.779424133Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 8.431435004s) + traffic.go:151: 2025-10-14T01:04:54.782000759Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:54.8078453Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 9.45622888s) + traffic.go:151: 2025-10-14T01:04:55.809546051Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:163: 2025-10-14T01:04:55.861264926Z: Response expectation failed 
for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: , KeyPem: } not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 10.483662172s) + traffic.go:151: 2025-10-14T01:04:56.862348593Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:56.920280093Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:151: 2025-10-14T01:04:56.948710968Z: Making POST request to http://10.96.88.207/primary-gateway-test + traffic.go:169: 2025-10-14T01:04:56.966484301Z: Request passed +=== RUN TestInferenceExtensionConformance/GatewayFollowingEPPRouting/should_route_traffic_to_a_single_designated_pod + gateway_following_epp_routing.go:137: Sending request to 10.96.88.207:80 with EPP header 'test-epp-endpoint-selection: 10.244.0.165' + gateway_following_epp_routing.go:138: Expecting traffic to be routed to pod: [primary-inference-model-server-deployment-66659cd5bf-rgblk] + gateway_following_epp_routing.go:140: 2025-10-14T01:04:56.96718176Z: Making POST request to http://10.96.88.207/primary-gateway-test + gateway_following_epp_routing.go:140: Not all the requests are sent to the expectedPods successfully, err: request was handled by an unexpected pod "primary-inference-model-server-deployment-66659cd5bf-lw9lc" +=== RUN TestInferenceExtensionConformance/GatewayFollowingEPPRouting/should_route_traffic_to_two_designated_pods + gateway_following_epp_routing.go:137: Sending request to 10.96.88.207:80 with EPP header 'test-epp-endpoint-selection: 10.244.0.166,10.244.0.170' + gateway_following_epp_routing.go:138: 
Expecting traffic to be routed to pod: [primary-inference-model-server-deployment-66659cd5bf-glbzw primary-inference-model-server-deployment-66659cd5bf-lw9lc] + gateway_following_epp_routing.go:140: 2025-10-14T01:04:57.378579052Z: Making POST request to http://10.96.88.207/primary-gateway-test + gateway_following_epp_routing.go:140: Not all the requests are sent to the expectedPods successfully, err: request was handled by an unexpected pod "primary-inference-model-server-deployment-66659cd5bf-rgblk" +=== RUN TestInferenceExtensionConformance/GatewayFollowingEPPRouting/should_route_traffic_to_all_available_pods + gateway_following_epp_routing.go:137: Sending request to 10.96.88.207:80 with EPP header 'test-epp-endpoint-selection: 10.244.0.166,10.244.0.170,10.244.0.165' + gateway_following_epp_routing.go:138: Expecting traffic to be routed to pod: [primary-inference-model-server-deployment-66659cd5bf-glbzw primary-inference-model-server-deployment-66659cd5bf-lw9lc primary-inference-model-server-deployment-66659cd5bf-rgblk] + gateway_following_epp_routing.go:140: 2025-10-14T01:04:57.736901843Z: Making POST request to http://10.96.88.207/primary-gateway-test + gateway_following_epp_routing.go:140: Traffic successfully reached only to expected pods: [primary-inference-model-server-deployment-66659cd5bf-glbzw primary-inference-model-server-deployment-66659cd5bf-lw9lc primary-inference-model-server-deployment-66659cd5bf-rgblk] +=== NAME TestInferenceExtensionConformance/GatewayFollowingEPPRouting + apply.go:283: 2025-10-14T01:04:58.127986844Z: Deleting httproute-for-primary-gw HTTPRoute +=== RUN TestInferenceExtensionConformance/HTTPRouteInvalidInferencePoolRef + conformance.go:72: 2025-10-14T01:04:58.144944469Z: Applying tests/httproute_invalid_inferencepool_ref.yaml + apply.go:275: 2025-10-14T01:04:58.154912969Z: Creating httproute-to-non-existent-pool HTTPRoute + conformance.go:77: 2025-10-14T01:04:58.166170635Z: Running HTTPRouteInvalidInferencePoolRef, relying on 
the following features: -, Gateway-standard +=== RUN TestInferenceExtensionConformance/HTTPRouteInvalidInferencePoolRef/HTTPRoute_should_have_Accepted=True_and_ResolvedRefs=False_for_non-existent_InferencePool + httproute_invalid_inferencepool_ref.go:63: Successfully verified HTTPRoute gateway-conformance-app-backend/httproute-to-non-existent-pool has conditions: Accepted=True and ResolvedRefs=False (Reason: BackendNotFound) for Gateway gateway-conformance-infra/conformance-primary +=== NAME TestInferenceExtensionConformance/HTTPRouteInvalidInferencePoolRef + apply.go:283: 2025-10-14T01:04:59.182424011Z: Deleting httproute-to-non-existent-pool HTTPRoute +=== RUN TestInferenceExtensionConformance/HTTPRouteMultipleGatewaysDifferentPools + conformance.go:72: 2025-10-14T01:04:59.196436219Z: Applying tests/httproute_multiple_gateways_different_pools.yaml + apply.go:275: 2025-10-14T01:04:59.204446219Z: Creating route-for-primary-gateway HTTPRoute + apply.go:275: 2025-10-14T01:04:59.212459761Z: Creating route-for-secondary-gateway HTTPRoute +=== RUN TestInferenceExtensionConformance/HTTPRouteMultipleGatewaysDifferentPools/Primary_HTTPRoute,_InferencePool,_and_Gateway_path:_verify_status_and_traffic + httproute_multiple_gateways_different_pools.go:58: Waiting for HTTPRoute gateway-conformance-app-backend/route-for-primary-gateway to be Accepted by Gateway gateway-conformance-infra/conformance-primary + httproute_multiple_gateways_different_pools.go:58: Waiting for HTTPRoute gateway-conformance-app-backend/route-for-primary-gateway to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary + httproute_multiple_gateways_different_pools.go:58: HTTPRoute gateway-conformance-app-backend/route-for-primary-gateway is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary + httproute_multiple_gateways_different_pools.go:58: Waiting for InferencePool gateway-conformance-app-backend/primary-inference-pool to be Accepted by a 
parent Gateway (Reason: Accepted) + httproute_multiple_gateways_different_pools.go:58: InferencePool gateway-conformance-app-backend/primary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted' + httproute_multiple_gateways_different_pools.go:58: InferencePool gateway-conformance-app-backend/primary-inference-pool is Accepted by a parent Gateway (Reason: Accepted) + httproute_multiple_gateways_different_pools.go:58: Successfully verified: HTTPRoute gateway-conformance-app-backend/route-for-primary-gateway (Gateway gateway-conformance-infra/conformance-primary) is Accepted & Resolved, and InferencePool gateway-conformance-app-backend/primary-inference-pool is RouteAccepted. + httproute_multiple_gateways_different_pools.go:66: Waiting for Gateway gateway-conformance-infra/conformance-primary to get an address... + httproute_multiple_gateways_different_pools.go:66: Gateway gateway-conformance-infra/conformance-primary has address: 10.96.88.207:80 + traffic.go:151: 2025-10-14T01:05:00.230920428Z: Making GET request to http://10.96.88.207/test-primary-gateway + traffic.go:151: 2025-10-14T01:05:00.23344447Z: Making GET request to http://10.96.88.207/test-primary-gateway + traffic.go:151: 2025-10-14T01:05:00.234922095Z: Making GET request to http://10.96.88.207/test-primary-gateway + traffic.go:169: 2025-10-14T01:05:00.23615922Z: Request passed +=== RUN TestInferenceExtensionConformance/HTTPRouteMultipleGatewaysDifferentPools/Secondary_HTTPRoute,_InferencePool,_and_Gateway_path:_verify_status_and_traffic + httproute_multiple_gateways_different_pools.go:84: Waiting for HTTPRoute gateway-conformance-app-backend/route-for-secondary-gateway to be Accepted by Gateway gateway-conformance-infra/conformance-secondary + httproute_multiple_gateways_different_pools.go:84: Waiting for HTTPRoute gateway-conformance-app-backend/route-for-secondary-gateway to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-secondary + 
httproute_multiple_gateways_different_pools.go:84: HTTPRoute gateway-conformance-app-backend/route-for-secondary-gateway is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-secondary + httproute_multiple_gateways_different_pools.go:84: Waiting for InferencePool gateway-conformance-app-backend/secondary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted) + httproute_multiple_gateways_different_pools.go:84: InferencePool gateway-conformance-app-backend/secondary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted' + httproute_multiple_gateways_different_pools.go:84: InferencePool gateway-conformance-app-backend/secondary-inference-pool is Accepted by a parent Gateway (Reason: Accepted) + httproute_multiple_gateways_different_pools.go:84: Successfully verified: HTTPRoute gateway-conformance-app-backend/route-for-secondary-gateway (Gateway gateway-conformance-infra/conformance-secondary) is Accepted & Resolved, and InferencePool gateway-conformance-app-backend/secondary-inference-pool is RouteAccepted. + httproute_multiple_gateways_different_pools.go:92: Waiting for Gateway gateway-conformance-infra/conformance-secondary to get an address... 
+ httproute_multiple_gateways_different_pools.go:92: Gateway gateway-conformance-infra/conformance-secondary has address: 10.96.165.14:80 + traffic.go:151: 2025-10-14T01:05:00.247583886Z: Making GET request to http://10.96.165.14/test-secondary-gateway + traffic.go:151: 2025-10-14T01:05:00.248908345Z: Making GET request to http://10.96.165.14/test-secondary-gateway + traffic.go:151: 2025-10-14T01:05:00.249860136Z: Making GET request to http://10.96.165.14/test-secondary-gateway + traffic.go:169: 2025-10-14T01:05:00.250706761Z: Request passed +=== NAME TestInferenceExtensionConformance/HTTPRouteMultipleGatewaysDifferentPools + apply.go:283: 2025-10-14T01:05:00.250741053Z: Deleting route-for-secondary-gateway HTTPRoute + apply.go:283: 2025-10-14T01:05:00.256212345Z: Deleting route-for-primary-gateway HTTPRoute +=== RUN TestInferenceExtensionConformance/InferencePoolAccepted + conformance.go:72: 2025-10-14T01:05:00.261789928Z: Applying tests/inferencepool_accepted.yaml + apply.go:275: 2025-10-14T01:05:00.268909345Z: Creating httproute-for-inferencepool-accepted HTTPRoute + conformance.go:77: 2025-10-14T01:05:00.283944303Z: Running InferencePoolAccepted, relying on the following features: -, Gateway-standard +=== RUN TestInferenceExtensionConformance/InferencePoolAccepted/InferencePool_should_have_Accepted_condition_set_to_True + inferencepool_accepted.go:54: InferencePool gateway-conformance-app-backend/primary-inference-pool successfully has condition Type=Accepted, Status=True +=== NAME TestInferenceExtensionConformance/InferencePoolAccepted + apply.go:283: 2025-10-14T01:05:00.290032886Z: Deleting httproute-for-inferencepool-accepted HTTPRoute +=== RUN TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation + conformance.go:72: 2025-10-14T01:05:00.296340053Z: Applying tests/inferencepool_httproute_port_validation.yaml + apply.go:275: 2025-10-14T01:05:00.31779672Z: Creating httproute-pool-port-unspecified HTTPRoute + apply.go:275: 
2025-10-14T01:05:00.328601886Z: Creating httproute-pool-port-matching HTTPRoute + apply.go:275: 2025-10-14T01:05:00.336310345Z: Creating httproute-pool-port-non-matching HTTPRoute + conformance.go:77: 2025-10-14T01:05:00.352536553Z: Running InferencePoolHTTPRoutePortValidation, relying on the following features: -, Gateway-standard + inferencepool_httproute_port_validation.go:47: Waiting for Gateway gateway-conformance-infra/conformance-primary to get an address... + inferencepool_httproute_port_validation.go:47: Gateway gateway-conformance-infra/conformance-primary has address: 10.96.88.207:80 +=== RUN TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation/Scenario_1:_HTTPRoute_backendRef_to_InferencePool_with_Port_Unspecified + inferencepool_httproute_port_validation.go:54: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-pool-port-unspecified to be Accepted by Gateway gateway-conformance-infra/conformance-primary + inferencepool_httproute_port_validation.go:54: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-pool-port-unspecified to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary + inferencepool_httproute_port_validation.go:54: HTTPRoute gateway-conformance-app-backend/httproute-pool-port-unspecified is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary + inferencepool_httproute_port_validation.go:55: Waiting for InferencePool gateway-conformance-app-backend/primary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted) + inferencepool_httproute_port_validation.go:55: InferencePool gateway-conformance-app-backend/primary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted' + inferencepool_httproute_port_validation.go:55: InferencePool gateway-conformance-app-backend/primary-inference-pool is Accepted by a parent Gateway (Reason: Accepted) + traffic.go:151: 2025-10-14T01:05:02.364763929Z: Making GET 
request to http://10.96.88.207/test-port-unspecified + traffic.go:151: 2025-10-14T01:05:02.366493929Z: Making GET request to http://10.96.88.207/test-port-unspecified + traffic.go:151: 2025-10-14T01:05:02.367530596Z: Making GET request to http://10.96.88.207/test-port-unspecified + traffic.go:169: 2025-10-14T01:05:02.368427012Z: Request passed +=== RUN TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation/Scenario_2:_HTTPRoute_backendRef_to_InferencePool_with_Port_Specified_and_Matching + inferencepool_httproute_port_validation.go:76: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-pool-port-matching to be Accepted by Gateway gateway-conformance-infra/conformance-primary + inferencepool_httproute_port_validation.go:76: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-pool-port-matching to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary + inferencepool_httproute_port_validation.go:76: HTTPRoute gateway-conformance-app-backend/httproute-pool-port-matching is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary + inferencepool_httproute_port_validation.go:77: Waiting for InferencePool gateway-conformance-app-backend/primary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted) + inferencepool_httproute_port_validation.go:77: InferencePool gateway-conformance-app-backend/primary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted' + inferencepool_httproute_port_validation.go:77: InferencePool gateway-conformance-app-backend/primary-inference-pool is Accepted by a parent Gateway (Reason: Accepted) + traffic.go:151: 2025-10-14T01:05:02.375146137Z: Making GET request to http://10.96.88.207/test-port-matching + traffic.go:151: 2025-10-14T01:05:02.376380554Z: Making GET request to http://10.96.88.207/test-port-matching + traffic.go:151: 2025-10-14T01:05:02.377616304Z: Making GET request to 
http://10.96.88.207/test-port-matching + traffic.go:169: 2025-10-14T01:05:02.378851304Z: Request passed +=== RUN TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation/Scenario_3:_HTTPRoute_backendRef_to_InferencePool_with_Port_Specified_and_Non-Matching._Request_still_passing_because_HTTP_Port_is_ignored_when_inferencePool_is_backendRef + inferencepool_httproute_port_validation.go:99: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-pool-port-non-matching to be Accepted by Gateway gateway-conformance-infra/conformance-primary + inferencepool_httproute_port_validation.go:99: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-pool-port-non-matching to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary + inferencepool_httproute_port_validation.go:99: HTTPRoute gateway-conformance-app-backend/httproute-pool-port-non-matching is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary + inferencepool_httproute_port_validation.go:100: Waiting for InferencePool gateway-conformance-app-backend/primary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted) + inferencepool_httproute_port_validation.go:100: InferencePool gateway-conformance-app-backend/primary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted' + inferencepool_httproute_port_validation.go:100: InferencePool gateway-conformance-app-backend/primary-inference-pool is Accepted by a parent Gateway (Reason: Accepted) + traffic.go:151: 2025-10-14T01:05:02.384983429Z: Making GET request to http://10.96.88.207/test-port-non-matching + traffic.go:151: 2025-10-14T01:05:02.386198887Z: Making GET request to http://10.96.88.207/test-port-non-matching + traffic.go:151: 2025-10-14T01:05:02.387009179Z: Making GET request to http://10.96.88.207/test-port-non-matching + traffic.go:169: 2025-10-14T01:05:02.388009429Z: Request passed +=== NAME 
TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation + apply.go:283: 2025-10-14T01:05:02.388062512Z: Deleting httproute-pool-port-non-matching HTTPRoute + apply.go:283: 2025-10-14T01:05:02.392225554Z: Deleting httproute-pool-port-matching HTTPRoute + apply.go:283: 2025-10-14T01:05:02.396365179Z: Deleting httproute-pool-port-unspecified HTTPRoute +=== RUN TestInferenceExtensionConformance/InferencePoolInvalidEPPService + conformance.go:72: 2025-10-14T01:05:02.402896512Z: Applying tests/inferencepool_invalid_epp_service.yaml + apply.go:275: 2025-10-14T01:05:02.415857471Z: Creating pool-with-invalid-epp InferencePool + apply.go:275: 2025-10-14T01:05:02.422569762Z: Creating httproute-for-invalid-epp-pool HTTPRoute + conformance.go:77: 2025-10-14T01:05:02.435913971Z: Running InferencePoolInvalidEPPService, relying on the following features: Gateway-standard, HTTPRoute-standard, - + inferencepool_invalid_epp_service.go:55: Waiting for Gateway gateway-conformance-infra/conformance-primary to get an address... 
+ inferencepool_invalid_epp_service.go:55: Gateway gateway-conformance-infra/conformance-primary has address: 10.96.88.207:80 +=== RUN TestInferenceExtensionConformance/InferencePoolInvalidEPPService/InferecePool_has_a_ResolvedRefs_Condition_with_status_False + inferencepool_invalid_epp_service.go:68: InferencePool gateway-conformance-app-backend/pool-with-invalid-epp successfully has condition Type=ResolvedRefs, Status=False +=== RUN TestInferenceExtensionConformance/InferencePoolInvalidEPPService/Request_to_a_route_with_an_invalid_backend_reference_receives_a_500_response + traffic.go:151: 2025-10-14T01:05:03.466323221Z: Making GET request to http://10.96.88.207/invalid-epp-test + traffic.go:151: 2025-10-14T01:05:03.467345763Z: Making GET request to http://10.96.88.207/invalid-epp-test + traffic.go:151: 2025-10-14T01:05:03.468300846Z: Making GET request to http://10.96.88.207/invalid-epp-test + traffic.go:169: 2025-10-14T01:05:03.469222846Z: Request passed +=== NAME TestInferenceExtensionConformance/InferencePoolInvalidEPPService + apply.go:283: 2025-10-14T01:05:03.469294346Z: Deleting httproute-for-invalid-epp-pool HTTPRoute + apply.go:283: 2025-10-14T01:05:03.47732543Z: Deleting pool-with-invalid-epp InferencePool +=== RUN TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools + conformance.go:72: 2025-10-14T01:05:03.481843888Z: Applying tests/inferencepool_multiple_rules_different_pools.yaml + apply.go:275: 2025-10-14T01:05:03.49263818Z: Creating httproute-multiple-rules-different-pools HTTPRoute + conformance.go:77: 2025-10-14T01:05:03.498142263Z: Running HTTPRouteMultipleRulesDifferentPools, relying on the following features: Gateway-standard, HTTPRoute-standard, - +=== RUN TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Wait_for_resources_to_be_accepted + inferencepool_multiple_rules_different_pools.go:60: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools to be Accepted by 
Gateway gateway-conformance-infra/conformance-primary + inferencepool_multiple_rules_different_pools.go:60: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary + inferencepool_multiple_rules_different_pools.go:60: HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary + inferencepool_multiple_rules_different_pools.go:60: Waiting for InferencePool gateway-conformance-app-backend/primary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted) + inferencepool_multiple_rules_different_pools.go:60: InferencePool gateway-conformance-app-backend/primary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted' + inferencepool_multiple_rules_different_pools.go:60: InferencePool gateway-conformance-app-backend/primary-inference-pool is Accepted by a parent Gateway (Reason: Accepted) + inferencepool_multiple_rules_different_pools.go:60: Successfully verified: HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools (Gateway gateway-conformance-infra/conformance-primary) is Accepted & Resolved, and InferencePool gateway-conformance-app-backend/primary-inference-pool is RouteAccepted. 
+ inferencepool_multiple_rules_different_pools.go:61: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools to be Accepted by Gateway gateway-conformance-infra/conformance-primary + inferencepool_multiple_rules_different_pools.go:61: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary + inferencepool_multiple_rules_different_pools.go:61: HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary + inferencepool_multiple_rules_different_pools.go:61: Waiting for InferencePool gateway-conformance-app-backend/secondary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted) + inferencepool_multiple_rules_different_pools.go:61: InferencePool gateway-conformance-app-backend/secondary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted' + inferencepool_multiple_rules_different_pools.go:61: InferencePool gateway-conformance-app-backend/secondary-inference-pool is Accepted by a parent Gateway (Reason: Accepted) + inferencepool_multiple_rules_different_pools.go:61: Successfully verified: HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools (Gateway gateway-conformance-infra/conformance-primary) is Accepted & Resolved, and InferencePool gateway-conformance-app-backend/secondary-inference-pool is RouteAccepted. +=== RUN TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Traffic_should_be_routed_to_the_correct_pool_based_on_path + inferencepool_multiple_rules_different_pools.go:65: Waiting for Gateway gateway-conformance-infra/conformance-primary to get an address... 
+ inferencepool_multiple_rules_different_pools.go:65: Gateway gateway-conformance-infra/conformance-primary has address: 10.96.88.207:80 +=== RUN TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Traffic_should_be_routed_to_the_correct_pool_based_on_path/request_to_primary_pool + inferencepool_multiple_rules_different_pools.go:68: 2025-10-14T01:05:04.52978643Z: Making GET request to http://10.96.88.207/primary + http.go:251: 2025-10-14T01:05:04.536417763Z: Request passed +=== RUN TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Traffic_should_be_routed_to_the_correct_pool_based_on_path/request_to_secondary_pool + inferencepool_multiple_rules_different_pools.go:79: 2025-10-14T01:05:04.536504847Z: Making GET request to http://10.96.88.207/secondary + http.go:251: 2025-10-14T01:05:04.539573222Z: Request passed +=== NAME TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools + apply.go:283: 2025-10-14T01:05:04.539630847Z: Deleting httproute-multiple-rules-different-pools HTTPRoute +=== RUN TestInferenceExtensionConformance/InferencePoolResolvedRefsCondition + conformance.go:68: Skipping InferencePoolResolvedRefsCondition: test explicitly skipped +=== NAME TestInferenceExtensionConformance + apply.go:283: 2025-10-14T01:05:04.54499568Z: Deleting epp-to-inference-model-reader RoleBinding + apply.go:283: 2025-10-14T01:05:04.549125555Z: Deleting inference-model-reader Role + apply.go:283: 2025-10-14T01:05:04.552758388Z: Deleting plugins-config ConfigMap + apply.go:283: 2025-10-14T01:05:04.55691718Z: Deleting secondary-app-endpoint-picker Deployment + apply.go:283: 2025-10-14T01:05:04.561630972Z: Deleting secondary-endpoint-picker-svc Service + apply.go:283: 2025-10-14T01:05:04.58978643Z: Deleting secondary-inference-pool InferencePool + apply.go:283: 2025-10-14T01:05:04.59695693Z: Deleting primary-app-endpoint-picker Deployment + apply.go:283: 2025-10-14T01:05:04.613129388Z: Deleting primary-endpoint-picker-svc 
Service + apply.go:283: 2025-10-14T01:05:04.655569013Z: Deleting primary-inference-pool InferencePool + apply.go:283: 2025-10-14T01:05:04.673237097Z: Deleting secondary-inference-model-server-deployment Deployment + apply.go:283: 2025-10-14T01:05:04.682511888Z: Deleting primary-inference-model-server-deployment Deployment + apply.go:283: 2025-10-14T01:05:04.703110263Z: Deleting conformance-secondary Gateway + apply.go:283: 2025-10-14T01:05:04.715927263Z: Deleting conformance-primary Gateway + apply.go:283: 2025-10-14T01:05:04.725649972Z: Deleting gateway-conformance-app-backend Namespace + apply.go:283: 2025-10-14T01:05:04.764089138Z: Deleting gateway-conformance-infra Namespace +--- FAIL: TestInferenceExtensionConformance (107.40s) + --- PASS: TestInferenceExtensionConformance/EppUnAvailableFailOpen (26.58s) + --- PASS: TestInferenceExtensionConformance/EppUnAvailableFailOpen/Phase_1:_Verify_baseline_connectivity_with_EPP_available (23.32s) + --- PASS: TestInferenceExtensionConformance/EppUnAvailableFailOpen/Phase_2:_Verify_fail-open_behavior_after_EPP_becomes_unavailable (2.20s) + --- FAIL: TestInferenceExtensionConformance/GatewayFollowingEPPRouting (55.39s) + --- FAIL: TestInferenceExtensionConformance/GatewayFollowingEPPRouting/should_route_traffic_to_a_single_designated_pod (0.41s) + --- FAIL: TestInferenceExtensionConformance/GatewayFollowingEPPRouting/should_route_traffic_to_two_designated_pods (0.36s) + --- PASS: TestInferenceExtensionConformance/GatewayFollowingEPPRouting/should_route_traffic_to_all_available_pods (0.39s) + --- PASS: TestInferenceExtensionConformance/HTTPRouteInvalidInferencePoolRef (1.05s) + --- PASS: TestInferenceExtensionConformance/HTTPRouteInvalidInferencePoolRef/HTTPRoute_should_have_Accepted=True_and_ResolvedRefs=False_for_non-existent_InferencePool (1.02s) + --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleGatewaysDifferentPools (1.07s) + --- PASS: 
TestInferenceExtensionConformance/HTTPRouteMultipleGatewaysDifferentPools/Primary_HTTPRoute,_InferencePool,_and_Gateway_path:_verify_status_and_traffic (1.02s) + --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleGatewaysDifferentPools/Secondary_HTTPRoute,_InferencePool,_and_Gateway_path:_verify_status_and_traffic (0.01s) + --- PASS: TestInferenceExtensionConformance/InferencePoolAccepted (0.03s) + --- PASS: TestInferenceExtensionConformance/InferencePoolAccepted/InferencePool_should_have_Accepted_condition_set_to_True (0.01s) + --- PASS: TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation (2.11s) + --- PASS: TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation/Scenario_1:_HTTPRoute_backendRef_to_InferencePool_with_Port_Unspecified (2.01s) + --- PASS: TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation/Scenario_2:_HTTPRoute_backendRef_to_InferencePool_with_Port_Specified_and_Matching (0.01s) + --- PASS: TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation/Scenario_3:_HTTPRoute_backendRef_to_InferencePool_with_Port_Specified_and_Non-Matching._Request_still_passing_because_HTTP_Port_is_ignored_when_inferencePool_is_backendRef (0.01s) + --- PASS: TestInferenceExtensionConformance/InferencePoolInvalidEPPService (1.08s) + --- PASS: TestInferenceExtensionConformance/InferencePoolInvalidEPPService/InferecePool_has_a_ResolvedRefs_Condition_with_status_False (0.00s) + --- PASS: TestInferenceExtensionConformance/InferencePoolInvalidEPPService/Request_to_a_route_with_an_invalid_backend_reference_receives_a_500_response (0.00s) + --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools (1.06s) + --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Wait_for_resources_to_be_accepted (1.02s) + --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Traffic_should_be_routed_to_the_correct_pool_based_on_path (0.02s) + --- 
PASS: TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Traffic_should_be_routed_to_the_correct_pool_based_on_path/request_to_primary_pool (0.01s) + --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Traffic_should_be_routed_to_the_correct_pool_based_on_path/request_to_secondary_pool (0.00s) + --- SKIP: TestInferenceExtensionConformance/InferencePoolResolvedRefsCondition (0.00s) +FAIL +FAIL github.com/nginx/nginx-gateway-fabric/v2/tests/conformance 107.446s +FAIL