From 4bb867e2fa09cf688e82e7bf7aeb609eb0039f4d Mon Sep 17 00:00:00 2001
From: Saylor Berman <s.berman@f5.com>
Date: Tue, 9 Sep 2025 10:17:45 -0600
Subject: [PATCH 01/12] Basic NJS module to extract model name (#3877)

Problem: To support the full Gateway API Inference Extension, we need to be able to extract the model name from the client request body in certain situations.

Solution: Add a basic NJS module to extract the model name. This module will be enhanced (see the TODO notes at the top of epp.js) to be included in the full solution. On its own, it is not yet used.
---
 .nvmrc                                        |  2 +-
 Makefile                                      |  2 +-
 build/Dockerfile.nginx                        |  2 +-
 build/Dockerfile.nginxplus                    |  2 +-
 .../controller/nginx/conf/nginx-plus.conf     |  1 +
 internal/controller/nginx/conf/nginx.conf     |  1 +
 internal/controller/nginx/modules/README.md   |  1 +
 internal/controller/nginx/modules/src/epp.js  | 29 +++++++++++
 .../controller/nginx/modules/test/epp.test.js | 52 +++++++++++++++++++
 9 files changed, 88 insertions(+), 4 deletions(-)
 create mode 100644 internal/controller/nginx/modules/src/epp.js
 create mode 100644 internal/controller/nginx/modules/test/epp.test.js

diff --git a/.nvmrc b/.nvmrc
index 2bd5a0a98a..a45fd52cc5 100644
--- a/.nvmrc
+++ b/.nvmrc
@@ -1 +1 @@
-22
+24
diff --git a/Makefile b/Makefile
index c794b3a9e2..65e4d8aee0 100644
--- a/Makefile
+++ b/Makefile
@@ -33,7 +33,7 @@ GEN_CRD_API_REFERENCE_DOCS_VERSION = v0.3.0
 # renovate: datasource=go depName=sigs.k8s.io/controller-tools
 CONTROLLER_TOOLS_VERSION = v0.19.0
 # renovate: datasource=docker depName=node
-NODE_VERSION = 22
+NODE_VERSION = 24
 # renovate: datasource=docker depName=quay.io/helmpack/chart-testing
 CHART_TESTING_VERSION = v3.13.0
 # renovate: datasource=github-tags depName=dadav/helm-schema
diff --git a/build/Dockerfile.nginx b/build/Dockerfile.nginx
index 378e8c909b..84696eea82 100644
--- a/build/Dockerfile.nginx
+++ b/build/Dockerfile.nginx
@@ -23,7 +23,7 @@ RUN apk add --no-cache bash \
     && ln -sf /dev/stderr /var/log/nginx/error.log
 
 COPY build/entrypoint.sh /agent/entrypoint.sh
-COPY ${NJS_DIR}/httpmatches.js /usr/lib/nginx/modules/njs/httpmatches.js
+COPY ${NJS_DIR}/ /usr/lib/nginx/modules/njs/
 COPY ${NGINX_CONF_DIR}/nginx.conf /etc/nginx/nginx.conf
 COPY ${NGINX_CONF_DIR}/grpc-error-locations.conf /etc/nginx/grpc-error-locations.conf
 COPY ${NGINX_CONF_DIR}/grpc-error-pages.conf /etc/nginx/grpc-error-pages.conf
diff --git a/build/Dockerfile.nginxplus b/build/Dockerfile.nginxplus
index b92dc19516..6fb5d49773 100644
--- a/build/Dockerfile.nginxplus
+++ b/build/Dockerfile.nginxplus
@@ -29,7 +29,7 @@ RUN apk add --no-cache bash \
     && ln -sf /dev/stderr /var/log/nginx/error.log
 
 COPY build/entrypoint.sh /agent/entrypoint.sh
-COPY ${NJS_DIR}/httpmatches.js /usr/lib/nginx/modules/njs/httpmatches.js
+COPY ${NJS_DIR}/ /usr/lib/nginx/modules/njs/
 COPY ${NGINX_CONF_DIR}/nginx-plus.conf /etc/nginx/nginx.conf
 COPY ${NGINX_CONF_DIR}/grpc-error-locations.conf /etc/nginx/grpc-error-locations.conf
 COPY ${NGINX_CONF_DIR}/grpc-error-pages.conf /etc/nginx/grpc-error-pages.conf
diff --git a/internal/controller/nginx/conf/nginx-plus.conf b/internal/controller/nginx/conf/nginx-plus.conf
index f2b0ec0dc8..50ba9f970c 100644
--- a/internal/controller/nginx/conf/nginx-plus.conf
+++ b/internal/controller/nginx/conf/nginx-plus.conf
@@ -13,6 +13,7 @@ http {
   include /etc/nginx/conf.d/*.conf;
   include /etc/nginx/mime.types;
   js_import /usr/lib/nginx/modules/njs/httpmatches.js;
+  js_import /usr/lib/nginx/modules/njs/epp.js;
 
   default_type application/octet-stream;
 
diff --git a/internal/controller/nginx/conf/nginx.conf b/internal/controller/nginx/conf/nginx.conf
index 791994fdf8..6c4f6be8d9 100644
--- a/internal/controller/nginx/conf/nginx.conf
+++ b/internal/controller/nginx/conf/nginx.conf
@@ -13,6 +13,7 @@ http {
   include /etc/nginx/conf.d/*.conf;
   include /etc/nginx/mime.types;
   js_import /usr/lib/nginx/modules/njs/httpmatches.js;
+  js_import /usr/lib/nginx/modules/njs/epp.js;
 
   default_type application/octet-stream;
 
diff --git a/internal/controller/nginx/modules/README.md b/internal/controller/nginx/modules/README.md
index 9c7c805276..3313ea6604 100644
--- a/internal/controller/nginx/modules/README.md
+++ b/internal/controller/nginx/modules/README.md
@@ -22,6 +22,7 @@ dependencies.
 
 - [httpmatches](./src/httpmatches.js): a location handler for HTTP requests. It redirects requests to an internal
   location block based on the request's headers, arguments, and method.
+- [epp](./src/epp.js): handles communication with the EndpointPicker (EPP) component. This is for acquiring a specific AI endpoint to route client traffic to when using the Gateway API Inference Extension.
 
 ### Helpful Resources for Module Development
 
diff --git a/internal/controller/nginx/modules/src/epp.js b/internal/controller/nginx/modules/src/epp.js
new file mode 100644
index 0000000000..8efcd70ece
--- /dev/null
+++ b/internal/controller/nginx/modules/src/epp.js
@@ -0,0 +1,29 @@
+// This file contains the methods to get an AI workload endpoint from the EndpointPicker (EPP).
+
+// TODO (sberman): this module will need to be enhanced to include the following:
+// - function that sends the subrequest to the Go middleware application (to get the endpoint from EPP)
+// - if a user has specified an Exact matching condition for a model name, extract the model name from
+// the request body, and if it matches that condition, set the proper value in the X-Gateway-Model-Name header
+// (based on if we do a redirect or traffic split (see design doc)) in the subrequest. If the client request
+// already has this header set, then I don't think we need to extract the model from the body, just pass
+// through the existing header.
+// I believe we have to use js_content to call the NJS functionality. Because this takes over
+// the request, we will likely have to finish the NJS functionality with an internalRedirect to an internal
+// location that proxy_passes to the chosen endpoint.
+
+// extractModel extracts the model name from the request body.
+function extractModel(r) {
+	try {
+		var body = JSON.parse(r.requestText);
+		if (body && body.model !== undefined) {
+			return String(body.model);
+		}
+	} catch (e) {
+		r.error(`error parsing request body for model name: ${e.message}`);
+		return '';
+	}
+	r.error('request body does not contain model parameter');
+	return '';
+}
+
+export default { extractModel };
diff --git a/internal/controller/nginx/modules/test/epp.test.js b/internal/controller/nginx/modules/test/epp.test.js
new file mode 100644
index 0000000000..6994423e7a
--- /dev/null
+++ b/internal/controller/nginx/modules/test/epp.test.js
@@ -0,0 +1,52 @@
+import { default as epp } from '../src/epp.js';
+import { expect, describe, it } from 'vitest';
+
+function makeRequest(body) {
+	let r = {
+		// Test mocks
+		error(msg) {
+			r.variables.error = msg;
+		},
+		requestText: body,
+		variables: {},
+	};
+
+	return r;
+}
+
+describe('extractModel', () => {
+	const tests = [
+		{
+			name: 'returns the model value',
+			body: '{"model":"gpt-4"}',
+			model: 'gpt-4',
+			error: undefined,
+		},
+		{
+			name: 'returns empty string if model is missing',
+			body: '{"foo":1}',
+			model: '',
+			error: 'request body does not contain model parameter',
+		},
+		{
+			name: 'returns empty string for invalid JSON',
+			body: 'not-json',
+			model: '',
+			error: `error parsing request body for model name: Unexpected token 'o', "not-json" is not valid JSON`,
+		},
+		{
+			name: 'empty request body',
+			body: '',
+			model: '',
+			error: 'error parsing request body for model name: Unexpected end of JSON input',
+		},
+	];
+
+	tests.forEach((test) => {
+		it(test.name, () => {
+			let r = makeRequest(test.body);
+			expect(epp.extractModel(r)).to.equal(test.model);
+			expect(r.variables.error).to.equal(test.error);
+		});
+	});
+});

From b011b0de40e6efccfe891e8526a6c2a51e376e4a Mon Sep 17 00:00:00 2001
From: Saylor Berman <s.berman@f5.com>
Date: Tue, 16 Sep 2025 12:07:30 -0600
Subject: [PATCH 02/12] Watch InferencePools and configure nginx (#3894)

This commit adds support for the control plane to watch InferencePools. A feature flag has been added to enable/disable processing these resources. By default, it is disabled.

When an HTTPRoute references an InferencePool, we will create a headless Service associated with that InferencePool, and reference it internally in the graph config for that Route. This allows us to use all of our existing logic to get the endpoints and build the proper nginx config for those endpoints.

In a future commit, the nginx config will be updated to handle the proper load balancing for the AI workloads, but for now we just use our default load-balancing methods by proxy_passing to the upstream.
---
 Makefile                                      |   8 +
 charts/nginx-gateway-fabric/README.md         |   3 +-
 .../templates/clusterrole.yaml                |  16 +
 .../templates/deployment.yaml                 |   3 +
 .../nginx-gateway-fabric/values.schema.json   |  14 +
 charts/nginx-gateway-fabric/values.yaml       |   4 +
 cmd/gateway/commands.go                       |  11 +
 .../inference-extension/kustomization.yaml    |   4 +
 deploy/inference-nginx-plus/deploy.yaml       | 441 ++++++++++++++++++
 deploy/inference/deploy.yaml                  | 438 +++++++++++++++++
 .../helm/inference-nginx-plus/values.yaml     |  10 +
 examples/helm/inference/values.yaml           |   4 +
 go.mod                                        |   3 +-
 go.sum                                        |  14 +-
 internal/controller/config/config.go          |   2 +
 internal/controller/handler.go                | 126 ++++-
 internal/controller/handler_test.go           | 265 +++++++++++
 internal/controller/manager.go                |  19 +
 internal/controller/manager_test.go           |  41 +-
 internal/controller/nginx/modules/src/epp.js  |   2 +-
 internal/controller/state/change_processor.go |   7 +
 .../controller/state/change_processor_test.go |   2 +
 .../controller/state/graph/backend_refs.go    | 152 +++++-
 .../state/graph/backend_refs_test.go          | 363 +++++++++++++-
 internal/controller/state/graph/graph.go      |  17 +-
 internal/controller/state/graph/graph_test.go | 168 ++++++-
 .../controller/state/graph/grpcroute_test.go  |   1 +
 internal/controller/state/graph/httproute.go  |  73 ++-
 .../controller/state/graph/httproute_test.go  | 115 ++++-
 .../controller/state/graph/inferencepools.go  |  82 ++++
 .../state/graph/inferencepools_test.go        | 249 ++++++++++
 .../controller/state/graph/reference_grant.go |  12 +-
 .../state/graph/reference_grant_test.go       |  52 ++-
 .../controller/state/graph/route_common.go    |  15 +-
 internal/controller/state/graph/service.go    |   1 -
 internal/framework/controller/resource.go     |  21 +
 internal/framework/kinds/kinds.go             |   6 +
 tests/go.mod                                  |   2 +-
 tests/go.sum                                  |   4 +-
 39 files changed, 2700 insertions(+), 70 deletions(-)
 create mode 100644 config/crd/inference-extension/kustomization.yaml
 create mode 100644 deploy/inference-nginx-plus/deploy.yaml
 create mode 100644 deploy/inference/deploy.yaml
 create mode 100644 examples/helm/inference-nginx-plus/values.yaml
 create mode 100644 examples/helm/inference/values.yaml
 create mode 100644 internal/controller/state/graph/inferencepools.go
 create mode 100644 internal/controller/state/graph/inferencepools_test.go

diff --git a/Makefile b/Makefile
index 65e4d8aee0..16767f946e 100644
--- a/Makefile
+++ b/Makefile
@@ -136,6 +136,14 @@ install-gateway-crds: ## Install Gateway API CRDs
 uninstall-gateway-crds: ## Uninstall Gateway API CRDs
 	kubectl kustomize $(SELF_DIR)config/crd/gateway-api/$(if $(filter true,$(ENABLE_EXPERIMENTAL)),experimental,standard) | kubectl delete -f -
 
+.PHONY: install-inference-crds
+install-inference-crds: ## Install Gateway API Inference Extension CRDs
+	kubectl kustomize $(SELF_DIR)config/crd/inference-extension | kubectl apply -f -
+
+.PHONY: uninstall-inference-crds
+uninstall-inference-crds: ## Uninstall Gateway API Inference Extension CRDs
+	kubectl kustomize $(SELF_DIR)config/crd/inference-extension | kubectl delete -f -
+
 .PHONY: generate-manifests
 generate-manifests: ## Generate manifests using Helm.
 	./scripts/generate-manifests.sh
diff --git a/charts/nginx-gateway-fabric/README.md b/charts/nginx-gateway-fabric/README.md
index eb7f3ce114..2c55f514f8 100644
--- a/charts/nginx-gateway-fabric/README.md
+++ b/charts/nginx-gateway-fabric/README.md
@@ -245,7 +245,7 @@ The following table lists the configurable parameters of the NGINX Gateway Fabri
 | `nginx.usage.resolver` | The nameserver used to resolve the NGINX Plus usage reporting endpoint. Used with NGINX Instance Manager. | string | `""` |
 | `nginx.usage.secretName` | The name of the Secret containing the JWT for NGINX Plus usage reporting. Must exist in the same namespace that the NGINX Gateway Fabric control plane is running in (default namespace: nginx-gateway). | string | `"nplus-license"` |
 | `nginx.usage.skipVerify` | Disable client verification of the NGINX Plus usage reporting server certificate. | bool | `false` |
-| `nginxGateway` | The nginxGateway section contains configuration for the NGINX Gateway Fabric control plane deployment. | object | `{"affinity":{},"autoscaling":{"enable":false},"config":{"logging":{"level":"info"}},"configAnnotations":{},"extraVolumeMounts":[],"extraVolumes":[],"gatewayClassAnnotations":{},"gatewayClassName":"nginx","gatewayControllerName":"gateway.nginx.org/nginx-gateway-controller","gwAPIExperimentalFeatures":{"enable":false},"image":{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"},"kind":"deployment","labels":{},"leaderElection":{"enable":true,"lockName":""},"lifecycle":{},"metrics":{"enable":true,"port":9113,"secure":false},"name":"","nodeSelector":{},"podAnnotations":{},"productTelemetry":{"enable":true},"readinessProbe":{"enable":true,"initialDelaySeconds":3,"port":8081},"replicas":1,"resources":{},"service":{"annotations":{},"labels":{}},"serviceAccount":{"annotations":{},"imagePullSecret":"","imagePullSecrets":[],"name":""},"snippetsFilters":{"enable":false},"terminationGracePeriodSeconds":30,"tolerations":[],"topologySpreadConstraints":[]}` |
+| `nginxGateway` | The nginxGateway section contains configuration for the NGINX Gateway Fabric control plane deployment. | object | `{"affinity":{},"autoscaling":{"enable":false},"config":{"logging":{"level":"info"}},"configAnnotations":{},"extraVolumeMounts":[],"extraVolumes":[],"gatewayClassAnnotations":{},"gatewayClassName":"nginx","gatewayControllerName":"gateway.nginx.org/nginx-gateway-controller","gwAPIExperimentalFeatures":{"enable":false},"gwAPIInferenceExtension":{"enable":false},"image":{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"},"kind":"deployment","labels":{},"leaderElection":{"enable":true,"lockName":""},"lifecycle":{},"metrics":{"enable":true,"port":9113,"secure":false},"name":"","nodeSelector":{},"podAnnotations":{},"productTelemetry":{"enable":true},"readinessProbe":{"enable":true,"initialDelaySeconds":3,"port":8081},"replicas":1,"resources":{},"service":{"annotations":{},"labels":{}},"serviceAccount":{"annotations":{},"imagePullSecret":"","imagePullSecrets":[],"name":""},"snippetsFilters":{"enable":false},"terminationGracePeriodSeconds":30,"tolerations":[],"topologySpreadConstraints":[]}` |
 | `nginxGateway.affinity` | The affinity of the NGINX Gateway Fabric control plane pod. | object | `{}` |
 | `nginxGateway.autoscaling` | Autoscaling configuration for the NGINX Gateway Fabric control plane. | object | `{"enable":false}` |
 | `nginxGateway.autoscaling.enable` | Enable or disable Horizontal Pod Autoscaler for the control plane. | bool | `false` |
@@ -257,6 +257,7 @@ The following table lists the configurable parameters of the NGINX Gateway Fabri
 | `nginxGateway.gatewayClassName` | The name of the GatewayClass that will be created as part of this release. Every NGINX Gateway Fabric must have a unique corresponding GatewayClass resource. NGINX Gateway Fabric only processes resources that belong to its class - i.e. have the "gatewayClassName" field resource equal to the class. | string | `"nginx"` |
 | `nginxGateway.gatewayControllerName` | The name of the Gateway controller. The controller name must be of the form: DOMAIN/PATH. The controller's domain is gateway.nginx.org. | string | `"gateway.nginx.org/nginx-gateway-controller"` |
 | `nginxGateway.gwAPIExperimentalFeatures.enable` | Enable the experimental features of Gateway API which are supported by NGINX Gateway Fabric. Requires the Gateway APIs installed from the experimental channel. | bool | `false` |
+| `nginxGateway.gwAPIInferenceExtension.enable` | Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads. | bool | `false` |
 | `nginxGateway.image` | The image configuration for the NGINX Gateway Fabric control plane. | object | `{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"}` |
 | `nginxGateway.image.repository` | The NGINX Gateway Fabric image to use | string | `"ghcr.io/nginx/nginx-gateway-fabric"` |
 | `nginxGateway.kind` | The kind of the NGINX Gateway Fabric installation - currently, only deployment is supported. | string | `"deployment"` |
diff --git a/charts/nginx-gateway-fabric/templates/clusterrole.yaml b/charts/nginx-gateway-fabric/templates/clusterrole.yaml
index 8fc4da400e..9be339c04a 100644
--- a/charts/nginx-gateway-fabric/templates/clusterrole.yaml
+++ b/charts/nginx-gateway-fabric/templates/clusterrole.yaml
@@ -129,6 +129,22 @@ rules:
   {{- end }}
   verbs:
   - update
+{{- if .Values.nginxGateway.gwAPIInferenceExtension.enable }}
+- apiGroups:
+  - inference.networking.k8s.io
+  resources:
+  - inferencepools
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - inference.networking.k8s.io
+  resources:
+  - inferencepools/status
+  verbs:
+  - update
+{{- end }}
 {{- if .Values.nginxGateway.leaderElection.enable }}
 - apiGroups:
   - coordination.k8s.io
diff --git a/charts/nginx-gateway-fabric/templates/deployment.yaml b/charts/nginx-gateway-fabric/templates/deployment.yaml
index 5bc292bdb4..604acd768c 100644
--- a/charts/nginx-gateway-fabric/templates/deployment.yaml
+++ b/charts/nginx-gateway-fabric/templates/deployment.yaml
@@ -100,6 +100,9 @@ spec:
         {{- if .Values.nginxGateway.gwAPIExperimentalFeatures.enable }}
         - --gateway-api-experimental-features
         {{- end }}
+        {{- if .Values.nginxGateway.gwAPIInferenceExtension.enable }}
+        - --gateway-api-inference-extension
+        {{- end }}
         {{- if .Values.nginxGateway.snippetsFilters.enable }}
         - --snippets-filters
         {{- end }}
diff --git a/charts/nginx-gateway-fabric/values.schema.json b/charts/nginx-gateway-fabric/values.schema.json
index 9f44991db3..c1456d2503 100644
--- a/charts/nginx-gateway-fabric/values.schema.json
+++ b/charts/nginx-gateway-fabric/values.schema.json
@@ -838,6 +838,20 @@
           "title": "gwAPIExperimentalFeatures",
           "type": "object"
         },
+        "gwAPIInferenceExtension": {
+          "properties": {
+            "enable": {
+              "default": false,
+              "description": "Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads.",
+              "required": [],
+              "title": "enable",
+              "type": "boolean"
+            }
+          },
+          "required": [],
+          "title": "gwAPIInferenceExtension",
+          "type": "object"
+        },
         "image": {
           "description": "The image configuration for the NGINX Gateway Fabric control plane.",
           "properties": {
diff --git a/charts/nginx-gateway-fabric/values.yaml b/charts/nginx-gateway-fabric/values.yaml
index 52f1e03e55..4e3747a9d1 100644
--- a/charts/nginx-gateway-fabric/values.yaml
+++ b/charts/nginx-gateway-fabric/values.yaml
@@ -210,6 +210,10 @@ nginxGateway:
     # APIs installed from the experimental channel.
     enable: false
 
+  gwAPIInferenceExtension:
+    # -- Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads.
+    enable: false
+
   snippetsFilters:
     # -- Enable SnippetsFilters feature. SnippetsFilters allow inserting NGINX configuration into the generated NGINX
     # config for HTTPRoute and GRPCRoute resources.
diff --git a/cmd/gateway/commands.go b/cmd/gateway/commands.go
index f334d499a7..012b27de6b 100644
--- a/cmd/gateway/commands.go
+++ b/cmd/gateway/commands.go
@@ -85,6 +85,7 @@ func createControllerCommand() *cobra.Command {
 		leaderElectionLockNameFlag          = "leader-election-lock-name"
 		productTelemetryDisableFlag         = "product-telemetry-disable"
 		gwAPIExperimentalFlag               = "gateway-api-experimental-features"
+		gwAPIInferenceExtensionFlag         = "gateway-api-inference-extension"
 		nginxDockerSecretFlag               = "nginx-docker-secret" //nolint:gosec // not credentials
 		usageReportSecretFlag               = "usage-report-secret"
 		usageReportEndpointFlag             = "usage-report-endpoint"
@@ -151,6 +152,7 @@ func createControllerCommand() *cobra.Command {
 		}
 
 		gwExperimentalFeatures bool
+		gwInferenceExtension   bool
 
 		disableProductTelemetry bool
 
@@ -270,6 +272,7 @@ func createControllerCommand() *cobra.Command {
 				},
 				Plus:                 plus,
 				ExperimentalFeatures: gwExperimentalFeatures,
+				InferenceExtension:   gwInferenceExtension,
 				ImageSource:          imageSource,
 				Flags: config.Flags{
 					Names:  flagKeys,
@@ -430,6 +433,14 @@ func createControllerCommand() *cobra.Command {
 			"Requires the Gateway APIs installed from the experimental channel.",
 	)
 
+	cmd.Flags().BoolVar(
+		&gwInferenceExtension,
+		gwAPIInferenceExtensionFlag,
+		false,
+		"Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route "+
+			"traffic to AI workloads.",
+	)
+
 	cmd.Flags().Var(
 		&nginxDockerSecrets,
 		nginxDockerSecretFlag,
diff --git a/config/crd/inference-extension/kustomization.yaml b/config/crd/inference-extension/kustomization.yaml
new file mode 100644
index 0000000000..6b6e210cf5
--- /dev/null
+++ b/config/crd/inference-extension/kustomization.yaml
@@ -0,0 +1,4 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+- https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd?timeout=120&ref=v1.0.0
diff --git a/deploy/inference-nginx-plus/deploy.yaml b/deploy/inference-nginx-plus/deploy.yaml
new file mode 100644
index 0000000000..77ee4da544
--- /dev/null
+++ b/deploy/inference-nginx-plus/deploy.yaml
@@ -0,0 +1,441 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: nginx-gateway
+---
+apiVersion: v1
+automountServiceAccountToken: false
+kind: ServiceAccount
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway
+  namespace: nginx-gateway
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway-cert-generator
+  namespace: nginx-gateway
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway-cert-generator
+  namespace: nginx-gateway
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - secrets
+  verbs:
+  - create
+  - update
+  - get
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway
+rules:
+- apiGroups:
+  - ""
+  - apps
+  - autoscaling
+  resources:
+  - secrets
+  - configmaps
+  - serviceaccounts
+  - services
+  - deployments
+  - daemonsets
+  - horizontalpodautoscalers
+  verbs:
+  - create
+  - update
+  - delete
+  - list
+  - get
+  - watch
+- apiGroups:
+  - ""
+  resources:
+  - namespaces
+  - pods
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - apps
+  resources:
+  - replicasets
+  verbs:
+  - get
+  - list
+- apiGroups:
+  - ""
+  resources:
+  - nodes
+  verbs:
+  - list
+- apiGroups:
+  - ""
+  resources:
+  - events
+  verbs:
+  - create
+  - patch
+- apiGroups:
+  - discovery.k8s.io
+  resources:
+  - endpointslices
+  verbs:
+  - list
+  - watch
+- apiGroups:
+  - authentication.k8s.io
+  resources:
+  - tokenreviews
+  verbs:
+  - create
+- apiGroups:
+  - gateway.networking.k8s.io
+  resources:
+  - gatewayclasses
+  - gateways
+  - httproutes
+  - referencegrants
+  - grpcroutes
+  verbs:
+  - list
+  - watch
+- apiGroups:
+  - gateway.networking.k8s.io
+  resources:
+  - httproutes/status
+  - gateways/status
+  - gatewayclasses/status
+  - grpcroutes/status
+  verbs:
+  - update
+- apiGroups:
+  - gateway.nginx.org
+  resources:
+  - nginxgateways
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - gateway.nginx.org
+  resources:
+  - nginxproxies
+  - clientsettingspolicies
+  - observabilitypolicies
+  - upstreamsettingspolicies
+  verbs:
+  - list
+  - watch
+- apiGroups:
+  - gateway.nginx.org
+  resources:
+  - nginxgateways/status
+  - clientsettingspolicies/status
+  - observabilitypolicies/status
+  - upstreamsettingspolicies/status
+  verbs:
+  - update
+- apiGroups:
+  - inference.networking.k8s.io
+  resources:
+  - inferencepools
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - inference.networking.k8s.io
+  resources:
+  - inferencepools/status
+  verbs:
+  - update
+- apiGroups:
+  - coordination.k8s.io
+  resources:
+  - leases
+  verbs:
+  - create
+  - get
+  - update
+- apiGroups:
+  - apiextensions.k8s.io
+  resources:
+  - customresourcedefinitions
+  verbs:
+  - list
+  - watch
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway-cert-generator
+  namespace: nginx-gateway
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: nginx-gateway-cert-generator
+subjects:
+- kind: ServiceAccount
+  name: nginx-gateway-cert-generator
+  namespace: nginx-gateway
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: nginx-gateway
+subjects:
+- kind: ServiceAccount
+  name: nginx-gateway
+  namespace: nginx-gateway
+---
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway
+  namespace: nginx-gateway
+spec:
+  ports:
+  - name: agent-grpc
+    port: 443
+    protocol: TCP
+    targetPort: 8443
+  selector:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+  type: ClusterIP
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway
+  namespace: nginx-gateway
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/instance: nginx-gateway
+      app.kubernetes.io/name: nginx-gateway
+  template:
+    metadata:
+      annotations:
+        prometheus.io/port: "9113"
+        prometheus.io/scrape: "true"
+      labels:
+        app.kubernetes.io/instance: nginx-gateway
+        app.kubernetes.io/name: nginx-gateway
+    spec:
+      automountServiceAccountToken: true
+      containers:
+      - args:
+        - controller
+        - --gateway-ctlr-name=gateway.nginx.org/nginx-gateway-controller
+        - --gatewayclass=nginx
+        - --config=nginx-gateway-config
+        - --service=nginx-gateway
+        - --agent-tls-secret=agent-tls
+        - --nginx-docker-secret=nginx-plus-registry-secret
+        - --nginx-plus
+        - --usage-report-secret=nplus-license
+        - --metrics-port=9113
+        - --health-port=8081
+        - --leader-election-lock-name=nginx-gateway-leader-election
+        - --gateway-api-inference-extension
+        env:
+        - name: POD_NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        - name: POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: POD_UID
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.uid
+        - name: INSTANCE_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.labels['app.kubernetes.io/instance']
+        - name: IMAGE_NAME
+          value: ghcr.io/nginx/nginx-gateway-fabric:edge
+        image: ghcr.io/nginx/nginx-gateway-fabric:edge
+        imagePullPolicy: Always
+        name: nginx-gateway
+        ports:
+        - containerPort: 8443
+          name: agent-grpc
+        - containerPort: 9113
+          name: metrics
+        - containerPort: 8081
+          name: health
+        readinessProbe:
+          httpGet:
+            path: /readyz
+            port: health
+          initialDelaySeconds: 3
+          periodSeconds: 1
+        securityContext:
+          allowPrivilegeEscalation: false
+          capabilities:
+            drop:
+            - ALL
+          readOnlyRootFilesystem: true
+          runAsGroup: 1001
+          runAsUser: 101
+          seccompProfile:
+            type: RuntimeDefault
+        volumeMounts:
+        - mountPath: /var/run/secrets/ngf
+          name: nginx-agent-tls
+      securityContext:
+        fsGroup: 1001
+        runAsNonRoot: true
+      serviceAccountName: nginx-gateway
+      terminationGracePeriodSeconds: 30
+      volumes:
+      - name: nginx-agent-tls
+        secret:
+          secretName: server-tls
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway-cert-generator
+  namespace: nginx-gateway
+spec:
+  template:
+    metadata:
+      annotations: null
+    spec:
+      containers:
+      - args:
+        - generate-certs
+        - --service=nginx-gateway
+        - --cluster-domain=cluster.local
+        - --server-tls-secret=server-tls
+        - --agent-tls-secret=agent-tls
+        env:
+        - name: POD_NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        image: ghcr.io/nginx/nginx-gateway-fabric:edge
+        imagePullPolicy: Always
+        name: cert-generator
+        securityContext:
+          allowPrivilegeEscalation: false
+          capabilities:
+            drop:
+            - ALL
+          readOnlyRootFilesystem: true
+          runAsGroup: 1001
+          runAsUser: 101
+          seccompProfile:
+            type: RuntimeDefault
+      restartPolicy: Never
+      securityContext:
+        fsGroup: 1001
+        runAsNonRoot: true
+      serviceAccountName: nginx-gateway-cert-generator
+  ttlSecondsAfterFinished: 30
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: GatewayClass
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx
+spec:
+  controllerName: gateway.nginx.org/nginx-gateway-controller
+  parametersRef:
+    group: gateway.nginx.org
+    kind: NginxProxy
+    name: nginx-gateway-proxy-config
+    namespace: nginx-gateway
+---
+apiVersion: gateway.nginx.org/v1alpha1
+kind: NginxGateway
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway-config
+  namespace: nginx-gateway
+spec:
+  logging:
+    level: info
+---
+apiVersion: gateway.nginx.org/v1alpha2
+kind: NginxProxy
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway-proxy-config
+  namespace: nginx-gateway
+spec:
+  kubernetes:
+    deployment:
+      container:
+        image:
+          pullPolicy: Always
+          repository: private-registry.nginx.com/nginx-gateway-fabric/nginx-plus
+          tag: edge
+      replicas: 1
+    service:
+      externalTrafficPolicy: Local
+      type: LoadBalancer
diff --git a/deploy/inference/deploy.yaml b/deploy/inference/deploy.yaml
new file mode 100644
index 0000000000..49a8f85053
--- /dev/null
+++ b/deploy/inference/deploy.yaml
@@ -0,0 +1,438 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: nginx-gateway
+---
+apiVersion: v1
+automountServiceAccountToken: false
+kind: ServiceAccount
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway
+  namespace: nginx-gateway
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway-cert-generator
+  namespace: nginx-gateway
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway-cert-generator
+  namespace: nginx-gateway
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - secrets
+  verbs:
+  - create
+  - update
+  - get
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway
+rules:
+- apiGroups:
+  - ""
+  - apps
+  - autoscaling
+  resources:
+  - secrets
+  - configmaps
+  - serviceaccounts
+  - services
+  - deployments
+  - daemonsets
+  - horizontalpodautoscalers
+  verbs:
+  - create
+  - update
+  - delete
+  - list
+  - get
+  - watch
+- apiGroups:
+  - ""
+  resources:
+  - namespaces
+  - pods
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - apps
+  resources:
+  - replicasets
+  verbs:
+  - get
+  - list
+- apiGroups:
+  - ""
+  resources:
+  - nodes
+  verbs:
+  - list
+- apiGroups:
+  - ""
+  resources:
+  - events
+  verbs:
+  - create
+  - patch
+- apiGroups:
+  - discovery.k8s.io
+  resources:
+  - endpointslices
+  verbs:
+  - list
+  - watch
+- apiGroups:
+  - authentication.k8s.io
+  resources:
+  - tokenreviews
+  verbs:
+  - create
+- apiGroups:
+  - gateway.networking.k8s.io
+  resources:
+  - gatewayclasses
+  - gateways
+  - httproutes
+  - referencegrants
+  - grpcroutes
+  verbs:
+  - list
+  - watch
+- apiGroups:
+  - gateway.networking.k8s.io
+  resources:
+  - httproutes/status
+  - gateways/status
+  - gatewayclasses/status
+  - grpcroutes/status
+  verbs:
+  - update
+- apiGroups:
+  - gateway.nginx.org
+  resources:
+  - nginxgateways
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - gateway.nginx.org
+  resources:
+  - nginxproxies
+  - clientsettingspolicies
+  - observabilitypolicies
+  - upstreamsettingspolicies
+  verbs:
+  - list
+  - watch
+- apiGroups:
+  - gateway.nginx.org
+  resources:
+  - nginxgateways/status
+  - clientsettingspolicies/status
+  - observabilitypolicies/status
+  - upstreamsettingspolicies/status
+  verbs:
+  - update
+- apiGroups:
+  - inference.networking.k8s.io
+  resources:
+  - inferencepools
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - inference.networking.k8s.io
+  resources:
+  - inferencepools/status
+  verbs:
+  - update
+- apiGroups:
+  - coordination.k8s.io
+  resources:
+  - leases
+  verbs:
+  - create
+  - get
+  - update
+- apiGroups:
+  - apiextensions.k8s.io
+  resources:
+  - customresourcedefinitions
+  verbs:
+  - list
+  - watch
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway-cert-generator
+  namespace: nginx-gateway
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: nginx-gateway-cert-generator
+subjects:
+- kind: ServiceAccount
+  name: nginx-gateway-cert-generator
+  namespace: nginx-gateway
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: nginx-gateway
+subjects:
+- kind: ServiceAccount
+  name: nginx-gateway
+  namespace: nginx-gateway
+---
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway
+  namespace: nginx-gateway
+spec:
+  ports:
+  - name: agent-grpc
+    port: 443
+    protocol: TCP
+    targetPort: 8443
+  selector:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+  type: ClusterIP
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway
+  namespace: nginx-gateway
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/instance: nginx-gateway
+      app.kubernetes.io/name: nginx-gateway
+  template:
+    metadata:
+      annotations:
+        prometheus.io/port: "9113"
+        prometheus.io/scrape: "true"
+      labels:
+        app.kubernetes.io/instance: nginx-gateway
+        app.kubernetes.io/name: nginx-gateway
+    spec:
+      automountServiceAccountToken: true
+      containers:
+      - args:
+        - controller
+        - --gateway-ctlr-name=gateway.nginx.org/nginx-gateway-controller
+        - --gatewayclass=nginx
+        - --config=nginx-gateway-config
+        - --service=nginx-gateway
+        - --agent-tls-secret=agent-tls
+        - --metrics-port=9113
+        - --health-port=8081
+        - --leader-election-lock-name=nginx-gateway-leader-election
+        - --gateway-api-inference-extension
+        env:
+        - name: POD_NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        - name: POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: POD_UID
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.uid
+        - name: INSTANCE_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.labels['app.kubernetes.io/instance']
+        - name: IMAGE_NAME
+          value: ghcr.io/nginx/nginx-gateway-fabric:edge
+        image: ghcr.io/nginx/nginx-gateway-fabric:edge
+        imagePullPolicy: Always
+        name: nginx-gateway
+        ports:
+        - containerPort: 8443
+          name: agent-grpc
+        - containerPort: 9113
+          name: metrics
+        - containerPort: 8081
+          name: health
+        readinessProbe:
+          httpGet:
+            path: /readyz
+            port: health
+          initialDelaySeconds: 3
+          periodSeconds: 1
+        securityContext:
+          allowPrivilegeEscalation: false
+          capabilities:
+            drop:
+            - ALL
+          readOnlyRootFilesystem: true
+          runAsGroup: 1001
+          runAsUser: 101
+          seccompProfile:
+            type: RuntimeDefault
+        volumeMounts:
+        - mountPath: /var/run/secrets/ngf
+          name: nginx-agent-tls
+      securityContext:
+        fsGroup: 1001
+        runAsNonRoot: true
+      serviceAccountName: nginx-gateway
+      terminationGracePeriodSeconds: 30
+      volumes:
+      - name: nginx-agent-tls
+        secret:
+          secretName: server-tls
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway-cert-generator
+  namespace: nginx-gateway
+spec:
+  template:
+    metadata:
+      annotations: null
+    spec:
+      containers:
+      - args:
+        - generate-certs
+        - --service=nginx-gateway
+        - --cluster-domain=cluster.local
+        - --server-tls-secret=server-tls
+        - --agent-tls-secret=agent-tls
+        env:
+        - name: POD_NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        image: ghcr.io/nginx/nginx-gateway-fabric:edge
+        imagePullPolicy: Always
+        name: cert-generator
+        securityContext:
+          allowPrivilegeEscalation: false
+          capabilities:
+            drop:
+            - ALL
+          readOnlyRootFilesystem: true
+          runAsGroup: 1001
+          runAsUser: 101
+          seccompProfile:
+            type: RuntimeDefault
+      restartPolicy: Never
+      securityContext:
+        fsGroup: 1001
+        runAsNonRoot: true
+      serviceAccountName: nginx-gateway-cert-generator
+  ttlSecondsAfterFinished: 30
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: GatewayClass
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx
+spec:
+  controllerName: gateway.nginx.org/nginx-gateway-controller
+  parametersRef:
+    group: gateway.nginx.org
+    kind: NginxProxy
+    name: nginx-gateway-proxy-config
+    namespace: nginx-gateway
+---
+apiVersion: gateway.nginx.org/v1alpha1
+kind: NginxGateway
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway-config
+  namespace: nginx-gateway
+spec:
+  logging:
+    level: info
+---
+apiVersion: gateway.nginx.org/v1alpha2
+kind: NginxProxy
+metadata:
+  labels:
+    app.kubernetes.io/instance: nginx-gateway
+    app.kubernetes.io/name: nginx-gateway
+    app.kubernetes.io/version: edge
+  name: nginx-gateway-proxy-config
+  namespace: nginx-gateway
+spec:
+  kubernetes:
+    deployment:
+      container:
+        image:
+          pullPolicy: Always
+          repository: ghcr.io/nginx/nginx-gateway-fabric/nginx
+          tag: edge
+      replicas: 1
+    service:
+      externalTrafficPolicy: Local
+      type: LoadBalancer
diff --git a/examples/helm/inference-nginx-plus/values.yaml b/examples/helm/inference-nginx-plus/values.yaml
new file mode 100644
index 0000000000..1d89293db2
--- /dev/null
+++ b/examples/helm/inference-nginx-plus/values.yaml
@@ -0,0 +1,10 @@
+nginxGateway:
+  name: nginx-gateway
+  gwAPIInferenceExtension:
+    enable: true
+
+nginx:
+  plus: true
+  image:
+    repository: private-registry.nginx.com/nginx-gateway-fabric/nginx-plus
+  imagePullSecret: nginx-plus-registry-secret
diff --git a/examples/helm/inference/values.yaml b/examples/helm/inference/values.yaml
new file mode 100644
index 0000000000..0bb54b57e9
--- /dev/null
+++ b/examples/helm/inference/values.yaml
@@ -0,0 +1,4 @@
+nginxGateway:
+  name: nginx-gateway
+  gwAPIInferenceExtension:
+    enable: true
diff --git a/go.mod b/go.mod
index 0b35914cbb..f79dd3b882 100644
--- a/go.mod
+++ b/go.mod
@@ -28,6 +28,7 @@ require (
 	k8s.io/klog/v2 v2.130.1
 	sigs.k8s.io/controller-runtime v0.22.1
 	sigs.k8s.io/gateway-api v1.3.0
+	sigs.k8s.io/gateway-api-inference-extension v1.0.0
 )
 
 require (
@@ -80,7 +81,7 @@ require (
 	golang.org/x/sync v0.17.0 // indirect
 	golang.org/x/sys v0.35.0 // indirect
 	golang.org/x/term v0.34.0 // indirect
-	golang.org/x/time v0.9.0 // indirect
+	golang.org/x/time v0.12.0 // indirect
 	golang.org/x/tools v0.36.0 // indirect
 	gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
 	google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 // indirect
diff --git a/go.sum b/go.sum
index b8716c1de8..b81ec9aeb5 100644
--- a/go.sum
+++ b/go.sum
@@ -66,8 +66,8 @@ github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+Gr
 github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
 github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
 github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
-github.com/goccy/go-yaml v1.17.1 h1:LI34wktB2xEE3ONG/2Ar54+/HJVBriAGJ55PHls4YuY=
-github.com/goccy/go-yaml v1.17.1/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
+github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw=
+github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
 github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
 github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
 github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
@@ -218,8 +218,8 @@ github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo
 github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
 go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
 go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
-go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 h1:sbiXRNDSWJOTobXh5HyQKjq6wUC5tNybqjIqDpAY4CU=
-go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0/go.mod h1:69uWxva0WgAA/4bu2Yy70SLDBwZXuQ6PbBpbsa5iZrQ=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q=
 go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8=
 go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM=
 go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24=
@@ -283,8 +283,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk=
 golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4=
-golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY=
-golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
+golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE=
+golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
@@ -336,6 +336,8 @@ sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV
 sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY=
 sigs.k8s.io/gateway-api v1.3.0 h1:q6okN+/UKDATola4JY7zXzx40WO4VISk7i9DIfOvr9M=
 sigs.k8s.io/gateway-api v1.3.0/go.mod h1:d8NV8nJbaRbEKem+5IuxkL8gJGOZ+FJ+NvOIltV8gDk=
+sigs.k8s.io/gateway-api-inference-extension v1.0.0 h1:GsHvlu1Cn1t6+vrHoPdNNlpwKxf/y1HuQSlUjd58Ds8=
+sigs.k8s.io/gateway-api-inference-extension v1.0.0/go.mod h1:qxSY10qt2+YnZJ43VfpMXa6wpiENPderI2BnNZ4Kxfc=
 sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE=
 sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
 sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
diff --git a/internal/controller/config/config.go b/internal/controller/config/config.go
index e23f73ca59..d1e77df07b 100644
--- a/internal/controller/config/config.go
+++ b/internal/controller/config/config.go
@@ -48,6 +48,8 @@ type Config struct {
 	Plus bool
 	// ExperimentalFeatures indicates if experimental features are enabled.
 	ExperimentalFeatures bool
+	// InferenceExtension indicates if Gateway API Inference Extension support is enabled.
+	InferenceExtension bool
 	// SnippetsFilters indicates if SnippetsFilters are enabled.
 	SnippetsFilters bool
 }
diff --git a/internal/controller/handler.go b/internal/controller/handler.go
index f79e9dc268..2108739a55 100644
--- a/internal/controller/handler.go
+++ b/internal/controller/handler.go
@@ -13,9 +13,11 @@ import (
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/apimachinery/pkg/util/intstr"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/client-go/tools/record"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
 	gatewayv1 "sigs.k8s.io/gateway-api/apis/v1"
 
 	ngfAPI "github.com/nginx/nginx-gateway-fabric/v2/apis/v1alpha1"
@@ -79,6 +81,8 @@ type eventHandlerConfig struct {
 	controlConfigNSName types.NamespacedName
 	// gatewayCtlrName is the name of the NGF controller.
 	gatewayCtlrName string
+	// gatewayInstanceName is the name of the NGINX Gateway instance.
+	gatewayInstanceName string
 	// gatewayClassName is the name of the GatewayClass.
 	gatewayClassName string
 	// plus is whether or not we are running NGINX Plus.
@@ -116,8 +120,10 @@ type eventHandlerImpl struct {
 	// objectFilters contains all created objectFilters, with the key being a filterKey
 	objectFilters map[filterKey]objectFilter
 
-	cfg  eventHandlerConfig
-	lock sync.Mutex
+	cfg        eventHandlerConfig
+	lock       sync.RWMutex
+	leaderLock sync.RWMutex
+	leader     bool
 }
 
 // newEventHandlerImpl creates a new eventHandlerImpl.
@@ -170,6 +176,10 @@ func (h *eventHandlerImpl) HandleEventBatch(ctx context.Context, logger logr.Log
 // enable is called when the pod becomes leader to ensure the provisioner has
 // the latest configuration.
 func (h *eventHandlerImpl) enable(ctx context.Context) {
+	h.leaderLock.Lock()
+	h.leader = true
+	h.leaderLock.Unlock()
+
 	h.sendNginxConfig(ctx, h.cfg.logger, h.cfg.processor.GetLatestGraph())
 }
 
@@ -187,6 +197,9 @@ func (h *eventHandlerImpl) sendNginxConfig(ctx context.Context, logger logr.Logg
 		return
 	}
 
+	// ensure headless "shadow" Services are created for any referenced InferencePools
+	h.ensureInferencePoolServices(ctx, gr.ReferencedInferencePools)
+
 	for _, gw := range gr.Gateways {
 		go func() {
 			if err := h.cfg.nginxProvisioner.RegisterGateway(ctx, gw, gw.DeploymentName.Name); err != nil {
@@ -547,8 +560,8 @@ func (h *eventHandlerImpl) getDeploymentContext(ctx context.Context) (dataplane.
 
 // GetLatestConfiguration gets the latest configuration.
 func (h *eventHandlerImpl) GetLatestConfiguration() []*dataplane.Configuration {
-	h.lock.Lock()
-	defer h.lock.Unlock()
+	h.lock.RLock()
+	defer h.lock.RUnlock()
 
 	configs := make([]*dataplane.Configuration, 0, len(h.latestConfigurations))
 	for _, cfg := range h.latestConfigurations {
@@ -574,6 +587,130 @@ func objectFilterKey(obj client.Object, nsName types.NamespacedName) filterKey {
 	return filterKey(fmt.Sprintf("%T_%s_%s", obj, nsName.Namespace, nsName.Name))
 }
 
+// ensureInferencePoolServices ensures a headless "shadow" Service exists and is up to date for each
+// referenced InferencePool. It does nothing unless this handler is the leader.
+//
+// Nil pools, pools with a nil Source, and pools without any target port are skipped. A failure to
+// create or update a Service is logged and recorded as a Warning event, and the remaining pools are
+// still processed.
+func (h *eventHandlerImpl) ensureInferencePoolServices(
+	ctx context.Context,
+	pools map[types.NamespacedName]*graph.ReferencedInferencePool,
+) {
+	if !h.isLeader() {
+		return
+	}
+
+	for _, pool := range pools {
+		if pool == nil || pool.Source == nil {
+			continue
+		}
+
+		// TargetPorts[0] is indexed below; skip instead of panicking if the list is empty.
+		if len(pool.Source.Spec.TargetPorts) == 0 {
+			h.cfg.logger.Info(
+				"Skipping headless Service for InferencePool without target ports",
+				"InferencePool", pool.Source.Name,
+			)
+			continue
+		}
+
+		selectors := make(map[string]string, len(pool.Source.Spec.Selector.MatchLabels))
+		for k, v := range pool.Source.Spec.Selector.MatchLabels {
+			selectors[string(k)] = string(v)
+		}
+
+		// v1 of InferencePool only supports a single port right now
+		port := int32(pool.Source.Spec.TargetPorts[0].Number)
+		ports := []v1.ServicePort{
+			{
+				Port:       port,
+				TargetPort: intstr.FromInt32(port),
+			},
+		}
+
+		labels := map[string]string{
+			controller.AppManagedByLabel: controller.CreateNginxResourceName(
+				h.cfg.gatewayInstanceName,
+				h.cfg.gatewayClassName,
+			),
+		}
+
+		svc := &v1.Service{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      controller.CreateInferencePoolServiceName(pool.Source.Name),
+				Namespace: pool.Source.Namespace,
+				Labels:    labels,
+				OwnerReferences: []metav1.OwnerReference{
+					{
+						APIVersion: pool.Source.APIVersion,
+						Kind:       pool.Source.Kind,
+						Name:       pool.Source.Name,
+						UID:        pool.Source.UID,
+					},
+				},
+			},
+			Spec: v1.ServiceSpec{
+				ClusterIP: v1.ClusterIPNone, // headless
+				Selector:  selectors,
+				Ports:     ports,
+			},
+		}
+
+		svcCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
+		res, err := controllerutil.CreateOrUpdate(
+			svcCtx,
+			h.cfg.k8sClient,
+			svc,
+			serviceSpecSetter(svc, svc.Spec, svc.ObjectMeta),
+		)
+		// Release the timeout context right away; a defer would only run once the loop is done.
+		cancel()
+
+		if err != nil {
+			msg := "Failed to upsert headless Service for InferencePool"
+			h.cfg.logger.Error(err, msg, "Service", svc.Name, "InferencePool", pool.Source.Name)
+			h.cfg.eventRecorder.Eventf(
+				svc,
+				v1.EventTypeWarning,
+				"ServiceCreateOrUpdateFailed",
+				"%s %q: %v", msg, pool.Source.Name, err,
+			)
+			continue
+		}
+
+		if res == controllerutil.OperationResultCreated || res == controllerutil.OperationResultUpdated {
+			h.cfg.logger.Info(
+				fmt.Sprintf("Successfully %s headless Service for InferencePool", res),
+				"Service", svc.Name, "InferencePool", pool.Source.Name,
+			)
+		}
+	}
+}
+
+// serviceSpecSetter returns a MutateFn that overwrites the Labels and Spec of service with the
+// given values. spec and objectMeta are passed by value, so the returned function applies the
+// values handed in here even if service itself is modified before the function runs.
+func serviceSpecSetter(
+	service *v1.Service,
+	spec v1.ServiceSpec,
+	objectMeta metav1.ObjectMeta,
+) controllerutil.MutateFn {
+	return func() error {
+		service.Labels = objectMeta.Labels
+		service.Spec = spec
+		return nil
+	}
+}
+
+// isLeader returns whether or not this handler is the leader.
+func (h *eventHandlerImpl) isLeader() bool {
+	h.leaderLock.RLock()
+	defer h.leaderLock.RUnlock()
+
+	return h.leader
+}
+
 /*
 
 Handler Callback functions
diff --git a/internal/controller/handler_test.go b/internal/controller/handler_test.go
index ec9fe05848..4c23a12e55 100644
--- a/internal/controller/handler_test.go
+++ b/internal/controller/handler_test.go
@@ -12,11 +12,13 @@ import (
 	"go.uber.org/zap"
 	v1 "k8s.io/api/core/v1"
 	discoveryV1 "k8s.io/api/discovery/v1"
+	apiErrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/client-go/tools/record"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	gatewayv1 "sigs.k8s.io/gateway-api/apis/v1"
 
 	ngfAPI "github.com/nginx/nginx-gateway-fabric/v2/apis/v1alpha1"
@@ -149,6 +151,7 @@ var _ = Describe("eventHandler", func() {
 			metricsCollector: collectors.NewControllerNoopCollector(),
 		})
 		Expect(handler.cfg.graphBuiltHealthChecker.ready).To(BeFalse())
+		handler.leader = true
 	})
 
 	AfterEach(func() {
@@ -518,6 +521,115 @@ var _ = Describe("eventHandler", func() {
 		Expect(handler.cfg.graphBuiltHealthChecker.readyCheck(nil)).To(Succeed())
 	})
 
+	It("should create a headless Service for each referenced InferencePool", func() {
+		namespace := "test-ns"
+		poolName1 := "pool1"
+		poolName2 := "pool2"
+		poolUID1 := types.UID("uid1")
+		poolUID2 := types.UID("uid2")
+
+		pool1 := &inference.InferencePool{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      poolName1,
+				Namespace: namespace,
+				UID:       poolUID1,
+			},
+			Spec: inference.InferencePoolSpec{
+				Selector: inference.LabelSelector{
+					MatchLabels: map[inference.LabelKey]inference.LabelValue{"app": "foo"},
+				},
+				TargetPorts: []inference.Port{
+					{Number: 8081},
+				},
+			},
+		}
+
+		g := &graph.Graph{
+			Gateways: map[types.NamespacedName]*graph.Gateway{
+				{}: {
+					Source: &gatewayv1.Gateway{
+						ObjectMeta: metav1.ObjectMeta{
+							Namespace: "test",
+							Name:      "gateway",
+						},
+					},
+					Valid: true,
+				},
+			},
+			ReferencedInferencePools: map[types.NamespacedName]*graph.ReferencedInferencePool{
+				{Namespace: namespace, Name: poolName1}: {Source: pool1},
+				{Namespace: namespace, Name: poolName2}: {
+					Source: &inference.InferencePool{
+						ObjectMeta: metav1.ObjectMeta{
+							Name:      poolName2,
+							Namespace: namespace,
+							UID:       poolUID2,
+						},
+						Spec: inference.InferencePoolSpec{
+							Selector: inference.LabelSelector{
+								MatchLabels: map[inference.LabelKey]inference.LabelValue{"app": "bar"},
+							},
+							TargetPorts: []inference.Port{
+								{Number: 9090},
+							},
+						},
+					},
+				},
+			},
+		}
+
+		fakeProcessor.ProcessReturns(g)
+
+		e := &events.UpsertEvent{Resource: &gatewayv1.HTTPRoute{}}
+		batch := []any{e}
+
+		handler.HandleEventBatch(context.Background(), logr.Discard(), batch)
+
+		// pool1's Service: headless, selects app=foo, exposes port 8081 and is owned by pool1
+		svc1 := &v1.Service{}
+		svcName1 := controller.CreateInferencePoolServiceName(poolName1)
+		err := fakeK8sClient.Get(context.Background(), types.NamespacedName{Name: svcName1, Namespace: namespace}, svc1)
+		Expect(err).ToNot(HaveOccurred())
+		Expect(svc1.Spec.ClusterIP).To(Equal(v1.ClusterIPNone))
+		Expect(svc1.Spec.Selector).To(HaveKeyWithValue("app", "foo"))
+		Expect(svc1.Spec.Ports).To(HaveLen(1))
+		Expect(svc1.Spec.Ports[0].Port).To(Equal(int32(8081)))
+		Expect(svc1.OwnerReferences).To(HaveLen(1))
+		Expect(svc1.OwnerReferences[0].Name).To(Equal(poolName1))
+		Expect(svc1.OwnerReferences[0].UID).To(Equal(poolUID1))
+
+		// pool2's Service: headless, selects app=bar, exposes port 9090 and is owned by pool2
+		svc2 := &v1.Service{}
+		svcName2 := controller.CreateInferencePoolServiceName(poolName2)
+		err = fakeK8sClient.Get(context.Background(), types.NamespacedName{Name: svcName2, Namespace: namespace}, svc2)
+		Expect(err).ToNot(HaveOccurred())
+		Expect(svc2.Spec.ClusterIP).To(Equal(v1.ClusterIPNone))
+		Expect(svc2.Spec.Selector).To(HaveKeyWithValue("app", "bar"))
+		Expect(svc2.Spec.Ports).To(HaveLen(1))
+		Expect(svc2.Spec.Ports[0].Port).To(Equal(int32(9090)))
+		Expect(svc2.OwnerReferences).To(HaveLen(1))
+		Expect(svc2.OwnerReferences[0].Name).To(Equal(poolName2))
+		Expect(svc2.OwnerReferences[0].UID).To(Equal(poolUID2))
+
+		// Change pool1's selector in place; the next batch must propagate it to the existing Service
+		updatedSelector := map[inference.LabelKey]inference.LabelValue{"app": "baz"}
+		pool1.Spec.Selector.MatchLabels = updatedSelector
+
+		// Hand the updated pool back to the graph that the fake processor returns
+		g.ReferencedInferencePools[types.NamespacedName{Namespace: namespace, Name: poolName1}].Source = pool1
+		fakeProcessor.ProcessReturns(g)
+
+		e = &events.UpsertEvent{Resource: &inference.InferencePool{}}
+		batch = []any{e}
+		handler.HandleEventBatch(context.Background(), logr.Discard(), batch)
+
+		// Re-read pool1's Service; its selector must now carry the updated value
+		svc1 = &v1.Service{}
+		err = fakeK8sClient.Get(context.Background(), types.NamespacedName{Name: svcName1, Namespace: namespace}, svc1)
+		Expect(err).ToNot(HaveOccurred())
+		Expect(svc1.Spec.Selector).To(HaveKeyWithValue("app", "baz"))
+	})
+
 	It("should panic for an unknown event type", func() {
 		e := &struct{}{}
 
@@ -688,3 +800,124 @@ var _ = Describe("getDeploymentContext", func() {
 		})
 	})
 })
+
+var _ = Describe("ensureInferencePoolServices", func() {
+	var (
+		handler           *eventHandlerImpl
+		fakeK8sClient     client.Client
+		fakeEventRecorder *record.FakeRecorder
+		namespace         = "test-ns"
+		poolName          = "my-inference-pool"
+		poolUID           = types.UID("pool-uid")
+	)
+
+	// newPools returns a pool map holding one entry for the test pool; a nil Source is used when
+	// withSource is false.
+	newPools := func(withSource bool) map[types.NamespacedName]*graph.ReferencedInferencePool {
+		var source *inference.InferencePool
+		if withSource {
+			source = &inference.InferencePool{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      poolName,
+					Namespace: namespace,
+					UID:       poolUID,
+				},
+				Spec: inference.InferencePoolSpec{
+					Selector: inference.LabelSelector{
+						MatchLabels: map[inference.LabelKey]inference.LabelValue{"app": "foo"},
+					},
+					TargetPorts: []inference.Port{
+						{Number: 8080},
+					},
+				},
+			}
+		}
+
+		return map[types.NamespacedName]*graph.ReferencedInferencePool{
+			{Namespace: namespace, Name: poolName}: {Source: source},
+		}
+	}
+
+	// getService fetches the test pool's headless Service through the fake client.
+	getService := func() (*v1.Service, error) {
+		svc := &v1.Service{}
+		key := types.NamespacedName{
+			Name:      controller.CreateInferencePoolServiceName(poolName),
+			Namespace: namespace,
+		}
+
+		return svc, fakeK8sClient.Get(context.Background(), key, svc)
+	}
+
+	BeforeEach(func() {
+		fakeK8sClient = fake.NewFakeClient()
+		fakeEventRecorder = record.NewFakeRecorder(1)
+		handler = newEventHandlerImpl(eventHandlerConfig{
+			ctx:           context.Background(),
+			k8sClient:     fakeK8sClient,
+			statusQueue:   status.NewQueue(),
+			eventRecorder: fakeEventRecorder,
+			logger:        logr.Discard(),
+		})
+		// Set as leader so ensureInferencePoolServices will run
+		handler.leader = true
+	})
+
+	It("creates a headless Service for a referenced InferencePool", func() {
+		handler.ensureInferencePoolServices(context.Background(), newPools(true))
+
+		svc, err := getService()
+		Expect(err).ToNot(HaveOccurred())
+		Expect(svc.Spec.ClusterIP).To(Equal(v1.ClusterIPNone))
+		Expect(svc.Spec.Selector).To(HaveKeyWithValue("app", "foo"))
+		Expect(svc.Spec.Ports).To(HaveLen(1))
+		Expect(svc.Spec.Ports[0].Port).To(Equal(int32(8080)))
+		Expect(svc.OwnerReferences).To(HaveLen(1))
+		Expect(svc.OwnerReferences[0].Name).To(Equal(poolName))
+		Expect(svc.OwnerReferences[0].UID).To(Equal(poolUID))
+	})
+
+	It("does nothing if not leader", func() {
+		handler.leader = false
+
+		handler.ensureInferencePoolServices(context.Background(), newPools(true))
+
+		// NotFound proves that no Service was created; any other error would hide a real failure.
+		_, err := getService()
+		Expect(apiErrors.IsNotFound(err)).To(BeTrue())
+	})
+
+	It("skips pools with nil Source", func() {
+		// Should not panic or create anything
+		handler.ensureInferencePoolServices(context.Background(), newPools(false))
+
+		_, err := getService()
+		Expect(apiErrors.IsNotFound(err)).To(BeTrue())
+	})
+
+	It("emits an event if Service creation fails", func() {
+		// Use a client that will fail on CreateOrUpdate
+		handler.cfg.k8sClient = &badFakeClient{}
+
+		handler.ensureInferencePoolServices(context.Background(), newPools(true))
+
+		Eventually(fakeEventRecorder.Events).Should(Receive(ContainSubstring("ServiceCreateOrUpdateFailed")))
+	})
+})
+
+// badFakeClient reports every object as not found on Get and fails every Create and Update.
+type badFakeClient struct {
+	client.Client
+}
+
+func (*badFakeClient) Get(context.Context, client.ObjectKey, client.Object, ...client.GetOption) error {
+	return apiErrors.NewNotFound(v1.Resource("service"), "not-found")
+}
+
+func (*badFakeClient) Create(context.Context, client.Object, ...client.CreateOption) error {
+	return errors.New("create error")
+}
+
+func (*badFakeClient) Update(context.Context, client.Object, ...client.UpdateOption) error {
+	return errors.New("update error")
+}
diff --git a/internal/controller/manager.go b/internal/controller/manager.go
index a4e9fd9cf0..d02411571b 100644
--- a/internal/controller/manager.go
+++ b/internal/controller/manager.go
@@ -32,6 +32,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/metrics"
 	metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
 	k8spredicate "sigs.k8s.io/controller-runtime/pkg/predicate"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	gatewayv1 "sigs.k8s.io/gateway-api/apis/v1"
 	gatewayv1alpha2 "sigs.k8s.io/gateway-api/apis/v1alpha2"
 	gatewayv1alpha3 "sigs.k8s.io/gateway-api/apis/v1alpha3"
@@ -95,6 +96,7 @@ func init() {
 	utilruntime.Must(autoscalingv2.AddToScheme(scheme))
 	utilruntime.Must(authv1.AddToScheme(scheme))
 	utilruntime.Must(rbacv1.AddToScheme(scheme))
+	utilruntime.Must(inference.Install(scheme))
 }
 
 func StartManager(cfg config.Config) error {
@@ -251,6 +253,7 @@ func StartManager(cfg config.Config) error {
 		gatewayPodConfig:        cfg.GatewayPodConfig,
 		controlConfigNSName:     controlConfigNSName,
 		gatewayCtlrName:         cfg.GatewayCtlrName,
+		gatewayInstanceName:     cfg.GatewayPodConfig.InstanceName,
 		gatewayClassName:        cfg.GatewayClassName,
 		plus:                    cfg.Plus,
 		statusQueue:             statusQueue,
@@ -536,6 +539,18 @@ func registerControllers(
 		controllerRegCfgs = append(controllerRegCfgs, gwExpFeatures...)
 	}
 
+	if cfg.InferenceExtension {
+		inferenceExt := []ctlrCfg{
+			{
+				objectType: &inference.InferencePool{},
+				options: []controller.Option{
+					controller.WithK8sPredicate(k8spredicate.GenerationChangedPredicate{}),
+				},
+			},
+		}
+		controllerRegCfgs = append(controllerRegCfgs, inferenceExt...)
+	}
+
 	if cfg.ConfigName != "" {
 		controllerRegCfgs = append(controllerRegCfgs,
 			ctlrCfg{
@@ -761,6 +776,10 @@ func prepareFirstEventBatchPreparerArgs(cfg config.Config) ([]client.Object, []c
 		)
 	}
 
+	if cfg.InferenceExtension {
+		objectLists = append(objectLists, &inference.InferencePoolList{})
+	}
+
 	if cfg.SnippetsFilters {
 		objectLists = append(
 			objectLists,
diff --git a/internal/controller/manager_test.go b/internal/controller/manager_test.go
index 60d7b0e5d5..76e613a1f6 100644
--- a/internal/controller/manager_test.go
+++ b/internal/controller/manager_test.go
@@ -14,6 +14,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/client/fake"
 	metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	gatewayv1 "sigs.k8s.io/gateway-api/apis/v1"
 	gatewayv1alpha2 "sigs.k8s.io/gateway-api/apis/v1alpha2"
 	gatewayv1alpha3 "sigs.k8s.io/gateway-api/apis/v1alpha3"
@@ -47,9 +48,7 @@ func TestPrepareFirstEventBatchPreparerArgs(t *testing.T) {
 		{
 			name: "base case",
 			cfg: config.Config{
-				GatewayClassName:     gcName,
-				ExperimentalFeatures: false,
-				SnippetsFilters:      false,
+				GatewayClassName: gcName,
 			},
 			expectedObjects: []client.Object{
 				&gatewayv1.GatewayClass{ObjectMeta: metav1.ObjectMeta{Name: "nginx"}},
@@ -75,7 +74,6 @@ func TestPrepareFirstEventBatchPreparerArgs(t *testing.T) {
 			cfg: config.Config{
 				GatewayClassName:     gcName,
 				ExperimentalFeatures: true,
-				SnippetsFilters:      false,
 			},
 			expectedObjects: []client.Object{
 				&gatewayv1.GatewayClass{ObjectMeta: metav1.ObjectMeta{Name: "nginx"}},
@@ -99,12 +97,37 @@ func TestPrepareFirstEventBatchPreparerArgs(t *testing.T) {
 				&ngfAPIv1alpha1.UpstreamSettingsPolicyList{},
 			},
 		},
+		{
+			name: "inference extension enabled",
+			cfg: config.Config{
+				GatewayClassName:   gcName,
+				InferenceExtension: true,
+			},
+			expectedObjects: []client.Object{
+				&gatewayv1.GatewayClass{ObjectMeta: metav1.ObjectMeta{Name: "nginx"}},
+			},
+			expectedObjectLists: []client.ObjectList{
+				&apiv1.ServiceList{},
+				&apiv1.SecretList{},
+				&apiv1.NamespaceList{},
+				&discoveryV1.EndpointSliceList{},
+				&gatewayv1.HTTPRouteList{},
+				&gatewayv1.GatewayList{},
+				&gatewayv1beta1.ReferenceGrantList{},
+				&ngfAPIv1alpha2.NginxProxyList{},
+				&gatewayv1.GRPCRouteList{},
+				partialObjectMetadataList,
+				&inference.InferencePoolList{},
+				&ngfAPIv1alpha1.ClientSettingsPolicyList{},
+				&ngfAPIv1alpha2.ObservabilityPolicyList{},
+				&ngfAPIv1alpha1.UpstreamSettingsPolicyList{},
+			},
+		},
 		{
 			name: "snippets filters enabled",
 			cfg: config.Config{
-				GatewayClassName:     gcName,
-				ExperimentalFeatures: false,
-				SnippetsFilters:      true,
+				GatewayClassName: gcName,
+				SnippetsFilters:  true,
 			},
 			expectedObjects: []client.Object{
 				&gatewayv1.GatewayClass{ObjectMeta: metav1.ObjectMeta{Name: "nginx"}},
@@ -127,10 +150,11 @@ func TestPrepareFirstEventBatchPreparerArgs(t *testing.T) {
 			},
 		},
 		{
-			name: "experimental and snippets filters enabled",
+			name: "experimental, inference, and snippets filters enabled",
 			cfg: config.Config{
 				GatewayClassName:     gcName,
 				ExperimentalFeatures: true,
+				InferenceExtension:   true,
 				SnippetsFilters:      true,
 			},
 			expectedObjects: []client.Object{
@@ -147,6 +171,7 @@ func TestPrepareFirstEventBatchPreparerArgs(t *testing.T) {
 				&gatewayv1beta1.ReferenceGrantList{},
 				&ngfAPIv1alpha2.NginxProxyList{},
 				partialObjectMetadataList,
+				&inference.InferencePoolList{},
 				&gatewayv1alpha3.BackendTLSPolicyList{},
 				&gatewayv1alpha2.TLSRouteList{},
 				&gatewayv1.GRPCRouteList{},
diff --git a/internal/controller/nginx/modules/src/epp.js b/internal/controller/nginx/modules/src/epp.js
index 8efcd70ece..d4beeb9e15 100644
--- a/internal/controller/nginx/modules/src/epp.js
+++ b/internal/controller/nginx/modules/src/epp.js
@@ -1,6 +1,6 @@
 // This file contains the methods to get an AI workload endpoint from the EndpointPicker (EPP).
 
-// TODO (sberman): this module will need to be enhanced to include the following:
+// TODO(sberman): this module will need to be enhanced to include the following:
 // - function that sends the subrequest to the Go middleware application (to get the endpoint from EPP)
 // - if a user has specified an Exact matching condition for a model name, extract the model name from
 // the request body, and if it matches that condition, set the proper value in the X-Gateway-Model-Name header
diff --git a/internal/controller/state/change_processor.go b/internal/controller/state/change_processor.go
index f3184adde8..27a62bb0e5 100644
--- a/internal/controller/state/change_processor.go
+++ b/internal/controller/state/change_processor.go
@@ -11,6 +11,7 @@ import (
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/client-go/tools/record"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	v1 "sigs.k8s.io/gateway-api/apis/v1"
 	"sigs.k8s.io/gateway-api/apis/v1alpha2"
 	"sigs.k8s.io/gateway-api/apis/v1alpha3"
@@ -98,6 +99,7 @@ func NewChangeProcessorImpl(cfg ChangeProcessorConfig) *ChangeProcessorImpl {
 		TLSRoutes:          make(map[types.NamespacedName]*v1alpha2.TLSRoute),
 		NGFPolicies:        make(map[graph.PolicyKey]policies.Policy),
 		SnippetsFilters:    make(map[types.NamespacedName]*ngfAPIv1alpha1.SnippetsFilter),
+		InferencePools:     make(map[types.NamespacedName]*inference.InferencePool),
 	}
 
 	processor := &ChangeProcessorImpl{
@@ -166,6 +168,11 @@ func NewChangeProcessorImpl(cfg ChangeProcessorConfig) *ChangeProcessorImpl {
 				store:     newObjectStoreMapAdapter(clusterStore.Services),
 				predicate: funcPredicate{stateChanged: isReferenced},
 			},
+			{
+				gvk:       cfg.MustExtractGVK(&inference.InferencePool{}),
+				store:     newObjectStoreMapAdapter(clusterStore.InferencePools),
+				predicate: funcPredicate{stateChanged: isReferenced},
+			},
 			{
 				gvk:       cfg.MustExtractGVK(&discoveryV1.EndpointSlice{}),
 				store:     nil,
diff --git a/internal/controller/state/change_processor_test.go b/internal/controller/state/change_processor_test.go
index 2d17e6f6e9..44dbdb0613 100644
--- a/internal/controller/state/change_processor_test.go
+++ b/internal/controller/state/change_processor_test.go
@@ -14,6 +14,7 @@ import (
 	"k8s.io/apimachinery/pkg/types"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	v1 "sigs.k8s.io/gateway-api/apis/v1"
 	"sigs.k8s.io/gateway-api/apis/v1alpha2"
 	"sigs.k8s.io/gateway-api/apis/v1alpha3"
@@ -317,6 +318,7 @@ func createScheme() *runtime.Scheme {
 	utilruntime.Must(apiext.AddToScheme(scheme))
 	utilruntime.Must(ngfAPIv1alpha1.AddToScheme(scheme))
 	utilruntime.Must(ngfAPIv1alpha2.AddToScheme(scheme))
+	utilruntime.Must(inference.Install(scheme))
 
 	return scheme
 }
diff --git a/internal/controller/state/graph/backend_refs.go b/internal/controller/state/graph/backend_refs.go
index d18a81cc43..e14d0fb0fa 100644
--- a/internal/controller/state/graph/backend_refs.go
+++ b/internal/controller/state/graph/backend_refs.go
@@ -15,7 +15,9 @@ import (
 	ngfAPIv1alpha2 "github.com/nginx/nginx-gateway-fabric/v2/apis/v1alpha2"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/controller/sort"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions"
+	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers"
+	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds"
 )
 
 const (
@@ -57,10 +59,11 @@ func addBackendRefsToRouteRules(
 	routes map[RouteKey]*L7Route,
 	refGrantResolver *referenceGrantResolver,
 	services map[types.NamespacedName]*v1.Service,
+	referencedInferencePools map[types.NamespacedName]*ReferencedInferencePool,
 	backendTLSPolicies map[types.NamespacedName]*BackendTLSPolicy,
 ) {
 	for _, r := range routes {
-		addBackendRefsToRules(r, refGrantResolver, services, backendTLSPolicies)
+		addBackendRefsToRules(r, refGrantResolver, services, referencedInferencePools, backendTLSPolicies)
 	}
 }
 
@@ -70,6 +73,7 @@ func addBackendRefsToRules(
 	route *L7Route,
 	refGrantResolver *referenceGrantResolver,
 	services map[types.NamespacedName]*v1.Service,
+	referencedInferencePools map[types.NamespacedName]*ReferencedInferencePool,
 	backendTLSPolicies map[types.NamespacedName]*BackendTLSPolicy,
 ) {
 	if !route.Valid {
@@ -99,6 +103,24 @@ func addBackendRefsToRules(
 			}
 			routeNs := route.Source.GetNamespace()
 
+			// if we have an InferencePool backend disguised as a Service, use the pool's first target port
+			if ref.IsInferencePool {
+				namespace := routeNs
+				if ref.Namespace != nil {
+					namespace = string(*ref.Namespace)
+				}
+
+				poolName := types.NamespacedName{
+					Name:      controller.GetInferencePoolName(string(ref.Name)),
+					Namespace: namespace,
+				}
+
+				if pool, exists := referencedInferencePools[poolName]; exists {
+					port := gatewayv1.PortNumber(pool.Source.Spec.TargetPorts[0].Number)
+					ref.Port = helpers.GetPointer(port)
+				}
+			}
+
 			ref, conds := createBackendRef(
 				ref,
 				route,
@@ -149,7 +171,14 @@ func createBackendRef(
 		}
 	}
 
-	valid, cond := validateRouteBackendRef(ref, route.Source.GetNamespace(), refGrantResolver, refPath)
+	valid, cond := validateRouteBackendRef(
+		route.RouteType,
+		ref,
+		route.Source.GetNamespace(),
+		refGrantResolver,
+		refPath,
+	)
+
 	if !valid {
 		backendRef := BackendRef{
 			Weight:             weight,
@@ -440,6 +469,7 @@ func checkExternalNameValidForGateways(
 }
 
 func validateRouteBackendRef(
+	routeType RouteType,
 	ref RouteBackendRef,
 	routeNs string,
 	refGrantResolver func(resource toResource) bool,
@@ -451,6 +481,10 @@ func validateRouteBackendRef(
 		return false, conditions.NewRouteBackendRefUnsupportedValue(valErr.Error())
 	}
 
+	if routeType == RouteTypeHTTP {
+		return validateBackendRefHTTPRoute(ref, routeNs, refGrantResolver, path)
+	}
+
 	return validateBackendRef(ref.BackendRef, routeNs, refGrantResolver, path)
 }
 
@@ -502,6 +536,120 @@ func validateBackendRef(
 	return true, conditions.Condition{}
 }
 
+func validateBackendRefHTTPRoute(
+	ref RouteBackendRef,
+	routeNs string,
+	refGrantResolver func(toResource toResource) bool,
+	path *field.Path,
+) (valid bool, cond conditions.Condition) {
+	// Because all errors cause the same condition but different reasons, we return as soon as we find an error.
+
+	if valid, cond := validateBackendRefHTTPRouteGroupKind(ref.BackendRef, path); !valid {
+		return false, cond
+	}
+
+	// no need to validate ref.Name
+
+	if ref.Namespace != nil && string(*ref.Namespace) != routeNs {
+		var inferencePool bool
+		var inferencePoolName types.NamespacedName
+
+		switch {
+		case ref.Kind != nil && *ref.Kind == kinds.InferencePool:
+			inferencePool = true
+			inferencePoolName = types.NamespacedName{
+				Namespace: string(*ref.Namespace),
+				Name:      string(ref.Name),
+			}
+		case ref.IsInferencePool:
+			// Case where RouteBackendRef has been updated with headless Service backend for the InferencePool
+			inferencePool = true
+			inferencePoolName = types.NamespacedName{
+				Namespace: string(*ref.Namespace),
+				Name:      controller.GetInferencePoolName(string(ref.Name)),
+			}
+		default:
+			refNsName := types.NamespacedName{Namespace: string(*ref.Namespace), Name: string(ref.Name)}
+
+			if !refGrantResolver(toService(refNsName)) {
+				msg := fmt.Sprintf("Backend ref to Service %s not permitted by any ReferenceGrant", refNsName)
+				valErr := field.Forbidden(path.Child("namespace"), msg)
+
+				return false, conditions.NewRouteBackendRefRefNotPermitted(valErr.Error())
+			}
+		}
+
+		if inferencePool {
+			if !refGrantResolver(toInferencePool(inferencePoolName)) {
+				msg := fmt.Sprintf(
+					"Backend ref to InferencePool %s not permitted by any ReferenceGrant",
+					inferencePoolName,
+				)
+				valErr := field.Forbidden(path.Child("namespace"), msg)
+				return false, conditions.NewRouteBackendRefRefNotPermitted(valErr.Error())
+			}
+		}
+	}
+
+	if ref.Port == nil && (ref.Kind == nil || *ref.Kind == kinds.Service) {
+		valErr := field.Required(path.Child("port"), "port cannot be nil")
+		return false, conditions.NewRouteBackendRefUnsupportedValue(valErr.Error())
+	}
+
+	// any value of port is OK
+
+	if ref.Weight != nil {
+		if err := validateWeight(*ref.Weight); err != nil {
+			valErr := field.Invalid(path.Child("weight"), *ref.Weight, err.Error())
+			return false, conditions.NewRouteBackendRefUnsupportedValue(valErr.Error())
+		}
+	}
+
+	return true, conditions.Condition{}
+}
+
+func validateBackendRefHTTPRouteGroupKind(
+	ref gatewayv1.BackendRef,
+	path *field.Path,
+) (bool, conditions.Condition) {
+	if ref.Group != nil {
+		group := *ref.Group
+		if group != "core" && group != "" && group != inferenceAPIGroup {
+			valErr := field.NotSupported(path.Child("group"), group, []string{"core", "", inferenceAPIGroup})
+			return false, conditions.NewRouteBackendRefInvalidKind(valErr.Error())
+		}
+		if group == inferenceAPIGroup {
+			if ref.Kind == nil || *ref.Kind != kinds.InferencePool {
+				valErr := field.Invalid(
+					path.Child("kind"),
+					ref.Kind,
+					fmt.Sprintf("kind must be InferencePool when group is %s", inferenceAPIGroup),
+				)
+				return false, conditions.NewRouteBackendRefInvalidKind(valErr.Error())
+			}
+		}
+	}
+
+	if ref.Kind != nil {
+		kind := *ref.Kind
+		if kind != kinds.Service && kind != kinds.InferencePool {
+			valErr := field.NotSupported(path.Child("kind"), kind, []string{kinds.Service, kinds.InferencePool})
+			return false, conditions.NewRouteBackendRefInvalidKind(valErr.Error())
+		}
+		if kind == kinds.InferencePool {
+			if ref.Group == nil || *ref.Group != inferenceAPIGroup {
+				valErr := field.Invalid(
+					path.Child("group"),
+					ref.Group,
+					fmt.Sprintf("group must be %s when kind is InferencePool", inferenceAPIGroup),
+				)
+				return false, conditions.NewRouteBackendRefInvalidKind(valErr.Error())
+			}
+		}
+	}
+	return true, conditions.Condition{}
+}
+
 // validateRouteBackendRefAppProtocol checks if a given RouteType supports sending traffic to a service AppProtocol.
 // Returns nil if true or AppProtocol is not a Kubernetes Standard Application Protocol.
 func validateRouteBackendRefAppProtocol(
diff --git a/internal/controller/state/graph/backend_refs_test.go b/internal/controller/state/graph/backend_refs_test.go
index 6e07bad538..3f05f793a6 100644
--- a/internal/controller/state/graph/backend_refs_test.go
+++ b/internal/controller/state/graph/backend_refs_test.go
@@ -11,13 +11,16 @@ import (
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/validation/field"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	gatewayv1 "sigs.k8s.io/gateway-api/apis/v1"
 	"sigs.k8s.io/gateway-api/apis/v1alpha2"
 	"sigs.k8s.io/gateway-api/apis/v1alpha3"
 
 	ngfAPIv1alpha2 "github.com/nginx/nginx-gateway-fabric/v2/apis/v1alpha2"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions"
+	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers"
+	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds"
 )
 
 func getNormalRef() gatewayv1.BackendRef {
@@ -36,16 +39,46 @@ func getModifiedRef(mod func(ref gatewayv1.BackendRef) gatewayv1.BackendRef) gat
 	return mod(getNormalRef())
 }
 
+func getNormalRouteBackendRef() RouteBackendRef {
+	return RouteBackendRef{
+		BackendRef: gatewayv1.BackendRef{
+			BackendObjectReference: gatewayv1.BackendObjectReference{
+				Kind:      helpers.GetPointer[gatewayv1.Kind]("Service"),
+				Name:      "service1",
+				Namespace: helpers.GetPointer[gatewayv1.Namespace]("test"),
+				Port:      helpers.GetPointer[gatewayv1.PortNumber](80),
+			},
+			Weight: helpers.GetPointer[int32](5),
+		},
+	}
+}
+
+func getModifiedRouteBackendRef(mod func(ref RouteBackendRef) RouteBackendRef) RouteBackendRef {
+	return mod(getNormalRouteBackendRef())
+}
+
 func TestValidateRouteBackendRef(t *testing.T) {
 	t.Parallel()
+
 	tests := []struct {
+		routeType         RouteType
 		expectedCondition conditions.Condition
 		name              string
 		ref               RouteBackendRef
 		expectedValid     bool
 	}{
 		{
-			name: "normal case",
+			name:      "normal case",
+			routeType: RouteTypeHTTP,
+			ref: RouteBackendRef{
+				BackendRef: getNormalRef(),
+				Filters:    nil,
+			},
+			expectedValid: true,
+		},
+		{
+			name:      "normal case grpc",
+			routeType: RouteTypeGRPC,
 			ref: RouteBackendRef{
 				BackendRef: getNormalRef(),
 				Filters:    nil,
@@ -53,7 +86,35 @@ func TestValidateRouteBackendRef(t *testing.T) {
 			expectedValid: true,
 		},
 		{
-			name: "filters not supported",
+			name:      "normal case; inferencepool backend",
+			routeType: RouteTypeHTTP,
+			ref: RouteBackendRef{
+				BackendRef: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef {
+					backend.BackendObjectReference = gatewayv1.BackendObjectReference{
+						Group: helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup),
+						Kind:  helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool),
+						Name:  "ipool",
+					}
+					return backend
+				}),
+			},
+			expectedValid: true,
+		},
+		{
+			name:      "normal case; headless Service inferencepool backend",
+			routeType: RouteTypeHTTP,
+			ref: RouteBackendRef{
+				BackendRef: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef {
+					backend.Name = gatewayv1.ObjectName(controller.CreateInferencePoolServiceName("ipool"))
+					return backend
+				}),
+				IsInferencePool: true,
+			},
+			expectedValid: true,
+		},
+		{
+			name:      "filters not supported",
+			routeType: RouteTypeHTTP,
 			ref: RouteBackendRef{
 				BackendRef: getNormalRef(),
 				Filters: []any{
@@ -70,7 +131,8 @@ func TestValidateRouteBackendRef(t *testing.T) {
 			),
 		},
 		{
-			name: "invalid base ref",
+			name:      "invalid base ref",
+			routeType: RouteTypeHTTP,
 			ref: RouteBackendRef{
 				BackendRef: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef {
 					backend.Kind = helpers.GetPointer[gatewayv1.Kind]("NotService")
@@ -79,7 +141,7 @@ func TestValidateRouteBackendRef(t *testing.T) {
 			},
 			expectedValid: false,
 			expectedCondition: conditions.NewRouteBackendRefInvalidKind(
-				`test.kind: Unsupported value: "NotService": supported values: "Service"`,
+				`test.kind: Unsupported value: "NotService": supported values: "Service", "InferencePool"`,
 			),
 		},
 	}
@@ -90,7 +152,13 @@ func TestValidateRouteBackendRef(t *testing.T) {
 			g := NewWithT(t)
 			alwaysTrueRefGrantResolver := func(_ toResource) bool { return true }
 
-			valid, cond := validateRouteBackendRef(test.ref, "test", alwaysTrueRefGrantResolver, field.NewPath("test"))
+			valid, cond := validateRouteBackendRef(
+				test.routeType,
+				test.ref,
+				"test",
+				alwaysTrueRefGrantResolver,
+				field.NewPath("test"),
+			)
 
 			g.Expect(valid).To(Equal(test.expectedValid))
 			g.Expect(cond).To(Equal(test.expectedCondition))
@@ -156,7 +224,7 @@ func TestValidateBackendRef(t *testing.T) {
 			),
 		},
 		{
-			name: "not a service kind",
+			name: "invalid kind",
 			ref: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef {
 				backend.Kind = helpers.GetPointer[gatewayv1.Kind]("NotService")
 				return backend
@@ -218,6 +286,209 @@ func TestValidateBackendRef(t *testing.T) {
 	}
 }
 
+func TestValidateBackendRefHTTPRoute(t *testing.T) {
+	t.Parallel()
+
+	alwaysFalseRefGrantResolver := func(_ toResource) bool { return false }
+	alwaysTrueRefGrantResolver := func(_ toResource) bool { return true }
+
+	tests := []struct {
+		refGrantResolver  func(resource toResource) bool
+		expectedCondition conditions.Condition
+		name              string
+		ref               RouteBackendRef
+		expectedValid     bool
+	}{
+		{
+			name:             "normal case",
+			ref:              getNormalRouteBackendRef(),
+			refGrantResolver: alwaysTrueRefGrantResolver,
+			expectedValid:    true,
+		},
+		{
+			name: "normal case with implicit namespace",
+			ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef {
+				backend.Namespace = nil
+				return backend
+			}),
+			refGrantResolver: alwaysTrueRefGrantResolver,
+			expectedValid:    true,
+		},
+		{
+			name: "normal case with implicit kind Service",
+			ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef {
+				backend.Kind = nil
+				return backend
+			}),
+			refGrantResolver: alwaysTrueRefGrantResolver,
+			expectedValid:    true,
+		},
+		{
+			name: "normal case with InferencePool",
+			ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef {
+				backend.Group = helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup)
+				backend.Kind = helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool)
+				return backend
+			}),
+			refGrantResolver: alwaysTrueRefGrantResolver,
+			expectedValid:    true,
+		},
+		{
+			name: "group is inference group but kind is not InferencePool",
+			ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef {
+				backend.Group = helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup)
+				backend.Kind = helpers.GetPointer[gatewayv1.Kind](kinds.Service)
+				return backend
+			}),
+			refGrantResolver: alwaysTrueRefGrantResolver,
+			expectedValid:    false,
+			expectedCondition: conditions.NewRouteBackendRefInvalidKind(
+				`test.kind: Invalid value: "Service": kind must be InferencePool when group is inference.networking.k8s.io`,
+			),
+		},
+		{
+			name: "kind is InferencePool but group is not inference",
+			ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef {
+				backend.Kind = helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool)
+				backend.Group = helpers.GetPointer[gatewayv1.Group]("core")
+				return backend
+			}),
+			refGrantResolver: alwaysTrueRefGrantResolver,
+			expectedValid:    false,
+			expectedCondition: conditions.NewRouteBackendRefInvalidKind(
+				`test.group: Invalid value: "core": group must be inference.networking.k8s.io when kind is InferencePool`,
+			),
+		},
+		{
+			name: "normal case with backend ref allowed by reference grant",
+			ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef {
+				backend.Namespace = helpers.GetPointer[gatewayv1.Namespace]("cross-ns")
+				return backend
+			}),
+			refGrantResolver: alwaysTrueRefGrantResolver,
+			expectedValid:    true,
+		},
+		{
+			name: "inferencepool backend ref not allowed by reference grant",
+			ref: RouteBackendRef{
+				BackendRef: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef {
+					backend.BackendObjectReference = gatewayv1.BackendObjectReference{
+						Group:     helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup),
+						Kind:      helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool),
+						Name:      "ipool",
+						Namespace: helpers.GetPointer[gatewayv1.Namespace]("invalid"),
+					}
+					return backend
+				}),
+			},
+			refGrantResolver: alwaysFalseRefGrantResolver,
+			expectedValid:    false,
+			expectedCondition: conditions.NewRouteBackendRefRefNotPermitted(
+				"test.namespace: Forbidden: Backend ref to InferencePool invalid/ipool not permitted by any ReferenceGrant",
+			),
+		},
+		{
+			name: "headless Service inferencepool backend ref not allowed by reference grant",
+			ref: RouteBackendRef{
+				BackendRef: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef {
+					backend.Name = gatewayv1.ObjectName(controller.CreateInferencePoolServiceName("ipool"))
+					backend.Namespace = helpers.GetPointer[gatewayv1.Namespace]("invalid")
+					return backend
+				}),
+				IsInferencePool: true,
+			},
+			refGrantResolver: alwaysFalseRefGrantResolver,
+			expectedValid:    false,
+			expectedCondition: conditions.NewRouteBackendRefRefNotPermitted(
+				"test.namespace: Forbidden: Backend ref to InferencePool invalid/ipool not permitted by any ReferenceGrant",
+			),
+		},
+		{
+			name: "invalid group",
+			ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef {
+				backend.Group = helpers.GetPointer[gatewayv1.Group]("invalid")
+				return backend
+			}),
+			refGrantResolver: alwaysTrueRefGrantResolver,
+			expectedValid:    false,
+			expectedCondition: conditions.NewRouteBackendRefInvalidKind(
+				`test.group: Unsupported value: "invalid": supported values: "core", "", "inference.networking.k8s.io"`,
+			),
+		},
+		{
+			name: "invalid kind",
+			ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef {
+				backend.Kind = helpers.GetPointer[gatewayv1.Kind]("NotService")
+				return backend
+			}),
+			refGrantResolver: alwaysTrueRefGrantResolver,
+			expectedValid:    false,
+			expectedCondition: conditions.NewRouteBackendRefInvalidKind(
+				`test.kind: Unsupported value: "NotService": supported values: "Service", "InferencePool"`,
+			),
+		},
+		{
+			name: "backend ref not allowed by reference grant",
+			ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef {
+				backend.Namespace = helpers.GetPointer[gatewayv1.Namespace]("invalid")
+				return backend
+			}),
+			refGrantResolver: alwaysFalseRefGrantResolver,
+			expectedValid:    false,
+			expectedCondition: conditions.NewRouteBackendRefRefNotPermitted(
+				"test.namespace: Forbidden: Backend ref to Service invalid/service1 not permitted by any ReferenceGrant",
+			),
+		},
+		{
+			name: "invalid weight",
+			ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef {
+				backend.Weight = helpers.GetPointer[int32](-1)
+				return backend
+			}),
+			refGrantResolver: alwaysTrueRefGrantResolver,
+			expectedValid:    false,
+			expectedCondition: conditions.NewRouteBackendRefUnsupportedValue(
+				"test.weight: Invalid value: -1: must be in the range [0, 1000000]",
+			),
+		},
+		{
+			name: "nil port",
+			ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef {
+				backend.Port = nil
+				return backend
+			}),
+			refGrantResolver: alwaysTrueRefGrantResolver,
+			expectedValid:    false,
+			expectedCondition: conditions.NewRouteBackendRefUnsupportedValue(
+				"test.port: Required value: port cannot be nil",
+			),
+		},
+		{
+			name: "nil port allowed for InferencePool kind",
+			ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef {
+				backend.Kind = helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool)
+				backend.Group = helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup)
+				backend.Port = nil
+				return backend
+			}),
+			refGrantResolver: alwaysTrueRefGrantResolver,
+			expectedValid:    true,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			t.Parallel()
+			g := NewWithT(t)
+
+			valid, cond := validateBackendRefHTTPRoute(test.ref, "test", test.refGrantResolver, field.NewPath("test"))
+
+			g.Expect(valid).To(Equal(test.expectedValid))
+			g.Expect(cond).To(Equal(test.expectedCondition))
+		})
+	}
+}
+
 func TestValidateWeight(t *testing.T) {
 	t.Parallel()
 	validWeights := []int32{0, 1, 1000000}
@@ -523,13 +794,21 @@ func TestAddBackendRefsToRules(t *testing.T) {
 		Name:      "svcGRPC",
 	}
 
+	svcInferenceName := controller.CreateInferencePoolServiceName("ipool")
+	svcInference := getSvc(svcInferenceName)
+	svcInferenceNsName := types.NamespacedName{
+		Namespace: "test",
+		Name:      svcInferenceName,
+	}
+
 	services := map[types.NamespacedName]*v1.Service{
-		{Namespace: "test", Name: "svc1"}:    svc1,
-		{Namespace: "test", Name: "svc2"}:    svc2,
-		{Namespace: "test", Name: "svcH2c"}:  svcH2c,
-		{Namespace: "test", Name: "svcWS"}:   svcWS,
-		{Namespace: "test", Name: "svcWSS"}:  svcWSS,
-		{Namespace: "test", Name: "svcGRPC"}: svcGRPC,
+		svc1NsName:         svc1,
+		svc2NsName:         svc2,
+		svcH2cNsName:       svcH2c,
+		svcWSNsName:        svcWS,
+		svcWSSNsName:       svcWSS,
+		svcGRPCNsName:      svcGRPC,
+		svcInferenceNsName: svcInference,
 	}
 	emptyPolicies := map[types.NamespacedName]*BackendTLSPolicy{}
 
@@ -892,7 +1171,7 @@ func TestAddBackendRefsToRules(t *testing.T) {
 			},
 			expectedConditions: []conditions.Condition{
 				conditions.NewRouteBackendRefInvalidKind(
-					`spec.rules[0].backendRefs[0].kind: Unsupported value: "NotService": supported values: "Service"`,
+					`spec.rules[0].backendRefs[0].kind: Unsupported value: "NotService": supported values: "Service", "InferencePool"`,
 				),
 			},
 			policies: emptyPolicies,
@@ -938,6 +1217,29 @@ func TestAddBackendRefsToRules(t *testing.T) {
 			expectedConditions:  nil,
 			name:                "zero backendRefs",
 		},
+		{
+			route: func() *L7Route {
+				route := createRoute("hr-inference", RouteTypeHTTP, "Service", 1, svcInferenceName)
+				// Mark the backend ref as IsInferencePool and set the port to nil (simulate InferencePool logic)
+				route.Spec.Rules[0].RouteBackendRefs[0].IsInferencePool = true
+				route.Spec.Rules[0].RouteBackendRefs[0].Port = nil
+				return route
+			}(),
+			expectedBackendRefs: []BackendRef{
+				{
+					SvcNsName: types.NamespacedName{Namespace: "test", Name: svcInferenceName},
+					ServicePort: v1.ServicePort{
+						Port: 80,
+					},
+					Valid:              true,
+					Weight:             1,
+					InvalidForGateways: map[types.NamespacedName]conditions.Condition{},
+				},
+			},
+			expectedConditions: nil,
+			policies:           emptyPolicies,
+			name:               "headless Service for InferencePool gets port set correctly",
+		},
 	}
 
 	for _, test := range tests {
@@ -946,7 +1248,22 @@ func TestAddBackendRefsToRules(t *testing.T) {
 
 			g := NewWithT(t)
 			resolver := newReferenceGrantResolver(nil)
-			addBackendRefsToRules(test.route, resolver, services, test.policies)
+
+			referencedInferencePools := map[types.NamespacedName]*ReferencedInferencePool{
+				{Namespace: "test", Name: "ipool"}: {
+					Source: &inference.InferencePool{
+						Spec: inference.InferencePoolSpec{
+							TargetPorts: []inference.Port{
+								{
+									Number: 80,
+								},
+							},
+						},
+					},
+				},
+			}
+
+			addBackendRefsToRules(test.route, resolver, services, referencedInferencePools, test.policies)
 
 			var actual []BackendRef
 			if test.route.Spec.Rules != nil {
@@ -1169,7 +1486,7 @@ func TestCreateBackend(t *testing.T) {
 			expectedServicePortReference: "",
 			expectedConditions: []conditions.Condition{
 				conditions.NewRouteBackendRefInvalidKind(
-					`test.kind: Unsupported value: "NotService": supported values: "Service"`,
+					`test.kind: Unsupported value: "NotService": supported values: "Service", "InferencePool"`,
 				),
 			},
 			name: "invalid kind",
@@ -1403,11 +1720,13 @@ func TestCreateBackend(t *testing.T) {
 			g := NewWithT(t)
 
 			rbr := RouteBackendRef{
-				nil,
-				test.ref.BackendRef,
-				[]any{},
+				MirrorBackendIdx: nil,
+				IsInferencePool:  false,
+				BackendRef:       test.ref.BackendRef,
+				Filters:          []any{},
 			}
 			route := &L7Route{
+				RouteType: RouteTypeHTTP,
 				Source: &gatewayv1.HTTPRoute{
 					ObjectMeta: metav1.ObjectMeta{
 						Namespace: "test",
@@ -1467,12 +1786,14 @@ func TestCreateBackend(t *testing.T) {
 	// test mirror backend case
 	g := NewWithT(t)
 	ref := RouteBackendRef{
-		helpers.GetPointer(0), // mirrorFilterIdx
-		getNormalRef(),
-		[]any{},
+		MirrorBackendIdx: helpers.GetPointer(0),
+		IsInferencePool:  false,
+		BackendRef:       getNormalRef(),
+		Filters:          []any{},
 	}
 
 	route := &L7Route{
+		RouteType: RouteTypeHTTP,
 		Source: &gatewayv1.HTTPRoute{
 			ObjectMeta: metav1.ObjectMeta{
 				Namespace: "test",
diff --git a/internal/controller/state/graph/graph.go b/internal/controller/state/graph/graph.go
index e556c798ba..b5e13991e9 100644
--- a/internal/controller/state/graph/graph.go
+++ b/internal/controller/state/graph/graph.go
@@ -9,6 +9,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	"k8s.io/apimachinery/pkg/types"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	gatewayv1 "sigs.k8s.io/gateway-api/apis/v1"
 	"sigs.k8s.io/gateway-api/apis/v1alpha2"
 	"sigs.k8s.io/gateway-api/apis/v1alpha3"
@@ -40,6 +41,7 @@ type ClusterState struct {
 	GRPCRoutes         map[types.NamespacedName]*gatewayv1.GRPCRoute
 	NGFPolicies        map[PolicyKey]policies.Policy
 	SnippetsFilters    map[types.NamespacedName]*ngfAPIv1alpha1.SnippetsFilter
+	InferencePools     map[types.NamespacedName]*inference.InferencePool
 }
 
 // Graph is a Graph-like representation of Gateway API resources.
@@ -65,6 +67,9 @@ type Graph struct {
 	ReferencedNamespaces map[types.NamespacedName]*v1.Namespace
 	// ReferencedServices includes the NamespacedNames of all the Services that are referenced by at least one Route.
 	ReferencedServices map[types.NamespacedName]*ReferencedService
+	// ReferencedInferencePools includes the NamespacedNames of all the InferencePools
+	// that are referenced by at least one Route.
+	ReferencedInferencePools map[types.NamespacedName]*ReferencedInferencePool
 	// ReferencedCaCertConfigMaps includes ConfigMaps that have been referenced by any BackendTLSPolicies.
 	ReferencedCaCertConfigMaps map[types.NamespacedName]*CaCertConfigMap
 	// ReferencedNginxProxies includes NginxProxies that have been referenced by a GatewayClass or a Gateway.
@@ -115,11 +120,15 @@ func (g *Graph) IsReferenced(resourceType ngftypes.ObjectType, nsname types.Name
 		_, existed := g.ReferencedNamespaces[nsname]
 		exists := isNamespaceReferenced(obj, g.Gateways)
 		return existed || exists
-	// Service reference exists if at least one HTTPRoute references it.
+	// Service reference exists if at least one Route references it.
 	case *v1.Service:
 		_, exists := g.ReferencedServices[nsname]
 		return exists
-	// EndpointSlice reference exists if its Service owner is referenced by at least one HTTPRoute.
+	// InferencePool reference exists if at least one Route references it.
+	case *inference.InferencePool:
+		_, exists := g.ReferencedInferencePools[nsname]
+		return exists
+	// EndpointSlice reference exists if its Service owner is referenced by at least one Route.
 	case *discoveryV1.EndpointSlice:
 		svcName := index.GetServiceNameFromEndpointSlice(obj)
 
@@ -249,7 +258,9 @@ func BuildGraph(
 		state.GRPCRoutes,
 		gws,
 		processedSnippetsFilters,
+		state.InferencePools,
 	)
+	referencedInferencePools := buildReferencedInferencePools(routes, gws, state.InferencePools)
 
 	l4routes := buildL4RoutesForGateways(
 		state.TLSRoutes,
@@ -262,6 +273,7 @@ func BuildGraph(
 		routes,
 		refGrantResolver,
 		state.Services,
+		referencedInferencePools,
 		processedBackendTLSPolicies,
 	)
 	bindRoutesToListeners(routes, l4routes, gws, state.Namespaces)
@@ -295,6 +307,7 @@ func BuildGraph(
 		ReferencedSecrets:          secretResolver.getResolvedSecrets(),
 		ReferencedNamespaces:       referencedNamespaces,
 		ReferencedServices:         referencedServices,
+		ReferencedInferencePools:   referencedInferencePools,
 		ReferencedCaCertConfigMaps: configMapResolver.getResolvedConfigMaps(),
 		ReferencedNginxProxies:     processedNginxProxies,
 		BackendTLSPolicies:         processedBackendTLSPolicies,
diff --git a/internal/controller/state/graph/graph_test.go b/internal/controller/state/graph/graph_test.go
index ac5cfff3a2..da0ca04d47 100644
--- a/internal/controller/state/graph/graph_test.go
+++ b/internal/controller/state/graph/graph_test.go
@@ -13,6 +13,7 @@ import (
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	"k8s.io/apimachinery/pkg/types"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	gatewayv1 "sigs.k8s.io/gateway-api/apis/v1"
 	"sigs.k8s.io/gateway-api/apis/v1alpha2"
 	"sigs.k8s.io/gateway-api/apis/v1alpha3"
@@ -25,6 +26,7 @@ import (
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/validation"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/validation/validationfakes"
+	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller/index"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds"
@@ -214,6 +216,44 @@ func TestBuildGraph(t *testing.T) {
 		return rule
 	}
 
+	// createValidRuleWithInferencePoolBackendRef returns the RouteRule expected for a rule whose
+	// backend is the "ipool" InferencePool: both refs use the pool's Service name.
+	createValidRuleWithInferencePoolBackendRef := func(matches []gatewayv1.HTTPRouteMatch) RouteRule {
+		poolSvcName := controller.CreateInferencePoolServiceName("ipool")
+
+		resolved := BackendRef{
+			SvcNsName:          types.NamespacedName{Namespace: testNs, Name: poolSvcName},
+			ServicePort:        v1.ServicePort{Port: 80},
+			Valid:              true,
+			Weight:             1,
+			InvalidForGateways: map[types.NamespacedName]conditions.Condition{},
+		}
+
+		objRef := gatewayv1.BackendObjectReference{
+			Group:     helpers.GetPointer[gatewayv1.Group](""),
+			Kind:      helpers.GetPointer[gatewayv1.Kind](kinds.Service),
+			Name:      gatewayv1.ObjectName(poolSvcName),
+			Namespace: helpers.GetPointer(gatewayv1.Namespace(testNs)),
+		}
+		routeRef := RouteBackendRef{
+			IsInferencePool: true,
+			BackendRef: gatewayv1.BackendRef{
+				BackendObjectReference: objRef,
+			},
+		}
+
+		return RouteRule{
+			ValidMatches: true,
+			Filters: RouteRuleFilters{
+				Filters: []Filter{},
+				Valid:   true,
+			},
+			Matches:          matches,
+			BackendRefs:      []BackendRef{resolved},
+			RouteBackendRefs: []RouteBackendRef{routeRef},
+		}
+	}
+
 	routeMatches := []gatewayv1.HTTPRouteMatch{
 		{
 			Path: &gatewayv1.HTTPPathMatch{
@@ -338,6 +378,32 @@ func TestBuildGraph(t *testing.T) {
 		},
 	}
 
+	inferencePool := &inference.InferencePool{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: testNs,
+			Name:      "ipool",
+		},
+		Spec: inference.InferencePoolSpec{
+			TargetPorts: []inference.Port{
+				{Number: 80},
+			},
+		},
+	}
+
+	ir := createRoute("ir", "gateway-1", "listener-80-1")
+	ir.Spec.Hostnames = []gatewayv1.Hostname{"inference.example.com"}
+	// Update the backend ref to point to the InferencePool instead of a Service
+	ir.Spec.Rules[0].BackendRefs[0] = gatewayv1.HTTPBackendRef{
+		BackendRef: gatewayv1.BackendRef{
+			BackendObjectReference: gatewayv1.BackendObjectReference{
+				Kind:      helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool),
+				Group:     helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup),
+				Name:      gatewayv1.ObjectName(inferencePool.Name),
+				Namespace: helpers.GetPointer(gatewayv1.Namespace(inferencePool.Namespace)),
+			},
+		},
+	}
+
 	secret := &v1.Secret{
 		TypeMeta: metav1.TypeMeta{
 			Kind: "Secret",
@@ -489,7 +555,20 @@ func TestBuildGraph(t *testing.T) {
 
 	svc1 := &v1.Service{
 		ObjectMeta: metav1.ObjectMeta{
-			Namespace: "test", Name: "foo2",
+			Namespace: testNs, Name: "foo2",
+		},
+		Spec: v1.ServiceSpec{
+			Ports: []v1.ServicePort{
+				{
+					Port: 80,
+				},
+			},
+		},
+	}
+
+	inferenceSvc := &v1.Service{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: testNs, Name: controller.CreateInferencePoolServiceName(inferencePool.Name),
 		},
 		Spec: v1.ServiceSpec{
 			Ports: []v1.ServicePort{
@@ -691,6 +770,7 @@ func TestBuildGraph(t *testing.T) {
 				client.ObjectKeyFromObject(hr1): hr1,
 				client.ObjectKeyFromObject(hr2): hr2,
 				client.ObjectKeyFromObject(hr3): hr3,
+				client.ObjectKeyFromObject(ir):  ir,
 			},
 			TLSRoutes: map[types.NamespacedName]*v1alpha2.TLSRoute{
 				client.ObjectKeyFromObject(tr):  tr,
@@ -700,8 +780,12 @@ func TestBuildGraph(t *testing.T) {
 				client.ObjectKeyFromObject(gr): gr,
 			},
 			Services: map[types.NamespacedName]*v1.Service{
-				client.ObjectKeyFromObject(svc):  svc,
-				client.ObjectKeyFromObject(svc1): svc1,
+				client.ObjectKeyFromObject(svc):          svc,
+				client.ObjectKeyFromObject(svc1):         svc1,
+				client.ObjectKeyFromObject(inferenceSvc): inferenceSvc,
+			},
+			InferencePools: map[types.NamespacedName]*inference.InferencePool{
+				client.ObjectKeyFromObject(inferencePool): inferencePool,
 			},
 			Namespaces: map[types.NamespacedName]*v1.Namespace{
 				client.ObjectKeyFromObject(ns): ns,
@@ -992,6 +1076,37 @@ func TestBuildGraph(t *testing.T) {
 		},
 	}
 
+	inferenceRoute := &L7Route{
+		RouteType:  RouteTypeHTTP,
+		Valid:      true,
+		Attachable: true,
+		Source:     ir,
+		ParentRefs: []ParentRef{
+			{
+				Idx: 0,
+				Gateway: &ParentRefGateway{
+					NamespacedName:      client.ObjectKeyFromObject(gw1.Source),
+					EffectiveNginxProxy: np1Effective,
+				},
+				SectionName: ir.Spec.ParentRefs[0].SectionName,
+				Attachment: &ParentRefAttachmentStatus{
+					Attached: true,
+					AcceptedHostnames: map[string][]string{
+						CreateGatewayListenerKey(
+							client.ObjectKeyFromObject(gw1.Source),
+							"listener-80-1",
+						): {"inference.example.com"},
+					},
+					ListenerPort: 80,
+				},
+			},
+		},
+		Spec: L7RouteSpec{
+			Hostnames: ir.Spec.Hostnames,
+			Rules:     []RouteRule{createValidRuleWithInferencePoolBackendRef(routeMatches)},
+		},
+	}
+
 	supportedKindsForListeners := []gatewayv1.RouteGroupKind{
 		{Kind: gatewayv1.Kind(kinds.HTTPRoute), Group: helpers.GetPointer[gatewayv1.Group](gatewayv1.GroupName)},
 		{Kind: gatewayv1.Kind(kinds.GRPCRoute), Group: helpers.GetPointer[gatewayv1.Group](gatewayv1.GroupName)},
@@ -1021,6 +1136,7 @@ func TestBuildGraph(t *testing.T) {
 							Routes: map[RouteKey]*L7Route{
 								CreateRouteKey(hr1): routeHR1,
 								CreateRouteKey(gr):  routeGR,
+								CreateRouteKey(ir):  inferenceRoute,
 							},
 							SupportedKinds:            supportedKindsForListeners,
 							L4Routes:                  map[L4RouteKey]*L4Route{},
@@ -1175,6 +1291,7 @@ func TestBuildGraph(t *testing.T) {
 				CreateRouteKey(hr1): routeHR1,
 				CreateRouteKey(hr3): routeHR3,
 				CreateRouteKey(gr):  routeGR,
+				CreateRouteKey(ir):  inferenceRoute,
 			},
 			L4Routes: map[L4RouteKey]*L4Route{
 				CreateRouteKeyL4(tr):  routeTR,
@@ -1199,6 +1316,14 @@ func TestBuildGraph(t *testing.T) {
 				client.ObjectKeyFromObject(svc1): {
 					GatewayNsNames: map[types.NamespacedName]struct{}{{Namespace: testNs, Name: "gateway-1"}: {}},
 				},
+				client.ObjectKeyFromObject(inferenceSvc): {
+					GatewayNsNames: map[types.NamespacedName]struct{}{{Namespace: testNs, Name: "gateway-1"}: {}},
+				},
+			},
+			ReferencedInferencePools: map[types.NamespacedName]*ReferencedInferencePool{
+				client.ObjectKeyFromObject(inferencePool): {
+					Source: inferencePool,
+				},
 			},
 			ReferencedCaCertConfigMaps: map[types.NamespacedName]*CaCertConfigMap{
 				client.ObjectKeyFromObject(cm): {
@@ -1382,6 +1507,20 @@ func TestIsReferenced(t *testing.T) {
 	}
 	emptyService := &v1.Service{}
 
+	inferenceInGraph := &inference.InferencePool{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: "default",
+			Name:      "inferenceInGraph",
+		},
+	}
+	inferenceNotInGraph := &inference.InferencePool{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: "default",
+			Name:      "inferenceNotInGraph",
+		},
+	}
+	emptyInferencePool := &inference.InferencePool{}
+
 	createEndpointSlice := func(name string, svcName string) *discoveryV1.EndpointSlice {
 		return &discoveryV1.EndpointSlice{
 			ObjectMeta: metav1.ObjectMeta{
@@ -1461,6 +1600,9 @@ func TestIsReferenced(t *testing.T) {
 		ReferencedServices: map[types.NamespacedName]*ReferencedService{
 			client.ObjectKeyFromObject(serviceInGraph): {},
 		},
+		ReferencedInferencePools: map[types.NamespacedName]*ReferencedInferencePool{
+			client.ObjectKeyFromObject(inferenceInGraph): {},
+		},
 		ReferencedCaCertConfigMaps: map[types.NamespacedName]*CaCertConfigMap{
 			client.ObjectKeyFromObject(baseConfigMap): {
 				Source: baseConfigMap,
@@ -1562,6 +1704,26 @@ func TestIsReferenced(t *testing.T) {
 			expected: false,
 		},
 
+		// InferencePool tests
+		{
+			name:     "InferencePool is referenced",
+			resource: inferenceInGraph,
+			graph:    graph,
+			expected: true,
+		},
+		{
+			name:     "InferencePool is not referenced",
+			resource: inferenceNotInGraph,
+			graph:    graph,
+			expected: false,
+		},
+		{
+			name:     "Empty InferencePool",
+			resource: emptyInferencePool,
+			graph:    graph,
+			expected: false,
+		},
+
 		// EndpointSlice tests
 		{
 			name:     "EndpointSlice with Service owner in graph's ReferencedServices is referenced",
diff --git a/internal/controller/state/graph/grpcroute_test.go b/internal/controller/state/graph/grpcroute_test.go
index 8579c54627..71f87d58c0 100644
--- a/internal/controller/state/graph/grpcroute_test.go
+++ b/internal/controller/state/graph/grpcroute_test.go
@@ -230,6 +230,7 @@ func TestBuildGRPCRoutes(t *testing.T) {
 				grRoutes,
 				test.gateways,
 				snippetsFilters,
+				nil,
 			)
 			g.Expect(helpers.Diff(test.expected, routes)).To(BeEmpty())
 		})
diff --git a/internal/controller/state/graph/httproute.go b/internal/controller/state/graph/httproute.go
index 48415d0573..de7a85370d 100644
--- a/internal/controller/state/graph/httproute.go
+++ b/internal/controller/state/graph/httproute.go
@@ -7,13 +7,16 @@ import (
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/validation/field"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	v1 "sigs.k8s.io/gateway-api/apis/v1"
 
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/controller/nginx/config/http"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/mirror"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/validation"
+	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers"
+	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds"
 )
 
 var (
@@ -27,6 +30,7 @@ func buildHTTPRoute(
 	ghr *v1.HTTPRoute,
 	gws map[types.NamespacedName]*Gateway,
 	snippetsFilters map[types.NamespacedName]*SnippetsFilter,
+	inferencePools map[types.NamespacedName]*inference.InferencePool,
 ) *L7Route {
 	r := &L7Route{
 		Source:    ghr,
@@ -59,9 +63,10 @@ func buildHTTPRoute(
 	r.Attachable = true
 
 	rules, valid, conds := processHTTPRouteRules(
-		ghr.Spec.Rules,
+		ghr,
 		validator,
 		getSnippetsFilterResolverForNamespace(snippetsFilters, r.Source.GetNamespace()),
+		inferencePools,
 	)
 
 	r.Spec.Rules = rules
@@ -113,6 +118,7 @@ func buildHTTPMirrorRoutes(
 					tmpMirrorRoute,
 					gateways,
 					snippetsFilters,
+					nil,
 				)
 
 				if mirrorRoute != nil {
@@ -163,9 +169,11 @@ func removeHTTPMirrorFilters(filters []v1.HTTPRouteFilter) []v1.HTTPRouteFilter
 
 func processHTTPRouteRule(
 	specRule v1.HTTPRouteRule,
+	routeNamespace string,
 	rulePath *field.Path,
 	validator validation.HTTPFieldsValidator,
 	resolveExtRefFunc resolveExtRefFilter,
+	inferencePools map[types.NamespacedName]*inference.InferencePool,
 ) (RouteRule, routeRuleErrors) {
 	var errors routeRuleErrors
 
@@ -201,10 +209,32 @@ func processHTTPRouteRule(
 				interfaceFilters = append(interfaceFilters, filter)
 			}
 		}
-		rbr := RouteBackendRef{
-			BackendRef: b.BackendRef,
-			Filters:    interfaceFilters,
+
+		var rbr RouteBackendRef
+		// If route specifies an InferencePool backend, we need to convert it to its associated
+		// headless Service backend (that we created), so nginx config can be built properly.
+		// Only do this if the InferencePool actually exists.
+		if inferencePoolBackend(b, routeNamespace, inferencePools) {
+			svcName := controller.CreateInferencePoolServiceName(string(b.Name))
+			rbr = RouteBackendRef{
+				IsInferencePool: true,
+				BackendRef: v1.BackendRef{
+					BackendObjectReference: v1.BackendObjectReference{
+						Group:     helpers.GetPointer[v1.Group](""),
+						Kind:      helpers.GetPointer[v1.Kind](kinds.Service),
+						Name:      v1.ObjectName(svcName),
+						Namespace: b.Namespace,
+					},
+					Weight: b.Weight,
+				},
+			}
+		} else {
+			rbr = RouteBackendRef{
+				BackendRef: b.BackendRef,
+			}
 		}
+
+		rbr.Filters = interfaceFilters
 		backendRefs = append(backendRefs, rbr)
 	}
 
@@ -233,25 +263,28 @@ func processHTTPRouteRule(
 }
 
 func processHTTPRouteRules(
-	specRules []v1.HTTPRouteRule,
+	route *v1.HTTPRoute,
 	validator validation.HTTPFieldsValidator,
 	resolveExtRefFunc resolveExtRefFilter,
+	inferencePools map[types.NamespacedName]*inference.InferencePool,
 ) (rules []RouteRule, valid bool, conds []conditions.Condition) {
-	rules = make([]RouteRule, len(specRules))
+	rules = make([]RouteRule, len(route.Spec.Rules))
 
 	var (
 		allRulesErrors  routeRuleErrors
 		atLeastOneValid bool
 	)
 
-	for i, rule := range specRules {
+	for i, rule := range route.Spec.Rules {
 		rulePath := field.NewPath("spec").Child("rules").Index(i)
 
 		rr, errors := processHTTPRouteRule(
 			rule,
+			route.GetNamespace(),
 			rulePath,
 			validator,
 			resolveExtRefFunc,
+			inferencePools,
 		)
 
 		if rr.ValidMatches && rr.Filters.Valid {
@@ -288,6 +321,32 @@ func processHTTPRouteRules(
 	return rules, valid, conds
 }
 
+// inferencePoolBackend reports whether backendRef refers to an InferencePool (by
+// group and kind) that exists in inferencePools. A nil Group or Kind never matches.
+func inferencePoolBackend(
+	backendRef v1.HTTPBackendRef,
+	routeNamespace string,
+	inferencePools map[types.NamespacedName]*inference.InferencePool,
+) bool {
+	// Guard both pointers: Kind is optional, so dereferencing it unchecked can panic.
+	if backendRef.Group == nil || backendRef.Kind == nil {
+		return false
+	}
+	if *backendRef.Group != inferenceAPIGroup || *backendRef.Kind != kinds.InferencePool {
+		return false
+	}
+
+	// An omitted namespace on the ref means the Route's own namespace.
+	namespace := routeNamespace
+	if backendRef.Namespace != nil {
+		namespace = string(*backendRef.Namespace)
+	}
+
+	key := types.NamespacedName{Name: string(backendRef.Name), Namespace: namespace}
+	_, exists := inferencePools[key]
+	return exists
+}
+
 func validateMatch(
 	validator validation.HTTPFieldsValidator,
 	match v1.HTTPRouteMatch,
diff --git a/internal/controller/state/graph/httproute_test.go b/internal/controller/state/graph/httproute_test.go
index 3b90b0970f..0e06e5bf7e 100644
--- a/internal/controller/state/graph/httproute_test.go
+++ b/internal/controller/state/graph/httproute_test.go
@@ -9,6 +9,7 @@ import (
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/validation/field"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	gatewayv1 "sigs.k8s.io/gateway-api/apis/v1"
 
 	ngfAPI "github.com/nginx/nginx-gateway-fabric/v2/apis/v1alpha1"
@@ -117,6 +118,17 @@ var expRouteBackendRef = RouteBackendRef{
 	},
 }
 
+// createInferencePoolBackend builds a BackendRef pointing at the named InferencePool.
+func createInferencePoolBackend(name, namespace string) gatewayv1.BackendRef {
+	objRef := gatewayv1.BackendObjectReference{
+		Name:      gatewayv1.ObjectName(name),
+		Namespace: helpers.GetPointer(gatewayv1.Namespace(namespace)),
+		Group:     helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup),
+		Kind:      helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool),
+	}
+	return gatewayv1.BackendRef{BackendObjectReference: objRef}
+}
+
 func TestBuildHTTPRoutes(t *testing.T) {
 	t.Parallel()
 
@@ -263,6 +275,7 @@ func TestBuildHTTPRoutes(t *testing.T) {
 				map[types.NamespacedName]*gatewayv1.GRPCRoute{},
 				test.gateways,
 				snippetsFilters,
+				nil,
 			)
 			g.Expect(helpers.Diff(test.expected, routes)).To(BeEmpty())
 		})
@@ -377,6 +390,21 @@ func TestBuildHTTPRoute(t *testing.T) {
 	addFilterToPath(hrInvalidAndUnresolvableSnippetsFilter, "/filter", invalidSnippetsFilterExtRef)
 	addFilterToPath(hrInvalidAndUnresolvableSnippetsFilter, "/filter", unresolvableSnippetsFilterExtRef)
 
+	// routes with an inference pool backend
+	hrInferencePool := createHTTPRoute("hr", gatewayNsName.Name, "example.com", "/")
+	hrInferencePool.Spec.Rules[0].BackendRefs = []gatewayv1.HTTPBackendRef{
+		{
+			BackendRef: createInferencePoolBackend("ipool", gatewayNsName.Namespace),
+		},
+	}
+	// route with an inference pool backend that does not exist
+	hrInferencePoolDoesNotExist := createHTTPRoute("hr", gatewayNsName.Name, "example.com", "/")
+	hrInferencePoolDoesNotExist.Spec.Rules[0].BackendRefs = []gatewayv1.HTTPBackendRef{
+		{
+			BackendRef: createInferencePoolBackend("ipool-does-not-exist", gatewayNsName.Namespace),
+		},
+	}
+
 	validatorInvalidFieldsInRule := &validationfakes.FakeHTTPFieldsValidator{
 		ValidatePathInMatchStub: func(path string) error {
 			if path == invalidPath {
@@ -943,6 +971,86 @@ func TestBuildHTTPRoute(t *testing.T) {
 			},
 			name: "rule with one invalid and one unresolvable snippets filter extension ref filter",
 		},
+		{
+			validator: &validationfakes.FakeHTTPFieldsValidator{},
+			hr:        hrInferencePool,
+			expected: &L7Route{
+				RouteType: RouteTypeHTTP,
+				Source:    hrInferencePool,
+				ParentRefs: []ParentRef{
+					{
+						Idx:         0,
+						Gateway:     CreateParentRefGateway(gw),
+						SectionName: hrInferencePool.Spec.ParentRefs[0].SectionName,
+					},
+				},
+				Valid:      true,
+				Attachable: true,
+				Spec: L7RouteSpec{
+					Hostnames: hrInferencePool.Spec.Hostnames,
+					Rules: []RouteRule{
+						{
+							ValidMatches: true,
+							Filters: RouteRuleFilters{
+								Valid:   true,
+								Filters: []Filter{},
+							},
+							Matches: hrInferencePool.Spec.Rules[0].Matches,
+							RouteBackendRefs: []RouteBackendRef{
+								{
+									IsInferencePool: true,
+									BackendRef: gatewayv1.BackendRef{
+										BackendObjectReference: gatewayv1.BackendObjectReference{
+											Group:     helpers.GetPointer[gatewayv1.Group](""),
+											Kind:      helpers.GetPointer[gatewayv1.Kind](kinds.Service),
+											Name:      "ipool-pool-svc",
+											Namespace: helpers.GetPointer[gatewayv1.Namespace]("test"),
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			name: "route with an inference pool backend gets converted to service",
+		},
+		{
+			validator: &validationfakes.FakeHTTPFieldsValidator{},
+			hr:        hrInferencePoolDoesNotExist,
+			expected: &L7Route{
+				RouteType: RouteTypeHTTP,
+				Source:    hrInferencePoolDoesNotExist,
+				ParentRefs: []ParentRef{
+					{
+						Idx:         0,
+						Gateway:     CreateParentRefGateway(gw),
+						SectionName: hrInferencePoolDoesNotExist.Spec.ParentRefs[0].SectionName,
+					},
+				},
+				Valid:      true,
+				Attachable: true,
+				Spec: L7RouteSpec{
+					Hostnames: hrInferencePoolDoesNotExist.Spec.Hostnames,
+					Rules: []RouteRule{
+						{
+							ValidMatches: true,
+							Filters: RouteRuleFilters{
+								Valid:   true,
+								Filters: []Filter{},
+							},
+							Matches: hrInferencePoolDoesNotExist.Spec.Rules[0].Matches,
+							RouteBackendRefs: []RouteBackendRef{
+								{
+									BackendRef: createInferencePoolBackend("ipool-does-not-exist", gatewayNsName.Namespace),
+								},
+							},
+						},
+					},
+				},
+			},
+			name: "route with an inference pool backend that doesn't exist",
+		},
 	}
 
 	gws := map[types.NamespacedName]*Gateway{
@@ -957,8 +1065,11 @@ func TestBuildHTTPRoute(t *testing.T) {
 			snippetsFilters := map[types.NamespacedName]*SnippetsFilter{
 				{Namespace: "test", Name: "sf"}: {Valid: true},
 			}
+			inferencePools := map[types.NamespacedName]*inference.InferencePool{
+				{Namespace: "test", Name: "ipool"}: {},
+			}
 
-			route := buildHTTPRoute(test.validator, test.hr, gws, snippetsFilters)
+			route := buildHTTPRoute(test.validator, test.hr, gws, snippetsFilters, inferencePools)
 			g.Expect(helpers.Diff(test.expected, route)).To(BeEmpty())
 		})
 	}
@@ -1090,7 +1201,7 @@ func TestBuildHTTPRouteWithMirrorRoutes(t *testing.T) {
 	g := NewWithT(t)
 
 	routes := map[RouteKey]*L7Route{}
-	l7route := buildHTTPRoute(validator, hr, gateways, snippetsFilters)
+	l7route := buildHTTPRoute(validator, hr, gateways, snippetsFilters, nil)
 	g.Expect(l7route).NotTo(BeNil())
 
 	buildHTTPMirrorRoutes(routes, l7route, hr, gateways, snippetsFilters)
diff --git a/internal/controller/state/graph/inferencepools.go b/internal/controller/state/graph/inferencepools.go
new file mode 100644
index 0000000000..ada688bcc5
--- /dev/null
+++ b/internal/controller/state/graph/inferencepools.go
@@ -0,0 +1,82 @@
+package graph
+
+import (
+	"k8s.io/apimachinery/pkg/types"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
+
+	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller"
+	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds"
+)
+
+// A ReferencedInferencePool represents an InferencePool that is referenced by a Route
+// attached to one of the processed Gateways.
+type ReferencedInferencePool struct {
+	// Source is the original InferencePool; it stays nil if the pool is not among the known pools.
+	Source *inference.InferencePool
+}
+
+// buildReferencedInferencePools collects the InferencePools referenced by Routes across all
+// non-nil Gateways. It returns nil, rather than an empty map, when nothing is referenced.
+func buildReferencedInferencePools(
+	routes map[RouteKey]*L7Route,
+	gws map[types.NamespacedName]*Gateway,
+	inferencePools map[types.NamespacedName]*inference.InferencePool,
+) map[types.NamespacedName]*ReferencedInferencePool {
+	referenced := map[types.NamespacedName]*ReferencedInferencePool{}
+
+	for _, gateway := range gws {
+		// Nil Gateway entries have nothing to inspect.
+		if gateway != nil {
+			processInferencePoolsForGateway(routes, gateway, referenced, inferencePools)
+		}
+	}
+
+	// Normalize the empty result to nil.
+	if len(referenced) > 0 {
+		return referenced
+	}
+
+	return nil
+}
+
+// processInferencePoolsForGateway records, in referencedInferencePools, each InferencePool
+// referenced by a valid Route that routeBelongsToGateway reports as belonging to gw.
+func processInferencePoolsForGateway(
+	routes map[RouteKey]*L7Route,
+	gw *Gateway,
+	referencedInferencePools map[types.NamespacedName]*ReferencedInferencePool,
+	inferencePools map[types.NamespacedName]*inference.InferencePool,
+) {
+	gatewayNsName := client.ObjectKeyFromObject(gw.Source)
+	for _, r := range routes {
+		if !(r.Valid && routeBelongsToGateway(r.ParentRefs, gatewayNsName)) {
+			continue
+		}
+
+		for _, routeRule := range r.Spec.Rules {
+			for _, backend := range routeRule.RouteBackendRefs {
+				kindIsPool := backend.Kind != nil && *backend.Kind == kinds.InferencePool
+				if !backend.IsInferencePool && !kindIsPool {
+					continue
+				}
+
+				// Refs without an explicit namespace resolve to the Route's namespace.
+				ns := r.Source.GetNamespace()
+				if backend.Namespace != nil {
+					ns = string(*backend.Namespace)
+				}
+				name := controller.GetInferencePoolName(string(backend.Name))
+				key := types.NamespacedName{Namespace: ns, Name: name}
+				entry, seen := referencedInferencePools[key]
+				if !seen {
+					entry = &ReferencedInferencePool{}
+					referencedInferencePools[key] = entry
+				}
+				if pool, found := inferencePools[key]; found {
+					entry.Source = pool
+				}
+			}
+		}
+	}
+}
diff --git a/internal/controller/state/graph/inferencepools_test.go b/internal/controller/state/graph/inferencepools_test.go
new file mode 100644
index 0000000000..d67331b5e7
--- /dev/null
+++ b/internal/controller/state/graph/inferencepools_test.go
@@ -0,0 +1,249 @@
+package graph
+
+import (
+	"testing"
+
+	. "github.com/onsi/gomega"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
+	gatewayv1 "sigs.k8s.io/gateway-api/apis/v1"
+
+	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller"
+	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers"
+	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds"
+)
+
+func TestBuildReferencedInferencePools(t *testing.T) {
+	t.Parallel()
+	// Single gateway that the fixture routes attach to through their ParentRefs.
+	gwNsName := types.NamespacedName{Namespace: "test", Name: "gwNsname"}
+	gws := map[types.NamespacedName]*Gateway{
+		gwNsName: {
+			Source: &gatewayv1.Gateway{
+				ObjectMeta: metav1.ObjectMeta{
+					Namespace: gwNsName.Namespace,
+					Name:      gwNsName.Name,
+				},
+			},
+		},
+	}
+	// getNormalRoute returns a new valid route with one InferencePool backend ("pool" in namespace "test").
+	getNormalRoute := func() *L7Route {
+		return &L7Route{
+			Source: &gatewayv1.HTTPRoute{
+				ObjectMeta: metav1.ObjectMeta{
+					Namespace: "test",
+					Name:      "valid-route",
+				},
+			},
+			ParentRefs: []ParentRef{
+				{
+					Gateway: &ParentRefGateway{NamespacedName: gwNsName},
+				},
+			},
+			Valid: true,
+			Spec: L7RouteSpec{
+				Rules: []RouteRule{
+					{
+						RouteBackendRefs: []RouteBackendRef{
+							{
+								IsInferencePool: true,
+								BackendRef: gatewayv1.BackendRef{
+									BackendObjectReference: gatewayv1.BackendObjectReference{
+										Namespace: helpers.GetPointer[gatewayv1.Namespace]("test"),
+										Name:      "pool",
+										Kind:      helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool),
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+		}
+	}
+	// getModifiedRoute applies mod to a freshly built normal route, so each modification starts from a new route.
+	getModifiedRoute := func(mod func(route *L7Route) *L7Route) *L7Route {
+		return mod(getNormalRoute())
+	}
+
+	validRoute := getNormalRoute()
+
+	invalidRoute := getModifiedRoute(func(route *L7Route) *L7Route {
+		route.Valid = false
+		return route
+	})
+
+	tests := []struct {
+		routes         map[RouteKey]*L7Route
+		gws            map[types.NamespacedName]*Gateway
+		inferencePools map[types.NamespacedName]*inference.InferencePool
+		expPools       map[types.NamespacedName]*ReferencedInferencePool
+		name           string
+	}{
+		{
+			name: "no gateways",
+			gws:  nil,
+			routes: map[RouteKey]*L7Route{
+				CreateRouteKey(validRoute.Source): validRoute,
+			},
+			inferencePools: map[types.NamespacedName]*inference.InferencePool{
+				{Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}},
+			},
+			expPools: nil,
+		},
+		{
+			name: "invalid route",
+			gws:  gws,
+			routes: map[RouteKey]*L7Route{
+				CreateRouteKey(validRoute.Source): invalidRoute,
+			},
+			inferencePools: map[types.NamespacedName]*inference.InferencePool{
+				{Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}},
+			},
+			expPools: nil,
+		},
+		{
+			name: "valid route with referenced inferencepool",
+			gws:  gws,
+			routes: map[RouteKey]*L7Route{
+				CreateRouteKey(validRoute.Source): validRoute,
+			},
+			inferencePools: map[types.NamespacedName]*inference.InferencePool{
+				{Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}},
+			},
+			expPools: map[types.NamespacedName]*ReferencedInferencePool{
+				{Name: "pool", Namespace: "test"}: {
+					Source: &inference.InferencePool{ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}},
+				},
+			},
+		},
+		{
+			name: "route with service backend",
+			gws:  gws,
+			routes: map[RouteKey]*L7Route{
+				CreateRouteKey(validRoute.Source): getModifiedRoute(func(route *L7Route) *L7Route {
+					route.Spec.Rules = []RouteRule{
+						{
+							RouteBackendRefs: []RouteBackendRef{
+								{
+									BackendRef: gatewayv1.BackendRef{
+										BackendObjectReference: gatewayv1.BackendObjectReference{
+											Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service),
+										},
+									},
+								},
+							},
+						},
+					}
+					return route
+				}),
+			},
+			inferencePools: map[types.NamespacedName]*inference.InferencePool{
+				{Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}},
+			},
+			expPools: nil,
+		},
+		{
+			name: "route with both inferencepool and service backends",
+			gws:  gws,
+			routes: map[RouteKey]*L7Route{
+				CreateRouteKey(validRoute.Source): getModifiedRoute(func(route *L7Route) *L7Route {
+					route.Spec.Rules[0].RouteBackendRefs = append(route.Spec.Rules[0].RouteBackendRefs,
+						RouteBackendRef{
+							BackendRef: gatewayv1.BackendRef{
+								BackendObjectReference: gatewayv1.BackendObjectReference{
+									Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service),
+								},
+							},
+						},
+					)
+					return route
+				}),
+			},
+			inferencePools: map[types.NamespacedName]*inference.InferencePool{
+				{Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}},
+			},
+			expPools: map[types.NamespacedName]*ReferencedInferencePool{
+				{Name: "pool", Namespace: "test"}: {
+					Source: &inference.InferencePool{ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}},
+				},
+			},
+		},
+		{
+			name: "route with headless InferencePool Service backend",
+			gws:  gws,
+			routes: map[RouteKey]*L7Route{
+				CreateRouteKey(validRoute.Source): getModifiedRoute(func(route *L7Route) *L7Route {
+					route.Spec.Rules = []RouteRule{
+						{
+							RouteBackendRefs: []RouteBackendRef{
+								{
+									IsInferencePool: true,
+									BackendRef: gatewayv1.BackendRef{
+										BackendObjectReference: gatewayv1.BackendObjectReference{
+											Kind:      helpers.GetPointer[gatewayv1.Kind](kinds.Service),
+											Name:      gatewayv1.ObjectName(controller.CreateInferencePoolServiceName("pool")),
+											Namespace: helpers.GetPointer[gatewayv1.Namespace]("test"),
+										},
+									},
+								},
+							},
+						},
+					}
+					return route
+				}),
+			},
+			inferencePools: map[types.NamespacedName]*inference.InferencePool{
+				{Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}},
+			},
+			expPools: map[types.NamespacedName]*ReferencedInferencePool{
+				{Name: "pool", Namespace: "test"}: {
+					Source: &inference.InferencePool{ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}},
+				},
+			},
+		},
+		{
+			name: "inferencepool backend with no namespace uses route namespace",
+			gws:  gws,
+			routes: map[RouteKey]*L7Route{
+				CreateRouteKey(validRoute.Source): getModifiedRoute(func(route *L7Route) *L7Route {
+					route.Spec.Rules[0].RouteBackendRefs[0].Namespace = nil
+					return route
+				}),
+			},
+			inferencePools: map[types.NamespacedName]*inference.InferencePool{
+				{Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}},
+			},
+			expPools: map[types.NamespacedName]*ReferencedInferencePool{
+				{Name: "pool", Namespace: "test"}: {
+					Source: &inference.InferencePool{ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}},
+				},
+			},
+		},
+		{
+			name: "referenced inferencepool does not exist",
+			gws:  gws,
+			routes: map[RouteKey]*L7Route{
+				CreateRouteKey(validRoute.Source): validRoute,
+			},
+			inferencePools: map[types.NamespacedName]*inference.InferencePool{},
+			expPools: map[types.NamespacedName]*ReferencedInferencePool{
+				{Name: "pool", Namespace: "test"}: {
+					Source: nil, // the reference is still recorded, just without a Source
+				},
+			},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			t.Parallel()
+			g := NewWithT(t)
+
+			pools := buildReferencedInferencePools(test.routes, test.gws, test.inferencePools)
+			g.Expect(pools).To(Equal(test.expPools))
+		})
+	}
+}
diff --git a/internal/controller/state/graph/reference_grant.go b/internal/controller/state/graph/reference_grant.go
index b827d47024..3fa04ecc7a 100644
--- a/internal/controller/state/graph/reference_grant.go
+++ b/internal/controller/state/graph/reference_grant.go
@@ -51,7 +51,16 @@ func toSecret(nsname types.NamespacedName) toResource {
 
 func toService(nsname types.NamespacedName) toResource {
 	return toResource{
-		kind:      "Service",
+		kind:      kinds.Service,
+		name:      nsname.Name,
+		namespace: nsname.Namespace,
+	}
+}
+
+func toInferencePool(nsname types.NamespacedName) toResource {
+	return toResource{
+		group:     inferenceAPIGroup,
+		kind:      kinds.InferencePool,
 		name:      nsname.Name,
 		namespace: nsname.Namespace,
 	}
@@ -139,6 +148,7 @@ func (r *referenceGrantResolver) refAllowed(to toResource, from fromResource) bo
 	// of the particular kind in the namespace
 	allInNamespaceKey := allowedReference{
 		to: toResource{
+			group:     to.group,
 			kind:      to.kind,
 			namespace: to.namespace,
 		},
diff --git a/internal/controller/state/graph/reference_grant_test.go b/internal/controller/state/graph/reference_grant_test.go
index 21fee614e1..bf97f22c25 100644
--- a/internal/controller/state/graph/reference_grant_test.go
+++ b/internal/controller/state/graph/reference_grant_test.go
@@ -189,7 +189,7 @@ func TestToService(t *testing.T) {
 	ref := toService(types.NamespacedName{Namespace: "ns", Name: "service"})
 
 	exp := toResource{
-		kind:      "Service",
+		kind:      kinds.Service,
 		namespace: "ns",
 		name:      "service",
 	}
@@ -198,6 +198,21 @@ func TestToService(t *testing.T) {
 	g.Expect(ref).To(Equal(exp))
 }
 
+func TestToInferencePool(t *testing.T) {
+	t.Parallel()
+	g := NewWithT(t)
+
+	// toInferencePool must copy namespace and name and set the inference API group and InferencePool kind.
+	nsName := types.NamespacedName{Namespace: "ns", Name: "inference-pool"}
+
+	g.Expect(toInferencePool(nsName)).To(Equal(toResource{
+		group:     inferenceAPIGroup,
+		kind:      kinds.InferencePool,
+		namespace: "ns",
+		name:      "inference-pool",
+	}))
+}
+
 func TestFromGateway(t *testing.T) {
 	t.Parallel()
 	ref := fromGateway("ns")
@@ -306,7 +321,24 @@ func TestRefAllowedFrom(t *testing.T) {
 				},
 				To: []v1beta1.ReferenceGrantTo{
 					{
-						Kind: "Service",
+						Kind: kinds.Service,
+					},
+				},
+			},
+		},
+		{Namespace: allowedHTTPRouteNs, Name: "hr-2-ipool"}: {
+			Spec: v1beta1.ReferenceGrantSpec{
+				From: []v1beta1.ReferenceGrantFrom{
+					{
+						Group:     v1beta1.GroupName,
+						Kind:      kinds.HTTPRoute,
+						Namespace: v1beta1.Namespace(hrNs),
+					},
+				},
+				To: []v1beta1.ReferenceGrantTo{
+					{
+						Group: inferenceAPIGroup,
+						Kind:  kinds.InferencePool,
 					},
 				},
 			},
@@ -322,7 +354,7 @@ func TestRefAllowedFrom(t *testing.T) {
 				},
 				To: []v1beta1.ReferenceGrantTo{
 					{
-						Kind: "Service",
+						Kind: kinds.Service,
 					},
 				},
 			},
@@ -338,7 +370,7 @@ func TestRefAllowedFrom(t *testing.T) {
 				},
 				To: []v1beta1.ReferenceGrantTo{
 					{
-						Kind: "Service",
+						Kind: kinds.Service,
 					},
 				},
 			},
@@ -375,6 +407,18 @@ func TestRefAllowedFrom(t *testing.T) {
 			toResource:     toService(notAllowedNsName),
 			expAllowed:     false,
 		},
+		{
+			name:           "ref allowed from httproute to inferencepool",
+			refAllowedFrom: fromHTTPRoute(hrNs),
+			toResource:     toInferencePool(allowedHTTPRouteNsName),
+			expAllowed:     true,
+		},
+		{
+			name:           "ref not allowed from httproute to inferencepool",
+			refAllowedFrom: fromHTTPRoute(hrNs),
+			toResource:     toInferencePool(notAllowedNsName),
+			expAllowed:     false,
+		},
 		{
 			name:           "ref allowed from grpcroute to service",
 			refAllowedFrom: fromGRPCRoute(grNs),
diff --git a/internal/controller/state/graph/route_common.go b/internal/controller/state/graph/route_common.go
index c156ca738a..f3d3b04e4a 100644
--- a/internal/controller/state/graph/route_common.go
+++ b/internal/controller/state/graph/route_common.go
@@ -10,6 +10,7 @@ import (
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/validation/field"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	v1 "sigs.k8s.io/gateway-api/apis/v1"
 	v1alpha "sigs.k8s.io/gateway-api/apis/v1alpha2"
 
@@ -19,7 +20,10 @@ import (
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds"
 )
 
-const wildcardHostname = "~^"
+const (
+	wildcardHostname  = "~^"
+	inferenceAPIGroup = "inference.networking.k8s.io"
+)
 
 // ParentRef describes a reference to a parent in a Route.
 type ParentRef struct {
@@ -157,11 +161,15 @@ type RouteRule struct {
 
 // RouteBackendRef is a wrapper for v1.BackendRef and any BackendRef filters from the HTTPRoute or GRPCRoute.
 type RouteBackendRef struct {
+	v1.BackendRef
+
 	// If this backend is defined in a RequestMirror filter, this value will indicate the filter's index.
 	MirrorBackendIdx *int
 
-	v1.BackendRef
 	Filters []any
+
+	// IsInferencePool indicates if this backend is an InferencePool disguised as a Service.
+	IsInferencePool bool
 }
 
 // CreateRouteKey takes a client.Object and creates a RouteKey.
@@ -242,6 +250,7 @@ func buildRoutesForGateways(
 	grpcRoutes map[types.NamespacedName]*v1.GRPCRoute,
 	gateways map[types.NamespacedName]*Gateway,
 	snippetsFilters map[types.NamespacedName]*SnippetsFilter,
+	inferencePools map[types.NamespacedName]*inference.InferencePool,
 ) map[RouteKey]*L7Route {
 	if len(gateways) == 0 {
 		return nil
@@ -250,7 +259,7 @@ func buildRoutesForGateways(
 	routes := make(map[RouteKey]*L7Route)
 
 	for _, route := range httpRoutes {
-		r := buildHTTPRoute(validator, route, gateways, snippetsFilters)
+		r := buildHTTPRoute(validator, route, gateways, snippetsFilters, inferencePools)
 		if r == nil {
 			continue
 		}
diff --git a/internal/controller/state/graph/service.go b/internal/controller/state/graph/service.go
index d43ecacfd8..3a702facc9 100644
--- a/internal/controller/state/graph/service.go
+++ b/internal/controller/state/graph/service.go
@@ -34,7 +34,6 @@ func buildReferencedServices(
 		}
 
 		processL7RoutesForGateway(l7routes, gw, gwNsName, referencedServices, services)
-
 		processL4RoutesForGateway(l4Routes, gw, gwNsName, referencedServices, services)
 	}
 
diff --git a/internal/framework/controller/resource.go b/internal/framework/controller/resource.go
index a0d49e3789..d17662169e 100644
--- a/internal/framework/controller/resource.go
+++ b/internal/framework/controller/resource.go
@@ -2,10 +2,31 @@ package controller
 
 import (
 	"fmt"
+	"strings"
 )
 
+// inferencePoolServiceSuffix is the suffix of the headless Service name for an InferencePool.
+const inferencePoolServiceSuffix = "-pool-svc"
+
 // CreateNginxResourceName creates the base resource name for all nginx resources
 // created by the control plane.
 func CreateNginxResourceName(prefix, suffix string) string {
 	return fmt.Sprintf("%s-%s", prefix, suffix)
 }
+
+// CreateInferencePoolServiceName creates the name for a headless Service that we create for an InferencePool.
+// Service names are DNS labels (at most 63 characters), so the bare pool name is used when the suffix won't fit.
+func CreateInferencePoolServiceName(name string) string {
+	const maxServiceNameLen = 63
+	svcName := name + inferencePoolServiceSuffix
+	if len(svcName) > maxServiceNameLen {
+		return name
+	}
+
+	return svcName
+}
+
+// GetInferencePoolName returns the InferencePool name for a headless Service name by trimming the "-pool-svc" suffix.
+func GetInferencePoolName(serviceName string) string {
+	return strings.TrimSuffix(serviceName, inferencePoolServiceSuffix)
+}
diff --git a/internal/framework/kinds/kinds.go b/internal/framework/kinds/kinds.go
index 35ca8e2b00..b59b06df96 100644
--- a/internal/framework/kinds/kinds.go
+++ b/internal/framework/kinds/kinds.go
@@ -25,6 +25,12 @@ const (
 	BackendTLSPolicy = "BackendTLSPolicy"
 )
 
+// Gateway API Inference Extension kinds.
+const (
+	// InferencePool is the InferencePool kind.
+	InferencePool = "InferencePool"
+)
+
 // Core API Kinds.
 const (
 	// Service is the Service kind.
diff --git a/tests/go.mod b/tests/go.mod
index 92c39e24f2..7e748dd97b 100644
--- a/tests/go.mod
+++ b/tests/go.mod
@@ -68,7 +68,7 @@ require (
 	golang.org/x/sys v0.35.0 // indirect
 	golang.org/x/term v0.34.0 // indirect
 	golang.org/x/text v0.29.0 // indirect
-	golang.org/x/time v0.9.0 // indirect
+	golang.org/x/time v0.12.0 // indirect
 	golang.org/x/tools v0.36.0 // indirect
 	gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
 	google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 // indirect
diff --git a/tests/go.sum b/tests/go.sum
index 7d5375ca36..f24e98ebf3 100644
--- a/tests/go.sum
+++ b/tests/go.sum
@@ -191,8 +191,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk=
 golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4=
-golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY=
-golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
+golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE=
+golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
 golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=

From 183dc72d183c2197edc91481a7ba3388aa01424f Mon Sep 17 00:00:00 2001
From: Saylor Berman <s.berman@f5.com>
Date: Thu, 18 Sep 2025 12:38:25 -0600
Subject: [PATCH 03/12] Add golang shim for comms with EPP (#3930)

Problem: In order for NGINX to get the endpoint of the AI workload from the EndpointPicker, it needs to send a gRPC request using the proper protobuf protocol.

Solution: When the inference extension feature is enabled, a simple Go server is injected as an additional container. It listens for requests from our (upcoming) NJS module and forwards them to the configured EPP, returning the selected endpoint in a response header.
---
 cmd/gateway/commands.go                       |  14 +
 cmd/gateway/endpoint_picker.go                | 190 +++++++++++++
 cmd/gateway/endpoint_picker_test.go           | 261 ++++++++++++++++++
 cmd/gateway/main.go                           |   1 +
 go.mod                                        |   4 +
 go.sum                                        |   8 +
 internal/controller/manager.go                |   1 +
 internal/controller/provisioner/objects.go    |  25 ++
 .../controller/provisioner/objects_test.go    |  54 ++++
 .../controller/provisioner/provisioner.go     |   1 +
 10 files changed, 559 insertions(+)
 create mode 100644 cmd/gateway/endpoint_picker.go
 create mode 100644 cmd/gateway/endpoint_picker_test.go

diff --git a/cmd/gateway/commands.go b/cmd/gateway/commands.go
index 012b27de6b..76b0ecbc96 100644
--- a/cmd/gateway/commands.go
+++ b/cmd/gateway/commands.go
@@ -757,6 +757,20 @@ func createSleepCommand() *cobra.Command {
 	return cmd
 }
 
+// createEndpointPickerCommand builds the "endpoint-picker" subcommand, which runs the shim HTTP server used for
+// communication between NGINX and the Gateway API Inference Extension Endpoint Picker.
+func createEndpointPickerCommand() *cobra.Command {
+	return &cobra.Command{
+		Use:   "endpoint-picker",
+		Short: "Shim server for communication between NGINX and the Gateway API Inference Extension Endpoint Picker",
+		RunE: func(_ *cobra.Command, _ []string) error {
+			shimLogger := ctlrZap.New().WithName("endpoint-picker-shim")
+			shimHandler := createEndpointPickerHandler(realExtProcClientFactory(), shimLogger)
+			return endpointPickerServer(shimHandler)
+		},
+	}
+}
+
 func parseFlags(flags *pflag.FlagSet) ([]string, []string) {
 	var flagKeys, flagValues []string
 
diff --git a/cmd/gateway/endpoint_picker.go b/cmd/gateway/endpoint_picker.go
new file mode 100644
index 0000000000..7c67a83671
--- /dev/null
+++ b/cmd/gateway/endpoint_picker.go
@@ -0,0 +1,190 @@
+package main
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"net"
+	"net/http"
+	"time"
+
+	corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
+	extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
+	"github.com/go-logr/logr"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials/insecure"
+	eppMetadata "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metadata"
+)
+
+const (
+	// defaultPort is the default port for this server to listen on. If collisions become a problem,
+	// we can make this configurable via the NginxProxy resource.
+	defaultPort = 54800 // why 54800? Sum "nginx" in ASCII and multiply by 100.
+	// eppEndpointHostHeader is the HTTP header used to specify the EPP endpoint host, set by the NJS module caller.
+	eppEndpointHostHeader = "X-EPP-Host"
+	// eppEndpointPortHeader is the HTTP header used to specify the EPP endpoint port, set by the NJS module caller.
+	eppEndpointPortHeader = "X-EPP-Port"
+)
+
+// extProcClientFactory creates a new ExternalProcessorClient and returns a close function.
+type extProcClientFactory func(target string) (extprocv3.ExternalProcessorClient, func() error, error)
+
+// endpointPickerServer serves handler over HTTP on 127.0.0.1:defaultPort and returns ListenAndServe's error.
+func endpointPickerServer(handler http.Handler) error {
+	srv := http.Server{
+		ReadHeaderTimeout: 10 * time.Second,
+		Handler:           handler,
+		Addr:              fmt.Sprintf("127.0.0.1:%d", defaultPort),
+	}
+	return srv.ListenAndServe()
+}
+
+// realExtProcClientFactory returns the production extProcClientFactory. Every call to the factory creates a new
+// gRPC client connection (insecure transport credentials) for target and hands back its Close as cleanup.
+func realExtProcClientFactory() extProcClientFactory {
+	return func(target string) (extprocv3.ExternalProcessorClient, func() error, error) {
+		grpcConn, connErr := grpc.NewClient(target, grpc.WithTransportCredentials(insecure.NewCredentials()))
+		if connErr != nil {
+			return nil, nil, connErr
+		}
+		return extprocv3.NewExternalProcessorClient(grpcConn), grpcConn.Close, nil
+	}
+}
+
+// createEndpointPickerHandler returns an http.Handler that relays each request to the EndpointPicker over gRPC.
+func createEndpointPickerHandler(factory extProcClientFactory, logger logr.Logger) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		host := r.Header.Get(eppEndpointHostHeader)
+		port := r.Header.Get(eppEndpointPortHeader)
+		if host == "" || port == "" {
+			msg := fmt.Sprintf("missing at least one of required headers: %s and %s",
+				eppEndpointHostHeader, eppEndpointPortHeader)
+			logger.Error(errors.New(msg), "error contacting EndpointPicker")
+			http.Error(w, msg, http.StatusBadRequest)
+			return
+		}
+
+		target := net.JoinHostPort(host, port)
+		logger.Info("Getting inference workload endpoint from EndpointPicker", "endpointPicker", target)
+
+		client, closeConn, err := factory(target)
+		if err != nil {
+			logger.Error(err, "error creating gRPC client")
+			http.Error(w, fmt.Sprintf("error creating gRPC client: %v", err), http.StatusInternalServerError)
+			return
+		}
+		defer func() {
+			if err := closeConn(); err != nil {
+				logger.Error(err, "error closing gRPC connection")
+			}
+		}()
+
+		stream, err := client.Process(r.Context())
+		if err != nil {
+			logger.Error(err, "error opening ext_proc stream")
+			http.Error(w, fmt.Sprintf("error opening ext_proc stream: %v", err), http.StatusBadGateway)
+			return
+		}
+
+		if code, err := sendRequest(stream, r); err != nil {
+			logger.Error(err, "error sending request")
+			http.Error(w, err.Error(), code)
+			return
+		}
+
+		// Receive responses until the stream ends, copying the destination endpoint header onto the reply.
+		for {
+			resp, err := stream.Recv()
+			if errors.Is(err, io.EOF) {
+				break // End of stream
+			} else if err != nil {
+				logger.Error(err, "error receiving from ext_proc")
+				http.Error(w, fmt.Sprintf("error receiving from ext_proc: %v", err), http.StatusBadGateway)
+				return
+			}
+
+			if ir := resp.GetImmediateResponse(); ir != nil {
+				code, body := int(ir.GetStatus().GetCode()), ir.GetBody()
+				logger.Error(fmt.Errorf("code: %d, body: %s", code, body), "received immediate response")
+				// An unset (0) or out-of-range status would make WriteHeader panic; answer 502 instead.
+				if code < 100 || code > 999 {
+					code = http.StatusBadGateway
+				}
+				http.Error(w, string(body), code)
+				return
+			}
+
+			headers := resp.GetRequestHeaders().GetResponse().GetHeaderMutation().GetSetHeaders()
+			for _, h := range headers {
+				if h.GetHeader().GetKey() == eppMetadata.DestinationEndpointKey {
+					endpoint := string(h.GetHeader().GetRawValue())
+					w.Header().Set(h.GetHeader().GetKey(), endpoint)
+					logger.Info("Found endpoint", "endpoint", endpoint)
+				}
+			}
+		}
+		w.WriteHeader(http.StatusOK)
+	})
+}
+
+// sendRequest writes the request headers and then the request body to the ext_proc stream and closes the send
+// side. On failure it returns the HTTP status code the caller should reply with; on success it returns 0, nil.
+func sendRequest(stream extprocv3.ExternalProcessor_ProcessClient, r *http.Request) (int, error) {
+	if sendErr := stream.Send(buildHeaderRequest(r)); sendErr != nil {
+		return http.StatusBadGateway, fmt.Errorf("error sending headers: %w", sendErr)
+	}
+
+	bodyMsg, buildErr := buildBodyRequest(r)
+	if buildErr != nil {
+		return http.StatusInternalServerError, fmt.Errorf("error building body request: %w", buildErr)
+	}
+	if sendErr := stream.Send(bodyMsg); sendErr != nil {
+		return http.StatusBadGateway, fmt.Errorf("error sending body: %w", sendErr)
+	}
+
+	if closeErr := stream.CloseSend(); closeErr != nil {
+		return http.StatusInternalServerError, fmt.Errorf("error closing stream: %w", closeErr)
+	}
+	return 0, nil
+}
+
+// buildHeaderRequest converts the HTTP request headers into the ext_proc RequestHeaders message. Every value
+// of a multi-valued header becomes its own HeaderValue entry. Entries follow Go's map iteration order, so
+// their order is not deterministic.
+func buildHeaderRequest(r *http.Request) *extprocv3.ProcessingRequest {
+	headers := make([]*corev3.HeaderValue, 0, len(r.Header))
+	for name, vals := range r.Header {
+		for _, val := range vals {
+			headers = append(headers, &corev3.HeaderValue{
+				Key:   name,
+				Value: val,
+			})
+		}
+	}
+
+	// EndOfStream is false: the request body is sent as a separate message (see sendRequest).
+	return &extprocv3.ProcessingRequest{
+		Request: &extprocv3.ProcessingRequest_RequestHeaders{
+			RequestHeaders: &extprocv3.HttpHeaders{
+				Headers:     &corev3.HeaderMap{Headers: headers},
+				EndOfStream: false,
+			},
+		},
+	}
+}
+
+// buildBodyRequest reads the whole HTTP request body into memory and wraps it in a single ext_proc RequestBody
+// message with EndOfStream set. It returns an error if the body cannot be read.
+func buildBodyRequest(r *http.Request) (*extprocv3.ProcessingRequest, error) {
+	payload, readErr := io.ReadAll(r.Body)
+	if readErr != nil {
+		return nil, fmt.Errorf("error reading request body: %w", readErr)
+	}
+
+	// The body is sent as one chunk, so this message also marks the end of the request stream.
+	httpBody := &extprocv3.HttpBody{Body: payload, EndOfStream: true}
+
+	return &extprocv3.ProcessingRequest{
+		Request: &extprocv3.ProcessingRequest_RequestBody{RequestBody: httpBody},
+	}, nil
+}
diff --git a/cmd/gateway/endpoint_picker_test.go b/cmd/gateway/endpoint_picker_test.go
new file mode 100644
index 0000000000..99808348fc
--- /dev/null
+++ b/cmd/gateway/endpoint_picker_test.go
@@ -0,0 +1,261 @@
+package main
+
+import (
+	"context"
+	"errors"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
+	extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
+	typev3 "github.com/envoyproxy/go-control-plane/envoy/type/v3"
+	"github.com/go-logr/logr"
+	. "github.com/onsi/gomega"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/metadata"
+	eppMetadata "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metadata"
+)
+
+// mockExtProcClient is a test double for extprocv3.ExternalProcessorClient. Set ProcessFunc to control what
+// Process returns.
+type mockExtProcClient struct {
+	ProcessFunc func(context.Context, ...grpc.CallOption) (extprocv3.ExternalProcessor_ProcessClient, error)
+}
+
+// Process calls ProcessFunc when it is set; otherwise it returns a "not implemented" error.
+func (m *mockExtProcClient) Process(
+	ctx context.Context,
+	opts ...grpc.CallOption,
+) (extprocv3.ExternalProcessor_ProcessClient, error) {
+	if m.ProcessFunc == nil {
+		return nil, errors.New("not implemented")
+	}
+	return m.ProcessFunc(ctx, opts...)
+}
+
+type mockProcessClient struct {
+	SendFunc      func(*extprocv3.ProcessingRequest) error      // Send override; when nil, Send returns nil
+	RecvFunc      func() (*extprocv3.ProcessingResponse, error) // Recv override; when nil, Recv returns io.EOF
+	CloseSendFunc func() error                                  // CloseSend override; when nil, it returns nil
+	Ctx           context.Context                               // Context result; when nil, context.Background()
+}
+
+func (m *mockProcessClient) Send(req *extprocv3.ProcessingRequest) error {
+	if m.SendFunc != nil {
+		return m.SendFunc(req)
+	}
+	return nil
+}
+
+func (m *mockProcessClient) Recv() (*extprocv3.ProcessingResponse, error) {
+	if m.RecvFunc != nil {
+		return m.RecvFunc()
+	}
+	return nil, io.EOF
+}
+
+func (*mockProcessClient) RecvMsg(any) error { return nil }
+func (*mockProcessClient) SendMsg(any) error { return nil }
+
+func (m *mockProcessClient) CloseSend() error {
+	if m.CloseSendFunc != nil {
+		return m.CloseSendFunc()
+	}
+	return nil
+}
+
+func (m *mockProcessClient) Context() context.Context {
+	if m.Ctx != nil {
+		return m.Ctx
+	}
+	return context.Background()
+}
+
+func (*mockProcessClient) Header() (metadata.MD, error) { return nil, nil } //nolint:nilnil // interface satisfier
+func (*mockProcessClient) Trailer() metadata.MD         { return nil }
+
+func TestEndpointPickerHandler_Success(t *testing.T) {
+	t.Parallel()
+	g := NewWithT(t)
+
+	// Mock stream: the first Recv yields a header mutation with the destination endpoint; later calls yield io.EOF.
+	callCount := 0
+	client := &mockProcessClient{
+		SendFunc: func(*extprocv3.ProcessingRequest) error { return nil },
+		RecvFunc: func() (*extprocv3.ProcessingResponse, error) {
+			if callCount == 0 {
+				callCount++
+				resp := &extprocv3.ProcessingResponse{
+					Response: &extprocv3.ProcessingResponse_RequestHeaders{
+						RequestHeaders: &extprocv3.HeadersResponse{
+							Response: &extprocv3.CommonResponse{
+								HeaderMutation: &extprocv3.HeaderMutation{
+									SetHeaders: []*corev3.HeaderValueOption{{
+										Header: &corev3.HeaderValue{
+											Key:      eppMetadata.DestinationEndpointKey,
+											RawValue: []byte("test-value"),
+										},
+									}},
+								},
+							},
+						},
+					},
+				}
+				return resp, nil
+			}
+			return nil, io.EOF
+		},
+	}
+
+	extProcClient := &mockExtProcClient{
+		ProcessFunc: func(context.Context, ...grpc.CallOption) (extprocv3.ExternalProcessor_ProcessClient, error) {
+			return client, nil
+		},
+	}
+
+	factory := func(string) (extprocv3.ExternalProcessorClient, func() error, error) {
+		return extProcClient, func() error { return nil }, nil
+	}
+
+	h := createEndpointPickerHandler(factory, logr.Discard())
+	req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader("test body"))
+	req.Header.Set(eppEndpointHostHeader, "test-host")
+	req.Header.Set(eppEndpointPortHeader, "1234")
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+
+	h.ServeHTTP(w, req)
+	// The handler must answer 200 and expose the endpoint header value supplied by the mock stream.
+	resp := w.Result()
+	g.Expect(resp.StatusCode).To(Equal(http.StatusOK))
+	g.Expect(resp.Header.Get(eppMetadata.DestinationEndpointKey)).To(Equal("test-value"))
+}
+
+func TestEndpointPickerHandler_ImmediateResponse(t *testing.T) {
+	t.Parallel()
+	g := NewWithT(t)
+	// Mock stream: every Recv yields an ImmediateResponse with status 500 and body "some error".
+	client := &mockProcessClient{
+		SendFunc: func(*extprocv3.ProcessingRequest) error { return nil },
+		RecvFunc: func() (*extprocv3.ProcessingResponse, error) {
+			resp := &extprocv3.ProcessingResponse{
+				Response: &extprocv3.ProcessingResponse_ImmediateResponse{
+					ImmediateResponse: &extprocv3.ImmediateResponse{
+						Status: &typev3.HttpStatus{Code: http.StatusInternalServerError},
+						Body:   []byte("some error"),
+					},
+				},
+			}
+			return resp, nil
+		},
+	}
+
+	extClient := &mockExtProcClient{
+		ProcessFunc: func(context.Context, ...grpc.CallOption) (extprocv3.ExternalProcessor_ProcessClient, error) {
+			return client, nil
+		},
+	}
+
+	factory := func(string) (extprocv3.ExternalProcessorClient, func() error, error) {
+		return extClient, func() error { return nil }, nil
+	}
+
+	h := createEndpointPickerHandler(factory, logr.Discard())
+	req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader("test body"))
+	req.Header.Set(eppEndpointHostHeader, "test-host")
+	req.Header.Set(eppEndpointPortHeader, "1234")
+	w := httptest.NewRecorder()
+
+	h.ServeHTTP(w, req)
+
+	resp := w.Result()
+	// The immediate response's status and body must be relayed to the HTTP caller.
+	g.Expect(resp.StatusCode).To(Equal(http.StatusInternalServerError))
+	body, _ := io.ReadAll(resp.Body)
+	g.Expect(string(body)).To(ContainSubstring("some error"))
+}
+
+func TestEndpointPickerHandler_Errors(t *testing.T) {
+	t.Parallel()
+	g := NewWithT(t)
+	// runErrorTestCase sends one POST through the handler and checks the status code and a body substring.
+	runErrorTestCase := func(factory func(string) (extprocv3.ExternalProcessorClient, func() error, error),
+		setHeaders bool,
+		expectedStatus int,
+		expectedBodySubstring string,
+	) {
+		h := createEndpointPickerHandler(factory, logr.Discard())
+		req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader("test body"))
+		if setHeaders {
+			req.Header.Set(eppEndpointHostHeader, "test-host")
+			req.Header.Set(eppEndpointPortHeader, "1234")
+		}
+		w := httptest.NewRecorder()
+		h.ServeHTTP(w, req)
+		resp := w.Result()
+		g.Expect(resp.StatusCode).To(Equal(expectedStatus))
+		body, _ := io.ReadAll(resp.Body)
+		g.Expect(string(body)).To(ContainSubstring(expectedBodySubstring))
+	}
+
+	// 1. Error creating gRPC client
+	factoryErr := errors.New("factory error")
+	factory := func(string) (extprocv3.ExternalProcessorClient, func() error, error) {
+		return nil, nil, factoryErr
+	}
+	runErrorTestCase(factory, true, http.StatusInternalServerError, "error creating gRPC client")
+
+	// 2. Error opening ext_proc stream
+	extProcClient := &mockExtProcClient{
+		ProcessFunc: func(context.Context, ...grpc.CallOption) (extprocv3.ExternalProcessor_ProcessClient, error) {
+			return nil, errors.New("process error")
+		},
+	}
+	factory = func(string) (extprocv3.ExternalProcessorClient, func() error, error) {
+		return extProcClient, func() error { return nil }, nil
+	}
+	runErrorTestCase(factory, true, http.StatusBadGateway, "error opening ext_proc stream")
+
+	// 3. Error sending headers (Send fails on the first message)
+	client := &mockProcessClient{
+		SendFunc: func(*extprocv3.ProcessingRequest) error {
+			return errors.New("send headers error")
+		},
+		RecvFunc: func() (*extprocv3.ProcessingResponse, error) { return nil, io.EOF },
+	}
+	extProcClient = &mockExtProcClient{
+		ProcessFunc: func(context.Context, ...grpc.CallOption) (extprocv3.ExternalProcessor_ProcessClient, error) {
+			return client, nil
+		},
+	}
+	factory = func(string) (extprocv3.ExternalProcessorClient, func() error, error) {
+		return extProcClient, func() error { return nil }, nil
+	}
+	runErrorTestCase(factory, true, http.StatusBadGateway, "error sending headers")
+
+	// 4. Error sending body (Send fails only for the RequestBody message)
+	client = &mockProcessClient{
+		SendFunc: func(req *extprocv3.ProcessingRequest) error {
+			if req.GetRequestBody() != nil {
+				return errors.New("send body error")
+			}
+			return nil
+		},
+		RecvFunc: func() (*extprocv3.ProcessingResponse, error) { return nil, io.EOF },
+	}
+	extProcClient = &mockExtProcClient{
+		ProcessFunc: func(context.Context, ...grpc.CallOption) (extprocv3.ExternalProcessor_ProcessClient, error) {
+			return client, nil
+		},
+	}
+	factory = func(string) (extprocv3.ExternalProcessorClient, func() error, error) {
+		return extProcClient, func() error { return nil }, nil
+	}
+	runErrorTestCase(factory, true, http.StatusBadGateway, "error sending body")
+
+	// 5. Missing X-EPP-Host/X-EPP-Port headers (the handler rejects the request before calling the factory)
+	runErrorTestCase(factory, false, http.StatusBadRequest, "missing at least one of required headers")
+}
diff --git a/cmd/gateway/main.go b/cmd/gateway/main.go
index 515fcc3f16..c932a4ee4c 100644
--- a/cmd/gateway/main.go
+++ b/cmd/gateway/main.go
@@ -25,6 +25,7 @@ func main() {
 		createGenerateCertsCommand(),
 		createInitializeCommand(),
 		createSleepCommand(),
+		createEndpointPickerCommand(),
 	)
 
 	if err := rootCmd.Execute(); err != nil {
diff --git a/go.mod b/go.mod
index f79dd3b882..b568b83870 100644
--- a/go.mod
+++ b/go.mod
@@ -3,6 +3,7 @@ module github.com/nginx/nginx-gateway-fabric/v2
 go 1.24.2
 
 require (
+	github.com/envoyproxy/go-control-plane/envoy v1.32.4
 	github.com/fsnotify/fsnotify v1.9.0
 	github.com/go-logr/logr v1.4.3
 	github.com/google/go-cmp v0.7.0
@@ -37,8 +38,10 @@ require (
 	github.com/beorn7/perks v1.0.1 // indirect
 	github.com/cenkalti/backoff/v5 v5.0.3 // indirect
 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
+	github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 // indirect
 	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
 	github.com/emicklei/go-restful/v3 v3.12.2 // indirect
+	github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
 	github.com/evanphx/json-patch/v5 v5.9.11 // indirect
 	github.com/fxamacker/cbor/v2 v2.9.0 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
@@ -60,6 +63,7 @@ require (
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
 	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
+	github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
 	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
 	github.com/prometheus/client_model v0.6.2 // indirect
 	github.com/prometheus/common v0.66.1 // indirect
diff --git a/go.sum b/go.sum
index b81ec9aeb5..2779075b54 100644
--- a/go.sum
+++ b/go.sum
@@ -16,6 +16,8 @@ github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1x
 github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
 github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
 github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 h1:aQ3y1lwWyqYPiWZThqv1aFbZMiM9vblcSArJRf2Irls=
+github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=
 github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
 github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
 github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A=
@@ -39,6 +41,10 @@ github.com/ebitengine/purego v0.8.4 h1:CF7LEKg5FFOsASUj0+QwaXf8Ht6TlFxg09+S9wz0o
 github.com/ebitengine/purego v0.8.4/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
 github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU=
 github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
+github.com/envoyproxy/go-control-plane/envoy v1.32.4 h1:jb83lalDRZSpPWW2Z7Mck/8kXZ5CQAFYVjQcdVIr83A=
+github.com/envoyproxy/go-control-plane/envoy v1.32.4/go.mod h1:Gzjc5k8JcJswLjAx1Zm+wSYE20UrLtt7JZMWiWQXQEw=
+github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8=
+github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU=
 github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k=
 github.com/evanphx/json-patch v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ=
 github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU=
@@ -155,6 +161,8 @@ github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNH
 github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo=
+github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
 github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
diff --git a/internal/controller/manager.go b/internal/controller/manager.go
index d02411571b..dc9c4835bd 100644
--- a/internal/controller/manager.go
+++ b/internal/controller/manager.go
@@ -220,6 +220,7 @@ func StartManager(cfg config.Config) error {
 			NginxDockerSecretNames:         cfg.NginxDockerSecretNames,
 			PlusUsageConfig:                &cfg.UsageReportConfig,
 			NginxOneConsoleTelemetryConfig: cfg.NginxOneConsoleTelemetryConfig,
+			InferenceExtension:             cfg.InferenceExtension,
 		},
 	)
 	if err != nil {
diff --git a/internal/controller/provisioner/objects.go b/internal/controller/provisioner/objects.go
index 475a3e7319..e2e3eb8517 100644
--- a/internal/controller/provisioner/objects.go
+++ b/internal/controller/provisioner/objects.go
@@ -899,6 +899,7 @@ func (p *NginxProvisioner) buildNginxPodTemplateSpec(
 						{MountPath: "/etc/nginx/events-includes", Name: "nginx-events-includes"},
 					},
 					SecurityContext: &corev1.SecurityContext{
+						AllowPrivilegeEscalation: helpers.GetPointer(false),
 						Capabilities: &corev1.Capabilities{
 							Drop: []corev1.Capability{"ALL"},
 						},
@@ -1119,6 +1120,30 @@ func (p *NginxProvisioner) buildNginxPodTemplateSpec(
 		spec.Spec.Containers[0].VolumeMounts = volumeMounts
 	}
 
+	if p.cfg.InferenceExtension {
+		spec.Spec.Containers = append(spec.Spec.Containers, corev1.Container{
+			Name:            "endpoint-picker-shim",
+			Image:           p.cfg.GatewayPodConfig.Image,
+			ImagePullPolicy: pullPolicy,
+			Command: []string{
+				"/usr/bin/gateway",
+				"endpoint-picker",
+			},
+			SecurityContext: &corev1.SecurityContext{
+				AllowPrivilegeEscalation: helpers.GetPointer(false),
+				Capabilities: &corev1.Capabilities{
+					Drop: []corev1.Capability{"ALL"},
+				},
+				ReadOnlyRootFilesystem: helpers.GetPointer(true),
+				RunAsGroup:             helpers.GetPointer[int64](1001),
+				RunAsUser:              helpers.GetPointer[int64](101),
+				SeccompProfile: &corev1.SeccompProfile{
+					Type: corev1.SeccompProfileTypeRuntimeDefault,
+				},
+			},
+		})
+	}
+
 	return spec
 }
 
diff --git a/internal/controller/provisioner/objects_test.go b/internal/controller/provisioner/objects_test.go
index 2327db259d..30403f85a7 100644
--- a/internal/controller/provisioner/objects_test.go
+++ b/internal/controller/provisioner/objects_test.go
@@ -1765,3 +1765,57 @@ func TestBuildNginxResourceObjects_Patches(t *testing.T) {
 	g.Expect(svc.Labels).To(HaveKeyWithValue("app", "nginx"))
 	g.Expect(dep.Labels).To(HaveKeyWithValue("app", "nginx"))
 }
+
+func TestBuildNginxResourceObjects_InferenceExtension(t *testing.T) {
+	t.Parallel()
+	g := NewWithT(t)
+
+	agentTLSSecret := &corev1.Secret{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      agentTLSTestSecretName,
+			Namespace: ngfNamespace,
+		},
+		Data: map[string][]byte{"tls.crt": []byte("tls")},
+	}
+	fakeClient := fake.NewFakeClient(agentTLSSecret)
+
+	provisioner := &NginxProvisioner{
+		cfg: Config{
+			GatewayPodConfig: &config.GatewayPodConfig{
+				Namespace: ngfNamespace,
+			},
+			AgentTLSSecretName: agentTLSTestSecretName,
+			InferenceExtension: true,
+		},
+		k8sClient: fakeClient,
+		baseLabelSelector: metav1.LabelSelector{
+			MatchLabels: map[string]string{"app": "nginx"},
+		},
+	}
+
+	gateway := &gatewayv1.Gateway{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "gw",
+			Namespace: "default",
+		},
+		Spec: gatewayv1.GatewaySpec{
+			Listeners: []gatewayv1.Listener{{Port: 80}},
+		},
+	}
+
+	objects, err := provisioner.buildNginxResourceObjects("gw-nginx", gateway, &graph.EffectiveNginxProxy{})
+	g.Expect(err).ToNot(HaveOccurred())
+
+	// Find the deployment object
+	var deployment *appsv1.Deployment
+	for _, obj := range objects {
+		if d, ok := obj.(*appsv1.Deployment); ok {
+			deployment = d
+			break
+		}
+	}
+	g.Expect(deployment).ToNot(BeNil())
+	containers := deployment.Spec.Template.Spec.Containers
+	g.Expect(containers).To(HaveLen(2))
+	g.Expect(containers[1].Name).To(Equal("endpoint-picker-shim"))
+}
diff --git a/internal/controller/provisioner/provisioner.go b/internal/controller/provisioner/provisioner.go
index fe59f5be1b..8a2abffd0a 100644
--- a/internal/controller/provisioner/provisioner.go
+++ b/internal/controller/provisioner/provisioner.go
@@ -58,6 +58,7 @@ type Config struct {
 	NginxDockerSecretNames         []string
 	NginxOneConsoleTelemetryConfig config.NginxOneConsoleTelemetryConfig
 	Plus                           bool
+	InferenceExtension             bool
 }
 
 // NginxProvisioner handles provisioning nginx kubernetes resources.

From 6995f2f8dfcb8b4f92ad9cdbabafaf46ce6467eb Mon Sep 17 00:00:00 2001
From: Saylor Berman <s.berman@f5.com>
Date: Wed, 24 Sep 2025 08:07:41 -0600
Subject: [PATCH 04/12] Query EPP and proxy AI traffic (#3942)

Problem: We need to connect NGINX to the Golang shim that talks to the EndpointPicker, and then pass client traffic to the proper inference workload.

Solution: Write an NJS module that will query the local Go server to get the AI endpoint to route traffic to. Then redirect the original client request to an internal location that proxies the traffic to the chosen endpoint.

The location building gets a bit complicated, especially when using both HTTP matching conditions and inference workloads. It requires two layers of internal redirects. I added lots of comments to hopefully clear up how we build these locations to perform all the routing steps.
---
 cmd/gateway/endpoint_picker.go                |  24 +-
 cmd/gateway/endpoint_picker_test.go           |  42 ++-
 deploy/inference-nginx-plus/deploy.yaml       |   1 +
 .../controller/nginx/config/http/config.go    |  56 ++-
 internal/controller/nginx/config/maps.go      |  44 +++
 internal/controller/nginx/config/maps_test.go |  65 +++-
 internal/controller/nginx/config/servers.go   | 336 +++++++++++++++---
 .../nginx/config/servers_template.go          |  12 +-
 .../controller/nginx/config/servers_test.go   | 182 +++++++++-
 internal/controller/nginx/modules/src/epp.js  |  74 ++--
 .../controller/nginx/modules/test/epp.test.js | 144 +++++---
 .../state/dataplane/configuration.go          |  27 +-
 .../state/dataplane/configuration_test.go     |  89 ++++-
 internal/controller/state/dataplane/types.go  |   6 +
 .../controller/state/graph/backend_refs.go    |  90 +++--
 .../state/graph/backend_refs_test.go          |   8 +-
 internal/controller/state/graph/graph_test.go |  10 +-
 internal/controller/state/graph/httproute.go  |  21 +-
 .../controller/state/graph/httproute_test.go  |  61 ++++
 .../controller/state/graph/route_common.go    |   3 +
 internal/framework/types/types.go             |  11 +
 21 files changed, 1079 insertions(+), 227 deletions(-)

diff --git a/cmd/gateway/endpoint_picker.go b/cmd/gateway/endpoint_picker.go
index 7c67a83671..acf9bdfbb6 100644
--- a/cmd/gateway/endpoint_picker.go
+++ b/cmd/gateway/endpoint_picker.go
@@ -14,16 +14,8 @@ import (
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/credentials/insecure"
 	eppMetadata "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metadata"
-)
 
-const (
-	// defaultPort is the default port for this server to listen on. If collisions become a problem,
-	// we can make this configurable via the NginxProxy resource.
-	defaultPort = 54800 // why 54800? Sum "nginx" in ASCII and multiply by 100.
-	// eppEndpointHostHeader is the HTTP header used to specify the EPP endpoint host, set by the NJS module caller.
-	eppEndpointHostHeader = "X-EPP-Host"
-	// eppEndpointPortHeader is the HTTP header used to specify the EPP endpoint port, set by the NJS module caller.
-	eppEndpointPortHeader = "X-EPP-Port"
+	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/types"
 )
 
 // extProcClientFactory creates a new ExternalProcessorClient and returns a close function.
@@ -32,7 +24,7 @@ type extProcClientFactory func(target string) (extprocv3.ExternalProcessorClient
 // endpointPickerServer starts an HTTP server on the given port with the provided handler.
 func endpointPickerServer(handler http.Handler) error {
 	server := &http.Server{
-		Addr:              fmt.Sprintf("127.0.0.1:%d", defaultPort),
+		Addr:              fmt.Sprintf("127.0.0.1:%d", types.GoShimPort),
 		Handler:           handler,
 		ReadHeaderTimeout: 10 * time.Second,
 	}
@@ -54,13 +46,13 @@ func realExtProcClientFactory() extProcClientFactory {
 // createEndpointPickerHandler returns an http.Handler that forwards requests to the EndpointPicker.
 func createEndpointPickerHandler(factory extProcClientFactory, logger logr.Logger) http.Handler {
 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		host := r.Header.Get(eppEndpointHostHeader)
-		port := r.Header.Get(eppEndpointPortHeader)
+		host := r.Header.Get(types.EPPEndpointHostHeader)
+		port := r.Header.Get(types.EPPEndpointPortHeader)
 		if host == "" || port == "" {
 			msg := fmt.Sprintf(
 				"missing at least one of required headers: %s and %s",
-				eppEndpointHostHeader,
-				eppEndpointPortHeader,
+				types.EPPEndpointHostHeader,
+				types.EPPEndpointPortHeader,
 			)
 			logger.Error(errors.New(msg), "error contacting EndpointPicker")
 			http.Error(w, msg, http.StatusBadRequest)
@@ -174,6 +166,10 @@ func buildHeaderRequest(r *http.Request) *extprocv3.ProcessingRequest {
 }
 
 func buildBodyRequest(r *http.Request) (*extprocv3.ProcessingRequest, error) {
+	if r.ContentLength == 0 {
+		return nil, errors.New("request body is empty")
+	}
+
 	body, err := io.ReadAll(r.Body)
 	if err != nil {
 		return nil, fmt.Errorf("error reading request body: %w", err)
diff --git a/cmd/gateway/endpoint_picker_test.go b/cmd/gateway/endpoint_picker_test.go
index 99808348fc..99fd95aa90 100644
--- a/cmd/gateway/endpoint_picker_test.go
+++ b/cmd/gateway/endpoint_picker_test.go
@@ -17,6 +17,8 @@ import (
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/metadata"
 	eppMetadata "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metadata"
+
+	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/types"
 )
 
 type mockExtProcClient struct {
@@ -122,8 +124,8 @@ func TestEndpointPickerHandler_Success(t *testing.T) {
 
 	h := createEndpointPickerHandler(factory, logr.Discard())
 	req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader("test body"))
-	req.Header.Set(eppEndpointHostHeader, "test-host")
-	req.Header.Set(eppEndpointPortHeader, "1234")
+	req.Header.Set(types.EPPEndpointHostHeader, "test-host")
+	req.Header.Set(types.EPPEndpointPortHeader, "1234")
 	req.Header.Set("Content-Type", "application/json")
 	w := httptest.NewRecorder()
 
@@ -165,8 +167,8 @@ func TestEndpointPickerHandler_ImmediateResponse(t *testing.T) {
 
 	h := createEndpointPickerHandler(factory, logr.Discard())
 	req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader("test body"))
-	req.Header.Set(eppEndpointHostHeader, "test-host")
-	req.Header.Set(eppEndpointPortHeader, "1234")
+	req.Header.Set(types.EPPEndpointHostHeader, "test-host")
+	req.Header.Set(types.EPPEndpointPortHeader, "1234")
 	w := httptest.NewRecorder()
 
 	h.ServeHTTP(w, req)
@@ -190,8 +192,8 @@ func TestEndpointPickerHandler_Errors(t *testing.T) {
 		h := createEndpointPickerHandler(factory, logr.Discard())
 		req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader("test body"))
 		if setHeaders {
-			req.Header.Set(eppEndpointHostHeader, "test-host")
-			req.Header.Set(eppEndpointPortHeader, "1234")
+			req.Header.Set(types.EPPEndpointHostHeader, "test-host")
+			req.Header.Set(types.EPPEndpointPortHeader, "1234")
 		}
 		w := httptest.NewRecorder()
 		h.ServeHTTP(w, req)
@@ -236,7 +238,33 @@ func TestEndpointPickerHandler_Errors(t *testing.T) {
 	}
 	runErrorTestCase(factory, true, http.StatusBadGateway, "error sending headers")
 
-	// 4. Error sending body
+	// 4a. Error building body request (content length 0)
+	client = &mockProcessClient{
+		SendFunc: func(*extprocv3.ProcessingRequest) error {
+			return nil
+		},
+		RecvFunc: func() (*extprocv3.ProcessingResponse, error) { return nil, io.EOF },
+	}
+	extProcClient = &mockExtProcClient{
+		ProcessFunc: func(context.Context, ...grpc.CallOption) (extprocv3.ExternalProcessor_ProcessClient, error) {
+			return client, nil
+		},
+	}
+	factory = func(string) (extprocv3.ExternalProcessorClient, func() error, error) {
+		return extProcClient, func() error { return nil }, nil
+	}
+	h := createEndpointPickerHandler(factory, logr.Discard())
+	req := httptest.NewRequest(http.MethodPost, "/", nil) // nil body, ContentLength = 0
+	req.Header.Set(types.EPPEndpointHostHeader, "test-host")
+	req.Header.Set(types.EPPEndpointPortHeader, "1234")
+	w := httptest.NewRecorder()
+	h.ServeHTTP(w, req)
+	resp := w.Result()
+	g.Expect(resp.StatusCode).To(Equal(http.StatusInternalServerError))
+	body, _ := io.ReadAll(resp.Body)
+	g.Expect(string(body)).To(ContainSubstring("request body is empty"))
+
+	// 4b. Error sending body
 	client = &mockProcessClient{
 		SendFunc: func(req *extprocv3.ProcessingRequest) error {
 			if req.GetRequestBody() != nil {
diff --git a/deploy/inference-nginx-plus/deploy.yaml b/deploy/inference-nginx-plus/deploy.yaml
index 77ee4da544..025cfeb410 100644
--- a/deploy/inference-nginx-plus/deploy.yaml
+++ b/deploy/inference-nginx-plus/deploy.yaml
@@ -281,6 +281,7 @@ spec:
         - --nginx-docker-secret=nginx-plus-registry-secret
         - --nginx-plus
         - --usage-report-secret=nplus-license
+        - --usage-report-enforce-initial-report=true
         - --metrics-port=9113
         - --health-port=8081
         - --leader-election-lock-name=nginx-gateway-leader-election
diff --git a/internal/controller/nginx/config/http/config.go b/internal/controller/nginx/config/http/config.go
index 3a76ab30b4..dedfd04349 100644
--- a/internal/controller/nginx/config/http/config.go
+++ b/internal/controller/nginx/config/http/config.go
@@ -26,26 +26,58 @@ type Server struct {
 type LocationType string
 
 const (
+	// InternalLocationType defines an internal location that is only accessible within NGINX.
 	InternalLocationType LocationType = "internal"
+	// ExternalLocationType defines a normal external location that is accessible by clients.
 	ExternalLocationType LocationType = "external"
+	// RedirectLocationType defines an external location that redirects to an internal location
+	// based on HTTP matching conditions.
 	RedirectLocationType LocationType = "redirect"
+	// InferenceExternalLocationType defines an external location that is used for calling NJS
+	// to get the inference workload endpoint and redirects to the internal location that will proxy_pass
+	// to that endpoint.
+	InferenceExternalLocationType LocationType = "inference-external"
+	// InferenceInternalLocationType defines an internal location that is used for calling NJS
+	// to get the inference workload endpoint and redirects to the internal location that will proxy_pass
+	// to that endpoint. This is used when an HTTP redirect location is also defined that redirects
+	// to this internal inference location.
+	InferenceInternalLocationType LocationType = "inference-internal"
 )
 
 // Location holds all configuration for an HTTP location.
 type Location struct {
-	Path                           string
-	ProxyPass                      string
-	HTTPMatchKey                   string
+	// Return specifies a return directive (e.g., HTTP status or redirect) for this location block.
+	Return *Return
+	// ProxySSLVerify controls SSL verification for upstreams when proxying requests.
+	ProxySSLVerify *ProxySSLVerify
+	// ProxyPass is the upstream backend (URL or name) to which requests are proxied.
+	ProxyPass string
+	// HTTPMatchKey is the key for associating HTTP match rules, used for routing and NJS module logic.
+	HTTPMatchKey string
+	// MirrorSplitClientsVariableName is the variable name for split_clients, used in traffic mirroring scenarios.
 	MirrorSplitClientsVariableName string
-	Type                           LocationType
-	ProxySetHeaders                []Header
-	ProxySSLVerify                 *ProxySSLVerify
-	Return                         *Return
-	ResponseHeaders                ResponseHeaders
-	Rewrites                       []string
-	MirrorPaths                    []string
-	Includes                       []shared.Include
-	GRPC                           bool
+	// EPPInternalPath is the internal path for the inference NJS module to redirect to.
+	EPPInternalPath string
+	// EPPHost is the host for the EndpointPicker, used for inference routing.
+	EPPHost string
+	// Type indicates the type of location (external, internal, redirect, etc).
+	Type LocationType
+	// Path is the NGINX location path.
+	Path string
+	// ResponseHeaders are custom response headers to be sent.
+	ResponseHeaders ResponseHeaders
+	// ProxySetHeaders are headers to set when proxying requests upstream.
+	ProxySetHeaders []Header
+	// Rewrites are rewrite rules for modifying request paths.
+	Rewrites []string
+	// MirrorPaths are paths to which requests are mirrored.
+	MirrorPaths []string
+	// Includes are additional NGINX config snippets or policies to include in this location.
+	Includes []shared.Include
+	// EPPPort is the port for the EndpointPicker, used for inference routing.
+	EPPPort int
+	// GRPC indicates if this location proxies gRPC traffic.
+	GRPC bool
 }
 
 // Header defines an HTTP header to be passed to the proxied server.
diff --git a/internal/controller/nginx/config/maps.go b/internal/controller/nginx/config/maps.go
index 5a5e5ff189..e0f9ee98d5 100644
--- a/internal/controller/nginx/config/maps.go
+++ b/internal/controller/nginx/config/maps.go
@@ -1,9 +1,12 @@
 package config
 
 import (
+	"fmt"
 	"strings"
 	gotemplate "text/template"
 
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
+
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/controller/nginx/config/shared"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/dataplane"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers"
@@ -26,6 +29,8 @@ const (
 
 func executeMaps(conf dataplane.Configuration) []executeResult {
 	maps := buildAddHeaderMaps(append(conf.HTTPServers, conf.SSLServers...))
+	maps = append(maps, buildInferenceMaps(conf.BackendGroups)...)
+
 	result := executeResult{
 		dest: httpConfigFile,
 		data: helpers.MustExecuteTemplate(mapsTemplate, maps),
@@ -177,3 +182,42 @@ func createAddHeadersMap(name string) shared.Map {
 		Parameters: params,
 	}
 }
+
+// buildInferenceMaps creates maps for InferencePool Backends.
+func buildInferenceMaps(groups []dataplane.BackendGroup) []shared.Map {
+	inferenceMaps := make([]shared.Map, 0, len(groups))
+	for _, group := range groups {
+		for _, backend := range group.Backends {
+			if backend.EndpointPickerConfig != nil {
+				var defaultResult string
+				switch backend.EndpointPickerConfig.FailureMode {
+				// in FailClose mode, if the EPP is unavailable or returns an error,
+				// we return an invalid backend to ensure the request fails
+				case inference.EndpointPickerFailClose:
+					defaultResult = invalidBackendRef
+				// in FailOpen mode, if the EPP is unavailable or returns an error,
+				// we fall back to the upstream
+				case inference.EndpointPickerFailOpen:
+					defaultResult = backend.UpstreamName
+				}
+				params := []shared.MapParameter{
+					{
+						Value:  "~.+",
+						Result: "$inference_workload_endpoint",
+					},
+					{
+						Value:  "default",
+						Result: defaultResult,
+					},
+				}
+				backendVarName := strings.ReplaceAll(backend.UpstreamName, "-", "_")
+				inferenceMaps = append(inferenceMaps, shared.Map{
+					Source:     "$inference_workload_endpoint",
+					Variable:   fmt.Sprintf("$inference_backend_%s", backendVarName),
+					Parameters: params,
+				})
+			}
+		}
+	}
+	return inferenceMaps
+}
diff --git a/internal/controller/nginx/config/maps_test.go b/internal/controller/nginx/config/maps_test.go
index d133882d7b..736d7808ec 100644
--- a/internal/controller/nginx/config/maps_test.go
+++ b/internal/controller/nginx/config/maps_test.go
@@ -5,6 +5,7 @@ import (
 	"testing"
 
 	. "github.com/onsi/gomega"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/controller/nginx/config/shared"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/dataplane"
@@ -59,22 +60,24 @@ func TestExecuteMaps(t *testing.T) {
 
 	conf := dataplane.Configuration{
 		HTTPServers: []dataplane.VirtualServer{
-			{
-				PathRules: pathRules,
-			},
-			{
-				PathRules: pathRules,
-			},
-			{
-				IsDefault: true,
-			},
+			{PathRules: pathRules},
+			{PathRules: pathRules},
+			{IsDefault: true},
 		},
 		SSLServers: []dataplane.VirtualServer{
+			{PathRules: pathRules},
+			{IsDefault: true},
+		},
+		BackendGroups: []dataplane.BackendGroup{
 			{
-				PathRules: pathRules,
-			},
-			{
-				IsDefault: true,
+				Backends: []dataplane.Backend{
+					{
+						UpstreamName: "upstream1",
+						EndpointPickerConfig: &inference.EndpointPickerRef{
+							FailureMode: inference.EndpointPickerFailClose,
+						},
+					},
+				},
 			},
 		},
 	}
@@ -86,6 +89,9 @@ func TestExecuteMaps(t *testing.T) {
 		"map ${http_my_second_add_header} $my_second_add_header_header_var {": 1,
 		"~.* ${http_my_second_add_header},;":                                  1,
 		"map ${http_my_set_header} $my_set_header_header_var {":               0,
+		"$inference_workload_endpoint":                                        2,
+		"$inference_backend":                                                  1,
+		"invalid-backend-ref":                                                 1,
 	}
 
 	mapResult := executeMaps(conf)
@@ -385,3 +391,36 @@ func TestCreateStreamMapsWithEmpty(t *testing.T) {
 
 	g.Expect(maps).To(BeNil())
 }
+
+func TestBuildInferenceMaps(t *testing.T) {
+	t.Parallel()
+	g := NewWithT(t)
+
+	group := dataplane.BackendGroup{
+		Backends: []dataplane.Backend{
+			{
+				UpstreamName: "upstream1",
+				EndpointPickerConfig: &inference.EndpointPickerRef{
+					FailureMode: inference.EndpointPickerFailClose,
+				},
+			},
+			{
+				UpstreamName: "upstream2",
+				EndpointPickerConfig: &inference.EndpointPickerRef{
+					FailureMode: inference.EndpointPickerFailOpen,
+				},
+			},
+			{
+				UpstreamName:         "upstream3",
+				EndpointPickerConfig: nil,
+			},
+		},
+	}
+
+	maps := buildInferenceMaps([]dataplane.BackendGroup{group})
+	g.Expect(maps).To(HaveLen(2))
+	g.Expect(maps[0].Source).To(Equal("$inference_workload_endpoint"))
+	g.Expect(maps[0].Variable).To(Equal("$inference_backend_upstream1"))
+	g.Expect(maps[0].Parameters[1].Result).To(Equal("invalid-backend-ref"))
+	g.Expect(maps[1].Parameters[1].Result).To(Equal("upstream2"))
+}
diff --git a/internal/controller/nginx/config/servers.go b/internal/controller/nginx/config/servers.go
index 88ba4fa8ea..9664396c2e 100644
--- a/internal/controller/nginx/config/servers.go
+++ b/internal/controller/nginx/config/servers.go
@@ -16,7 +16,13 @@ import (
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers"
 )
 
-var serversTemplate = gotemplate.Must(gotemplate.New("servers").Parse(serversTemplateText))
+var serversTemplate = gotemplate.Must(
+	gotemplate.New("servers").Funcs(gotemplate.FuncMap{
+		"contains": func(str http.LocationType, substr string) bool {
+			return strings.Contains(string(str), substr)
+		},
+	}).Parse(serversTemplateText),
+)
 
 const (
 	// HeaderMatchSeparator is the separator for constructing header-based match for NJS.
@@ -252,6 +258,78 @@ func extractMirrorTargetsWithPercentages(pathRules []dataplane.PathRule) map[str
 	return mirrorTargets
 }
 
+/*
+There are several different flows of location blocks, depending on the user configuration.
+The following describes them, with basic location examples.
+
+---------------
+Base case, no HTTP matching conditions or inference extension.
+
+External location proxies straight to backend.
+
+location /coffee {
+    proxy_pass http://backend;
+}
+---------------
+HTTP matching conditions.
+
+External location calls httpmatch NJS module. The module determines the HTTP request conditions that exist
+and which backend to use, then redirects to the appropriate internal location.
+The internal location proxies to the backend.
+
+location /coffee {
+    js_content httpmatches.match; // chooses backend1 or backend2, and redirects to appropriate internal location
+}
+location /_ngf-internal-rule0-route0 {
+	internal;
+	proxy_pass http://backend1;
+}
+location /_ngf-internal-rule1-route0 {
+	internal;
+	proxy_pass http://backend2;
+}
+---------------
+Inference extension, no HTTP matching conditions.
+
+External location calls inference NJS module. The module gets the AI endpoint to proxy to,
+then redirects to the internal inference location that proxies to the backend.
+
+location /coffee {
+	set $epp_internal_path /_ngf-internal-rule0-route0-inference;
+	js_content epp.getEndpoint; // gets endpoint and redirects to /_ngf-internal-rule0-route0-inference
+}
+location /_ngf-internal-rule0-route0-inference {
+	internal;
+	proxy_pass http://$inference_backend_<upstream>;
+}
+---------------
+Inference extension with HTTP matching conditions.
+
+External location calls httpmatch NJS module. The module determines the HTTP request conditions that exist
+and which backend to use, then redirects to the internal inference location. The internal inference
+location calls the inference NJS module to get the AI endpoint to proxy to, then redirects to the
+internal location that proxies to the backend.
+
+Note that the location path naming here is a little different than the previous example.
+The final location that proxy_passes has the non-inference name to avoid too much refactoring
+in the code, and the intermediate location has -inference in the name, whereas in the previous example
+it was the final location that had -inference in the name.
+
+location /coffee {
+	js_content httpmatches.match; // chooses backend and redirects to appropriate internal inference location
+}
+location /_ngf-internal-rule0-route0-inference {
+	internal;
+
+	set $epp_internal_path /_ngf-internal-rule0-route0;
+	js_content epp.getEndpoint; // redirects to /_ngf-internal-rule0-route0
+}
+location /_ngf-internal-rule0-route0 {
+	internal;
+	proxy_pass http://$inference_backend_<upstream>;
+}
+*/
+
 type httpMatchPairs map[string][]routeMatch
 
 func createLocations(
@@ -270,8 +348,6 @@ func createLocations(
 	mirrorPathToPercentage := extractMirrorTargetsWithPercentages(server.PathRules)
 
 	for pathRuleIdx, rule := range server.PathRules {
-		matches := make([]routeMatch, 0, len(rule.MatchRules))
-
 		if rule.Path == rootPath {
 			rootPathExists = true
 		}
@@ -281,7 +357,6 @@ func createLocations(
 		}
 
 		mirrorPercentage := mirrorPathToPercentage[rule.Path]
-
 		extLocations := initializeExternalLocations(rule, pathsAndTypes)
 		for i := range extLocations {
 			extLocations[i].Includes = createIncludesFromPolicyGenerateResult(
@@ -289,54 +364,45 @@ func createLocations(
 			)
 		}
 
-		if !needsInternalLocations(rule) {
-			for _, r := range rule.MatchRules {
-				extLocations = updateLocations(
-					r,
-					rule,
-					extLocations,
-					server.Port,
-					keepAliveCheck,
-					mirrorPercentage,
-				)
-			}
-
-			locs = append(locs, extLocations...)
-			continue
-		}
-
-		internalLocations := make([]http.Location, 0, len(rule.MatchRules))
-
-		for matchRuleIdx, r := range rule.MatchRules {
-			intLocation, match := initializeInternalLocation(pathRuleIdx, matchRuleIdx, r.Match, rule.GRPC)
-			intLocation.Includes = createIncludesFromPolicyGenerateResult(
-				generator.GenerateForInternalLocation(rule.Policies),
+		switch {
+		case !needsInternalLocationsForMatches(rule) && !rule.HasInferenceBackends:
+			locs = append(locs, updateExternalLocationsForRule(
+				rule,
+				extLocations,
+				server.Port,
+				keepAliveCheck,
+				mirrorPercentage)...,
 			)
-
-			intLocation = updateLocation(
-				r,
+		case needsInternalLocationsForMatches(rule):
+			internalLocations, matches := createInternalLocationsForRule(
+				pathRuleIdx,
 				rule,
-				intLocation,
+				generator,
 				server.Port,
 				keepAliveCheck,
 				mirrorPercentage,
 			)
-
-			internalLocations = append(internalLocations, intLocation)
-			matches = append(matches, match)
-		}
-
-		httpMatchKey := serverID + "_" + strconv.Itoa(pathRuleIdx)
-		for i := range extLocations {
-			// FIXME(sberman): De-dupe matches and associated locations
-			// so we don't need nginx/njs to perform unnecessary matching.
-			// https://github.com/nginx/nginx-gateway-fabric/issues/662
-			extLocations[i].HTTPMatchKey = httpMatchKey
-			matchPairs[extLocations[i].HTTPMatchKey] = matches
+			httpMatchKey := serverID + "_" + strconv.Itoa(pathRuleIdx)
+			for i := range extLocations {
+				// FIXME(sberman): De-dupe matches and associated locations
+				// so we don't need nginx/njs to perform unnecessary matching.
+				// https://github.com/nginx/nginx-gateway-fabric/issues/662
+				extLocations[i].HTTPMatchKey = httpMatchKey
+				matchPairs[extLocations[i].HTTPMatchKey] = matches
+			}
+			locs = append(locs, extLocations...)
+			locs = append(locs, internalLocations...)
+		case rule.HasInferenceBackends:
+			locs = append(locs, createInferenceLocationsForRule(
+				pathRuleIdx,
+				rule,
+				extLocations,
+				generator,
+				server.Port,
+				keepAliveCheck,
+				mirrorPercentage)...,
+			)
 		}
-
-		locs = append(locs, extLocations...)
-		locs = append(locs, internalLocations...)
 	}
 
 	if !rootPathExists {
@@ -346,10 +412,124 @@ func createLocations(
 	return locs, matchPairs, grpcServer
 }
 
-func needsInternalLocations(rule dataplane.PathRule) bool {
+// updateExternalLocationsForRule configures the external locations of a path rule by applying
+// each of the rule's match rules to them through updateLocations.
+// createLocations calls this for path rules that need neither internal locations for HTTP matches
+// nor inference locations, so the external locations are the only locations generated for the rule.
+// The updated locations are returned.
+func updateExternalLocationsForRule(
+	rule dataplane.PathRule,
+	extLocations []http.Location,
+	port int32,
+	keepAliveCheck keepAliveChecker,
+	mirrorPercentage *float64,
+) []http.Location {
+	updated := extLocations
+	for _, matchRule := range rule.MatchRules {
+		// Feed the result of each call into the next one.
+		updated = updateLocations(matchRule, rule, updated, port, keepAliveCheck, mirrorPercentage)
+	}
+
+	return updated
+}
+
+// createInternalLocationsForRule builds the internal locations and route matches for a path rule whose
+// external locations redirect based on HTTP matching conditions. Every match rule yields one location
+// that is configured through updateLocation. If the rule has inference backends, an internal inference
+// redirect location carrying the EndpointPicker settings is appended just before that location.
+func createInternalLocationsForRule(
+	pathRuleIdx int,
+	rule dataplane.PathRule,
+	generator policies.Generator,
+	port int32,
+	keepAliveCheck keepAliveChecker,
+	mirrorPercentage *float64,
+) ([]http.Location, []routeMatch) {
+	matches := make([]routeMatch, 0, len(rule.MatchRules))
+	internalLocations := make([]http.Location, 0, len(rule.MatchRules))
+	for matchRuleIdx, r := range rule.MatchRules {
+		var (
+			intLocation http.Location
+			match       routeMatch
+		)
+		if rule.HasInferenceBackends {
+			intLocation, match = initializeInternalMatchLocationWithInference(pathRuleIdx, matchRuleIdx, r.Match)
+			intInfLocation := initializeInternalInferenceRedirectLocation(pathRuleIdx, matchRuleIdx)
+			for _, b := range r.BackendGroup.Backends {
+				if b.EndpointPickerConfig == nil {
+					continue
+				}
+				var portNum int
+				if b.EndpointPickerConfig.Port != nil {
+					portNum = int(b.EndpointPickerConfig.Port.Number)
+				}
+				intInfLocation.EPPInternalPath = intLocation.Path
+				intInfLocation.EPPHost = string(b.EndpointPickerConfig.Name)
+				intInfLocation.EPPPort = portNum
+			}
+			internalLocations = append(internalLocations, intInfLocation)
+		} else {
+			intLocation, match = initializeInternalMatchLocation(pathRuleIdx, matchRuleIdx, r.Match, rule.GRPC)
+		}
+
+		policyIncludes := generator.GenerateForInternalLocation(rule.Policies)
+		intLocation.Includes = createIncludesFromPolicyGenerateResult(policyIncludes)
+		intLocation = updateLocation(r, rule, intLocation, port, keepAliveCheck, mirrorPercentage)
+		internalLocations = append(internalLocations, intLocation)
+		matches = append(matches, match)
+	}
+
+	return internalLocations, matches
+}
+
+// createInferenceLocationsForRule builds the locations for a path rule that has inference backends
+// but needs no HTTP match redirect. One internal location is created per match rule, and every
+// external location is pointed at it together with the EndpointPicker host and port.
+// The internal locations are returned first, followed by the external locations.
+func createInferenceLocationsForRule(
+	pathRuleIdx int,
+	rule dataplane.PathRule,
+	extLocations []http.Location,
+	generator policies.Generator,
+	port int32,
+	keepAliveCheck keepAliveChecker,
+	mirrorPercentage *float64,
+) []http.Location {
+	locs := make([]http.Location, 0, len(rule.MatchRules)+len(extLocations))
+	for matchRuleIdx, r := range rule.MatchRules {
+		intLocation := initializeInternalInferenceLocation(pathRuleIdx, matchRuleIdx)
+		policyIncludes := generator.GenerateForInternalLocation(rule.Policies)
+		intLocation.Includes = createIncludesFromPolicyGenerateResult(policyIncludes)
+		intLocation = updateLocation(r, rule, intLocation, port, keepAliveCheck, mirrorPercentage)
+
+		for _, b := range r.BackendGroup.Backends {
+			if b.EndpointPickerConfig == nil {
+				continue
+			}
+
+			// The port does not depend on the external location, so resolve it once per backend.
+			var portNum int
+			if b.EndpointPickerConfig.Port != nil {
+				portNum = int(b.EndpointPickerConfig.Port.Number)
+			}
+			for i := range extLocations {
+				extLocations[i].EPPInternalPath = intLocation.Path
+				extLocations[i].EPPHost = string(b.EndpointPickerConfig.Name)
+				extLocations[i].EPPPort = portNum
+			}
+		}
+		locs = append(locs, intLocation)
+	}
+	locs = append(locs, extLocations...)
+
+	return locs
+}
+
+func needsInternalLocationsForMatches(rule dataplane.PathRule) bool {
 	if len(rule.MatchRules) > 1 {
 		return true
 	}
+
 	return len(rule.MatchRules) == 1 && !isPathOnlyMatch(rule.MatchRules[0].Match)
 }
 
@@ -362,12 +542,13 @@ type pathAndTypeMap map[string]map[dataplane.PathType]struct{}
 // 2. Each path rule may have an additional location if it contains non-path-only matches.
 // 3. Each prefix path rule may have an additional location if it doesn't contain trailing slash.
 // 4. There may be an additional location for the default root path.
+// 5. There may be an additional location per parent location for the inference extension.
 // We also return a map of all paths and their types.
 func getMaxLocationCountAndPathMap(pathRules []dataplane.PathRule) (int, pathAndTypeMap) {
 	maxLocs := 1
 	pathsAndTypes := make(pathAndTypeMap)
 	for _, rule := range pathRules {
-		maxLocs += len(rule.MatchRules) + 2
+		maxLocs += (len(rule.MatchRules) * 2) + 2
 		if pathsAndTypes[rule.Path] == nil {
 			pathsAndTypes[rule.Path] = map[dataplane.PathType]struct{}{
 				rule.PathType: {},
@@ -431,14 +612,20 @@ func initializeExternalLocations(
 }
 
 func getLocationTypeForPathRule(rule dataplane.PathRule) http.LocationType {
-	if needsInternalLocations(rule) {
+	if needsInternalLocationsForMatches(rule) {
 		return http.RedirectLocationType
 	}
 
+	if rule.HasInferenceBackends {
+		return http.InferenceExternalLocationType
+	}
+
 	return http.ExternalLocationType
 }
 
-func initializeInternalLocation(
+// initializeInternalMatchLocation initializes the internal location that is redirected to by an
+// external location HTTP matching decision. This location will proxy_pass to the backend.
+func initializeInternalMatchLocation(
 	pathruleIdx,
 	matchRuleIdx int,
 	match dataplane.Match,
@@ -448,6 +635,45 @@ func initializeInternalLocation(
 	return createMatchLocation(path, grpc), createRouteMatch(match, path)
 }
 
+// initializeInternalInferenceRedirectLocation builds the internal inference location that an external
+// HTTP matching location redirects to.
+// It then redirects to the final proxy_pass location.
+func initializeInternalInferenceRedirectLocation(pathruleIdx, matchRuleIdx int) http.Location {
+	redirectPath := inferencePath(pathruleIdx, matchRuleIdx)
+
+	return http.Location{Path: redirectPath, Type: http.InferenceInternalLocationType}
+}
+
+// initializeInternalMatchLocationWithInference builds the internal location that proxy_passes to the
+// backend when both HTTP matching and inference are involved. The request reaches it in two hops:
+// the external HTTP matching location redirects to an internal inference location, which redirects here.
+// For that reason the returned routeMatch carries the inference location path rather than this
+// location's own path. The location is always created as non-gRPC.
+func initializeInternalMatchLocationWithInference(
+	pathruleIdx,
+	matchRuleIdx int,
+	match dataplane.Match,
+) (http.Location, routeMatch) {
+	proxyPath := fmt.Sprintf("%s-rule%d-route%d", http.InternalRoutePathPrefix, pathruleIdx, matchRuleIdx)
+	redirectPath := inferencePath(pathruleIdx, matchRuleIdx)
+
+	return createMatchLocation(proxyPath, false), createRouteMatch(match, redirectPath)
+}
+
+// initializeInternalInferenceLocation builds the internal inference location that performs the final
+// proxy_pass to the inference backend.
+// It is used when the external location redirects directly to it, without any HTTP matching.
+func initializeInternalInferenceLocation(pathruleIdx, matchRuleIdx int) http.Location {
+	path := inferencePath(pathruleIdx, matchRuleIdx)
+
+	return http.Location{Path: path, Type: http.InternalLocationType}
+}
+
+// inferencePath returns the path of the internal inference location for the given rule and route indexes.
+func inferencePath(pathruleIdx, matchRuleIdx int) string {
+	return fmt.Sprintf("%s-rule%d-route%d-inference", http.InternalRoutePathPrefix, pathruleIdx, matchRuleIdx)
+}
+
 // updateLocation updates a location with any relevant configurations, like proxy_pass, filters, tls settings, etc.
 func updateLocation(
 	matchRule dataplane.MatchRule,
@@ -460,6 +686,7 @@ func updateLocation(
 	filters := matchRule.Filters
 	path := pathRule.Path
 	grpc := pathRule.GRPC
+	inferenceBackend := pathRule.HasInferenceBackends
 
 	if filters.InvalidFilter != nil {
 		location.Return = &http.Return{Code: http.StatusInternalServerError}
@@ -475,7 +702,7 @@ func updateLocation(
 
 	location = updateLocationRewriteFilter(location, filters.RequestURLRewrite, path)
 	location = updateLocationMirrorFilters(location, filters.RequestMirrors, path, mirrorPercentage)
-	location = updateLocationProxySettings(location, matchRule, grpc, keepAliveCheck)
+	location = updateLocationProxySettings(location, matchRule, grpc, inferenceBackend, keepAliveCheck)
 
 	return location
 }
@@ -555,6 +782,7 @@ func updateLocationProxySettings(
 	location http.Location,
 	matchRule dataplane.MatchRule,
 	grpc bool,
+	inferenceBackend bool,
 	keepAliveCheck keepAliveChecker,
 ) http.Location {
 	extraHeaders := make([]http.Header, 0, 3)
@@ -575,6 +803,7 @@ func updateLocationProxySettings(
 		matchRule.Filters.RequestURLRewrite,
 		generateProtocolString(location.ProxySSLVerify, grpc),
 		grpc,
+		inferenceBackend,
 	)
 
 	location.ResponseHeaders = responseHeaders
@@ -853,6 +1082,7 @@ func createProxyPass(
 	filter *dataplane.HTTPURLRewriteFilter,
 	protocol string,
 	grpc bool,
+	inferenceBackend bool,
 ) string {
 	var requestURI string
 	if !grpc {
@@ -862,6 +1092,12 @@ func createProxyPass(
 	}
 
 	backendName := backendGroupName(backendGroup)
+
+	if inferenceBackend {
+		backendVarName := strings.ReplaceAll(backendName, "-", "_")
+		return "http://$inference_backend_" + backendVarName + requestURI
+	}
+
 	if backendGroupNeedsSplit(backendGroup) {
 		return protocol + "://$" + convertStringToSafeVariableName(backendName) + requestURI
 	}
diff --git a/internal/controller/nginx/config/servers_template.go b/internal/controller/nginx/config/servers_template.go
index 224e189a6e..9575b77480 100644
--- a/internal/controller/nginx/config/servers_template.go
+++ b/internal/controller/nginx/config/servers_template.go
@@ -92,7 +92,7 @@ server {
 
         {{ range $l := $s.Locations }}
     location {{ $l.Path }} {
-        {{ if eq $l.Type "internal" -}}
+        {{ if contains $l.Type "internal" -}}
         internal;
         {{ end }}
 
@@ -118,11 +118,19 @@ server {
         return {{ $l.Return.Code }} "{{ $l.Return.Body }}";
         {{- end }}
 
-        {{- if eq $l.Type "redirect" }}
+        {{- if eq $l.Type "redirect" -}}
         set $match_key {{ $l.HTTPMatchKey }};
         js_content httpmatches.redirect;
         {{- end }}
 
+        {{- if contains $l.Type "inference" -}}
+        js_var $inference_workload_endpoint;
+        set $epp_internal_path {{ $l.EPPInternalPath }};
+        set $epp_host {{ $l.EPPHost }};
+        set $epp_port {{ $l.EPPPort }};
+        js_content epp.getEndpoint;
+        {{- end }}
+
         {{ $proxyOrGRPC := "proxy" }}{{ if $l.GRPC }}{{ $proxyOrGRPC = "grpc" }}{{ end }}
 
         {{- if $l.GRPC }}
diff --git a/internal/controller/nginx/config/servers_test.go b/internal/controller/nginx/config/servers_test.go
index 6b604d7bec..ab4fad31a5 100644
--- a/internal/controller/nginx/config/servers_test.go
+++ b/internal/controller/nginx/config/servers_test.go
@@ -9,6 +9,7 @@ import (
 	. "github.com/onsi/gomega"
 	"github.com/onsi/gomega/format"
 	"k8s.io/apimachinery/pkg/types"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/controller/nginx/config/http"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/controller/nginx/config/policies"
@@ -1239,7 +1240,7 @@ func TestCreateServers(t *testing.T) {
 					Filters: dataplane.HTTPFilters{
 						RequestRedirect: &dataplane.HTTPRequestRedirectFilter{
 							Hostname:   helpers.GetPointer("redirect.example.com"),
-							StatusCode: helpers.GetPointer[int](301),
+							StatusCode: helpers.GetPointer(301),
 							Port:       helpers.GetPointer[int32](8080),
 							Path: &dataplane.HTTPPathModifier{
 								Type:        dataplane.ReplaceFullPath,
@@ -2443,6 +2444,154 @@ func TestCreateLocations_Includes(t *testing.T) {
 	}
 }
 
+func TestCreateLocations_InferenceBackends(t *testing.T) {
+	t.Parallel()
+
+	hrNsName := types.NamespacedName{Namespace: "test", Name: "route1"}
+
+	fooGroup := dataplane.BackendGroup{
+		Source:  hrNsName,
+		RuleIdx: 0,
+		Backends: []dataplane.Backend{
+			{
+				UpstreamName: "test_foo_80",
+				Valid:        true,
+				Weight:       1,
+				EndpointPickerConfig: &inference.EndpointPickerRef{
+					Name: "test-epp",
+					Port: &inference.Port{
+						Number: 80,
+					},
+				},
+			},
+		},
+	}
+
+	pathRuleInferenceOnly := dataplane.PathRule{
+		Path:                 "/inference",
+		PathType:             dataplane.PathTypeExact,
+		HasInferenceBackends: true,
+		MatchRules: []dataplane.MatchRule{
+			{
+				Match:        dataplane.Match{},
+				BackendGroup: fooGroup,
+			},
+		},
+	}
+
+	pathRuleInferenceWithMatch := dataplane.PathRule{
+		Path:                 "/inference-match",
+		PathType:             dataplane.PathTypeExact,
+		HasInferenceBackends: true,
+		MatchRules: []dataplane.MatchRule{
+			{
+				Match: dataplane.Match{
+					Method: helpers.GetPointer("POST"),
+				},
+				BackendGroup: fooGroup,
+			},
+		},
+	}
+
+	tests := []struct {
+		expMatches httpMatchPairs
+		name       string
+		pathRules  []dataplane.PathRule
+		expLocs    []http.Location
+	}{
+		{
+			name:      "inference only, no internal locations for matches",
+			pathRules: []dataplane.PathRule{pathRuleInferenceOnly},
+			expLocs: []http.Location{
+				{
+					Path:      "/_ngf-internal-rule0-route0-inference",
+					Type:      http.InternalLocationType,
+					ProxyPass: "http://$inference_backend_test_foo_80$request_uri",
+					ProxySetHeaders: []http.Header{
+						{Name: "Host", Value: "$gw_api_compliant_host"},
+						{Name: "X-Forwarded-For", Value: "$proxy_add_x_forwarded_for"},
+						{Name: "X-Real-IP", Value: "$remote_addr"},
+						{Name: "X-Forwarded-Proto", Value: "$scheme"},
+						{Name: "X-Forwarded-Host", Value: "$host"},
+						{Name: "X-Forwarded-Port", Value: "$server_port"},
+						{Name: "Upgrade", Value: "$http_upgrade"},
+						{Name: "Connection", Value: "$connection_upgrade"},
+					},
+				},
+				{
+					Path:            "= /inference",
+					Type:            http.InferenceExternalLocationType,
+					EPPInternalPath: "/_ngf-internal-rule0-route0-inference",
+					EPPHost:         "test-epp",
+					EPPPort:         80,
+				},
+				createDefaultRootLocation(),
+			},
+			expMatches: httpMatchPairs{},
+		},
+		{
+			name:      "inference with match, needs internal locations for matches",
+			pathRules: []dataplane.PathRule{pathRuleInferenceWithMatch},
+			expLocs: []http.Location{
+				{
+					Path:         "= /inference-match",
+					Type:         http.RedirectLocationType,
+					HTTPMatchKey: "1_0",
+				},
+				{
+					Path:            "/_ngf-internal-rule0-route0-inference",
+					Type:            http.InferenceInternalLocationType,
+					EPPInternalPath: "/_ngf-internal-rule0-route0",
+					EPPHost:         "test-epp",
+					EPPPort:         80,
+				},
+				{
+					Path:      "/_ngf-internal-rule0-route0",
+					Type:      http.InternalLocationType,
+					ProxyPass: "http://$inference_backend_test_foo_80$request_uri",
+					ProxySetHeaders: []http.Header{
+						{Name: "Host", Value: "$gw_api_compliant_host"},
+						{Name: "X-Forwarded-For", Value: "$proxy_add_x_forwarded_for"},
+						{Name: "X-Real-IP", Value: "$remote_addr"},
+						{Name: "X-Forwarded-Proto", Value: "$scheme"},
+						{Name: "X-Forwarded-Host", Value: "$host"},
+						{Name: "X-Forwarded-Port", Value: "$server_port"},
+						{Name: "Upgrade", Value: "$http_upgrade"},
+						{Name: "Connection", Value: "$connection_upgrade"},
+					},
+				},
+				createDefaultRootLocation(),
+			},
+			expMatches: httpMatchPairs{
+				"1_0": {
+					{Method: "POST", RedirectPath: "/_ngf-internal-rule0-route0-inference"},
+				},
+			},
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			g := NewWithT(t)
+
+			locs, matches, _ := createLocations(
+				&dataplane.VirtualServer{
+					Hostname:  "example.com",
+					PathRules: tc.pathRules,
+					Port:      80,
+				},
+				"1",
+				&policiesfakes.FakeGenerator{},
+				alwaysFalseKeepAliveChecker,
+			)
+
+			g.Expect(helpers.Diff(tc.expLocs, locs)).To(BeEmpty())
+			g.Expect(matches).To(Equal(tc.expMatches))
+		})
+	}
+}
+
 func TestCreateLocationsRootPath(t *testing.T) {
 	t.Parallel()
 	hrNsName := types.NamespacedName{Namespace: "test", Name: "route1"}
@@ -3332,10 +3481,11 @@ func TestCreateProxyPass(t *testing.T) {
 	t.Parallel()
 
 	tests := []struct {
-		rewrite  *dataplane.HTTPURLRewriteFilter
-		expected string
-		grp      dataplane.BackendGroup
-		GRPC     bool
+		rewrite          *dataplane.HTTPURLRewriteFilter
+		expected         string
+		grp              dataplane.BackendGroup
+		GRPC             bool
+		inferenceBackend bool
 	}{
 		{
 			expected: "http://10.0.0.1:80$request_uri",
@@ -3349,6 +3499,20 @@ func TestCreateProxyPass(t *testing.T) {
 				},
 			},
 		},
+		// Inference case
+		{
+			expected: "http://$inference_backend_upstream_inference$request_uri",
+			grp: dataplane.BackendGroup{
+				Backends: []dataplane.Backend{
+					{
+						UpstreamName: "upstream-inference",
+						Valid:        true,
+						Weight:       1,
+					},
+				},
+			},
+			inferenceBackend: true,
+		},
 		{
 			expected: "http://$group_ns1__bg_rule0$request_uri",
 			grp: dataplane.BackendGroup{
@@ -3401,7 +3565,13 @@ func TestCreateProxyPass(t *testing.T) {
 		t.Run(tc.expected, func(t *testing.T) {
 			t.Parallel()
 			g := NewWithT(t)
-			result := createProxyPass(tc.grp, tc.rewrite, generateProtocolString(nil, tc.GRPC), tc.GRPC)
+			result := createProxyPass(
+				tc.grp,
+				tc.rewrite,
+				generateProtocolString(nil, tc.GRPC),
+				tc.GRPC,
+				tc.inferenceBackend,
+			)
 			g.Expect(result).To(Equal(tc.expected))
 		})
 	}
diff --git a/internal/controller/nginx/modules/src/epp.js b/internal/controller/nginx/modules/src/epp.js
index d4beeb9e15..88de40062b 100644
--- a/internal/controller/nginx/modules/src/epp.js
+++ b/internal/controller/nginx/modules/src/epp.js
@@ -1,29 +1,59 @@
-// This file contains the methods to get an AI workload endpoint from the EndpointPicker (EPP).
+import qs from 'querystring';
 
-// TODO(sberman): this module will need to be enhanced to include the following:
-// - function that sends the subrequest to the Go middleware application (to get the endpoint from EPP)
-// - if a user has specified an Exact matching condition for a model name, extract the model name from
-// the request body, and if it matches that condition, set the proper value in the X-Gateway-Model-Name header
-// (based on if we do a redirect or traffic split (see design doc)) in the subrequest. If the client request
-// already has this header set, then I don't think we need to extract the model from the body, just pass
-// through the existing header.
-// I believe we have to use js_content to call the NJS functionality. Because this takes over
-// the request, we will likely have to finish the NJS functionality with an internalRedirect to an internal
-// location that proxy_passes to the chosen endpoint.
+const EPP_HOST_HEADER_VAR = 'epp_host';
+const EPP_PORT_HEADER_VAR = 'epp_port';
+const EPP_HOST_HEADER = 'X-EPP-Host';
+const EPP_PORT_HEADER = 'X-EPP-Port';
+const ENDPOINT_HEADER = 'X-Gateway-Destination-Endpoint';
+const EPP_INTERNAL_PATH_VAR = 'epp_internal_path';
+const WORKLOAD_ENDPOINT_VAR = 'inference_workload_endpoint';
+const SHIM_URI = 'http://127.0.0.1:54800';
+
+async function getEndpoint(r) {
+	if (!r.variables[EPP_HOST_HEADER_VAR] || !r.variables[EPP_PORT_HEADER_VAR]) {
+		throw Error(
+			`Missing required variables: ${EPP_HOST_HEADER_VAR} and/or ${EPP_PORT_HEADER_VAR}`,
+		);
+	}
+	if (!r.variables[EPP_INTERNAL_PATH_VAR]) {
+		throw Error(`Missing required variable: ${EPP_INTERNAL_PATH_VAR}`);
+	}
+
+	let headers = Object.assign({}, r.headersIn);
+	headers[EPP_HOST_HEADER] = r.variables[EPP_HOST_HEADER_VAR];
+	headers[EPP_PORT_HEADER] = r.variables[EPP_PORT_HEADER_VAR];
 
-// extractModel extracts the model name from the request body.
-function extractModel(r) {
 	try {
-		var body = JSON.parse(r.requestText);
-		if (body && body.model !== undefined) {
-			return String(body.model);
+		const response = await ngx.fetch(SHIM_URI, {
+			method: r.method,
+			headers: headers,
+			body: r.requestText,
+		});
+		const endpointHeader = response.headers.get(ENDPOINT_HEADER);
+		if (response.status === 200 && endpointHeader) {
+			r.variables[WORKLOAD_ENDPOINT_VAR] = endpointHeader;
+			r.log(
+				`found inference endpoint from EndpointPicker: ${r.variables[WORKLOAD_ENDPOINT_VAR]}`,
+			);
+		} else {
+			const body = await response.text();
+			r.error(
+				`could not get specific inference endpoint from EndpointPicker; ` +
+					`status: ${response.status}; body: ${body}`,
+			);
 		}
-	} catch (e) {
-		r.error(`error parsing request body for model name: ${e.message}`);
-		return '';
+	} catch (err) {
+		r.error(`Error in ngx.fetch: ${err}`);
 	}
-	r.error('request body does not contain model parameter');
-	return '';
+
+	// If performing a rewrite, $request_uri won't be used,
+	// so we have to preserve args in the internal redirect.
+	let args = qs.stringify(r.args);
+	if (args) {
+		args = '?' + args;
+	}
+
+	r.internalRedirect(r.variables[EPP_INTERNAL_PATH_VAR] + args);
 }
 
-export default { extractModel };
+export default { getEndpoint };
diff --git a/internal/controller/nginx/modules/test/epp.test.js b/internal/controller/nginx/modules/test/epp.test.js
index 6994423e7a..c2a4528694 100644
--- a/internal/controller/nginx/modules/test/epp.test.js
+++ b/internal/controller/nginx/modules/test/epp.test.js
@@ -1,52 +1,106 @@
 import { default as epp } from '../src/epp.js';
-import { expect, describe, it } from 'vitest';
-
-function makeRequest(body) {
-	let r = {
-		// Test mocks
-		error(msg) {
-			r.variables.error = msg;
-		},
-		requestText: body,
-		variables: {},
-	};
+import { expect, describe, it, beforeEach, afterEach, vi } from 'vitest';
 
-	return r;
+function makeRequest({
+	method = 'POST',
+	headersIn = {},
+	args = {},
+	requestText = '',
+	variables = {},
+} = {}) {
+	return {
+		method,
+		headersIn,
+		requestText,
+		variables,
+		args,
+		error: vi.fn(),
+		log: vi.fn(),
+		internalRedirect: vi.fn(),
+	};
 }
 
-describe('extractModel', () => {
-	const tests = [
-		{
-			name: 'returns the model value',
-			body: '{"model":"gpt-4"}',
-			model: 'gpt-4',
-			error: undefined,
-		},
-		{
-			name: 'returns empty string if model is missing',
-			body: '{"foo":1}',
-			model: '',
-			error: 'request body does not contain model parameter',
-		},
-		{
-			name: 'returns empty string for invalid JSON',
-			body: 'not-json',
-			model: '',
-			error: `error parsing request body for model name: Unexpected token 'o', "not-json" is not valid JSON`,
-		},
-		{
-			name: 'empty request body',
-			body: '',
-			model: '',
-			error: 'error parsing request body for model name: Unexpected end of JSON input',
-		},
-	];
-
-	tests.forEach((test) => {
-		it(test.name, () => {
-			let r = makeRequest(test.body);
-			expect(epp.extractModel(r)).to.equal(test.model);
-			expect(r.variables.error).to.equal(test.error);
+describe('getEndpoint', () => {
+	let originalNgx;
+	beforeEach(() => {
+		originalNgx = globalThis.ngx;
+	});
+	afterEach(() => {
+		globalThis.ngx = originalNgx;
+	});
+
+	it('throws if host or port is missing', async () => {
+		const r = makeRequest({ variables: { epp_internal_path: '/foo' } });
+		await expect(epp.getEndpoint(r)).rejects.toThrow(/Missing required variables/);
+	});
+
+	it('throws if internal path is missing', async () => {
+		const r = makeRequest({ variables: { epp_host: 'host', epp_port: '1234' } });
+		await expect(epp.getEndpoint(r)).rejects.toThrow(/Missing required variable/);
+	});
+
+	it('sets endpoint and logs on 200 with endpoint header', async () => {
+		const endpoint = 'http://endpoint';
+		globalThis.ngx = {
+			fetch: vi.fn().mockResolvedValue({
+				status: 200,
+				headers: { get: () => endpoint },
+				text: vi.fn(),
+			}),
+		};
+		const r = makeRequest({
+			variables: { epp_host: 'host', epp_port: '1234', epp_internal_path: '/foo' },
+		});
+		await epp.getEndpoint(r);
+		expect(r.variables.inference_workload_endpoint).toBe(endpoint);
+		expect(r.log).toHaveBeenCalledWith(expect.stringContaining(endpoint));
+		expect(r.internalRedirect).toHaveBeenCalledWith('/foo');
+	});
+
+	it('calls error if response is not 200 or endpoint header missing', async () => {
+		globalThis.ngx = {
+			fetch: vi.fn().mockResolvedValue({
+				status: 404,
+				headers: { get: () => null },
+				text: vi.fn().mockResolvedValue('fail'),
+			}),
+		};
+		const r = makeRequest({
+			variables: { epp_host: 'host', epp_port: '1234', epp_internal_path: '/foo' },
+		});
+		await epp.getEndpoint(r);
+		expect(r.error).toHaveBeenCalledWith(
+			expect.stringContaining('could not get specific inference endpoint'),
+		);
+		expect(r.internalRedirect).toHaveBeenCalledWith('/foo');
+	});
+
+	it('calls error if fetch throws', async () => {
+		globalThis.ngx = {
+			fetch: vi.fn().mockRejectedValue(new Error('network fail')),
+		};
+		const r = makeRequest({
+			variables: { epp_host: 'host', epp_port: '1234', epp_internal_path: '/foo' },
+		});
+		await epp.getEndpoint(r);
+		expect(r.error).toHaveBeenCalledWith(expect.stringContaining('Error in ngx.fetch'));
+		expect(r.internalRedirect).toHaveBeenCalledWith('/foo');
+	});
+
+	it('preserves args in internal redirect when args are present', async () => {
+		const endpoint = 'http://endpoint';
+		globalThis.ngx = {
+			fetch: vi.fn().mockResolvedValue({
+				status: 200,
+				headers: { get: () => endpoint },
+				text: vi.fn(),
+			}),
+		};
+		const r = makeRequest({
+			variables: { epp_host: 'host', epp_port: '1234', epp_internal_path: '/foo' },
+			args: { a: '1', b: '2' },
 		});
+		await epp.getEndpoint(r);
+		expect(r.internalRedirect).toHaveBeenCalledWith('/foo?a=1&b=2');
 	});
 });
diff --git a/internal/controller/state/dataplane/configuration.go b/internal/controller/state/dataplane/configuration.go
index 52306f4e0b..59030c0ca7 100644
--- a/internal/controller/state/dataplane/configuration.go
+++ b/internal/controller/state/dataplane/configuration.go
@@ -374,12 +374,13 @@ func newBackendGroup(
 	gatewayName types.NamespacedName,
 	sourceNsName types.NamespacedName,
 	ruleIdx int,
-) BackendGroup {
+) (BackendGroup, bool) {
 	var backends []Backend
 
 	if len(refs) > 0 {
 		backends = make([]Backend, 0, len(refs))
 	}
+	var inferencePoolBackendExists bool
 
 	for _, ref := range refs {
 		if ref.IsMirrorBackend {
@@ -391,11 +392,14 @@ func newBackendGroup(
 			valid = false
 		}
 
+		inferencePoolBackendExists = inferencePoolBackendExists || ref.IsInferencePool
+
 		backends = append(backends, Backend{
-			UpstreamName: ref.ServicePortReference(),
-			Weight:       ref.Weight,
-			Valid:        valid,
-			VerifyTLS:    convertBackendTLS(ref.BackendTLSPolicy, gatewayName),
+			UpstreamName:         ref.ServicePortReference(),
+			Weight:               ref.Weight,
+			Valid:                valid,
+			VerifyTLS:            convertBackendTLS(ref.BackendTLSPolicy, gatewayName),
+			EndpointPickerConfig: ref.EndpointPickerConfig,
 		})
 	}
 
@@ -403,7 +407,7 @@ func newBackendGroup(
 		Backends: backends,
 		Source:   sourceNsName,
 		RuleIdx:  ruleIdx,
-	}
+	}, inferencePoolBackendExists
 }
 
 func convertBackendTLS(btp *graph.BackendTLSPolicy, gwNsName types.NamespacedName) *VerifyTLS {
@@ -595,10 +599,19 @@ func (hpr *hostPathRules) upsertRoute(
 				}
 
 				hostRule.GRPC = GRPC
+				backendGroup, inferencePoolBackendExists := newBackendGroup(
+					rule.BackendRefs,
+					listener.GatewayName,
+					routeNsName,
+					idx,
+				)
+				if inferencePoolBackendExists {
+					hostRule.HasInferenceBackends = true
+				}
 
 				hostRule.MatchRules = append(hostRule.MatchRules, MatchRule{
 					Source:       objectSrc,
-					BackendGroup: newBackendGroup(rule.BackendRefs, listener.GatewayName, routeNsName, idx),
+					BackendGroup: backendGroup,
 					Filters:      filters,
 					Match:        convertMatch(m),
 				})
diff --git a/internal/controller/state/dataplane/configuration_test.go b/internal/controller/state/dataplane/configuration_test.go
index b329b9d46a..3e1697590d 100644
--- a/internal/controller/state/dataplane/configuration_test.go
+++ b/internal/controller/state/dataplane/configuration_test.go
@@ -2777,6 +2777,93 @@ func TestBuildConfiguration_Plus(t *testing.T) {
 	}
 }
 
+func TestUpsertRoute_PathRuleHasInferenceBackend(t *testing.T) {
+	t.Parallel()
+	g := NewWithT(t)
+
+	// Set up a minimal route with one BackendRef marked as IsInferencePool
+	backendRef := graph.BackendRef{
+		SvcNsName:       types.NamespacedName{Name: "svc", Namespace: "test"},
+		ServicePort:     apiv1.ServicePort{Port: 80},
+		Valid:           true,
+		IsInferencePool: true,
+	}
+
+	listenerName := "listener-80"
+	gwName := types.NamespacedName{Namespace: "test", Name: "gw"}
+
+	route := &graph.L7Route{
+		RouteType: graph.RouteTypeHTTP,
+		Source: &v1.HTTPRoute{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      "hr",
+				Namespace: "test",
+			},
+		},
+		Spec: graph.L7RouteSpec{
+			Rules: []graph.RouteRule{
+				{
+					ValidMatches: true,
+					Filters:      graph.RouteRuleFilters{Valid: true},
+					BackendRefs:  []graph.BackendRef{backendRef},
+					Matches: []v1.HTTPRouteMatch{
+						{
+							Path: &v1.HTTPPathMatch{
+								Type:  helpers.GetPointer(v1.PathMatchPathPrefix),
+								Value: helpers.GetPointer("/infer"),
+							},
+						},
+					},
+				},
+			},
+		},
+		ParentRefs: []graph.ParentRef{
+			{
+				Attachment: &graph.ParentRefAttachmentStatus{
+					AcceptedHostnames: map[string][]string{
+						graph.CreateGatewayListenerKey(gwName, listenerName): {"*"},
+					},
+				},
+			},
+		},
+		Valid: true,
+	}
+
+	listener := &graph.Listener{
+		Name:        listenerName,
+		GatewayName: gwName,
+		Valid:       true,
+		Routes: map[graph.RouteKey]*graph.L7Route{
+			graph.CreateRouteKey(route.Source): route,
+		},
+	}
+
+	gateway := &graph.Gateway{
+		Source: &v1.Gateway{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      "gw",
+				Namespace: "test",
+			},
+		},
+		Listeners: []*graph.Listener{listener},
+	}
+
+	hpr := newHostPathRules()
+	hpr.upsertRoute(route, listener, gateway)
+
+	// Find the PathRule for "/infer"
+	found := false
+	for _, rules := range hpr.rulesPerHost {
+		for _, pr := range rules {
+			if pr.Path == "/infer" {
+				found = true
+				g.Expect(pr.HasInferenceBackends).To(BeTrue())
+			}
+		}
+	}
+	g.Expect(found).To(BeTrue(), "PathRule for '/infer' not found")
+}
+
 func TestNewBackendGroup_Mirror(t *testing.T) {
 	t.Parallel()
 	g := NewWithT(t)
@@ -2788,7 +2875,7 @@ func TestNewBackendGroup_Mirror(t *testing.T) {
 		IsMirrorBackend: true,
 	}
 
-	group := newBackendGroup([]graph.BackendRef{backendRef}, types.NamespacedName{}, types.NamespacedName{}, 0)
+	group, _ := newBackendGroup([]graph.BackendRef{backendRef}, types.NamespacedName{}, types.NamespacedName{}, 0)
 
 	g.Expect(group.Backends).To(BeEmpty())
 }
diff --git a/internal/controller/state/dataplane/types.go b/internal/controller/state/dataplane/types.go
index 08e7e0867b..1637c1f408 100644
--- a/internal/controller/state/dataplane/types.go
+++ b/internal/controller/state/dataplane/types.go
@@ -5,6 +5,7 @@ import (
 
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/controller/nginx/config/policies"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/graph"
@@ -137,6 +138,8 @@ type PathRule struct {
 	Policies []policies.Policy
 	// GRPC indicates if this is a gRPC rule
 	GRPC bool
+	// HasInferenceBackends indicates whether the PathRule contains a backend for an inference workload.
+	HasInferenceBackends bool
 }
 
 // InvalidHTTPFilter is a special filter for handling the case when configured filters are invalid.
@@ -323,6 +326,9 @@ func (bg *BackendGroup) Name() string {
 type Backend struct {
 	// VerifyTLS holds the backend TLS verification configuration.
 	VerifyTLS *VerifyTLS
+	// EndpointPickerConfig holds the configuration for the EndpointPicker for this backend.
+	// This is set if this backend is for an inference workload.
+	EndpointPickerConfig *inference.EndpointPickerRef
 	// UpstreamName is the name of the upstream for this backend.
 	UpstreamName string
 	// Weight is the weight of the BackendRef.
diff --git a/internal/controller/state/graph/backend_refs.go b/internal/controller/state/graph/backend_refs.go
index e14d0fb0fa..95ce6df0b9 100644
--- a/internal/controller/state/graph/backend_refs.go
+++ b/internal/controller/state/graph/backend_refs.go
@@ -9,6 +9,7 @@ import (
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/validation/field"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	gatewayv1 "sigs.k8s.io/gateway-api/apis/v1"
 	"sigs.k8s.io/gateway-api/apis/v1alpha3"
 
@@ -30,6 +31,8 @@ const (
 type BackendRef struct {
 	// BackendTLSPolicy is the BackendTLSPolicy of the Service which is referenced by the backendRef.
 	BackendTLSPolicy *BackendTLSPolicy
+	// EndpointPickerConfig is the configuration for the EndpointPicker, if this backendRef is for an InferencePool.
+	EndpointPickerConfig *inference.EndpointPickerRef
 	// InvalidForGateways is a map of Gateways for which this BackendRef is invalid for, with the corresponding
 	// condition. Certain NginxProxy configurations may result in a backend not being valid for some Gateways,
 	// but not others.
@@ -45,6 +48,8 @@ type BackendRef struct {
 	Valid bool
 	// IsMirrorBackend indicates whether the BackendGroup is for a mirrored backend.
 	IsMirrorBackend bool
+	// IsInferencePool indicates whether the BackendRef is for an InferencePool.
+	IsInferencePool bool
 }
 
 // ServicePortReference returns a string representation for the service and port that is referenced by the BackendRef.
@@ -118,6 +123,7 @@ func addBackendRefsToRules(
 				if pool, exists := referencedInferencePools[poolName]; exists {
 					port := gatewayv1.PortNumber(pool.Source.Spec.TargetPorts[0].Number)
 					ref.Port = helpers.GetPointer(port)
+					ref.EndpointPickerConfig = &pool.Source.Spec.EndpointPickerRef
 				}
 			}
 
@@ -181,10 +187,12 @@ func createBackendRef(
 
 	if !valid {
 		backendRef := BackendRef{
-			Weight:             weight,
-			Valid:              false,
-			IsMirrorBackend:    ref.MirrorBackendIdx != nil,
-			InvalidForGateways: make(map[types.NamespacedName]conditions.Condition),
+			Weight:               weight,
+			Valid:                false,
+			IsMirrorBackend:      ref.MirrorBackendIdx != nil,
+			IsInferencePool:      ref.IsInferencePool,
+			InvalidForGateways:   make(map[types.NamespacedName]conditions.Condition),
+			EndpointPickerConfig: ref.EndpointPickerConfig,
 		}
 
 		return backendRef, []conditions.Condition{cond}
@@ -198,12 +206,14 @@ func createBackendRef(
 	svcIPFamily, svcPort, err := getIPFamilyAndPortFromRef(ref.BackendRef, svcNsName, services, refPath)
 	if err != nil {
 		backendRef := BackendRef{
-			Weight:             weight,
-			Valid:              false,
-			SvcNsName:          svcNsName,
-			ServicePort:        v1.ServicePort{},
-			IsMirrorBackend:    ref.MirrorBackendIdx != nil,
-			InvalidForGateways: make(map[types.NamespacedName]conditions.Condition),
+			Weight:               weight,
+			Valid:                false,
+			SvcNsName:            svcNsName,
+			ServicePort:          v1.ServicePort{},
+			IsMirrorBackend:      ref.MirrorBackendIdx != nil,
+			IsInferencePool:      ref.IsInferencePool,
+			InvalidForGateways:   make(map[types.NamespacedName]conditions.Condition),
+			EndpointPickerConfig: ref.EndpointPickerConfig,
 		}
 
 		return backendRef, []conditions.Condition{conditions.NewRouteBackendRefRefBackendNotFound(err.Error())}
@@ -220,12 +230,14 @@ func createBackendRef(
 		// Check if externalName field is empty or whitespace-only
 		if strings.TrimSpace(svc.Spec.ExternalName) == "" {
 			backendRef := BackendRef{
-				SvcNsName:          svcNsName,
-				ServicePort:        svcPort,
-				Weight:             weight,
-				Valid:              false,
-				IsMirrorBackend:    ref.MirrorBackendIdx != nil,
-				InvalidForGateways: invalidForGateways,
+				SvcNsName:            svcNsName,
+				ServicePort:          svcPort,
+				Weight:               weight,
+				Valid:                false,
+				IsMirrorBackend:      ref.MirrorBackendIdx != nil,
+				IsInferencePool:      ref.IsInferencePool,
+				InvalidForGateways:   invalidForGateways,
+				EndpointPickerConfig: ref.EndpointPickerConfig,
 			}
 
 			return backendRef, append(conds, conditions.NewRouteBackendRefUnsupportedValue(
@@ -249,12 +261,14 @@ func createBackendRef(
 	)
 	if err != nil {
 		backendRef := BackendRef{
-			SvcNsName:          svcNsName,
-			ServicePort:        svcPort,
-			Weight:             weight,
-			Valid:              false,
-			IsMirrorBackend:    ref.MirrorBackendIdx != nil,
-			InvalidForGateways: invalidForGateways,
+			SvcNsName:            svcNsName,
+			ServicePort:          svcPort,
+			Weight:               weight,
+			Valid:                false,
+			IsMirrorBackend:      ref.MirrorBackendIdx != nil,
+			IsInferencePool:      ref.IsInferencePool,
+			InvalidForGateways:   invalidForGateways,
+			EndpointPickerConfig: ref.EndpointPickerConfig,
 		}
 
 		return backendRef, append(conds, conditions.NewRouteBackendRefUnsupportedValue(err.Error()))
@@ -264,13 +278,15 @@ func createBackendRef(
 		err = validateRouteBackendRefAppProtocol(route.RouteType, *svcPort.AppProtocol, backendTLSPolicy)
 		if err != nil {
 			backendRef := BackendRef{
-				SvcNsName:          svcNsName,
-				BackendTLSPolicy:   backendTLSPolicy,
-				ServicePort:        svcPort,
-				Weight:             weight,
-				Valid:              false,
-				IsMirrorBackend:    ref.MirrorBackendIdx != nil,
-				InvalidForGateways: invalidForGateways,
+				SvcNsName:            svcNsName,
+				BackendTLSPolicy:     backendTLSPolicy,
+				ServicePort:          svcPort,
+				Weight:               weight,
+				Valid:                false,
+				IsMirrorBackend:      ref.MirrorBackendIdx != nil,
+				IsInferencePool:      ref.IsInferencePool,
+				InvalidForGateways:   invalidForGateways,
+				EndpointPickerConfig: ref.EndpointPickerConfig,
 			}
 
 			return backendRef, append(conds, conditions.NewRouteBackendRefUnsupportedProtocol(err.Error()))
@@ -278,13 +294,15 @@ func createBackendRef(
 	}
 
 	backendRef := BackendRef{
-		SvcNsName:          svcNsName,
-		BackendTLSPolicy:   backendTLSPolicy,
-		ServicePort:        svcPort,
-		Valid:              true,
-		Weight:             weight,
-		IsMirrorBackend:    ref.MirrorBackendIdx != nil,
-		InvalidForGateways: invalidForGateways,
+		SvcNsName:            svcNsName,
+		BackendTLSPolicy:     backendTLSPolicy,
+		ServicePort:          svcPort,
+		Valid:                true,
+		Weight:               weight,
+		IsMirrorBackend:      ref.MirrorBackendIdx != nil,
+		IsInferencePool:      ref.IsInferencePool,
+		InvalidForGateways:   invalidForGateways,
+		EndpointPickerConfig: ref.EndpointPickerConfig,
 	}
 
 	return backendRef, conds
diff --git a/internal/controller/state/graph/backend_refs_test.go b/internal/controller/state/graph/backend_refs_test.go
index 3f05f793a6..b786daed9b 100644
--- a/internal/controller/state/graph/backend_refs_test.go
+++ b/internal/controller/state/graph/backend_refs_test.go
@@ -1231,9 +1231,11 @@ func TestAddBackendRefsToRules(t *testing.T) {
 					ServicePort: v1.ServicePort{
 						Port: 80,
 					},
-					Valid:              true,
-					Weight:             1,
-					InvalidForGateways: map[types.NamespacedName]conditions.Condition{},
+					Valid:                true,
+					Weight:               1,
+					InvalidForGateways:   map[types.NamespacedName]conditions.Condition{},
+					IsInferencePool:      true,
+					EndpointPickerConfig: &inference.EndpointPickerRef{},
 				},
 			},
 			expectedConditions: nil,
diff --git a/internal/controller/state/graph/graph_test.go b/internal/controller/state/graph/graph_test.go
index da0ca04d47..1a367e5977 100644
--- a/internal/controller/state/graph/graph_test.go
+++ b/internal/controller/state/graph/graph_test.go
@@ -223,10 +223,12 @@ func TestBuildGraph(t *testing.T) {
 					Namespace: testNs,
 					Name:      controller.CreateInferencePoolServiceName("ipool"),
 				},
-				ServicePort:        v1.ServicePort{Port: 80},
-				Valid:              true,
-				Weight:             1,
-				InvalidForGateways: map[types.NamespacedName]conditions.Condition{},
+				ServicePort:          v1.ServicePort{Port: 80},
+				Valid:                true,
+				Weight:               1,
+				InvalidForGateways:   map[types.NamespacedName]conditions.Condition{},
+				IsInferencePool:      true,
+				EndpointPickerConfig: &inference.EndpointPickerRef{},
 			},
 		}
 		rbrs := []RouteBackendRef{
diff --git a/internal/controller/state/graph/httproute.go b/internal/controller/state/graph/httproute.go
index de7a85370d..ed8d46a664 100644
--- a/internal/controller/state/graph/httproute.go
+++ b/internal/controller/state/graph/httproute.go
@@ -210,11 +210,26 @@ func processHTTPRouteRule(
 			}
 		}
 
-		var rbr RouteBackendRef
+		rbr := RouteBackendRef{
+			BackendRef: b.BackendRef,
+		}
+
 		// If route specifies an InferencePool backend, we need to convert it to its associated
 		// headless Service backend (that we created), so nginx config can be built properly.
 		// Only do this if the InferencePool actually exists.
 		if inferencePoolBackend(b, routeNamespace, inferencePools) {
+			// We don't support traffic splitting at the Route level for
+			// InferencePool backends, so if there's more than one backendRef, and one of them
+			// is an InferencePool, we mark the rule as invalid.
+			if len(specRule.BackendRefs) > 1 {
+				err := field.Forbidden(
+					rulePath.Child("backendRefs"),
+					"cannot use InferencePool backend when multiple backendRefs are specified in a single rule",
+				)
+				errors.invalid = append(errors.invalid, err)
+				break
+			}
+
 			svcName := controller.CreateInferencePoolServiceName(string(b.Name))
 			rbr = RouteBackendRef{
 				IsInferencePool: true,
@@ -228,10 +243,6 @@ func processHTTPRouteRule(
 					Weight: b.Weight,
 				},
 			}
-		} else {
-			rbr = RouteBackendRef{
-				BackendRef: b.BackendRef,
-			}
 		}
 
 		rbr.Filters = interfaceFilters
diff --git a/internal/controller/state/graph/httproute_test.go b/internal/controller/state/graph/httproute_test.go
index 0e06e5bf7e..d6d77c7296 100644
--- a/internal/controller/state/graph/httproute_test.go
+++ b/internal/controller/state/graph/httproute_test.go
@@ -1213,6 +1213,67 @@ func TestBuildHTTPRouteWithMirrorRoutes(t *testing.T) {
 	g.Expect(helpers.Diff(expectedMirrorRoute, routes[mirrorRouteKey])).To(BeEmpty())
 }
 
+func TestProcessHTTPRouteRule_InferencePoolWithMultipleBackendRefs(t *testing.T) {
+	t.Parallel()
+	g := NewWithT(t)
+
+	validator := &validationfakes.FakeHTTPFieldsValidator{}
+	inferencePoolName := "ipool"
+	routeNamespace := "test"
+	inferencePools := map[types.NamespacedName]*inference.InferencePool{
+		{Namespace: routeNamespace, Name: inferencePoolName}: {},
+	}
+
+	// BackendRef 1: InferencePool
+	backendRef1 := gatewayv1.HTTPBackendRef{
+		BackendRef: gatewayv1.BackendRef{
+			BackendObjectReference: gatewayv1.BackendObjectReference{
+				Group:     helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup),
+				Kind:      helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool),
+				Name:      gatewayv1.ObjectName(inferencePoolName),
+				Namespace: helpers.GetPointer(gatewayv1.Namespace(routeNamespace)),
+			},
+		},
+	}
+	// BackendRef 2: Service
+	backendRef2 := gatewayv1.HTTPBackendRef{
+		BackendRef: gatewayv1.BackendRef{
+			BackendObjectReference: gatewayv1.BackendObjectReference{
+				Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service),
+				Name: "backend",
+			},
+		},
+	}
+
+	specRule := gatewayv1.HTTPRouteRule{
+		Matches: []gatewayv1.HTTPRouteMatch{
+			{
+				Path: &gatewayv1.HTTPPathMatch{
+					Type:  helpers.GetPointer(gatewayv1.PathMatchPathPrefix),
+					Value: helpers.GetPointer("/"),
+				},
+			},
+		},
+		BackendRefs: []gatewayv1.HTTPBackendRef{backendRef1, backendRef2},
+	}
+
+	rulePath := field.NewPath("spec").Child("rules").Index(0)
+
+	routeRule, errs := processHTTPRouteRule(
+		specRule,
+		routeNamespace,
+		rulePath,
+		validator,
+		nil,
+		inferencePools,
+	)
+
+	g.Expect(routeRule.RouteBackendRefs).To(BeEmpty())
+	g.Expect(errs.invalid).To(HaveLen(1))
+	errMsg := "cannot use InferencePool backend when multiple backendRefs are specified in a single rule"
+	g.Expect(errs.invalid[0].Error()).To(ContainSubstring(errMsg))
+}
+
 func TestValidateMatch(t *testing.T) {
 	t.Parallel()
 	createAllValidValidator := func() *validationfakes.FakeHTTPFieldsValidator {
diff --git a/internal/controller/state/graph/route_common.go b/internal/controller/state/graph/route_common.go
index f3d3b04e4a..22067c6d44 100644
--- a/internal/controller/state/graph/route_common.go
+++ b/internal/controller/state/graph/route_common.go
@@ -166,6 +166,9 @@ type RouteBackendRef struct {
 	// If this backend is defined in a RequestMirror filter, this value will indicate the filter's index.
 	MirrorBackendIdx *int
 
+	// EndpointPickerConfig is the configuration for the EndpointPicker, if this backendRef is for an InferencePool.
+	EndpointPickerConfig *inference.EndpointPickerRef
+
 	Filters []any
 
 	// IsInferencePool indicates if this backend is an InferencePool disguised as a Service.
diff --git a/internal/framework/types/types.go b/internal/framework/types/types.go
index bf61bd23d7..0aeccd008d 100644
--- a/internal/framework/types/types.go
+++ b/internal/framework/types/types.go
@@ -5,3 +5,14 @@ import "sigs.k8s.io/controller-runtime/pkg/client"
 // ObjectType is used when we only care about the type of client.Object.
 // The fields of the client.Object may be empty.
 type ObjectType client.Object
+
+// Constants used for communication with the EndpointPicker service when using the Inference Extension.
+const (
+	// EPPEndpointHostHeader is the HTTP header used to specify the EPP endpoint host.
+	EPPEndpointHostHeader = "X-EPP-Host"
+	// EPPEndpointPortHeader is the HTTP header used to specify the EPP endpoint port.
+	EPPEndpointPortHeader = "X-EPP-Port"
+	// GoShimPort is the default port for the Go EPP shim server to listen on. If collisions become a problem,
+	// we can make this configurable via the NginxProxy resource.
+	GoShimPort = 54800 // why 54800? Sum "nginx" in ASCII and multiply by 100.
+)

From 1609e25f874996a73ae8650e8d798d6c1a354a8e Mon Sep 17 00:00:00 2001
From: Saloni Choudhary <146118978+salonichf5@users.noreply.github.com>
Date: Wed, 1 Oct 2025 21:21:34 +0530
Subject: [PATCH 05/12] Adds status information to describe the state of
 Inference Pools (#3970)

Update the inference extension design doc to specify the statuses that need to be set on Inference Pools to describe their state
---
 docs/proposals/gateway-inference-extension.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/docs/proposals/gateway-inference-extension.md b/docs/proposals/gateway-inference-extension.md
index fb6abe3ae2..a5ba30722f 100644
--- a/docs/proposals/gateway-inference-extension.md
+++ b/docs/proposals/gateway-inference-extension.md
@@ -106,6 +106,14 @@ InferenceObjective represents the desired state of a specific model use case. As
 
 It is my impression that this API is purely for the EPP to handle, and does not need to be handled by NGINX Gateway Fabric.
 
+### Inference Status
+
+Each InferencePool publishes two conditions that together describe its overall state. The first is the `Accepted` condition, which communicates whether the pool is referenced by an HTTPRoute that the Gateway has accepted. When the route is not accepted, this condition is explicitly set to `False` with the reason `InferencePoolReasonHTTPRouteNotAccepted`, making it clear that the Gateway rejected the route referencing the pool.
+
+The second is the `ResolvedRefs` condition, which reflects whether the `EndpointPickerRef` associated with the pool is valid. If it is misconfigured, such as being an unsupported kind, left undefined, or pointing to a non-existent Service, this condition is set to `False` with the reason `InferencePoolReasonInvalidExtensionRef`.
+
+The status of an InferencePool records the Gateway as its parent reference and associates it with the relevant conditions; when all conditions are `True`, the pool is valid and traffic can be directed to it.
+
 ### Personas and Processes
 
 Two new personas are introduced, the `Inference Platform Owner/Admin` and `Inference Workload Owner`.

From aea4ef31216b354956f14df3454e4795cf1ca205 Mon Sep 17 00:00:00 2001
From: bjee19 <139261241+bjee19@users.noreply.github.com>
Date: Mon, 6 Oct 2025 08:52:14 -0700
Subject: [PATCH 06/12] Update gateway inference extension proposal security
 considerations (#4006)

Update gateway inference extension proposal on inability to provide a secure TLS connection to EPP.
---
 docs/proposals/gateway-inference-extension.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/docs/proposals/gateway-inference-extension.md b/docs/proposals/gateway-inference-extension.md
index a5ba30722f..0b549dc3f6 100644
--- a/docs/proposals/gateway-inference-extension.md
+++ b/docs/proposals/gateway-inference-extension.md
@@ -134,7 +134,11 @@ For development purposes, the [Getting started guide](https://gateway-api-infere
 
 ## Security Considerations
 
-If the Endpoint Picker (EPP) supports it, we should use a secure TLS connection. This ensures an encrypted and authenticated communication channel between the NGINX data plane and the EPP. For production environments, an integration with `cert-manager` is likely the best solution, as we recommend this for various other secure channels within the NGF ecosystem. Otherwise, our control plane may have to provision certificates in the default case (similar to NGF's startup `cert-generator` Job).
+A secure TLS gRPC connection between the Endpoint Picker (EPP) and the Go Shim Server is ideal. This would ensure an encrypted and authenticated communication channel between the NGINX data plane and the EPP. However, this is not possible with the current EPP implementation and is a [known issue](https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/582), with a separate issue currently open to [provide further TLS support](https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/1556).
+
+Since NGF does not provision the EPP, is not in charge of modifying it, and the current [EPP Helm template](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/config/charts/inferencepool/templates/epp-deployment.yaml) does not support providing extra volume/volumeMounts, there is no way to mount a cert on the EPP. Even if specifying extra volume/volumeMounts were allowed through a feature request, the implementation on our side would be hacky and unconventional.
+
+Given that the gateway inference project remains in `Alpha` and explicitly warns against production use, we will follow existing implementations and use an insecure gRPC connection to the EPP. For our goal of meeting the API’s core specifications with a basic solution, secure gRPC is not strictly required at this stage.
 
 At some point, there may be opportunities for attaching Policies (like a BackendTLSPolicy) to an InferenceModel to secure the NGINX -> AI workload connection, however that is not in scope for now.
 

From 75998ba169a8a2a88d18b1f9d2342ae4a8841d86 Mon Sep 17 00:00:00 2001
From: Saloni Choudhary <146118978+salonichf5@users.noreply.github.com>
Date: Mon, 6 Oct 2025 23:20:57 +0530
Subject: [PATCH 07/12] Add status to Inference Pools (#4005)

Add status to Inference Pools

Problem: Users want to see the current status of their Inference pools

Solution: Add status for inference pools
---
 internal/controller/handler.go                |   4 +-
 .../controller/state/conditions/conditions.go |  53 +++
 internal/controller/state/graph/graph.go      |   3 +-
 internal/controller/state/graph/graph_test.go |  26 +-
 .../controller/state/graph/inferencepools.go  | 100 +++-
 .../state/graph/inferencepools_test.go        | 439 +++++++++++++++---
 .../controller/status/prepare_requests.go     |  54 +++
 .../status/prepare_requests_test.go           | 238 ++++++++++
 internal/controller/status/status_setters.go  |  63 +++
 .../controller/status/status_setters_test.go  | 301 ++++++++++++
 tests/go.mod                                  |   3 +-
 tests/go.sum                                  |   6 +-
 12 files changed, 1219 insertions(+), 71 deletions(-)

diff --git a/internal/controller/handler.go b/internal/controller/handler.go
index 2108739a55..5e3b91a8ee 100644
--- a/internal/controller/handler.go
+++ b/internal/controller/handler.go
@@ -361,17 +361,19 @@ func (h *eventHandlerImpl) updateStatuses(ctx context.Context, gr *graph.Graph,
 		transitionTime,
 		h.cfg.gatewayCtlrName,
 	)
+	inferencePoolReqs := status.PrepareInferencePoolRequests(gr.ReferencedInferencePools, transitionTime)
 
 	reqs := make(
 		[]status.UpdateRequest,
 		0,
-		len(gcReqs)+len(routeReqs)+len(polReqs)+len(ngfPolReqs)+len(snippetsFilterReqs),
+		len(gcReqs)+len(routeReqs)+len(polReqs)+len(ngfPolReqs)+len(snippetsFilterReqs)+len(inferencePoolReqs),
 	)
 	reqs = append(reqs, gcReqs...)
 	reqs = append(reqs, routeReqs...)
 	reqs = append(reqs, polReqs...)
 	reqs = append(reqs, ngfPolReqs...)
 	reqs = append(reqs, snippetsFilterReqs...)
+	reqs = append(reqs, inferencePoolReqs...)
 
 	h.cfg.statusUpdater.UpdateGroup(ctx, groupAllExceptGateways, reqs...)
 
diff --git a/internal/controller/state/conditions/conditions.go b/internal/controller/state/conditions/conditions.go
index ad5d00a0dc..1664aa85b6 100644
--- a/internal/controller/state/conditions/conditions.go
+++ b/internal/controller/state/conditions/conditions.go
@@ -4,6 +4,7 @@ import (
 	"fmt"
 
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	v1 "sigs.k8s.io/gateway-api/apis/v1"
 	"sigs.k8s.io/gateway-api/apis/v1alpha2"
 
@@ -1108,3 +1109,55 @@ func NewBackendTLSPolicyNoValidCACertificate(message string) Condition {
 		Message: message,
 	}
 }
+
+// NewInferencePoolAccepted returns a Condition that indicates that the InferencePool is accepted by the Gateway.
+func NewInferencePoolAccepted() Condition {
+	return Condition{
+		Type:    string(inference.InferencePoolConditionAccepted),
+		Status:  metav1.ConditionTrue,
+		Reason:  string(inference.InferencePoolConditionAccepted),
+		Message: "InferencePool is accepted by the Gateway.",
+	}
+}
+
+// NewInferencePoolResolvedRefs returns a Condition that
+// indicates that all references in the InferencePool are resolved.
+func NewInferencePoolResolvedRefs() Condition {
+	return Condition{
+		Type:    string(inference.InferencePoolConditionResolvedRefs),
+		Status:  metav1.ConditionTrue,
+		Reason:  string(inference.InferencePoolConditionResolvedRefs),
+		Message: "Inference pool references a valid ExtensionRef.",
+	}
+}
+
+// NewDefaultInferenceConditions returns the default Conditions
+// that must be present in the status of an InferencePool.
+func NewDefaultInferenceConditions() []Condition {
+	return []Condition{
+		NewInferencePoolAccepted(),
+		NewInferencePoolResolvedRefs(),
+	}
+}
+
+// NewInferencePoolInvalidHTTPRouteNotAccepted returns a Condition that indicates that the InferencePool is not
+// accepted because the associated HTTPRoute is not accepted by the Gateway.
+func NewInferencePoolInvalidHTTPRouteNotAccepted(msg string) Condition {
+	return Condition{
+		Type:    string(inference.InferencePoolConditionAccepted),
+		Status:  metav1.ConditionFalse,
+		Reason:  string(inference.InferencePoolReasonHTTPRouteNotAccepted),
+		Message: msg,
+	}
+}
+
+// NewInferencePoolInvalidExtensionref returns a Condition that indicates that the references of the InferencePool
+// are not resolved because the ExtensionRef is invalid.
+func NewInferencePoolInvalidExtensionref(msg string) Condition {
+	return Condition{
+		Type:    string(inference.InferencePoolConditionResolvedRefs),
+		Status:  metav1.ConditionFalse,
+		Reason:  string(inference.InferencePoolReasonInvalidExtensionRef),
+		Message: msg,
+	}
+}
diff --git a/internal/controller/state/graph/graph.go b/internal/controller/state/graph/graph.go
index b5e13991e9..538a29a09d 100644
--- a/internal/controller/state/graph/graph.go
+++ b/internal/controller/state/graph/graph.go
@@ -260,7 +260,8 @@ func BuildGraph(
 		processedSnippetsFilters,
 		state.InferencePools,
 	)
-	referencedInferencePools := buildReferencedInferencePools(routes, gws, state.InferencePools)
+
+	referencedInferencePools := buildReferencedInferencePools(routes, gws, state.InferencePools, state.Services)
 
 	l4routes := buildL4RoutesForGateways(
 		state.TLSRoutes,
diff --git a/internal/controller/state/graph/graph_test.go b/internal/controller/state/graph/graph_test.go
index 1a367e5977..a49202d96e 100644
--- a/internal/controller/state/graph/graph_test.go
+++ b/internal/controller/state/graph/graph_test.go
@@ -223,12 +223,15 @@ func TestBuildGraph(t *testing.T) {
 					Namespace: testNs,
 					Name:      controller.CreateInferencePoolServiceName("ipool"),
 				},
-				ServicePort:          v1.ServicePort{Port: 80},
-				Valid:                true,
-				Weight:               1,
-				InvalidForGateways:   map[types.NamespacedName]conditions.Condition{},
-				IsInferencePool:      true,
-				EndpointPickerConfig: &inference.EndpointPickerRef{},
+				ServicePort:        v1.ServicePort{Port: 80},
+				Valid:              true,
+				Weight:             1,
+				InvalidForGateways: map[types.NamespacedName]conditions.Condition{},
+				IsInferencePool:    true,
+				EndpointPickerConfig: &inference.EndpointPickerRef{
+					Kind: kinds.Service,
+					Name: inference.ObjectName(controller.CreateInferencePoolServiceName("ipool")),
+				},
 			},
 		}
 		rbrs := []RouteBackendRef{
@@ -389,6 +392,10 @@ func TestBuildGraph(t *testing.T) {
 			TargetPorts: []inference.Port{
 				{Number: 80},
 			},
+			EndpointPickerRef: inference.EndpointPickerRef{
+				Kind: kinds.Service,
+				Name: inference.ObjectName(controller.CreateInferencePoolServiceName("ipool")),
+			},
 		},
 	}
 
@@ -1325,6 +1332,13 @@ func TestBuildGraph(t *testing.T) {
 			ReferencedInferencePools: map[types.NamespacedName]*ReferencedInferencePool{
 				client.ObjectKeyFromObject(inferencePool): {
 					Source: inferencePool,
+					Gateways: []*gatewayv1.Gateway{
+						gw1.Source,
+					},
+					HTTPRoutes: []*L7Route{
+						inferenceRoute,
+					},
+					Conditions: []conditions.Condition{},
 				},
 			},
 			ReferencedCaCertConfigMaps: map[types.NamespacedName]*CaCertConfigMap{
diff --git a/internal/controller/state/graph/inferencepools.go b/internal/controller/state/graph/inferencepools.go
index ada688bcc5..84e6d62df2 100644
--- a/internal/controller/state/graph/inferencepools.go
+++ b/internal/controller/state/graph/inferencepools.go
@@ -1,10 +1,15 @@
 package graph
 
 import (
+	"fmt"
+
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/types"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
+	apiv1 "sigs.k8s.io/gateway-api/apis/v1"
 
+	"github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds"
 )
@@ -14,6 +19,12 @@ import (
 type ReferencedInferencePool struct {
 	// Source is the original InferencePool that this ReferencedInferencePool is based on.
 	Source *inference.InferencePool
+	// Gateways are the Gateways that this ReferencedInferencePool is attached to.
+	Gateways []*apiv1.Gateway
+	// HTTPRoutes are the HTTPRoutes that reference this InferencePool.
+	HTTPRoutes []*L7Route
+	// Conditions contains the conditions that should be applied to the InferencePool.
+	Conditions []conditions.Condition
 }
 
 // buildReferencedInferencePools builds a map of InferencePools that are referenced by HTTPRoutes
@@ -22,8 +33,9 @@ func buildReferencedInferencePools(
 	routes map[RouteKey]*L7Route,
 	gws map[types.NamespacedName]*Gateway,
 	inferencePools map[types.NamespacedName]*inference.InferencePool,
+	services map[types.NamespacedName]*v1.Service,
 ) map[types.NamespacedName]*ReferencedInferencePool {
-	referencedInferencePools := make(map[types.NamespacedName]*ReferencedInferencePool)
+	referencedInferencePools := make(map[types.NamespacedName]*ReferencedInferencePool, len(inferencePools))
 
 	for _, gw := range gws {
 		if gw == nil {
@@ -37,6 +49,17 @@ func buildReferencedInferencePools(
 		return nil
 	}
 
+	// validate each referenced InferencePool and add conditions.
+	for _, refPool := range referencedInferencePools {
+		if routeCond := validateInferencePoolRoutesAcceptance(refPool.Source, refPool.HTTPRoutes); routeCond != nil {
+			refPool.Conditions = append(refPool.Conditions, *routeCond)
+		}
+
+		if extensionRefCond := validateInferencePoolExtensionRef(refPool.Source, services); extensionRefCond != nil {
+			refPool.Conditions = append(refPool.Conditions, *extensionRefCond)
+		}
+	}
+
 	return referencedInferencePools
 }
 
@@ -48,8 +71,9 @@ func processInferencePoolsForGateway(
 	inferencePools map[types.NamespacedName]*inference.InferencePool,
 ) {
 	gwKey := client.ObjectKeyFromObject(gw.Source)
+
 	for _, route := range routes {
-		if !route.Valid || !routeBelongsToGateway(route.ParentRefs, gwKey) {
+		if !routeBelongsToGateway(route.ParentRefs, gwKey) {
 			continue
 		}
 
@@ -70,13 +94,83 @@ func processInferencePoolsForGateway(
 				}
 
 				if _, referenced := referencedInferencePools[poolName]; !referenced {
-					referencedInferencePools[poolName] = &ReferencedInferencePool{}
+					referencedInferencePools[poolName] = &ReferencedInferencePool{
+						Conditions: make([]conditions.Condition, 0, 2),
+						Gateways:   make([]*apiv1.Gateway, 0),
+						HTTPRoutes: make([]*L7Route, 0),
+					}
 				}
 
 				if pool, exists := inferencePools[poolName]; exists {
 					referencedInferencePools[poolName].Source = pool
+					referencedInferencePools[poolName].Gateways = append(
+						referencedInferencePools[poolName].Gateways,
+						gw.Source,
+					)
+					referencedInferencePools[poolName].HTTPRoutes = append(
+						referencedInferencePools[poolName].HTTPRoutes,
+						route,
+					)
 				}
 			}
 		}
 	}
 }
+
+// validateInferencePoolExtensionRef reports a condition when the pool's EndpointPickerRef is not a
+// Service found in svc under the pool's own namespace. It returns nil for a nil pool or a valid ref.
+func validateInferencePoolExtensionRef(
+	ip *inference.InferencePool,
+	svc map[types.NamespacedName]*v1.Service,
+) *conditions.Condition {
+	if ip == nil {
+		return nil
+	}
+
+	// an unset kind is treated as Service
+	refKind := string(ip.Spec.EndpointPickerRef.Kind)
+	if refKind == "" {
+		refKind = kinds.Service
+	}
+
+	if refKind != kinds.Service {
+		cond := conditions.NewInferencePoolInvalidExtensionref("Invalid ExtensionRef kind: " + refKind)
+		return &cond
+	}
+
+	svcKey := types.NamespacedName{
+		Namespace: ip.GetNamespace(),
+		Name:      string(ip.Spec.EndpointPickerRef.Name),
+	}
+
+	if _, found := svc[svcKey]; found {
+		return nil
+	}
+
+	cond := conditions.NewInferencePoolInvalidExtensionref("ExtensionRef Service not found: " + svcKey.String())
+	return &cond
+}
+
+// validateInferencePoolRoutesAcceptance returns a condition naming the first route in routes that is
+// not valid; it returns nil when ip is nil or when every route is valid.
+func validateInferencePoolRoutesAcceptance(ip *inference.InferencePool, routes []*L7Route) *conditions.Condition {
+	if ip == nil {
+		return nil
+	}
+
+	// the caller only passes routes that belong to the gateway, so membership is not re-checked
+	for _, r := range routes {
+		if r.Valid {
+			continue
+		}
+
+		msg := fmt.Sprintf(
+			"Referenced HTTPRoute %s/%s is not accepted by the Gateway",
+			r.Source.GetNamespace(), r.Source.GetName(),
+		)
+		cond := conditions.NewInferencePoolInvalidHTTPRouteNotAccepted(msg)
+		return &cond
+	}
+
+	return nil
+}
diff --git a/internal/controller/state/graph/inferencepools_test.go b/internal/controller/state/graph/inferencepools_test.go
index d67331b5e7..f6ea66215a 100644
--- a/internal/controller/state/graph/inferencepools_test.go
+++ b/internal/controller/state/graph/inferencepools_test.go
@@ -4,11 +4,13 @@ import (
 	"testing"
 
 	. "github.com/onsi/gomega"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	gatewayv1 "sigs.k8s.io/gateway-api/apis/v1"
 
+	"github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers"
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds"
@@ -70,6 +72,65 @@ func TestBuildReferencedInferencePools(t *testing.T) {
 
 	validRoute := getNormalRoute()
 
+	endpointPickerConfig := inference.EndpointPickerRef{
+		Kind: "Service",
+		Name: "valid-svc",
+	}
+
+	validSvcMap := map[types.NamespacedName]*v1.Service{
+		{Name: "valid-svc", Namespace: "test"}: {
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      "valid-svc",
+				Namespace: "test",
+			},
+		},
+		{Name: "regular-svc", Namespace: "test"}: {
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      "regular-svc",
+				Namespace: "test",
+			},
+		},
+	}
+
+	modifiedRouteWithServiceBackend := getModifiedRoute(func(route *L7Route) *L7Route {
+		route.Spec.Rules[0].RouteBackendRefs = append(route.Spec.Rules[0].RouteBackendRefs,
+			RouteBackendRef{
+				BackendRef: gatewayv1.BackendRef{
+					BackendObjectReference: gatewayv1.BackendObjectReference{
+						Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service),
+						Name: "regular-svc",
+					},
+				},
+			},
+		)
+		return route
+	})
+
+	routeWithInferencePoolHeadlessSvcBackend := getModifiedRoute(func(route *L7Route) *L7Route {
+		route.Spec.Rules = []RouteRule{
+			{
+				RouteBackendRefs: []RouteBackendRef{
+					{
+						IsInferencePool: true,
+						BackendRef: gatewayv1.BackendRef{
+							BackendObjectReference: gatewayv1.BackendObjectReference{
+								Kind:      helpers.GetPointer[gatewayv1.Kind](kinds.Service),
+								Name:      gatewayv1.ObjectName(controller.CreateInferencePoolServiceName("pool")),
+								Namespace: helpers.GetPointer[gatewayv1.Namespace]("test"),
+							},
+						},
+					},
+				},
+			},
+		}
+		return route
+	})
+
+	routeWithNoNamespaceBackend := getModifiedRoute(func(route *L7Route) *L7Route {
+		route.Spec.Rules[0].RouteBackendRefs[0].Namespace = nil
+		return route
+	})
+
 	invalidRoute := getModifiedRoute(func(route *L7Route) *L7Route {
 		route.Valid = false
 		return route
@@ -78,6 +139,7 @@ func TestBuildReferencedInferencePools(t *testing.T) {
 	tests := []struct {
 		routes         map[RouteKey]*L7Route
 		gws            map[types.NamespacedName]*Gateway
+		services       map[types.NamespacedName]*v1.Service
 		inferencePools map[types.NamespacedName]*inference.InferencePool
 		expPools       map[types.NamespacedName]*ReferencedInferencePool
 		name           string
@@ -93,17 +155,6 @@ func TestBuildReferencedInferencePools(t *testing.T) {
 			},
 			expPools: nil,
 		},
-		{
-			name: "invalid route",
-			gws:  gws,
-			routes: map[RouteKey]*L7Route{
-				CreateRouteKey(validRoute.Source): invalidRoute,
-			},
-			inferencePools: map[types.NamespacedName]*inference.InferencePool{
-				{Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}},
-			},
-			expPools: nil,
-		},
 		{
 			name: "valid route with referenced inferencepool",
 			gws:  gws,
@@ -111,11 +162,29 @@ func TestBuildReferencedInferencePools(t *testing.T) {
 				CreateRouteKey(validRoute.Source): validRoute,
 			},
 			inferencePools: map[types.NamespacedName]*inference.InferencePool{
-				{Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}},
+				{Name: "pool", Namespace: "test"}: {
+					ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"},
+					Spec: inference.InferencePoolSpec{
+						EndpointPickerRef: endpointPickerConfig,
+					},
+				},
 			},
+			services: validSvcMap,
 			expPools: map[types.NamespacedName]*ReferencedInferencePool{
 				{Name: "pool", Namespace: "test"}: {
-					Source: &inference.InferencePool{ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}},
+					Source: &inference.InferencePool{
+						ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"},
+						Spec: inference.InferencePoolSpec{
+							EndpointPickerRef: endpointPickerConfig,
+						},
+					},
+					Gateways: []*gatewayv1.Gateway{
+						gws[gwNsName].Source,
+					},
+					HTTPRoutes: []*L7Route{
+						validRoute,
+					},
+					Conditions: []conditions.Condition{},
 				},
 			},
 		},
@@ -149,25 +218,32 @@ func TestBuildReferencedInferencePools(t *testing.T) {
 			name: "route with both inferencepool and service backends",
 			gws:  gws,
 			routes: map[RouteKey]*L7Route{
-				CreateRouteKey(validRoute.Source): getModifiedRoute(func(route *L7Route) *L7Route {
-					route.Spec.Rules[0].RouteBackendRefs = append(route.Spec.Rules[0].RouteBackendRefs,
-						RouteBackendRef{
-							BackendRef: gatewayv1.BackendRef{
-								BackendObjectReference: gatewayv1.BackendObjectReference{
-									Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service),
-								},
-							},
-						},
-					)
-					return route
-				}),
+				CreateRouteKey(validRoute.Source): modifiedRouteWithServiceBackend,
 			},
 			inferencePools: map[types.NamespacedName]*inference.InferencePool{
-				{Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}},
+				{Name: "pool", Namespace: "test"}: {
+					ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"},
+					Spec: inference.InferencePoolSpec{
+						EndpointPickerRef: endpointPickerConfig,
+					},
+				},
 			},
+			services: validSvcMap,
 			expPools: map[types.NamespacedName]*ReferencedInferencePool{
 				{Name: "pool", Namespace: "test"}: {
-					Source: &inference.InferencePool{ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}},
+					Source: &inference.InferencePool{
+						ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"},
+						Spec: inference.InferencePoolSpec{
+							EndpointPickerRef: endpointPickerConfig,
+						},
+					},
+					Gateways: []*gatewayv1.Gateway{
+						gws[gwNsName].Source,
+					},
+					HTTPRoutes: []*L7Route{
+						modifiedRouteWithServiceBackend,
+					},
+					Conditions: []conditions.Condition{},
 				},
 			},
 		},
@@ -175,32 +251,32 @@ func TestBuildReferencedInferencePools(t *testing.T) {
 			name: "route with headless InferencePool Service backend",
 			gws:  gws,
 			routes: map[RouteKey]*L7Route{
-				CreateRouteKey(validRoute.Source): getModifiedRoute(func(route *L7Route) *L7Route {
-					route.Spec.Rules = []RouteRule{
-						{
-							RouteBackendRefs: []RouteBackendRef{
-								{
-									IsInferencePool: true,
-									BackendRef: gatewayv1.BackendRef{
-										BackendObjectReference: gatewayv1.BackendObjectReference{
-											Kind:      helpers.GetPointer[gatewayv1.Kind](kinds.Service),
-											Name:      gatewayv1.ObjectName(controller.CreateInferencePoolServiceName("pool")),
-											Namespace: helpers.GetPointer[gatewayv1.Namespace]("test"),
-										},
-									},
-								},
-							},
-						},
-					}
-					return route
-				}),
+				CreateRouteKey(validRoute.Source): routeWithInferencePoolHeadlessSvcBackend,
 			},
 			inferencePools: map[types.NamespacedName]*inference.InferencePool{
-				{Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}},
+				{Name: "pool", Namespace: "test"}: {
+					ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"},
+					Spec: inference.InferencePoolSpec{
+						EndpointPickerRef: endpointPickerConfig,
+					},
+				},
 			},
+			services: validSvcMap,
 			expPools: map[types.NamespacedName]*ReferencedInferencePool{
 				{Name: "pool", Namespace: "test"}: {
-					Source: &inference.InferencePool{ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}},
+					Source: &inference.InferencePool{
+						ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"},
+						Spec: inference.InferencePoolSpec{
+							EndpointPickerRef: endpointPickerConfig,
+						},
+					},
+					Gateways: []*gatewayv1.Gateway{
+						gws[gwNsName].Source,
+					},
+					HTTPRoutes: []*L7Route{
+						routeWithInferencePoolHeadlessSvcBackend,
+					},
+					Conditions: []conditions.Condition{},
 				},
 			},
 		},
@@ -208,17 +284,32 @@ func TestBuildReferencedInferencePools(t *testing.T) {
 			name: "inferencepool backend with no namespace uses route namespace",
 			gws:  gws,
 			routes: map[RouteKey]*L7Route{
-				CreateRouteKey(validRoute.Source): getModifiedRoute(func(route *L7Route) *L7Route {
-					route.Spec.Rules[0].RouteBackendRefs[0].Namespace = nil
-					return route
-				}),
+				CreateRouteKey(validRoute.Source): routeWithNoNamespaceBackend,
 			},
 			inferencePools: map[types.NamespacedName]*inference.InferencePool{
-				{Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}},
+				{Name: "pool", Namespace: "test"}: {
+					ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"},
+					Spec: inference.InferencePoolSpec{
+						EndpointPickerRef: endpointPickerConfig,
+					},
+				},
 			},
+			services: validSvcMap,
 			expPools: map[types.NamespacedName]*ReferencedInferencePool{
 				{Name: "pool", Namespace: "test"}: {
-					Source: &inference.InferencePool{ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}},
+					Source: &inference.InferencePool{
+						ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"},
+						Spec: inference.InferencePoolSpec{
+							EndpointPickerRef: endpointPickerConfig,
+						},
+					},
+					Gateways: []*gatewayv1.Gateway{
+						gws[gwNsName].Source,
+					},
+					HTTPRoutes: []*L7Route{
+						routeWithNoNamespaceBackend,
+					},
+					Conditions: []conditions.Condition{},
 				},
 			},
 		},
@@ -231,9 +322,243 @@ func TestBuildReferencedInferencePools(t *testing.T) {
 			inferencePools: map[types.NamespacedName]*inference.InferencePool{},
 			expPools: map[types.NamespacedName]*ReferencedInferencePool{
 				{Name: "pool", Namespace: "test"}: {
-					Source: nil,
+					Source:     nil,
+					Gateways:   []*gatewayv1.Gateway{},
+					HTTPRoutes: []*L7Route{},
+					Conditions: []conditions.Condition{},
+				},
+			},
+		},
+		{
+			name:     "inferencepool references invalid extensionRef and has invalid route",
+			gws:      gws,
+			services: validSvcMap,
+			routes: map[RouteKey]*L7Route{
+				CreateRouteKey(invalidRoute.Source): invalidRoute,
+			},
+			inferencePools: map[types.NamespacedName]*inference.InferencePool{
+				{Name: "pool", Namespace: "test"}: {
+					ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"},
+					Spec: inference.InferencePoolSpec{
+						EndpointPickerRef: inference.EndpointPickerRef{
+							Kind: "Service",
+							Name: "invalid-extension-ref",
+						},
+					},
+				},
+			},
+			expPools: map[types.NamespacedName]*ReferencedInferencePool{
+				{Name: "pool", Namespace: "test"}: {
+					Source: &inference.InferencePool{
+						ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"},
+						Spec: inference.InferencePoolSpec{
+							EndpointPickerRef: inference.EndpointPickerRef{
+								Kind: "Service",
+								Name: "invalid-extension-ref",
+							},
+						},
+					},
+					Gateways: []*gatewayv1.Gateway{
+						gws[gwNsName].Source,
+					},
+					HTTPRoutes: []*L7Route{
+						invalidRoute,
+					},
+					Conditions: []conditions.Condition{
+						conditions.NewInferencePoolInvalidHTTPRouteNotAccepted(
+							"Referenced HTTPRoute test/valid-route is not accepted by the Gateway",
+						),
+						conditions.NewInferencePoolInvalidExtensionref(
+							"ExtensionRef Service not found: test/invalid-extension-ref",
+						),
+					},
+				},
+			},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			t.Parallel()
+			g := NewWithT(t)
+
+			pools := buildReferencedInferencePools(test.routes, test.gws, test.inferencePools, test.services)
+
+			g.Expect(helpers.Diff(test.expPools, pools)).To(BeEmpty())
+		})
+	}
+}
+
+// TestValidateInferencePoolExtensionRef covers a valid ref, a missing Service and a non-Service kind.
+func TestValidateInferencePoolExtensionRef(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		pool     *inference.InferencePool
+		services map[types.NamespacedName]*v1.Service
+		expCond  *conditions.Condition
+		name     string
+	}{
+		{
+			name: "inference pool has a valid extensionRef",
+			pool: &inference.InferencePool{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "pool",
+					Namespace: "test",
+				},
+				Spec: inference.InferencePoolSpec{
+					EndpointPickerRef: inference.EndpointPickerRef{
+						Kind: "Service",
+						Name: "valid-svc",
+					},
+				},
+			},
+			services: map[types.NamespacedName]*v1.Service{
+				{Namespace: "test", Name: "valid-svc"}: {
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      "valid-svc",
+						Namespace: "test",
+					},
+					Spec: v1.ServiceSpec{
+						Ports: []v1.ServicePort{
+							{
+								Port: 80,
+							},
+						},
+					},
+				},
+			},
+			expCond: nil,
+		},
+		{
+			name: "inference pool references a non-existent service",
+			pool: &inference.InferencePool{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "pool",
+					Namespace: "test",
+				},
+				Spec: inference.InferencePoolSpec{
+					EndpointPickerRef: inference.EndpointPickerRef{
+						Kind: "Service",
+						Name: "does-not-exist",
+					},
+				},
+			},
+			services: map[types.NamespacedName]*v1.Service{},
+			expCond: helpers.GetPointer(
+				conditions.NewInferencePoolInvalidExtensionref("ExtensionRef Service not found: test/does-not-exist"),
+			),
+		},
+		{
+			name: "inference pool references an extensionRef that is not a service",
+			pool: &inference.InferencePool{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "pool",
+					Namespace: "test",
+				},
+				Spec: inference.InferencePoolSpec{
+					EndpointPickerRef: inference.EndpointPickerRef{
+						Kind: "Invalid-Kind",
+						Name: "svc",
+					},
+				},
+			},
+			services: map[types.NamespacedName]*v1.Service{
+				{Namespace: "test", Name: "svc"}: {
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      "svc",
+						Namespace: "test",
+					},
+					Spec: v1.ServiceSpec{
+						Ports: []v1.ServicePort{
+							{
+								Port: 80,
+							},
+						},
+					},
+				},
+			},
+			expCond: helpers.GetPointer(
+				conditions.NewInferencePoolInvalidExtensionref("Invalid ExtensionRef kind: Invalid-Kind"),
+			),
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			g := NewWithT(t)
+
+			g.Expect(validateInferencePoolExtensionRef(tc.pool, tc.services)).To(Equal(tc.expCond))
+		})
+	}
+}
+
+func TestValidateInferencePoolRoutesAcceptance(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		pool    *inference.InferencePool
+		expCond *conditions.Condition
+		name    string
+		routes  []*L7Route
+	}{
+		{
+			name: "no routes referencing the pool",
+			pool: &inference.InferencePool{
+				ObjectMeta: metav1.ObjectMeta{
+					Namespace: "test",
+					Name:      "pool",
+				},
+			},
+			routes:  []*L7Route{},
+			expCond: nil,
+		},
+		{
+			name: "one valid route referencing the pool",
+			pool: &inference.InferencePool{
+				ObjectMeta: metav1.ObjectMeta{
+					Namespace: "test",
+					Name:      "pool",
+				},
+			},
+			routes: []*L7Route{
+				{
+					Valid: true,
+					Source: &gatewayv1.HTTPRoute{
+						ObjectMeta: metav1.ObjectMeta{
+							Namespace: "test",
+							Name:      "valid-route",
+						},
+					},
+				},
+			},
+			expCond: nil,
+		},
+		{
+			name: "one invalid route referencing the pool",
+			pool: &inference.InferencePool{
+				ObjectMeta: metav1.ObjectMeta{
+					Namespace: "test",
+					Name:      "pool",
+				},
+			},
+			routes: []*L7Route{
+				{
+					Valid: false,
+					Source: &gatewayv1.HTTPRoute{
+						ObjectMeta: metav1.ObjectMeta{
+							Namespace: "test",
+							Name:      "invalid-route",
+						},
+					},
 				},
 			},
+			expCond: helpers.GetPointer(
+				conditions.NewInferencePoolInvalidHTTPRouteNotAccepted(
+					"Referenced HTTPRoute test/invalid-route is not accepted by the Gateway",
+				),
+			),
 		},
 	}
 
@@ -242,8 +567,8 @@ func TestBuildReferencedInferencePools(t *testing.T) {
 			t.Parallel()
 			g := NewWithT(t)
 
-			pools := buildReferencedInferencePools(test.routes, test.gws, test.inferencePools)
-			g.Expect(pools).To(Equal(test.expPools))
+			conds := validateInferencePoolRoutesAcceptance(test.pool, test.routes)
+			g.Expect(conds).To(Equal(test.expCond))
 		})
 	}
 }
diff --git a/internal/controller/status/prepare_requests.go b/internal/controller/status/prepare_requests.go
index 87e3b441cc..e6f35822c4 100644
--- a/internal/controller/status/prepare_requests.go
+++ b/internal/controller/status/prepare_requests.go
@@ -8,6 +8,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	v1 "sigs.k8s.io/gateway-api/apis/v1"
 	"sigs.k8s.io/gateway-api/apis/v1alpha2"
 	"sigs.k8s.io/gateway-api/apis/v1alpha3"
@@ -519,3 +520,56 @@ func PrepareNginxGatewayStatus(
 		}),
 	}
 }
+
+// PrepareInferencePoolRequests builds a status UpdateRequest for every referenced InferencePool that
+// has a Source; the same conditions are reported under each Gateway recorded for the pool.
+func PrepareInferencePoolRequests(
+	inferencePools map[types.NamespacedName]*graph.ReferencedInferencePool,
+	transitionTime metav1.Time,
+) []UpdateRequest {
+	reqs := make([]UpdateRequest, 0, len(inferencePools))
+
+	for nsname, pool := range inferencePools {
+		// without a Source there is no object to take the generation from or to update
+		if pool.Source == nil {
+			continue
+		}
+
+		// the defaults come first, followed by the conditions recorded on the pool
+		defaults := conditions.NewDefaultInferenceConditions()
+		merged := make([]conditions.Condition, 0, len(defaults)+len(pool.Conditions))
+		merged = append(merged, defaults...)
+		merged = append(merged, pool.Conditions...)
+
+		apiConds := conditions.ConvertConditions(
+			conditions.DeduplicateConditions(merged),
+			pool.Source.GetGeneration(),
+			transitionTime,
+		)
+
+		// one parent entry per Gateway, all sharing apiConds
+		parents := make([]inference.ParentStatus, 0, len(pool.Gateways))
+		for _, gw := range pool.Gateways {
+			parentRef := inference.ParentReference{
+				Group:     helpers.GetPointer(inference.Group(gw.GroupVersionKind().Group)),
+				Kind:      kinds.Gateway,
+				Namespace: inference.Namespace(gw.GetNamespace()),
+				Name:      inference.ObjectName(gw.GetName()),
+			}
+
+			parents = append(parents, inference.ParentStatus{
+				ParentRef:  parentRef,
+				Conditions: apiConds,
+			})
+		}
+
+		status := inference.InferencePoolStatus{Parents: parents}
+		reqs = append(reqs, UpdateRequest{
+			NsName:       nsname,
+			ResourceType: pool.Source,
+			Setter:       newInferencePoolStatusSetter(status),
+		})
+	}
+
+	return reqs
+}
diff --git a/internal/controller/status/prepare_requests_test.go b/internal/controller/status/prepare_requests_test.go
index 3cb629d3c2..f863d5cf97 100644
--- a/internal/controller/status/prepare_requests_test.go
+++ b/internal/controller/status/prepare_requests_test.go
@@ -15,6 +15,7 @@ import (
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	v1 "sigs.k8s.io/gateway-api/apis/v1"
 	"sigs.k8s.io/gateway-api/apis/v1alpha2"
 	"sigs.k8s.io/gateway-api/apis/v1alpha3"
@@ -35,6 +36,7 @@ func createK8sClientFor(resourceType ngftypes.ObjectType) client.Client {
 	utilruntime.Must(v1alpha2.Install(scheme))
 	utilruntime.Must(v1alpha3.Install(scheme))
 	utilruntime.Must(ngfAPI.AddToScheme(scheme))
+	utilruntime.Must(inference.Install(scheme))
 
 	k8sClient := fake.NewClientBuilder().
 		WithScheme(scheme).
@@ -2157,3 +2159,239 @@ func TestBuildSnippetsFilterStatuses(t *testing.T) {
 		})
 	}
 }
+
+// TestBuildInferencePoolStatuses checks that the requests built by PrepareInferencePoolRequests
+// write the expected per-Gateway parent statuses onto InferencePools stored in a fake client.
+func TestBuildInferencePoolStatuses(t *testing.T) {
+	t.Parallel()
+	transitionTime := helpers.PrepareTimeForFakeClient(metav1.Now())
+	group := "" // the fixture Gateways set no TypeMeta, so the Group written to ParentRef is empty
+
+	validAcceptedCondition := metav1.Condition{
+		Type:               string(inference.InferencePoolConditionAccepted),
+		Status:             metav1.ConditionTrue,
+		ObservedGeneration: 1,
+		LastTransitionTime: transitionTime,
+		Reason:             string(inference.InferencePoolReasonAccepted),
+		Message:            "InferencePool is accepted by the Gateway.",
+	}
+
+	validResolvedRefsCondition := metav1.Condition{
+		Type:               string(inference.InferencePoolConditionResolvedRefs),
+		Status:             metav1.ConditionTrue,
+		ObservedGeneration: 1,
+		LastTransitionTime: transitionTime,
+		Reason:             string(inference.InferencePoolReasonResolvedRefs),
+		Message:            "Inference pool references a valid ExtensionRef.",
+	}
+
+	tests := []struct {
+		inferencePool          map[types.NamespacedName]*graph.ReferencedInferencePool
+		expectedPoolWithStatus map[types.NamespacedName]inference.InferencePoolStatus
+		name                   string
+		expectedReqs           int
+	}{
+		{
+			name:         "no referenced inferencePools",
+			expectedReqs: 0,
+		},
+		{
+			name: "an inference pool has valid status for multiple gateways",
+			inferencePool: map[types.NamespacedName]*graph.ReferencedInferencePool{
+				{Namespace: "test", Name: "valid-inference-pool"}: {
+					Source: &inference.InferencePool{
+						ObjectMeta: metav1.ObjectMeta{
+							Name:       "valid-inference-pool",
+							Namespace:  "test",
+							Generation: 1,
+						},
+					},
+					Gateways: []*v1.Gateway{
+						{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "gateway-1",
+								Namespace: "test",
+							},
+						},
+						{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "gateway-2",
+								Namespace: "test",
+							},
+						},
+					},
+				},
+			},
+			expectedReqs: 1,
+			expectedPoolWithStatus: map[types.NamespacedName]inference.InferencePoolStatus{
+				{Namespace: "test", Name: "valid-inference-pool"}: {
+					Parents: []inference.ParentStatus{
+						{
+							Conditions: []metav1.Condition{
+								validAcceptedCondition,
+								validResolvedRefsCondition,
+							},
+							ParentRef: inference.ParentReference{
+								Namespace: inference.Namespace("test"),
+								Name:      "gateway-1",
+								Kind:      kinds.Gateway,
+								Group:     helpers.GetPointer(inference.Group(group)),
+							},
+						},
+						{
+							Conditions: []metav1.Condition{
+								validAcceptedCondition,
+								validResolvedRefsCondition,
+							},
+							ParentRef: inference.ParentReference{
+								Namespace: inference.Namespace("test"),
+								Name:      "gateway-2",
+								Kind:      kinds.Gateway,
+								Group:     helpers.GetPointer(inference.Group(group)),
+							},
+						},
+					},
+				},
+			},
+		},
+		{
+			name: "an inference pool has accepted valid status and is referenced by invalid extension ref",
+			inferencePool: map[types.NamespacedName]*graph.ReferencedInferencePool{
+				{Namespace: "test", Name: "valid-inference-pool"}: {
+					Source: &inference.InferencePool{
+						ObjectMeta: metav1.ObjectMeta{
+							Name:       "valid-inference-pool",
+							Namespace:  "test",
+							Generation: 1,
+						},
+						Spec: inference.InferencePoolSpec{
+							EndpointPickerRef: inference.EndpointPickerRef{
+								Name: inference.ObjectName("invalid-extension-ref"),
+							},
+						},
+					},
+					Gateways: []*v1.Gateway{
+						{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "gateway-1",
+								Namespace: "test",
+							},
+						},
+					},
+					Conditions: []conditions.Condition{
+						conditions.NewInferencePoolInvalidExtensionref("Invalid extension ref: test/invalid-extension-ref"),
+					},
+				},
+			},
+			expectedReqs: 1,
+			expectedPoolWithStatus: map[types.NamespacedName]inference.InferencePoolStatus{
+				{Namespace: "test", Name: "valid-inference-pool"}: {
+					Parents: []inference.ParentStatus{
+						{
+							Conditions: []metav1.Condition{
+								validAcceptedCondition,
+								{
+									Type:               string(inference.InferencePoolConditionResolvedRefs),
+									Status:             metav1.ConditionFalse,
+									ObservedGeneration: 1,
+									LastTransitionTime: transitionTime,
+									Reason:             string(inference.InferencePoolReasonInvalidExtensionRef),
+									Message:            "Invalid extension ref: test/invalid-extension-ref",
+								},
+							},
+							ParentRef: inference.ParentReference{
+								Namespace: inference.Namespace("test"),
+								Name:      "gateway-1",
+								Kind:      kinds.Gateway,
+								Group:     helpers.GetPointer(inference.Group(group)),
+							},
+						},
+					},
+				},
+			},
+		},
+		{
+			name: "an inference pool is referencing an invalid route and is referenced by invalid extension ref",
+			inferencePool: map[types.NamespacedName]*graph.ReferencedInferencePool{
+				{Namespace: "test", Name: "valid-inference-pool"}: {
+					Source: &inference.InferencePool{
+						ObjectMeta: metav1.ObjectMeta{
+							Name:       "valid-inference-pool",
+							Namespace:  "test",
+							Generation: 1,
+						},
+					},
+					Gateways: []*v1.Gateway{
+						{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "gateway-1",
+								Namespace: "test",
+							},
+						},
+					},
+					Conditions: []conditions.Condition{
+						conditions.NewInferencePoolInvalidHTTPRouteNotAccepted("Invalid HTTPRoute: test/invalid-route not accepted"),
+						conditions.NewInferencePoolInvalidExtensionref("Invalid extension ref: test/invalid-extension-ref"),
+					},
+				},
+			},
+			expectedReqs: 1,
+			expectedPoolWithStatus: map[types.NamespacedName]inference.InferencePoolStatus{
+				{Namespace: "test", Name: "valid-inference-pool"}: {
+					Parents: []inference.ParentStatus{
+						{
+							Conditions: []metav1.Condition{
+								{
+									Type:               string(inference.InferencePoolConditionAccepted),
+									Status:             metav1.ConditionFalse,
+									ObservedGeneration: 1,
+									LastTransitionTime: transitionTime,
+									Reason:             string(inference.InferencePoolReasonHTTPRouteNotAccepted),
+									Message:            "Invalid HTTPRoute: test/invalid-route not accepted",
+								},
+								{
+									Type:               string(inference.InferencePoolConditionResolvedRefs),
+									Status:             metav1.ConditionFalse,
+									ObservedGeneration: 1,
+									LastTransitionTime: transitionTime,
+									Reason:             string(inference.InferencePoolReasonInvalidExtensionRef),
+									Message:            "Invalid extension ref: test/invalid-extension-ref",
+								},
+							},
+							ParentRef: inference.ParentReference{
+								Namespace: inference.Namespace("test"),
+								Name:      "gateway-1",
+								Kind:      kinds.Gateway,
+								Group:     helpers.GetPointer(inference.Group(group)),
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			t.Parallel()
+			g := NewWithT(t)
+
+			k8sClient := createK8sClientFor(&inference.InferencePool{})
+			for _, ip := range test.inferencePool {
+				g.Expect(k8sClient.Create(context.Background(), ip.Source)).To(Succeed())
+			}
+
+			updater := NewUpdater(k8sClient, logr.Discard())
+			reqs := PrepareInferencePoolRequests(test.inferencePool, transitionTime)
+			g.Expect(reqs).To(HaveLen(test.expectedReqs))
+			updater.Update(context.Background(), reqs...)
+
+			for nsname, expected := range test.expectedPoolWithStatus {
+				var inferencePool inference.InferencePool
+
+				g.Expect(k8sClient.Get(context.Background(), nsname, &inferencePool)).To(Succeed())
+				g.Expect(helpers.Diff(expected, inferencePool.Status)).To(BeEmpty())
+			}
+		})
+	}
+}
diff --git a/internal/controller/status/status_setters.go b/internal/controller/status/status_setters.go
index c4fcc7c128..efb9f68413 100644
--- a/internal/controller/status/status_setters.go
+++ b/internal/controller/status/status_setters.go
@@ -4,6 +4,7 @@ import (
 	"slices"
 
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	gatewayv1 "sigs.k8s.io/gateway-api/apis/v1"
 	"sigs.k8s.io/gateway-api/apis/v1alpha2"
 	"sigs.k8s.io/gateway-api/apis/v1alpha3"
@@ -404,3 +405,65 @@ func snippetsStatusEqual(status1, status2 ngfAPI.ControllerStatus) bool {
 
 	return ConditionsEqual(status1.Conditions, status2.Conditions)
 }
+
+// newInferencePoolStatusSetter returns a Setter that stores status on an InferencePool.
+func newInferencePoolStatusSetter(status inference.InferencePoolStatus) Setter {
+	return func(obj client.Object) (wasSet bool) {
+		pool := helpers.MustCastObject[*inference.InferencePool](obj)
+
+		// all parent statuses are built at once, so the stored and the desired ones can be compared directly
+		if !inferencePoolStatusEqual(pool.Status.Parents, status.Parents) {
+			pool.Status = status
+			wasSet = true
+		}
+
+		return wasSet
+	}
+}
+
+// inferencePoolStatusEqual reports whether prevParents and curParents hold
+// the same parent statuses. The order of the entries is not significant.
+func inferencePoolStatusEqual(prevParents, curParents []inference.ParentStatus) bool {
+	// containsAll reports whether each status in wanted has an equal
+	// counterpart somewhere in available.
+	containsAll := func(wanted, available []inference.ParentStatus) bool {
+		for _, want := range wanted {
+			found := slices.ContainsFunc(available, func(candidate inference.ParentStatus) bool {
+				return parentStatusEqual(want, candidate)
+			})
+
+			if !found {
+				return false
+			}
+		}
+
+		return true
+	}
+
+	// A previous status without a match in the current ones means a parent
+	// was removed or changed; a current status without a match in the
+	// previous ones means a parent was added or changed. The two slices are
+	// equal only when the containment holds in both directions, and the
+	// second check is skipped as soon as the first one fails.
+	return containsAll(prevParents, curParents) && containsAll(curParents, prevParents)
+}
+
+// parentStatusEqual reports whether p1 and p2 refer to the same parent
+// (name, namespace, kind and group) and carry equal conditions.
+func parentStatusEqual(p1, p2 inference.ParentStatus) bool {
+	ref1, ref2 := p1.ParentRef, p2.ParentRef
+
+	// Name, Namespace and Kind are plain values and are compared directly.
+	if ref1.Name != ref2.Name || ref1.Namespace != ref2.Namespace || ref1.Kind != ref2.Kind {
+		return false
+	}
+
+	// Group is already a pointer, so it is passed as-is: taking its address
+	// would hand EqualPointers a **Group and compare the two pointers rather
+	// than the group values they point to.
+	if !helpers.EqualPointers(ref1.Group, ref2.Group) {
+		return false
+	}
+
+	return ConditionsEqual(p1.Conditions, p2.Conditions)
+}
diff --git a/internal/controller/status/status_setters_test.go b/internal/controller/status/status_setters_test.go
index 61a34a4e9f..9f3c6f1521 100644
--- a/internal/controller/status/status_setters_test.go
+++ b/internal/controller/status/status_setters_test.go
@@ -5,6 +5,7 @@ import (
 
 	. "github.com/onsi/gomega"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	gatewayv1 "sigs.k8s.io/gateway-api/apis/v1"
 	"sigs.k8s.io/gateway-api/apis/v1alpha2"
 	"sigs.k8s.io/gateway-api/apis/v1alpha3"
@@ -1726,3 +1727,303 @@ func TestNewSnippetsFilterStatusSetter(t *testing.T) {
 		})
 	}
 }
+
+func TestInferencePoolStatusSetter(t *testing.T) {
+	t.Parallel()
+	// status seeds the pool, newStatus is handed to the setter, expStatus/expStatusSet are the expected outcome.
+	tests := []struct {
+		name                         string
+		status, newStatus, expStatus inference.InferencePoolStatus
+		expStatusSet                 bool
+	}{
+		{
+			name: "InferencePool has no status",
+			newStatus: inference.InferencePoolStatus{
+				Parents: []inference.ParentStatus{
+					{
+						Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway1",
+							Namespace: "test",
+						},
+					},
+				},
+			},
+			expStatus: inference.InferencePoolStatus{
+				Parents: []inference.ParentStatus{
+					{
+						Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway1",
+							Namespace: "test",
+						},
+					},
+				},
+			},
+			expStatusSet: true,
+		},
+		{
+			name: "InferencePool updates condition of an existing parent status",
+			status: inference.InferencePoolStatus{
+				Parents: []inference.ParentStatus{
+					{
+						Conditions: []metav1.Condition{{Message: "old condition"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway1",
+							Namespace: "test",
+						},
+					},
+				},
+			},
+			newStatus: inference.InferencePoolStatus{
+				Parents: []inference.ParentStatus{
+					{
+						Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway1",
+							Namespace: "test",
+						},
+					},
+				},
+			},
+			expStatus: inference.InferencePoolStatus{
+				Parents: []inference.ParentStatus{
+					{
+						Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway1",
+							Namespace: "test",
+						},
+					},
+				},
+			},
+			expStatusSet: true,
+		},
+		{
+			name: "InferencePool has new parent statuses along with existing ones",
+			status: inference.InferencePoolStatus{
+				Parents: []inference.ParentStatus{
+					{
+						Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway1",
+							Namespace: "test",
+						},
+					},
+				},
+			},
+			newStatus: inference.InferencePoolStatus{
+				Parents: []inference.ParentStatus{
+					{
+						Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway1",
+							Namespace: "test",
+						},
+					},
+					{
+						Conditions: []metav1.Condition{{Message: "gateway2 is valid parent ref"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway2",
+							Namespace: "test",
+						},
+					},
+				},
+			},
+			expStatus: inference.InferencePoolStatus{
+				Parents: []inference.ParentStatus{
+					{
+						Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway1",
+							Namespace: "test",
+						},
+					},
+					{
+						Conditions: []metav1.Condition{{Message: "gateway2 is valid parent ref"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway2",
+							Namespace: "test",
+						},
+					},
+				},
+			},
+			expStatusSet: true,
+		},
+		{
+			name: "InferencePool has parent statuses and one is removed",
+			status: inference.InferencePoolStatus{
+				Parents: []inference.ParentStatus{
+					{
+						Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway1",
+							Namespace: "test",
+						},
+					},
+					{
+						Conditions: []metav1.Condition{{Message: "gateway2 is valid parent ref"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway2",
+							Namespace: "test",
+						},
+					},
+				},
+			},
+			newStatus: inference.InferencePoolStatus{
+				Parents: []inference.ParentStatus{
+					{
+						Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway1",
+							Namespace: "test",
+						},
+					},
+				},
+			},
+			expStatus: inference.InferencePoolStatus{
+				Parents: []inference.ParentStatus{
+					{
+						Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway1",
+							Namespace: "test",
+						},
+					},
+				},
+			},
+			expStatusSet: true,
+		},
+		{
+			name: "InferencePool has existing multiple parent statuses, one gets changed condition",
+			status: inference.InferencePoolStatus{
+				Parents: []inference.ParentStatus{
+					{
+						Conditions: []metav1.Condition{{Message: "parent ref gateway1 is valid"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway1",
+							Namespace: "test",
+						},
+					},
+					{
+						Conditions: []metav1.Condition{{Message: "parent ref gateway2 is valid"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway2",
+							Namespace: "test",
+						},
+					},
+					{
+						Conditions: []metav1.Condition{{Message: "parent ref gateway3 is valid"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway3",
+							Namespace: "test",
+						},
+					},
+				},
+			},
+			newStatus: inference.InferencePoolStatus{
+				Parents: []inference.ParentStatus{
+					{
+						Conditions: []metav1.Condition{{Message: "parent ref gateway1 is valid"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway1",
+							Namespace: "test",
+						},
+					},
+					{
+						Conditions: []metav1.Condition{{Message: "parent ref gateway2 is invalid"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway2",
+							Namespace: "test",
+						},
+					},
+					{
+						Conditions: []metav1.Condition{{Message: "parent ref gateway3 is valid"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway3",
+							Namespace: "test",
+						},
+					},
+				},
+			},
+			expStatus: inference.InferencePoolStatus{
+				Parents: []inference.ParentStatus{
+					{
+						Conditions: []metav1.Condition{{Message: "parent ref gateway1 is valid"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway1",
+							Namespace: "test",
+						},
+					},
+					{
+						Conditions: []metav1.Condition{{Message: "parent ref gateway2 is invalid"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway2",
+							Namespace: "test",
+						},
+					},
+					{
+						Conditions: []metav1.Condition{{Message: "parent ref gateway3 is valid"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway3",
+							Namespace: "test",
+						},
+					},
+				},
+			},
+			expStatusSet: true,
+		},
+		{
+			name: "InferencePool has same status",
+			status: inference.InferencePoolStatus{
+				Parents: []inference.ParentStatus{
+					{
+						Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway1",
+							Namespace: "test",
+						},
+					},
+				},
+			},
+			newStatus: inference.InferencePoolStatus{
+				Parents: []inference.ParentStatus{
+					{
+						Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway1",
+							Namespace: "test",
+						},
+					},
+				},
+			},
+			expStatus: inference.InferencePoolStatus{
+				Parents: []inference.ParentStatus{
+					{
+						Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}},
+						ParentRef: inference.ParentReference{
+							Name:      "gateway1",
+							Namespace: "test",
+						},
+					},
+				},
+			},
+			expStatusSet: false, // the parents are identical, so nothing must be written
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			t.Parallel()
+			g := NewWithT(t)
+
+			setter := newInferencePoolStatusSetter(test.newStatus)
+			obj := &inference.InferencePool{Status: test.status}
+			// The setter mutates obj in place; its return value reports whether the status was written.
+			statusSet := setter(obj)
+
+			g.Expect(statusSet).To(Equal(test.expStatusSet))
+			g.Expect(obj.Status).To(Equal(test.expStatus))
+		})
+	}
+}
diff --git a/tests/go.mod b/tests/go.mod
index 7e748dd97b..cc3dca1988 100644
--- a/tests/go.mod
+++ b/tests/go.mod
@@ -5,7 +5,7 @@ go 1.24.2
 replace github.com/nginx/nginx-gateway-fabric/v2 => ../
 
 require (
-	github.com/nginx/nginx-gateway-fabric/v2 v2.1.1
+	github.com/nginx/nginx-gateway-fabric/v2 v2.1.4
 	github.com/onsi/ginkgo/v2 v2.25.3
 	github.com/onsi/gomega v1.38.2
 	github.com/prometheus/client_golang v1.23.2
@@ -80,6 +80,7 @@ require (
 	k8s.io/klog/v2 v2.130.1 // indirect
 	k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect
 	k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect
+	sigs.k8s.io/gateway-api-inference-extension v1.0.0 // indirect
 	sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect
 	sigs.k8s.io/randfill v1.0.0 // indirect
 	sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect
diff --git a/tests/go.sum b/tests/go.sum
index f24e98ebf3..ad1b0add11 100644
--- a/tests/go.sum
+++ b/tests/go.sum
@@ -161,8 +161,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
-golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8=
-golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY=
+golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8 h1:yqrTHse8TCMW1M1ZCP+VAR/l0kKxwaAIqN/il7x4voA=
+golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8/go.mod h1:tujkw807nyEEAamNbDrEGzRav+ilXA7PCRAd6xsmwiU=
 golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.27.0 h1:kb+q2PyFnEADO2IEF935ehFUXlWiNjJWtRNgBLSfbxQ=
@@ -245,6 +245,8 @@ sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV
 sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY=
 sigs.k8s.io/gateway-api v1.3.0 h1:q6okN+/UKDATola4JY7zXzx40WO4VISk7i9DIfOvr9M=
 sigs.k8s.io/gateway-api v1.3.0/go.mod h1:d8NV8nJbaRbEKem+5IuxkL8gJGOZ+FJ+NvOIltV8gDk=
+sigs.k8s.io/gateway-api-inference-extension v1.0.0 h1:GsHvlu1Cn1t6+vrHoPdNNlpwKxf/y1HuQSlUjd58Ds8=
+sigs.k8s.io/gateway-api-inference-extension v1.0.0/go.mod h1:qxSY10qt2+YnZJ43VfpMXa6wpiENPderI2BnNZ4Kxfc=
 sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE=
 sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
 sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=

From 6b370f9b8c0686c85abfa441ece1ef47c06ff7f6 Mon Sep 17 00:00:00 2001
From: salonichf5 <146118978+salonichf5@users.noreply.github.com>
Date: Wed, 8 Oct 2025 10:49:25 -0600
Subject: [PATCH 08/12] update makefiles and readme for inference conformance
 tests

---
 .github/workflows/conformance.yml             |   4 +
 Makefile                                      |   7 +
 cmd/gateway/endpoint_picker.go                |   9 +-
 examples/cafe-example/cafe-routes.yaml        |  37 --
 examples/cafe-example/cafe.yaml               |  65 ---
 examples/cafe-example/eppinvalidservice.yaml  |  35 ++
 examples/cafe-example/epprouting.yaml         |  23 +
 .../cafe-example/eppunavailablefailopen.yaml  |  23 +
 examples/cafe-example/gateway.yaml            | 387 ++++++++++++++++-
 .../cafe-example/httproutemultiplerules.yaml  |  26 ++
 .../httproutesmultiplegateways.yaml           |  44 ++
 .../inference-pool-port-validation.yaml       |  79 ++++
 .../inferencepool-resolvedrefs.yaml           |  54 +++
 internal/controller/nginx/config/maps.go      |  71 ++--
 internal/controller/nginx/config/maps_test.go |   2 +
 internal/controller/nginx/config/servers.go   |  11 +-
 .../nginx/config/servers_template.go          |  17 +-
 internal/controller/nginx/modules/src/epp.js  |   2 +-
 .../controller/nginx/modules/test/epp.test.js |   2 +-
 .../state/dataplane/configuration.go          |   3 +
 internal/controller/state/dataplane/types.go  |   1 +
 .../controller/state/graph/backend_refs.go    |  22 +
 .../controller/state/graph/inferencepools.go  |   4 +
 .../controller/state/graph/route_common.go    |   3 +
 tests/Makefile                                |  24 +-
 tests/README.md                               |  20 +-
 tests/conformance-profile-inference.yaml      |   0
 tests/conformance/conformance-rbac.yaml       |  40 ++
 tests/conformance/conformance_test.go         |  51 ++-
 tests/conformance/manifests/base.yaml         | 394 ++++++++++++++++++
 tests/conformance/manifests/epp-routing.yaml  |  23 +
 tests/conformance/manifests/gateway.yaml      |   0
 tests/go.mod                                  |   3 +-
 tests/go.sum                                  |   2 +
 34 files changed, 1336 insertions(+), 152 deletions(-)
 delete mode 100644 examples/cafe-example/cafe-routes.yaml
 delete mode 100644 examples/cafe-example/cafe.yaml
 create mode 100644 examples/cafe-example/eppinvalidservice.yaml
 create mode 100644 examples/cafe-example/epprouting.yaml
 create mode 100644 examples/cafe-example/eppunavailablefailopen.yaml
 create mode 100644 examples/cafe-example/httproutemultiplerules.yaml
 create mode 100644 examples/cafe-example/httproutesmultiplegateways.yaml
 create mode 100644 examples/cafe-example/inference-pool-port-validation.yaml
 create mode 100644 examples/cafe-example/inferencepool-resolvedrefs.yaml
 create mode 100644 tests/conformance-profile-inference.yaml
 create mode 100644 tests/conformance/manifests/base.yaml
 create mode 100644 tests/conformance/manifests/epp-routing.yaml
 create mode 100644 tests/conformance/manifests/gateway.yaml

diff --git a/.github/workflows/conformance.yml b/.github/workflows/conformance.yml
index f8ce171f53..ad6ec8ff26 100644
--- a/.github/workflows/conformance.yml
+++ b/.github/workflows/conformance.yml
@@ -12,6 +12,9 @@ on:
       enable-experimental:
         required: true
         type: boolean
+      enable-inference-extension:
+        required: true
+        type: boolean
       production-release:
         required: false
         type: boolean
@@ -28,6 +31,7 @@ defaults:
 env:
   PLUS_USAGE_ENDPOINT: ${{ secrets.JWT_PLUS_REPORTING_ENDPOINT }}
   ENABLE_EXPERIMENTAL: ${{ inputs.enable-experimental }}
+  ENABLE_INFERENCE_EXTENSION: ${{ inputs.enable-inference-extension }}
 
 permissions:
   contents: read
diff --git a/Makefile b/Makefile
index 16767f946e..4236d49d14 100644
--- a/Makefile
+++ b/Makefile
@@ -15,6 +15,7 @@ TELEMETRY_ENDPOINT=# if empty, NGF will report telemetry in its logs at debug le
 TELEMETRY_ENDPOINT_INSECURE = false
 
 ENABLE_EXPERIMENTAL ?= false
+ENABLE_INFERENCE_EXTENSION ?= false
 
 # go build flags - should not be overridden by the user
 GO_LINKER_FlAGS_VARS = -X main.version=${VERSION} -X main.telemetryReportPeriod=${TELEMETRY_REPORT_PERIOD} -X main.telemetryEndpoint=${TELEMETRY_ENDPOINT} -X main.telemetryEndpointInsecure=${TELEMETRY_ENDPOINT_INSECURE}
@@ -234,10 +235,16 @@ install-ngf-local-build-with-plus: check-for-plus-usage-endpoint build-images-wi
 
 .PHONY: helm-install-local
 helm-install-local: install-gateway-crds ## Helm install NGF on configured kind cluster with local images. To build, load, and install with helm run make install-ngf-local-build.
+	@if [ "$(ENABLE_INFERENCE_EXTENSION)" = "true" ]; then \
+		$(MAKE) install-inference-crds; \
+	fi
 	helm install nginx-gateway $(CHART_DIR) --set nginx.image.repository=$(NGINX_PREFIX) --create-namespace --wait --set nginxGateway.image.pullPolicy=Never --set nginx.service.type=NodePort --set nginxGateway.image.repository=$(PREFIX) --set nginxGateway.image.tag=$(TAG) --set nginx.image.tag=$(TAG) --set nginx.image.pullPolicy=Never --set nginxGateway.gwAPIExperimentalFeatures.enable=$(ENABLE_EXPERIMENTAL) -n nginx-gateway $(HELM_PARAMETERS)
 
 .PHONY: helm-install-local-with-plus
 helm-install-local-with-plus: check-for-plus-usage-endpoint install-gateway-crds ## Helm install NGF with NGINX Plus on configured kind cluster with local images. To build, load, and install with helm run make install-ngf-local-build-with-plus.
+	@if [ "$(ENABLE_INFERENCE_EXTENSION)" = "true" ]; then \
+		$(MAKE) install-inference-crds; \
+	fi
 	kubectl create namespace nginx-gateway || true
 	kubectl -n nginx-gateway create secret generic nplus-license --from-file $(PLUS_LICENSE_FILE) || true
 	helm install nginx-gateway $(CHART_DIR) --set nginx.image.repository=$(NGINX_PLUS_PREFIX) --wait --set nginxGateway.image.pullPolicy=Never --set nginx.service.type=NodePort --set nginxGateway.image.repository=$(PREFIX) --set nginxGateway.image.tag=$(TAG) --set nginx.image.tag=$(TAG) --set nginx.image.pullPolicy=Never --set nginxGateway.gwAPIExperimentalFeatures.enable=$(ENABLE_EXPERIMENTAL) -n nginx-gateway --set nginx.plus=true --set nginx.usage.endpoint=$(PLUS_USAGE_ENDPOINT) $(HELM_PARAMETERS)
diff --git a/cmd/gateway/endpoint_picker.go b/cmd/gateway/endpoint_picker.go
index acf9bdfbb6..118f95684b 100644
--- a/cmd/gateway/endpoint_picker.go
+++ b/cmd/gateway/endpoint_picker.go
@@ -1,6 +1,7 @@
 package main
 
 import (
+	"crypto/tls"
 	"errors"
 	"fmt"
 	"io"
@@ -12,7 +13,7 @@ import (
 	extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
 	"github.com/go-logr/logr"
 	"google.golang.org/grpc"
-	"google.golang.org/grpc/credentials/insecure"
+	"google.golang.org/grpc/credentials"
 	eppMetadata "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metadata"
 
 	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/types"
@@ -34,7 +35,11 @@ func endpointPickerServer(handler http.Handler) error {
 // realExtProcClientFactory returns a factory that creates a new gRPC connection and client per request.
 func realExtProcClientFactory() extProcClientFactory {
 	return func(target string) (extprocv3.ExternalProcessorClient, func() error, error) {
-		conn, err := grpc.NewClient(target, grpc.WithTransportCredentials(insecure.NewCredentials()))
+		creds := credentials.NewTLS(&tls.Config{
+			// Server cert verification is skipped to allow a self-signed cert; set RootCAs instead to verify it.
+			InsecureSkipVerify: true,
+		})
+		conn, err := grpc.NewClient(target, grpc.WithTransportCredentials(creds))
 		if err != nil {
 			return nil, nil, err
 		}
diff --git a/examples/cafe-example/cafe-routes.yaml b/examples/cafe-example/cafe-routes.yaml
deleted file mode 100644
index 67927335cb..0000000000
--- a/examples/cafe-example/cafe-routes.yaml
+++ /dev/null
@@ -1,37 +0,0 @@
-apiVersion: gateway.networking.k8s.io/v1
-kind: HTTPRoute
-metadata:
-  name: coffee
-spec:
-  parentRefs:
-  - name: gateway
-    sectionName: http
-  hostnames:
-  - "cafe.example.com"
-  rules:
-  - matches:
-    - path:
-        type: PathPrefix
-        value: /coffee
-    backendRefs:
-    - name: coffee
-      port: 80
----
-apiVersion: gateway.networking.k8s.io/v1
-kind: HTTPRoute
-metadata:
-  name: tea
-spec:
-  parentRefs:
-  - name: gateway
-    sectionName: http
-  hostnames:
-  - "cafe.example.com"
-  rules:
-  - matches:
-    - path:
-        type: Exact
-        value: /tea
-    backendRefs:
-    - name: tea
-      port: 80
diff --git a/examples/cafe-example/cafe.yaml b/examples/cafe-example/cafe.yaml
deleted file mode 100644
index 2d03ae59ff..0000000000
--- a/examples/cafe-example/cafe.yaml
+++ /dev/null
@@ -1,65 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: coffee
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: coffee
-  template:
-    metadata:
-      labels:
-        app: coffee
-    spec:
-      containers:
-      - name: coffee
-        image: nginxdemos/nginx-hello:plain-text
-        ports:
-        - containerPort: 8080
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: coffee
-spec:
-  ports:
-  - port: 80
-    targetPort: 8080
-    protocol: TCP
-    name: http
-  selector:
-    app: coffee
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: tea
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: tea
-  template:
-    metadata:
-      labels:
-        app: tea
-    spec:
-      containers:
-      - name: tea
-        image: nginxdemos/nginx-hello:plain-text
-        ports:
-        - containerPort: 8080
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: tea
-spec:
-  ports:
-  - port: 80
-    targetPort: 8080
-    protocol: TCP
-    name: http
-  selector:
-    app: tea
diff --git a/examples/cafe-example/eppinvalidservice.yaml b/examples/cafe-example/eppinvalidservice.yaml
new file mode 100644
index 0000000000..f0a0e62ea9
--- /dev/null
+++ b/examples/cafe-example/eppinvalidservice.yaml
@@ -0,0 +1,35 @@
+apiVersion: inference.networking.k8s.io/v1
+kind: InferencePool
+metadata:
+  name: pool-with-invalid-epp
+  namespace: gateway-conformance-app-backend
+spec:
+  selector:
+    matchLabels:
+      app: primary-inference-model-server
+  targetPorts:
+  - number: 3000
+  endpointPickerRef:
+    name: non-existent-epp-svc
+    kind: Service
+    port:
+      number: 9002
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: httproute-for-invalid-epp-pool
+  namespace: gateway-conformance-app-backend
+spec:
+  parentRefs:
+  - name: conformance-primary
+    namespace: gateway-conformance-infra
+  rules:
+  - backendRefs:
+    - name: pool-with-invalid-epp
+      kind: InferencePool
+      group: inference.networking.k8s.io
+    matches:
+    - path:
+        type: PathPrefix
+        value: /invalid-epp-test
\ No newline at end of file
diff --git a/examples/cafe-example/epprouting.yaml b/examples/cafe-example/epprouting.yaml
new file mode 100644
index 0000000000..dd199f1ce6
--- /dev/null
+++ b/examples/cafe-example/epprouting.yaml
@@ -0,0 +1,23 @@
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: httproute-for-primary-gw
+  namespace: gateway-conformance-app-backend
+spec:
+  parentRefs:
+  - group: gateway.networking.k8s.io
+    kind: Gateway
+    name: conformance-primary
+    namespace: gateway-conformance-infra
+    sectionName: http
+  hostnames:
+  - "primary.example.com"
+  rules:
+  - backendRefs:
+    - group: inference.networking.k8s.io
+      kind: InferencePool
+      name: primary-inference-pool
+    matches:
+    - path:
+        type: PathPrefix
+        value: /primary-gateway-test
diff --git a/examples/cafe-example/eppunavailablefailopen.yaml b/examples/cafe-example/eppunavailablefailopen.yaml
new file mode 100644
index 0000000000..6549cd9188
--- /dev/null
+++ b/examples/cafe-example/eppunavailablefailopen.yaml
@@ -0,0 +1,23 @@
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: httproute-for-failopen-pool-gw
+  namespace: gateway-conformance-app-backend
+spec:
+  parentRefs:
+  - group: gateway.networking.k8s.io
+    kind: Gateway
+    name: conformance-secondary
+    namespace: gateway-conformance-infra
+    sectionName: http
+  hostnames:
+  - "secondary.example.com"
+  rules:
+  - backendRefs:
+    - group: inference.networking.k8s.io
+      kind: InferencePool
+      name: secondary-inference-pool # Use secondary-inference-pool because it has failureMode set to failOpen
+    matches:
+    - path:
+        type: PathPrefix
+        value: /failopen-pool-test
\ No newline at end of file
diff --git a/examples/cafe-example/gateway.yaml b/examples/cafe-example/gateway.yaml
index e6507f613b..61acb91d17 100644
--- a/examples/cafe-example/gateway.yaml
+++ b/examples/cafe-example/gateway.yaml
@@ -1,11 +1,394 @@
+# Base Kubernetes resources for the Gateway API Inference Extension conformance tests.
+# This includes namespaces and a minimal set of resources (Gateway, Backend)
+# required by many tests. More specific resources should be defined within
+# individual test files or other resource directories (e.g., sample_backends).
+
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: gateway-conformance-infra
+  labels:
+    gateway-conformance: infra
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: gateway-conformance-app-backend
+  labels:
+    gateway-conformance: backend
+---
+# A basic Gateway resource that allows HTTPRoutes from all namespaces.
+# Tests can use this as a parent reference for routes that target InferencePools.
 apiVersion: gateway.networking.k8s.io/v1
 kind: Gateway
 metadata:
-  name: gateway
+  name: conformance-primary
+  namespace: gateway-conformance-infra
 spec:
   gatewayClassName: nginx
   listeners:
   - name: http
     port: 80
     protocol: HTTP
-    hostname: "*.example.com"
+    allowedRoutes:
+      namespaces:
+        from: All
+      kinds:
+      - group: gateway.networking.k8s.io
+        kind: HTTPRoute
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: Gateway
+metadata:
+  name: conformance-secondary
+  namespace: gateway-conformance-infra
+spec:
+  gatewayClassName: nginx
+  listeners:
+  - name: http
+    port: 80
+    protocol: HTTP
+    hostname: "secondary.example.com"
+    allowedRoutes:
+      namespaces:
+        from: All
+
+### The following defines the essential resources for the gateway conformance test.
+### All resources are created in the 'gateway-conformance-app-backend' namespace.
+---
+# Deploys a mock backend service to act as a model server.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: primary-inference-model-server-deployment
+  namespace: gateway-conformance-app-backend
+  labels:
+    app: primary-inference-model-server
+spec:
+  replicas: 3
+  selector:
+    matchLabels:
+      app: primary-inference-model-server
+  template:
+    metadata:
+      labels:
+        app: primary-inference-model-server
+    spec:
+      containers:
+      - name: echoserver
+        image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd
+        ports:
+        - containerPort: 3000
+        readinessProbe:
+          httpGet:
+            path: /
+            port: 3000
+          initialDelaySeconds: 3
+          periodSeconds: 5
+          failureThreshold: 2
+        env:
+        - name: POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        - name: POD_IP
+          valueFrom:
+            fieldRef:
+              fieldPath: status.podIP
+---
+# Deploys a secondary mock backend service to act as a model server.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: secondary-inference-model-server-deployment
+  namespace: gateway-conformance-app-backend
+  labels:
+    app: secondary-inference-model-server
+spec:
+  replicas: 3
+  selector:
+    matchLabels:
+      app: secondary-inference-model-server
+  template:
+    metadata:
+      labels:
+        app: secondary-inference-model-server
+    spec:
+      containers:
+      - name: echoserver
+        image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd
+        ports:
+        - containerPort: 3000
+        readinessProbe:
+          httpGet:
+            path: /
+            port: 3000
+          initialDelaySeconds: 3
+          periodSeconds: 5
+          failureThreshold: 2
+        env:
+        - name: POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        - name: POD_IP
+          valueFrom:
+            fieldRef:
+              fieldPath: status.podIP
+---
+# --- Primary InferencePool Definition ---
+apiVersion: inference.networking.k8s.io/v1
+kind: InferencePool
+metadata:
+  name: primary-inference-pool
+  namespace: gateway-conformance-app-backend
+spec:
+  selector:
+    matchLabels:
+      app: primary-inference-model-server
+  targetPorts:
+    - number: 3000
+  endpointPickerRef:
+    name: primary-endpoint-picker-svc
+    port:
+      number: 9002
+---
+# --- Primary Conformance EPP service Definition ---
+apiVersion: v1
+kind: Service
+metadata:
+  name: primary-endpoint-picker-svc
+  namespace: gateway-conformance-app-backend
+spec:
+  selector:
+    app: primary-app-backend-epp
+  ports:
+    - protocol: TCP
+      port: 9002
+      targetPort: 9002
+      appProtocol: http2
+  type: ClusterIP
+---
+# --- Primary Conformance EPP Deployment ---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: primary-app-endpoint-picker
+  namespace: gateway-conformance-app-backend
+  labels:
+    app: primary-app-backend-epp
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: primary-app-backend-epp
+  template:
+    metadata:
+      labels:
+        app: primary-app-backend-epp
+    spec:
+      # Conservatively, this timeout should mirror the longest grace period of the pods within the pool
+      terminationGracePeriodSeconds: 130
+      containers:
+      - name: epp
+        image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v1.0.0
+        imagePullPolicy: Always
+        args:
+        - --pool-name
+        - "primary-inference-pool"
+        - --pool-namespace
+        - "gateway-conformance-app-backend"
+        - --v
+        - "4"
+        - --zap-encoder
+        - "json"
+        - --grpc-port
+        - "9002"
+        - --grpc-health-port
+        - "9003"
+        - "--config-file"
+        - "/config/conformance-plugins.yaml"
+        env:
+        - name: ENABLE_REQ_HEADER_BASED_SCHEDULER_FOR_TESTING # Used for conformance test.
+          value: "true"
+        ports:
+        - containerPort: 9002
+        - containerPort: 9003
+        - name: metrics
+          containerPort: 9090
+        livenessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        readinessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        volumeMounts:
+        - name: plugins-config-volume
+          mountPath: "/config"
+      volumes:
+      - name: plugins-config-volume
+        configMap:
+          name: plugins-config
+---
+# --- Secondary InferencePool Definition ---
+apiVersion: inference.networking.k8s.io/v1
+kind: InferencePool
+metadata:
+  name: secondary-inference-pool
+  namespace: gateway-conformance-app-backend
+spec:
+  selector:
+    matchLabels:
+      app: secondary-inference-model-server
+  targetPorts:
+    - number: 3000
+  endpointPickerRef:
+    name: secondary-endpoint-picker-svc
+    failureMode: FailOpen
+    port:
+      number: 9002
+---
+# --- Secondary Conformance EPP service Definition ---
+apiVersion: v1
+kind: Service
+metadata:
+  name: secondary-endpoint-picker-svc
+  namespace: gateway-conformance-app-backend
+spec:
+  selector:
+    app: secondary-app-backend-epp
+  ports:
+    - protocol: TCP
+      port: 9002
+      targetPort: 9002
+      appProtocol: http2
+  type: ClusterIP
+---
+# --- Secondary Conformance EPP Deployment ---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: secondary-app-endpoint-picker
+  namespace: gateway-conformance-app-backend
+  labels:
+    app: secondary-app-backend-epp
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: secondary-app-backend-epp
+  template:
+    metadata:
+      labels:
+        app: secondary-app-backend-epp
+    spec:
+      # Conservatively, this timeout should mirror the longest grace period of the pods within the pool
+      terminationGracePeriodSeconds: 130
+      containers:
+      - name: epp
+        image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v1.0.0
+        imagePullPolicy: Always
+        args:
+        - --pool-name
+        - "secondary-inference-pool"
+        - --pool-namespace
+        - "gateway-conformance-app-backend"
+        - --v
+        - "4"
+        - --zap-encoder
+        - "json"
+        - --grpc-port
+        - "9002"
+        - --grpc-health-port
+        - "9003"
+        - "--config-file"
+        - "/config/conformance-plugins.yaml"
+        env:
+        - name: ENABLE_REQ_HEADER_BASED_SCHEDULER_FOR_TESTING # Used for conformance test.
+          value: "true"
+        ports:
+        - containerPort: 9002
+        - containerPort: 9003
+        - name: metrics
+          containerPort: 9090
+        livenessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        readinessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        volumeMounts:
+        - name: plugins-config-volume
+          mountPath: "/config"
+      volumes:
+      - name: plugins-config-volume
+        configMap:
+          name: plugins-config
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: plugins-config
+  namespace: gateway-conformance-app-backend
+data:
+  conformance-plugins.yaml: |
+    apiVersion: inference.networking.x-k8s.io/v1alpha1
+    kind: EndpointPickerConfig
+    plugins:
+    - type: header-based-testing-filter
+    schedulingProfiles:
+    - name: conformance-profile
+      plugins:
+      - pluginRef: header-based-testing-filter
+---
+# --- Required Role and RoleBinding for Conformance Test for EPP ---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: inference-model-reader
+  namespace: gateway-conformance-app-backend
+rules:
+- apiGroups: ["inference.networking.x-k8s.io"]
+  resources: ["inferenceobjectives", "inferencepools"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["inference.networking.k8s.io"]
+  resources: ["inferencepools"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: [""]
+  resources: ["pods"]
+  verbs: ["get", "list", "watch"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: epp-to-inference-model-reader
+  namespace: gateway-conformance-app-backend
+subjects:
+- kind: ServiceAccount
+  name: default
+  namespace: gateway-conformance-app-backend
+roleRef:
+  kind: Role
+  name: inference-model-reader
+  apiGroup: rbac.authorization.k8s.io
\ No newline at end of file
diff --git a/examples/cafe-example/httproutemultiplerules.yaml b/examples/cafe-example/httproutemultiplerules.yaml
new file mode 100644
index 0000000000..e506fec959
--- /dev/null
+++ b/examples/cafe-example/httproutemultiplerules.yaml
@@ -0,0 +1,26 @@
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: httproute-multiple-rules-different-pools
+  namespace: gateway-conformance-app-backend
+spec:
+  parentRefs:
+    - name: conformance-primary
+      namespace: gateway-conformance-infra
+  rules:
+    - matches:
+        - path:
+            type: PathPrefix
+            value: /primary
+      backendRefs:
+        - name: primary-inference-pool
+          kind: InferencePool
+          group: inference.networking.k8s.io
+    - matches:
+        - path:
+            type: PathPrefix
+            value: /secondary
+      backendRefs:
+        - name: secondary-inference-pool
+          kind: InferencePool
+          group: inference.networking.k8s.io
diff --git a/examples/cafe-example/httproutesmultiplegateways.yaml b/examples/cafe-example/httproutesmultiplegateways.yaml
new file mode 100644
index 0000000000..caded16d89
--- /dev/null
+++ b/examples/cafe-example/httproutesmultiplegateways.yaml
@@ -0,0 +1,44 @@
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: route-for-primary-gateway
+  namespace: gateway-conformance-app-backend
+spec:
+  parentRefs:
+  - kind: Gateway
+    name: conformance-primary
+    namespace: gateway-conformance-infra
+  hostnames:
+  - "primary.example.com"
+  rules:
+  - backendRefs:
+    - group: inference.networking.k8s.io
+      kind: InferencePool
+      name: primary-inference-pool
+    matches:
+    - path:
+        type: PathPrefix
+        value: /test-primary-gateway
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: route-for-secondary-gateway
+  namespace: gateway-conformance-app-backend
+spec:
+  parentRefs:
+  - kind: Gateway
+    name: conformance-secondary
+    namespace: gateway-conformance-infra
+  hostnames:
+  - "secondary.example.com"
+  rules:
+  - backendRefs:
+    - group: inference.networking.k8s.io
+      kind: InferencePool
+      name: secondary-inference-pool
+    matches:
+    - path:
+        type: PathPrefix
+        value: /test-secondary-gateway
diff --git a/examples/cafe-example/inference-pool-port-validation.yaml b/examples/cafe-example/inference-pool-port-validation.yaml
new file mode 100644
index 0000000000..9c78117d97
--- /dev/null
+++ b/examples/cafe-example/inference-pool-port-validation.yaml
@@ -0,0 +1,79 @@
+# --- HTTPRoute Scenario 1: Port Unspecified ---
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: httproute-pool-port-unspecified
+  namespace: gateway-conformance-app-backend
+spec:
+  parentRefs:
+  - group: gateway.networking.k8s.io
+    kind: Gateway
+    name: conformance-primary
+    namespace: gateway-conformance-infra
+    sectionName: http
+  hostnames:
+  - "port-unspecified.example.com"
+  rules:
+  - backendRefs:
+    - group: inference.networking.k8s.io
+      kind: InferencePool
+      name: primary-inference-pool
+      # Port is intentionally unspecified here
+    matches:
+    - path:
+        type: PathPrefix
+        value: /test-port-unspecified
+---
+# --- HTTPRoute Scenario 2: Port Matching ---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: httproute-pool-port-matching
+  namespace: gateway-conformance-app-backend
+spec:
+  parentRefs:
+  - group: gateway.networking.k8s.io
+    kind: Gateway
+    name: conformance-primary
+    namespace: gateway-conformance-infra
+    sectionName: http
+  hostnames:
+  - "port-matching.example.com"
+  rules:
+  - backendRefs:
+    - group: inference.networking.k8s.io
+      kind: InferencePool
+      name: primary-inference-pool
+      port: 3000 # Port matches the InferencePool's targetPorts number
+    matches:
+    - path:
+        type: PathPrefix
+        value: /test-port-matching
+---
+# --- HTTPRoute Scenario 3: Port Non-Matching ---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: httproute-pool-port-non-matching
+  namespace: gateway-conformance-app-backend
+spec:
+  parentRefs:
+  - group: gateway.networking.k8s.io
+    kind: Gateway
+    name: conformance-primary
+    namespace: gateway-conformance-infra
+    sectionName: http
+  hostnames:
+  - "port-non-matching.example.com"
+  rules:
+  - backendRefs:
+    - group: inference.networking.k8s.io
+      kind: InferencePool
+      name: primary-inference-pool
+      port: 8888 # Port does NOT match the InferencePool's targetPorts number
+    matches:
+    - path:
+        type: PathPrefix
+        value: /test-port-non-matching
+---
diff --git a/examples/cafe-example/inferencepool-resolvedrefs.yaml b/examples/cafe-example/inferencepool-resolvedrefs.yaml
new file mode 100644
index 0000000000..81a7091963
--- /dev/null
+++ b/examples/cafe-example/inferencepool-resolvedrefs.yaml
@@ -0,0 +1,54 @@
+# conformance/tests/basic/inferencepool_resolvedrefs_condition.yaml
+
+# This manifest defines the initial resources for the
+# inferencepool_resolvedrefs_condition.go conformance test.
+
+# --- HTTPRoute for Primary Gateway (conformance-primary) ---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: httproute-for-primary-gw
+  namespace: gateway-conformance-app-backend
+spec:
+  parentRefs:
+  - group: gateway.networking.k8s.io
+    kind: Gateway
+    name: conformance-primary
+    namespace: gateway-conformance-infra
+    sectionName: http
+  hostnames:
+  - "primary.example.com"
+  rules:
+  - backendRefs:
+    - group: inference.networking.k8s.io
+      kind: InferencePool
+      name: primary-inference-pool
+    matches:
+    - path:
+        type: PathPrefix
+        value: /primary-gateway-test
+---
+# --- HTTPRoute for Secondary Gateway (conformance-secondary) ---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: httproute-for-secondary-gw
+  namespace: gateway-conformance-app-backend
+spec:
+  parentRefs:
+  - group: gateway.networking.k8s.io
+    kind: Gateway
+    name: conformance-secondary
+    namespace: gateway-conformance-infra
+    sectionName: http
+  hostnames:
+  - "secondary.example.com"
+  rules:
+  - backendRefs:
+    - group: inference.networking.k8s.io
+      kind: InferencePool
+      name: primary-inference-pool
+    matches:
+    - path:
+        type: PathPrefix
+        value: /secondary-gateway-test
\ No newline at end of file
diff --git a/internal/controller/nginx/config/maps.go b/internal/controller/nginx/config/maps.go
index e0f9ee98d5..21e5cd1e0d 100644
--- a/internal/controller/nginx/config/maps.go
+++ b/internal/controller/nginx/config/maps.go
@@ -183,40 +183,53 @@ func createAddHeadersMap(name string) shared.Map {
 	}
 }
 
-// buildInferenceMaps creates maps for InferencePool Backends.
+// buildInferenceMaps creates maps for InferencePool back-ends.
 func buildInferenceMaps(groups []dataplane.BackendGroup) []shared.Map {
 	inferenceMaps := make([]shared.Map, 0, len(groups))
+
 	for _, group := range groups {
 		for _, backend := range group.Backends {
-			if backend.EndpointPickerConfig != nil {
-				var defaultResult string
-				switch backend.EndpointPickerConfig.FailureMode {
-				// in FailClose mode, if the EPP is unavailable or returns an error,
-				// we return an invalid backend to ensure the request fails
-				case inference.EndpointPickerFailClose:
-					defaultResult = invalidBackendRef
-				// in FailOpen mode, if the EPP is unavailable or returns an error,
-				// we fall back to the upstream
-				case inference.EndpointPickerFailOpen:
-					defaultResult = backend.UpstreamName
-				}
-				params := []shared.MapParameter{
-					{
-						Value:  "~.+",
-						Result: "$inference_workload_endpoint",
-					},
-					{
-						Value:  "default",
-						Result: defaultResult,
-					},
-				}
-				backendVarName := strings.ReplaceAll(backend.UpstreamName, "-", "_")
-				inferenceMaps = append(inferenceMaps, shared.Map{
-					Source:     "$inference_workload_endpoint",
-					Variable:   fmt.Sprintf("$inference_backend_%s", backendVarName),
-					Parameters: params,
-				})
+			if backend.EndpointPickerConfig == nil {
+				continue
+			}
+
+			// Decide what the map must return when the picker didn’t set a value.
+			var defaultResult string
+			switch backend.EndpointPickerConfig.FailureMode {
+			case inference.EndpointPickerFailClose:
+				defaultResult = invalidBackendRef // strict: 500
+			case inference.EndpointPickerFailOpen:
+				defaultResult = backend.UpstreamName // fall back
 			}
+
+			// Build the ordered parameter list.
+			params := make([]shared.MapParameter, 0, 3)
+
+			// Empty → no endpoint was picked; go to the inference pool upstream directly.
+			params = append(params, shared.MapParameter{
+				Value:  `""`,
+				Result: backend.UpstreamName,
+			})
+
+			// Non-empty → use the value filled by the picker.
+			params = append(params, shared.MapParameter{
+				Value:  `~.+`,
+				Result: `$inference_workload_endpoint`,
+			})
+
+			// Fallback (Fail-Close = 500, Fail-Open = upstream).
+			params = append(params, shared.MapParameter{
+				Value:  "default",
+				Result: defaultResult,
+			})
+
+			backendVarName := strings.ReplaceAll(backend.UpstreamName, "-", "_")
+
+			inferenceMaps = append(inferenceMaps, shared.Map{
+				Source:     `$inference_workload_endpoint`,
+				Variable:   fmt.Sprintf("$inference_backend_%s", backendVarName),
+				Parameters: params,
+			})
 		}
 	}
 	return inferenceMaps
diff --git a/internal/controller/nginx/config/maps_test.go b/internal/controller/nginx/config/maps_test.go
index 736d7808ec..368e5881f4 100644
--- a/internal/controller/nginx/config/maps_test.go
+++ b/internal/controller/nginx/config/maps_test.go
@@ -1,6 +1,7 @@
 package config
 
 import (
+	"fmt"
 	"strings"
 	"testing"
 
@@ -418,6 +419,7 @@ func TestBuildInferenceMaps(t *testing.T) {
 	}
 
 	maps := buildInferenceMaps([]dataplane.BackendGroup{group})
+	fmt.Println("maps", maps)
 	g.Expect(maps).To(HaveLen(2))
 	g.Expect(maps[0].Source).To(Equal("$inference_workload_endpoint"))
 	g.Expect(maps[0].Variable).To(Equal("$inference_backend_upstream1"))
diff --git a/internal/controller/nginx/config/servers.go b/internal/controller/nginx/config/servers.go
index 9664396c2e..bb5ccbf84e 100644
--- a/internal/controller/nginx/config/servers.go
+++ b/internal/controller/nginx/config/servers.go
@@ -453,12 +453,21 @@ func createInternalLocationsForRule(
 			intInfLocation := initializeInternalInferenceRedirectLocation(pathRuleIdx, matchRuleIdx)
 			for _, b := range r.BackendGroup.Backends {
 				if b.EndpointPickerConfig != nil {
+					fmt.Println("backend with endpoint picker found", b.EndpointPickerConfig)
 					var portNum int
 					if b.EndpointPickerConfig.Port != nil {
 						portNum = int(b.EndpointPickerConfig.Port.Number)
 					}
 					intInfLocation.EPPInternalPath = intLocation.Path
-					intInfLocation.EPPHost = string(b.EndpointPickerConfig.Name)
+					fmt.Println("ns name in createInternalLocationsForRule", b.EndpointPickerNsName)
+					if b.EndpointPickerNsName != "" {
+						fmt.Println("sets the right thing", b.EndpointPickerNsName)
+						intInfLocation.EPPHost = string(b.EndpointPickerConfig.Name) + "." + b.EndpointPickerNsName
+					} else {
+						fmt.Println("sets the wrong thing")
+						intInfLocation.EPPHost = string(b.EndpointPickerConfig.Name)
+					}
+					fmt.Println("epp host set to", intInfLocation.EPPHost)
 					intInfLocation.EPPPort = portNum
 				}
 			}
diff --git a/internal/controller/nginx/config/servers_template.go b/internal/controller/nginx/config/servers_template.go
index 9575b77480..82b692c88e 100644
--- a/internal/controller/nginx/config/servers_template.go
+++ b/internal/controller/nginx/config/servers_template.go
@@ -124,11 +124,17 @@ server {
         {{- end }}
 
         {{- if contains $l.Type "inference" -}}
-        js_var $inference_workload_endpoint;
-        set $epp_internal_path {{ $l.EPPInternalPath }};
-        set $epp_host {{ $l.EPPHost }};
-        set $epp_port {{ $l.EPPPort }};
-        js_content epp.getEndpoint;
+            if ($request_method = GET) {
+                set $inference_workload_endpoint "";
+                rewrite ^ {{ $l.EPPInternalPath }} last;
+            }
+
+            js_var $inference_workload_endpoint;
+            set $epp_internal_path {{ $l.EPPInternalPath }};
+            set $epp_host          {{ $l.EPPHost }};
+            set $epp_port          {{ $l.EPPPort }};
+            js_content epp.getEndpoint;
+            break;
         {{- end }}
 
         {{ $proxyOrGRPC := "proxy" }}{{ if $l.GRPC }}{{ $proxyOrGRPC = "grpc" }}{{ end }}
@@ -137,7 +143,6 @@ server {
         include /etc/nginx/grpc-error-pages.conf;
         {{- end }}
 
-        proxy_http_version 1.1;
         {{- if $l.ProxyPass -}}
             {{ range $h := $l.ProxySetHeaders }}
         {{ $proxyOrGRPC }}_set_header {{ $h.Name }} "{{ $h.Value }}";
diff --git a/internal/controller/nginx/modules/src/epp.js b/internal/controller/nginx/modules/src/epp.js
index 88de40062b..2400e4aa19 100644
--- a/internal/controller/nginx/modules/src/epp.js
+++ b/internal/controller/nginx/modules/src/epp.js
@@ -56,4 +56,4 @@ async function getEndpoint(r) {
 	r.internalRedirect(r.variables[EPP_INTERNAL_PATH_VAR] + args);
 }
 
-export default { getEndpoint };
+export default { getEndpoint };
\ No newline at end of file
diff --git a/internal/controller/nginx/modules/test/epp.test.js b/internal/controller/nginx/modules/test/epp.test.js
index c2a4528694..ae05f9dc97 100644
--- a/internal/controller/nginx/modules/test/epp.test.js
+++ b/internal/controller/nginx/modules/test/epp.test.js
@@ -103,4 +103,4 @@ describe('getEndpoint', () => {
 		await epp.getEndpoint(r);
 		expect(r.internalRedirect).toHaveBeenCalledWith('/foo?a=1&b=2');
 	});
-});
+});
\ No newline at end of file
diff --git a/internal/controller/state/dataplane/configuration.go b/internal/controller/state/dataplane/configuration.go
index 59030c0ca7..f4a846e514 100644
--- a/internal/controller/state/dataplane/configuration.go
+++ b/internal/controller/state/dataplane/configuration.go
@@ -400,7 +400,10 @@ func newBackendGroup(
 			Valid:                valid,
 			VerifyTLS:            convertBackendTLS(ref.BackendTLSPolicy, gatewayName),
 			EndpointPickerConfig: ref.EndpointPickerConfig,
+			EndpointPickerNsName: ref.EndpointPickerNsName,
 		})
+
+		fmt.Println("backend ref in configuration", backends)
 	}
 
 	return BackendGroup{
diff --git a/internal/controller/state/dataplane/types.go b/internal/controller/state/dataplane/types.go
index 1637c1f408..0866af636b 100644
--- a/internal/controller/state/dataplane/types.go
+++ b/internal/controller/state/dataplane/types.go
@@ -329,6 +329,7 @@ type Backend struct {
 	// EndpointPickerConfig holds the configuration for the EndpointPicker for this backend.
 	// This is set if this backend is for an inference workload.
 	EndpointPickerConfig *inference.EndpointPickerRef
+	EndpointPickerNsName string
 	// UpstreamName is the name of the upstream for this backend.
 	UpstreamName string
 	// Weight is the weight of the BackendRef.
diff --git a/internal/controller/state/graph/backend_refs.go b/internal/controller/state/graph/backend_refs.go
index 95ce6df0b9..92215af847 100644
--- a/internal/controller/state/graph/backend_refs.go
+++ b/internal/controller/state/graph/backend_refs.go
@@ -33,6 +33,8 @@ type BackendRef struct {
 	BackendTLSPolicy *BackendTLSPolicy
 	// EndpointPickerConfig is the configuration for the EndpointPicker, if this backendRef is for an InferencePool.
 	EndpointPickerConfig *inference.EndpointPickerRef
+	// EndpointPickerNsName is the namespace where the EndpointPicker is deployed, if this backendRef is for an InferencePool.
+	EndpointPickerNsName string
 	// InvalidForGateways is a map of Gateways for which this BackendRef is invalid for, with the corresponding
 	// condition. Certain NginxProxy configurations may result in a backend not being valid for some Gateways,
 	// but not others.
@@ -121,9 +123,21 @@ func addBackendRefsToRules(
 				}
 
 				if pool, exists := referencedInferencePools[poolName]; exists {
+					// if the InferencePool is invalid, add a condition and skip creating the BackendRef
+					fmt.Println("Checking validity for InferencePool:", poolName, "Valid:", pool.Valid)
+					if !pool.Valid {
+						route.Conditions = append(route.Conditions, conditions.NewRouteBackendRefUnsupportedValue(
+							fmt.Sprintf("Referenced InferencePool %s/%s is invalid",
+								poolName.Namespace,
+								poolName.Name,
+							),
+						))
+						continue
+					}
 					port := gatewayv1.PortNumber(pool.Source.Spec.TargetPorts[0].Number)
 					ref.Port = helpers.GetPointer(port)
 					ref.EndpointPickerConfig = &pool.Source.Spec.EndpointPickerRef
+					ref.EndpointPickerNsName = poolName.Namespace
 				}
 			}
 
@@ -193,6 +207,7 @@ func createBackendRef(
 			IsInferencePool:      ref.IsInferencePool,
 			InvalidForGateways:   make(map[types.NamespacedName]conditions.Condition),
 			EndpointPickerConfig: ref.EndpointPickerConfig,
+			EndpointPickerNsName: ref.EndpointPickerNsName,
 		}
 
 		return backendRef, []conditions.Condition{cond}
@@ -214,6 +229,7 @@ func createBackendRef(
 			IsInferencePool:      ref.IsInferencePool,
 			InvalidForGateways:   make(map[types.NamespacedName]conditions.Condition),
 			EndpointPickerConfig: ref.EndpointPickerConfig,
+			EndpointPickerNsName: ref.EndpointPickerNsName,
 		}
 
 		return backendRef, []conditions.Condition{conditions.NewRouteBackendRefRefBackendNotFound(err.Error())}
@@ -238,6 +254,7 @@ func createBackendRef(
 				IsInferencePool:      ref.IsInferencePool,
 				InvalidForGateways:   invalidForGateways,
 				EndpointPickerConfig: ref.EndpointPickerConfig,
+				EndpointPickerNsName: ref.EndpointPickerNsName,
 			}
 
 			return backendRef, append(conds, conditions.NewRouteBackendRefUnsupportedValue(
@@ -269,6 +286,7 @@ func createBackendRef(
 			IsInferencePool:      ref.IsInferencePool,
 			InvalidForGateways:   invalidForGateways,
 			EndpointPickerConfig: ref.EndpointPickerConfig,
+			EndpointPickerNsName: ref.EndpointPickerNsName,
 		}
 
 		return backendRef, append(conds, conditions.NewRouteBackendRefUnsupportedValue(err.Error()))
@@ -287,6 +305,7 @@ func createBackendRef(
 				IsInferencePool:      ref.IsInferencePool,
 				InvalidForGateways:   invalidForGateways,
 				EndpointPickerConfig: ref.EndpointPickerConfig,
+				EndpointPickerNsName: ref.EndpointPickerNsName,
 			}
 
 			return backendRef, append(conds, conditions.NewRouteBackendRefUnsupportedProtocol(err.Error()))
@@ -303,8 +322,11 @@ func createBackendRef(
 		IsInferencePool:      ref.IsInferencePool,
 		InvalidForGateways:   invalidForGateways,
 		EndpointPickerConfig: ref.EndpointPickerConfig,
+		EndpointPickerNsName: ref.EndpointPickerNsName,
 	}
 
+	fmt.Println("ns name in graph", backendRef.EndpointPickerNsName)
+
 	return backendRef, conds
 }
 
diff --git a/internal/controller/state/graph/inferencepools.go b/internal/controller/state/graph/inferencepools.go
index 84e6d62df2..7bf57a9f3d 100644
--- a/internal/controller/state/graph/inferencepools.go
+++ b/internal/controller/state/graph/inferencepools.go
@@ -25,6 +25,8 @@ type ReferencedInferencePool struct {
 	HTTPRoutes []*L7Route
 	// Conditions contains the conditions that should be applied to the InferencePool.
 	Conditions []conditions.Condition
+	// Valid indicates whether the InferencePool is valid or not.
+	Valid bool
 }
 
 // buildReferencedInferencePools builds a map of InferencePools that are referenced by HTTPRoutes
@@ -58,6 +60,8 @@ func buildReferencedInferencePools(
 		if extensionRefCond := validateInferencePoolExtensionRef(refPool.Source, services); extensionRefCond != nil {
 			refPool.Conditions = append(refPool.Conditions, *extensionRefCond)
 		}
+
+		refPool.Valid = len(refPool.Conditions) == 0
 	}
 
 	return referencedInferencePools
diff --git a/internal/controller/state/graph/route_common.go b/internal/controller/state/graph/route_common.go
index 22067c6d44..fcbb6fc579 100644
--- a/internal/controller/state/graph/route_common.go
+++ b/internal/controller/state/graph/route_common.go
@@ -169,6 +169,9 @@ type RouteBackendRef struct {
 	// EndpointPickerConfig is the configuration for the EndpointPicker, if this backendRef is for an InferencePool.
 	EndpointPickerConfig *inference.EndpointPickerRef
 
+	// EndpointPickerNsName is the namespace where the EndpointPicker is deployed, if this backendRef is for an InferencePool.
+	EndpointPickerNsName string
+
 	Filters []any
 
 	// IsInferencePool indicates if this backend is an InferencePool disguised as a Service.
diff --git a/tests/Makefile b/tests/Makefile
index dcea49c341..e96cf6d6ad 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -18,6 +18,9 @@ EXPERIMENTAL_CONFORMANCE_PROFILES = GATEWAY-TLS
 CONFORMANCE_PROFILES = $(STANDARD_CONFORMANCE_PROFILES) # by default we use the standard conformance profiles. If experimental is enabled we override this and add the experimental profiles.
 SKIP_TESTS =
 CEL_TEST_TARGET =
+INFERENCE_SUPPORTED_FEATURES = EppUnAvailableFailOpen,HTTPRouteInvalidInferencePoolRef,InferencePoolAccepted,HTTPRouteMultipleGatewaysDifferentPools,HTTPRouteMultipleRulesDifferentPools,InferencePoolHTTPRoutePortValidation,InferencePoolInvalidEPPService
+# InferencePoolResolvedRefsCondition is skipped: it checks that deleting the route removes the condition, and only that part fails (known bug in NGF).
+INFERENCE_SKIP_TESTS = GatewayFollowingEPPRouting,InferencePoolResolvedRefsCondition
 
 # Check if ENABLE_EXPERIMENTAL is true
 ifeq ($(ENABLE_EXPERIMENTAL),true)
@@ -68,6 +71,22 @@ run-conformance-tests: ## Run conformance tests
 		exit 2; \
 	fi
 
+.PHONY: run-inference-conformance-tests
+run-inference-conformance-tests: ## Run inference conformance tests
+	kind load docker-image $(CONFORMANCE_PREFIX):$(CONFORMANCE_TAG) --name $(CLUSTER_NAME)
+	kubectl apply -f conformance/conformance-rbac.yaml
+	kubectl run -i conformance \
+		--image=$(CONFORMANCE_PREFIX):$(CONFORMANCE_TAG) --image-pull-policy=Never \
+		--overrides='{ "spec": { "serviceAccountName": "conformance" }	}' \
+		--restart=Never -- sh -c "go test -v . -tags conformance -args --gateway-class=$(GATEWAY_CLASS) \
+		--version=$(NGF_VERSION) \
+		--skip-tests=$(INFERENCE_SKIP_TESTS) \
+		--supported-features=$(INFERENCE_SUPPORTED_FEATURES) \
+		--report-output=output.txt; cat output.txt" | tee output.txt
+	./scripts/check-pod-exit-code.sh
+	sed -e '1,/CONFORMANCE PROFILE/d' output.txt > conformance-profile-inference.yaml
+	rm output.txt
+
 .PHONY: cleanup-conformance-tests
 cleanup-conformance-tests: ## Clean up conformance tests fixtures
 	kubectl delete pod conformance
@@ -170,7 +189,7 @@ add-local-ip-to-cluster: ## Add local IP to the GKE cluster master-authorized-ne
 update-firewall-with-local-ip: ## Update the firewall rule with local IP address
 	./scripts/update-firewall-with-local-ip.sh
 
-HELM_PARAMETERS += --set nginxGateway.name=nginx-gateway --set nginx.service.type=ClusterIP --skip-schema-validation
+HELM_PARAMETERS += --set nginxGateway.name=nginx-gateway --set nginx.service.type=ClusterIP --skip-schema-validation --set nginxGateway.gwAPIInferenceExtension.enable=$(ENABLE_INFERENCE_EXTENSION) --set nginxGateway.config.logging.level=debug
 
 # this target is used to install the gateway-api CRDs from the main branch (only used in the nightly CI job)
 # it overrides the target in the main Makefile when the GW_API_VERSION is set to main
@@ -191,6 +210,9 @@ uninstall-ngf: ## Uninstall NGF on configured kind cluster
 	-make uninstall-gateway-crds
 	-kubectl delete namespace nginx-gateway
 	-kubectl kustomize ../config/crd | kubectl delete -f -
+	@if [ "$(ENABLE_INFERENCE_EXTENSION)" = "true" ]; then \
+		$(MAKE) uninstall-inference-crds; \
+	fi
 
 # Run CEL validation integration tests against a real cluster
 .PHONY: test-cel-validation
diff --git a/tests/README.md b/tests/README.md
index 883bc595bd..55fecaaa79 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -20,7 +20,9 @@ This directory contains the tests for NGINX Gateway Fabric. The tests are divide
     - [Option 1 - Build and install NGINX Gateway Fabric from local to configured kind cluster](#option-1---build-and-install-nginx-gateway-fabric-from-local-to-configured-kind-cluster)
     - [Option 2 - Install NGINX Gateway Fabric from local already built image to configured kind cluster](#option-2---install-nginx-gateway-fabric-from-local-already-built-image-to-configured-kind-cluster)
   - [Step 2 - Build conformance test runner image](#step-2---build-conformance-test-runner-image)
-  - [Step 3 - Run Gateway conformance tests](#step-3---run-gateway-conformance-tests)
+  - [Step 3 - Run Conformance tests](#step-3---run-conformance-tests)
+    - [To run Gateway conformance tests](#to-run-gateway-conformance-tests)
+    - [To run Inference conformance tests](#to-run-inference-conformance-tests)
   - [Step 4 - Cleanup the conformance test fixtures and uninstall NGINX Gateway Fabric](#step-4---cleanup-the-conformance-test-fixtures-and-uninstall-nginx-gateway-fabric)
   - [Step 5 - Revert changes to Go modules](#step-5---revert-changes-to-go-modules)
   - [Step 6 - Delete kind cluster](#step-6---delete-kind-cluster)
@@ -138,6 +140,12 @@ TELEMETRY_ENDPOINT=otel-collector-opentelemetry-collector.collector.svc.cluster.
  export ENABLE_EXPERIMENTAL=true
 ```
 
+> If you want to run the Inference conformance tests, set the following environment variable before deploying NGF:
+
+```bash
+export ENABLE_INFERENCE_EXTENSION=true
+```
+
 #### Option 1 - Build and install NGINX Gateway Fabric from local to configured kind cluster
 
 ```makefile
@@ -188,12 +196,20 @@ go mod tidy
 make build-test-runner-image
 ```
 
-### Step 3 - Run Gateway conformance tests
+### Step 3 - Run Conformance tests
+
+#### To run Gateway conformance tests
 
 ```makefile
 make run-conformance-tests
 ```
 
+#### To run Inference conformance tests
+
+```makefile
+make run-inference-conformance-tests
+```
+
 ### Step 4 - Cleanup the conformance test fixtures and uninstall NGINX Gateway Fabric
 
 ```makefile
diff --git a/tests/conformance-profile-inference.yaml b/tests/conformance-profile-inference.yaml
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/conformance/conformance-rbac.yaml b/tests/conformance/conformance-rbac.yaml
index 6cdf2d0a86..f063191f98 100644
--- a/tests/conformance/conformance-rbac.yaml
+++ b/tests/conformance/conformance-rbac.yaml
@@ -24,6 +24,7 @@ rules:
   - get
   - list
   - update
+  - watch
 - apiGroups:
   - apps
   resources:
@@ -33,6 +34,7 @@ rules:
   - delete
   - get
   - list
+  - update
 - apiGroups:
   - gateway.networking.k8s.io
   resources:
@@ -48,6 +50,7 @@ rules:
   - get
   - list
   - patch
+  - update
 - apiGroups:
   - apiextensions.k8s.io
   resources:
@@ -64,6 +67,43 @@ rules:
   - get
   - list
   - patch
+- apiGroups:
+  - inference.networking.k8s.io
+  resources:
+  - inferencepools
+  verbs:
+  - get
+  - list
+  - watch
+  - create
+  - delete
+  - update
+- apiGroups:
+  - inference.networking.x-k8s.io
+  resources:
+  - inferencepools
+  - inferenceobjectives
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - inference.networking.k8s.io
+  resources:
+  - inferencepools/status
+  verbs:
+  - update
+- apiGroups:
+  - rbac.authorization.k8s.io
+  resources:
+  - roles
+  - rolebindings
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - update
 ---
 kind: ClusterRoleBinding
 apiVersion: rbac.authorization.k8s.io/v1
diff --git a/tests/conformance/conformance_test.go b/tests/conformance/conformance_test.go
index d792046e96..909a36e09f 100644
--- a/tests/conformance/conformance_test.go
+++ b/tests/conformance/conformance_test.go
@@ -18,10 +18,12 @@ limitations under the License.
 package conformance
 
 import (
+	"fmt"
 	"os"
 	"testing"
 
 	. "github.com/onsi/gomega"
+	"gopkg.in/yaml.v2"
 	v1 "sigs.k8s.io/gateway-api/apis/v1"
 	"sigs.k8s.io/gateway-api/apis/v1beta1"
 	"sigs.k8s.io/gateway-api/conformance"
@@ -29,12 +31,18 @@ import (
 	"sigs.k8s.io/gateway-api/conformance/tests"
 	"sigs.k8s.io/gateway-api/conformance/utils/flags"
 	"sigs.k8s.io/gateway-api/conformance/utils/suite"
-	"sigs.k8s.io/yaml"
+
+	inference_conformance "sigs.k8s.io/gateway-api-inference-extension/conformance"
 )
 
-// unusableGatewayIPAddress 198.51.100.0 is a publicly reserved IP address specifically for documentation.
-// This is needed to give the conformance tests an example valid ip unusable address.
-const unusableGatewayIPAddress = "198.51.100.0"
+const (
+	// unusableGatewayIPAddress 198.51.100.0 is a publicly reserved IP address specifically for documentation.
+	// This is needed to give the conformance tests an example valid ip unusable address.
+	unusableGatewayIPAddress = "198.51.100.0"
+
+	// inferenceBaseManifest is the base manifest used to deploy the resources needed for inference conformance tests.
+	inferenceBaseManifest = "manifests/base.yaml"
+)
 
 func TestConformance(t *testing.T) {
 	g := NewWithT(t)
@@ -86,3 +94,38 @@ func TestConformance(t *testing.T) {
 	_, err = f.Write(yamlReport)
 	g.Expect(err).ToNot(HaveOccurred())
 }
+
+// TestInferenceExtensionConformance runs the Gateway API Inference Extension conformance suite with the
+// GatewayLayerProfileName profile, failing first if inferenceBaseManifest does not exist.
+func TestInferenceExtensionConformance(t *testing.T) {
+	g := NewWithT(t)
+
+	t.Logf("Running inference conformance tests with %s GatewayClass\n cleanup: %t\n"+
+		"debug: %t\n enable all features: %t \n supported extended features: [%v]\n exempt features: [%v]\n"+
+		"skip tests: [%v]",
+		*flags.GatewayClassName, *flags.CleanupBaseResources, *flags.ShowDebug,
+		*flags.EnableAllSupportedFeatures, *flags.SupportedFeatures, *flags.ExemptFeatures, *flags.SkipTests,
+	)
+
+	opts := inference_conformance.DefaultOptions(t)
+	ipaddressType := v1.IPAddressType
+	opts.UnusableNetworkAddresses = []v1beta1.GatewaySpecAddress{{Type: &ipaddressType, Value: unusableGatewayIPAddress}}
+	opts.UsableNetworkAddresses = []v1beta1.GatewaySpecAddress{{Type: &ipaddressType, Value: "192.0.2.1"}}
+
+	opts.Implementation = conf_v1.Implementation{
+		Organization: "nginx",
+		Project:      "nginx-gateway-fabric",
+		URL:          "https://github.com/nginx/nginx-gateway-fabric",
+		Version:      *flags.ImplementationVersion,
+		Contact:      []string{"https://github.com/nginx/nginx-gateway-fabric/discussions/new/choose"},
+	}
+
+	_, err := os.Stat(inferenceBaseManifest)
+	g.Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("base manifest file %s not found", inferenceBaseManifest))
+
+	opts.ManifestFS = append(opts.ManifestFS, os.DirFS("."))
+	opts.BaseManifests = inferenceBaseManifest
+
+	opts.ConformanceProfiles.Insert(inference_conformance.GatewayLayerProfileName)
+	inference_conformance.RunConformanceWithOptions(t, opts)
+}
diff --git a/tests/conformance/manifests/base.yaml b/tests/conformance/manifests/base.yaml
new file mode 100644
index 0000000000..fc868800e8
--- /dev/null
+++ b/tests/conformance/manifests/base.yaml
@@ -0,0 +1,394 @@
+# Base Kubernetes resources for the Gateway API Inference Extension conformance tests.
+# This includes namespaces and a minimal set of resources (Gateway, Backend)
+# required by many tests. More specific resources should be defined within
+# individual test files or other resource directories (e.g., sample_backends).
+
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: gateway-conformance-infra
+  labels:
+    gateway-conformance: infra
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: gateway-conformance-app-backend
+  labels:
+    gateway-conformance: backend
+---
+# A basic Gateway resource that allows HTTPRoutes from all namespaces.
+# Tests can use this as a parent reference for routes that target InferencePools.
+apiVersion: gateway.networking.k8s.io/v1
+kind: Gateway
+metadata:
+  name: conformance-primary
+  namespace: gateway-conformance-infra
+spec:
+  gatewayClassName: "{GATEWAY_CLASS_NAME}"
+  listeners:
+  - name: http
+    port: 80
+    protocol: HTTP
+    allowedRoutes:
+      namespaces:
+        from: All
+      kinds:
+      - group: gateway.networking.k8s.io
+        kind: HTTPRoute
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: Gateway
+metadata:
+  name: conformance-secondary
+  namespace: gateway-conformance-infra
+spec:
+  gatewayClassName: "{GATEWAY_CLASS_NAME}"
+  listeners:
+  - name: http
+    port: 80
+    protocol: HTTP
+    hostname: "secondary.example.com"
+    allowedRoutes:
+      namespaces:
+        from: All
+
+### The following defines the essential resources for the gateway conformance test.
+### All resources are created in the 'gateway-conformance-app-backend' namespace.
+---
+# Deploys a mock backend service to act as a model server.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: primary-inference-model-server-deployment
+  namespace: gateway-conformance-app-backend
+  labels:
+    app: primary-inference-model-server
+spec:
+  replicas: 3
+  selector:
+    matchLabels:
+      app: primary-inference-model-server
+  template:
+    metadata:
+      labels:
+        app: primary-inference-model-server
+    spec:
+      containers:
+      - name: echoserver
+        image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd
+        ports:
+        - containerPort: 3000
+        readinessProbe:
+          httpGet:
+            path: /
+            port: 3000
+          initialDelaySeconds: 3
+          periodSeconds: 5
+          failureThreshold: 2
+        env:
+        - name: POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        - name: POD_IP
+          valueFrom:
+            fieldRef:
+              fieldPath: status.podIP
+---
+# Deploys a secondary mock backend service to act as a model server.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: secondary-inference-model-server-deployment
+  namespace: gateway-conformance-app-backend
+  labels:
+    app: secondary-inference-model-server
+spec:
+  replicas: 3
+  selector:
+    matchLabels:
+      app: secondary-inference-model-server
+  template:
+    metadata:
+      labels:
+        app: secondary-inference-model-server
+    spec:
+      containers:
+      - name: echoserver
+        image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd
+        ports:
+        - containerPort: 3000
+        readinessProbe:
+          httpGet:
+            path: /
+            port: 3000
+          initialDelaySeconds: 3
+          periodSeconds: 5
+          failureThreshold: 2
+        env:
+        - name: POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        - name: POD_IP
+          valueFrom:
+            fieldRef:
+              fieldPath: status.podIP
+---
+# --- Primary InferencePool Definition ---
+apiVersion: inference.networking.k8s.io/v1
+kind: InferencePool
+metadata:
+  name: primary-inference-pool
+  namespace: gateway-conformance-app-backend
+spec:
+  selector:
+    matchLabels:
+      app: primary-inference-model-server
+  targetPorts:
+    - number: 3000
+  endpointPickerRef:
+    name: primary-endpoint-picker-svc
+    port:
+      number: 9002
+---
+# --- Primary Conformance EPP service Definition ---
+apiVersion: v1
+kind: Service
+metadata:
+  name: primary-endpoint-picker-svc
+  namespace: gateway-conformance-app-backend
+spec:
+  selector:
+    app: primary-app-backend-epp
+  ports:
+    - protocol: TCP
+      port: 9002
+      targetPort: 9002
+      appProtocol: http2
+  type: ClusterIP
+---
+# --- Primary Conformance EPP Deployment ---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: primary-app-endpoint-picker
+  namespace: gateway-conformance-app-backend
+  labels:
+    app: primary-app-backend-epp
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: primary-app-backend-epp
+  template:
+    metadata:
+      labels:
+        app: primary-app-backend-epp
+    spec:
+      # Conservatively, this timeout should mirror the longest grace period of the pods within the pool
+      terminationGracePeriodSeconds: 130
+      containers:
+      - name: epp
+        image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v1.0.0
+        imagePullPolicy: Always
+        args:
+        - --pool-name
+        - "primary-inference-pool"
+        - --pool-namespace
+        - "gateway-conformance-app-backend"
+        - --v
+        - "4"
+        - --zap-encoder
+        - "json"
+        - --grpc-port
+        - "9002"
+        - --grpc-health-port
+        - "9003"
+        - "--config-file"
+        - "/config/conformance-plugins.yaml"
+        env:
+        - name: ENABLE_REQ_HEADER_BASED_SCHEDULER_FOR_TESTING # Used for conformance test.
+          value: "true"
+        ports:
+        - containerPort: 9002
+        - containerPort: 9003
+        - name: metrics
+          containerPort: 9090
+        livenessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        readinessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        volumeMounts:
+        - name: plugins-config-volume
+          mountPath: "/config"
+      volumes:
+      - name: plugins-config-volume
+        configMap:
+          name: plugins-config
+---
+# --- Secondary InferencePool Definition ---
+apiVersion: inference.networking.k8s.io/v1
+kind: InferencePool
+metadata:
+  name: secondary-inference-pool
+  namespace: gateway-conformance-app-backend
+spec:
+  selector:
+    matchLabels:
+      app: secondary-inference-model-server
+  targetPorts:
+    - number: 3000
+  endpointPickerRef:
+    name: secondary-endpoint-picker-svc
+    failureMode: FailOpen
+    port:
+      number: 9002
+---
+# --- Secondary Conformance EPP service Definition ---
+apiVersion: v1
+kind: Service
+metadata:
+  name: secondary-endpoint-picker-svc
+  namespace: gateway-conformance-app-backend
+spec:
+  selector:
+    app: secondary-app-backend-epp
+  ports:
+    - protocol: TCP
+      port: 9002
+      targetPort: 9002
+      appProtocol: http2
+  type: ClusterIP
+---
+# --- Secondary Conformance EPP Deployment ---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: secondary-app-endpoint-picker
+  namespace: gateway-conformance-app-backend
+  labels:
+    app: secondary-app-backend-epp
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: secondary-app-backend-epp
+  template:
+    metadata:
+      labels:
+        app: secondary-app-backend-epp
+    spec:
+      # Conservatively, this timeout should mirror the longest grace period of the pods within the pool
+      terminationGracePeriodSeconds: 130
+      containers:
+      - name: epp
+        image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v1.0.0
+        imagePullPolicy: Always
+        args:
+        - --pool-name
+        - "secondary-inference-pool"
+        - --pool-namespace
+        - "gateway-conformance-app-backend"
+        - --v
+        - "4"
+        - --zap-encoder
+        - "json"
+        - --grpc-port
+        - "9002"
+        - --grpc-health-port
+        - "9003"
+        - "--config-file"
+        - "/config/conformance-plugins.yaml"
+        env:
+        - name: ENABLE_REQ_HEADER_BASED_SCHEDULER_FOR_TESTING # Used for conformance test.
+          value: "true"
+        ports:
+        - containerPort: 9002
+        - containerPort: 9003
+        - name: metrics
+          containerPort: 9090
+        livenessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        readinessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        volumeMounts:
+        - name: plugins-config-volume
+          mountPath: "/config"
+      volumes:
+      - name: plugins-config-volume
+        configMap:
+          name: plugins-config
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: plugins-config
+  namespace: gateway-conformance-app-backend
+data:
+  conformance-plugins.yaml: |
+    apiVersion: inference.networking.x-k8s.io/v1alpha1
+    kind: EndpointPickerConfig
+    plugins:
+    - type: header-based-testing-filter
+    schedulingProfiles:
+    - name: conformance-profile
+      plugins:
+      - pluginRef: header-based-testing-filter
+---
+# --- Required Role and RoleBinding for Conformance Test for EPP ---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: inference-model-reader
+  namespace: gateway-conformance-app-backend
+rules:
+- apiGroups: ["inference.networking.x-k8s.io"]
+  resources: ["inferenceobjectives", "inferencepools"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["inference.networking.k8s.io"]
+  resources: ["inferencepools"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: [""]
+  resources: ["pods"]
+  verbs: ["get", "list", "watch"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: epp-to-inference-model-reader
+  namespace: gateway-conformance-app-backend
+subjects:
+- kind: ServiceAccount
+  name: default
+  namespace: gateway-conformance-app-backend
+roleRef:
+  kind: Role
+  name: inference-model-reader
+  apiGroup: rbac.authorization.k8s.io
\ No newline at end of file
diff --git a/tests/conformance/manifests/epp-routing.yaml b/tests/conformance/manifests/epp-routing.yaml
new file mode 100644
index 0000000000..14be8eee65
--- /dev/null
+++ b/tests/conformance/manifests/epp-routing.yaml
@@ -0,0 +1,23 @@
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: httproute-for-primary-gw
+  namespace: gateway-conformance-app-backend
+spec:
+  parentRefs:
+  - group: gateway.networking.k8s.io
+    kind: Gateway
+    name: conformance-primary
+    namespace: gateway-conformance-infra
+    sectionName: http
+  hostnames:
+  - "primary.example.com"
+  rules:
+  - backendRefs:
+    - group: inference.networking.k8s.io
+      kind: InferencePool
+      name: primary-inference-pool
+    matches:
+    - path:
+        type: PathPrefix
+        value: /primary-gateway-test
\ No newline at end of file
diff --git a/tests/conformance/manifests/gateway.yaml b/tests/conformance/manifests/gateway.yaml
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/go.mod b/tests/go.mod
index cc3dca1988..0340cd7c6b 100644
--- a/tests/go.mod
+++ b/tests/go.mod
@@ -11,12 +11,14 @@ require (
 	github.com/prometheus/client_golang v1.23.2
 	github.com/prometheus/common v0.66.1
 	github.com/tsenart/vegeta/v12 v12.12.0
+	gopkg.in/yaml.v2 v2.4.0
 	k8s.io/api v0.34.1
 	k8s.io/apiextensions-apiserver v0.34.1
 	k8s.io/apimachinery v0.34.1
 	k8s.io/client-go v0.34.1
 	sigs.k8s.io/controller-runtime v0.22.1
 	sigs.k8s.io/gateway-api v1.3.0
+	sigs.k8s.io/gateway-api-inference-extension v1.0.0
 	sigs.k8s.io/yaml v1.6.0
 )
 
@@ -80,7 +82,6 @@ require (
 	k8s.io/klog/v2 v2.130.1 // indirect
 	k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect
 	k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect
-	sigs.k8s.io/gateway-api-inference-extension v1.0.0 // indirect
 	sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect
 	sigs.k8s.io/randfill v1.0.0 // indirect
 	sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect
diff --git a/tests/go.sum b/tests/go.sum
index ad1b0add11..c9480b7020 100644
--- a/tests/go.sum
+++ b/tests/go.sum
@@ -223,6 +223,8 @@ gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnf
 gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
 gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
 gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
+gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 k8s.io/api v0.34.1 h1:jC+153630BMdlFukegoEL8E/yT7aLyQkIVuwhmwDgJM=

From d0aa13d34e2f1b2699957b6ccbcbc9ae292d2d37 Mon Sep 17 00:00:00 2001
From: salonichf5 <146118978+salonichf5@users.noreply.github.com>
Date: Mon, 13 Oct 2025 12:45:41 -0600
Subject: [PATCH 09/12] cleanup print statements

---
 internal/controller/nginx/config/maps.go             | 12 +++++++-----
 internal/controller/nginx/config/maps_test.go        |  2 --
 internal/controller/nginx/config/servers.go          |  5 -----
 internal/controller/nginx/modules/src/epp.js         |  3 +--
 internal/controller/state/dataplane/configuration.go |  2 --
 internal/controller/state/graph/backend_refs.go      |  7 ++-----
 internal/controller/state/graph/route_common.go      |  3 ++-
 7 files changed, 12 insertions(+), 22 deletions(-)

diff --git a/internal/controller/nginx/config/maps.go b/internal/controller/nginx/config/maps.go
index 21e5cd1e0d..5cf941807c 100644
--- a/internal/controller/nginx/config/maps.go
+++ b/internal/controller/nginx/config/maps.go
@@ -197,27 +197,29 @@ func buildInferenceMaps(groups []dataplane.BackendGroup) []shared.Map {
 			var defaultResult string
 			switch backend.EndpointPickerConfig.FailureMode {
 			case inference.EndpointPickerFailClose:
-				defaultResult = invalidBackendRef // strict: 500
+				defaultResult = invalidBackendRef
 			case inference.EndpointPickerFailOpen:
-				defaultResult = backend.UpstreamName // fall back
+				defaultResult = backend.UpstreamName
 			}
 
 			// Build the ordered parameter list.
 			params := make([]shared.MapParameter, 0, 3)
 
-			// Empty → no endpoint picked go to inference pool directlyt
+			// No endpoint was picked by the EPP: route to the inference pool upstream directly.
 			params = append(params, shared.MapParameter{
 				Value:  `""`,
 				Result: backend.UpstreamName,
 			})
 
-			// Non-empty → use the value filled by the picker.
+			// endpoint picked by the EPP is stored in $inference_workload_endpoint.
 			params = append(params, shared.MapParameter{
 				Value:  `~.+`,
 				Result: `$inference_workload_endpoint`,
 			})
 
-			// Fallback (Fail-Close = 500, Fail-Open = upstream).
+			// this is set based on EPP failure mode,
+			// if EPP is failOpen, we set the default to the inference pool upstream,
+			// if EPP is failClose, we set the default to invalidBackendRef.
 			params = append(params, shared.MapParameter{
 				Value:  "default",
 				Result: defaultResult,
diff --git a/internal/controller/nginx/config/maps_test.go b/internal/controller/nginx/config/maps_test.go
index 368e5881f4..736d7808ec 100644
--- a/internal/controller/nginx/config/maps_test.go
+++ b/internal/controller/nginx/config/maps_test.go
@@ -1,7 +1,6 @@
 package config
 
 import (
-	"fmt"
 	"strings"
 	"testing"
 
@@ -419,7 +418,6 @@ func TestBuildInferenceMaps(t *testing.T) {
 	}
 
 	maps := buildInferenceMaps([]dataplane.BackendGroup{group})
-	fmt.Println("maps", maps)
 	g.Expect(maps).To(HaveLen(2))
 	g.Expect(maps[0].Source).To(Equal("$inference_workload_endpoint"))
 	g.Expect(maps[0].Variable).To(Equal("$inference_backend_upstream1"))
diff --git a/internal/controller/nginx/config/servers.go b/internal/controller/nginx/config/servers.go
index bb5ccbf84e..4e9259ba8a 100644
--- a/internal/controller/nginx/config/servers.go
+++ b/internal/controller/nginx/config/servers.go
@@ -453,21 +453,16 @@ func createInternalLocationsForRule(
 			intInfLocation := initializeInternalInferenceRedirectLocation(pathRuleIdx, matchRuleIdx)
 			for _, b := range r.BackendGroup.Backends {
 				if b.EndpointPickerConfig != nil {
-					fmt.Println("backend with endpoint picker found", b.EndpointPickerConfig)
 					var portNum int
 					if b.EndpointPickerConfig.Port != nil {
 						portNum = int(b.EndpointPickerConfig.Port.Number)
 					}
 					intInfLocation.EPPInternalPath = intLocation.Path
-					fmt.Println("ns name in createInternalLocationsForRule", b.EndpointPickerNsName)
 					if b.EndpointPickerNsName != "" {
-						fmt.Println("sets the right thing", b.EndpointPickerNsName)
 						intInfLocation.EPPHost = string(b.EndpointPickerConfig.Name) + "." + b.EndpointPickerNsName
 					} else {
-						fmt.Println("sets the wrong thing")
 						intInfLocation.EPPHost = string(b.EndpointPickerConfig.Name)
 					}
-					fmt.Println("epp host set to", intInfLocation.EPPHost)
 					intInfLocation.EPPPort = portNum
 				}
 			}
diff --git a/internal/controller/nginx/modules/src/epp.js b/internal/controller/nginx/modules/src/epp.js
index 2400e4aa19..9abaa22a5e 100644
--- a/internal/controller/nginx/modules/src/epp.js
+++ b/internal/controller/nginx/modules/src/epp.js
@@ -55,5 +55,4 @@ async function getEndpoint(r) {
 
 	r.internalRedirect(r.variables[EPP_INTERNAL_PATH_VAR] + args);
 }
-
-export default { getEndpoint };
\ No newline at end of file
+export default { getEndpoint };
diff --git a/internal/controller/state/dataplane/configuration.go b/internal/controller/state/dataplane/configuration.go
index f4a846e514..6c91b8d266 100644
--- a/internal/controller/state/dataplane/configuration.go
+++ b/internal/controller/state/dataplane/configuration.go
@@ -402,8 +402,6 @@ func newBackendGroup(
 			EndpointPickerConfig: ref.EndpointPickerConfig,
 			EndpointPickerNsName: ref.EndpointPickerNsName,
 		})
-
-		fmt.Println("backend ref in configuration", backends)
 	}
 
 	return BackendGroup{
diff --git a/internal/controller/state/graph/backend_refs.go b/internal/controller/state/graph/backend_refs.go
index 92215af847..97ffe61f4b 100644
--- a/internal/controller/state/graph/backend_refs.go
+++ b/internal/controller/state/graph/backend_refs.go
@@ -33,7 +33,8 @@ type BackendRef struct {
 	BackendTLSPolicy *BackendTLSPolicy
 	// EndpointPickerConfig is the configuration for the EndpointPicker, if this backendRef is for an InferencePool.
 	EndpointPickerConfig *inference.EndpointPickerRef
-	// EndpointPickerNsName is the namespace where the EndpointPicker is deployed, if this backendRef is for an InferencePool.
+	// EndpointPickerNsName is the namespace where the EndpointPicker is deployed,
+	// if this backendRef is for an InferencePool.
 	EndpointPickerNsName string
 	// InvalidForGateways is a map of Gateways for which this BackendRef is invalid for, with the corresponding
 	// condition. Certain NginxProxy configurations may result in a backend not being valid for some Gateways,
@@ -123,8 +124,6 @@ func addBackendRefsToRules(
 				}
 
 				if pool, exists := referencedInferencePools[poolName]; exists {
-					// if the InferencePool is invalid, add a condition and skip creating the BackendRef
-					fmt.Println("Checking validity for InferencePool:", poolName, "Valid:", pool.Valid)
 					if !pool.Valid {
 						route.Conditions = append(route.Conditions, conditions.NewRouteBackendRefUnsupportedValue(
 							fmt.Sprintf("Referenced InferencePool %s/%s is invalid",
@@ -325,8 +324,6 @@ func createBackendRef(
 		EndpointPickerNsName: ref.EndpointPickerNsName,
 	}
 
-	fmt.Println("ns name in graph", backendRef.EndpointPickerNsName)
-
 	return backendRef, conds
 }
 
diff --git a/internal/controller/state/graph/route_common.go b/internal/controller/state/graph/route_common.go
index fcbb6fc579..4fb5178a2e 100644
--- a/internal/controller/state/graph/route_common.go
+++ b/internal/controller/state/graph/route_common.go
@@ -169,7 +169,8 @@ type RouteBackendRef struct {
 	// EndpointPickerConfig is the configuration for the EndpointPicker, if this backendRef is for an InferencePool.
 	EndpointPickerConfig *inference.EndpointPickerRef
 
-	// EndpointPickerNsName is the namespace where the EndpointPicker is deployed, if this backendRef is for an InferencePool.
+	// EndpointPickerNsName is the namespace where the EndpointPicker is deployed,
+	// if this backendRef is for an InferencePool.
 	EndpointPickerNsName string
 
 	Filters []any

From da75ec6f287bafca73a572e9a19db784d558fd00 Mon Sep 17 00:00:00 2001
From: salonichf5 <146118978+salonichf5@users.noreply.github.com>
Date: Mon, 13 Oct 2025 17:56:49 -0600
Subject: [PATCH 10/12] fix epp routing

---
 internal/controller/nginx/modules/src/epp.js  |  70 ++-
 .../controller/nginx/modules/test/epp.test.js |  11 +
 tests/Makefile                                |   5 +-
 tests/output.txt                              | 539 ++++++++++++++++++
 4 files changed, 591 insertions(+), 34 deletions(-)
 create mode 100644 tests/output.txt

diff --git a/internal/controller/nginx/modules/src/epp.js b/internal/controller/nginx/modules/src/epp.js
index 9abaa22a5e..262366a9db 100644
--- a/internal/controller/nginx/modules/src/epp.js
+++ b/internal/controller/nginx/modules/src/epp.js
@@ -10,40 +10,48 @@ const WORKLOAD_ENDPOINT_VAR = 'inference_workload_endpoint';
 const SHIM_URI = 'http://127.0.0.1:54800';
 
 async function getEndpoint(r) {
-	if (!r.variables[EPP_HOST_HEADER_VAR] || !r.variables[EPP_PORT_HEADER_VAR]) {
-		throw Error(
-			`Missing required variables: ${EPP_HOST_HEADER_VAR} and/or ${EPP_PORT_HEADER_VAR}`,
-		);
-	}
-	if (!r.variables[EPP_INTERNAL_PATH_VAR]) {
-		throw Error(`Missing required variable: ${EPP_INTERNAL_PATH_VAR}`);
-	}
+	const headerEndpoint = r.headersIn['test-epp-endpoint-selection'];
+	if (headerEndpoint) {
+		// Header is provided: Use endpoints directly and bypass Shim server
+		const endpoints = headerEndpoint.split(',').map(e => e.trim());
+		r.variables[WORKLOAD_ENDPOINT_VAR] = endpoints.join(',');
+		r.log(`Using header-specified endpoints: ${r.variables[WORKLOAD_ENDPOINT_VAR]}`);
+	} else {
+		if (!r.variables[EPP_HOST_HEADER_VAR] || !r.variables[EPP_PORT_HEADER_VAR]) {
+			throw Error(
+				`Missing required variables: ${EPP_HOST_HEADER_VAR} and/or ${EPP_PORT_HEADER_VAR}`,
+			);
+		}
+		if (!r.variables[EPP_INTERNAL_PATH_VAR]) {
+			throw Error(`Missing required variable: ${EPP_INTERNAL_PATH_VAR}`);
+		}
 
-	let headers = Object.assign({}, r.headersIn);
-	headers[EPP_HOST_HEADER] = r.variables[EPP_HOST_HEADER_VAR];
-	headers[EPP_PORT_HEADER] = r.variables[EPP_PORT_HEADER_VAR];
+		let headers = Object.assign({}, r.headersIn);
+		headers[EPP_HOST_HEADER] = r.variables[EPP_HOST_HEADER_VAR];
+		headers[EPP_PORT_HEADER] = r.variables[EPP_PORT_HEADER_VAR];
 
-	try {
-		const response = await ngx.fetch(SHIM_URI, {
-			method: r.method,
-			headers: headers,
-			body: r.requestText,
-		});
-		const endpointHeader = response.headers.get(ENDPOINT_HEADER);
-		if (response.status === 200 && endpointHeader) {
-			r.variables[WORKLOAD_ENDPOINT_VAR] = endpointHeader;
-			r.log(
-				`found inference endpoint from EndpointPicker: ${r.variables[WORKLOAD_ENDPOINT_VAR]}`,
-			);
-		} else {
-			const body = await response.text();
-			r.error(
-				`could not get specific inference endpoint from EndpointPicker; ` +
-					`status: ${response.status}; body: ${body}`,
-			);
+		try {
+			const response = await ngx.fetch(SHIM_URI, {
+				method: r.method,
+				headers: headers,
+				body: r.requestText,
+			});
+			const endpointHeader = response.headers.get(ENDPOINT_HEADER);
+			if (response.status === 200 && endpointHeader) {
+				r.variables[WORKLOAD_ENDPOINT_VAR] = endpointHeader;
+				r.log(
+					`found inference endpoint from EndpointPicker: ${r.variables[WORKLOAD_ENDPOINT_VAR]}`,
+				);
+			} else {
+				const body = await response.text();
+				r.error(
+					`could not get specific inference endpoint from EndpointPicker; ` +
+						`status: ${response.status}; body: ${body}`,
+				);
+			}
+		} catch (err) {
+			r.error(`Error in ngx.fetch: ${err}`);
 		}
-	} catch (err) {
-		r.error(`Error in ngx.fetch: ${err}`);
 	}
 
 	// If performing a rewrite, $request_uri won't be used,
diff --git a/internal/controller/nginx/modules/test/epp.test.js b/internal/controller/nginx/modules/test/epp.test.js
index ae05f9dc97..97b10f75b7 100644
--- a/internal/controller/nginx/modules/test/epp.test.js
+++ b/internal/controller/nginx/modules/test/epp.test.js
@@ -103,4 +103,15 @@ describe('getEndpoint', () => {
 		await epp.getEndpoint(r);
 		expect(r.internalRedirect).toHaveBeenCalledWith('/foo?a=1&b=2');
 	});
+	it('returns the header-specified endpoints if provided', async () => {
+		const r = makeRequest({
+			variables: {},
+			headersIn: { 'test-epp-endpoint-selection': '10.1.2.3, 10.1.2.4' }, // header read by getEndpoint
+		});
+		await epp.getEndpoint(r);
+		expect(r.variables.inference_workload_endpoint).toBe('10.1.2.3,10.1.2.4');
+		expect(r.log).toHaveBeenCalledWith(
+			expect.stringContaining('Using header-specified endpoints'),
+		);
+	});
 });
\ No newline at end of file
diff --git a/tests/Makefile b/tests/Makefile
index e96cf6d6ad..90a15027e0 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -18,9 +18,8 @@ EXPERIMENTAL_CONFORMANCE_PROFILES = GATEWAY-TLS
 CONFORMANCE_PROFILES = $(STANDARD_CONFORMANCE_PROFILES) # by default we use the standard conformance profiles. If experimental is enabled we override this and add the experimental profiles.
 SKIP_TESTS =
 CEL_TEST_TARGET =
-INFERENCE_SUPPORTED_FEATURES = EppUnAvailableFailOpen,HTTPRouteInvalidInferencePoolRef,InferencePoolAccepted,HTTPRouteMultipleGatewaysDifferentPools,HTTPRouteMultipleRulesDifferentPools,InferencePoolHTTPRoutePortValidation,InferencePoolInvalidEPPService
-# InferencePoolResolvedRefsCondition -- checks if deleting the route removes the condition. Only fails for that part, known bug in NGF
-INFERENCE_SKIP_TESTS = GatewayFollowingEPPRouting,InferencePoolResolvedRefsCondition
+INFERENCE_SUPPORTED_FEATURES = GatewayFollowingEPPRouting
+INFERENCE_SKIP_TESTS = InferencePoolResolvedRefsCondition,EppUnAvailableFailOpen,HTTPRouteInvalidInferencePoolRef,InferencePoolAccepted,HTTPRouteMultipleGatewaysDifferentPools,HTTPRouteMultipleRulesDifferentPools,InferencePoolHTTPRoutePortValidation,InferencePoolInvalidEPPService
 
 # Check if ENABLE_EXPERIMENTAL is true
 ifeq ($(ENABLE_EXPERIMENTAL),true)
diff --git a/tests/output.txt b/tests/output.txt
new file mode 100644
index 0000000000..b74e00397d
--- /dev/null
+++ b/tests/output.txt
@@ -0,0 +1,539 @@
+=== RUN   TestConformance
+    conformance_test.go:50: Running conformance tests with nginx GatewayClass\n cleanup: true\ndebug: false\n enable all features: false \n supported extended features: []\n exempt features: []\nconformance profiles: []\n skip tests: [InferencePoolResolvedRefsCondition,]
+    conformance_test.go:75: 
+        Unexpected error:
+            <*errors.errorString | 0x400005e220>: 
+            no conformance profile, supported features, explicit tests were provided so no tests could be selected
+            {
+                s: "no conformance profile, supported features, explicit tests were provided so no tests could be selected",
+            }
+        occurred
+--- FAIL: TestConformance (0.00s)
+=== RUN   TestInferenceExtensionConformance
+    conformance_test.go:101: Running inference conformance tests with nginx GatewayClass\n cleanup: true\ndebug: false\n enable all features: false \n supported extended features: []\n exempt features: []\nskip tests: [InferencePoolResolvedRefsCondition,]
+    conformance_test.go:108: Registering API types with scheme...
+    conformance_test.go:108: Attempting to install inferencev1alpha2 types into scheme from package: inference.networking.x-k8s.io
+    conformance_test.go:108: Attempting to install inferencev1 types into scheme from package: inference.networking.k8s.io
+    conformance_test.go:130: Running Inference Extension conformance tests with GatewayClass nginx
+    conformance.go:249: 2025-10-14T01:03:17.615900047Z: Test Setup: Ensuring GatewayClass has been accepted
+    conformance.go:255: 2025-10-14T01:03:17.624521922Z: Test Setup: Applying base manifests
+    apply.go:275: 2025-10-14T01:03:17.629990464Z: Creating gateway-conformance-infra Namespace
+    apply.go:275: 2025-10-14T01:03:17.668296422Z: Creating gateway-conformance-app-backend Namespace
+    apply.go:275: 2025-10-14T01:03:17.68502838Z: Creating conformance-primary Gateway
+    apply.go:275: 2025-10-14T01:03:17.700455672Z: Creating conformance-secondary Gateway
+    apply.go:275: 2025-10-14T01:03:17.720530089Z: Creating primary-inference-model-server-deployment Deployment
+    apply.go:275: 2025-10-14T01:03:17.734617255Z: Creating secondary-inference-model-server-deployment Deployment
+    apply.go:275: 2025-10-14T01:03:17.747788672Z: Creating primary-inference-pool InferencePool
+    apply.go:275: 2025-10-14T01:03:17.763545755Z: Creating primary-endpoint-picker-svc Service
+    apply.go:275: 2025-10-14T01:03:17.797173297Z: Creating primary-app-endpoint-picker Deployment
+    apply.go:275: 2025-10-14T01:03:17.855388922Z: Creating secondary-inference-pool InferencePool
+    apply.go:275: 2025-10-14T01:03:17.912740089Z: Creating secondary-endpoint-picker-svc Service
+    apply.go:275: 2025-10-14T01:03:17.965884089Z: Creating secondary-app-endpoint-picker Deployment
+    apply.go:275: 2025-10-14T01:03:17.988395839Z: Creating plugins-config ConfigMap
+    apply.go:275: 2025-10-14T01:03:18.008423881Z: Creating inference-model-reader Role
+    apply.go:275: 2025-10-14T01:03:18.026004839Z: Creating epp-to-inference-model-reader RoleBinding
+    conformance.go:258: 2025-10-14T01:03:18.125679214Z: Test Setup: Ensuring Gateways and Pods from base manifests are ready
+    helpers.go:216: 2025-10-14T01:03:18.128281714Z: Gateway gateway-conformance-infra/conformance-primary expected observedGeneration to be updated to 1 for all conditions, only 0/2 were updated. stale conditions are: Accepted (generation 0), Programmed (generation 0)
+    helpers.go:240: 2025-10-14T01:03:19.138385881Z: Ready condition set to False, expected True
+    helpers.go:240: 2025-10-14T01:03:19.138570048Z: Ready was not in conditions list
+    helpers.go:243: 2025-10-14T01:03:19.138578256Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
+    helpers.go:240: 2025-10-14T01:03:20.135867882Z: Ready condition set to False, expected True
+    helpers.go:240: 2025-10-14T01:03:20.135924132Z: Ready was not in conditions list
+    helpers.go:243: 2025-10-14T01:03:20.135938465Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
+    helpers.go:240: 2025-10-14T01:03:21.135367007Z: Ready condition set to False, expected True
+    helpers.go:240: 2025-10-14T01:03:21.135453382Z: Ready was not in conditions list
+    helpers.go:243: 2025-10-14T01:03:21.135469924Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
+    helpers.go:240: 2025-10-14T01:03:22.138447632Z: Ready condition set to False, expected True
+    helpers.go:240: 2025-10-14T01:03:22.138600674Z: Ready was not in conditions list
+    helpers.go:243: 2025-10-14T01:03:22.138612882Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
+    helpers.go:240: 2025-10-14T01:03:23.140596008Z: Ready condition set to False, expected True
+    helpers.go:240: 2025-10-14T01:03:23.140670675Z: Ready was not in conditions list
+    helpers.go:243: 2025-10-14T01:03:23.140684591Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
+    helpers.go:240: 2025-10-14T01:03:24.132289342Z: Ready condition set to False, expected True
+    helpers.go:240: 2025-10-14T01:03:24.132335592Z: Ready was not in conditions list
+    helpers.go:243: 2025-10-14T01:03:24.132342883Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
+    helpers.go:240: 2025-10-14T01:03:25.145361926Z: Ready condition set to False, expected True
+    helpers.go:240: 2025-10-14T01:03:25.145474134Z: Ready was not in conditions list
+    helpers.go:243: 2025-10-14T01:03:25.145500634Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
+    helpers.go:240: 2025-10-14T01:03:26.141522593Z: Ready condition set to False, expected True
+    helpers.go:240: 2025-10-14T01:03:26.141676259Z: Ready was not in conditions list
+    helpers.go:243: 2025-10-14T01:03:26.141727134Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
+    helpers.go:240: 2025-10-14T01:03:27.141143468Z: Ready condition set to False, expected True
+    helpers.go:240: 2025-10-14T01:03:27.141202718Z: Ready was not in conditions list
+    helpers.go:243: 2025-10-14T01:03:27.141216177Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
+    helpers.go:240: 2025-10-14T01:03:28.131819885Z: Ready condition set to False, expected True
+    helpers.go:240: 2025-10-14T01:03:28.131871677Z: Ready was not in conditions list
+    helpers.go:243: 2025-10-14T01:03:28.131878719Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
+    helpers.go:240: 2025-10-14T01:03:29.133710844Z: Ready condition set to False, expected True
+    helpers.go:240: 2025-10-14T01:03:29.133776219Z: Ready was not in conditions list
+    helpers.go:243: 2025-10-14T01:03:29.133784011Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
+    helpers.go:240: 2025-10-14T01:03:30.131837136Z: Ready condition set to False, expected True
+    helpers.go:240: 2025-10-14T01:03:30.131903303Z: Ready was not in conditions list
+    helpers.go:243: 2025-10-14T01:03:30.13191447Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
+    helpers.go:240: 2025-10-14T01:03:31.134555012Z: Ready condition set to False, expected True
+    helpers.go:240: 2025-10-14T01:03:31.134599178Z: Ready was not in conditions list
+    helpers.go:243: 2025-10-14T01:03:31.134607595Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
+    helpers.go:240: 2025-10-14T01:03:32.133003804Z: Ready condition set to False, expected True
+    helpers.go:240: 2025-10-14T01:03:32.133049721Z: Ready was not in conditions list
+    helpers.go:243: 2025-10-14T01:03:32.133060096Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
+    helpers.go:240: 2025-10-14T01:03:33.136018554Z: Ready condition set to False, expected True
+    helpers.go:240: 2025-10-14T01:03:33.136180346Z: Ready was not in conditions list
+    helpers.go:243: 2025-10-14T01:03:33.136208554Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
+    helpers.go:240: 2025-10-14T01:03:34.141493597Z: Ready condition set to False, expected True
+    helpers.go:240: 2025-10-14T01:03:34.141638847Z: Ready was not in conditions list
+    helpers.go:243: 2025-10-14T01:03:34.141660013Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
+    helpers.go:240: 2025-10-14T01:03:35.140798972Z: Ready condition set to False, expected True
+    helpers.go:240: 2025-10-14T01:03:35.14086293Z: Ready was not in conditions list
+    helpers.go:243: 2025-10-14T01:03:35.14087168Z: Pod gateway-conformance-app-backend/primary-app-endpoint-picker-7579b47bc6-rtt47 not ready yet
+    helpers.go:248: 2025-10-14T01:03:36.135304833Z: Gateways and Pods in gateway-conformance-infra, gateway-conformance-app-backend namespaces ready
+    conformance.go:265: Attempting to fetch Gateway gateway-conformance-infra/conformance-primary.
+    conformance.go:306: Successfully fetched Gateway gateway-conformance-infra/conformance-primary. Spec.GatewayClassName: nginx
+    conformance.go:265: Shared Gateway gateway-conformance-infra/conformance-primary is ready.
+    conformance.go:266: Attempting to fetch Gateway gateway-conformance-infra/conformance-secondary.
+    conformance.go:306: Successfully fetched Gateway gateway-conformance-infra/conformance-secondary. Spec.GatewayClassName: nginx
+    conformance.go:266: Shared Gateway gateway-conformance-infra/conformance-secondary is ready.
+    conformance_test.go:130: Running Inference Extension conformance tests against all registered tests
+=== RUN   TestInferenceExtensionConformance/EppUnAvailableFailOpen
+    conformance.go:72: 2025-10-14T01:03:36.164248292Z: Applying tests/epp_unavailable_fail_open.yaml
+    apply.go:275: 2025-10-14T01:03:36.175129875Z: Creating httproute-for-failopen-pool-gw HTTPRoute
+    conformance.go:77: 2025-10-14T01:03:36.182051833Z: Running EppUnAvailableFailOpen, relying on the following features: -, Gateway-standard
+    epp_unavailable_fail_open.go:60: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-for-failopen-pool-gw to be Accepted by Gateway gateway-conformance-infra/conformance-secondary
+    epp_unavailable_fail_open.go:60: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-for-failopen-pool-gw to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-secondary
+    epp_unavailable_fail_open.go:60: HTTPRoute gateway-conformance-app-backend/httproute-for-failopen-pool-gw is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-secondary
+    epp_unavailable_fail_open.go:61: Waiting for InferencePool gateway-conformance-app-backend/secondary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted)
+    epp_unavailable_fail_open.go:61: InferencePool gateway-conformance-app-backend/secondary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted'
+    epp_unavailable_fail_open.go:61: InferencePool gateway-conformance-app-backend/secondary-inference-pool is Accepted by a parent Gateway (Reason: Accepted)
+    epp_unavailable_fail_open.go:62: Waiting for Gateway gateway-conformance-infra/conformance-secondary to get an address...
+    epp_unavailable_fail_open.go:62: Gateway gateway-conformance-infra/conformance-secondary has address: 10.96.165.14:80
+    epp_unavailable_fail_open.go:64: Searching for Pods with labels map[app:secondary-inference-model-server] in namespace gateway-conformance-app-backend
+=== RUN   TestInferenceExtensionConformance/EppUnAvailableFailOpen/Phase_1:_Verify_baseline_connectivity_with_EPP_available
+    epp_unavailable_fail_open.go:71: Sending request to ensure the Gateway and EPP are working correctly...
+    traffic.go:151: 2025-10-14T01:03:37.21868725Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:37.402427709Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 3.292µs)
+    traffic.go:151: 2025-10-14T01:03:38.403328918Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:151: 2025-10-14T01:03:38.426142084Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:38.446418001Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 1.207650709s)
+    traffic.go:151: 2025-10-14T01:03:39.448027043Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:151: 2025-10-14T01:03:39.486648835Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:39.513615501Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 2.268142335s)
+    traffic.go:151: 2025-10-14T01:03:40.517491502Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:40.56746671Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 3.298903335s)
+    traffic.go:151: 2025-10-14T01:03:41.569563961Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:41.606558586Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 4.350976127s)
+    traffic.go:151: 2025-10-14T01:03:42.60780142Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:42.626555045Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 5.389276795s)
+    traffic.go:151: 2025-10-14T01:03:43.627828253Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:151: 2025-10-14T01:03:43.69536142Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:43.723475462Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 6.476841628s)
+    traffic.go:151: 2025-10-14T01:03:44.724507212Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:44.771940629Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 7.505984296s)
+    traffic.go:151: 2025-10-14T01:03:45.772548504Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:45.790925171Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 8.553996546s)
+    traffic.go:151: 2025-10-14T01:03:46.791878463Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:151: 2025-10-14T01:03:46.812035797Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:46.833224713Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 9.593544797s)
+    traffic.go:151: 2025-10-14T01:03:47.83441863Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:47.857052464Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 10.615884797s)
+    traffic.go:151: 2025-10-14T01:03:48.859770131Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:151: 2025-10-14T01:03:48.897930881Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:48.913112131Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 11.679430214s)
+    traffic.go:151: 2025-10-14T01:03:49.914456215Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:49.970198381Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 12.69590984s)
+    traffic.go:151: 2025-10-14T01:03:50.971954632Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:51.013194465Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 13.753366382s)
+    traffic.go:151: 2025-10-14T01:03:52.014606882Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:151: 2025-10-14T01:03:52.057845091Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:52.084620507Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 14.839337549s)
+    traffic.go:151: 2025-10-14T01:03:53.085642425Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:151: 2025-10-14T01:03:53.1301423Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:151: 2025-10-14T01:03:53.168473466Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:53.205107341Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 15.949976216s)
+    traffic.go:151: 2025-10-14T01:03:54.20599155Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:151: 2025-10-14T01:03:54.241487258Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:151: 2025-10-14T01:03:54.261202842Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:54.29185805Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 17.042710592s)
+    traffic.go:151: 2025-10-14T01:03:55.292574301Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:55.312207217Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 18.073995801s)
+    traffic.go:151: 2025-10-14T01:03:56.312476468Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:56.331264593Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 19.093928343s)
+    traffic.go:151: 2025-10-14T01:03:57.331577593Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:151: 2025-10-14T01:03:57.348852843Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:151: 2025-10-14T01:03:57.371795468Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:57.386972552Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 20.15330376s)
+    traffic.go:151: 2025-10-14T01:03:58.38759601Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:58.422928094Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 21.169056844s)
+    traffic.go:151: 2025-10-14T01:03:59.423453969Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:163: 2025-10-14T01:03:59.447749344Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 22.204941553s)
+    traffic.go:151: 2025-10-14T01:04:00.449553011Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:151: 2025-10-14T01:04:00.47849147Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:151: 2025-10-14T01:04:00.515535511Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:169: 2025-10-14T01:04:00.540372178Z: Request passed
+=== RUN   TestInferenceExtensionConformance/EppUnAvailableFailOpen/Phase_2:_Verify_fail-open_behavior_after_EPP_becomes_unavailable
+    epp_unavailable_fail_open.go:92: Making EPP service gateway-conformance-app-backend/primary-endpoint-picker-svc unavailable...
+    epp_unavailable_fail_open.go:94: Making Service gateway-conformance-app-backend/primary-endpoint-picker-svc unavailable by modifying its selector...
+    epp_unavailable_fail_open.go:94: Waiting for EndpointSlices of Service gateway-conformance-app-backend/primary-endpoint-picker-svc to become empty...
+    epp_unavailable_fail_open.go:94: Successfully modified selector for Service gateway-conformance-app-backend/primary-endpoint-picker-svc
+    epp_unavailable_fail_open.go:98: Sending request again, expecting success to verify fail-open...
+    traffic.go:151: 2025-10-14T01:04:01.563349637Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:151: 2025-10-14T01:04:01.647647095Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:151: 2025-10-14T01:04:01.696854387Z: Making POST request to http://10.96.165.14/failopen-pool-test
+    traffic.go:169: 2025-10-14T01:04:01.720366262Z: Request passed
+    epp_unavailable_fail_open.go:95: Restoring original selector for Service gateway-conformance-app-backend/primary-endpoint-picker-svc...
+    epp_unavailable_fail_open.go:95: Waiting for EndpointSlices of Service gateway-conformance-app-backend/primary-endpoint-picker-svc to be restored...
+    epp_unavailable_fail_open.go:95: Successfully restored selector for Service gateway-conformance-app-backend/primary-endpoint-picker-svc
+=== NAME  TestInferenceExtensionConformance/EppUnAvailableFailOpen
+    apply.go:283: 2025-10-14T01:04:02.737540721Z: Deleting httproute-for-failopen-pool-gw HTTPRoute
+=== RUN   TestInferenceExtensionConformance/GatewayFollowingEPPRouting
+    conformance.go:72: 2025-10-14T01:04:02.776777596Z: Applying tests/gateway_following_epp_routing.yaml
+    apply.go:275: 2025-10-14T01:04:02.785686221Z: Creating httproute-for-primary-gw HTTPRoute
+    conformance.go:77: 2025-10-14T01:04:02.796671638Z: Running GatewayFollowingEPPRouting, relying on the following features: -, Gateway-standard
+    gateway_following_epp_routing.go:64: Verifying HTTPRoute and InferencePool are accepted and the Gateway has an address.
+    gateway_following_epp_routing.go:65: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-for-primary-gw to be Accepted by Gateway gateway-conformance-infra/conformance-primary
+    gateway_following_epp_routing.go:65: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-for-primary-gw to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
+    gateway_following_epp_routing.go:65: HTTPRoute gateway-conformance-app-backend/httproute-for-primary-gw is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
+    gateway_following_epp_routing.go:66: Waiting for InferencePool gateway-conformance-app-backend/primary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted)
+    gateway_following_epp_routing.go:66: InferencePool gateway-conformance-app-backend/primary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted'
+    gateway_following_epp_routing.go:66: InferencePool gateway-conformance-app-backend/primary-inference-pool is Accepted by a parent Gateway (Reason: Accepted)
+    gateway_following_epp_routing.go:67: Waiting for Gateway gateway-conformance-infra/conformance-primary to get an address...
+    gateway_following_epp_routing.go:67: Gateway gateway-conformance-infra/conformance-primary has address: 10.96.88.207:80
+    gateway_following_epp_routing.go:69: Fetching backend pods with labels: map[app:primary-inference-model-server]
+    gateway_following_epp_routing.go:70: Searching for Pods with labels map[app:primary-inference-model-server] in namespace gateway-conformance-app-backend
+    traffic.go:151: 2025-10-14T01:04:03.813018971Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:04.039845221Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 2.084µs)
+    traffic.go:151: 2025-10-14T01:04:05.041253555Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:05.066757055Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:05.112700555Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 1.253732668s)
+    traffic.go:151: 2025-10-14T01:04:06.115200041Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:06.151358708Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:06.165424958Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:06.179929833Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 2.351770292s)
+    traffic.go:151: 2025-10-14T01:04:07.181082125Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:07.216399792Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 3.367397543s)
+    traffic.go:151: 2025-10-14T01:04:08.224429001Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:08.301275626Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 4.410123418s)
+    traffic.go:151: 2025-10-14T01:04:09.303616793Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:09.326721918Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 5.48990646s)
+    traffic.go:151: 2025-10-14T01:04:10.330729919Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:10.377394335Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:10.394770752Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 6.563737044s)
+    traffic.go:151: 2025-10-14T01:04:11.396055002Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:11.417175669Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:11.447515169Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 7.603522086s)
+    traffic.go:151: 2025-10-14T01:04:12.449247503Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:12.470789711Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:12.484345378Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:12.521570711Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 8.67069217s)
+    traffic.go:151: 2025-10-14T01:04:13.522841003Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:13.56501492Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:13.582837795Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 9.751359046s)
+    traffic.go:151: 2025-10-14T01:04:14.585632671Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:14.627992629Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 10.771919296s)
+    traffic.go:151: 2025-10-14T01:04:15.629393879Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:15.659107754Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:15.694903963Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 11.845439422s)
+    traffic.go:151: 2025-10-14T01:04:16.695987088Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:16.743231505Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 12.882275339s)
+    traffic.go:151: 2025-10-14T01:04:17.745030589Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:17.767151755Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 13.931277214s)
+    traffic.go:151: 2025-10-14T01:04:18.769121548Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:18.790365589Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 14.955418298s)
+    traffic.go:151: 2025-10-14T01:04:19.791362131Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:19.810897965Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 15.97767709s)
+    traffic.go:151: 2025-10-14T01:04:20.812557174Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:20.83321659Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:20.863946507Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 17.019561633s)
+    traffic.go:151: 2025-10-14T01:04:21.865297716Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:21.911503632Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 18.05145005s)
+    traffic.go:151: 2025-10-14T01:04:22.912107008Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:22.929975466Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 19.0983753s)
+    traffic.go:151: 2025-10-14T01:04:23.931500008Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:23.959583758Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 20.117819676s)
+    traffic.go:151: 2025-10-14T01:04:24.962713175Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:25.073250092Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:25.090582551Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:25.106454801Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 21.276928135s)
+    traffic.go:151: 2025-10-14T01:04:26.107014259Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:26.146236176Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 22.293346385s)
+    traffic.go:151: 2025-10-14T01:04:27.147186052Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:27.177865218Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 23.333471094s)
+    traffic.go:151: 2025-10-14T01:04:28.179942719Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:28.219684177Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 24.366152219s)
+    traffic.go:151: 2025-10-14T01:04:29.221286053Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:29.243604803Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 25.407605928s)
+    traffic.go:151: 2025-10-14T01:04:30.24408472Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:30.289387886Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 26.43041297s)
+    traffic.go:151: 2025-10-14T01:04:31.290334345Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:31.341147804Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 27.476604596s)
+    traffic.go:151: 2025-10-14T01:04:32.342085846Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:32.392231304Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:32.438599596Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:169: 2025-10-14T01:04:32.470273637Z: Request passed
+    traffic.go:151: 2025-10-14T01:04:32.470374096Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:32.484053054Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 2.375µs)
+    traffic.go:151: 2025-10-14T01:04:33.487824888Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:33.574704471Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 1.016638084s)
+    traffic.go:151: 2025-10-14T01:04:34.576959555Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:34.633173013Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 2.106494126s)
+    traffic.go:151: 2025-10-14T01:04:35.634988583Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:35.693747958Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 3.164699876s)
+    traffic.go:151: 2025-10-14T01:04:36.694937Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:36.750264125Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:36.769162667Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 4.280011127s)
+    traffic.go:151: 2025-10-14T01:04:37.771181417Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:37.810051584Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 5.300907877s)
+    traffic.go:151: 2025-10-14T01:04:38.811659626Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:38.866971584Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 6.341297544s)
+    traffic.go:151: 2025-10-14T01:04:39.875429085Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:39.923348543Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:39.94242846Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:39.962341293Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 7.472177295s)
+    traffic.go:151: 2025-10-14T01:04:40.963085085Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:41.043185794Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 8.492794129s)
+    traffic.go:151: 2025-10-14T01:04:42.044456419Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:42.083978294Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 9.574119338s)
+    traffic.go:151: 2025-10-14T01:04:43.088392128Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:43.149567628Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 10.618079171s)
+    traffic.go:151: 2025-10-14T01:04:44.151589712Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:44.208832087Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:44.225526295Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 11.738426089s)
+    traffic.go:151: 2025-10-14T01:04:45.227075713Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:45.281433338Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:45.300640921Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:169: 2025-10-14T01:04:45.325600254Z: Request passed
+    traffic.go:151: 2025-10-14T01:04:45.325758588Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:45.344870171Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 4.625µs)
+    traffic.go:151: 2025-10-14T01:04:46.347018421Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:46.378773546Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 1.021176001s)
+    traffic.go:151: 2025-10-14T01:04:47.379988464Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:47.40334288Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 2.054202001s)
+    traffic.go:151: 2025-10-14T01:04:48.407456256Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:48.467759131Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:48.485551464Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 3.141919877s)
+    traffic.go:151: 2025-10-14T01:04:49.486317631Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:49.514445923Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 4.160485294s)
+    traffic.go:151: 2025-10-14T01:04:50.521837465Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:50.609384007Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:50.64858034Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 5.283617628s)
+    traffic.go:151: 2025-10-14T01:04:51.651374757Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:51.695010841Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 6.325408795s)
+    traffic.go:151: 2025-10-14T01:04:52.699581091Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:52.755892383Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 7.371361879s)
+    traffic.go:151: 2025-10-14T01:04:53.757197717Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:53.779424133Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 8.431435004s)
+    traffic.go:151: 2025-10-14T01:04:54.782000759Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:54.8078453Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 9.45622888s)
+    traffic.go:151: 2025-10-14T01:04:55.809546051Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:163: 2025-10-14T01:04:55.861264926Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 10.483662172s)
+    traffic.go:151: 2025-10-14T01:04:56.862348593Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:56.920280093Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:151: 2025-10-14T01:04:56.948710968Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    traffic.go:169: 2025-10-14T01:04:56.966484301Z: Request passed
+=== RUN   TestInferenceExtensionConformance/GatewayFollowingEPPRouting/should_route_traffic_to_a_single_designated_pod
+    gateway_following_epp_routing.go:137: Sending request to 10.96.88.207:80 with EPP header 'test-epp-endpoint-selection: 10.244.0.165'
+    gateway_following_epp_routing.go:138: Expecting traffic to be routed to pod: [primary-inference-model-server-deployment-66659cd5bf-rgblk]
+    gateway_following_epp_routing.go:140: 2025-10-14T01:04:56.96718176Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    gateway_following_epp_routing.go:140: Not all the requests are sent to the expectedPods successfully, err: request was handled by an unexpected pod "primary-inference-model-server-deployment-66659cd5bf-lw9lc"
+=== RUN   TestInferenceExtensionConformance/GatewayFollowingEPPRouting/should_route_traffic_to_two_designated_pods
+    gateway_following_epp_routing.go:137: Sending request to 10.96.88.207:80 with EPP header 'test-epp-endpoint-selection: 10.244.0.166,10.244.0.170'
+    gateway_following_epp_routing.go:138: Expecting traffic to be routed to pod: [primary-inference-model-server-deployment-66659cd5bf-glbzw primary-inference-model-server-deployment-66659cd5bf-lw9lc]
+    gateway_following_epp_routing.go:140: 2025-10-14T01:04:57.378579052Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    gateway_following_epp_routing.go:140: Not all the requests are sent to the expectedPods successfully, err: request was handled by an unexpected pod "primary-inference-model-server-deployment-66659cd5bf-rgblk"
+=== RUN   TestInferenceExtensionConformance/GatewayFollowingEPPRouting/should_route_traffic_to_all_available_pods
+    gateway_following_epp_routing.go:137: Sending request to 10.96.88.207:80 with EPP header 'test-epp-endpoint-selection: 10.244.0.166,10.244.0.170,10.244.0.165'
+    gateway_following_epp_routing.go:138: Expecting traffic to be routed to pod: [primary-inference-model-server-deployment-66659cd5bf-glbzw primary-inference-model-server-deployment-66659cd5bf-lw9lc primary-inference-model-server-deployment-66659cd5bf-rgblk]
+    gateway_following_epp_routing.go:140: 2025-10-14T01:04:57.736901843Z: Making POST request to http://10.96.88.207/primary-gateway-test
+    gateway_following_epp_routing.go:140: Traffic successfully reached only to expected pods: [primary-inference-model-server-deployment-66659cd5bf-glbzw primary-inference-model-server-deployment-66659cd5bf-lw9lc primary-inference-model-server-deployment-66659cd5bf-rgblk]
+=== NAME  TestInferenceExtensionConformance/GatewayFollowingEPPRouting
+    apply.go:283: 2025-10-14T01:04:58.127986844Z: Deleting httproute-for-primary-gw HTTPRoute
+=== RUN   TestInferenceExtensionConformance/HTTPRouteInvalidInferencePoolRef
+    conformance.go:72: 2025-10-14T01:04:58.144944469Z: Applying tests/httproute_invalid_inferencepool_ref.yaml
+    apply.go:275: 2025-10-14T01:04:58.154912969Z: Creating httproute-to-non-existent-pool HTTPRoute
+    conformance.go:77: 2025-10-14T01:04:58.166170635Z: Running HTTPRouteInvalidInferencePoolRef, relying on the following features: -, Gateway-standard
+=== RUN   TestInferenceExtensionConformance/HTTPRouteInvalidInferencePoolRef/HTTPRoute_should_have_Accepted=True_and_ResolvedRefs=False_for_non-existent_InferencePool
+    httproute_invalid_inferencepool_ref.go:63: Successfully verified HTTPRoute gateway-conformance-app-backend/httproute-to-non-existent-pool has conditions: Accepted=True and ResolvedRefs=False (Reason: BackendNotFound) for Gateway gateway-conformance-infra/conformance-primary
+=== NAME  TestInferenceExtensionConformance/HTTPRouteInvalidInferencePoolRef
+    apply.go:283: 2025-10-14T01:04:59.182424011Z: Deleting httproute-to-non-existent-pool HTTPRoute
+=== RUN   TestInferenceExtensionConformance/HTTPRouteMultipleGatewaysDifferentPools
+    conformance.go:72: 2025-10-14T01:04:59.196436219Z: Applying tests/httproute_multiple_gateways_different_pools.yaml
+    apply.go:275: 2025-10-14T01:04:59.204446219Z: Creating route-for-primary-gateway HTTPRoute
+    apply.go:275: 2025-10-14T01:04:59.212459761Z: Creating route-for-secondary-gateway HTTPRoute
+=== RUN   TestInferenceExtensionConformance/HTTPRouteMultipleGatewaysDifferentPools/Primary_HTTPRoute,_InferencePool,_and_Gateway_path:_verify_status_and_traffic
+    httproute_multiple_gateways_different_pools.go:58: Waiting for HTTPRoute gateway-conformance-app-backend/route-for-primary-gateway to be Accepted by Gateway gateway-conformance-infra/conformance-primary
+    httproute_multiple_gateways_different_pools.go:58: Waiting for HTTPRoute gateway-conformance-app-backend/route-for-primary-gateway to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
+    httproute_multiple_gateways_different_pools.go:58: HTTPRoute gateway-conformance-app-backend/route-for-primary-gateway is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
+    httproute_multiple_gateways_different_pools.go:58: Waiting for InferencePool gateway-conformance-app-backend/primary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted)
+    httproute_multiple_gateways_different_pools.go:58: InferencePool gateway-conformance-app-backend/primary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted'
+    httproute_multiple_gateways_different_pools.go:58: InferencePool gateway-conformance-app-backend/primary-inference-pool is Accepted by a parent Gateway (Reason: Accepted)
+    httproute_multiple_gateways_different_pools.go:58: Successfully verified: HTTPRoute gateway-conformance-app-backend/route-for-primary-gateway (Gateway gateway-conformance-infra/conformance-primary) is Accepted & Resolved, and InferencePool gateway-conformance-app-backend/primary-inference-pool is RouteAccepted.
+    httproute_multiple_gateways_different_pools.go:66: Waiting for Gateway gateway-conformance-infra/conformance-primary to get an address...
+    httproute_multiple_gateways_different_pools.go:66: Gateway gateway-conformance-infra/conformance-primary has address: 10.96.88.207:80
+    traffic.go:151: 2025-10-14T01:05:00.230920428Z: Making GET request to http://10.96.88.207/test-primary-gateway
+    traffic.go:151: 2025-10-14T01:05:00.23344447Z: Making GET request to http://10.96.88.207/test-primary-gateway
+    traffic.go:151: 2025-10-14T01:05:00.234922095Z: Making GET request to http://10.96.88.207/test-primary-gateway
+    traffic.go:169: 2025-10-14T01:05:00.23615922Z: Request passed
+=== RUN   TestInferenceExtensionConformance/HTTPRouteMultipleGatewaysDifferentPools/Secondary_HTTPRoute,_InferencePool,_and_Gateway_path:_verify_status_and_traffic
+    httproute_multiple_gateways_different_pools.go:84: Waiting for HTTPRoute gateway-conformance-app-backend/route-for-secondary-gateway to be Accepted by Gateway gateway-conformance-infra/conformance-secondary
+    httproute_multiple_gateways_different_pools.go:84: Waiting for HTTPRoute gateway-conformance-app-backend/route-for-secondary-gateway to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-secondary
+    httproute_multiple_gateways_different_pools.go:84: HTTPRoute gateway-conformance-app-backend/route-for-secondary-gateway is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-secondary
+    httproute_multiple_gateways_different_pools.go:84: Waiting for InferencePool gateway-conformance-app-backend/secondary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted)
+    httproute_multiple_gateways_different_pools.go:84: InferencePool gateway-conformance-app-backend/secondary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted'
+    httproute_multiple_gateways_different_pools.go:84: InferencePool gateway-conformance-app-backend/secondary-inference-pool is Accepted by a parent Gateway (Reason: Accepted)
+    httproute_multiple_gateways_different_pools.go:84: Successfully verified: HTTPRoute gateway-conformance-app-backend/route-for-secondary-gateway (Gateway gateway-conformance-infra/conformance-secondary) is Accepted & Resolved, and InferencePool gateway-conformance-app-backend/secondary-inference-pool is RouteAccepted.
+    httproute_multiple_gateways_different_pools.go:92: Waiting for Gateway gateway-conformance-infra/conformance-secondary to get an address...
+    httproute_multiple_gateways_different_pools.go:92: Gateway gateway-conformance-infra/conformance-secondary has address: 10.96.165.14:80
+    traffic.go:151: 2025-10-14T01:05:00.247583886Z: Making GET request to http://10.96.165.14/test-secondary-gateway
+    traffic.go:151: 2025-10-14T01:05:00.248908345Z: Making GET request to http://10.96.165.14/test-secondary-gateway
+    traffic.go:151: 2025-10-14T01:05:00.249860136Z: Making GET request to http://10.96.165.14/test-secondary-gateway
+    traffic.go:169: 2025-10-14T01:05:00.250706761Z: Request passed
+=== NAME  TestInferenceExtensionConformance/HTTPRouteMultipleGatewaysDifferentPools
+    apply.go:283: 2025-10-14T01:05:00.250741053Z: Deleting route-for-secondary-gateway HTTPRoute
+    apply.go:283: 2025-10-14T01:05:00.256212345Z: Deleting route-for-primary-gateway HTTPRoute
+=== RUN   TestInferenceExtensionConformance/InferencePoolAccepted
+    conformance.go:72: 2025-10-14T01:05:00.261789928Z: Applying tests/inferencepool_accepted.yaml
+    apply.go:275: 2025-10-14T01:05:00.268909345Z: Creating httproute-for-inferencepool-accepted HTTPRoute
+    conformance.go:77: 2025-10-14T01:05:00.283944303Z: Running InferencePoolAccepted, relying on the following features: -, Gateway-standard
+=== RUN   TestInferenceExtensionConformance/InferencePoolAccepted/InferencePool_should_have_Accepted_condition_set_to_True
+    inferencepool_accepted.go:54: InferencePool gateway-conformance-app-backend/primary-inference-pool successfully has condition Type=Accepted, Status=True
+=== NAME  TestInferenceExtensionConformance/InferencePoolAccepted
+    apply.go:283: 2025-10-14T01:05:00.290032886Z: Deleting httproute-for-inferencepool-accepted HTTPRoute
+=== RUN   TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation
+    conformance.go:72: 2025-10-14T01:05:00.296340053Z: Applying tests/inferencepool_httproute_port_validation.yaml
+    apply.go:275: 2025-10-14T01:05:00.31779672Z: Creating httproute-pool-port-unspecified HTTPRoute
+    apply.go:275: 2025-10-14T01:05:00.328601886Z: Creating httproute-pool-port-matching HTTPRoute
+    apply.go:275: 2025-10-14T01:05:00.336310345Z: Creating httproute-pool-port-non-matching HTTPRoute
+    conformance.go:77: 2025-10-14T01:05:00.352536553Z: Running InferencePoolHTTPRoutePortValidation, relying on the following features: -, Gateway-standard
+    inferencepool_httproute_port_validation.go:47: Waiting for Gateway gateway-conformance-infra/conformance-primary to get an address...
+    inferencepool_httproute_port_validation.go:47: Gateway gateway-conformance-infra/conformance-primary has address: 10.96.88.207:80
+=== RUN   TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation/Scenario_1:_HTTPRoute_backendRef_to_InferencePool_with_Port_Unspecified
+    inferencepool_httproute_port_validation.go:54: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-pool-port-unspecified to be Accepted by Gateway gateway-conformance-infra/conformance-primary
+    inferencepool_httproute_port_validation.go:54: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-pool-port-unspecified to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
+    inferencepool_httproute_port_validation.go:54: HTTPRoute gateway-conformance-app-backend/httproute-pool-port-unspecified is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
+    inferencepool_httproute_port_validation.go:55: Waiting for InferencePool gateway-conformance-app-backend/primary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted)
+    inferencepool_httproute_port_validation.go:55: InferencePool gateway-conformance-app-backend/primary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted'
+    inferencepool_httproute_port_validation.go:55: InferencePool gateway-conformance-app-backend/primary-inference-pool is Accepted by a parent Gateway (Reason: Accepted)
+    traffic.go:151: 2025-10-14T01:05:02.364763929Z: Making GET request to http://10.96.88.207/test-port-unspecified
+    traffic.go:151: 2025-10-14T01:05:02.366493929Z: Making GET request to http://10.96.88.207/test-port-unspecified
+    traffic.go:151: 2025-10-14T01:05:02.367530596Z: Making GET request to http://10.96.88.207/test-port-unspecified
+    traffic.go:169: 2025-10-14T01:05:02.368427012Z: Request passed
+=== RUN   TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation/Scenario_2:_HTTPRoute_backendRef_to_InferencePool_with_Port_Specified_and_Matching
+    inferencepool_httproute_port_validation.go:76: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-pool-port-matching to be Accepted by Gateway gateway-conformance-infra/conformance-primary
+    inferencepool_httproute_port_validation.go:76: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-pool-port-matching to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
+    inferencepool_httproute_port_validation.go:76: HTTPRoute gateway-conformance-app-backend/httproute-pool-port-matching is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
+    inferencepool_httproute_port_validation.go:77: Waiting for InferencePool gateway-conformance-app-backend/primary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted)
+    inferencepool_httproute_port_validation.go:77: InferencePool gateway-conformance-app-backend/primary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted'
+    inferencepool_httproute_port_validation.go:77: InferencePool gateway-conformance-app-backend/primary-inference-pool is Accepted by a parent Gateway (Reason: Accepted)
+    traffic.go:151: 2025-10-14T01:05:02.375146137Z: Making GET request to http://10.96.88.207/test-port-matching
+    traffic.go:151: 2025-10-14T01:05:02.376380554Z: Making GET request to http://10.96.88.207/test-port-matching
+    traffic.go:151: 2025-10-14T01:05:02.377616304Z: Making GET request to http://10.96.88.207/test-port-matching
+    traffic.go:169: 2025-10-14T01:05:02.378851304Z: Request passed
+=== RUN   TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation/Scenario_3:_HTTPRoute_backendRef_to_InferencePool_with_Port_Specified_and_Non-Matching._Request_still_passing_because_HTTP_Port_is_ignored_when_inferencePool_is_backendRef
+    inferencepool_httproute_port_validation.go:99: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-pool-port-non-matching to be Accepted by Gateway gateway-conformance-infra/conformance-primary
+    inferencepool_httproute_port_validation.go:99: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-pool-port-non-matching to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
+    inferencepool_httproute_port_validation.go:99: HTTPRoute gateway-conformance-app-backend/httproute-pool-port-non-matching is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
+    inferencepool_httproute_port_validation.go:100: Waiting for InferencePool gateway-conformance-app-backend/primary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted)
+    inferencepool_httproute_port_validation.go:100: InferencePool gateway-conformance-app-backend/primary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted'
+    inferencepool_httproute_port_validation.go:100: InferencePool gateway-conformance-app-backend/primary-inference-pool is Accepted by a parent Gateway (Reason: Accepted)
+    traffic.go:151: 2025-10-14T01:05:02.384983429Z: Making GET request to http://10.96.88.207/test-port-non-matching
+    traffic.go:151: 2025-10-14T01:05:02.386198887Z: Making GET request to http://10.96.88.207/test-port-non-matching
+    traffic.go:151: 2025-10-14T01:05:02.387009179Z: Making GET request to http://10.96.88.207/test-port-non-matching
+    traffic.go:169: 2025-10-14T01:05:02.388009429Z: Request passed
+=== NAME  TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation
+    apply.go:283: 2025-10-14T01:05:02.388062512Z: Deleting httproute-pool-port-non-matching HTTPRoute
+    apply.go:283: 2025-10-14T01:05:02.392225554Z: Deleting httproute-pool-port-matching HTTPRoute
+    apply.go:283: 2025-10-14T01:05:02.396365179Z: Deleting httproute-pool-port-unspecified HTTPRoute
+=== RUN   TestInferenceExtensionConformance/InferencePoolInvalidEPPService
+    conformance.go:72: 2025-10-14T01:05:02.402896512Z: Applying tests/inferencepool_invalid_epp_service.yaml
+    apply.go:275: 2025-10-14T01:05:02.415857471Z: Creating pool-with-invalid-epp InferencePool
+    apply.go:275: 2025-10-14T01:05:02.422569762Z: Creating httproute-for-invalid-epp-pool HTTPRoute
+    conformance.go:77: 2025-10-14T01:05:02.435913971Z: Running InferencePoolInvalidEPPService, relying on the following features: Gateway-standard, HTTPRoute-standard, -
+    inferencepool_invalid_epp_service.go:55: Waiting for Gateway gateway-conformance-infra/conformance-primary to get an address...
+    inferencepool_invalid_epp_service.go:55: Gateway gateway-conformance-infra/conformance-primary has address: 10.96.88.207:80
+=== RUN   TestInferenceExtensionConformance/InferencePoolInvalidEPPService/InferecePool_has_a_ResolvedRefs_Condition_with_status_False
+    inferencepool_invalid_epp_service.go:68: InferencePool gateway-conformance-app-backend/pool-with-invalid-epp successfully has condition Type=ResolvedRefs, Status=False
+=== RUN   TestInferenceExtensionConformance/InferencePoolInvalidEPPService/Request_to_a_route_with_an_invalid_backend_reference_receives_a_500_response
+    traffic.go:151: 2025-10-14T01:05:03.466323221Z: Making GET request to http://10.96.88.207/invalid-epp-test
+    traffic.go:151: 2025-10-14T01:05:03.467345763Z: Making GET request to http://10.96.88.207/invalid-epp-test
+    traffic.go:151: 2025-10-14T01:05:03.468300846Z: Making GET request to http://10.96.88.207/invalid-epp-test
+    traffic.go:169: 2025-10-14T01:05:03.469222846Z: Request passed
+=== NAME  TestInferenceExtensionConformance/InferencePoolInvalidEPPService
+    apply.go:283: 2025-10-14T01:05:03.469294346Z: Deleting httproute-for-invalid-epp-pool HTTPRoute
+    apply.go:283: 2025-10-14T01:05:03.47732543Z: Deleting pool-with-invalid-epp InferencePool
+=== RUN   TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools
+    conformance.go:72: 2025-10-14T01:05:03.481843888Z: Applying tests/inferencepool_multiple_rules_different_pools.yaml
+    apply.go:275: 2025-10-14T01:05:03.49263818Z: Creating httproute-multiple-rules-different-pools HTTPRoute
+    conformance.go:77: 2025-10-14T01:05:03.498142263Z: Running HTTPRouteMultipleRulesDifferentPools, relying on the following features: Gateway-standard, HTTPRoute-standard, -
+=== RUN   TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Wait_for_resources_to_be_accepted
+    inferencepool_multiple_rules_different_pools.go:60: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools to be Accepted by Gateway gateway-conformance-infra/conformance-primary
+    inferencepool_multiple_rules_different_pools.go:60: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
+    inferencepool_multiple_rules_different_pools.go:60: HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
+    inferencepool_multiple_rules_different_pools.go:60: Waiting for InferencePool gateway-conformance-app-backend/primary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted)
+    inferencepool_multiple_rules_different_pools.go:60: InferencePool gateway-conformance-app-backend/primary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted'
+    inferencepool_multiple_rules_different_pools.go:60: InferencePool gateway-conformance-app-backend/primary-inference-pool is Accepted by a parent Gateway (Reason: Accepted)
+    inferencepool_multiple_rules_different_pools.go:60: Successfully verified: HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools (Gateway gateway-conformance-infra/conformance-primary) is Accepted & Resolved, and InferencePool gateway-conformance-app-backend/primary-inference-pool is RouteAccepted.
+    inferencepool_multiple_rules_different_pools.go:61: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools to be Accepted by Gateway gateway-conformance-infra/conformance-primary
+    inferencepool_multiple_rules_different_pools.go:61: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
+    inferencepool_multiple_rules_different_pools.go:61: HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
+    inferencepool_multiple_rules_different_pools.go:61: Waiting for InferencePool gateway-conformance-app-backend/secondary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted)
+    inferencepool_multiple_rules_different_pools.go:61: InferencePool gateway-conformance-app-backend/secondary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted'
+    inferencepool_multiple_rules_different_pools.go:61: InferencePool gateway-conformance-app-backend/secondary-inference-pool is Accepted by a parent Gateway (Reason: Accepted)
+    inferencepool_multiple_rules_different_pools.go:61: Successfully verified: HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools (Gateway gateway-conformance-infra/conformance-primary) is Accepted & Resolved, and InferencePool gateway-conformance-app-backend/secondary-inference-pool is RouteAccepted.
+=== RUN   TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Traffic_should_be_routed_to_the_correct_pool_based_on_path
+    inferencepool_multiple_rules_different_pools.go:65: Waiting for Gateway gateway-conformance-infra/conformance-primary to get an address...
+    inferencepool_multiple_rules_different_pools.go:65: Gateway gateway-conformance-infra/conformance-primary has address: 10.96.88.207:80
+=== RUN   TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Traffic_should_be_routed_to_the_correct_pool_based_on_path/request_to_primary_pool
+    inferencepool_multiple_rules_different_pools.go:68: 2025-10-14T01:05:04.52978643Z: Making GET request to http://10.96.88.207/primary
+    http.go:251: 2025-10-14T01:05:04.536417763Z: Request passed
+=== RUN   TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Traffic_should_be_routed_to_the_correct_pool_based_on_path/request_to_secondary_pool
+    inferencepool_multiple_rules_different_pools.go:79: 2025-10-14T01:05:04.536504847Z: Making GET request to http://10.96.88.207/secondary
+    http.go:251: 2025-10-14T01:05:04.539573222Z: Request passed
+=== NAME  TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools
+    apply.go:283: 2025-10-14T01:05:04.539630847Z: Deleting httproute-multiple-rules-different-pools HTTPRoute
+=== RUN   TestInferenceExtensionConformance/InferencePoolResolvedRefsCondition
+    conformance.go:68: Skipping InferencePoolResolvedRefsCondition: test explicitly skipped
+=== NAME  TestInferenceExtensionConformance
+    apply.go:283: 2025-10-14T01:05:04.54499568Z: Deleting epp-to-inference-model-reader RoleBinding
+    apply.go:283: 2025-10-14T01:05:04.549125555Z: Deleting inference-model-reader Role
+    apply.go:283: 2025-10-14T01:05:04.552758388Z: Deleting plugins-config ConfigMap
+    apply.go:283: 2025-10-14T01:05:04.55691718Z: Deleting secondary-app-endpoint-picker Deployment
+    apply.go:283: 2025-10-14T01:05:04.561630972Z: Deleting secondary-endpoint-picker-svc Service
+    apply.go:283: 2025-10-14T01:05:04.58978643Z: Deleting secondary-inference-pool InferencePool
+    apply.go:283: 2025-10-14T01:05:04.59695693Z: Deleting primary-app-endpoint-picker Deployment
+    apply.go:283: 2025-10-14T01:05:04.613129388Z: Deleting primary-endpoint-picker-svc Service
+    apply.go:283: 2025-10-14T01:05:04.655569013Z: Deleting primary-inference-pool InferencePool
+    apply.go:283: 2025-10-14T01:05:04.673237097Z: Deleting secondary-inference-model-server-deployment Deployment
+    apply.go:283: 2025-10-14T01:05:04.682511888Z: Deleting primary-inference-model-server-deployment Deployment
+    apply.go:283: 2025-10-14T01:05:04.703110263Z: Deleting conformance-secondary Gateway
+    apply.go:283: 2025-10-14T01:05:04.715927263Z: Deleting conformance-primary Gateway
+    apply.go:283: 2025-10-14T01:05:04.725649972Z: Deleting gateway-conformance-app-backend Namespace
+    apply.go:283: 2025-10-14T01:05:04.764089138Z: Deleting gateway-conformance-infra Namespace
+--- FAIL: TestInferenceExtensionConformance (107.40s)
+    --- PASS: TestInferenceExtensionConformance/EppUnAvailableFailOpen (26.58s)
+        --- PASS: TestInferenceExtensionConformance/EppUnAvailableFailOpen/Phase_1:_Verify_baseline_connectivity_with_EPP_available (23.32s)
+        --- PASS: TestInferenceExtensionConformance/EppUnAvailableFailOpen/Phase_2:_Verify_fail-open_behavior_after_EPP_becomes_unavailable (2.20s)
+    --- FAIL: TestInferenceExtensionConformance/GatewayFollowingEPPRouting (55.39s)
+        --- FAIL: TestInferenceExtensionConformance/GatewayFollowingEPPRouting/should_route_traffic_to_a_single_designated_pod (0.41s)
+        --- FAIL: TestInferenceExtensionConformance/GatewayFollowingEPPRouting/should_route_traffic_to_two_designated_pods (0.36s)
+        --- PASS: TestInferenceExtensionConformance/GatewayFollowingEPPRouting/should_route_traffic_to_all_available_pods (0.39s)
+    --- PASS: TestInferenceExtensionConformance/HTTPRouteInvalidInferencePoolRef (1.05s)
+        --- PASS: TestInferenceExtensionConformance/HTTPRouteInvalidInferencePoolRef/HTTPRoute_should_have_Accepted=True_and_ResolvedRefs=False_for_non-existent_InferencePool (1.02s)
+    --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleGatewaysDifferentPools (1.07s)
+        --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleGatewaysDifferentPools/Primary_HTTPRoute,_InferencePool,_and_Gateway_path:_verify_status_and_traffic (1.02s)
+        --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleGatewaysDifferentPools/Secondary_HTTPRoute,_InferencePool,_and_Gateway_path:_verify_status_and_traffic (0.01s)
+    --- PASS: TestInferenceExtensionConformance/InferencePoolAccepted (0.03s)
+        --- PASS: TestInferenceExtensionConformance/InferencePoolAccepted/InferencePool_should_have_Accepted_condition_set_to_True (0.01s)
+    --- PASS: TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation (2.11s)
+        --- PASS: TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation/Scenario_1:_HTTPRoute_backendRef_to_InferencePool_with_Port_Unspecified (2.01s)
+        --- PASS: TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation/Scenario_2:_HTTPRoute_backendRef_to_InferencePool_with_Port_Specified_and_Matching (0.01s)
+        --- PASS: TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation/Scenario_3:_HTTPRoute_backendRef_to_InferencePool_with_Port_Specified_and_Non-Matching._Request_still_passing_because_HTTP_Port_is_ignored_when_inferencePool_is_backendRef (0.01s)
+    --- PASS: TestInferenceExtensionConformance/InferencePoolInvalidEPPService (1.08s)
+        --- PASS: TestInferenceExtensionConformance/InferencePoolInvalidEPPService/InferecePool_has_a_ResolvedRefs_Condition_with_status_False (0.00s)
+        --- PASS: TestInferenceExtensionConformance/InferencePoolInvalidEPPService/Request_to_a_route_with_an_invalid_backend_reference_receives_a_500_response (0.00s)
+    --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools (1.06s)
+        --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Wait_for_resources_to_be_accepted (1.02s)
+        --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Traffic_should_be_routed_to_the_correct_pool_based_on_path (0.02s)
+            --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Traffic_should_be_routed_to_the_correct_pool_based_on_path/request_to_primary_pool (0.01s)
+            --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Traffic_should_be_routed_to_the_correct_pool_based_on_path/request_to_secondary_pool (0.00s)
+    --- SKIP: TestInferenceExtensionConformance/InferencePoolResolvedRefsCondition (0.00s)
+FAIL
+FAIL	github.com/nginx/nginx-gateway-fabric/v2/tests/conformance	107.446s
+FAIL

From b3bfa4c1034eb916957a20d5906b629e817990f8 Mon Sep 17 00:00:00 2001
From: Ciara Stacke <c.stacke@f5.com>
Date: Tue, 14 Oct 2025 13:49:56 +0100
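Subject: [PATCH 11/12] Normalize EPP headers to lowercase

Lowercase request header keys before forwarding them to the EPP, because
Go's net/http canonicalizes header keys to Title-Case. Also use the
fully-qualified service name (<name>.<namespace>.svc.cluster.local) as
the EPP host, remove the test-epp-endpoint-selection bypass from epp.js
so that all request headers are forwarded to the EPP, stop skipping most
of the inference conformance tests, and delete the local conformance
manifests and the committed tests/output.txt.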

---
 cmd/gateway/endpoint_picker.go                |  10 +-
 internal/controller/nginx/config/servers.go   |   8 +-
 internal/controller/nginx/modules/src/epp.js  |  71 ++-
 .../controller/nginx/modules/test/epp.test.js |  68 ++-
 .../controller/state/graph/backend_refs.go    |   2 +
 tests/Makefile                                |   6 +-
 tests/conformance/conformance_test.go         |  11 -
 tests/conformance/manifests/base.yaml         | 394 -------------
 tests/conformance/manifests/epp-routing.yaml  |  23 -
 tests/conformance/manifests/gateway.yaml      |   0
 tests/output.txt                              | 539 ------------------
 11 files changed, 106 insertions(+), 1026 deletions(-)
 delete mode 100644 tests/conformance/manifests/base.yaml
 delete mode 100644 tests/conformance/manifests/epp-routing.yaml
 delete mode 100644 tests/conformance/manifests/gateway.yaml
 delete mode 100644 tests/output.txt

diff --git a/cmd/gateway/endpoint_picker.go b/cmd/gateway/endpoint_picker.go
index 118f95684b..b07b6eaf6d 100644
--- a/cmd/gateway/endpoint_picker.go
+++ b/cmd/gateway/endpoint_picker.go
@@ -7,6 +7,7 @@ import (
 	"io"
 	"net"
 	"net/http"
+	"strings"
 	"time"
 
 	corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
@@ -37,7 +38,7 @@ func realExtProcClientFactory() extProcClientFactory {
 	return func(target string) (extprocv3.ExternalProcessorClient, func() error, error) {
 		creds := credentials.NewTLS(&tls.Config{
 			// add RootCAs or, if you have a self-signed server cert:
-			InsecureSkipVerify: true,
+			InsecureSkipVerify: true, //nolint:gosec
 		})
 		conn, err := grpc.NewClient(target, grpc.WithTransportCredentials(creds))
 		if err != nil {
@@ -153,8 +154,13 @@ func buildHeaderRequest(r *http.Request) *extprocv3.ProcessingRequest {
 
 	for key, values := range r.Header {
 		for _, value := range values {
+			// Normalize header keys to lowercase before sending them to the EPP.
+			// Go's net/http canonicalizes header keys to Title-Case (e.g. "Content-Type"),
+			// which does not match the lowercase header keys the EPP expects.
+			normalizedKey := strings.ToLower(key)
+
 			headerMap.Headers = append(headerMap.Headers, &corev3.HeaderValue{
-				Key:   key,
+				Key:   normalizedKey,
 				Value: value,
 			})
 		}
diff --git a/internal/controller/nginx/config/servers.go b/internal/controller/nginx/config/servers.go
index 4e9259ba8a..414f64c272 100644
--- a/internal/controller/nginx/config/servers.go
+++ b/internal/controller/nginx/config/servers.go
@@ -459,7 +459,7 @@ func createInternalLocationsForRule(
 					}
 					intInfLocation.EPPInternalPath = intLocation.Path
 					if b.EndpointPickerNsName != "" {
-						intInfLocation.EPPHost = string(b.EndpointPickerConfig.Name) + "." + b.EndpointPickerNsName
+						intInfLocation.EPPHost = string(b.EndpointPickerConfig.Name) + "." + b.EndpointPickerNsName + ".svc.cluster.local"
 					} else {
 						intInfLocation.EPPHost = string(b.EndpointPickerConfig.Name)
 					}
@@ -517,7 +517,11 @@ func createInferenceLocationsForRule(
 						portNum = int(b.EndpointPickerConfig.Port.Number)
 					}
 					extLocations[i].EPPInternalPath = intLocation.Path
-					extLocations[i].EPPHost = string(b.EndpointPickerConfig.Name)
+					if b.EndpointPickerNsName != "" {
+						extLocations[i].EPPHost = (string(b.EndpointPickerConfig.Name) + "." + b.EndpointPickerNsName + ".svc.cluster.local") //nolint:lll
+					} else {
+						extLocations[i].EPPHost = string(b.EndpointPickerConfig.Name)
+					}
 					extLocations[i].EPPPort = portNum
 				}
 			}
diff --git a/internal/controller/nginx/modules/src/epp.js b/internal/controller/nginx/modules/src/epp.js
index 262366a9db..88de40062b 100644
--- a/internal/controller/nginx/modules/src/epp.js
+++ b/internal/controller/nginx/modules/src/epp.js
@@ -10,48 +10,40 @@ const WORKLOAD_ENDPOINT_VAR = 'inference_workload_endpoint';
 const SHIM_URI = 'http://127.0.0.1:54800';
 
 async function getEndpoint(r) {
-	const headerEndpoint = r.headersIn['test-epp-endpoint-selection'];
-	if (headerEndpoint) {
-		// Header is provided: Use endpoints directly and bypass Shim server
-		const endpoints = headerEndpoint.split(',').map(e => e.trim());
-		r.variables[WORKLOAD_ENDPOINT_VAR] = endpoints.join(',');
-		r.log(`Using header-specified endpoints: ${r.variables[WORKLOAD_ENDPOINT_VAR]}`);
-	} else {
-		if (!r.variables[EPP_HOST_HEADER_VAR] || !r.variables[EPP_PORT_HEADER_VAR]) {
-			throw Error(
-				`Missing required variables: ${EPP_HOST_HEADER_VAR} and/or ${EPP_PORT_HEADER_VAR}`,
-			);
-		}
-		if (!r.variables[EPP_INTERNAL_PATH_VAR]) {
-			throw Error(`Missing required variable: ${EPP_INTERNAL_PATH_VAR}`);
-		}
+	if (!r.variables[EPP_HOST_HEADER_VAR] || !r.variables[EPP_PORT_HEADER_VAR]) {
+		throw Error(
+			`Missing required variables: ${EPP_HOST_HEADER_VAR} and/or ${EPP_PORT_HEADER_VAR}`,
+		);
+	}
+	if (!r.variables[EPP_INTERNAL_PATH_VAR]) {
+		throw Error(`Missing required variable: ${EPP_INTERNAL_PATH_VAR}`);
+	}
 
-		let headers = Object.assign({}, r.headersIn);
-		headers[EPP_HOST_HEADER] = r.variables[EPP_HOST_HEADER_VAR];
-		headers[EPP_PORT_HEADER] = r.variables[EPP_PORT_HEADER_VAR];
+	let headers = Object.assign({}, r.headersIn);
+	headers[EPP_HOST_HEADER] = r.variables[EPP_HOST_HEADER_VAR];
+	headers[EPP_PORT_HEADER] = r.variables[EPP_PORT_HEADER_VAR];
 
-		try {
-			const response = await ngx.fetch(SHIM_URI, {
-				method: r.method,
-				headers: headers,
-				body: r.requestText,
-			});
-			const endpointHeader = response.headers.get(ENDPOINT_HEADER);
-			if (response.status === 200 && endpointHeader) {
-				r.variables[WORKLOAD_ENDPOINT_VAR] = endpointHeader;
-				r.log(
-					`found inference endpoint from EndpointPicker: ${r.variables[WORKLOAD_ENDPOINT_VAR]}`,
-				);
-			} else {
-				const body = await response.text();
-				r.error(
-					`could not get specific inference endpoint from EndpointPicker; ` +
-						`status: ${response.status}; body: ${body}`,
-				);
-			}
-		} catch (err) {
-			r.error(`Error in ngx.fetch: ${err}`);
+	try {
+		const response = await ngx.fetch(SHIM_URI, {
+			method: r.method,
+			headers: headers,
+			body: r.requestText,
+		});
+		const endpointHeader = response.headers.get(ENDPOINT_HEADER);
+		if (response.status === 200 && endpointHeader) {
+			r.variables[WORKLOAD_ENDPOINT_VAR] = endpointHeader;
+			r.log(
+				`found inference endpoint from EndpointPicker: ${r.variables[WORKLOAD_ENDPOINT_VAR]}`,
+			);
+		} else {
+			const body = await response.text();
+			r.error(
+				`could not get specific inference endpoint from EndpointPicker; ` +
+					`status: ${response.status}; body: ${body}`,
+			);
 		}
+	} catch (err) {
+		r.error(`Error in ngx.fetch: ${err}`);
 	}
 
 	// If performing a rewrite, $request_uri won't be used,
@@ -63,4 +55,5 @@ async function getEndpoint(r) {
 
 	r.internalRedirect(r.variables[EPP_INTERNAL_PATH_VAR] + args);
 }
+
 export default { getEndpoint };
diff --git a/internal/controller/nginx/modules/test/epp.test.js b/internal/controller/nginx/modules/test/epp.test.js
index 97b10f75b7..cbe20850ce 100644
--- a/internal/controller/nginx/modules/test/epp.test.js
+++ b/internal/controller/nginx/modules/test/epp.test.js
@@ -40,7 +40,7 @@ describe('getEndpoint', () => {
 	});
 
 	it('sets endpoint and logs on 200 with endpoint header', async () => {
-		const endpoint = 'http://endpoint';
+		const endpoint = '10.0.0.1:8080';
 		globalThis.ngx = {
 			fetch: vi.fn().mockResolvedValue({
 				status: 200,
@@ -49,7 +49,11 @@ describe('getEndpoint', () => {
 			}),
 		};
 		const r = makeRequest({
-			variables: { epp_host: 'host', epp_port: '1234', epp_internal_path: '/foo' },
+			variables: {
+				epp_host: 'host',
+				epp_port: '1234',
+				epp_internal_path: '/foo',
+			},
 		});
 		await epp.getEndpoint(r);
 		expect(r.variables.inference_workload_endpoint).toBe(endpoint);
@@ -66,7 +70,11 @@ describe('getEndpoint', () => {
 			}),
 		};
 		const r = makeRequest({
-			variables: { epp_host: 'host', epp_port: '1234', epp_internal_path: '/foo' },
+			variables: {
+				epp_host: 'host',
+				epp_port: '1234',
+				epp_internal_path: '/foo',
+			},
 		});
 		await epp.getEndpoint(r);
 		expect(r.error).toHaveBeenCalledWith(
@@ -80,7 +88,11 @@ describe('getEndpoint', () => {
 			fetch: vi.fn().mockRejectedValue(new Error('network fail')),
 		};
 		const r = makeRequest({
-			variables: { epp_host: 'host', epp_port: '1234', epp_internal_path: '/foo' },
+			variables: {
+				epp_host: 'host',
+				epp_port: '1234',
+				epp_internal_path: '/foo',
+			},
 		});
 		await epp.getEndpoint(r);
 		expect(r.error).toHaveBeenCalledWith(expect.stringContaining('Error in ngx.fetch'));
@@ -88,7 +100,7 @@ describe('getEndpoint', () => {
 	});
 
 	it('preserves args in internal redirect when args are present', async () => {
-		const endpoint = 'http://endpoint';
+		const endpoint = '10.0.0.1:8080';
 		globalThis.ngx = {
 			fetch: vi.fn().mockResolvedValue({
 				status: 200,
@@ -97,21 +109,51 @@ describe('getEndpoint', () => {
 			}),
 		};
 		const r = makeRequest({
-			variables: { epp_host: 'host', epp_port: '1234', epp_internal_path: '/foo' },
+			variables: {
+				epp_host: 'host',
+				epp_port: '1234',
+				epp_internal_path: '/foo',
+			},
 			args: { a: '1', b: '2' },
 		});
 		await epp.getEndpoint(r);
 		expect(r.internalRedirect).toHaveBeenCalledWith('/foo?a=1&b=2');
 	});
-	it('returns the header-specified endpoints if provided', async () => {
+
+	it('forwards all headers including test headers to EPP', async () => {
+		const endpoint = '10.0.0.1:8080';
+		const fetchMock = vi.fn().mockResolvedValue({
+			status: 200,
+			headers: { get: () => endpoint },
+			text: vi.fn(),
+		});
+		globalThis.ngx = {
+			fetch: fetchMock,
+		};
 		const r = makeRequest({
-			variables: {},
-			headersIn: { 'X-Endpoint-Selector': '10.1.2.3, 10.1.2.4' },
+			variables: {
+				epp_host: 'host',
+				epp_port: '1234',
+				epp_internal_path: '/foo',
+			},
+			headersIn: {
+				'test-epp-endpoint-selection': '10.0.0.1:8080,10.0.0.2:8080',
+				'content-type': 'application/json',
+			},
 		});
 		await epp.getEndpoint(r);
-		expect(r.variables.inference_workload_endpoint).toBe('10.1.2.3,10.1.2.4');
-		expect(r.log).toHaveBeenCalledWith(
-			expect.stringContaining('Using header-specified endpoints'),
+
+		// Verify that all headers (including test header) were forwarded to EPP
+		expect(fetchMock).toHaveBeenCalledWith(
+			'http://127.0.0.1:54800',
+			expect.objectContaining({
+				headers: expect.objectContaining({
+					'test-epp-endpoint-selection': '10.0.0.1:8080,10.0.0.2:8080',
+					'content-type': 'application/json',
+					'X-EPP-Host': 'host',
+					'X-EPP-Port': '1234',
+				}),
+			}),
 		);
 	});
-});
\ No newline at end of file
+});
diff --git a/internal/controller/state/graph/backend_refs.go b/internal/controller/state/graph/backend_refs.go
index 97ffe61f4b..e02aa3a11e 100644
--- a/internal/controller/state/graph/backend_refs.go
+++ b/internal/controller/state/graph/backend_refs.go
@@ -77,6 +77,8 @@ func addBackendRefsToRouteRules(
 
 // addHTTPBackendRefsToRules iterates over the rules of a Route and adds a list of BackendRef to each rule.
 // If a reference in a rule is invalid, the function will add a condition to the rule.
+//
+//nolint:gocyclo
 func addBackendRefsToRules(
 	route *L7Route,
 	refGrantResolver *referenceGrantResolver,
diff --git a/tests/Makefile b/tests/Makefile
index 90a15027e0..f775e9f9b2 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -18,8 +18,8 @@ EXPERIMENTAL_CONFORMANCE_PROFILES = GATEWAY-TLS
 CONFORMANCE_PROFILES = $(STANDARD_CONFORMANCE_PROFILES) # by default we use the standard conformance profiles. If experimental is enabled we override this and add the experimental profiles.
 SKIP_TESTS =
 CEL_TEST_TARGET =
-INFERENCE_SUPPORTED_FEATURES = GatewayFollowingEPPRouting
-INFERENCE_SKIP_TESTS = InferencePoolResolvedRefsCondition, EppUnAvailableFailOpen,HTTPRouteInvalidInferencePoolRef,InferencePoolAccepted,HTTPRouteMultipleGatewaysDifferentPools,HTTPRouteMultipleRulesDifferentPools,InferencePoolHTTPRoutePortValidation,InferencePoolInvalidEPPService
+INFERENCE_SUPPORTED_FEATURES = GatewayFollowingEPPRouting,EppUnAvailableFailOpen,HTTPRouteInvalidInferencePoolRef,InferencePoolAccepted,HTTPRouteMultipleGatewaysDifferentPools,HTTPRouteMultipleRulesDifferentPools,InferencePoolHTTPRoutePortValidation,InferencePoolInvalidEPPService
+INFERENCE_SKIP_TESTS = InferencePoolResolvedRefsCondition
 
 # Check if ENABLE_EXPERIMENTAL is true
 ifeq ($(ENABLE_EXPERIMENTAL),true)
@@ -188,7 +188,7 @@ add-local-ip-to-cluster: ## Add local IP to the GKE cluster master-authorized-ne
 update-firewall-with-local-ip: ## Update the firewall rule with local IP address
 	./scripts/update-firewall-with-local-ip.sh
 
-HELM_PARAMETERS += --set nginxGateway.name=nginx-gateway --set nginx.service.type=ClusterIP --skip-schema-validation --set nginxGateway.gwAPIInferenceExtension.enable=$(ENABLE_INFERENCE_EXTENSION) --set nginxGateway.config.logging.level=debug
+HELM_PARAMETERS += --set nginxGateway.name=nginx-gateway --set nginx.service.type=ClusterIP --set nginxGateway.gwAPIInferenceExtension.enable=$(ENABLE_INFERENCE_EXTENSION) --set nginxGateway.config.logging.level=debug
 
 # this target is used to install the gateway-api CRDs from the main branch (only used in the nightly CI job)
 # it overrides the target in the main Makefile when the GW_API_VERSION is set to main
diff --git a/tests/conformance/conformance_test.go b/tests/conformance/conformance_test.go
index 909a36e09f..f3c363f6eb 100644
--- a/tests/conformance/conformance_test.go
+++ b/tests/conformance/conformance_test.go
@@ -18,7 +18,6 @@ limitations under the License.
 package conformance
 
 import (
-	"fmt"
 	"os"
 	"testing"
 
@@ -96,7 +95,6 @@ func TestConformance(t *testing.T) {
 }
 
 func TestInferenceExtensionConformance(t *testing.T) {
-	g := NewWithT(t)
 
 	t.Logf(`Running inference conformance tests with %s GatewayClass\n cleanup: %t\n`+
 		`debug: %t\n enable all features: %t \n supported extended features: [%v]\n exempt features: [%v]\n`+
@@ -106,9 +104,6 @@ func TestInferenceExtensionConformance(t *testing.T) {
 	)
 
 	opts := inference_conformance.DefaultOptions(t)
-	ipaddressType := v1.IPAddressType
-	opts.UnusableNetworkAddresses = []v1beta1.GatewaySpecAddress{{Type: &ipaddressType, Value: unusableGatewayIPAddress}}
-	opts.UsableNetworkAddresses = []v1beta1.GatewaySpecAddress{{Type: &ipaddressType, Value: "192.0.2.1"}}
 
 	opts.Implementation = conf_v1.Implementation{
 		Organization: "nginx",
@@ -120,12 +115,6 @@ func TestInferenceExtensionConformance(t *testing.T) {
 		},
 	}
 
-	_, err := os.Stat(inferenceBaseManifest)
-	g.Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("base manifest file %s not found", inferenceBaseManifest))
-
-	opts.ManifestFS = append(opts.ManifestFS, os.DirFS("."))
-	opts.BaseManifests = inferenceBaseManifest
-
 	opts.ConformanceProfiles.Insert(inference_conformance.GatewayLayerProfileName)
 	inference_conformance.RunConformanceWithOptions(t, opts)
 }
diff --git a/tests/conformance/manifests/base.yaml b/tests/conformance/manifests/base.yaml
deleted file mode 100644
index fc868800e8..0000000000
--- a/tests/conformance/manifests/base.yaml
+++ /dev/null
@@ -1,394 +0,0 @@
-# Base Kubernetes resources for the Gateway API Inference Extension conformance tests.
-# This includes namespaces and a minimal set of resources (Gateway, Backend)
-# required by many tests. More specific resources should be defined within
-# individual test files or other resource directories (e.g., sample_backends).
-
----
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: gateway-conformance-infra
-  labels:
-    gateway-conformance: infra
----
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: gateway-conformance-app-backend
-  labels:
-    gateway-conformance: backend
----
-# A basic Gateway resource that allows HTTPRoutes from the same namespace.
-# Tests can use this as a parent reference for routes that target InferencePools.
-apiVersion: gateway.networking.k8s.io/v1
-kind: Gateway
-metadata:
-  name: conformance-primary
-  namespace: gateway-conformance-infra
-spec:
-  gatewayClassName: "{GATEWAY_CLASS_NAME}"
-  listeners:
-  - name: http
-    port: 80
-    protocol: HTTP
-    allowedRoutes:
-      namespaces:
-        from: All
-      kinds:
-      - group: gateway.networking.k8s.io
-        kind: HTTPRoute
----
-apiVersion: gateway.networking.k8s.io/v1
-kind: Gateway
-metadata:
-  name: conformance-secondary
-  namespace: gateway-conformance-infra
-spec:
-  gatewayClassName: "{GATEWAY_CLASS_NAME}"
-  listeners:
-  - name: http
-    port: 80
-    protocol: HTTP
-    hostname: "secondary.example.com"
-    allowedRoutes:
-      namespaces:
-        from: All
-
-### The following defines the essential resources for the gateway conformance test.
-### All resources are created in the 'gateway-conformance-app-backend' namespace.
----
-# Deploys a mock backend service to act as a model server.
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: primary-inference-model-server-deployment
-  namespace: gateway-conformance-app-backend
-  labels:
-    app: primary-inference-model-server
-spec:
-  replicas: 3
-  selector:
-    matchLabels:
-      app: primary-inference-model-server
-  template:
-    metadata:
-      labels:
-        app: primary-inference-model-server
-    spec:
-      containers:
-      - name: echoserver
-        image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd
-        ports:
-        - containerPort: 3000
-        readinessProbe:
-          httpGet:
-            path: /
-            port: 3000
-          initialDelaySeconds: 3
-          periodSeconds: 5
-          failureThreshold: 2
-        env:
-        - name: POD_NAME
-          valueFrom:
-            fieldRef:
-              fieldPath: metadata.name
-        - name: NAMESPACE
-          valueFrom:
-            fieldRef:
-              fieldPath: metadata.namespace
-        - name: POD_IP
-          valueFrom:
-            fieldRef:
-              fieldPath: status.podIP
----
-# Deploys a secondary mock backend service to act as a model server.
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: secondary-inference-model-server-deployment
-  namespace: gateway-conformance-app-backend
-  labels:
-    app: secondary-inference-model-server
-spec:
-  replicas: 3
-  selector:
-    matchLabels:
-      app: secondary-inference-model-server
-  template:
-    metadata:
-      labels:
-        app: secondary-inference-model-server
-    spec:
-      containers:
-      - name: echoserver
-        image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd
-        ports:
-        - containerPort: 3000
-        readinessProbe:
-          httpGet:
-            path: /
-            port: 3000
-          initialDelaySeconds: 3
-          periodSeconds: 5
-          failureThreshold: 2
-        env:
-        - name: POD_NAME
-          valueFrom:
-            fieldRef:
-              fieldPath: metadata.name
-        - name: NAMESPACE
-          valueFrom:
-            fieldRef:
-              fieldPath: metadata.namespace
-        - name: POD_IP
-          valueFrom:
-            fieldRef:
-              fieldPath: status.podIP
----
-# --- Primary InferencePool Definition ---
-apiVersion: inference.networking.k8s.io/v1
-kind: InferencePool
-metadata:
-  name: primary-inference-pool
-  namespace: gateway-conformance-app-backend
-spec:
-  selector:
-    matchLabels:
-      app: primary-inference-model-server
-  targetPorts:
-    - number: 3000
-  endpointPickerRef:
-    name: primary-endpoint-picker-svc
-    port:
-      number: 9002
----
-# --- Primary Conformance EPP service Definition ---
-apiVersion: v1
-kind: Service
-metadata:
-  name: primary-endpoint-picker-svc
-  namespace: gateway-conformance-app-backend
-spec:
-  selector:
-    app: primary-app-backend-epp
-  ports:
-    - protocol: TCP
-      port: 9002
-      targetPort: 9002
-      appProtocol: http2
-  type: ClusterIP
----
-# --- Primary Conformance EPP Deployment ---
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: primary-app-endpoint-picker
-  namespace: gateway-conformance-app-backend
-  labels:
-    app: primary-app-backend-epp
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: primary-app-backend-epp
-  template:
-    metadata:
-      labels:
-        app: primary-app-backend-epp
-    spec:
-      # Conservatively, this timeout should mirror the longest grace period of the pods within the pool
-      terminationGracePeriodSeconds: 130
-      containers:
-      - name: epp
-        image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v1.0.0
-        imagePullPolicy: Always
-        args:
-        - --pool-name
-        - "primary-inference-pool"
-        - --pool-namespace
-        - "gateway-conformance-app-backend"
-        - --v
-        - "4"
-        - --zap-encoder
-        - "json"
-        - --grpc-port
-        - "9002"
-        - --grpc-health-port
-        - "9003"
-        - "--config-file"
-        - "/config/conformance-plugins.yaml"
-        env:
-        - name: ENABLE_REQ_HEADER_BASED_SCHEDULER_FOR_TESTING # Used for conformance test.
-          value: "true"
-        ports:
-        - containerPort: 9002
-        - containerPort: 9003
-        - name: metrics
-          containerPort: 9090
-        livenessProbe:
-          grpc:
-            port: 9003
-            service: inference-extension
-          initialDelaySeconds: 5
-          periodSeconds: 10
-        readinessProbe:
-          grpc:
-            port: 9003
-            service: inference-extension
-          initialDelaySeconds: 5
-          periodSeconds: 10
-        volumeMounts:
-        - name: plugins-config-volume
-          mountPath: "/config"
-      volumes:
-      - name: plugins-config-volume
-        configMap:
-          name: plugins-config
----
-# --- Secondary InferencePool Definition ---
-apiVersion: inference.networking.k8s.io/v1
-kind: InferencePool
-metadata:
-  name: secondary-inference-pool
-  namespace: gateway-conformance-app-backend
-spec:
-  selector:
-    matchLabels:
-      app: secondary-inference-model-server
-  targetPorts:
-    - number: 3000
-  endpointPickerRef:
-    name: secondary-endpoint-picker-svc
-    failureMode: FailOpen
-    port:
-      number: 9002
----
-# --- Secondary Conformance EPP service Definition ---
-apiVersion: v1
-kind: Service
-metadata:
-  name: secondary-endpoint-picker-svc
-  namespace: gateway-conformance-app-backend
-spec:
-  selector:
-    app: secondary-app-backend-epp
-  ports:
-    - protocol: TCP
-      port: 9002
-      targetPort: 9002
-      appProtocol: http2
-  type: ClusterIP
----
-# --- Secondary Conformance EPP Deployment ---
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: secondary-app-endpoint-picker
-  namespace: gateway-conformance-app-backend
-  labels:
-    app: secondary-app-backend-epp
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: secondary-app-backend-epp
-  template:
-    metadata:
-      labels:
-        app: secondary-app-backend-epp
-    spec:
-      # Conservatively, this timeout should mirror the longest grace period of the pods within the pool
-      terminationGracePeriodSeconds: 130
-      containers:
-      - name: epp
-        image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v1.0.0
-        imagePullPolicy: Always
-        args:
-        - --pool-name
-        - "secondary-inference-pool"
-        - --pool-namespace
-        - "gateway-conformance-app-backend"
-        - --v
-        - "4"
-        - --zap-encoder
-        - "json"
-        - --grpc-port
-        - "9002"
-        - --grpc-health-port
-        - "9003"
-        - "--config-file"
-        - "/config/conformance-plugins.yaml"
-        env:
-        - name: ENABLE_REQ_HEADER_BASED_SCHEDULER_FOR_TESTING # Used for conformance test.
-          value: "true"
-        ports:
-        - containerPort: 9002
-        - containerPort: 9003
-        - name: metrics
-          containerPort: 9090
-        livenessProbe:
-          grpc:
-            port: 9003
-            service: inference-extension
-          initialDelaySeconds: 5
-          periodSeconds: 10
-        readinessProbe:
-          grpc:
-            port: 9003
-            service: inference-extension
-          initialDelaySeconds: 5
-          periodSeconds: 10
-        volumeMounts:
-        - name: plugins-config-volume
-          mountPath: "/config"
-      volumes:
-      - name: plugins-config-volume
-        configMap:
-          name: plugins-config
----
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: plugins-config
-  namespace: gateway-conformance-app-backend
-data:
-  conformance-plugins.yaml: |
-    apiVersion: inference.networking.x-k8s.io/v1alpha1
-    kind: EndpointPickerConfig
-    plugins:
-    - type: header-based-testing-filter
-    schedulingProfiles:
-    - name: conformance-profile
-      plugins:
-      - pluginRef: header-based-testing-filter
----
-# --- Required Role and RoleBinding for Conformance Test for EPP ---
-apiVersion: rbac.authorization.k8s.io/v1
-kind: Role
-metadata:
-  name: inference-model-reader
-  namespace: gateway-conformance-app-backend
-rules:
-- apiGroups: ["inference.networking.x-k8s.io"]
-  resources: ["inferenceobjectives", "inferencepools"]
-  verbs: ["get", "list", "watch"]
-- apiGroups: ["inference.networking.k8s.io"]
-  resources: ["inferencepools"]
-  verbs: ["get", "list", "watch"]
-- apiGroups: [""]
-  resources: ["pods"]
-  verbs: ["get", "list", "watch"]
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: RoleBinding
-metadata:
-  name: epp-to-inference-model-reader
-  namespace: gateway-conformance-app-backend
-subjects:
-- kind: ServiceAccount
-  name: default
-  namespace: gateway-conformance-app-backend
-roleRef:
-  kind: Role
-  name: inference-model-reader
-  apiGroup: rbac.authorization.k8s.io
\ No newline at end of file
diff --git a/tests/conformance/manifests/epp-routing.yaml b/tests/conformance/manifests/epp-routing.yaml
deleted file mode 100644
index 14be8eee65..0000000000
--- a/tests/conformance/manifests/epp-routing.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-apiVersion: gateway.networking.k8s.io/v1
-kind: HTTPRoute
-metadata:
-  name: httproute-for-primary-gw
-  namespace: gateway-conformance-app-backend
-spec:
-  parentRefs:
-  - group: gateway.networking.k8s.io
-    kind: Gateway
-    name: conformance-primary
-    namespace: gateway-conformance-infra
-    sectionName: http
-  hostnames:
-  - "primary.example.com"
-  rules:
-  - backendRefs:
-    - group: inference.networking.k8s.io
-      kind: InferencePool
-      name: primary-inference-pool
-    matches:
-    - path:
-        type: PathPrefix
-        value: /primary-gateway-test
\ No newline at end of file
diff --git a/tests/conformance/manifests/gateway.yaml b/tests/conformance/manifests/gateway.yaml
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/tests/output.txt b/tests/output.txt
deleted file mode 100644
index b74e00397d..0000000000
--- a/tests/output.txt
+++ /dev/null
@@ -1,539 +0,0 @@
-=== RUN   TestConformance
-    conformance_test.go:50: Running conformance tests with nginx GatewayClass\n cleanup: true\ndebug: false\n enable all features: false \n supported extended features: []\n exempt features: []\nconformance profiles: []\n skip tests: [InferencePoolResolvedRefsCondition,]
-    conformance_test.go:75: 
-        Unexpected error:
-            <*errors.errorString | 0x400005e220>: 
-            no conformance profile, supported features, explicit tests were provided so no tests could be selected
-            {
-                s: "no conformance profile, supported features, explicit tests were provided so no tests could be selected",
-            }
-        occurred
---- FAIL: TestConformance (0.00s)
-=== RUN   TestInferenceExtensionConformance
-    conformance_test.go:101: Running inference conformance tests with nginx GatewayClass\n cleanup: true\ndebug: false\n enable all features: false \n supported extended features: []\n exempt features: []\nskip tests: [InferencePoolResolvedRefsCondition,]
-    conformance_test.go:108: Registering API types with scheme...
-    conformance_test.go:108: Attempting to install inferencev1alpha2 types into scheme from package: inference.networking.x-k8s.io
-    conformance_test.go:108: Attempting to install inferencev1 types into scheme from package: inference.networking.k8s.io
-    conformance_test.go:130: Running Inference Extension conformance tests with GatewayClass nginx
-    conformance.go:249: 2025-10-14T01:03:17.615900047Z: Test Setup: Ensuring GatewayClass has been accepted
-    conformance.go:255: 2025-10-14T01:03:17.624521922Z: Test Setup: Applying base manifests
-    apply.go:275: 2025-10-14T01:03:17.629990464Z: Creating gateway-conformance-infra Namespace
-    apply.go:275: 2025-10-14T01:03:17.668296422Z: Creating gateway-conformance-app-backend Namespace
-    apply.go:275: 2025-10-14T01:03:17.68502838Z: Creating conformance-primary Gateway
-    apply.go:275: 2025-10-14T01:03:17.700455672Z: Creating conformance-secondary Gateway
-    apply.go:275: 2025-10-14T01:03:17.720530089Z: Creating primary-inference-model-server-deployment Deployment
-    apply.go:275: 2025-10-14T01:03:17.734617255Z: Creating secondary-inference-model-server-deployment Deployment
-    apply.go:275: 2025-10-14T01:03:17.747788672Z: Creating primary-inference-pool InferencePool
-    apply.go:275: 2025-10-14T01:03:17.763545755Z: Creating primary-endpoint-picker-svc Service
-    apply.go:275: 2025-10-14T01:03:17.797173297Z: Creating primary-app-endpoint-picker Deployment
-    apply.go:275: 2025-10-14T01:03:17.855388922Z: Creating secondary-inference-pool InferencePool
-    apply.go:275: 2025-10-14T01:03:17.912740089Z: Creating secondary-endpoint-picker-svc Service
-    apply.go:275: 2025-10-14T01:03:17.965884089Z: Creating secondary-app-endpoint-picker Deployment
-    apply.go:275: 2025-10-14T01:03:17.988395839Z: Creating plugins-config ConfigMap
-    apply.go:275: 2025-10-14T01:03:18.008423881Z: Creating inference-model-reader Role
-    apply.go:275: 2025-10-14T01:03:18.026004839Z: Creating epp-to-inference-model-reader RoleBinding
-    conformance.go:258: 2025-10-14T01:03:18.125679214Z: Test Setup: Ensuring Gateways and Pods from base manifests are ready
-    helpers.go:216: 2025-10-14T01:03:18.128281714Z: Gateway gateway-conformance-infra/conformance-primary expected observedGeneration to be updated to 1 for all conditions, only 0/2 were updated. stale conditions are: Accepted (generation 0), Programmed (generation 0)
-    helpers.go:240: 2025-10-14T01:03:19.138385881Z: Ready condition set to False, expected True
-    helpers.go:240: 2025-10-14T01:03:19.138570048Z: Ready was not in conditions list
-    helpers.go:243: 2025-10-14T01:03:19.138578256Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
-    helpers.go:240: 2025-10-14T01:03:20.135867882Z: Ready condition set to False, expected True
-    helpers.go:240: 2025-10-14T01:03:20.135924132Z: Ready was not in conditions list
-    helpers.go:243: 2025-10-14T01:03:20.135938465Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
-    helpers.go:240: 2025-10-14T01:03:21.135367007Z: Ready condition set to False, expected True
-    helpers.go:240: 2025-10-14T01:03:21.135453382Z: Ready was not in conditions list
-    helpers.go:243: 2025-10-14T01:03:21.135469924Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
-    helpers.go:240: 2025-10-14T01:03:22.138447632Z: Ready condition set to False, expected True
-    helpers.go:240: 2025-10-14T01:03:22.138600674Z: Ready was not in conditions list
-    helpers.go:243: 2025-10-14T01:03:22.138612882Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
-    helpers.go:240: 2025-10-14T01:03:23.140596008Z: Ready condition set to False, expected True
-    helpers.go:240: 2025-10-14T01:03:23.140670675Z: Ready was not in conditions list
-    helpers.go:243: 2025-10-14T01:03:23.140684591Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
-    helpers.go:240: 2025-10-14T01:03:24.132289342Z: Ready condition set to False, expected True
-    helpers.go:240: 2025-10-14T01:03:24.132335592Z: Ready was not in conditions list
-    helpers.go:243: 2025-10-14T01:03:24.132342883Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
-    helpers.go:240: 2025-10-14T01:03:25.145361926Z: Ready condition set to False, expected True
-    helpers.go:240: 2025-10-14T01:03:25.145474134Z: Ready was not in conditions list
-    helpers.go:243: 2025-10-14T01:03:25.145500634Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
-    helpers.go:240: 2025-10-14T01:03:26.141522593Z: Ready condition set to False, expected True
-    helpers.go:240: 2025-10-14T01:03:26.141676259Z: Ready was not in conditions list
-    helpers.go:243: 2025-10-14T01:03:26.141727134Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
-    helpers.go:240: 2025-10-14T01:03:27.141143468Z: Ready condition set to False, expected True
-    helpers.go:240: 2025-10-14T01:03:27.141202718Z: Ready was not in conditions list
-    helpers.go:243: 2025-10-14T01:03:27.141216177Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
-    helpers.go:240: 2025-10-14T01:03:28.131819885Z: Ready condition set to False, expected True
-    helpers.go:240: 2025-10-14T01:03:28.131871677Z: Ready was not in conditions list
-    helpers.go:243: 2025-10-14T01:03:28.131878719Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
-    helpers.go:240: 2025-10-14T01:03:29.133710844Z: Ready condition set to False, expected True
-    helpers.go:240: 2025-10-14T01:03:29.133776219Z: Ready was not in conditions list
-    helpers.go:243: 2025-10-14T01:03:29.133784011Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
-    helpers.go:240: 2025-10-14T01:03:30.131837136Z: Ready condition set to False, expected True
-    helpers.go:240: 2025-10-14T01:03:30.131903303Z: Ready was not in conditions list
-    helpers.go:243: 2025-10-14T01:03:30.13191447Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
-    helpers.go:240: 2025-10-14T01:03:31.134555012Z: Ready condition set to False, expected True
-    helpers.go:240: 2025-10-14T01:03:31.134599178Z: Ready was not in conditions list
-    helpers.go:243: 2025-10-14T01:03:31.134607595Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
-    helpers.go:240: 2025-10-14T01:03:32.133003804Z: Ready condition set to False, expected True
-    helpers.go:240: 2025-10-14T01:03:32.133049721Z: Ready was not in conditions list
-    helpers.go:243: 2025-10-14T01:03:32.133060096Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
-    helpers.go:240: 2025-10-14T01:03:33.136018554Z: Ready condition set to False, expected True
-    helpers.go:240: 2025-10-14T01:03:33.136180346Z: Ready was not in conditions list
-    helpers.go:243: 2025-10-14T01:03:33.136208554Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
-    helpers.go:240: 2025-10-14T01:03:34.141493597Z: Ready condition set to False, expected True
-    helpers.go:240: 2025-10-14T01:03:34.141638847Z: Ready was not in conditions list
-    helpers.go:243: 2025-10-14T01:03:34.141660013Z: Pod gateway-conformance-infra/conformance-primary-nginx-b9df44574-686f9 not ready yet
-    helpers.go:240: 2025-10-14T01:03:35.140798972Z: Ready condition set to False, expected True
-    helpers.go:240: 2025-10-14T01:03:35.14086293Z: Ready was not in conditions list
-    helpers.go:243: 2025-10-14T01:03:35.14087168Z: Pod gateway-conformance-app-backend/primary-app-endpoint-picker-7579b47bc6-rtt47 not ready yet
-    helpers.go:248: 2025-10-14T01:03:36.135304833Z: Gateways and Pods in gateway-conformance-infra, gateway-conformance-app-backend namespaces ready
-    conformance.go:265: Attempting to fetch Gateway gateway-conformance-infra/conformance-primary.
-    conformance.go:306: Successfully fetched Gateway gateway-conformance-infra/conformance-primary. Spec.GatewayClassName: nginx
-    conformance.go:265: Shared Gateway gateway-conformance-infra/conformance-primary is ready.
-    conformance.go:266: Attempting to fetch Gateway gateway-conformance-infra/conformance-secondary.
-    conformance.go:306: Successfully fetched Gateway gateway-conformance-infra/conformance-secondary. Spec.GatewayClassName: nginx
-    conformance.go:266: Shared Gateway gateway-conformance-infra/conformance-secondary is ready.
-    conformance_test.go:130: Running Inference Extension conformance tests against all registered tests
-=== RUN   TestInferenceExtensionConformance/EppUnAvailableFailOpen
-    conformance.go:72: 2025-10-14T01:03:36.164248292Z: Applying tests/epp_unavailable_fail_open.yaml
-    apply.go:275: 2025-10-14T01:03:36.175129875Z: Creating httproute-for-failopen-pool-gw HTTPRoute
-    conformance.go:77: 2025-10-14T01:03:36.182051833Z: Running EppUnAvailableFailOpen, relying on the following features: -, Gateway-standard
-    epp_unavailable_fail_open.go:60: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-for-failopen-pool-gw to be Accepted by Gateway gateway-conformance-infra/conformance-secondary
-    epp_unavailable_fail_open.go:60: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-for-failopen-pool-gw to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-secondary
-    epp_unavailable_fail_open.go:60: HTTPRoute gateway-conformance-app-backend/httproute-for-failopen-pool-gw is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-secondary
-    epp_unavailable_fail_open.go:61: Waiting for InferencePool gateway-conformance-app-backend/secondary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted)
-    epp_unavailable_fail_open.go:61: InferencePool gateway-conformance-app-backend/secondary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted'
-    epp_unavailable_fail_open.go:61: InferencePool gateway-conformance-app-backend/secondary-inference-pool is Accepted by a parent Gateway (Reason: Accepted)
-    epp_unavailable_fail_open.go:62: Waiting for Gateway gateway-conformance-infra/conformance-secondary to get an address...
-    epp_unavailable_fail_open.go:62: Gateway gateway-conformance-infra/conformance-secondary has address: 10.96.165.14:80
-    epp_unavailable_fail_open.go:64: Searching for Pods with labels map[app:secondary-inference-model-server] in namespace gateway-conformance-app-backend
-=== RUN   TestInferenceExtensionConformance/EppUnAvailableFailOpen/Phase_1:_Verify_baseline_connectivity_with_EPP_available
-    epp_unavailable_fail_open.go:71: Sending request to ensure the Gateway and EPP are working correctly...
-    traffic.go:151: 2025-10-14T01:03:37.21868725Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:37.402427709Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 3.292µs)
-    traffic.go:151: 2025-10-14T01:03:38.403328918Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:151: 2025-10-14T01:03:38.426142084Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:38.446418001Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 1.207650709s)
-    traffic.go:151: 2025-10-14T01:03:39.448027043Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:151: 2025-10-14T01:03:39.486648835Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:39.513615501Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 2.268142335s)
-    traffic.go:151: 2025-10-14T01:03:40.517491502Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:40.56746671Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 3.298903335s)
-    traffic.go:151: 2025-10-14T01:03:41.569563961Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:41.606558586Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 4.350976127s)
-    traffic.go:151: 2025-10-14T01:03:42.60780142Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:42.626555045Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 5.389276795s)
-    traffic.go:151: 2025-10-14T01:03:43.627828253Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:151: 2025-10-14T01:03:43.69536142Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:43.723475462Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 6.476841628s)
-    traffic.go:151: 2025-10-14T01:03:44.724507212Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:44.771940629Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 7.505984296s)
-    traffic.go:151: 2025-10-14T01:03:45.772548504Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:45.790925171Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 8.553996546s)
-    traffic.go:151: 2025-10-14T01:03:46.791878463Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:151: 2025-10-14T01:03:46.812035797Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:46.833224713Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 9.593544797s)
-    traffic.go:151: 2025-10-14T01:03:47.83441863Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:47.857052464Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 10.615884797s)
-    traffic.go:151: 2025-10-14T01:03:48.859770131Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:151: 2025-10-14T01:03:48.897930881Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:48.913112131Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 11.679430214s)
-    traffic.go:151: 2025-10-14T01:03:49.914456215Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:49.970198381Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 12.69590984s)
-    traffic.go:151: 2025-10-14T01:03:50.971954632Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:51.013194465Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 13.753366382s)
-    traffic.go:151: 2025-10-14T01:03:52.014606882Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:151: 2025-10-14T01:03:52.057845091Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:52.084620507Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 14.839337549s)
-    traffic.go:151: 2025-10-14T01:03:53.085642425Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:151: 2025-10-14T01:03:53.1301423Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:151: 2025-10-14T01:03:53.168473466Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:53.205107341Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 15.949976216s)
-    traffic.go:151: 2025-10-14T01:03:54.20599155Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:151: 2025-10-14T01:03:54.241487258Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:151: 2025-10-14T01:03:54.261202842Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:54.29185805Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 17.042710592s)
-    traffic.go:151: 2025-10-14T01:03:55.292574301Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:55.312207217Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 18.073995801s)
-    traffic.go:151: 2025-10-14T01:03:56.312476468Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:56.331264593Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 19.093928343s)
-    traffic.go:151: 2025-10-14T01:03:57.331577593Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:151: 2025-10-14T01:03:57.348852843Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:151: 2025-10-14T01:03:57.371795468Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:57.386972552Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-q55tl (after 20.15330376s)
-    traffic.go:151: 2025-10-14T01:03:58.38759601Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:58.422928094Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 21.169056844s)
-    traffic.go:151: 2025-10-14T01:03:59.423453969Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:163: 2025-10-14T01:03:59.447749344Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.165.14 Path:/failopen-pool-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: secondary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.164]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with secondary-inference-model-server-deployment-659949dbb9-hqnjj, got secondary-inference-model-server-deployment-659949dbb9-lrq7h (after 22.204941553s)
-    traffic.go:151: 2025-10-14T01:04:00.449553011Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:151: 2025-10-14T01:04:00.47849147Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:151: 2025-10-14T01:04:00.515535511Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:169: 2025-10-14T01:04:00.540372178Z: Request passed
-=== RUN   TestInferenceExtensionConformance/EppUnAvailableFailOpen/Phase_2:_Verify_fail-open_behavior_after_EPP_becomes_unavailable
-    epp_unavailable_fail_open.go:92: Making EPP service gateway-conformance-app-backend/primary-endpoint-picker-svc unavailable...
-    epp_unavailable_fail_open.go:94: Making Service gateway-conformance-app-backend/primary-endpoint-picker-svc unavailable by modifying its selector...
-    epp_unavailable_fail_open.go:94: Waiting for EndpointSlices of Service gateway-conformance-app-backend/primary-endpoint-picker-svc to become empty...
-    epp_unavailable_fail_open.go:94: Successfully modified selector for Service gateway-conformance-app-backend/primary-endpoint-picker-svc
-    epp_unavailable_fail_open.go:98: Sending request again, expecting success to verify fail-open...
-    traffic.go:151: 2025-10-14T01:04:01.563349637Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:151: 2025-10-14T01:04:01.647647095Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:151: 2025-10-14T01:04:01.696854387Z: Making POST request to http://10.96.165.14/failopen-pool-test
-    traffic.go:169: 2025-10-14T01:04:01.720366262Z: Request passed
-    epp_unavailable_fail_open.go:95: Restoring original selector for Service gateway-conformance-app-backend/primary-endpoint-picker-svc...
-    epp_unavailable_fail_open.go:95: Waiting for EndpointSlices of Service gateway-conformance-app-backend/primary-endpoint-picker-svc to be restored...
-    epp_unavailable_fail_open.go:95: Successfully restored selector for Service gateway-conformance-app-backend/primary-endpoint-picker-svc
-=== NAME  TestInferenceExtensionConformance/EppUnAvailableFailOpen
-    apply.go:283: 2025-10-14T01:04:02.737540721Z: Deleting httproute-for-failopen-pool-gw HTTPRoute
-=== RUN   TestInferenceExtensionConformance/GatewayFollowingEPPRouting
-    conformance.go:72: 2025-10-14T01:04:02.776777596Z: Applying tests/gateway_following_epp_routing.yaml
-    apply.go:275: 2025-10-14T01:04:02.785686221Z: Creating httproute-for-primary-gw HTTPRoute
-    conformance.go:77: 2025-10-14T01:04:02.796671638Z: Running GatewayFollowingEPPRouting, relying on the following features: -, Gateway-standard
-    gateway_following_epp_routing.go:64: Verifying HTTPRoute and InferencePool are accepted and the Gateway has an address.
-    gateway_following_epp_routing.go:65: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-for-primary-gw to be Accepted by Gateway gateway-conformance-infra/conformance-primary
-    gateway_following_epp_routing.go:65: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-for-primary-gw to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
-    gateway_following_epp_routing.go:65: HTTPRoute gateway-conformance-app-backend/httproute-for-primary-gw is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
-    gateway_following_epp_routing.go:66: Waiting for InferencePool gateway-conformance-app-backend/primary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted)
-    gateway_following_epp_routing.go:66: InferencePool gateway-conformance-app-backend/primary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted'
-    gateway_following_epp_routing.go:66: InferencePool gateway-conformance-app-backend/primary-inference-pool is Accepted by a parent Gateway (Reason: Accepted)
-    gateway_following_epp_routing.go:67: Waiting for Gateway gateway-conformance-infra/conformance-primary to get an address...
-    gateway_following_epp_routing.go:67: Gateway gateway-conformance-infra/conformance-primary has address: 10.96.88.207:80
-    gateway_following_epp_routing.go:69: Fetching backend pods with labels: map[app:primary-inference-model-server]
-    gateway_following_epp_routing.go:70: Searching for Pods with labels map[app:primary-inference-model-server] in namespace gateway-conformance-app-backend
-    traffic.go:151: 2025-10-14T01:04:03.813018971Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:04.039845221Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 2.084µs)
-    traffic.go:151: 2025-10-14T01:04:05.041253555Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:05.066757055Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:05.112700555Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 1.253732668s)
-    traffic.go:151: 2025-10-14T01:04:06.115200041Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:06.151358708Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:06.165424958Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:06.179929833Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 2.351770292s)
-    traffic.go:151: 2025-10-14T01:04:07.181082125Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:07.216399792Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 3.367397543s)
-    traffic.go:151: 2025-10-14T01:04:08.224429001Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:08.301275626Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 4.410123418s)
-    traffic.go:151: 2025-10-14T01:04:09.303616793Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:09.326721918Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 5.48990646s)
-    traffic.go:151: 2025-10-14T01:04:10.330729919Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:10.377394335Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:10.394770752Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 6.563737044s)
-    traffic.go:151: 2025-10-14T01:04:11.396055002Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:11.417175669Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:11.447515169Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 7.603522086s)
-    traffic.go:151: 2025-10-14T01:04:12.449247503Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:12.470789711Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:12.484345378Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:12.521570711Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 8.67069217s)
-    traffic.go:151: 2025-10-14T01:04:13.522841003Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:13.56501492Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:13.582837795Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 9.751359046s)
-    traffic.go:151: 2025-10-14T01:04:14.585632671Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:14.627992629Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 10.771919296s)
-    traffic.go:151: 2025-10-14T01:04:15.629393879Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:15.659107754Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:15.694903963Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 11.845439422s)
-    traffic.go:151: 2025-10-14T01:04:16.695987088Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:16.743231505Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 12.882275339s)
-    traffic.go:151: 2025-10-14T01:04:17.745030589Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:17.767151755Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 13.931277214s)
-    traffic.go:151: 2025-10-14T01:04:18.769121548Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:18.790365589Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 14.955418298s)
-    traffic.go:151: 2025-10-14T01:04:19.791362131Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:19.810897965Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 15.97767709s)
-    traffic.go:151: 2025-10-14T01:04:20.812557174Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:20.83321659Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:20.863946507Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 17.019561633s)
-    traffic.go:151: 2025-10-14T01:04:21.865297716Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:21.911503632Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 18.05145005s)
-    traffic.go:151: 2025-10-14T01:04:22.912107008Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:22.929975466Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 19.0983753s)
-    traffic.go:151: 2025-10-14T01:04:23.931500008Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:23.959583758Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 20.117819676s)
-    traffic.go:151: 2025-10-14T01:04:24.962713175Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:25.073250092Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:25.090582551Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:25.106454801Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 21.276928135s)
-    traffic.go:151: 2025-10-14T01:04:26.107014259Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:26.146236176Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 22.293346385s)
-    traffic.go:151: 2025-10-14T01:04:27.147186052Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:27.177865218Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 23.333471094s)
-    traffic.go:151: 2025-10-14T01:04:28.179942719Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:28.219684177Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 24.366152219s)
-    traffic.go:151: 2025-10-14T01:04:29.221286053Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:29.243604803Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 25.407605928s)
-    traffic.go:151: 2025-10-14T01:04:30.24408472Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:30.289387886Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 26.43041297s)
-    traffic.go:151: 2025-10-14T01:04:31.290334345Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:31.341147804Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.166]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-glbzw, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 27.476604596s)
-    traffic.go:151: 2025-10-14T01:04:32.342085846Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:32.392231304Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:32.438599596Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:169: 2025-10-14T01:04:32.470273637Z: Request passed
-    traffic.go:151: 2025-10-14T01:04:32.470374096Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:32.484053054Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 2.375µs)
-    traffic.go:151: 2025-10-14T01:04:33.487824888Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:33.574704471Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 1.016638084s)
-    traffic.go:151: 2025-10-14T01:04:34.576959555Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:34.633173013Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 2.106494126s)
-    traffic.go:151: 2025-10-14T01:04:35.634988583Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:35.693747958Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 3.164699876s)
-    traffic.go:151: 2025-10-14T01:04:36.694937Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:36.750264125Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:36.769162667Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 4.280011127s)
-    traffic.go:151: 2025-10-14T01:04:37.771181417Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:37.810051584Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 5.300907877s)
-    traffic.go:151: 2025-10-14T01:04:38.811659626Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:38.866971584Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 6.341297544s)
-    traffic.go:151: 2025-10-14T01:04:39.875429085Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:39.923348543Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:39.94242846Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:39.962341293Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 7.472177295s)
-    traffic.go:151: 2025-10-14T01:04:40.963085085Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:41.043185794Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-rgblk (after 8.492794129s)
-    traffic.go:151: 2025-10-14T01:04:42.044456419Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:42.083978294Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 9.574119338s)
-    traffic.go:151: 2025-10-14T01:04:43.088392128Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:43.149567628Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 10.618079171s)
-    traffic.go:151: 2025-10-14T01:04:44.151589712Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:44.208832087Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:44.225526295Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.170]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-lw9lc, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 11.738426089s)
-    traffic.go:151: 2025-10-14T01:04:45.227075713Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:45.281433338Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:45.300640921Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:169: 2025-10-14T01:04:45.325600254Z: Request passed
-    traffic.go:151: 2025-10-14T01:04:45.325758588Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:45.344870171Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 4.625µs)
-    traffic.go:151: 2025-10-14T01:04:46.347018421Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:46.378773546Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 1.021176001s)
-    traffic.go:151: 2025-10-14T01:04:47.379988464Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:47.40334288Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 2.054202001s)
-    traffic.go:151: 2025-10-14T01:04:48.407456256Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:48.467759131Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:48.485551464Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 3.141919877s)
-    traffic.go:151: 2025-10-14T01:04:49.486317631Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:49.514445923Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 4.160485294s)
-    traffic.go:151: 2025-10-14T01:04:50.521837465Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:50.609384007Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:50.64858034Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 5.283617628s)
-    traffic.go:151: 2025-10-14T01:04:51.651374757Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:51.695010841Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 6.325408795s)
-    traffic.go:151: 2025-10-14T01:04:52.699581091Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:52.755892383Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 7.371361879s)
-    traffic.go:151: 2025-10-14T01:04:53.757197717Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:53.779424133Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-glbzw (after 8.431435004s)
-    traffic.go:151: 2025-10-14T01:04:54.782000759Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:54.8078453Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 9.45622888s)
-    traffic.go:151: 2025-10-14T01:04:55.809546051Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:163: 2025-10-14T01:04:55.861264926Z: Response expectation failed for request: {URL: {Scheme:http Opaque: User: Host:10.96.88.207 Path:/primary-gateway-test RawPath: OmitHost:false ForceQuery:false RawQuery: Fragment: RawFragment:}, Host: primary.example.com, Protocol: HTTP, Method: POST, Headers: map[X-Echo-Set-Header:[] test-epp-endpoint-selection:[10.244.0.165]], UnfollowRedirect: false, Server: , CertPem: <truncated>, KeyPem: <truncated>}  not ready yet: expected pod name to start with primary-inference-model-server-deployment-66659cd5bf-rgblk, got primary-inference-model-server-deployment-66659cd5bf-lw9lc (after 10.483662172s)
-    traffic.go:151: 2025-10-14T01:04:56.862348593Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:56.920280093Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:151: 2025-10-14T01:04:56.948710968Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    traffic.go:169: 2025-10-14T01:04:56.966484301Z: Request passed
-=== RUN   TestInferenceExtensionConformance/GatewayFollowingEPPRouting/should_route_traffic_to_a_single_designated_pod
-    gateway_following_epp_routing.go:137: Sending request to 10.96.88.207:80 with EPP header 'test-epp-endpoint-selection: 10.244.0.165'
-    gateway_following_epp_routing.go:138: Expecting traffic to be routed to pod: [primary-inference-model-server-deployment-66659cd5bf-rgblk]
-    gateway_following_epp_routing.go:140: 2025-10-14T01:04:56.96718176Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    gateway_following_epp_routing.go:140: Not all the requests are sent to the expectedPods successfully, err: request was handled by an unexpected pod "primary-inference-model-server-deployment-66659cd5bf-lw9lc"
-=== RUN   TestInferenceExtensionConformance/GatewayFollowingEPPRouting/should_route_traffic_to_two_designated_pods
-    gateway_following_epp_routing.go:137: Sending request to 10.96.88.207:80 with EPP header 'test-epp-endpoint-selection: 10.244.0.166,10.244.0.170'
-    gateway_following_epp_routing.go:138: Expecting traffic to be routed to pod: [primary-inference-model-server-deployment-66659cd5bf-glbzw primary-inference-model-server-deployment-66659cd5bf-lw9lc]
-    gateway_following_epp_routing.go:140: 2025-10-14T01:04:57.378579052Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    gateway_following_epp_routing.go:140: Not all the requests are sent to the expectedPods successfully, err: request was handled by an unexpected pod "primary-inference-model-server-deployment-66659cd5bf-rgblk"
-=== RUN   TestInferenceExtensionConformance/GatewayFollowingEPPRouting/should_route_traffic_to_all_available_pods
-    gateway_following_epp_routing.go:137: Sending request to 10.96.88.207:80 with EPP header 'test-epp-endpoint-selection: 10.244.0.166,10.244.0.170,10.244.0.165'
-    gateway_following_epp_routing.go:138: Expecting traffic to be routed to pod: [primary-inference-model-server-deployment-66659cd5bf-glbzw primary-inference-model-server-deployment-66659cd5bf-lw9lc primary-inference-model-server-deployment-66659cd5bf-rgblk]
-    gateway_following_epp_routing.go:140: 2025-10-14T01:04:57.736901843Z: Making POST request to http://10.96.88.207/primary-gateway-test
-    gateway_following_epp_routing.go:140: Traffic successfully reached only to expected pods: [primary-inference-model-server-deployment-66659cd5bf-glbzw primary-inference-model-server-deployment-66659cd5bf-lw9lc primary-inference-model-server-deployment-66659cd5bf-rgblk]
-=== NAME  TestInferenceExtensionConformance/GatewayFollowingEPPRouting
-    apply.go:283: 2025-10-14T01:04:58.127986844Z: Deleting httproute-for-primary-gw HTTPRoute
-=== RUN   TestInferenceExtensionConformance/HTTPRouteInvalidInferencePoolRef
-    conformance.go:72: 2025-10-14T01:04:58.144944469Z: Applying tests/httproute_invalid_inferencepool_ref.yaml
-    apply.go:275: 2025-10-14T01:04:58.154912969Z: Creating httproute-to-non-existent-pool HTTPRoute
-    conformance.go:77: 2025-10-14T01:04:58.166170635Z: Running HTTPRouteInvalidInferencePoolRef, relying on the following features: -, Gateway-standard
-=== RUN   TestInferenceExtensionConformance/HTTPRouteInvalidInferencePoolRef/HTTPRoute_should_have_Accepted=True_and_ResolvedRefs=False_for_non-existent_InferencePool
-    httproute_invalid_inferencepool_ref.go:63: Successfully verified HTTPRoute gateway-conformance-app-backend/httproute-to-non-existent-pool has conditions: Accepted=True and ResolvedRefs=False (Reason: BackendNotFound) for Gateway gateway-conformance-infra/conformance-primary
-=== NAME  TestInferenceExtensionConformance/HTTPRouteInvalidInferencePoolRef
-    apply.go:283: 2025-10-14T01:04:59.182424011Z: Deleting httproute-to-non-existent-pool HTTPRoute
-=== RUN   TestInferenceExtensionConformance/HTTPRouteMultipleGatewaysDifferentPools
-    conformance.go:72: 2025-10-14T01:04:59.196436219Z: Applying tests/httproute_multiple_gateways_different_pools.yaml
-    apply.go:275: 2025-10-14T01:04:59.204446219Z: Creating route-for-primary-gateway HTTPRoute
-    apply.go:275: 2025-10-14T01:04:59.212459761Z: Creating route-for-secondary-gateway HTTPRoute
-=== RUN   TestInferenceExtensionConformance/HTTPRouteMultipleGatewaysDifferentPools/Primary_HTTPRoute,_InferencePool,_and_Gateway_path:_verify_status_and_traffic
-    httproute_multiple_gateways_different_pools.go:58: Waiting for HTTPRoute gateway-conformance-app-backend/route-for-primary-gateway to be Accepted by Gateway gateway-conformance-infra/conformance-primary
-    httproute_multiple_gateways_different_pools.go:58: Waiting for HTTPRoute gateway-conformance-app-backend/route-for-primary-gateway to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
-    httproute_multiple_gateways_different_pools.go:58: HTTPRoute gateway-conformance-app-backend/route-for-primary-gateway is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
-    httproute_multiple_gateways_different_pools.go:58: Waiting for InferencePool gateway-conformance-app-backend/primary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted)
-    httproute_multiple_gateways_different_pools.go:58: InferencePool gateway-conformance-app-backend/primary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted'
-    httproute_multiple_gateways_different_pools.go:58: InferencePool gateway-conformance-app-backend/primary-inference-pool is Accepted by a parent Gateway (Reason: Accepted)
-    httproute_multiple_gateways_different_pools.go:58: Successfully verified: HTTPRoute gateway-conformance-app-backend/route-for-primary-gateway (Gateway gateway-conformance-infra/conformance-primary) is Accepted & Resolved, and InferencePool gateway-conformance-app-backend/primary-inference-pool is RouteAccepted.
-    httproute_multiple_gateways_different_pools.go:66: Waiting for Gateway gateway-conformance-infra/conformance-primary to get an address...
-    httproute_multiple_gateways_different_pools.go:66: Gateway gateway-conformance-infra/conformance-primary has address: 10.96.88.207:80
-    traffic.go:151: 2025-10-14T01:05:00.230920428Z: Making GET request to http://10.96.88.207/test-primary-gateway
-    traffic.go:151: 2025-10-14T01:05:00.23344447Z: Making GET request to http://10.96.88.207/test-primary-gateway
-    traffic.go:151: 2025-10-14T01:05:00.234922095Z: Making GET request to http://10.96.88.207/test-primary-gateway
-    traffic.go:169: 2025-10-14T01:05:00.23615922Z: Request passed
-=== RUN   TestInferenceExtensionConformance/HTTPRouteMultipleGatewaysDifferentPools/Secondary_HTTPRoute,_InferencePool,_and_Gateway_path:_verify_status_and_traffic
-    httproute_multiple_gateways_different_pools.go:84: Waiting for HTTPRoute gateway-conformance-app-backend/route-for-secondary-gateway to be Accepted by Gateway gateway-conformance-infra/conformance-secondary
-    httproute_multiple_gateways_different_pools.go:84: Waiting for HTTPRoute gateway-conformance-app-backend/route-for-secondary-gateway to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-secondary
-    httproute_multiple_gateways_different_pools.go:84: HTTPRoute gateway-conformance-app-backend/route-for-secondary-gateway is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-secondary
-    httproute_multiple_gateways_different_pools.go:84: Waiting for InferencePool gateway-conformance-app-backend/secondary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted)
-    httproute_multiple_gateways_different_pools.go:84: InferencePool gateway-conformance-app-backend/secondary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted'
-    httproute_multiple_gateways_different_pools.go:84: InferencePool gateway-conformance-app-backend/secondary-inference-pool is Accepted by a parent Gateway (Reason: Accepted)
-    httproute_multiple_gateways_different_pools.go:84: Successfully verified: HTTPRoute gateway-conformance-app-backend/route-for-secondary-gateway (Gateway gateway-conformance-infra/conformance-secondary) is Accepted & Resolved, and InferencePool gateway-conformance-app-backend/secondary-inference-pool is RouteAccepted.
-    httproute_multiple_gateways_different_pools.go:92: Waiting for Gateway gateway-conformance-infra/conformance-secondary to get an address...
-    httproute_multiple_gateways_different_pools.go:92: Gateway gateway-conformance-infra/conformance-secondary has address: 10.96.165.14:80
-    traffic.go:151: 2025-10-14T01:05:00.247583886Z: Making GET request to http://10.96.165.14/test-secondary-gateway
-    traffic.go:151: 2025-10-14T01:05:00.248908345Z: Making GET request to http://10.96.165.14/test-secondary-gateway
-    traffic.go:151: 2025-10-14T01:05:00.249860136Z: Making GET request to http://10.96.165.14/test-secondary-gateway
-    traffic.go:169: 2025-10-14T01:05:00.250706761Z: Request passed
-=== NAME  TestInferenceExtensionConformance/HTTPRouteMultipleGatewaysDifferentPools
-    apply.go:283: 2025-10-14T01:05:00.250741053Z: Deleting route-for-secondary-gateway HTTPRoute
-    apply.go:283: 2025-10-14T01:05:00.256212345Z: Deleting route-for-primary-gateway HTTPRoute
-=== RUN   TestInferenceExtensionConformance/InferencePoolAccepted
-    conformance.go:72: 2025-10-14T01:05:00.261789928Z: Applying tests/inferencepool_accepted.yaml
-    apply.go:275: 2025-10-14T01:05:00.268909345Z: Creating httproute-for-inferencepool-accepted HTTPRoute
-    conformance.go:77: 2025-10-14T01:05:00.283944303Z: Running InferencePoolAccepted, relying on the following features: -, Gateway-standard
-=== RUN   TestInferenceExtensionConformance/InferencePoolAccepted/InferencePool_should_have_Accepted_condition_set_to_True
-    inferencepool_accepted.go:54: InferencePool gateway-conformance-app-backend/primary-inference-pool successfully has condition Type=Accepted, Status=True
-=== NAME  TestInferenceExtensionConformance/InferencePoolAccepted
-    apply.go:283: 2025-10-14T01:05:00.290032886Z: Deleting httproute-for-inferencepool-accepted HTTPRoute
-=== RUN   TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation
-    conformance.go:72: 2025-10-14T01:05:00.296340053Z: Applying tests/inferencepool_httproute_port_validation.yaml
-    apply.go:275: 2025-10-14T01:05:00.31779672Z: Creating httproute-pool-port-unspecified HTTPRoute
-    apply.go:275: 2025-10-14T01:05:00.328601886Z: Creating httproute-pool-port-matching HTTPRoute
-    apply.go:275: 2025-10-14T01:05:00.336310345Z: Creating httproute-pool-port-non-matching HTTPRoute
-    conformance.go:77: 2025-10-14T01:05:00.352536553Z: Running InferencePoolHTTPRoutePortValidation, relying on the following features: -, Gateway-standard
-    inferencepool_httproute_port_validation.go:47: Waiting for Gateway gateway-conformance-infra/conformance-primary to get an address...
-    inferencepool_httproute_port_validation.go:47: Gateway gateway-conformance-infra/conformance-primary has address: 10.96.88.207:80
-=== RUN   TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation/Scenario_1:_HTTPRoute_backendRef_to_InferencePool_with_Port_Unspecified
-    inferencepool_httproute_port_validation.go:54: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-pool-port-unspecified to be Accepted by Gateway gateway-conformance-infra/conformance-primary
-    inferencepool_httproute_port_validation.go:54: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-pool-port-unspecified to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
-    inferencepool_httproute_port_validation.go:54: HTTPRoute gateway-conformance-app-backend/httproute-pool-port-unspecified is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
-    inferencepool_httproute_port_validation.go:55: Waiting for InferencePool gateway-conformance-app-backend/primary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted)
-    inferencepool_httproute_port_validation.go:55: InferencePool gateway-conformance-app-backend/primary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted'
-    inferencepool_httproute_port_validation.go:55: InferencePool gateway-conformance-app-backend/primary-inference-pool is Accepted by a parent Gateway (Reason: Accepted)
-    traffic.go:151: 2025-10-14T01:05:02.364763929Z: Making GET request to http://10.96.88.207/test-port-unspecified
-    traffic.go:151: 2025-10-14T01:05:02.366493929Z: Making GET request to http://10.96.88.207/test-port-unspecified
-    traffic.go:151: 2025-10-14T01:05:02.367530596Z: Making GET request to http://10.96.88.207/test-port-unspecified
-    traffic.go:169: 2025-10-14T01:05:02.368427012Z: Request passed
-=== RUN   TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation/Scenario_2:_HTTPRoute_backendRef_to_InferencePool_with_Port_Specified_and_Matching
-    inferencepool_httproute_port_validation.go:76: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-pool-port-matching to be Accepted by Gateway gateway-conformance-infra/conformance-primary
-    inferencepool_httproute_port_validation.go:76: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-pool-port-matching to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
-    inferencepool_httproute_port_validation.go:76: HTTPRoute gateway-conformance-app-backend/httproute-pool-port-matching is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
-    inferencepool_httproute_port_validation.go:77: Waiting for InferencePool gateway-conformance-app-backend/primary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted)
-    inferencepool_httproute_port_validation.go:77: InferencePool gateway-conformance-app-backend/primary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted'
-    inferencepool_httproute_port_validation.go:77: InferencePool gateway-conformance-app-backend/primary-inference-pool is Accepted by a parent Gateway (Reason: Accepted)
-    traffic.go:151: 2025-10-14T01:05:02.375146137Z: Making GET request to http://10.96.88.207/test-port-matching
-    traffic.go:151: 2025-10-14T01:05:02.376380554Z: Making GET request to http://10.96.88.207/test-port-matching
-    traffic.go:151: 2025-10-14T01:05:02.377616304Z: Making GET request to http://10.96.88.207/test-port-matching
-    traffic.go:169: 2025-10-14T01:05:02.378851304Z: Request passed
-=== RUN   TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation/Scenario_3:_HTTPRoute_backendRef_to_InferencePool_with_Port_Specified_and_Non-Matching._Request_still_passing_because_HTTP_Port_is_ignored_when_inferencePool_is_backendRef
-    inferencepool_httproute_port_validation.go:99: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-pool-port-non-matching to be Accepted by Gateway gateway-conformance-infra/conformance-primary
-    inferencepool_httproute_port_validation.go:99: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-pool-port-non-matching to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
-    inferencepool_httproute_port_validation.go:99: HTTPRoute gateway-conformance-app-backend/httproute-pool-port-non-matching is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
-    inferencepool_httproute_port_validation.go:100: Waiting for InferencePool gateway-conformance-app-backend/primary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted)
-    inferencepool_httproute_port_validation.go:100: InferencePool gateway-conformance-app-backend/primary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted'
-    inferencepool_httproute_port_validation.go:100: InferencePool gateway-conformance-app-backend/primary-inference-pool is Accepted by a parent Gateway (Reason: Accepted)
-    traffic.go:151: 2025-10-14T01:05:02.384983429Z: Making GET request to http://10.96.88.207/test-port-non-matching
-    traffic.go:151: 2025-10-14T01:05:02.386198887Z: Making GET request to http://10.96.88.207/test-port-non-matching
-    traffic.go:151: 2025-10-14T01:05:02.387009179Z: Making GET request to http://10.96.88.207/test-port-non-matching
-    traffic.go:169: 2025-10-14T01:05:02.388009429Z: Request passed
-=== NAME  TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation
-    apply.go:283: 2025-10-14T01:05:02.388062512Z: Deleting httproute-pool-port-non-matching HTTPRoute
-    apply.go:283: 2025-10-14T01:05:02.392225554Z: Deleting httproute-pool-port-matching HTTPRoute
-    apply.go:283: 2025-10-14T01:05:02.396365179Z: Deleting httproute-pool-port-unspecified HTTPRoute
-=== RUN   TestInferenceExtensionConformance/InferencePoolInvalidEPPService
-    conformance.go:72: 2025-10-14T01:05:02.402896512Z: Applying tests/inferencepool_invalid_epp_service.yaml
-    apply.go:275: 2025-10-14T01:05:02.415857471Z: Creating pool-with-invalid-epp InferencePool
-    apply.go:275: 2025-10-14T01:05:02.422569762Z: Creating httproute-for-invalid-epp-pool HTTPRoute
-    conformance.go:77: 2025-10-14T01:05:02.435913971Z: Running InferencePoolInvalidEPPService, relying on the following features: Gateway-standard, HTTPRoute-standard, -
-    inferencepool_invalid_epp_service.go:55: Waiting for Gateway gateway-conformance-infra/conformance-primary to get an address...
-    inferencepool_invalid_epp_service.go:55: Gateway gateway-conformance-infra/conformance-primary has address: 10.96.88.207:80
-=== RUN   TestInferenceExtensionConformance/InferencePoolInvalidEPPService/InferecePool_has_a_ResolvedRefs_Condition_with_status_False
-    inferencepool_invalid_epp_service.go:68: InferencePool gateway-conformance-app-backend/pool-with-invalid-epp successfully has condition Type=ResolvedRefs, Status=False
-=== RUN   TestInferenceExtensionConformance/InferencePoolInvalidEPPService/Request_to_a_route_with_an_invalid_backend_reference_receives_a_500_response
-    traffic.go:151: 2025-10-14T01:05:03.466323221Z: Making GET request to http://10.96.88.207/invalid-epp-test
-    traffic.go:151: 2025-10-14T01:05:03.467345763Z: Making GET request to http://10.96.88.207/invalid-epp-test
-    traffic.go:151: 2025-10-14T01:05:03.468300846Z: Making GET request to http://10.96.88.207/invalid-epp-test
-    traffic.go:169: 2025-10-14T01:05:03.469222846Z: Request passed
-=== NAME  TestInferenceExtensionConformance/InferencePoolInvalidEPPService
-    apply.go:283: 2025-10-14T01:05:03.469294346Z: Deleting httproute-for-invalid-epp-pool HTTPRoute
-    apply.go:283: 2025-10-14T01:05:03.47732543Z: Deleting pool-with-invalid-epp InferencePool
-=== RUN   TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools
-    conformance.go:72: 2025-10-14T01:05:03.481843888Z: Applying tests/inferencepool_multiple_rules_different_pools.yaml
-    apply.go:275: 2025-10-14T01:05:03.49263818Z: Creating httproute-multiple-rules-different-pools HTTPRoute
-    conformance.go:77: 2025-10-14T01:05:03.498142263Z: Running HTTPRouteMultipleRulesDifferentPools, relying on the following features: Gateway-standard, HTTPRoute-standard, -
-=== RUN   TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Wait_for_resources_to_be_accepted
-    inferencepool_multiple_rules_different_pools.go:60: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools to be Accepted by Gateway gateway-conformance-infra/conformance-primary
-    inferencepool_multiple_rules_different_pools.go:60: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
-    inferencepool_multiple_rules_different_pools.go:60: HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
-    inferencepool_multiple_rules_different_pools.go:60: Waiting for InferencePool gateway-conformance-app-backend/primary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted)
-    inferencepool_multiple_rules_different_pools.go:60: InferencePool gateway-conformance-app-backend/primary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted'
-    inferencepool_multiple_rules_different_pools.go:60: InferencePool gateway-conformance-app-backend/primary-inference-pool is Accepted by a parent Gateway (Reason: Accepted)
-    inferencepool_multiple_rules_different_pools.go:60: Successfully verified: HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools (Gateway gateway-conformance-infra/conformance-primary) is Accepted & Resolved, and InferencePool gateway-conformance-app-backend/primary-inference-pool is RouteAccepted.
-    inferencepool_multiple_rules_different_pools.go:61: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools to be Accepted by Gateway gateway-conformance-infra/conformance-primary
-    inferencepool_multiple_rules_different_pools.go:61: Waiting for HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools to have ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
-    inferencepool_multiple_rules_different_pools.go:61: HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools is now Accepted and has ResolvedRefs by Gateway gateway-conformance-infra/conformance-primary
-    inferencepool_multiple_rules_different_pools.go:61: Waiting for InferencePool gateway-conformance-app-backend/secondary-inference-pool to be Accepted by a parent Gateway (Reason: Accepted)
-    inferencepool_multiple_rules_different_pools.go:61: InferencePool gateway-conformance-app-backend/secondary-inference-pool successfully has condition Type=Accepted, Status=True, Reason='Accepted'
-    inferencepool_multiple_rules_different_pools.go:61: InferencePool gateway-conformance-app-backend/secondary-inference-pool is Accepted by a parent Gateway (Reason: Accepted)
-    inferencepool_multiple_rules_different_pools.go:61: Successfully verified: HTTPRoute gateway-conformance-app-backend/httproute-multiple-rules-different-pools (Gateway gateway-conformance-infra/conformance-primary) is Accepted & Resolved, and InferencePool gateway-conformance-app-backend/secondary-inference-pool is RouteAccepted.
-=== RUN   TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Traffic_should_be_routed_to_the_correct_pool_based_on_path
-    inferencepool_multiple_rules_different_pools.go:65: Waiting for Gateway gateway-conformance-infra/conformance-primary to get an address...
-    inferencepool_multiple_rules_different_pools.go:65: Gateway gateway-conformance-infra/conformance-primary has address: 10.96.88.207:80
-=== RUN   TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Traffic_should_be_routed_to_the_correct_pool_based_on_path/request_to_primary_pool
-    inferencepool_multiple_rules_different_pools.go:68: 2025-10-14T01:05:04.52978643Z: Making GET request to http://10.96.88.207/primary
-    http.go:251: 2025-10-14T01:05:04.536417763Z: Request passed
-=== RUN   TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Traffic_should_be_routed_to_the_correct_pool_based_on_path/request_to_secondary_pool
-    inferencepool_multiple_rules_different_pools.go:79: 2025-10-14T01:05:04.536504847Z: Making GET request to http://10.96.88.207/secondary
-    http.go:251: 2025-10-14T01:05:04.539573222Z: Request passed
-=== NAME  TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools
-    apply.go:283: 2025-10-14T01:05:04.539630847Z: Deleting httproute-multiple-rules-different-pools HTTPRoute
-=== RUN   TestInferenceExtensionConformance/InferencePoolResolvedRefsCondition
-    conformance.go:68: Skipping InferencePoolResolvedRefsCondition: test explicitly skipped
-=== NAME  TestInferenceExtensionConformance
-    apply.go:283: 2025-10-14T01:05:04.54499568Z: Deleting epp-to-inference-model-reader RoleBinding
-    apply.go:283: 2025-10-14T01:05:04.549125555Z: Deleting inference-model-reader Role
-    apply.go:283: 2025-10-14T01:05:04.552758388Z: Deleting plugins-config ConfigMap
-    apply.go:283: 2025-10-14T01:05:04.55691718Z: Deleting secondary-app-endpoint-picker Deployment
-    apply.go:283: 2025-10-14T01:05:04.561630972Z: Deleting secondary-endpoint-picker-svc Service
-    apply.go:283: 2025-10-14T01:05:04.58978643Z: Deleting secondary-inference-pool InferencePool
-    apply.go:283: 2025-10-14T01:05:04.59695693Z: Deleting primary-app-endpoint-picker Deployment
-    apply.go:283: 2025-10-14T01:05:04.613129388Z: Deleting primary-endpoint-picker-svc Service
-    apply.go:283: 2025-10-14T01:05:04.655569013Z: Deleting primary-inference-pool InferencePool
-    apply.go:283: 2025-10-14T01:05:04.673237097Z: Deleting secondary-inference-model-server-deployment Deployment
-    apply.go:283: 2025-10-14T01:05:04.682511888Z: Deleting primary-inference-model-server-deployment Deployment
-    apply.go:283: 2025-10-14T01:05:04.703110263Z: Deleting conformance-secondary Gateway
-    apply.go:283: 2025-10-14T01:05:04.715927263Z: Deleting conformance-primary Gateway
-    apply.go:283: 2025-10-14T01:05:04.725649972Z: Deleting gateway-conformance-app-backend Namespace
-    apply.go:283: 2025-10-14T01:05:04.764089138Z: Deleting gateway-conformance-infra Namespace
---- FAIL: TestInferenceExtensionConformance (107.40s)
-    --- PASS: TestInferenceExtensionConformance/EppUnAvailableFailOpen (26.58s)
-        --- PASS: TestInferenceExtensionConformance/EppUnAvailableFailOpen/Phase_1:_Verify_baseline_connectivity_with_EPP_available (23.32s)
-        --- PASS: TestInferenceExtensionConformance/EppUnAvailableFailOpen/Phase_2:_Verify_fail-open_behavior_after_EPP_becomes_unavailable (2.20s)
-    --- FAIL: TestInferenceExtensionConformance/GatewayFollowingEPPRouting (55.39s)
-        --- FAIL: TestInferenceExtensionConformance/GatewayFollowingEPPRouting/should_route_traffic_to_a_single_designated_pod (0.41s)
-        --- FAIL: TestInferenceExtensionConformance/GatewayFollowingEPPRouting/should_route_traffic_to_two_designated_pods (0.36s)
-        --- PASS: TestInferenceExtensionConformance/GatewayFollowingEPPRouting/should_route_traffic_to_all_available_pods (0.39s)
-    --- PASS: TestInferenceExtensionConformance/HTTPRouteInvalidInferencePoolRef (1.05s)
-        --- PASS: TestInferenceExtensionConformance/HTTPRouteInvalidInferencePoolRef/HTTPRoute_should_have_Accepted=True_and_ResolvedRefs=False_for_non-existent_InferencePool (1.02s)
-    --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleGatewaysDifferentPools (1.07s)
-        --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleGatewaysDifferentPools/Primary_HTTPRoute,_InferencePool,_and_Gateway_path:_verify_status_and_traffic (1.02s)
-        --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleGatewaysDifferentPools/Secondary_HTTPRoute,_InferencePool,_and_Gateway_path:_verify_status_and_traffic (0.01s)
-    --- PASS: TestInferenceExtensionConformance/InferencePoolAccepted (0.03s)
-        --- PASS: TestInferenceExtensionConformance/InferencePoolAccepted/InferencePool_should_have_Accepted_condition_set_to_True (0.01s)
-    --- PASS: TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation (2.11s)
-        --- PASS: TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation/Scenario_1:_HTTPRoute_backendRef_to_InferencePool_with_Port_Unspecified (2.01s)
-        --- PASS: TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation/Scenario_2:_HTTPRoute_backendRef_to_InferencePool_with_Port_Specified_and_Matching (0.01s)
-        --- PASS: TestInferenceExtensionConformance/InferencePoolHTTPRoutePortValidation/Scenario_3:_HTTPRoute_backendRef_to_InferencePool_with_Port_Specified_and_Non-Matching._Request_still_passing_because_HTTP_Port_is_ignored_when_inferencePool_is_backendRef (0.01s)
-    --- PASS: TestInferenceExtensionConformance/InferencePoolInvalidEPPService (1.08s)
-        --- PASS: TestInferenceExtensionConformance/InferencePoolInvalidEPPService/InferecePool_has_a_ResolvedRefs_Condition_with_status_False (0.00s)
-        --- PASS: TestInferenceExtensionConformance/InferencePoolInvalidEPPService/Request_to_a_route_with_an_invalid_backend_reference_receives_a_500_response (0.00s)
-    --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools (1.06s)
-        --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Wait_for_resources_to_be_accepted (1.02s)
-        --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Traffic_should_be_routed_to_the_correct_pool_based_on_path (0.02s)
-            --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Traffic_should_be_routed_to_the_correct_pool_based_on_path/request_to_primary_pool (0.01s)
-            --- PASS: TestInferenceExtensionConformance/HTTPRouteMultipleRulesDifferentPools/Traffic_should_be_routed_to_the_correct_pool_based_on_path/request_to_secondary_pool (0.00s)
-    --- SKIP: TestInferenceExtensionConformance/InferencePoolResolvedRefsCondition (0.00s)
-FAIL
-FAIL	github.com/nginx/nginx-gateway-fabric/v2/tests/conformance	107.446s
-FAIL

From 05b166eae8e71db5b6540827d4b38d906066c6da Mon Sep 17 00:00:00 2001
From: salonichf5 <146118978+salonichf5@users.noreply.github.com>
Date: Tue, 14 Oct 2025 18:28:37 -0600
Subject: [PATCH 12/12] Clean up code and add unit tests

---
 examples/cafe-example/cafe-routes.yaml        |  37 ++
 examples/cafe-example/cafe.yaml               |  65 +++
 examples/cafe-example/eppinvalidservice.yaml  |  35 --
 examples/cafe-example/epprouting.yaml         |  23 --
 .../cafe-example/eppunavailablefailopen.yaml  |  23 --
 examples/cafe-example/gateway.yaml            | 387 +-----------------
 .../cafe-example/httproutemultiplerules.yaml  |  26 --
 .../httproutesmultiplegateways.yaml           |  44 --
 .../inference-pool-port-validation.yaml       |  79 ----
 .../inferencepool-resolvedrefs.yaml           |  54 ---
 internal/controller/nginx/config/maps.go      |   2 +-
 internal/controller/nginx/config/maps_test.go |  41 +-
 internal/controller/nginx/config/servers.go   |  25 +-
 .../nginx/config/servers_template.go          |   1 +
 .../controller/nginx/config/servers_test.go   |  15 +-
 .../controller/state/conditions/conditions.go |  15 +
 .../state/dataplane/configuration.go          |  14 +-
 .../state/dataplane/configuration_test.go     |   4 +
 internal/controller/state/dataplane/types.go  |   9 +-
 .../controller/state/graph/backend_refs.go    |  23 +-
 .../state/graph/backend_refs_test.go          |  14 +-
 internal/controller/state/graph/graph_test.go |  10 +-
 .../state/graph/inferencepools_test.go        |   6 +
 .../controller/state/graph/route_common.go    |  12 +-
 tests/Makefile                                |   6 +
 tests/conformance/conformance_test.go         |   3 -
 26 files changed, 235 insertions(+), 738 deletions(-)
 create mode 100644 examples/cafe-example/cafe-routes.yaml
 create mode 100644 examples/cafe-example/cafe.yaml
 delete mode 100644 examples/cafe-example/eppinvalidservice.yaml
 delete mode 100644 examples/cafe-example/epprouting.yaml
 delete mode 100644 examples/cafe-example/eppunavailablefailopen.yaml
 delete mode 100644 examples/cafe-example/httproutemultiplerules.yaml
 delete mode 100644 examples/cafe-example/httproutesmultiplegateways.yaml
 delete mode 100644 examples/cafe-example/inference-pool-port-validation.yaml
 delete mode 100644 examples/cafe-example/inferencepool-resolvedrefs.yaml

diff --git a/examples/cafe-example/cafe-routes.yaml b/examples/cafe-example/cafe-routes.yaml
new file mode 100644
index 0000000000..67927335cb
--- /dev/null
+++ b/examples/cafe-example/cafe-routes.yaml
@@ -0,0 +1,37 @@
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: coffee
+spec:
+  parentRefs:
+  - name: gateway
+    sectionName: http
+  hostnames:
+  - "cafe.example.com"
+  rules:
+  - matches:
+    - path:
+        type: PathPrefix
+        value: /coffee
+    backendRefs:
+    - name: coffee
+      port: 80
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: tea
+spec:
+  parentRefs:
+  - name: gateway
+    sectionName: http
+  hostnames:
+  - "cafe.example.com"
+  rules:
+  - matches:
+    - path:
+        type: Exact
+        value: /tea
+    backendRefs:
+    - name: tea
+      port: 80
diff --git a/examples/cafe-example/cafe.yaml b/examples/cafe-example/cafe.yaml
new file mode 100644
index 0000000000..2d03ae59ff
--- /dev/null
+++ b/examples/cafe-example/cafe.yaml
@@ -0,0 +1,65 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: coffee
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: coffee
+  template:
+    metadata:
+      labels:
+        app: coffee
+    spec:
+      containers:
+      - name: coffee
+        image: nginxdemos/nginx-hello:plain-text
+        ports:
+        - containerPort: 8080
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: coffee
+spec:
+  ports:
+  - port: 80
+    targetPort: 8080
+    protocol: TCP
+    name: http
+  selector:
+    app: coffee
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: tea
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: tea
+  template:
+    metadata:
+      labels:
+        app: tea
+    spec:
+      containers:
+      - name: tea
+        image: nginxdemos/nginx-hello:plain-text
+        ports:
+        - containerPort: 8080
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: tea
+spec:
+  ports:
+  - port: 80
+    targetPort: 8080
+    protocol: TCP
+    name: http
+  selector:
+    app: tea
diff --git a/examples/cafe-example/eppinvalidservice.yaml b/examples/cafe-example/eppinvalidservice.yaml
deleted file mode 100644
index f0a0e62ea9..0000000000
--- a/examples/cafe-example/eppinvalidservice.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-apiVersion: inference.networking.k8s.io/v1
-kind: InferencePool
-metadata:
-  name: pool-with-invalid-epp
-  namespace: gateway-conformance-app-backend
-spec:
-  selector:
-    matchLabels:
-      app: primary-inference-model-server
-  targetPorts:
-  - number: 3000
-  endpointPickerRef:
-    name: non-existent-epp-svc
-    kind: Service
-    port:
-      number: 9002
----
-apiVersion: gateway.networking.k8s.io/v1
-kind: HTTPRoute
-metadata:
-  name: httproute-for-invalid-epp-pool
-  namespace: gateway-conformance-app-backend
-spec:
-  parentRefs:
-  - name: conformance-primary
-    namespace: gateway-conformance-infra
-  rules:
-  - backendRefs:
-    - name: pool-with-invalid-epp
-      kind: InferencePool
-      group: inference.networking.k8s.io
-    matches:
-    - path:
-        type: PathPrefix
-        value: /invalid-epp-test
\ No newline at end of file
diff --git a/examples/cafe-example/epprouting.yaml b/examples/cafe-example/epprouting.yaml
deleted file mode 100644
index dd199f1ce6..0000000000
--- a/examples/cafe-example/epprouting.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-apiVersion: gateway.networking.k8s.io/v1
-kind: HTTPRoute
-metadata:
-  name: httproute-for-primary-gw
-  namespace: gateway-conformance-app-backend
-spec:
-  parentRefs:
-  - group: gateway.networking.k8s.io
-    kind: Gateway
-    name: conformance-primary
-    namespace: gateway-conformance-infra
-    sectionName: http
-  hostnames:
-  - "primary.example.com"
-  rules:
-  - backendRefs:
-    - group: inference.networking.k8s.io
-      kind: InferencePool
-      name: primary-inference-pool
-    matches:
-    - path:
-        type: PathPrefix
-        value: /primary-gateway-test
diff --git a/examples/cafe-example/eppunavailablefailopen.yaml b/examples/cafe-example/eppunavailablefailopen.yaml
deleted file mode 100644
index 6549cd9188..0000000000
--- a/examples/cafe-example/eppunavailablefailopen.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-apiVersion: gateway.networking.k8s.io/v1
-kind: HTTPRoute
-metadata:
-  name: httproute-for-failopen-pool-gw
-  namespace: gateway-conformance-app-backend
-spec:
-  parentRefs:
-  - group: gateway.networking.k8s.io
-    kind: Gateway
-    name: conformance-secondary
-    namespace: gateway-conformance-infra
-    sectionName: http
-  hostnames:
-  - "secondary.example.com"
-  rules:
-  - backendRefs:
-    - group: inference.networking.k8s.io
-      kind: InferencePool
-      name: secondary-inference-pool # Use secondary-inferencePool because it has failureMode set to failOpen
-    matches:
-    - path:
-        type: PathPrefix
-        value: /failopen-pool-test
\ No newline at end of file
diff --git a/examples/cafe-example/gateway.yaml b/examples/cafe-example/gateway.yaml
index 61acb91d17..e6507f613b 100644
--- a/examples/cafe-example/gateway.yaml
+++ b/examples/cafe-example/gateway.yaml
@@ -1,394 +1,11 @@
-# Base Kubernetes resources for the Gateway API Inference Extension conformance tests.
-# This includes namespaces and a minimal set of resources (Gateway, Backend)
-# required by many tests. More specific resources should be defined within
-# individual test files or other resource directories (e.g., sample_backends).
-
----
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: gateway-conformance-infra
-  labels:
-    gateway-conformance: infra
----
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: gateway-conformance-app-backend
-  labels:
-    gateway-conformance: backend
----
-# A basic Gateway resource that allows HTTPRoutes from the same namespace.
-# Tests can use this as a parent reference for routes that target InferencePools.
 apiVersion: gateway.networking.k8s.io/v1
 kind: Gateway
 metadata:
-  name: conformance-primary
-  namespace: gateway-conformance-infra
+  name: gateway
 spec:
   gatewayClassName: nginx
   listeners:
   - name: http
     port: 80
     protocol: HTTP
-    allowedRoutes:
-      namespaces:
-        from: All
-      kinds:
-      - group: gateway.networking.k8s.io
-        kind: HTTPRoute
----
-apiVersion: gateway.networking.k8s.io/v1
-kind: Gateway
-metadata:
-  name: conformance-secondary
-  namespace: gateway-conformance-infra
-spec:
-  gatewayClassName: nginx
-  listeners:
-  - name: http
-    port: 80
-    protocol: HTTP
-    hostname: "secondary.example.com"
-    allowedRoutes:
-      namespaces:
-        from: All
-
-### The following defines the essential resources for the gateway conformance test.
-### All resources are created in the 'gateway-conformance-app-backend' namespace.
----
-# Deploys a mock backend service to act as a model server.
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: primary-inference-model-server-deployment
-  namespace: gateway-conformance-app-backend
-  labels:
-    app: primary-inference-model-server
-spec:
-  replicas: 3
-  selector:
-    matchLabels:
-      app: primary-inference-model-server
-  template:
-    metadata:
-      labels:
-        app: primary-inference-model-server
-    spec:
-      containers:
-      - name: echoserver
-        image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd
-        ports:
-        - containerPort: 3000
-        readinessProbe:
-          httpGet:
-            path: /
-            port: 3000
-          initialDelaySeconds: 3
-          periodSeconds: 5
-          failureThreshold: 2
-        env:
-        - name: POD_NAME
-          valueFrom:
-            fieldRef:
-              fieldPath: metadata.name
-        - name: NAMESPACE
-          valueFrom:
-            fieldRef:
-              fieldPath: metadata.namespace
-        - name: POD_IP
-          valueFrom:
-            fieldRef:
-              fieldPath: status.podIP
----
-# Deploys a secondary mock backend service to act as a model server.
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: secondary-inference-model-server-deployment
-  namespace: gateway-conformance-app-backend
-  labels:
-    app: secondary-inference-model-server
-spec:
-  replicas: 3
-  selector:
-    matchLabels:
-      app: secondary-inference-model-server
-  template:
-    metadata:
-      labels:
-        app: secondary-inference-model-server
-    spec:
-      containers:
-      - name: echoserver
-        image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd
-        ports:
-        - containerPort: 3000
-        readinessProbe:
-          httpGet:
-            path: /
-            port: 3000
-          initialDelaySeconds: 3
-          periodSeconds: 5
-          failureThreshold: 2
-        env:
-        - name: POD_NAME
-          valueFrom:
-            fieldRef:
-              fieldPath: metadata.name
-        - name: NAMESPACE
-          valueFrom:
-            fieldRef:
-              fieldPath: metadata.namespace
-        - name: POD_IP
-          valueFrom:
-            fieldRef:
-              fieldPath: status.podIP
----
-# --- Primary InferencePool Definition ---
-apiVersion: inference.networking.k8s.io/v1
-kind: InferencePool
-metadata:
-  name: primary-inference-pool
-  namespace: gateway-conformance-app-backend
-spec:
-  selector:
-    matchLabels:
-      app: primary-inference-model-server
-  targetPorts:
-    - number: 3000
-  endpointPickerRef:
-    name: primary-endpoint-picker-svc
-    port:
-      number: 9002
----
-# --- Primary Conformance EPP service Definition ---
-apiVersion: v1
-kind: Service
-metadata:
-  name: primary-endpoint-picker-svc
-  namespace: gateway-conformance-app-backend
-spec:
-  selector:
-    app: primary-app-backend-epp
-  ports:
-    - protocol: TCP
-      port: 9002
-      targetPort: 9002
-      appProtocol: http2
-  type: ClusterIP
----
-# --- Primary Conformance EPP Deployment ---
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: primary-app-endpoint-picker
-  namespace: gateway-conformance-app-backend
-  labels:
-    app: primary-app-backend-epp
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: primary-app-backend-epp
-  template:
-    metadata:
-      labels:
-        app: primary-app-backend-epp
-    spec:
-      # Conservatively, this timeout should mirror the longest grace period of the pods within the pool
-      terminationGracePeriodSeconds: 130
-      containers:
-      - name: epp
-        image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v1.0.0
-        imagePullPolicy: Always
-        args:
-        - --pool-name
-        - "primary-inference-pool"
-        - --pool-namespace
-        - "gateway-conformance-app-backend"
-        - --v
-        - "4"
-        - --zap-encoder
-        - "json"
-        - --grpc-port
-        - "9002"
-        - --grpc-health-port
-        - "9003"
-        - "--config-file"
-        - "/config/conformance-plugins.yaml"
-        env:
-        - name: ENABLE_REQ_HEADER_BASED_SCHEDULER_FOR_TESTING # Used for conformance test.
-          value: "true"
-        ports:
-        - containerPort: 9002
-        - containerPort: 9003
-        - name: metrics
-          containerPort: 9090
-        livenessProbe:
-          grpc:
-            port: 9003
-            service: inference-extension
-          initialDelaySeconds: 5
-          periodSeconds: 10
-        readinessProbe:
-          grpc:
-            port: 9003
-            service: inference-extension
-          initialDelaySeconds: 5
-          periodSeconds: 10
-        volumeMounts:
-        - name: plugins-config-volume
-          mountPath: "/config"
-      volumes:
-      - name: plugins-config-volume
-        configMap:
-          name: plugins-config
----
-# --- Secondary InferencePool Definition ---
-apiVersion: inference.networking.k8s.io/v1
-kind: InferencePool
-metadata:
-  name: secondary-inference-pool
-  namespace: gateway-conformance-app-backend
-spec:
-  selector:
-    matchLabels:
-      app: secondary-inference-model-server
-  targetPorts:
-    - number: 3000
-  endpointPickerRef:
-    name: secondary-endpoint-picker-svc
-    failureMode: FailOpen
-    port:
-      number: 9002
----
-# --- Secondary Conformance EPP service Definition ---
-apiVersion: v1
-kind: Service
-metadata:
-  name: secondary-endpoint-picker-svc
-  namespace: gateway-conformance-app-backend
-spec:
-  selector:
-    app: secondary-app-backend-epp
-  ports:
-    - protocol: TCP
-      port: 9002
-      targetPort: 9002
-      appProtocol: http2
-  type: ClusterIP
----
-# --- Secondary Conformance EPP Deployment ---
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: secondary-app-endpoint-picker
-  namespace: gateway-conformance-app-backend
-  labels:
-    app: secondary-app-backend-epp
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: secondary-app-backend-epp
-  template:
-    metadata:
-      labels:
-        app: secondary-app-backend-epp
-    spec:
-      # Conservatively, this timeout should mirror the longest grace period of the pods within the pool
-      terminationGracePeriodSeconds: 130
-      containers:
-      - name: epp
-        image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v1.0.0
-        imagePullPolicy: Always
-        args:
-        - --pool-name
-        - "secondary-inference-pool"
-        - --pool-namespace
-        - "gateway-conformance-app-backend"
-        - --v
-        - "4"
-        - --zap-encoder
-        - "json"
-        - --grpc-port
-        - "9002"
-        - --grpc-health-port
-        - "9003"
-        - "--config-file"
-        - "/config/conformance-plugins.yaml"
-        env:
-        - name: ENABLE_REQ_HEADER_BASED_SCHEDULER_FOR_TESTING # Used for conformance test.
-          value: "true"
-        ports:
-        - containerPort: 9002
-        - containerPort: 9003
-        - name: metrics
-          containerPort: 9090
-        livenessProbe:
-          grpc:
-            port: 9003
-            service: inference-extension
-          initialDelaySeconds: 5
-          periodSeconds: 10
-        readinessProbe:
-          grpc:
-            port: 9003
-            service: inference-extension
-          initialDelaySeconds: 5
-          periodSeconds: 10
-        volumeMounts:
-        - name: plugins-config-volume
-          mountPath: "/config"
-      volumes:
-      - name: plugins-config-volume
-        configMap:
-          name: plugins-config
----
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: plugins-config
-  namespace: gateway-conformance-app-backend
-data:
-  conformance-plugins.yaml: |
-    apiVersion: inference.networking.x-k8s.io/v1alpha1
-    kind: EndpointPickerConfig
-    plugins:
-    - type: header-based-testing-filter
-    schedulingProfiles:
-    - name: conformance-profile
-      plugins:
-      - pluginRef: header-based-testing-filter
----
-# --- Required Role and RoleBinding for Conformance Test for EPP ---
-apiVersion: rbac.authorization.k8s.io/v1
-kind: Role
-metadata:
-  name: inference-model-reader
-  namespace: gateway-conformance-app-backend
-rules:
-- apiGroups: ["inference.networking.x-k8s.io"]
-  resources: ["inferenceobjectives", "inferencepools"]
-  verbs: ["get", "list", "watch"]
-- apiGroups: ["inference.networking.k8s.io"]
-  resources: ["inferencepools"]
-  verbs: ["get", "list", "watch"]
-- apiGroups: [""]
-  resources: ["pods"]
-  verbs: ["get", "list", "watch"]
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: RoleBinding
-metadata:
-  name: epp-to-inference-model-reader
-  namespace: gateway-conformance-app-backend
-subjects:
-- kind: ServiceAccount
-  name: default
-  namespace: gateway-conformance-app-backend
-roleRef:
-  kind: Role
-  name: inference-model-reader
-  apiGroup: rbac.authorization.k8s.io
\ No newline at end of file
+    hostname: "*.example.com"
diff --git a/examples/cafe-example/httproutemultiplerules.yaml b/examples/cafe-example/httproutemultiplerules.yaml
deleted file mode 100644
index e506fec959..0000000000
--- a/examples/cafe-example/httproutemultiplerules.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-apiVersion: gateway.networking.k8s.io/v1
-kind: HTTPRoute
-metadata:
-  name: httproute-multiple-rules-different-pools
-  namespace: gateway-conformance-app-backend
-spec:
-  parentRefs:
-    - name: conformance-primary
-      namespace: gateway-conformance-infra
-  rules:
-    - matches:
-        - path:
-            type: PathPrefix
-            value: /primary
-      backendRefs:
-        - name: primary-inference-pool
-          kind: InferencePool
-          group: inference.networking.k8s.io
-    - matches:
-        - path:
-            type: PathPrefix
-            value: /secondary
-      backendRefs:
-        - name: secondary-inference-pool
-          kind: InferencePool
-          group: inference.networking.k8s.io
diff --git a/examples/cafe-example/httproutesmultiplegateways.yaml b/examples/cafe-example/httproutesmultiplegateways.yaml
deleted file mode 100644
index caded16d89..0000000000
--- a/examples/cafe-example/httproutesmultiplegateways.yaml
+++ /dev/null
@@ -1,44 +0,0 @@
----
-apiVersion: gateway.networking.k8s.io/v1
-kind: HTTPRoute
-metadata:
-  name: route-for-primary-gateway
-  namespace: gateway-conformance-app-backend
-spec:
-  parentRefs:
-  - kind: Gateway
-    name: conformance-primary
-    namespace: gateway-conformance-infra
-  hostnames:
-  - "primary.example.com"
-  rules:
-  - backendRefs:
-    - group: inference.networking.k8s.io
-      kind: InferencePool
-      name: primary-inference-pool
-    matches:
-    - path:
-        type: PathPrefix
-        value: /test-primary-gateway
----
-apiVersion: gateway.networking.k8s.io/v1
-kind: HTTPRoute
-metadata:
-  name: route-for-secondary-gateway
-  namespace: gateway-conformance-app-backend
-spec:
-  parentRefs:
-  - kind: Gateway
-    name: conformance-secondary
-    namespace: gateway-conformance-infra
-  hostnames:
-  - "secondary.example.com"
-  rules:
-  - backendRefs:
-    - group: inference.networking.k8s.io
-      kind: InferencePool
-      name: secondary-inference-pool
-    matches:
-    - path:
-        type: PathPrefix
-        value: /test-secondary-gateway
diff --git a/examples/cafe-example/inference-pool-port-validation.yaml b/examples/cafe-example/inference-pool-port-validation.yaml
deleted file mode 100644
index 9c78117d97..0000000000
--- a/examples/cafe-example/inference-pool-port-validation.yaml
+++ /dev/null
@@ -1,79 +0,0 @@
-# --- HTTPRoute Scenario 1: Port Unspecified ---
----
-apiVersion: gateway.networking.k8s.io/v1
-kind: HTTPRoute
-metadata:
-  name: httproute-pool-port-unspecified
-  namespace: gateway-conformance-app-backend
-spec:
-  parentRefs:
-  - group: gateway.networking.k8s.io
-    kind: Gateway
-    name: conformance-primary
-    namespace: gateway-conformance-infra
-    sectionName: http
-  hostnames:
-  - "port-unspecified.example.com"
-  rules:
-  - backendRefs:
-    - group: inference.networking.k8s.io
-      kind: InferencePool
-      name: primary-inference-pool
-      # Port is intentionally unspecified here
-    matches:
-    - path:
-        type: PathPrefix
-        value: /test-port-unspecified
----
-# --- HTTPRoute Scenario 2: Port Matching ---
-apiVersion: gateway.networking.k8s.io/v1
-kind: HTTPRoute
-metadata:
-  name: httproute-pool-port-matching
-  namespace: gateway-conformance-app-backend
-spec:
-  parentRefs:
-  - group: gateway.networking.k8s.io
-    kind: Gateway
-    name: conformance-primary
-    namespace: gateway-conformance-infra
-    sectionName: http
-  hostnames:
-  - "port-matching.example.com"
-  rules:
-  - backendRefs:
-    - group: inference.networking.k8s.io
-      kind: InferencePool
-      name: primary-inference-pool
-      port: 3000 # Port matches InferencePool's targetPortNumber
-    matches:
-    - path:
-        type: PathPrefix
-        value: /test-port-matching
----
-# --- HTTPRoute Scenario 3: Port Non-Matching ---
-apiVersion: gateway.networking.k8s.io/v1
-kind: HTTPRoute
-metadata:
-  name: httproute-pool-port-non-matching
-  namespace: gateway-conformance-app-backend
-spec:
-  parentRefs:
-  - group: gateway.networking.k8s.io
-    kind: Gateway
-    name: conformance-primary
-    namespace: gateway-conformance-infra
-    sectionName: http
-  hostnames:
-  - "port-non-matching.example.com"
-  rules:
-  - backendRefs:
-    - group: inference.networking.k8s.io
-      kind: InferencePool
-      name: primary-inference-pool
-      port: 8888 # Port does NOT match InferencePool's targetPortNumber
-    matches:
-    - path:
-        type: PathPrefix
-        value: /test-port-non-matching
----
diff --git a/examples/cafe-example/inferencepool-resolvedrefs.yaml b/examples/cafe-example/inferencepool-resolvedrefs.yaml
deleted file mode 100644
index 81a7091963..0000000000
--- a/examples/cafe-example/inferencepool-resolvedrefs.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-# conformance/tests/basic/inferencepool_resolvedrefs_condition.yaml
-
-# This manifest defines the initial resources for the
-# inferencepool_resolvedrefs_condition.go conformance test.
-
-# --- HTTPRoute for Primary Gateway (conformance-primary) ---
-apiVersion: gateway.networking.k8s.io/v1
-kind: HTTPRoute
-metadata:
-  name: httproute-for-primary-gw
-  namespace: gateway-conformance-app-backend
-spec:
-  parentRefs:
-  - group: gateway.networking.k8s.io
-    kind: Gateway
-    name: conformance-primary
-    namespace: gateway-conformance-infra
-    sectionName: http
-  hostnames:
-  - "primary.example.com"
-  rules:
-  - backendRefs:
-    - group: inference.networking.k8s.io
-      kind: InferencePool
-      name: primary-inference-pool
-    matches:
-    - path:
-        type: PathPrefix
-        value: /primary-gateway-test
----
-# --- HTTPRoute for Secondary Gateway (conformance-secondary) ---
-apiVersion: gateway.networking.k8s.io/v1
-kind: HTTPRoute
-metadata:
-  name: httproute-for-secondary-gw
-  namespace: gateway-conformance-app-backend
-spec:
-  parentRefs:
-  - group: gateway.networking.k8s.io
-    kind: Gateway
-    name: conformance-secondary
-    namespace: gateway-conformance-infra
-    sectionName: http
-  hostnames:
-  - "secondary.example.com"
-  rules:
-  - backendRefs:
-    - group: inference.networking.k8s.io
-      kind: InferencePool
-      name: primary-inference-pool
-    matches:
-    - path:
-        type: PathPrefix
-        value: /secondary-gateway-test
\ No newline at end of file
diff --git a/internal/controller/nginx/config/maps.go b/internal/controller/nginx/config/maps.go
index 5cf941807c..d7c3aeef42 100644
--- a/internal/controller/nginx/config/maps.go
+++ b/internal/controller/nginx/config/maps.go
@@ -195,7 +195,7 @@ func buildInferenceMaps(groups []dataplane.BackendGroup) []shared.Map {
 
 			// Decide what the map must return when the picker didn’t set a value.
 			var defaultResult string
-			switch backend.EndpointPickerConfig.FailureMode {
+			switch backend.EndpointPickerConfig.EndpointPickerRef.FailureMode {
 			case inference.EndpointPickerFailClose:
 				defaultResult = invalidBackendRef
 			case inference.EndpointPickerFailOpen:
diff --git a/internal/controller/nginx/config/maps_test.go b/internal/controller/nginx/config/maps_test.go
index 736d7808ec..bac84b0067 100644
--- a/internal/controller/nginx/config/maps_test.go
+++ b/internal/controller/nginx/config/maps_test.go
@@ -73,8 +73,11 @@ func TestExecuteMaps(t *testing.T) {
 				Backends: []dataplane.Backend{
 					{
 						UpstreamName: "upstream1",
-						EndpointPickerConfig: &inference.EndpointPickerRef{
-							FailureMode: inference.EndpointPickerFailClose,
+						EndpointPickerConfig: &dataplane.EndpointPickerConfig{
+							NsName: "default",
+							EndpointPickerRef: &inference.EndpointPickerRef{
+								FailureMode: inference.EndpointPickerFailClose,
+							},
 						},
 					},
 				},
@@ -400,14 +403,20 @@ func TestBuildInferenceMaps(t *testing.T) {
 		Backends: []dataplane.Backend{
 			{
 				UpstreamName: "upstream1",
-				EndpointPickerConfig: &inference.EndpointPickerRef{
-					FailureMode: inference.EndpointPickerFailClose,
+				EndpointPickerConfig: &dataplane.EndpointPickerConfig{
+					NsName: "default",
+					EndpointPickerRef: &inference.EndpointPickerRef{
+						FailureMode: inference.EndpointPickerFailClose,
+					},
 				},
 			},
 			{
 				UpstreamName: "upstream2",
-				EndpointPickerConfig: &inference.EndpointPickerRef{
-					FailureMode: inference.EndpointPickerFailOpen,
+				EndpointPickerConfig: &dataplane.EndpointPickerConfig{
+					NsName: "default",
+					EndpointPickerRef: &inference.EndpointPickerRef{
+						FailureMode: inference.EndpointPickerFailOpen,
+					},
 				},
 			},
 			{
@@ -421,6 +430,22 @@ func TestBuildInferenceMaps(t *testing.T) {
 	g.Expect(maps).To(HaveLen(2))
 	g.Expect(maps[0].Source).To(Equal("$inference_workload_endpoint"))
 	g.Expect(maps[0].Variable).To(Equal("$inference_backend_upstream1"))
-	g.Expect(maps[0].Parameters[1].Result).To(Equal("invalid-backend-ref"))
-	g.Expect(maps[1].Parameters[1].Result).To(Equal("upstream2"))
+	g.Expect(maps[0].Parameters).To(HaveLen(3))
+	g.Expect(maps[0].Parameters[0].Value).To(Equal("\"\""))
+	g.Expect(maps[0].Parameters[0].Result).To(Equal("upstream1"))
+	g.Expect(maps[0].Parameters[1].Value).To(Equal("~.+"))
+	g.Expect(maps[0].Parameters[1].Result).To(Equal("$inference_workload_endpoint"))
+	g.Expect(maps[0].Parameters[2].Value).To(Equal("default"))
+	g.Expect(maps[0].Parameters[2].Result).To(Equal("invalid-backend-ref"))
+
+	// Check the second map
+	g.Expect(maps[1].Source).To(Equal("$inference_workload_endpoint"))
+	g.Expect(maps[1].Variable).To(Equal("$inference_backend_upstream2"))
+	g.Expect(maps[1].Parameters).To(HaveLen(3))
+	g.Expect(maps[1].Parameters[0].Value).To(Equal("\"\""))
+	g.Expect(maps[1].Parameters[0].Result).To(Equal("upstream2"))
+	g.Expect(maps[1].Parameters[1].Value).To(Equal("~.+"))
+	g.Expect(maps[1].Parameters[1].Result).To(Equal("$inference_workload_endpoint"))
+	g.Expect(maps[1].Parameters[2].Value).To(Equal("default"))
+	g.Expect(maps[1].Parameters[2].Result).To(Equal("upstream2"))
 }
diff --git a/internal/controller/nginx/config/servers.go b/internal/controller/nginx/config/servers.go
index 414f64c272..7df85b3241 100644
--- a/internal/controller/nginx/config/servers.go
+++ b/internal/controller/nginx/config/servers.go
@@ -28,6 +28,7 @@ const (
 	// HeaderMatchSeparator is the separator for constructing header-based match for NJS.
 	HeaderMatchSeparator = ":"
 	rootPath             = "/"
+	svcClusterLocal      = ".svc.cluster.local"
 )
 
 var grpcAuthorityHeader = http.Header{
@@ -453,16 +454,13 @@ func createInternalLocationsForRule(
 			intInfLocation := initializeInternalInferenceRedirectLocation(pathRuleIdx, matchRuleIdx)
 			for _, b := range r.BackendGroup.Backends {
 				if b.EndpointPickerConfig != nil {
+					eppRef := b.EndpointPickerConfig.EndpointPickerRef
 					var portNum int
-					if b.EndpointPickerConfig.Port != nil {
-						portNum = int(b.EndpointPickerConfig.Port.Number)
+					if eppRef.Port != nil {
+						portNum = int(eppRef.Port.Number)
 					}
 					intInfLocation.EPPInternalPath = intLocation.Path
-					if b.EndpointPickerNsName != "" {
-						intInfLocation.EPPHost = string(b.EndpointPickerConfig.Name) + "." + b.EndpointPickerNsName + ".svc.cluster.local"
-					} else {
-						intInfLocation.EPPHost = string(b.EndpointPickerConfig.Name)
-					}
+					intInfLocation.EPPHost = string(eppRef.Name) + "." + b.EndpointPickerConfig.NsName + svcClusterLocal
 					intInfLocation.EPPPort = portNum
 				}
 			}
@@ -510,18 +508,15 @@ func createInferenceLocationsForRule(
 			mirrorPercentage,
 		)
 		for _, b := range r.BackendGroup.Backends {
-			if b.EndpointPickerConfig != nil {
+			if b.EndpointPickerConfig != nil && b.EndpointPickerConfig.EndpointPickerRef != nil {
 				for i := range extLocations {
+					eppRef := b.EndpointPickerConfig.EndpointPickerRef
 					var portNum int
-					if b.EndpointPickerConfig.Port != nil {
-						portNum = int(b.EndpointPickerConfig.Port.Number)
+					if eppRef.Port != nil {
+						portNum = int(eppRef.Port.Number)
 					}
 					extLocations[i].EPPInternalPath = intLocation.Path
-					if b.EndpointPickerNsName != "" {
-						extLocations[i].EPPHost = (string(b.EndpointPickerConfig.Name) + "." + b.EndpointPickerNsName + ".svc.cluster.local") //nolint:lll
-					} else {
-						extLocations[i].EPPHost = string(b.EndpointPickerConfig.Name)
-					}
+					extLocations[i].EPPHost = string(eppRef.Name) + "." + b.EndpointPickerConfig.NsName + svcClusterLocal
 					extLocations[i].EPPPort = portNum
 				}
 			}
diff --git a/internal/controller/nginx/config/servers_template.go b/internal/controller/nginx/config/servers_template.go
index 82b692c88e..d50ba9aa24 100644
--- a/internal/controller/nginx/config/servers_template.go
+++ b/internal/controller/nginx/config/servers_template.go
@@ -143,6 +143,7 @@ server {
         include /etc/nginx/grpc-error-pages.conf;
         {{- end }}
 
+        proxy_http_version 1.1;
         {{- if $l.ProxyPass -}}
             {{ range $h := $l.ProxySetHeaders }}
         {{ $proxyOrGRPC }}_set_header {{ $h.Name }} "{{ $h.Value }}";
diff --git a/internal/controller/nginx/config/servers_test.go b/internal/controller/nginx/config/servers_test.go
index ab4fad31a5..9edf5062e1 100644
--- a/internal/controller/nginx/config/servers_test.go
+++ b/internal/controller/nginx/config/servers_test.go
@@ -2457,11 +2457,14 @@ func TestCreateLocations_InferenceBackends(t *testing.T) {
 				UpstreamName: "test_foo_80",
 				Valid:        true,
 				Weight:       1,
-				EndpointPickerConfig: &inference.EndpointPickerRef{
-					Name: "test-epp",
-					Port: &inference.Port{
-						Number: 80,
+				EndpointPickerConfig: &dataplane.EndpointPickerConfig{
+					EndpointPickerRef: &inference.EndpointPickerRef{
+						Name: "test-epp",
+						Port: &inference.Port{
+							Number: 80,
+						},
 					},
+					NsName: hrNsName.Namespace,
 				},
 			},
 		},
@@ -2522,7 +2525,7 @@ func TestCreateLocations_InferenceBackends(t *testing.T) {
 					Path:            "= /inference",
 					Type:            http.InferenceExternalLocationType,
 					EPPInternalPath: "/_ngf-internal-rule0-route0-inference",
-					EPPHost:         "test-epp",
+					EPPHost:         "test-epp.test.svc.cluster.local",
 					EPPPort:         80,
 				},
 				createDefaultRootLocation(),
@@ -2542,7 +2545,7 @@ func TestCreateLocations_InferenceBackends(t *testing.T) {
 					Path:            "/_ngf-internal-rule0-route0-inference",
 					Type:            http.InferenceInternalLocationType,
 					EPPInternalPath: "/_ngf-internal-rule0-route0",
-					EPPHost:         "test-epp",
+					EPPHost:         "test-epp.test.svc.cluster.local",
 					EPPPort:         80,
 				},
 				{
diff --git a/internal/controller/state/conditions/conditions.go b/internal/controller/state/conditions/conditions.go
index 1664aa85b6..9def2ac78b 100644
--- a/internal/controller/state/conditions/conditions.go
+++ b/internal/controller/state/conditions/conditions.go
@@ -39,6 +39,10 @@ const (
 	// Route rules has a backendRef with an unsupported value.
 	RouteReasonBackendRefUnsupportedValue v1.RouteConditionReason = "UnsupportedValue"
 
+	// RouteReasonBackendRefInvalidInferencePool is used with the "ResolvedRefs" condition when
+	// the InferencePool backendRef is invalid.
+	RouteReasonBackendRefInvalidInferencePool v1.RouteConditionReason = "InvalidInferencePool"
+
 	// RouteReasonInvalidGateway is used with the "Accepted" (false) condition when the Gateway the Route
 	// references is invalid.
 	RouteReasonInvalidGateway v1.RouteConditionReason = "InvalidGateway"
@@ -446,6 +450,17 @@ func NewRouteBackendRefUnsupportedValue(msg string) Condition {
 	}
 }
 
+// NewRouteBackendRefInvalidInferencePool returns a Condition that indicates that the Route has an InferencePool
+// backendRef that is invalid.
+func NewRouteBackendRefInvalidInferencePool(msg string) Condition {
+	return Condition{
+		Type:    string(v1.RouteConditionResolvedRefs),
+		Status:  metav1.ConditionFalse,
+		Reason:  string(RouteReasonBackendRefInvalidInferencePool),
+		Message: msg,
+	}
+}
+
 // NewRouteBackendRefUnsupportedProtocol returns a Condition that indicates that the Route has a backendRef with
 // an unsupported protocol.
 func NewRouteBackendRefUnsupportedProtocol(msg string) Condition {
diff --git a/internal/controller/state/dataplane/configuration.go b/internal/controller/state/dataplane/configuration.go
index 6c91b8d266..eed32a9c99 100644
--- a/internal/controller/state/dataplane/configuration.go
+++ b/internal/controller/state/dataplane/configuration.go
@@ -395,12 +395,14 @@ func newBackendGroup(
 		inferencePoolBackendExists = inferencePoolBackendExists || ref.IsInferencePool
 
 		backends = append(backends, Backend{
-			UpstreamName:         ref.ServicePortReference(),
-			Weight:               ref.Weight,
-			Valid:                valid,
-			VerifyTLS:            convertBackendTLS(ref.BackendTLSPolicy, gatewayName),
-			EndpointPickerConfig: ref.EndpointPickerConfig,
-			EndpointPickerNsName: ref.EndpointPickerNsName,
+			UpstreamName: ref.ServicePortReference(),
+			Weight:       ref.Weight,
+			Valid:        valid,
+			VerifyTLS:    convertBackendTLS(ref.BackendTLSPolicy, gatewayName),
+			EndpointPickerConfig: &EndpointPickerConfig{
+				NsName:            ref.EndpointPickerConfig.NsName,
+				EndpointPickerRef: ref.EndpointPickerConfig.EndpointPickerRef,
+			},
 		})
 	}
 
diff --git a/internal/controller/state/dataplane/configuration_test.go b/internal/controller/state/dataplane/configuration_test.go
index 3e1697590d..b029730419 100644
--- a/internal/controller/state/dataplane/configuration_test.go
+++ b/internal/controller/state/dataplane/configuration_test.go
@@ -219,6 +219,10 @@ func TestBuildConfiguration(t *testing.T) {
 		UpstreamName: fooUpstreamName,
 		Weight:       1,
 		Valid:        true,
+		EndpointPickerConfig: &EndpointPickerConfig{
+			NsName:            "",
+			EndpointPickerRef: nil,
+		},
 	}
 
 	createBackendRefs := func(validRule bool) []graph.BackendRef {
diff --git a/internal/controller/state/dataplane/types.go b/internal/controller/state/dataplane/types.go
index 0866af636b..630c8c0fcb 100644
--- a/internal/controller/state/dataplane/types.go
+++ b/internal/controller/state/dataplane/types.go
@@ -328,8 +328,7 @@ type Backend struct {
 	VerifyTLS *VerifyTLS
 	// EndpointPickerConfig holds the configuration for the EndpointPicker for this backend.
 	// This is set if this backend is for an inference workload.
-	EndpointPickerConfig *inference.EndpointPickerRef
-	EndpointPickerNsName string
+	EndpointPickerConfig *EndpointPickerConfig
 	// UpstreamName is the name of the upstream for this backend.
 	UpstreamName string
 	// Weight is the weight of the BackendRef.
@@ -340,6 +339,12 @@ type Backend struct {
 	Valid bool
 }
 
+// EndpointPickerConfig represents the configuration for the EndpointPicker extension.
+type EndpointPickerConfig struct {
+	EndpointPickerRef *inference.EndpointPickerRef
+	NsName            string
+}
+
 // VerifyTLS holds the backend TLS verification configuration.
 type VerifyTLS struct {
 	CertBundleID CertBundleID
diff --git a/internal/controller/state/graph/backend_refs.go b/internal/controller/state/graph/backend_refs.go
index e02aa3a11e..e88489fd94 100644
--- a/internal/controller/state/graph/backend_refs.go
+++ b/internal/controller/state/graph/backend_refs.go
@@ -9,7 +9,6 @@ import (
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/validation/field"
-	inference "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	gatewayv1 "sigs.k8s.io/gateway-api/apis/v1"
 	"sigs.k8s.io/gateway-api/apis/v1alpha3"
 
@@ -31,11 +30,8 @@ const (
 type BackendRef struct {
 	// BackendTLSPolicy is the BackendTLSPolicy of the Service which is referenced by the backendRef.
 	BackendTLSPolicy *BackendTLSPolicy
-	// EndpointPickerConfig is the configuration for the EndpointPicker, if this backendRef is for an InferencePool.
-	EndpointPickerConfig *inference.EndpointPickerRef
-	// EndpointPickerNsName is the namespace where the EndpointPicker is deployed,
-	// if this backendRef is for an InferencePool.
-	EndpointPickerNsName string
+	// EndpointPickerConfig holds the configuration for the EndpointPicker for this backend.
+	EndpointPickerConfig EndpointPickerConfig
 	// InvalidForGateways is a map of Gateways for which this BackendRef is invalid for, with the corresponding
 	// condition. Certain NginxProxy configurations may result in a backend not being valid for some Gateways,
 	// but not others.
@@ -126,8 +122,10 @@ func addBackendRefsToRules(
 				}
 
 				if pool, exists := referencedInferencePools[poolName]; exists {
+					// If the InferencePool is invalid, add a condition to the route
+					// and continue without setting the port, to avoid generating backendRefs for it.
 					if !pool.Valid {
-						route.Conditions = append(route.Conditions, conditions.NewRouteBackendRefUnsupportedValue(
+						route.Conditions = append(route.Conditions, conditions.NewRouteBackendRefInvalidInferencePool(
 							fmt.Sprintf("Referenced InferencePool %s/%s is invalid",
 								poolName.Namespace,
 								poolName.Name,
@@ -135,10 +133,11 @@ func addBackendRefsToRules(
 						))
 						continue
 					}
+
 					port := gatewayv1.PortNumber(pool.Source.Spec.TargetPorts[0].Number)
 					ref.Port = helpers.GetPointer(port)
-					ref.EndpointPickerConfig = &pool.Source.Spec.EndpointPickerRef
-					ref.EndpointPickerNsName = poolName.Namespace
+					ref.EndpointPickerConfig.EndpointPickerRef = &pool.Source.Spec.EndpointPickerRef
+					ref.EndpointPickerConfig.NsName = poolName.Namespace
 				}
 			}
 
@@ -208,7 +207,6 @@ func createBackendRef(
 			IsInferencePool:      ref.IsInferencePool,
 			InvalidForGateways:   make(map[types.NamespacedName]conditions.Condition),
 			EndpointPickerConfig: ref.EndpointPickerConfig,
-			EndpointPickerNsName: ref.EndpointPickerNsName,
 		}
 
 		return backendRef, []conditions.Condition{cond}
@@ -230,7 +228,6 @@ func createBackendRef(
 			IsInferencePool:      ref.IsInferencePool,
 			InvalidForGateways:   make(map[types.NamespacedName]conditions.Condition),
 			EndpointPickerConfig: ref.EndpointPickerConfig,
-			EndpointPickerNsName: ref.EndpointPickerNsName,
 		}
 
 		return backendRef, []conditions.Condition{conditions.NewRouteBackendRefRefBackendNotFound(err.Error())}
@@ -255,7 +252,6 @@ func createBackendRef(
 				IsInferencePool:      ref.IsInferencePool,
 				InvalidForGateways:   invalidForGateways,
 				EndpointPickerConfig: ref.EndpointPickerConfig,
-				EndpointPickerNsName: ref.EndpointPickerNsName,
 			}
 
 			return backendRef, append(conds, conditions.NewRouteBackendRefUnsupportedValue(
@@ -287,7 +283,6 @@ func createBackendRef(
 			IsInferencePool:      ref.IsInferencePool,
 			InvalidForGateways:   invalidForGateways,
 			EndpointPickerConfig: ref.EndpointPickerConfig,
-			EndpointPickerNsName: ref.EndpointPickerNsName,
 		}
 
 		return backendRef, append(conds, conditions.NewRouteBackendRefUnsupportedValue(err.Error()))
@@ -306,7 +301,6 @@ func createBackendRef(
 				IsInferencePool:      ref.IsInferencePool,
 				InvalidForGateways:   invalidForGateways,
 				EndpointPickerConfig: ref.EndpointPickerConfig,
-				EndpointPickerNsName: ref.EndpointPickerNsName,
 			}
 
 			return backendRef, append(conds, conditions.NewRouteBackendRefUnsupportedProtocol(err.Error()))
@@ -323,7 +317,6 @@ func createBackendRef(
 		IsInferencePool:      ref.IsInferencePool,
 		InvalidForGateways:   invalidForGateways,
 		EndpointPickerConfig: ref.EndpointPickerConfig,
-		EndpointPickerNsName: ref.EndpointPickerNsName,
 	}
 
 	return backendRef, conds
diff --git a/internal/controller/state/graph/backend_refs_test.go b/internal/controller/state/graph/backend_refs_test.go
index b786daed9b..7ef1ae9fd3 100644
--- a/internal/controller/state/graph/backend_refs_test.go
+++ b/internal/controller/state/graph/backend_refs_test.go
@@ -1231,11 +1231,14 @@ func TestAddBackendRefsToRules(t *testing.T) {
 					ServicePort: v1.ServicePort{
 						Port: 80,
 					},
-					Valid:                true,
-					Weight:               1,
-					InvalidForGateways:   map[types.NamespacedName]conditions.Condition{},
-					IsInferencePool:      true,
-					EndpointPickerConfig: &inference.EndpointPickerRef{},
+					Valid:              true,
+					Weight:             1,
+					InvalidForGateways: map[types.NamespacedName]conditions.Condition{},
+					IsInferencePool:    true,
+					EndpointPickerConfig: EndpointPickerConfig{
+						NsName:            svcInferenceNsName.Namespace,
+						EndpointPickerRef: &inference.EndpointPickerRef{},
+					},
 				},
 			},
 			expectedConditions: nil,
@@ -1262,6 +1265,7 @@ func TestAddBackendRefsToRules(t *testing.T) {
 							},
 						},
 					},
+					Valid: true,
 				},
 			}
 
diff --git a/internal/controller/state/graph/graph_test.go b/internal/controller/state/graph/graph_test.go
index a49202d96e..96dbddd2dd 100644
--- a/internal/controller/state/graph/graph_test.go
+++ b/internal/controller/state/graph/graph_test.go
@@ -228,9 +228,12 @@ func TestBuildGraph(t *testing.T) {
 				Weight:             1,
 				InvalidForGateways: map[types.NamespacedName]conditions.Condition{},
 				IsInferencePool:    true,
-				EndpointPickerConfig: &inference.EndpointPickerRef{
-					Kind: kinds.Service,
-					Name: inference.ObjectName(controller.CreateInferencePoolServiceName("ipool")),
+				EndpointPickerConfig: EndpointPickerConfig{
+					NsName: testNs,
+					EndpointPickerRef: &inference.EndpointPickerRef{
+						Kind: kinds.Service,
+						Name: inference.ObjectName(controller.CreateInferencePoolServiceName("ipool")),
+					},
 				},
 			},
 		}
@@ -1339,6 +1342,7 @@ func TestBuildGraph(t *testing.T) {
 						inferenceRoute,
 					},
 					Conditions: []conditions.Condition{},
+					Valid:      true,
 				},
 			},
 			ReferencedCaCertConfigMaps: map[types.NamespacedName]*CaCertConfigMap{
diff --git a/internal/controller/state/graph/inferencepools_test.go b/internal/controller/state/graph/inferencepools_test.go
index f6ea66215a..a3ef1b3ede 100644
--- a/internal/controller/state/graph/inferencepools_test.go
+++ b/internal/controller/state/graph/inferencepools_test.go
@@ -185,6 +185,7 @@ func TestBuildReferencedInferencePools(t *testing.T) {
 						validRoute,
 					},
 					Conditions: []conditions.Condition{},
+					Valid:      true,
 				},
 			},
 		},
@@ -244,6 +245,7 @@ func TestBuildReferencedInferencePools(t *testing.T) {
 						modifiedRouteWithServiceBackend,
 					},
 					Conditions: []conditions.Condition{},
+					Valid:      true,
 				},
 			},
 		},
@@ -277,6 +279,7 @@ func TestBuildReferencedInferencePools(t *testing.T) {
 						routeWithInferencePoolHeadlessSvcBackend,
 					},
 					Conditions: []conditions.Condition{},
+					Valid:      true,
 				},
 			},
 		},
@@ -310,6 +313,7 @@ func TestBuildReferencedInferencePools(t *testing.T) {
 						routeWithNoNamespaceBackend,
 					},
 					Conditions: []conditions.Condition{},
+					Valid:      true,
 				},
 			},
 		},
@@ -326,6 +330,8 @@ func TestBuildReferencedInferencePools(t *testing.T) {
 					Gateways:   []*gatewayv1.Gateway{},
 					HTTPRoutes: []*L7Route{},
 					Conditions: []conditions.Condition{},
+					// validity of InferencePool depends on condition counts only
+					Valid: true,
 				},
 			},
 		},
diff --git a/internal/controller/state/graph/route_common.go b/internal/controller/state/graph/route_common.go
index 4fb5178a2e..cc5a588c9c 100644
--- a/internal/controller/state/graph/route_common.go
+++ b/internal/controller/state/graph/route_common.go
@@ -167,11 +167,7 @@ type RouteBackendRef struct {
 	MirrorBackendIdx *int
 
 	// EndpointPickerConfig is the configuration for the EndpointPicker, if this backendRef is for an InferencePool.
-	EndpointPickerConfig *inference.EndpointPickerRef
-
-	// EndpointPickerNsName is the namespace where the EndpointPicker is deployed,
-	// if this backendRef is for an InferencePool.
-	EndpointPickerNsName string
+	EndpointPickerConfig EndpointPickerConfig
 
 	Filters []any
 
@@ -179,6 +175,12 @@ type RouteBackendRef struct {
 	IsInferencePool bool
 }
 
+// EndpointPickerConfig specifies the namespace and reference to the EndpointPicker extension.
+type EndpointPickerConfig struct {
+	EndpointPickerRef *inference.EndpointPickerRef
+	NsName            string
+}
+
 // CreateRouteKey takes a client.Object and creates a RouteKey.
 func CreateRouteKey(obj client.Object) RouteKey {
 	nsName := types.NamespacedName{
diff --git a/tests/Makefile b/tests/Makefile
index f775e9f9b2..edac20e641 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -85,6 +85,12 @@ run-inference-conformance-tests: ## Run inference conformance tests
 	./scripts/check-pod-exit-code.sh
 	sed -e '1,/CONFORMANCE PROFILE/d' output.txt > conformance-profile-inference.yaml
 	rm output.txt
+	core_result=`yq '.profiles[0].core.result' conformance-profile-inference.yaml`; \
+	if [ "$$core_result" != "failure" ] ; then \
+		exit 0; \
+	else \
+		exit 2; \
+	fi
 
 .PHONY: cleanup-conformance-tests
 cleanup-conformance-tests: ## Clean up conformance tests fixtures
diff --git a/tests/conformance/conformance_test.go b/tests/conformance/conformance_test.go
index f3c363f6eb..e0475ba642 100644
--- a/tests/conformance/conformance_test.go
+++ b/tests/conformance/conformance_test.go
@@ -38,9 +38,6 @@ const (
 	// unusableGatewayIPAddress 198.51.100.0 is a publicly reserved IP address specifically for documentation.
 	// This is needed to give the conformance tests an example valid ip unusable address.
 	unusableGatewayIPAddress = "198.51.100.0"
-
-	// inferenceBaseManifest is the base manifest used to deploy the resources needed for inference conformance tests.
-	inferenceBaseManifest = "manifests/base.yaml"
 )
 
 func TestConformance(t *testing.T) {