diff --git a/Makefile b/Makefile index 44759e457..e34a1a922 100644 --- a/Makefile +++ b/Makefile @@ -105,9 +105,8 @@ vet: ## Run go vet against code. test: manifests generate fmt vet envtest ## Run tests. KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $$(go list ./... | grep -v /e2e) -coverprofile cover.out -# Utilize Kind or modify the e2e tests to load the image locally, enabling compatibility with other vendors. -.PHONY: test-e2e # Run the e2e tests against a Kind k8s instance that is spun up. -test-e2e: +.PHONY: test-e2e +test-e2e: ## Run end-to-end tests against an existing Kubernetes cluster with at least 3 available GPUs. go test ./test/e2e/ -v -ginkgo.v .PHONY: lint diff --git a/README.md b/README.md index 85185a2bf..a15e9542d 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,10 @@ This project is currently in development. Follow this [README](./pkg/README.md) to get the inference-extension up and running on your cluster! +## End-to-End Tests + +Follow this [README](./test/e2e/README.md) to learn more about running the inference-extension end-to-end test suite on your cluster. + ## Website Detailed documentation is available on our website: https://gateway-api-inference-extension.sigs.k8s.io/ diff --git a/go.mod b/go.mod index 2ae6913b5..42d606a8c 100644 --- a/go.mod +++ b/go.mod @@ -20,11 +20,13 @@ require ( google.golang.org/grpc v1.70.0 google.golang.org/protobuf v1.36.4 k8s.io/api v0.32.1 + k8s.io/apiextensions-apiserver v0.32.1 k8s.io/apimachinery v0.32.1 k8s.io/client-go v0.32.1 k8s.io/code-generator v0.32.1 k8s.io/component-base v0.32.1 k8s.io/klog/v2 v2.130.1 + k8s.io/utils v0.0.0-20241210054802-24370beab758 sigs.k8s.io/controller-runtime v0.20.1 sigs.k8s.io/structured-merge-diff/v4 v4.5.0 ) @@ -73,6 +75,7 @@ require ( github.com/google/gofuzz v1.2.0 // indirect github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad // indirect github.com/google/uuid v1.6.0 // indirect + github.com/gorilla/websocket v1.5.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect github.com/huandu/xstrings v1.3.3 // indirect github.com/imdario/mergo v0.3.11 // indirect @@ -87,9 +90,11 @@ require ( github.com/mattn/go-isatty v0.0.20 // indirect github.com/mitchellh/copystructure v1.2.0 // indirect github.com/mitchellh/reflectwalk v1.0.2 // indirect + github.com/moby/spdystream v0.5.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect github.com/pkg/errors v0.9.1 // indirect github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect @@ -99,6 +104,7 @@ require ( github.com/spf13/cobra v1.8.1 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/stoewer/go-strcase v1.3.0 // indirect + github.com/stretchr/objx v0.5.2 // indirect github.com/x448/float16 v0.8.4 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 // indirect go.opentelemetry.io/otel v1.32.0 // indirect @@ -128,11 +134,9 @@ require ( gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/apiextensions-apiserver v0.32.0 // indirect - k8s.io/apiserver v0.32.0 // indirect + k8s.io/apiserver v0.32.1 // indirect 
k8s.io/gengo/v2 v2.0.0-20240911193312-2b36238f13e9 // indirect k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f // indirect - k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 // indirect sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.0 // indirect sigs.k8s.io/controller-tools v0.14.0 // indirect sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect diff --git a/go.sum b/go.sum index d346a198a..f8584f8fe 100644 --- a/go.sum +++ b/go.sum @@ -19,6 +19,8 @@ github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafo github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI= github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a h1:idn718Q4B6AGu/h5Sxe66HYVdqdGu2l9Iebqhi/AEoA= github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -116,6 +118,8 @@ github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad/go.mod h1:vavhavw2zAx github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc= +github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0= github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k= github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4= @@ -160,6 +164,8 @@ github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HK github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= +github.com/moby/spdystream v0.5.0 h1:7r0J1Si3QO/kjRitvSLVVFUjxMEb/YLj6S9FF62JBCU= +github.com/moby/spdystream v0.5.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -167,6 +173,8 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod 
h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= @@ -207,6 +215,8 @@ github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8w github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= @@ -341,12 +351,12 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= k8s.io/api v0.32.1 h1:f562zw9cy+GvXzXf0CKlVQ7yHJVYzLfL6JAS4kOAaOc= k8s.io/api v0.32.1/go.mod h1:/Yi/BqkuueW1BgpoePYBRdDYfjPF5sgTr5+YqDZra5k= -k8s.io/apiextensions-apiserver v0.32.0 h1:S0Xlqt51qzzqjKPxfgX1xh4HBZE+p8KKBq+k2SWNOE0= -k8s.io/apiextensions-apiserver v0.32.0/go.mod h1:86hblMvN5yxMvZrZFX2OhIHAuFIMJIZ19bTvzkP+Fmw= +k8s.io/apiextensions-apiserver v0.32.1 h1:hjkALhRUeCariC8DiVmb5jj0VjIc1N0DREP32+6UXZw= +k8s.io/apiextensions-apiserver v0.32.1/go.mod h1:sxWIGuGiYov7Io1fAS2X06NjMIk5CbRHc2StSmbaQto= k8s.io/apimachinery v0.32.1 h1:683ENpaCBjma4CYqsmZyhEzrGz6cjn1MY/X2jB2hkZs= k8s.io/apimachinery v0.32.1/go.mod h1:GpHVgxoKlTxClKcteaeuF1Ul/lDVb74KpZcxcmLDElE= -k8s.io/apiserver v0.32.0 h1:VJ89ZvQZ8p1sLeiWdRJpRD6oLozNZD2+qVSLi+ft5Qs= -k8s.io/apiserver v0.32.0/go.mod h1:HFh+dM1/BE/Hm4bS4nTXHVfN6Z6tFIZPi649n83b4Ag= +k8s.io/apiserver v0.32.1 h1:oo0OozRos66WFq87Zc5tclUX2r0mymoVHRq8JmR7Aak= +k8s.io/apiserver v0.32.1/go.mod h1:UcB9tWjBY7aryeI5zAgzVJB/6k7E97bkr1RgqDz0jPw= k8s.io/client-go v0.32.1 h1:otM0AxdhdBIaQh7l1Q0jQpmo7WOFIk5FFa4bg6YMdUU= k8s.io/client-go v0.32.1/go.mod h1:aTTKZY7MdxUaJ/KiUs8D+GssR9zJZi77ZqtzcGXIiDg= k8s.io/code-generator v0.32.1 h1:4lw1kFNDuFYXquTkB7Sl5EwPMUP2yyW9hh6BnFfRZFY= @@ -359,8 +369,8 @@ k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f h1:GA7//TjRY9yWGy1poLzYYJJ4JRdzg3+O6e8I+e+8T5Y= k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f/go.mod h1:R/HEjbvWI0qdfb8viZUeVZm0X6IZnxAydC7YU42CMw4= -k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 h1:M3sRQVHv7vB20Xc2ybTt7ODCeFj6JSWYFzOFnYeS6Ro= -k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/utils v0.0.0-20241210054802-24370beab758 h1:sdbE21q2nlQtFh65saZY+rRM6x6aJJI8IUa1AmH/qa0= +k8s.io/utils v0.0.0-20241210054802-24370beab758/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.0 
h1:CPT0ExVicCzcpeN4baWEV2ko2Z/AsiZgEdwgcfwLgMo= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.0/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= sigs.k8s.io/controller-runtime v0.20.1 h1:JbGMAG/X94NeM3xvjenVUaBjy6Ui4Ogd/J5ZtjZnHaE= diff --git a/pkg/README.md b/pkg/README.md index ee82aab2d..04ebfde28 100644 --- a/pkg/README.md +++ b/pkg/README.md @@ -4,31 +4,34 @@ This quickstart guide is intended for engineers familiar with k8s and model serv ### Requirements - Envoy Gateway [v1.2.1](https://gateway.envoyproxy.io/docs/install/install-yaml/#install-with-yaml) or higher - - A cluster that has built-in support for `ServiceType=LoadBalancer`. (This can be validated by ensuring your Envoy Gateway is up and running) - - For example, with Kind, you can follow these steps: https://kind.sigs.k8s.io/docs/user/loadbalancer + - A cluster with: + - Support for Services of type `LoadBalancer`. (This can be validated by ensuring your Envoy Gateway is up and running). For example, with Kind, + you can follow [these steps](https://kind.sigs.k8s.io/docs/user/loadbalancer). + - 3 GPUs to run the sample model server. Adjust the number of replicas in `./manifests/vllm/deployment.yaml` as needed. ### Steps -1. **Deploy Sample vLLM Application** +1. **Deploy Sample Model Server** - Create a Hugging Face secret to download the model [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf). Ensure that the token grants access to this model. + Create a Hugging Face secret to download the model [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf). Ensure that the token grants access to this model. Deploy a sample vLLM deployment with the proper protocol to work with the LLM Instance Gateway. ```bash kubectl create secret generic hf-token --from-literal=token=$HF_TOKEN # Your Hugging Face Token with access to Llama2 - kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/vllm/vllm-lora-deployment.yaml + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/vllm/deployment.yaml ``` -1. **Install the CRDs into the cluster:** +1. **Install the Inference Extension CRDs:** ```sh kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd ``` -1. **Deploy InferenceModel and InferencePool** +1. **Deploy InferenceModel** - Deploy a sample InferenceModel and InferencePool configuration based on the vLLM deployments mentioned above. + Deploy the sample InferenceModel which is configured to load balance traffic between the `tweet-summary-0` and `tweet-summary-1` + [LoRA adapters](https://docs.vllm.ai/en/latest/features/lora.html) of the sample model server. ```bash - kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/inferencepool-with-model.yaml + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/inferencemodel.yaml ``` 1. **Update Envoy Gateway Config to enable Patch Policy** @@ -46,11 +49,15 @@ This quickstart guide is intended for engineers familiar with k8s and model serv kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/gateway/gateway.yaml ``` > **_NOTE:_** This file couples together the gateway infra and the HTTPRoute infra for a convenient, quick startup. 
Creating additional/different InferencePools on the same gateway will require an additional set of: `Backend`, `HTTPRoute`, the resources included in the `./manifests/gateway/ext-proc.yaml` file, and an additional `./manifests/gateway/patch_policy.yaml` file. ***Should you choose to experiment, familiarity with xDS and Envoy are very useful.*** - - + Confirm that the Gateway was assigned an IP address and reports a `Programmed=True` status: + ```bash + $ kubectl get gateway inference-gateway + NAME CLASS ADDRESS PROGRAMMED AGE + inference-gateway inference-gateway True 22s + ``` -1. **Deploy Ext-Proc** +1. **Deploy the Inference Extension and InferencePool** ```bash kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/ext_proc.yaml diff --git a/pkg/manifests/ext_proc.yaml b/pkg/manifests/ext_proc.yaml index 3263422f7..4eb0415a7 100644 --- a/pkg/manifests/ext_proc.yaml +++ b/pkg/manifests/ext_proc.yaml @@ -40,6 +40,16 @@ roleRef: kind: ClusterRole name: pod-read --- +apiVersion: inference.networking.x-k8s.io/v1alpha1 +kind: InferencePool +metadata: + labels: + name: vllm-llama2-7b-pool +spec: + targetPortNumber: 8000 + selector: + app: vllm-llama2-7b-pool +--- apiVersion: apps/v1 kind: Deployment metadata: diff --git a/pkg/manifests/inferencepool-with-model.yaml b/pkg/manifests/inferencemodel.yaml similarity index 72% rename from pkg/manifests/inferencepool-with-model.yaml rename to pkg/manifests/inferencemodel.yaml index b5980e2ea..0085a89da 100644 --- a/pkg/manifests/inferencepool-with-model.yaml +++ b/pkg/manifests/inferencemodel.yaml @@ -1,14 +1,4 @@ apiVersion: inference.networking.x-k8s.io/v1alpha1 -kind: InferencePool -metadata: - labels: - name: vllm-llama2-7b-pool -spec: - targetPortNumber: 8000 - selector: - app: vllm-llama2-7b-pool ---- -apiVersion: inference.networking.x-k8s.io/v1alpha1 kind: InferenceModel metadata: labels: diff --git a/pkg/manifests/vllm/vllm-lora-deployment.yaml b/pkg/manifests/vllm/deployment.yaml similarity index 99% rename from pkg/manifests/vllm/vllm-lora-deployment.yaml rename to pkg/manifests/vllm/deployment.yaml index a453eb7ee..30f6f6711 100644 --- a/pkg/manifests/vllm/vllm-lora-deployment.yaml +++ b/pkg/manifests/vllm/deployment.yaml @@ -10,14 +10,11 @@ spec: port: 8000 targetPort: 8000 type: ClusterIP - --- - apiVersion: apps/v1 kind: Deployment metadata: name: vllm-llama2-7b-pool - namespace: default spec: replicas: 3 selector: diff --git a/test/e2e/README.md b/test/e2e/README.md new file mode 100644 index 000000000..584d89143 --- /dev/null +++ b/test/e2e/README.md @@ -0,0 +1,38 @@ +# End-to-End Tests + +This document provides instructions on how to run the end-to-end tests. + +## Overview + +The end-to-end tests are designed to validate end-to-end Gateway API Inference Extension functionality. These tests are executed against a Kubernetes cluster and use the Ginkgo testing framework to ensure the extension behaves as expected. + +## Prerequisites + +- [Go](https://golang.org/doc/install) installed on your machine. +- [Make](https://www.gnu.org/software/make/manual/make.html) installed to run the end-to-end test target. +- A Hugging Face Hub token with access to the [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) model. + +## Running the End-to-End Tests + +Follow these steps to run the end-to-end tests: + +1. 
**Clone the Repository**: Clone the `gateway-api-inference-extension` repository: + + ```sh + git clone https://github.com/kubernetes-sigs/gateway-api-inference-extension.git && cd gateway-api-inference-extension + ``` + +1. **Export Your Hugging Face Hub Token**: The token is required to run the test model server: + + ```sh + export HF_TOKEN= + ``` + +1. **Run the Tests**: Run the `test-e2e` target: + + ```sh + make test-e2e + ``` + + The test suite prints details for each step. Note that the `vllm-llama2-7b-pool` model server deployment + may take several minutes to report an `Available=True` status due to the time required for bootstrapping. diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index 2da01a9c8..7449d5f61 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -17,16 +17,339 @@ limitations under the License. package e2e import ( + "context" "fmt" + "os" + "strings" "testing" + "time" - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + infextv1a1 "inference.networking.x-k8s.io/gateway-api-inference-extension/api/v1alpha1" + testutils "inference.networking.x-k8s.io/gateway-api-inference-extension/test/utils" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/serializer" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/config" ) -// Run e2e tests using the Ginkgo runner. -func TestE2E(t *testing.T) { - RegisterFailHandler(Fail) - _, _ = fmt.Fprintf(GinkgoWriter, "Starting api suite\n") - RunSpecs(t, "e2e suite") +const ( + // defaultExistsTimeout is the default timeout for a resource to exist in the api server. + defaultExistsTimeout = 30 * time.Second + // defaultReadyTimeout is the default timeout for a resource to report a ready state. + defaultReadyTimeout = 3 * time.Minute + // defaultModelReadyTimeout is the default timeout for the model server deployment to report a ready state. + defaultModelReadyTimeout = 10 * time.Minute + // defaultInterval is the default interval to check if a resource exists or reports ready conditions. + defaultInterval = time.Millisecond * 250 + // nsName is the name of the Namespace used for tests. + // TODO [danehans]: Must be "default" until https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/227 is fixed + nsName = "default" + // modelServerName is the name of the model server test resources. + modelServerName = "vllm-llama2-7b-pool" + // modelName is the test model name. + modelName = "tweet-summary" + // envoyName is the name of the envoy proxy test resources. + envoyName = "envoy" + // envoyPort is the listener port number of the test envoy proxy. + envoyPort = "8081" + // inferExtName is the name of the inference extension test resources. + inferExtName = "inference-gateway-ext-proc" + // clientManifest is the manifest for the client test resources. + clientManifest = "../testdata/client.yaml" + // modelServerManifest is the manifest for the model server test resources. + modelServerManifest = "../../pkg/manifests/vllm/deployment.yaml" + // modelServerSecretManifest is the manifest for the model server secret resource.
+ modelServerSecretManifest = "../testdata/model-secret.yaml" + // inferPoolManifest is the manifest for the inference pool CRD. + inferPoolManifest = "../../config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml" + // inferModelManifest is the manifest for the inference model CRD. + inferModelManifest = "../../config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml" + // inferExtManifest is the manifest for the inference extension test resources. + inferExtManifest = "../../pkg/manifests/ext_proc.yaml" + // envoyManifest is the manifest for the envoy proxy test resources. + envoyManifest = "../testdata/envoy.yaml" +) + +var ( + ctx context.Context + cli client.Client + // Required for exec'ing in curl pod + kubeCli *kubernetes.Clientset + scheme = runtime.NewScheme() + cfg = config.GetConfigOrDie() +) + +func TestAPIs(t *testing.T) { + gomega.RegisterFailHandler(ginkgo.Fail) + ginkgo.RunSpecs(t, + "End To End Test Suite", + ) +} + +var _ = ginkgo.BeforeSuite(func() { + ginkgo.By("Setting up the test suite") + setupSuite() + + ginkgo.By("Creating test infrastructure") + setupInfra() +}) + +func setupInfra() { + crds := map[string]string{ + "inferencepools.inference.networking.x-k8s.io": inferPoolManifest, + "inferencemodels.inference.networking.x-k8s.io": inferModelManifest, + } + createCRDs(cli, crds) + createInferExt(cli, inferExtManifest) + createClient(cli, clientManifest) + createEnvoy(cli, envoyManifest) + // Run this step last, as it requires additional time for the model server to become ready. + createModelServer(cli, modelServerSecretManifest, modelServerManifest) +} + +var _ = ginkgo.AfterSuite(func() { + ginkgo.By("Performing global cleanup") + cleanupResources() +}) + +// setupSuite initializes the test suite by setting up the Kubernetes client, +// loading required API schemes, and validating configuration. +func setupSuite() { + ctx = context.Background() + gomega.ExpectWithOffset(1, cfg).NotTo(gomega.BeNil()) + + err := clientgoscheme.AddToScheme(scheme) + gomega.ExpectWithOffset(1, err).NotTo(gomega.HaveOccurred()) + + err = apiextv1.AddToScheme(scheme) + gomega.ExpectWithOffset(1, err).NotTo(gomega.HaveOccurred()) + + err = infextv1a1.AddToScheme(scheme) + gomega.ExpectWithOffset(1, err).NotTo(gomega.HaveOccurred()) + + cli, err = client.New(cfg, client.Options{Scheme: scheme}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(cli).NotTo(gomega.BeNil()) + + kubeCli, err = kubernetes.NewForConfig(cfg) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) +} + +func cleanupResources() { + gomega.Expect(testutils.DeleteClusterResources(ctx, cli)).To(gomega.Succeed()) + gomega.Expect(testutils.DeleteNamespacedResources(ctx, cli, nsName)).To(gomega.Succeed()) +} + +func cleanupInferModelResources() { + gomega.Expect(testutils.DeleteInferenceModelResources(ctx, cli, nsName)).To(gomega.Succeed()) +} + +func getTimeout(key string, fallback time.Duration) time.Duration { + if value, ok := os.LookupEnv(key); ok { + if parsed, err := time.ParseDuration(value); err == nil { + return parsed + } + } + return fallback +} + +var ( + existsTimeout = getTimeout("EXISTS_TIMEOUT", defaultExistsTimeout) + readyTimeout = getTimeout("READY_TIMEOUT", defaultReadyTimeout) + modelReadyTimeout = getTimeout("MODEL_READY_TIMEOUT", defaultModelReadyTimeout) + interval = defaultInterval +) + +// namespaceExists ensures that a specified namespace exists and is ready for use. 
+func namespaceExists(k8sClient client.Client, ns string) { + ginkgo.By("Ensuring namespace exists: " + ns) + testutils.EventuallyExists(ctx, func() error { + return k8sClient.Get(ctx, types.NamespacedName{Name: ns}, &corev1.Namespace{}) + }, existsTimeout, interval) +} + +// createCRDs creates the Inference Extension CRDs used for testing. +func createCRDs(k8sClient client.Client, crds map[string]string) { + for name, path := range crds { + ginkgo.By("Creating CRD resource from manifest: " + path) + applyYAMLFile(k8sClient, path) + + // Wait for the CRD to exist. + crd := &apiextv1.CustomResourceDefinition{} + testutils.EventuallyExists(ctx, func() error { + return k8sClient.Get(ctx, types.NamespacedName{Name: name}, crd) + }, existsTimeout, interval) + + // Wait for the CRD to be established. + testutils.CRDEstablished(ctx, k8sClient, crd, readyTimeout, interval) + } +} + +// createClient creates the client pod used for testing from the given filePath. +func createClient(k8sClient client.Client, filePath string) { + ginkgo.By("Creating client resources from manifest: " + filePath) + applyYAMLFile(k8sClient, filePath) + + // Wait for the pod to exist. + pod := &corev1.Pod{} + testutils.EventuallyExists(ctx, func() error { + return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: "curl"}, pod) + }, existsTimeout, interval) + + // Wait for the pod to be ready. + testutils.PodReady(ctx, k8sClient, pod, readyTimeout, interval) +} + +// createModelServer creates the model server resources used for testing from the given filePaths. +func createModelServer(k8sClient client.Client, secretPath, deployPath string) { + ginkgo.By("Ensuring the HF_TOKEN environment variable is set") + token := os.Getenv("HF_TOKEN") + gomega.Expect(token).NotTo(gomega.BeEmpty(), "HF_TOKEN is not set") + + inManifests := readYaml(secretPath) + ginkgo.By("Replacing placeholder secret data with HF_TOKEN environment variable") + outManifests := []string{} + for _, m := range inManifests { + outManifests = append(outManifests, strings.Replace(m, "$HF_TOKEN", token, 1)) + } + + ginkgo.By("Creating model server secret resource from manifest: " + secretPath) + createObjsFromYaml(k8sClient, outManifests) + + // Wait for the secret to exist before proceeding with test. + testutils.EventuallyExists(ctx, func() error { + return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: "hf-token"}, &corev1.Secret{}) + }, existsTimeout, interval) + + ginkgo.By("Creating model server resources from manifest: " + deployPath) + applyYAMLFile(k8sClient, deployPath) + + // Wait for the deployment to exist. + deploy := &appsv1.Deployment{} + testutils.EventuallyExists(ctx, func() error { + return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: modelServerName}, deploy) + }, existsTimeout, interval) + + // Wait for the deployment to be available. + testutils.DeploymentAvailable(ctx, k8sClient, deploy, modelReadyTimeout, interval) + + // Wait for the service to exist. + testutils.EventuallyExists(ctx, func() error { + return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: modelServerName}, &corev1.Service{}) + }, existsTimeout, interval) +} + +// createEnvoy creates the envoy proxy resources used for testing from the given filePath. +func createEnvoy(k8sClient client.Client, filePath string) { + ginkgo.By("Creating envoy proxy resources from manifest: " + filePath) + applyYAMLFile(k8sClient, filePath) + + // Wait for the configmap to exist before proceeding with test.
+ cfgMap := &corev1.ConfigMap{} + testutils.EventuallyExists(ctx, func() error { + return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: envoyName}, cfgMap) + }, existsTimeout, interval) + + // Wait for the deployment to exist. + deploy := &appsv1.Deployment{} + testutils.EventuallyExists(ctx, func() error { + return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: envoyName}, deploy) + }, existsTimeout, interval) + + // Wait for the deployment to be available. + testutils.DeploymentAvailable(ctx, k8sClient, deploy, readyTimeout, interval) + + // Wait for the service to exist. + testutils.EventuallyExists(ctx, func() error { + return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: envoyName}, &corev1.Service{}) + }, existsTimeout, interval) +} + +// createInferExt creates the inference extension resources used for testing from the given filePath. +func createInferExt(k8sClient client.Client, filePath string) { + ginkgo.By("Creating inference extension resources from manifest: " + filePath) + applyYAMLFile(k8sClient, filePath) + + // Wait for the clusterrole to exist. + testutils.EventuallyExists(ctx, func() error { + return k8sClient.Get(ctx, types.NamespacedName{Name: "pod-read"}, &rbacv1.ClusterRole{}) + }, existsTimeout, interval) + + // Wait for the clusterrolebinding to exist. + testutils.EventuallyExists(ctx, func() error { + return k8sClient.Get(ctx, types.NamespacedName{Name: "pod-read-binding"}, &rbacv1.ClusterRoleBinding{}) + }, existsTimeout, interval) + + // Wait for the deployment to exist. + deploy := &appsv1.Deployment{} + testutils.EventuallyExists(ctx, func() error { + return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: inferExtName}, deploy) + }, existsTimeout, interval) + + // Wait for the deployment to be available. + testutils.DeploymentAvailable(ctx, k8sClient, deploy, modelReadyTimeout, interval) + + // Wait for the service to exist. + testutils.EventuallyExists(ctx, func() error { + return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: inferExtName}, &corev1.Service{}) + }, existsTimeout, interval) +} + +// applyYAMLFile reads a file containing YAML (possibly multiple docs) +// and applies each object to the cluster. 
+func applyYAMLFile(k8sClient client.Client, filePath string) { + // Create the resources from the manifest file + createObjsFromYaml(k8sClient, readYaml(filePath)) +} + +func readYaml(filePath string) []string { + ginkgo.By("Reading YAML file: " + filePath) + yamlBytes, err := os.ReadFile(filePath) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Split multiple docs, if needed + return strings.Split(string(yamlBytes), "\n---") +} + +func createObjsFromYaml(k8sClient client.Client, docs []string) { + // For each doc, decode and create + decoder := serializer.NewCodecFactory(scheme).UniversalDeserializer() + for _, doc := range docs { + trimmed := strings.TrimSpace(doc) + if trimmed == "" { + continue + } + // Decode into a runtime.Object + obj, gvk, decodeErr := decoder.Decode([]byte(trimmed), nil, nil) + gomega.Expect(decodeErr).NotTo(gomega.HaveOccurred(), + "Failed to decode YAML document to a Kubernetes object") + + ginkgo.By(fmt.Sprintf("Decoded GVK: %s", gvk)) + + unstrObj, ok := obj.(*unstructured.Unstructured) + if !ok { + // Fallback if it's a typed object + unstrObj = &unstructured.Unstructured{} + // Convert typed to unstructured + err := scheme.Convert(obj, unstrObj, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } + + unstrObj.SetNamespace(nsName) + + // Create the object + err := k8sClient.Create(ctx, unstrObj) + gomega.Expect(err).NotTo(gomega.HaveOccurred(), + "Failed to create object from YAML") + } } diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 5da114779..801368a22 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -18,104 +18,103 @@ package e2e import ( "fmt" - "os/exec" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - "inference.networking.x-k8s.io/gateway-api-inference-extension/test/utils" + "strings" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + infextv1a1 "inference.networking.x-k8s.io/gateway-api-inference-extension/api/v1alpha1" + testutils "inference.networking.x-k8s.io/gateway-api-inference-extension/test/utils" + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" ) -const namespace = "api-system" - -var _ = Describe("controller", Ordered, func() { - BeforeAll(func() { - By("installing prometheus operator") - Expect(utils.InstallPrometheusOperator()).To(Succeed()) - - By("installing the cert-manager") - Expect(utils.InstallCertManager()).To(Succeed()) - - By("creating manager namespace") - cmd := exec.Command("kubectl", "create", "ns", namespace) - _, _ = utils.Run(cmd) +var _ = ginkgo.Describe("InferencePool", func() { + ginkgo.BeforeEach(func() { + ginkgo.By("Waiting for the namespace to exist.") + namespaceExists(cli, nsName) }) - AfterAll(func() { - By("uninstalling the Prometheus manager bundle") - utils.UninstallPrometheusOperator() - - By("uninstalling the cert-manager bundle") - utils.UninstallCertManager() - - By("removing manager namespace") - cmd := exec.Command("kubectl", "delete", "ns", namespace) - _, _ = utils.Run(cmd) + ginkgo.AfterEach(func() { + ginkgo.By("Deleting the InferenceModel test resource.") + cleanupInferModelResources() }) - Context("Operator", func() { - It("should run successfully", func() { - var controllerPodName string - var err error - - // projectimage stores the name of the image used in the example - var projectimage = "example.com/api:v0.0.1" - - By("building the manager(Operator) image") - cmd := exec.Command("make", "docker-build", "IMG=%s"+projectimage) - _, 
err = utils.Run(cmd) - ExpectWithOffset(1, err).NotTo(HaveOccurred()) - - By("loading the manager(Operator) image on Kind") - err = utils.LoadImageToKindClusterWithName(projectimage) - ExpectWithOffset(1, err).NotTo(HaveOccurred()) - - By("installing CRDs") - cmd = exec.Command("make", "install") - _, err = utils.Run(cmd) - ExpectWithOffset(1, err).NotTo(HaveOccurred()) - - By("deploying the controller-manager") - cmd = exec.Command("make", "deploy", "IMG=%s"+projectimage) - _, err = utils.Run(cmd) - ExpectWithOffset(1, err).NotTo(HaveOccurred()) - - By("validating that the controller-manager pod is running as expected") - verifyControllerUp := func() error { - // Get pod name + ginkgo.When("The Inference Extension is running", func() { + ginkgo.It("Should route traffic to target model servers", func() { + ginkgo.By("Creating an InferenceModel resource") + infModel := newInferenceModel(nsName) + gomega.Expect(cli.Create(ctx, infModel)).To(gomega.Succeed()) + + ginkgo.By("Ensuring the InferenceModel resource exists in the namespace") + gomega.Eventually(func() error { + err := cli.Get(ctx, types.NamespacedName{Namespace: infModel.Namespace, Name: infModel.Name}, infModel) + if err != nil { + return err + } + return nil + }, existsTimeout, interval).Should(gomega.Succeed()) - cmd = exec.Command("kubectl", "get", - "pods", "-l", "control-plane=controller-manager", - "-o", "go-template={{ range .items }}"+ - "{{ if not .metadata.deletionTimestamp }}"+ - "{{ .metadata.name }}"+ - "{{ \"\\n\" }}{{ end }}{{ end }}", - "-n", namespace, - ) + ginkgo.By("Verifying connectivity through the inference extension") + curlCmd := getCurlCommand(envoyName, nsName, envoyPort, modelName) - podOutput, err := utils.Run(cmd) - ExpectWithOffset(2, err).NotTo(HaveOccurred()) - podNames := utils.GetNonEmptyLines(string(podOutput)) - if len(podNames) != 1 { - return fmt.Errorf("expect 1 controller pods running, but got %d", len(podNames)) + // Ensure the expected responses include the inferencemodel target model names. + var expected []string + for _, m := range infModel.Spec.TargetModels { + expected = append(expected, m.Name) + } + gomega.Eventually(func() error { + actual := []string{} + resp, err := testutils.ExecCommandInPod(ctx, cfg, scheme, kubeCli, nsName, "curl", "curl", curlCmd) + if err != nil || !strings.Contains(resp, "200 OK") { + return fmt.Errorf("failed to get a 200 OK response (error: %v): %s", err, resp) } - controllerPodName = podNames[0] - ExpectWithOffset(2, controllerPodName).Should(ContainSubstring("controller-manager")) - - // Validate pod status - cmd = exec.Command("kubectl", "get", - "pods", controllerPodName, "-o", "jsonpath={.status.phase}", - "-n", namespace, - ) - status, err := utils.Run(cmd) - ExpectWithOffset(2, err).NotTo(HaveOccurred()) - if string(status) != "Running" { - return fmt.Errorf("controller pod in %s status", status) + for _, m := range expected { + if strings.Contains(resp, m) { + actual = append(actual, m) + } + } + // Compare expected and actual models in responses, ignoring order. + if !cmp.Equal(actual, expected, cmpopts.SortSlices(func(a, b string) bool { return a < b })) { + return fmt.Errorf("expected responses for target models %v, got %v", expected, actual) } return nil - } - EventuallyWithOffset(1, verifyControllerUp, time.Minute, time.Second).Should(Succeed()) - + }, existsTimeout, interval).Should(gomega.Succeed()) }) }) }) + +// newInferenceModel creates an InferenceModel in the given namespace for testing.
+func newInferenceModel(ns string) *infextv1a1.InferenceModel { + targets := []infextv1a1.TargetModel{ + { + Name: modelName + "-0", + Weight: ptr.To(int32(50)), + }, + { + Name: modelName + "-1", + Weight: ptr.To(int32(50)), + }, + } + return testutils.MakeModelWrapper("inferencemodel-sample", ns). + SetCriticality(infextv1a1.Critical). + SetModelName(modelName). + SetPoolRef(modelServerName). + SetTargetModels(targets). + Obj() +} + +// getCurlCommand returns the command, as a slice of strings, for curl'ing +// the test model server at the given name, namespace, port, and model name. +func getCurlCommand(name, ns, port, model string) []string { + return []string{ + "curl", + "-i", + fmt.Sprintf("%s.%s.svc:%s/v1/completions", name, ns, port), + "-H", + "Content-Type: application/json", + "-d", + fmt.Sprintf(`{"model": "%s", "prompt": "Write as if you were a critic: San Francisco", "max_tokens": 100, "temperature": 0}`, model), + } +} diff --git a/test/testdata/client.yaml b/test/testdata/client.yaml new file mode 100644 index 000000000..f2559189d --- /dev/null +++ b/test/testdata/client.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Pod +metadata: + annotations: + labels: + app: curl + name: curl + namespace: inf-ext-e2e +spec: + containers: + - command: + - tail + - -f + - /dev/null + image: curlimages/curl:7.83.1 + imagePullPolicy: IfNotPresent + name: curl + restartPolicy: Never + schedulerName: default-scheduler diff --git a/test/testdata/envoy.yaml b/test/testdata/envoy.yaml new file mode 100644 index 000000000..386d25875 --- /dev/null +++ b/test/testdata/envoy.yaml @@ -0,0 +1,288 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: envoy + namespace: inf-ext-e2e + labels: + app: envoy +data: + envoy.yaml: | + admin: + address: + socket_address: + address: 127.0.0.1 + port_value: 19000 + access_log: + - name: envoy.access_loggers.file + typed_config: + "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog + path: /dev/null + static_resources: + listeners: + - name: envoy-proxy-ready-0.0.0.0-19001 + address: + socket_address: + address: 0.0.0.0 + port_value: 19001 + filter_chains: + - filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + stat_prefix: envoy-ready-http + route_config: + name: local_route + virtual_hosts: + - name: prometheus_stats + domains: ["*"] + routes: + - match: + prefix: "/stats/prometheus" + route: + cluster: "prometheus_stats" + http_filters: + - name: envoy.filters.http.health_check + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.health_check.v3.HealthCheck + pass_through_mode: false + headers: + - name: ":path" + string_match: + exact: "/ready" + - name: envoy.filters.http.router + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + - name: vllm + address: + socket_address: + address: 0.0.0.0 + port_value: 8081 + per_connection_buffer_limit_bytes: 32768 + access_log: + - name: envoy.access_loggers.file + filter: + response_flag_filter: + flags: ["NR"] + typed_config: + "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog + path: /dev/stdout + log_format: + text_format_source: + inline_string: "{\"start_time\":\"%START_TIME%\",\"method\":\"%REQ(:METHOD)%\",...}\n" + filter_chains: + - name: vllm + filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type":
type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + stat_prefix: http-8081 + route_config: + name: vllm + virtual_hosts: + - name: vllm-default + domains: ["*"] + routes: + - match: + prefix: "/" + route: + cluster: original_destination_cluster + timeout: 86400s + idle_timeout: 86400s + upgrade_configs: + - upgrade_type: websocket + typed_per_filter_config: + envoy.filters.http.ext_proc: + "@type": type.googleapis.com/envoy.config.route.v3.FilterConfig + config: {} + http_filters: + - name: envoy.filters.http.ext_proc + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor + grpc_service: + envoy_grpc: + cluster_name: ext_proc + authority: inference-gateway-ext-proc.inf-ext-e2e:9002 + timeout: 10s + processing_mode: + request_header_mode: SEND + response_header_mode: SKIP + request_body_mode: BUFFERED + request_trailer_mode: SKIP + response_trailer_mode: SKIP + message_timeout: 1000s + # Mark it as disabled if needed for troubleshooting: + # disabled: true + - name: envoy.filters.http.router + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + suppress_envoy_headers: true + http2_protocol_options: + max_concurrent_streams: 100 + initial_stream_window_size: 65536 + initial_connection_window_size: 1048576 + use_remote_address: true + normalize_path: true + merge_slashes: true + server_header_transformation: PASS_THROUGH + common_http_protocol_options: + headers_with_underscores_action: REJECT_REQUEST + path_with_escaped_slashes_action: UNESCAPE_AND_REDIRECT + access_log: + - name: envoy.access_loggers.file + typed_config: + "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog + path: /dev/stdout + log_format: + text_format_source: + inline_string: "{\"start_time\":\"%START_TIME%\",\"method\":\"%REQ(:METHOD)%\",...}\n" + clusters: + - name: prometheus_stats + type: STATIC + connect_timeout: 0.250s + load_assignment: + cluster_name: prometheus_stats + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: 127.0.0.1 + port_value: 19000 + - name: original_destination_cluster + type: ORIGINAL_DST + connect_timeout: 1000s + lb_policy: CLUSTER_PROVIDED + circuit_breakers: + thresholds: + - max_connections: 40000 + max_pending_requests: 40000 + max_requests: 40000 + original_dst_lb_config: + use_http_header: true + http_header_name: target-pod + - name: ext_proc + type: STRICT_DNS + connect_timeout: 86400s + lb_policy: LEAST_REQUEST + circuit_breakers: + thresholds: + - max_connections: 40000 + max_pending_requests: 40000 + max_requests: 40000 + max_retries: 1024 + typed_extension_protocol_options: + envoy.extensions.upstreams.http.v3.HttpProtocolOptions: + "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions + explicit_http_config: + http2_protocol_options: + initial_stream_window_size: 65536 + initial_connection_window_size: 1048576 + load_assignment: + cluster_name: ext_proc + endpoints: + - locality: + region: ext_proc/e2e/0 + lb_endpoints: + - endpoint: + address: + socket_address: + address: inference-gateway-ext-proc.inf-ext-e2e + port_value: 9002 + health_status: HEALTHY + load_balancing_weight: 1 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: envoy + namespace: inf-ext-e2e + labels: + app: envoy +spec: + replicas: 1 + selector: + matchLabels: + app: envoy + template: + metadata: + labels: + app: envoy + annotations: + prometheus.io/path: 
/stats/prometheus + prometheus.io/port: "19001" + prometheus.io/scrape: "true" + spec: + containers: + - name: envoy + image: docker.io/envoyproxy/envoy:distroless-v1.32.2 + args: + - "--service-cluster" + - "default/inference-gateway" + - "--service-node" + - "$(ENVOY_POD_NAME)" + - "--log-level" + - "debug" + - "--cpuset-threads" + - "--drain-strategy" + - "immediate" + - "--drain-time-s" + - "60" + - "-c" + - "/etc/envoy/envoy.yaml" + command: + - envoy + env: + - name: ENVOY_NS_NAME + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: ENVOY_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + ports: + - containerPort: 8081 + name: http-8081 + - containerPort: 19001 + name: metrics + readinessProbe: + failureThreshold: 1 + httpGet: + path: /ready + port: 19001 + scheme: HTTP + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 1 + resources: + requests: + cpu: 100m + memory: 512Mi + volumeMounts: + - name: config + mountPath: /etc/envoy + readOnly: true + volumes: + - name: config + configMap: + name: envoy + items: + - key: envoy.yaml + path: envoy.yaml +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app: envoy + name: envoy + namespace: inf-ext-e2e +spec: + ports: + - name: http-8081 + port: 8081 + protocol: TCP + targetPort: 8081 + selector: + app: envoy + type: ClusterIP diff --git a/test/testdata/model-secret.yaml b/test/testdata/model-secret.yaml new file mode 100644 index 000000000..01c2556c3 --- /dev/null +++ b/test/testdata/model-secret.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Secret +metadata: + name: hf-token + labels: + app: vllm +stringData: + token: $HF_TOKEN diff --git a/test/utils/utils.go b/test/utils/utils.go index 1f96382e3..337599c34 100644 --- a/test/utils/utils.go +++ b/test/utils/utils.go @@ -17,124 +17,271 @@ limitations under the License. package utils import ( + "bytes" + "context" "fmt" - "os" - "os/exec" - "strings" + "time" - . "github.com/onsi/ginkgo/v2" //nolint:golint,revive + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + infextv1a1 "inference.networking.x-k8s.io/gateway-api-inference-extension/api/v1alpha1" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/remotecommand" + "sigs.k8s.io/controller-runtime/pkg/client" ) -const ( - prometheusOperatorVersion = "v0.72.0" - prometheusOperatorURL = "https://github.com/prometheus-operator/prometheus-operator/" + - "releases/download/%s/bundle.yaml" - - certmanagerVersion = "v1.14.4" - certmanagerURLTmpl = "https://github.com/jetstack/cert-manager/releases/download/%s/cert-manager.yaml" -) - -func warnError(err error) { - _, _ = fmt.Fprintf(GinkgoWriter, "warning: %v\n", err) +// DeleteClusterResources deletes all cluster-scoped objects the tests typically create. 
+func DeleteClusterResources(ctx context.Context, cli client.Client) error { + binding := &rbacv1.ClusterRoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-read-binding", + }, + } + err := cli.Delete(ctx, binding, client.PropagationPolicy(metav1.DeletePropagationForeground)) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + role := &rbacv1.ClusterRole{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-read", + }, + } + err = cli.Delete(ctx, role, client.PropagationPolicy(metav1.DeletePropagationForeground)) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + model := &apiextv1.CustomResourceDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: "inferencemodels.inference.networking.x-k8s.io", + }, + } + err = cli.Delete(ctx, model, client.PropagationPolicy(metav1.DeletePropagationForeground)) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + pool := &apiextv1.CustomResourceDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: "inferencepools.inference.networking.x-k8s.io", + }, + } + err = cli.Delete(ctx, pool, client.PropagationPolicy(metav1.DeletePropagationForeground)) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + return nil } -// InstallPrometheusOperator installs the prometheus Operator to be used to export the enabled metrics. -func InstallPrometheusOperator() error { - url := fmt.Sprintf(prometheusOperatorURL, prometheusOperatorVersion) - cmd := exec.Command("kubectl", "create", "-f", url) - _, err := Run(cmd) - return err +// DeleteNamespacedResources deletes all namespace-scoped objects the tests typically create. +// The given namespace will also be deleted if it's not "default". +func DeleteNamespacedResources(ctx context.Context, cli client.Client, ns string) error { + if ns == "" { + return nil + } + err := cli.DeleteAllOf(ctx, &appsv1.Deployment{}, client.InNamespace(ns), client.PropagationPolicy(metav1.DeletePropagationForeground)) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + err = cli.DeleteAllOf(ctx, &corev1.Service{}, client.InNamespace(ns), client.PropagationPolicy(metav1.DeletePropagationForeground)) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + err = cli.DeleteAllOf(ctx, &corev1.Pod{}, client.InNamespace(ns), client.PropagationPolicy(metav1.DeletePropagationForeground)) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + err = cli.DeleteAllOf(ctx, &corev1.ConfigMap{}, client.InNamespace(ns), client.PropagationPolicy(metav1.DeletePropagationForeground)) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + err = cli.DeleteAllOf(ctx, &corev1.Secret{}, client.InNamespace(ns), client.PropagationPolicy(metav1.DeletePropagationForeground)) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + err = cli.DeleteAllOf(ctx, &infextv1a1.InferencePool{}, client.InNamespace(ns), client.PropagationPolicy(metav1.DeletePropagationForeground)) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + err = cli.DeleteAllOf(ctx, &infextv1a1.InferenceModel{}, client.InNamespace(ns), client.PropagationPolicy(metav1.DeletePropagationForeground)) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + if ns != "default" { + ns := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: ns, + }, + } + if err := cli.Delete(ctx, ns, client.PropagationPolicy(metav1.DeletePropagationForeground)); err != nil && !apierrors.IsNotFound(err) { + return err + } + } + return nil } -// Run executes the provided command within this 
context -func Run(cmd *exec.Cmd) ([]byte, error) { - dir, _ := GetProjectDir() - cmd.Dir = dir - - if err := os.Chdir(cmd.Dir); err != nil { - _, _ = fmt.Fprintf(GinkgoWriter, "chdir dir: %s\n", err) +// DeleteInferenceModelResources deletes all InferenceModel objects in the given namespace. +func DeleteInferenceModelResources(ctx context.Context, cli client.Client, ns string) error { + if ns == "" { + return nil } - - cmd.Env = append(os.Environ(), "GO111MODULE=on") - command := strings.Join(cmd.Args, " ") - _, _ = fmt.Fprintf(GinkgoWriter, "running: %s\n", command) - output, err := cmd.CombinedOutput() - if err != nil { - return output, fmt.Errorf("%s failed with error: (%v) %s", command, err, string(output)) + err := cli.DeleteAllOf(ctx, &infextv1a1.InferenceModel{}, client.InNamespace(ns), client.PropagationPolicy(metav1.DeletePropagationForeground)) + if err != nil && !apierrors.IsNotFound(err) { + return err } - - return output, nil + return nil } -// UninstallPrometheusOperator uninstalls the prometheus -func UninstallPrometheusOperator() { - url := fmt.Sprintf(prometheusOperatorURL, prometheusOperatorVersion) - cmd := exec.Command("kubectl", "delete", "-f", url) - if _, err := Run(cmd); err != nil { - warnError(err) +// PodReady checks if the given Pod reports the "Ready" status condition before the given timeout. +func PodReady(ctx context.Context, cli client.Client, pod *corev1.Pod, timeout, interval time.Duration) { + ginkgo.By(fmt.Sprintf("Checking pod %s/%s status is: %s", pod.Namespace, pod.Name, corev1.PodReady)) + conditions := []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionTrue, + }, } + gomega.Eventually(checkPodStatus, timeout, interval).WithArguments(ctx, cli, pod, conditions).Should(gomega.BeTrue()) } -// UninstallCertManager uninstalls the cert manager -func UninstallCertManager() { - url := fmt.Sprintf(certmanagerURLTmpl, certmanagerVersion) - cmd := exec.Command("kubectl", "delete", "-f", url) - if _, err := Run(cmd); err != nil { - warnError(err) +// checkPodStatus checks if the given Pod status matches the expected conditions. +func checkPodStatus(ctx context.Context, cli client.Client, pod *corev1.Pod, conditions []corev1.PodCondition) (bool, error) { + var fetchedPod corev1.Pod + if err := cli.Get(ctx, types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}, &fetchedPod); err != nil { + return false, err + } + found := 0 + for _, want := range conditions { + for _, c := range fetchedPod.Status.Conditions { + if c.Type == want.Type && c.Status == want.Status { + found += 1 + } + } } + return found == len(conditions), nil } -// InstallCertManager installs the cert manager bundle. -func InstallCertManager() error { - url := fmt.Sprintf(certmanagerURLTmpl, certmanagerVersion) - cmd := exec.Command("kubectl", "apply", "-f", url) - if _, err := Run(cmd); err != nil { - return err +// DeploymentAvailable checks if the given Deployment reports the "Available" status condition before the given timeout. +func DeploymentAvailable(ctx context.Context, cli client.Client, deploy *appsv1.Deployment, timeout, interval time.Duration) { + ginkgo.By(fmt.Sprintf("Checking if deployment %s/%s status is: %s", deploy.Namespace, deploy.Name, appsv1.DeploymentAvailable)) + conditions := []appsv1.DeploymentCondition{ + { + Type: appsv1.DeploymentAvailable, + Status: corev1.ConditionTrue, + }, } - // Wait for cert-manager-webhook to be ready, which can take time if cert-manager - // was re-installed after uninstalling on a cluster. 
- cmd = exec.Command("kubectl", "wait", "deployment.apps/cert-manager-webhook", - "--for", "condition=Available", - "--namespace", "cert-manager", - "--timeout", "5m", - ) + gomega.Eventually(checkDeploymentStatus, timeout, interval).WithArguments(ctx, cli, deploy, conditions).Should(gomega.BeTrue()) +} - _, err := Run(cmd) - return err +// checkDeploymentStatus checks if the given Deployment status matches the expected conditions. +func checkDeploymentStatus(ctx context.Context, cli client.Client, deploy *appsv1.Deployment, conditions []appsv1.DeploymentCondition) (bool, error) { + var fetchedDeploy appsv1.Deployment + if err := cli.Get(ctx, types.NamespacedName{Namespace: deploy.Namespace, Name: deploy.Name}, &fetchedDeploy); err != nil { + return false, err + } + found := 0 + for _, want := range conditions { + for _, c := range fetchedDeploy.Status.Conditions { + if c.Type == want.Type && c.Status == want.Status { + found += 1 + } + } + } + return found == len(conditions), nil } -// LoadImageToKindClusterWithName loads a local docker image to the kind cluster -func LoadImageToKindClusterWithName(name string) error { - cluster := "kind" - if v, ok := os.LookupEnv("KIND_CLUSTER"); ok { - cluster = v +// CRDEstablished checks if the given CRD reports the "Established" status condition before the given timeout. +func CRDEstablished(ctx context.Context, cli client.Client, crd *apiextv1.CustomResourceDefinition, timeout, interval time.Duration) { + ginkgo.By(fmt.Sprintf("Checking CRD %s status is: %s", crd.Name, apiextv1.Established)) + conditions := []apiextv1.CustomResourceDefinitionCondition{ + { + Type: apiextv1.Established, + Status: apiextv1.ConditionTrue, + }, } - kindOptions := []string{"load", "docker-image", name, "--name", cluster} - cmd := exec.Command("kind", kindOptions...) - _, err := Run(cmd) - return err + gomega.Eventually(checkCrdStatus, timeout, interval).WithArguments(ctx, cli, crd, conditions).Should(gomega.BeTrue()) } -// GetNonEmptyLines converts given command output string into individual objects -// according to line breakers, and ignores the empty elements in it. -func GetNonEmptyLines(output string) []string { - var res []string - elements := strings.Split(output, "\n") - for _, element := range elements { - if element != "" { - res = append(res, element) +// checkCrdStatus checks if the given CRD status matches the expected conditions. +func checkCrdStatus( + ctx context.Context, + cli client.Client, + crd *apiextv1.CustomResourceDefinition, + conditions []apiextv1.CustomResourceDefinitionCondition, +) (bool, error) { + var fetchedCrd apiextv1.CustomResourceDefinition + if err := cli.Get(ctx, types.NamespacedName{Name: crd.Name}, &fetchedCrd); err != nil { + return false, err + } + found := 0 + for _, want := range conditions { + for _, c := range fetchedCrd.Status.Conditions { + if c.Type == want.Type && c.Status == want.Status { + found += 1 + } } } - - return res + return found == len(conditions), nil } -// GetProjectDir will return the directory where the project is -func GetProjectDir() (string, error) { - wd, err := os.Getwd() +// ExecCommandInPod runs a command in a given container of a given Pod, returning combined stdout+stderr. +func ExecCommandInPod( + ctx context.Context, + cfg *rest.Config, + scheme *runtime.Scheme, + kubeClient *kubernetes.Clientset, + podNamespace, podName, containerName string, + cmd []string, +) (string, error) { + + parameterCodec := runtime.NewParameterCodec(scheme) + + req := kubeClient.CoreV1().RESTClient(). + Post(). 
+ Resource("pods"). + Name(podName). + Namespace(podNamespace). + SubResource("exec"). + VersionedParams(&corev1.PodExecOptions{ + Container: containerName, + Command: cmd, + Stdin: false, + Stdout: true, + Stderr: true, + TTY: false, + }, parameterCodec) + + exec, err := remotecommand.NewSPDYExecutor(cfg, "POST", req.URL()) if err != nil { - return wd, err + return "", fmt.Errorf("could not initialize executor: %w", err) } - wd = strings.ReplaceAll(wd, "/test/e2e", "") - return wd, nil + + var stdout, stderr bytes.Buffer + execErr := exec.StreamWithContext(ctx, remotecommand.StreamOptions{ + Stdout: &stdout, + Stderr: &stderr, + }) + + combinedOutput := stdout.String() + stderr.String() + + if execErr != nil { + return combinedOutput, fmt.Errorf("exec dial command %v failed: %w", cmd, execErr) + } + + return combinedOutput, nil +} + +// EventuallyExists checks if a Kubernetes resource exists and returns nil if successful. +// It takes a function `getResource` which retrieves the resource and returns an error if it doesn't exist. +func EventuallyExists(ctx context.Context, getResource func() error, timeout, interval time.Duration) { + gomega.Eventually(func() error { + return getResource() + }, timeout, interval).Should(gomega.Succeed()) } diff --git a/test/utils/wrappers.go b/test/utils/wrappers.go new file mode 100644 index 000000000..12ff856af --- /dev/null +++ b/test/utils/wrappers.go @@ -0,0 +1,78 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package utils + +import ( + infextv1a1 "inference.networking.x-k8s.io/gateway-api-inference-extension/api/v1alpha1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// InferenceModelWrapper wraps an InferenceModel. +type InferenceModelWrapper struct { + infextv1a1.InferenceModel +} + +// MakeModelWrapper creates a wrapper for an InferenceModel. +func MakeModelWrapper(name, ns string) *InferenceModelWrapper { + return &InferenceModelWrapper{ + infextv1a1.InferenceModel{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: ns, + }, + Spec: infextv1a1.InferenceModelSpec{ + ModelName: "", + PoolRef: infextv1a1.PoolObjectReference{}, + }, + }, + } +} + +// SetModelName sets the value of the inferenceModel.spec.modelName. +func (m *InferenceModelWrapper) SetModelName(name string) *InferenceModelWrapper { + m.Spec.ModelName = name + return m +} + +// SetCriticality sets the value of the inferenceModel.spec.criticality. +func (m *InferenceModelWrapper) SetCriticality(level infextv1a1.Criticality) *InferenceModelWrapper { + m.Spec.Criticality = &level + return m +} + +// SetPoolRef sets the value of the inferenceModel.spec.poolRef using defaults +// for group/kind and name as the PoolObjectReference name.
+func (m *InferenceModelWrapper) SetPoolRef(name string) *InferenceModelWrapper { + ref := infextv1a1.PoolObjectReference{ + Group: infextv1a1.GroupVersion.Group, + Kind: "inferencepools", + Name: name, + } + m.Spec.PoolRef = ref + return m +} + +// SetTargetModels sets the value of the inferenceModel.spec.targetModels. +func (m *InferenceModelWrapper) SetTargetModels(models []infextv1a1.TargetModel) *InferenceModelWrapper { + m.Spec.TargetModels = models + return m +} + +// Obj returns the inner InferenceModel. +func (m *InferenceModelWrapper) Obj() *infextv1a1.InferenceModel { + return &m.InferenceModel +}
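Usage note (not part of the diff above): the suite's `getTimeout` helper in `test/e2e/e2e_suite_test.go` reads optional overrides for the exists/ready/model-ready timeouts from the environment, parsed with Go's `time.ParseDuration`, and `createModelServer` requires `HF_TOKEN`. A minimal sketch of invoking the tests with overrides; the duration values are illustrative and the token value is a placeholder:

```sh
# Required: Hugging Face token with access to meta-llama/Llama-2-7b-hf (placeholder value).
export HF_TOKEN=<your-hf-token>
# Optional: override the suite's default timeouts (Go duration syntax, e.g. 30s, 5m).
export EXISTS_TIMEOUT=1m
export READY_TIMEOUT=5m
export MODEL_READY_TIMEOUT=20m
make test-e2e
```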