diff --git a/unit_tests/assistants/directory_structure/relevant_file_finder.py b/unit_tests/assistants/directory_structure/relevant_file_finder.py
index c6788bd..b6e712c 100644
--- a/unit_tests/assistants/directory_structure/relevant_file_finder.py
+++ b/unit_tests/assistants/directory_structure/relevant_file_finder.py
@@ -4,16 +4,28 @@ from typing import Callable, List
 
 from assistants.directory_structure.base import DirectoryStructureBase
 from assistants.rerank_files import rerank_files
+from devchat.llm.openai import chat_completion_no_stream_return_json
+from llm_conf import (
+    CONTEXT_SIZE,
+    DEFAULT_CONTEXT_SIZE,
+    DEFAULT_ENCODING,
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from openai_util import create_chat_completion_content
 from tools.directory_viewer import ListViewer
 from tools.tiktoken_util import get_encoding
 
+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-3.5-turbo"
+ENCODING = (
+    get_encoding(DEFAULT_ENCODING)  # Use default encoding as an approximation
+    if USE_USER_MODEL
+    else get_encoding("cl100k_base")
+)
+TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.95)
+
 
 class RelevantFileFinder(DirectoryStructureBase):
-    model_name = "gpt-3.5-turbo-1106"
-    dir_token_budget = 16000 * 0.95
-    encoding = get_encoding("cl100k_base")
-
     def _paginate_dir_structure(
         self, criteria: Callable[[Path], bool], style: str = "list"
     ) -> List[str]:
@@ -38,8 +50,8 @@ class RelevantFileFinder(DirectoryStructureBase):
         # Check if each page is within the token budget
         within_budget = True
         for p in pages:
-            tokens = len(self.encoding.encode(p, disallowed_special=()))
-            if tokens > self.dir_token_budget:
+            tokens = len(ENCODING.encode(p, disallowed_special=()))
+            if tokens > TOKEN_BUDGET:
                 within_budget = False
                 break
 
@@ -82,16 +94,31 @@ class RelevantFileFinder(DirectoryStructureBase):
 
         for dir_structure in dir_structure_pages:
             user_msg = self._mk_message(objective, dir_structure)
 
-            response = create_chat_completion_content(
-                model=self.model_name,
-                messages=[
-                    {"role": "user", "content": user_msg},
-                ],
-                response_format={"type": "json_object"},
-                temperature=0.1,
-            )
+            json_res = {}
+            if USE_USER_MODEL:
+                # Use the wrapped api parameters
+                json_res = (
+                    chat_completion_no_stream_return_json(
+                        messages=[{"role": "user", "content": user_msg}],
+                        llm_config={
+                            "model": MODEL,
+                            "temperature": 0.1,
+                        },
+                    )
+                    or {}
+                )
 
-            json_res = json.loads(response)
+            else:
+                # Use the openai api parameters
+                response = create_chat_completion_content(
+                    model=MODEL,
+                    messages=[
+                        {"role": "user", "content": user_msg},
+                    ],
+                    response_format={"type": "json_object"},
+                    temperature=0.1,
+                )
+                json_res = json.loads(response)
 
             files.extend(json_res.get("files", []))
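The budget constants above are resolved once at import time. For reference, a sketch of the arithmetic using the values defined in unit_tests/llm_conf.py further down in this patch (the inline results are illustrative, not part of the patch):

# For the default (non-user-model) path, MODEL is "gpt-3.5-turbo" and
# CONTEXT_SIZE maps it to 16000 tokens, so each directory-listing page may
# use at most int(16000 * 0.95) = 15200 tokens. A model missing from
# CONTEXT_SIZE falls back to DEFAULT_CONTEXT_SIZE: int(4000 * 0.95) = 3800.
TOKEN_BUDGET = int(CONTEXT_SIZE.get("gpt-3.5-turbo", DEFAULT_CONTEXT_SIZE) * 0.95)  # == 15200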
diff --git a/unit_tests/assistants/recommend_test_context.py b/unit_tests/assistants/recommend_test_context.py
index 45050e9..f7d849f 100644
--- a/unit_tests/assistants/recommend_test_context.py
+++ b/unit_tests/assistants/recommend_test_context.py
@@ -1,13 +1,26 @@
 import json
 from typing import List, Optional
 
+from devchat.llm.openai import chat_completion_no_stream_return_json
+from llm_conf import (
+    CONTEXT_SIZE,
+    DEFAULT_CONTEXT_SIZE,
+    DEFAULT_ENCODING,
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from model import FuncToTest
 from openai_util import create_chat_completion_content
+from tools.tiktoken_util import get_encoding
 
-MODEL = "gpt-4-1106-preview"
-ENCODING = "cl100k_base"
-# TODO: handle token budget
-TOKEN_BUDGET = int(128000 * 0.9)
+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-4-turbo-preview"
+ENCODING = (
+    get_encoding(DEFAULT_ENCODING)  # Use default encoding as an approximation
+    if USE_USER_MODEL
+    else get_encoding("cl100k_base")
+)
+
+TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.9)
 
 
 # ruff: noqa: E501
@@ -45,26 +58,59 @@ JSON Format Example:
 """
 
+def _mk_user_msg(func_to_test: FuncToTest, contexts: List) -> str:
+    """
+    Create a user message to be sent to the model within the token budget.
+    """
+    msg = None
+    while msg is None:
+        context_content = "\n\n".join([str(c) for c in contexts])
+
+        msg = recommend_symbol_context_prompt.format(
+            function_content=func_to_test.func_content,
+            context_content=context_content,
+            function_name=func_to_test.func_name,
+            file_path=func_to_test.file_path,
+        )
+
+        token_count = len(ENCODING.encode(msg, disallowed_special=()))
+        if contexts and token_count > TOKEN_BUDGET:
+            # Remove the last context and try again
+            contexts.pop()
+            msg = None
+
+    return msg
+
+
 def get_recommended_symbols(
     func_to_test: FuncToTest, known_context: Optional[List] = None
 ) -> List[str]:
     known_context = known_context or []
 
-    context_content = "\n\n".join([str(c) for c in known_context])
+    msg = _mk_user_msg(func_to_test, known_context)
 
-    msg = recommend_symbol_context_prompt.format(
-        function_content=func_to_test.func_content,
-        context_content=context_content,
-        function_name=func_to_test.func_name,
-        file_path=func_to_test.file_path,
-    )
+    json_res = {}
+    if USE_USER_MODEL:
+        # Use the wrapped api parameters
+        json_res = (
+            chat_completion_no_stream_return_json(
+                messages=[{"role": "user", "content": msg}],
+                llm_config={
+                    "model": MODEL,
+                    "temperature": 0.1,
+                },
+            )
+            or {}
+        )
 
-    response = create_chat_completion_content(
-        model=MODEL,
-        messages=[{"role": "user", "content": msg}],
-        response_format={"type": "json_object"},
-        temperature=0.1,
-    )
+    else:
+        response = create_chat_completion_content(
+            model=MODEL,
+            messages=[{"role": "user", "content": msg}],
+            response_format={"type": "json_object"},
+            temperature=0.1,
+        )
+        json_res = json.loads(response)
 
-    key_symbols = json.loads(response).get("key_symbols", [])
+    key_symbols = json_res.get("key_symbols", [])
 
     return key_symbols
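The new _mk_user_msg helper trims context instead of failing outright. A minimal standalone sketch of the same strategy; fit_to_budget and its parameters are illustrative names, not part of the patch:

def fit_to_budget(render, contexts, encode, budget):
    # Re-render the prompt and drop the last (lowest-priority) context
    # until the message fits, or until no contexts remain.
    msg = render(contexts)
    while contexts and len(encode(msg)) > budget:
        contexts.pop()
        msg = render(contexts)
    # Like _mk_user_msg, this can still return an over-budget message
    # once every context has been dropped.
    return msg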
diff --git a/unit_tests/assistants/rerank_files.py b/unit_tests/assistants/rerank_files.py
index 9d3068a..523a274 100644
--- a/unit_tests/assistants/rerank_files.py
+++ b/unit_tests/assistants/rerank_files.py
@@ -1,8 +1,15 @@
 import json
 from typing import List, Tuple
 
+from devchat.llm.openai import chat_completion_no_stream_return_json
+from llm_conf import (
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from openai_util import create_chat_completion_content
 
+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-3.5-turbo"
+
 # ruff: noqa: E501
 
 rerank_file_prompt = """
@@ -28,8 +35,6 @@ Accumulated Knowledge:
 {accumulated_knowledge}
 Answer:
 """
-RERANK_MODEL = "gpt-3.5-turbo-1106"
-
 
 def rerank_files(
     question: str,
@@ -56,19 +61,40 @@ def rerank_files(
         accumulated_knowledge=knowledge,
     )
 
-    response = create_chat_completion_content(
-        model=RERANK_MODEL,
-        messages=[
-            {
-                "role": "user",
-                "content": user_msg,
-            },
-        ],
-        response_format={"type": "json_object"},
-        temperature=0.1,
-    )
+    result = {}
+    if USE_USER_MODEL:
+        # Use the wrapped api parameters
+        result = (
+            chat_completion_no_stream_return_json(
+                messages=[
+                    {
+                        "role": "user",
+                        "content": user_msg,
+                    },
+                ],
+                llm_config={
+                    "model": MODEL,
+                    "temperature": 0.1,
+                },
+            )
+            or {}
+        )
 
-    result = json.loads(response)
-    reranked = [(i["item"], i["relevance"]) for i in result["result"]]
+    else:
+        # Use the openai api parameters
+        response = create_chat_completion_content(
+            model=MODEL,
+            messages=[
+                {
+                    "role": "user",
+                    "content": user_msg,
+                },
+            ],
+            response_format={"type": "json_object"},
+            temperature=0.1,
+        )
+        result = json.loads(response)
+
+    reranked = [(i["item"], i["relevance"]) for i in result.get("result", [])]
 
     return reranked
diff --git a/unit_tests/llm_conf.py b/unit_tests/llm_conf.py
new file mode 100644
index 0000000..a0af3f8
--- /dev/null
+++ b/unit_tests/llm_conf.py
@@ -0,0 +1,22 @@
+import os
+
+USE_USER_MODEL = os.environ.get("DEVCHAT_UNIT_TESTS_USE_USER_MODEL", "") in ("1", "true", "True")
+USER_LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4-turbo-preview")
+
+DEFAULT_CONTEXT_SIZE = 4000
+CONTEXT_SIZE = {
+    "gpt-3.5-turbo": 16000,
+    "gpt-4": 8000,
+    "gpt-4-turbo-preview": 128000,
+    "claude-3-sonnet": 200000,
+    "claude-3-opus": 200000,
+    "xinghuo-3.5": 8000,
+    "GLM-4": 8000,
+    "ERNIE-Bot-4.0": 8000,
+    "togetherai/codellama/CodeLlama-70b-Instruct-hf": 4000,
+    "togetherai/mistralai/Mixtral-8x7B-Instruct-v0.1": 16000,
+    "minimax/abab6-chat": 8000,
+    "llama-2-70b-chat": 4000,
+}
+
+DEFAULT_ENCODING = "cl100k_base"
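llm_conf.py reads its switches from the environment at import time, so they must be set before the module is first imported. A hypothetical local setup for exercising the user-model path (the chosen model value is just an example; any key present in CONTEXT_SIZE works):

# Hypothetical configuration, set before importing llm_conf.
import os

os.environ["DEVCHAT_UNIT_TESTS_USE_USER_MODEL"] = "1"
os.environ["LLM_MODEL"] = "claude-3-sonnet"

import llm_conf  # import after the environment is prepared

assert llm_conf.USE_USER_MODEL is True
assert llm_conf.CONTEXT_SIZE[llm_conf.USER_LLM_MODEL] == 200000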
""" - encoding = get_encoding(ENCODING) func_content = f"function code\n```\n{func_to_test.func_content}\n```\n" class_content = "" @@ -61,7 +71,7 @@ def _mk_user_msg( prioritized_msgs = [msg_0, msg_1, msg_2] for msg in prioritized_msgs: - token_count = len(encoding.encode(msg, disallowed_special=())) + token_count = len(ENCODING.encode(msg, disallowed_special=())) if token_count <= TOKEN_BUDGET: return msg @@ -97,14 +107,31 @@ def propose_test( chat_language=chat_language, ) - content = create_chat_completion_content( - model=MODEL, - messages=[{"role": "user", "content": user_msg}], - response_format={"type": "json_object"}, - temperature=0.1, - ) + json_res = {} + if USE_USER_MODEL: + # Use the wrapped api parameters + json_res = ( + chat_completion_no_stream_return_json( + messages=[{"role": "user", "content": user_msg}], + llm_config={ + "model": MODEL, + "temperature": 0.1, + }, + ) + or {} + ) - cases = json.loads(content).get("test_cases", []) + else: + # Use the openai api parameters + content = create_chat_completion_content( + model=MODEL, + messages=[{"role": "user", "content": user_msg}], + response_format={"type": "json_object"}, + temperature=0.1, + ) + json_res = json.loads(content) + + cases = json_res.get("test_cases", []) descriptions = [] for case in cases: diff --git a/unit_tests/write_tests.py b/unit_tests/write_tests.py index 79fb1bc..183c720 100644 --- a/unit_tests/write_tests.py +++ b/unit_tests/write_tests.py @@ -1,16 +1,28 @@ from functools import partial from typing import List, Optional +from devchat.llm.openai import chat_completion_stream from find_context import Context +from llm_conf import ( + CONTEXT_SIZE, + DEFAULT_CONTEXT_SIZE, + DEFAULT_ENCODING, + USE_USER_MODEL, + USER_LLM_MODEL, +) from model import FuncToTest, TokenBudgetExceededException from openai_util import create_chat_completion_chunks from prompts import WRITE_TESTS_PROMPT from tools.file_util import retrieve_file_content from tools.tiktoken_util import get_encoding -MODEL = "gpt-4-1106-preview" -ENCODING = "cl100k_base" -TOKEN_BUDGET = int(128000 * 0.9) +MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-4-turbo-preview" +ENCODING = ( + get_encoding(DEFAULT_ENCODING) # Use default encoding as an approximation + if USE_USER_MODEL + else get_encoding("cl100k_base") +) +TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.9) def _mk_write_tests_msg( @@ -23,8 +35,6 @@ def _mk_write_tests_msg( symbol_contexts: Optional[List[Context]] = None, user_requirements: str = "", ) -> Optional[str]: - encoding = get_encoding(ENCODING) - additional_requirements = user_requirements test_cases_str = "" @@ -94,7 +104,7 @@ def _mk_write_tests_msg( prioritized_msgs = [msg_0, msg_1, msg_2, msg_3] for msg in prioritized_msgs: - tokens = len(encoding.encode(msg, disallowed_special=())) + tokens = len(ENCODING.encode(msg, disallowed_special=())) if tokens <= TOKEN_BUDGET: return msg @@ -124,13 +134,26 @@ def write_and_print_tests( chat_language=chat_language, ) - chunks = create_chat_completion_chunks( - model=MODEL, - messages=[{"role": "user", "content": user_msg}], - temperature=0.1, - ) + if USE_USER_MODEL: + # Use the wrapped api + res = chat_completion_stream( + messages=[{"role": "user", "content": user_msg}], + llm_config={"model": MODEL, "temperature": 0.1}, + ) + if res: + print(res.get("content", "")) - for chunk in chunks: - if chunk.choices[0].finish_reason == "stop": - break - print(chunk.choices[0].delta.content, flush=True, end="") + else: + # Use the openai api parameters + chunks = 
diff --git a/unit_tests/write_tests.py b/unit_tests/write_tests.py
index 79fb1bc..183c720 100644
--- a/unit_tests/write_tests.py
+++ b/unit_tests/write_tests.py
@@ -1,16 +1,28 @@
 from functools import partial
 from typing import List, Optional
 
+from devchat.llm.openai import chat_completion_stream
 from find_context import Context
+from llm_conf import (
+    CONTEXT_SIZE,
+    DEFAULT_CONTEXT_SIZE,
+    DEFAULT_ENCODING,
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from model import FuncToTest, TokenBudgetExceededException
 from openai_util import create_chat_completion_chunks
 from prompts import WRITE_TESTS_PROMPT
 from tools.file_util import retrieve_file_content
 from tools.tiktoken_util import get_encoding
 
-MODEL = "gpt-4-1106-preview"
-ENCODING = "cl100k_base"
-TOKEN_BUDGET = int(128000 * 0.9)
+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-4-turbo-preview"
+ENCODING = (
+    get_encoding(DEFAULT_ENCODING)  # Use default encoding as an approximation
+    if USE_USER_MODEL
+    else get_encoding("cl100k_base")
+)
+TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.9)
 
 
 def _mk_write_tests_msg(
@@ -23,8 +35,6 @@
     symbol_contexts: Optional[List[Context]] = None,
     user_requirements: str = "",
 ) -> Optional[str]:
-    encoding = get_encoding(ENCODING)
-
     additional_requirements = user_requirements
 
     test_cases_str = ""
@@ -94,7 +104,7 @@
     prioritized_msgs = [msg_0, msg_1, msg_2, msg_3]
 
     for msg in prioritized_msgs:
-        tokens = len(encoding.encode(msg, disallowed_special=()))
+        tokens = len(ENCODING.encode(msg, disallowed_special=()))
         if tokens <= TOKEN_BUDGET:
             return msg
 
@@ -124,13 +134,26 @@ def write_and_print_tests(
         chat_language=chat_language,
     )
 
-    chunks = create_chat_completion_chunks(
-        model=MODEL,
-        messages=[{"role": "user", "content": user_msg}],
-        temperature=0.1,
-    )
+    if USE_USER_MODEL:
+        # Use the wrapped api
+        res = chat_completion_stream(
+            messages=[{"role": "user", "content": user_msg}],
+            llm_config={"model": MODEL, "temperature": 0.1},
+        )
+        if res:
+            print(res.get("content", ""))
 
-    for chunk in chunks:
-        if chunk.choices[0].finish_reason == "stop":
-            break
-        print(chunk.choices[0].delta.content, flush=True, end="")
+    else:
+        # Use the openai api parameters
+        chunks = create_chat_completion_chunks(
+            model=MODEL,
+            messages=[{"role": "user", "content": user_msg}],
+            temperature=0.1,
+        )
+        for chunk in chunks:
+            if chunk.choices[0].finish_reason == "stop":
+                break
+
+            content = chunk.choices[0].delta.content
+            if content is not None:
+                print(content, flush=True, end="")
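A small usage sketch of the user-model branch above, assuming (as the patched code does) that chat_completion_stream returns a dict carrying the full completion under "content", or a falsy value on failure; the prompt text is illustrative:

# Hypothetical driver mirroring write_and_print_tests' user-model branch.
res = chat_completion_stream(
    messages=[{"role": "user", "content": "Write unit tests for add(a, b)."}],
    llm_config={"model": MODEL, "temperature": 0.1},
)
print((res or {}).get("content", ""))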