Merge pull request #75 from devchat-ai/switch-model

Use the env var `DEVCHAT_UNIT_TESTS_USE_USER_MODEL` to switch `/unit_tests` to the user-selected model
boob.yang 2024-03-13 14:34:57 +08:00 committed by GitHub
commit ccc1d97c90
6 changed files with 247 additions and 76 deletions

View File

@@ -4,16 +4,28 @@ from typing import Callable, List

 from assistants.directory_structure.base import DirectoryStructureBase
 from assistants.rerank_files import rerank_files
+from devchat.llm.openai import chat_completion_no_stream_return_json
+from llm_conf import (
+    CONTEXT_SIZE,
+    DEFAULT_CONTEXT_SIZE,
+    DEFAULT_ENCODING,
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from openai_util import create_chat_completion_content
 from tools.directory_viewer import ListViewer
 from tools.tiktoken_util import get_encoding

+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-3.5-turbo"
+ENCODING = (
+    get_encoding(DEFAULT_ENCODING)  # Use default encoding as an approximation
+    if USE_USER_MODEL
+    else get_encoding("cl100k_base")
+)
+TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.95)
+

 class RelevantFileFinder(DirectoryStructureBase):
-    model_name = "gpt-3.5-turbo-1106"
-    dir_token_budget = 16000 * 0.95
-    encoding = get_encoding("cl100k_base")
-
     def _paginate_dir_structure(
         self, criteria: Callable[[Path], bool], style: str = "list"
     ) -> List[str]:
@@ -38,8 +50,8 @@ class RelevantFileFinder(DirectoryStructureBase):
         # Check if each page is within the token budget
         within_budget = True
         for p in pages:
-            tokens = len(self.encoding.encode(p, disallowed_special=()))
-            if tokens > self.dir_token_budget:
+            tokens = len(ENCODING.encode(p, disallowed_special=()))
+            if tokens > TOKEN_BUDGET:
                 within_budget = False
                 break
@@ -82,16 +94,31 @@ class RelevantFileFinder(DirectoryStructureBase):
         for dir_structure in dir_structure_pages:
             user_msg = self._mk_message(objective, dir_structure)

-            response = create_chat_completion_content(
-                model=self.model_name,
-                messages=[
-                    {"role": "user", "content": user_msg},
-                ],
-                response_format={"type": "json_object"},
-                temperature=0.1,
-            )
-            json_res = json.loads(response)
+            json_res = {}
+            if USE_USER_MODEL:
+                # Use the wrapped api parameters
+                json_res = (
+                    chat_completion_no_stream_return_json(
+                        messages=[{"role": "user", "content": user_msg}],
+                        llm_config={
+                            "model": MODEL,
+                            "temperature": 0.1,
+                        },
+                    )
+                    or {}
+                )
+            else:
+                # Use the openai api parameters
+                response = create_chat_completion_content(
+                    model=MODEL,
+                    messages=[
+                        {"role": "user", "content": user_msg},
+                    ],
+                    response_format={"type": "json_object"},
+                    temperature=0.1,
+                )
+                json_res = json.loads(response)

             files.extend(json_res.get("files", []))
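Every caller in this PR repeats the same dispatch: when `USE_USER_MODEL` is set, the wrapped devchat helper returns an already-parsed dict (or `None`, hence the `or {}` guard); otherwise the OpenAI-style helper returns a JSON string the caller parses itself. A minimal sketch of that shared shape, assuming only the two calls visible in the diff (the `complete_json` name and its signature are ours, not the repo's):

```python
import json
import os
from typing import Any, Dict, List


def complete_json(messages: List[Dict[str, str]], model: str,
                  temperature: float = 0.1) -> Dict[str, Any]:
    """Return the model's JSON reply as a dict; never None."""
    if os.environ.get("DEVCHAT_UNIT_TESTS_USE_USER_MODEL"):
        # Wrapped API: returns a parsed dict, or None on failure.
        from devchat.llm.openai import chat_completion_no_stream_return_json
        return chat_completion_no_stream_return_json(
            messages=messages,
            llm_config={"model": model, "temperature": temperature},
        ) or {}
    # OpenAI-style API: returns a JSON string for the caller to parse.
    from openai_util import create_chat_completion_content
    response = create_chat_completion_content(
        model=model,
        messages=messages,
        response_format={"type": "json_object"},
        temperature=temperature,
    )
    return json.loads(response)
```

Factored this way, the `or {}` fallback and the `json.loads` would live in one place instead of being repeated at each call site below.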

View File

@@ -1,13 +1,26 @@
 import json
 from typing import List, Optional

+from devchat.llm.openai import chat_completion_no_stream_return_json
+from llm_conf import (
+    CONTEXT_SIZE,
+    DEFAULT_CONTEXT_SIZE,
+    DEFAULT_ENCODING,
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from model import FuncToTest
 from openai_util import create_chat_completion_content
+from tools.tiktoken_util import get_encoding

-MODEL = "gpt-4-1106-preview"
-ENCODING = "cl100k_base"
-# TODO: handle token budget
-TOKEN_BUDGET = int(128000 * 0.9)
+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-4-turbo-preview"
+ENCODING = (
+    get_encoding(DEFAULT_ENCODING)  # Use default encoding as an approximation
+    if USE_USER_MODEL
+    else get_encoding("cl100k_base")
+)
+TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.9)

 # ruff: noqa: E501
@@ -45,26 +58,59 @@ JSON Format Example:
 """


+def _mk_user_msg(func_to_test: FuncToTest, contexts: List) -> str:
+    """
+    Create a user message to be sent to the model within the token budget.
+    """
+    msg = None
+    while msg is None:
+        context_content = "\n\n".join([str(c) for c in contexts])
+
+        msg = recommend_symbol_context_prompt.format(
+            function_content=func_to_test.func_content,
+            context_content=context_content,
+            function_name=func_to_test.func_name,
+            file_path=func_to_test.file_path,
+        )
+
+        token_count = len(ENCODING.encode(msg, disallowed_special=()))
+        if contexts and token_count > TOKEN_BUDGET:
+            # Remove the last context and try again
+            contexts.pop()
+            msg = None
+
+    return msg
+
+
 def get_recommended_symbols(
     func_to_test: FuncToTest, known_context: Optional[List] = None
 ) -> List[str]:
     known_context = known_context or []
-    context_content = "\n\n".join([str(c) for c in known_context])
-    msg = recommend_symbol_context_prompt.format(
-        function_content=func_to_test.func_content,
-        context_content=context_content,
-        function_name=func_to_test.func_name,
-        file_path=func_to_test.file_path,
-    )
-    response = create_chat_completion_content(
-        model=MODEL,
-        messages=[{"role": "user", "content": msg}],
-        response_format={"type": "json_object"},
-        temperature=0.1,
-    )
-    key_symbols = json.loads(response).get("key_symbols", [])
+    msg = _mk_user_msg(func_to_test, known_context)
+
+    json_res = {}
+    if USE_USER_MODEL:
+        # Use the wrapped api parameters
+        json_res = (
+            chat_completion_no_stream_return_json(
+                messages=[{"role": "user", "content": msg}],
+                llm_config={
+                    "model": MODEL,
+                    "temperature": 0.1,
+                },
+            )
+            or {}
+        )
+    else:
+        # Use the openai api parameters
+        response = create_chat_completion_content(
+            model=MODEL,
+            messages=[{"role": "user", "content": msg}],
+            response_format={"type": "json_object"},
+            temperature=0.1,
+        )
+        json_res = json.loads(response)
+
+    key_symbols = json_res.get("key_symbols", [])
     return key_symbols
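The new `_mk_user_msg` replaces an unbounded prompt build (the old `# TODO: handle token budget`) with a shrink-until-it-fits loop: render the prompt, count tokens, and drop the last context until the message fits. A standalone sketch of that loop, with hypothetical `template`/`parts` names and a fixed `cl100k_base` encoding:

```python
import tiktoken


def fit_to_budget(template: str, parts: list, budget: int) -> str:
    """Render template with joined parts, dropping trailing parts until it fits."""
    enc = tiktoken.get_encoding("cl100k_base")
    while True:
        msg = template.format(context_content="\n\n".join(str(p) for p in parts))
        # Return once the message fits, or when nothing is left to drop.
        if not parts or len(enc.encode(msg, disallowed_special=())) <= budget:
            return msg
        parts.pop()  # lowest-priority context is dropped first
```

Like the original, this mutates the caller's list in place; pass a copy if the caller still needs the full context list afterwards.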

View File

@@ -1,8 +1,15 @@
 import json
 from typing import List, Tuple

+from devchat.llm.openai import chat_completion_no_stream_return_json
+from llm_conf import (
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from openai_util import create_chat_completion_content

+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-3.5-turbo"
+
 # ruff: noqa: E501

 rerank_file_prompt = """
@@ -28,8 +35,6 @@ Accumulated Knowledge: {accumulated_knowledge}

 Answer:
 """

-RERANK_MODEL = "gpt-3.5-turbo-1106"
-
 def rerank_files(
     question: str,
@@ -56,19 +61,40 @@ def rerank_files(
         accumulated_knowledge=knowledge,
     )

-    response = create_chat_completion_content(
-        model=RERANK_MODEL,
-        messages=[
-            {
-                "role": "user",
-                "content": user_msg,
-            },
-        ],
-        response_format={"type": "json_object"},
-        temperature=0.1,
-    )
-    result = json.loads(response)
-    reranked = [(i["item"], i["relevance"]) for i in result["result"]]
+    result = {}
+    if USE_USER_MODEL:
+        # Use the wrapped api parameters
+        result = (
+            chat_completion_no_stream_return_json(
+                messages=[
+                    {
+                        "role": "user",
+                        "content": user_msg,
+                    },
+                ],
+                llm_config={
+                    "model": MODEL,
+                    "temperature": 0.1,
+                },
+            )
+            or {}
+        )
+    else:
+        # Use the openai api parameters
+        response = create_chat_completion_content(
+            model=MODEL,
+            messages=[
+                {
+                    "role": "user",
+                    "content": user_msg,
+                },
+            ],
+            response_format={"type": "json_object"},
+            temperature=0.1,
+        )
+        result = json.loads(response)
+
+    reranked = [(i["item"], i["relevance"]) for i in result.get("result", [])]

     return reranked
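Beyond the model switch, this hunk also hardens the reply handling: the old code indexed `result["result"]` and raised `KeyError` on a malformed reply, while the new code uses `result.get("result", [])` and degrades to an empty ranking. A hedged sketch of taking that one step further (the `parse_rerank` helper is ours, not in the repo; the expected shape comes from the prompt's JSON format):

```python
from typing import Any, Dict, List, Tuple


def parse_rerank(result: Dict[str, Any]) -> List[Tuple[str, Any]]:
    """Extract (item, relevance) pairs; malformed replies yield an empty ranking."""
    items = result.get("result", [])
    if not isinstance(items, list):
        return []
    return [
        (i["item"], i["relevance"])
        for i in items
        if isinstance(i, dict) and "item" in i and "relevance" in i
    ]
```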

unit_tests/llm_conf.py (Normal file, 22 additions)
View File

@@ -0,0 +1,22 @@
+import os
+
+USE_USER_MODEL = bool(os.environ.get("DEVCHAT_UNIT_TESTS_USE_USER_MODEL", False))
+USER_LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4-turbo-preview")
+
+DEFAULT_CONTEXT_SIZE = 4000
+CONTEXT_SIZE = {
+    "gpt-3.5-turbo": 16000,
+    "gpt-4": 8000,
+    "gpt-4-turbo-preview": 128000,
+    "claude-3-sonnet": 1000000,
+    "claude-3-opus": 1000000,
+    "xinghuo-3.5": 8000,
+    "GLM-4": 8000,
+    "ERNIE-Bot-4.0": 8000,
+    "togetherai/codellama/CodeLlama-70b-Instruct-hf": 4000,
+    "togetherai/mistralai/Mixtral-8x7B-Instruct-v0.1": 16000,
+    "minimax/abab6-chat": 8000,
+    "llama-2-70b-chat": 4000,
+}
+
+DEFAULT_ENCODING = "cl100k_base"
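Both switches are read once, at import time. Note that `bool(os.environ.get(...))` is truthy for any non-empty string, so `DEVCHAT_UNIT_TESTS_USE_USER_MODEL=0` or `=false` still enables the user model; only leaving it unset (or empty) disables it. A small sketch of exercising this module (the values are illustrative):

```python
import os

# Set env vars before importing llm_conf: it reads them at import time.
os.environ["DEVCHAT_UNIT_TESTS_USE_USER_MODEL"] = "1"
os.environ["LLM_MODEL"] = "claude-3-sonnet"

import llm_conf

assert llm_conf.USE_USER_MODEL is True
assert llm_conf.USER_LLM_MODEL == "claude-3-sonnet"
assert llm_conf.CONTEXT_SIZE["claude-3-sonnet"] == 1000000
```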

View File

@@ -2,16 +2,27 @@ import json
 from functools import partial
 from typing import List, Optional

+from devchat.llm.openai import chat_completion_no_stream_return_json
 from find_context import Context
+from llm_conf import (
+    CONTEXT_SIZE,
+    DEFAULT_CONTEXT_SIZE,
+    DEFAULT_ENCODING,
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from model import FuncToTest, TokenBudgetExceededException
 from openai_util import create_chat_completion_content
 from prompts import PROPOSE_TEST_PROMPT
 from tools.tiktoken_util import get_encoding

-MODEL = "gpt-3.5-turbo-1106"
-# MODEL = "gpt-4-1106-preview"
-ENCODING = "cl100k_base"
-TOKEN_BUDGET = int(16000 * 0.9)
+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-3.5-turbo"
+ENCODING = (
+    get_encoding(DEFAULT_ENCODING)  # Use default encoding as an approximation
+    if USE_USER_MODEL
+    else get_encoding("cl100k_base")
+)
+TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.9)


 def _mk_user_msg(
@@ -23,7 +34,6 @@ def _mk_user_msg(
     """
     Create a user message to be sent to the model within the token budget.
     """
-    encoding = get_encoding(ENCODING)

     func_content = f"function code\n```\n{func_to_test.func_content}\n```\n"
     class_content = ""
@@ -61,7 +71,7 @@ def _mk_user_msg(
     prioritized_msgs = [msg_0, msg_1, msg_2]

     for msg in prioritized_msgs:
-        token_count = len(encoding.encode(msg, disallowed_special=()))
+        token_count = len(ENCODING.encode(msg, disallowed_special=()))
         if token_count <= TOKEN_BUDGET:
             return msg
@@ -97,14 +107,31 @@ def propose_test(
         chat_language=chat_language,
     )

-    content = create_chat_completion_content(
-        model=MODEL,
-        messages=[{"role": "user", "content": user_msg}],
-        response_format={"type": "json_object"},
-        temperature=0.1,
-    )
-    cases = json.loads(content).get("test_cases", [])
+    json_res = {}
+    if USE_USER_MODEL:
+        # Use the wrapped api parameters
+        json_res = (
+            chat_completion_no_stream_return_json(
+                messages=[{"role": "user", "content": user_msg}],
+                llm_config={
+                    "model": MODEL,
+                    "temperature": 0.1,
+                },
+            )
+            or {}
+        )
+    else:
+        # Use the openai api parameters
+        content = create_chat_completion_content(
+            model=MODEL,
+            messages=[{"role": "user", "content": user_msg}],
+            response_format={"type": "json_object"},
+            temperature=0.1,
+        )
+        json_res = json.loads(content)
+
+    cases = json_res.get("test_cases", [])

     descriptions = []
     for case in cases:
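One consequence of deriving `TOKEN_BUDGET` from the shared table: each file keeps its own safety margin against the model's full context window (0.95 in the directory finder, 0.9 here), and a user model missing from `CONTEXT_SIZE` silently gets the conservative 4000-token default. A quick check of what that yields (a sketch; the numbers come straight from llm_conf.py, the model name "my-custom-model" is hypothetical):

```python
CONTEXT_SIZE = {"gpt-3.5-turbo": 16000, "gpt-4-turbo-preview": 128000}
DEFAULT_CONTEXT_SIZE = 4000

for model in ("gpt-3.5-turbo", "gpt-4-turbo-preview", "my-custom-model"):
    # Unknown models fall back to the small default budget.
    print(model, int(CONTEXT_SIZE.get(model, DEFAULT_CONTEXT_SIZE) * 0.9))
# gpt-3.5-turbo 14400
# gpt-4-turbo-preview 115200
# my-custom-model 3600
```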

View File

@@ -1,16 +1,28 @@
 from functools import partial
 from typing import List, Optional

+from devchat.llm.openai import chat_completion_stream
 from find_context import Context
+from llm_conf import (
+    CONTEXT_SIZE,
+    DEFAULT_CONTEXT_SIZE,
+    DEFAULT_ENCODING,
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from model import FuncToTest, TokenBudgetExceededException
 from openai_util import create_chat_completion_chunks
 from prompts import WRITE_TESTS_PROMPT
 from tools.file_util import retrieve_file_content
 from tools.tiktoken_util import get_encoding

-MODEL = "gpt-4-1106-preview"
-ENCODING = "cl100k_base"
-TOKEN_BUDGET = int(128000 * 0.9)
+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-4-turbo-preview"
+ENCODING = (
+    get_encoding(DEFAULT_ENCODING)  # Use default encoding as an approximation
+    if USE_USER_MODEL
+    else get_encoding("cl100k_base")
+)
+TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.9)


 def _mk_write_tests_msg(
@@ -23,8 +35,6 @@ def _mk_write_tests_msg(
     symbol_contexts: Optional[List[Context]] = None,
     user_requirements: str = "",
 ) -> Optional[str]:
-    encoding = get_encoding(ENCODING)
-
     additional_requirements = user_requirements

     test_cases_str = ""
@@ -94,7 +104,7 @@ def _mk_write_tests_msg(
     prioritized_msgs = [msg_0, msg_1, msg_2, msg_3]

     for msg in prioritized_msgs:
-        tokens = len(encoding.encode(msg, disallowed_special=()))
+        tokens = len(ENCODING.encode(msg, disallowed_special=()))
         if tokens <= TOKEN_BUDGET:
             return msg
@@ -124,13 +134,26 @@ def write_and_print_tests(
         chat_language=chat_language,
    )

-    chunks = create_chat_completion_chunks(
-        model=MODEL,
-        messages=[{"role": "user", "content": user_msg}],
-        temperature=0.1,
-    )
-    for chunk in chunks:
-        if chunk.choices[0].finish_reason == "stop":
-            break
-        print(chunk.choices[0].delta.content, flush=True, end="")
+    if USE_USER_MODEL:
+        # Use the wrapped api
+        res = chat_completion_stream(
+            messages=[{"role": "user", "content": user_msg}],
+            llm_config={"model": MODEL, "temperature": 0.1},
+        )
+        if res:
+            print(res.get("content", ""))
+    else:
+        # Use the openai api parameters
+        chunks = create_chat_completion_chunks(
+            model=MODEL,
+            messages=[{"role": "user", "content": user_msg}],
+            temperature=0.1,
+        )
+        for chunk in chunks:
+            if chunk.choices[0].finish_reason == "stop":
+                break
+            content = chunk.choices[0].delta.content
+            if content is not None:
+                print(content, flush=True, end="")
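Two behavioral notes on this last hunk: the wrapped `chat_completion_stream` path collects the whole reply and prints it in one go, so `/unit_tests` output is no longer incremental when the user model is active; and the OpenAI path now guards against `delta.content` being `None`, which the old loop would print as the literal string `None`. A sketch of the guarded loop against the openai>=1.x client (the client setup is illustrative and stands in for the repo's `create_chat_completion_chunks`):

```python
from openai import OpenAI

client = OpenAI()
stream = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Write a unit test for add()."}],
    temperature=0.1,
    stream=True,
)
for chunk in stream:
    if chunk.choices and chunk.choices[0].finish_reason == "stop":
        break
    content = chunk.choices[0].delta.content if chunk.choices else None
    if content is not None:  # delta.content is None for role-only chunks
        print(content, flush=True, end="")
```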