Merge pull request #75 from devchat-ai/switch-model

Use the env var `DEVCHAT_UNIT_TESTS_USE_USER_MODEL` to switch `/unit_tests` to the user-selected model
boob.yang 2024-03-13 14:34:57 +08:00 committed by GitHub
commit ccc1d97c90
6 changed files with 247 additions and 76 deletions

View File

@@ -4,16 +4,28 @@ from typing import Callable, List

 from assistants.directory_structure.base import DirectoryStructureBase
 from assistants.rerank_files import rerank_files
+from devchat.llm.openai import chat_completion_no_stream_return_json
+from llm_conf import (
+    CONTEXT_SIZE,
+    DEFAULT_CONTEXT_SIZE,
+    DEFAULT_ENCODING,
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from openai_util import create_chat_completion_content
 from tools.directory_viewer import ListViewer
 from tools.tiktoken_util import get_encoding

+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-3.5-turbo"
+ENCODING = (
+    get_encoding(DEFAULT_ENCODING)  # Use default encoding as an approximation
+    if USE_USER_MODEL
+    else get_encoding("cl100k_base")
+)
+TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.95)
+

 class RelevantFileFinder(DirectoryStructureBase):
-    model_name = "gpt-3.5-turbo-1106"
-    dir_token_budget = 16000 * 0.95
-    encoding = get_encoding("cl100k_base")
-
     def _paginate_dir_structure(
         self, criteria: Callable[[Path], bool], style: str = "list"
     ) -> List[str]:
@@ -38,8 +50,8 @@ class RelevantFileFinder(DirectoryStructureBase):
         # Check if each page is within the token budget
         within_budget = True
         for p in pages:
-            tokens = len(self.encoding.encode(p, disallowed_special=()))
-            if tokens > self.dir_token_budget:
+            tokens = len(ENCODING.encode(p, disallowed_special=()))
+            if tokens > TOKEN_BUDGET:
                 within_budget = False
                 break
@@ -82,16 +94,31 @@ class RelevantFileFinder(DirectoryStructureBase):
         for dir_structure in dir_structure_pages:
             user_msg = self._mk_message(objective, dir_structure)

-            response = create_chat_completion_content(
-                model=self.model_name,
-                messages=[
-                    {"role": "user", "content": user_msg},
-                ],
-                response_format={"type": "json_object"},
-                temperature=0.1,
-            )
-            json_res = json.loads(response)
+            json_res = {}
+            if USE_USER_MODEL:
+                # Use the wrapped api parameters
+                json_res = (
+                    chat_completion_no_stream_return_json(
+                        messages=[{"role": "user", "content": user_msg}],
+                        llm_config={
+                            "model": MODEL,
+                            "temperature": 0.1,
+                        },
+                    )
+                    or {}
+                )
+            else:
+                # Use the openai api parameters
+                response = create_chat_completion_content(
+                    model=MODEL,
+                    messages=[
+                        {"role": "user", "content": user_msg},
+                    ],
+                    response_format={"type": "json_object"},
+                    temperature=0.1,
+                )
+                json_res = json.loads(response)

             files.extend(json_res.get("files", []))
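Every caller in this PR repeats the same dispatch: when `USE_USER_MODEL` is set, the wrapped devchat helper returns an already-parsed dict (or `None`, hence the `or {}` guard); otherwise the OpenAI-style helper returns a JSON string the caller parses itself. A minimal sketch of that shared shape, assuming only the two calls visible in the diff (the `complete_json` name and its signature are ours, not the repo's):

```python
import json
import os
from typing import Any, Dict, List


def complete_json(messages: List[Dict[str, str]], model: str,
                  temperature: float = 0.1) -> Dict[str, Any]:
    """Return the model's JSON reply as a dict; never None."""
    if os.environ.get("DEVCHAT_UNIT_TESTS_USE_USER_MODEL"):
        # Wrapped API: returns a parsed dict, or None on failure.
        from devchat.llm.openai import chat_completion_no_stream_return_json
        return chat_completion_no_stream_return_json(
            messages=messages,
            llm_config={"model": model, "temperature": temperature},
        ) or {}
    # OpenAI-style API: returns a JSON string for the caller to parse.
    from openai_util import create_chat_completion_content
    response = create_chat_completion_content(
        model=model,
        messages=messages,
        response_format={"type": "json_object"},
        temperature=temperature,
    )
    return json.loads(response)
```

Factored this way, the `or {}` fallback and the `json.loads` would live in one place instead of being repeated at each call site below.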

View File

@@ -1,13 +1,26 @@
 import json
 from typing import List, Optional

+from devchat.llm.openai import chat_completion_no_stream_return_json
+from llm_conf import (
+    CONTEXT_SIZE,
+    DEFAULT_CONTEXT_SIZE,
+    DEFAULT_ENCODING,
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from model import FuncToTest
 from openai_util import create_chat_completion_content
+from tools.tiktoken_util import get_encoding

-MODEL = "gpt-4-1106-preview"
-ENCODING = "cl100k_base"
-# TODO: handle token budget
-TOKEN_BUDGET = int(128000 * 0.9)
+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-4-turbo-preview"
+ENCODING = (
+    get_encoding(DEFAULT_ENCODING)  # Use default encoding as an approximation
+    if USE_USER_MODEL
+    else get_encoding("cl100k_base")
+)
+TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.9)

 # ruff: noqa: E501
@@ -45,26 +58,59 @@ JSON Format Example:
 """


+def _mk_user_msg(func_to_test: FuncToTest, contexts: List) -> str:
+    """
+    Create a user message to be sent to the model within the token budget.
+    """
+    msg = None
+    while msg is None:
+        context_content = "\n\n".join([str(c) for c in contexts])
+
+        msg = recommend_symbol_context_prompt.format(
+            function_content=func_to_test.func_content,
+            context_content=context_content,
+            function_name=func_to_test.func_name,
+            file_path=func_to_test.file_path,
+        )
+
+        token_count = len(ENCODING.encode(msg, disallowed_special=()))
+        if contexts and token_count > TOKEN_BUDGET:
+            # Remove the last context and try again
+            contexts.pop()
+            msg = None
+
+    return msg
+
+
 def get_recommended_symbols(
     func_to_test: FuncToTest, known_context: Optional[List] = None
 ) -> List[str]:
     known_context = known_context or []
-    context_content = "\n\n".join([str(c) for c in known_context])
-    msg = recommend_symbol_context_prompt.format(
-        function_content=func_to_test.func_content,
-        context_content=context_content,
-        function_name=func_to_test.func_name,
-        file_path=func_to_test.file_path,
-    )
-    response = create_chat_completion_content(
-        model=MODEL,
-        messages=[{"role": "user", "content": msg}],
-        response_format={"type": "json_object"},
-        temperature=0.1,
-    )
-    key_symbols = json.loads(response).get("key_symbols", [])
+    msg = _mk_user_msg(func_to_test, known_context)
+
+    json_res = {}
+    if USE_USER_MODEL:
+        # Use the wrapped api parameters
+        json_res = (
+            chat_completion_no_stream_return_json(
+                messages=[{"role": "user", "content": msg}],
+                llm_config={
+                    "model": MODEL,
+                    "temperature": 0.1,
+                },
+            )
+            or {}
+        )
+    else:
+        # Use the openai api parameters
+        response = create_chat_completion_content(
+            model=MODEL,
+            messages=[{"role": "user", "content": msg}],
+            response_format={"type": "json_object"},
+            temperature=0.1,
+        )
+        json_res = json.loads(response)
+
+    key_symbols = json_res.get("key_symbols", [])
     return key_symbols
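The new `_mk_user_msg` replaces an unbounded prompt build (the old `# TODO: handle token budget`) with a shrink-until-it-fits loop: render the prompt, count tokens, and drop the last context until the message fits. A standalone sketch of that loop, with hypothetical `template`/`parts` names and a fixed `cl100k_base` encoding:

```python
import tiktoken


def fit_to_budget(template: str, parts: list, budget: int) -> str:
    """Render template with joined parts, dropping trailing parts until it fits."""
    enc = tiktoken.get_encoding("cl100k_base")
    while True:
        msg = template.format(context_content="\n\n".join(str(p) for p in parts))
        # Return once the message fits, or when nothing is left to drop.
        if not parts or len(enc.encode(msg, disallowed_special=())) <= budget:
            return msg
        parts.pop()  # lowest-priority context is dropped first
```

Like the original, this mutates the caller's list in place; pass a copy if the caller still needs the full context list afterwards.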

View File

@@ -1,8 +1,15 @@
 import json
 from typing import List, Tuple

+from devchat.llm.openai import chat_completion_no_stream_return_json
+from llm_conf import (
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from openai_util import create_chat_completion_content

+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-3.5-turbo"
+
 # ruff: noqa: E501

 rerank_file_prompt = """
@@ -28,8 +35,6 @@ Accumulated Knowledge: {accumulated_knowledge}

 Answer:
 """

-RERANK_MODEL = "gpt-3.5-turbo-1106"
-
 def rerank_files(
     question: str,
@@ -56,19 +61,40 @@ def rerank_files(
         accumulated_knowledge=knowledge,
     )

-    response = create_chat_completion_content(
-        model=RERANK_MODEL,
-        messages=[
-            {
-                "role": "user",
-                "content": user_msg,
-            },
-        ],
-        response_format={"type": "json_object"},
-        temperature=0.1,
-    )
-    result = json.loads(response)
-    reranked = [(i["item"], i["relevance"]) for i in result["result"]]
+    result = {}
+    if USE_USER_MODEL:
+        # Use the wrapped api parameters
+        result = (
+            chat_completion_no_stream_return_json(
+                messages=[
+                    {
+                        "role": "user",
+                        "content": user_msg,
+                    },
+                ],
+                llm_config={
+                    "model": MODEL,
+                    "temperature": 0.1,
+                },
+            )
+            or {}
+        )
+    else:
+        # Use the openai api parameters
+        response = create_chat_completion_content(
+            model=MODEL,
+            messages=[
+                {
+                    "role": "user",
+                    "content": user_msg,
+                },
+            ],
+            response_format={"type": "json_object"},
+            temperature=0.1,
+        )
+        result = json.loads(response)
+
+    reranked = [(i["item"], i["relevance"]) for i in result.get("result", [])]

     return reranked
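Beyond the model switch, this hunk also hardens the reply handling: the old code indexed `result["result"]` and raised `KeyError` on a malformed reply, while the new code uses `result.get("result", [])` and degrades to an empty ranking. A hedged sketch of taking that one step further (the `parse_rerank` helper is ours, not in the repo; the expected shape comes from the prompt's JSON format):

```python
from typing import Any, Dict, List, Tuple


def parse_rerank(result: Dict[str, Any]) -> List[Tuple[str, Any]]:
    """Extract (item, relevance) pairs; malformed replies yield an empty ranking."""
    items = result.get("result", [])
    if not isinstance(items, list):
        return []
    return [
        (i["item"], i["relevance"])
        for i in items
        if isinstance(i, dict) and "item" in i and "relevance" in i
    ]
```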

unit_tests/llm_conf.py (Normal file, 22 additions)
View File

@@ -0,0 +1,22 @@
+import os
+
+USE_USER_MODEL = bool(os.environ.get("DEVCHAT_UNIT_TESTS_USE_USER_MODEL", False))
+USER_LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4-turbo-preview")
+
+DEFAULT_CONTEXT_SIZE = 4000
+CONTEXT_SIZE = {
+    "gpt-3.5-turbo": 16000,
+    "gpt-4": 8000,
+    "gpt-4-turbo-preview": 128000,
+    "claude-3-sonnet": 1000000,
+    "claude-3-opus": 1000000,
+    "xinghuo-3.5": 8000,
+    "GLM-4": 8000,
+    "ERNIE-Bot-4.0": 8000,
+    "togetherai/codellama/CodeLlama-70b-Instruct-hf": 4000,
+    "togetherai/mistralai/Mixtral-8x7B-Instruct-v0.1": 16000,
+    "minimax/abab6-chat": 8000,
+    "llama-2-70b-chat": 4000,
+}
+
+DEFAULT_ENCODING = "cl100k_base"
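Both switches are read once, at import time. Note that `bool(os.environ.get(...))` is truthy for any non-empty string, so `DEVCHAT_UNIT_TESTS_USE_USER_MODEL=0` or `=false` still enables the user model; only leaving it unset (or empty) disables it. A small sketch of exercising this module (the values are illustrative):

```python
import os

# Set env vars before importing llm_conf: it reads them at import time.
os.environ["DEVCHAT_UNIT_TESTS_USE_USER_MODEL"] = "1"
os.environ["LLM_MODEL"] = "claude-3-sonnet"

import llm_conf

assert llm_conf.USE_USER_MODEL is True
assert llm_conf.USER_LLM_MODEL == "claude-3-sonnet"
assert llm_conf.CONTEXT_SIZE["claude-3-sonnet"] == 1000000
```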

View File

@@ -2,16 +2,27 @@ import json
 from functools import partial
 from typing import List, Optional

+from devchat.llm.openai import chat_completion_no_stream_return_json
 from find_context import Context
+from llm_conf import (
+    CONTEXT_SIZE,
+    DEFAULT_CONTEXT_SIZE,
+    DEFAULT_ENCODING,
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from model import FuncToTest, TokenBudgetExceededException
 from openai_util import create_chat_completion_content
 from prompts import PROPOSE_TEST_PROMPT
 from tools.tiktoken_util import get_encoding

-MODEL = "gpt-3.5-turbo-1106"
-# MODEL = "gpt-4-1106-preview"
-ENCODING = "cl100k_base"
-TOKEN_BUDGET = int(16000 * 0.9)
+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-3.5-turbo"
+ENCODING = (
+    get_encoding(DEFAULT_ENCODING)  # Use default encoding as an approximation
+    if USE_USER_MODEL
+    else get_encoding("cl100k_base")
+)
+TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.9)


 def _mk_user_msg(
@@ -23,7 +34,6 @@ def _mk_user_msg(
     """
     Create a user message to be sent to the model within the token budget.
     """
-    encoding = get_encoding(ENCODING)

     func_content = f"function code\n```\n{func_to_test.func_content}\n```\n"
     class_content = ""
@@ -61,7 +71,7 @@ def _mk_user_msg(
     prioritized_msgs = [msg_0, msg_1, msg_2]

     for msg in prioritized_msgs:
-        token_count = len(encoding.encode(msg, disallowed_special=()))
+        token_count = len(ENCODING.encode(msg, disallowed_special=()))
         if token_count <= TOKEN_BUDGET:
             return msg
@@ -97,14 +107,31 @@ def propose_test(
         chat_language=chat_language,
     )

-    content = create_chat_completion_content(
-        model=MODEL,
-        messages=[{"role": "user", "content": user_msg}],
-        response_format={"type": "json_object"},
-        temperature=0.1,
-    )
-    cases = json.loads(content).get("test_cases", [])
+    json_res = {}
+    if USE_USER_MODEL:
+        # Use the wrapped api parameters
+        json_res = (
+            chat_completion_no_stream_return_json(
+                messages=[{"role": "user", "content": user_msg}],
+                llm_config={
+                    "model": MODEL,
+                    "temperature": 0.1,
+                },
+            )
+            or {}
+        )
+    else:
+        # Use the openai api parameters
+        content = create_chat_completion_content(
+            model=MODEL,
+            messages=[{"role": "user", "content": user_msg}],
+            response_format={"type": "json_object"},
+            temperature=0.1,
+        )
+        json_res = json.loads(content)
+
+    cases = json_res.get("test_cases", [])

     descriptions = []
     for case in cases:
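One consequence of deriving `TOKEN_BUDGET` from the shared table: each file keeps its own safety margin against the model's full context window (0.95 in the directory finder, 0.9 here), and a user model missing from `CONTEXT_SIZE` silently gets the conservative 4000-token default. A quick check of what that yields (a sketch; the numbers come straight from llm_conf.py, the model name "my-custom-model" is hypothetical):

```python
CONTEXT_SIZE = {"gpt-3.5-turbo": 16000, "gpt-4-turbo-preview": 128000}
DEFAULT_CONTEXT_SIZE = 4000

for model in ("gpt-3.5-turbo", "gpt-4-turbo-preview", "my-custom-model"):
    # Unknown models fall back to the small default budget.
    print(model, int(CONTEXT_SIZE.get(model, DEFAULT_CONTEXT_SIZE) * 0.9))
# gpt-3.5-turbo 14400
# gpt-4-turbo-preview 115200
# my-custom-model 3600
```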

View File

@@ -1,16 +1,28 @@
 from functools import partial
 from typing import List, Optional

+from devchat.llm.openai import chat_completion_stream
 from find_context import Context
+from llm_conf import (
+    CONTEXT_SIZE,
+    DEFAULT_CONTEXT_SIZE,
+    DEFAULT_ENCODING,
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from model import FuncToTest, TokenBudgetExceededException
 from openai_util import create_chat_completion_chunks
 from prompts import WRITE_TESTS_PROMPT
 from tools.file_util import retrieve_file_content
 from tools.tiktoken_util import get_encoding

-MODEL = "gpt-4-1106-preview"
-ENCODING = "cl100k_base"
-TOKEN_BUDGET = int(128000 * 0.9)
+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-4-turbo-preview"
+ENCODING = (
+    get_encoding(DEFAULT_ENCODING)  # Use default encoding as an approximation
+    if USE_USER_MODEL
+    else get_encoding("cl100k_base")
+)
+TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.9)


 def _mk_write_tests_msg(
@@ -23,8 +35,6 @@ def _mk_write_tests_msg(
     symbol_contexts: Optional[List[Context]] = None,
     user_requirements: str = "",
 ) -> Optional[str]:
-    encoding = get_encoding(ENCODING)
-
     additional_requirements = user_requirements

     test_cases_str = ""
@@ -94,7 +104,7 @@ def _mk_write_tests_msg(
     prioritized_msgs = [msg_0, msg_1, msg_2, msg_3]

     for msg in prioritized_msgs:
-        tokens = len(encoding.encode(msg, disallowed_special=()))
+        tokens = len(ENCODING.encode(msg, disallowed_special=()))
         if tokens <= TOKEN_BUDGET:
             return msg
@@ -124,13 +134,26 @@ def write_and_print_tests(
         chat_language=chat_language,
    )

-    chunks = create_chat_completion_chunks(
-        model=MODEL,
-        messages=[{"role": "user", "content": user_msg}],
-        temperature=0.1,
-    )
-    for chunk in chunks:
-        if chunk.choices[0].finish_reason == "stop":
-            break
-        print(chunk.choices[0].delta.content, flush=True, end="")
+    if USE_USER_MODEL:
+        # Use the wrapped api
+        res = chat_completion_stream(
+            messages=[{"role": "user", "content": user_msg}],
+            llm_config={"model": MODEL, "temperature": 0.1},
+        )
+        if res:
+            print(res.get("content", ""))
+    else:
+        # Use the openai api parameters
+        chunks = create_chat_completion_chunks(
+            model=MODEL,
+            messages=[{"role": "user", "content": user_msg}],
+            temperature=0.1,
+        )
+        for chunk in chunks:
+            if chunk.choices[0].finish_reason == "stop":
+                break
+            content = chunk.choices[0].delta.content
+            if content is not None:
+                print(content, flush=True, end="")
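Two behavioral notes on this last hunk: the wrapped `chat_completion_stream` path collects the whole reply and prints it in one go, so `/unit_tests` output is no longer incremental when the user model is active; and the OpenAI path now guards against `delta.content` being `None`, which the old loop would print as the literal string `None`. A sketch of the guarded loop against the openai>=1.x client (the client setup is illustrative and stands in for the repo's `create_chat_completion_chunks`):

```python
from openai import OpenAI

client = OpenAI()
stream = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Write a unit test for add()."}],
    temperature=0.1,
    stream=True,
)
for chunk in stream:
    if chunk.choices and chunk.choices[0].finish_reason == "stop":
        break
    content = chunk.choices[0].delta.content if chunk.choices else None
    if content is not None:  # delta.content is None for role-only chunks
        print(content, flush=True, end="")
```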