Merge pull request #75 from devchat-ai/switch-model
Use env var `DEVCHAT_UNIT_TESTS_USE_USER_MODEL` to switch to the user-selected model for `/unit_tests`
Commit ccc1d97c90
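The switch itself lives in the new `unit_tests/llm_conf.py` (added below). A minimal sketch of how it resolves at import time; the hard-coded fallback differs per script, and `"gpt-3.5-turbo"` shown here is the one used by the file-finder and rerank steps:

import os

# Any non-empty string enables the user model; note that even "0" is truthy here.
USE_USER_MODEL = bool(os.environ.get("DEVCHAT_UNIT_TESTS_USE_USER_MODEL", False))
USER_LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4-turbo-preview")

MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-3.5-turbo"

So running with `DEVCHAT_UNIT_TESTS_USE_USER_MODEL=1 LLM_MODEL=claude-3-opus` routes every step through `claude-3-opus`; with the variable unset, each step keeps its previous OpenAI default.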
@@ -4,16 +4,28 @@ from typing import Callable, List
 from assistants.directory_structure.base import DirectoryStructureBase
 from assistants.rerank_files import rerank_files
+from devchat.llm.openai import chat_completion_no_stream_return_json
+from llm_conf import (
+    CONTEXT_SIZE,
+    DEFAULT_CONTEXT_SIZE,
+    DEFAULT_ENCODING,
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from openai_util import create_chat_completion_content
 from tools.directory_viewer import ListViewer
 from tools.tiktoken_util import get_encoding

+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-3.5-turbo"
+ENCODING = (
+    get_encoding(DEFAULT_ENCODING)  # Use default encoding as an approximation
+    if USE_USER_MODEL
+    else get_encoding("cl100k_base")
+)
+TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.95)


 class RelevantFileFinder(DirectoryStructureBase):
-    model_name = "gpt-3.5-turbo-1106"
-    dir_token_budget = 16000 * 0.95
-    encoding = get_encoding("cl100k_base")

     def _paginate_dir_structure(
         self, criteria: Callable[[Path], bool], style: str = "list"
     ) -> List[str]:

@@ -38,8 +50,8 @@ class RelevantFileFinder(DirectoryStructureBase):
         # Check if each page is within the token budget
         within_budget = True
         for p in pages:
-            tokens = len(self.encoding.encode(p, disallowed_special=()))
-            if tokens > self.dir_token_budget:
+            tokens = len(ENCODING.encode(p, disallowed_special=()))
+            if tokens > TOKEN_BUDGET:
                 within_budget = False
                 break

@@ -82,15 +94,30 @@ class RelevantFileFinder(DirectoryStructureBase):
         for dir_structure in dir_structure_pages:
             user_msg = self._mk_message(objective, dir_structure)

-            response = create_chat_completion_content(
-                model=self.model_name,
-                messages=[
-                    {"role": "user", "content": user_msg},
-                ],
-                response_format={"type": "json_object"},
-                temperature=0.1,
-            )
-
-            json_res = json.loads(response)
+            json_res = {}
+            if USE_USER_MODEL:
+                # Use the wrapped api parameters
+                json_res = (
+                    chat_completion_no_stream_return_json(
+                        messages=[{"role": "user", "content": user_msg}],
+                        llm_config={
+                            "model": MODEL,
+                            "temperature": 0.1,
+                        },
+                    )
+                    or {}
+                )
+
+            else:
+                # Use the openai api parameters
+                response = create_chat_completion_content(
+                    model=MODEL,
+                    messages=[
+                        {"role": "user", "content": user_msg},
+                    ],
+                    response_format={"type": "json_object"},
+                    temperature=0.1,
+                )
+
+                json_res = json.loads(response)

             files.extend(json_res.get("files", []))
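The `or {}` guard is doing real work here: the wrapped call can come back falsy on failure, and without it the later `json_res.get("files", [])` would blow up on `None`. A reduced illustration, with a hypothetical `call()` standing in for `chat_completion_no_stream_return_json`:

def call():
    return None  # the wrapped API returned nothing usable

json_res = call() or {}           # {} instead of None
files = json_res.get("files", [])  # [] rather than AttributeError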
@@ -1,13 +1,26 @@
 import json
 from typing import List, Optional

+from devchat.llm.openai import chat_completion_no_stream_return_json
+from llm_conf import (
+    CONTEXT_SIZE,
+    DEFAULT_CONTEXT_SIZE,
+    DEFAULT_ENCODING,
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from model import FuncToTest
 from openai_util import create_chat_completion_content
+from tools.tiktoken_util import get_encoding

-MODEL = "gpt-4-1106-preview"
-ENCODING = "cl100k_base"
-# TODO: handle token budget
-TOKEN_BUDGET = int(128000 * 0.9)
+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-4-turbo-preview"
+ENCODING = (
+    get_encoding(DEFAULT_ENCODING)  # Use default encoding as an approximation
+    if USE_USER_MODEL
+    else get_encoding("cl100k_base")
+)
+
+TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.9)


 # ruff: noqa: E501

@@ -45,11 +58,13 @@ JSON Format Example:
 """


-def get_recommended_symbols(
-    func_to_test: FuncToTest, known_context: Optional[List] = None
-) -> List[str]:
-    known_context = known_context or []
-    context_content = "\n\n".join([str(c) for c in known_context])
+def _mk_user_msg(func_to_test: FuncToTest, contexts: List) -> str:
+    """
+    Create a user message to be sent to the model within the token budget.
+    """
+    msg = None
+    while msg is None:
+        context_content = "\n\n".join([str(c) for c in contexts])

         msg = recommend_symbol_context_prompt.format(
             function_content=func_to_test.func_content,

@@ -58,13 +73,44 @@ def get_recommended_symbols(
             file_path=func_to_test.file_path,
         )

+        token_count = len(ENCODING.encode(msg, disallowed_special=()))
+        if contexts and token_count > TOKEN_BUDGET:
+            # Remove the last context and try again
+            contexts.pop()
+            msg = None
+
+    return msg
+
+
+def get_recommended_symbols(
+    func_to_test: FuncToTest, known_context: Optional[List] = None
+) -> List[str]:
+    known_context = known_context or []
+    msg = _mk_user_msg(func_to_test, known_context)
+
+    json_res = {}
+    if USE_USER_MODEL:
+        # Use the wrapped api parameters
+        json_res = (
+            chat_completion_no_stream_return_json(
+                messages=[{"role": "user", "content": msg}],
+                llm_config={
+                    "model": MODEL,
+                    "temperature": 0.1,
+                },
+            )
+            or {}
+        )
+
+    else:
         response = create_chat_completion_content(
             model=MODEL,
             messages=[{"role": "user", "content": msg}],
             response_format={"type": "json_object"},
             temperature=0.1,
         )
+        json_res = json.loads(response)

-    key_symbols = json.loads(response).get("key_symbols", [])
+    key_symbols = json_res.get("key_symbols", [])

     return key_symbols
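The new `_mk_user_msg` replaces the old "TODO: handle token budget" by dropping contexts from the end of the list until the rendered prompt fits. The shape of that loop, reduced to its essentials (`render` and `encode` are stand-ins for the prompt template and the tiktoken encoder):

def fit_to_budget(contexts: list, render, encode, budget: int) -> str:
    msg = None
    while msg is None:
        msg = render(contexts)
        if contexts and len(encode(msg)) > budget:
            contexts.pop()  # drop the last context and re-render
            msg = None
    return msg

Once `contexts` is exhausted, the guard is skipped and the (possibly still over-budget) message is returned as-is, matching the committed `if contexts and token_count > TOKEN_BUDGET` check.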
@@ -1,8 +1,15 @@
 import json
 from typing import List, Tuple

+from devchat.llm.openai import chat_completion_no_stream_return_json
+from llm_conf import (
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from openai_util import create_chat_completion_content

+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-3.5-turbo"
+
 # ruff: noqa: E501

 rerank_file_prompt = """

@@ -28,8 +35,6 @@ Accumulated Knowledge: {accumulated_knowledge}
 Answer:
 """

-RERANK_MODEL = "gpt-3.5-turbo-1106"
-

 def rerank_files(
     question: str,

@@ -56,8 +61,29 @@ def rerank_files(
         accumulated_knowledge=knowledge,
     )

+    result = {}
+    if USE_USER_MODEL:
+        # Use the wrapped api parameters
+        result = (
+            chat_completion_no_stream_return_json(
+                messages=[
+                    {
+                        "role": "user",
+                        "content": user_msg,
+                    },
+                ],
+                llm_config={
+                    "model": MODEL,
+                    "temperature": 0.1,
+                },
+            )
+            or {}
+        )
+
+    else:
+        # Use the openai api parameters
         response = create_chat_completion_content(
-            model=RERANK_MODEL,
+            model=MODEL,
             messages=[
                 {
                     "role": "user",

@@ -67,8 +93,8 @@ def rerank_files(
             response_format={"type": "json_object"},
             temperature=0.1,
         )

         result = json.loads(response)
-    reranked = [(i["item"], i["relevance"]) for i in result["result"]]
+    reranked = [(i["item"], i["relevance"]) for i in result.get("result", [])]

     return reranked
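Note the hardening at the end: `result["result"]` became `result.get("result", [])`, so a failed completion now yields an empty ranking instead of a `KeyError`:

result = {}  # e.g. the wrapped call failed and fell back to {}

# Old: result["result"] raises KeyError and aborts the workflow.
# New:
reranked = [(i["item"], i["relevance"]) for i in result.get("result", [])]
print(reranked)  # [] -- callers simply see no reranked files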
unit_tests/llm_conf.py (new file, 22 lines)

@@ -0,0 +1,22 @@
+import os
+
+USE_USER_MODEL = bool(os.environ.get("DEVCHAT_UNIT_TESTS_USE_USER_MODEL", False))
+USER_LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4-turbo-preview")
+
+DEFAULT_CONTEXT_SIZE = 4000
+CONTEXT_SIZE = {
+    "gpt-3.5-turbo": 16000,
+    "gpt-4": 8000,
+    "gpt-4-turbo-preview": 128000,
+    "claude-3-sonnet": 1000000,
+    "claude-3-opus": 1000000,
+    "xinghuo-3.5": 8000,
+    "GLM-4": 8000,
+    "ERNIE-Bot-4.0": 8000,
+    "togetherai/codellama/CodeLlama-70b-Instruct-hf": 4000,
+    "togetherai/mistralai/Mixtral-8x7B-Instruct-v0.1": 16000,
+    "minimax/abab6-chat": 8000,
+    "llama-2-70b-chat": 4000,
+}
+
+DEFAULT_ENCODING = "cl100k_base"
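How this table turns into per-script budgets: the file-finder keeps 95% of the context window, the other steps 90%, and a model missing from the table falls back to the conservative 4000-token default. Worked through with a trimmed copy of the table:

DEFAULT_CONTEXT_SIZE = 4000
CONTEXT_SIZE = {"gpt-3.5-turbo": 16000, "gpt-4-turbo-preview": 128000}

int(CONTEXT_SIZE.get("gpt-3.5-turbo", DEFAULT_CONTEXT_SIZE) * 0.95)       # 15200
int(CONTEXT_SIZE.get("gpt-4-turbo-preview", DEFAULT_CONTEXT_SIZE) * 0.9)  # 115200
int(CONTEXT_SIZE.get("some-unlisted-model", DEFAULT_CONTEXT_SIZE) * 0.9)  # 3600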
@@ -2,16 +2,27 @@ import json
 from functools import partial
 from typing import List, Optional

+from devchat.llm.openai import chat_completion_no_stream_return_json
 from find_context import Context
+from llm_conf import (
+    CONTEXT_SIZE,
+    DEFAULT_CONTEXT_SIZE,
+    DEFAULT_ENCODING,
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from model import FuncToTest, TokenBudgetExceededException
 from openai_util import create_chat_completion_content
 from prompts import PROPOSE_TEST_PROMPT
 from tools.tiktoken_util import get_encoding

-MODEL = "gpt-3.5-turbo-1106"
-# MODEL = "gpt-4-1106-preview"
-ENCODING = "cl100k_base"
-TOKEN_BUDGET = int(16000 * 0.9)
+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-3.5-turbo"
+ENCODING = (
+    get_encoding(DEFAULT_ENCODING)  # Use default encoding as an approximation
+    if USE_USER_MODEL
+    else get_encoding("cl100k_base")
+)
+TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.9)


 def _mk_user_msg(

@@ -23,7 +34,6 @@ def _mk_user_msg(
     """
     Create a user message to be sent to the model within the token budget.
     """
-    encoding = get_encoding(ENCODING)

     func_content = f"function code\n```\n{func_to_test.func_content}\n```\n"
     class_content = ""

@@ -61,7 +71,7 @@ def _mk_user_msg(
     prioritized_msgs = [msg_0, msg_1, msg_2]

     for msg in prioritized_msgs:
-        token_count = len(encoding.encode(msg, disallowed_special=()))
+        token_count = len(ENCODING.encode(msg, disallowed_special=()))
         if token_count <= TOKEN_BUDGET:
             return msg

@@ -97,14 +107,31 @@ def propose_test(
         chat_language=chat_language,
     )

+    json_res = {}
+    if USE_USER_MODEL:
+        # Use the wrapped api parameters
+        json_res = (
+            chat_completion_no_stream_return_json(
+                messages=[{"role": "user", "content": user_msg}],
+                llm_config={
+                    "model": MODEL,
+                    "temperature": 0.1,
+                },
+            )
+            or {}
+        )
+
+    else:
+        # Use the openai api parameters
         content = create_chat_completion_content(
             model=MODEL,
             messages=[{"role": "user", "content": user_msg}],
             response_format={"type": "json_object"},
             temperature=0.1,
         )
+        json_res = json.loads(content)

-    cases = json.loads(content).get("test_cases", [])
+    cases = json_res.get("test_cases", [])

     descriptions = []
     for case in cases:
@@ -1,16 +1,28 @@
 from functools import partial
 from typing import List, Optional

+from devchat.llm.openai import chat_completion_stream
 from find_context import Context
+from llm_conf import (
+    CONTEXT_SIZE,
+    DEFAULT_CONTEXT_SIZE,
+    DEFAULT_ENCODING,
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from model import FuncToTest, TokenBudgetExceededException
 from openai_util import create_chat_completion_chunks
 from prompts import WRITE_TESTS_PROMPT
 from tools.file_util import retrieve_file_content
 from tools.tiktoken_util import get_encoding

-MODEL = "gpt-4-1106-preview"
-ENCODING = "cl100k_base"
-TOKEN_BUDGET = int(128000 * 0.9)
+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-4-turbo-preview"
+ENCODING = (
+    get_encoding(DEFAULT_ENCODING)  # Use default encoding as an approximation
+    if USE_USER_MODEL
+    else get_encoding("cl100k_base")
+)
+TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.9)


 def _mk_write_tests_msg(

@@ -23,8 +35,6 @@ def _mk_write_tests_msg(
     symbol_contexts: Optional[List[Context]] = None,
     user_requirements: str = "",
 ) -> Optional[str]:
-    encoding = get_encoding(ENCODING)
-
     additional_requirements = user_requirements

     test_cases_str = ""

@@ -94,7 +104,7 @@ def _mk_write_tests_msg(
     prioritized_msgs = [msg_0, msg_1, msg_2, msg_3]

     for msg in prioritized_msgs:
-        tokens = len(encoding.encode(msg, disallowed_special=()))
+        tokens = len(ENCODING.encode(msg, disallowed_special=()))
         if tokens <= TOKEN_BUDGET:
             return msg

@@ -124,13 +134,26 @@ def write_and_print_tests(
         chat_language=chat_language,
     )

+    if USE_USER_MODEL:
+        # Use the wrapped api
+        res = chat_completion_stream(
+            messages=[{"role": "user", "content": user_msg}],
+            llm_config={"model": MODEL, "temperature": 0.1},
+        )
+        if res:
+            print(res.get("content", ""))
+
+    else:
+        # Use the openai api parameters
         chunks = create_chat_completion_chunks(
             model=MODEL,
             messages=[{"role": "user", "content": user_msg}],
             temperature=0.1,
         )

         for chunk in chunks:
             if chunk.choices[0].finish_reason == "stop":
                 break
-            print(chunk.choices[0].delta.content, flush=True, end="")
+            content = chunk.choices[0].delta.content
+            if content is not None:
+                print(content, flush=True, end="")
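The last hunk also fixes a small real bug: on some chunks (typically the final one) `chunk.choices[0].delta.content` is `None`, and `print(None, end="")` writes the literal text `None` into the generated tests. A reduced demonstration of the guard, with a hard-coded delta list standing in for the stream:

deltas = ["def test_foo():\n", "    assert foo() == 1\n", None]  # None as on a final chunk

for content in deltas:
    if content is not None:  # the old code printed "None" here
        print(content, flush=True, end="")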