diff --git a/unit_tests/assistants/directory_structure/relevant_file_finder.py b/unit_tests/assistants/directory_structure/relevant_file_finder.py
index c6788bd..b6e712c 100644
--- a/unit_tests/assistants/directory_structure/relevant_file_finder.py
+++ b/unit_tests/assistants/directory_structure/relevant_file_finder.py
@@ -4,16 +4,28 @@ from typing import Callable, List
 
 from assistants.directory_structure.base import DirectoryStructureBase
 from assistants.rerank_files import rerank_files
+from devchat.llm.openai import chat_completion_no_stream_return_json
+from llm_conf import (
+    CONTEXT_SIZE,
+    DEFAULT_CONTEXT_SIZE,
+    DEFAULT_ENCODING,
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from openai_util import create_chat_completion_content
 from tools.directory_viewer import ListViewer
 from tools.tiktoken_util import get_encoding
 
+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-3.5-turbo"
+ENCODING = (
+    get_encoding(DEFAULT_ENCODING)  # Use default encoding as an approximation
+    if USE_USER_MODEL
+    else get_encoding("cl100k_base")
+)
+TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.95)
+
 
 class RelevantFileFinder(DirectoryStructureBase):
-    model_name = "gpt-3.5-turbo-1106"
-    dir_token_budget = 16000 * 0.95
-    encoding = get_encoding("cl100k_base")
-
     def _paginate_dir_structure(
         self, criteria: Callable[[Path], bool], style: str = "list"
     ) -> List[str]:
@@ -38,8 +50,8 @@ class RelevantFileFinder(DirectoryStructureBase):
         # Check if each page is within the token budget
         within_budget = True
         for p in pages:
-            tokens = len(self.encoding.encode(p, disallowed_special=()))
-            if tokens > self.dir_token_budget:
+            tokens = len(ENCODING.encode(p, disallowed_special=()))
+            if tokens > TOKEN_BUDGET:
                 within_budget = False
                 break
 
@@ -82,16 +94,31 @@ class RelevantFileFinder(DirectoryStructureBase):
 
         for dir_structure in dir_structure_pages:
             user_msg = self._mk_message(objective, dir_structure)
 
-            response = create_chat_completion_content(
-                model=self.model_name,
-                messages=[
-                    {"role": "user", "content": user_msg},
-                ],
-                response_format={"type": "json_object"},
-                temperature=0.1,
-            )
+            json_res = {}
+            if USE_USER_MODEL:
+                # Use the wrapped api parameters
+                json_res = (
+                    chat_completion_no_stream_return_json(
+                        messages=[{"role": "user", "content": user_msg}],
+                        llm_config={
+                            "model": MODEL,
+                            "temperature": 0.1,
+                        },
+                    )
+                    or {}
+                )
 
-            json_res = json.loads(response)
+            else:
+                # Use the openai api parameters
+                response = create_chat_completion_content(
+                    model=MODEL,
+                    messages=[
+                        {"role": "user", "content": user_msg},
+                    ],
+                    response_format={"type": "json_object"},
+                    temperature=0.1,
+                )
+                json_res = json.loads(response)
 
             files.extend(json_res.get("files", []))
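The budget constants above are resolved once at import time. For reference, a sketch of the arithmetic using the values defined in unit_tests/llm_conf.py further down in this patch (the inline results are illustrative, not part of the patch):

# For the default (non-user-model) path, MODEL is "gpt-3.5-turbo" and
# CONTEXT_SIZE maps it to 16000 tokens, so each directory-listing page may
# use at most int(16000 * 0.95) = 15200 tokens. A model missing from
# CONTEXT_SIZE falls back to DEFAULT_CONTEXT_SIZE: int(4000 * 0.95) = 3800.
TOKEN_BUDGET = int(CONTEXT_SIZE.get("gpt-3.5-turbo", DEFAULT_CONTEXT_SIZE) * 0.95)  # == 15200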
diff --git a/unit_tests/assistants/recommend_test_context.py b/unit_tests/assistants/recommend_test_context.py
index 45050e9..f7d849f 100644
--- a/unit_tests/assistants/recommend_test_context.py
+++ b/unit_tests/assistants/recommend_test_context.py
@@ -1,13 +1,26 @@
 import json
 from typing import List, Optional
 
+from devchat.llm.openai import chat_completion_no_stream_return_json
+from llm_conf import (
+    CONTEXT_SIZE,
+    DEFAULT_CONTEXT_SIZE,
+    DEFAULT_ENCODING,
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from model import FuncToTest
 from openai_util import create_chat_completion_content
+from tools.tiktoken_util import get_encoding
 
-MODEL = "gpt-4-1106-preview"
-ENCODING = "cl100k_base"
-# TODO: handle token budget
-TOKEN_BUDGET = int(128000 * 0.9)
+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-4-turbo-preview"
+ENCODING = (
+    get_encoding(DEFAULT_ENCODING)  # Use default encoding as an approximation
+    if USE_USER_MODEL
+    else get_encoding("cl100k_base")
+)
+
+TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.9)
 
 
 # ruff: noqa: E501
@@ -45,26 +58,59 @@ JSON Format Example:
 """
 
+def _mk_user_msg(func_to_test: FuncToTest, contexts: List) -> str:
+    """
+    Create a user message to be sent to the model within the token budget.
+    """
+    msg = None
+    while msg is None:
+        context_content = "\n\n".join([str(c) for c in contexts])
+
+        msg = recommend_symbol_context_prompt.format(
+            function_content=func_to_test.func_content,
+            context_content=context_content,
+            function_name=func_to_test.func_name,
+            file_path=func_to_test.file_path,
+        )
+
+        token_count = len(ENCODING.encode(msg, disallowed_special=()))
+        if contexts and token_count > TOKEN_BUDGET:
+            # Remove the last context and try again
+            contexts.pop()
+            msg = None
+
+    return msg
+
+
 def get_recommended_symbols(
     func_to_test: FuncToTest, known_context: Optional[List] = None
 ) -> List[str]:
     known_context = known_context or []
 
-    context_content = "\n\n".join([str(c) for c in known_context])
+    msg = _mk_user_msg(func_to_test, known_context)
 
-    msg = recommend_symbol_context_prompt.format(
-        function_content=func_to_test.func_content,
-        context_content=context_content,
-        function_name=func_to_test.func_name,
-        file_path=func_to_test.file_path,
-    )
+    json_res = {}
+    if USE_USER_MODEL:
+        # Use the wrapped api parameters
+        json_res = (
+            chat_completion_no_stream_return_json(
+                messages=[{"role": "user", "content": msg}],
+                llm_config={
+                    "model": MODEL,
+                    "temperature": 0.1,
+                },
+            )
+            or {}
+        )
 
-    response = create_chat_completion_content(
-        model=MODEL,
-        messages=[{"role": "user", "content": msg}],
-        response_format={"type": "json_object"},
-        temperature=0.1,
-    )
+    else:
+        response = create_chat_completion_content(
+            model=MODEL,
+            messages=[{"role": "user", "content": msg}],
+            response_format={"type": "json_object"},
+            temperature=0.1,
+        )
+        json_res = json.loads(response)
 
-    key_symbols = json.loads(response).get("key_symbols", [])
+    key_symbols = json_res.get("key_symbols", [])
 
     return key_symbols
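The new _mk_user_msg helper trims context instead of failing outright. A minimal standalone sketch of the same strategy; fit_to_budget and its parameters are illustrative names, not part of the patch:

def fit_to_budget(render, contexts, encode, budget):
    # Re-render the prompt and drop the last (lowest-priority) context
    # until the message fits, or until no contexts remain.
    msg = render(contexts)
    while contexts and len(encode(msg)) > budget:
        contexts.pop()
        msg = render(contexts)
    # Like _mk_user_msg, this can still return an over-budget message
    # once every context has been dropped.
    return msg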
diff --git a/unit_tests/assistants/rerank_files.py b/unit_tests/assistants/rerank_files.py
index 9d3068a..523a274 100644
--- a/unit_tests/assistants/rerank_files.py
+++ b/unit_tests/assistants/rerank_files.py
@@ -1,8 +1,15 @@
 import json
 from typing import List, Tuple
 
+from devchat.llm.openai import chat_completion_no_stream_return_json
+from llm_conf import (
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from openai_util import create_chat_completion_content
 
+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-3.5-turbo"
+
 # ruff: noqa: E501
 
 rerank_file_prompt = """
@@ -28,8 +35,6 @@ Accumulated Knowledge:
 {accumulated_knowledge}
 Answer:
 """
-RERANK_MODEL = "gpt-3.5-turbo-1106"
-
 
 def rerank_files(
     question: str,
@@ -56,19 +61,40 @@ def rerank_files(
         accumulated_knowledge=knowledge,
     )
 
-    response = create_chat_completion_content(
-        model=RERANK_MODEL,
-        messages=[
-            {
-                "role": "user",
-                "content": user_msg,
-            },
-        ],
-        response_format={"type": "json_object"},
-        temperature=0.1,
-    )
+    result = {}
+    if USE_USER_MODEL:
+        # Use the wrapped api parameters
+        result = (
+            chat_completion_no_stream_return_json(
+                messages=[
+                    {
+                        "role": "user",
+                        "content": user_msg,
+                    },
+                ],
+                llm_config={
+                    "model": MODEL,
+                    "temperature": 0.1,
+                },
+            )
+            or {}
+        )
 
-    result = json.loads(response)
-    reranked = [(i["item"], i["relevance"]) for i in result["result"]]
+    else:
+        # Use the openai api parameters
+        response = create_chat_completion_content(
+            model=MODEL,
+            messages=[
+                {
+                    "role": "user",
+                    "content": user_msg,
+                },
+            ],
+            response_format={"type": "json_object"},
+            temperature=0.1,
+        )
+        result = json.loads(response)
+
+    reranked = [(i["item"], i["relevance"]) for i in result.get("result", [])]
 
     return reranked
diff --git a/unit_tests/llm_conf.py b/unit_tests/llm_conf.py
new file mode 100644
index 0000000..a0af3f8
--- /dev/null
+++ b/unit_tests/llm_conf.py
@@ -0,0 +1,22 @@
+import os
+
+USE_USER_MODEL = os.environ.get("DEVCHAT_UNIT_TESTS_USE_USER_MODEL", "") in ("1", "true", "True")
+USER_LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4-turbo-preview")
+
+DEFAULT_CONTEXT_SIZE = 4000
+CONTEXT_SIZE = {
+    "gpt-3.5-turbo": 16000,
+    "gpt-4": 8000,
+    "gpt-4-turbo-preview": 128000,
+    "claude-3-sonnet": 200000,
+    "claude-3-opus": 200000,
+    "xinghuo-3.5": 8000,
+    "GLM-4": 8000,
+    "ERNIE-Bot-4.0": 8000,
+    "togetherai/codellama/CodeLlama-70b-Instruct-hf": 4000,
+    "togetherai/mistralai/Mixtral-8x7B-Instruct-v0.1": 16000,
+    "minimax/abab6-chat": 8000,
+    "llama-2-70b-chat": 4000,
+}
+
+DEFAULT_ENCODING = "cl100k_base"
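llm_conf.py reads its switches from the environment at import time, so they must be set before the module is first imported. A hypothetical local setup for exercising the user-model path (the chosen model value is just an example; any key present in CONTEXT_SIZE works):

# Hypothetical configuration, set before importing llm_conf.
import os

os.environ["DEVCHAT_UNIT_TESTS_USE_USER_MODEL"] = "1"
os.environ["LLM_MODEL"] = "claude-3-sonnet"

import llm_conf  # import after the environment is prepared

assert llm_conf.USE_USER_MODEL is True
assert llm_conf.CONTEXT_SIZE[llm_conf.USER_LLM_MODEL] == 200000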
""" - encoding = get_encoding(ENCODING) func_content = f"function code\n```\n{func_to_test.func_content}\n```\n" class_content = "" @@ -61,7 +71,7 @@ def _mk_user_msg( prioritized_msgs = [msg_0, msg_1, msg_2] for msg in prioritized_msgs: - token_count = len(encoding.encode(msg, disallowed_special=())) + token_count = len(ENCODING.encode(msg, disallowed_special=())) if token_count <= TOKEN_BUDGET: return msg @@ -97,14 +107,31 @@ def propose_test( chat_language=chat_language, ) - content = create_chat_completion_content( - model=MODEL, - messages=[{"role": "user", "content": user_msg}], - response_format={"type": "json_object"}, - temperature=0.1, - ) + json_res = {} + if USE_USER_MODEL: + # Use the wrapped api parameters + json_res = ( + chat_completion_no_stream_return_json( + messages=[{"role": "user", "content": user_msg}], + llm_config={ + "model": MODEL, + "temperature": 0.1, + }, + ) + or {} + ) - cases = json.loads(content).get("test_cases", []) + else: + # Use the openai api parameters + content = create_chat_completion_content( + model=MODEL, + messages=[{"role": "user", "content": user_msg}], + response_format={"type": "json_object"}, + temperature=0.1, + ) + json_res = json.loads(content) + + cases = json_res.get("test_cases", []) descriptions = [] for case in cases: diff --git a/unit_tests/write_tests.py b/unit_tests/write_tests.py index 79fb1bc..183c720 100644 --- a/unit_tests/write_tests.py +++ b/unit_tests/write_tests.py @@ -1,16 +1,28 @@ from functools import partial from typing import List, Optional +from devchat.llm.openai import chat_completion_stream from find_context import Context +from llm_conf import ( + CONTEXT_SIZE, + DEFAULT_CONTEXT_SIZE, + DEFAULT_ENCODING, + USE_USER_MODEL, + USER_LLM_MODEL, +) from model import FuncToTest, TokenBudgetExceededException from openai_util import create_chat_completion_chunks from prompts import WRITE_TESTS_PROMPT from tools.file_util import retrieve_file_content from tools.tiktoken_util import get_encoding -MODEL = "gpt-4-1106-preview" -ENCODING = "cl100k_base" -TOKEN_BUDGET = int(128000 * 0.9) +MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-4-turbo-preview" +ENCODING = ( + get_encoding(DEFAULT_ENCODING) # Use default encoding as an approximation + if USE_USER_MODEL + else get_encoding("cl100k_base") +) +TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.9) def _mk_write_tests_msg( @@ -23,8 +35,6 @@ def _mk_write_tests_msg( symbol_contexts: Optional[List[Context]] = None, user_requirements: str = "", ) -> Optional[str]: - encoding = get_encoding(ENCODING) - additional_requirements = user_requirements test_cases_str = "" @@ -94,7 +104,7 @@ def _mk_write_tests_msg( prioritized_msgs = [msg_0, msg_1, msg_2, msg_3] for msg in prioritized_msgs: - tokens = len(encoding.encode(msg, disallowed_special=())) + tokens = len(ENCODING.encode(msg, disallowed_special=())) if tokens <= TOKEN_BUDGET: return msg @@ -124,13 +134,26 @@ def write_and_print_tests( chat_language=chat_language, ) - chunks = create_chat_completion_chunks( - model=MODEL, - messages=[{"role": "user", "content": user_msg}], - temperature=0.1, - ) + if USE_USER_MODEL: + # Use the wrapped api + res = chat_completion_stream( + messages=[{"role": "user", "content": user_msg}], + llm_config={"model": MODEL, "temperature": 0.1}, + ) + if res: + print(res.get("content", "")) - for chunk in chunks: - if chunk.choices[0].finish_reason == "stop": - break - print(chunk.choices[0].delta.content, flush=True, end="") + else: + # Use the openai api parameters + chunks = 
diff --git a/unit_tests/write_tests.py b/unit_tests/write_tests.py
index 79fb1bc..183c720 100644
--- a/unit_tests/write_tests.py
+++ b/unit_tests/write_tests.py
@@ -1,16 +1,28 @@
 from functools import partial
 from typing import List, Optional
 
+from devchat.llm.openai import chat_completion_stream
 from find_context import Context
+from llm_conf import (
+    CONTEXT_SIZE,
+    DEFAULT_CONTEXT_SIZE,
+    DEFAULT_ENCODING,
+    USE_USER_MODEL,
+    USER_LLM_MODEL,
+)
 from model import FuncToTest, TokenBudgetExceededException
 from openai_util import create_chat_completion_chunks
 from prompts import WRITE_TESTS_PROMPT
 from tools.file_util import retrieve_file_content
 from tools.tiktoken_util import get_encoding
 
-MODEL = "gpt-4-1106-preview"
-ENCODING = "cl100k_base"
-TOKEN_BUDGET = int(128000 * 0.9)
+MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-4-turbo-preview"
+ENCODING = (
+    get_encoding(DEFAULT_ENCODING)  # Use default encoding as an approximation
+    if USE_USER_MODEL
+    else get_encoding("cl100k_base")
+)
+TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.9)
 
 
 def _mk_write_tests_msg(
@@ -23,8 +35,6 @@
     symbol_contexts: Optional[List[Context]] = None,
     user_requirements: str = "",
 ) -> Optional[str]:
-    encoding = get_encoding(ENCODING)
-
     additional_requirements = user_requirements
 
     test_cases_str = ""
@@ -94,7 +104,7 @@
     prioritized_msgs = [msg_0, msg_1, msg_2, msg_3]
 
     for msg in prioritized_msgs:
-        tokens = len(encoding.encode(msg, disallowed_special=()))
+        tokens = len(ENCODING.encode(msg, disallowed_special=()))
         if tokens <= TOKEN_BUDGET:
             return msg
 
@@ -124,13 +134,26 @@ def write_and_print_tests(
         chat_language=chat_language,
     )
 
-    chunks = create_chat_completion_chunks(
-        model=MODEL,
-        messages=[{"role": "user", "content": user_msg}],
-        temperature=0.1,
-    )
+    if USE_USER_MODEL:
+        # Use the wrapped api
+        res = chat_completion_stream(
+            messages=[{"role": "user", "content": user_msg}],
+            llm_config={"model": MODEL, "temperature": 0.1},
+        )
+        if res:
+            print(res.get("content", ""))
 
-    for chunk in chunks:
-        if chunk.choices[0].finish_reason == "stop":
-            break
-        print(chunk.choices[0].delta.content, flush=True, end="")
+    else:
+        # Use the openai api parameters
+        chunks = create_chat_completion_chunks(
+            model=MODEL,
+            messages=[{"role": "user", "content": user_msg}],
+            temperature=0.1,
+        )
+        for chunk in chunks:
+            if chunk.choices[0].finish_reason == "stop":
+                break
+
+            content = chunk.choices[0].delta.content
+            if content is not None:
+                print(content, flush=True, end="")
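A small usage sketch of the user-model branch above, assuming (as the patched code does) that chat_completion_stream returns a dict carrying the full completion under "content", or a falsy value on failure; the prompt text is illustrative:

# Hypothetical driver mirroring write_and_print_tests' user-model branch.
res = chat_completion_stream(
    messages=[{"role": "user", "content": "Write unit tests for add(a, b)."}],
    llm_config={"model": MODEL, "temperature": 0.1},
)
print((res or {}).get("content", ""))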