# workflows/unit_tests/propose_test.py

import json
from functools import partial
from typing import List, Optional

from devchat.llm.openai import chat_completion_no_stream_return_json
from find_context import Context
from llm_conf import (
    CONTEXT_SIZE,
    DEFAULT_CONTEXT_SIZE,
    DEFAULT_ENCODING,
    USE_USER_MODEL,
    USER_LLM_MODEL,
)
from model import FuncToTest, TokenBudgetExceededException
from openai_util import create_chat_completion_content
from prompts import PROPOSE_TEST_PROMPT
from tools.tiktoken_util import get_encoding

# Select the model and its tokenizer together, depending on whether the
# user-configured model is enabled.
if USE_USER_MODEL:
    MODEL = USER_LLM_MODEL
    # Use default encoding as an approximation
    ENCODING = get_encoding(DEFAULT_ENCODING)
else:
    MODEL = "gpt-4-turbo-preview"  # "gpt-3.5-turbo"
    ENCODING = get_encoding("cl100k_base")

# The budget is 95% of the model's context size; models missing from
# CONTEXT_SIZE fall back to DEFAULT_CONTEXT_SIZE.
TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.95)


def _mk_user_msg(
    user_prompt: str,
    func_to_test: FuncToTest,
    contexts: List[Context],
    chat_language: str,
) -> str:
    """
    Build the user message for the model, keeping it within the token budget.

    Three candidate messages are rendered from PROPOSE_TEST_PROMPT, ordered from
    the most to the least relevant content:
      0. function code + class code + relevant context
      1. function code + class code
      2. function code only
    The first candidate whose token count is within TOKEN_BUDGET is returned.

    Raises:
        TokenBudgetExceededException: If even the function-only message exceeds
            the token budget.
    """
    func_section = f"function code\n```\n{func_to_test.func_content}\n```\n"

    # The class section stays empty when the function has no container content.
    container = func_to_test.container_content
    class_section = "" if container is None else f"class code\n```\n{container}\n```\n"

    context_section = (
        "\n\nrelevant context\n\n" + "\n\n".join(str(ctx) for ctx in contexts) + "\n\n"
        if contexts
        else ""
    )

    # Everything except the relevant content is the same for every candidate.
    render = partial(
        PROPOSE_TEST_PROMPT.format,
        user_prompt=user_prompt,
        function_name=func_to_test.func_name,
        file_path=func_to_test.file_path,
        chat_language=chat_language,
    )

    # Relevant-content variants, richest first.
    variants = (
        "\n".join((func_section, class_section, context_section)),
        "\n".join((func_section, class_section)),
        func_section,
    )
    candidates = [render(relevant_content=variant) for variant in variants]

    used_tokens = 0
    for candidate in candidates:
        used_tokens = len(ENCODING.encode(candidate, disallowed_special=()))
        if used_tokens <= TOKEN_BUDGET:
            return candidate

    # Even the function-only message is over budget; report its token count.
    raise TokenBudgetExceededException(
        f"Token budget exceeded while proposing test cases for <{func_to_test}>. "
        f"({used_tokens}/{TOKEN_BUDGET})"
    )


def propose_test(
    user_prompt: str,
    func_to_test: FuncToTest,
    contexts: Optional[List[Context]] = None,
    chat_language: str = "English",
) -> List[str]:
    """Propose test cases for a specified function based on a user prompt.

    Args:
        user_prompt (str): The prompt or description for which test cases need to be generated.
        func_to_test (FuncToTest): The function to generate test cases for.
        contexts (Optional[List[Context]]): Relevant contexts to include in the message
            sent to the model when the token budget allows. Defaults to no context.
        chat_language (str): Chat language passed to the prompt template.
            Defaults to "English".

    Returns:
        List[str]: A list of test case descriptions. A description is prefixed with
            "<category>: " when the model supplied a category for that case.

    Raises:
        TokenBudgetExceededException: If even the function code alone does not fit
            within the token budget.
    """
    user_msg = _mk_user_msg(
        user_prompt=user_prompt,
        func_to_test=func_to_test,
        contexts=contexts or [],
        chat_language=chat_language,
    )
    messages = [{"role": "user", "content": user_msg}]

    if USE_USER_MODEL:
        # Use the wrapped api parameters; a falsy result is treated as an empty response
        json_res = (
            chat_completion_no_stream_return_json(
                messages=messages,
                llm_config={
                    "model": MODEL,
                    "temperature": 0.1,
                },
            )
            or {}
        )
    else:
        # Use the openai api parameters
        content = create_chat_completion_content(
            model=MODEL,
            messages=messages,
            response_format={"type": "json_object"},
            temperature=0.1,
        )
        json_res = json.loads(content)

    # The response is model-generated JSON, so its shape is not guaranteed:
    # tolerate a non-object top level and a missing, null or non-list "test_cases".
    if not isinstance(json_res, dict):
        return []
    cases = json_res.get("test_cases") or []
    if not isinstance(cases, list):
        return []

    descriptions = []
    for case in cases:
        # Skip malformed entries instead of failing the whole proposal
        if not isinstance(case, dict):
            continue
        description = case.get("description")
        if not description:
            continue
        category = case.get("category")
        # Formatting (rather than "+") keeps non-string values from raising
        descriptions.append(f"{category}: {description}" if category else str(description))

    return descriptions
Sort and format import blocks 2024-01-05 12:27:57 +08:00			`import json`
Improve adjusting context to fit token budget 2024-01-02 20:36:13 +08:00			`from functools import partial`
Use context informations when propose cases 2024-02-27 16:17:23 +08:00			`from typing import List, Optional`
Improve adjusting context to fit token budget 2024-01-02 20:36:13 +08:00
Be able to switch between openai api & wrapped api 2024-03-12 20:41:39 +08:00			`from devchat.llm.openai import chat_completion_no_stream_return_json`
Include file path info in Context 2024-03-06 15:35:21 +08:00			`from find_context import Context`
Be able to switch between openai api & wrapped api 2024-03-12 20:41:39 +08:00			`from llm_conf import (`
			`CONTEXT_SIZE,`
			`DEFAULT_CONTEXT_SIZE,`
			`DEFAULT_ENCODING,`
			`USE_USER_MODEL,`
			`USER_LLM_MODEL,`
			`)`
Sort and format import blocks 2024-01-05 12:27:57 +08:00			`from model import FuncToTest, TokenBudgetExceededException`
Use create completion wrappers in propose_test and write_tests 2023-12-24 19:15:43 +08:00			`from openai_util import create_chat_completion_content`
Manage prompts in a single file and ignore line-too-long error 2023-12-24 19:43:29 +08:00			`from prompts import PROPOSE_TEST_PROMPT`
Revert "Revert "/unit_tests switch to devchat env"" 2024-01-17 17:56:29 +08:00			`from tools.tiktoken_util import get_encoding`
Init /gen_ut 2023-12-18 20:46:16 +08:00
Add comment for switching back to gpt3.5 2024-03-19 10:56:34 +08:00			`MODEL = USER_LLM_MODEL if USE_USER_MODEL else "gpt-4-turbo-preview" # "gpt-3.5-turbo"`
Be able to switch between openai api & wrapped api 2024-03-12 20:41:39 +08:00			`ENCODING = (`
			`get_encoding(DEFAULT_ENCODING) # Use default encoding as an approximation`
			`if USE_USER_MODEL`
			`else get_encoding("cl100k_base")`
			`)`
Use gpt4-turbo for unit_tests as default 2024-03-15 15:21:51 +08:00			`TOKEN_BUDGET = int(CONTEXT_SIZE.get(MODEL, DEFAULT_CONTEXT_SIZE) * 0.95)`
Adjust content to fit token budget and handle budget exceeded exception 2023-12-28 15:56:15 +08:00

			`def _mk_user_msg(`
			`user_prompt: str,`
			`func_to_test: FuncToTest,`
Include file path info in Context 2024-03-06 15:35:21 +08:00			`contexts: List[Context],`
Adjust content to fit token budget and handle budget exceeded exception 2023-12-28 15:56:15 +08:00			`chat_language: str,`
			`) -> str:`
			`"""`
			`Create a user message to be sent to the model within the token budget.`
			`"""`

			func_content = f"function code\n```\n{func_to_test.func_content}\n```\n"
			`class_content = ""`
			`if func_to_test.container_content is not None:`
			class_content = f"class code\n```\n{func_to_test.container_content}\n```\n"

Use context informations when propose cases 2024-02-27 16:17:23 +08:00			`context_content = ""`
Include file path info in Context 2024-03-06 15:35:21 +08:00			`if contexts:`
			`context_content = "\n\nrelevant context\n\n"`
			`context_content += "\n\n".join([str(c) for c in contexts])`
			`context_content += "\n\n"`
Use context informations when propose cases 2024-02-27 16:17:23 +08:00
Improve adjusting context to fit token budget 2024-01-02 20:36:13 +08:00			`# Prepare a list of user messages to fit the token budget`
			`# by adjusting the relevant content`
			`relevant_content_fmt = partial(`
			`PROPOSE_TEST_PROMPT.format,`
Adjust content to fit token budget and handle budget exceeded exception 2023-12-28 15:56:15 +08:00			`user_prompt=user_prompt,`
			`function_name=func_to_test.func_name,`
			`file_path=func_to_test.file_path,`
			`chat_language=chat_language,`
			`)`
Use context informations when propose cases 2024-02-27 16:17:23 +08:00			`# 0. func content & class content & context content`
			`msg_0 = relevant_content_fmt(`
			`relevant_content="\n".join([func_content, class_content, context_content]),`
			`)`
Improve adjusting context to fit token budget 2024-01-02 20:36:13 +08:00			`# 1. func content & class content`
			`msg_1 = relevant_content_fmt(`
			`relevant_content="\n".join([func_content, class_content]),`
			`)`
			`# 2. func content only`
			`msg_2 = relevant_content_fmt(`
			`relevant_content=func_content,`
Adjust content to fit token budget and handle budget exceeded exception 2023-12-28 15:56:15 +08:00			`)`

Use context informations when propose cases 2024-02-27 16:17:23 +08:00			`prioritized_msgs = [msg_0, msg_1, msg_2]`
Improve adjusting context to fit token budget 2024-01-02 20:36:13 +08:00
			`for msg in prioritized_msgs:`
Be able to switch between openai api & wrapped api 2024-03-12 20:41:39 +08:00			`token_count = len(ENCODING.encode(msg, disallowed_special=()))`
Improve adjusting context to fit token budget 2024-01-02 20:36:13 +08:00			`if token_count <= TOKEN_BUDGET:`
			`return msg`

			`# Even func content exceeds the token budget`
Adjust content to fit token budget and handle budget exceeded exception 2023-12-28 15:56:15 +08:00			`raise TokenBudgetExceededException(`
Improve the output of token budget exception 2023-12-28 16:40:42 +08:00			`f"Token budget exceeded while proposing test cases for <{func_to_test}>. "`
Adjust content to fit token budget and handle budget exceeded exception 2023-12-28 15:56:15 +08:00			`f"({token_count}/{TOKEN_BUDGET})"`
			`)`
Init /gen_ut 2023-12-18 20:46:16 +08:00

			`def propose_test(`
			`user_prompt: str,`
Adjust content to fit token budget and handle budget exceeded exception 2023-12-28 15:56:15 +08:00			`func_to_test: FuncToTest,`
Include file path info in Context 2024-03-06 15:35:21 +08:00			`contexts: Optional[List[Context]] = None,`
Support different languages in llm chatting 2023-12-24 17:31:35 +08:00			`chat_language: str = "English",`
Init /gen_ut 2023-12-18 20:46:16 +08:00			`) -> List[str]:`
			`"""Propose test cases for a specified function based on a user prompt`

			`Args:`
			`user_prompt (str): The prompt or description for which test cases need to be generated.`
			`function_name (str): The name of the function to generate test cases for.`
			`file_path (str): The absolute path to the file containing the target function for which`
			`test cases will be generated.`
Extract function content when line numbers are provided 2023-12-18 22:21:12 +08:00
Init /gen_ut 2023-12-18 20:46:16 +08:00			`Returns:`
			`List[str]: A list of test case descriptions.`
			`"""`
Include file path info in Context 2024-03-06 15:35:21 +08:00			`contexts = contexts or []`
Adjust content to fit token budget and handle budget exceeded exception 2023-12-28 15:56:15 +08:00			`user_msg = _mk_user_msg(`
Init /gen_ut 2023-12-18 20:46:16 +08:00			`user_prompt=user_prompt,`
Adjust content to fit token budget and handle budget exceeded exception 2023-12-28 15:56:15 +08:00			`func_to_test=func_to_test,`
Include file path info in Context 2024-03-06 15:35:21 +08:00			`contexts=contexts,`
Support different languages in llm chatting 2023-12-24 17:31:35 +08:00			`chat_language=chat_language,`
Init /gen_ut 2023-12-18 20:46:16 +08:00			`)`

Be able to switch between openai api & wrapped api 2024-03-12 20:41:39 +08:00			`json_res = {}`
			`if USE_USER_MODEL:`
			`# Use the wrapped api parameters`
Improve the usage of the wrapped api 2024-03-13 10:58:07 +08:00			`json_res = (`
			`chat_completion_no_stream_return_json(`
			`messages=[{"role": "user", "content": user_msg}],`
			`llm_config={`
			`"model": MODEL,`
			`"temperature": 0.1,`
			`},`
			`)`
			`or {}`
Be able to switch between openai api & wrapped api 2024-03-12 20:41:39 +08:00			`)`

			`else:`
			`# Use the openai api parameters`
			`content = create_chat_completion_content(`
			`model=MODEL,`
			`messages=[{"role": "user", "content": user_msg}],`
			`response_format={"type": "json_object"},`
			`temperature=0.1,`
			`)`
			`json_res = json.loads(content)`

			`cases = json_res.get("test_cases", [])`
Init /gen_ut 2023-12-18 20:46:16 +08:00
			`descriptions = []`
			`for case in cases:`
			`description = case.get("description", None)`
feat: maintain a balance between happy paths and edge cases 2024-01-23 16:40:04 -08:00			`category = case.get("category", None)`
Init /gen_ut 2023-12-18 20:46:16 +08:00			`if description:`
feat: maintain a balance between happy paths and edge cases 2024-01-23 16:40:04 -08:00			`if category:`
			`descriptions.append(category + ": " + description)`
			`else:`
			`descriptions.append(description)`
Extract function content when line numbers are provided 2023-12-18 22:21:12 +08:00
Init /gen_ut 2023-12-18 20:46:16 +08:00			`return descriptions`