# workflows/merico/pr/command.py
"""
/pr.describe https://github.com/devchat-ai/devchat-vscode/pull/25
"""
# ruff: noqa: E402

import logging
import os
import sys

# Make the parent directory importable so that project-local packages
# (lib, providers, config_util, ...) resolve when run as a script.
# NOTE: this must happen BEFORE any project-local import — the original
# imported lib.ide_service first, which fails outside a lucky cwd.
from os.path import abspath, dirname

sys.path.append(dirname(dirname(abspath(__file__))))

from lib.ide_service import IDEService

# add new model configs to algo.MAX_TOKENS
import pr_agent.algo as algo
# Register token limits for models that pr_agent's builtin table does not
# cover, so token budgeting works for these providers.
algo.MAX_TOKENS.update(
    {
        "gpt-4-turbo-preview": 128000,
        "claude-3-opus": 100000,
        "claude-3-sonnet": 100000,
        "claude-3-haiku": 100000,
        "ERNIE-Bot-4.0": 8000,
        "GLM-4": 8000,
        "hzwxai/Mixtral-8x7B-Instruct-v0.1-GPTQ": 16000,
        "minimax/abab6-chat": 8000,
        "xinghuo-3.5": 8000,
        "llama-2-70b-chat": 4000,
        "togetherai/codellama/CodeLlama-70b-Instruct-hf": 4000,
        "togetherai/mistralai/Mixtral-8x7B-Instruct-v0.1": 16000,
        "text-embedding-ada-002": 8000,
        "text-embedding-3-small": 8000,
        "text-embedding-3-large": 8000,
        "embedding-v1": 8000,
        "embedding-2": 8000,
        "togethercomputer/m2-bert-80M-2k-retrieval": 2048,
        "togethercomputer/m2-bert-80M-8k-retrieval": 8192,
        "togethercomputer/m2-bert-80M-32k-retrieval": 32768,
        "WhereIsAI/UAE-Large-V1": 512,
        "BAAI/bge-large-en-v1.5": 512,
        "BAAI/bge-base-en-v1.5": 512,
        "sentence-transformers/msmarco-bert-base-dot-v5": 512,
        "bert-base-uncased": 512,
    }
)

# The active model comes from the LLM_MODEL env var (read once instead of
# three times as before); any model missing from the table falls back to a
# 16000-token limit so pr_agent does not crash on an unknown model name.
current_model = os.environ.get("LLM_MODEL", "gpt-3.5-turbo-1106")
if current_model not in algo.MAX_TOKENS:
    IDEService().ide_logging(
        "info", f"{current_model}'s max tokens is not config, we use it as default 16000"
    )
    algo.MAX_TOKENS[current_model] = 16000
# add new git provider
def get_git_provider():
    """Build a git provider via the original factory, forced to "devchat".

    Temporarily switches ``config.git_provider`` to "devchat" so the saved
    factory (``_get_git_provider_old``) instantiates DevChatProvider, then
    restores the previous value.

    Returns:
        The provider instance created by the original factory.
    """
    from pr_agent.config_loader import get_settings

    previous_provider = get_settings().config.git_provider
    get_settings().config.git_provider = "devchat"
    try:
        return _get_git_provider_old()
    finally:
        # Restore even if provider construction raises, so the global
        # settings are never left pointing at "devchat" by accident.
        get_settings().config.git_provider = previous_provider
import pr_agent.git_providers as git_providers
from providers.devchat_provider import DevChatProvider

# Keep a handle to the stock factory, register the DevChat provider class,
# then monkey-patch the factory so lookups go through get_git_provider above.
_get_git_provider_old = git_providers.get_git_provider
git_providers._GIT_PROVIDERS["devchat"] = DevChatProvider
git_providers.get_git_provider = get_git_provider
from pr_agent.cli import run
from pr_agent.config_loader import get_settings
# mock logging method, to redirect log to IDE
from pr_agent.log import inv_analytics_filter, setup_logger
class CustomOutput:
    """File-like sink that forwards log writes to the IDE's log channel.

    Passed to loguru's ``logger.add`` below so pr_agent log records show
    up inside the IDE instead of on stderr.
    """

    def __init__(self):
        pass

    def write(self, message):
        # Forward each record to the IDE as an "info" log line.
        IDEService().ide_logging("info", message.strip())

    def flush(self):
        # Nothing is buffered; present only to satisfy the stream interface.
        pass

    def close(self):
        # No resources to release; present only to satisfy the stream interface.
        pass
# Route pr_agent's loguru output to the IDE: drop the default sinks and
# install CustomOutput as the only handler.
log_level = os.environ.get("LOG_LEVEL", "INFO")
logger = setup_logger(log_level)
logger.remove(None)  # remove all previously added sinks

logger.add(
    CustomOutput(),
    level=log_level,  # honor LOG_LEVEL; was hard-coded to logging.INFO
    format="{message}",
    colorize=False,
    filter=inv_analytics_filter,
)
from config_util import get_repo_type, gitlab_host, read_server_access_token_with_input
from custom_suggestions_config import get_custom_suggestions_system_prompt

# Configure the OpenAI-compatible endpoint from the environment.
for _key, _value in (
    ("OPENAI.KEY", os.environ.get("OPENAI_API_KEY", "")),
    ("OPENAI.API_BASE", os.environ.get("OPENAI_API_BASE", "")),
    ("LLM.CUSTOM_LLM_PROVIDER", "openai"),
):
    get_settings().set(_key, _value)

# Obtain the hosting-service access token (may prompt the user); a falsy
# result means the user canceled, which is not an error.
access_token = read_server_access_token_with_input(sys.argv[1])
if not access_token:
    print("Command has been canceled.", flush=True)
    sys.exit(0)

repo_type = get_repo_type(sys.argv[1])
IDEService().ide_logging("debug", f"repo type: {repo_type}")
# Hand the access token to the provider-specific setting; anything other
# than github/gitlab is unsupported and aborts the command.
if repo_type not in ("github", "gitlab"):
    print(
        "Unsupported git hosting service, input pr url is:",
        sys.argv[1],
        file=sys.stderr,
        flush=True,
    )
    sys.exit(1)

if repo_type == "github":
    get_settings().set("GITHUB.USER_TOKEN", access_token)
else:  # gitlab
    get_settings().set("GITLAB.PERSONAL_ACCESS_TOKEN", access_token)
    host = gitlab_host()
    if host:
        IDEService().ide_logging("debug", f"gitlab host: {host}")
        get_settings().set("GITLAB.URL", host)
# USER TOKEN
# set git provider, default is devchat
# in devchat provider, we will create actual repo provider
# get_settings().set("CONFIG.GIT_PROVIDER", "devchat")

# All three model slots point at the same model taken from the environment.
_model = os.environ.get("LLM_MODEL", "gpt-3.5-turbo-1106")
get_settings().set("CONFIG.MODEL", _model)
get_settings().set("CONFIG.MODEL_TURBO", _model)
get_settings().set("CONFIG.FALLBACK_MODELS", [_model])

# Help text is noise inside the IDE; disable it across all commands.
for _flag in (
    "PR_REVIEWER.ENABLE_HELP_TEXT",
    "PR_DESCRIPTION.ENABLE_HELP_TEXT",
    "PR_DESCRIPTION.ENABLE_HELP_COMMENT",
    "PR_CODE_SUGGESTIONS.ENABLE_HELP_TEXT",
    "PR_TEST.ENABLE_HELP_TEXT",
    "CHECKS.ENABLE_HELP_TEXT",
):
    get_settings().set(_flag, False)
# get_settings().set("PR_CODE_SUGGESTIONS.SUMMARIZE", False)

# "custom_suggestions" is our alias for "improve" with a custom system prompt.
if sys.argv[2] == "custom_suggestions":
    get_settings().pr_code_suggestions_prompt.system = get_custom_suggestions_system_prompt()
    sys.argv[2] = "improve"

# When the IDE language is Chinese, append a Chinese-output instruction to
# every system prompt.
language = IDEService().ide_language()
language_prompt = "\n\n输出内容使用中文输出。\n" if language == "zh" else ""
get_settings().pr_code_suggestions_prompt.system += language_prompt
get_settings().pr_review_prompt.system += language_prompt
get_settings().pr_description_prompt.system += language_prompt

# get_settings().pr_reviewer.inline_code_comments = True
# config for find similar issues
get_settings().set("PR_SIMILAR_ISSUE.VECTORDB", "lancedb")
get_settings().set("LANCEDB.URI", "data/lancedb")

# The devchat provider constructs the real repo provider from this type;
# it is passed both through settings and the process environment.
pr_provider_type = get_repo_type(sys.argv[1])
if not pr_provider_type:
    print(
        "Unsupported git hosting service, input pr url is:",
        sys.argv[1],
        file=sys.stderr,
        flush=True,
    )
    sys.exit(1)

get_settings().set("CONFIG.GIT_PROVIDER", pr_provider_type)
os.environ["CONFIG.GIT_PROVIDER_TYPE"] = pr_provider_type
# os.environ['ENABLE_PUBLISH_LABELS'] = "1"

if __name__ == "__main__":
    # argv comes in as [script, pr_url, subcommand]; rebuild it into the
    # shape pr_agent's CLI parser expects.
    pr_url = sys.argv[1].strip()
    action = sys.argv[2].strip()
    sys.argv = [sys.executable, "--pr_url", pr_url, action]
    run()