From cba2b5a447e79662a92b9f3181984c92bd58ab39 Mon Sep 17 00:00:00 2001 From: "bobo.yang" Date: Thu, 23 Nov 2023 08:27:56 +0800 Subject: [PATCH 01/15] add requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..13c7b44 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +devchat-ask>=0.1.7 From 1dc2dfdba1619458658d43ea7565fcc2531c2932 Mon Sep 17 00:00:00 2001 From: "bobo.yang" Date: Thu, 30 Nov 2023 07:50:49 +0800 Subject: [PATCH 02/15] add ide service functions --- libs/ide_services/__init__.py | 5 +++++ libs/ide_services/services.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 libs/ide_services/__init__.py create mode 100644 libs/ide_services/services.py diff --git a/libs/ide_services/__init__.py b/libs/ide_services/__init__.py new file mode 100644 index 0000000..f8cbc41 --- /dev/null +++ b/libs/ide_services/__init__.py @@ -0,0 +1,5 @@ +from .services import get_lsp_brige_port + +__all__ = [ + 'get_lsp_brige_port' +] diff --git a/libs/ide_services/services.py b/libs/ide_services/services.py new file mode 100644 index 0000000..a983114 --- /dev/null +++ b/libs/ide_services/services.py @@ -0,0 +1,32 @@ + +import requests +from functools import wraps + +BASE_SERVER_URL = 'http://localhost:3000' + +def rpc_call(f): + @wraps(f) + def wrapper(*args, **kwargs): + function_name = f.__name__ + url = f"{BASE_SERVER_URL}/{function_name}" + + data = dict(zip(f.__code__.co_varnames, args)) + data.update(kwargs) + headers = {'Content-Type': 'application/json'} + + response = requests.post(url, json=data, headers=headers) + + if response.status_code != 200: + raise Exception(f"Server error: {response.status_code}") + + response_data = response.json() + if 'error' in response_data: + raise Exception(f"Server returned an error: {response_data['error']}") + return response_data['result'] + + return wrapper + + +@rpc_call +def get_lsp_brige_port(): + pass \ No newline at end of file From 8a1ece4569b3c2648d7a717c1c344b1a36954d40 Mon Sep 17 00:00:00 2001 From: "bobo.yang" Date: Thu, 30 Nov 2023 07:57:01 +0800 Subject: [PATCH 03/15] add ask-code command --- ask-code/ask-code.py | 72 ++++++++++++++++++++++++++++++++++++++++++++ ask-code/command.yml | 4 +++ ask-code/prompt.txt | 0 3 files changed, 76 insertions(+) create mode 100644 ask-code/ask-code.py create mode 100644 ask-code/command.yml create mode 100644 ask-code/prompt.txt diff --git a/ask-code/ask-code.py b/ask-code/ask-code.py new file mode 100644 index 0000000..28307e3 --- /dev/null +++ b/ask-code/ask-code.py @@ -0,0 +1,72 @@ +import os +import sys +import json +from chat.ask_codebase.chains.smart_qa import SmartQA + +sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'libs')) +sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..' 
, 'libs')) + +from ide_services import get_lsp_brige_port + + +def output_message(output): + out_data = f"""\n<>\n{output}\n<>\n""" + print(out_data) + +def output_result(output): + result = {"result": f"{output}"} + out_data = f"""\n<>\n{json.dumps(result)}\n<>\n""" + print(out_data) + +def request(data): + output_message(data) + + lines = [] + while True: + try: + line = input() + if line.strip() == '<>': + break + elif line.strip() == '<>': + continue + lines.append(line) + except EOFError: + pass + + replay_message = '\n'.join(lines) + replay_object = json.loads(replay_message) + return replay_object + + +def query(question, lsp_brige_port): + root_path = os.getcwd() + + # Create an instance of SmartQA + smart_qa = SmartQA(root_path) + + # Use SmartQA to get the answer + answer = smart_qa.run(question=question, verbose=False, dfs_depth=3, dfs_max_visit=10, bridge_url=f'http://localhost:{lsp_brige_port}' ) + + # Print the answer + print(answer[0]) + print(f"***/ask-code has costed approximately ${int(float(answer[2]['token_usage']['total_cost'])/0.7*10000)/10000} USD for this question.***") + + +def main(): + try: + if len(sys.argv) < 3: + print("Usage: python index_and_query.py query [question] [port]") + sys.exit(1) + + port = get_lsp_brige_port() + + question = sys.argv[2] + query(question, port) + sys.exit(0) + except Exception as e: + print("Exception: ", e, file=sys.stderr, flush=True) + sys.exit(1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/ask-code/command.yml b/ask-code/command.yml new file mode 100644 index 0000000..55c5132 --- /dev/null +++ b/ask-code/command.yml @@ -0,0 +1,4 @@ +description: ask codebase. +input: required +steps: + - run: $command_python $command_path/ask-code.py query "$input" \ No newline at end of file diff --git a/ask-code/prompt.txt b/ask-code/prompt.txt new file mode 100644 index 0000000..e69de29 From f87a2f7f8123acf61f7ebb91bdb301e44214c2f4 Mon Sep 17 00:00:00 2001 From: "bobo.yang" Date: Thu, 30 Nov 2023 07:57:24 +0800 Subject: [PATCH 04/15] read service url from environ --- libs/ide_services/services.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libs/ide_services/services.py b/libs/ide_services/services.py index a983114..56c6bb4 100644 --- a/libs/ide_services/services.py +++ b/libs/ide_services/services.py @@ -1,8 +1,9 @@ import requests +import os from functools import wraps -BASE_SERVER_URL = 'http://localhost:3000' +BASE_SERVER_URL = os.environ.get('DEVCHAT_IDE_SERVICE_URL', 'http://localhost:3000') def rpc_call(f): @wraps(f) From 552052ecbf97b284d8c2ff800607b48fd71e70e9 Mon Sep 17 00:00:00 2001 From: "bobo.yang" Date: Fri, 1 Dec 2023 12:29:30 +0800 Subject: [PATCH 05/15] more description for ask-code --- ask-code/command.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ask-code/command.yml b/ask-code/command.yml index 55c5132..bbeece4 100644 --- a/ask-code/command.yml +++ b/ask-code/command.yml @@ -1,4 +1,4 @@ -description: ask codebase. +description: Ask questions about the current project's codebase, which requires proactive acquisition of additional context information to answer. 
input: required steps: - run: $command_python $command_path/ask-code.py query "$input" \ No newline at end of file From 33c534753bd235e7ec3a3492e63e08a04768ec5e Mon Sep 17 00:00:00 2001 From: "bobo.yang" Date: Fri, 1 Dec 2023 12:32:06 +0800 Subject: [PATCH 06/15] remove unsed functions --- ask-code/ask-code.py | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/ask-code/ask-code.py b/ask-code/ask-code.py index 28307e3..f58a63b 100644 --- a/ask-code/ask-code.py +++ b/ask-code/ask-code.py @@ -9,35 +9,6 @@ sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..' , 'libs')) from ide_services import get_lsp_brige_port -def output_message(output): - out_data = f"""\n<>\n{output}\n<>\n""" - print(out_data) - -def output_result(output): - result = {"result": f"{output}"} - out_data = f"""\n<>\n{json.dumps(result)}\n<>\n""" - print(out_data) - -def request(data): - output_message(data) - - lines = [] - while True: - try: - line = input() - if line.strip() == '<>': - break - elif line.strip() == '<>': - continue - lines.append(line) - except EOFError: - pass - - replay_message = '\n'.join(lines) - replay_object = json.loads(replay_message) - return replay_object - - def query(question, lsp_brige_port): root_path = os.getcwd() From 11f951f3b4dc75d611232dca08bb2c4b77f30474 Mon Sep 17 00:00:00 2001 From: "bobo.yang" Date: Fri, 1 Dec 2023 13:06:08 +0800 Subject: [PATCH 07/15] add commit command --- commit/command.yml | 5 + commit/commit.py | 342 +++++++++++++++++++++++++++++++++++++++++++++ commit/prompts.py | 97 +++++++++++++ 3 files changed, 444 insertions(+) create mode 100644 commit/command.yml create mode 100644 commit/commit.py create mode 100644 commit/prompts.py diff --git a/commit/command.yml b/commit/command.yml new file mode 100644 index 0000000..c754470 --- /dev/null +++ b/commit/command.yml @@ -0,0 +1,5 @@ +description: commit changes. +hint: to close Issue ... +input: optional +steps: + - run: $command_python $command_path/commit.py "$input" \ No newline at end of file diff --git a/commit/commit.py b/commit/commit.py new file mode 100644 index 0000000..a59240c --- /dev/null +++ b/commit/commit.py @@ -0,0 +1,342 @@ +""" +commit.py: 通过几个步骤完成提交。 + +具体步骤包含: +1. 获取当前修改文件列表; +2. 获取用户选中的修改文件; + a. 标记出已经staged的文件; + b. 获取用户选中的文件; + c. 根据用户选中文件,重新构建stage列表; +3. 获取用户选中修改文件的Diff信息; +4. 生成提交信息; +5. 
展示提交信息并提交。 + +注意: 步骤2.c, 步骤5有专门的函数实现,本脚本中不需要具体这两个步骤的实现。 +""" + +import os +import sys +import time +import re +import json +import subprocess +import openai + +from prompts import \ + PROMPT_SUMMARY_FOR_FILES, \ + PROMPT_GROUP_FILES, \ + PROMPT_COMMIT_MESSAGE_BY_DIFF_USER_INPUT, \ + PROMPT_COMMIT_MESSAGE_BY_SUMMARY_USER_INPUT, \ + PROMPT_SUMMARY_FOR_FILES_RETRY, \ + PROMPT_GROUP_FILES_RETRY, \ + prompt_summary_for_files_llm_config, \ + prompt_group_files_llm_config, \ + prompt_commit_message_by_diff_user_input_llm_config, \ + prompt_commit_message_by_summary_user_input_llm_config + + + +def output_message(output): + out_data = f"""\n{output}\n""" + print(out_data, flush=True) + +def parse_response_from_ui(response): + # resonse text like this: + """ + ``` some_name + some key name 1: value1 + some key name 2: value2 + ``` + """ + # parse key values + lines = response.strip().split("\n") + if len(lines) <= 2: + return {} + + import ymal + data = yaml.safe_load(lines[1:-1]) + return data + + +def pipe_interaction_mock(output: str): + output_message(output) + # read response.txt in same dir with current script file + response_file = os.path.join(os.path.dirname(__file__), 'response.txt') + + # clear content in response_file + with open(response_file, 'w+', encoding="utf8"): + pass + + while True: + if os.path.exists(response_file): + with open(response_file, encoding="utf8") as f: + response = f.read() + if response.strip().endswith("```"): + break + time.sleep(1) + return parse_response_from_ui(response) + + +def pipe_interaction(output: str): + output_message(output) + + lines = [] + while True: + try: + line = input() + if line.strip().startswith('``` '): + lines = [] + elif line.strip().startswith('```'): + lines.append(line) + break + lines.append(line) + except EOFError: + pass + + replay_message = '\n'.join(lines) + return parse_response_from_ui(replay_message) + + +def call_gpt_with_config(messages, llm_config) -> str: + connection_error = '' + for _1 in range(3): + try: + response = openai.ChatCompletion.create( + messages=messages, + **llm_config, + stream=False + ) + + response_dict = json.loads(str(response)) + respose_message = response_dict["choices"][0]["message"] + return respose_message + except ConnectionError as err: + connection_error = err + continue + except Exception as err: + print("Exception:", err, file=sys.stderr, flush=True) + return None + print("Connect Error:", connection_error, file=sys.stderr, flush=True) + return None + +def call_gpt_with_config_and_ensure_json(messages, llm_config): + for _1 in range(3): + response = call_gpt_with_config(messages, llm_config) + if response is None: + sys.exit(-1) + + try: + response_obj = json.loads(response["content"]) + return response_obj + except Exception: + continue + print("Not valid json response:", response["content"], file=sys.stderr, flush=True) + sys.exit(-1) + + +def get_modified_files(): + """ 获取当前修改文件列表以及已经staged的文件列表""" + output = subprocess.check_output(["git", "status", "-s", "-u"]) + output = output.decode('utf-8') + lines = output.split('\n') + modified_files = [] + staged_files = [] + + def strip_file_name(file_name): + file = file_name.strip() + if file.startswith('"'): + file = file[1:-1] + return file + + for line in lines: + if len(line) > 2: + status, filename = line[:2], line[3:] + # check wether filename is a directory + if os.path.isdir(filename): + continue + modified_files.append(strip_file_name(filename)) + if status == "M " or status == "A ": + staged_files.append(strip_file_name(filename)) + 
return modified_files, staged_files + +def gpt_file_summary(diff, diff_files): + prompt = PROMPT_SUMMARY_FOR_FILES.replace("{__DIFF__}", f"{diff}") + messages = [{"role": "user", "content": prompt}] + normpath_summaries = {} + + retry_times = 0 + while retry_times < 3: + retry_times += 1 + file_summaries = call_gpt_with_config_and_ensure_json(messages, prompt_summary_for_files_llm_config) + for key, value in file_summaries.items(): + normpath_summaries[os.path.normpath(key)] = value + + missed_files = [file for file in diff_files if file not in normpath_summaries] + if len(missed_files) > 0: + prompt_retry = PROMPT_SUMMARY_FOR_FILES_RETRY.replace("{__MISSED_FILES__}", f"{missed_files}") + messages.append({"role": "assistant", "content": json.dumps(file_summaries)}) + messages.append({"role": "user", "content": prompt_retry}) + else: + break + + return normpath_summaries + + +def gpt_file_group(diff, diff_files): + prompt = PROMPT_GROUP_FILES.replace("{__DIFF__}", f"{diff}") + messages = [{"role": "user", "content": prompt}] + file_groups = [] + + retry_times = 0 + while retry_times < 3: + retry_times += 1 + file_groups = call_gpt_with_config_and_ensure_json(messages, prompt_group_files_llm_config) + grouped_files = [] + for group in file_groups: + grouped_files.extend(group["files"]) + missed_files = [file for file in diff_files if file not in grouped_files] + + if len(missed_files) > 0: + prompt_retry = PROMPT_GROUP_FILES_RETRY.replace("{__MISSED_FILES__}", f"{missed_files}") + messages.append({"role": "assistant", "content": json.dumps(file_groups)}) + messages.append({"role": "user", "content": prompt_retry}) + else: + break + + return file_groups + + +def get_file_summary(modified_files, staged_files): + """ 当modified_files文件列表<=5时,根据项目修改差异生成每一个文件的修改总结 """ + diffs = [] + for file in modified_files: + if file not in staged_files: + subprocess.check_output(["git", "add", file]) + diff = subprocess.check_output(["git", "diff", "--cached", file]) + if file not in staged_files: + subprocess.check_output(["git", "reset", file]) + diffs.append(diff.decode('utf-8')) + # total_diff = subprocess.check_output(["git", "diff", "HEAD"]) + total_diff_decoded = '\n'.join(diffs) # total_diff.decode('utf-8') + + if len(total_diff_decoded) > 15000: + print("Current diff length:", len(total_diff_decoded), flush=True) + return {} + + # 在prompt中明确处置AI模型的输出格式需求 + normpath_summaries = gpt_file_summary(total_diff_decoded, modified_files) + print(f""" +``` file summary +{json.dumps(normpath_summaries, indent=4)} +``` + """) + + # 通过AI模型对提交文件进行分组,分组的依据是按修改内容的关联性。 + file_groups = gpt_file_group(total_diff_decoded, modified_files) + print(f""" +``` group +{json.dumps(file_groups, indent=4)} +``` + """) + + return normpath_summaries + + +def get_marked_files(modified_files, staged_files, file_summaries): + """ 获取用户选中的修改文件及已经staged的文件""" + # Coordinate with user interface to let user select files. + # assuming user_files is a list of filenames selected by user. 
+ out_str = "```chatmark\n" + out_str += "Staged:\n" + for file in staged_files: + out_str += f"- [x] {file} {file_summaries.get(file, '')}\n" + out_str += "Unstaged:\n" + for file in modified_files: + if file in staged_files: + continue + out_str += f"- [] {file} {file_summaries.get(file, '')}\n" + out_str += "```" + + output_message(out_str) + return [file for file in modified_files if file_summaries.get(file, None)] + replay_object = pipe_interaction_mock(out_str) + + select_files = [] + for key, value in replay_object.items(): + if key in modified_files and value == "true": + select_files.append(key) + return select_files + + +def rebuild_stage_list(user_files): + """ 根据用户选中文件,重新构建stage列表 """ + # Unstage all files + subprocess.check_output(["git", "reset"]) + # Stage all user_files + for file in user_files: + os.system(f"git add \"{file}\"") + + +def get_diff(): + """ 获取staged files的Diff信息 """ + return subprocess.check_output(["git", "diff", "--cached"]) + +def generate_commit_message_base_diff(user_input, diff): + """ Based on the diff information, generate a commit message through AI """ + prompt = PROMPT_COMMIT_MESSAGE_BY_DIFF_USER_INPUT.replace( + "{__DIFF__}", f"{diff}" + ).replace( + "{__USER_INPUT__}", f"{user_input}" + ) + messages = [{"role": "user", "content": prompt}] + response = call_gpt_with_config(messages, prompt_commit_message_by_diff_user_input_llm_config) + return response + + +def generate_commit_message_base_file_summaries(user_input, file_summaries): + """ Based on the file_summaries, generate a commit message through AI """ + prompt = PROMPT_COMMIT_MESSAGE_BY_SUMMARY_USER_INPUT.replace( + "{__USER_INPUT__}", f"{user_input}" + ).replace( + "{__FILE_SUMMARY__}", f"{json.dumps(file_summaries, indent=4)}" + ) + # Call AI model to generate commit message + messages = [{"role": "user", "content": prompt}] + response = call_gpt_with_config(messages, prompt_commit_message_by_summary_user_input_llm_config) + return response + + +def display_commit_message_and_commit(commit_message): + """ 展示提交信息并提交 """ + commit_message_with_flag = f""" +```editor +{commit_message} +``` + """ + replay_object = pipe_interaction_mock(commit_message_with_flag) + new_commit_message, commit = replay_object["commit_message"], replay_object["commit"] + + if commit == "true": + subprocess.check_output(["git", "commit", "-m", new_commit_message]) + + +def main(): + try: + user_input = sys.argv[1] + + modified_files, staged_files = get_modified_files() + file_summaries = get_file_summary(modified_files, staged_files) + selected_files = get_marked_files(modified_files, staged_files, file_summaries) + rebuild_stage_list(selected_files) + diff = get_diff() + commit_message = generate_commit_message_base_diff(user_input, diff) + commit_message2 = generate_commit_message_base_file_summaries(user_input, file_summaries) + display_commit_message_and_commit(commit_message2["content"] + "\n\n\n" + commit_message["content"]) + output_message("""\n```progress\n\nDone\n\n```""") + sys.exit(0) + except Exception as err: + print("Exception:", err, file=sys.stderr, flush=True) + sys.exit(-1) + +if __name__ == '__main__': + main() diff --git a/commit/prompts.py b/commit/prompts.py new file mode 100644 index 0000000..7d820e7 --- /dev/null +++ b/commit/prompts.py @@ -0,0 +1,97 @@ + + +# summary changes for files based diff +# diff => {__DIFF__} +PROMPT_SUMMARY_FOR_FILES = """ +I have made the following changes: +```{__DIFF__}``` +Please provide a summary for each modified file. 
The output should ONLY be a JSON format like: +{"file1": "Summary of the changes made in file1", +"file2": "Summary of the changes made in file2"} +Key is real filename which is exist in diff changes. +Please make sure there is no other additional output. +""" +prompt_summary_for_files_llm_config = { + "model": "gpt-3.5-turbo-16k" +} +# ask summaries for missed files +# missed files => {__MISSED_FILES__} +PROMPT_SUMMARY_FOR_FILES_RETRY = """ +The following files are missed in your summary: +{__MISSED_FILES__} +""" + +# group changes for files based diff +# diff => {__DIFF__} +PROMPT_GROUP_FILES = """ +Please group the following modified files based on their modification relevance: +{__DIFF__} +The output should be a JSON format like: +[ + {"files": ["file1", "file2"], "group": "Feature Improvement", "important_level": 1}, + {"files": ["file3"], "group": "Bug Fix", "important_level": 2} +] +Please make sure there is no other additional output. +Please make sure every file in diff is grouped. +important_level is an integer from 1 to 10, 1 means the most important, 10 means the least important. +""" +prompt_group_files_llm_config = { + "model": "gpt-3.5-turbo-16k" +} +# re-group files based missed files +# missed files => {__MISSED_FILES__} +PROMPT_GROUP_FILES_RETRY = """ +The following files are missed in your response: +{__MISSED_FILES__} +Please re-group the files again, don't miss any file. +""" + + +# generate commit message based diff and user_input +# diff => {__DIFF__} +# user_input => {__USER_INPUT__} +PROMPT_COMMIT_MESSAGE_BY_DIFF_USER_INPUT = """ +I have made the following changes to the code: +``` +{__DIFF__} +``` +Please help me generate a commit message. {__USER_INPUT__}. If you don't know exact closed issue number, please don't output "Closes #Issue_number" line. The format is as follows: +``` +feat: commit message title + +Commit message body: +- Detailed message 1. +- Detailed message 2. + +Closes #Issue_number +``` +Adhere to best practices. Keep the title under 50 characters and each body line under 72 characters. +""" +prompt_commit_message_by_diff_user_input_llm_config = { + "model": "gpt-3.5-turbo-16k" +} + +# generate commit message based file summary and user_input +# file_summary => {__FILE_SUMMARY__} +# user_input => {__USER_INPUT__} +PROMPT_COMMIT_MESSAGE_BY_SUMMARY_USER_INPUT = """ +Here are the file summaries generated by AI: +``` +{__FILE_SUMMARY__} +``` + +Please help me generate a commit message. {__USER_INPUT__}. If you don't know exact closed issue number, please don't output "Closes #Issue_number" line. The format is as follows: +``` +feat: commit message title + +Commit message body: +- Detailed message 1. +- Detailed message 2. + +Closes #Issue_number +``` +Adhere to best practices. Keep the title under 50 characters and each body line under 72 characters. 
+""" +prompt_commit_message_by_summary_user_input_llm_config = { + "model": "gpt-3.5-turbo-16k" +} From 30de2d8c624722ef87d76c7ac4f754200abb6b72 Mon Sep 17 00:00:00 2001 From: "bobo.yang" Date: Fri, 8 Dec 2023 08:14:34 +0800 Subject: [PATCH 08/15] update commit command --- .gitignore | 3 + commit/command.yml | 6 +- commit/commit.py | 256 +++++++++++++++--------------------------- commit/prompts.py | 171 +++++++++++++++++++++------- commit/zh/command.yml | 5 + 5 files changed, 236 insertions(+), 205 deletions(-) create mode 100644 commit/zh/command.yml diff --git a/.gitignore b/.gitignore index 722d5e7..eb0debc 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ .vscode +__pycache__/ +.DS_Store + diff --git a/commit/command.yml b/commit/command.yml index c754470..93f62af 100644 --- a/commit/command.yml +++ b/commit/command.yml @@ -1,5 +1,5 @@ -description: commit changes. -hint: to close Issue ... +description: commit changes with commit message in english. +hint: to close Issue #issue_number input: optional steps: - - run: $command_python $command_path/commit.py "$input" \ No newline at end of file + - run: $command_python $command_path/commit.py "$input" "english" \ No newline at end of file diff --git a/commit/commit.py b/commit/commit.py index a59240c..feb5e61 100644 --- a/commit/commit.py +++ b/commit/commit.py @@ -1,26 +1,8 @@ -""" -commit.py: 通过几个步骤完成提交。 - -具体步骤包含: -1. 获取当前修改文件列表; -2. 获取用户选中的修改文件; - a. 标记出已经staged的文件; - b. 获取用户选中的文件; - c. 根据用户选中文件,重新构建stage列表; -3. 获取用户选中修改文件的Diff信息; -4. 生成提交信息; -5. 展示提交信息并提交。 - -注意: 步骤2.c, 步骤5有专门的函数实现,本脚本中不需要具体这两个步骤的实现。 -""" - import os import sys -import time -import re import json import subprocess -import openai +from typing import List from prompts import \ PROMPT_SUMMARY_FOR_FILES, \ @@ -35,103 +17,19 @@ from prompts import \ prompt_commit_message_by_summary_user_input_llm_config +sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'libs')) +sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..' 
, 'libs')) -def output_message(output): - out_data = f"""\n{output}\n""" - print(out_data, flush=True) - -def parse_response_from_ui(response): - # resonse text like this: - """ - ``` some_name - some key name 1: value1 - some key name 2: value2 - ``` - """ - # parse key values - lines = response.strip().split("\n") - if len(lines) <= 2: - return {} - - import ymal - data = yaml.safe_load(lines[1:-1]) - return data - - -def pipe_interaction_mock(output: str): - output_message(output) - # read response.txt in same dir with current script file - response_file = os.path.join(os.path.dirname(__file__), 'response.txt') - - # clear content in response_file - with open(response_file, 'w+', encoding="utf8"): - pass - - while True: - if os.path.exists(response_file): - with open(response_file, encoding="utf8") as f: - response = f.read() - if response.strip().endswith("```"): - break - time.sleep(1) - return parse_response_from_ui(response) +from ui_utils import ui_checkbox_select, ui_text_edit, CheckboxOption +from llm_api import chat_completion_no_stream, chat_completion_no_stream_return_json -def pipe_interaction(output: str): - output_message(output) +language = "" - lines = [] - while True: - try: - line = input() - if line.strip().startswith('``` '): - lines = [] - elif line.strip().startswith('```'): - lines.append(line) - break - lines.append(line) - except EOFError: - pass - - replay_message = '\n'.join(lines) - return parse_response_from_ui(replay_message) - - -def call_gpt_with_config(messages, llm_config) -> str: - connection_error = '' - for _1 in range(3): - try: - response = openai.ChatCompletion.create( - messages=messages, - **llm_config, - stream=False - ) - - response_dict = json.loads(str(response)) - respose_message = response_dict["choices"][0]["message"] - return respose_message - except ConnectionError as err: - connection_error = err - continue - except Exception as err: - print("Exception:", err, file=sys.stderr, flush=True) - return None - print("Connect Error:", connection_error, file=sys.stderr, flush=True) - return None - -def call_gpt_with_config_and_ensure_json(messages, llm_config): - for _1 in range(3): - response = call_gpt_with_config(messages, llm_config) - if response is None: - sys.exit(-1) - - try: - response_obj = json.loads(response["content"]) - return response_obj - except Exception: - continue - print("Not valid json response:", response["content"], file=sys.stderr, flush=True) - sys.exit(-1) +def assert_value(value, message): + if value: + print(message, file=sys.stderr, flush=True) + sys.exit(-1) def get_modified_files(): @@ -159,15 +57,18 @@ def get_modified_files(): staged_files.append(strip_file_name(filename)) return modified_files, staged_files -def gpt_file_summary(diff, diff_files): - prompt = PROMPT_SUMMARY_FOR_FILES.replace("{__DIFF__}", f"{diff}") - messages = [{"role": "user", "content": prompt}] +def gpt_file_summary(diff, diff_files, user_input): + global language + prompt = PROMPT_SUMMARY_FOR_FILES.replace("{__DIFF__}", f"{diff}").replace("{__USER_INPUT__}", f"{user_input}") + messages = [{"role": "user", "content": prompt + (" \nPlease response summaries in chinese" if language == "chinese" else "")}] normpath_summaries = {} retry_times = 0 while retry_times < 3: retry_times += 1 - file_summaries = call_gpt_with_config_and_ensure_json(messages, prompt_summary_for_files_llm_config) + file_summaries = chat_completion_no_stream_return_json(messages, prompt_summary_for_files_llm_config) + if not file_summaries: + continue for key, value in 
file_summaries.items(): normpath_summaries[os.path.normpath(key)] = value @@ -175,7 +76,7 @@ def gpt_file_summary(diff, diff_files): if len(missed_files) > 0: prompt_retry = PROMPT_SUMMARY_FOR_FILES_RETRY.replace("{__MISSED_FILES__}", f"{missed_files}") messages.append({"role": "assistant", "content": json.dumps(file_summaries)}) - messages.append({"role": "user", "content": prompt_retry}) + messages.append({"role": "user", "content": prompt_retry + (" \nPlease response summaries in chinese" if language == "chinese" else "")}) else: break @@ -190,7 +91,11 @@ def gpt_file_group(diff, diff_files): retry_times = 0 while retry_times < 3: retry_times += 1 - file_groups = call_gpt_with_config_and_ensure_json(messages, prompt_group_files_llm_config) + file_groups = chat_completion_no_stream_return_json(messages, prompt_group_files_llm_config) + if not file_groups: + continue + if 'groups' in file_groups: + file_groups = file_groups["groups"] grouped_files = [] for group in file_groups: grouped_files.extend(group["files"]) @@ -206,7 +111,33 @@ def gpt_file_group(diff, diff_files): return file_groups -def get_file_summary(modified_files, staged_files): +def get_file_summaries(modified_files, staged_files, user_input): + diffs = [] + for file in modified_files: + if file not in staged_files: + subprocess.check_output(["git", "add", file]) + diff = subprocess.check_output(["git", "diff", "--cached", file]) + if file not in staged_files: + subprocess.check_output(["git", "reset", file]) + diffs.append(diff.decode('utf-8')) + # total_diff = subprocess.check_output(["git", "diff", "HEAD"]) + total_diff_decoded = '\n'.join(diffs) # total_diff.decode('utf-8') + + if len(total_diff_decoded) > 15000: + print("Current diff length:", len(total_diff_decoded), flush=True) + return {}, [] + + # 在prompt中明确处置AI模型的输出格式需求 + normpath_summaries = gpt_file_summary(total_diff_decoded, modified_files, user_input) + print(f""" +``` file summary +{json.dumps(normpath_summaries, indent=4)} +``` + """) + + return normpath_summaries + +def get_file_summaries_and_groups(modified_files, staged_files, user_input): """ 当modified_files文件列表<=5时,根据项目修改差异生成每一个文件的修改总结 """ diffs = [] for file in modified_files: @@ -221,10 +152,10 @@ def get_file_summary(modified_files, staged_files): if len(total_diff_decoded) > 15000: print("Current diff length:", len(total_diff_decoded), flush=True) - return {} + return {}, [] # 在prompt中明确处置AI模型的输出格式需求 - normpath_summaries = gpt_file_summary(total_diff_decoded, modified_files) + normpath_summaries = gpt_file_summary(total_diff_decoded, modified_files, user_input) print(f""" ``` file summary {json.dumps(normpath_summaries, indent=4)} @@ -239,33 +170,20 @@ def get_file_summary(modified_files, staged_files): ``` """) - return normpath_summaries + return normpath_summaries, file_groups -def get_marked_files(modified_files, staged_files, file_summaries): +def get_marked_files(modified_files, staged_files, file_summaries, file_groups=None): """ 获取用户选中的修改文件及已经staged的文件""" # Coordinate with user interface to let user select files. # assuming user_files is a list of filenames selected by user. 
- out_str = "```chatmark\n" - out_str += "Staged:\n" - for file in staged_files: - out_str += f"- [x] {file} {file_summaries.get(file, '')}\n" - out_str += "Unstaged:\n" - for file in modified_files: - if file in staged_files: - continue - out_str += f"- [] {file} {file_summaries.get(file, '')}\n" - out_str += "```" - - output_message(out_str) - return [file for file in modified_files if file_summaries.get(file, None)] - replay_object = pipe_interaction_mock(out_str) - - select_files = [] - for key, value in replay_object.items(): - if key in modified_files and value == "true": - select_files.append(key) - return select_files + # commit_files = [] if len(file_groups) == 0 else sorted(file_groups, key=lambda obj: obj['importance_level'])[0]['files'] + options : List[CheckboxOption] = [] + options += [CheckboxOption(file, file + " - " + file_summaries.get(file, ''), "Staged", True) for file in staged_files] + options += [CheckboxOption(file, file + " - " + file_summaries.get(file, ''), "Unstaged", False) for file in modified_files if file not in staged_files] + + selected_files = ui_checkbox_select("Select files to commit", options) + return selected_files def rebuild_stage_list(user_files): @@ -283,56 +201,68 @@ def get_diff(): def generate_commit_message_base_diff(user_input, diff): """ Based on the diff information, generate a commit message through AI """ + global language + language_prompt = "You must response commit message in chinese。\n" if language == "chinese" else "" prompt = PROMPT_COMMIT_MESSAGE_BY_DIFF_USER_INPUT.replace( "{__DIFF__}", f"{diff}" ).replace( - "{__USER_INPUT__}", f"{user_input}" + "{__USER_INPUT__}", f"{user_input + language_prompt}" ) messages = [{"role": "user", "content": prompt}] - response = call_gpt_with_config(messages, prompt_commit_message_by_diff_user_input_llm_config) + response = chat_completion_no_stream(messages, prompt_commit_message_by_diff_user_input_llm_config) + assert_value(not response, "") return response def generate_commit_message_base_file_summaries(user_input, file_summaries): """ Based on the file_summaries, generate a commit message through AI """ + global language + language_prompt = "Please response commit message in chinese.\n" if language == "chinese" else "" prompt = PROMPT_COMMIT_MESSAGE_BY_SUMMARY_USER_INPUT.replace( "{__USER_INPUT__}", f"{user_input}" ).replace( "{__FILE_SUMMARY__}", f"{json.dumps(file_summaries, indent=4)}" ) # Call AI model to generate commit message - messages = [{"role": "user", "content": prompt}] - response = call_gpt_with_config(messages, prompt_commit_message_by_summary_user_input_llm_config) + messages = [{"role": "user", "content": language_prompt + prompt}] + response = chat_completion_no_stream(messages, prompt_commit_message_by_summary_user_input_llm_config) + assert_value(not response, "") return response def display_commit_message_and_commit(commit_message): """ 展示提交信息并提交 """ - commit_message_with_flag = f""" -```editor -{commit_message} -``` - """ - replay_object = pipe_interaction_mock(commit_message_with_flag) - new_commit_message, commit = replay_object["commit_message"], replay_object["commit"] - - if commit == "true": - subprocess.check_output(["git", "commit", "-m", new_commit_message]) + new_commit_message = ui_text_edit("Edit commit meesage", commit_message) + if not new_commit_message: + return + subprocess.check_output(["git", "commit", "-m", new_commit_message]) def main(): + global language try: user_input = sys.argv[1] + language = sys.argv[2] modified_files, staged_files = 
get_modified_files() - file_summaries = get_file_summary(modified_files, staged_files) + file_summaries = get_file_summaries(modified_files, staged_files, user_input) + # file_summaries, file_groups = get_file_summaries_and_groups(modified_files, staged_files, user_input) selected_files = get_marked_files(modified_files, staged_files, file_summaries) + if len(selected_files) == 0: + print("No files selected, commit aborted.") + return rebuild_stage_list(selected_files) - diff = get_diff() - commit_message = generate_commit_message_base_diff(user_input, diff) - commit_message2 = generate_commit_message_base_file_summaries(user_input, file_summaries) - display_commit_message_and_commit(commit_message2["content"] + "\n\n\n" + commit_message["content"]) - output_message("""\n```progress\n\nDone\n\n```""") + + summaries_for_select_files = {file: file_summaries[file] for file in selected_files if file in file_summaries} + if len(summaries_for_select_files.keys()) < len(selected_files): + diff = get_diff() + commit_message = generate_commit_message_base_diff(user_input, diff) + else: + commit_message = generate_commit_message_base_file_summaries(user_input, summaries_for_select_files) + + # display_commit_message_and_commit(commit_message2["content"] + "\n\n\n" + commit_message["content"]) + display_commit_message_and_commit(commit_message["content"]) + print("""\n```progress\n\nDone\n\n```""") sys.exit(0) except Exception as err: print("Exception:", err, file=sys.stderr, flush=True) diff --git a/commit/prompts.py b/commit/prompts.py index 7d820e7..fc91033 100644 --- a/commit/prompts.py +++ b/commit/prompts.py @@ -3,13 +3,40 @@ # summary changes for files based diff # diff => {__DIFF__} PROMPT_SUMMARY_FOR_FILES = """ -I have made the following changes: -```{__DIFF__}``` -Please provide a summary for each modified file. The output should ONLY be a JSON format like: -{"file1": "Summary of the changes made in file1", -"file2": "Summary of the changes made in file2"} -Key is real filename which is exist in diff changes. -Please make sure there is no other additional output. +Objective: **Create concise summaries for each modified file based on the provided diff changes and any additional user input.** + +**Instructions:** +1. Review the diff changes and user input to understand the context and content of the modifications. +2. Write a summary for each file that has been modified, capturing the essence of the changes. +3. Use the filename from the diff as the key, and the summary as the value in the output JSON object. + +**Response Format:** +```json +{ + \"filename1\": \"Summary of the changes made in filename1\", + \"filename2\": \"Summary of the changes made in filename2\", + ... +} +``` + +**Constraints:** +- Ensure that the summaries are accurate and reflect the changes made. +- The response must be in JSON format, with filenames as keys and summaries as values. +- Do not include any additional text or output outside of the JSON format. +- The keys in the JSON object should correspond to real filenames present in the diff changes. + +**User Input:** +``` +{__USER_INPUT__} +``` + +**Diff Changes:** +``` +{__DIFF__} +``` +--- + +Based on the provided diff changes and any additional user input, please generate a JSON object containing summaries for each modified file. 
""" prompt_summary_for_files_llm_config = { "model": "gpt-3.5-turbo-16k" @@ -24,16 +51,44 @@ The following files are missed in your summary: # group changes for files based diff # diff => {__DIFF__} PROMPT_GROUP_FILES = """ -Please group the following modified files based on their modification relevance: +Objective: **Categorize the modified files from a diff into groups based on their relevance to each other, and assign an importance level to each group. Limit the number of groups to a maximum of three.** + +**Instructions:** +1. **Analysis:** Review the diff content to discern related changes. Group files that are part of the same logical change, ensuring that the code will compile and run correctly post-commit. +2. **Atomic Grouping:** Aim for the smallest possible groups. Each should represent a single, cohesive modification for clarity and independent comprehension. Do not exceed three groups in total. +3. **Importance Level:** Rate each group's importance on a scale of 1 to 10, with 1 being the most critical. Consider the impact on functionality, urgency of fixes, and feature significance. + +**Response Format:** +- Use JSON format for your response. +- Include all files from the diff content. +- Structure the JSON as shown in the example below. + +**Example Output:** +```json +{ + "groups": [ + {\"files\": [\"fileA\", \"fileB\"], \"group\": \"Feature Improvement\", \"importance_level\": 5}, + {\"files\": [\"fileC\"], \"group\": \"Bug Fix\", \"importance_level\": 1}, + {\"files\": [\"fileD\", \"fileE\"], \"group\": \"Code Refactoring\", \"importance_level\": 3} + ] +} +``` + +**Constraints:** +- Ensure the JSON output is valid and contains no additional text or characters. +- Each group must be self-contained, with no cross-group dependencies. +- The importance level should accurately reflect the priority for committing the changes. +- The total number of groups must not exceed three. +- Follows the JSON structure shown in the example above. + +**Diff Content:** +``` {__DIFF__} -The output should be a JSON format like: -[ - {"files": ["file1", "file2"], "group": "Feature Improvement", "important_level": 1}, - {"files": ["file3"], "group": "Bug Fix", "important_level": 2} -] -Please make sure there is no other additional output. -Please make sure every file in diff is grouped. -important_level is an integer from 1 to 10, 1 means the most important, 10 means the least important. +``` + +--- + +Based on the provided diff content, group the files accordingly and assign an appropriate importance level to each group, following the instructions and constraints. """ prompt_group_files_llm_config = { "model": "gpt-3.5-turbo-16k" @@ -51,21 +106,40 @@ Please re-group the files again, don't miss any file. # diff => {__DIFF__} # user_input => {__USER_INPUT__} PROMPT_COMMIT_MESSAGE_BY_DIFF_USER_INPUT = """ -I have made the following changes to the code: +Objective:** Create a commit message that concisely summarizes the changes made to the codebase as reflected by the provided diff. The commit message should also take into account any additional context or instructions given by the user. + +**Commit Message Structure:** +1. **Title Line:** Start with a type from the following options: `feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`, `build`, `ci`, `chore`, etc. Follow the type with a concise title. Format: `type: Title`. Only one title line is allowed. +2. **Summary:** Provide a summary of all changes in no more than three detailed message lines. 
Each line should be prefixed with a \"-\". +3. **Closing Reference (Optional):** If applicable, include a closing reference line in the format `Closes #IssueNumber`. Only include this if you know the exact issue number. + +**Response Format:** +``` +type: Title + + Detail message line 1 + Detail message line 2 + Detail message line 3 + +Closes #IssueNumber +``` + +**Constraints:** +- Do not include markdown block flags (```) or the placeholder text \"commit_message\" in your response. +- Adhere to best practices for commit messages: + - Keep the title under 50 characters. + - Keep each summary line under 72 characters. +- If the exact issue number is unknown, omit the closing reference line. + +**User Input:** `{__USER_INPUT__}` + +**Code Changes:** ``` {__DIFF__} ``` -Please help me generate a commit message. {__USER_INPUT__}. If you don't know exact closed issue number, please don't output "Closes #Issue_number" line. The format is as follows: -``` -feat: commit message title +--- -Commit message body: -- Detailed message 1. -- Detailed message 2. - -Closes #Issue_number -``` -Adhere to best practices. Keep the title under 50 characters and each body line under 72 characters. +Please use the above structure to generate a commit message that meets the specified criteria. """ prompt_commit_message_by_diff_user_input_llm_config = { "model": "gpt-3.5-turbo-16k" @@ -75,23 +149,42 @@ prompt_commit_message_by_diff_user_input_llm_config = { # file_summary => {__FILE_SUMMARY__} # user_input => {__USER_INPUT__} PROMPT_COMMIT_MESSAGE_BY_SUMMARY_USER_INPUT = """ -Here are the file summaries generated by AI: +Objective:** Generate a commit message that accurately reflects the changes made to the codebase, as summarized by the AI-generated file summary and any additional user input. + +**Commit Message Structure:** +1. **Title Line:** Begin with a type from the following options: `feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`, `build`, `ci`, `chore`, etc. The title should be concise and informative. Format: `type: Title`. Only one title line is allowed. +2. **Summary:** Condense the changes into 1-3 detailed message lines, each beginning with a \"-\". +3. **Closing Reference (Optional):** If known, include a closing reference in the format `Closes #IssueNumber`. If the exact issue number is unknown, omit this line. + +**Response Format:** +``` +type: Title + + Detail message line 1 + Detail message line 2 + Detail message line 3 + +Closes #IssueNumber +``` + +**Constraints:** +- Exclude markdown code block flags (```) and the placeholder \"commit_message\" from your response. +- Follow commit message best practices: + - Title line should be under 50 characters. + - Each summary line should be under 72 characters. +- If the issue number is not provided, do not include the closing reference line. + +**User Input:** `{__USER_INPUT__}` + +**File Summary:** ``` {__FILE_SUMMARY__} ``` -Please help me generate a commit message. {__USER_INPUT__}. If you don't know exact closed issue number, please don't output "Closes #Issue_number" line. The format is as follows: -``` -feat: commit message title +--- -Commit message body: -- Detailed message 1. -- Detailed message 2. - -Closes #Issue_number -``` -Adhere to best practices. Keep the title under 50 characters and each body line under 72 characters. +Please create a commit message following the above guidelines based on the provided file summary and user input. 
""" prompt_commit_message_by_summary_user_input_llm_config = { - "model": "gpt-3.5-turbo-16k" + "model": "gpt-4-1106-preview" } diff --git a/commit/zh/command.yml b/commit/zh/command.yml new file mode 100644 index 0000000..b9afd36 --- /dev/null +++ b/commit/zh/command.yml @@ -0,0 +1,5 @@ +description: commit changes with commit message in chinese. +hint: to close Issue #issue_number +input: optional +steps: + - run: $command_python $command_path/../commit.py "$input" "chinese" \ No newline at end of file From 39c721ad66bda6a828d747d797f226fd01b23af2 Mon Sep 17 00:00:00 2001 From: "bobo.yang" Date: Fri, 8 Dec 2023 08:18:56 +0800 Subject: [PATCH 09/15] Update file summaries in commit.py --- commit/commit.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/commit/commit.py b/commit/commit.py index feb5e61..d031e81 100644 --- a/commit/commit.py +++ b/commit/commit.py @@ -129,11 +129,6 @@ def get_file_summaries(modified_files, staged_files, user_input): # 在prompt中明确处置AI模型的输出格式需求 normpath_summaries = gpt_file_summary(total_diff_decoded, modified_files, user_input) - print(f""" -``` file summary -{json.dumps(normpath_summaries, indent=4)} -``` - """) return normpath_summaries From 6bb5e29593f21aea58830fa8b67ce95d58c63c3e Mon Sep 17 00:00:00 2001 From: "bobo.yang" Date: Fri, 8 Dec 2023 09:20:01 +0800 Subject: [PATCH 10/15] update prompts and add function comments --- commit/commit.py | 157 +++++++++++++++++++++++++++++++++++++++++----- commit/prompts.py | 1 + 2 files changed, 142 insertions(+), 16 deletions(-) diff --git a/commit/commit.py b/commit/commit.py index d031e81..1dae5f8 100644 --- a/commit/commit.py +++ b/commit/commit.py @@ -27,12 +27,32 @@ from llm_api import chat_completion_no_stream, chat_completion_no_stream_return_ language = "" def assert_value(value, message): + """ + 判断给定的value是否为True,如果是,则输出指定的message并终止程序。 + + Args: + value: 用于判断的值。 + message: 如果value为True时需要输出的信息。 + + Returns: + 无返回值。 + + """ if value: print(message, file=sys.stderr, flush=True) sys.exit(-1) def get_modified_files(): + """ + 获取当前修改文件列表以及已经staged的文件列表 + + Args: + 无 + + Returns: + tuple: 包含两个list的元组,第一个list包含当前修改过的文件,第二个list包含已经staged的文件 + """ """ 获取当前修改文件列表以及已经staged的文件列表""" output = subprocess.check_output(["git", "status", "-s", "-u"]) output = output.decode('utf-8') @@ -58,6 +78,17 @@ def get_modified_files(): return modified_files, staged_files def gpt_file_summary(diff, diff_files, user_input): + """ + 生成GPT对话,获取文件差异内容的摘要。 + + Args: + diff (str): 文件差异内容。 + diff_files (List[str]): 文件差异列表。 + user_input (str): 用户输入。 + + Returns: + dict: 文件路径作为键,摘要内容作为值的字典。 + """ global language prompt = PROMPT_SUMMARY_FOR_FILES.replace("{__DIFF__}", f"{diff}").replace("{__USER_INPUT__}", f"{user_input}") messages = [{"role": "user", "content": prompt + (" \nPlease response summaries in chinese" if language == "chinese" else "")}] @@ -84,6 +115,16 @@ def gpt_file_summary(diff, diff_files, user_input): def gpt_file_group(diff, diff_files): + """ + 根据diff和diff_files列表,对文件进行分组,返回分组结果。 + + Args: + diff (str): 差异信息。 + diff_files (List[str]): 文件列表。 + + Returns: + List[Dict]: 文件分组结果,每个分组是一个字典,包含"name"和"files"两个键值对,分别表示分组名称和该分组下的文件列表。 + """ prompt = PROMPT_GROUP_FILES.replace("{__DIFF__}", f"{diff}") messages = [{"role": "user", "content": prompt}] file_groups = [] @@ -112,6 +153,17 @@ def gpt_file_group(diff, diff_files): def get_file_summaries(modified_files, staged_files, user_input): + """ + 计算git暂存区文件的差异,并生成对应的AI模型输入格式。 + + Args: + modified_files (list): 当前工作区的修改文件列表 + staged_files (list): 已暂存文件列表 + user_input (str): 
用户输入信息 + + Returns: + dict: AI模型输出格式,包含normpath_summaries和modified_files两个key-value对。 + """ diffs = [] for file in modified_files: if file not in staged_files: @@ -133,7 +185,22 @@ def get_file_summaries(modified_files, staged_files, user_input): return normpath_summaries def get_file_summaries_and_groups(modified_files, staged_files, user_input): - """ 当modified_files文件列表<=5时,根据项目修改差异生成每一个文件的修改总结 """ + """ + 获取已修改文件的摘要和分组。 + + Args: + modified_files (List[str]): 已修改的文件列表。 + staged_files (List[str]): 已暂存的文件列表。 + user_input (str): 用户输入。 + + Returns: + Tuple[Dict[str, Any], List[Dict[str, Any]]]: 包含以下两个元素的元组: + - 文件摘要信息,字典类型,键为文件路径,值为该文件对应的摘要信息; + - 文件分组信息,列表类型,每个元素为包含以下三个键值对的字典: + * group_id:组ID。 + * files:属于该分组的文件列表。 + * summary:该分组的摘要信息。 + """ diffs = [] for file in modified_files: if file not in staged_files: @@ -168,11 +235,19 @@ def get_file_summaries_and_groups(modified_files, staged_files, user_input): return normpath_summaries, file_groups -def get_marked_files(modified_files, staged_files, file_summaries, file_groups=None): - """ 获取用户选中的修改文件及已经staged的文件""" - # Coordinate with user interface to let user select files. - # assuming user_files is a list of filenames selected by user. - # commit_files = [] if len(file_groups) == 0 else sorted(file_groups, key=lambda obj: obj['importance_level'])[0]['files'] +def get_marked_files(modified_files, staged_files, file_summaries): + """ + 根据给定的参数获取用户选中以供提交的文件 + + Args: + modified_files (List[str]): 用户已修改文件列表 + staged_files (List[str]): 用户已staged文件列表 + file_summaries (Dict[str, str]): 文件摘要信息,key为文件名,value为摘要信息 + file_groups (List[Dict[str, Any]]): 文件分组信息,每个元素是一个字典,包含两个key值分别为 "importance_level" 和 "files",分别表示文件的重要程度和该重要程度下的文件列表 + + Returns: + List[str]: 用户选中的文件列表 + """ options : List[CheckboxOption] = [] options += [CheckboxOption(file, file + " - " + file_summaries.get(file, ''), "Staged", True) for file in staged_files] options += [CheckboxOption(file, file + " - " + file_summaries.get(file, ''), "Unstaged", False) for file in modified_files if file not in staged_files] @@ -182,7 +257,16 @@ def get_marked_files(modified_files, staged_files, file_summaries, file_groups=N def rebuild_stage_list(user_files): - """ 根据用户选中文件,重新构建stage列表 """ + """ + 根据用户选中文件,重新构建stage列表 + + Args: + user_files: 用户选中的文件列表 + + Returns: + None + + """ # Unstage all files subprocess.check_output(["git", "reset"]) # Stage all user_files @@ -191,11 +275,30 @@ def rebuild_stage_list(user_files): def get_diff(): - """ 获取staged files的Diff信息 """ + """ + 获取暂存区文件的Diff信息 + + Args: + 无 + + Returns: + bytes: 返回bytes类型,是git diff --cached命令的输出结果 + + """ return subprocess.check_output(["git", "diff", "--cached"]) def generate_commit_message_base_diff(user_input, diff): - """ Based on the diff information, generate a commit message through AI """ + """ + 根据diff信息,通过AI生成一个commit消息 + + Args: + user_input (str): 用户输入的commit信息 + diff (str): 提交的diff信息 + + Returns: + str: 生成的commit消息 + + """ global language language_prompt = "You must response commit message in chinese。\n" if language == "chinese" else "" prompt = PROMPT_COMMIT_MESSAGE_BY_DIFF_USER_INPUT.replace( @@ -210,7 +313,16 @@ def generate_commit_message_base_diff(user_input, diff): def generate_commit_message_base_file_summaries(user_input, file_summaries): - """ Based on the file_summaries, generate a commit message through AI """ + """ + 根据文件摘要生成通过AI生成的提交消息 + + Args: + user_input (str): 用户输入 + file_summaries (list[dict]): 文件摘要列表 + + Returns: + str: 提交消息 + """ global language language_prompt = "Please response commit message in 
chinese.\n" if language == "chinese" else "" prompt = PROMPT_COMMIT_MESSAGE_BY_SUMMARY_USER_INPUT.replace( @@ -226,7 +338,16 @@ def generate_commit_message_base_file_summaries(user_input, file_summaries): def display_commit_message_and_commit(commit_message): - """ 展示提交信息并提交 """ + """ + 展示提交信息并提交。 + + Args: + commit_message: 提交信息。 + + Returns: + None。 + + """ new_commit_message = ui_text_edit("Edit commit meesage", commit_message) if not new_commit_message: return @@ -236,26 +357,30 @@ def display_commit_message_and_commit(commit_message): def main(): global language try: + # Ensure enough command line arguments are provided + if len(sys.argv) < 3: + print("Usage: python script.py ") + return + user_input = sys.argv[1] language = sys.argv[2] modified_files, staged_files = get_modified_files() file_summaries = get_file_summaries(modified_files, staged_files, user_input) - # file_summaries, file_groups = get_file_summaries_and_groups(modified_files, staged_files, user_input) selected_files = get_marked_files(modified_files, staged_files, file_summaries) - if len(selected_files) == 0: + if not selected_files: print("No files selected, commit aborted.") return - rebuild_stage_list(selected_files) - + + rebuild_stage_list(selected_files) summaries_for_select_files = {file: file_summaries[file] for file in selected_files if file in file_summaries} + if len(summaries_for_select_files.keys()) < len(selected_files): diff = get_diff() commit_message = generate_commit_message_base_diff(user_input, diff) else: commit_message = generate_commit_message_base_file_summaries(user_input, summaries_for_select_files) - # display_commit_message_and_commit(commit_message2["content"] + "\n\n\n" + commit_message["content"]) display_commit_message_and_commit(commit_message["content"]) print("""\n```progress\n\nDone\n\n```""") sys.exit(0) diff --git a/commit/prompts.py b/commit/prompts.py index fc91033..89943a6 100644 --- a/commit/prompts.py +++ b/commit/prompts.py @@ -21,6 +21,7 @@ Objective: **Create concise summaries for each modified file based on the provid **Constraints:** - Ensure that the summaries are accurate and reflect the changes made. +- Ensure that the summary is concise and does not exceed 200 characters. - The response must be in JSON format, with filenames as keys and summaries as values. - Do not include any additional text or output outside of the JSON format. - The keys in the JSON object should correspond to real filenames present in the diff changes. 
From f3d867623c2b67e6347d596821062ccee05dce94 Mon Sep 17 00:00:00 2001 From: "bobo.yang" Date: Fri, 8 Dec 2023 10:37:32 +0800 Subject: [PATCH 11/15] feat: Integrate LLM API and UI utility functions - Added new LLM API functions for non-streaming chat completions - Introduced UI utility functions for input/output interaction - Implemented multi-select and single-select UI components --- libs/llm_api/__init__.py | 6 +++ libs/llm_api/openai.py | 48 +++++++++++++++++++++++ libs/ui_utils/__init__.py | 16 ++++++++ libs/ui_utils/iobase.py | 64 +++++++++++++++++++++++++++++++ libs/ui_utils/multi_selections.py | 40 +++++++++++++++++++ libs/ui_utils/single_select.py | 33 ++++++++++++++++ libs/ui_utils/text_edit.py | 40 +++++++++++++++++++ 7 files changed, 247 insertions(+) create mode 100644 libs/llm_api/__init__.py create mode 100644 libs/llm_api/openai.py create mode 100644 libs/ui_utils/__init__.py create mode 100644 libs/ui_utils/iobase.py create mode 100644 libs/ui_utils/multi_selections.py create mode 100644 libs/ui_utils/single_select.py create mode 100644 libs/ui_utils/text_edit.py diff --git a/libs/llm_api/__init__.py b/libs/llm_api/__init__.py new file mode 100644 index 0000000..f77f3bf --- /dev/null +++ b/libs/llm_api/__init__.py @@ -0,0 +1,6 @@ +from .openai import chat_completion_no_stream, chat_completion_no_stream_return_json + +__all__ = [ + 'chat_completion_no_stream', + 'chat_completion_no_stream_return_json' +] diff --git a/libs/llm_api/openai.py b/libs/llm_api/openai.py new file mode 100644 index 0000000..0fc453d --- /dev/null +++ b/libs/llm_api/openai.py @@ -0,0 +1,48 @@ +import sys +import json + +import openai + +def chat_completion_no_stream(messages, llm_config, error_out: bool=True) -> str: + connection_error = '' + for _1 in range(3): + try: + response = openai.ChatCompletion.create( + messages=messages, + **llm_config, + stream=False + ) + + response_dict = json.loads(str(response)) + if 'choices' not in response_dict: + if error_out: + print("Response Error:", response_dict, file=sys.stderr, flush=True) + return None + respose_message = response_dict["choices"][0]["message"] + # print("=> llm response:", respose_message, end="\n\n") + return respose_message + except ConnectionError as err: + connection_error = err + continue + except Exception as err: + if error_out: + print("Exception:", err, file=sys.stderr, flush=True) + return None + if error_out: + print("Connect Error:", connection_error, file=sys.stderr, flush=True) + return None + +def chat_completion_no_stream_return_json(messages, llm_config, error_out: bool=True): + for _1 in range(3): + response = chat_completion_no_stream(messages, llm_config) + if response is None: + return None + + try: + response_obj = json.loads(response["content"]) + return response_obj + except Exception: + continue + if error_out: + print("Not valid json response:", response["content"], file=sys.stderr, flush=True) + return None diff --git a/libs/ui_utils/__init__.py b/libs/ui_utils/__init__.py new file mode 100644 index 0000000..286ed20 --- /dev/null +++ b/libs/ui_utils/__init__.py @@ -0,0 +1,16 @@ +from .iobase import parse_response_from_ui, pipe_interaction, pipe_interaction_mock +from .multi_selections import ui_checkbox_select, CheckboxOption +from .single_select import ui_radio_select, RadioOption +from .text_edit import ui_text_edit + + +__all__ = [ + 'parse_response_from_ui', + 'pipe_interaction', + 'pipe_interaction_mock', + 'ui_checkbox_select', + 'ui_radio_select', + 'ui_text_edit', + 'CheckboxOption', + 'RadioOption' +] 
diff --git a/libs/ui_utils/iobase.py b/libs/ui_utils/iobase.py
new file mode 100644
index 0000000..e8f61ec
--- /dev/null
+++ b/libs/ui_utils/iobase.py
@@ -0,0 +1,64 @@
+import os
+import time
+
+import yaml
+
+def output_message(output):
+    out_data = f"""\n{output}\n"""
+    print(out_data, flush=True)
+
+
+def parse_response_from_ui(response):
+    # The response text looks like this:
+    """
+    ``` some_name
+    some key name 1: value1
+    some key name 2: value2
+    ```
+    """
+    # parse the key/value pairs between the fences
+    lines = response.strip().split("\n")
+    if len(lines) <= 2:
+        return {}
+
+    data = yaml.safe_load('\n'.join(lines[1:-1]))
+    return data or {}
+
+
+def pipe_interaction_mock(output: str):
+    output_message(output)
+    # read response.txt in the same directory as this script file
+    response_file = os.path.join(os.path.dirname(__file__), 'response.txt')
+
+    # clear the content of response_file
+    with open(response_file, 'w+', encoding="utf8"):
+        pass
+
+    while True:
+        if os.path.exists(response_file):
+            with open(response_file, encoding="utf8") as f:
+                response = f.read()
+                if response.strip().endswith("```"):
+                    break
+        time.sleep(1)
+    return parse_response_from_ui(response)
+
+
+def pipe_interaction(output: str):
+    output_message(output)
+
+    lines = []
+    while True:
+        try:
+            line = input()
+            if line.strip().startswith('```yaml'):
+                lines = []
+            elif line.strip() == '```':
+                lines.append(line)
+                break
+            lines.append(line)
+        except EOFError:
+            break
+
+    reply_message = '\n'.join(lines)
+    return parse_response_from_ui(reply_message)
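A chatmark reply arrives on stdin as a fenced YAML block of key/value pairs. The following self-contained check exercises the parser above with illustrative keys; the fence string is assembled from single backticks so the example does not collide with the surrounding markup, and it assumes `libs` is on `sys.path`:

```python
from ui_utils import parse_response_from_ui

fence = "`" * 3  # the triple-backtick fence delimiter
reply = "\n".join([fence + "yaml", "file1: checked", "file2: unchecked", fence])

print(parse_response_from_ui(reply))
# -> {'file1': 'checked', 'file2': 'unchecked'}
```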
diff --git a/libs/ui_utils/multi_selections.py b/libs/ui_utils/multi_selections.py
new file mode 100644
index 0000000..dad14d2
--- /dev/null
+++ b/libs/ui_utils/multi_selections.py
@@ -0,0 +1,40 @@
+from typing import List
+
+from .iobase import pipe_interaction
+
+
+class CheckboxOption:
+    def __init__(self, id, text, group: str=None, checked: bool=False):
+        # it will show as: [] (id): text
+        self._id = id
+        self._text = text
+        self._group = group
+        self._checked = checked
+
+
+def ui_checkbox_select(title: str, options: List[CheckboxOption]) -> List[str]:
+    """
+    Send text to the UI as:
+    ```chatmark
+    Which files would you like to commit? I've suggested a few.
+    > [x](file1) devchat/engine/prompter.py
+    > [x](file2) devchat/prompt.py
+    > [](file3) tests/test_cli_prompt.py
+    ```
+    """
+    _NT = "\n"
+    groups = list({option._group: 1 for option in options}.keys())
+    check_option_message = lambda option: f"> [{'x' if option._checked else ''}]({option._id}) {option._text}"
+    check_option_group_message = lambda group: (f"{group}:{_NT}" if group else "") + _NT.join([check_option_message(option) for option in options if option._group == group])
+    ui_message = f"""
+```chatmark type=form
+{title}
+{_NT.join([check_option_group_message(group) for group in groups])}
+```
+    """
+    # print(ui_message)
+    # return [option._id for option in options]
+    response = pipe_interaction(ui_message)
+
+    selected_options = [key for key, value in response.items() if value == 'checked' and key in [option._id for option in options]]
+    return selected_options
diff --git a/libs/ui_utils/single_select.py b/libs/ui_utils/single_select.py
new file mode 100644
index 0000000..98f8abe
--- /dev/null
+++ b/libs/ui_utils/single_select.py
@@ -0,0 +1,33 @@
+from typing import List
+
+from .iobase import pipe_interaction
+
+
+class RadioOption:
+    def __init__(self, id, text):
+        # it will show as: - (id): text
+        self._id = id
+        self._text = text
+
+def ui_radio_select(title: str, options: List[RadioOption]) -> str | None:
+    """
+    ```chatmark type=form
+    How would you like to make the change?
+    > - (insert) Insert the new code.
+    > - (new) Put the code in a new file.
+    > - (replace) Replace the current code.
+    ```
+    """
+    option_line = lambda option: f"> - ({option._id}) {option._text}"
+    options_lines = "\n".join([option_line(option) for option in options])
+    ui_message = f"""
+```chatmark type=form
+{title}
+{options_lines}
+```
+"""
+
+    response = pipe_interaction(ui_message)
+
+    selected_options = [key for key, value in response.items() if value == 'checked' and key in [option._id for option in options]]
+    return selected_options[0] if selected_options else None
diff --git a/libs/ui_utils/text_edit.py b/libs/ui_utils/text_edit.py
new file mode 100644
index 0000000..6312353
--- /dev/null
+++ b/libs/ui_utils/text_edit.py
@@ -0,0 +1,40 @@
+import os
+from typing import List
+
+from .iobase import pipe_interaction
+
+
+def ui_text_edit(title: str, text: str) -> str | None:
+    """
+    ```chatmark type=form
+    I've drafted a commit message for you as below. Feel free to modify it.
+
+    > | (ID)
+    > fix: prevent racing of requests
+    >
+    > Introduce a request id and a reference to latest request. Dismiss
+    > incoming responses other than from latest request.
+    >
+    > Reviewed-by: Z
+    > Refs: #123
+    ```
+    """
+    text_lines = text.strip().split('\n')
+    if len(text_lines) > 0 and text_lines[0].strip().startswith('```'):
+        text_lines = text_lines[1:]
+    if len(text_lines) > 0 and text_lines[-1].strip() == '```':
+        text_lines = text_lines[:-1]
+    text = '\n'.join(text_lines)
+    text = text.replace('\n', '\n> ')
+    ui_message = f"""
+```chatmark type=form
+{title}
+
+> | (editor0)
+> {text}
+```
+"""
+    response = pipe_interaction(ui_message)
+    if 'editor0' in response:
+        return response['editor0']
+    return None
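Taken together, a command script would drive these widgets roughly as sketched below. This only behaves meaningfully under an IDE front end that speaks the chatmark form protocol over stdin/stdout, and the option ids, file names, and draft message are made up for illustration:

```python
import os
import sys

# The command scripts add libs/ to the import path this way.
sys.path.append(os.path.join(os.path.dirname(__file__), "..", "libs"))

from ui_utils import CheckboxOption, ui_checkbox_select, ui_text_edit

options = [
    CheckboxOption("file1", "devchat/engine/prompter.py", group="staged", checked=True),
    CheckboxOption("file2", "tests/test_cli_prompt.py", group="unstaged", checked=False),
]
selected = ui_checkbox_select("Which files would you like to commit?", options)
draft = ui_text_edit("Feel free to modify the draft below.", "fix: prevent racing of requests")
print(selected, draft)
```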
From 169d02007e426a4b66b83425b8ed7bc7b70017fa Mon Sep 17 00:00:00 2001
From: "bobo.yang"
Date: Fri, 8 Dec 2023 10:55:48 +0800
Subject: [PATCH 12/15] refactor: Update LLM configs and add markdown flag removal

- Updated model version in various LLM configs to 'gpt-3.5-turbo-1106'
- Added '_try_remove_markdown_block_flag' function in openai.py
---
 commit/prompts.py      |  6 +++---
 libs/llm_api/openai.py | 26 +++++++++++++++++++++++++-
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/commit/prompts.py b/commit/prompts.py
index 89943a6..da76620 100644
--- a/commit/prompts.py
+++ b/commit/prompts.py
@@ -40,7 +40,7 @@ Objective: **Create concise summaries for each modified file based on the provid
 Based on the provided diff changes and any additional user input, please generate a JSON object containing summaries for each modified file.
 """
 prompt_summary_for_files_llm_config = {
-    "model": "gpt-3.5-turbo-16k"
+    "model": "gpt-3.5-turbo-1106"
 }
 # ask summaries for missed files
 # missed files => {__MISSED_FILES__}
@@ -92,7 +92,7 @@ Objective: **Categorize the modified files from a diff into groups based on thei
 Based on the provided diff content, group the files accordingly and assign an appropriate importance level to each group, following the instructions and constraints.
 """
 prompt_group_files_llm_config = {
-    "model": "gpt-3.5-turbo-16k"
+    "model": "gpt-3.5-turbo-1106"
 }
 # re-group files based missed files
 # missed files => {__MISSED_FILES__}
@@ -143,7 +143,7 @@ Closes #IssueNumber
 Please use the above structure to generate a commit message that meets the specified criteria.
 """
 prompt_commit_message_by_diff_user_input_llm_config = {
-    "model": "gpt-3.5-turbo-16k"
+    "model": "gpt-3.5-turbo-1106"
 }
 # generate commit message based file summary and user_input
diff --git a/libs/llm_api/openai.py b/libs/llm_api/openai.py
index 0fc453d..4dfbf0d 100644
--- a/libs/llm_api/openai.py
+++ b/libs/llm_api/openai.py
@@ -1,8 +1,30 @@
+import re
 import sys
 import json
 
 import openai
 
+
+def _try_remove_markdown_block_flag(content):
+    """
+    If content is a fenced markdown block, strip the leading ```xxx and trailing ``` markers.
+    """
+    # Regex pattern matching the header and footer fences of a markdown block
+    pattern = r'^\s*```\s*(\w+)\s*\n(.*?)\n\s*```\s*$'
+
+    # Match against the whole content
+    match = re.search(pattern, content, re.DOTALL | re.MULTILINE)
+
+    if match:
+        # On a match, extract and return the content inside the fences
+        _language = match.group(1)
+        markdown_content = match.group(2)
+        return markdown_content.strip()
+    else:
+        # Otherwise return the content unchanged
+        return content
+
+
 def chat_completion_no_stream(messages, llm_config, error_out: bool=True) -> dict | None:
     connection_error = ''
     for _ in range(3):
@@ -39,7 +61,9 @@ def chat_completion_no_stream(messages, llm_config, error_out: bool=True) -> dict | None
         return None
 
         try:
-            response_obj = json.loads(response["content"])
+            # the 1106 models may wrap the JSON result in a ```json ... ``` block
+            response_content = _try_remove_markdown_block_flag(response["content"])
+            response_obj = json.loads(response_content)
             return response_obj
         except Exception:
             continue
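A quick sanity check of the new fence-stripping helper. `_try_remove_markdown_block_flag` is module-private, so the sketch imports it from `llm_api.openai` directly; the fences are again built from single backticks to keep the example self-contained:

```python
from llm_api.openai import _try_remove_markdown_block_flag

fence = "`" * 3
wrapped = fence + "json\n" + '{"ok": true}' + "\n" + fence
plain = '{"ok": true}'

assert _try_remove_markdown_block_flag(wrapped) == plain
assert _try_remove_markdown_block_flag(plain) == plain  # non-fenced content passes through
print("fence stripping behaves as expected")
```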
From c58ef78d52805b849bedda27cef247bebb8bbae8 Mon Sep 17 00:00:00 2001
From: "bobo.yang"
Date: Fri, 8 Dec 2023 11:08:36 +0800
Subject: [PATCH 13/15] refactor: Update response format and enhance documentation

- Updated the response format to include file paths in filename keys
- Added docstrings to chat_completion functions for better clarity
---
 commit/prompts.py      |  4 ++--
 libs/llm_api/openai.py | 25 ++++++++++++++++++++++++-
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/commit/prompts.py b/commit/prompts.py
index da76620..72deacf 100644
--- a/commit/prompts.py
+++ b/commit/prompts.py
@@ -13,8 +13,8 @@ Objective: **Create concise summaries for each modified file based on the provid
 **Response Format:**
 ```json
 {
-    \"filename1\": \"Summary of the changes made in filename1\",
-    \"filename2\": \"Summary of the changes made in filename2\",
+    \"filename1 with path\": \"Summary of the changes made in filename1\",
+    \"filename2 with path\": \"Summary of the changes made in filename2\",
     ...
 }
 ```
diff --git a/libs/llm_api/openai.py b/libs/llm_api/openai.py
index 4dfbf0d..ed66570 100644
--- a/libs/llm_api/openai.py
+++ b/libs/llm_api/openai.py
@@ -23,9 +23,21 @@ def _try_remove_markdown_block_flag(content):
     else:
         # Otherwise return the content unchanged
         return content
-    
+
 def chat_completion_no_stream(messages, llm_config, error_out: bool=True) -> dict | None:
+    """
+    Get a chat completion from the OpenAI ChatCompletion API without streaming.
+
+    Args:
+        messages: A list of chat messages to send to the model.
+        llm_config: A dict of ChatCompletion API settings, such as the model name.
+        error_out: If True, print error details to stderr whenever the call fails.
+
+    Returns:
+        The assistant message as a dict on success, or None on failure.
+
+    """
     connection_error = ''
     for _ in range(3):
         try:
@@ -55,6 +67,17 @@ def chat_completion_no_stream(messages, llm_config, error_out: bool=True) -> dict | None
     return None
 
 def chat_completion_no_stream_return_json(messages, llm_config, error_out: bool=True):
+    """
+    Call the chat completion API and parse the result as JSON, retrying up to three times.
+
+    Args:
+        messages (List[Dict[str, str]]): The list of chat messages to send.
+        llm_config (Dict[str, Any]): The chat completion configuration dict.
+        error_out (bool, optional): If True, print an error message when parsing fails. Defaults to True.
+
+    Returns:
+        Dict[str, Any]: The JSON object parsed from the response, or None if no attempt yields valid JSON.
+    """
     for _ in range(3):
         response = chat_completion_no_stream(messages, llm_config, error_out=error_out)
         if response is None:
             return None

From 4092985e05b1c78019f7c90fffd46571bdba7a4f Mon Sep 17 00:00:00 2001
From: "bobo.yang"
Date: Fri, 8 Dec 2023 11:13:17 +0800
Subject: [PATCH 14/15] feat: Improve rpc_call error handling

- Updated rpc_call in ide_services to gracefully handle server errors
---
 libs/ide_services/services.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libs/ide_services/services.py b/libs/ide_services/services.py
index 56c6bb4..5c519ba 100644
--- a/libs/ide_services/services.py
+++ b/libs/ide_services/services.py
@@ -14,9 +14,9 @@ def rpc_call(f):
         data = dict(zip(f.__code__.co_varnames, args))
         data.update(kwargs)
         headers = {'Content-Type': 'application/json'}
-        
+
         response = requests.post(url, json=data, headers=headers)
-        
+
         if response.status_code != 200:
             raise Exception(f"Server error: {response.status_code}")
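For context, `rpc_call` turns a stub function into an HTTP POST against the configured IDE service URL. The sketch below shows how such a stub is declared and called; `get_document_symbols` is a hypothetical endpoint used purely for illustration, and only routes the IDE bridge actually serves will answer:

```python
from ide_services.services import rpc_call  # assumes libs/ is on sys.path

@rpc_call
def get_document_symbols(abspath: str):
    pass  # body unused; rpc_call POSTs {"abspath": ...} to the service's /get_document_symbols route

try:
    symbols = get_document_symbols("/path/to/file.py")
    print(symbols)
except Exception as err:
    # Raised on non-200 responses or an "error" field in the reply.
    print("IDE service call failed:", err)
```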
From 8792c56a9dd0cbc63b77115587d97c524e07edd0 Mon Sep 17 00:00:00 2001
From: "bobo.yang"
Date: Fri, 8 Dec 2023 11:35:09 +0800
Subject: [PATCH 15/15] docs: Update commit command description in Chinese

- Updated the commit command description to use Chinese messaging
---
 commit/zh/command.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/commit/zh/command.yml b/commit/zh/command.yml
index b9afd36..5b91909 100644
--- a/commit/zh/command.yml
+++ b/commit/zh/command.yml
@@ -1,4 +1,4 @@
-description: commit changes with commit message in chinese.
+description: 提交当前代码修改。
 hint: to close Issue #issue_number
 input: optional
 steps: