update commit command

2023-12-08 08:14:34 +08:00 · 2023-12-08 08:14:34 +08:00 · 30de2d8c62
commit 30de2d8c62
parent 11f951f3b4
5 changed files with 236 additions and 205 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1 +1,4 @@
 .vscode
+__pycache__/
+.DS_Store
+
--- a/commit/command.yml
+++ b/commit/command.yml
@ -1,5 +1,5 @@
-description: commit changes.
-hint: to close Issue ...
+description: commit changes with commit message in english.
+hint: to close Issue #issue_number
 input: optional
 steps:
-  - run: $command_python $command_path/commit.py "$input"
+  - run: $command_python $command_path/commit.py "$input" "english"
--- a/commit/commit.py
+++ b/commit/commit.py
@ -1,26 +1,8 @@
-"""
-commit.py: 通过几个步骤完成提交。
-
-具体步骤包含：
-1. 获取当前修改文件列表；
-2. 获取用户选中的修改文件；
-    a. 标记出已经staged的文件；
-    b. 获取用户选中的文件；
-    c. 根据用户选中文件，重新构建stage列表；
-3. 获取用户选中修改文件的Diff信息；
-4. 生成提交信息；
-5. 展示提交信息并提交。
-
-注意： 步骤2.c, 步骤5有专门的函数实现，本脚本中不需要具体这两个步骤的实现。
-"""
-
 import os
 import sys
-import time
-import re
 import json
 import subprocess
-import openai
+from typing import List

 from prompts import \
    PROMPT_SUMMARY_FOR_FILES, \
@ -35,103 +17,19 @@ from prompts import \
    prompt_commit_message_by_summary_user_input_llm_config
    

+sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'libs'))
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..' , 'libs'))

-def output_message(output):
-    out_data = f"""\n{output}\n"""
-    print(out_data, flush=True)
-
-def parse_response_from_ui(response):
-    # resonse text like this:
-    """
-    ``` some_name
-    some key name 1: value1
-    some key name 2: value2
-    ```
-    """
-    # parse key values
-    lines = response.strip().split("\n")
-    if len(lines) <= 2:
-        return {}
-    
-    import ymal
-    data = yaml.safe_load(lines[1:-1])
-    return data
-
-    
-def pipe_interaction_mock(output: str):
-    output_message(output)
-    # read response.txt in same dir with current script file
-    response_file = os.path.join(os.path.dirname(__file__), 'response.txt')
-    
-    # clear content in response_file
-    with open(response_file, 'w+', encoding="utf8"):
-        pass
-
-    while True:
-        if os.path.exists(response_file):
-            with open(response_file, encoding="utf8") as f:
-                response = f.read()
-                if response.strip().endswith("```"):
-                    break
-        time.sleep(1)
-    return parse_response_from_ui(response)
+from ui_utils import ui_checkbox_select, ui_text_edit, CheckboxOption
+from llm_api import chat_completion_no_stream, chat_completion_no_stream_return_json


-def pipe_interaction(output: str):
-    output_message(output)
+language = ""

-    lines = []
-    while True:
-        try:
-            line = input()
-            if line.strip().startswith('``` '):
-                lines = []
-            elif line.strip().startswith('```'):
-                lines.append(line)
-                break
-            lines.append(line)
-        except EOFError:
-            pass
-
-    replay_message = '\n'.join(lines)
-    return parse_response_from_ui(replay_message)
-
-
-def call_gpt_with_config(messages, llm_config) -> str:
-    connection_error = ''
-    for _1 in range(3):
-        try:
-            response = openai.ChatCompletion.create(
-                messages=messages,
-                **llm_config,
-                stream=False
-            )
-            
-            response_dict = json.loads(str(response))
-            respose_message = response_dict["choices"][0]["message"]
-            return respose_message
-        except ConnectionError as err:
-            connection_error = err
-            continue
-        except Exception as err:
-            print("Exception:", err, file=sys.stderr, flush=True)
-            return None
-    print("Connect Error:", connection_error, file=sys.stderr, flush=True)
-    return None
-
-def call_gpt_with_config_and_ensure_json(messages, llm_config):
-    for _1 in range(3):
-        response = call_gpt_with_config(messages, llm_config)
-        if response is None:
-            sys.exit(-1)
-
-        try:
-            response_obj = json.loads(response["content"])
-            return response_obj
-        except Exception:
-            continue
-    print("Not valid json response:", response["content"], file=sys.stderr, flush=True)
-    sys.exit(-1)
+def assert_value(value, message):
+    if value:
+        print(message, file=sys.stderr, flush=True)
+        sys.exit(-1)


 def get_modified_files():
@ -159,15 +57,18 @@ def get_modified_files():
                staged_files.append(strip_file_name(filename))
    return modified_files, staged_files

-def gpt_file_summary(diff, diff_files):
-    prompt = PROMPT_SUMMARY_FOR_FILES.replace("{__DIFF__}", f"{diff}")
-    messages = [{"role": "user", "content": prompt}]
+def gpt_file_summary(diff, diff_files, user_input):
+    global language
+    prompt = PROMPT_SUMMARY_FOR_FILES.replace("{__DIFF__}", f"{diff}").replace("{__USER_INPUT__}", f"{user_input}")
+    messages = [{"role": "user", "content": prompt + (" \nPlease response summaries in chinese" if language == "chinese" else "")}]
    normpath_summaries = {}
    
    retry_times = 0
    while retry_times < 3:
        retry_times += 1
-        file_summaries = call_gpt_with_config_and_ensure_json(messages, prompt_summary_for_files_llm_config)
+        file_summaries = chat_completion_no_stream_return_json(messages, prompt_summary_for_files_llm_config)
+        if not file_summaries:
+            continue
        for key, value in file_summaries.items():
            normpath_summaries[os.path.normpath(key)] = value
        
@ -175,7 +76,7 @@ def gpt_file_summary(diff, diff_files):
        if len(missed_files) > 0:
            prompt_retry = PROMPT_SUMMARY_FOR_FILES_RETRY.replace("{__MISSED_FILES__}", f"{missed_files}")
            messages.append({"role": "assistant", "content": json.dumps(file_summaries)})
-            messages.append({"role": "user", "content": prompt_retry})
+            messages.append({"role": "user", "content": prompt_retry + (" \nPlease response summaries in chinese" if language == "chinese" else "")})
        else:
            break
    
@ -190,7 +91,11 @@ def gpt_file_group(diff, diff_files):
    retry_times = 0
    while retry_times < 3:
        retry_times += 1
-        file_groups = call_gpt_with_config_and_ensure_json(messages, prompt_group_files_llm_config)
+        file_groups = chat_completion_no_stream_return_json(messages, prompt_group_files_llm_config)
+        if not file_groups:
+            continue
+        if 'groups' in file_groups:
+            file_groups = file_groups["groups"]
        grouped_files = []
        for group in file_groups:
            grouped_files.extend(group["files"])
@ -206,7 +111,33 @@ def gpt_file_group(diff, diff_files):
    return file_groups


-def get_file_summary(modified_files, staged_files):
+def get_file_summaries(modified_files, staged_files, user_input):
+    diffs = []
+    for file in modified_files:
+        if file not in staged_files:
+            subprocess.check_output(["git", "add", file])
+        diff = subprocess.check_output(["git", "diff", "--cached", file])
+        if file not in staged_files:
+            subprocess.check_output(["git", "reset", file])
+        diffs.append(diff.decode('utf-8'))
+    # total_diff = subprocess.check_output(["git", "diff", "HEAD"])
+    total_diff_decoded = '\n'.join(diffs) #  total_diff.decode('utf-8')
+    
+    if len(total_diff_decoded) > 15000:
+        print("Current diff length:", len(total_diff_decoded), flush=True)
+        return {}, []
+
+    # 在prompt中明确处置AI模型的输出格式需求
+    normpath_summaries = gpt_file_summary(total_diff_decoded, modified_files, user_input)
+    print(f"""
+``` file summary
+{json.dumps(normpath_summaries, indent=4)}
+```
+    """)
+
+    return normpath_summaries
+
+def get_file_summaries_and_groups(modified_files, staged_files, user_input):
    """ 当modified_files文件列表<=5时，根据项目修改差异生成每一个文件的修改总结 """
    diffs = []
    for file in modified_files:
@ -221,10 +152,10 @@ def get_file_summary(modified_files, staged_files):
    
    if len(total_diff_decoded) > 15000:
        print("Current diff length:", len(total_diff_decoded), flush=True)
-        return {}
+        return {}, []

    # 在prompt中明确处置AI模型的输出格式需求
-    normpath_summaries = gpt_file_summary(total_diff_decoded, modified_files)
+    normpath_summaries = gpt_file_summary(total_diff_decoded, modified_files, user_input)
    print(f"""
 ``` file summary
 {json.dumps(normpath_summaries, indent=4)}
@ -239,33 +170,20 @@ def get_file_summary(modified_files, staged_files):
 ```
    """)

-    return normpath_summaries
+    return normpath_summaries, file_groups


-def get_marked_files(modified_files, staged_files, file_summaries):
+def get_marked_files(modified_files, staged_files, file_summaries, file_groups=None):
    """ 获取用户选中的修改文件及已经staged的文件"""
    # Coordinate with user interface to let user select files.
    # assuming user_files is a list of filenames selected by user.
-    out_str = "```chatmark\n"
-    out_str += "Staged:\n"
-    for file in staged_files:
-        out_str += f"- [x] {file} {file_summaries.get(file, '')}\n"
-    out_str += "Unstaged:\n"
-    for file in modified_files:
-        if file in staged_files:
-            continue
-        out_str += f"- [] {file} {file_summaries.get(file, '')}\n"
-    out_str += "```"
-    
-    output_message(out_str)
-    return [file for file in modified_files if file_summaries.get(file, None)]
-    replay_object = pipe_interaction_mock(out_str)
-    
-    select_files = []
-    for key, value in replay_object.items():
-        if key in modified_files and value == "true":
-            select_files.append(key)
-    return select_files
+    # commit_files = [] if len(file_groups) == 0 else sorted(file_groups, key=lambda obj: obj['importance_level'])[0]['files']
+    options : List[CheckboxOption] = []
+    options += [CheckboxOption(file, file + " - " + file_summaries.get(file, ''), "Staged", True) for file in staged_files]
+    options += [CheckboxOption(file, file + " - " + file_summaries.get(file, ''), "Unstaged", False) for file in modified_files if file not in staged_files]
+
+    selected_files = ui_checkbox_select("Select files to commit", options)
+    return selected_files


 def rebuild_stage_list(user_files):
@ -283,56 +201,68 @@ def get_diff():

 def generate_commit_message_base_diff(user_input, diff):
    """ Based on the diff information, generate a commit message through AI """
+    global language
+    language_prompt = "You must response commit message in chinese。\n" if language == "chinese" else ""
    prompt = PROMPT_COMMIT_MESSAGE_BY_DIFF_USER_INPUT.replace(
        "{__DIFF__}", f"{diff}"
    ).replace(
-        "{__USER_INPUT__}", f"{user_input}"
+        "{__USER_INPUT__}", f"{user_input + language_prompt}"
    )
    messages = [{"role": "user", "content": prompt}]
-    response = call_gpt_with_config(messages, prompt_commit_message_by_diff_user_input_llm_config)
+    response = chat_completion_no_stream(messages, prompt_commit_message_by_diff_user_input_llm_config)
+    assert_value(not response, "")
    return response


 def generate_commit_message_base_file_summaries(user_input, file_summaries):
    """ Based on the file_summaries, generate a commit message through AI """
+    global language
+    language_prompt = "Please response commit message in chinese.\n" if language == "chinese" else ""
    prompt = PROMPT_COMMIT_MESSAGE_BY_SUMMARY_USER_INPUT.replace(
        "{__USER_INPUT__}", f"{user_input}"
    ).replace(
        "{__FILE_SUMMARY__}", f"{json.dumps(file_summaries, indent=4)}"
    )
    # Call AI model to generate commit message
-    messages = [{"role": "user", "content": prompt}]
-    response = call_gpt_with_config(messages, prompt_commit_message_by_summary_user_input_llm_config)
+    messages = [{"role": "user", "content": language_prompt + prompt}]
+    response = chat_completion_no_stream(messages, prompt_commit_message_by_summary_user_input_llm_config)
+    assert_value(not response, "")
    return response


 def display_commit_message_and_commit(commit_message):
    """ 展示提交信息并提交 """
-    commit_message_with_flag = f"""
-```editor
-{commit_message}
-```
-    """
-    replay_object = pipe_interaction_mock(commit_message_with_flag)
-    new_commit_message, commit = replay_object["commit_message"], replay_object["commit"]
-
-    if commit == "true":
-        subprocess.check_output(["git", "commit", "-m", new_commit_message])
+    new_commit_message = ui_text_edit("Edit commit meesage", commit_message)
+    if not new_commit_message:
+        return
+    subprocess.check_output(["git", "commit", "-m", new_commit_message])


 def main():
+    global language
    try:
        user_input = sys.argv[1]
+        language = sys.argv[2]

        modified_files, staged_files = get_modified_files()
-        file_summaries = get_file_summary(modified_files, staged_files)
+        file_summaries = get_file_summaries(modified_files, staged_files, user_input)
+        # file_summaries, file_groups = get_file_summaries_and_groups(modified_files, staged_files, user_input)
        selected_files = get_marked_files(modified_files, staged_files, file_summaries)
+        if len(selected_files) == 0:
+            print("No files selected, commit aborted.")
+            return
        rebuild_stage_list(selected_files)
-        diff = get_diff()
-        commit_message = generate_commit_message_base_diff(user_input, diff)
-        commit_message2 = generate_commit_message_base_file_summaries(user_input, file_summaries)
-        display_commit_message_and_commit(commit_message2["content"] + "\n\n\n" + commit_message["content"])
-        output_message("""\n```progress\n\nDone\n\n```""")
+        
+        summaries_for_select_files = {file: file_summaries[file] for file in selected_files if file in file_summaries}
+        if len(summaries_for_select_files.keys()) < len(selected_files):
+            diff = get_diff()
+            commit_message = generate_commit_message_base_diff(user_input, diff)
+        else:
+            commit_message = generate_commit_message_base_file_summaries(user_input, summaries_for_select_files)
+            
+        # display_commit_message_and_commit(commit_message2["content"] + "\n\n\n" + commit_message["content"])
+        display_commit_message_and_commit(commit_message["content"])
+        print("""\n```progress\n\nDone\n\n```""")
        sys.exit(0)
    except Exception as err:
        print("Exception:", err, file=sys.stderr, flush=True)
--- a/commit/prompts.py
+++ b/commit/prompts.py
@ -3,13 +3,40 @@
 # summary changes for files based diff
 # diff => {__DIFF__}
 PROMPT_SUMMARY_FOR_FILES = """ 
-I have made the following changes: 
-```{__DIFF__}```
-Please provide a summary for each modified file. The output should ONLY be a JSON format like: 
-{"file1": "Summary of the changes made in file1", 
-"file2": "Summary of the changes made in file2"}
-Key is real filename which is exist in diff changes.
-Please make sure there is no other additional output.
+Objective: **Create concise summaries for each modified file based on the provided diff changes and any additional user input.**
+
+**Instructions:**
+1. Review the diff changes and user input to understand the context and content of the modifications.
+2. Write a summary for each file that has been modified, capturing the essence of the changes.
+3. Use the filename from the diff as the key, and the summary as the value in the output JSON object.
+
+**Response Format:**
+```json
+{
+  \"filename1\": \"Summary of the changes made in filename1\",
+  \"filename2\": \"Summary of the changes made in filename2\",
+  ...
+}
+```
+
+**Constraints:**
+- Ensure that the summaries are accurate and reflect the changes made.
+- The response must be in JSON format, with filenames as keys and summaries as values.
+- Do not include any additional text or output outside of the JSON format.
+- The keys in the JSON object should correspond to real filenames present in the diff changes.
+
+**User Input:**
+```
+{__USER_INPUT__}
+```
+
+**Diff Changes:**
+```
+{__DIFF__}
+```
+---
+
+Based on the provided diff changes and any additional user input, please generate a JSON object containing summaries for each modified file.
 """
 prompt_summary_for_files_llm_config = {
 	"model": "gpt-3.5-turbo-16k"
@ -24,16 +51,44 @@ The following files are missed in your summary:
 # group changes for files based diff
 # diff => {__DIFF__}
 PROMPT_GROUP_FILES = """
-Please group the following modified files based on their modification relevance:
+Objective: **Categorize the modified files from a diff into groups based on their relevance to each other, and assign an importance level to each group. Limit the number of groups to a maximum of three.**
+
+**Instructions:**
+1. **Analysis:** Review the diff content to discern related changes. Group files that are part of the same logical change, ensuring that the code will compile and run correctly post-commit.
+2. **Atomic Grouping:** Aim for the smallest possible groups. Each should represent a single, cohesive modification for clarity and independent comprehension. Do not exceed three groups in total.
+3. **Importance Level:** Rate each group's importance on a scale of 1 to 10, with 1 being the most critical. Consider the impact on functionality, urgency of fixes, and feature significance.
+
+**Response Format:**
+- Use JSON format for your response.
+- Include all files from the diff content.
+- Structure the JSON as shown in the example below.
+
+**Example Output:**
+```json
+{
+    "groups": [
+		{\"files\": [\"fileA\", \"fileB\"], \"group\": \"Feature Improvement\", \"importance_level\": 5},
+		{\"files\": [\"fileC\"], \"group\": \"Bug Fix\", \"importance_level\": 1},
+		{\"files\": [\"fileD\", \"fileE\"], \"group\": \"Code Refactoring\", \"importance_level\": 3}
+	]
+}
+```
+
+**Constraints:**
+- Ensure the JSON output is valid and contains no additional text or characters.
+- Each group must be self-contained, with no cross-group dependencies.
+- The importance level should accurately reflect the priority for committing the changes.
+- The total number of groups must not exceed three.
+- Follows the JSON structure shown in the example above.
+
+**Diff Content:**
+```
 {__DIFF__}
-The output should be a JSON format like:
-[
-	{"files": ["file1", "file2"], "group": "Feature Improvement", "important_level": 1},
-	{"files": ["file3"], "group": "Bug Fix", "important_level": 2}
-]
-Please make sure there is no other additional output.
-Please make sure every file in diff is grouped.
-important_level is an integer from 1 to 10, 1 means the most important, 10 means the least important.
+```
+
+---
+
+Based on the provided diff content, group the files accordingly and assign an appropriate importance level to each group, following the instructions and constraints.
 """
 prompt_group_files_llm_config = {
 	"model": "gpt-3.5-turbo-16k"
@ -51,21 +106,40 @@ Please re-group the files again, don't miss any file.
 # diff => {__DIFF__}
 # user_input => {__USER_INPUT__}
 PROMPT_COMMIT_MESSAGE_BY_DIFF_USER_INPUT = """
-I have made the following changes to the code:
+Objective:** Create a commit message that concisely summarizes the changes made to the codebase as reflected by the provided diff. The commit message should also take into account any additional context or instructions given by the user.
+
+**Commit Message Structure:**
+1. **Title Line:** Start with a type from the following options: `feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`, `build`, `ci`, `chore`, etc. Follow the type with a concise title. Format: `type: Title`. Only one title line is allowed.
+2. **Summary:** Provide a summary of all changes in no more than three detailed message lines. Each line should be prefixed with a \"-\".
+3. **Closing Reference (Optional):** If applicable, include a closing reference line in the format `Closes #IssueNumber`. Only include this if you know the exact issue number.
+
+**Response Format:**
+```
+type: Title
+
+ Detail message line 1
+ Detail message line 2
+ Detail message line 3
+
+Closes #IssueNumber
+```
+
+**Constraints:**
+- Do not include markdown block flags (```) or the placeholder text \"commit_message\" in your response.
+- Adhere to best practices for commit messages:
+  - Keep the title under 50 characters.
+  - Keep each summary line under 72 characters.
+- If the exact issue number is unknown, omit the closing reference line.
+
+**User Input:** `{__USER_INPUT__}`
+
+**Code Changes:**
 ```
 {__DIFF__}
 ```
-Please help me generate a commit message. {__USER_INPUT__}. If you don't know exact closed issue number, please don't output "Closes #Issue_number" line. The format is as follows:
-```
-feat: commit message title
+---

-Commit message body:
- Detailed message 1.
- Detailed message 2.
-
-Closes #Issue_number
-```
-Adhere to best practices. Keep the title under 50 characters and each body line under 72 characters.
+Please use the above structure to generate a commit message that meets the specified criteria.
 """
 prompt_commit_message_by_diff_user_input_llm_config = {
 	"model": "gpt-3.5-turbo-16k"
@ -75,23 +149,42 @@ prompt_commit_message_by_diff_user_input_llm_config = {
 # file_summary => {__FILE_SUMMARY__}
 # user_input => {__USER_INPUT__}
 PROMPT_COMMIT_MESSAGE_BY_SUMMARY_USER_INPUT = """
-Here are the file summaries generated by AI:
+Objective:** Generate a commit message that accurately reflects the changes made to the codebase, as summarized by the AI-generated file summary and any additional user input.
+
+**Commit Message Structure:**
+1. **Title Line:** Begin with a type from the following options: `feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`, `build`, `ci`, `chore`, etc. The title should be concise and informative. Format: `type: Title`. Only one title line is allowed.
+2. **Summary:** Condense the changes into 1-3 detailed message lines, each beginning with a \"-\".
+3. **Closing Reference (Optional):** If known, include a closing reference in the format `Closes #IssueNumber`. If the exact issue number is unknown, omit this line.
+
+**Response Format:**
+```
+type: Title
+
+ Detail message line 1
+ Detail message line 2
+ Detail message line 3
+
+Closes #IssueNumber
+```
+
+**Constraints:**
+- Exclude markdown code block flags (```) and the placeholder \"commit_message\" from your response.
+- Follow commit message best practices:
+  - Title line should be under 50 characters.
+  - Each summary line should be under 72 characters.
+- If the issue number is not provided, do not include the closing reference line.
+
+**User Input:** `{__USER_INPUT__}`
+
+**File Summary:**
 ```
 {__FILE_SUMMARY__}
 ```

-Please help me generate a commit message. {__USER_INPUT__}. If you don't know exact closed issue number, please don't output "Closes #Issue_number" line. The format is as follows:
-```
-feat: commit message title
+---

-Commit message body:
- Detailed message 1.
- Detailed message 2.
-
-Closes #Issue_number
-```
-Adhere to best practices. Keep the title under 50 characters and each body line under 72 characters.
+Please create a commit message following the above guidelines based on the provided file summary and user input.
 """
 prompt_commit_message_by_summary_user_input_llm_config = {
-	"model": "gpt-3.5-turbo-16k"
+	"model": "gpt-4-1106-preview"
 }
--- a/commit/zh/command.yml
+++ b/commit/zh/command.yml
@ -0,0 +1,5 @@
+description: commit changes with commit message in chinese.
+hint: to close Issue #issue_number
+input: optional
+steps:
+  - run: $command_python $command_path/../commit.py "$input" "chinese"