feat: Add ask_issue workflow for automated lint error fixing

- Implement ask_issue command with README, command.yml, and main.py - Create workflow to generate fix solutions using LLM - Add functionality to extract and process diagnostics from IDE
2024-07-18 16:00:24 +08:00 · 2024-07-18 16:00:24 +08:00 · 3980e6f996
commit 3980e6f996
parent f9fe89c5c0
3 changed files with 275 additions and 0 deletions
--- a/merico/ask_issue/README.md
+++ b/merico/ask_issue/README.md
@ -0,0 +1,6 @@
+### 操作指南
+
+ask_issue工作流命令使用步骤如下：
+1. 选中Sonar或者Lint错误提示对应行。
+2. 输入`/ask_issue`命令。
+3. 开始生成问题描述解释以及对应的解决办法，等待生成结束。
--- a/merico/ask_issue/command.yml
+++ b/merico/ask_issue/command.yml
@ -0,0 +1,4 @@
+description: Automatically fix lint errors.
+help: README.md
+steps:
+  - run: $devchat_python $command_path/main.py
--- a/merico/ask_issue/main.py
+++ b/merico/ask_issue/main.py
@ -0,0 +1,265 @@
+import os
+import re
+import sys
+
+from devchat.llm import chat
+from devchat.memory import FixSizeChatMemory
+
+from lib.ide_service import IDEService
+
+
+def extract_edits_block(text):
+    """
+    Return the body of the first ```edits fenced block found in the text.
+
+    :param text: A string that may contain a block opened with ```edits
+    :return: The text between the opening ```edits marker and the next ```
+        fence (surrounding newlines included), or None when the opening
+        marker or the closing fence is missing
+    """
+    opening = "```edits"
+    marker_pos = text.find(opening)
+    if marker_pos == -1:
+        return None
+
+    body_start = marker_pos + len(opening)
+    body_end = text.find("```", body_start)
+    # An unterminated block is treated the same as no block at all.
+    return None if body_end == -1 else text[body_start:body_end]
+
+def extract_markdown_block(text):
+    """
+    Extract the code carried inside a fenced Markdown block.
+
+    An ```edits block (see extract_edits_block) takes priority; otherwise the
+    first generic fenced block is used, without its optional language tag.
+
+    :param text: A string containing Markdown text
+    :return: The content of the first fenced block; the whole text when it
+        contains no code fence at all; None when a fence is present but no
+        complete block could be extracted
+    """
+    edit_code = extract_edits_block(text)
+    if edit_code:
+        return edit_code
+
+    # Optional language tag after the opening fence; DOTALL lets the body span lines.
+    match = re.search(r"```(?:\w+)?\s*\n(.*?)\n```", text, re.DOTALL)
+    if match:
+        return match.group(1)
+
+    # str.find() returns -1 (truthy) when the fence is absent and 0 (falsy) when
+    # it is at the very start, so its result must not be used as a boolean.
+    if "```" in text:
+        return None
+    return text
+
+
+# step 1 : get selected code
+def get_selected_code():
+    """
+    Fetch the current editor selection from the IDE service.
+
+    Exits the process with status 1 when the selection spans more than one line.
+
+    :return: A tuple (abspath, selected text, line number of the selection
+        start), or (None, None, None) when the IDE reports no selection
+    """
+    selected_data = IDEService().get_selected_range().dict()
+    selection = selected_data["range"]
+    start = selection["start"]
+
+    # The code below indexes start as a mapping with a "line" key, so comparing
+    # the mapping itself to -1 can never match; the line number is checked too.
+    # NOTE(review): assumes a line of -1 is the "no selection" sentinel - confirm.
+    if start == -1 or (isinstance(start, dict) and start.get("line") == -1):
+        return None, None, None
+
+    if start["line"] != selection["end"]["line"]:
+        print("Please select the line code of issue reported.\n\n", file=sys.stderr)
+        sys.exit(1)
+
+    return selected_data["abspath"], selected_data["text"], start["line"]
+
+
+# step 2 : input issue descriptions
+def input_issue_descriptions(file_path, issue_line_num):
+    """
+    Pick the diagnostic to work on for a single line.
+
+    :param file_path: Path of the file, passed to the IDE service
+    :param issue_line_num: Line number used as both start and end of the queried range
+    :return: The first diagnostic containing "<sonar", otherwise the first
+        diagnostic reported, or None when the IDE service reports none
+    """
+    diagnostics = IDEService().get_diagnostics_in_range(file_path, issue_line_num, issue_line_num)
+    if not diagnostics:
+        return None
+
+    # Prefer a sonarlint diagnostic. A membership test is used because
+    # str.find() returns 0 for a match at the very start of the string,
+    # which a "> 0" comparison would skip.
+    return next(
+        (diagnostic for diagnostic in diagnostics if "<sonar" in diagnostic),
+        diagnostics[0],
+    )
+
+
+# step 3 : call llm to generate fix solutions
+# System prompt asking the model for an explanation plus an ```edits block
+# that marks changed lines with "+ " / "- " and elides unchanged lines with "...".
+# NOTE(review): the sample blocks below show no "+ " prefix on the added lines
+# although the instructions refer to one - confirm the examples are intact.
+SYSTEM_ROLE_DIFF= """
+You are a code refactoring assistant. Your task is to refactor the user's code to fix lint diagnostics. You will be provided with a code snippet and a list of diagnostics. Your response should include two parts:
+
+1. An explanation of the reason for the diagnostics and how to fix them.
+2. The edited code snippet with the diagnostics fixed, using markdown format for clarity.
+
+The markdown block for edits should look like this:
+
+```edits
+def hello():
+    print("Call hello():")
+     print("hello")
+
+...
+
+- hello(20)
+ hello()
+```
+Or like this, if a variable is not defined:
+
+```edits
+...
+     cur_file = __file__
+    print(cur_file)
+```
+Please note the following important points:
+
+1. The new code should maintain the correct indentation. The "+ " sign is followed by two spaces for indentation, which should be included in the edited code.
+2. In addition to outputting key editing information, sufficient context (i.e., key information before and after editing) should also be provided to help locate the specific position of the edited line.
+3. Don't output all file lines, if some lines are unchanged, please use "..." to indicate the ignored lines.
+4. Use "+ " and "- " at start of the line to indicate the addition and deletion of lines.
+
+Here are some examples of incorrect responses:
+
+Incorrect example 1, where the indentation is not correct:
+
+```edits
+def hello():
+    print("Call hello():")
+   print("hello")
+```
+In this case, if the "+ " sign and the extra space are removed, the print("hello") statement will lack the necessary two spaces for correct indentation.
+
+Incorrect example 2, where no other code lines are provided:
+
+```edits
+ print("hello")
+```
+This is an incorrect example because without additional context, it's unclear where the new print("hello") statement should be inserted.
+"""
+
+# System prompt (written in Chinese) asking the model to fix only the described
+# problem and to return the corrected code in a single ```edits block, using
+# "..." for unmodified sections between edits and keeping at least three
+# unmodified lines before and after each modified section.
+SYSTEM_ROLE_CODEBLOCK = """
+你是一个重构工程师，你需要根据错误描述，对代码进行问题修正，只需要关注描述的问题，不需要关注代码中的其他问题。
+
+输出的修正代码中，如果修改了多个代码段，中间没有修改的代码段，请使用...表示。
+每一个被修改的代码段，应该包含前后至少3行未修改的代码，作为修改代码段的边界表示。
+
+输出一个代码块中，例如：
+```edits
+def hello():
+    msg = "hello"
+    print(msg)
+
+...
+
+if __name__ == "__main__":
+    hello()
+```
+"""
+
+
+# Model name comes from the environment, with a fixed fallback.
+LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-3.5-turbo-1106")
+
+# Models that are given the plain code-block prompt instead of the diff-style one.
+_CODEBLOCK_PROMPT_MODELS = (
+    "qwen2-72b-instruct",
+    "qwen-long",
+    "qwen-turbo",
+    "Yi-34B-Chat",
+    "deepseek-coder",
+    "xinghuo-3.5",
+)
+
+SYSTEM_ROLE = (
+    SYSTEM_ROLE_CODEBLOCK if LLM_MODEL in _CODEBLOCK_PROMPT_MODELS else SYSTEM_ROLE_DIFF
+)
+
+# Initial conversation: a single system message carrying the selected prompt.
+MESSAGES_A = [{"role": "system", "content": SYSTEM_ROLE}]
+
+# User prompt template. Its placeholders share their names with the parameters
+# of call_llm_to_generate_fix_solutions: file_content, issue_line_code,
+# issue_description and rule_description.
+PROMPT = """
+
+Here is the code file:
+
+{file_content}
+
+There is an issue in the following code:
+
+{issue_line_code}
+
+{issue_description}
+
+Here is the rule description:
+
+{rule_description}
+
+Please focus only on the error described in the prompt. Other errors in the code should be disregarded.
+
+"""
+
+# Chat memory created with the initial system message list (MESSAGES_A).
+memory = FixSizeChatMemory(max_size=20, messages=MESSAGES_A)
+@chat(prompt=PROMPT, stream_out=True, memory=memory)
+def call_llm_to_generate_fix_solutions(
+    file_content, issue_line_code, issue_description, rule_description
+):
+    # NOTE(review): the body is empty; presumably the @chat decorator fills the
+    # PROMPT placeholders from the keyword arguments, performs the request and
+    # supplies the return value - confirm against devchat.llm.chat.
+    pass
+
+
+# current file content
+def get_current_file_content(file_path, issue_line_num):
+    """
+    Ask the IDE service for the collapsed code around the issue line.
+
+    :param file_path: Path of the file containing the issue
+    :param issue_line_num: Line number passed as both ends of the requested range
+    :return: The value returned by IDEService.get_collapsed_code, or None
+        when that call raises
+    """
+    try:
+        return IDEService().get_collapsed_code(file_path, issue_line_num, issue_line_num)
+    except Exception as err:
+        # Best effort: the caller continues without file content, but the path
+        # and the cause are reported so the failure can be diagnosed.
+        print(f"Error reading file: {file_path}: {err}", file=sys.stderr)
+        return None
+
+
+# get issue description
+def get_rule_description(issue_description):
+    """
+    Resolve the rule documentation for a diagnostic.
+
+    The diagnostic is searched for a "<source:code>" tag. When the source
+    contains "sonar", the code is split on ":" (first part: language, last
+    part: rule id) and the file
+    <tools path>/sonar-rspec/rules/<rule id>/<language>/rule.adoc is read.
+
+    :param issue_description: Diagnostic text
+    :return: The content of the rule file, or issue_description unchanged
+        when the text has no tag, the source is not sonar, or the rule file
+        does not exist
+    """
+    tag = re.search(r"<(\w+):(.+?)>", issue_description)
+    # A diagnostic without a "<source:code>" tag has no source to inspect;
+    # return it as-is instead of failing on a missing match.
+    if tag is None:
+        return issue_description
+
+    issue_source, issue_code = tag.groups()
+    if "sonar" not in issue_source:
+        return issue_description
+
+    code_parts = issue_code.split(":")
+    issue_language, issue_id = code_parts[0], code_parts[-1]
+
+    tools_path = IDEService().get_extension_tools_path()
+    rule_path = os.path.join(
+        tools_path, "sonar-rspec", "rules", issue_id, issue_language, "rule.adoc"
+    )
+    if os.path.exists(rule_path):
+        with open(rule_path, "r", encoding="utf-8") as file:
+            return file.read()
+    return issue_description
+
+
+def main():
+    """Run the workflow: selection, then diagnostic, then rule text, then the LLM call."""
+    print("start fix issue ...\n\n")
+    source_path, selected_text, line_no = get_selected_code()
+    if not (source_path and selected_text):
+        print("No code selected. Please select the code line you want to fix.", file=sys.stderr)
+        sys.exit(1)
+
+    diagnostic = input_issue_descriptions(source_path, line_no)
+    if not diagnostic:
+        # Having nothing to fix is reported on stdout with exit status 0.
+        print(
+            "There are no issues to resolve on the current line. "
+            "Please select the line where an issue needs to be resolved."
+        )
+        sys.exit(0)
+
+    print("make llm prompt ...\n\n")
+    # Evaluated in this order: file content first, then the rule description.
+    prompt_fields = {
+        "file_content": get_current_file_content(source_path, line_no),
+        "issue_line_code": selected_text,
+        "issue_description": diagnostic,
+        "rule_description": get_rule_description(diagnostic),
+    }
+
+    print("call llm to fix issue ...\n\n")
+    if not call_llm_to_generate_fix_solutions(**prompt_fields):
+        sys.exit(1)
+
+    print("\n\n", flush=True)
+
+
+if __name__ == "__main__":
+    main()