workflows/merico/pr/custom_suggestions_config.py

import json
import os
import sys

from lib.chatmark import TextEditor


def read_custom_suggestions():
    config_path = os.path.join(os.path.expanduser("~/.chat"), ".workflow_config.json")
    if os.path.exists(config_path):
        with open(config_path, "r", encoding="utf-8") as f:
            config_data = json.load(f)
            if "custom_suggestions" in config_data:
                return config_data["custom_suggestions"]
    return ""


def save_custom_suggestions(custom_suggestions):
    config_path = os.path.join(os.path.expanduser("~/.chat"), ".workflow_config.json")

    config_data = {}
    if os.path.exists(config_path):
        with open(config_path, "r", encoding="utf-8") as f:
            config_data = json.load(f)

    config_data["custom_suggestions"] = custom_suggestions
    with open(config_path, "w+", encoding="utf-8") as f:
        json.dump(config_data, f, indent=4)


def read_custom_suggestions_with_input():
    custom_suggestions = read_custom_suggestions()
    if not custom_suggestions:
        # Input your github TOKEN to access github api:
        custom_suggestions_editor = TextEditor(
            "- make sure the code is efficient\n", "Please input your custom suggestions:"
        )
        custom_suggestions_editor.render()

        custom_suggestions = custom_suggestions_editor.new_text
        if not custom_suggestions:
            return custom_suggestions
        save_custom_suggestions(custom_suggestions)
    return custom_suggestions


def get_custom_suggestions_system_prompt():
    custom_suggestions = read_custom_suggestions_with_input()
    if not custom_suggestions:
        print("Command has been canceled.", flush=True)
        sys.exit(0)

    system_prompt = (
        """You are PR-Reviewer, a language model that specializes in suggesting ways to improve for a Pull Request (PR) code.
Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR diff.


The format we will use to present the PR code diff:
======
## file: 'src/file1.py'

@@ ... @@ def func1():
__new hunk__
12  code line1 that remained unchanged in the PR
13 +new hunk code line2 added in the PR
14  code line3 that remained unchanged in the PR
__old hunk__
 code line1 that remained unchanged in the PR
-old hunk code line2 that was removed in the PR
 code line3 that remained unchanged in the PR

@@ ... @@ def func2():
__new hunk__
...
__old hunk__
...


## file: 'src/file2.py'
...
======
- In this format, we separated each hunk of code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code that was removed.
- Code lines are prefixed symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code.
- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.


Specific instructions for generating code suggestions:
- Provide up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful.
- The suggestions should focus on ways to improve the new code in the PR, meaning focusing on lines from '__new hunk__' sections, starting with '+'. Use the '__old hunk__' sections to understand the context of the code changes.
- Prioritize suggestions that address possible issues, major problems, and bugs in the PR code.
- Don't suggest to add docstring, type hints, or comments, or to remove unused imports.
- Suggestions should not repeat code already present in the '__new hunk__' sections.
- Provide the exact line numbers range (inclusive) for each suggestion. Use the line numbers from the '__new hunk__' sections.
- When quoting variables or names from the code, use backticks (`) instead of single quote (').
- Take into account that you are reviewing a PR code diff, and that the entire codebase is not available for you as context. Hence, avoid suggestions that might conflict with unseen parts of the codebase.


Instructions from the user, that should be taken into account with high priority:
"""
        + custom_suggestions
        + """


{%- if extra_instructions %}


Extra instructions from the user, that should be taken into account with high priority:
======
{{ extra_instructions }}
======
{%- endif %}


The output must be a YAML object equivalent to type $PRCodeSuggestions, according to the following Pydantic definitions:
=====
class CodeSuggestion(BaseModel):
    relevant_file: str = Field(description="the relevant file full path")
    language: str = Field(description="the code language of the relevant file")
    suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR")
    existing_code: str = Field(description="a short code snippet from a '__new hunk__' section to illustrate the relevant existing code. Don't show the line numbers.")
    improved_code: str = Field(description="a short code snippet to illustrate the improved code, after applying the suggestion.")
    one_sentence_summary:str = Field(description="a short summary of the suggestion action, in a single sentence. Focus on the 'what'. Be general, and avoid method or variable names.")
    relevant_lines_start: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion starts (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")
    relevant_lines_end: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion ends (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")
    label: str = Field(description="a single label for the suggestion, to help the user understand the suggestion type. For example: 'security', 'possible bug', 'possible issue', 'performance', 'enhancement', 'best practice', 'maintainability', etc. Other labels are also allowed")

class PRCodeSuggestions(BaseModel):
    code_suggestions: List[CodeSuggestion]
=====


Example output:
```yaml
code_suggestions:
- relevant_file: |
    src/file1.py
  language: |
    python
  suggestion_content: |
    ...
  existing_code: |
    ...
  improved_code: |
    ...
  one_sentence_summary: |
    ...
  relevant_lines_start: 12
  relevant_lines_end: 13
  label: |
    ...
```


Each YAML output MUST be after a newline, indented, with block scalar indicator ('|').
"""
    )
    return system_prompt
Add github and pr workflow commands 2024-05-18 22:05:51 +08:00			`import json`
Fix format issues in pr/ & github/ 2024-05-19 15:34:26 +08:00			`import os`
Add github and pr workflow commands 2024-05-18 22:05:51 +08:00			`import sys`

			`from lib.chatmark import TextEditor`


			`def read_custom_suggestions():`
			`config_path = os.path.join(os.path.expanduser("~/.chat"), ".workflow_config.json")`
			`if os.path.exists(config_path):`
			`with open(config_path, "r", encoding="utf-8") as f:`
			`config_data = json.load(f)`
			`if "custom_suggestions" in config_data:`
			`return config_data["custom_suggestions"]`
			`return ""`

Fix format issues in pr/ & github/ 2024-05-19 15:34:26 +08:00
Add github and pr workflow commands 2024-05-18 22:05:51 +08:00			`def save_custom_suggestions(custom_suggestions):`
			`config_path = os.path.join(os.path.expanduser("~/.chat"), ".workflow_config.json")`

			`config_data = {}`
			`if os.path.exists(config_path):`
			`with open(config_path, "r", encoding="utf-8") as f:`
			`config_data = json.load(f)`
Fix format issues in pr/ & github/ 2024-05-19 15:34:26 +08:00
Add github and pr workflow commands 2024-05-18 22:05:51 +08:00			`config_data["custom_suggestions"] = custom_suggestions`
			`with open(config_path, "w+", encoding="utf-8") as f:`
			`json.dump(config_data, f, indent=4)`


			`def read_custom_suggestions_with_input():`
			`custom_suggestions = read_custom_suggestions()`
			`if not custom_suggestions:`
			`# Input your github TOKEN to access github api:`
			`custom_suggestions_editor = TextEditor(`
Fix format issues in pr/ & github/ 2024-05-19 15:34:26 +08:00			`"- make sure the code is efficient\n", "Please input your custom suggestions:"`
Add github and pr workflow commands 2024-05-18 22:05:51 +08:00			`)`
			`custom_suggestions_editor.render()`

			`custom_suggestions = custom_suggestions_editor.new_text`
			`if not custom_suggestions:`
			`return custom_suggestions`
			`save_custom_suggestions(custom_suggestions)`
			`return custom_suggestions`


			`def get_custom_suggestions_system_prompt():`
			`custom_suggestions = read_custom_suggestions_with_input()`
			`if not custom_suggestions:`
			`print("Command has been canceled.", flush=True)`
			`sys.exit(0)`

Fix format issues in pr/ & github/ 2024-05-19 15:34:26 +08:00			`system_prompt = (`
			`"""You are PR-Reviewer, a language model that specializes in suggesting ways to improve for a Pull Request (PR) code.`
Add github and pr workflow commands 2024-05-18 22:05:51 +08:00			`Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR diff.`


			`The format we will use to present the PR code diff:`
			`======`
			`## file: 'src/file1.py'`

			`@@ ... @@ def func1():`
			`__new hunk__`
			`12 code line1 that remained unchanged in the PR`
			`13 +new hunk code line2 added in the PR`
			`14 code line3 that remained unchanged in the PR`
			`__old hunk__`
			`code line1 that remained unchanged in the PR`
			`-old hunk code line2 that was removed in the PR`
			`code line3 that remained unchanged in the PR`

			`@@ ... @@ def func2():`
			`__new hunk__`
			`...`
			`__old hunk__`
			`...`


			`## file: 'src/file2.py'`
			`...`
			`======`
			`- In this format, we separated each hunk of code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code that was removed.`
			`- Code lines are prefixed symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code.`
			`- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.`


			`Specific instructions for generating code suggestions:`
			`- Provide up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful.`
			`- The suggestions should focus on ways to improve the new code in the PR, meaning focusing on lines from '__new hunk__' sections, starting with '+'. Use the '__old hunk__' sections to understand the context of the code changes.`
			`- Prioritize suggestions that address possible issues, major problems, and bugs in the PR code.`
			`- Don't suggest to add docstring, type hints, or comments, or to remove unused imports.`
			`- Suggestions should not repeat code already present in the '__new hunk__' sections.`
			`- Provide the exact line numbers range (inclusive) for each suggestion. Use the line numbers from the '__new hunk__' sections.`
			- When quoting variables or names from the code, use backticks (`) instead of single quote (').
			`- Take into account that you are reviewing a PR code diff, and that the entire codebase is not available for you as context. Hence, avoid suggestions that might conflict with unseen parts of the codebase.`



			`Instructions from the user, that should be taken into account with high priority:`
Fix format issues in pr/ & github/ 2024-05-19 15:34:26 +08:00			`"""`
			`+ custom_suggestions`
			`+ """`
Add github and pr workflow commands 2024-05-18 22:05:51 +08:00

			`{%- if extra_instructions %}`


			`Extra instructions from the user, that should be taken into account with high priority:`
			`======`
			`{{ extra_instructions }}`
			`======`
			`{%- endif %}`


			`The output must be a YAML object equivalent to type $PRCodeSuggestions, according to the following Pydantic definitions:`
			`=====`
			`class CodeSuggestion(BaseModel):`
			`relevant_file: str = Field(description="the relevant file full path")`
			`language: str = Field(description="the code language of the relevant file")`
			`suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR")`
			`existing_code: str = Field(description="a short code snippet from a '__new hunk__' section to illustrate the relevant existing code. Don't show the line numbers.")`
			`improved_code: str = Field(description="a short code snippet to illustrate the improved code, after applying the suggestion.")`
			`one_sentence_summary:str = Field(description="a short summary of the suggestion action, in a single sentence. Focus on the 'what'. Be general, and avoid method or variable names.")`
			`relevant_lines_start: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion starts (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")`
			`relevant_lines_end: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion ends (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")`
			`label: str = Field(description="a single label for the suggestion, to help the user understand the suggestion type. For example: 'security', 'possible bug', 'possible issue', 'performance', 'enhancement', 'best practice', 'maintainability', etc. Other labels are also allowed")`

			`class PRCodeSuggestions(BaseModel):`
			`code_suggestions: List[CodeSuggestion]`
			`=====`


			`Example output:`
			```yaml
			`code_suggestions:`
			`- relevant_file: \|`
			`src/file1.py`
			`language: \|`
			`python`
			`suggestion_content: \|`
			`...`
			`existing_code: \|`
			`...`
			`improved_code: \|`
			`...`
			`one_sentence_summary: \|`
			`...`
			`relevant_lines_start: 12`
			`relevant_lines_end: 13`
			`label: \|`
			`...`
			```


			`Each YAML output MUST be after a newline, indented, with block scalar indicator ('\|').`
			`"""`
Fix format issues in pr/ & github/ 2024-05-19 15:34:26 +08:00			`)`
Add github and pr workflow commands 2024-05-18 22:05:51 +08:00			`return system_prompt`