202 lines
5.8 KiB
Python
202 lines
5.8 KiB
Python
import os
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Dict, List
|
|
|
|
|
|
def retrieve_file_content(file_path: str, root_path: str) -> str:
    """
    Retrieve the content of a file given its relative or absolute path.

    If a relative path is provided, it will be joined with the root_path to form the path
    that is opened.

    Args:
        file_path (str): The relative or absolute path to the file.
        root_path (str): The root directory path to be used if file_path is a relative path.

    Returns:
        str: The content of the file, decoded as UTF-8.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    if not os.path.isabs(file_path):
        file_path = os.path.join(root_path, file_path)

    # Pin the encoding: without it, open() falls back to the locale's preferred
    # encoding, so the same file could decode differently (or fail) per platform.
    with open(file_path, "r", encoding="utf-8") as file:
        return file.read()
|
|
|
|
|
|
def remove_duplicates(items: List[str]) -> List[str]:
    """
    Return a new list with repeated items dropped.

    Only the first occurrence of each item is kept, and the surviving items
    stay in their original relative order.
    """
    # dict keys are unique and remember insertion order, so this keeps the
    # first occurrence of every item in order.
    return list(dict.fromkeys(items))
|
|
|
|
|
|
def check_file_exists(file_path: str, root_path: str) -> bool:
    """
    Tell whether the given path points at an existing regular file.

    Args:
        file_path (str): The relative or absolute path to the file.
        root_path (str): The directory a relative file_path is joined onto.

    Returns:
        bool: True if the path refers to a file, False otherwise
            (including when it refers to a directory or nothing at all).
    """
    # Absolute paths are used untouched; relative ones are anchored at root_path.
    candidate = file_path if os.path.isabs(file_path) else os.path.join(root_path, file_path)
    return os.path.isfile(candidate)
|
|
|
|
|
|
def verify_file_list(file_list: List[str], root_path: str) -> List[str]:
    """
    Clean up a file list that may be unreliable (e.g. one produced by an LLM).

    Such a list may contain:
    - the same file more than once
    - files that don't exist

    Args:
        file_list (List[str]): Candidate file paths, relative or absolute.
        root_path (str): The directory relative paths are checked against.

    Returns:
        List[str]: The distinct entries of file_list that exist as files,
            in their original order.
    """
    # De-duplicate first so each distinct path hits the filesystem only once.
    unique_files = remove_duplicates(file_list)
    return [name for name in unique_files if check_file_exists(name, root_path)]
|
|
|
|
|
|
def resolve_relative_path(file: str, path: str) -> str:
    """Resolve an explicitly relative path against the directory of the current file.

    Only paths that begin with "./" or "../" are treated as relative; anything
    else is handed back untouched.

    Args:
        file (str): The path to the current file.
        path (str): The path to resolve.

    Returns:
        str: The normalized path built from the directory of `file` and `path`
            when `path` is relative, otherwise the original `path`.
    """
    # Anything that is not explicitly relative is returned as is.
    if not path.startswith(("./", "../")):
        return path

    # Anchor at the current file's directory, then collapse "." and ".." segments.
    base_dir = os.path.dirname(file)
    return os.path.normpath(os.path.join(base_dir, path))
|
|
|
|
|
|
def is_not_hidden(relpath: Path) -> bool:
    """
    Check that the final component of a path is not a dot-prefixed (hidden) name.

    Only the last path component is inspected; hidden parent directories do not
    affect the result.

    Args:
        relpath (Path): The path to check.

    Returns:
        bool: False if the final component starts with ".", True otherwise.
    """
    # Slicing instead of indexing keeps an empty name from raising.
    return relpath.name[:1] != "."
|
|
|
|
|
|
# Extensions (lower-case) that identify program source files.
_CODE_EXTENSIONS = frozenset(
    {
        ".py",  # Python
        ".java",  # Java
        ".c",  # C
        ".cpp",  # C++
        ".h",  # C header
        ".hpp",  # C++ header
        ".hh",  # C++ header
        ".js",  # JavaScript
        ".ts",  # TypeScript
        ".go",  # Go
        ".rs",  # Rust
        ".rb",  # Ruby
        ".cs",  # C#
        ".m",  # Objective-C
        ".swift",  # Swift
        ".php",  # PHP
        ".kt",  # Kotlin
        ".scala",  # Scala
        ".r",  # R
        ".pl",  # Perl
        ".lua",  # Lua
        ".groovy",  # Groovy
        ".dart",  # Dart
        ".sh",  # Bash
        ".bat",  # Batch file
        ".ipynb",  # Jupyter Notebook
    }
)

# Documentation/configuration extensions accepted unless only_code is set.
_TEXT_EXTENSIONS = frozenset(
    {
        ".md",  # Markdown
        ".yaml",  # YAML
        ".yml",  # YAML
    }
)


def is_source_code(file_name: str, only_code: bool = False) -> bool:
    """
    Check if a given file is a source code file based on its extension.

    The extension is compared case-insensitively, so names such as
    "script.R" or "MAIN.C" are recognized as well.

    Args:
        file_name (str): The name of (or path to) the file to check.
        only_code (bool): When False (the default), Markdown and YAML files
            (.md/.yaml/.yml) are also accepted. When True, only programming
            language extensions count.

    Returns:
        bool: True if the file is a source code file, False otherwise.
    """
    _, extension = os.path.splitext(file_name)
    # Normalize case: extensions like ".R" or ".PY" are common in real repositories.
    extension = extension.lower()

    if extension in _CODE_EXTENSIONS:
        return True
    return not only_code and extension in _TEXT_EXTENSIONS
|
|
|
|
|
|
# Fallback for languages without a stronger convention: any path that passes
# through a "test"/"tests"/"Test"/"Tests" directory.
DEFAULT_TEST_REGS = [r"^(.+/)*[Tt]ests?/"]  # C, C++, OBJC

# Per-language regular expressions describing where test files conventionally live.
# Paths are expected to use "/" as the separator. The dot before a file extension
# is escaped so it only matches a literal "." rather than any character.
TEST_PATH_PATTERNS: Dict[str, List[str]] = {
    "C": DEFAULT_TEST_REGS,
    "C++": DEFAULT_TEST_REGS,
    "Objective-C": DEFAULT_TEST_REGS,
    # Gradle https://docs.gradle.org/current/userguide/java_testing.html#sec:test_detection
    "Java": [r"^(.+/)*src/test/.*Tests?\.java$"],
    # jest
    "JavaScript": [r"(.+/)*(__[Tt]ests__/.*|((.*\.)?(test|spec)))\.[jt]sx?$"],
    # pytest https://docs.pytest.org/en/stable/goodpractices.html#conventions-for-python-test-discovery
    "Python": [r"(.*_test|.*/?test_[^/]*)\.py$"],
    "Ruby": [r"^(.+/)*(spec/.*_spec\.rb|test/.*_test\.rb)$"],
    # golang, from `go help test`
    "Go": [r"^(.+/)*[^_\.][^/]*_test\.go$"],
    "PHP": [r"^(.+/)*[Tt]ests?/(.+/)*([^/]*[Tt]ests?\.php|[Ff]ixtures?/(.+/)*.+\.php)"],
    "Kotlin": [r"^(.+/)*src/test/.*Tests?\.kt$"],
    "C#": [r"^(.+/)[^/]+[Tt]ests?\.cs$"],
    "Swift": [r"^(.+/)*[^/]*Tests?\.swift"],
    "Scala": [r"^(.+/)*src/test/.*(scala|sc)"],
    "Dart": [r"^(.+/)*[Tt]ests?/(.+/)*[^/]*[Tt]ests?\.dart"],
    "Lua": [r"^(.+/)*(specs?/.*_spec|tests?/(.*_test|test_[^/]*))\.lua$"],
}

# Same mapping with every pattern pre-compiled once at import time.
LANG_TEST_REGS: Dict[str, List] = {
    k: [re.compile(r) for r in v] for k, v in TEST_PATH_PATTERNS.items()
}
|
|
|
|
|
|
def is_test_file(file_path: str) -> bool:
    """
    Decide from its path alone whether a file looks like a test file.

    The path is tried against every compiled pattern in LANG_TEST_REGS,
    regardless of language; one hit is enough.

    Args:
        file_path (str): The path to the file to check.

    Returns:
        bool: True if any pattern matches the path, False otherwise.
    """
    # Patterns are applied with match(), i.e. anchored at the start of the path.
    return any(
        pattern.match(file_path)
        for patterns in LANG_TEST_REGS.values()
        for pattern in patterns
    )
|