2024-05-24 09:56:19 +08:00

202 lines
5.8 KiB
Python

import os
import re
from pathlib import Path
from typing import Dict, List
def retrieve_file_content(file_path: str, root_path: str) -> str:
    """
    Retrieve the content of a file given its relative or absolute path.

    If a relative path is provided, it is joined with ``root_path`` to form
    the path that is opened. An absolute ``file_path`` is used unchanged and
    ``root_path`` is ignored.

    Args:
        file_path (str): The relative or absolute path to the file.
        root_path (str): The root directory used when ``file_path`` is relative.

    Returns:
        str: The full text content of the file.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    if not os.path.isabs(file_path):
        file_path = os.path.join(root_path, file_path)
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        return file.read()
def remove_duplicates(items: List[str]) -> List[str]:
    """
    Remove duplicate items from a list while preserving the order.

    The first occurrence of each item is kept; later repeats are dropped.
    The input list is not modified.

    Args:
        items (List[str]): The items to de-duplicate. Items must be hashable.

    Returns:
        List[str]: A new list containing each distinct item once, in
        first-seen order.
    """
    # dict keys are unique and keep insertion order, so this keeps exactly
    # the first occurrence of every item.
    return list(dict.fromkeys(items))
def check_file_exists(file_path: str, root_path: str) -> bool:
    """
    Tell whether a regular file exists at the given path.

    Args:
        file_path (str): The relative or absolute path to the file.
        root_path (str): The root directory that a relative ``file_path`` is
            joined onto; ignored when ``file_path`` is absolute.

    Returns:
        bool: True if the resolved path is an existing file, False otherwise
        (including when it is a directory).
    """
    if os.path.isabs(file_path):
        candidate = file_path
    else:
        candidate = os.path.join(root_path, file_path)
    return os.path.isfile(candidate)
def verify_file_list(file_list: List[str], root_path: str) -> List[str]:
    """
    Prune a file list so that it can be relied upon.

    A file list produced by an LLM may contain:
    - duplicated files
    - files that don't exist

    Args:
        file_list (List[str]): Candidate file paths, relative or absolute.
        root_path (str): The root directory used to resolve relative paths.

    Returns:
        List[str]: The distinct entries of ``file_list``, in their original
        order, that point at an existing file.
    """
    # De-duplicate first so each path is checked on disk only once.
    unique_files = remove_duplicates(file_list)
    return [
        candidate
        for candidate in unique_files
        if check_file_exists(candidate, root_path)
    ]
def resolve_relative_path(file: str, path: str) -> str:
    """Resolve a relative path based on the current file's path.

    Only paths that start with ``./`` or ``../`` are treated as relative;
    anything else (bare names, absolute paths, a lone ``.`` or ``..``) is
    returned unchanged.

    Args:
        file (str): The path to the current file.
        path (str): The path to resolve.

    Returns:
        str: The normalized path relative to the directory of ``file`` if
        the input path is relative, otherwise the original path.
    """
    # Anything without an explicit "./" or "../" prefix is passed through.
    if not path.startswith(("./", "../")):
        return path
    # Join onto the directory of the current file, then collapse "." and
    # ".." segments.
    file_dir = os.path.dirname(file)
    return os.path.normpath(os.path.join(file_dir, path))
def is_not_hidden(relpath: Path) -> bool:
    """Return True when the final component of ``relpath`` does not start with a dot.

    Only the last path component is inspected; dot-prefixed parent
    directories do not affect the result.
    """
    basename = relpath.name
    return basename[:1] != "."
# Extensions of files that are treated as source code proper.
_CODE_EXTENSIONS = frozenset(
    {
        ".py",  # Python
        ".java",  # Java
        ".c",  # C
        ".cpp",  # C++
        ".h",  # C header
        ".hpp",  # C++ header
        ".hh",  # C++ header
        ".js",  # JavaScript
        ".ts",  # TypeScript
        ".go",  # Go
        ".rs",  # Rust
        ".rb",  # Ruby
        ".cs",  # C#
        ".m",  # Objective-C
        ".swift",  # Swift
        ".php",  # PHP
        ".kt",  # Kotlin
        ".scala",  # Scala
        ".r",  # R
        ".pl",  # Perl
        ".lua",  # Lua
        ".groovy",  # Groovy
        ".dart",  # Dart
        ".sh",  # Bash
        ".bat",  # Batch file
        ".ipynb",  # Jupyter Notebook
    }
)

# Documentation/config extensions accepted only when ``only_code`` is False.
_DOC_EXTENSIONS = frozenset(
    {
        ".md",  # Markdown
        ".yaml",  # YAML
        ".yml",  # YAML
    }
)


def is_source_code(file_name: str, only_code: bool = False) -> bool:
    """
    Check if a given file is a source code file based on its extension.

    The extension is compared case-sensitively against a fixed set of
    lowercase extensions, so ``"a.PY"`` is not recognized.

    Args:
        file_name (str): The name (or path) of the file to check.
        only_code (bool): When True, accept programming-language extensions
            only. When False (the default), Markdown and YAML files
            (``.md``, ``.yaml``, ``.yml``) are accepted as well.

    Returns:
        bool: True if the file is a source code file, False otherwise.
    """
    _, extension = os.path.splitext(file_name)
    if extension in _CODE_EXTENSIONS:
        return True
    return not only_code and extension in _DOC_EXTENSIONS
# Fallback for languages without a file-naming convention: any path that
# passes through a "test"/"tests" directory (with either case of the "T").
DEFAULT_TEST_REGS = [r"^(.+/)*[Tt]ests?/"]  # C, C++, OBJC

# Per-language regular expressions (as pattern strings) that identify test
# files from their path. Literal dots in front of file extensions are escaped
# so that they match only a real "." and not an arbitrary character.
# NOTE(review): the Swift, Scala, Dart and PHP patterns carry no trailing "$",
# so they accept any path that merely starts with a match - confirm intent.
TEST_PATH_PATTERNS: Dict[str, List[str]] = {
    "C": DEFAULT_TEST_REGS,
    "C++": DEFAULT_TEST_REGS,
    "Objective-C": DEFAULT_TEST_REGS,
    # Gradle https://docs.gradle.org/current/userguide/java_testing.html#sec:test_detection
    "Java": [r"^(.+/)*src/test/.*Tests?\.java$"],
    # jest
    "JavaScript": [r"(.+/)*(__[Tt]ests__/.*|((.*\.)?(test|spec)))\.[jt]sx?$"],
    # pytest https://docs.pytest.org/en/stable/goodpractices.html#conventions-for-python-test-discovery
    # The basename must start with "test_" or end with "_test"; a name that
    # merely contains "test_" (e.g. "latest_version.py") is not a test file.
    "Python": [r"^(.*/)?([^/]*_test|test_[^/]*)\.py$"],
    "Ruby": [r"^(.+/)*(spec/.*_spec\.rb|test/.*_test\.rb)$"],
    # golang, from `go help test`
    "Go": [r"^(.+/)*[^_\.][^/]*_test\.go$"],
    "PHP": [r"^(.+/)*[Tt]ests?/(.+/)*([^/]*[Tt]ests?\.php|[Ff]ixtures?/(.+/)*.+\.php)"],
    "Kotlin": [r"^(.+/)*src/test/.*Tests?\.kt$"],
    "C#": [r"^(.+/)[^/]+[Tt]ests?\.cs$"],
    "Swift": [r"^(.+/)*[^/]*Tests?\.swift"],
    "Scala": [r"^(.+/)*src/test/.*(scala|sc)"],
    "Dart": [r"^(.+/)*[Tt]ests?/(.+/)*[^/]*[Tt]ests?\.dart"],
    "Lua": [r"^(.+/)*(specs?/.*_spec|tests?/(.*_test|test_[^/]*))\.lua$"],
}

# Same mapping with every pattern compiled once at import time.
LANG_TEST_REGS: Dict[str, List[re.Pattern]] = {
    lang: [re.compile(pattern) for pattern in patterns]
    for lang, patterns in TEST_PATH_PATTERNS.items()
}
def is_test_file(file_path: str) -> bool:
    """
    Decide whether a path looks like a test file.

    The path is tried against the compiled patterns of every language in
    ``LANG_TEST_REGS``; the file's own language is not taken into account.
    Patterns are applied with ``match``, i.e. anchored at the start of the
    path.

    Args:
        file_path (str): The path to the file to check.

    Returns:
        bool: True if any pattern matches the path, False otherwise.
    """
    for patterns in LANG_TEST_REGS.values():
        if any(pattern.match(file_path) for pattern in patterns):
            return True
    return False