2024-05-09 22:18:51 +08:00

157 lines
4.2 KiB
Python

import os
from pathlib import Path
from typing import List
def retrieve_file_content(file_path: str, root_path: str) -> str:
    """
    Retrieve the content of a file given its relative or absolute path.

    If a relative path is provided, it is joined with root_path before the
    file is opened. An absolute path is used as is and root_path is ignored.

    Args:
        file_path (str): The relative or absolute path to the file.
        root_path (str): The root directory used when file_path is relative.

    Returns:
        str: The full text content of the file.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file is not valid UTF-8 text.
    """
    if not os.path.isabs(file_path):
        file_path = os.path.join(root_path, file_path)
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        return file.read()
def remove_duplicates(items: List[str]) -> List[str]:
    """
    Return a new list with repeated items dropped, keeping first occurrences.

    The relative order of the surviving items matches their order in the
    input list.
    """
    # dict keys are unique and remember insertion order, so the keys of
    # dict.fromkeys() are exactly the first occurrences in input order.
    return list(dict.fromkeys(items))
def check_file_exists(file_path: str, root_path: str) -> bool:
    """
    Tell whether the given path points at an existing regular file.

    Args:
        file_path (str): The relative or absolute path to the file.
        root_path (str): The root directory prepended when file_path is relative.

    Returns:
        bool: True if the resolved path is an existing file, False otherwise
        (including when it is a directory).
    """
    # Absolute paths are checked directly; relative ones are anchored at root_path.
    candidate = file_path if os.path.isabs(file_path) else os.path.join(root_path, file_path)
    return os.path.isfile(candidate)
def verify_file_list(file_list: List[str], root_path: str) -> List[str]:
    """
    Prune a file list so that it is reliable.

    A file list produced by an LLM may contain duplicated entries and entries
    for files that do not exist. Duplicates are dropped first (keeping the
    first occurrence), then every entry that does not resolve to an existing
    file under root_path is filtered out.
    """
    return [
        candidate
        for candidate in remove_duplicates(file_list)
        if check_file_exists(candidate, root_path)
    ]
def resolve_relative_path(file: str, path: str) -> str:
    """Resolve a "./" or "../" path against the directory of the current file.

    Args:
        file (str): The path to the current file.
        path (str): The path to resolve.

    Returns:
        str: The normalized, resolved path when the input starts with "./" or
        "../"; otherwise the input path unchanged.
    """
    # Anything that is not explicitly dot-relative is handed back untouched.
    if not path.startswith(("./", "../")):
        return path

    # Anchor the path at the current file's directory, then collapse the
    # "." and ".." segments.
    base_dir = os.path.dirname(file)
    return os.path.normpath(os.path.join(base_dir, path))
def is_not_hidden(relpath: Path) -> bool:
    """Return True unless the final path component begins with a dot."""
    # Slicing keeps an empty name safe: "" is simply not equal to ".".
    return relpath.name[:1] != "."
# Extensions of programming-language source files (all lowercase).
_CODE_EXTENSIONS = frozenset(
    {
        ".py",  # Python
        ".java",  # Java
        ".c",  # C
        ".cpp",  # C++
        ".h",  # C header
        ".hpp",  # C++ header
        ".hh",  # C++ header
        ".js",  # JavaScript
        ".ts",  # TypeScript
        ".go",  # Go
        ".rs",  # Rust
        ".rb",  # Ruby
        ".cs",  # C#
        ".m",  # Objective-C
        ".swift",  # Swift
        ".php",  # PHP
        ".kt",  # Kotlin
        ".scala",  # Scala
        ".r",  # R
        ".pl",  # Perl
        ".lua",  # Lua
        ".groovy",  # Groovy
        ".dart",  # Dart
        ".sh",  # Bash
        ".bat",  # Batch file
        ".ipynb",  # Jupyter Notebook
    }
)

# Documentation/configuration extensions accepted unless only_code is set.
_TEXT_EXTENSIONS = frozenset(
    {
        ".md",  # Markdown
        ".yaml",  # YAML
        ".yml",  # YAML
    }
)


def is_source_code(file_name: str, only_code: bool = False) -> bool:
    """
    Check if a given file is a source code file based on its extension.

    The extension is compared case-insensitively, so names such as
    "analysis.R" or "MAIN.CPP" are recognized.

    Args:
        file_name (str): The name (or path) of the file to check.
        only_code (bool): When True, only programming-language extensions
            count. When False (the default), Markdown (.md) and YAML
            (.yaml/.yml) files are accepted as well.

    Returns:
        bool: True if the file's extension is in the accepted set, False
        otherwise (including when the name has no extension).
    """
    _, extension = os.path.splitext(file_name)
    # Normalize case: extensions like ".R" or ".PY" are common in practice.
    extension = extension.lower()
    if extension in _CODE_EXTENSIONS:
        return True
    return not only_code and extension in _TEXT_EXTENSIONS