2024-05-09 22:18:51 +08:00

146 lines
4.2 KiB
Python

import functools
import os
from collections import defaultdict
from pathlib import Path
from typing import Callable, Dict, List
import pathspec
# NOTE: git-ignore pattern examples
# https://www.atlassian.com/git/tutorials/saving-changes/gitignore#git-ignore-patterns
def load_gitignore_spec_from_file(
    ignore_filepath: str,
) -> pathspec.GitIgnoreSpec:
    """
    Create a path spec for matching git ignore patterns from a given .gitignore file.

    The ``.git/`` directory is always part of the spec, whether or not the
    file can be read. Blank lines and lines starting with ``#`` are dropped
    before the remaining lines are handed to pathspec.

    ignore_filepath: The absolute path to the .gitignore file.
    Returns: A spec holding ``.git/`` plus every pattern read from the file
        (only ``.git/`` when the path is not a regular file).
    """
    ignore_patterns = [".git/"]  # Ignore .git directory first
    # isfile (not exists): a directory that happens to be named ".gitignore"
    # is treated as "no ignore file" instead of failing inside open().
    if os.path.isfile(ignore_filepath):
        # Decode as UTF-8 explicitly; the default encoding is locale-dependent
        # and can mis-decode or reject non-ASCII patterns on some platforms.
        with open(ignore_filepath, "r", encoding="utf-8") as f:
            stripped_lines = (raw_line.strip() for raw_line in f)
            ignore_patterns.extend(
                line
                for line in stripped_lines
                if line and not line.startswith("#")
            )
    return pathspec.GitIgnoreSpec.from_lines(ignore_patterns)
def load_submodule_spec_from_file(
    modules_filepath: str,
) -> pathspec.GitIgnoreSpec:
    """
    Create a path spec to match submodule dirs & files from a given .gitmodules file.

    Every ``path`` entry of the file becomes one pattern anchored at the
    directory containing the .gitmodules file, so it matches the submodule
    directory itself and everything below it, but not a same-named directory
    elsewhere in the tree.

    modules_filepath: The absolute path to the .gitmodules file.
    Returns: A spec matching the listed submodule paths (an empty spec when
        the path is not a regular file or lists no submodule).
    """
    submodule_patterns = []
    # isfile (not exists): a directory named ".gitmodules" means "no file".
    if os.path.isfile(modules_filepath):
        # Decode as UTF-8 explicitly; the default encoding is locale-dependent.
        with open(modules_filepath, "r", encoding="utf-8") as f:
            for line in f:
                # git-config syntax allows any spacing around "=" and treats
                # variable names case-insensitively, so "path=x" and
                # "Path  =  x" are both valid spellings of the same entry.
                key, sep, value = line.partition("=")
                if not sep or key.strip().lower() != "path":
                    continue
                submodule_dir = value.strip().strip("/")
                if not submodule_dir:
                    continue
                # A gitignore-style pattern without a leading "/" and without
                # an inner "/" matches at ANY depth; anchor it so a submodule
                # at "vendor" does not also exclude "src/vendor/...".
                submodule_patterns.append("/" + submodule_dir)
    return pathspec.GitIgnoreSpec.from_lines(submodule_patterns)
def load_gitlfs_spec_from_file(
    attributes_filepath: str,
) -> pathspec.GitIgnoreSpec:
    """
    Create a path spec to match git-lfs files from a given .gitattributes file.

    A line contributes its first whitespace-separated token (the path
    pattern) when one of the following tokens is exactly ``filter=lfs``.

    attributes_filepath: The path to the .gitattributes file.
    Returns: A spec matching the patterns tracked by git-lfs (an empty spec
        when the path is not a regular file or tracks nothing).
    """
    lfs_patterns = []
    # isfile (not exists): a directory named ".gitattributes" means "no file".
    if os.path.isfile(attributes_filepath):
        # Decode as UTF-8 explicitly; the default encoding is locale-dependent.
        with open(attributes_filepath, "r", encoding="utf-8") as f:
            for line in f:
                items = line.split()
                # Skip blank lines and comments up front.
                if not items or items[0].startswith("#"):
                    continue
                # items[0] is the path pattern and only the remaining tokens
                # are attributes, so look for the LFS filter there; otherwise
                # a pattern literally named "filter=lfs" would match itself.
                if "filter=lfs" in items[1:]:
                    lfs_patterns.append(items[0])
    return pathspec.GitIgnoreSpec.from_lines(lfs_patterns)
def _is_path_of_interest(relpath: Path, skip_specs: List[Dict]) -> bool:
"""
Check if the given relative path is of interest.
relpath: The relative path to the repo_root.
skip_specs: A list of path spec dict to skip.
key: The relative prefix of the path spec.
value: The path spec.
"""
skip = False
for spec_dict in skip_specs:
if skip:
break
for rel_prefix, spec in spec_dict.items():
prefix = "" if rel_prefix == "." else rel_prefix + "/"
prefix = Path(prefix)
subpath = None
try:
subpath = relpath.relative_to(prefix)
except ValueError:
pass
if subpath is None:
continue
if spec.match_file(str(subpath)):
skip = True
break
return not skip
def git_file_of_interest_filter(repo_path: str) -> Callable[[Path], bool]:
    """
    Return a function which checks if a given relative path is of interest
    based on the gitignore, submodule and git-lfs specifications in a git repo.

    repo_path: The path to the root of the git repo.
    Returns: A callable taking a path relative to the repo root and returning
        False when any loaded spec matches it, True otherwise.
    """
    repo_root = Path(repo_path)

    # Submodule and git-lfs specs are only read from the repo root, hence the
    # single "." key (the prefix meaning "applies from the root").
    submodule_specs: Dict[str, pathspec.GitIgnoreSpec] = {
        ".": load_submodule_spec_from_file(str(repo_root / ".gitmodules"))
    }
    lfs_specs: Dict[str, pathspec.GitIgnoreSpec] = {
        ".": load_gitlfs_spec_from_file(str(repo_root / ".gitattributes"))
    }

    # One spec per .gitignore found anywhere in the tree, keyed by the
    # directory holding it (relative to the repo root), so that its patterns
    # are only applied to paths below that directory.
    ignore_specs: Dict[str, pathspec.GitIgnoreSpec] = {}
    for ignore_file in repo_root.rglob(".gitignore"):
        # rglob matches by name only; skip directories or other non-file
        # entries that happen to be called ".gitignore".
        if not ignore_file.is_file():
            continue
        relative_prefix = str(ignore_file.parent.relative_to(repo_root))
        ignore_specs[relative_prefix] = load_gitignore_spec_from_file(
            str(ignore_file)
        )

    return functools.partial(
        _is_path_of_interest,
        skip_specs=[
            ignore_specs,
            submodule_specs,
            lfs_specs,
        ],
    )