146 lines
4.2 KiB
Python
146 lines
4.2 KiB
Python
import functools
|
|
import os
|
|
from collections import defaultdict
|
|
from pathlib import Path
|
|
from typing import Callable, Dict, List
|
|
|
|
import pathspec
|
|
|
|
# NOTE: git-ignore pattern examples
|
|
# https://www.atlassian.com/git/tutorials/saving-changes/gitignore#git-ignore-patterns
|
|
|
|
|
|
def load_gitignore_spec_from_file(
    ignore_filepath: str,
) -> pathspec.GitIgnoreSpec:
    """
    Create a path spec to match git ignore patterns from a given .gitignore file.

    The ``.git/`` directory is always part of the returned spec. When the
    given path does not point at a regular file, the spec contains only that
    built-in pattern.

    ignore_filepath: The absolute path to the .gitignore file.

    Returns a ``pathspec.GitIgnoreSpec`` built from the collected patterns.
    """
    ignore_patterns = [".git/"]  # Ignore .git directory first

    # isfile() rather than exists(): a directory that happens to be named
    # ".gitignore" is treated like a missing file instead of failing in open().
    if os.path.isfile(ignore_filepath):
        # Decode explicitly as UTF-8 so parsing does not depend on the locale.
        with open(ignore_filepath, "r", encoding="utf-8") as f:
            stripped_lines = (raw_line.strip() for raw_line in f)
            # Drop blank lines and comment lines; keep everything else verbatim.
            ignore_patterns.extend(
                line
                for line in stripped_lines
                if line and not line.startswith("#")
            )

    return pathspec.GitIgnoreSpec.from_lines(ignore_patterns)
|
|
|
|
|
|
def load_submodule_spec_from_file(
    modules_filepath: str,
) -> pathspec.GitIgnoreSpec:
    """
    Create a path spec to match submodule dirs & files from a given .gitmodules file.

    Every ``path = <dir>`` entry of the file becomes one pattern. Patterns are
    anchored with a leading ``/`` so that a single-segment submodule path such
    as ``vendor`` only matches ``vendor`` next to the .gitmodules file, and not
    a same-named directory deeper in the tree (gitignore-style patterns
    without a separator otherwise match at any level).

    modules_filepath: The absolute path to the .gitmodules file.

    Returns a ``pathspec.GitIgnoreSpec``; it is empty when the path does not
    point at a regular file or the file declares no submodule paths.
    """
    submodule_patterns = []

    # isfile() rather than exists(): a directory with this name is treated
    # like a missing file instead of failing in open().
    if os.path.isfile(modules_filepath):
        # Decode explicitly as UTF-8 so parsing does not depend on the locale.
        with open(modules_filepath, "r", encoding="utf-8") as f:
            for line in f:
                # Split on the first "=" so that "path = x", "path=x" and
                # "path  =  x" are all accepted.
                key, sep, value = line.partition("=")
                value = value.strip()
                if not sep or key.strip().lower() != "path" or not value:
                    continue
                # Anchor the submodule directory (see docstring).
                submodule_patterns.append("/" + value.lstrip("/"))

    return pathspec.GitIgnoreSpec.from_lines(submodule_patterns)
|
|
|
|
|
|
def load_gitlfs_spec_from_file(
    attributes_filepath: str,
) -> pathspec.GitIgnoreSpec:
    """
    Create a path spec to match git-lfs files from a given .gitattributes file.

    A line contributes its first whitespace-separated token (the path pattern)
    when one of the following tokens is exactly ``filter=lfs``.

    attributes_filepath: The absolute path to the .gitattributes file.

    Returns a ``pathspec.GitIgnoreSpec``; it is empty when the path does not
    point at a regular file or no line carries ``filter=lfs``.
    """
    lfs_patterns = []

    # isfile() rather than exists(): a directory with this name is treated
    # like a missing file instead of failing in open().
    if os.path.isfile(attributes_filepath):
        # Decode explicitly as UTF-8 so parsing does not depend on the locale.
        with open(attributes_filepath, "r", encoding="utf-8") as f:
            for line in f:
                items = line.split()
                # Skip blank lines and comment lines.
                if not items or items[0].startswith("#"):
                    continue
                pattern, attributes = items[0], items[1:]
                # Only look at the attribute tokens, not at the pattern itself.
                if "filter=lfs" in attributes:
                    lfs_patterns.append(pattern)

    return pathspec.GitIgnoreSpec.from_lines(lfs_patterns)
|
|
|
|
|
|
def _is_path_of_interest(relpath: Path, skip_specs: List[Dict]) -> bool:
    """
    Tell whether a repo-relative path should be kept.

    relpath: The path, relative to the repo root.
    skip_specs: A list of dicts describing what to skip.
        key: The prefix (relative to the repo root, "." for the root itself)
             under which the spec applies.
        value: The path spec; it is asked via ``match_file`` about the part
               of ``relpath`` below the prefix.

    Returns False as soon as one spec matches, True when none does.
    """
    for specs_by_prefix in skip_specs:
        for rel_prefix, spec in specs_by_prefix.items():
            base = Path("" if rel_prefix == "." else rel_prefix + "/")

            try:
                below_base = relpath.relative_to(base)
            except ValueError:
                # relpath is not located under this prefix: spec does not apply
                continue

            if spec.match_file(str(below_base)):
                return False

    return True
|
|
|
|
|
|
def git_file_of_interest_filter(repo_path: str) -> Callable[[Path], bool]:
    """
    Return a function which checks if a given relative path is of interest
    based on the gitignore, submodule and git-lfs specifications in a git repo.

    repo_path: The path to the root of the git repo.

    The returned callable takes a path relative to the repo root and returns
    False when the path is matched by a .gitignore found anywhere under the
    root, by the root .gitmodules, or by a ``filter=lfs`` entry of the root
    .gitattributes; it returns True otherwise.
    """
    repo_root = Path(repo_path)

    # Every dict below maps a prefix (relative to repo_root, "." for the root
    # itself) to the single spec whose patterns apply under that prefix.

    # Load submodule spec
    submodule_specs: Dict[str, pathspec.GitIgnoreSpec] = {
        ".": load_submodule_spec_from_file(str(repo_root / ".gitmodules"))
    }

    # Load git-lfs spec
    lfs_specs: Dict[str, pathspec.GitIgnoreSpec] = {
        ".": load_gitlfs_spec_from_file(str(repo_root / ".gitattributes"))
    }

    # Load gitignore specs. The root entry is always created, even without a
    # root .gitignore, so the loader's built-in ".git/" rule applies.
    ignore_specs: Dict[str, pathspec.GitIgnoreSpec] = {
        ".": load_gitignore_spec_from_file(str(repo_root / ".gitignore"))
    }
    for ignore_file in repo_root.rglob(".gitignore"):
        # Skip anything that is merely *named* .gitignore (e.g. a directory).
        if not ignore_file.is_file():
            continue
        relative_prefix = str(ignore_file.parent.relative_to(repo_root))
        ignore_specs[relative_prefix] = load_gitignore_spec_from_file(
            str(ignore_file)
        )

    return functools.partial(
        _is_path_of_interest,
        skip_specs=[
            ignore_specs,
            submodule_specs,
            lfs_specs,
        ],
    )
|