git-lfs-auto
No description available
Code
python
#!/usr/bin/env python3
"""
@title git-lfs-auto
@description Scan a directory for large files and register them with Git LFS.
@author ropean, Claude Sonnet (Anthropic)
@version 1.0.0
For every file exceeding SIZE_THRESHOLD_MB the script:
1. Ensures git is initialised in the directory (or its repo root).
2. Ensures `git lfs install` has been run.
3. Runs `git lfs track` for each new large-file extension pattern
(or exact filename when the file has no extension).
4. Stages the resulting .gitattributes change.
Supports Windows absolute paths, relative paths, and WSL UNC paths
(\\\\wsl.localhost\\<distro>\\... or \\\\wsl$\\<distro>\\...).
@example
python git-lfs-auto.py "D:\\Projects\\my-repo"
python git-lfs-auto.py "D:\\Projects\\my-repo" --dry-run
python git-lfs-auto.py "D:\\Projects\\my-repo" --threshold 10
python git-lfs-auto.py "\\\\wsl.localhost\\Ubuntu\\home\\user\\repo"
python git-lfs-auto.py .
"""
from __future__ import annotations
# ════════════════════════════════════════════════════════════
# USER CONFIG
# ════════════════════════════════════════════════════════════
# Default size limit in MB; files strictly larger than this are tracked by LFS.
# Can be overridden per run with --threshold.
SIZE_THRESHOLD_MB: float = 4.0

# Directory names that are pruned from the scan wherever they occur in the tree.
_SKIP_DIRS: set[str] = {
    ".git",
    "node_modules",
    "__pycache__",
    ".venv",
    "venv",
}
# ════════════════════════════════════════════════════════════
# END OF USER CONFIG
# ════════════════════════════════════════════════════════════
import argparse
import os
import subprocess
import sys
from pathlib import Path
# ── Terminal colors ───────────────────────────────────────
def _supports_color() -> bool:
    """Decide whether ANSI colour escapes should be emitted.

    Checks, in order of precedence:

    1. ``NO_COLOR`` set to a non-empty value disables colour.
    2. ``FORCE_COLOR`` or ``COLORTERM`` set enables colour.
    3. ``TERM=dumb`` disables colour; any other non-empty ``TERM`` enables it.
    4. On Windows, ``WT_SESSION`` or ``TERM_PROGRAM`` enables colour.
    5. Otherwise colour is enabled only when stdout is a terminal.

    Returns:
        True when colour escapes should be used, False otherwise.
    """
    env = os.environ

    # Explicit opt-out wins over every heuristic below.
    if env.get("NO_COLOR"):
        return False
    if env.get("FORCE_COLOR") or env.get("COLORTERM"):
        return True

    term = env.get("TERM", "")
    if term == "dumb":
        # Return here: falling through to the isatty() check would turn
        # colour back on for an interactive dumb terminal.
        return False
    if term:
        return True

    if os.name == "nt" and (env.get("WT_SESSION") or env.get("TERM_PROGRAM")):
        return True
    return sys.stdout.isatty()
# Colour name -> ANSI escape sequence, looked up by c().
_C: dict[str, str] = {
    "red": "\033[31m",
    "yellow": "\033[33m",
    "green": "\033[32m",
    "cyan": "\033[36m",
    "bold": "\033[1m",
    "dim": "\033[2m",
    "reset": "\033[0m",
}
if not _supports_color():
    # Colour is off: keep the same keys but map each one to an empty string,
    # so callers can format unconditionally.
    _C = dict.fromkeys(_C, "")
def c(color: str, text: str) -> str:
    """Wrap *text* in the escape sequence for *color*, followed by a reset.

    Args:
        color: A key of the module-level ``_C`` table (e.g. ``"red"``).
        text: The text to colourise.

    Returns:
        The wrapped text; unchanged text when the table holds empty strings.

    Raises:
        KeyError: If *color* is not a key of ``_C``.
    """
    opening = _C[color]
    closing = _C["reset"]
    return f"{opening}{text}{closing}"
# ── Path helpers ──────────────────────────────────────────
def _wsl_path(p: Path) -> tuple[str, str] | None:
    """Split a WSL UNC path into its distro name and Linux-side path.

    Recognises the ``\\\\wsl.localhost\\<distro>\\...`` and
    ``\\\\wsl$\\<distro>\\...`` forms. The host part is matched
    case-insensitively, so ``\\\\WSL.LOCALHOST\\...`` is recognised too.

    Args:
        p: The path to inspect; only its string form is examined.

    Returns:
        ``(distro, linux_path)`` with forward slashes in ``linux_path``
        (``"/"`` when nothing follows the distro), or None when *p* does not
        start with one of the WSL prefixes.
    """
    raw = str(p)
    # Compare on a lower-cased copy but slice the original, so the case of the
    # distro name and of the Linux path is preserved.
    lowered = raw.lower()
    for prefix in ("\\\\wsl.localhost\\", "\\\\wsl$\\"):
        if lowered.startswith(prefix):
            # e.g. "Ubuntu\home\user\repo" -> ("Ubuntu", "home\user\repo")
            distro, _, tail = raw[len(prefix):].partition("\\")
            return distro, "/" + tail.replace("\\", "/")
    return None
def resolve_dir(arg: str) -> Path:
    """Turn the command-line directory argument into a usable Path.

    ``~`` is expanded. Non-UNC paths are made absolute with ``resolve()``;
    paths starting with two backslashes are left as given, because the
    original author notes that ``resolve()`` corrupts them on Windows.

    Args:
        arg: The directory argument as typed by the user.

    Returns:
        The path of an existing directory.

    Raises:
        SystemExit: With an error message when the path is not a directory.
    """
    candidate = Path(arg).expanduser()
    is_unc = str(candidate).startswith("\\\\")
    if not is_unc:
        candidate = candidate.resolve()
    if candidate.is_dir():
        return candidate
    sys.exit(c("red", f"[error] Not a directory: {candidate}"))
# ── Git helpers ───────────────────────────────────────────
def _run(cmd: list[str], cwd: Path) -> tuple[int, str, str]:
    """Run a git command for the repository at *cwd* and capture its output.

    Three cases are distinguished by the form of *cwd*:

    * WSL UNC path: the command is run inside the distro as
      ``wsl -d <distro> git -C <linux_path> ...``, without a Windows cwd.
    * Other UNC path (starts with two backslashes): git is run with
      ``-c safe.directory=*`` prepended to its arguments.
    * Anything else: *cmd* is run unchanged with ``cwd`` set.

    In the first two cases ``cmd[0]`` is dropped and replaced, so *cmd* is
    expected to start with ``"git"``.

    Args:
        cmd: Argument vector, e.g. ``["git", "lfs", "track", "*.zip"]``.
        cwd: Directory the command applies to.

    Returns:
        ``(returncode, stdout, stderr)`` with both streams decoded as UTF-8
        (undecodable bytes replaced) and stripped. If the executable cannot be
        started at all, returns ``(127, "", <message>)`` instead of raising,
        so callers' ``rc != 0`` handling reports the problem.
    """
    run_cwd: Path | None = cwd
    wsl = _wsl_path(cwd)
    if wsl:
        distro, linux_path = wsl
        argv = ["wsl", "-d", distro, "git", "-C", linux_path, *cmd[1:]]
        # The repository is addressed through `git -C`; no cwd is passed here.
        run_cwd = None
    elif str(cwd).startswith("\\\\"):
        # NOTE(review): presumably works around git's ownership check on
        # network shares — confirm this is still wanted.
        argv = ["git", "-c", "safe.directory=*", *cmd[1:]]
    else:
        argv = cmd

    try:
        result = subprocess.run(
            argv,
            cwd=run_cwd,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
        )
    except OSError as exc:
        # Executable missing or not runnable: report it as a failed command.
        return 127, "", f"cannot execute {argv[0]!r}: {exc}"
    return result.returncode, result.stdout.strip(), result.stderr.strip()
def find_repo_root(start: Path) -> Path | None:
    """Locate the nearest directory at or above *start* that contains ``.git``.

    Args:
        start: Directory from which the upward search begins.

    Returns:
        The first ancestor (including *start* itself) holding a ``.git``
        entry, or None when the top of the path is reached without a match.
    """
    for candidate in (start, *start.parents):
        if (candidate / ".git").exists():
            return candidate
    return None
def git_init(repo_dir: Path, dry_run: bool) -> bool:
    """Create a git repository in *repo_dir* and report the outcome.

    Args:
        repo_dir: Directory in which ``git init`` is run.
        dry_run: When true, only announce the action and skip the command.

    Returns:
        True on success or in dry-run mode, False when ``git init`` fails.
    """
    print(c("yellow", f" Initialising git repo in: {repo_dir}"))
    if dry_run:
        print(c("dim", " [dry-run] skipped"))
        return True

    status, stdout, stderr = _run(["git", "init"], repo_dir)
    succeeded = status == 0
    if succeeded:
        print(c("green", f" {stdout or 'Initialized'}"))
    else:
        print(c("red", f" [error] git init failed: {stderr}"))
    return succeeded
def ensure_lfs_installed(repo_dir: Path, dry_run: bool) -> bool:
    """Make sure ``git lfs install`` has been run, running it if necessary.

    The presence of the ``filter.lfs.required`` config key is used as the
    "already installed" signal; per the original author's note that key is
    written by ``git lfs install``.

    Args:
        repo_dir: Directory in which the git commands are run.
        dry_run: When true, announce the install but do not run it.

    Returns:
        True when LFS is already configured, when running in dry-run mode, or
        when ``git lfs install`` succeeds; False when it fails.
    """
    probe_status, _, _ = _run(
        ["git", "config", "--get", "filter.lfs.required"], repo_dir
    )
    if probe_status == 0:
        # Key found: nothing to install.
        return True

    print(c("yellow", " Installing git-lfs hooks..."))
    if dry_run:
        print(c("dim", " [dry-run] skipped"))
        return True

    status, stdout, stderr = _run(["git", "lfs", "install"], repo_dir)
    if status == 0:
        print(c("green", f" git lfs install: {stdout or 'done'}"))
        return True

    print(c("red", f" [error] git lfs install failed: {stderr}"))
    print(c("yellow", " Ensure git-lfs is installed: https://git-lfs.com"))
    return False
def get_already_tracked(repo_root: Path) -> set[str]:
    """Return the patterns that ``.gitattributes`` already routes through LFS.

    A line counts when one of its attributes is exactly ``filter=lfs``.
    Comment lines (first field starting with ``#``) and blank lines are
    ignored. The ``[[:space:]]`` escape that git-lfs writes for spaces is
    decoded, so the result can be compared with raw patterns such as a file
    name containing a space.

    Args:
        repo_root: Directory expected to contain ``.gitattributes``.

    Returns:
        The set of tracked patterns; empty when the file is missing or
        cannot be read (best-effort: read errors are not reported).
    """
    tracked: set[str] = set()
    try:
        text = (repo_root / ".gitattributes").read_text(
            encoding="utf-8", errors="replace"
        )
    except OSError:
        # Missing or unreadable file: treat as "nothing tracked yet".
        return tracked

    for raw_line in text.splitlines():
        fields = raw_line.split()
        if not fields or fields[0].startswith("#"):
            continue
        pattern, attributes = fields[0], fields[1:]
        # Match the attribute token itself, not a substring of the line.
        if "filter=lfs" in attributes:
            tracked.add(pattern.replace("[[:space:]]", " "))
    return tracked
def lfs_track(pattern: str, repo_root: Path, dry_run: bool) -> bool:
    """Register *pattern* with ``git lfs track`` and print the result.

    Args:
        pattern: The pattern (or exact file name) to track.
        repo_root: Directory in which the command is run.
        dry_run: When true, print the command that would run and do nothing.

    Returns:
        True on success or in dry-run mode, False when the command fails.
    """
    if dry_run:
        print(c("dim", f' [dry-run] git lfs track "{pattern}"'))
        return True

    status, stdout, stderr = _run(["git", "lfs", "track", pattern], repo_root)
    if status != 0:
        print(c("red", f" [error] tracking '{pattern}': {stderr}"))
        return False

    # Show only the first line of whatever the command printed.
    output = stdout or stderr
    summary = output.splitlines()[0] if output else "done"
    print(c("green", f" {summary}"))
    return True
def lfs_pattern_for(file_path: Path) -> str:
    """Derive the LFS tracking pattern for a file.

    Args:
        file_path: Path of the large file.

    Returns:
        ``*<suffix>`` (e.g. ``*.zip``) when the file has a suffix, otherwise
        the bare file name.
    """
    extension = file_path.suffix
    if not extension:
        # No suffix to generalise on: track this exact file name.
        return file_path.name
    return "*" + extension
# ── Scan ──────────────────────────────────────────────────
def scan_large_files(root: Path, threshold_bytes: int) -> list[Path]:
    """Collect every file under *root* whose size exceeds *threshold_bytes*.

    Directories named in ``_SKIP_DIRS`` are pruned and directory symlinks are
    not followed. Symbolic links to files are skipped as well, so the size of
    a link's target is never counted. Files whose metadata cannot be read are
    silently skipped.

    Args:
        root: Directory to scan recursively.
        threshold_bytes: Size limit; only strictly larger files are returned.

    Returns:
        The matching paths in sorted order.
    """
    large: list[Path] = []
    for dirpath, dirs, files in os.walk(str(root), followlinks=False):
        # Prune in place so os.walk does not descend into skipped directories.
        dirs[:] = [d for d in dirs if d not in _SKIP_DIRS]
        base = Path(dirpath)
        for fname in files:
            fpath = base / fname
            try:
                if fpath.is_symlink():
                    continue
                size = fpath.stat().st_size
            except OSError:
                continue
            if size > threshold_bytes:
                large.append(fpath)
    return sorted(large)
# ── Entry point ───────────────────────────────────────────
def parse_args() -> argparse.Namespace:
    """Parse the command line.

    The ``--help`` epilog is taken from the ``@example`` section of the module
    docstring. It is empty when the docstring is unavailable (``__doc__`` is
    None) or has no such section.

    Returns:
        A namespace with ``dir_path`` (str), ``dry_run`` (bool) and
        ``threshold`` (float, in MB, defaulting to ``SIZE_THRESHOLD_MB``).
    """
    # Guard against a missing docstring before searching it.
    doc = __doc__ or ""
    _, marker, examples = doc.partition("@example")
    epilog = f"Examples:{examples.rstrip()}" if marker else ""

    parser = argparse.ArgumentParser(
        description="Scan a directory for large files and register them with Git LFS.",
        # Raw formatter keeps the line breaks of the examples in the epilog.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog,
    )
    parser.add_argument(
        "dir_path",
        help="Directory to scan (absolute, relative, or WSL UNC path)",
    )
    parser.add_argument(
        "--dry-run", action="store_true",
        help="Show what would be done without making any changes",
    )
    parser.add_argument(
        "--threshold", type=float, default=SIZE_THRESHOLD_MB, metavar="MB",
        help=f"File size threshold in MB (default: {SIZE_THRESHOLD_MB})",
    )
    return parser.parse_args()
def main() -> None:
    """Run the scan-and-track workflow for the directory given on the CLI.

    Steps:
        1. Scan the directory for files larger than the threshold and list them.
        2. Find the enclosing git repository, or initialise one in the directory.
        3. Ensure git-lfs is installed for the repository.
        4. Track every pattern that is not yet listed in ``.gitattributes``.
        5. Stage ``.gitattributes`` (skipped in dry-run mode or after a failure).

    Exits with status 1 when git init, the LFS install, or any tracking
    command fails. Returns normally when there is nothing to do.
    """
    args = parse_args()
    # The threshold is given in MB and converted using 1 MB = 1024 * 1024 bytes.
    threshold_bytes = int(args.threshold * 1024 * 1024)
    root = resolve_dir(args.dir_path)

    print(c("bold", "\ngit-lfs-auto"))
    print(f"Directory : {c('cyan', str(root))}")
    print(f"Threshold : {c('cyan', f'{args.threshold} MB ({threshold_bytes:,} bytes)')}")
    if args.dry_run:
        print(c("yellow", "Mode : DRY RUN — no changes will be made"))
    print()

    # ── 1. Scan for large files ───────────────────────────
    print("Scanning...", end="", flush=True)
    large_files = scan_large_files(root, threshold_bytes)
    print(f" {len(large_files)} large file(s) found\n")
    if not large_files:
        print(c("green", "No files exceed the threshold. Nothing to do."))
        return

    for f in large_files:
        try:
            size_mb = f.stat().st_size / (1024 * 1024)
        except OSError:
            # The file may have vanished since the scan; still list it.
            size_mb = 0.0
        try:
            rel = f.relative_to(root)
        except ValueError:
            rel = f
        print(f" {size_mb:7.2f} MB {rel}")
    print()

    # ── 2. Find or initialise git repo ───────────────────
    repo_root = find_repo_root(root)
    if repo_root is None:
        print(c("yellow", f"No git repo found at or above: {root}"))
        if not git_init(root, args.dry_run):
            sys.exit(1)
        repo_root = root
        print()
    else:
        print(f"Git repo : {c('cyan', str(repo_root))}\n")

    # ── 3. Ensure git-lfs hooks are installed ─────────────
    if not ensure_lfs_installed(repo_root, args.dry_run):
        sys.exit(1)

    # ── 4. Determine which patterns need tracking ─────────
    already_tracked = get_already_tracked(repo_root)
    patterns: dict[str, list[Path]] = {}  # pattern -> files that produced it
    for f in large_files:
        pat = lfs_pattern_for(f)
        if pat not in already_tracked:
            patterns.setdefault(pat, []).append(f)

    if not patterns:
        print(c("green", "\nAll large-file patterns are already in .gitattributes."))
        return

    print(f"\nTracking {len(patterns)} new pattern(s):")
    all_ok = True
    for pat in sorted(patterns):
        examples = patterns[pat]
        try:
            example_rel = examples[0].relative_to(root)
        except ValueError:
            example_rel = examples[0].name
        # Pad before colouring: padding the coloured string would count the
        # invisible escape characters and shift the column.
        print(f" {c('cyan', f'{pat:<30}')} e.g. {example_rel}")
        if not lfs_track(pat, repo_root, args.dry_run):
            all_ok = False

    # ── 5. Stage .gitattributes ───────────────────────────
    if all_ok and not args.dry_run:
        print()
        rc, _, err = _run(["git", "add", ".gitattributes"], repo_root)
        if rc == 0:
            print(c("green", ".gitattributes staged."))
        else:
            # A staging failure is reported but does not fail the run.
            print(c("yellow", f"Could not stage .gitattributes: {err}"))

    print()
    if args.dry_run:
        print(c("yellow", "Dry-run complete — no files were modified."))
    elif all_ok:
        print(c("green", "Done. Commit .gitattributes to complete LFS setup."))
        print(c("dim", ' git commit -m "chore: track large files with Git LFS"'))
    else:
        print(c("red", "Some patterns could not be tracked — check errors above."))
        sys.exit(1)
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

# File Information
# - Filename: git-lfs-auto.py
# - Category: python
# - Language: PYTHON