git-clean-ignored
No description available
Code
python
#!/usr/bin/env python3
"""
@title git-clean-ignored
@description Scan a directory for Git repos and clean all gitignored files, safely.
@author ropean, Claude Sonnet (Anthropic)
@version 1.0.0
Scans every immediate subdirectory of <dir> for Git repositories, then removes
files matched by .gitignore rules via `git clean -fdX`, while preserving
environment and secrets files (.env, credentials, keys, etc.).
Large directories such as node_modules are deleted in bulk before git clean
runs, using a long-path-safe method (\\\\?\\ prefix on Windows, rm -rf in WSL)
to avoid the 260-character NTFS path limit common with pnpm/yarn workspaces.
Repos with submodules are handled by discovering submodule paths via
`git submodule foreach` and cleaning each one individually, so every level
of nesting gets the same long-path-safe treatment.
Supports Windows absolute paths, relative paths, and WSL UNC paths
(\\\\wsl.localhost\\<distro>\\... or \\\\wsl$\\<distro>\\...).
Running without arguments launches an interactive setup wizard.
@example
python git-clean-ignored.py "D:\\Git"
python git-clean-ignored.py "D:\\Git" --dry-run
python git-clean-ignored.py "D:\\Git" --recursive --depth 2 --submodules
python git-clean-ignored.py "\\\\wsl.localhost\\Ubuntu\\home\\user\\projects"
python git-clean-ignored.py # interactive wizard
"""
from __future__ import annotations
# ════════════════════════════════════════════════════════════
# USER CONFIG
# ════════════════════════════════════════════════════════════
# Gitignore-style patterns describing files that must NEVER be deleted,
# even when a .gitignore rule would otherwise match them.
# A pattern that contains no '/' applies in every subdirectory of the repo.
PROTECTED_PATTERNS: list[str] = [
    # --- dotenv variants ---
    ".env",
    ".env.*",
    "*.env",
    ".envrc",  # direnv
    ".env.local",
    ".env.*.local",
    # --- secrets / credentials ---
    "secrets",
    "secrets.*",
    "*.secret",
    "*.secrets",
    ".secret",
    ".secrets",
    "secret.yml",
    "secret.yaml",
    "credentials",
    "credentials.*",
    ".credentials",
    "*.credentials",
    # --- common secrets paths ---
    "config/secrets*",
    "config/credentials*",
    ".vault-token",  # HashiCorp Vault
    # --- key / certificate files ---
    "*.pem",
    "*.key",
    "*.p12",
    "*.pfx",
]
# Directory names that are never entered while searching for git repos.
_SKIP_DIRS: set[str] = {
    ".git",
    "__pycache__",
    "node_modules",
    "venv",
    ".venv",
}
# Directory names removed in bulk BEFORE `git clean` runs.
# They are typically gitignored and may hold deeply nested paths that exceed
# the 260-character Windows path limit (e.g. pnpm's node_modules/.pnpm/...),
# which is why they get the long-path-safe deletion instead of git clean.
BULK_DELETE_DIRS: list[str] = [
    "node_modules",
    ".next",
    ".nuxt",
    ".turbo",
    ".svelte-kit",
    ".output",
    "dist",
    "build",
    ".cache",
    ".parcel-cache",
    ".vite",
]
# ════════════════════════════════════════════════════════════
# END OF USER CONFIG
# ════════════════════════════════════════════════════════════
import argparse
import os
import shutil
import subprocess
import sys
from pathlib import Path
# ── Terminal colors ───────────────────────────────────────
def _supports_color() -> bool:
    """Decide whether ANSI color codes should be emitted.

    Checks, in order:
      1. ``NO_COLOR`` set to a non-empty value  -> never use color.
      2. ``FORCE_COLOR`` or ``COLORTERM`` set   -> use color.
      3. ``TERM`` set to anything but "dumb"    -> use color.
      4. On Windows, ``WT_SESSION`` or ``TERM_PROGRAM`` set -> use color.
      5. Otherwise fall back to whether stdout is a TTY.

    Returns:
        True when colored output should be produced.
    """
    env = os.environ

    # Honor the NO_COLOR opt-out before any of the opt-in heuristics,
    # otherwise a set TERM would make it impossible to disable color.
    if env.get("NO_COLOR"):
        return False

    if env.get("FORCE_COLOR") or env.get("COLORTERM"):
        return True

    term = env.get("TERM", "")
    if term and term != "dumb":
        return True

    if os.name == "nt" and (env.get("WT_SESSION") or env.get("TERM_PROGRAM")):
        return True

    return sys.stdout.isatty()
# ANSI escape sequences keyed by the color/style names accepted by c().
_C = {
    "red": "\033[31m",
    "yellow": "\033[33m",
    "green": "\033[32m",
    "cyan": "\033[36m",
    "bold": "\033[1m",
    "dim": "\033[2m",
    "reset": "\033[0m",
}
if not _supports_color():
    # Keep the same keys but blank every code so c() returns plain text.
    _C = dict.fromkeys(_C, "")
def c(color: str, text: str) -> str:
    """Wrap ``text`` in the escape code for ``color`` followed by a reset.

    ``color`` must be one of the keys of ``_C``; an unknown name raises
    KeyError.
    """
    start = _C[color]
    end = _C["reset"]
    return f"{start}{text}{end}"
# ── Path helpers ──────────────────────────────────────────
def _wsl_path(p: Path) -> tuple[str, str] | None:
    """Split a WSL UNC path into its distro name and Linux-side path.

    Recognizes ``\\\\wsl.localhost\\<distro>\\...`` and
    ``\\\\wsl$\\<distro>\\...``. The host part is matched
    case-insensitively because Windows UNC host names are not
    case-sensitive.

    Args:
        p: Path to inspect; only its string form is examined.

    Returns:
        ``(distro, linux_path)`` when ``p`` is a WSL UNC path with a distro
        component, where ``linux_path`` is absolute and uses forward slashes
        ("/" when nothing follows the distro). ``None`` otherwise.
    """
    s = str(p)
    lowered = s.lower()
    for prefix in ("\\\\wsl.localhost\\", "\\\\wsl$\\"):
        if not lowered.startswith(prefix):
            continue
        # e.g. "Ubuntu\home\user\projects" -> ("Ubuntu", "home\user\projects")
        distro, _, tail = s[len(prefix):].partition("\\")
        if not distro:
            # A bare "\\wsl.localhost\" names no distro, so it cannot be
            # addressed through `wsl -d`.
            return None
        return distro, "/" + tail.replace("\\", "/")
    return None
def resolve_dir(arg: str) -> Path:
    """Turn the user-supplied directory argument into a usable Path.

    ``~`` is expanded, and the path is made absolute via ``resolve()``
    unless it is a UNC path. Exits the program with an error message when
    the result is not an existing directory.
    """
    candidate = Path(arg).expanduser()

    # resolve() corrupts UNC paths (\\server\share) on Windows, so those are
    # used exactly as given.
    is_unc = str(candidate).startswith("\\\\")
    directory = candidate if is_unc else candidate.resolve()

    if directory.is_dir():
        return directory
    sys.exit(c("red", f"[error] Not a directory: {directory}"))
# ── Git helpers ───────────────────────────────────────────
def _run(cmd: list[str], cwd: Path) -> tuple[int, str, str]:
    """Run a git command for the repository located at ``cwd``.

    Three cases are distinguished by the form of ``cwd``:
      * WSL UNC path: git is executed inside the distro through ``wsl -d``
        and pointed at the repo with ``git -C <linux_path>``.
      * Other UNC path: git runs locally with ``-c safe.directory=*``.
      * Anything else: ``cmd`` runs unchanged with ``cwd`` as working dir.

    Args:
        cmd: Command line whose first element is the git executable; in the
            two UNC cases that first element is replaced, so only
            ``cmd[1:]`` is forwarded.
        cwd: Repository directory.

    Returns:
        ``(returncode, stdout, stderr)`` with both streams stripped. If the
        process cannot be started at all (e.g. ``git`` or ``wsl`` is not
        installed), ``(127, "", <message>)`` is returned instead of raising,
        so callers can report the failure like any other git error.
    """
    wsl = _wsl_path(cwd)
    if wsl:
        distro, linux_path = wsl
        # NOTE(review): wsl.exe appears to pass the command line to the
        # distro's default shell unless --exec is given, so arguments such as
        # "echo $displaypath" or glob patterns may be expanded before git
        # sees them. Confirm, and consider `wsl -d <distro> --exec git ...`.
        actual_cmd = ["wsl", "-d", distro, "git", "-C", linux_path] + cmd[1:]
        run_cwd = None  # the repo is selected with -C inside the distro
    elif str(cwd).startswith("\\\\"):
        actual_cmd = ["git", "-c", "safe.directory=*"] + cmd[1:]
        run_cwd = cwd
    else:
        actual_cmd = list(cmd)
        run_cwd = cwd

    try:
        result = subprocess.run(
            actual_cmd,
            cwd=run_cwd,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
        )
    except OSError as err:
        # Missing executable or unusable working directory.
        return 127, "", f"cannot run {actual_cmd[0]!r}: {err}"

    return result.returncode, result.stdout.strip(), result.stderr.strip()
def is_git_repo(path: Path) -> bool:
    """Report whether ``path`` holds a ``.git`` entry.

    The entry may be a directory or a plain file; the latter covers
    submodules.
    """
    git_marker = path / ".git"
    return git_marker.exists()
# ── Repo discovery ────────────────────────────────────────
def find_git_repos(root: Path, recursive: bool, max_depth: int = 3) -> list[Path]:
    """Return the git repo roots found inside ``root``.

    Args:
        root: Directory whose children are examined (``root`` itself is not
            tested).
        recursive: When False only the immediate children are examined.
            When True the search descends, but never into a repo that was
            found and never into a directory named in ``_SKIP_DIRS``.
        max_depth: Maximum number of levels below ``root`` to examine in
            recursive mode (1 = immediate children). Ignored when
            ``recursive`` is False.

    Returns:
        Repo directories in sorted, depth-first order.
    """
    repos: list[Path] = []
    # Non-recursive mode always means exactly one level, whatever max_depth is.
    limit = max_depth if recursive else 1

    def _scan(directory: Path, depth: int) -> None:
        if depth > limit:
            return
        try:
            entries = sorted(directory.iterdir())
        except OSError:
            # Unreadable or vanished directory (PermissionError included):
            # skip it rather than abort the whole scan.
            return
        for entry in entries:
            if entry.name in _SKIP_DIRS or not entry.is_dir():
                continue
            if is_git_repo(entry):
                # Record it and stop: a found repo is never descended into.
                repos.append(entry)
            elif recursive:
                _scan(entry, depth + 1)

    _scan(root, 1)
    return repos
# ── Bulk directory deletion (long-path safe) ─────────────
def _rmtree_windows(path: Path) -> None:
    """Delete a directory tree, avoiding the Windows 260-char path limit.

    On Windows, a drive-letter path is deleted with ``cmd /c rmdir /s /q``
    using the ``\\\\?\\`` extended-length prefix. UNC paths
    (``\\\\server\\share``) cannot take that prefix in this form and are
    removed with ``shutil.rmtree``. On any other operating system there is
    no ``cmd`` and no such path limit, so ``shutil.rmtree`` is used directly.

    Deletion is best-effort: errors are not raised.

    Args:
        path: Directory to remove. A missing directory is not an error.
    """
    p = str(path)

    if os.name != "nt":
        # No cmd.exe here; invoking it would raise FileNotFoundError.
        shutil.rmtree(p, ignore_errors=True)
        return

    if p.startswith("\\\\?\\"):
        long_p = p  # already extended-length
    elif p.startswith("\\\\"):
        # Plain UNC path: the simple \\?\ prefix does not apply.
        shutil.rmtree(p, ignore_errors=True)
        return
    else:
        # \\?\ disables path normalization, so the path must be absolute.
        long_p = "\\\\?\\" + os.path.abspath(p)

    subprocess.run(
        ["cmd", "/c", "rmdir", "/s", "/q", long_p],
        capture_output=True,
    )
    if os.path.isdir(long_p):
        # rmdir left something behind; make one more best-effort attempt.
        shutil.rmtree(long_p, ignore_errors=True)
def _rmtree_wsl(linux_path: str, distro: str) -> None:
    """Remove ``linux_path`` inside the WSL distro ``distro`` via ``rm -rf``.

    The command's output and exit status are captured and discarded.
    """
    command = ["wsl", "-d", distro, "rm", "-rf", "--", linux_path]
    subprocess.run(command, capture_output=True)
def _is_gitignored(name: str, repo: Path) -> bool:
    """Return True when ``git check-ignore -q`` exits 0 for ``name`` in ``repo``.

    Used as a safety check so that only ignored directories are bulk-deleted.
    """
    exit_code = _run(["git", "check-ignore", "-q", name], repo)[0]
    return exit_code == 0
def _delete_bulk_dir(git_rel: str, display: str, repo: Path, dry_run: bool, remove) -> None:
    """Delete one bulk candidate after confirming git ignores it.

    Args:
        git_rel: Path relative to ``repo`` using forward slashes, as git
            expects on every platform.
        display: The same path formatted for console output.
        repo: Repository the candidate belongs to.
        dry_run: When True only report what would be deleted.
        remove: Zero-argument callable that performs the actual deletion.
    """
    if not _is_gitignored(git_rel, repo):
        return
    if dry_run:
        print(c("dim", f" [dry-run] would delete {display}{os.sep}"))
        return
    print(c("yellow", f" Deleting {display}{os.sep} (long-path safe)"), end="", flush=True)
    remove()
    print(c("green", " done"))


def _bulk_delete_wsl(
    repo: Path, distro: str, linux_repo: str, names: set[str], dry_run: bool, max_depth: int
) -> None:
    """Locate candidates with ``find`` inside the distro and delete them with ``rm -rf``."""
    for name in sorted(names):
        result = subprocess.run(
            [
                "wsl", "-d", distro,
                "find", linux_repo,
                "-maxdepth", str(max_depth),
                "-name", name, "-type", "d",
                "-not", "-path", "*/.git/*",
                # Do not descend into a match: its whole tree is removed
                # anyway, and nested matches would each cost extra
                # git/rm round-trips through wsl.
                "-prune",
            ],
            capture_output=True, text=True, encoding="utf-8", errors="replace",
        )
        for line in result.stdout.splitlines():
            linux_target = line.strip()
            if not linux_target:
                continue
            # git runs inside Linux here, so the relative path must keep
            # forward slashes; only the displayed form uses os.sep.
            git_rel = linux_target[len(linux_repo):].lstrip("/")
            _delete_bulk_dir(
                git_rel,
                git_rel.replace("/", os.sep),
                repo,
                dry_run,
                lambda target=linux_target: _rmtree_wsl(target, distro),
            )


def _bulk_delete_local(repo: Path, names: set[str], dry_run: bool, max_depth: int) -> None:
    """Walk the tree in-process so deletion can use the long-path-safe remover."""

    def _walk(directory: Path, depth: int) -> None:
        if depth > max_depth:
            return
        try:
            entries = sorted(directory.iterdir())
        except OSError:  # includes PermissionError
            return
        for entry in entries:
            if not entry.is_dir():
                continue
            if entry.name in names:
                try:
                    rel = entry.relative_to(repo)
                    display, git_rel = str(rel), rel.as_posix()
                except ValueError:
                    display = git_rel = entry.name
                _delete_bulk_dir(
                    git_rel,
                    display,
                    repo,
                    dry_run,
                    lambda target=entry: _rmtree_windows(target),
                )
                # A matching directory is never descended into, whether or
                # not it was deleted.
            elif entry.name != ".git":
                _walk(entry, depth + 1)

    _walk(repo, 1)


def bulk_delete_ignored_dirs(repo: Path, dry_run: bool, max_depth: int = 4) -> None:
    """Delete gitignored directories named in ``BULK_DELETE_DIRS``.

    Searches ``repo`` up to ``max_depth`` levels deep for directories whose
    name is listed in ``BULK_DELETE_DIRS``, skipping ``.git``. Each candidate
    is verified as gitignored before deletion, and the search never descends
    into a candidate. Progress is printed to stdout.

    Args:
        repo: Repository root; may be a WSL UNC path, in which case the
            search and deletion run inside the distro.
        dry_run: When True, only print what would be deleted.
        max_depth: Number of directory levels below ``repo`` to search.
    """
    bulk_names = set(BULK_DELETE_DIRS)
    wsl = _wsl_path(repo)
    if wsl:
        distro, linux_repo = wsl
        _bulk_delete_wsl(repo, distro, linux_repo, bulk_names, dry_run, max_depth)
    else:
        _bulk_delete_local(repo, bulk_names, dry_run, max_depth)
# ── Submodule discovery ───────────────────────────────────
def get_submodule_paths(repo: Path) -> list[Path]:
    """List the paths of every submodule (nested ones included) of ``repo``.

    The paths come from ``git submodule foreach --quiet --recursive``
    echoing ``$displaypath``, so git itself does the .gitmodules parsing.
    Each reported path is joined onto ``repo``. For non-WSL repos, entries
    that are not existing directories are dropped; for WSL repos git's
    output is trusted as-is.

    Returns:
        Submodule paths, or an empty list when git fails or reports none.
    """
    foreach_cmd = [
        "git", "submodule", "foreach", "--quiet", "--recursive", "echo $displaypath",
    ]
    status, stdout, _stderr = _run(foreach_cmd, repo)
    if status != 0 or not stdout:
        return []

    trust_git = bool(_wsl_path(repo))  # WSL: skip the is_dir check
    found: list[Path] = []
    for raw in stdout.splitlines():
        rel = raw.strip()
        if not rel:
            continue
        # $displaypath uses forward slashes; Path() accepts them on Windows too
        submodule = repo / Path(rel)
        if trust_git or submodule.is_dir():
            found.append(submodule)
    return found
# ── git clean ─────────────────────────────────────────────
def build_clean_cmd(dry_run: bool) -> list[str]:
    """Build the ``git clean -fdX`` command that spares protected paths.

    With ``-X`` git clean removes exactly the paths matched by its ignore
    rules, and every ``-e <pattern>`` is *added to* those rules. A plain
    ``-e .env`` would therefore mark ``.env`` for removal. To keep a path
    it must be un-ignored with a negated pattern (``-e '!.env'``), which is
    what this function emits for every entry of ``PROTECTED_PATTERNS`` and
    ``BULK_DELETE_DIRS``.

    Limitation (gitignore semantics): a negated pattern cannot re-include a
    file whose parent directory is itself ignored.

    Args:
        dry_run: When True, ``-n`` is added so git only lists what it
            would remove.

    Returns:
        The argument list, starting with ``"git"``.
    """
    cmd = ["git", "clean", "-fdX"]
    if dry_run:
        cmd.append("-n")  # --dry-run: list files, don't delete

    # Protected files first, then the bulk-delete directories, which are
    # handled separately before git clean and must be skipped here.
    for pattern in [*PROTECTED_PATTERNS, *BULK_DELETE_DIRS]:
        # Tolerate an entry that was already written in negated form.
        cmd += ["-e", "!" + pattern.lstrip("!")]
    return cmd
def _clean_one(repo: Path, dry_run: bool, indent: str = " ") -> bool:
    """Clean a single repo directory: bulk delete first, then git clean.

    Every line of git's output is echoed with ``indent`` in front of it
    (plus a ``[dry-run]`` tag in dry-run mode).

    Returns:
        False when git clean exits non-zero, True otherwise.
    """
    bulk_delete_ignored_dirs(repo, dry_run)

    status, stdout, stderr = _run(build_clean_cmd(dry_run), repo)
    if status != 0:
        print(c("red", f"{indent}[error] git clean failed: {stderr or stdout}"))
        return False

    if not stdout:
        print(c("dim", f"{indent}(nothing left to clean)"))
        return True

    color = "dim" if dry_run else "green"
    lead = f"{indent}[dry-run] " if dry_run else indent
    for entry in stdout.splitlines():
        print(c(color, f"{lead}{entry}"))
    return True
def clean_repo(repo: Path, dry_run: bool, recurse_submodules: bool) -> bool:
    """Clean ``repo`` and, optionally, each of its submodules.

    Steps:
      1. Clean the repo itself (bulk delete + git clean).
      2. When ``recurse_submodules`` is set, do the same for every
         submodule reported by git, printing a label for each.

    Returns:
        True only if the repo and every processed submodule cleaned
        without error.
    """
    succeeded = _clean_one(repo, dry_run)

    submodules = get_submodule_paths(repo) if recurse_submodules else []
    if not submodules:
        return succeeded

    print(c("dim", f" Submodules ({len(submodules)}):"))
    for submodule in submodules:
        try:
            label = submodule.relative_to(repo)
        except ValueError:
            # Not located under repo: show the full path instead.
            label = submodule
        print(c("bold", f" [{label}]"))
        if not _clean_one(submodule, dry_run, indent=" "):
            succeeded = False
        print()
    return succeeded
# ── Interactive prompt helpers ────────────────────────────
def _ask(prompt: str, default: str = "") -> str:
    """Prompt for a line of text and return it stripped.

    An empty answer yields ``default``, which is also shown as a hint when
    non-empty. Ctrl-C or end-of-input ends the program with exit status 0.
    """
    hint = ""
    if default:
        hint = f" [{c('dim', default)}]"

    try:
        answer = input(f" {prompt}{hint}: ")
    except (EOFError, KeyboardInterrupt):
        print()
        sys.exit(0)

    return answer.strip() or default
def _ask_yn(prompt: str, default: bool = False) -> bool:
    """Prompt for a yes/no answer.

    "y", "yes" and "1" (case-insensitive) count as yes; an empty answer
    yields ``default``; anything else is no. Ctrl-C or end-of-input ends
    the program with exit status 0.
    """
    choices = "Y/n" if default else "y/N"
    hint = c("dim", choices)

    try:
        reply = input(f" {prompt} [{hint}]: ").strip().lower()
    except (EOFError, KeyboardInterrupt):
        print()
        sys.exit(0)

    if not reply:
        return default
    return reply in ("y", "yes", "1")
def interactive_prompt() -> argparse.Namespace:
    """Walk the user through all options when no CLI args are given.

    Asks, in order, for the directory, recursion, recursion depth (only
    when recursive), submodule handling and dry-run mode.

    Returns:
        A Namespace with the attributes ``dir_path``, ``recursive``,
        ``depth``, ``submodules`` and ``dry_run``.
    """
    print(c("bold", "\ngit-clean-ignored — interactive setup\n"))

    # 1. Directory
    dir_path = _ask("Directory to scan (absolute, relative, or WSL UNC path)", ".")

    # 2. Recursive
    recursive = _ask_yn("Scan subdirectories recursively?", default=False)

    # 3. Depth (only relevant when recursive)
    depth = 3
    if recursive:
        raw = _ask("Max recursion depth", "3")
        try:
            depth = int(raw)
        except ValueError:
            depth = 0  # rejected by the range check below
        if depth < 1:
            # Zero or negative depth would make the scan find nothing.
            print(c("yellow", " Invalid number, using default 3."))
            depth = 3

    # 4. Submodules
    submodules = _ask_yn("Also clean inside submodules?", default=True)

    # 5. Dry-run: asked last and defaulting to yes, so nothing is deleted by
    #    accident.
    dry_run = _ask_yn("Dry-run first? (recommended)", default=True)
    print()

    return argparse.Namespace(
        dir_path=dir_path,
        recursive=recursive,
        depth=depth,
        submodules=submodules,
        dry_run=dry_run,
    )
# ── Entry point ───────────────────────────────────────────
def parse_args() -> argparse.Namespace:
    """Parse the command line.

    The help epilog is taken from the module docstring: the text after a
    "Usage:" marker if present, otherwise the text after "@example". A
    missing docstring (e.g. when running with ``-OO``) yields no epilog.

    Returns:
        A Namespace with ``dir_path``, ``dry_run``, ``recursive``,
        ``depth`` and ``submodules``.

    Raises:
        SystemExit: On invalid arguments, including ``--depth`` below 1.
    """
    doc = __doc__ or ""  # __doc__ is None when docstrings are stripped
    _, found, tail = doc.partition("Usage:")
    if found:
        epilog = tail
    else:
        _, found, tail = doc.partition("@example")
        epilog = f"examples:{tail.rstrip()}" if found else ""

    parser = argparse.ArgumentParser(
        description="Clean gitignored files from Git repos found inside a directory.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog,
    )
    parser.add_argument(
        "dir_path",
        help="Directory to scan (absolute, relative, or WSL UNC path)",
    )
    parser.add_argument(
        "--dry-run", action="store_true",
        help="Show what would be removed without deleting anything",
    )
    parser.add_argument(
        "--recursive", action="store_true",
        help="Descend into subdirectories to find git repos (default depth: 3)",
    )
    parser.add_argument(
        "--depth", type=int, default=3, metavar="N",
        help="Max recursion depth when using --recursive (default: 3)",
    )
    parser.add_argument(
        "--submodules", action="store_true",
        help="Also clean inside submodules (reads .gitmodules and cleans each submodule separately)",
    )

    args = parser.parse_args()
    if args.depth < 1:
        # A depth below 1 would silently find no repositories.
        parser.error("--depth must be at least 1")
    return args
def _relative_label(path: Path, root: Path) -> Path:
    """Return ``path`` relative to ``root``, or ``path`` itself if it is not inside."""
    try:
        return path.relative_to(root)
    except ValueError:
        return path


def main() -> None:
    """Program entry point.

    Collects options (interactive wizard when run without arguments, CLI
    parsing otherwise), discovers the repositories, cleans each one and
    prints a summary.

    Exits with status 1 when any repository failed to clean, in dry-run
    mode as well as in a real run.
    """
    args = interactive_prompt() if len(sys.argv) == 1 else parse_args()
    root = resolve_dir(args.dir_path)

    print(c("bold", "\ngit-clean-ignored"))
    print(f"Directory : {c('cyan', str(root))}")
    if args.recursive:
        print(f"Recursive : depth ≤ {c('cyan', str(args.depth))}")
    if args.submodules:
        print(f"Submodules: {c('cyan', 'will be cleaned')}")
    if args.dry_run:
        print(c("yellow", "Mode : DRY RUN — no files will be deleted"))
    print()

    # ── 1. Discover repos ─────────────────────────────────
    # If the given directory is itself a repo, clean it directly.
    if is_git_repo(root):
        repos = [root]
    else:
        repos = find_git_repos(root, args.recursive, args.depth)

    if not repos:
        if args.recursive:
            print(c("yellow", f"No git repositories found within depth {args.depth}."))
        else:
            print(c("yellow", "No git repositories found in the immediate subdirectories."))
            print(c("dim", "Tip: use --recursive to search at any depth."))
        return

    print(f"Found {c('cyan', str(len(repos)))} git repo(s):\n")
    for repo in repos:
        print(f" {_relative_label(repo, root)}")
    print()

    # ── 2. Protected-patterns summary ─────────────────────
    shown = " ".join(PROTECTED_PATTERNS[:6])
    more = " ..." if len(PROTECTED_PATTERNS) > 6 else ""
    print(c("dim", f"Protected patterns ({len(PROTECTED_PATTERNS)}): " + shown + more))
    print()

    # ── 3. Clean each repo ────────────────────────────────
    ok_count = 0
    fail_count = 0
    for repo in repos:
        print(c("bold", f"[{_relative_label(repo, root)}]"))
        if clean_repo(repo, args.dry_run, args.submodules):
            ok_count += 1
        else:
            fail_count += 1
        print()

    # ── 4. Summary ────────────────────────────────────────
    if args.dry_run:
        print(c("yellow", f"Dry-run complete. {ok_count} repo(s) checked, {fail_count} error(s)."))
    elif fail_count == 0:
        print(c("green", f"Done. {ok_count} repo(s) cleaned successfully."))
    else:
        print(c("red", f"Finished with errors: {ok_count} ok, {fail_count} failed."))

    if fail_count:
        # Report failures through the exit status in dry-run mode too.
        sys.exit(1)
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

# File Information
# - Filename: git-clean-ignored.py
# - Category: python
# - Language: PYTHON