FileJanitor

Overview

FileJanitor is a Python package for cleaning and organizing your file system. It provides simple functions to standardize filenames, flatten directory structures, replace patterns in filenames, and index files.

Installation

Install FileJanitor from Test PyPI:

pip install -i https://test.pypi.org/simple/ filejanitor

Functions

  • Flatten directories - Bring all nested files to a single level
  • Standardize filenames - Clean and normalize filename formats
  • Replace patterns - Bulk rename files with pattern matching
  • Index files - Prefix filenames with a numeric index to put them in a chosen order

See the Examples page and the executable examples below for detailed usage.

See the Function References page for the complete API reference.

Executable Examples

standardize_filename

from pathlib import Path
import tempfile
from FileJanitor import standardize_filename

# Create a temporary directory with messy filenames
with tempfile.TemporaryDirectory() as tmp:
    tmp_path = Path(tmp)

    # Create example files
    (tmp_path / "My Document (1).pdf").write_text("dummy")
    (tmp_path / "Final-Report_2024.txt").write_text("dummy")
    (tmp_path / "messy name-file.csv").write_text("dummy")

    print("Before:")
    for f in sorted(tmp_path.iterdir()):
        print(" -", f.name)

    # Standardize filenames in the directory
    standardize_filename(tmp_path, case="lower", sep="_")

    print("\nAfter:")
    for f in sorted(tmp_path.iterdir()):
        print(" -", f.name)
Before:
 - Final-Report_2024.txt
 - My Document (1).pdf
 - messy name-file.csv

After:
 - final_report_2024.txt
 - messy_name_file.csv
 - my_document_(1).pdf

replace_pattern

from pathlib import Path
import tempfile
from FileJanitor import replace_pattern 

# Create a temporary directory with example files
with tempfile.TemporaryDirectory() as tmp:
    tmp_path = Path(tmp)

    # Create sample files
    (tmp_path / "file_janitors.txt").write_text("dummy")
    (tmp_path / "report_v1_final.pdf").write_text("dummy")
    (tmp_path / "notes.txt").write_text("dummy")  

    print("Before:")
    for f in sorted(tmp_path.iterdir()):
        print(" -", f.name)

    # Replace "_" with " & " in filenames
    replace_pattern("_", " & ", dir=tmp_path)

    print("\nAfter:")
    for f in sorted(tmp_path.iterdir()):
        print(" -", f.name)
Before:
 - file_janitors.txt
 - notes.txt
 - report_v1_final.pdf

After:
 - file & janitors.txt
 - notes.txt
 - report & v1 & final.pdf

flatten

from pathlib import Path
import tempfile
from FileJanitor import flatten

# Create a temporary nested directory structure
with tempfile.TemporaryDirectory() as tmp:
    tmp_path = Path(tmp)

    nested = tmp_path / "project"
    nested.mkdir()

    subfolder = nested / "data"
    subfolder.mkdir()

    # Create files at different levels
    (nested / "top_level.txt").write_text("dummy")
    (subfolder / "nested_file.csv").write_text("dummy")

    print("Before flatten:")
    for path in nested.rglob("*"):
        print(" -", path.relative_to(tmp_path))

    # Flatten all files into the root temp directory (recursive)
    flatten(nested_directory=nested, output_directory=tmp_path, recursive=True)

    print("\nAfter flatten:")
    for path in tmp_path.iterdir():
        print(" -", path.name)
Before flatten:
 - project/top_level.txt
 - project/data
 - project/data/nested_file.csv

After flatten:
 - top_level.txt
 - project
 - nested_file.csv

index_files

from pathlib import Path
import tempfile
from FileJanitor import index_files 

# Create a temporary directory with example files
with tempfile.TemporaryDirectory() as tmp:
    tmp_path = Path(tmp)

    # Create sample files
    (tmp_path / "discussion.pdf").write_text("dummy")
    (tmp_path / "intro.pdf").write_text("dummy")
    (tmp_path / "conclusions.pdf").write_text("dummy")
    (tmp_path / "analysis.pdf").write_text("dummy")

    print("Before indexing:")
    for f in sorted(tmp_path.iterdir()):
        print(" -", f.name)

    # Define desired order
    order = [
        "intro.pdf",
        "analysis.pdf",
        "discussion.pdf",
        "conclusions.pdf",
    ]

    # Index files
    index_files(dir=tmp_path, order=order, unlisted="hide")

    print("\nAfter indexing:")
    for f in sorted(tmp_path.iterdir()):
        print(" -", f.name)
Before indexing:
 - analysis.pdf
 - conclusions.pdf
 - discussion.pdf
 - intro.pdf

After indexing:
 - 01_intro.pdf
 - 02_analysis.pdf
 - 03_discussion.pdf
 - 04_conclusions.pdf

Contributors