Weeding Screener (`weeding_screener.py`)

This tool takes a circulation/export CSV and adds a weeding_candidate flag based on simple rules (years since last checkout, years since publication, total checkouts).

How It Works (In Plain Language)

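Finds key columns by looking for words in the column names: “last” together with “check” for the last-checkout date, “year” or “pub” for the publication year, and “total” together with “check” for the total checkouts.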
Reads each row and tries to understand: how long ago it was last checked out, how old it is, and how many times it has been checked out.
Uses three simple rules set at the top of the file (by default: 5+ years since last checkout, 10+ years since publication, and zero total checkouts). You can edit those numbers.
If at least two conditions are true, it marks weeding_candidate = "yes" for that row.
Saves a new CSV with all your original columns plus the weeding_candidate column so you can sort/filter in Excel.

How to Use

Place your circ/export CSV (for example circ_data.csv) in this folder.
Open Terminal and run: cd ~/Desktop/library_pop_up_tools
Then run: python weeding_screener.py circ_data.csv weeding_candidates.csv
Open weeding_candidates.csv and look for weeding_candidate = yes.

library_pop_up_tools % python weeding_screener.py circ_data.csv weeding_candidates.csv
Weeding candidates written to: weeding_candidates.csv

Optional: Adjust the Rules

You can use this tool without changing anything. At the top of the script there are a few numbers you can edit to match your local policy (for example how many years since last checkout).

MAX_YEARS_SINCE_LAST_CHECKOUT = 5
MAX_YEARS_SINCE_PUBLICATION = 10
MIN_TOTAL_CHECKOUTS = 0

If you want to be stricter or looser, open weeding_screener.py, change these numbers, save, and run it again.

Full Python Source (Optional)

Click to show the full script

#!/usr/bin/env python3
"""
weeding_screener.py

Pop-up tool to flag potential weeding candidates from a circulation export (CSV).

You can adjust the defaults at the top of this file to match your local policy.

Example:
    python weeding_screener.py circ_export.csv weeding_candidates.csv
"""

import csv
import sys
from datetime import datetime
from pathlib import Path

# Screening thresholds -- edit these to match local policy.
# A row is flagged when at least two of the three checks below are met.

# Years since the last checkout at or above which an item counts as idle.
MAX_YEARS_SINCE_LAST_CHECKOUT = 5
# Years since publication (counted from 1 January of the publication year)
# at or above which an item counts as old.
MAX_YEARS_SINCE_PUBLICATION = 10
# Total checkouts at or below which an item counts as rarely used.
MIN_TOTAL_CHECKOUTS = 0

# Date layouts tried, in this order, when reading a last-checkout value.
DATE_FORMATS = [
    "%Y-%m-%d",
    "%m/%d/%Y",
    "%Y/%m/%d",
]


def parse_year(value: str) -> int | None:
    if not value:
        return None
    digits = "".join(ch for ch in value if ch.isdigit())
    if len(digits) >= 4:
        try:
            return int(digits[:4])
        except ValueError:
            return None
    return None


def parse_date(value: str) -> datetime | None:
    if not value:
        return None
    value = value.strip()
    for fmt in DATE_FORMATS:
        try:
            return datetime.strptime(value, fmt)
        except ValueError:
            continue
    return None


def years_between(start: datetime, end: datetime) -> float:
    """Return the time from *start* to *end* in approximate years.

    Only the whole-day part of the difference is used, divided by 365.25.
    The result is negative when *end* is earlier than *start*.
    """
    elapsed = end - start
    whole_days = elapsed.days
    return whole_days / 365.25


def should_flag(row: dict, today: datetime) -> bool:
    """Decide whether one CSV row looks like a weeding candidate.

    Columns are located by words in their header names (case-insensitive):
    "last" and "check" for the last-checkout date, "year" or "pub" for the
    publication year, and "total" and "check" for the checkout count.  Up to
    three checks are then evaluated against the module-level thresholds:

    * years since last checkout >= MAX_YEARS_SINCE_LAST_CHECKOUT
    * years since publication   >= MAX_YEARS_SINCE_PUBLICATION
    * total checkouts           <= MIN_TOTAL_CHECKOUTS

    A check whose value cannot be read from the row is left out.

    Args:
        row: Mapping of column header to cell value for one record.
        today: Reference date used to compute elapsed years.

    Returns:
        True when at least two of the checks are met, otherwise False.
    """
    # csv.DictReader files surplus cells under a None key; only real (string)
    # headers can be matched by name, so anything else is skipped.
    headers = [(k, k.lower()) for k in row if isinstance(k, str)]
    last_checkout_keys = [k for k, low in headers if "last" in low and "check" in low]
    pub_year_keys = [k for k, low in headers if "year" in low or "pub" in low]
    total_chk_keys = [k for k, low in headers if "total" in low and "check" in low]

    last_checkout_years = None
    pub_years = None
    total_checkouts = None

    # For each measure, the first matching column that yields a value wins.
    for k in last_checkout_keys:
        d = parse_date(row.get(k, ""))
        if d:
            last_checkout_years = years_between(d, today)
            break

    for k in pub_year_keys:
        y = parse_year(row.get(k, ""))
        if y:  # also rejects year 0, which datetime cannot represent
            pub_years = years_between(datetime(y, 1, 1), today)
            break

    for k in total_chk_keys:
        raw = row.get(k)
        # A missing (None) cell is treated like a blank one instead of crashing.
        text = "" if raw is None else str(raw).strip()
        try:
            # A blank count is read as zero checkouts.
            total_checkouts = int(text or "0")
            break
        except ValueError:
            continue

    conditions = []
    if last_checkout_years is not None:
        conditions.append(last_checkout_years >= MAX_YEARS_SINCE_LAST_CHECKOUT)
    if pub_years is not None:
        conditions.append(pub_years >= MAX_YEARS_SINCE_PUBLICATION)
    if total_checkouts is not None:
        conditions.append(total_checkouts <= MIN_TOTAL_CHECKOUTS)

    # Each entry is a bool, so summing counts how many checks are met.
    return sum(conditions) >= 2


def screen_weeding(input_path: Path, output_path: Path) -> None:
    """Copy a circulation CSV to *output_path* with a weeding_candidate column.

    The input is read as UTF-8 (a leading byte-order mark is tolerated) and
    the output is written as UTF-8.  All original columns are kept; the
    ``weeding_candidate`` column is appended if the input does not already
    have one, and holds "yes" for flagged rows and an empty string otherwise.

    Rows shorter than the header are padded with empty strings.  Cells beyond
    the last header have no column to be written to, so they are dropped and
    a warning is printed to stderr instead of aborting the whole run.

    Args:
        input_path: Existing CSV export to read.
        output_path: CSV file to create or overwrite.
    """
    today = datetime.today()
    with input_path.open(newline="", encoding="utf-8-sig") as infile, output_path.open(
        "w", newline="", encoding="utf-8"
    ) as outfile:
        reader = csv.DictReader(infile)
        fieldnames = list(reader.fieldnames or [])
        if "weeding_candidate" not in fieldnames:
            fieldnames.append("weeding_candidate")
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()

        for row in reader:
            # DictReader parks cells beyond the header under the None key;
            # DictWriter would reject that key, so remove it first.
            overflow = row.pop(None, None)
            if overflow:
                print(
                    f"Warning: line {reader.line_num}: ignoring "
                    f"{len(overflow)} extra value(s) with no column header",
                    file=sys.stderr,
                )
            # Short rows come back with None for the missing cells.
            cleaned = {key: "" if value is None else value for key, value in row.items()}
            cleaned["weeding_candidate"] = "yes" if should_flag(cleaned, today) else ""
            writer.writerow(cleaned)


def main(argv: list[str]) -> int:
    """Run the screener from the command line.

    Args:
        argv: Full argument vector: program name, input CSV path, output
            CSV path.  A leading ``~`` in either path is expanded.

    Returns:
        0 on success; 1 when the arguments are wrong, the input file does
        not exist, or the output path points at the input file.
    """
    if len(argv) != 3:
        print("Usage: python weeding_screener.py circ_export.csv weeding_candidates.csv")
        return 1
    input_path = Path(argv[1]).expanduser()
    output_path = Path(argv[2]).expanduser()
    if not input_path.exists():
        print(f"Input file not found: {input_path}")
        return 1
    # The output is opened for writing (and so emptied) before any input is
    # read; writing onto the input file would wipe the data being screened.
    if output_path.exists() and output_path.samefile(input_path):
        print(f"Output file must be different from the input file: {input_path}")
        return 1
    screen_weeding(input_path, output_path)
    print(f"Weeding candidates written to: {output_path}")
    return 0


if __name__ == "__main__":
    # Hand main()'s return value to the shell as the process exit status.
    sys.exit(main(sys.argv))

#!/usr/bin/env python3
"""
weeding_screener.py

Pop-up tool to flag potential weeding candidates from a circulation export (CSV).

You can adjust the defaults at the top of this file to match your local policy.

Example:
    python weeding_screener.py circ_export.csv weeding_candidates.csv
"""

import csv
import sys
from datetime import datetime
from pathlib import Path

# Screening thresholds -- edit these to match local policy.
# A row is flagged when at least two of the three checks below are met.

# Years since the last checkout at or above which an item counts as idle.
MAX_YEARS_SINCE_LAST_CHECKOUT = 5
# Years since publication (counted from 1 January of the publication year)
# at or above which an item counts as old.
MAX_YEARS_SINCE_PUBLICATION = 10
# Total checkouts at or below which an item counts as rarely used.
MIN_TOTAL_CHECKOUTS = 0

# Date layouts tried, in this order, when reading a last-checkout value.
DATE_FORMATS = [
    "%Y-%m-%d",
    "%m/%d/%Y",
    "%Y/%m/%d",
]


def parse_year(value: str) -> int | None:
    if not value:
        return None
    digits = "".join(ch for ch in value if ch.isdigit())
    if len(digits) >= 4:
        try:
            return int(digits[:4])
        except ValueError:
            return None
    return None


def parse_date(value: str) -> datetime | None:
    if not value:
        return None
    value = value.strip()
    for fmt in DATE_FORMATS:
        try:
            return datetime.strptime(value, fmt)
        except ValueError:
            continue
    return None


def years_between(start: datetime, end: datetime) -> float:
    """Return the time from *start* to *end* in approximate years.

    Only the whole-day part of the difference is used, divided by 365.25.
    The result is negative when *end* is earlier than *start*.
    """
    elapsed = end - start
    whole_days = elapsed.days
    return whole_days / 365.25


def should_flag(row: dict, today: datetime) -> bool:
    """Decide whether one CSV row looks like a weeding candidate.

    Columns are located by words in their header names (case-insensitive):
    "last" and "check" for the last-checkout date, "year" or "pub" for the
    publication year, and "total" and "check" for the checkout count.  Up to
    three checks are then evaluated against the module-level thresholds:

    * years since last checkout >= MAX_YEARS_SINCE_LAST_CHECKOUT
    * years since publication   >= MAX_YEARS_SINCE_PUBLICATION
    * total checkouts           <= MIN_TOTAL_CHECKOUTS

    A check whose value cannot be read from the row is left out.

    Args:
        row: Mapping of column header to cell value for one record.
        today: Reference date used to compute elapsed years.

    Returns:
        True when at least two of the checks are met, otherwise False.
    """
    # csv.DictReader files surplus cells under a None key; only real (string)
    # headers can be matched by name, so anything else is skipped.
    headers = [(k, k.lower()) for k in row if isinstance(k, str)]
    last_checkout_keys = [k for k, low in headers if "last" in low and "check" in low]
    pub_year_keys = [k for k, low in headers if "year" in low or "pub" in low]
    total_chk_keys = [k for k, low in headers if "total" in low and "check" in low]

    last_checkout_years = None
    pub_years = None
    total_checkouts = None

    # For each measure, the first matching column that yields a value wins.
    for k in last_checkout_keys:
        d = parse_date(row.get(k, ""))
        if d:
            last_checkout_years = years_between(d, today)
            break

    for k in pub_year_keys:
        y = parse_year(row.get(k, ""))
        if y:  # also rejects year 0, which datetime cannot represent
            pub_years = years_between(datetime(y, 1, 1), today)
            break

    for k in total_chk_keys:
        raw = row.get(k)
        # A missing (None) cell is treated like a blank one instead of crashing.
        text = "" if raw is None else str(raw).strip()
        try:
            # A blank count is read as zero checkouts.
            total_checkouts = int(text or "0")
            break
        except ValueError:
            continue

    conditions = []
    if last_checkout_years is not None:
        conditions.append(last_checkout_years >= MAX_YEARS_SINCE_LAST_CHECKOUT)
    if pub_years is not None:
        conditions.append(pub_years >= MAX_YEARS_SINCE_PUBLICATION)
    if total_checkouts is not None:
        conditions.append(total_checkouts <= MIN_TOTAL_CHECKOUTS)

    # Each entry is a bool, so summing counts how many checks are met.
    return sum(conditions) >= 2


def screen_weeding(input_path: Path, output_path: Path) -> None:
    """Copy a circulation CSV to *output_path* with a weeding_candidate column.

    The input is read as UTF-8 (a leading byte-order mark is tolerated) and
    the output is written as UTF-8.  All original columns are kept; the
    ``weeding_candidate`` column is appended if the input does not already
    have one, and holds "yes" for flagged rows and an empty string otherwise.

    Rows shorter than the header are padded with empty strings.  Cells beyond
    the last header have no column to be written to, so they are dropped and
    a warning is printed to stderr instead of aborting the whole run.

    Args:
        input_path: Existing CSV export to read.
        output_path: CSV file to create or overwrite.
    """
    today = datetime.today()
    with input_path.open(newline="", encoding="utf-8-sig") as infile, output_path.open(
        "w", newline="", encoding="utf-8"
    ) as outfile:
        reader = csv.DictReader(infile)
        fieldnames = list(reader.fieldnames or [])
        if "weeding_candidate" not in fieldnames:
            fieldnames.append("weeding_candidate")
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()

        for row in reader:
            # DictReader parks cells beyond the header under the None key;
            # DictWriter would reject that key, so remove it first.
            overflow = row.pop(None, None)
            if overflow:
                print(
                    f"Warning: line {reader.line_num}: ignoring "
                    f"{len(overflow)} extra value(s) with no column header",
                    file=sys.stderr,
                )
            # Short rows come back with None for the missing cells.
            cleaned = {key: "" if value is None else value for key, value in row.items()}
            cleaned["weeding_candidate"] = "yes" if should_flag(cleaned, today) else ""
            writer.writerow(cleaned)


def main(argv: list[str]) -> int:
    """Run the screener from the command line.

    Args:
        argv: Full argument vector: program name, input CSV path, output
            CSV path.  A leading ``~`` in either path is expanded.

    Returns:
        0 on success; 1 when the arguments are wrong, the input file does
        not exist, or the output path points at the input file.
    """
    if len(argv) != 3:
        print("Usage: python weeding_screener.py circ_export.csv weeding_candidates.csv")
        return 1
    input_path = Path(argv[1]).expanduser()
    output_path = Path(argv[2]).expanduser()
    if not input_path.exists():
        print(f"Input file not found: {input_path}")
        return 1
    # The output is opened for writing (and so emptied) before any input is
    # read; writing onto the input file would wipe the data being screened.
    if output_path.exists() and output_path.samefile(input_path):
        print(f"Output file must be different from the input file: {input_path}")
        return 1
    screen_weeding(input_path, output_path)
    print(f"Weeding candidates written to: {output_path}")
    return 0


if __name__ == "__main__":
    # Hand main()'s return value to the shell as the process exit status.
    sys.exit(main(sys.argv))

← Back to all tools

Weeding Screener (weeding_screener.py)

How It Works (In Plain Language)

How to Use

Optional: Adjust the Rules

Full Python Source (Optional)

Weeding Screener (`weeding_screener.py`)