← Back to Library Pop-Up Tools

Weeding Screener (weeding_screener.py)

This tool takes a circulation/export CSV and adds a weeding_candidate flag based on simple rules (years since last checkout, years since publication, total checkouts).

How It Works (In Plain Language)

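  • Finds key columns by looking for words in the column names: “last” together with “check” for the last checkout date, “year” or “pub” for the publication year, and “total” together with “check” for the total checkouts.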
  • Reads each row and tries to understand: how long ago it was last checked out, how old it is, and how many times it has been checked out.
  • Checks each row against three simple thresholds set at the top of the file (by default: 5+ years since last checkout, 10+ years since publication, and zero total checkouts). You can edit those numbers.
  • If at least two conditions are true, it marks weeding_candidate = "yes" for that row.
  • Saves a new CSV with all your original columns plus the weeding_candidate column so you can sort/filter in Excel.

How to Use

  1. Place your circ/export CSV (for example circ_data.csv) in this folder.
  2. Open Terminal and run: cd ~/Desktop/library_pop_up_tools
  3. Then run: python weeding_screener.py circ_data.csv weeding_candidates.csv
  4. Open weeding_candidates.csv and look for weeding_candidate = yes.
library_pop_up_tools % python weeding_screener.py circ_data.csv weeding_candidates.csv
Weeding candidates written to: weeding_candidates.csv

Optional: Adjust the Rules

You can use this tool without changing anything. At the top of the script there are a few numbers you can edit to match your local policy (for example how many years since last checkout).

MAX_YEARS_SINCE_LAST_CHECKOUT = 5
MAX_YEARS_SINCE_PUBLICATION = 10
MIN_TOTAL_CHECKOUTS = 0

If you want to be stricter or looser, open weeding_screener.py, change these numbers, save, and run it again.

Full Python Source (Optional)

Click to show the full script
#!/usr/bin/env python3
"""
weeding_screener.py

Pop-up tool to flag potential weeding candidates from a circulation export (CSV).

You can adjust the defaults at the top of this file to match your local policy.

Example:
    python weeding_screener.py circ_export.csv weeding_candidates.csv
"""

import csv
import sys
from datetime import datetime
from pathlib import Path

# Thresholds read by should_flag(); edit these to match local policy.
# Despite the "MAX" prefix, a row meets this condition when its last checkout
# lies AT LEAST this many years before today.
MAX_YEARS_SINCE_LAST_CHECKOUT = 5
# A row meets this condition when January 1 of its publication year lies
# AT LEAST this many years before today.
MAX_YEARS_SINCE_PUBLICATION = 10
# Despite the "MIN" prefix, a row meets this condition when its total checkout
# count is AT MOST this number.
MIN_TOTAL_CHECKOUTS = 0

# Date layouts tried, in this order, by parse_date(); the first match wins.
DATE_FORMATS = [
    "%Y-%m-%d",  # e.g. 2019-03-15
    "%m/%d/%Y",  # e.g. 03/15/2019
    "%Y/%m/%d",  # e.g. 2019/03/15
]


def parse_year(value: str) -> int | None:
    if not value:
        return None
    digits = "".join(ch for ch in value if ch.isdigit())
    if len(digits) >= 4:
        try:
            return int(digits[:4])
        except ValueError:
            return None
    return None


def parse_date(value: str) -> datetime | None:
    if not value:
        return None
    value = value.strip()
    for fmt in DATE_FORMATS:
        try:
            return datetime.strptime(value, fmt)
        except ValueError:
            continue
    return None


def years_between(start: datetime, end: datetime) -> float:
    """Return the span from *start* to *end* in years of 365.25 days.

    Only the whole-day part of the difference is used; the result is
    negative when *end* is earlier than *start*.
    """
    elapsed = end - start
    days_per_year = 365.25
    return elapsed.days / days_per_year


def should_flag(row: dict, today: datetime) -> bool:
    """Decide whether one CSV row looks like a weeding candidate.

    Columns are located by substrings of their case-insensitive names:
      * last checkout date: name contains "last" and "check"
      * publication year:   name contains "year" or "pub"
      * total checkouts:    name contains "total" and "check"
    For each kind, the first matching column whose value parses is used.

    One condition is evaluated per value that could be read, against the
    module-level thresholds, and the row is flagged when at least two of
    them hold. A blank total-checkouts cell is counted as 0 checkouts.

    Args:
        row: Mapping of column name to cell text. Keys that are not strings
            (such as the None key csv.DictReader uses for surplus cells)
            are ignored, and None cell values are treated as blank.
        today: Reference date that ages are measured against.

    Returns:
        True when at least two conditions are met, otherwise False.
    """
    # Lowercase each usable column name once. Non-string keys are skipped:
    # csv.DictReader files surplus cells under the key None, which has no
    # .lower() and would otherwise crash the matching below.
    names = [(key, key.lower()) for key in row if isinstance(key, str)]
    last_checkout_keys = [k for k, low in names if "last" in low and "check" in low]
    pub_year_keys = [k for k, low in names if "year" in low or "pub" in low]
    total_chk_keys = [k for k, low in names if "total" in low and "check" in low]

    last_checkout_years = None
    pub_years = None
    total_checkouts = None

    # `row.get(k) or ""` turns a None cell (short row) into a blank string.
    for k in last_checkout_keys:
        d = parse_date(row.get(k) or "")
        if d is not None:
            last_checkout_years = years_between(d, today)
            break

    for k in pub_year_keys:
        y = parse_year(row.get(k) or "")
        # Truthiness also rejects year 0, which datetime cannot represent.
        if y:
            pub_years = years_between(datetime(y, 1, 1), today)
            break

    for k in total_chk_keys:
        try:
            total_checkouts = int((row.get(k) or "").strip() or "0")
            break
        except ValueError:
            # Not a plain integer (e.g. "3.0" or "1,234"); try the next column.
            continue

    conditions = []
    if last_checkout_years is not None:
        conditions.append(last_checkout_years >= MAX_YEARS_SINCE_LAST_CHECKOUT)
    if pub_years is not None:
        conditions.append(pub_years >= MAX_YEARS_SINCE_PUBLICATION)
    if total_checkouts is not None:
        conditions.append(total_checkouts <= MIN_TOTAL_CHECKOUTS)

    met = sum(1 for condition in conditions if condition)
    return met >= 2


def screen_weeding(input_path: Path, output_path: Path) -> None:
    """Copy a circulation CSV to a new file, adding a weeding_candidate column.

    The input is read as UTF-8 (a leading byte-order mark is tolerated) and
    the output is written as UTF-8, replacing any existing file at
    *output_path*. Every input column is kept; weeding_candidate is appended
    unless the input already has a column of that name, in which case its
    values are overwritten. The column holds "yes" for rows should_flag()
    accepts and is blank otherwise.

    Rows shorter than the header are padded with blanks. Cells beyond the
    last header column have no column to be written to and are dropped.

    Args:
        input_path: CSV file to read; its first row is taken as the header.
        output_path: CSV file to create or overwrite.
    """
    today = datetime.today()
    with input_path.open(newline="", encoding="utf-8-sig") as infile, output_path.open(
        "w", newline="", encoding="utf-8"
    ) as outfile:
        reader = csv.DictReader(infile)
        fieldnames = list(reader.fieldnames or [])
        if "weeding_candidate" not in fieldnames:
            fieldnames.append("weeding_candidate")
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()

        for row in reader:
            # DictReader stores the surplus cells of an over-long row as a
            # list under the key None. That key is not a column, and leaving
            # it in makes DictWriter.writerow() raise ValueError.
            row.pop(None, None)
            # Short rows come back with None for the missing cells.
            cleaned = {key: ("" if cell is None else cell) for key, cell in row.items()}
            cleaned["weeding_candidate"] = "yes" if should_flag(cleaned, today) else ""
            writer.writerow(cleaned)


def main(argv: list[str]) -> int:
    """Command-line entry point for the weeding screener.

    Args:
        argv: Full argument vector: program name, input CSV path, output
            CSV path. A leading "~" in either path is expanded.

    Returns:
        0 after the output has been written; 1 (after printing a message)
        when the argument count is wrong, the input does not exist, or the
        output path refers to the same file as the input.
    """
    if len(argv) != 3:
        print("Usage: python weeding_screener.py circ_export.csv weeding_candidates.csv")
        return 1
    input_path = Path(argv[1]).expanduser()
    output_path = Path(argv[2]).expanduser()
    if not input_path.exists():
        print(f"Input file not found: {input_path}")
        return 1
    # The output is opened for writing, which truncates it, before any row is
    # read. Writing onto the input would therefore wipe the data.
    if output_path.exists() and output_path.samefile(input_path):
        print(f"Output file must be different from the input file: {input_path}")
        return 1
    screen_weeding(input_path, output_path)
    print(f"Weeding candidates written to: {output_path}")
    return 0


if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the process exit status.
    sys.exit(main(sys.argv))
#!/usr/bin/env python3
"""
weeding_screener.py

Pop-up tool to flag potential weeding candidates from a circulation export (CSV).

You can adjust the defaults at the top of this file to match your local policy.

Example:
    python weeding_screener.py circ_export.csv weeding_candidates.csv
"""

import csv
import sys
from datetime import datetime
from pathlib import Path

# Thresholds read by should_flag(); edit these to match local policy.
# Despite the "MAX" prefix, a row meets this condition when its last checkout
# lies AT LEAST this many years before today.
MAX_YEARS_SINCE_LAST_CHECKOUT = 5
# A row meets this condition when January 1 of its publication year lies
# AT LEAST this many years before today.
MAX_YEARS_SINCE_PUBLICATION = 10
# Despite the "MIN" prefix, a row meets this condition when its total checkout
# count is AT MOST this number.
MIN_TOTAL_CHECKOUTS = 0

# Date layouts tried, in this order, by parse_date(); the first match wins.
DATE_FORMATS = [
    "%Y-%m-%d",  # e.g. 2019-03-15
    "%m/%d/%Y",  # e.g. 03/15/2019
    "%Y/%m/%d",  # e.g. 2019/03/15
]


def parse_year(value: str) -> int | None:
    if not value:
        return None
    digits = "".join(ch for ch in value if ch.isdigit())
    if len(digits) >= 4:
        try:
            return int(digits[:4])
        except ValueError:
            return None
    return None


def parse_date(value: str) -> datetime | None:
    if not value:
        return None
    value = value.strip()
    for fmt in DATE_FORMATS:
        try:
            return datetime.strptime(value, fmt)
        except ValueError:
            continue
    return None


def years_between(start: datetime, end: datetime) -> float:
    """Return the span from *start* to *end* in years of 365.25 days.

    Only the whole-day part of the difference is used; the result is
    negative when *end* is earlier than *start*.
    """
    elapsed = end - start
    days_per_year = 365.25
    return elapsed.days / days_per_year


def should_flag(row: dict, today: datetime) -> bool:
    """Decide whether one CSV row looks like a weeding candidate.

    Columns are located by substrings of their case-insensitive names:
      * last checkout date: name contains "last" and "check"
      * publication year:   name contains "year" or "pub"
      * total checkouts:    name contains "total" and "check"
    For each kind, the first matching column whose value parses is used.

    One condition is evaluated per value that could be read, against the
    module-level thresholds, and the row is flagged when at least two of
    them hold. A blank total-checkouts cell is counted as 0 checkouts.

    Args:
        row: Mapping of column name to cell text. Keys that are not strings
            (such as the None key csv.DictReader uses for surplus cells)
            are ignored, and None cell values are treated as blank.
        today: Reference date that ages are measured against.

    Returns:
        True when at least two conditions are met, otherwise False.
    """
    # Lowercase each usable column name once. Non-string keys are skipped:
    # csv.DictReader files surplus cells under the key None, which has no
    # .lower() and would otherwise crash the matching below.
    names = [(key, key.lower()) for key in row if isinstance(key, str)]
    last_checkout_keys = [k for k, low in names if "last" in low and "check" in low]
    pub_year_keys = [k for k, low in names if "year" in low or "pub" in low]
    total_chk_keys = [k for k, low in names if "total" in low and "check" in low]

    last_checkout_years = None
    pub_years = None
    total_checkouts = None

    # `row.get(k) or ""` turns a None cell (short row) into a blank string.
    for k in last_checkout_keys:
        d = parse_date(row.get(k) or "")
        if d is not None:
            last_checkout_years = years_between(d, today)
            break

    for k in pub_year_keys:
        y = parse_year(row.get(k) or "")
        # Truthiness also rejects year 0, which datetime cannot represent.
        if y:
            pub_years = years_between(datetime(y, 1, 1), today)
            break

    for k in total_chk_keys:
        try:
            total_checkouts = int((row.get(k) or "").strip() or "0")
            break
        except ValueError:
            # Not a plain integer (e.g. "3.0" or "1,234"); try the next column.
            continue

    conditions = []
    if last_checkout_years is not None:
        conditions.append(last_checkout_years >= MAX_YEARS_SINCE_LAST_CHECKOUT)
    if pub_years is not None:
        conditions.append(pub_years >= MAX_YEARS_SINCE_PUBLICATION)
    if total_checkouts is not None:
        conditions.append(total_checkouts <= MIN_TOTAL_CHECKOUTS)

    met = sum(1 for condition in conditions if condition)
    return met >= 2


def screen_weeding(input_path: Path, output_path: Path) -> None:
    """Copy a circulation CSV to a new file, adding a weeding_candidate column.

    The input is read as UTF-8 (a leading byte-order mark is tolerated) and
    the output is written as UTF-8, replacing any existing file at
    *output_path*. Every input column is kept; weeding_candidate is appended
    unless the input already has a column of that name, in which case its
    values are overwritten. The column holds "yes" for rows should_flag()
    accepts and is blank otherwise.

    Rows shorter than the header are padded with blanks. Cells beyond the
    last header column have no column to be written to and are dropped.

    Args:
        input_path: CSV file to read; its first row is taken as the header.
        output_path: CSV file to create or overwrite.
    """
    today = datetime.today()
    with input_path.open(newline="", encoding="utf-8-sig") as infile, output_path.open(
        "w", newline="", encoding="utf-8"
    ) as outfile:
        reader = csv.DictReader(infile)
        fieldnames = list(reader.fieldnames or [])
        if "weeding_candidate" not in fieldnames:
            fieldnames.append("weeding_candidate")
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()

        for row in reader:
            # DictReader stores the surplus cells of an over-long row as a
            # list under the key None. That key is not a column, and leaving
            # it in makes DictWriter.writerow() raise ValueError.
            row.pop(None, None)
            # Short rows come back with None for the missing cells.
            cleaned = {key: ("" if cell is None else cell) for key, cell in row.items()}
            cleaned["weeding_candidate"] = "yes" if should_flag(cleaned, today) else ""
            writer.writerow(cleaned)


def main(argv: list[str]) -> int:
    """Command-line entry point for the weeding screener.

    Args:
        argv: Full argument vector: program name, input CSV path, output
            CSV path. A leading "~" in either path is expanded.

    Returns:
        0 after the output has been written; 1 (after printing a message)
        when the argument count is wrong, the input does not exist, or the
        output path refers to the same file as the input.
    """
    if len(argv) != 3:
        print("Usage: python weeding_screener.py circ_export.csv weeding_candidates.csv")
        return 1
    input_path = Path(argv[1]).expanduser()
    output_path = Path(argv[2]).expanduser()
    if not input_path.exists():
        print(f"Input file not found: {input_path}")
        return 1
    # The output is opened for writing, which truncates it, before any row is
    # read. Writing onto the input would therefore wipe the data.
    if output_path.exists() and output_path.samefile(input_path):
        print(f"Output file must be different from the input file: {input_path}")
        return 1
    screen_weeding(input_path, output_path)
    print(f"Weeding candidates written to: {output_path}")
    return 0


if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the process exit status.
    sys.exit(main(sys.argv))

← Back to all tools