Skills · bookkeeping-automation
install
source · Clone the upstream repo
git clone https://github.com/TerminalSkills/skills
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/TerminalSkills/skills "$T" && mkdir -p ~/.claude/skills && cp -r "$T/skills/bookkeeping-automation" ~/.claude/skills/terminalskills-skills-bookkeeping-automation && rm -rf "$T"
manifest:
skills/bookkeeping-automation/SKILL.md
safety · automated scan (low risk)
This is a pattern-based risk scan, not a security review. Our crawler flagged:
- references API keys
Always read a skill's source content before installing. Patterns alone don't mean the skill is malicious — but they warrant attention.
source content
Bookkeeping Automation
Overview
Automate the manual parts of bookkeeping: importing bank statements in multiple formats, categorizing transactions using keyword rules or AI, deduplicating entries, reconciling account balances, and producing expense reports. This skill is format-agnostic — it handles CSV exports from most banks, OFX/QFX files (used by most US banks and Mint), and QIF files (legacy Quicken format).
Instructions
Step 1: Parse bank statements
```python
# parser.py — Parse CSV, OFX, and QIF bank statement formats
import csv
import re
from dataclasses import dataclass
from datetime import datetime
from decimal import Decimal
from typing import List


@dataclass
class Transaction:
    date: datetime
    description: str
    amount: Decimal              # Negative = debit, Positive = credit
    raw_id: str = ""             # Bank's transaction ID (for dedup)
    category: str = "Uncategorized"
    notes: str = ""


def parse_csv(filepath: str,
              date_col: str = "Date",
              desc_col: str = "Description",
              amount_col: str = "Amount",
              date_fmt: str = "%m/%d/%Y") -> List[Transaction]:
    """
    Parse a bank CSV export. Column names vary by bank — adjust defaults.
    Common variants:
        Chase:           Date, Description, Amount
        Bank of America: Date, Description, Amount, Running Bal.
        Wells Fargo:     date, description, deposits, withdrawals, balance
    """
    transactions = []
    with open(filepath, newline="", encoding="utf-8-sig") as f:
        reader = csv.DictReader(f)
        for row in reader:
            # Handle split debit/credit columns (e.g., Wells Fargo)
            if "deposits" in [k.lower() for k in row]:
                credit = Decimal(row.get("deposits", "0").replace(",", "") or "0")
                debit = Decimal(row.get("withdrawals", "0").replace(",", "") or "0")
                amount = credit - debit
            else:
                raw = row[amount_col].replace(",", "").replace("$", "").strip()
                amount = Decimal(raw)
            transactions.append(Transaction(
                date=datetime.strptime(row[date_col].strip(), date_fmt),
                description=row[desc_col].strip(),
                amount=amount,
                raw_id=row.get("Transaction ID", ""),
            ))
    return transactions


def parse_ofx(filepath: str) -> List[Transaction]:
    """Parse OFX/QFX files (Open Financial Exchange — used by most US banks)."""
    transactions = []
    with open(filepath, encoding="utf-8", errors="ignore") as f:
        content = f.read()

    # Extract STMTTRN blocks
    pattern = re.compile(r"<STMTTRN>(.*?)</STMTTRN>", re.DOTALL)
    for match in pattern.finditer(content):
        block = match.group(1)

        def extract(tag):
            # Value runs until the next tag or end of line (OFX tags are often unclosed)
            m = re.search(rf"<{tag}>([^<\r\n]*)", block)
            return m.group(1).strip() if m else ""

        date_str = extract("DTPOSTED")[:8]  # YYYYMMDD
        transactions.append(Transaction(
            date=datetime.strptime(date_str, "%Y%m%d"),
            description=extract("MEMO") or extract("NAME"),
            amount=Decimal(extract("TRNAMT")),
            raw_id=extract("FITID"),
        ))
    return transactions


def parse_qif(filepath: str) -> List[Transaction]:
    """Parse QIF files (Quicken Interchange Format — legacy but still common)."""
    transactions = []
    current = {}
    with open(filepath, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line.startswith("D"):    # Date
                current["date"] = line[1:]
            elif line.startswith("T"):  # Amount
                current["amount"] = line[1:].replace(",", "")
            elif line.startswith("P"):  # Payee
                current["description"] = line[1:]
            elif line.startswith("N"):  # Check number / ID
                current["raw_id"] = line[1:]
            elif line == "^":           # Record separator
                if "date" in current and "amount" in current:
                    # QIF dates vary: M/D/Y, M/D'YY, etc.
                    parsed_date = None
                    for fmt in ("%m/%d/%Y", "%m/%d'%y", "%d/%m/%Y"):
                        try:
                            parsed_date = datetime.strptime(current["date"], fmt)
                            break
                        except ValueError:
                            continue
                    if parsed_date is not None:  # Skip records whose date can't be parsed
                        transactions.append(Transaction(
                            date=parsed_date,
                            description=current.get("description", ""),
                            amount=Decimal(current["amount"]),
                            raw_id=current.get("raw_id", ""),
                        ))
                current = {}
    return transactions
```
Step 2: Categorize transactions
```python
# categorizer.py — Rule-based and AI-powered transaction categorization
from typing import List

from parser import Transaction

# Rule-based categorization — extend this dict for your business
CATEGORY_RULES = {
    "Software & SaaS": [
        "aws", "amazon web services", "digitalocean", "cloudflare", "github",
        "vercel", "heroku", "stripe", "twilio", "sendgrid", "datadog",
        "notion", "linear", "figma", "zapier", "openai",
    ],
    "Advertising": [
        "google ads", "facebook ads", "meta ads", "twitter ads",
        "linkedin ads", "reddit ads", "bing ads",
    ],
    "Payroll & Contractors": [
        "gusto", "rippling", "deel", "remote.com", "paylocity", "payroll",
        "wise", "transferwise",
    ],
    "Office & Supplies": [
        "staples", "office depot", "amazon", "best buy",
    ],
    "Travel": [
        "airbnb", "marriott", "hilton", "delta", "united", "american airlines",
        "southwest", "uber", "lyft", "expedia",
    ],
    "Meals & Entertainment": [
        "restaurant", "cafe", "coffee", "starbucks", "doordash", "grubhub",
        "ubereats",
    ],
    "Banking & Fees": [
        "bank fee", "service charge", "wire fee", "monthly fee", "overdraft",
    ],
    "Revenue": [
        "stripe payment", "paypal transfer", "square payment",
    ],
}


def categorize_by_rules(transactions: List[Transaction]) -> List[Transaction]:
    """Apply keyword rules to categorize transactions."""
    for tx in transactions:
        desc_lower = tx.description.lower()
        matched = False
        for category, keywords in CATEGORY_RULES.items():
            if any(kw in desc_lower for kw in keywords):
                tx.category = category
                matched = True
                break
        if not matched:
            tx.category = "Uncategorized"
    return transactions


def categorize_with_ai(transactions: List[Transaction], api_key: str,
                       model: str = "gpt-4o-mini") -> List[Transaction]:
    """
    Use an LLM to categorize transactions that rules couldn't match.
    Only sends uncategorized transactions to reduce API costs.
    """
    import json
    import openai

    client = openai.OpenAI(api_key=api_key)
    uncategorized = [tx for tx in transactions if tx.category == "Uncategorized"]
    if not uncategorized:
        return transactions

    # Batch up to 50 transactions per request
    batch_size = 50
    categories = list(CATEGORY_RULES.keys()) + ["Personal", "Tax", "Insurance", "Other"]

    for i in range(0, len(uncategorized), batch_size):
        batch = uncategorized[i:i + batch_size]
        tx_list = [
            {"id": j, "description": tx.description, "amount": str(tx.amount)}
            for j, tx in enumerate(batch)
        ]
        prompt = f"""Categorize these bank transactions into one of these categories:
{json.dumps(categories, indent=2)}

Transactions:
{json.dumps(tx_list, indent=2)}

Return a JSON object: {{"results": [{{"id": <int>, "category": "<category>"}}, ...]}} and nothing else."""

        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"},
            temperature=0,
        )
        try:
            results = json.loads(response.choices[0].message.content)
            # Handle both {"results": [...]} and [...] formats
            if isinstance(results, dict):
                results = next(iter(results.values()))
            for r in results:
                batch[r["id"]].category = r["category"]
        except (json.JSONDecodeError, KeyError, IndexError):
            pass  # Keep "Uncategorized" if AI fails
    return transactions
```
Step 3: Detect and remove duplicates
```python
# dedup.py — Detect duplicate transactions
from decimal import Decimal
from typing import List, Tuple

from parser import Transaction


def detect_duplicates(transactions: List[Transaction],
                      date_window_days: int = 3,
                      amount_tolerance: Decimal = Decimal("0.01")) -> List[Tuple]:
    """
    Find likely duplicate transactions based on:
      - Same amount (within tolerance)
      - Similar description (fuzzy match)
      - Date within a rolling window
    Returns list of (tx_a, tx_b) duplicate pairs.
    """
    duplicates = []
    sorted_txs = sorted(transactions, key=lambda t: (t.amount, t.date))

    for i, tx_a in enumerate(sorted_txs):
        for tx_b in sorted_txs[i + 1:]:
            # Stop early — sorted by amount, no point checking further
            if abs(tx_b.amount - tx_a.amount) > amount_tolerance:
                break
            # Check date proximity
            date_diff = abs((tx_b.date - tx_a.date).days)
            if date_diff > date_window_days:
                continue
            # If both have bank IDs, use them for exact match
            if tx_a.raw_id and tx_b.raw_id and tx_a.raw_id == tx_b.raw_id:
                duplicates.append((tx_a, tx_b))
                continue
            # Fuzzy description match (Jaccard similarity on word sets)
            words_a = set(tx_a.description.lower().split())
            words_b = set(tx_b.description.lower().split())
            if words_a and words_b:
                jaccard = len(words_a & words_b) / len(words_a | words_b)
                if jaccard > 0.6:
                    duplicates.append((tx_a, tx_b))
    return duplicates


def remove_duplicates(transactions: List[Transaction]) -> List[Transaction]:
    """Return deduplicated list, keeping the first occurrence of each duplicate pair."""
    duplicates = detect_duplicates(transactions)
    to_remove = set(id(pair[1]) for pair in duplicates)
    return [tx for tx in transactions if id(tx) not in to_remove]
```
Step 4: Reconcile accounts
```python
# reconciler.py — Reconcile parsed transactions against a known closing balance
from datetime import datetime
from decimal import Decimal
from typing import List

from parser import Transaction


def reconcile(transactions: List[Transaction],
              opening_balance: Decimal,
              expected_closing_balance: Decimal,
              statement_date: datetime) -> dict:
    """
    Reconcile a list of transactions against a bank statement.
    Returns a dict with reconciliation status and any discrepancy.
    """
    # Filter to transactions on or before statement date
    in_scope = [tx for tx in transactions if tx.date <= statement_date]
    computed_balance = opening_balance + sum(tx.amount for tx in in_scope)
    discrepancy = computed_balance - expected_closing_balance

    result = {
        "opening_balance": float(opening_balance),
        "total_transactions": len(in_scope),
        "total_debits": float(sum(tx.amount for tx in in_scope if tx.amount < 0)),
        "total_credits": float(sum(tx.amount for tx in in_scope if tx.amount > 0)),
        "computed_closing_balance": float(computed_balance),
        "expected_closing_balance": float(expected_closing_balance),
        "discrepancy": float(discrepancy),
        "reconciled": abs(discrepancy) < Decimal("0.01"),
    }

    if result["reconciled"]:
        print(f"✓ Reconciled — closing balance: ${computed_balance:,.2f}")
    else:
        print(f"✗ Discrepancy of ${discrepancy:,.2f} found")
        print(f"  Computed: ${computed_balance:,.2f}")
        print(f"  Expected: ${expected_closing_balance:,.2f}")
    return result
```
Step 5: Generate expense reports
```python
# reports.py — Generate expense summaries
from collections import defaultdict
from decimal import Decimal
from typing import List

from parser import Transaction


def monthly_summary(transactions: List[Transaction]) -> dict:
    """Group transactions by month and category, return summary."""
    summary = defaultdict(lambda: defaultdict(Decimal))
    for tx in transactions:
        month_key = tx.date.strftime("%Y-%m")
        summary[month_key][tx.category] += tx.amount
    return {month: dict(cats) for month, cats in sorted(summary.items())}


def expense_report(transactions: List[Transaction], title: str = "Expense Report") -> str:
    """Generate a human-readable expense report."""
    debits = [tx for tx in transactions if tx.amount < 0]
    by_category = defaultdict(Decimal)
    for tx in debits:
        by_category[tx.category] += abs(tx.amount)

    total = sum(by_category.values())
    lines = [f"\n{'=' * 50}", f" {title}", f"{'=' * 50}"]
    for category, amount in sorted(by_category.items(), key=lambda x: -x[1]):
        pct = (amount / total * 100) if total else 0
        lines.append(f" {category:<30} ${amount:>10,.2f} ({pct:.1f}%)")
    lines.append(f"{'─' * 50}")
    lines.append(f" {'TOTAL':<30} ${total:>10,.2f}")
    lines.append(f"{'=' * 50}\n")
    return "\n".join(lines)


def export_to_csv(transactions: List[Transaction], output_path: str):
    """Export categorized transactions to CSV for review or import."""
    import csv
    with open(output_path, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=["date", "description", "amount", "category", "notes"])
        writer.writeheader()
        for tx in transactions:
            writer.writerow({
                "date": tx.date.strftime("%Y-%m-%d"),
                "description": tx.description,
                "amount": str(tx.amount),
                "category": tx.category,
                "notes": tx.notes,
            })
    print(f"Exported {len(transactions)} transactions to {output_path}")
```
Examples
Example 1: Full pipeline — CSV → categorize → report
```python
from parser import parse_csv
from categorizer import categorize_by_rules, categorize_with_ai
from dedup import remove_duplicates
from reports import expense_report, export_to_csv
import os

# 1. Parse bank CSV
transactions = parse_csv("march_bank_statement.csv")
print(f"Parsed {len(transactions)} transactions")

# 2. Remove duplicates before categorizing (see Guidelines)
transactions = remove_duplicates(transactions)

# 3. Apply rules first (free, fast)
transactions = categorize_by_rules(transactions)

# 4. AI for remaining unknowns (paid, slower)
transactions = categorize_with_ai(transactions, api_key=os.environ["OPENAI_API_KEY"])

# 5. Print expense report
print(expense_report(transactions, title="March 2025 Expenses"))

# 6. Export for review / accounting system import
export_to_csv(transactions, "march_2025_categorized.csv")
```
Output:
```
Parsed 143 transactions

==================================================
 March 2025 Expenses
==================================================
 Software & SaaS                $  4,230.00 (41.2%)
 Payroll & Contractors          $  3,800.00 (37.0%)
 Advertising                    $  1,200.00 (11.7%)
 Meals & Entertainment          $    420.00 (4.1%)
 Travel                         $    320.00 (3.1%)
 Other                          $    300.00 (2.9%)
──────────────────────────────────────────────────
 TOTAL                          $ 10,270.00
==================================================
```
Example 2: Reconcile an OFX statement
```python
from parser import parse_ofx
from dedup import remove_duplicates
from reconciler import reconcile
from decimal import Decimal
from datetime import datetime

transactions = parse_ofx("march_checking.ofx")
transactions = remove_duplicates(transactions)

result = reconcile(
    transactions=transactions,
    opening_balance=Decimal("12450.00"),
    expected_closing_balance=Decimal("8930.00"),
    statement_date=datetime(2025, 3, 31),
)
# ✓ Reconciled — closing balance: $8,930.00
```
Guidelines
- Always run deduplication before categorization and reconciliation — bank exports sometimes include the same transaction twice (pending + cleared).
- Use rule-based categorization first; send only unmatched transactions to AI to keep costs low. 70–80% of transactions typically match keyword rules.
- For AI categorization, batch 50 transactions per request and request JSON output to avoid parsing errors.
- When reconciling, sort transactions by date and check for off-by-one errors around the statement cutoff date.
- Preserve the original raw_id (FITID in OFX, Num in QIF) for reliable deduplication even if descriptions vary.
- Export to CSV after categorization for a human review step before pushing to Xero or QuickBooks.
- For recurring reports, store processed transaction IDs in a local SQLite database to avoid reprocessing already-imported entries (a minimal sketch follows below).
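The skill's code doesn't implement that last guideline, so here is a minimal sketch of one way to do it. The module name `seen_ids.py`, the database file `seen_ids.db`, and the helpers `_tx_key` / `filter_new` are illustrative assumptions, not part of the skill; the sketch keys each transaction on `raw_id` when the bank provides one and falls back to a hash of date, amount, and description otherwise.

```python
# seen_ids.py: illustrative sketch, not part of the skill above.
# Tracks already-imported transactions in a local SQLite database.
import hashlib
import sqlite3
from typing import List

from parser import Transaction


def _tx_key(tx: Transaction) -> str:
    """Prefer the bank's raw_id; otherwise hash date + amount + description."""
    if tx.raw_id:
        return tx.raw_id
    return hashlib.sha256(
        f"{tx.date:%Y-%m-%d}|{tx.amount}|{tx.description}".encode()
    ).hexdigest()


def filter_new(transactions: List[Transaction],
               db_path: str = "seen_ids.db") -> List[Transaction]:
    """Return only transactions not seen in previous runs, and record the new ones."""
    conn = sqlite3.connect(db_path)
    conn.execute("CREATE TABLE IF NOT EXISTS seen (tx_key TEXT PRIMARY KEY)")
    new = []
    for tx in transactions:
        key = _tx_key(tx)
        already_seen = conn.execute(
            "SELECT 1 FROM seen WHERE tx_key = ?", (key,)
        ).fetchone()
        if already_seen is None:
            conn.execute("INSERT INTO seen (tx_key) VALUES (?)", (key,))
            new.append(tx)
    conn.commit()
    conn.close()
    return new
```

Calling `filter_new()` right after parsing, before deduplication and categorization, keeps recurring monthly runs from re-categorizing and re-reporting entries that were already processed.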