#!/usr/bin/env python3
"""
Extract tool call patterns from top 50 most recent Claude Code session JSONL files.
Writes results to stdout as JSON.
"""

import os
import glob as globmod
import json
import re
import sys
from collections import defaultdict

# Claude Code keeps the FLSM project's session logs under this directory.
base = os.path.expanduser("~/.claude/projects/-Users-bryce-FLSM")

# Every JSONL file below the project directory, newest first (by mtime);
# only the 50 most recently modified ones are scanned.
all_files = sorted(
    globmod.glob(f"{base}/**/*.jsonl", recursive=True),
    key=os.path.getmtime,
    reverse=True,
)
top_files = all_files[:50]

print(f"# Scanning {len(top_files)} files (total found: {len(all_files)})", file=sys.stderr)

# Occurrence counters keyed by Bash pattern / tool name; unseen keys start at 0.
bash_patterns = defaultdict(int)
mcp_tools = defaultdict(int)

# Leading shell variable assignment (`NAME=value `).  The value may mix bare
# characters, backslash escapes and single/double-quoted segments, so
# `FOO="a b" cmd` is consumed as ONE assignment.  Every alternative inside the
# repeated group starts with a different character, which keeps matching
# linear (no nested-quantifier backtracking).
_ENV_ASSIGNMENT_RE = re.compile(
    r'''^[A-Za-z_][A-Za-z0-9_]*=(?:"(?:[^"\\]|\\.)*"|'[^']*'|\\.|[^\s"'\\])*\s+(.*)''',
    re.DOTALL,
)

# A single whitespace-delimited word that looks like `NAME=...`.
_ASSIGNMENT_WORD_RE = re.compile(r'[A-Za-z_][A-Za-z0-9_]*=')

# Shell keywords / test brackets: a command starting with one of these is not
# attributed to any program.
_SHELL_KEYWORDS = frozenset((
    'if', 'then', 'else', 'fi', 'for', 'while', 'do', 'done',
    'case', 'esac', '[', '[[',
))

# Commands that merely run another command; the wrapped command is reported.
# NOTE(review): 'unbuffered' may have been meant as expect's `unbuffer` - confirm.
_WRAPPERS = frozenset((
    'sudo', 'env', 'time', 'nohup', 'nice', 'stdbuf', 'unbuffered',
))

# Wrapper options that consume the NEXT word as their value.  Deliberately
# small: only the common cases are covered, this is a heuristic.
_WRAPPER_VALUE_FLAGS = {
    'sudo': ('-u', '--user', '-g', '--group'),
    'env': ('-u', '--unset'),
    'nice': ('-n', '--adjustment'),
}

# Commands reported together with their subcommand (e.g. "git status").
_COMPOUND_COMMANDS = frozenset((
    'git', 'npm', 'npx', 'vercel', 'gh', 'pip', 'pip3', 'pip3.11',
    'brew', 'docker', 'kubectl', 'pm2', 'systemctl', 'service',
    'uv', 'pnpm', 'yarn', 'cargo', 'go', 'terraform', 'aws',
    'heroku', 'fly', 'wrangler', 'railway',
))

_PYTHON_COMMANDS = ('python3', 'python', 'python3.11', 'python3.12')


def _skip_wrappers(parts):
    """Return `parts` with leading wrapper commands, their flags and any
    `NAME=value` words removed.

    If a wrapper is followed by nothing but flags/assignments there is no
    wrapped command to report, so the words are returned from that wrapper on.
    """
    while parts[0] in _WRAPPERS:
        value_flags = _WRAPPER_VALUE_FLAGS.get(parts[0], ())
        idx = 1
        while idx < len(parts):
            word = parts[idx]
            if word in value_flags:
                idx += 2  # skip the flag and the value it consumes
            elif word.startswith('-') or _ASSIGNMENT_WORD_RE.match(word):
                idx += 1
            else:
                break
        if idx >= len(parts):
            return parts
        parts = parts[idx:]
    return parts


def get_bash_pattern(cmd):
    """Reduce a shell command line to a short pattern suitable for counting.

    Only the first command of the line is considered (words are split on
    whitespace; pipelines and `&&` chains are not analysed).

    Args:
        cmd: The raw command string of a Bash tool call.

    Returns:
        None when `cmd` is empty, not a string, or starts with a shell
        keyword / test bracket.  Otherwise one of:
          * "<cmd> <subcommand>" for the commands in `_COMPOUND_COMMANDS`
            when the next word is not an option or an absolute path;
          * "python3 tools/<script>.py" when a python command line mentions
            a `tools/<script>.py` path, "python3 <script>.py" when its first
            argument is a `.py` file, and plain "python3" for other python
            invocations that have arguments;
          * "node <script>" / "<tsx|ts-node|bun> <script>" when the first
            argument has a recognised script extension;
          * the bare command name in every other case.

    Leading `NAME=value` assignments (quoted values included) and wrapper
    commands such as `sudo` or `env`, together with their flags, are skipped
    before the pattern is derived.
    """
    if not cmd or not isinstance(cmd, str):
        return None
    cmd = cmd.strip()

    # Strip leading env var assignments like `FOO=bar command`.
    while True:
        m = _ENV_ASSIGNMENT_RE.match(cmd)
        if m is None:
            break
        cmd = m.group(1).strip()

    parts = cmd.split()
    if not parts:
        return None

    # Shell constructs are not real commands.
    if parts[0] in _SHELL_KEYWORDS:
        return None

    parts = _skip_wrappers(parts)
    token = parts[0]
    sub = parts[1] if len(parts) > 1 else None

    if token in _COMPOUND_COMMANDS and sub and not sub.startswith(('-', '/')):
        return f"{token} {sub}"

    # Python: every interpreter spelling is normalised to "python3".
    if token in _PYTHON_COMMANDS and sub:
        if sub.startswith('-'):
            return 'python3'
        # A tools/<script>.py path anywhere on the command line wins over the
        # first argument.
        m = re.search(r'tools/([^\s/"]+\.py)', cmd)
        if m:
            return f"python3 tools/{m.group(1)}"
        if sub.endswith('.py'):
            return f"python3 {os.path.basename(sub)}"
        return 'python3'

    # node: show the script's base name.
    if token == 'node' and sub and not sub.startswith('-'):
        if os.path.splitext(sub)[1] in ('.js', '.mjs', '.cjs', '.ts'):
            return f"node {os.path.basename(sub)}"

    # tsx / ts-node / bun: show the script's base name.
    if token in ('tsx', 'ts-node', 'bun') and sub and not sub.startswith('-'):
        if os.path.splitext(sub)[1] in ('.ts', '.js', '.mjs'):
            return f"{token} {os.path.basename(sub)}"

    return token

# Tool names that are never tallied as MCP tools: the scan loop counts a
# tool_use block under "mcp" only when its name is absent from this set.
# NOTE(review): membership is case-sensitive, so e.g. 'Task' is not covered by
# 'task' - confirm the list against the tool names that appear in the logs.
NON_MCP = {
    'Agent',
    'Bash',
    'CronCreate',
    'CronDelete',
    'CronList',
    'Edit',
    'EnterWorktree',
    'ExitWorktree',
    'Glob',
    'Grep',
    'ListMcpResourcesTool',
    'Monitor',
    'NotebookEdit',
    'PushNotification',
    'Read',
    'ReadMcpResourceTool',
    'RemoteTrigger',
    'ScheduleWakeup',
    'Skill',
    'TodoWrite',
    'ToolSearch',
    'WebFetch',
    'WebSearch',
    'Write',
    'computer',
    'str_replace_based_edit_tool',
    'str_replace_editor',
    'task',
}

# Run statistics, reported on stderr once the scan is finished.
errors = 0        # non-blank lines that were not valid JSON
total_lines = 0   # every line read, blank ones included
bash_calls = 0    # Bash tool calls for which a pattern was derived
mcp_calls = 0     # tool calls whose name is not listed in NON_MCP

for filepath in top_files:
    try:
        # errors='replace' keeps a stray undecodable byte from aborting the file.
        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                total_lines += 1
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError:
                    errors += 1
                    continue

                # A line can be valid JSON without being an object (a bare
                # list, string, number or null).  Skip it explicitly: calling
                # .get() on it would raise, and the handler at the bottom
                # would then abandon the REST of this file.
                if not isinstance(entry, dict):
                    continue

                # The JSONL format: each line has a "message" key with role/content
                msg = entry.get('message', {})
                if not isinstance(msg, dict):
                    continue
                if msg.get('role') != 'assistant':
                    continue

                content = msg.get('content', [])
                if not isinstance(content, list):
                    continue

                for block in content:
                    if not isinstance(block, dict):
                        continue
                    if block.get('type') != 'tool_use':
                        continue

                    tool_name = block.get('name', '')
                    # A missing, empty or non-string name cannot be tallied
                    # (an unhashable value would even raise on the set lookup).
                    if not isinstance(tool_name, str) or not tool_name:
                        continue

                    inp = block.get('input', {})
                    if not isinstance(inp, dict):
                        inp = {}

                    if tool_name == 'Bash':
                        pattern = get_bash_pattern(inp.get('command', ''))
                        # Calls without a derivable pattern are not counted.
                        if pattern:
                            bash_patterns[pattern] += 1
                            bash_calls += 1
                    elif tool_name not in NON_MCP:
                        mcp_tools[tool_name] += 1
                        mcp_calls += 1

    except Exception as e:
        # Best-effort boundary: report the file and carry on with the next one.
        print(f"Error: {filepath}: {e}", file=sys.stderr)

print(f"# Lines: {total_lines}, Errors: {errors}, Bash: {bash_calls}, MCP: {mcp_calls}", file=sys.stderr)

# Keep the 40 most frequent Bash patterns and the 20 most frequent MCP tools;
# sorted() is stable, so ties stay in first-seen order.
top_bash = sorted(bash_patterns.items(), key=lambda x: x[1], reverse=True)[:40]
top_mcp = sorted(mcp_tools.items(), key=lambda x: x[1], reverse=True)[:20]

result = {
    "bash": dict(top_bash),
    "mcp": dict(top_mcp),
}

# The JSON report goes to stdout; all diagnostics above went to stderr.
print(json.dumps(result, indent=2))
