#!/usr/bin/env python3
"""Scan Claude Code session transcripts for Bash and MCP tool call patterns.
Reads files already found by glob and sorted by mtime."""

import os
import json
import re
from collections import defaultdict

# Files to scan - all JSONL files from -Users-bryce-FLSM project
# We'll sort by mtime and take top 50
import glob as globmod

base_dirs = [
    os.path.expanduser("~/.claude/projects/-Users-bryce-FLSM"),
]

all_files = []
for base in base_dirs:
    found = globmod.glob(f"{base}/**/*.jsonl", recursive=True)
    all_files.extend(found)

all_files.sort(key=lambda f: os.path.getmtime(f), reverse=True)
top_files = all_files[:50]

import sys
print(f"Total JSONL: {len(all_files)}, scanning top {len(top_files)}", file=sys.stderr)
for f in top_files[:5]:
    print(f"  {os.path.basename(f)} mtime={os.path.getmtime(f):.0f}", file=sys.stderr)

bash_patterns = defaultdict(int)
mcp_tools = defaultdict(int)

def get_pattern(cmd):
    if not cmd or not isinstance(cmd, str):
        return None
    cmd = cmd.strip()

    # Strip env var prefixes
    while re.match(r'^[A-Z_][A-Z0-9_]*=\S+\s+', cmd):
        cmd = re.sub(r'^[A-Z_][A-Z0-9_]*=\S+\s+', '', cmd).strip()

    if not cmd:
        return None

    parts = re.split(r'\s+', cmd)
    token = parts[0]

    # Handle wrappers
    if token in ('sudo', 'env', 'time', 'nohup', 'nice') and len(parts) > 1:
        token = parts[1]
        parts = parts[1:]

    subcommand = parts[1] if len(parts) > 1 else None

    compound_tools = {
        'git', 'npm', 'npx', 'vercel', 'gh', 'pip', 'pip3', 'brew',
        'docker', 'kubectl', 'pm2', 'systemctl', 'service', 'uv',
        'pnpm', 'yarn'
    }

    if token in compound_tools and subcommand and not subcommand.startswith('-'):
        return f"{token} {subcommand}"

    if token in ('python3', 'python') and subcommand:
        if subcommand.startswith('-'):
            return token
        if 'tools/' in subcommand:
            script = re.search(r'tools/([^/\s]+)', subcommand)
            if script:
                return f"python3 tools/{script.group(1)}"
        if subcommand.endswith('.py') or subcommand.endswith('.py"') :
            return f"python3 {os.path.basename(subcommand.strip('\"'))}"
        return token

    if token == 'node' and subcommand and not subcommand.startswith('-'):
        return f"node {os.path.basename(subcommand)}"

    return token

NON_MCP = {
    'Read', 'Write', 'Edit', 'Glob', 'Grep', 'Bash',
    'WebFetch', 'WebSearch', 'TodoWrite', 'ToolSearch',
    'Skill', 'ScheduleWakeup', 'Monitor', 'NotebookEdit',
    'PushNotification', 'RemoteTrigger', 'CronCreate',
    'CronDelete', 'CronList', 'EnterWorktree', 'ExitWorktree',
    'ListMcpResourcesTool', 'ReadMcpResourceTool', 'Agent',
    'computer', 'str_replace_editor', 'str_replace_based_edit_tool'
}

total_bash = 0
total_mcp = 0

for filepath in top_files:
    try:
        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError:
                    continue

                if entry.get('type') != 'message':
                    continue
                if entry.get('role') != 'assistant':
                    continue

                content = entry.get('content', [])
                if not isinstance(content, list):
                    continue

                for block in content:
                    if not isinstance(block, dict):
                        continue
                    if block.get('type') != 'tool_use':
                        continue

                    tool_name = block.get('name', '')
                    inp = block.get('input', {})

                    if tool_name == 'Bash':
                        cmd = inp.get('command', '')
                        pattern = get_pattern(cmd)
                        if pattern:
                            bash_patterns[pattern] += 1
                            total_bash += 1
                    elif tool_name not in NON_MCP:
                        mcp_tools[tool_name] += 1
                        total_mcp += 1

    except Exception as e:
        print(f"Error reading {filepath}: {e}", file=sys.stderr)

print(f"Total Bash calls: {total_bash}, MCP calls: {total_mcp}", file=sys.stderr)

top_bash = sorted(bash_patterns.items(), key=lambda x: x[1], reverse=True)[:40]
top_mcp = sorted(mcp_tools.items(), key=lambda x: x[1], reverse=True)[:20]

result = {
    "bash": {k: v for k, v in top_bash},
    "mcp": {k: v for k, v in top_mcp}
}

print(json.dumps(result, indent=2))
