#!/usr/bin/env python3
"""Scan Claude Code session transcripts for Bash and MCP tool call patterns."""

import glob
import json
import os
import re
import sys
from collections import defaultdict

# Root directory under which the session transcripts (*.jsonl) are searched.
BASE = os.path.expanduser("~/.claude/projects")

# Only this many of the most recently modified transcripts are scanned.
MAX_FILES = 50


def mtime_or_zero(path):
    """Return the modification time of *path*, or 0.0 if it cannot be stat'ed.

    A file may disappear between the glob and the sort; treating it as
    "oldest" keeps the sort from raising and aborting the whole run.
    """
    try:
        return os.path.getmtime(path)
    except OSError:
        return 0.0


# Find all JSONL files sorted by modification time (most recent first).
# BASE is escaped so glob metacharacters in the home path are taken literally.
all_files = glob.glob(f"{glob.escape(BASE)}/**/*.jsonl", recursive=True)
all_files.sort(key=mtime_or_zero, reverse=True)
top_files = all_files[:MAX_FILES]

# Progress goes to stderr so stdout carries only the final JSON report.
print(f"Found {len(all_files)} total JSONL files, scanning top {len(top_files)}", file=sys.stderr, flush=True)

# Occurrence counters, keyed by normalized Bash pattern / tool name.
bash_patterns = defaultdict(int)
mcp_tools = defaultdict(int)

# A shell variable assignment token such as KEY=val (the value may be empty).
_ASSIGNMENT_RE = re.compile(r'[A-Za-z_][A-Za-z0-9_]*=\S*')

# Operators that terminate the first logical command: &&, ||, ; and |.
# A lone & is deliberately not listed so redirections like 2>&1 stay intact.
_SEPARATOR_RE = re.compile(r'&&|\|\||[;|]')

# Script path component of the form tools/<name>.py.
_TOOLS_SCRIPT_RE = re.compile(r'tools/([^/\s]+\.py)')

# Prefix commands that merely run another command.
_WRAPPERS = frozenset({'sudo', 'env', 'time', 'nohup', 'nice'})

# Tools where the subcommand matters for counting.
_COMPOUND_TOOLS = frozenset({
    'git', 'npm', 'npx', 'vercel', 'gh', 'pip', 'pip3', 'brew',
    'docker', 'kubectl', 'pm2', 'systemctl', 'service', 'uv',
})


def get_pattern(cmd):
    """Return a normalized pattern for a shell command, for frequency counting.

    Only the first logical command is considered: everything from the first
    ``&&``, ``||``, ``;`` or ``|`` onward is discarded. Leading ``KEY=val``
    assignments and wrapper commands (``sudo``, ``env``, ``time``, ``nohup``,
    ``nice``) are then peeled off, repeatedly, to reach the real command.

    The result is one of:
      * ``"<tool> <subcommand>"`` for tools in the compound-tool set when the
        next word is not an option (e.g. ``"git status"``);
      * ``"python3 tools/<script>.py"`` or ``"python3 <script>.py"`` for
        Python script invocations (``python`` and ``python3`` are merged);
      * ``"node <script>"`` for ``.js`` / ``.mjs`` scripts;
      * otherwise just the command word.

    Returns None when *cmd* is empty, not a string, or contains no command
    word before the first separator.

    The separator split is purely textual and does not understand quoting;
    since at most the first two words are used, this only matters when a
    separator character appears inside those words.
    """
    if not cmd or not isinstance(cmd, str):
        return None

    first_command = _SEPARATOR_RE.split(cmd, maxsplit=1)[0]
    parts = first_command.split()

    # Peel assignments and wrappers until a real command word is in front.
    # An assignment or wrapper standing alone is kept as the command itself.
    while True:
        while len(parts) > 1 and _ASSIGNMENT_RE.fullmatch(parts[0]):
            parts = parts[1:]
        # A wrapper followed by an option (e.g. "sudo -u root ls") is kept
        # as the command: the option's argument cannot be told apart from
        # the wrapped command without per-wrapper option knowledge.
        if (len(parts) > 1 and parts[0] in _WRAPPERS
                and not parts[1].startswith('-')):
            parts = parts[1:]
        else:
            break

    if not parts:
        return None

    token = parts[0]
    subcommand = parts[1] if len(parts) > 1 else None

    if token in _COMPOUND_TOOLS and subcommand and not subcommand.startswith('-'):
        return f"{token} {subcommand}"

    # Python: include the script, normalized so different checkouts collapse.
    if token in ('python3', 'python') and subcommand:
        if subcommand.startswith('-'):
            return token
        script = _TOOLS_SCRIPT_RE.search(subcommand)
        if script:
            return f"python3 tools/{script.group(1)}"
        if subcommand.endswith('.py'):
            return f"python3 {os.path.basename(subcommand)}"
        return token

    # node: include the script file name only.
    if token == 'node' and subcommand and not subcommand.startswith('-'):
        if subcommand.endswith(('.js', '.mjs')):
            return f"node {os.path.basename(subcommand)}"

    return token

# Tool names that are not counted as MCP tools by the scan below; any other
# tool name (apart from 'Bash', which is tallied separately) is treated as
# an MCP tool. Kept alphabetical so additions are easy to spot in a diff.
NON_MCP = {
    'Agent',
    'Bash',
    'CronCreate',
    'CronDelete',
    'CronList',
    'Edit',
    'EnterWorktree',
    'ExitWorktree',
    'Glob',
    'Grep',
    'ListMcpResourcesTool',
    'Monitor',
    'NotebookEdit',
    'PushNotification',
    'Read',
    'ReadMcpResourceTool',
    'RemoteTrigger',
    'ScheduleWakeup',
    'Skill',
    'TodoWrite',
    'ToolSearch',
    'WebFetch',
    'WebSearch',
    'Write',
}

def _assistant_content(entry):
    """Return the content-block list of an assistant message, or [] if none.

    Two record shapes are accepted:
      * flat: the JSON object itself has ``type == "message"`` and
        ``role == "assistant"`` with a ``content`` list;
      * envelope: the assistant message is nested under a ``"message"`` key.
        NOTE(review): Claude Code transcripts appear to use this envelope
        form -- confirm against a real session file.

    Anything else (non-object JSON, other roles, non-list content) yields [].
    """
    if not isinstance(entry, dict):
        return []
    if entry.get('type') == 'message' and entry.get('role') == 'assistant':
        message = entry
    else:
        message = entry.get('message')
        if not isinstance(message, dict) or message.get('role') != 'assistant':
            return []
    content = message.get('content')
    return content if isinstance(content, list) else []


def _count_tool_uses(path):
    """Tally the tool_use blocks of one transcript into the global counters.

    Bash calls are counted in ``bash_patterns`` under their normalized
    pattern; every other tool not listed in ``NON_MCP`` is counted in
    ``mcp_tools`` under its name. Malformed lines and blocks are skipped
    individually so one bad record does not discard the rest of the file.
    """
    with open(path, 'r', encoding='utf-8', errors='replace') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                entry = json.loads(line)
            except json.JSONDecodeError:
                continue

            for block in _assistant_content(entry):
                if not isinstance(block, dict):
                    continue
                if block.get('type') != 'tool_use':
                    continue

                # A block without a usable name cannot be attributed to a tool.
                tool_name = block.get('name')
                if not isinstance(tool_name, str) or not tool_name:
                    continue

                if tool_name == 'Bash':
                    inp = block.get('input')
                    cmd = inp.get('command', '') if isinstance(inp, dict) else ''
                    pattern = get_pattern(cmd)
                    if pattern:
                        bash_patterns[pattern] += 1
                elif tool_name not in NON_MCP:
                    # Likely an MCP tool
                    mcp_tools[tool_name] += 1


for filepath in top_files:
    try:
        _count_tool_uses(filepath)
    except Exception as e:
        # Per-file boundary: report the failure and carry on with the rest.
        print(f"Error reading {filepath}: {e}", file=sys.stderr, flush=True)

# Sort and get top results (ties keep first-seen order: sorted() is stable).
top_bash = sorted(bash_patterns.items(), key=lambda x: x[1], reverse=True)[:40]
top_mcp = sorted(mcp_tools.items(), key=lambda x: x[1], reverse=True)[:20]

result = {
    "bash": dict(top_bash),
    "mcp": dict(top_mcp),
}

# The JSON report is the only thing written to stdout.
print(json.dumps(result, indent=2))
