#!/usr/bin/env python3
"""
Analyze word counts in legal opinion sections to guide editing decisions.

Usage:
    python3 analyze_opinion.py <opinion_file.txt> [target_word_count]
    
Or import and use programmatically:
    from analyze_opinion import analyze_opinion
    results = analyze_opinion(text)
"""

import sys
import re
from collections import defaultdict


def analyze_opinion(text):
    """
    Analyze a legal opinion's word counts and structural markers.

    Args:
        text: Full text of the opinion as a single string.

    Returns a dictionary with:
    - total_words: Total word count (whitespace-delimited tokens)
    - sections: defaultdict mapping each detected section name
      ('facts', 'procedural', 'discussion', 'conclusion', 'dissent',
      'concurrence') to 1. This is a presence flag, not a word count.
    - bracketed_text: Word count of text inside square brackets
    - citations: Approximate number of reporter citations
    - footnotes: Number of lines that start with "<number>. "
    """
    results = {
        'total_words': 0,
        'sections': defaultdict(int),
        'bracketed_text': 0,
        'citations': 0,
        'footnotes': 0
    }

    # Count total words (simple whitespace split)
    results['total_words'] = len(text.split())

    # Count words inside [ ... ]; empty brackets "[]" are not matched
    bracketed = re.findall(r'\[([^\]]+)\]', text)
    results['bracketed_text'] = sum(len(b.split()) for b in bracketed)

    # Estimate citation density by looking for "<volume> <reporter> <page>".
    # The reporter is one capitalized abbreviation ending in a period,
    # optionally followed by more abbreviations or a series ordinal, so that
    # "123 U.S. 456", "789 F.2d 101", "100 F. Supp. 2d 200" and "50 S. Ct. 10"
    # all match. The lazy repetition plus the trailing \b makes the match stop
    # at the first complete page number instead of running into later text.
    citation_re = (
        r'\b\d+\s+'
        r'[A-Z][A-Za-z]*\.'
        r'(?:\s?(?:[A-Z][A-Za-z]*\.|\d+(?:d|th|st|nd|rd)))*?'
        r'\s+\d+\b'
    )
    results['citations'] = len(re.findall(citation_re, text))

    # Count footnotes (text starting with number and period at line start).
    # NOTE: ordinary numbered-list lines have the same shape and are counted too.
    footnotes = re.findall(r'^\s*\d+\.\s+.+', text, re.MULTILINE)
    results['footnotes'] = len(footnotes)

    # Try to identify major sections by common header words. The search is
    # case-insensitive and unanchored, so the word may appear anywhere in the
    # text, not only on a heading line.
    section_patterns = {
        'facts': r'(?:FACTS?|BACKGROUND|Factual Background)',
        'procedural': r'(?:PROCEDURAL HISTORY|Procedural Background)',
        'discussion': r'(?:DISCUSSION|ANALYSIS|OPINION)',
        'conclusion': r'(?:CONCLUSION|DISPOSITION|JUDGMENT)',
        'dissent': r'(?:DISSENT|dissenting)',
        'concurrence': r'(?:CONCUR|concurring)'
    }

    for section_name, pattern in section_patterns.items():
        if re.search(pattern, text, re.IGNORECASE):
            results['sections'][section_name] = 1  # Mark as present

    return results


def print_analysis(results):
    """Print a formatted analysis report to stdout.

    Args:
        results: Dictionary with the keys 'total_words', 'bracketed_text',
            'citations', 'footnotes' and 'sections', as returned by
            analyze_opinion().
    """
    total_words = results['total_words']
    bracketed_words = results['bracketed_text']
    # An empty opinion has zero words; report 0.0% rather than dividing by zero.
    bracketed_pct = bracketed_words / total_words * 100 if total_words else 0.0

    print("=" * 60)
    print("OPINION ANALYSIS")
    print("=" * 60)
    print(f"\nTotal words: {total_words:,}")
    print(f"Bracketed text: {bracketed_words:,} words ({bracketed_pct:.1f}%)")
    print(f"Citation markers found: {results['citations']}")
    print(f"Footnotes detected: {results['footnotes']}")

    if results['sections']:
        print("\nSections detected:")
        # Only the section names are shown; the stored values are not used.
        for section in results['sections']:
            print(f"  - {section.title()}")

    print("\n" + "=" * 60)


def calculate_reduction(original_count, target_count):
    """Print how many words must be cut to reach the target length.

    Args:
        original_count: Current word count of the opinion.
        target_count: Desired word count after editing.
    """
    reduction = original_count - target_count
    # A zero-word original has nothing to reduce; avoid dividing by zero.
    percentage = (reduction / original_count) * 100 if original_count else 0.0
    print(f"\nTo reach {target_count:,} words from {original_count:,}:")
    print(f"  - Must cut: {reduction:,} words")
    print(f"  - Reduction: {percentage:.1f}%")


def suggest_cuts(original_count, target_count):
    """Print a suggested per-section word budget for the edited opinion.

    Each budget is a fixed share of ``target_count``, truncated to a whole
    number of words.

    Args:
        original_count: Current word count. It does not affect the output and
            is kept so existing callers continue to work.
        target_count: Desired word count after editing.
    """
    # (label, share of the target, percentage shown in the report)
    allocation = (
        ("Facts:", 0.15, 15),
        ("Procedural History:", 0.05, 5),
        ("Legal Standard:", 0.10, 10),
        ("Core Reasoning:", 0.60, 60),
        ("Dissent (if any):", 0.10, 10),
    )

    print("\nSuggested allocation for edited case:")
    for label, share, percent in allocation:
        # Labels are padded to 20 columns so the word counts line up.
        print(f"  {label:<20}{int(target_count * share):,} words ({percent}%)")


def main():
    """Command-line entry point.

    Reads the opinion file named by the first argument, prints the analysis
    report, and, when a second argument is given, prints the reduction needed
    to reach that target word count together with a suggested allocation.

    Exits with status 1 when no file argument is given or the file cannot be
    read as UTF-8 text.
    """
    if len(sys.argv) < 2:
        print("Usage: python3 analyze_opinion.py <opinion_file.txt> [target_word_count]")
        sys.exit(1)

    filename = sys.argv[1]

    try:
        with open(filename, 'r', encoding='utf-8') as f:
            text = f.read()
    except FileNotFoundError:
        print(f"Error: File '{filename}' not found")
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as exc:
        # Covers directories, permission problems and non-UTF-8 content,
        # which would otherwise end in an uncaught traceback.
        print(f"Error: Could not read '{filename}': {exc}")
        sys.exit(1)

    results = analyze_opinion(text)
    print_analysis(results)

    # If target word count provided as second argument
    if len(sys.argv) >= 3:
        # Only the conversion is guarded, so a ValueError raised while
        # reporting is not mistaken for a bad argument.
        try:
            target = int(sys.argv[2])
        except ValueError:
            print(f"\nWarning: '{sys.argv[2]}' is not a valid target word count")
        else:
            calculate_reduction(results['total_words'], target)
            suggest_cuts(results['total_words'], target)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
