"""
FLHIP Scraper v3 - Claude Processor
Single intelligent prompt that qualifies AND extracts in one call
"""

import anthropic
import json
import re
from datetime import datetime, timedelta
from config import ANTHROPIC_API_KEY, CLAUDE_MODEL, CLAUDE_MAX_TOKENS, CLAUDE_TEMPERATURE, US_STATES


# Shared Anthropic API client, constructed once when the module is imported.
client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)

# Today's date as YYYY-MM-DD, captured once at import time (not refreshed later).
CURRENT_DATE = datetime.now().date().isoformat()


# Prompt template used for every article: one call both qualifies the article
# (PASS/FAIL) and extracts the lead fields.
#
# This is an f-string, so {CURRENT_DATE} is interpolated once, when the module
# is imported; a long-running process keeps that date until it is restarted.
# Doubled braces are f-string escapes: after evaluation they become single
# literal braces. That keeps the JSON examples as plain braces and leaves
# {title}, {source}, {article_date} and {content} in the text as placeholders
# that process_article fills in per article.
EXTRACTION_PROMPT = f"""You are a lead qualification system for FLHIP, a restaurant industry database. Your job is to identify FUTURE restaurant openings in the United States.

TODAY'S DATE: {CURRENT_DATE}

TASK: Analyze the article and determine if it contains a legitimate future restaurant opening lead.

=== AUTOMATIC FAIL (return null) ===
- Restaurant has ALREADY OPENED (past tense: "opened", "launched", "debuted")
- Article is a "best of" list, retrospective, or year-in-review
- Article is about closures, fires, accidents, or negative events
- Article is about holiday hours, brunch guides, or "where to eat"
- Article is about a non-US location
- Article is about hotels, retail, or non-restaurant businesses
- Article is corporate/financial news without a specific location opening
- Article mentions an opening date that is IN THE PAST
- Content is blocked/paywalled (captcha, "verify you are human", etc.)

=== PASS CRITERIA (ALL must be true) ===
1. Article describes a restaurant that WILL open in the future
2. Location is in the United States
3. At least one of these future signals:
   - Specific future opening date mentioned
   - "Coming soon", "set to open", "will open", "plans to open"
   - Construction/buildout underway
   - Hiring for new location
   - Permit/license approved for new restaurant

=== EXTRACTION RULES ===
- Business name: The actual restaurant name, NOT the publisher, NOT generic descriptions
- If multiple restaurants mentioned, extract the PRIMARY one (the main subject)
- Opening date: Use YYYY-MM-DD format. If "Spring 2026" use "2026-04-15". If "early 2026" use "2026-02-28". If "late 2026" use "2026-11-15". If unknown, use null.
- State: Use 2-letter code (TX, CA, NY, etc.)
- Phone: Format as found, or null
- Contact: Owner/chef/manager name if mentioned, or null

=== RESPONSE FORMAT ===
Return ONLY valid JSON. No explanation. No markdown.

If FAIL:
{{"status": "FAIL", "reason": "brief reason"}}

If PASS:
{{
  "status": "PASS",
  "confidence": "high" | "medium" | "low",
  "business_name": "Restaurant Name",
  "address": "123 Main St" | null,
  "city": "City Name",
  "state": "TX",
  "zip": "12345" | null,
  "opening_date": "YYYY-MM-DD" | null,
  "phone": "555-123-4567" | null,
  "contact_name": "John Smith" | null,
  "contact_email": "email@example.com" | null,
  "notes": "Brief description of the restaurant and opening details",
  "future_signals": ["signal1", "signal2"]
}}

=== ARTICLE TO ANALYZE ===
TITLE: {{title}}
SOURCE: {{source}}
ARTICLE DATE: {{article_date}}

CONTENT:
{{content}}
"""


def process_article(title: str, content: str, source_url: str, article_date: str) -> dict | None:
    """
    Process a single article through Claude.
    Returns extracted data dict if PASS, None if FAIL.
    """
    
    # Quick pre-checks before API call
    if not content or len(content.strip()) < 100:
        return None
    
    # Check for bot walls in content
    bot_wall_patterns = [
        "verifying you are human",
        "unusual traffic",
        "enable javascript",
        "captcha",
        "access denied",
        "subscribe to continue",
        "create an account"
    ]
    content_lower = content.lower()
    for pattern in bot_wall_patterns:
        if pattern in content_lower:
            return None
    
    # Build the prompt - use single braces for replacement
    prompt = EXTRACTION_PROMPT.replace("{title}", title or "")
    prompt = prompt.replace("{source}", source_url or "")
    prompt = prompt.replace("{article_date}", article_date or "")
    prompt = prompt.replace("{content}", content[:8000])
    
    try:
        response = client.messages.create(
            model=CLAUDE_MODEL,
            max_tokens=CLAUDE_MAX_TOKENS,
            temperature=CLAUDE_TEMPERATURE,
            messages=[
                {"role": "user", "content": prompt}
            ]
        )
        
        # Extract response text
        response_text = response.content[0].text.strip()
        
        # Parse JSON response
        # Handle potential markdown code blocks
        if response_text.startswith("```"):
            response_text = re.sub(r"```json?\n?", "", response_text)
            response_text = response_text.replace("```", "").strip()
        
        result = json.loads(response_text)
        
        # Check status
        if result.get("status") != "PASS":
            return None
        
        # Validate required fields
        if not result.get("business_name"):
            return None
        if not result.get("city"):
            return None
        if not result.get("state"):
            return None
        
        # Validate state is US
        state = result.get("state", "").upper()
        if state not in US_STATES:
            return None
        
        # Validate opening date is in the future (if provided)
        opening_date = result.get("opening_date")
        if opening_date:
            try:
                open_dt = datetime.strptime(opening_date, "%Y-%m-%d")
                if open_dt < datetime.now():
                    return None  # Past date = FAIL
            except ValueError:
                result["opening_date"] = None  # Invalid format, clear it
        
        # Clean up the result
        result["state"] = state
        result["source_url"] = source_url
        result["article_title"] = title
        result["article_date"] = article_date
        result["processed_at"] = datetime.now().isoformat()
        
        return result
        
    except json.JSONDecodeError as e:
        print(f"JSON parse error: {e}")
        return None
    except anthropic.APIError as e:
        print(f"Claude API error: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error: {e}")
        return None


def batch_process_articles(articles: list[dict]) -> list[dict]:
    """
    Run every article through process_article and collect the leads.

    Args:
        articles: list of {"title": str, "content": str, "url": str,
            "date": str, "feed_name": str}. Missing keys default to "".

    Returns:
        The leads for articles that passed, in input order, each with the
        article's "feed_name" copied onto it. Progress is printed per article.
    """
    leads = []
    total = len(articles)

    for position, article in enumerate(articles, start=1):
        # A "title" key holding None must not crash the slice below and take
        # the whole batch down; process_article tolerates a missing title too.
        title = article.get("title") or ""
        print(f"Processing {position}/{total}: {title[:60]}...")

        result = process_article(
            title=title,
            content=article.get("content", ""),
            source_url=article.get("url", ""),
            article_date=article.get("date", ""),
        )

        if not result:
            print("  FAIL")
            continue

        # Pass through feed_name for tracking
        result["feed_name"] = article.get("feed_name", "")
        leads.append(result)
        print(f"  PASS: {result.get('business_name')} in {result.get('city')}, {result.get('state')}")

    return leads


# =============================================================================
# TEST
# =============================================================================
if __name__ == "__main__":
    # Manual smoke test: sends one hard-coded sample article through
    # process_article (a real API call) and prints the outcome.
    sample = {
        "title": "New Italian Restaurant Coming to Downtown Austin in Spring 2026",
        "content": """
        AUSTIN, TX - Chef Marco Rossi is bringing his acclaimed Italian cuisine to 
        downtown Austin with the opening of Rossi's Trattoria. The restaurant, located 
        at 123 Congress Avenue, is set to open in April 2026.
        
        The 5,000 square foot space will feature handmade pasta, wood-fired pizzas, 
        and an extensive wine list. Rossi, who trained in Bologna, has been planning 
        this expansion for two years.
        
        "Austin has such a vibrant food scene, and I can't wait to be part of it," 
        said Rossi. The restaurant is currently hiring for all positions.
        
        For more information, contact marco@rossistrattoria.com or call 512-555-0123.
        """,
        "url": "https://example.com/austin-restaurant-news/rossis-trattoria",
        "date": "2025-12-28",
        "feed_name": "WhatNow Austin"
    }

    print("Testing Claude processor...")
    print("=" * 60)

    lead = process_article(
        sample["title"],
        sample["content"],
        sample["url"],
        sample["date"],
    )

    # Report the failure case first; anything truthy is an extracted lead.
    if not lead:
        print("\nNo lead extracted (FAIL)")
    else:
        print("\nExtracted Lead:")
        print(json.dumps(lead, indent=2))
