#!/usr/bin/env python3
"""
FLHIP Scraper v3 - Hotel Main Orchestrator
Run this after restaurant scraper on Saturday mornings via cron

Usage:
    python hotel_main.py              # Full run: fetch, process, save, email
    python hotel_main.py --no-email   # Skip email send
    python hotel_main.py --dry-run    # Process but don't save to DB
    python hotel_main.py --limit 25   # Process at most 25 articles
"""

import sys
import argparse
from datetime import datetime

from hotel_config import MAX_ARTICLES_PER_RUN, ANTHROPIC_API_KEY
from hotel_rss_fetcher import fetch_and_scrape_all
from hotel_claude_processor import batch_process_articles
from hotel_db_handler import insert_leads_batch, get_recent_leads
from hotel_email_sender import send_lead_email


def _print_banner(title):
    """Print *title* framed by 70-character rules, preceded by a blank line."""
    print("\n" + "=" * 70)
    print(title)
    print("=" * 70)


def main():
    """Run the hotel lead pipeline: fetch, extract, save, and report.

    Command-line flags:
        --no-email  Skip sending the email report.
        --dry-run   Extract leads but do not write to the database and do
                    not send any email.
        --limit N   Process at most N articles (defaults to
                    MAX_ARTICLES_PER_RUN); must not be negative.

    This function always terminates the process via ``sys.exit``:
        0 -- no articles to process, no leads extracted, a dry run, or at
             least one lead inserted.
        1 -- ANTHROPIC_API_KEY is not set, or a real run inserted no leads
             (this includes the case where every lead was a duplicate).
        2 -- invalid command-line arguments (raised through argparse).
    """
    parser = argparse.ArgumentParser(description="FLHIP Hotel Lead Scraper v3")
    parser.add_argument("--no-email", action="store_true", help="Skip email send")
    parser.add_argument("--dry-run", action="store_true", help="Process but don't save to DB")
    parser.add_argument("--limit", type=int, default=MAX_ARTICLES_PER_RUN, help="Max articles to process")
    args = parser.parse_args()

    # A negative limit would make the slice below silently drop articles
    # from the END of the list instead of capping the count.
    if args.limit < 0:
        parser.error("--limit must be zero or a positive integer")

    # A dry run must have no external side effects, so it suppresses email
    # on every path, exactly like --no-email.
    send_email = not (args.no_email or args.dry_run)

    start_time = datetime.now()

    print("=" * 70)
    print("FLHIP HOTEL SCRAPER v3 - Claude API Edition")
    print(f"Started: {start_time.strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 70)

    # Fail fast before doing any network work if the API key is missing.
    if not ANTHROPIC_API_KEY:
        print("\n ERROR: ANTHROPIC_API_KEY not set!")
        print("Set it via environment variable:")
        print("  export ANTHROPIC_API_KEY='your-key-here'")
        sys.exit(1)

    # =========================================================================
    # STEP 1: Fetch and scrape articles
    # =========================================================================
    _print_banner("STEP 1: Fetching Hotel RSS feeds and scraping articles")

    articles = fetch_and_scrape_all()

    if len(articles) > args.limit:
        print(f"\nLimiting to {args.limit} articles (from {len(articles)})")
        articles = articles[:args.limit]

    # Checked after limiting so that --limit 0 also ends here; this also
    # guarantees len(articles) > 0 for the rate calculation below.
    if not articles:
        print("\n No hotel articles to process. Check RSS feeds.")
        sys.exit(0)

    # =========================================================================
    # STEP 2: Process through Claude
    # =========================================================================
    _print_banner("STEP 2: Processing hotel articles through Claude API")

    leads = batch_process_articles(articles)
    pass_rate = len(leads) / len(articles) * 100

    print("\n Claude Results:")
    print(f"   Articles processed: {len(articles)}")
    print(f"   Hotel leads extracted: {len(leads)}")
    print(f"   Pass rate: {pass_rate:.1f}%")

    if not leads:
        print("\n No hotel leads extracted. Review Claude processing.")

        # Still report the zero-lead run by email, unless email is
        # suppressed by --no-email or --dry-run.
        if send_email:
            stats = {"total": len(articles), "inserted": 0, "duplicates": 0, "errors": 0}
            send_lead_email([], stats)

        sys.exit(0)

    # =========================================================================
    # STEP 3: Save to database
    # =========================================================================
    _print_banner("STEP 3: Saving hotel leads to database")

    if args.dry_run:
        print("\n DRY RUN - Skipping database insert")
        print("\nHotel leads that would be inserted:")
        for lead in leads:
            print(f"   - {lead.get('business_name')} - {lead.get('city')}, {lead.get('state')}")
        # "inserted" here is the would-be count; nothing is written.
        stats = {"total": len(articles), "inserted": len(leads), "duplicates": 0, "errors": 0}
    else:
        stats = insert_leads_batch(leads)
        print("\n Database Results:")
        print(f"   Inserted: {stats.get('inserted', 0)}")
        print(f"   Duplicates: {stats.get('duplicates', 0)}")
        print(f"   Errors: {stats.get('errors', 0)}")

    # The email report needs the article count alongside the DB counters.
    stats["total"] = len(articles)

    # =========================================================================
    # STEP 4: Send email
    # =========================================================================
    _print_banner("STEP 4: Sending hotel email report")

    if args.no_email:
        print("\n Skipping email (--no-email flag)")
    elif args.dry_run:
        print("\n Skipping email (--dry-run flag)")
    else:
        # Every extracted lead is passed along, not only the ones the
        # database accepted; `stats` carries the insert/duplicate/error counts.
        send_lead_email(leads, stats)

    # =========================================================================
    # SUMMARY
    # =========================================================================
    duration = (datetime.now() - start_time).total_seconds()

    # Nothing is persisted on a dry run, so never report leads as saved.
    if args.dry_run:
        saved_text = "0 (dry run)"
    else:
        saved_text = str(stats.get("inserted", 0))

    _print_banner("HOTEL SCRAPER COMPLETE")
    print(f"Duration: {duration:.1f} seconds")
    print(f"Articles processed: {len(articles)}")
    print(f"Hotel leads found: {len(leads)}")
    print(f"Hotel leads saved: {saved_text}")
    print(f"Quality rate: {pass_rate:.1f}%")
    print("=" * 70)

    # Non-zero exit signals a real run that stored nothing new.
    if stats.get("inserted", 0) > 0 or args.dry_run:
        sys.exit(0)
    else:
        sys.exit(1)


# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
