"""
FLHIP Scraper v3 - Hotel Database Handler
Saves extracted hotel leads to MySQL - hotel_leads_usa
Updated: 2026-01-10 - Added archive table duplicate checking
"""

import mysql.connector
from mysql.connector import Error
from datetime import datetime
import json
from hotel_config import DB_CONFIG, RSS_FEEDS


TABLE_NAME = "hotel_leads_usa"
ARCHIVE_TABLE = "hotel_leads_usa_archive"

# Build feed URL lookup from config
FEED_URL_LOOKUP = {feed['name']: feed['url'] for feed in RSS_FEEDS}


def get_feed_url(feed_name: str) -> str:
    """Look up feed URL from feed name."""
    if not feed_name:
        return ''
    return FEED_URL_LOOKUP.get(feed_name, '')


def get_connection():
    """Get database connection."""
    try:
        conn = mysql.connector.connect(**DB_CONFIG)
        return conn
    except Error as e:
        print(f"Database connection error: {e}")
        return None


def check_duplicate(conn, url: str) -> bool:
    """Check if article URL already exists in current table."""
    cursor = conn.cursor()
    try:
        cursor.execute(
            f"SELECT id FROM {TABLE_NAME} WHERE info_source = %s LIMIT 1",
            (url,)
        )
        result = cursor.fetchone()
        return result is not None
    finally:
        cursor.close()


def check_archive_duplicate(conn, url: str) -> bool:
    """Check if article URL already exists in archive table."""
    cursor = conn.cursor()
    try:
        # Check if archive table exists first
        cursor.execute(f"SHOW TABLES LIKE '{ARCHIVE_TABLE}'")
        if not cursor.fetchone():
            return False
        
        cursor.execute(
            f"SELECT id FROM {ARCHIVE_TABLE} WHERE info_source = %s LIMIT 1",
            (url,)
        )
        result = cursor.fetchone()
        return result is not None
    except Error as e:
        print(f"Archive duplicate check error: {e}")
        return False
    finally:
        cursor.close()


def check_all_duplicates(conn, url: str) -> tuple[bool, str | None]:
    """
    Check if URL exists in current table OR archive.
    Returns (is_duplicate, location) where location is 'current', 'archive', or None.
    """
    if check_duplicate(conn, url):
        return True, 'current'
    if check_archive_duplicate(conn, url):
        return True, 'archive'
    return False, None


def sync_auto_increment(conn):
    """
    Ensure auto-increment starts after the max ID from both tables.
    This prevents ID collisions when archive has higher IDs.
    """
    cursor = conn.cursor()
    try:
        # Get max ID from current table
        cursor.execute(f"SELECT COALESCE(MAX(id), 0) FROM {TABLE_NAME}")
        current_max = cursor.fetchone()[0]
        
        # Check if archive table exists and get max ID
        cursor.execute(f"SHOW TABLES LIKE '{ARCHIVE_TABLE}'")
        archive_max = 0
        if cursor.fetchone():
            cursor.execute(f"SELECT COALESCE(MAX(id), 0) FROM {ARCHIVE_TABLE}")
            archive_max = cursor.fetchone()[0]
        
        # Set auto-increment to max + 1
        next_id = max(current_max, archive_max) + 1
        cursor.execute(f"ALTER TABLE {TABLE_NAME} AUTO_INCREMENT = {next_id}")
        conn.commit()
        
        print(f"[SYNC] Current table max ID: {current_max}")
        print(f"[SYNC] Archive table max ID: {archive_max}")
        print(f"[SYNC] Auto-increment set to: {next_id}")
        
    except Error as e:
        print(f"Auto-increment sync error: {e}")
    finally:
        cursor.close()


def insert_lead(conn, lead: dict) -> tuple[int | None, str | None]:
    """
    Insert a lead into the database. 
    Returns (inserted_id, None) on success, (None, 'duplicate_location') on duplicate, (None, 'error') on error.
    """
    
    source_url = lead.get("source_url", "")
    
    # Check both current and archive tables
    is_dup, location = check_all_duplicates(conn, source_url)
    if is_dup:
        print(f"  Duplicate ({location}): {lead.get('business_name')}")
        return None, location
    
    cursor = conn.cursor()
    
    try:
        sql = f"""
        INSERT INTO {TABLE_NAME} (
            rss_title, feed_name, info_source, internet_info, article_date,
            business, address, city, state, zip,
            opening_date, standardized_opening_date,
            phone, contact_name, contact_email,
            confidence, future_signals, notes,
            lead_stage, extraction_method
        ) VALUES (
            %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
            %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
        )
        """
        
        # Determine lead stage based on opening date
        opening_date = lead.get("opening_date")
        lead_stage = None
        if opening_date:
            try:
                open_dt = datetime.strptime(opening_date, "%Y-%m-%d")
                days_out = (open_dt - datetime.now()).days
                if days_out <= 30:
                    lead_stage = "LATE"
                elif days_out <= 90:
                    lead_stage = "MID"
                else:
                    lead_stage = "EARLY"
            except:
                lead_stage = "EARLY"
        
        # Convert future_signals list to JSON string
        future_signals = lead.get("future_signals", [])
        future_signals_json = json.dumps(future_signals) if future_signals else None
        
        # internet_info stores the human-readable feed name (e.g. "Google Alerts - Hotel Openings TX")
        feed_name = lead.get("feed_name", "")
        
        values = (
            lead.get("article_title", "")[:500],
            feed_name[:100] if feed_name else None,
            source_url[:500] if source_url else None,
            feed_name[:500] if feed_name else None,  # internet_info = feed name, not RSS URL
            lead.get("article_date"),
            lead.get("business_name", "")[:255],
            lead.get("address", "")[:255] if lead.get("address") else None,
            lead.get("city", "")[:100],
            lead.get("state", "")[:10],
            lead.get("zip", "")[:10] if lead.get("zip") else None,
            opening_date,
            opening_date,
            lead.get("phone", "")[:20] if lead.get("phone") else None,
            lead.get("contact_name", "")[:100] if lead.get("contact_name") else None,
            lead.get("contact_email", "")[:255] if lead.get("contact_email") else None,
            lead.get("confidence", "medium")[:20],
            future_signals_json,
            lead.get("notes", "")[:1000] if lead.get("notes") else None,
            lead_stage,
            "claude_v3_hotel"
        )
        
        cursor.execute(sql, values)
        conn.commit()
        
        inserted_id = cursor.lastrowid
        print(f"  Inserted ID {inserted_id}: {lead.get('business_name')}")
        return inserted_id, None
        
    except Error as e:
        print(f"  Insert error: {e}")
        conn.rollback()
        return None, 'error'
    finally:
        cursor.close()


def insert_leads_batch(leads: list[dict]) -> dict:
    """Insert multiple leads. Returns stats."""
    stats = {
        "total": len(leads),
        "inserted": 0,
        "duplicates": 0,
        "archive_duplicates": 0,
        "errors": 0,
        "inserted_ids": []
    }
    
    conn = get_connection()
    if not conn:
        stats["errors"] = len(leads)
        return stats
    
    try:
        # Sync auto-increment before inserting
        sync_auto_increment(conn)
        
        for lead in leads:
            result_id, result_type = insert_lead(conn, lead)
            if result_id:
                stats["inserted"] += 1
                stats["inserted_ids"].append(result_id)
            elif result_type == 'current':
                stats["duplicates"] += 1
            elif result_type == 'archive':
                stats["archive_duplicates"] += 1
            else:
                stats["errors"] += 1
    finally:
        conn.close()
    
    return stats


def get_recent_leads(days: int = 7) -> list[dict]:
    """Get leads inserted in the last N days."""
    conn = get_connection()
    if not conn:
        return []
    
    cursor = conn.cursor(dictionary=True)
    try:
        sql = f"""
        SELECT * FROM {TABLE_NAME}
        WHERE created_at >= DATE_SUB(NOW(), INTERVAL %s DAY)
        ORDER BY created_at DESC
        """
        cursor.execute(sql, (days,))
        return cursor.fetchall()
    finally:
        cursor.close()
        conn.close()


def get_lead_count() -> int:
    """Get total lead count."""
    conn = get_connection()
    if not conn:
        return 0
    cursor = conn.cursor()
    try:
        cursor.execute(f"SELECT COUNT(*) FROM {TABLE_NAME}")
        return cursor.fetchone()[0]
    finally:
        cursor.close()
        conn.close()


if __name__ == "__main__":
    print(f"Testing connection to {TABLE_NAME}...")
    conn = get_connection()
    if conn:
        print("Connected successfully")
        
        # Test sync
        print("\nTesting auto-increment sync...")
        sync_auto_increment(conn)
        
        cursor = conn.cursor()
        cursor.execute(f"DESCRIBE {TABLE_NAME}")
        print(f"Table has {len(cursor.fetchall())} columns")
        cursor.close()
        conn.close()
    
    print(f"\nCurrent hotel leads: {get_lead_count()}")
