#!/usr/bin/env python3
"""
Simplified cBridge Data Collector

Fetches callwatch data from cBridge servers and stores basic parsed data
in PostgreSQL. Configuration is loaded from JSON files.

2021-03-21 KB1B nedecn@kb1b.org
"""

import argparse
import json
import logging
import re
import sys
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import psycopg2
from psycopg2.extras import execute_batch
import requests

# Default config file locations; both can be overridden on the command line.
DEFAULT_CONFIG_FILE = "cbridge_config.json"
DEFAULT_DB_CONFIG_FILE = "db_config.json"
DEFAULT_TIMEOUT = 15        # HTTP timeout (seconds) for callwatch fetches
DEFAULT_BATCH_SIZE = 500    # page_size for psycopg2 execute_batch inserts

# Timestamped INFO-level logging; --verbose raises this to DEBUG at runtime.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S"
)
logger = logging.getLogger(__name__)


@dataclass
class CallRecord:
    """Simplified call record with basic fields only.

    One instance per transmission parsed from a cBridge callwatch feed.
    `source` and `talk` are stored verbatim (unparsed) and may be None
    when the feed leaves them blank (see parse_callwatch_data).
    """
    epoch: int                   # Unix timestamp derived from the callwatch timestamp
    date: str                    # YYYY-MM-DD
    time: str                    # HH:MM:SS.SS (padded/truncated to hundredths by the parser)
    secs: int                    # duration in seconds
    bridge: str                  # bridge name
    source: Optional[str]        # source radio alias (unparsed); None when blank
    talk: Optional[str]          # destination bridge group (unparsed) - renamed from 'group'; None when blank
    rssi: Optional[int]          # signal strength, when reported
    site: Optional[str]          # site name, when reported
    loss: Optional[float]        # loss rate (percent value without the '%'), when reported
    
    def to_dict(self) -> Dict:
        """Return a dict whose keys match the named parameters of the upsert SQL."""
        return {
            'epoch': self.epoch,
            'date': self.date,
            'time': self.time,
            'secs': self.secs,
            'bridge': self.bridge,
            'source': self.source,
            'talk': self.talk,
            'rssi': self.rssi,
            'site': self.site,
            'loss': self.loss,
        }


class Config:
    """Configuration holder: cBridge list plus database connection parameters.

    Both pieces are read from JSON files at construction time.  Any missing,
    unreadable, or malformed file aborts the process with exit code 1.
    """
    
    def __init__(self, cbridge_config_file: Path, db_config_file: Path):
        # bridges: list of {name, url, ...} dicts; db_params: psycopg2 kwargs + 'table'
        self.bridges = self._load_bridges(cbridge_config_file)
        self.db_params = self._load_db_config(db_config_file)
    
    def _load_bridges(self, config_file: Path) -> List[Dict]:
        """Return the 'bridges' array from the cBridge config file, or exit."""
        if not config_file.exists():
            logger.error(f"cBridge config file not found: {config_file}")
            sys.exit(1)
        
        try:
            config = json.loads(config_file.read_text())
        except (json.JSONDecodeError, IOError) as e:
            logger.error(f"Failed to load cBridge config: {e}")
            sys.exit(1)
        
        logger.info(f"Loaded cBridge config from {config_file}")
        
        bridges = config.get('bridges', [])
        if not bridges:
            logger.error("No bridges defined in config")
            sys.exit(1)
        return bridges
    
    def _load_db_config(self, config_file: Path) -> Dict:
        """Return psycopg2 connection kwargs plus the target 'table' name, or exit."""
        if not config_file.exists():
            logger.error(f"Database config file not found: {config_file}")
            sys.exit(1)
        
        try:
            config = json.loads(config_file.read_text())
        except (json.JSONDecodeError, IOError) as e:
            logger.error(f"Failed to load database config: {e}")
            sys.exit(1)
        
        logger.info(f"Loaded database config from {config_file}")
        
        # 'database' takes precedence over the legacy 'dbname' key;
        # every parameter has a sensible default.
        dbname = config.get('database', config.get('dbname', 'cbridge'))
        return {
            'host': config.get('host', 'localhost'),
            'dbname': dbname,
            'user': config.get('user', 'postgres'),
            'password': config.get('password', ''),
            'port': config.get('port', 5432),
            'table': config.get('table', 'cbridge_data'),
        }


def parse_timestamp(ts_str: str) -> Tuple[Optional[datetime], Optional[int], Optional[str], Optional[str]]:
    """Parse a cBridge timestamp ("HH:MM:SS[.f] Mon DD", no year).

    The feed omits the year, so the current year is assumed; a timestamp
    that would land in the future is re-interpreted as last year.

    Args:
        ts_str: Raw timestamp string from the callwatch feed.

    Returns:
        (local datetime, epoch seconds, "YYYY-MM-DD", "HH:MM:SS"),
        or (None, None, None, None) if ts_str is empty or unparseable.
        Fractional seconds are discarded.
    """
    if not ts_str:
        return None, None, None, None
    
    # Remove fractional seconds: "09:26:39.7 Jan 31" -> "09:26:39 Jan 31"
    ts_clean = re.sub(r'\.\d+', '', ts_str)
    now = datetime.now()
    fmt = "%H:%M:%S %b %d %Y"
    
    try:
        dt_local = datetime.strptime(f"{ts_clean} {now.year}", fmt)
        # If the timestamp is in the future, it's from last year.
        if dt_local > now:
            dt_local = datetime.strptime(f"{ts_clean} {now.year - 1}", fmt)
    except ValueError:
        # BUG FIX: "Feb 29" from a prior leap year cannot be parsed with a
        # non-leap current year; retry with last year before giving up.
        try:
            dt_local = datetime.strptime(f"{ts_clean} {now.year - 1}", fmt)
        except ValueError as e:
            logger.debug(f"Failed to parse timestamp '{ts_str}': {e}")
            return None, None, None, None
    
    epoch = int(dt_local.timestamp())
    date_str = dt_local.strftime("%Y-%m-%d")
    time_str = dt_local.strftime("%H:%M:%S")
    
    return dt_local, epoch, date_str, time_str


def fetch_callwatch_data(bridge_name: str, url: str, timeout: int = DEFAULT_TIMEOUT) -> str:
    """Fetch raw callwatch data from a cBridge URL and return the body text.

    Raises requests.RequestException (after logging) on any HTTP failure.
    """
    logger.info(f"Fetching from {bridge_name}: {url}")
    
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        body = response.text
        # Vertical tabs delimit the callwatch fields; count them for the log.
        parts = body.count('\x0b')
        logger.info(f"  ✓ {bridge_name}: {len(body)} bytes, {parts} parts")
    except requests.RequestException as e:
        logger.error(f"  ✗ {bridge_name}: {e}")
        raise
    
    return body


def parse_callwatch_data(bridge_name: str, data: str) -> List[CallRecord]:
    """
    Parse callwatch data into simplified records.
    
    Format: Vertical-tab (\\x0b) separated fields.
    Each record starts with TAB + timestamp, followed by 7 fields:
    - Duration
    - Source Peer Alias (repeater) - UNPARSED
    - Source Radio Alias (caller) - UNPARSED
    - Dest Bridge Group (talkgroup) - UNPARSED
    - RSSI
    - Site Name
    - Loss Rate
    
    The loss part of one record may carry the NEXT record's timestamp after
    a TAB (e.g. "0.0%\\t12:05:30.4 Feb 1"); that part then also serves as
    the next record boundary.
    
    Args:
        bridge_name: Name stored in each record's 'bridge' field.
        data: Raw callwatch response body.
    
    Returns:
        List of CallRecord, one per successfully parsed transmission.
    """
    records = []
    parts = data.split('\x0b')
    time_re = re.compile(r'\d{2}:\d{2}:\d{2}')
    
    logger.info(f"Parsing {len(parts)} parts from {bridge_name}")
    
    i = 0
    while i < len(parts):
        part = parts[i].strip()
        
        # A record boundary is a part containing a TAB plus an HH:MM:SS pattern.
        if '\t' in part and time_re.search(part):
            timestamp_str = part.split('\t')[-1].strip()
            
            # Need 7 more fields
            if i + 7 >= len(parts):
                break
            
            duration_str = parts[i+1].strip()
            peer_alias = parts[i+2].strip()      # Source peer (repeater) - keep as-is
            radio_alias = parts[i+3].strip()     # Source radio (caller) - keep as-is
            talkgroup = parts[i+4].strip()       # Dest bridge group - keep as-is
            rssi_str = parts[i+5].strip()
            site_name = parts[i+6].strip()
            loss_str = parts[i+7].strip()
            
            try:
                # Parse timestamp; fall back to "now" when it is unusable.
                dt_local, epoch, date_str, time_str = parse_timestamp(timestamp_str)
                if epoch is None:  # BUG FIX: was `if not epoch` (epoch 0 is a valid value)
                    dt_local = datetime.now()
                    epoch = int(dt_local.timestamp())
                    date_str = dt_local.strftime("%Y-%m-%d")
                    time_str = dt_local.strftime("%H:%M:%S.%f")[:11]  # HH:MM:SS.SS
                else:
                    # Normalize to hundredths (HH:MM:SS.SS = 11 chars).
                    # parse_timestamp strips fractional seconds, so the
                    # ".00" branch is the one normally taken; the other
                    # two are defensive.
                    if '.' not in time_str:
                        time_str = time_str + ".00"
                    elif len(time_str) < 11:
                        time_str = time_str.ljust(11, '0')
                    elif len(time_str) > 11:
                        time_str = time_str[:11]
                
                # Duration: tolerate float strings and junk; default 0 seconds.
                secs = 0
                if duration_str:
                    try:
                        secs = int(float(duration_str))
                    except (ValueError, TypeError):
                        pass
                
                # RSSI: optional integer.
                rssi = None
                if rssi_str and rssi_str not in ['N/A', ' ', '']:
                    try:
                        rssi = int(float(rssi_str))
                    except (ValueError, TypeError):
                        pass
                
                # Loss rate: the part may have the next record's timestamp
                # appended after a TAB ("0.0%\t12:05:30.4 Feb 1") - use only
                # the first TAB-separated chunk, strip the '%'.
                loss = None
                if loss_str and loss_str.strip():
                    try:
                        loss_value = loss_str.split('\t')[0].strip()
                        loss_clean = loss_value.replace('%', '').strip()
                        if loss_clean and loss_clean not in ['N/A', ' ', '', 'Not avail.']:
                            loss = float(loss_clean)
                    except (ValueError, TypeError) as e:
                        logger.debug(f"Could not parse loss: '{loss_str}' - {e}")
                
                # Normalize blank/placeholder strings to None.
                if not site_name or site_name in ['N/A', ' ', '']:
                    site_name = None
                if not talkgroup or talkgroup in ['N/A', ' ', '']:
                    talkgroup = None
                
                record = CallRecord(
                    epoch=epoch,
                    date=date_str,
                    time=time_str,
                    secs=secs,
                    bridge=bridge_name,
                    source=radio_alias if radio_alias else None,
                    talk=talkgroup,
                    rssi=rssi,
                    site=site_name,
                    loss=loss,
                )
                
                records.append(record)
                
                if logger.isEnabledFor(logging.DEBUG) and len(records) <= 5:
                    logger.debug(f"  Record {len(records)}: {date_str} {time_str}, dur={secs}s")
                
            except Exception as e:
                logger.debug(f"Parse error at part {i}: {e}")
            
            # BUG FIX: when the loss part also holds the next record's
            # timestamp, stepping a full 8 parts skipped that record
            # entirely; step 7 so the combined part is revisited as the
            # next boundary.
            if '\t' in loss_str and time_re.search(loss_str.split('\t')[-1]):
                i += 7
            else:
                i += 8
        else:
            i += 1
    
    logger.info(f"  Parsed {len(records)} valid records from {bridge_name}")
    return records


def create_table_if_not_exists(conn, table_name: str) -> None:
    """Create the database table if it doesn't exist.

    Creates the table plus three lookup indexes (bridge, date desc, epoch
    desc), then commits.

    Args:
        conn: Open psycopg2 connection.
        table_name: Target table; also used to name the indexes.

    NOTE(review): table_name is interpolated directly into the DDL; this is
    only acceptable because it comes from the local trusted config file,
    never from user input.
    NOTE(review): the PRIMARY KEY includes 'source', but the parser can emit
    records with source=None; PostgreSQL forces PK columns NOT NULL, so such
    rows would be rejected at insert time — confirm whether blank sources
    should be coalesced to '' upstream.
    NOTE(review): 'epoch INTEGER' is 32-bit in PostgreSQL and overflows in
    2038; BIGINT would be safer — confirm before migrating.
    """
    create_sql = f"""
        CREATE TABLE IF NOT EXISTS {table_name} (
            epoch INTEGER NOT NULL,
            date VARCHAR(10) NOT NULL,
            time VARCHAR(11) NOT NULL,
            secs INTEGER,
            bridge VARCHAR(20) NOT NULL,
            source TEXT,
            talk VARCHAR(100),
            rssi INTEGER,
            site VARCHAR(100),
            loss REAL,
            PRIMARY KEY (epoch, bridge, source)
        );
        
        CREATE INDEX IF NOT EXISTS idx_{table_name}_bridge ON {table_name}(bridge);
        CREATE INDEX IF NOT EXISTS idx_{table_name}_date ON {table_name}(date DESC);
        CREATE INDEX IF NOT EXISTS idx_{table_name}_epoch ON {table_name}(epoch DESC);
    """
    
    with conn.cursor() as cur:
        cur.execute(create_sql)
    conn.commit()
    logger.info(f"Table '{table_name}' verified/created")


def insert_records(conn, table_name: str, records: List[CallRecord], 
                   batch_size: int = DEFAULT_BATCH_SIZE) -> int:
    """Upsert parsed call records into `table_name`.

    A conflict on (epoch, bridge, source) refreshes only secs/rssi/loss.
    Commits on success; rolls back and re-raises psycopg2.Error on failure.

    Returns:
        Number of rows submitted (0 for an empty record list).

    NOTE(review): records with source=None cannot satisfy the table's
    primary key (PK columns are NOT NULL) — verify upstream filtering.
    """
    if not records:
        logger.info("No records to insert")
        return 0
    
    upsert_sql = f"""
        INSERT INTO {table_name} (
            epoch, date, time, secs, bridge, source, talk, rssi, site, loss
        )
        VALUES (
            %(epoch)s, %(date)s, %(time)s, %(secs)s, %(bridge)s, 
            %(source)s, %(talk)s, %(rssi)s, %(site)s, %(loss)s
        )
        ON CONFLICT (epoch, bridge, source) 
        DO UPDATE SET
            secs = EXCLUDED.secs,
            rssi = EXCLUDED.rssi,
            loss = EXCLUDED.loss
    """
    
    payload = [rec.to_dict() for rec in records]
    count = len(payload)
    logger.info(f"Inserting {count} records into '{table_name}'...")
    
    try:
        with conn.cursor() as cur:
            execute_batch(cur, upsert_sql, payload, page_size=batch_size)
        conn.commit()
    except psycopg2.Error as e:
        conn.rollback()
        logger.error(f"  ✗ Insert failed: {e}")
        raise
    
    logger.info(f"  ✓ Inserted/updated {count} records")
    return count


def main():
    """CLI entry point: fetch callwatch data from the configured cBridges,
    parse it, and upsert the records into PostgreSQL.

    Returns:
        Process exit code: 0 on success, 1 on error, 130 on Ctrl-C.
    """
    parser = argparse.ArgumentParser(
        description="Simplified cBridge Data Collector",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s --create-table                          # Create database table
  %(prog)s --bridges NewEng-TRBO-MA NewEng-TRBO-CT # Fetch specific bridges
  %(prog)s --verbose                               # Show debug output
  %(prog)s --dry-run                               # Test without inserting to DB
  
Config Files:
  cbridge_config.json - Bridge definitions (array of {name, url, description})
  db_config.json      - Database connection parameters
        """
    )
    
    parser.add_argument('--cbridge-config', type=Path, default=Path(DEFAULT_CONFIG_FILE),
                       help=f'cBridge config file (default: {DEFAULT_CONFIG_FILE})')
    parser.add_argument('--db-config', type=Path, default=Path(DEFAULT_DB_CONFIG_FILE),
                       help=f'Database config file (default: {DEFAULT_DB_CONFIG_FILE})')
    parser.add_argument('--bridges', nargs='+', 
                       help='Specific bridges to query (default: all from config)')
    parser.add_argument('--timeout', type=int, default=DEFAULT_TIMEOUT,
                       help=f'HTTP timeout in seconds (default: {DEFAULT_TIMEOUT})')
    parser.add_argument('--batch-size', type=int, default=DEFAULT_BATCH_SIZE,
                       help=f'Database insert batch size (default: {DEFAULT_BATCH_SIZE})')
    parser.add_argument('--create-table', action='store_true',
                       help='Create database table if needed')
    parser.add_argument('--dry-run', action='store_true',
                       help='Parse data but do not insert into database')
    parser.add_argument('--verbose', '-v', action='store_true',
                       help='Enable debug logging')
    
    args = parser.parse_args()
    
    if args.verbose:
        # Raise both this module's logger and the root logger to DEBUG.
        logger.setLevel(logging.DEBUG)
        logging.getLogger().setLevel(logging.DEBUG)
    
    try:
        # Load configuration (exits the process if either file is bad).
        config = Config(args.cbridge_config, args.db_config)
        
        # Select bridges to query.
        if args.bridges:
            bridge_names_to_query = set(args.bridges)
            bridges_to_query = [
                b for b in config.bridges 
                if b['name'] in bridge_names_to_query
            ]
            
            # Reject unknown names so typos fail loudly instead of silently.
            found_names = {b['name'] for b in bridges_to_query}
            missing = bridge_names_to_query - found_names
            if missing:
                logger.error(f"Unknown bridges: {', '.join(missing)}")
                available = [b['name'] for b in config.bridges]
                logger.info(f"Available bridges: {', '.join(available)}")
                return 1
        else:
            bridges_to_query = config.bridges
        
        bridge_names = [b['name'] for b in bridges_to_query]
        logger.info(f"Querying {len(bridges_to_query)} bridge(s): {', '.join(bridge_names)}")
        
        # Split connection kwargs from our own 'table' key.
        db_params = {k: v for k, v in config.db_params.items() if k != 'table'}
        table_name = config.db_params['table']
        
        # BUG FIX: psycopg2's connection context manager only scopes a
        # transaction (commit/rollback) -- it does NOT close the connection.
        # Close it explicitly so the session is released even on errors.
        conn = psycopg2.connect(**db_params)
        try:
            with conn:
                logger.info(f"Database connected: {config.db_params['dbname']}")
                
                if args.create_table:
                    create_table_if_not_exists(conn, table_name)
                
                # Fetch and parse data from all bridges; one bad bridge
                # must not abort the others.
                all_records = []
                for bridge in bridges_to_query:
                    bridge_name = bridge['name']
                    bridge_url = bridge['url']
                    
                    try:
                        data = fetch_callwatch_data(bridge_name, bridge_url, timeout=args.timeout)
                        records = parse_callwatch_data(bridge_name, data)
                        all_records.extend(records)
                    except Exception as e:
                        logger.error(f"Failed to process {bridge_name}: {e}")
                        if args.verbose:
                            import traceback
                            traceback.print_exc()
                
                logger.info(f"Total records parsed: {len(all_records)}")
                
                # Insert records (unless this is a dry run).
                if args.dry_run:
                    logger.info("DRY RUN: Skipping database insert")
                    total_inserted = 0
                else:
                    total_inserted = insert_records(conn, table_name, all_records, 
                                                   batch_size=args.batch_size)
        finally:
            conn.close()
        
        logger.info(f"Complete: {len(all_records)} fetched, {total_inserted} inserted")
        return 0
        
    except KeyboardInterrupt:
        logger.warning("Interrupted by user")
        return 130
    except Exception as e:
        logger.error(f"Failed: {e}")
        if args.verbose:
            import traceback
            traceback.print_exc()
        return 1


if __name__ == "__main__":
    # Propagate main()'s integer return value as the process exit status.
    sys.exit(main())

