ConsistentlyInconsistentYT-.../tests/test_data/ground_truth_generator.py

"""
Ground Truth Data Generator
Generates ground truth annotations for validation and accuracy testing
"""

import numpy as np
import json
from typing import List, Dict, Tuple, Optional
from dataclasses import dataclass, asdict
import logging

logger = logging.getLogger(__name__)


@dataclass
class GroundTruthDetection:
    """Ground truth detection annotation"""
    frame_id: int
    target_id: int
    position_2d: Tuple[float, float]  # pixel coordinates
    position_3d: Tuple[float, float, float]  # world coordinates
    bounding_box: Tuple[float, float, float, float]  # x, y, width, height
    velocity_2d: Tuple[float, float]  # pixel velocity
    velocity_3d: Tuple[float, float, float]  # world velocity
    distance_m: float
    visible: bool
    occluded: bool
    truncated: bool
    confidence: float = 1.0  # Ground truth confidence


@dataclass
class GroundTruthFrame:
    """Ground truth annotations for a single frame"""
    frame_id: int
    timestamp: float
    camera_id: int
    detections: List[GroundTruthDetection]
    metadata: Dict


class GroundTruthGenerator:
    """Generate ground truth annotations from simulated data"""

    def __init__(self, frame_width: int = 7680, frame_height: int = 4320):
        """
        Initialize ground truth generator

        Args:
            frame_width: Frame width in pixels
            frame_height: Frame height in pixels
        """
        self.frame_width = frame_width
        self.frame_height = frame_height

    def generate_from_trajectories(
        self,
        trajectories: Dict,
        projection_func,
        num_frames: int,
        camera_id: int = 0
    ) -> List[GroundTruthFrame]:
        """
        Generate ground truth from trajectory data

        Args:
            trajectories: Dictionary of drone trajectories
            projection_func: Function to project 3D to 2D coordinates
            num_frames: Number of frames to generate
            camera_id: Camera ID for annotations

        Returns:
            List of ground truth frames
        """
        ground_truth_frames = []

        for frame_id in range(num_frames):
            detections = []

            for drone_id, trajectory in trajectories.items():
                if frame_id < len(trajectory.points):
                    point = trajectory.points[frame_id]

                    # Project 3D position to 2D
                    pixel_x, pixel_y, distance = projection_func(point.position)

                    if pixel_x is not None and pixel_y is not None:
                        # Check visibility
                        visible = (
                            0 <= pixel_x < self.frame_width and
                            0 <= pixel_y < self.frame_height and
                            distance > 0
                        )

                        # Determine occlusion (simplified)
                        occluded = False

                        # Determine truncation
                        truncated = (
                            pixel_x < 10 or pixel_x > self.frame_width - 10 or
                            pixel_y < 10 or pixel_y > self.frame_height - 10
                        )

                        # Calculate bounding box (simplified)
                        # In reality, this would depend on drone size and distance
                        pixel_size = max(5, 100 / max(distance / 100, 1))
                        bbox = (
                            pixel_x - pixel_size / 2,
                            pixel_y - pixel_size / 2,
                            pixel_size,
                            pixel_size
                        )

                        detection = GroundTruthDetection(
                            frame_id=frame_id,
                            target_id=drone_id,
                            position_2d=(pixel_x, pixel_y),
                            position_3d=point.position,
                            bounding_box=bbox,
                            velocity_2d=(0, 0),  # Would need to calculate from consecutive frames
                            velocity_3d=point.velocity,
                            distance_m=distance,
                            visible=visible,
                            occluded=occluded,
                            truncated=truncated
                        )

                        detections.append(detection)

            gt_frame = GroundTruthFrame(
                frame_id=frame_id,
                timestamp=frame_id / 30.0,  # Assuming 30 FPS
                camera_id=camera_id,
                detections=detections,
                metadata={'num_targets': len(detections)}
            )

            ground_truth_frames.append(gt_frame)

        logger.info(f"Generated ground truth for {num_frames} frames")
        return ground_truth_frames

    def calculate_detection_metrics(
        self,
        ground_truth: GroundTruthFrame,
        predicted_detections: List[Dict],
        iou_threshold: float = 0.5,
        distance_threshold: float = 50.0
    ) -> Dict:
        """
        Calculate detection metrics by comparing predictions to ground truth

        Args:
            ground_truth: Ground truth frame
            predicted_detections: Predicted detections
            iou_threshold: IoU threshold for matching
            distance_threshold: Distance threshold for matching (pixels)

        Returns:
            Metrics dictionary
        """
        gt_visible = [d for d in ground_truth.detections if d.visible and not d.occluded]

        true_positives = 0
        false_positives = 0
        false_negatives = 0
        matched_distances = []

        matched_gt = set()

        # Match predictions to ground truth
        for pred in predicted_detections:
            pred_x = pred.get('x', pred.get('pixel_x'))
            pred_y = pred.get('y', pred.get('pixel_y'))

            best_match = None
            best_distance = float('inf')

            for i, gt_det in enumerate(gt_visible):
                if i in matched_gt:
                    continue

                gt_x, gt_y = gt_det.position_2d
                distance = np.sqrt((pred_x - gt_x)**2 + (pred_y - gt_y)**2)

                if distance < distance_threshold and distance < best_distance:
                    best_distance = distance
                    best_match = i

            if best_match is not None:
                true_positives += 1
                matched_gt.add(best_match)
                matched_distances.append(best_distance)
            else:
                false_positives += 1

        false_negatives = len(gt_visible) - true_positives

        # Calculate metrics
        precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
        recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
        f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

        avg_distance_error = np.mean(matched_distances) if matched_distances else 0

        return {
            'true_positives': true_positives,
            'false_positives': false_positives,
            'false_negatives': false_negatives,
            'precision': precision,
            'recall': recall,
            'f1_score': f1_score,
            'avg_distance_error': avg_distance_error,
            'num_ground_truth': len(gt_visible),
            'num_predictions': len(predicted_detections)
        }

    def save_ground_truth(self, frames: List[GroundTruthFrame], filename: str):
        """Save ground truth to JSON file"""
        data = {
            'metadata': {
                'num_frames': len(frames),
                'frame_width': self.frame_width,
                'frame_height': self.frame_height
            },
            'frames': []
        }

        for frame in frames:
            frame_data = {
                'frame_id': frame.frame_id,
                'timestamp': frame.timestamp,
                'camera_id': frame.camera_id,
                'metadata': frame.metadata,
                'detections': [asdict(det) for det in frame.detections]
            }
            data['frames'].append(frame_data)

        with open(filename, 'w') as f:
            json.dump(data, f, indent=2)

        logger.info(f"Saved ground truth to {filename}")

    def load_ground_truth(self, filename: str) -> List[GroundTruthFrame]:
        """Load ground truth from JSON file"""
        with open(filename, 'r') as f:
            data = json.load(f)

        frames = []
        for frame_data in data['frames']:
            detections = [
                GroundTruthDetection(
                    frame_id=det['frame_id'],
                    target_id=det['target_id'],
                    position_2d=tuple(det['position_2d']),
                    position_3d=tuple(det['position_3d']),
                    bounding_box=tuple(det['bounding_box']),
                    velocity_2d=tuple(det['velocity_2d']),
                    velocity_3d=tuple(det['velocity_3d']),
                    distance_m=det['distance_m'],
                    visible=det['visible'],
                    occluded=det['occluded'],
                    truncated=det['truncated'],
                    confidence=det.get('confidence', 1.0)
                )
                for det in frame_data['detections']
            ]

            frame = GroundTruthFrame(
                frame_id=frame_data['frame_id'],
                timestamp=frame_data['timestamp'],
                camera_id=frame_data['camera_id'],
                detections=detections,
                metadata=frame_data['metadata']
            )
            frames.append(frame)

        logger.info(f"Loaded {len(frames)} ground truth frames from {filename}")
        return frames

    def generate_validation_report(
        self,
        ground_truth_frames: List[GroundTruthFrame],
        all_predictions: List[List[Dict]]
    ) -> Dict:
        """
        Generate comprehensive validation report

        Args:
            ground_truth_frames: List of ground truth frames
            all_predictions: List of predictions for each frame

        Returns:
            Validation report dictionary
        """
        all_metrics = []

        for gt_frame, predictions in zip(ground_truth_frames, all_predictions):
            metrics = self.calculate_detection_metrics(gt_frame, predictions)
            all_metrics.append(metrics)

        # Aggregate metrics
        avg_precision = np.mean([m['precision'] for m in all_metrics])
        avg_recall = np.mean([m['recall'] for m in all_metrics])
        avg_f1 = np.mean([m['f1_score'] for m in all_metrics])

        total_tp = sum(m['true_positives'] for m in all_metrics)
        total_fp = sum(m['false_positives'] for m in all_metrics)
        total_fn = sum(m['false_negatives'] for m in all_metrics)

        overall_precision = total_tp / (total_tp + total_fp) if (total_tp + total_fp) > 0 else 0
        overall_recall = total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else 0
        overall_f1 = 2 * (overall_precision * overall_recall) / (overall_precision + overall_recall) if (overall_precision + overall_recall) > 0 else 0

        report = {
            'summary': {
                'num_frames': len(ground_truth_frames),
                'avg_precision': avg_precision,
                'avg_recall': avg_recall,
                'avg_f1_score': avg_f1,
                'overall_precision': overall_precision,
                'overall_recall': overall_recall,
                'overall_f1_score': overall_f1,
                'total_true_positives': total_tp,
                'total_false_positives': total_fp,
                'total_false_negatives': total_fn
            },
            'per_frame_metrics': all_metrics
        }

        logger.info(f"Validation report generated:")
        logger.info(f"  Overall Precision: {overall_precision:.3f}")
        logger.info(f"  Overall Recall: {overall_recall:.3f}")
        logger.info(f"  Overall F1: {overall_f1:.3f}")

        return report


if __name__ == "__main__":
    # Example usage
    logging.basicConfig(level=logging.INFO)

    generator = GroundTruthGenerator(width=1920, height=1080)

    # Create example ground truth
    example_detection = GroundTruthDetection(
        frame_id=0,
        target_id=0,
        position_2d=(960, 540),
        position_3d=(0, 0, 1000),
        bounding_box=(950, 530, 20, 20),
        velocity_2d=(5, 0),
        velocity_3d=(10, 0, 0),
        distance_m=1000.0,
        visible=True,
        occluded=False,
        truncated=False
    )

    gt_frame = GroundTruthFrame(
        frame_id=0,
        timestamp=0.0,
        camera_id=0,
        detections=[example_detection],
        metadata={'num_targets': 1}
    )

    # Save example
    generator.save_ground_truth([gt_frame], "/tmp/ground_truth.json")
    print("Saved example ground truth")