Source code for fmusvid.operations.subtitle

"""
Subtitle support for FMUS-VID.

This module provides functionality for adding and manipulating subtitles.
"""

from typing import Union, List, Dict, Optional, Tuple
from pathlib import Path
import re
import logging
from dataclasses import dataclass
from datetime import datetime, timedelta
from PIL import Image, ImageDraw, ImageFont, ImageColor

logger = logging.getLogger(__name__)


[docs]
@dataclass
class SubtitleEntry:
    """Represents a single subtitle entry.

    Attributes:
        start_time: Time at which the subtitle becomes visible, in seconds.
        end_time: Time at which the subtitle stops being visible, in seconds.
        text: Subtitle text; multi-line subtitles are separated by newline
            characters.
        position: Optional ``(x, y)`` position for this entry. When ``None``
            the renderer falls back to its own default position.
    """
    start_time: float  # Start time in seconds
    end_time: float    # End time in seconds
    text: str         # Subtitle text (may contain "\n" for multiple lines)
    position: Optional[Tuple[int, int]] = None  # Per-entry (x, y) override; None = use renderer default



[docs]
class SubtitleParser:
    """Parser for various subtitle formats.

    Supports SubRip (``srt``) and WebVTT (``vtt``) files. All methods are
    static; :meth:`parse` is the public entry point and dispatches to the
    format-specific parsers.
    """

    # SRT timing line, e.g. "00:00:01,000 --> 00:00:04,000". The hour field
    # may have any number of digits and "." is tolerated as the millisecond
    # separator because some tools write it instead of ",".
    _SRT_TIMING_RE = re.compile(
        r'(\d+:\d{2}:\d{2}[,.]\d{3})\s*-->\s*(\d+:\d{2}:\d{2}[,.]\d{3})'
    )

    # WebVTT timing line. The hour field is optional in WebVTT, so both
    # "00:00:01.000 --> 00:00:04.000" and "00:01.000 --> 00:04.000" match.
    # Cue settings following the end timestamp are ignored.
    _VTT_TIMING_RE = re.compile(
        r'((?:\d+:)?\d{2}:\d{2}\.\d{3})\s*-->\s*((?:\d+:)?\d{2}:\d{2}\.\d{3})'
    )

    # A single timestamp: optional hours, minutes, seconds, fraction.
    _TIMESTAMP_RE = re.compile(r'(?:(\d+):)?(\d{1,2}):(\d{1,2})[.,](\d{1,6})')

    @staticmethod
    def parse(path: Union[str, Path], format: Optional[str] = None) -> "List[SubtitleEntry]":
        """
        Parse a subtitle file.

        Args:
            path: Path to subtitle file
            format: Subtitle format ("srt" or "vtt", case-insensitive, with or
                without a leading dot) or None to auto-detect from the file
                extension

        Returns:
            List of SubtitleEntry objects in file order

        Raises:
            ValueError: If the format is not supported or the file contains a
                timestamp that cannot be converted.
            OSError: If the file cannot be opened.
        """
        path = Path(path)

        # Auto-detect format from extension if not specified
        if format is None:
            format = path.suffix.lower().lstrip('.')

        # Normalise so that "SRT" or ".srt" passed explicitly also work.
        normalized = format.strip().lower().lstrip('.')

        if normalized == "srt":
            return SubtitleParser._parse_srt(path)
        if normalized == "vtt":
            return SubtitleParser._parse_vtt(path)
        raise ValueError(f"Unsupported subtitle format: {format}")

    @staticmethod
    def _finish_entry(pending: Optional[dict], entries: list) -> None:
        """Convert a collected block into a SubtitleEntry and append it.

        Blocks that never received a timing line are skipped with a warning
        instead of aborting the whole parse.
        """
        if pending is None:
            return
        if "start_time" not in pending:
            logger.warning("Skipping subtitle block without a timestamp line")
            return
        entries.append(
            SubtitleEntry(
                start_time=pending["start_time"],
                end_time=pending["end_time"],
                text="\n".join(pending["text"]),
            )
        )

    @staticmethod
    def _parse_srt(path: Path) -> "List[SubtitleEntry]":
        """Parse an SRT subtitle file.

        A block is: a numeric counter line, a timing line, then text lines up
        to the next blank line. Lines outside a block are ignored.
        """
        entries = []
        pending = None  # Block currently being collected, None between blocks

        # utf-8-sig transparently drops a leading byte-order mark.
        with open(path, 'r', encoding='utf-8-sig') as f:
            for raw_line in f:
                line = raw_line.strip()

                # A blank line terminates the current block.
                if not line:
                    SubtitleParser._finish_entry(pending, entries)
                    pending = None
                    continue

                if pending is None:
                    # Only a numeric counter opens a new block.
                    try:
                        int(line)
                    except ValueError:
                        continue
                    pending = {"text": []}
                    continue

                # Test for key presence, not truthiness: a start time of 0.0
                # is valid and must not re-enable timing detection.
                if "start_time" not in pending:
                    match = SubtitleParser._SRT_TIMING_RE.match(line)
                    if match:
                        pending["start_time"] = SubtitleParser._parse_timestamp(match.group(1))
                        pending["end_time"] = SubtitleParser._parse_timestamp(match.group(2))
                        continue

                pending["text"].append(line)

        # The last block is not necessarily followed by a blank line.
        SubtitleParser._finish_entry(pending, entries)
        return entries

    @staticmethod
    def _parse_vtt(path: Path) -> "List[SubtitleEntry]":
        """Parse a WebVTT subtitle file.

        Everything up to and including the line starting with ``WEBVTT`` is
        skipped. A cue starts at a timing line and collects text up to the
        next blank line; lines outside a cue (identifiers, notes) are ignored.
        """
        entries = []
        pending = None  # Cue currently being collected, None between cues
        header_passed = False

        with open(path, 'r', encoding='utf-8-sig') as f:
            for raw_line in f:
                line = raw_line.strip()

                # Skip WebVTT header
                if not header_passed:
                    header_passed = line.startswith('WEBVTT')
                    continue

                if not line:
                    SubtitleParser._finish_entry(pending, entries)
                    pending = None
                    continue

                match = SubtitleParser._VTT_TIMING_RE.match(line)
                if match:
                    # Keep a previous cue that was not closed by a blank line
                    # instead of silently overwriting it.
                    SubtitleParser._finish_entry(pending, entries)
                    pending = {
                        "start_time": SubtitleParser._parse_timestamp(match.group(1)),
                        "end_time": SubtitleParser._parse_timestamp(match.group(2)),
                        "text": [],
                    }
                    continue

                if pending is not None:
                    pending["text"].append(line)

        SubtitleParser._finish_entry(pending, entries)
        return entries

    @staticmethod
    def _parse_timestamp(timestamp: str) -> float:
        """Convert timestamp string to seconds.

        Accepts ``[H...:]MM:SS,fff`` and ``[H...:]MM:SS.fff``. Timestamps are
        offsets rather than clock times, so the hour field is optional and is
        not limited to 0-23.

        Raises:
            ValueError: If the string is not a valid timestamp.
        """
        match = SubtitleParser._TIMESTAMP_RE.fullmatch(timestamp.strip())
        if match is None:
            raise ValueError(f"Invalid subtitle timestamp: {timestamp!r}")

        hours_text, minutes_text, seconds_text, fraction_text = match.groups()
        hours = int(hours_text) if hours_text is not None else 0
        minutes = int(minutes_text)
        seconds = int(seconds_text)
        if minutes > 59 or seconds > 59:
            raise ValueError(f"Invalid subtitle timestamp: {timestamp!r}")

        # Right-pad the fraction to microseconds: ",5" means 500000 us.
        microseconds = int(fraction_text.ljust(6, '0'))
        return hours * 3600 + minutes * 60 + seconds + microseconds / 1_000_000



[docs]
class SubtitleRenderer:
    """Renders subtitles on video frames.

    The renderer holds a list of subtitle entries plus styling options and
    draws whichever entries are active at a given time onto a PIL image.
    """

    # Vertical distance in pixels between the bottom edge of the frame and
    # the default subtitle position.
    _BOTTOM_MARGIN = 50

    def __init__(self, entries: "List[SubtitleEntry]",
                 font: str = "Arial", size: int = 24,
                 color: Union[str, Tuple[int, int, int]] = "white",
                 stroke_width: int = 2,
                 stroke_color: Union[str, Tuple[int, int, int]] = "black",
                 position: Optional[Tuple[int, int]] = None,
                 fade_in: float = 0.25, fade_out: float = 0.25):
        """
        Initialize subtitle renderer.

        Args:
            entries: List of subtitle entries
            font: Font name or path
            size: Font size in pixels
            color: Text color
            stroke_width: Outline width (0 for no outline)
            stroke_color: Outline color
            position: Default (x, y) position; None selects the horizontal
                centre of each frame, 50 pixels above its bottom edge
            fade_in: Fade-in duration in seconds
            fade_out: Fade-out duration in seconds
        """
        self.entries = entries
        self.font = font
        self.size = size
        self.color = color
        self.stroke_width = stroke_width
        self.stroke_color = stroke_color
        self.default_position = position
        self.fade_in = fade_in
        self.fade_out = fade_out

        # Fall back to PIL's built-in font when the requested one cannot be
        # loaded, so that rendering still works.
        try:
            self._font_obj = ImageFont.truetype(font, size)
        except OSError:
            logger.warning("Font '%s' not found, using default font", font)
            self._font_obj = ImageFont.load_default()

    def render(self, frame: "Image.Image", time: float) -> "Image.Image":
        """
        Render subtitles for the current frame.

        Args:
            frame: PIL Image to render on (never modified)
            time: Current video time in seconds

        Returns:
            The input frame itself when no subtitle is active at ``time``;
            otherwise a new image. Once at least one subtitle has been
            composited the result is in RGBA mode.
        """
        # Find visible subtitles at current time
        visible_entries = [
            entry for entry in self.entries
            if entry.start_time <= time <= entry.end_time
        ]
        if not visible_entries:
            return frame

        result = frame.copy()

        # Derive the fallback position from *this* frame on every call.
        # Storing it on the instance would pin the first frame's size for all
        # later frames and change caller-visible state.
        # NOTE(review): the position is handed to ImageDraw.text unchanged, so
        # it is whatever anchor PIL uses by default, not the text centre --
        # confirm whether true centring is intended.
        default_position = self.default_position
        if default_position is None:
            width, height = frame.size
            default_position = (width // 2, height - self._BOTTOM_MARGIN)

        # Loop-invariant values.
        text_rgb = self._get_color(self.color)
        stroke_rgb = self._get_color(self.stroke_color)
        stroke_offsets = range(-self.stroke_width, self.stroke_width + 1)

        for entry in visible_entries:
            opacity = self._fade_opacity(entry, time)
            if opacity == 0:
                continue

            alpha = int(255 * opacity)

            # Use entry-specific position or default
            position = entry.position or default_position

            # Draw on a transparent layer so that the fade alpha applies to
            # the text only, then blend the layer over the frame.
            overlay = Image.new('RGBA', frame.size, (0, 0, 0, 0))
            draw = ImageDraw.Draw(overlay)

            if self.stroke_width > 0:
                # Outline: repeat the text at every offset within the stroke
                # radius; the main text is drawn on top afterwards.
                for dx in stroke_offsets:
                    for dy in stroke_offsets:
                        draw.text(
                            (position[0] + dx, position[1] + dy),
                            entry.text,
                            font=self._font_obj,
                            fill=(*stroke_rgb, alpha),
                            align="center"
                        )

            draw.text(
                position,
                entry.text,
                font=self._font_obj,
                fill=(*text_rgb, alpha),
                align="center"
            )

            result = Image.alpha_composite(result.convert('RGBA'), overlay)

        return result

    def _fade_opacity(self, entry, time: float) -> float:
        """Return the opacity (0.0-1.0) of ``entry`` at ``time``.

        Expects ``entry.start_time <= time <= entry.end_time``; under that
        condition a fade duration of 0 never reaches its division.
        """
        if time < entry.start_time + self.fade_in:
            opacity = (time - entry.start_time) / self.fade_in
        elif time > entry.end_time - self.fade_out:
            opacity = (entry.end_time - time) / self.fade_out
        else:
            opacity = 1.0
        return max(0.0, min(1.0, opacity))

    def _get_color(self, color: Union[str, Tuple[int, int, int]]) -> Tuple[int, int, int]:
        """Convert color to RGB tuple.

        Accepts a color name/specification string or a sequence of channel
        values. Any alpha channel (a 4-tuple, or a string such as
        ``"#rrggbbaa"``) is dropped, because ``render`` appends its own alpha
        and a 5-element fill is invalid.
        """
        if isinstance(color, (tuple, list)):
            channels = color
        else:
            channels = ImageColor.getrgb(color)
        return tuple(channels[:3])