Source code for fmusvid.operations.subtitle

"""
Subtitle support for FMUS-VID.

This module provides functionality for adding and manipulating subtitles.
"""

from typing import Union, List, Dict, Optional, Tuple
from pathlib import Path
import re
import logging
from dataclasses import dataclass
from datetime import datetime, timedelta
from PIL import Image, ImageDraw, ImageFont, ImageColor

logger = logging.getLogger(__name__)

@dataclass
class SubtitleEntry:
    """A single timed subtitle cue.

    Attributes:
        start_time: Moment the cue becomes visible, in seconds.
        end_time: Moment the cue stops being visible, in seconds.
        text: The text shown for the cue.
        position: Optional ``(x, y)`` override for this cue; ``None`` means
            no per-cue override is set.
    """

    start_time: float
    end_time: float
    text: str
    position: Optional[Tuple[int, int]] = None
class SubtitleParser:
    """Parser for SubRip (``.srt``) and WebVTT (``.vtt``) subtitle files.

    All methods are static; the class is only a namespace. Parsing is
    tolerant: malformed blocks that never provide a timing line are skipped
    (with a warning) instead of aborting the whole file.
    """

    # "HH:MM:SS,mmm --> HH:MM:SS,mmm"; the hour field may have more than
    # two digits.
    _SRT_TIMING = re.compile(
        r'(\d{2,}:\d{2}:\d{2},\d{3}) --> (\d{2,}:\d{2}:\d{2},\d{3})'
    )
    # WebVTT allows the hour field to be omitted ("MM:SS.mmm").
    _VTT_TIMING = re.compile(
        r'((?:\d{2,}:)?\d{2}:\d{2}\.\d{3}) --> '
        r'((?:\d{2,}:)?\d{2}:\d{2}\.\d{3})'
    )
    # A single timestamp after the decimal separator has been normalized.
    _TIMESTAMP = re.compile(r'(?:(\d+):)?(\d{1,2}):(\d{1,2})(?:\.(\d{1,6}))?')

    @staticmethod
    def parse(path: Union[str, Path],
              format: Optional[str] = None) -> List[SubtitleEntry]:
        """Parse a subtitle file.

        Args:
            path: Path to the subtitle file.
            format: Subtitle format ("srt" or "vtt"), matched
                case-insensitively and with an optional leading dot, or
                None to detect it from the file extension.

        Returns:
            List of SubtitleEntry objects in file order.

        Raises:
            ValueError: If the format is not supported.
        """
        path = Path(path)

        # Auto-detect format from extension if not specified
        if format is None:
            format = path.suffix.lower().lstrip('.')

        parsers = {
            "srt": SubtitleParser._parse_srt,
            "vtt": SubtitleParser._parse_vtt,
        }
        parser = parsers.get(str(format).lower().lstrip('.'))
        if parser is None:
            raise ValueError(f"Unsupported subtitle format: {format}")
        return parser(path)

    @staticmethod
    def _parse_srt(path: Path) -> List[SubtitleEntry]:
        """Parse an SRT subtitle file."""
        # utf-8-sig transparently drops a leading byte-order mark.
        with open(path, 'r', encoding='utf-8-sig') as f:
            lines = f.readlines()
        return SubtitleParser._build_entries(
            SubtitleParser._scan_srt(lines), path
        )

    @staticmethod
    def _parse_vtt(path: Path) -> List[SubtitleEntry]:
        """Parse a WebVTT subtitle file."""
        with open(path, 'r', encoding='utf-8-sig') as f:
            lines = f.readlines()
        return SubtitleParser._build_entries(
            SubtitleParser._scan_vtt(lines), path
        )

    @staticmethod
    def _scan_srt(lines: List[str]) -> List[Dict]:
        """Group SRT lines into raw cue dicts.

        Each dict always has a "text" list and, once a timing line has been
        seen for the block, "start_time" and "end_time" in seconds.
        """
        cues = []
        current = None

        for raw_line in lines:
            line = raw_line.strip()

            # A blank line terminates the current block.
            if not line:
                if current is not None:
                    cues.append(current)
                    current = None
                continue

            # Look for the timing line until the block has one. Testing for
            # key presence (not truthiness) keeps a 0.0 start time valid.
            if current is None or "start_time" not in current:
                match = SubtitleParser._SRT_TIMING.match(line)
                if match:
                    if current is None:
                        # Cue without an index line: still accept it.
                        current = {"text": []}
                    current["start_time"] = SubtitleParser._parse_timestamp(
                        match.group(1)
                    )
                    current["end_time"] = SubtitleParser._parse_timestamp(
                        match.group(2)
                    )
                    continue

            if current is None:
                # Only a numeric index line may open a block; any other
                # stray line outside a block is ignored.
                try:
                    int(line)
                except ValueError:
                    continue
                current = {"text": []}
                continue

            current["text"].append(line)

        # Add last block if the file does not end with a blank line
        if current is not None:
            cues.append(current)
        return cues

    @staticmethod
    def _scan_vtt(lines: List[str]) -> List[Dict]:
        """Group WebVTT lines into raw cue dicts.

        Everything up to and including the line starting with "WEBVTT" is
        skipped. Lines outside a cue (identifiers, notes, header metadata)
        are ignored.
        """
        cues = []
        current = None
        header_passed = False

        for raw_line in lines:
            line = raw_line.strip()

            # Skip WebVTT header
            if not header_passed:
                if line.startswith('WEBVTT'):
                    header_passed = True
                continue

            if not line:
                if current is not None:
                    cues.append(current)
                    current = None
                continue

            match = SubtitleParser._VTT_TIMING.match(line)
            if match:
                # A timing line always starts a new cue; keep the previous
                # one even if no blank line separated them.
                if current is not None:
                    cues.append(current)
                current = {
                    "start_time": SubtitleParser._parse_timestamp(
                        match.group(1)
                    ),
                    "end_time": SubtitleParser._parse_timestamp(
                        match.group(2)
                    ),
                    "text": [],
                }
                continue

            if current is not None:
                current["text"].append(line)

        if current is not None:
            cues.append(current)
        return cues

    @staticmethod
    def _build_entries(cues: List[Dict],
                       source: Union[str, Path]) -> List[SubtitleEntry]:
        """Convert raw cue dicts to SubtitleEntry objects.

        Cues that never received a timing line are skipped with a warning
        rather than raising KeyError.
        """
        entries = []
        for cue in cues:
            if "start_time" not in cue or "end_time" not in cue:
                logging.getLogger(__name__).warning(
                    "Skipping subtitle block without a timing line in %s",
                    source,
                )
                continue
            entries.append(
                SubtitleEntry(
                    start_time=cue["start_time"],
                    end_time=cue["end_time"],
                    text="\n".join(cue["text"]),
                )
            )
        return entries

    @staticmethod
    def _parse_timestamp(timestamp: str) -> float:
        """Convert a timestamp string to seconds.

        Accepts ``[H+:]MM:SS[.ffffff]`` with either "," (SRT) or "." (VTT)
        as the decimal separator. Hours are optional and not limited to 23.

        Raises:
            ValueError: If the string is not a valid timestamp.
        """
        # Handle both SRT (,) and VTT (.) decimal separators
        normalized = timestamp.replace(',', '.')
        match = SubtitleParser._TIMESTAMP.fullmatch(normalized)
        if match is None:
            raise ValueError(f"Invalid subtitle timestamp: {timestamp!r}")

        hours_text, minutes_text, seconds_text, fraction = match.groups()
        hours = int(hours_text) if hours_text else 0
        minutes = int(minutes_text)
        seconds = int(seconds_text)
        if minutes > 59 or seconds > 59:
            raise ValueError(f"Invalid subtitle timestamp: {timestamp!r}")

        # Right-pad the fraction to microseconds ("5" -> 500000).
        microseconds = int(fraction.ljust(6, '0')) if fraction else 0
        return (hours * 3600 + minutes * 60 + seconds
                + microseconds / 1_000_000)
class SubtitleRenderer:
    """Renders subtitles on video frames."""

    def __init__(self,
                 entries: List[SubtitleEntry],
                 font: str = "Arial",
                 size: int = 24,
                 color: Union[str, Tuple[int, int, int]] = "white",
                 stroke_width: int = 2,
                 stroke_color: Union[str, Tuple[int, int, int]] = "black",
                 position: Optional[Tuple[int, int]] = None,
                 fade_in: float = 0.25,
                 fade_out: float = 0.25):
        """
        Initialize subtitle renderer.

        Args:
            entries: List of subtitle entries
            font: Font name or path
            size: Font size in pixels
            color: Text color (name/hex string or RGB tuple)
            stroke_width: Outline width (0 for no outline)
            stroke_color: Outline color (name/hex string or RGB tuple)
            position: Default (x, y) point passed to the text drawing call.
                None derives it per frame as
                (frame_width // 2, frame_height - 50).
            fade_in: Fade-in duration in seconds
            fade_out: Fade-out duration in seconds
        """
        self.entries = entries
        self.font = font
        self.size = size
        self.color = color
        self.stroke_width = stroke_width
        self.stroke_color = stroke_color
        self.default_position = position
        self.fade_in = fade_in
        self.fade_out = fade_out

        # Try to load the font; fall back to PIL's built-in font so that
        # rendering still works when the requested font is unavailable.
        try:
            self._font_obj = ImageFont.truetype(font, size)
        except IOError:
            logger.warning(f"Font '{font}' not found, using default font")
            self._font_obj = ImageFont.load_default()

    def render(self, frame: Image.Image, time: float) -> Image.Image:
        """
        Render subtitles for the current frame.

        Args:
            frame: PIL Image to render on (not modified)
            time: Current video time in seconds

        Returns:
            The frame with subtitles drawn, in the same mode as ``frame``.
            When no entry is visible at ``time`` the input frame object
            itself is returned.
        """
        # Find visible subtitles at current time
        visible_entries = [
            entry for entry in self.entries
            if entry.start_time <= time <= entry.end_time
        ]
        if not visible_entries:
            return frame

        # Resolve the fallback position locally. Writing it back to
        # self.default_position would pin it to the first frame's size.
        default_position = self.default_position
        if default_position is None:
            width, height = frame.size
            default_position = (width // 2, height - 50)  # 50 px from bottom

        result = frame.copy()

        for entry in visible_entries:
            opacity = self._fade_opacity(entry, time)
            if opacity == 0:
                continue

            alpha = int(255 * opacity)
            # Use entry-specific position or default
            position = entry.position or default_position

            # Draw on a transparent layer so the fade alpha is applied when
            # the layer is composited onto the frame.
            subtitle_overlay = Image.new('RGBA', frame.size, (0, 0, 0, 0))
            draw = ImageDraw.Draw(subtitle_overlay)

            if self.stroke_width > 0:
                # The outline is built by stamping the text at every offset
                # in a (2w+1) x (2w+1) square around the position.
                stroke_fill = (*self._get_color(self.stroke_color), alpha)
                offsets = range(-self.stroke_width, self.stroke_width + 1)
                for dx in offsets:
                    for dy in offsets:
                        draw.text(
                            (position[0] + dx, position[1] + dy),
                            entry.text,
                            font=self._font_obj,
                            fill=stroke_fill,
                            align="center"
                        )

            # Draw main text
            draw.text(
                position,
                entry.text,
                font=self._font_obj,
                fill=(*self._get_color(self.color), alpha),
                align="center"
            )

            # Composite the subtitle overlay onto the frame
            result = Image.alpha_composite(
                result.convert('RGBA'), subtitle_overlay
            )

        # Compositing forces RGBA; hand back the caller's mode so frames
        # with and without subtitles have the same format.
        if result.mode != frame.mode:
            result = result.convert(frame.mode)
        return result

    def _fade_opacity(self, entry: SubtitleEntry, time: float) -> float:
        """Return the opacity in [0.0, 1.0] of ``entry`` at ``time``."""
        opacity = 1.0
        if self.fade_in > 0 and time < entry.start_time + self.fade_in:
            opacity = (time - entry.start_time) / self.fade_in
        elif self.fade_out > 0 and time > entry.end_time - self.fade_out:
            opacity = (entry.end_time - time) / self.fade_out
        return max(0.0, min(1.0, opacity))

    def _get_color(self,
                   color: Union[str, Tuple[int, int, int]]
                   ) -> Tuple[int, int, int]:
        """Convert color to an RGB tuple.

        Tuples/lists are used as given; strings are resolved through
        ImageColor.getrgb. Any alpha component is dropped because the
        caller appends its own fade alpha.
        """
        if isinstance(color, (tuple, list)):
            rgb = color
        else:
            rgb = ImageColor.getrgb(color)
        return tuple(rgb[:3])