"""
Subtitle support for FMUS-VID.
This module provides functionality for adding and manipulating subtitles.
"""
from typing import Union, List, Dict, Optional, Tuple
from pathlib import Path
import re
import logging
from dataclasses import dataclass
from datetime import datetime, timedelta
from PIL import Image, ImageDraw, ImageFont, ImageColor
logger = logging.getLogger(__name__)
@dataclass
class SubtitleEntry:
    """Represents a single subtitle entry.

    Attributes:
        start_time: Time at which the subtitle appears, in seconds.
        end_time: Time at which the subtitle disappears, in seconds.
        text: Subtitle text; multi-line cues are joined with newlines.
        position: Optional (x, y) position that overrides the renderer's
            default position for this entry only.
    """

    start_time: float  # Start time in seconds
    end_time: float  # End time in seconds
    text: str  # Subtitle text
    position: Optional[Tuple[int, int]] = None  # Optional position override
class SubtitleParser:
    """Parser for various subtitle formats (currently SRT and WebVTT).

    All methods are static; the class is only used as a namespace.
    """

    # SRT timing line: "HH:MM:SS,mmm --> HH:MM:SS,mmm". Hours may have any
    # number of digits, and "." is tolerated as the millisecond separator
    # because _parse_timestamp accepts both forms.
    _SRT_TIMING = re.compile(
        r'(\d+:\d{2}:\d{2}[,.]\d{3})\s*-->\s*(\d+:\d{2}:\d{2}[,.]\d{3})'
    )

    # WebVTT timing line: "[HH:]MM:SS.mmm --> [HH:]MM:SS.mmm". The hours
    # field is optional. Anything after the end timestamp (cue settings) is
    # ignored because the pattern is only matched as a prefix.
    _VTT_TIMING = re.compile(
        r'((?:\d+:)?\d{2}:\d{2}\.\d{3})\s*-->\s*((?:\d+:)?\d{2}:\d{2}\.\d{3})'
    )

    # A single timestamp after "," has been normalised to ".".
    _TIMESTAMP = re.compile(r'(?:(\d+):)?(\d{1,2}):(\d{1,2})(?:\.(\d+))?')

    @staticmethod
    def parse(path: Union[str, Path], format: Optional[str] = None) -> List[SubtitleEntry]:
        """
        Parse a subtitle file.

        Args:
            path: Path to subtitle file
            format: Subtitle format ("srt" or "vtt", case-insensitive, a
                leading dot is ignored) or None to auto-detect from the
                file extension

        Returns:
            List of SubtitleEntry objects, in file order

        Raises:
            ValueError: If the format is not supported or a timestamp in
                the file is malformed
        """
        path = Path(path)

        # Auto-detect format from extension if not specified
        if format is None:
            format = path.suffix
        fmt = format.lower().lstrip('.')

        # Select parser based on format
        if fmt == "srt":
            return SubtitleParser._parse_srt(path)
        if fmt == "vtt":
            return SubtitleParser._parse_vtt(path)
        raise ValueError(f"Unsupported subtitle format: {fmt}")

    @staticmethod
    def _parse_srt(path: Path) -> List[SubtitleEntry]:
        """Parse an SRT subtitle file.

        Cues are separated by blank lines. Each cue is expected to be an
        index line, a timing line, then one or more text lines. A cue whose
        index line is missing is still accepted if it starts with a timing
        line. Any other line found outside a cue is ignored.
        """
        cues = []
        current = None  # dict for the cue being assembled, None between cues

        # utf-8-sig transparently drops a leading byte-order mark
        with open(path, 'r', encoding='utf-8-sig') as f:
            for raw_line in f:
                line = raw_line.strip()

                # A blank line terminates the cue in progress
                if not line:
                    if current is not None:
                        cues.append(current)
                        current = None
                    continue

                if current is None:
                    # Normal case: the cue starts with its index number
                    try:
                        int(line)
                    except ValueError:
                        pass
                    else:
                        current = {"text": []}
                        continue
                    # Tolerate a cue that starts directly with its timing line
                    match = SubtitleParser._SRT_TIMING.match(line)
                    if match:
                        current = {
                            "start_time": SubtitleParser._parse_timestamp(match.group(1)),
                            "end_time": SubtitleParser._parse_timestamp(match.group(2)),
                            "text": [],
                        }
                    continue

                # Test for the key rather than truthiness: a cue starting at
                # 0.0 seconds already has its timing.
                if "start_time" not in current:
                    match = SubtitleParser._SRT_TIMING.match(line)
                    if match:
                        current["start_time"] = SubtitleParser._parse_timestamp(match.group(1))
                        current["end_time"] = SubtitleParser._parse_timestamp(match.group(2))
                        continue

                # Anything else inside a cue is subtitle text
                current["text"].append(line)

        # The last cue is not necessarily followed by a blank line
        if current is not None:
            cues.append(current)

        return SubtitleParser._build_entries(cues, path)

    @staticmethod
    def _parse_vtt(path: Path) -> List[SubtitleEntry]:
        """Parse a WebVTT subtitle file.

        Everything up to and including the line starting with "WEBVTT" is
        skipped. Lines outside a cue (header metadata, cue identifiers,
        NOTE/STYLE bodies) are ignored because no cue is open when they
        are read.
        """
        cues = []
        current = None  # dict for the cue being assembled, None between cues
        header_seen = False

        with open(path, 'r', encoding='utf-8-sig') as f:
            for raw_line in f:
                line = raw_line.strip()

                # Skip WebVTT header
                if not header_seen:
                    if line.startswith('WEBVTT'):
                        header_seen = True
                    continue

                # A blank line terminates the cue in progress
                if not line:
                    if current is not None:
                        cues.append(current)
                        current = None
                    continue

                match = SubtitleParser._VTT_TIMING.match(line)
                if match:
                    # Keep the previous cue if the file omitted the blank
                    # separator line before this timing line.
                    if current is not None:
                        cues.append(current)
                    current = {
                        "start_time": SubtitleParser._parse_timestamp(match.group(1)),
                        "end_time": SubtitleParser._parse_timestamp(match.group(2)),
                        "text": [],
                    }
                    continue

                # Add text line
                if current is not None:
                    current["text"].append(line)

        # The last cue is not necessarily followed by a blank line
        if current is not None:
            cues.append(current)

        return SubtitleParser._build_entries(cues, path)

    @staticmethod
    def _build_entries(cues: List[Dict], path: Path) -> List[SubtitleEntry]:
        """Convert raw cue dicts into SubtitleEntry objects.

        Cues that never received a timing line are skipped with a warning
        instead of failing with a KeyError.
        """
        entries = []
        for cue in cues:
            if "start_time" not in cue:
                logger.warning(
                    "Skipping subtitle cue without a valid timing line in %s", path
                )
                continue
            entries.append(
                SubtitleEntry(
                    start_time=cue["start_time"],
                    end_time=cue["end_time"],
                    text="\n".join(cue["text"]),
                )
            )
        return entries

    @staticmethod
    def _parse_timestamp(timestamp: str) -> float:
        """Convert a timestamp string to seconds.

        Accepts "HH:MM:SS,mmm" (SRT) and "[HH:]MM:SS.mmm" (WebVTT). The
        hours field may be omitted and may exceed 23.

        Raises:
            ValueError: If the string is not a valid timestamp
        """
        # Handle both SRT (,) and VTT (.) decimal separators
        normalized = timestamp.strip().replace(',', '.')

        match = SubtitleParser._TIMESTAMP.fullmatch(normalized)
        if match is None:
            raise ValueError(f"Invalid subtitle timestamp: {timestamp!r}")

        hours = int(match.group(1) or 0)
        minutes = int(match.group(2))
        seconds = int(match.group(3))
        # Same minute/second ranges that strptime's %M and %S accept
        if minutes > 59 or seconds > 61:
            raise ValueError(f"Invalid subtitle timestamp: {timestamp!r}")

        digits = match.group(4)
        fraction = float("0." + digits) if digits else 0.0

        return hours * 3600 + minutes * 60 + seconds + fraction
class SubtitleRenderer:
    """Renders subtitles on video frames."""

    # Distance of the automatic default position from the bottom edge, in pixels
    _DEFAULT_BOTTOM_MARGIN = 50

    def __init__(self, entries: List[SubtitleEntry],
                 font: str = "Arial", size: int = 24,
                 color: Union[str, Tuple[int, int, int]] = "white",
                 stroke_width: int = 2,
                 stroke_color: Union[str, Tuple[int, int, int]] = "black",
                 position: Optional[Tuple[int, int]] = None,
                 fade_in: float = 0.25, fade_out: float = 0.25):
        """
        Initialize subtitle renderer.

        Args:
            entries: List of subtitle entries
            font: Font name or path
            size: Font size in pixels
            color: Text color
            stroke_width: Outline width (0 for no outline)
            stroke_color: Outline color
            position: Default (x, y) position (None for bottom center)
            fade_in: Fade-in duration in seconds
            fade_out: Fade-out duration in seconds
        """
        self.entries = entries
        self.font = font
        self.size = size
        self.color = color
        self.stroke_width = stroke_width
        self.stroke_color = stroke_color
        self.default_position = position
        self.fade_in = fade_in
        self.fade_out = fade_out

        # Try to load the font; fall back to Pillow's built-in font so that
        # rendering still works when the requested font is unavailable.
        try:
            self._font_obj = ImageFont.truetype(font, size)
        except OSError:
            logger.warning("Font '%s' not found, using default font", font)
            self._font_obj = ImageFont.load_default()

    def render(self, frame: Image.Image, time: float) -> Image.Image:
        """
        Render subtitles for the current frame.

        Args:
            frame: PIL Image to render on (never modified)
            time: Current video time in seconds

        Returns:
            Frame with rendered subtitles. When no entry is visible at
            `time`, the input frame object itself is returned unchanged.
            When at least one entry is drawn, the result is a new RGBA image.

        NOTE(review): text is drawn with Pillow's default anchor, so the
        position is the top-left corner of the text rather than its
        center - confirm whether "bottom center" should be truly centered.
        """
        # Find visible subtitles at current time
        visible_entries = [
            entry for entry in self.entries
            if entry.start_time <= time <= entry.end_time
        ]
        if not visible_entries:
            return frame

        # Work on a copy so the caller's frame is left untouched
        result = frame.copy()

        # Colors and stroke offsets do not depend on the entry
        text_rgb = self._get_color(self.color)
        stroke_rgb = self._get_color(self.stroke_color)
        stroke_offsets = range(-self.stroke_width, self.stroke_width + 1)

        for entry in visible_entries:
            opacity = self._opacity_at(entry, time)
            if opacity == 0:
                continue
            alpha = int(255 * opacity)

            position = self._resolve_position(entry, frame.size)

            # Draw on a transparent overlay so the fade alpha is blended
            # with the frame instead of overwriting its pixels.
            subtitle_overlay = Image.new('RGBA', frame.size, (0, 0, 0, 0))
            draw = ImageDraw.Draw(subtitle_overlay)

            # Outline: the text drawn at every offset within the stroke width
            if self.stroke_width > 0:
                for dx in stroke_offsets:
                    for dy in stroke_offsets:
                        draw.text(
                            (position[0] + dx, position[1] + dy),
                            entry.text,
                            font=self._font_obj,
                            fill=(*stroke_rgb, alpha),
                            align="center"
                        )

            # Draw main text
            draw.text(
                position,
                entry.text,
                font=self._font_obj,
                fill=(*text_rgb, alpha),
                align="center"
            )

            # Composite the subtitle overlay onto the frame
            result = Image.alpha_composite(result.convert('RGBA'), subtitle_overlay)

        return result

    def _opacity_at(self, entry: SubtitleEntry, time: float) -> float:
        """Return the fade opacity (0.0 to 1.0) of `entry` at `time`.

        The fade-in and fade-out ramps are both evaluated and the smaller
        value wins, so a cue shorter than the fade windows still fades out.
        """
        opacity = 1.0
        if self.fade_in > 0:
            opacity = min(opacity, (time - entry.start_time) / self.fade_in)
        if self.fade_out > 0:
            opacity = min(opacity, (entry.end_time - time) / self.fade_out)
        return max(0.0, min(1.0, opacity))

    def _resolve_position(self, entry: SubtitleEntry,
                          frame_size: Tuple[int, int]) -> Tuple[int, int]:
        """Return the draw position for `entry` on a frame of `frame_size`.

        Priority: the entry's own position, then the renderer's default,
        then a point at half the frame width, 50 pixels above the bottom.
        The automatic position is computed per call and never stored, so
        frames of different sizes each get a matching position.
        """
        if entry.position is not None:
            return entry.position
        if self.default_position is not None:
            return self.default_position
        width, height = frame_size
        return (width // 2, height - self._DEFAULT_BOTTOM_MARGIN)

    def _get_color(self, color: Union[str, Tuple[int, int, int]]) -> Tuple[int, int, int]:
        """Convert color to RGB tuple.

        Accepts a color name/spec string or an RGB(A) sequence. Any alpha
        component is dropped because render() appends its own fade alpha.
        """
        if isinstance(color, (tuple, list)):
            rgb = color
        else:
            rgb = ImageColor.getrgb(color)
        return tuple(rgb[:3])