Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions change-log.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ Highlights since the 4.x line:

-----------------------------------------------------------------------------------------------------

v5.0.0-rc5 (TBD)

* seconv: OCR VobSub subtitles (.sub/.idx, VobSub-in-MKV, VobSub-in-MP4) to text formats
* seconv: add --time-codes-only to extract time codes from image-based subtitles (Blu-ray .sup, VobSub, MKV PGS, TS DVB-sub) without running OCR

-----------------------------------------------------------------------------------------------------

v5.0.0-rc4 (11th of June 2026)

* Add Zonos TTS text-to-speech engine (CrispASR)
Expand Down
3 changes: 2 additions & 1 deletion docs/features/seconv.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ seconv *.srt webvtt
seconv movie.srt subrip --encoding:source --FixCommonErrors
seconv movie.mkv subrip --track-number:3
seconv movie.sup subrip --ocr-engine:tesseract --ocr-language:eng
seconv movie.sup subrip --ocr-engine:binaryocr --ocr-db:Latin.db
seconv movie.sub subrip --ocr-engine:binaryocr --ocr-db:Latin.db # VobSub (.idx auto-detected)
seconv movie.sup subrip --time-codes-only
```

For full usage, options, OCR setup, operations pipeline, examples, and exit codes, see the canonical reference:
Expand Down
10 changes: 9 additions & 1 deletion docs/reference/command-line.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ If two tracks share a language, the track number is added: `movie.#3.eng.srt`.
| `--ocr-db:<path>` | OCR database file: `.nocr` for `nocr`, `.db` for `binaryocr` (required for both) |
| `--ollama-url:<url>` | Default `http://localhost:11434/api/chat` |
| `--ollama-model:<model>` | Default `llama3.2-vision` |
| `--time-codes-only` | Image sources (`.sup`, VobSub `.sub`/`.idx`, MKV PGS/VobSub, MP4 VobSub, TS DVB-sub) → text format with time codes only and empty text. **Skips OCR entirely** — no OCR engine required. Ignored for text inputs and image output targets. |

> **OCR database files are not bundled with `seconv`.** The `nocr` and `binaryocr` engines need a `.nocr` or `.db` file passed via `--ocr-db`. Sources:
>
Expand All @@ -183,11 +184,18 @@ seconv movie.sup subrip --ocr-engine:nocr --ocr-db:"C:\Users\me\AppData\Roaming\
# BinaryOCR
seconv movie.sup subrip --ocr-engine:binaryocr --ocr-db:"C:\Users\me\AppData\Roaming\Subtitle Edit\Ocr\Latin.db"

# MKV with image (PGS) tracks — OCR runs automatically
# MKV with image (PGS or VobSub) tracks — OCR runs automatically
seconv movie.mkv subrip --ocr-engine:tesseract --ocr-language:eng

# VobSub .sub + .idx pair — the .idx companion is auto-detected; if it is missing, timing is read from the .sub stream and a default color palette is used
seconv movie.sub subrip --ocr-engine:tesseract --ocr-language:eng

# Transport-stream teletext (no OCR needed)
seconv broadcast.ts subrip

# Time codes only — extract timing with no OCR (empty text); works for any image source
seconv movie.sup subrip --time-codes-only
seconv movie.sub subrip --time-codes-only
```

### Templates / replacements
Expand Down
5 changes: 5 additions & 0 deletions src/seconv/Commands/ConvertCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ public sealed class Settings : CommandSettings
[Description("Path to a .nocr file (--ocr-engine=nocr) or .db file (--ocr-engine=binaryocr)")]
public string? OcrDb { get; init; }

[CommandOption("--time-codes-only|--timecodesonly")]
[Description("For image-based sources (.sup, VobSub .sub/.idx, MKV PGS/VobSub, MP4 VobSub, TS DVB-sub): output time codes only with empty text; skips OCR (no OCR engine required)")]
public bool TimeCodesOnly { get; init; }

[CommandOption("--ollama-url")]
[Description("Ollama API endpoint (default: http://localhost:11434/api/chat)")]
public string? OllamaUrl { get; init; }
Expand Down Expand Up @@ -449,6 +453,7 @@ protected override async Task<int> ExecuteAsync(CommandContext context, Settings
OcrEngine = string.IsNullOrWhiteSpace(settings.OcrEngine) ? "tesseract" : settings.OcrEngine,
OcrLanguage = settings.OcrLanguage ?? "eng",
OcrDb = settings.OcrDb,
TimeCodesOnly = settings.TimeCodesOnly,
OllamaUrl = settings.OllamaUrl,
OllamaModel = settings.OllamaModel,
TeletextOnly = settings.TeletextOnly,
Expand Down
127 changes: 115 additions & 12 deletions src/seconv/Core/BitmapSubtitleLoader.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using Nikse.SubtitleEdit.Core.BluRaySup;
using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Core.ContainerFormats.Matroska;
using Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes;
using Nikse.SubtitleEdit.Core.ContainerFormats.TransportStream;
using Nikse.SubtitleEdit.Core.VobSub;
using SkiaSharp;
Expand Down Expand Up @@ -63,22 +64,19 @@ public static IReadOnlyList<BitmapSubtitleItem> LoadMatroskaPgs(MatroskaFile mat
}

/// <summary>
/// VobSub <c>.sub</c> + <c>.idx</c> pair → bitmap events. Uses
/// <see cref="VobSubParser.OpenSubIdx"/> so the .idx provides timing + palette
/// and the .sub provides the subpicture stream payload. The VobSub spec doesn't
/// store a screen size in the index file, so we bake in the DVD-standard frame
/// sizes (720x576 PAL, 720x480 NTSC) — otherwise the output writer would fall
/// back to <c>--resolution</c> / 1920x1080, which is wrong metadata for DVD
/// sources.
/// VobSub <c>.sub</c> (+ optional <c>.idx</c>) → bitmap events. Uses
/// <see cref="VobSubParser.OpenSubIdx"/>, which uses the .idx for timing + palette when
/// present and otherwise parses the .sub's MPEG-PS stream directly (stream PTS timing +
/// a default palette). The VobSub spec doesn't store a screen size in the index file, so
/// we bake in the DVD-standard frame sizes (720x576 PAL, 720x480 NTSC) — otherwise the
/// output writer would fall back to <c>--resolution</c> / 1920x1080, which is wrong
/// metadata for DVD sources.
/// </summary>
public static IReadOnlyList<BitmapSubtitleItem> LoadVobSub(string subPath, string idxPath, bool isPal)
{
if (!File.Exists(idxPath))
{
throw new InvalidOperationException($"VobSub .idx companion not found at: {idxPath}");
}

var parser = new VobSubParser(isPal);
// OpenSubIdx falls back to parsing the .sub stream directly when the .idx is missing,
// so an absent companion is not fatal — see IsBinaryVobSub for the caller's gate.
parser.OpenSubIdx(subPath, idxPath);
var packs = parser.MergeVobSubPacks();
if (packs.Count == 0)
Expand All @@ -102,6 +100,111 @@ public static IReadOnlyList<BitmapSubtitleItem> LoadVobSub(string subPath, strin
return items;
}

/// <summary>
/// Probes the first four bytes of a file for an MPEG-2 pack header (<c>00 00 01 BA</c>),
/// i.e. checks whether it is a binary VobSub subpicture stream rather than a text MicroDVD
/// <c>.sub</c>. Callers use this to decide whether a <c>.sub</c> without an <c>.idx</c>
/// companion should be read as VobSub or handed to the text loader.
/// </summary>
/// <param name="filePath">Path of the file to probe.</param>
/// <returns>
/// <c>true</c> when four bytes could be read and <see cref="VobSubParser.IsMpeg2PackHeader"/>
/// accepts them; <c>false</c> when the file is shorter than four bytes, the header does not
/// match, or the file could not be opened/read.
/// </returns>
public static bool IsBinaryVobSub(string filePath)
{
    try
    {
        var header = new byte[4];
        using var fs = File.OpenRead(filePath);

        // Stream.Read is allowed to return fewer bytes than requested even when more data
        // is available, which would misclassify a real VobSub as text. ReadAtLeast keeps
        // reading until the header is complete or the file ends (returning the short count
        // instead of throwing).
        var bytesRead = fs.ReadAtLeast(header, header.Length, throwOnEndOfStream: false);
        return bytesRead == header.Length && VobSubParser.IsMpeg2PackHeader(header);
    }
    catch
    {
        // I/O race / permissions — deliberately best-effort: let the text loader try rather
        // than hard-failing here.
        return false;
    }
}

/// <summary>
/// Converts a VobSub (<c>S_VOBSUB</c>) track stored in an MKV into bitmap events. Each
/// Matroska block supplies one subpicture payload, and the event timing is taken from the
/// block's Start/End rather than the SubPicture's own delay (which only applies to
/// standalone <c>.sub</c>+<c>.idx</c> sources). Follows the desktop batch converter's
/// <c>LoadVobSubFromMatroska</c>; no palette is passed, so <see cref="SubPicture"/>'s
/// default is used, as in the GUI's OCR path.
/// </summary>
/// <param name="matroska">The opened Matroska file to read subtitle blocks from.</param>
/// <param name="track">The VobSub track to extract.</param>
/// <returns>One item per block that yielded a bitmap, in block order.</returns>
/// <exception cref="InvalidOperationException">
/// The track has content encoding type 1 (compressed), or no block produced a bitmap.
/// </exception>
public static IReadOnlyList<BitmapSubtitleItem> LoadMatroskaVobSub(MatroskaFile matroska, MatroskaTrackInfo track)
{
    if (track.ContentEncodingType == 1)
    {
        throw new InvalidOperationException(
            $"VobSub MKV track #{track.TrackNumber} is compressed (content encoding 1), which isn't supported.");
    }

    var blocks = matroska.GetSubtitle(track.TrackNumber, null);
    var mergedPacks = new List<VobSubMergedPack>(blocks.Count);
    foreach (var block in blocks)
    {
        var current = new VobSubMergedPack(block.GetData(track), TimeSpan.FromMilliseconds(block.Start), 32, null)
        {
            EndTime = TimeSpan.FromMilliseconds(block.End),
        };
        mergedPacks.Add(current);

        if (mergedPacks.Count < 2)
        {
            continue;
        }

        // Some Handbrake versions emit overlapping time codes: when the preceding pack runs
        // past the start of this one, pull its end back to 1 ms before this start.
        var preceding = mergedPacks[^2];
        if (preceding.EndTime > current.StartTime)
        {
            preceding.EndTime = TimeSpan.FromMilliseconds(current.StartTime.TotalMilliseconds - 1);
        }
    }

    var result = new List<BitmapSubtitleItem>(mergedPacks.Count);
    foreach (var mergedPack in mergedPacks)
    {
        // Packs that decode to no bitmap are dropped.
        if (mergedPack.GetBitmap() is not { } bitmap)
        {
            continue;
        }

        // Timing comes from the block-derived StartTime/EndTime; StartTimeCode/EndTimeCode
        // depend on the SubPicture delay and are only right for .sub+.idx sources.
        var start = new TimeCode(mergedPack.StartTime.TotalMilliseconds);
        var end = new TimeCode(mergedPack.EndTime.TotalMilliseconds);
        result.Add(new BitmapSubtitleItem(start, end, bitmap));
    }

    if (result.Count > 0)
    {
        return result;
    }

    throw new InvalidOperationException($"No VobSub subtitles in MKV track #{track.TrackNumber}.");
}

/// <summary>
/// Converts a VobSub track stored in an MP4 (handler type <c>subp</c>, e.g. as written by
/// MP4Box) into bitmap events. libse's <see cref="Stbl"/> provides both the decoded
/// subpictures and the paragraphs carrying their timing; entry <c>i</c> of
/// <c>SubPictures</c> is paired with paragraph <c>i</c>, and any surplus entries on either
/// side are ignored. Follows the desktop <c>OcrSubtitleMp4VobSub</c>, passing no palette.
/// </summary>
/// <param name="track">The MP4 track box holding the VobSub samples.</param>
/// <returns>One item per paired subpicture that yielded a bitmap, in track order.</returns>
/// <exception cref="InvalidOperationException">No subpicture produced a bitmap.</exception>
public static IReadOnlyList<BitmapSubtitleItem> LoadMp4VobSub(Trak track)
{
    var timings = track.Mdia.Minf.Stbl.GetParagraphs();
    var pictures = track.Mdia.Minf.Stbl.SubPictures;

    // Only indices present in both lists can be paired.
    var pairCount = Math.Min(timings.Count, pictures.Count);
    var result = new List<BitmapSubtitleItem>(pairCount);

    for (var index = 0; index < pairCount; index++)
    {
        var bitmap = pictures[index].GetBitmap(
            null, SKColors.Transparent, SKColors.Black, SKColors.White, SKColors.Black, false);
        if (bitmap is not null)
        {
            var timing = timings[index];
            result.Add(new BitmapSubtitleItem(timing.StartTime, timing.EndTime, bitmap));
        }
    }

    if (result.Count > 0)
    {
        return result;
    }

    throw new InvalidOperationException("No VobSub subpictures found in MP4 track.");
}

/// <summary>
/// Transport stream DVB-sub → one bitmap list per packet ID. Caller is responsible
/// for routing each PID to its own output file (multiple subtitle streams =
Expand Down
62 changes: 60 additions & 2 deletions src/seconv/Core/ContainerSubtitleLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,31 @@ public sealed record LoadedTrack(
return LoadBluRaySup(filePath, options);
}

if (ext == ".sub")
{
var idxPath = Path.ChangeExtension(filePath, ".idx");
if (File.Exists(idxPath))
{
return LoadVobSub(filePath, idxPath, options);
}

// No .idx companion. A binary VobSub .sub can still be read — the MPEG-PS packets
// carry their own PTS timing and a default palette is used — so read it (with a
// note) rather than letting it fall through to the MicroDVD text loader, which
// would misparse the binary and surface a confusing "no subtitles found" error. A
// genuine text MicroDVD .sub starts with text, not the MPEG pack header, so it
// returns null here and is handled by the text loader.
if (BitmapSubtitleLoader.IsBinaryVobSub(filePath))
{
AnsiConsole.MarkupLine(
$"[yellow]Note: VobSub '.sub' has no '.idx' companion ({Path.GetFileName(idxPath).EscapeMarkup()}); "
+ "reading timing from the stream and using a default color palette.[/]");
return LoadVobSub(filePath, idxPath, options);
}

return null;
}

if (ext is ".ts" or ".m2ts" or ".mts")
{
return LoadTransportStream(filePath, options);
Expand Down Expand Up @@ -223,7 +248,18 @@ private static List<LoadedTrack> LoadMatroska(string filePath, ConversionOptions

if (track.CodecId.Equals("S_VOBSUB", StringComparison.OrdinalIgnoreCase))
{
AnsiConsole.MarkupLine($"[yellow]Warning: skipping VobSub MKV track #{track.TrackNumber} — VobSub OCR not yet supported in seconv. Use Subtitle Edit (UI) for now.[/]");
try
{
var vobSub = ImageOcrLoader.LoadMatroskaVobSub(matroska, track, options);
if (vobSub.Paragraphs.Count > 0)
{
tracks.Add(new LoadedTrack(vobSub, new SubRip(), SanitizeLang(track.Language), track.TrackNumber));
}
}
catch (Exception ex)
{
AnsiConsole.MarkupLine($"[yellow]Warning: VobSub OCR failed on MKV track #{track.TrackNumber}: {ex.Message.EscapeMarkup()}[/]");
}
continue;
}

Expand Down Expand Up @@ -269,7 +305,19 @@ private static List<LoadedTrack> LoadMp4(string filePath, ConversionOptions opti
}
if (trak.Mdia.IsVobSubSubtitle)
{
AnsiConsole.MarkupLine($"[yellow]Warning: skipping VobSub MP4 track #{trackId} — OCR is not yet supported.[/]");
try
{
var vobSub = ImageOcrLoader.LoadMp4VobSub(trak, options);
if (vobSub.Paragraphs.Count > 0)
{
var vobLang = LanguageAutoDetect.AutoDetectGoogleLanguageOrNull(vobSub) ?? string.Empty;
tracks.Add(new LoadedTrack(vobSub, new SubRip(), vobLang, trackId));
}
}
catch (Exception ex)
{
AnsiConsole.MarkupLine($"[yellow]Warning: VobSub OCR failed on MP4 track #{trackId}: {ex.Message.EscapeMarkup()}[/]");
}
continue;
}

Expand Down Expand Up @@ -315,6 +363,16 @@ private static List<LoadedTrack> LoadBluRaySup(string filePath, ConversionOption
return [new LoadedTrack(subtitle, new SubRip(), string.Empty, null)];
}

/// <summary>
/// Loads a VobSub <c>.sub</c> file (with its <c>.idx</c> path) through
/// <c>ImageOcrLoader.LoadVobSub</c> and wraps the result as a single SubRip-typed track
/// with no language and no track number.
/// </summary>
/// <param name="subPath">Path to the VobSub <c>.sub</c> file.</param>
/// <param name="idxPath">Path of the <c>.idx</c> companion.</param>
/// <param name="options">Conversion options forwarded to the image loader.</param>
/// <returns>A list containing exactly one loaded track.</returns>
/// <exception cref="InvalidOperationException">The loaded subtitle has no paragraphs.</exception>
private static List<LoadedTrack> LoadVobSub(string subPath, string idxPath, ConversionOptions options)
{
    var loaded = ImageOcrLoader.LoadVobSub(subPath, idxPath, options);
    if (loaded.Paragraphs.Count > 0)
    {
        var onlyTrack = new LoadedTrack(loaded, new SubRip(), string.Empty, null);
        return [onlyTrack];
    }

    throw new InvalidOperationException($"No subtitles recognised in VobSub file: {subPath}");
}

private static List<LoadedTrack> LoadTransportStream(string filePath, ConversionOptions options)
{
var tracks = new List<LoadedTrack>();
Expand Down
Loading