MiniMax-AI · RyanLee-Dev · Apr 17, 2026 · Apr 17, 2026 · Apr 17, 2026
diff --git a/skill/SKILL.md b/skill/SKILL.md
@@ -178,7 +178,7 @@ mmx speech synthesize --text <text> [flags]
 | `--bitrate <bps>` | number | Bitrate (default: 128000) |
 | `--channels <n>` | number | Audio channels (default: 1) |
 | `--language <code>` | string | Language boost |
-| `--subtitles` | boolean | Include subtitle timing data |
+| `--subtitles` | boolean | Download and save subtitles as a `.srt` file (alongside the `--out` audio file). The API must support subtitles for the selected model. |
 | `--pronunciation <from/to>` | string, repeatable | Custom pronunciation |
 | `--sound-effect <effect>` | string | Add sound effect |
 | `--out <path>` | string | Save audio to file |
@@ -188,6 +188,9 @@ mmx speech synthesize --text <text> [flags]
 mmx speech synthesize --text "Hello world" --out hello.mp3 --quiet
 # stdout: hello.mp3
 
+mmx speech synthesize --text "Hello" --subtitles --out hello.mp3
+# saves hello.mp3 + hello.srt (SRT subtitle file)
+
 echo "Breaking news." | mmx speech synthesize --text-file - --out news.mp3
 ```
 

diff --git a/src/commands/speech/synthesize.ts b/src/commands/speech/synthesize.ts
@@ -6,6 +6,7 @@ import { speechEndpoint } from '../../client/endpoints';
 import { parseSSE } from '../../client/stream';
 import { detectOutputFormat, formatOutput } from '../../output/formatter';
 import { saveAudioOutput } from '../../output/audio';
+import { writeFileSync } from 'fs';
 import { readTextFromPathOrStdin } from '../../utils/fs';
 import type { Config } from '../../config/schema';
 import type { GlobalFlags } from '../../types/flags';
@@ -37,6 +38,7 @@ export default defineCommand({
   examples: [
     'mmx speech synthesize --text "Hello, world!"',
     'mmx speech synthesize --text "Hello, world!" --out hello.mp3',
+    'mmx speech synthesize --text "Hello" --subtitles --out hello.mp3',
     'echo "Breaking news." | mmx speech synthesize --text-file - --out news.mp3',
     'mmx speech synthesize --text "Stream" --stream | mpv --no-terminal -',
   ],
@@ -85,7 +87,7 @@ export default defineCommand({
     };
 
     if (flags.language) body.language_boost = flags.language as string;
-    if (flags.subtitles) body.subtitle = true;
+    if (flags.subtitles) body.subtitle_enable = true;  // Correct API parameter name
 
     if (flags.pronunciation) {
       body.pronunciation_dict = (flags.pronunciation as string[]).map(p => {
@@ -122,5 +124,52 @@ export default defineCommand({
 
     if (!config.quiet) process.stderr.write(`[Model: ${model}]\n`);
     saveAudioOutput(response, outPath, format, config.quiet);
+
+    // Download and save subtitle file when --subtitles is requested
+    if (flags.subtitles && response.data.subtitle_file) {
+      try {
+        // Download the subtitle JSON file from the URL
+        const subtitleRes = await fetch(response.data.subtitle_file);
+        if (!subtitleRes.ok) {
+          throw new CLIError(`Failed to download subtitle file: ${subtitleRes.status}`, ExitCode.GENERAL);
+        }
+        // API returns a flat array, not { subtitles: [...] }
+        const subtitleArray = await subtitleRes.json() as Array<{ text: string; time_begin: number; time_end: number }>;
+
+        if (subtitleArray?.length) {
+          // Convert to SRT format (API returns time in milliseconds)
+          const subtitlePath = outPath.replace(/\.[^.]+$/, '') + '.srt';
+          const srtContent = subtitleArray
+            .map((s, i) => {
+              // Format a millisecond offset as an SRT timestamp (HH:MM:SS,mmm)
+              const fmt = (ms: number) => {
+                const h = String(Math.floor(ms / 3600000)).padStart(2, '0');
+                const m = String(Math.floor((ms % 3600000) / 60000)).padStart(2, '0');
+                const sec = String(Math.floor((ms % 60000) / 1000)).padStart(2, '0');
+                const mil = String(Math.round(ms % 1000)).padStart(3, '0');
+                return `${h}:${m}:${sec},${mil}`;
+              };
+              return `${i + 1}\n${fmt(s.time_begin)} --> ${fmt(s.time_end)}\n${s.text}`;
+            })
+            .join('\n\n');
+          writeFileSync(subtitlePath, srtContent, 'utf-8');
+          if (!config.quiet) {
+            console.log(formatOutput({ subtitles: subtitlePath }, format));
+          } else {
+            console.log(subtitlePath);
+          }
+        }
+      } catch (err) {
+        // Non-fatal: warn on stderr (suppressed by --quiet) but don't fail the whole synthesis
+        if (!config.quiet) {
+          process.stderr.write(`Warning: failed to download subtitles: ${(err as Error).message}\n`);
+        }
+      }
+    } else if (flags.subtitles && !response.data.subtitle_file) {
+      // Warn if --subtitles was requested but API didn't return subtitle_file
+      if (!config.quiet) {
+        process.stderr.write(`Warning: subtitles requested but not returned by API\n`);
+      }
+    }
   },
 });
diff --git a/src/types/api.ts b/src/types/api.ts
@@ -108,15 +108,15 @@ export interface SpeechRequest {
   pronunciation_dict?: Array<{ tone: string; text: string }>;
   output_format?: 'url' | 'hex';
   stream?: boolean;
-  subtitle?: boolean;
+  subtitle_enable?: boolean;  // Correct API parameter name (not 'subtitle')
 }
 
 export interface SpeechResponse {
   base_resp: BaseResp;
   data: {
     audio?: string; // hex-encoded audio data
     audio_url?: string;
-    subtitle_info?: SubtitleInfo;
+    subtitle_file?: string; // URL to download subtitle JSON file (when subtitle_enable=true)
     status: number;
   };
   extra_info?: {
@@ -129,14 +129,6 @@ export interface SpeechResponse {
   };
 }
 
-export interface SubtitleInfo {
-  subtitles: Array<{
-    text: string;
-    start_time: number;
-    end_time: number;
-  }>;
-}
-
 // ---- Voice List ----
 
 export interface SystemVoiceInfo {

diff --git a/test/auth/timeout-fix.test.ts b/test/auth/timeout-fix.test.ts
@@ -124,7 +124,8 @@ describe('refreshAccessToken: timeout and error handling', () => {
     // We test the real function against a mock server via a wrapper
     // that overrides the fetch to hit our local server instead.
     const origFetch = globalThis.fetch;
-    globalThis.fetch = async (input: RequestInfo | URL, init?: RequestInit) => {
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    (globalThis as any).fetch = async (input: RequestInfo | URL, init?: RequestInit) => {
       const url = typeof input === 'string' ? input : input.toString();
       if (url.includes('oauth/token')) {
         return origFetch(`${server.url}/v1/oauth/token`, init);
@@ -156,7 +157,8 @@ describe('refreshAccessToken: timeout and error handling', () => {
 
     const mod = await import('../../src/auth/refresh');
     const origFetch = globalThis.fetch;
-    globalThis.fetch = async (input: RequestInfo | URL, init?: RequestInit) => {
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    (globalThis as any).fetch = async (input: RequestInfo | URL, init?: RequestInit) => {
       const url = typeof input === 'string' ? input : input.toString();
       if (url.includes('oauth/token')) {
         return origFetch(`${server.url}/v1/oauth/token`, init);

diff --git a/test/commands/speech/synthesize.test.ts b/test/commands/speech/synthesize.test.ts
@@ -159,4 +159,47 @@ describe('speech synthesize command', () => {
       console.log = originalLog;
     }
   });
+
+  it('--subtitles sets subtitle_enable in dry-run output', async () => {
+    const config = {
+      apiKey: 'test-key',
+      region: 'global' as const,
+      baseUrl: 'https://api.mmx.io',
+      output: 'json' as const,
+      timeout: 10,
+      verbose: false,
+      quiet: false,
+      noColor: true,
+      yes: false,
+      dryRun: true,
+      nonInteractive: true,
+      async: false,
+    };
+
+    const originalLog = console.log;
+    let output = '';
+    console.log = (msg: string) => { output += msg; };
+
+    try {
+      await synthesizeCommand.execute(config, {
+        text: 'Hello',
+        subtitles: true,
+        quiet: false,
+        verbose: false,
+        noColor: true,
+        yes: false,
+        dryRun: true,
+        help: false,
+        nonInteractive: true,
+        async: false,
+      });
+
+      const parsed = JSON.parse(output);
+      expect(parsed.request.subtitle_enable).toBe(true);
+      // Verify the old incorrect parameter name is NOT used
+      expect(parsed.request.subtitle).toBeUndefined();
+    } finally {
+      console.log = originalLog;
+    }
+  });
 });