Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion skill/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ mmx speech synthesize --text <text> [flags]
| `--bitrate <bps>` | number | Bitrate (default: 128000) |
| `--channels <n>` | number | Audio channels (default: 1) |
| `--language <code>` | string | Language boost |
| `--subtitles` | boolean | Include subtitle timing data |
| `--subtitles` | boolean | Download and save subtitles as `.srt` file (alongside `--out` audio file). API must support subtitles for the selected model. |
| `--pronunciation <from/to>` | string, repeatable | Custom pronunciation |
| `--sound-effect <effect>` | string | Add sound effect |
| `--out <path>` | string | Save audio to file |
Expand All @@ -188,6 +188,9 @@ mmx speech synthesize --text <text> [flags]
mmx speech synthesize --text "Hello world" --out hello.mp3 --quiet
# stdout: hello.mp3

mmx speech synthesize --text "Hello" --subtitles --out hello.mp3
# saves hello.mp3 + hello.srt (SRT subtitle file)

echo "Breaking news." | mmx speech synthesize --text-file - --out news.mp3
```

Expand Down
51 changes: 50 additions & 1 deletion src/commands/speech/synthesize.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { speechEndpoint } from '../../client/endpoints';
import { parseSSE } from '../../client/stream';
import { detectOutputFormat, formatOutput } from '../../output/formatter';
import { saveAudioOutput } from '../../output/audio';
import { writeFileSync } from 'fs';
import { readTextFromPathOrStdin } from '../../utils/fs';
import type { Config } from '../../config/schema';
import type { GlobalFlags } from '../../types/flags';
Expand Down Expand Up @@ -37,6 +38,7 @@ export default defineCommand({
examples: [
'mmx speech synthesize --text "Hello, world!"',
'mmx speech synthesize --text "Hello, world!" --out hello.mp3',
'mmx speech synthesize --text "Hello" --subtitles --out hello.mp3',
'echo "Breaking news." | mmx speech synthesize --text-file - --out news.mp3',
'mmx speech synthesize --text "Stream" --stream | mpv --no-terminal -',
],
Expand Down Expand Up @@ -85,7 +87,7 @@ export default defineCommand({
};

if (flags.language) body.language_boost = flags.language as string;
if (flags.subtitles) body.subtitle = true;
if (flags.subtitles) body.subtitle_enable = true; // Correct API parameter name

if (flags.pronunciation) {
body.pronunciation_dict = (flags.pronunciation as string[]).map(p => {
Expand Down Expand Up @@ -122,5 +124,52 @@ export default defineCommand({

if (!config.quiet) process.stderr.write(`[Model: ${model}]\n`);
saveAudioOutput(response, outPath, format, config.quiet);

// Download and save the subtitle file when --subtitles is requested.
// This step is best-effort: every failure below is reported as a warning on
// stderr (suppressed by --quiet) and never fails the synthesis itself.
if (flags.subtitles && response.data.subtitle_file) {
  try {
    // Download the subtitle JSON file from the URL returned by the API.
    const subtitleRes = await fetch(response.data.subtitle_file);
    if (!subtitleRes.ok) {
      throw new CLIError(`Failed to download subtitle file: ${subtitleRes.status}`, ExitCode.GENERAL);
    }

    // API returns a flat array, not { subtitles: [...] }. Verify that at
    // runtime so an unexpected payload produces a warning instead of being
    // skipped silently.
    const payload: unknown = await subtitleRes.json();
    if (!Array.isArray(payload)) {
      throw new CLIError('Unexpected subtitle file format (expected a JSON array)', ExitCode.GENERAL);
    }
    const subtitleArray = payload as Array<{ text: string; time_begin: number; time_end: number }>;

    if (subtitleArray.length) {
      // Derive "<out without extension>.srt". Only a dot that belongs to the
      // file name itself (after the last path separator, and not the leading
      // dot of a dotfile) counts as an extension; otherwise paths such as
      // "./hello" or "dir.v2/out" would be truncated to ".srt" / "dir.srt".
      const lastSep = Math.max(outPath.lastIndexOf('/'), outPath.lastIndexOf('\\'));
      const lastDot = outPath.lastIndexOf('.');
      const stem = lastDot > lastSep + 1 ? outPath.slice(0, lastDot) : outPath;
      const subtitlePath = `${stem}.srt`;

      const pad = (value: number, width: number): string => String(value).padStart(width, '0');

      // Formats a millisecond offset as an SRT timestamp (HH:MM:SS,mmm).
      // The value is rounded once up front so a fractional input such as
      // 999.6 carries into the seconds field instead of rendering ",1000";
      // negative offsets are clamped to zero.
      const formatSrtTimestamp = (ms: number): string => {
        const total = Math.max(0, Math.round(ms));
        const hours = Math.floor(total / 3600000);
        const minutes = Math.floor((total % 3600000) / 60000);
        const seconds = Math.floor((total % 60000) / 1000);
        return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(seconds, 2)},${pad(total % 1000, 3)}`;
      };

      // Convert to SRT format (API returns time in milliseconds): numbered
      // cues separated by a blank line.
      const srtContent = subtitleArray
        .map((s, i) => `${i + 1}\n${formatSrtTimestamp(s.time_begin)} --> ${formatSrtTimestamp(s.time_end)}\n${s.text}`)
        .join('\n\n');
      writeFileSync(subtitlePath, srtContent, 'utf-8');

      // Report the saved path on stdout: formatted normally, bare path in quiet mode.
      if (!config.quiet) {
        console.log(formatOutput({ subtitles: subtitlePath }, format));
      } else {
        console.log(subtitlePath);
      }
    }
  } catch (err) {
    // Non-fatal: log warning but don't fail the whole synthesis
    if (!config.quiet) {
      const reason = err instanceof Error ? err.message : String(err);
      process.stderr.write(`Warning: failed to download subtitles: ${reason}\n`);
    }
  }
} else if (flags.subtitles) {
  // Warn if --subtitles was requested but API didn't return subtitle_file
  if (!config.quiet) {
    process.stderr.write(`Warning: subtitles requested but not returned by API\n`);
  }
}
},
});
12 changes: 2 additions & 10 deletions src/types/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -108,15 +108,15 @@ export interface SpeechRequest {
pronunciation_dict?: Array<{ tone: string; text: string }>;
output_format?: 'url' | 'hex';
stream?: boolean;
subtitle?: boolean;
subtitle_enable?: boolean; // Correct API parameter name (not 'subtitle')
}

export interface SpeechResponse {
base_resp: BaseResp;
data: {
audio?: string; // hex-encoded audio data
audio_url?: string;
subtitle_info?: SubtitleInfo;
subtitle_file?: string; // URL to download subtitle JSON file (when subtitle_enable=true)
status: number;
};
extra_info?: {
Expand All @@ -129,14 +129,6 @@ export interface SpeechResponse {
};
}

/**
 * Subtitle data: a list of text cues, each with a start and end time.
 */
export interface SubtitleInfo {
  /** The subtitle cues. */
  subtitles: {
    /** Text of the cue. */
    text: string;
    /** Cue start. NOTE(review): time unit is not visible here — confirm against the API docs. */
    start_time: number;
    /** Cue end, presumably in the same unit as `start_time` — confirm. */
    end_time: number;
  }[];
}

// ---- Voice List ----

export interface SystemVoiceInfo {
Expand Down
6 changes: 4 additions & 2 deletions test/auth/timeout-fix.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ describe('refreshAccessToken: timeout and error handling', () => {
// We test the real function against a mock server via a wrapper
// that overrides the fetch to hit our local server instead.
const origFetch = globalThis.fetch;
globalThis.fetch = async (input: RequestInfo | URL, init?: RequestInit) => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(globalThis as any).fetch = async (input: RequestInfo | URL, init?: RequestInit) => {
const url = typeof input === 'string' ? input : input.toString();
if (url.includes('oauth/token')) {
return origFetch(`${server.url}/v1/oauth/token`, init);
Expand Down Expand Up @@ -156,7 +157,8 @@ describe('refreshAccessToken: timeout and error handling', () => {

const mod = await import('../../src/auth/refresh');
const origFetch = globalThis.fetch;
globalThis.fetch = async (input: RequestInfo | URL, init?: RequestInit) => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(globalThis as any).fetch = async (input: RequestInfo | URL, init?: RequestInit) => {
const url = typeof input === 'string' ? input : input.toString();
if (url.includes('oauth/token')) {
return origFetch(`${server.url}/v1/oauth/token`, init);
Expand Down
43 changes: 43 additions & 0 deletions test/commands/speech/synthesize.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -159,4 +159,47 @@ describe('speech synthesize command', () => {
console.log = originalLog;
}
});

// Dry-run check: with --subtitles set, the request printed by the command
// must carry `subtitle_enable: true` and must not carry a `subtitle` key.
it('--subtitles sets subtitle_enable in dry-run output', async () => {
  const dryRunConfig = {
    apiKey: 'test-key',
    region: 'global' as const,
    baseUrl: 'https://api.mmx.io',
    output: 'json' as const,
    timeout: 10,
    verbose: false,
    quiet: false,
    noColor: true,
    yes: false,
    dryRun: true,
    nonInteractive: true,
    async: false,
  };

  // Swap console.log for a collector so the printed output can be parsed.
  const realLog = console.log;
  let printed = '';
  console.log = (msg: string) => {
    printed += msg;
  };

  try {
    await synthesizeCommand.execute(dryRunConfig, {
      text: 'Hello',
      subtitles: true,
      quiet: false,
      verbose: false,
      noColor: true,
      yes: false,
      dryRun: true,
      help: false,
      nonInteractive: true,
      async: false,
    });

    const { request } = JSON.parse(printed);
    expect(request.subtitle_enable).toBe(true);
    // Verify the old incorrect parameter name is NOT used
    expect(request.subtitle).toBeUndefined();
  } finally {
    // Always restore the real logger, even when an expectation fails.
    console.log = realLog;
  }
});
});
Loading