I use this script a lot, especially since I can call it from other scripts. It does require setting up a Google Cloud account on your machine, but it's totally worth it for the much higher-quality voices.

Here's how you run this from another script. You can select a different voice by changing 5 to the index of another voice in the list:

await run("speak-text", "I like tacos", "--voice", 5)

The script in full:

Install speak-text

// Menu: Speak Text
// Description: Speaks Text Using Google's Text-to-Speech
// Author: John Lindquist
// Twitter: @johnlindquist
// Requires a Google Cloud account and configuration:
// https://cloud.google.com/text-to-speech
// Helper loaded through kit("audio"); used at the end of the script to play
// the generated .mp3.
let { playAudioFile } = await kit("audio")
// `format` renders the timestamp that prefixes the output file name.
let { format } = await npm("date-fns")
/** @type typeof import("@google-cloud/text-to-speech") */
let textToSpeech = await npm("@google-cloud/text-to-speech")
// One client is shared by the voice listing and the speech synthesis calls.
// NOTE(review): presumably picks up Google Cloud credentials from the local
// environment (see the header link) — confirm.
let client = new textToSpeech.TextToSpeechClient()
// The text to speak, requested via arg(). Per the usage examples further down
// it can also be passed as the first script argument.
let text = await arg("What should I say?")
// Selectable voices are cached in the "voices" db so the Text-to-Speech API
// is only queried when the cache is empty.
let voicesDB = db("voices", { voices: [] })
let voices = voicesDB.get("voices").value()
if (voices.length === 0) {
  let [{ voices: englishVoices }] = await client.listVoices({
    languageCode: "en",
  })
  // Shape every voice as a { name, value } choice for the "Select voice" prompt.
  let voiceChoices = englishVoices.map(voice => {
    // Use the language code reported by the API. The previous
    // `voice.name.slice(0, 4)` cut a name such as "en-US-Wavenet-A" down to
    // "en-U", which is not a valid language code. If the API reports none,
    // fall back to the first two name segments ("en-US").
    let languageCode =
      voice.languageCodes?.[0] ??
      voice.name.split("-").slice(0, 2).join("-")
    return {
      name: `${voice.ssmlGender} - ${voice.name}`,
      value: { ...voice, languageCode },
    }
  })
  // NOTE(review): entries cached by an earlier run keep their old
  // languageCode until the "voices" db is emptied.
  voicesDB.set("voices", voiceChoices).write()
  // Read back through the db so `voices` reflects what was stored.
  voices = voicesDB.get("voices").value()
}
// From the terminal or run
// speak-text "I like tacos" --voice 5
// await run("speak-text", "I like tacos", "--voice", "5")
//
// `--voice` is an index into the cached `voices` list. It may arrive as a
// number or as a numeric string, so strings are converted before validation.
let requestedVoice = arg?.voice
let voiceIndex =
  typeof requestedVoice === "string" && requestedVoice.trim() !== ""
    ? Number(requestedVoice)
    : requestedVoice
// Only use the flag when it is an integer that points at an existing voice.
// A missing, malformed or out-of-range value falls back to the prompt instead
// of throwing on `voices[index].value`.
let voice =
  Number.isInteger(voiceIndex) && voices[voiceIndex]
    ? voices[voiceIndex].value
    : await arg("Select voice", voices)
// Audio effects profile applied to every synthesized clip.
let effectsProfileId = ["headphone-class-device"]

/**
 * Assemble the payload passed to `client.synthesizeSpeech`.
 *
 * @param {object} voice - Voice selection, forwarded as-is.
 * @param {string} text - Text to be spoken.
 * @returns {{ input: { text: string }, voice: object, audioConfig: object }}
 *   Request asking for MP3 audio at speaking rate 1 with the shared
 *   effects profile.
 */
let createRequest = (voice, text) => {
  let input = { text }
  let audioConfig = {
    audioEncoding: "MP3",
    effectsProfileId,
    speakingRate: 1,
  }
  return { input, voice, audioConfig }
}
let request = createRequest(voice, text)

// Build a file-name-safe slug from the first 10 characters of the text:
// anything that is not a letter or digit becomes "-", then lower-cased.
let safeFileName = text
  .slice(0, 10)
  .replace(/[^a-z0-9]/gi, "-")
  .toLowerCase()
// "HH" is the 24-hour clock. The previous "hh" (12-hour, no AM/PM marker)
// produced the same timestamp twice a day, so file names were ambiguous and
// could collide.
let date = format(new Date(), "yyyy-MM-dd-HH-mm-ss")
let fileName = `${date}-${safeFileName}.mp3`

// Performs the text-to-speech request
let [response] = await client.synthesizeSpeech(request)

// Write the .mp3 locally, at the path tmp() returns for this file name
let textAudioPath = tmp(fileName)
await writeFile(textAudioPath, response.audioContent, "binary")

// Play the file that was just written.
playAudioFile(textAudioPath)