Skip to content

Commit 91e5d06

Browse files
committed
Add Piper as TTS provider
1 parent a075e32 commit 91e5d06

9 files changed

Lines changed: 143 additions & 3 deletions

File tree

README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,12 @@ tts.provider.gcloud.defaultVoice = "en-GB-Neural2-A"
115115
-- Requires at least Windows Server 2019 to work properly.
116116
tts.provider.win.defaultVoice = "David"
117117

118+
-- The default Piper language model to use (must be installed manually).
119+
tts.provider.piper.defaultVoice = "..."
120+
121+
-- The default Piper speech speed (1.0 is the default; lower is quicker, higher is slower).
122+
tts.provider.piper.defaultSpeed = 1.0
123+
118124
-- Your SRS server's address.
119125
srs.addr = "127.0.0.1:5002"
120126
```
@@ -167,6 +173,17 @@ You can also check for the presence of a `\Logs\grpc.log` file.
167173

168174
The server will be running on port 50051 by default.
169175

176+
## Install Piper TTS
177+
178+
This is only necessary if you plan to use Piper as your TTS provider.
179+
180+
1. Download `piper_windows_amd64.zip` from the latest [Piper release](https://github.com/rhasspy/piper/releases).
181+
2. Extract the `piper` directory from this zip file and place it at `DCS.openbeta\Mods\tech\DCS-gRPC\piper`.
182+
3. Download at least one voice from [Piper Voices](https://github.com/rhasspy/piper/blob/master/VOICES.md). You need both the `model` and the `config`. For the SRS voice quality, a `low` model is sufficient.
183+
4. Place the model and config into your `DCS.openbeta\Mods\tech\DCS-gRPC\piper\` directory (e.g. `DCS.openbeta\Mods\tech\DCS-gRPC\piper\en_US-amy-low.onnx` and `DCS.openbeta\Mods\tech\DCS-gRPC\piper\en_US-amy-low.onnx.json`).
184+
5. Set one of your installed voices as the default voice in your config (`tts.provider.piper.defaultVoice = "..."`, e.g. `tts.provider.piper.defaultVoice = "en_US-amy-low.onnx"`).
185+
6. If you want to use Piper, don't forget to set it as your default provider, or enable it on a per-transmission basis.
186+
170187
## Lua API
171188

172189
`DCS-gRPC` provides the following Lua APIs to interact with the server from within Lua.
@@ -202,6 +219,7 @@ The server will be running on port 50051 by default.
202219
-- `= { azure = {} }` / `= { azure = { voice = "..." } }` enable Azure TTS
203220
-- `= { gcloud = {} }` / `= { gcloud = { voice = "..." } }` enable Google Cloud TTS
204221
-- `= { win = {} }` / `= { win = { voice = "..." } }` enable Windows TTS
222+
-- `= { piper = {} }` / `= { piper = { voice = "...", speed = 1.0 } }` enable Piper TTS
205223
provider = null,
206224
}
207225
```

lua/DCS-gRPC/grpc-mission.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
if not GRPC then
22
GRPC = {
33
-- scaffold nested tables to allow direct assignment in config file
4-
tts = { provider = { gcloud = {}, aws = {}, azure = {}, win = {} } },
4+
tts = { provider = { gcloud = {}, aws = {}, azure = {}, win = {}, piper = {} } },
55
srs = {},
66
}
77
end

lua/Hooks/DCS-gRPC.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ local function init()
77
if not GRPC then
88
_G.GRPC = {
99
-- scaffold nested tables to allow direct assignment in config file
10-
tts = { provider = { gcloud = {}, aws = {}, azure = {}, win = {} } },
10+
tts = { provider = { gcloud = {}, aws = {}, azure = {}, win = {}, piper = {} } },
1111
srs = {},
1212
}
1313
end

protos/dcs/srs/v0/srs.proto

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,13 +75,25 @@ message TransmitRequest {
7575
optional string voice = 1;
7676
}
7777

78+
message Piper {
79+
// The voice model the text is synthesized in (corresponds to a model placed in your
80+
// `DCS.openbeta\Mods\tech\DCS-gRPC\piper\` directory).
81+
optional string voice = 1;
82+
83+
// The speed of the generated speech; 1.0 is the default; lower is quicker, higher is slower.
84+
optional float speed = 2;
85+
}
86+
87+
7888
// Optional TTS provider to be used. Defaults to the one configured in your
7989
// config or to Windows' built-in TTS.
8090
oneof provider {
8191
Aws aws = 8;
8292
Azure azure = 9;
8393
GCloud gcloud = 10;
8494
Windows win = 11;
95+
// Piper does not support SSML, only use it with plain text.
96+
Piper piper = 12;
8597
}
8698
}
8799

src/config.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ pub struct TtsProviderConfig {
3838
pub azure: Option<AzureConfig>,
3939
pub gcloud: Option<GCloudConfig>,
4040
pub win: Option<WinConfig>,
41+
pub piper: Option<PiperConfig>,
4142
}
4243

4344
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
@@ -48,6 +49,7 @@ pub enum TtsProvider {
4849
GCloud,
4950
#[default]
5051
Win,
52+
Piper,
5153
}
5254

5355
#[derive(Clone, Deserialize, Serialize)]
@@ -80,6 +82,13 @@ pub struct WinConfig {
8082
pub default_voice: Option<String>,
8183
}
8284

85+
#[derive(Debug, Clone, Deserialize, Serialize)]
86+
#[serde(rename_all = "camelCase")]
87+
pub struct PiperConfig {
88+
pub default_voice: Option<String>,
89+
pub default_speed: Option<f32>,
90+
}
91+
8392
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
8493
#[serde(rename_all = "camelCase")]
8594
pub struct SrsConfig {

src/rpc/srs.rs

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
use std::error;
22
use std::future::Future;
33
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
4+
use std::path::PathBuf;
45
use std::str::FromStr;
56
use std::time::{Duration, Instant};
67

78
use ::srs::Sender;
89
#[cfg(target_os = "windows")]
910
use ::tts::WinConfig;
10-
use ::tts::{AwsConfig, AwsRegion, AzureConfig, GCloudConfig, TtsConfig};
11+
use ::tts::{AwsConfig, AwsRegion, AzureConfig, GCloudConfig, PiperConfig, TtsConfig};
1112
use futures_util::FutureExt;
1213
use stubs::common::v0::{Coalition, Unit};
1314
use stubs::mission::v0::stream_events_response::{Event, TtsEvent};
@@ -27,6 +28,7 @@ use crate::srs::SrsClients;
2728
pub struct Srs {
2829
tts_config: crate::config::TtsConfig,
2930
srs_config: crate::config::SrsConfig,
31+
write_dir: PathBuf,
3032
rpc: MissionRpc,
3133
srs_clients: SrsClients,
3234
shutdown_signal: ShutdownHandle,
@@ -36,13 +38,15 @@ impl Srs {
3638
pub fn new(
3739
tts_config: crate::config::TtsConfig,
3840
srs_config: crate::config::SrsConfig,
41+
write_dir: PathBuf,
3942
rpc: MissionRpc,
4043
srs_clients: SrsClients,
4144
shutdown_signal: ShutdownHandle,
4245
) -> Self {
4346
Self {
4447
tts_config,
4548
srs_config,
49+
write_dir,
4650
rpc,
4751
srs_clients,
4852
shutdown_signal,
@@ -105,6 +109,10 @@ impl SrsService for Srs {
105109
TtsProvider::Win => {
106110
transmit_request::Provider::Win(transmit_request::Windows { voice: None })
107111
}
112+
TtsProvider::Piper => transmit_request::Provider::Piper(transmit_request::Piper {
113+
voice: None,
114+
speed: None,
115+
}),
108116
}) {
109117
transmit_request::Provider::Aws(transmit_request::Aws { voice }) => {
110118
TtsConfig::Aws(AwsConfig {
@@ -215,6 +223,32 @@ impl SrsService for Srs {
215223
"Windows TTS is only available on Windows",
216224
));
217225
}
226+
transmit_request::Provider::Piper(transmit_request::Piper { voice, speed }) => {
227+
TtsConfig::Piper(PiperConfig {
228+
voice: voice
229+
.or_else(|| {
230+
self.tts_config
231+
.provider
232+
.as_ref()
233+
.and_then(|p| p.piper.as_ref())
234+
.and_then(|p| p.default_voice.clone())
235+
})
236+
.filter(|v| !v.is_empty())
237+
.ok_or_else(|| {
238+
Status::failed_precondition("tts.provider.piper.default_voice not set")
239+
})?,
240+
speed: speed
241+
.or_else(|| {
242+
self.tts_config
243+
.provider
244+
.as_ref()
245+
.and_then(|p| p.piper.as_ref())
246+
.and_then(|p| p.default_speed)
247+
})
248+
.unwrap_or(1.0),
249+
piper_path: self.write_dir.join("Mods/tech/DCS-gRPC/piper"),
250+
})
251+
}
218252
};
219253

220254
let frames = ::tts::synthesize(&request.ssml, &config)

src/server.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use std::future::Future;
22
use std::net::SocketAddr;
3+
use std::path::PathBuf;
34
use std::sync::Arc;
45
use std::time::Duration;
56

@@ -49,6 +50,7 @@ struct ServerState {
4950
stats: Stats,
5051
tts_config: TtsConfig,
5152
srs_config: SrsConfig,
53+
write_dir: PathBuf,
5254
srs_transmit: Arc<Mutex<mpsc::Receiver<TransmitRequest>>>,
5355
}
5456

@@ -70,6 +72,7 @@ impl Server {
7072
stats: Stats::new(shutdown.handle()),
7173
tts_config: config.tts.clone().unwrap_or_default(),
7274
srs_config: config.srs.clone().unwrap_or_default(),
75+
write_dir: PathBuf::from(&config.write_dir),
7376
srs_transmit: Arc::new(Mutex::new(rx)),
7477
},
7578
srs_transmit: tx,
@@ -202,6 +205,7 @@ async fn try_run(
202205
stats,
203206
tts_config,
204207
srs_config,
208+
write_dir,
205209
srs_transmit,
206210
} = state;
207211

@@ -225,6 +229,7 @@ async fn try_run(
225229
let srs = Srs::new(
226230
tts_config.clone(),
227231
srs_config.clone(),
232+
write_dir.clone(),
228233
mission_rpc.clone(),
229234
srs_clients.clone(),
230235
shutdown_signal.clone(),
@@ -256,6 +261,7 @@ async fn try_run(
256261
.add_service(SrsServiceServer::new(Srs::new(
257262
tts_config,
258263
srs_config,
264+
write_dir,
259265
mission_rpc.clone(),
260266
srs_clients,
261267
shutdown_signal.clone(),

tts/src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@ use std::error;
33
pub use aws::{AwsConfig, Region as AwsRegion};
44
pub use azure::AzureConfig;
55
pub use gcloud::GCloudConfig;
6+
pub use piper::PiperConfig;
67
#[cfg(target_os = "windows")]
78
pub use win::WinConfig;
89

910
mod aws;
1011
mod azure;
1112
mod gcloud;
13+
mod piper;
1214
#[cfg(target_os = "windows")]
1315
mod win;
1416

@@ -19,6 +21,7 @@ pub enum TtsConfig {
1921
GCloud(gcloud::GCloudConfig),
2022
#[cfg(target_os = "windows")]
2123
Win(win::WinConfig),
24+
Piper(piper::PiperConfig),
2225
}
2326

2427
/// Synthesize the `text` to speech. Returns a vec of opus frames.
@@ -32,6 +35,7 @@ pub async fn synthesize(
3235
TtsConfig::GCloud(config) => gcloud::synthesize(text, config).await?,
3336
#[cfg(target_os = "windows")]
3437
TtsConfig::Win(config) => win::synthesize(text, config).await?,
38+
TtsConfig::Piper(config) => piper::synthesize(text, config).await?,
3539
})
3640
}
3741

tts/src/piper.rs

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
use std::path::PathBuf;
2+
use std::process::Stdio;
3+
4+
use tokio::io::AsyncWriteExt;
5+
use tokio::process::Command;
6+
7+
/// Settings for a single Piper speech synthesis run.
#[derive(Debug, Clone, PartialEq)]
pub struct PiperConfig {
    /// Voice model handed to Piper via `--model`. Piper is started with `piper_path` as its
    /// working directory, so a bare file name refers to a model placed in that directory.
    pub voice: String,
    /// Speech speed, handed to Piper via `--length_scale`; 1.0 is the default, lower is quicker,
    /// higher is slower.
    pub speed: f32,
    /// Directory that contains the Piper executable; also used as Piper's working directory.
    pub piper_path: PathBuf,
}
13+
14+
pub async fn synthesize(text: &str, config: &PiperConfig) -> Result<Vec<Vec<u8>>, std::io::Error> {
15+
let mut command = Command::new(config.piper_path.join("piper.exe"));
16+
command
17+
.arg("--model")
18+
.arg(&config.voice)
19+
.arg("--length_scale")
20+
.arg(format!("{}", config.speed))
21+
.arg("--output-raw")
22+
.current_dir(&config.piper_path)
23+
.stdin(Stdio::piped())
24+
.stdout(Stdio::piped())
25+
.stderr(Stdio::piped());
26+
27+
#[cfg(target_os = "windows")]
28+
{
29+
const CREATE_NO_WINDOW: u32 = 0x08000000;
30+
command.creation_flags(CREATE_NO_WINDOW);
31+
}
32+
33+
let mut child = command.spawn()?;
34+
35+
child
36+
.stdin
37+
.as_mut()
38+
.unwrap()
39+
.write_all(text.as_bytes())
40+
.await?;
41+
let output = child.wait_with_output().await?;
42+
43+
if !output.status.success() {
44+
return Err(std::io::Error::new(
45+
std::io::ErrorKind::Other,
46+
if output.stderr.is_empty() {
47+
"failed to execute piper (maybe voice model not found)".into()
48+
} else {
49+
String::from_utf8_lossy(&output.stderr)
50+
},
51+
));
52+
}
53+
54+
crate::wav_to_opus(output.stdout.into())
55+
.await
56+
.map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))
57+
}

0 commit comments

Comments
 (0)