External WAV and eSpeak/eSpeak NG interoperability recipe
Tutorial goal
Bring an external WAV signal into lattice-dsp without adding audio I/O dependencies to the package.
Note
New to the terminology? See the lattice DSP concept map and the causality/data-use guide for how online, offline, block, and MIMO examples should be read.
Context
Speech synthesizers, recording tools, DAWs, and simulators can all write WAV files. This recipe uses the Python standard library for a minimal PCM WAV boundary and leaves richer loaders such as librosa, soundfile, or scipy.io.wavfile as user-side choices.
Key idea and equations
The boundary is intentionally simple:
external tool -> WAV file -> NumPy array -> lattice-dsp filter/model
How to read the result
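The printed sample rate and sample count confirm that the signal survived the WAV-to-array boundary, and the difference between the input RMS and the filtered RMS shows that the lattice filter actually processed the loaded samples.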
Run command
python examples/external_audio_wav_recipe.py
Run status
Return code: 0
Captured stdout
loaded sample rate: 16000
loaded samples: 4000
input RMS: 0.166148
filtered RMS: 0.106986
example WAV path: external_audio_recipe_input.wav
Source code
1"""Recipe: bring an external WAV signal into lattice-dsp without extra dependencies.
2
3This example uses only the Python standard library for WAV I/O plus NumPy and
4``lattice-dsp``. It is meant as a minimal bridge for speech or audio generated
5outside the package, for example with eSpeak/eSpeak NG:
6
7 espeak-ng -w speech.wav "This is a lattice DSP test signal."
8
9For production audio I/O, use your preferred optional package such as soundfile,
10librosa, or scipy.io.wavfile in your own application code.
11"""
12
13from __future__ import annotations
14
15import math
16import struct
17import wave
18from pathlib import Path
19
20import numpy as np
21
22import lattice_dsp as ld
23
24
def write_mono_pcm16(path: Path, sample_rate: int, x: np.ndarray) -> None:
    """Write a mono float signal in [-1, 1] as a 16-bit PCM WAV file.

    Samples outside [-1, 1] are clipped, scaled by 32767, rounded to the
    nearest integer (ties to even), and stored as little-endian signed
    16-bit frames in a single-channel WAV file.

    Args:
        path: Destination WAV file.
        sample_rate: Frame rate written to the WAV header, in Hz.
        x: One-dimensional array-like of float samples.

    Raises:
        ValueError: If ``x`` is not one-dimensional or contains NaN.
    """

    samples = np.asarray(x, dtype=float)
    # Validate before opening the file so a bad signal never leaves a
    # half-written WAV behind.
    if samples.ndim != 1:
        raise ValueError("expected a one-dimensional mono signal")
    if np.isnan(samples).any():
        raise ValueError("signal contains NaN samples")

    # One vectorized pass instead of a struct.pack call per sample; np.round
    # rounds ties to even, exactly like the built-in round().
    quantized = np.round(np.clip(samples, -1.0, 1.0) * 32767.0)
    payload = quantized.astype("<i2").tobytes()

    with wave.open(str(path), "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)  # 2 bytes per sample == 16-bit PCM
        wf.setframerate(sample_rate)
        wf.writeframes(payload)
35
36
def read_mono_pcm16(path: Path) -> tuple[int, np.ndarray]:
    """Load a 16-bit PCM WAV file as a one-dimensional float array.

    Integer samples are divided by 32768. Files with more than one channel
    are downmixed by averaging the channels of each frame.

    Args:
        path: WAV file to read.

    Returns:
        A ``(sample_rate, samples)`` pair.

    Raises:
        ValueError: If the file's sample width is not 2 bytes (16-bit).
    """

    with wave.open(str(path), "rb") as reader:
        if reader.getsampwidth() != 2:
            raise ValueError("expected 16-bit PCM WAV")
        rate = reader.getframerate()
        n_channels = reader.getnchannels()
        frame_bytes = reader.readframes(reader.getnframes())

    # Frames are interleaved little-endian int16 values.
    pcm = np.frombuffer(frame_bytes, dtype="<i2")
    samples = pcm.astype(float) / 32768.0
    if n_channels <= 1:
        return rate, samples

    # One row per frame, one column per channel; average across channels.
    per_frame = samples.reshape(-1, n_channels)
    return rate, per_frame.mean(axis=1)
51
52
def synthetic_speech_like_signal(sample_rate: int, seconds: float) -> np.ndarray:
    """Build a short, deterministic test tone that loosely resembles voiced speech.

    The signal is a sum of three sinusoids at 180, 360, and 720 Hz, amplitude
    modulated by a raised 3 Hz sine envelope and scaled by 0.55.

    Args:
        sample_rate: Samples per second.
        seconds: Duration; the sample count is ``round(sample_rate * seconds)``.

    Returns:
        A one-dimensional float array of samples.
    """

    num_samples = int(round(sample_rate * seconds))
    t = np.arange(num_samples) / sample_rate

    # Three partials with decreasing amplitude.
    fundamental = 0.55 * np.sin(2.0 * math.pi * 180.0 * t)
    second_partial = 0.25 * np.sin(2.0 * math.pi * 360.0 * t)
    third_partial = 0.12 * np.sin(2.0 * math.pi * 720.0 * t)
    carrier = fundamental + second_partial + third_partial

    # Slow modulation in [0, 1].
    modulation = np.sin(2.0 * math.pi * 3.0 * t)
    envelope = 0.5 * (1.0 + modulation)

    return 0.55 * envelope * carrier
65
66
def main() -> None:
    """Run the recipe: write a WAV, read it back, filter it, print a summary."""

    def rms_text(signal: np.ndarray) -> str:
        # Root-mean-square level formatted to six decimal places.
        return f"{np.sqrt(np.mean(signal**2)):.6f}"

    wav_path = Path("external_audio_recipe_input.wav")
    rate_hz = 16_000

    # To use an externally generated file instead, skip this write step and
    # point wav_path at that file, for example one produced by:
    #   espeak-ng -w speech.wav "This is a lattice DSP test signal."
    test_signal = synthetic_speech_like_signal(rate_hz, 0.25)
    write_mono_pcm16(wav_path, rate_hz, test_signal)

    loaded_rate, samples = read_mono_pcm16(wav_path)

    # Filter coefficients are the fixed example values of this recipe.
    lattice = ld.LatticeIIR([0.35, -0.2], [0.45, 0.05, 0.2])
    filtered = np.asarray(lattice.process(samples), dtype=float)

    print("loaded sample rate:", loaded_rate)
    print("loaded samples:", samples.shape[0])
    print("input RMS:", rms_text(samples))
    print("filtered RMS:", rms_text(filtered))
    print("example WAV path:", wav_path)


# Run the recipe only when executed as a script, not when imported.
if __name__ == "__main__":
    main()