The WebSocket API provides low-level access to Deepslate Realtime for server-side integrations. Use this for telephony backends, SIP gateways, or custom voice pipelines.
This interface is for server-side use only. End users should connect through WebRTC or your application’s frontend. Never expose your API key to clients.
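In practice that usually means reading the key from server-side configuration, for example an environment variable (DEEPSLATE_API_KEY below is only an illustrative name, not one defined by the platform):

// Keep the key in server-side configuration; never bundle it into client code.
const API_KEY = process.env.DEEPSLATE_API_KEY; // illustrative variable name
if (!API_KEY) {
  throw new Error('DEEPSLATE_API_KEY is not set');
}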
Prerequisites
npm install ws protobufjs
Connect
Connect to the WebSocket endpoint with your API key in the headers:
import WebSocket from 'ws';
import protobuf from 'protobufjs';
const ws = new WebSocket('wss://app.phonebot.io/v1/realtime', {
  headers: {
    'Authorization': 'Bearer YOUR_API_KEY'
  }
});
ws.binaryType = 'arraybuffer';

ws.on('open', () => {
  console.log('Connected');
  // Initialize session immediately after connecting
});

ws.on('message', (data) => {
  // Handle incoming protobuf messages
});

ws.on('close', (code, reason) => {
  console.log(`Disconnected: ${code}`);
});
Initialize Session
The first message must be an InitializeSessionRequest to configure the audio format, voice activity detection (VAD), and model behavior:
// Load protobuf definitions
const root = await protobuf.load('realtime.proto');
const ServiceBoundMessage = root.lookupType('eu.deepslate.realtime.speeq.ServiceBoundMessage');
const ClientBoundMessage = root.lookupType('eu.deepslate.realtime.speeq.ClientBoundMessage');
// Build initialization message
const initMessage = ServiceBoundMessage.create({
  initializeSessionRequest: {
    inputAudioLine: {
      sampleRate: 16000,
      channelCount: 1,
      sampleFormat: 1 // SIGNED_16_BIT
    },
    outputAudioLine: {
      sampleRate: 16000,
      channelCount: 1,
      sampleFormat: 1 // SIGNED_16_BIT
    },
    vadConfiguration: {
      confidenceThreshold: 0.5,
      minVolume: 0,
      startDuration: { seconds: 0, nanos: 300000000 }, // 300ms
      stopDuration: { seconds: 0, nanos: 700000000 }, // 700ms
      backbufferDuration: { seconds: 1, nanos: 0 } // 1s
    },
    inferenceConfiguration: {
      systemPrompt: 'You are a helpful assistant.'
    }
  }
});
// Send as binary protobuf
const buffer = ServiceBoundMessage.encode(initMessage).finish();
ws.send(buffer);
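The duration fields use a protobuf-style { seconds, nanos } shape. If you prefer thinking in milliseconds, a small helper (purely illustrative, not part of the API) keeps the values readable:

// Convert milliseconds to the { seconds, nanos } duration shape used above.
function msToDuration(ms) {
  return {
    seconds: Math.floor(ms / 1000),
    nanos: Math.round((ms % 1000) * 1e6)
  };
}

// e.g. startDuration: msToDuration(300), stopDuration: msToDuration(700)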
See the API Reference for all configuration options including TTS providers and tool definitions.
Send Audio
Stream audio as UserInput messages. Audio must match your inputAudioLine configuration:
let packetId = 0;

function sendAudio(pcmBuffer) {
  const message = ServiceBoundMessage.create({
    userInput: {
      packetId: packetId++,
      audioData: {
        data: pcmBuffer // Raw PCM bytes matching your config
      }
    }
  });
  const buffer = ServiceBoundMessage.encode(message).finish();
  ws.send(buffer);
}
Audio format must exactly match your session configuration. For 16-bit signed PCM at 16kHz mono, each sample is 2 bytes, little-endian.
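If your capture pipeline produces 32-bit float samples (as Web Audio and many audio libraries do), convert them before sending. A minimal sketch, assuming samples normalized to [-1, 1]:

// Convert normalized Float32 samples to 16-bit signed little-endian PCM.
function floatTo16BitPCM(float32Samples) {
  const pcm = Buffer.alloc(float32Samples.length * 2); // 2 bytes per sample
  for (let i = 0; i < float32Samples.length; i++) {
    const s = Math.max(-1, Math.min(1, float32Samples[i])); // clamp to [-1, 1]
    pcm.writeInt16LE(Math.round(s * 32767), i * 2);
  }
  return pcm;
}

// sendAudio(floatTo16BitPCM(samples));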
Handle Responses
The server sends ClientBoundMessage with one of several payload types:
ws.on('message', (data) => {
  const message = ClientBoundMessage.decode(new Uint8Array(data));

  if (message.modelTextFragment) {
    // Streamed text tokens (when TTS is not configured)
    process.stdout.write(message.modelTextFragment.text);
  }

  if (message.modelAudioChunk) {
    // TTS audio output
    const audioData = message.modelAudioChunk.audio.data;
    const transcript = message.modelAudioChunk.transcript;
    // Queue audio for playback
    playAudio(audioData);
  }

  if (message.playbackClearBuffer) {
    // User interrupted - clear any buffered audio immediately
    clearAudioQueue();
  }
});
Handle Interruptions
When the user starts speaking while audio is playing, the server sends PlaybackClearBuffer. You should immediately discard any queued audio that hasn’t played yet:
let audioQueue = [];

function playAudio(data) {
  audioQueue.push(data);
  // Process queue...
}

function clearAudioQueue() {
  audioQueue = [];
  // Also stop any currently playing audio
}
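To make "stop any currently playing audio" concrete, one option is a promise chain plus a generation counter: each chunk remembers the generation it was queued under, and an interruption bumps the counter so stale chunks are skipped. This is only a sketch of the stubs above; playChunk is a hypothetical stand-in for whatever drives your audio device:

let playbackGeneration = 0;
let playbackChain = Promise.resolve();

function playAudio(data) {
  const generation = playbackGeneration;
  // Chain chunks so they play one after another; stale chunks become no-ops.
  playbackChain = playbackChain
    .then(() => {
      if (generation !== playbackGeneration) return; // Interrupted before this chunk played
      return playChunk(data); // playChunk: hypothetical output-device integration
    })
    .catch((err) => console.error('Playback error:', err)); // Keep the chain alive on errors
}

function clearAudioQueue() {
  playbackGeneration++; // Everything queued before this point is now stale and will be skipped
}

Note that the chunk already being played still finishes; cutting it off mid-chunk depends on your output device's API.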
Tool Calling
Enable the model to call functions by defining tools and handling its tool call requests.
Send an UpdateToolDefinitionsRequest to register available tools. Each tool needs a name, description, and JSON Schema parameters:
function updateTools() {
  const message = ServiceBoundMessage.create({
    updateToolDefinitionsRequest: {
      toolDefinitions: [
        {
          name: 'get_weather',
          description: 'Get current weather for a location',
          parameters: {
            fields: {
              location: {
                kind: { stringValue: 'string' }
              }
            }
          }
        },
        {
          name: 'get_time',
          description: 'Get the current time',
          parameters: {}
        }
      ]
    }
  });
  ws.send(ServiceBoundMessage.encode(message).finish());
}
Calling UpdateToolDefinitionsRequest replaces all existing tools. Send an empty array to clear all tools.
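For example, clearing every registered tool is the same request with an empty list:

// Remove all registered tools.
const clearMessage = ServiceBoundMessage.create({
  updateToolDefinitionsRequest: { toolDefinitions: [] }
});
ws.send(ServiceBoundMessage.encode(clearMessage).finish());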
When the model wants to use a tool, you receive a ToolCallRequest. You must respond with a ToolCallResponse:
ws.on('message', (data) => {
  const message = ClientBoundMessage.decode(new Uint8Array(data));

  if (message.toolCallRequest) {
    const { id, name, parameters } = message.toolCallRequest;

    // Execute the tool
    const result = executeToolCall(name, parameters);

    // Send the response (required for every request)
    const response = ServiceBoundMessage.create({
      toolCallResponse: {
        id: id, // Must match the request ID
        result: result
      }
    });
    ws.send(ServiceBoundMessage.encode(response).finish());
  }
});

function executeToolCall(name, parameters) {
  switch (name) {
    case 'get_weather': {
      const location = parameters?.fields?.location?.kind?.stringValue;
      return JSON.stringify({ temperature: 22, condition: 'sunny', location });
    }
    case 'get_time':
      return JSON.stringify({ time: new Date().toISOString() });
    default:
      return JSON.stringify({ error: `Unknown tool: ${name}` });
  }
}
Every ToolCallRequest must receive a ToolCallResponse, even if the tool execution fails. The model waits for the response before continuing.
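A defensive pattern (a sketch building on executeToolCall above) is to wrap execution so that a ToolCallResponse is sent even when the tool throws:

// Always answer a ToolCallRequest, even if the tool itself fails.
function respondToToolCall(id, name, parameters) {
  let result;
  try {
    result = executeToolCall(name, parameters);
  } catch (err) {
    result = JSON.stringify({ error: String(err) });
  }
  const response = ServiceBoundMessage.create({
    toolCallResponse: { id, result }
  });
  ws.send(ServiceBoundMessage.encode(response).finish());
}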
Complete Example
A Node.js client with microphone input and speaker output:
npm install ws protobufjs audify
import WebSocket from 'ws';
import protobuf from 'protobufjs';
import pkg from 'audify';
const { RtAudio, RtAudioFormat } = pkg;
async function main() {
  // Load protobuf types
  const root = await protobuf.load('realtime.proto');
  const ServiceBoundMessage = root.lookupType('eu.deepslate.realtime.speeq.ServiceBoundMessage');
  const ClientBoundMessage = root.lookupType('eu.deepslate.realtime.speeq.ClientBoundMessage');

  // Audio config
  const SAMPLE_RATE = 16000;
  const CHANNELS = 1;
  const FRAME_SIZE = 1600; // 100ms of audio

  // Set up audio I/O
  const rtAudio = new RtAudio();
  // State
  let packetId = 0;
  let playbackBuffer = Buffer.alloc(0); // Model audio waiting to be played
  const FRAME_BYTES = FRAME_SIZE * CHANNELS * 2; // 16-bit samples are 2 bytes each

  // Pull one frame of buffered model audio, padding with silence when the buffer runs short
  function nextPlaybackFrame() {
    const take = Math.min(FRAME_BYTES, playbackBuffer.length);
    const frame = Buffer.alloc(FRAME_BYTES); // zero-filled = silence
    playbackBuffer.copy(frame, 0, 0, take);
    playbackBuffer = playbackBuffer.subarray(take);
    return frame;
  }
  // Connect
  const ws = new WebSocket('wss://app.phonebot.io/v1/realtime', {
    headers: { 'Authorization': 'Bearer YOUR_API_KEY' }
  });
  ws.binaryType = 'arraybuffer';

  // Helper to send messages
  function send(payload) {
    const msg = ServiceBoundMessage.create(payload);
    ws.send(ServiceBoundMessage.encode(msg).finish());
  }

  ws.on('open', () => {
    // Initialize session
    send({
      initializeSessionRequest: {
        inputAudioLine: { sampleRate: SAMPLE_RATE, channelCount: CHANNELS, sampleFormat: 1 },
        outputAudioLine: { sampleRate: SAMPLE_RATE, channelCount: CHANNELS, sampleFormat: 1 },
        vadConfiguration: {
          confidenceThreshold: 0.5,
          minVolume: 0,
          startDuration: { seconds: 0, nanos: 300000000 },
          stopDuration: { seconds: 0, nanos: 700000000 },
          backbufferDuration: { seconds: 1, nanos: 0 }
        },
        inferenceConfiguration: {
          systemPrompt: 'You are a friendly and helpful assistant.'
        }
      }
    });

    // Register tools
    send({
      updateToolDefinitionsRequest: {
        toolDefinitions: [{
          name: 'get_time',
          description: 'Get the current time',
          parameters: {}
        }]
      }
    });
    // Open a duplex audio stream: microphone in, model audio out to the speakers
    rtAudio.openStream(
      { deviceId: rtAudio.getDefaultOutputDevice(), nChannels: CHANNELS, firstChannel: 0 },
      { deviceId: rtAudio.getDefaultInputDevice(), nChannels: CHANNELS, firstChannel: 0 },
      RtAudioFormat.RTAUDIO_SINT16,
      SAMPLE_RATE,
      FRAME_SIZE,
      'deepslate-audio',
      (pcm) => {
        // Stream microphone audio to the server
        send({
          userInput: {
            packetId: packetId++,
            audioData: { data: pcm }
          }
        });
        // Play one frame of buffered model audio per microphone frame (simple sync strategy)
        rtAudio.write(nextPlaybackFrame());
      }
    );
    rtAudio.start();
    console.log('Listening... speak into your microphone');
  });
  ws.on('message', (data) => {
    const msg = ClientBoundMessage.decode(new Uint8Array(data));

    if (msg.modelTextFragment) {
      process.stdout.write(msg.modelTextFragment.text);
    }
    if (msg.modelAudioChunk) {
      // Buffer TTS audio for playback by the output stream
      playbackBuffer = Buffer.concat([playbackBuffer, Buffer.from(msg.modelAudioChunk.audio.data)]);
    }

    if (msg.playbackClearBuffer) {
      // User interrupted: drop any audio that has not been played yet
      playbackBuffer = Buffer.alloc(0);
    }
    if (msg.toolCallRequest) {
      const { id, name, parameters } = msg.toolCallRequest;
      console.log(`Tool call: ${name}`, parameters);
      const result = handleTool(name, parameters);
      console.log(`Tool result: ${result}`);
      send({ toolCallResponse: { id, result } });
    }
  });

  ws.on('close', (code) => {
    rtAudio.stop();
    rtAudio.closeStream();
    console.log('Disconnected:', code);
  });

  ws.on('error', (err) => console.error('Error:', err));
}
function handleTool(name, parameters) {
  switch (name) {
    case 'get_time':
      return JSON.stringify({ time: new Date().toLocaleTimeString() });
    default:
      return JSON.stringify({ error: `Unknown tool: ${name}` });
  }
}
main();
Next Steps