Tutorials
Set up a custom LLM WebSocket
Step-by-step guide on how to set up a custom LLM for your Millis AI voice agent
Using a WebSocket connection, your voice agent can interact in real-time with your custom LLM, enabling dynamic and responsive conversational capabilities based on your specific requirements.
Sample Code
Sample code using a FastAPI WebSocket endpoint and asyncio.
# FastAPI application object; the "/llm-websocket" route below is registered on it.
app = FastAPI()
@app.websocket("/llm-websocket")
async def websocket_handler(websocket: WebSocket):
    """Serve one voice-agent connection on the custom LLM WebSocket.

    Incoming messages are JSON objects with a ``type`` and a ``data`` field:

    * ``start_call`` -- answered immediately with one complete
      ``stream_response`` carrying the agent's first message.
    * ``stream_request`` -- starts a background task that forwards every
      event produced by ``draft_response`` as a ``stream_response`` message.

    ``current_stream_id`` always holds the ``stream_id`` of the most recent
    request. A streaming task whose own ``stream_id`` is lower than that
    value stops without sending anything further.

    Args:
        websocket: The WebSocket connection handed in by FastAPI.
    """
    await websocket.accept()
    current_stream_id = -1
    # The event loop only keeps weak references to tasks, so hold strong
    # references here until each task finishes.
    background_tasks = set()
    # Returned by next() when the event generator is exhausted.
    exhausted = object()

    async def stream_response(request):
        """Forward the events drafted for ``request`` until done or superseded."""
        try:
            loop = asyncio.get_running_loop()
            # NOTE(review): draft_response is called with the request only;
            # confirm this matches how it is defined/bound in your project.
            events = iter(draft_response(request))
            while True:
                # draft_response is a synchronous generator that blocks while
                # waiting for the model. Pull each event in a worker thread so
                # the receive loop can keep reading new requests. The default
                # argument keeps StopIteration from being raised into a Future.
                event = await loop.run_in_executor(None, next, events, exhausted)
                if event is exhausted:
                    return
                # Check before sending so no stale event goes out once a
                # newer request has arrived.
                if request['stream_id'] < current_stream_id:
                    return  # Got new stream_request, abandon this task
                await websocket.send_text(json.dumps(
                    {
                        "type": "stream_response",
                        "data": event
                    }
                ))
        except Exception:
            # Log and end this stream only; cancellation (a BaseException on
            # Python 3.8+) is deliberately not swallowed here.
            print(traceback.format_exc(), flush=True)

    try:
        while True:
            message = await websocket.receive_text()
            request = json.loads(message)
            if request["type"] == "start_call":
                current_stream_id = request["data"]['stream_id']
                first_event = {
                    "type": "stream_response",
                    "data": {
                        "stream_id": current_stream_id,
                        "content": "How can I help you?",  # Agent's first message
                        "end_of_stream": True,
                    }
                }
                await websocket.send_text(json.dumps(first_event))
            elif request["type"] == "stream_request":
                current_stream_id = request["data"]['stream_id']
                task = asyncio.create_task(stream_response(request["data"]))
                background_tasks.add(task)
                task.add_done_callback(background_tasks.discard)
    except WebSocketDisconnect:
        # The client hung up; nothing to report.
        pass
    finally:
        # Stop any stream still running so it does not keep writing to a
        # connection that is gone.
        for task in list(background_tasks):
            task.cancel()
def draft_response(self, request):
    """Yield ``stream_response`` payloads for one stream request.

    Builds the chat messages with ``self.prepare_prompt(request)``, opens a
    streaming chat completion on ``self.client`` (presumably an
    OpenAI-compatible client -- confirm against where ``self.client`` is
    created) and turns every chunk that carries text into an event dict.

    Args:
        request: Mapping that must contain ``'stream_id'``; it is also passed
            unchanged to ``self.prepare_prompt``.

    Yields:
        dict: ``{"stream_id", "content", "end_of_stream"}``. One event with
        ``end_of_stream`` False per text chunk, then a final event with empty
        content and ``end_of_stream`` True.
    """
    prompt = self.prepare_prompt(request)
    stream = self.client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=prompt,
        stream=True,
    )
    stream_id = request['stream_id']
    for chunk in stream:
        # Some streamed chunks carry no choices at all (for example
        # usage-only chunks); indexing [0] on them would raise IndexError.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content is not None:
            yield {
                "stream_id": stream_id,
                "content": content,
                "end_of_stream": False,
            }
    # Tell the receiver that this stream is complete.
    yield {
        "stream_id": stream_id,
        "content": "",
        "end_of_stream": True,
    }