Overview
Add "stream": true to any chat completion request to receive tokens as they’re generated instead of waiting for the full response. Streaming uses the Server-Sent Events (SSE) protocol — the same wire format as OpenAI.
Code Examples
# Stream a chat completion token-by-token with the OpenAI Python SDK.
from openai import OpenAI

client = OpenAI(
    api_key="sk-samurai-YOUR_KEY",  # your Samurai API key
    base_url="https://www.samuraiapi.in/v1",  # OpenAI-compatible endpoint
)

# stream=True switches the response to Server-Sent Events: the call
# returns an iterator of chat.completion.chunk objects instead of one
# completed message.
stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Explain the concept of flow state."},
    ],
    stream=True,
)

# Each chunk carries an incremental delta. The final chunk has an empty
# delta (only finish_reason), so guard before printing.
for chunk in stream:
    delta = chunk.choices[0].delta
    if delta.content:
        print(delta.content, end="", flush=True)
print()  # final newline
Each streamed chunk arrives as a data: line:
data: {"id":"chatcmpl-abc","object":"chat.completion.chunk","created":1715000000,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"Flow"},"finish_reason":null}]}
data: {"id":"chatcmpl-abc","object":"chat.completion.chunk","created":1715000000,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":" state"},"finish_reason":null}]}
data: {"id":"chatcmpl-abc","object":"chat.completion.chunk","created":1715000000,"model":"gpt-4o","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}
data: [DONE]
The stream ends with data: [DONE].
Vercel AI SDK
Works perfectly with the Vercel AI SDK for Next.js and React:
// app/api/chat/route.ts
// Next.js App Router API route: forwards the client's chat messages to
// Samurai AI and streams the model's tokens back as they arrive.
import { createOpenAI } from '@ai-sdk/openai';
import { streamText } from 'ai';

// OpenAI-compatible provider pointed at the Samurai AI base URL.
const samurai = createOpenAI({
  apiKey: process.env.SAMURAI_API_KEY!,
  baseURL: 'https://www.samuraiapi.in/v1',
});

export async function POST(req: Request) {
  const { messages } = await req.json();
  const result = streamText({
    model: samurai('gpt-4o'),
    messages,
  });
  // Serializes the token stream into the AI SDK's data-stream wire
  // format, which the useChat hook on the client consumes.
  return result.toDataStreamResponse();
}
// components/Chat.tsx
// Client component: useChat wires the input/submit state to the
// /api/chat route above and appends streamed tokens to `messages`
// live as they arrive.
'use client';
import { useChat } from 'ai/react';

export function Chat() {
  const { messages, input, handleInputChange, handleSubmit } = useChat({
    api: '/api/chat',
  });
  return (
    <div>
      {messages.map(m => (
        <div key={m.id}><b>{m.role}:</b> {m.content}</div>
      ))}
      <form onSubmit={handleSubmit}>
        <input value={input} onChange={handleInputChange} placeholder="Say something..." />
        <button type="submit">Send</button>
      </form>
    </div>
  );
}
LangChain Streaming
# Stream a LangChain ChatOpenAI response token-by-token.
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage

llm = ChatOpenAI(
    model="gpt-4o",
    # NOTE(review): openai_api_key / openai_api_base are legacy aliases;
    # current langchain-openai also accepts api_key= / base_url=.
    openai_api_key="sk-samurai-YOUR_KEY",
    openai_api_base="https://www.samuraiapi.in/v1",
    streaming=True,
)

# .stream() yields message chunks as tokens arrive.
for chunk in llm.stream([HumanMessage(content="Tell me a story about a samurai.")]):
    print(chunk.content, end="", flush=True)
All models that support chat completions support streaming. Just set stream: true.
Overview
Set stream: true to receive tokens as they are generated instead of waiting for the full response. Samurai AI uses the OpenAI-compatible SSE format.
Code Examples
# Minimal streaming example with the OpenAI Python SDK.
from openai import OpenAI

client = OpenAI(
    api_key="sk-samurai-YOUR_KEY",
    base_url="https://api.samuraiapi.in/v1",
)

stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Write a haiku about AI."}],
    stream=True,  # emit chat.completion.chunk events instead of one response
)

# The final chunk's delta carries no content (finish_reason only), so guard.
for chunk in stream:
    delta = chunk.choices[0].delta
    if delta.content:
        print(delta.content, end="", flush=True)
Each chunk is a data: line containing a JSON object:
data: {"id":"chatcmpl-abc","choices":[{"delta":{"content":"Silicon"},"index":0}]}
data: {"id":"chatcmpl-abc","choices":[{"delta":{"content":" dreams"},"index":0}]}
data: [DONE]
The stream ends with data: [DONE].
Vercel AI SDK
// Stream with the Vercel AI SDK outside a web route (e.g. a Node script).
// Uses top-level await — run this as an ES module.
import { createOpenAI } from '@ai-sdk/openai';
import { streamText } from 'ai';

const samurai = createOpenAI({
  apiKey: process.env.SAMURAI_API_KEY,
  baseURL: 'https://api.samuraiapi.in/v1',
});

// textStream is an async iterable of plain text deltas.
const { textStream } = streamText({
  model: samurai('gpt-4o'),
  prompt: 'Write a haiku about AI.',
});

for await (const text of textStream) {
  process.stdout.write(text);
}
All models that support chat completions also support streaming. Use the same model parameter.