Display AI responses in real-time as they're generated for better UX.
Streaming lets you display AI-generated text token by token as it's produced, instead of waiting for the complete response.
Add "stream": true to your API request:
# Request a streamed completion: "stream": true makes the server send
# the answer incrementally as Server-Sent Events instead of one JSON body.
curl https://api.vigthoria.io/v1/chat/completions \
-H "Authorization: Bearer YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"model": "vigthoria-creative-v2",
"messages": [{"role": "user", "content": "Write a poem about technology"}],
"stream": true
}'
The response comes as Server-Sent Events (SSE):
data: {"choices":[{"delta":{"content":"In"}}]}
data: {"choices":[{"delta":{"content":" circuits"}}]}
data: {"choices":[{"delta":{"content":" deep"}}]}
data: [DONE]
/**
 * Stream a chat completion and render it into #output as tokens arrive.
 *
 * Correctness notes:
 * - Checks `response.ok` so an HTTP error is surfaced instead of being
 *   parsed as SSE.
 * - Decodes with `{ stream: true }` so multi-byte UTF-8 characters that
 *   straddle chunk boundaries are not corrupted.
 * - Buffers partial lines: a network chunk can end mid-way through a
 *   `data:` line, so only complete lines are handed to JSON.parse.
 *
 * @param {string} prompt - The user message to send.
 * @returns {Promise<string>} The full generated text.
 * @throws {Error} If the API responds with a non-2xx status.
 */
async function streamChat(prompt) {
  const response = await fetch('https://api.vigthoria.io/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${API_KEY}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model: 'vigthoria-creative-v2',
      messages: [{ role: 'user', content: prompt }],
      stream: true,
    }),
  });

  if (!response.ok) {
    throw new Error(`API request failed: ${response.status} ${response.statusText}`);
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  let fullText = '';

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      // stream:true keeps an incomplete multi-byte sequence inside the
      // decoder until the next chunk completes it.
      buffer += decoder.decode(value, { stream: true });

      // Parse only complete lines; the trailing partial line (if any)
      // stays in the buffer for the next chunk.
      const lines = buffer.split('\n');
      buffer = lines.pop();

      for (const line of lines) {
        const trimmed = line.trim();
        if (!trimmed.startsWith('data: ') || trimmed === 'data: [DONE]') continue;
        const json = JSON.parse(trimmed.slice(6));
        const content = json.choices[0]?.delta?.content || '';
        fullText += content;
        // Update UI in real-time.
        document.getElementById('output').textContent = fullText;
      }
    }
  } finally {
    reader.releaseLock();
  }
  return fullText;
}

// Usage — attach a rejection handler so a failed stream doesn't become
// an unhandled promise rejection.
streamChat('Explain quantum computing').catch(console.error);
import requests
import json
def stream_chat(prompt):
    """Stream a chat completion, printing tokens as they arrive.

    Args:
        prompt: The user message to send.

    Returns:
        The full generated text once the stream completes.

    Raises:
        requests.HTTPError: If the API returns a non-2xx status.
    """
    response = requests.post(
        'https://api.vigthoria.io/v1/chat/completions',
        headers={
            'Authorization': f'Bearer {API_KEY}',
            'Content-Type': 'application/json',
        },
        json={
            'model': 'vigthoria-creative-v2',
            'messages': [{'role': 'user', 'content': prompt}],
            'stream': True,
        },
        stream=True,  # don't buffer the whole body; iterate it as it arrives
    )
    # Fail fast on HTTP errors instead of silently skipping an error body
    # that doesn't look like SSE and returning ''.
    response.raise_for_status()

    full_text = ''
    for line in response.iter_lines():
        if not line:
            continue  # SSE events are separated by blank lines
        line = line.decode('utf-8')
        if line.startswith('data: ') and line != 'data: [DONE]':
            data = json.loads(line[6:])
            content = data['choices'][0].get('delta', {}).get('content', '')
            full_text += content
            print(content, end='', flush=True)  # real-time output
    print()  # newline after the stream ends
    return full_text


# Usage
result = stream_chat('Write a haiku about AI')
import aiohttp
import asyncio
import json
async def stream_chat_async(prompt):
    """Async generator that yields content chunks as the API streams them.

    Args:
        prompt: The user message to send.

    Yields:
        str: Each non-empty piece of generated text, in arrival order.

    Raises:
        aiohttp.ClientResponseError: If the API returns a non-2xx status.
    """
    async with aiohttp.ClientSession() as session:
        async with session.post(
            'https://api.vigthoria.io/v1/chat/completions',
            headers={
                'Authorization': f'Bearer {API_KEY}',
                'Content-Type': 'application/json',
            },
            json={
                'model': 'vigthoria-creative-v2',
                'messages': [{'role': 'user', 'content': prompt}],
                'stream': True,
            },
        ) as response:
            # Surface HTTP errors instead of iterating an error body as SSE.
            response.raise_for_status()
            # aiohttp's StreamReader iterates line by line (split on '\n').
            async for raw_line in response.content:
                line = raw_line.decode('utf-8').strip()
                if line.startswith('data: ') and line != 'data: [DONE]':
                    data = json.loads(line[6:])
                    content = data['choices'][0].get('delta', {}).get('content', '')
                    # Note: the original also accumulated a full_text string,
                    # but an async generator cannot return it — dead state,
                    # removed. Callers can join the yielded chunks themselves.
                    if content:
                        yield content


# Usage
async def main():
    async for chunk in stream_chat_async('Explain machine learning'):
        print(chunk, end='', flush=True)


asyncio.run(main())
import { useState } from 'react';
function StreamingChat() {
const [response, setResponse] = useState('');
const [loading, setLoading] = useState(false);
async function handleSubmit(prompt) {
setLoading(true);
setResponse('');
const res = await fetch('/api/chat', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ prompt, stream: true })
});
const reader = res.body.getReader();
const decoder = new TextDecoder();
while (true) {
const { done, value } = await reader.read();
if (done) break;
const chunk = decoder.decode(value);
const lines = chunk.split('\n');
for (const line of lines) {
if (line.startsWith('data: ') && line !== 'data: [DONE]') {
const json = JSON.parse(line.slice(6));
const content = json.choices[0]?.delta?.content || '';
setResponse(prev => prev + content);
}
}
}
setLoading(false);
}
return (
<div>
<button onClick={() => handleSubmit('Hello!')}>
{loading ? 'Generating...' : 'Send'}
</button>
<div className="response">{response}</div>
</div>
);
}
Use AbortController to let users cancel an in-progress stream:
const controller = new AbortController();
// Start streaming — pass the controller's signal so the request is abortable.
fetch(url, {
signal: controller.signal,
// ... other options
});
// Cancel button handler — abort() rejects the in-flight fetch (and any
// pending reader.read()) with an AbortError.
cancelButton.onclick = () => controller.abort();