# nova-api/api/streaming.py
import asyncio
import os

import aiohttp
import starlette
import starlette.requests  # ensure the submodule is importable for the annotation below
import yaml
from dotenv import load_dotenv
from rich import print

import load_balancing
import proxies
from db import logs, users, stats
from helpers import network
load_dotenv()
DEMO_PAYLOAD = {
'model': 'gpt-3.5-turbo',
'messages': [
{
'role': 'user',
'content': '1+1='
}
]
}
with open('config/credits.yml', encoding='utf8') as f:
max_credits = yaml.safe_load(f)['max-credits']
async def stream(
path: str='/v1/chat/completions',
user: dict=None,
payload: dict=None,
credits_cost: int=0,
demo_mode: bool=False,
input_tokens: int=0,
incoming_request: starlette.requests.Request=None,
):
payload = payload or DEMO_PAYLOAD
if 'chat/completions' in path: # is a chat endpoint
target_request = await load_balancing.balance_chat_request(payload)
else:
target_request = await load_balancing.balance_organic_request(payload)
headers = {
'Content-Type': 'application/json'
}
for k, v in target_request.get('headers', {}).items():
headers[k] = v
for _ in range(5):
async with aiohttp.ClientSession(connector=proxies.default_proxy.connector) as session:
async with session.request(
method=target_request.get('method', 'POST'),
url=target_request['url'],
data=target_request.get('data'),
json=target_request.get('payload'),
headers=headers,
cookies=target_request.get('cookies'),
ssl=False,
timeout=aiohttp.ClientTimeout(total=float(os.getenv('TRANSFER_TIMEOUT', '120'))),
) as response:
try:
await response.raise_for_status()
except Exception as exc:
if 'Too Many Requests' in str(exc):
continue
else:
break
if user and incoming_request:
await logs.log_api_request(
user=user,
incoming_request=incoming_request,
target_url=target_request['url']
)
if credits_cost and user:
await users.update_by_id(user['_id'], {
'$inc': {'credits': -credits_cost}
})
if not demo_mode:
await stats.add_date()
await stats.add_ip_address(network.get_ip(incoming_request))
await stats.add_model(payload.get('model', '_non-chat'))
await stats.add_path(path)
await stats.add_target(target_request['url'])
await stats.add_tokens(input_tokens)
async for chunk in response.content.iter_chunks():
# chunk = f'{chunk.decode("utf8")}\n\n'
if demo_mode:
print(chunk)
yield chunk
if __name__ == '__main__':
asyncio.run(stream())