nova-api/api/responder.py

172 lines
6.2 KiB
Python
Raw Normal View History

2023-08-12 17:49:31 +02:00
"""This module contains the streaming logic for the API."""
2023-08-04 03:30:56 +02:00
import os
2023-08-04 17:29:49 +02:00
import json
import random
2023-08-04 03:30:56 +02:00
import aiohttp
import starlette
2023-08-04 17:29:49 +02:00
from rich import print
2023-08-04 03:30:56 +02:00
from dotenv import load_dotenv
import proxies
import provider_auth
import after_request
2023-08-04 03:30:56 +02:00
import load_balancing
from helpers import network, chat, errors
from db import key_validation
2023-08-04 03:30:56 +02:00
load_dotenv()
2023-09-11 02:47:21 +02:00
async def respond(
2023-08-04 03:30:56 +02:00
path: str='/v1/chat/completions',
user: dict=None,
payload: dict=None,
credits_cost: int=0,
input_tokens: int=0,
incoming_request: starlette.requests.Request=None,
):
2023-08-13 17:12:35 +02:00
"""Stream the completions request. Sends data in chunks
2023-08-14 10:47:03 +02:00
If not streaming, it sends the result in its entirety.
2023-08-13 17:12:35 +02:00
"""
2023-08-14 10:47:03 +02:00
2023-08-04 17:29:49 +02:00
is_chat = False
2023-08-04 03:30:56 +02:00
model = None
2023-09-14 18:18:19 +02:00
is_stream = False
2023-08-04 17:29:49 +02:00
if 'chat/completions' in path:
is_chat = True
model = payload['model']
2023-08-04 03:30:56 +02:00
2023-08-27 04:29:16 +02:00
json_response = {}
2023-08-04 03:30:56 +02:00
2023-08-25 19:13:39 +02:00
headers = {
'Content-Type': 'application/json'
2023-08-25 19:13:39 +02:00
}
for _ in range(20):
2023-08-18 21:23:00 +02:00
# Load balancing: randomly selecting a suitable provider
2023-08-13 18:29:45 +02:00
# If the request is a chat completion, then we need to load balance between chat providers
# If the request is an organic request, then we need to load balance between organic providers
try:
if is_chat:
target_request = await load_balancing.balance_chat_request(payload)
else:
2023-08-13 18:29:45 +02:00
# In this case we are doing a organic request. "organic" means that it's not using a reverse engineered front-end, but rather ClosedAI's API directly
# churchless.tech is an example of an organic provider, because it redirects the request to ClosedAI.
target_request = await load_balancing.balance_organic_request({
'method': incoming_request.method,
'path': path,
'payload': payload,
'headers': headers,
'cookies': incoming_request.cookies
})
except ValueError as exc:
yield await errors.yield_error(500, f'Sorry, the API has no active API keys for {model}.', 'Please use a different model.')
2023-08-06 12:46:41 +02:00
return
2023-08-05 02:30:42 +02:00
target_request['headers'].update(target_request.get('headers', {}))
if target_request['method'] == 'GET' and not payload:
target_request['payload'] = None
2023-08-04 17:29:49 +02:00
2023-08-13 18:29:45 +02:00
# We haven't done any requests as of right now, everything until now was just preparation
# Here, we process the request
2023-08-12 17:49:31 +02:00
async with aiohttp.ClientSession(connector=proxies.get_proxy().connector) as session:
2023-08-05 02:30:42 +02:00
try:
async with session.request(
method=target_request.get('method', 'POST'),
url=target_request['url'],
data=target_request.get('data'),
json=target_request.get('payload'),
headers=target_request.get('headers', {}),
2023-08-05 02:30:42 +02:00
cookies=target_request.get('cookies'),
ssl=False,
2023-08-16 15:06:16 +02:00
timeout=aiohttp.ClientTimeout(
connect=0.3,
2023-09-10 16:22:46 +02:00
total=float(os.getenv('TRANSFER_TIMEOUT', '500'))
2023-08-16 15:06:16 +02:00
),
2023-08-05 02:30:42 +02:00
) as response:
2023-09-14 18:18:19 +02:00
is_stream = response.content_type == 'text/event-stream'
2023-08-23 23:26:43 +02:00
if response.status == 429:
continue
if response.content_type == 'application/json':
data = await response.json()
2023-08-06 21:42:07 +02:00
error = data.get('error')
match error:
case None:
pass
case _:
key = target_request.get('provider_auth')
match error.get('code'):
case 'invalid_api_key':
await key_validation.log_rated_key(key)
print('[!] invalid key', key)
pass
case _:
print('[!] unknown error with key: ', key, error)
2023-09-06 11:44:29 +02:00
if 'method_not_supported' in str(data):
await errors.error(500, 'Sorry, this endpoint does not support this method.', data['error']['message'])
if 'invalid_api_key' in str(data) or 'account_deactivated' in str(data):
print('[!] invalid api key', target_request.get('provider_auth'))
await provider_auth.invalidate_key(target_request.get('provider_auth'))
2023-08-05 02:30:42 +02:00
continue
2023-08-04 03:30:56 +02:00
if response.ok:
json_response = data
else:
continue
if is_stream:
try:
response.raise_for_status()
except Exception as exc:
if 'Too Many Requests' in str(exc):
print('[!] too many requests')
continue
2023-09-14 18:18:19 +02:00
async for chunk in response.content.iter_any():
chunk = chunk.decode('utf8').strip()
yield chunk + '\n\n'
2023-08-04 17:29:49 +02:00
2023-08-05 02:30:42 +02:00
break
2023-08-04 17:29:49 +02:00
except Exception as exc:
print('[!] exception', exc)
2023-08-27 04:29:16 +02:00
continue
if (not json_response) and is_chat:
2023-08-27 04:29:16 +02:00
print('[!] chat response is empty')
2023-08-24 14:57:36 +02:00
continue
2023-09-06 11:44:29 +02:00
else:
print('[!] no response')
yield await errors.yield_error(500, 'Sorry, our API seems to have issues connecting to our provider(s).', 'This most likely isn\'t your fault. Please try again later.')
2023-09-06 11:44:29 +02:00
return
2023-08-24 14:57:36 +02:00
2023-09-06 11:44:29 +02:00
if (not is_stream) and json_response:
2023-08-06 21:42:07 +02:00
yield json.dumps(json_response)
await after_request.after_request(
incoming_request=incoming_request,
target_request=target_request,
user=user,
credits_cost=credits_cost,
input_tokens=input_tokens,
path=path,
is_chat=is_chat,
model=model,
)