# Mirror of https://github.com/NovaOSS/nova-api.git (synced 2024-11-25)
import os
|
||
|
import yaml
|
||
|
import asyncio
|
||
|
import aiohttp
|
||
|
import starlette
|
||
|
|
||
|
from dotenv import load_dotenv
|
||
|
|
||
|
import proxies
|
||
|
import load_balancing
|
||
|
|
||
|
from db import logs, users, stats
|
||
|
from rich import print
|
||
|
from helpers import network
|
||
|
|
||
|
load_dotenv()
DEMO_PAYLOAD = {
|
||
|
'model': 'gpt-3.5-turbo',
|
||
|
'messages': [
|
||
|
{
|
||
|
'role': 'user',
|
||
|
'content': '1+1='
|
||
|
}
|
||
|
]
|
||
|
}
|
||
|
|
||
|
with open('config/credits.yml', encoding='utf8') as f:
|
||
|
max_credits = yaml.safe_load(f)['max-credits']
|
||
|
|
||
|
async def stream(
|
||
|
path: str='/v1/chat/completions',
|
||
|
user: dict=None,
|
||
|
payload: dict=None,
|
||
|
credits_cost: int=0,
|
||
|
demo_mode: bool=False,
|
||
|
input_tokens: int=0,
|
||
|
incoming_request: starlette.requests.Request=None,
|
||
|
):
|
||
|
payload = payload or DEMO_PAYLOAD
|
||
|
|
||
|
if 'chat/completions' in path: # is a chat endpoint
|
||
|
target_request = await load_balancing.balance_chat_request(payload)
|
||
|
else:
|
||
|
target_request = await load_balancing.balance_organic_request(payload)
|
||
|
|
||
|
headers = {
|
||
|
'Content-Type': 'application/json'
|
||
|
}
|
||
|
|
||
|
for k, v in target_request.get('headers', {}).items():
|
||
|
headers[k] = v
|
||
|
|
||
|
for _ in range(5):
|
||
|
async with aiohttp.ClientSession(connector=proxies.default_proxy.connector) as session:
|
||
|
async with session.request(
|
||
|
method=target_request.get('method', 'POST'),
|
||
|
url=target_request['url'],
|
||
|
|
||
|
data=target_request.get('data'),
|
||
|
json=target_request.get('payload'),
|
||
|
|
||
|
headers=headers,
|
||
|
cookies=target_request.get('cookies'),
|
||
|
|
||
|
ssl=False,
|
||
|
|
||
|
timeout=aiohttp.ClientTimeout(total=float(os.getenv('TRANSFER_TIMEOUT', '120'))),
|
||
|
) as response:
|
||
|
try:
|
||
|
await response.raise_for_status()
|
||
|
except Exception as exc:
|
||
|
if 'Too Many Requests' in str(exc):
|
||
|
continue
|
||
|
else:
|
||
|
break
|
||
|
|
||
|
if user and incoming_request:
|
||
|
await logs.log_api_request(
|
||
|
user=user,
|
||
|
incoming_request=incoming_request,
|
||
|
target_url=target_request['url']
|
||
|
)
|
||
|
|
||
|
if credits_cost and user:
|
||
|
await users.update_by_id(user['_id'], {
|
||
|
'$inc': {'credits': -credits_cost}
|
||
|
})
|
||
|
|
||
|
if not demo_mode:
|
||
|
await stats.add_date()
|
||
|
await stats.add_ip_address(network.get_ip(incoming_request))
|
||
|
await stats.add_model(payload.get('model', '_non-chat'))
|
||
|
await stats.add_path(path)
|
||
|
await stats.add_target(target_request['url'])
|
||
|
await stats.add_tokens(input_tokens)
|
||
|
|
||
|
async for chunk in response.content.iter_chunks():
|
||
|
# chunk = f'{chunk.decode("utf8")}\n\n'
|
||
|
|
||
|
if demo_mode:
|
||
|
print(chunk)
|
||
|
|
||
|
yield chunk
|
||
|
|
||
|
if __name__ == '__main__':
|
||
|
asyncio.run(stream())
|