Compare commits

...

3 commits

Author SHA1 Message Date
NovaOSS Admins 8f920f478f Handle TypeError in message content retrieval 2023-11-07 00:12:07 +00:00
NovaOSS Admins b79ebc05b8 Add system fingerprint to server JSON response 2023-11-07 00:05:00 +00:00
NovaOSS Admins b14383be3d Various improvements. 2023-11-06 23:56:43 +00:00
8 changed files with 117 additions and 65 deletions

View file

@@ -2,7 +2,7 @@ from db import logs, stats, users
 from helpers import network

 async def after_request(
-    incoming_request: dict,
+    incoming_request,
     target_request: dict,
     user: dict,
     tokens: dict,

View file

@@ -62,7 +62,7 @@ async def handle(incoming_request: fastapi.Request):
     user = await users.user_by_api_key(received_key.split('Bearer ')[1].strip())

     if not user or not user['status']['active']:
-        return await errors.error(418, 'Invalid or inactive NovaAI API key!', 'Create a new NovaOSS API key or reactivate your account.')
+        return await errors.error(401, 'Invalid or inactive NovaAI API key!', 'Try /resetkey or /credentials.')

     ban_reason = user['status']['ban_reason']
     if ban_reason:
@@ -118,7 +118,10 @@ async def handle(incoming_request: fastapi.Request):
     for message in payload.get('messages', []):
         if message.get('role') == 'user':
-            inp += message.get('content', '') + '\n'
+            try:
+                inp += message.get('content', '') + '\n'
+            except TypeError:
+                inp += message['content'][0]['text'] + '\n'

     if 'functions' in payload:
         inp += '\n'.join([function.get('description', '') for function in payload.get('functions', [])])
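
The `try`/`except TypeError` added above handles messages whose `content` is a list of typed parts (as in vision-style requests) rather than a plain string: concatenating a list to a string raises `TypeError`, and the fallback pulls the text out of the first part. A minimal sketch of the two message shapes (payloads hypothetical):

    # Plain string content: the original `inp += content + '\n'` path works.
    message = {'role': 'user', 'content': 'Hello!'}

    # List-of-parts content: `+ '\n'` raises TypeError, so the new fallback
    # reads message['content'][0]['text'] instead.
    message = {'role': 'user', 'content': [{'type': 'text', 'text': 'Hello!'}]}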

View file

@@ -1,8 +1,10 @@
 """FastAPI setup."""
 import os
+import ujson
 import fastapi
 import pydantic
+import responder

 from dotenv import load_dotenv
@@ -77,6 +79,13 @@ async def v1_handler(request: fastapi.Request):
     res = await handler.handle(incoming_request=request)
     return res

+@app.route('/update-v1-models', methods=['GET'])
+async def update_v1_models(request: fastapi.Request):
+    res = []
+    async for response in responder.respond(path='/v1/models', overwrite_method='GET'):
+        res.append(response)
+    return res
+
 @limiter.limit('100/minute', '1000/hour')
 @app.route('/enterprise/v1/{path:path}', methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH'])
 async def enterprise_handler(request: fastapi.Request):

View file

@@ -1,2 +1,2 @@
-from . import ails, closed, closed4
-MODULES = [closed, closed4]
+from . import closed, closed4, azure
+MODULES = [closed, closed4, azure]
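
The new `azure` provider is registered here. Judging by the CLI changes below, each entry in `MODULES` is a module exposing at least a `MODELS` list ordered worst to best (the CLI picks `provider.MODELS[-1]` as the best model). A hypothetical minimal provider module:

    # azure.py -- hypothetical minimal shape; the real module will differ.
    MODELS = [
        'gpt-3.5-turbo',  # assumed entries, ordered worst to best
        'gpt-4',
    ]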

View file

@@ -1,3 +1,5 @@
+"""CLI Tool"""
+
 import os
 import sys
 import aiohttp
@@ -22,10 +24,15 @@ async def main():
     for file_name in os.listdir(os.path.dirname(__file__)):
         if file_name.endswith('.py') and not file_name.startswith('_'):
-            name = file_name.split('.')[0]
+            model_name = file_name.split('.')[0]
             models = importlib.import_module(f'.{file_name.split(".")[0]}', 'providers').MODELS
-            print(f'{name} @ {", ".join(models)}')
+
+            text = ''
+            for model in models:
+                text += f' - {model}\n'
+
+            print(f'{model_name}:\n{text}')
     sys.exit(0)
@@ -35,7 +42,7 @@ async def main():
         print(exc)
         sys.exit(1)

-    if len(sys.argv) > 2:
+    if len(sys.argv) == 3:
         model = sys.argv[2] # choose a specific model
     else:
         model = provider.MODELS[-1] # choose best model

View file

@@ -6,17 +6,26 @@ except ModuleNotFoundError:
 # Sort the models by their value/cost/rarity.
 GPT_3 = [
+    'dall-e-2',
+    'code-davinci-002',
+    'text-davinci-002',
+    'text-davinci-003',
     'gpt-3.5-turbo',
     'gpt-3.5-turbo-16k',
     'gpt-3.5-turbo-0613',
     'gpt-3.5-turbo-0301',
     'gpt-3.5-turbo-16k-0613',
+    'gpt-3.5-turbo-instruct',
 ]

 GPT_4 = GPT_3 + [
     'gpt-4',
     'gpt-4-0314',
     'gpt-4-0613',
+    'gpt-4-1106-preview',
+    'gpt-4-vision-preview'
 ]

 GPT_4_32K = GPT_4 + [

View file

@@ -22,6 +22,8 @@ from helpers.tokens import count_tokens_for_messages
 load_dotenv()

+RETRIES = 10
+
 CRITICAL_API_ERRORS = ['invalid_api_key', 'account_deactivated']
 keymanager = providerkeys.manager
 background_tasks: Set[asyncio.Task[Any]] = set()
@@ -30,7 +32,7 @@ with open(os.path.join('config', 'config.yml'), encoding='utf8') as f:
     config = yaml.safe_load(f)

 def create_background_task(coro: Coroutine[Any, Any, Any]) -> None:
-    """asyncio.create_task, which prevents the task from being garbage collected.
+    """Utilizes asyncio.create_task, which prevents the task from being garbage collected.

     https://docs.python.org/3/library/asyncio-task.html#asyncio.create_task
     """
@@ -42,7 +44,8 @@ async def respond(
     path: str='/v1/chat/completions',
     user: dict=None,
     payload: dict=None,
-    incoming_request: starlette.requests.Request=None,
+    incoming_request=None,
+    overwrite_method=None
 ):
     """
     Stream the completions request. Sends data in chunks
@@ -75,17 +78,27 @@ async def respond(
     input_tokens = 0
     output_tokens = 0

-    for _ in range(10):
+    if incoming_request:
+        cookies = incoming_request.cookies
+    else:
+        cookies = {}
+
+    if overwrite_method:
+        method = overwrite_method
+    else:
+        method = incoming_request.method
+
+    for _ in range(RETRIES):
         try:
             if is_chat:
                 target_request = await load_balancing.balance_chat_request(payload)
             else:
                 target_request = await load_balancing.balance_organic_request({
-                    'method': incoming_request.method,
+                    'method': method,
                     'path': path,
                     'payload': payload,
                     'headers': headers,
-                    'cookies': incoming_request.cookies
+                    'cookies': cookies
                 })

         except ValueError:
@@ -181,7 +194,7 @@ async def respond(
                 chunk_no = 0
                 buffer = ''

-                async for chunk in response.content.iter_chunked(1024):
+                async for chunk in response.content.iter_any():
                     chunk_no += 1
                     chunk = chunk.decode('utf8')
@@ -192,16 +205,21 @@ async def respond(
                     if not chunk.strip() or chunk_no == 1:
                         continue

-                    subchunks = chunk.split('\n\n')
-                    buffer += subchunks[0]
-
-                    for subchunk in [buffer] + subchunks[1:-1]:
-                        if not subchunk.startswith('data: '):
-                            subchunk = 'data: ' + subchunk
-                        yield subchunk + '\n\n'
-
-                    buffer = subchunks[-1]
+                    buffer += chunk
+
+                    while '\n\n' in buffer:
+                        subchunk, buffer = buffer.split('\n\n', 1)
+
+                        if not subchunk.strip():
+                            continue
+
+                        if not subchunk.startswith('data: '):
+                            subchunk = 'data: ' + subchunk
+
+                        subchunk = subchunk.rsplit('[DONE]', 1)[0]
+                        subchunk += '\n\n'
+                        yield subchunk

                 output_tokens = chunk_no
                 break
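
The buffering rework above exists because `iter_any()` (unlike `iter_chunked(1024)`) yields whatever bytes are available, so a single server-sent event can arrive split across reads. Chunks now accumulate in `buffer`, and only complete `'\n\n'`-terminated events are emitted. A standalone sketch of the splitting loop (sample chunks hypothetical):

    # One event arrives split across two reads; nothing is yielded until
    # the '\n\n' delimiter completes it.
    buffer = ''
    for chunk in ['data: {"a"', ': 1}\n\ndata: [DONE]\n\n']:
        buffer += chunk
        while '\n\n' in buffer:
            subchunk, buffer = buffer.split('\n\n', 1)
            if not subchunk.strip():
                continue
            if not subchunk.startswith('data: '):
                subchunk = 'data: ' + subchunk
            print(repr(subchunk))  # prints 'data: {"a": 1}', then 'data: [DONE]'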
@@ -209,10 +227,12 @@ async def respond(
         except aiohttp.client_exceptions.ServerTimeoutError:
             skipped_errors['timeout'] += 1
             continue

     else:
-        skipped_errors = {k: v for k, v in skipped_errors.items() if ((isinstance(v, int) and v > 0) or
-            (isinstance(v, list) and len(v) > 0))}
+        skipped_errors = {k: v for k, v in skipped_errors.items() if ((isinstance(v, int) and v > 0) or (isinstance(v, list) and len(v) > 0))}
+        skipped_errors['model'] = model
+        skipped_errors['provider'] = provider_name
+        print(f'[!] Skipped {RETRIES} errors:\n{skipped_errors}')
         skipped_errors = ujson.dumps(skipped_errors, indent=4)
         yield await errors.yield_error(500,
             f'Sorry, our API seems to have issues connecting to "{model}".',
@@ -221,52 +241,51 @@ async def respond(
         return

     if (not is_stream) and server_json_response:
+        server_json_response['system_fingerprint'] = f'fp_' + os.urandom(5).hex()
         yield json.dumps(server_json_response)

-    role = user.get('role', 'default')
-
-    model_multipliers = config['costs']
-    model_multiplier = model_multipliers['other']
-
-    if is_chat:
-        model_multiplier = model_multipliers['chat-models'].get(payload.get('model'), model_multiplier)
-        total_tokens = input_tokens + output_tokens
-        credits_cost = total_tokens / 60
-        credits_cost = round(credits_cost * model_multiplier)
-
-        if credits_cost < 1:
-            credits_cost = 1
-
-        tokens = {
-            'input': input_tokens,
-            'output': output_tokens,
-            'total': total_tokens
-        }
-    else:
-        credits_cost = 5
-        tokens = {
-            'input': 0,
-            'output': 0,
-            'total': credits_cost
-        }
-
-    try:
-        role_cost_multiplier = config['roles'][role]['bonus']
-    except KeyError:
-        role_cost_multiplier = 1
-
-    credits_cost = round(credits_cost * role_cost_multiplier)
-
-    create_background_task(
-        after_request.after_request(
-            provider=provider_name,
-            incoming_request=incoming_request,
-            target_request=target_request,
-            user=user,
-            credits_cost=credits_cost,
-            tokens=tokens,
-            path=path,
-            is_chat=is_chat,
-            model=model,
-        )
-    )
+    if incoming_request: # not called by other code, but actually a request
+        role = user.get('role', 'default')
+
+        model_multipliers = config['costs']
+        model_multiplier = model_multipliers['other']
+
+        if is_chat:
+            model_multiplier = model_multipliers['chat-models'].get(payload.get('model'), model_multiplier)
+            total_tokens = input_tokens + output_tokens
+            credits_cost = total_tokens / 60
+            credits_cost = round(credits_cost * model_multiplier)
+
+            if credits_cost < 1:
+                credits_cost = 1
+
+            tokens = {'input': input_tokens, 'output': output_tokens, 'total': total_tokens}
+
+        elif model == 'dall-e-2':
+            credits_cost = 50
+            tokens = {'input': 0, 'output': 0, 'total': credits_cost}
+
+        elif model == 'dall-e-3':
+            credits_cost = 100
+            tokens = {'input': 0, 'output': 0, 'total': credits_cost}
+
+        try:
+            role_cost_multiplier = config['roles'][role]['bonus']
+        except KeyError:
+            role_cost_multiplier = 1
+
+        credits_cost = round(credits_cost * role_cost_multiplier)
+
+        create_background_task(
+            after_request.after_request(
+                provider=provider_name,
+                incoming_request=incoming_request,
+                target_request=target_request,
+                user=user,
+                credits_cost=credits_cost,
+                tokens=tokens,
+                path=path,
+                is_chat=is_chat,
+                model=model,
+            )
+        )
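
Two behavioral notes on the block above: the non-streamed response is now stamped with a `system_fingerprint` built from five random bytes, and chat credit costs work out to roughly one credit per 60 tokens (floored at 1) before the role bonus. A hypothetical walk-through, assuming multipliers of 1 from config.yml:

    import os

    # Fingerprint: 'fp_' plus 10 hex characters, e.g. 'fp_1a2b3c4d5e'.
    fingerprint = 'fp_' + os.urandom(5).hex()

    # Chat cost for 150 input + 90 output tokens:
    credits_cost = round((150 + 90) / 60 * 1)  # model multiplier 1 -> 4 credits
    credits_cost = max(credits_cost, 1)        # floor of 1 credit
    credits_cost = round(credits_cost * 1)     # role bonus multiplier 1 -> 4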

View file

@@ -56,7 +56,7 @@ async def test_chat_non_stream_gpt4() -> float:
     """Tests non-streamed chat completions with the GPT-4 model."""

     json_data = {
-        'model': 'gpt-4',
+        'model': 'gpt-4-1106-preview',
         'messages': MESSAGES,
         'stream': False
     }
@@ -74,7 +74,8 @@ async def test_chat_non_stream_gpt4() -> float:
     try:
         assert '1337' in response.json()['choices'][0]['message']['content'], 'The API did not return a correct response.'
-    except json.decoder.JSONDecodeError:
+    except KeyError:
+        print(response.json())
         return response.status_code

     return time.perf_counter() - request_start
@@ -110,7 +111,11 @@ async def test_chat_stream_gpt3() -> float:
             break

         if chunk:
-            chunks.append(json.loads(chunk))
+            try:
+                chunks.append(json.loads(chunk))
+            except json.decoder.JSONDecodeError:
+                print(f'[!] Invalid chunk: {chunk}')
+                return f'Received chunk with invalid JSON. Status code {response.status_code}.'

             try:
                 resulting_text += json.loads(chunk)['choices'][0]['delta']['content']