mirror of
https://github.com/NovaOSS/nova-api.git
synced 2024-11-25 20:13:57 +01:00
Compare commits
No commits in common. "8f920f478ff39ecfac9bc9ff61a7679d82da7af7" and "fffd7a276a455466e65e84b81f8415af244a1fde" have entirely different histories.
8f920f478f
...
fffd7a276a
|
@ -2,7 +2,7 @@ from db import logs, stats, users
|
||||||
from helpers import network
|
from helpers import network
|
||||||
|
|
||||||
async def after_request(
|
async def after_request(
|
||||||
incoming_request,
|
incoming_request: dict,
|
||||||
target_request: dict,
|
target_request: dict,
|
||||||
user: dict,
|
user: dict,
|
||||||
tokens: dict,
|
tokens: dict,
|
||||||
|
|
|
@ -62,7 +62,7 @@ async def handle(incoming_request: fastapi.Request):
|
||||||
user = await users.user_by_api_key(received_key.split('Bearer ')[1].strip())
|
user = await users.user_by_api_key(received_key.split('Bearer ')[1].strip())
|
||||||
|
|
||||||
if not user or not user['status']['active']:
|
if not user or not user['status']['active']:
|
||||||
return await errors.error(401, 'Invalid or inactive NovaAI API key!', 'Try /resetkey or /credentials.')
|
return await errors.error(418, 'Invalid or inactive NovaAI API key!', 'Create a new NovaOSS API key or reactivate your account.')
|
||||||
|
|
||||||
ban_reason = user['status']['ban_reason']
|
ban_reason = user['status']['ban_reason']
|
||||||
if ban_reason:
|
if ban_reason:
|
||||||
|
@ -118,10 +118,7 @@ async def handle(incoming_request: fastapi.Request):
|
||||||
|
|
||||||
for message in payload.get('messages', []):
|
for message in payload.get('messages', []):
|
||||||
if message.get('role') == 'user':
|
if message.get('role') == 'user':
|
||||||
try:
|
inp += message.get('content', '') + '\n'
|
||||||
inp += message.get('content', '') + '\n'
|
|
||||||
except TypeError:
|
|
||||||
inp += message['content'][0]['text'] + '\n'
|
|
||||||
|
|
||||||
if 'functions' in payload:
|
if 'functions' in payload:
|
||||||
inp += '\n'.join([function.get('description', '') for function in payload.get('functions', [])])
|
inp += '\n'.join([function.get('description', '') for function in payload.get('functions', [])])
|
||||||
|
|
|
@ -1,10 +1,8 @@
|
||||||
"""FastAPI setup."""
|
"""FastAPI setup."""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import ujson
|
|
||||||
import fastapi
|
import fastapi
|
||||||
import pydantic
|
import pydantic
|
||||||
import responder
|
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
@ -79,13 +77,6 @@ async def v1_handler(request: fastapi.Request):
|
||||||
res = await handler.handle(incoming_request=request)
|
res = await handler.handle(incoming_request=request)
|
||||||
return res
|
return res
|
||||||
|
|
||||||
@app.route('/update-v1-models', methods=['GET'])
|
|
||||||
async def update_v1_models(request: fastapi.Request):
|
|
||||||
res = []
|
|
||||||
async for response in responder.respond(path='/v1/models', overwrite_method='GET'):
|
|
||||||
res.append(response)
|
|
||||||
return res
|
|
||||||
|
|
||||||
@limiter.limit('100/minute', '1000/hour')
|
@limiter.limit('100/minute', '1000/hour')
|
||||||
@app.route('/enterprise/v1/{path:path}', methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH'])
|
@app.route('/enterprise/v1/{path:path}', methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH'])
|
||||||
async def enterprise_handler(request: fastapi.Request):
|
async def enterprise_handler(request: fastapi.Request):
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
from . import closed, closed4, azure
|
from . import ails, closed, closed4
|
||||||
MODULES = [closed, closed4, azure]
|
MODULES = [closed, closed4]
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
"""CLI Tool"""
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import aiohttp
|
import aiohttp
|
||||||
|
@ -24,15 +22,10 @@ async def main():
|
||||||
|
|
||||||
for file_name in os.listdir(os.path.dirname(__file__)):
|
for file_name in os.listdir(os.path.dirname(__file__)):
|
||||||
if file_name.endswith('.py') and not file_name.startswith('_'):
|
if file_name.endswith('.py') and not file_name.startswith('_'):
|
||||||
model_name = file_name.split('.')[0]
|
name = file_name.split('.')[0]
|
||||||
models = importlib.import_module(f'.{file_name.split(".")[0]}', 'providers').MODELS
|
models = importlib.import_module(f'.{file_name.split(".")[0]}', 'providers').MODELS
|
||||||
|
|
||||||
text = ''
|
print(f' {name} @ {", ".join(models)}')
|
||||||
|
|
||||||
for model in models:
|
|
||||||
text += f' - {model}\n'
|
|
||||||
|
|
||||||
print(f' {model_name}:\n{text}')
|
|
||||||
|
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
|
@ -42,7 +35,7 @@ async def main():
|
||||||
print(exc)
|
print(exc)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
if len(sys.argv) == 3:
|
if len(sys.argv) > 2:
|
||||||
model = sys.argv[2] # choose a specific model
|
model = sys.argv[2] # choose a specific model
|
||||||
else:
|
else:
|
||||||
model = provider.MODELS[-1] # choose best model
|
model = provider.MODELS[-1] # choose best model
|
||||||
|
|
|
@ -6,26 +6,17 @@ except ModuleNotFoundError:
|
||||||
# Sort the models by their value/cost/rarity.
|
# Sort the models by their value/cost/rarity.
|
||||||
|
|
||||||
GPT_3 = [
|
GPT_3 = [
|
||||||
'dall-e-2',
|
|
||||||
'code-davinci-002',
|
|
||||||
'text-davinci-002',
|
|
||||||
'text-davinci-003',
|
|
||||||
|
|
||||||
'gpt-3.5-turbo',
|
'gpt-3.5-turbo',
|
||||||
'gpt-3.5-turbo-16k',
|
'gpt-3.5-turbo-16k',
|
||||||
'gpt-3.5-turbo-0613',
|
'gpt-3.5-turbo-0613',
|
||||||
'gpt-3.5-turbo-0301',
|
'gpt-3.5-turbo-0301',
|
||||||
'gpt-3.5-turbo-16k-0613',
|
'gpt-3.5-turbo-16k-0613',
|
||||||
|
|
||||||
'gpt-3.5-turbo-instruct',
|
|
||||||
]
|
]
|
||||||
|
|
||||||
GPT_4 = GPT_3 + [
|
GPT_4 = GPT_3 + [
|
||||||
'gpt-4',
|
'gpt-4',
|
||||||
'gpt-4-0314',
|
'gpt-4-0314',
|
||||||
'gpt-4-0613',
|
'gpt-4-0613',
|
||||||
'gpt-4-1106-preview',
|
|
||||||
'gpt-4-vision-preview'
|
|
||||||
]
|
]
|
||||||
|
|
||||||
GPT_4_32K = GPT_4 + [
|
GPT_4_32K = GPT_4 + [
|
||||||
|
|
125
api/responder.py
125
api/responder.py
|
@ -22,8 +22,6 @@ from helpers.tokens import count_tokens_for_messages
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
RETRIES = 10
|
|
||||||
|
|
||||||
CRITICAL_API_ERRORS = ['invalid_api_key', 'account_deactivated']
|
CRITICAL_API_ERRORS = ['invalid_api_key', 'account_deactivated']
|
||||||
keymanager = providerkeys.manager
|
keymanager = providerkeys.manager
|
||||||
background_tasks: Set[asyncio.Task[Any]] = set()
|
background_tasks: Set[asyncio.Task[Any]] = set()
|
||||||
|
@ -32,7 +30,7 @@ with open(os.path.join('config', 'config.yml'), encoding='utf8') as f:
|
||||||
config = yaml.safe_load(f)
|
config = yaml.safe_load(f)
|
||||||
|
|
||||||
def create_background_task(coro: Coroutine[Any, Any, Any]) -> None:
|
def create_background_task(coro: Coroutine[Any, Any, Any]) -> None:
|
||||||
"""Utilizes asyncio.create_task, which prevents the task from being garbage collected.
|
"""asyncio.create_task, which prevents the task from being garbage collected.
|
||||||
|
|
||||||
https://docs.python.org/3/library/asyncio-task.html#asyncio.create_task
|
https://docs.python.org/3/library/asyncio-task.html#asyncio.create_task
|
||||||
"""
|
"""
|
||||||
|
@ -44,8 +42,7 @@ async def respond(
|
||||||
path: str='/v1/chat/completions',
|
path: str='/v1/chat/completions',
|
||||||
user: dict=None,
|
user: dict=None,
|
||||||
payload: dict=None,
|
payload: dict=None,
|
||||||
incoming_request=None,
|
incoming_request: starlette.requests.Request=None,
|
||||||
overwrite_method=None
|
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Stream the completions request. Sends data in chunks
|
Stream the completions request. Sends data in chunks
|
||||||
|
@ -78,27 +75,17 @@ async def respond(
|
||||||
input_tokens = 0
|
input_tokens = 0
|
||||||
output_tokens = 0
|
output_tokens = 0
|
||||||
|
|
||||||
if incoming_request:
|
for _ in range(10):
|
||||||
cookies = incoming_request.cookies
|
|
||||||
else:
|
|
||||||
cookies = {}
|
|
||||||
|
|
||||||
if overwrite_method:
|
|
||||||
method = overwrite_method
|
|
||||||
else:
|
|
||||||
method = incoming_request.method
|
|
||||||
|
|
||||||
for _ in range(RETRIES):
|
|
||||||
try:
|
try:
|
||||||
if is_chat:
|
if is_chat:
|
||||||
target_request = await load_balancing.balance_chat_request(payload)
|
target_request = await load_balancing.balance_chat_request(payload)
|
||||||
else:
|
else:
|
||||||
target_request = await load_balancing.balance_organic_request({
|
target_request = await load_balancing.balance_organic_request({
|
||||||
'method': method,
|
'method': incoming_request.method,
|
||||||
'path': path,
|
'path': path,
|
||||||
'payload': payload,
|
'payload': payload,
|
||||||
'headers': headers,
|
'headers': headers,
|
||||||
'cookies': cookies
|
'cookies': incoming_request.cookies
|
||||||
})
|
})
|
||||||
|
|
||||||
except ValueError:
|
except ValueError:
|
||||||
|
@ -194,7 +181,7 @@ async def respond(
|
||||||
chunk_no = 0
|
chunk_no = 0
|
||||||
buffer = ''
|
buffer = ''
|
||||||
|
|
||||||
async for chunk in response.content.iter_any():
|
async for chunk in response.content.iter_chunked(1024):
|
||||||
chunk_no += 1
|
chunk_no += 1
|
||||||
|
|
||||||
chunk = chunk.decode('utf8')
|
chunk = chunk.decode('utf8')
|
||||||
|
@ -205,21 +192,16 @@ async def respond(
|
||||||
if not chunk.strip() or chunk_no == 1:
|
if not chunk.strip() or chunk_no == 1:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
buffer += chunk
|
subchunks = chunk.split('\n\n')
|
||||||
while '\n\n' in buffer:
|
buffer += subchunks[0]
|
||||||
subchunk, buffer = buffer.split('\n\n', 1)
|
|
||||||
|
|
||||||
if not subchunk.strip():
|
|
||||||
continue
|
|
||||||
|
|
||||||
|
for subchunk in [buffer] + subchunks[1:-1]:
|
||||||
if not subchunk.startswith('data: '):
|
if not subchunk.startswith('data: '):
|
||||||
subchunk = 'data: ' + subchunk
|
subchunk = 'data: ' + subchunk
|
||||||
|
|
||||||
subchunk = subchunk.rsplit('[DONE]', 1)[0]
|
yield subchunk + '\n\n'
|
||||||
subchunk += '\n\n'
|
|
||||||
|
|
||||||
yield subchunk
|
|
||||||
|
|
||||||
|
buffer = subchunks[-1]
|
||||||
|
|
||||||
output_tokens = chunk_no
|
output_tokens = chunk_no
|
||||||
break
|
break
|
||||||
|
@ -227,12 +209,10 @@ async def respond(
|
||||||
except aiohttp.client_exceptions.ServerTimeoutError:
|
except aiohttp.client_exceptions.ServerTimeoutError:
|
||||||
skipped_errors['timeout'] += 1
|
skipped_errors['timeout'] += 1
|
||||||
continue
|
continue
|
||||||
else:
|
|
||||||
skipped_errors = {k: v for k, v in skipped_errors.items() if ((isinstance(v, int) and v > 0) or (isinstance(v, list) and len(v) > 0))}
|
|
||||||
skipped_errors['model'] = model
|
|
||||||
skipped_errors['provider'] = provider_name
|
|
||||||
print(f'[!] Skipped {RETRIES} errors:\n{skipped_errors}')
|
|
||||||
|
|
||||||
|
else:
|
||||||
|
skipped_errors = {k: v for k, v in skipped_errors.items() if ((isinstance(v, int) and v > 0) or
|
||||||
|
(isinstance(v, list) and len(v) > 0))}
|
||||||
skipped_errors = ujson.dumps(skipped_errors, indent=4)
|
skipped_errors = ujson.dumps(skipped_errors, indent=4)
|
||||||
yield await errors.yield_error(500,
|
yield await errors.yield_error(500,
|
||||||
f'Sorry, our API seems to have issues connecting to "{model}".',
|
f'Sorry, our API seems to have issues connecting to "{model}".',
|
||||||
|
@ -241,51 +221,52 @@ async def respond(
|
||||||
return
|
return
|
||||||
|
|
||||||
if (not is_stream) and server_json_response:
|
if (not is_stream) and server_json_response:
|
||||||
server_json_response['system_fingerprint'] = f'fp_' + os.urandom(5).hex()
|
|
||||||
yield json.dumps(server_json_response)
|
yield json.dumps(server_json_response)
|
||||||
|
|
||||||
if incoming_request: # not called by other code, but actually a request
|
role = user.get('role', 'default')
|
||||||
role = user.get('role', 'default')
|
|
||||||
model_multipliers = config['costs']
|
|
||||||
model_multiplier = model_multipliers['other']
|
|
||||||
|
|
||||||
if is_chat:
|
model_multipliers = config['costs']
|
||||||
model_multiplier = model_multipliers['chat-models'].get(payload.get('model'), model_multiplier)
|
model_multiplier = model_multipliers['other']
|
||||||
total_tokens = input_tokens + output_tokens
|
|
||||||
credits_cost = total_tokens / 60
|
|
||||||
credits_cost = round(credits_cost * model_multiplier)
|
|
||||||
|
|
||||||
if credits_cost < 1:
|
if is_chat:
|
||||||
credits_cost = 1
|
model_multiplier = model_multipliers['chat-models'].get(payload.get('model'), model_multiplier)
|
||||||
|
total_tokens = input_tokens + output_tokens
|
||||||
|
credits_cost = total_tokens / 60
|
||||||
|
credits_cost = round(credits_cost * model_multiplier)
|
||||||
|
|
||||||
tokens = {'input': input_tokens, 'output': output_tokens, 'total': total_tokens}
|
if credits_cost < 1:
|
||||||
|
credits_cost = 1
|
||||||
|
|
||||||
elif model == 'dall-e-2':
|
tokens = {
|
||||||
credits_cost = 50
|
'input': input_tokens,
|
||||||
tokens = {'input': 0,'output': 0,'total': credits_cost}
|
'output': output_tokens,
|
||||||
|
'total': total_tokens
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
credits_cost = 5
|
||||||
|
tokens = {
|
||||||
|
'input': 0,
|
||||||
|
'output': 0,
|
||||||
|
'total': credits_cost
|
||||||
|
}
|
||||||
|
|
||||||
elif model == 'dall-e-3':
|
try:
|
||||||
credits_cost = 100
|
role_cost_multiplier = config['roles'][role]['bonus']
|
||||||
tokens = {'input': 0, 'output': 0, 'total': credits_cost}
|
except KeyError:
|
||||||
|
role_cost_multiplier = 1
|
||||||
|
|
||||||
try:
|
credits_cost = round(credits_cost * role_cost_multiplier)
|
||||||
role_cost_multiplier = config['roles'][role]['bonus']
|
|
||||||
except KeyError:
|
|
||||||
role_cost_multiplier = 1
|
|
||||||
|
|
||||||
|
create_background_task(
|
||||||
credits_cost = round(credits_cost * role_cost_multiplier)
|
after_request.after_request(
|
||||||
|
provider=provider_name,
|
||||||
create_background_task(
|
incoming_request=incoming_request,
|
||||||
after_request.after_request(
|
target_request=target_request,
|
||||||
provider=provider_name,
|
user=user,
|
||||||
incoming_request=incoming_request,
|
credits_cost=credits_cost,
|
||||||
target_request=target_request,
|
tokens=tokens,
|
||||||
user=user,
|
path=path,
|
||||||
credits_cost=credits_cost,
|
is_chat=is_chat,
|
||||||
tokens=tokens,
|
model=model,
|
||||||
path=path,
|
|
||||||
is_chat=is_chat,
|
|
||||||
model=model,
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
)
|
||||||
|
|
|
@ -56,7 +56,7 @@ async def test_chat_non_stream_gpt4() -> float:
|
||||||
"""Tests non-streamed chat completions with the GPT-4 model."""
|
"""Tests non-streamed chat completions with the GPT-4 model."""
|
||||||
|
|
||||||
json_data = {
|
json_data = {
|
||||||
'model': 'gpt-4-1106-preview',
|
'model': 'gpt-4',
|
||||||
'messages': MESSAGES,
|
'messages': MESSAGES,
|
||||||
'stream': False
|
'stream': False
|
||||||
}
|
}
|
||||||
|
@ -74,8 +74,7 @@ async def test_chat_non_stream_gpt4() -> float:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
assert '1337' in response.json()['choices'][0]['message']['content'], 'The API did not return a correct response.'
|
assert '1337' in response.json()['choices'][0]['message']['content'], 'The API did not return a correct response.'
|
||||||
except KeyError:
|
except json.decoder.JSONDecodeError:
|
||||||
print(response.json())
|
|
||||||
return response.status_code
|
return response.status_code
|
||||||
|
|
||||||
return time.perf_counter() - request_start
|
return time.perf_counter() - request_start
|
||||||
|
@ -111,11 +110,7 @@ async def test_chat_stream_gpt3() -> float:
|
||||||
break
|
break
|
||||||
|
|
||||||
if chunk:
|
if chunk:
|
||||||
try:
|
chunks.append(json.loads(chunk))
|
||||||
chunks.append(json.loads(chunk))
|
|
||||||
except json.decoder.JSONDecodeError:
|
|
||||||
print(f'[!] Invalid chunk: {chunk}')
|
|
||||||
return f'Received chunk with invalid JSON. Status code {response.status_code}.'
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
resulting_text += json.loads(chunk)['choices'][0]['delta']['content']
|
resulting_text += json.loads(chunk)['choices'][0]['delta']['content']
|
||||||
|
|
Loading…
Reference in a new issue