diff --git a/api/after_request.py b/api/after_request.py
index 1db4cc6..7059ab4 100644
--- a/api/after_request.py
+++ b/api/after_request.py
@@ -2,7 +2,7 @@ from db import logs, stats, users
 from helpers import network
 
 async def after_request(
-    incoming_request: dict,
+    incoming_request,
     target_request: dict,
     user: dict,
     tokens: dict,
diff --git a/api/main.py b/api/main.py
index 97f3597..642aca4 100644
--- a/api/main.py
+++ b/api/main.py
@@ -1,8 +1,10 @@
 """FastAPI setup."""
 
 import os
+import ujson
 
 import fastapi
 import pydantic
+import responder
 
 from dotenv import load_dotenv
@@ -77,6 +79,13 @@ async def v1_handler(request: fastapi.Request):
     res = await handler.handle(incoming_request=request)
     return res
 
+@app.route('/update-v1-models', methods=['GET'])
+async def update_v1_models(request: fastapi.Request):
+    res = []
+    async for response in responder.respond(path='/v1/models', overwrite_method='GET'):
+        res.append(response)
+    return res
+
 @limiter.limit('100/minute', '1000/hour')
 @app.route('/enterprise/v1/{path:path}', methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH'])
 async def enterprise_handler(request: fastapi.Request):
diff --git a/api/providers/__init__.py b/api/providers/__init__.py
index 12e4bca..f3b509f 100644
--- a/api/providers/__init__.py
+++ b/api/providers/__init__.py
@@ -1,2 +1,2 @@
-from . import ails, closed, closed4
-MODULES = [closed, closed4]
+from . import closed, closed4, azure
+MODULES = [closed, closed4, azure]
diff --git a/api/providers/__main__.py b/api/providers/__main__.py
index 33e09bc..d1406d4 100644
--- a/api/providers/__main__.py
+++ b/api/providers/__main__.py
@@ -1,3 +1,5 @@
+"""CLI tool."""
+
 import os
 import sys
 import aiohttp
@@ -22,10 +24,15 @@ async def main():
 
         for file_name in os.listdir(os.path.dirname(__file__)):
             if file_name.endswith('.py') and not file_name.startswith('_'):
-                name = file_name.split('.')[0]
+                provider_name = file_name.split('.')[0]
                 models = importlib.import_module(f'.{file_name.split(".")[0]}', 'providers').MODELS
-
-                print(f'  {name} @ {", ".join(models)}')
+
+                text = ''
+
+                for model in models:
+                    text += f' - {model}\n'
+
+                print(f'  {provider_name}:\n{text}')
 
         sys.exit(0)
@@ -35,7 +42,7 @@ async def main():
         print(exc)
         sys.exit(1)
 
-    if len(sys.argv) > 2:
+    if len(sys.argv) == 3:
         model = sys.argv[2] # choose a specific model
     else:
        model = provider.MODELS[-1] # choose best model
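A note on the `/update-v1-models` route added in `api/main.py` above: `responder.respond` is an async generator, and the new `overwrite_method` keyword (introduced in the `api/responder.py` hunks below) is what lets it run without a live request object. A minimal runnable sketch of that interaction, with `respond` stubbed out (the stub body is hypothetical; only the keyword fallbacks mirror the diff):

```python
import asyncio

async def respond(path='/v1/chat/completions', incoming_request=None, overwrite_method=None):
    # New fallbacks from the responder.py hunk: internal calls pass no
    # request object, so cookies default to {} and the HTTP method is
    # taken from overwrite_method instead of incoming_request.method.
    cookies = incoming_request.cookies if incoming_request else {}
    method = overwrite_method if overwrite_method else incoming_request.method
    yield {'method': method, 'path': path, 'cookies': cookies}  # stub payload

async def main():
    # The internal call shape used by the new /update-v1-models route:
    async for item in respond(path='/v1/models', overwrite_method='GET'):
        print(item)  # {'method': 'GET', 'path': '/v1/models', 'cookies': {}}

asyncio.run(main())
```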
diff --git a/api/providers/helpers/utils.py b/api/providers/helpers/utils.py
index af7c8bb..376f355 100644
--- a/api/providers/helpers/utils.py
+++ b/api/providers/helpers/utils.py
@@ -6,17 +6,26 @@ except ModuleNotFoundError:
 
 # Sort the models by their value/cost/rarity.
 GPT_3 = [
+    'dall-e-2',
+    'code-davinci-002',
+    'text-davinci-002',
+    'text-davinci-003',
+    'gpt-3.5-turbo',
     'gpt-3.5-turbo-16k',
     'gpt-3.5-turbo-0613',
     'gpt-3.5-turbo-0301',
     'gpt-3.5-turbo-16k-0613',
+
+    'gpt-3.5-turbo-instruct',
 ]
 
 GPT_4 = GPT_3 + [
     'gpt-4',
     'gpt-4-0314',
     'gpt-4-0613',
+    'gpt-4-1106-preview',
+    'gpt-4-vision-preview',
 ]
 
 GPT_4_32K = GPT_4 + [
diff --git a/api/responder.py b/api/responder.py
index 2f5058f..318af99 100644
--- a/api/responder.py
+++ b/api/responder.py
@@ -22,6 +22,8 @@ from helpers.tokens import count_tokens_for_messages
 
 load_dotenv()
 
+RETRIES = 10
+
 CRITICAL_API_ERRORS = ['invalid_api_key', 'account_deactivated']
 keymanager = providerkeys.manager
 background_tasks: Set[asyncio.Task[Any]] = set()
@@ -30,7 +32,7 @@ with open(os.path.join('config', 'config.yml'), encoding='utf8') as f:
     config = yaml.safe_load(f)
 
 def create_background_task(coro: Coroutine[Any, Any, Any]) -> None:
-    """asyncio.create_task, which prevents the task from being garbage collected.
+    """Wraps asyncio.create_task so the task is not garbage-collected prematurely.
 
     https://docs.python.org/3/library/asyncio-task.html#asyncio.create_task
     """
@@ -42,7 +44,8 @@ async def respond(
     path: str='/v1/chat/completions',
     user: dict=None,
     payload: dict=None,
-    incoming_request: starlette.requests.Request=None,
+    incoming_request=None,
+    overwrite_method=None
 ):
     """
     Stream the completions request. Sends data in chunks
@@ -75,17 +78,27 @@ async def respond(
     input_tokens = 0
     output_tokens = 0
 
-    for _ in range(10):
+    if incoming_request:
+        cookies = incoming_request.cookies
+    else:
+        cookies = {}
+
+    if overwrite_method:
+        method = overwrite_method
+    else:
+        method = incoming_request.method
+
+    for _ in range(RETRIES):
         try:
             if is_chat:
                 target_request = await load_balancing.balance_chat_request(payload)
             else:
                 target_request = await load_balancing.balance_organic_request({
-                    'method': incoming_request.method,
+                    'method': method,
                     'path': path,
                     'payload': payload,
                     'headers': headers,
-                    'cookies': incoming_request.cookies
+                    'cookies': cookies
                 })
 
         except ValueError:
@@ -181,7 +194,7 @@ async def respond(
                 chunk_no = 0
                 buffer = ''
 
-                async for chunk in response.content.iter_chunked(1024):
+                async for chunk in response.content.iter_any():
                     chunk_no += 1
 
                     chunk = chunk.decode('utf8')
@@ -192,16 +205,21 @@ async def respond(
                     if not chunk.strip() or chunk_no == 1:
                         continue
 
-                    subchunks = chunk.split('\n\n')
-                    buffer += subchunks[0]
+                    buffer += chunk
 
+                    while '\n\n' in buffer:
+                        subchunk, buffer = buffer.split('\n\n', 1)
+
+                        if not subchunk.strip():
+                            continue
 
-                    for subchunk in [buffer] + subchunks[1:-1]:
                         if not subchunk.startswith('data: '):
                             subchunk = 'data: ' + subchunk
 
-                        yield subchunk + '\n\n'
+                        subchunk = subchunk.rsplit('[DONE]', 1)[0]
+                        subchunk += '\n\n'
+
+                        yield subchunk
 
-                    buffer = subchunks[-1]
                     output_tokens = chunk_no
                     break
@@ -209,10 +227,12 @@ async def respond(
                 except aiohttp.client_exceptions.ServerTimeoutError:
                     skipped_errors['timeout'] += 1
                     continue
 
-    else:
-        skipped_errors = {k: v for k, v in skipped_errors.items() if ((isinstance(v, int) and v > 0) or
-            (isinstance(v, list) and len(v) > 0))}
+    skipped_errors = {k: v for k, v in skipped_errors.items() if ((isinstance(v, int) and v > 0) or (isinstance(v, list) and len(v) > 0))}
+    skipped_errors['model'] = model
+    skipped_errors['provider'] = provider_name
+    print(f'[!] Skipped {RETRIES} errors:\n{skipped_errors}')
+    skipped_errors = ujson.dumps(skipped_errors, indent=4)
 
     yield await errors.yield_error(500,
         f'Sorry, our API seems to have issues connecting to "{model}".',
@@ -223,50 +243,48 @@ async def respond(
 
     if (not is_stream) and server_json_response:
         yield json.dumps(server_json_response)
 
-    role = user.get('role', 'default')
+    if incoming_request: # triggered by an actual request, not an internal call
+        role = user.get('role', 'default')
+        model_multipliers = config['costs']
+        model_multiplier = model_multipliers['other']
 
-    model_multipliers = config['costs']
-    model_multiplier = model_multipliers['other']
+        if is_chat:
+            model_multiplier = model_multipliers['chat-models'].get(payload.get('model'), model_multiplier)
+            total_tokens = input_tokens + output_tokens
+            credits_cost = total_tokens / 60
+            credits_cost = round(credits_cost * model_multiplier)
 
-    if is_chat:
-        model_multiplier = model_multipliers['chat-models'].get(payload.get('model'), model_multiplier)
-        total_tokens = input_tokens + output_tokens
-        credits_cost = total_tokens / 60
-        credits_cost = round(credits_cost * model_multiplier)
+            if credits_cost < 1:
+                credits_cost = 1
 
-        if credits_cost < 1:
-            credits_cost = 1
+            tokens = {'input': input_tokens, 'output': output_tokens, 'total': total_tokens}
 
-        tokens = {
-            'input': input_tokens,
-            'output': output_tokens,
-            'total': total_tokens
-        }
-    else:
-        credits_cost = 5
-        tokens = {
-            'input': 0,
-            'output': 0,
-            'total': credits_cost
-        }
+        elif model == 'dall-e-2':
+            credits_cost = 50
+            tokens = {'input': 0, 'output': 0, 'total': credits_cost}
 
-    try:
-        role_cost_multiplier = config['roles'][role]['bonus']
-    except KeyError:
-        role_cost_multiplier = 1
+        elif model == 'dall-e-3':
+            credits_cost = 100
+            tokens = {'input': 0, 'output': 0, 'total': credits_cost}
 
-    credits_cost = round(credits_cost * role_cost_multiplier)
+        try:
+            role_cost_multiplier = config['roles'][role]['bonus']
+        except KeyError:
+            role_cost_multiplier = 1
 
-    create_background_task(
-        after_request.after_request(
-            provider=provider_name,
-            incoming_request=incoming_request,
-            target_request=target_request,
-            user=user,
-            credits_cost=credits_cost,
-            tokens=tokens,
-            path=path,
-            is_chat=is_chat,
-            model=model,
+        credits_cost = round(credits_cost * role_cost_multiplier)
+
+        create_background_task(
+            after_request.after_request(
+                provider=provider_name,
+                incoming_request=incoming_request,
+                target_request=target_request,
+                user=user,
+                credits_cost=credits_cost,
+                tokens=tokens,
+                path=path,
+                is_chat=is_chat,
+                model=model,
+            )
+        )
-    )
diff --git a/checks/client.py b/checks/client.py
index 2f8137f..b6dede5 100644
--- a/checks/client.py
+++ b/checks/client.py
@@ -56,7 +56,7 @@ async def test_chat_non_stream_gpt4() -> float:
     """Tests non-streamed chat completions with the GPT-4 model."""
 
     json_data = {
-        'model': 'gpt-4',
+        'model': 'gpt-4-1106-preview',
        'messages': MESSAGES,
         'stream': False
     }
@@ -74,7 +74,8 @@
     try:
         assert '1337' in response.json()['choices'][0]['message']['content'], 'The API did not return a correct response.'
-    except json.decoder.JSONDecodeError:
+    except KeyError:
+        print(response.json())
         return response.status_code
 
     return time.perf_counter() - request_start
@@ -110,7 +111,11 @@ async def test_chat_stream_gpt3() -> float:
             break
 
         if chunk:
-            chunks.append(json.loads(chunk))
+            try:
+                chunks.append(json.loads(chunk))
+            except json.decoder.JSONDecodeError:
+                print(f'[!] Invalid chunk: {chunk}')
+                return f'Received chunk with invalid JSON. Status code {response.status_code}.'
 
             try:
                 resulting_text += json.loads(chunk)['choices'][0]['delta']['content']
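Two of the hunks above deserve a closer look. First, the streaming change in `api/responder.py`: switching from `iter_chunked(1024)` to `iter_any()` means chunks arrive with arbitrary boundaries, so the new loop buffers until a complete `\n\n`-terminated SSE event is available before yielding. A self-contained sketch of that reassembly logic (simplified: plain strings instead of an aiohttp stream, and no `[DONE]` trimming):

```python
def reassemble_events(chunks):
    """Yield complete 'data: ...' SSE events from arbitrarily split chunks."""
    buffer = ''
    for chunk in chunks:
        buffer += chunk
        # Only emit once a full event (terminated by '\n\n') is buffered.
        while '\n\n' in buffer:
            event, buffer = buffer.split('\n\n', 1)
            if not event.strip():
                continue
            if not event.startswith('data: '):
                event = 'data: ' + event
            yield event + '\n\n'

# A chunk boundary in the middle of an event is now handled correctly:
parts = ['data: {"a"', ': 1}\n\ndata: {"b": 2}\n\n']
assert list(reassemble_events(parts)) == ['data: {"a": 1}\n\n', 'data: {"b": 2}\n\n']
```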
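Second, the reworked billing block in `api/responder.py` now only charges when an `incoming_request` exists, and its chat branch computes `round(total_tokens / 60 * multiplier)` with a floor of one credit before applying the role bonus. A worked example of that formula (the multiplier values here are hypothetical; the real ones come from `config['costs']` and `config['roles'][role]['bonus']` in `config/config.yml`):

```python
def chat_credits(input_tokens, output_tokens, model_multiplier, role_bonus=1):
    """Mirror of the chat branch of the billing block, in isolation."""
    total_tokens = input_tokens + output_tokens
    credits_cost = round((total_tokens / 60) * model_multiplier)
    if credits_cost < 1:
        credits_cost = 1  # floor at one credit, as in the diff
    return round(credits_cost * role_bonus)

# e.g. a 900-token-in / 300-token-out completion on a 2x-cost model:
assert chat_credits(900, 300, model_multiplier=2) == 40  # (1200 / 60) * 2
```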