From d52aadd034bec11ee6696e1164090b2e4d63059a Mon Sep 17 00:00:00 2001
From: nsde
Date: Mon, 9 Oct 2023 19:09:01 +0200
Subject: [PATCH] add webraft provider, fix routing and error reporting

---
 README.md                      |  2 +-
 api/core.py                    |  2 +-
 api/handler.py                 |  6 +++--
 api/helpers/tokens.py          |  4 ++--
 api/main.py                    |  2 +-
 api/providers/__init__.py      | 14 ++---------
 api/providers/azure.py         |  3 +--
 api/providers/closed.py        |  1 -
 api/providers/closed4.py       |  1 -
 api/providers/closed432.py     |  1 -
 api/providers/helpers/utils.py | 10 --------
 api/providers/mandrill.py      |  3 +--
 api/providers/webraft.py       | 25 ++++++++++++++++++++
 api/responder.py               | 43 ++++++++++++++++++++--------------
 checks/client.py               |  2 +-
 run/__main__.py                |  1 -
 16 files changed, 64 insertions(+), 56 deletions(-)
 create mode 100644 api/providers/webraft.py

diff --git a/README.md b/README.md
index fb68bd5..4763c99 100644
--- a/README.md
+++ b/README.md
@@ -167,7 +167,7 @@ You can also just add the *beginning* of an API address, like `12.123.` (without
 ### Core Keys
 `CORE_API_KEY` specifies the **very secret key** for which need to access the entire user database etc.
 
-`TEST_NOVA_KEY` is the API key the which is used in tests. It should be one with tons of credits.
+`NOVA_KEY` is the API key which is used in tests. It should be one with tons of credits.
 
 ### Webhooks
 `DISCORD_WEBHOOK__USER_CREATED` is the Discord webhook URL for when a user is created.
diff --git a/api/core.py b/api/core.py
index f802e20..746e2bf 100644
--- a/api/core.py
+++ b/api/core.py
@@ -130,7 +130,7 @@ async def run_checks(incoming_request: fastapi.Request):
         checks.client.test_chat_non_stream_gpt4,
         checks.client.test_chat_stream_gpt3,
         checks.client.test_function_calling,
-        checks.client.test_image_generation,
+        # checks.client.test_image_generation,
         # checks.client.test_speech_to_text,
         checks.client.test_models
     ]
diff --git a/api/handler.py b/api/handler.py
index 891ed80..c0d922a 100644
--- a/api/handler.py
+++ b/api/handler.py
@@ -38,7 +38,10 @@ async def handle(incoming_request: fastapi.Request):
 
     ip_address = await network.get_ip(incoming_request)
 
-    if '/models' in path:
+    if '/dashboard' in path:
+        return errors.error(404, 'You can\'t access /dashboard.', 'This is a private endpoint.')
+
+    if path.startswith('/v1/models'):
         return fastapi.responses.JSONResponse(content=models_list)
 
     try:
@@ -94,7 +97,6 @@ async def handle(incoming_request: fastapi.Request):
 
     if user['credits'] < cost:
         return await errors.error(429, 'Not enough credits.', 'Wait or earn more credits. Learn more on our website or Discord server.')
 
-
     if 'DISABLE_VARS' not in key_tags:
         payload_with_vars = json.dumps(payload)
diff --git a/api/helpers/tokens.py b/api/helpers/tokens.py
index 848cac8..86fe04b 100644
--- a/api/helpers/tokens.py
+++ b/api/helpers/tokens.py
@@ -38,10 +38,10 @@ async def count_for_messages(messages: list, model: str='gpt-3.5-turbo-0613') ->
         tokens_per_name = -1  # if there's a name, the role is omitted
 
     elif 'gpt-3.5-turbo' in model:
-        return count_for_messages(messages, model='gpt-3.5-turbo-0613')
+        return await count_for_messages(messages, model='gpt-3.5-turbo-0613')
 
     elif 'gpt-4' in model:
-        return count_for_messages(messages, model='gpt-4-0613')
+        return await count_for_messages(messages, model='gpt-4-0613')
 
     else:
         raise NotImplementedError(f"""count_for_messages() is not implemented for model {model}.
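
Note: the api/helpers/tokens.py hunk above matters because count_for_messages() is an async function; without `await`, the recursive call returns an un-awaited coroutine object instead of the token count. A minimal, standalone illustration of the difference (not project code; names and the constant are illustrative only):

    import asyncio

    async def count(model: str) -> int:
        if model == 'gpt-4':
            # Aliased model name: recurse with the pinned snapshot name.
            # Without `await`, this line would return a coroutine object.
            return await count('gpt-4-0613')
        return 7  # stand-in for the real token math

    print(asyncio.run(count('gpt-4')))  # -> 7
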
diff --git a/api/main.py b/api/main.py
index a97d2d4..cc7e2cd 100644
--- a/api/main.py
+++ b/api/main.py
@@ -7,10 +7,10 @@ from rich import print
 from dotenv import load_dotenv
 from bson.objectid import ObjectId
+from fastapi.middleware.cors import CORSMiddleware
 
 from slowapi.errors import RateLimitExceeded
 from slowapi.middleware import SlowAPIMiddleware
-from fastapi.middleware.cors import CORSMiddleware
 from slowapi.util import get_remote_address
 from slowapi import Limiter, _rate_limit_exceeded_handler
diff --git a/api/providers/__init__.py b/api/providers/__init__.py
index 25ad1eb..41fbd12 100644
--- a/api/providers/__init__.py
+++ b/api/providers/__init__.py
@@ -1,12 +1,2 @@
-from . import \
-    azure \
-    # closed, \
-    # closed4
-    # closed432
-
-MODULES = [
-    azure,
-    # closed,
-    # closed4,
-    # closed432,
-]
+from . import azure, webraft
+MODULES = [azure, webraft]
diff --git a/api/providers/azure.py b/api/providers/azure.py
index 89bb130..4d7dd36 100644
--- a/api/providers/azure.py
+++ b/api/providers/azure.py
@@ -2,7 +2,6 @@ from .helpers import utils
 
 AUTH = True
 ORGANIC = False
-CONTEXT = True
 STREAMING = True
 MODERATIONS = False
 ENDPOINT = 'https://nova-00001.openai.azure.com'
@@ -12,7 +11,7 @@ MODELS = [
     'gpt-4',
     'gpt-4-32k'
 ]
 
-# MODELS = [f'{model}-azure' for model in MODELS]
+MODELS += [f'{model}-azure' for model in MODELS]
 
 AZURE_API = '2023-08-01-preview'
diff --git a/api/providers/closed.py b/api/providers/closed.py
index 2bb4515..c9ead32 100644
--- a/api/providers/closed.py
+++ b/api/providers/closed.py
@@ -2,7 +2,6 @@ from .helpers import utils
 
 AUTH = True
 ORGANIC = True
-CONTEXT = True
 STREAMING = True
 MODERATIONS = True
 ENDPOINT = 'https://api.openai.com'
diff --git a/api/providers/closed4.py b/api/providers/closed4.py
index 6da901f..2847da5 100644
--- a/api/providers/closed4.py
+++ b/api/providers/closed4.py
@@ -2,7 +2,6 @@ from .helpers import utils
 
 AUTH = True
 ORGANIC = False
-CONTEXT = True
 STREAMING = True
 MODERATIONS = True
 ENDPOINT = 'https://api.openai.com'
diff --git a/api/providers/closed432.py b/api/providers/closed432.py
index 8215bcf..8330531 100644
--- a/api/providers/closed432.py
+++ b/api/providers/closed432.py
@@ -2,7 +2,6 @@ from .helpers import utils
 
 AUTH = True
 ORGANIC = False
-CONTEXT = True
 STREAMING = True
 MODERATIONS = False
 ENDPOINT = 'https://api.openai.com'
diff --git a/api/providers/helpers/utils.py b/api/providers/helpers/utils.py
index 2dbb4eb..7362e6f 100644
--- a/api/providers/helpers/utils.py
+++ b/api/providers/helpers/utils.py
@@ -23,15 +23,5 @@ GPT_4_32K = GPT_4 + [
     'gpt-4-32k-0613',
 ]
 
-async def conversation_to_prompt(conversation: list) -> str:
-    text = ''
-
-    for message in conversation:
-        text += f'<|{message["role"]}|>: {message["content"]}\n'
-
-    text += '<|assistant|>:'
-
-    return text
-
 async def random_secret_for(name: str) -> str:
     return await providerkeys.manager.get_key(name)
diff --git a/api/providers/mandrill.py b/api/providers/mandrill.py
index dc0a8a3..4e9a7ee 100644
--- a/api/providers/mandrill.py
+++ b/api/providers/mandrill.py
@@ -2,7 +2,6 @@ from .helpers import utils
 
 AUTH = True
 ORGANIC = False
-CONTEXT = True
 STREAMING = True
 MODELS = ['llama-2-7b-chat']
 
@@ -12,7 +11,7 @@ async def chat_completion(**kwargs):
 
     return {
         'method': 'POST',
-        'url': f'https://api.mandrillai.tech/v1/chat/completions',
+        'url': 'https://api.mandrillai.tech/v1/chat/completions',
         'payload': payload,
         'headers': {
             'Authorization': f'Bearer {key}'
diff --git a/api/providers/webraft.py b/api/providers/webraft.py
new file mode 100644
index 0000000..33d79b6
--- /dev/null
+++ b/api/providers/webraft.py
@@ -0,0 +1,25 @@
+from .helpers import utils
+
+AUTH = True
+ORGANIC = False
+STREAMING = True
+MODELS = [
+    'gpt-3.5-turbo-0613',
+    'gpt-3.5-turbo-0301',
+    'gpt-3.5-turbo-16k-0613'
+]
+
+async def chat_completion(**kwargs):
+    payload = kwargs
+    key = await utils.random_secret_for('webraft')
+
+    return {
+        'method': 'POST',
+        'url': 'https://thirdparty.webraft.in/v1/chat/completions',
+        'payload': payload,
+        'headers': {
+            'Content-Type': 'application/json',
+            'Authorization': f'Bearer {key}'
+        },
+        'provider_auth': f'webraft>{key}'
+    }
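
Note: every provider module above (azure, mandrill, webraft) reduces to an async chat_completion(**kwargs) that returns a plain request description rather than performing the request itself. Roughly, such a dict can be turned into an outgoing aiohttp call along the following lines; this is only a sketch, assuming aiohttp as the HTTP client (as the responder.py hunks below suggest), not the project's actual dispatch code:

    import aiohttp

    async def send_provider_request(target_request: dict) -> dict:
        # target_request is the dict a provider's chat_completion() returns,
        # e.g. {'method': 'POST', 'url': ..., 'payload': ..., 'headers': ...}
        async with aiohttp.ClientSession() as session:
            async with session.request(
                method=target_request['method'],
                url=target_request['url'],
                json=target_request['payload'],
                headers=target_request['headers'],
                timeout=aiohttp.ClientTimeout(total=120),  # assumed timeout, not taken from the patch
            ) as response:
                return await response.json()
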
diff --git a/api/responder.py b/api/responder.py
index eaf6f39..3bcf745 100644
--- a/api/responder.py
+++ b/api/responder.py
@@ -49,7 +49,14 @@ async def respond(
         'Content-Type': 'application/json'
     }
 
-    for i in range(5):
+    skipped_errors = {
+        'insufficient_quota': 0,
+        'billing_not_active': 0,
+        'critical_provider_error': 0,
+        'timeout': 0
+    }
+
+    for _ in range(5):
         try:
             if is_chat:
                 target_request = await load_balancing.balance_chat_request(payload)
@@ -116,11 +123,13 @@ async def respond(
                 if error_code == 'insufficient_quota':
                     print('[!] insufficient quota')
                     await keymanager.rate_limit_key(provider_name, provider_key, 86400)
+                    skipped_errors['insufficient_quota'] += 1
                     continue
 
                 if error_code == 'billing_not_active':
                     print('[!] billing not active')
                     await keymanager.deactivate_key(provider_name, provider_key, 'billing_not_active')
+                    skipped_errors['billing_not_active'] += 1
                     continue
 
                 critical_error = False
 
                 if error in str(client_json_response):
                     await keymanager.deactivate_key(provider_name, provider_key, error)
                     critical_error = True
-
+
                 if critical_error:
-                    print('[!] critical error')
+                    print('[!] critical provider error')
+                    skipped_errors['critical_provider_error'] += 1
                     continue
 
                 if response.ok:
                     server_json_response = client_json_response
-                else:
-                    continue
-
             if is_stream:
-                try:
-                    response.raise_for_status()
-                except Exception as exc:
-                    if 'Too Many Requests' in str(exc):
-                        print('[!] too many requests')
-                        continue
-
                 chunk_no = 0
                 buffer = ''
 
@@ -156,7 +156,7 @@
                     chunk = chunk.decode('utf8')
 
                     if 'azure' in provider_name:
-                        chunk = chunk.replace('data: ', '')
+                        chunk = chunk.replace('data: ', '', 1)
 
                     if not chunk or chunk_no == 1:
                         continue
 
                     subchunks = chunk.split('\n\n')
                     buffer += subchunks[0]
 
-                    yield buffer + '\n\n'
-                    buffer = subchunks[-1]
+                    for subchunk in [buffer] + subchunks[1:-1]:
+                        if not subchunk.startswith('data: '):
+                            subchunk = 'data: ' + subchunk
 
-                    for subchunk in subchunks[1:-1]:
                         yield subchunk + '\n\n'
 
+                    buffer = subchunks[-1]
+
             break
 
         except aiohttp.client_exceptions.ServerTimeoutError:
+            skipped_errors['timeout'] += 1
             continue
 
     else:
-        yield await errors.yield_error(500, 'Sorry, our API seems to have issues connecting to our provider(s).', 'This most likely isn\'t your fault. Please try again later.')
+        skipped_errors = {k: v for k, v in skipped_errors.items() if v > 0}
+        skipped_errors = ujson.dumps(skipped_errors, indent=4)
+
+        yield await errors.yield_error(500,
+            'Sorry, our API seems to have issues connecting to our provider(s).',
+            f'Please send this info to support: {skipped_errors}'
+        )
 
         return
 
     if (not is_stream) and server_json_response:
diff --git a/checks/client.py b/checks/client.py
index 7410c23..f6fe5d1 100644
--- a/checks/client.py
+++ b/checks/client.py
@@ -100,7 +100,7 @@ async def test_chat_stream_gpt3() -> float:
 
     async for chunk in response.aiter_text():
         for subchunk in chunk.split('\n\n'):
-            chunk = subchunk.replace('data: ', '').strip()
+            chunk = subchunk.replace('data: ', '', 1).strip()
 
             if chunk == '[DONE]':
                 break
diff --git a/run/__main__.py b/run/__main__.py
index ebd513b..ced6c7d 100644
--- a/run/__main__.py
+++ b/run/__main__.py
@@ -14,7 +14,6 @@ Runs for production on the speicified port.
 
 import os
 import sys
-import time
 
 port = sys.argv[1] if len(sys.argv) > 1 else 2332
 dev = True
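
Note: the streaming changes in api/responder.py and checks/client.py above share one idea: each '\n\n'-separated SSE record is 'data: ' plus a payload, and only the first occurrence of that prefix should be stripped (hence replace('data: ', '', 1)) so payloads that happen to contain the substring survive intact. A standalone sketch of that parsing, with illustrative names only (not project code):

    def split_sse_events(raw_chunk: str) -> list[str]:
        # Split a raw network chunk into SSE data payloads, stripping only
        # the leading 'data: ' prefix from each record.
        events = []
        for record in raw_chunk.split('\n\n'):
            record = record.strip()
            if record:
                events.append(record.replace('data: ', '', 1))
        return events

    print(split_sse_events('data: {"delta": "hi"}\n\ndata: [DONE]\n\n'))
    # -> ['{"delta": "hi"}', '[DONE]']
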