some stuff

2024-11-25 14:23:57 +01:00 · 2023-10-09 19:09:01 +02:00 · 2023-10-09 19:09:01 +02:00 · d52aadd034
parent 23a904f3ce
commit d52aadd034
16 changed files with 64 additions and 56 deletions
--- a/README.md
+++ b/README.md
@ -167,7 +167,7 @@ You can also just add the *beginning* of an API address, like `12.123.` (without
 ### Core Keys
 `CORE_API_KEY` specifies the **very secret key** for  which need to access the entire user database etc.
-`TEST_NOVA_KEY` is the API key the which is used in tests. It should be one with tons of credits.
+`NOVA_KEY` is the API key which is used in tests. It should be one with tons of credits.
 ### Webhooks
 `DISCORD_WEBHOOK__USER_CREATED` is the Discord webhook URL for when a user is created.
--- a/api/core.py
+++ b/api/core.py
@ -130,7 +130,7 @@ async def run_checks(incoming_request: fastapi.Request):
        checks.client.test_chat_non_stream_gpt4,
        checks.client.test_chat_stream_gpt3,
        checks.client.test_function_calling,
-        checks.client.test_image_generation,
+        # checks.client.test_image_generation,
        # checks.client.test_speech_to_text,
        checks.client.test_models
    ]
--- a/api/handler.py
+++ b/api/handler.py
@ -38,7 +38,10 @@ async def handle(incoming_request: fastapi.Request):
    ip_address = await network.get_ip(incoming_request)
-    if '/models' in path:
+    if '/dashboard' in path:
        return errors.error(404, 'You can\'t access /dashboard.', 'This is a private endpoint.')
    if path.startswith('/v1/models'):
        return fastapi.responses.JSONResponse(content=models_list)
    try:
@ -94,7 +97,6 @@ async def handle(incoming_request: fastapi.Request):
    if user['credits'] < cost:
        return await errors.error(429, 'Not enough credits.', 'Wait or earn more credits. Learn more on our website or Discord server.')
    if 'DISABLE_VARS' not in key_tags:
        payload_with_vars = json.dumps(payload)
--- a/api/helpers/tokens.py
+++ b/api/helpers/tokens.py
@ -38,10 +38,10 @@ async def count_for_messages(messages: list, model: str='gpt-3.5-turbo-0613') ->
        tokens_per_name = -1  # if there's a name, the role is omitted
    elif 'gpt-3.5-turbo' in model:
-        return count_for_messages(messages, model='gpt-3.5-turbo-0613')
+        return await count_for_messages(messages, model='gpt-3.5-turbo-0613')
    elif 'gpt-4' in model:
-        return count_for_messages(messages, model='gpt-4-0613')
+        return await count_for_messages(messages, model='gpt-4-0613')
    else:
        raise NotImplementedError(f"""count_for_messages() is not implemented for model {model}.
--- a/api/main.py
+++ b/api/main.py
@ -7,10 +7,10 @@ from rich import print
 from dotenv import load_dotenv
 from bson.objectid import ObjectId
 from fastapi.middleware.cors import CORSMiddleware
 from slowapi.errors import RateLimitExceeded
 from slowapi.middleware import SlowAPIMiddleware
 from fastapi.middleware.cors import CORSMiddleware
 from slowapi.util import get_remote_address
 from slowapi import Limiter, _rate_limit_exceeded_handler
--- a/api/providers/init.py
+++ b/api/providers/init.py
@ -1,12 +1,2 @@
-from . import \
+from . import azure, webraft
-    azure \
+MODULES =    [azure, webraft]
    # closed, \
    # closed4
    # closed432
 MODULES = [
    azure,
    # closed,
    # closed4,
    # closed432,
 ]
--- a/api/providers/azure.py
+++ b/api/providers/azure.py
@ -2,7 +2,6 @@ from .helpers import utils
 AUTH = True
 ORGANIC = False
 CONTEXT = True
 STREAMING = True
 MODERATIONS = False
 ENDPOINT = 'https://nova-00001.openai.azure.com'
@ -12,7 +11,7 @@ MODELS = [
    'gpt-4',
    'gpt-4-32k'
 ]
-# MODELS = [f'{model}-azure' for model in MODELS]
+MODELS += [f'{model}-azure' for model in MODELS]
 AZURE_API = '2023-08-01-preview'
--- a/api/providers/closed.py
+++ b/api/providers/closed.py
@ -2,7 +2,6 @@ from .helpers import utils
 AUTH = True
 ORGANIC = True
 CONTEXT = True
 STREAMING = True
 MODERATIONS = True
 ENDPOINT = 'https://api.openai.com'
--- a/api/providers/closed4.py
+++ b/api/providers/closed4.py
@ -2,7 +2,6 @@ from .helpers import utils
 AUTH = True
 ORGANIC = False
 CONTEXT = True
 STREAMING = True
 MODERATIONS = True
 ENDPOINT = 'https://api.openai.com'
--- a/api/providers/closed432.py
+++ b/api/providers/closed432.py
@ -2,7 +2,6 @@ from .helpers import utils
 AUTH = True
 ORGANIC = False
 CONTEXT = True
 STREAMING = True
 MODERATIONS = False
 ENDPOINT = 'https://api.openai.com'
--- a/api/providers/helpers/utils.py
+++ b/api/providers/helpers/utils.py
@ -23,15 +23,5 @@ GPT_4_32K = GPT_4 + [
    'gpt-4-32k-0613',
 ]
 async def conversation_to_prompt(conversation: list) -> str:
    """Flatten a chat conversation into a single plain-text prompt.

    Each message is rendered on its own line as ``<|role|>: content`` and the
    prompt ends with an open ``<|assistant|>:`` tag for the model to complete.

    Args:
        conversation: Messages, each indexable by ``"role"`` and ``"content"``.

    Returns:
        The concatenated prompt string.
    """
    text = ''
    for message in conversation:
        text += f'<|{message["role"]}|>: {message["content"]}\n'
    # Trailing assistant tag with no newline: the completion continues from here.
    text += '<|assistant|>:'
    return text
 async def random_secret_for(name: str) -> str:
    """Return a provider key for ``name`` via ``providerkeys.manager.get_key``.

    NOTE(review): the function name suggests the key is picked at random, but
    the selection policy lives in ``providerkeys.manager`` and is not visible
    here -- confirm against that module.
    """
    return await providerkeys.manager.get_key(name)
--- a/api/providers/mandrill.py
+++ b/api/providers/mandrill.py
@ -2,7 +2,6 @@ from .helpers import utils
 AUTH = True
 ORGANIC = False
 CONTEXT = True
 STREAMING = True
 MODELS = ['llama-2-7b-chat']
@ -12,7 +11,7 @@ async def chat_completion(**kwargs):
    return {
        'method': 'POST',
-        'url': f'https://api.mandrillai.tech/v1/chat/completions',
+        'url': 'https://api.mandrillai.tech/v1/chat/completions',
        'payload': payload,
        'headers': {
            'Authorization': f'Bearer {key}'
--- a/api/providers/webraft.py
+++ b/api/providers/webraft.py
@ -0,0 +1,25 @@
 from .helpers import utils
 AUTH = True
 ORGANIC = False
 STREAMING = True
 MODELS = [
    'gpt-3.5-turbo-0613',
    'gpt-3.5-turbo-0301',
    'gpt-3.5-turbo-16k-0613'
 ]
 async def chat_completion(**kwargs):
    """Describe the upstream request for a chat completion via the webraft provider.

    The keyword arguments are forwarded unchanged as the request payload. No
    HTTP request is made here; the returned dict only describes one.

    Returns:
        A dict with the keys ``method``, ``url``, ``payload``, ``headers`` and
        ``provider_auth`` (the string ``webraft>`` followed by the key used).
    """
    payload = kwargs
    # Key lookup is delegated to the shared provider helper.
    key = await utils.random_secret_for('webraft')
    return {
        'method': 'POST',
        'url': 'https://thirdparty.webraft.in/v1/chat/completions',
        'payload': payload,
        'headers': {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {key}'
        },
        'provider_auth': f'webraft>{key}'
    }
--- a/api/responder.py
+++ b/api/responder.py
@ -49,7 +49,14 @@ async def respond(
        'Content-Type': 'application/json'
    }
-    for i in range(5):
+    skipped_errors = {
        'insufficient_quota': 0,
        'billing_not_active': 0,
        'critical_provider_error': 0,
        'timeout': 0
    }
    for _ in range(5):
        try:
            if is_chat:
                target_request = await load_balancing.balance_chat_request(payload)
@ -116,11 +123,13 @@ async def respond(
                        if error_code == 'insufficient_quota':
                            print('[!] insufficient quota')
                            await keymanager.rate_limit_key(provider_name, provider_key, 86400)
                            skipped_errors['insufficient_quota'] += 1
                            continue
                        if error_code == 'billing_not_active':
                            print('[!] billing not active')
                            await keymanager.deactivate_key(provider_name, provider_key, 'billing_not_active')
                            skipped_errors['billing_not_active'] += 1
                            continue
                        critical_error = False
@ -128,25 +137,16 @@ async def respond(
                            if error in str(client_json_response):
                                await keymanager.deactivate_key(provider_name, provider_key, error)
                                critical_error = True
-                        
+
                        if critical_error:
-                            print('[!] critical error')
+                            print('[!] critical provider error')
                            skipped_errors['critical_provider_error'] += 1
                            continue
                        if response.ok:
                            server_json_response = client_json_response
                        else:
                            continue
                    if is_stream:
                        try:
                            response.raise_for_status()
                        except Exception as exc:
                            if 'Too Many Requests' in str(exc):
                                print('[!] too many requests')
                                continue
                        chunk_no = 0
                        buffer = ''
@ -156,7 +156,7 @@ async def respond(
                            chunk = chunk.decode('utf8')
                            if 'azure' in provider_name:
-                                chunk = chunk.replace('data: ', '')
+                                chunk = chunk.replace('data: ', '', 1)
                                if not chunk or chunk_no == 1:
                                    continue
@ -164,19 +164,26 @@ async def respond(
                            subchunks = chunk.split('\n\n')
                            buffer += subchunks[0]
-                            yield buffer + '\n\n'
+                            for subchunk in [buffer] + subchunks[1:-1]:
-                            buffer = subchunks[-1]
+                                if not subchunk.startswith('data: '):
                                    subchunk = 'data: ' + subchunk
                            for subchunk in subchunks[1:-1]:
                                yield subchunk + '\n\n'
                            buffer = subchunks[-1]
                    break
            except aiohttp.client_exceptions.ServerTimeoutError:
                skipped_errors['timeout'] += 1
                continue
    else:
-        yield await errors.yield_error(500, 'Sorry, our API seems to have issues connecting to our provider(s).', 'This most likely isn\'t your fault. Please try again later.')
+        skipped_errors = {k: v for k, v in skipped_errors.items() if v > 0}
        skipped_errors = ujson.dumps(skipped_errors, indent=4)
        yield await errors.yield_error(500,
            'Sorry, our API seems to have issues connecting to our provider(s).',
            f'Please send this info to support: {skipped_errors}'
        )
        return
    if (not is_stream) and server_json_response:
--- a/checks/client.py
+++ b/checks/client.py
@ -100,7 +100,7 @@ async def test_chat_stream_gpt3() -> float:
    async for chunk in response.aiter_text():
        for subchunk in chunk.split('\n\n'):
-            chunk = subchunk.replace('data: ', '').strip()
+            chunk = subchunk.replace('data: ', '', 1).strip()
            if chunk == '[DONE]':
                break
--- a/run/main.py
+++ b/run/main.py
@ -14,7 +14,6 @@ Runs for production on the specified port.
 import os
 import sys
 import time
 port = sys.argv[1] if len(sys.argv) > 1 else 2332
 dev = True