nova-api/api/responder.py

"""This module contains the streaming logic for the API."""

import os
import json
import ujson
import aiohttp
import asyncio
import starlette

from typing import Any, Coroutine, Set
from rich import print
from dotenv import load_dotenv

import proxies
import after_request
import load_balancing

from helpers import errors
from db import providerkeys

load_dotenv()

CRITICAL_API_ERRORS = ['invalid_api_key', 'account_deactivated']

keymanager = providerkeys.manager

background_tasks: Set[asyncio.Task[Any]] = set()


def create_background_task(coro: Coroutine[Any, Any, Any]) -> None:
    """Schedule ``coro`` as an asyncio task and keep it alive until it finishes.

    The task is stored in the module-level ``background_tasks`` set so that it
    is not garbage collected while still running; a done-callback removes it
    from the set again once it has completed.

    https://docs.python.org/3/library/asyncio-task.html#asyncio.create_task
    """
    pending = asyncio.create_task(coro)
    # Register the clean-up first, then park the strong reference; nothing can
    # run in between because there is no await here.
    pending.add_done_callback(background_tasks.discard)
    background_tasks.add(pending)


def _extract_error_code(body: Any) -> Any:
    """Return ``body['error']['code']`` from a provider JSON reply, or ``''``.

    Replies whose top level is not an object, or whose ``error`` member is not
    an object (for example a plain string), yield ``''`` instead of raising.
    Falsy codes such as ``None`` are normalised to ``''``.
    """
    if not isinstance(body, dict):
        return ''

    error = body.get('error')
    if not isinstance(error, dict):
        return ''

    return error.get('code') or ''


def _split_sse_events(pending: str) -> tuple:
    """Split buffered stream text into (complete events, unfinished tail).

    Events are separated by a blank line (``'\\n\\n'``). Everything after the
    last separator is returned as the tail so the caller can keep buffering it.
    """
    *events, tail = pending.split('\n\n')
    return events, tail


async def respond(
    path: str='/v1/chat/completions',
    user: dict=None,
    payload: dict=None,
    credits_cost: int=0,
    input_tokens: int=0,
    incoming_request: starlette.requests.Request=None,
):
    """
    Forward a request to a provider and relay the answer to the client.

    Up to five provider attempts are made. Attempts that fail with a known
    provider/key problem (insufficient quota, inactive billing, a critical key
    error) or a server timeout are skipped and counted; if every attempt is
    skipped, an error listing the counts is yielded instead.

    Args:
        path: API path of the incoming request. Paths containing
            ``chat/completions`` are balanced as chat requests, everything
            else as "organic" requests.
        user: Passed through unchanged to ``after_request.after_request``.
        payload: JSON body of the request. Must contain ``model`` for chat
            requests.
        credits_cost: Passed through unchanged to ``after_request.after_request``.
        input_tokens: Passed through unchanged to ``after_request.after_request``.
        incoming_request: The original request; its method and cookies are
            forwarded for non-chat requests.

    Yields:
        Either the value produced by ``errors.yield_error`` (on failure), a
        series of ``'data: ...\\n\\n'`` event strings (when the provider answers
        with ``text/event-stream``), or a single JSON string (when the provider
        answers with an OK ``application/json`` reply).

    NOTE(review): a non-OK JSON reply with an unrecognised error code yields
    nothing to the client, but ``after_request`` is still scheduled.
    """
    # Local import: only needed for incremental decoding of the stream below.
    import codecs

    is_chat = 'chat/completions' in path
    model = payload['model'] if is_chat else None

    server_json_response = {}
    is_stream = False

    headers = {
        'Content-Type': 'application/json'
    }

    # How often each retryable problem occurred; reported if all attempts fail.
    skipped_errors = {
        'insufficient_quota': 0,
        'billing_not_active': 0,
        'critical_provider_error': 0,
        'timeout': 0
    }

    for _ in range(5):
        try:
            if is_chat:
                target_request = await load_balancing.balance_chat_request(payload)
            else:
                target_request = await load_balancing.balance_organic_request({
                    'method': incoming_request.method,
                    'path': path,
                    'payload': payload,
                    'headers': headers,
                    'cookies': incoming_request.cookies
                })

        except ValueError:
            yield await errors.yield_error(500, f'Sorry, the API has no active API keys for {model}.', 'Please use a different model.')
            return

        # Reset on every attempt so a missing 'provider_auth' neither raises
        # UnboundLocalError nor silently reuses the previous attempt's key.
        provider_name = ''
        provider_key = ''
        provider_auth = target_request.get('provider_auth')

        if provider_auth:
            # Format is '<provider name>><key>'.
            auth_parts = provider_auth.split('>')
            provider_name = auth_parts[0]
            provider_key = auth_parts[1] if len(auth_parts) > 1 else ''

        if provider_key == '--NO_KEY--':
            print(f'No key for {provider_name}')
            yield await errors.yield_error(500,
                'Sorry, our API seems to have issues connecting to our provider(s).',
                'This most likely isn\'t your fault. Please try again later.'
            )
            return

        # NOTE(review): this updates the headers dict with itself, so it is a
        # no-op (apart from requiring the 'headers' key to exist). Possibly the
        # local default `headers` was meant to be merged in - confirm intent.
        target_request['headers'].update(target_request.get('headers', {}))

        if target_request.get('method', 'POST') == 'GET' and not payload:
            target_request['payload'] = None

        async with aiohttp.ClientSession(connector=proxies.get_proxy().connector) as session:
            try:
                async with session.request(
                    method=target_request.get('method', 'POST'),
                    url=target_request['url'],
                    data=target_request.get('data'),
                    json=target_request.get('payload'),
                    headers=target_request.get('headers', {}),
                    cookies=target_request.get('cookies'),
                    # NOTE(review): ssl=False disables TLS certificate
                    # verification for the upstream request.
                    ssl=False,
                    timeout=aiohttp.ClientTimeout(
                        connect=0.75,
                        total=float(os.getenv('TRANSFER_TIMEOUT', '500'))
                    )
                ) as response:
                    is_stream = response.content_type == 'text/event-stream'

                    if response.content_type == 'application/json':
                        client_json_response = await response.json()
                        error_code = _extract_error_code(client_json_response)

                        if error_code == 'method_not_supported':
                            yield await errors.yield_error(400, 'Sorry, this endpoint does not support this method.', 'Please use a different method.')
                            # Stop here like every other error path, instead of
                            # falling through to the after-request accounting.
                            return

                        if error_code == 'insufficient_quota':
                            print('[!] insufficient quota')
                            # 86400 - presumably seconds (24 h); confirm against keymanager.
                            await keymanager.rate_limit_key(provider_name, provider_key, 86400)
                            skipped_errors['insufficient_quota'] += 1
                            continue

                        if error_code == 'billing_not_active':
                            print('[!] billing not active')
                            await keymanager.deactivate_key(provider_name, provider_key, 'billing_not_active')
                            skipped_errors['billing_not_active'] += 1
                            continue

                        # Critical errors are detected by substring match on the
                        # whole reply, wherever in the body they appear.
                        critical_error = False
                        response_text = str(client_json_response)
                        for error in CRITICAL_API_ERRORS:
                            if error in response_text:
                                await keymanager.deactivate_key(provider_name, provider_key, error)
                                critical_error = True

                        if critical_error:
                            print('[!] critical provider error')
                            skipped_errors['critical_provider_error'] += 1
                            continue

                        if response.ok:
                            server_json_response = client_json_response

                    if is_stream:
                        # Incremental decoding: a multi-byte UTF-8 character may
                        # be split across two network chunks.
                        decoder = codecs.getincrementaldecoder('utf8')()
                        is_azure = 'azure' in provider_name
                        chunk_no = 0
                        buffer = ''

                        async for raw_chunk in response.content.iter_chunked(1024):
                            chunk_no += 1

                            chunk = decoder.decode(raw_chunk)

                            if is_azure:
                                chunk = chunk.replace('data: ', '', 1)

                                # The first Azure chunk is always skipped.
                                if not chunk or chunk_no == 1:
                                    continue

                            # Only emit events that are terminated by a blank
                            # line; keep the unfinished remainder buffered until
                            # the rest of it arrives.
                            events, buffer = _split_sse_events(buffer + chunk)

                            for event in events:
                                if not event:
                                    continue

                                if not event.startswith('data: '):
                                    event = 'data: ' + event

                                yield event + '\n\n'

                        # Whatever is still in `buffer` here was never
                        # terminated by a blank line and is dropped.
                    break

            except aiohttp.client_exceptions.ServerTimeoutError:
                skipped_errors['timeout'] += 1
                continue

    else:
        # Every attempt was skipped: report only the problems that occurred.
        occurred = {name: count for name, count in skipped_errors.items() if count > 0}
        report = ujson.dumps(occurred, indent=4)
        yield await errors.yield_error(500,
            'Sorry, our API seems to have issues connecting to our provider(s).',
            f'Please send this info to support: {report}'
        )
        return

    if (not is_stream) and server_json_response:
        yield json.dumps(server_json_response)

    # Post-processing runs in the background so it does not delay the response.
    create_background_task(
        after_request.after_request(
            incoming_request=incoming_request,
            target_request=target_request,
            user=user,
            credits_cost=credits_cost,
            input_tokens=input_tokens,
            path=path,
            is_chat=is_chat,
            model=model,
        )
    )
Added more documentation 2023-08-12 17:49:31 +02:00			`"""This module contains the streaming logic for the API."""`

some stuff idfk 2023-08-04 03:30:56 +02:00			`import os`
proxies have issues 2023-08-04 17:29:49 +02:00			`import json`
pls dont abuse my api thnkx <3 2023-10-08 00:28:13 +02:00			`import ujson`
some stuff idfk 2023-08-04 03:30:56 +02:00			`import aiohttp`
yoooo everything works now 100% :fire: 2023-10-04 23:24:55 +02:00			`import asyncio`
some stuff idfk 2023-08-04 03:30:56 +02:00			`import starlette`

Fix dangling asyncio tasks 2023-10-06 09:45:50 +02:00			`from typing import Any, Coroutine, Set`
proxies have issues 2023-08-04 17:29:49 +02:00			`from rich import print`
some stuff idfk 2023-08-04 03:30:56 +02:00			`from dotenv import load_dotenv`

			`import proxies`
Fixed moderation, cleanup and other performance changes 2023-08-28 00:58:32 +02:00			`import after_request`
some stuff idfk 2023-08-04 03:30:56 +02:00			`import load_balancing`

implemented key ratelimit checks 2023-10-02 21:09:39 +02:00			`from helpers import errors`
yoooo everything works now 100% :fire: 2023-10-04 23:24:55 +02:00			`from db import providerkeys`
some stuff idfk 2023-08-04 03:30:56 +02:00
			`load_dotenv()`

yoooo everything works now 100% :fire: 2023-10-04 23:24:55 +02:00			`CRITICAL_API_ERRORS = ['invalid_api_key', 'account_deactivated']`

			`keymanager = providerkeys.manager`

Fix dangling asyncio tasks 2023-10-06 09:45:50 +02:00			`background_tasks: Set[asyncio.Task[Any]] = set()`


			`def create_background_task(coro: Coroutine[Any, Any, Any]) -> None:`
			`"""asyncio.create_task, which prevents the task from being garbage collected.`

			`https://docs.python.org/3/library/asyncio-task.html#asyncio.create_task`
			`"""`
			`task = asyncio.create_task(coro)`
			`background_tasks.add(task)`
			`task.add_done_callback(background_tasks.discard)`


I forgor 💀 2023-09-11 02:47:21 +02:00			`async def respond(`
some stuff idfk 2023-08-04 03:30:56 +02:00			`path: str='/v1/chat/completions',`
			`user: dict=None,`
			`payload: dict=None,`
			`credits_cost: int=0,`
			`input_tokens: int=0,`
			`incoming_request: starlette.requests.Request=None,`
			`):`
Documented almost everything 2023-10-12 00:03:15 +02:00			`"""`
			`Stream the completions request. Sends data in chunks`
Ratelimit system 🎉 2023-08-14 10:47:03 +02:00			`If not streaming, it sends the result in its entirety.`
Codebase changes + a lot of commenting 2023-08-13 17:12:35 +02:00			`"""`
Ratelimit system 🎉 2023-08-14 10:47:03 +02:00
proxies have issues 2023-08-04 17:29:49 +02:00			`is_chat = False`
some stuff idfk 2023-08-04 03:30:56 +02:00
Fixed moderation, cleanup and other performance changes 2023-08-28 00:58:32 +02:00			`model = None`

proxies have issues 2023-08-04 17:29:49 +02:00			`if 'chat/completions' in path:`
			`is_chat = True`
			`model = payload['model']`
some stuff idfk 2023-08-04 03:30:56 +02:00
yoooo everything works now 100% :fire: 2023-10-04 23:24:55 +02:00			`server_json_response = {}`
some stuff idfk 2023-08-04 03:30:56 +02:00
User agent fix 2023-08-25 19:13:39 +02:00			`headers = {`
Added key validation by API-key instead of IP Added rate limited keys getting logged in a database 2023-09-23 21:41:48 +02:00			`'Content-Type': 'application/json'`
User agent fix 2023-08-25 19:13:39 +02:00			`}`

some stuff 2023-10-09 19:09:01 +02:00			`skipped_errors = {`
			`'insufficient_quota': 0,`
			`'billing_not_active': 0,`
			`'critical_provider_error': 0,`
			`'timeout': 0`
			`}`

			`for _ in range(5):`
sleep deprivation caused me to be not productive today 2023-08-06 00:43:36 +02:00			`try:`
			`if is_chat:`
			`target_request = await load_balancing.balance_chat_request(payload)`
			`else:`
			`target_request = await load_balancing.balance_organic_request({`
			`'method': incoming_request.method,`
			`'path': path,`
			`'payload': payload,`
			`'headers': headers,`
			`'cookies': incoming_request.cookies`
			`})`
Added azure endpoints 2023-10-06 23:05:38 +02:00
yoooo everything works now 100% :fire: 2023-10-04 23:24:55 +02:00			`except ValueError:`
Added key validation by API-key instead of IP Added rate limited keys getting logged in a database 2023-09-23 21:41:48 +02:00			`yield await errors.yield_error(500, f'Sorry, the API has no active API keys for {model}.', 'Please use a different model.')`
idk 2023-08-06 12:46:41 +02:00			`return`
some thingies 2023-08-05 02:30:42 +02:00
yoooo everything works now 100% :fire: 2023-10-04 23:24:55 +02:00			`provider_auth = target_request.get('provider_auth')`

			`if provider_auth:`
			`provider_name = provider_auth.split('>')[0]`
			`provider_key = provider_auth.split('>')[1]`

Improved errors, checking and fixed ratelimit retrying 2023-10-05 14:17:53 +02:00			`if provider_key == '--NO_KEY--':`
Added azure endpoints 2023-10-06 23:05:38 +02:00			`print(f'No key for {provider_name}')`
Improved errors, checking and fixed ratelimit retrying 2023-10-05 14:17:53 +02:00			`yield await errors.yield_error(500,`
			`'Sorry, our API seems to have issues connecting to our provider(s).',`
			`'This most likely isn\'t your fault. Please try again later.'`
			`)`
			`return`

massive cleanup of streaming (i think this works?) 2023-08-13 18:26:35 +02:00			`target_request['headers'].update(target_request.get('headers', {}))`
Added /v1/models and fixed key invalidation 2023-08-09 11:15:49 +02:00
			`if target_request['method'] == 'GET' and not payload:`
			`target_request['payload'] = None`
proxies have issues 2023-08-04 17:29:49 +02:00
Added more documentation 2023-08-12 17:49:31 +02:00			`async with aiohttp.ClientSession(connector=proxies.get_proxy().connector) as session:`
some thingies 2023-08-05 02:30:42 +02:00			`try:`
			`async with session.request(`
			`method=target_request.get('method', 'POST'),`
			`url=target_request['url'],`
			`data=target_request.get('data'),`
			`json=target_request.get('payload'),`
Added /v1/models and fixed key invalidation 2023-08-09 11:15:49 +02:00			`headers=target_request.get('headers', {}),`
some thingies 2023-08-05 02:30:42 +02:00			`cookies=target_request.get('cookies'),`
			`ssl=False,`
Some fixes? 2023-08-16 15:06:16 +02:00			`timeout=aiohttp.ClientTimeout(`
Added buffering, fixing a common chunk yielding issue 2023-10-08 23:56:32 +02:00			`connect=0.75,`
Changed timeout to 500 2023-09-10 16:22:46 +02:00			`total=float(os.getenv('TRANSFER_TIMEOUT', '500'))`
Improved errors, checking and fixed ratelimit retrying 2023-10-05 14:17:53 +02:00			`)`
some thingies 2023-08-05 02:30:42 +02:00			`) as response:`
Fixed function calling 2023-09-14 18:18:19 +02:00			`is_stream = response.content_type == 'text/event-stream'`
Fixed moderation, cleanup and other performance changes 2023-08-28 00:58:32 +02:00
Added /v1/models and fixed key invalidation 2023-08-09 11:15:49 +02:00			`if response.content_type == 'application/json':`
yoooo everything works now 100% :fire: 2023-10-04 23:24:55 +02:00			`client_json_response = await response.json()`
moderation is done yay 2023-08-06 21:42:07 +02:00
Added azure endpoints 2023-10-06 23:05:38 +02:00			`try:`
			`error_code = client_json_response['error']['code']`
			`except KeyError:`
			`error_code = ''`

			`if error_code == 'method_not_supported':`
			`yield await errors.yield_error(400, 'Sorry, this endpoint does not support this method.', 'Please use a different method.')`

			`if error_code == 'insufficient_quota':`
			`print('[!] insufficient quota')`
			`await keymanager.rate_limit_key(provider_name, provider_key, 86400)`
some stuff 2023-10-09 19:09:01 +02:00			`skipped_errors['insufficient_quota'] += 1`
Added azure endpoints 2023-10-06 23:05:38 +02:00			`continue`

			`if error_code == 'billing_not_active':`
			`print('[!] billing not active')`
			`await keymanager.deactivate_key(provider_name, provider_key, 'billing_not_active')`
some stuff 2023-10-09 19:09:01 +02:00			`skipped_errors['billing_not_active'] += 1`
Added azure endpoints 2023-10-06 23:05:38 +02:00			`continue`
Fixed non-stream responses 2023-09-06 11:44:29 +02:00
yoooo everything works now 100% :fire: 2023-10-04 23:24:55 +02:00			`critical_error = False`
			`for error in CRITICAL_API_ERRORS:`
			`if error in str(client_json_response):`
			`await keymanager.deactivate_key(provider_name, provider_key, error)`
			`critical_error = True`
some stuff 2023-10-09 19:09:01 +02:00
yoooo everything works now 100% :fire: 2023-10-04 23:24:55 +02:00			`if critical_error:`
some stuff 2023-10-09 19:09:01 +02:00			`print('[!] critical provider error')`
			`skipped_errors['critical_provider_error'] += 1`
some thingies 2023-08-05 02:30:42 +02:00			`continue`
some stuff idfk 2023-08-04 03:30:56 +02:00
Added /v1/models and fixed key invalidation 2023-08-09 11:15:49 +02:00			`if response.ok:`
yoooo everything works now 100% :fire: 2023-10-04 23:24:55 +02:00			`server_json_response = client_json_response`
Added /v1/models and fixed key invalidation 2023-08-09 11:15:49 +02:00
sleep deprivation caused me to be not productive today 2023-08-06 00:43:36 +02:00			`if is_stream:`
aight 2023-10-08 21:53:27 +02:00			`chunk_no = 0`
Added buffering, fixing a common chunk yielding issue 2023-10-08 23:56:32 +02:00			`buffer = ''`

			`async for chunk in response.content.iter_chunked(1024):`
aight 2023-10-08 21:53:27 +02:00			`chunk_no += 1`
Added buffering, fixing a common chunk yielding issue 2023-10-08 23:56:32 +02:00
			`chunk = chunk.decode('utf8')`
pls dont abuse my api thnkx <3 2023-10-08 00:28:13 +02:00
			`if 'azure' in provider_name:`
some stuff 2023-10-09 19:09:01 +02:00			`chunk = chunk.replace('data: ', '', 1)`
pls dont abuse my api thnkx <3 2023-10-08 00:28:13 +02:00
aight 2023-10-08 21:53:27 +02:00			`if not chunk or chunk_no == 1:`
pls dont abuse my api thnkx <3 2023-10-08 00:28:13 +02:00			`continue`

Added buffering, fixing a common chunk yielding issue 2023-10-08 23:56:32 +02:00			`subchunks = chunk.split('\n\n')`
			`buffer += subchunks[0]`

some stuff 2023-10-09 19:09:01 +02:00			`for subchunk in [buffer] + subchunks[1:-1]:`
			`if not subchunk.startswith('data: '):`
			`subchunk = 'data: ' + subchunk`
Added buffering, fixing a common chunk yielding issue 2023-10-08 23:56:32 +02:00
			`yield subchunk + '\n\n'`
proxies have issues 2023-08-04 17:29:49 +02:00
some stuff 2023-10-09 19:09:01 +02:00			`buffer = subchunks[-1]`
some thingies 2023-08-05 02:30:42 +02:00			`break`
proxies have issues 2023-08-04 17:29:49 +02:00
Added buffering, fixing a common chunk yielding issue 2023-10-08 23:56:32 +02:00			`except aiohttp.client_exceptions.ServerTimeoutError:`
some stuff 2023-10-09 19:09:01 +02:00			`skipped_errors['timeout'] += 1`
Added buffering, fixing a common chunk yielding issue 2023-10-08 23:56:32 +02:00			`continue`
cool stuff or smth 2023-08-27 04:29:16 +02:00
Fixed non-stream responses 2023-09-06 11:44:29 +02:00			`else:`
some stuff 2023-10-09 19:09:01 +02:00			`skipped_errors = {k: v for k, v in skipped_errors.items() if v > 0}`
			`skipped_errors = ujson.dumps(skipped_errors, indent=4)`
			`yield await errors.yield_error(500,`
			`'Sorry, our API seems to have issues connecting to our provider(s).',`
			`f'Please send this info to support: {skipped_errors}'`
			`)`
Fixed non-stream responses 2023-09-06 11:44:29 +02:00			`return`
Resolved some issues 2023-08-24 14:57:36 +02:00
yoooo everything works now 100% :fire: 2023-10-04 23:24:55 +02:00			`if (not is_stream) and server_json_response:`
			`yield json.dumps(server_json_response)`

Fix dangling asyncio tasks 2023-10-06 09:45:50 +02:00			`create_background_task(`
yoooo everything works now 100% :fire: 2023-10-04 23:24:55 +02:00			`after_request.after_request(`
			`incoming_request=incoming_request,`
			`target_request=target_request,`
			`user=user,`
			`credits_cost=credits_cost,`
			`input_tokens=input_tokens,`
			`path=path,`
			`is_chat=is_chat,`
			`model=model,`
			`)`
Fixed moderation, cleanup and other performance changes 2023-08-28 00:58:32 +02:00			`)`