nova-api/api/streaming.py

"""This module contains the streaming logic for the API."""

import os
import json
import dhooks
import asyncio
import aiohttp
import starlette
import datetime

from rich import print
from dotenv import load_dotenv
from python_socks._errors import ProxyError

import proxies
import provider_auth
import load_balancing

from db import logs
from db.users import UserManager
from db.stats import StatsManager
from helpers import network, chat, errors
import yaml


load_dotenv()

## Loads config which contains rate limits
with open('config/credits.yml', encoding='utf8') as f:
    config = yaml.safe_load(f)

## Where all rate limit requested data will be stored.
# Rate limit data is **not persistent** (It will be deleted on server stop/restart).
# user_last_request_time = {}

DEMO_PAYLOAD = {
    'model': 'gpt-3.5-turbo',
    'messages': [
        {
            'role': 'user',
            'content': '1+1='
        }
    ]
}

async def process_response(response, is_chat, chat_id, model, target_request):
    """Proccesses chunks from streaming

    Args:
        response (_type_): The response
        is_chat (bool): If there is 'chat/completions' in path
        chat_id (_type_): ID of chat with bot
        model (_type_): What AI model it is
    """
    async for chunk in response.content.iter_any():
        chunk = chunk.decode("utf8").strip()
        send = False

        if is_chat and '{' in chunk:
            data = json.loads(chunk.split('data: ')[1])
            chunk = chunk.replace(data['id'], chat_id)
            send = True

            if target_request['module'] == 'twa' and data.get('text'):
                chunk = await chat.create_chat_chunk(chat_id=chat_id, model=model, content=['text'])

            if (not data['choices'][0]['delta']) or data['choices'][0]['delta'] == {'role': 'assistant'}:
                send = False

        if send and chunk:
            yield chunk + '\n\n'

async def stream(
    path: str='/v1/chat/completions',
    user: dict=None,
    payload: dict=None,
    credits_cost: int=0,
    input_tokens: int=0,
    incoming_request: starlette.requests.Request=None,
):
    """Stream the completions request. Sends data in chunks
    If not streaming, it sends the result in its entirety.

    Args:
        path (str, optional): URL Path. Defaults to '/v1/chat/completions'.
        user (dict, optional): User object (dict) Defaults to None.
        payload (dict, optional): Payload. Defaults to None.
        credits_cost (int, optional): Cost of the credits of the request. Defaults to 0.
        input_tokens (int, optional): Total tokens calculated with tokenizer. Defaults to 0.
        incoming_request (starlette.requests.Request, optional): Incoming request. Defaults to None.
    """

    ## Rate limits user.
    # If rate limit is exceeded, error code 429. Otherwise, lets the user pass but notes down
    # last request time for future requests.
    # if user:
    #     role = user.get('role', 'default')
    #     rate_limit = config['roles'][role]['rate_limit'].get(payload['model'], 10)

    #     last_request_time = user_last_request_time.get(user['api_key'])
    #     time_since_last_request = datetime.datetime.now() - last_request_time

    #     if time_since_last_request < datetime.timedelta(seconds=rate_limit):
    #         yield await errors.yield_error(429, 'Rate limit exceeded', "You are making requests too quickly. Please wait and try again later. Ask a administrator if you think this shouldn't happen.")
    #         return
    #     else:
    #         user_last_request_time[user['_id']] = datetime.datetime.now()

    ## Setup managers
    db = UserManager()
    stats = StatsManager()

    is_chat = False
    is_stream = payload.get('stream', False)

    if 'chat/completions' in path:
        is_chat = True
        model = payload['model']

    if is_chat and is_stream:
        chat_id = await chat.create_chat_id()
        yield await chat.create_chat_chunk(chat_id=chat_id, model=model, content=chat.CompletionStart)
        yield await chat.create_chat_chunk(chat_id=chat_id, model=model, content=None)

    json_response = {'error': 'No JSON response could be received'}

    for _ in range(5):
        headers = {'Content-Type': 'application/json'}


        # Load balancing
        # If the request is a chat completion, then we need to load balance between chat providers
        # If the request is an organic request, then we need to load balance between organic providers

        try:
            if is_chat:
                target_request = await load_balancing.balance_chat_request(payload)
            else:
                
                # In this case we are doing a organic request. "organic" means that it's not using a reverse engineered front-end, but rather ClosedAI's API directly
                # churchless.tech is an example of an organic provider, because it redirects the request to ClosedAI.
                target_request = await load_balancing.balance_organic_request({
                    'method': incoming_request.method,
                    'path': path,
                    'payload': payload,
                    'headers': headers,
                    'cookies': incoming_request.cookies
                })
        except ValueError as exc:
            webhook = dhooks.Webhook(os.getenv('DISCORD_WEBHOOK__API_ISSUE'))
            webhook.send(content=f'API Issue: **`{exc}`**\nhttps://i.imgflip.com/7uv122.jpg')
            yield await errors.yield_error(500, 'Sorry, the API has no working keys anymore.', 'The admins have been messaged automatically.')
            return

        target_request['headers'].update(target_request.get('headers', {}))

        if target_request['method'] == 'GET' and not payload:
            target_request['payload'] = None

        # We haven't done any requests as of right now, everything until now was just preparation
        # Here, we process the request
        async with aiohttp.ClientSession(connector=proxies.get_proxy().connector) as session:
            try:
                async with session.request(
                    method=target_request.get('method', 'POST'),
                    url=target_request['url'],
                    data=target_request.get('data'),
                    json=target_request.get('payload'),
                    headers=target_request.get('headers', {}),
                    cookies=target_request.get('cookies'),
                    ssl=False,
                    timeout=aiohttp.ClientTimeout(
                        connect=3.0,
                        total=float(os.getenv('TRANSFER_TIMEOUT', '120'))
                    ),
                ) as response:
                    if response.content_type == 'application/json':
                        data = await response.json()

                        if data.get('code') == 'invalid_api_key':
                            await provider_auth.invalidate_key(target_request.get('provider_auth'))
                            continue

                        if response.ok:
                            json_response = data

                    if is_stream:
                        try:
                            response.raise_for_status()
                        except Exception as exc:
                            if 'Too Many Requests' in str(exc):
                                continue

                        async for chunk in process_response(response, is_chat, chat_id, model, target_request):
                            yield chunk

                    break

            except ProxyError as exc:
                print('[!] Proxy error:', exc)
                continue

    if is_chat and is_stream:
        yield await chat.create_chat_chunk(chat_id=chat_id, model=model, content=chat.CompletionStop)
        yield 'data: [DONE]\n\n'

    if not is_stream and json_response:
        yield json.dumps(json_response)

    if user and incoming_request:
        await logs.log_api_request(user=user, incoming_request=incoming_request, target_url=target_request['url'])

    if credits_cost and user:
        await db.update_by_id(user['_id'], {'$inc': {'credits': -credits_cost}})

    ip_address = await network.get_ip(incoming_request)
    await stats.add_date()
    await stats.add_ip_address(ip_address)
    await stats.add_path(path)
    await stats.add_target(target_request['url'])
    if is_chat:
        await stats.add_model(model)
        await stats.add_tokens(input_tokens, model)

if __name__ == '__main__':
    asyncio.run(stream())
Added more documentation 2023-08-12 17:49:31 +02:00			`"""This module contains the streaming logic for the API."""`

some stuff idfk 2023-08-04 03:30:56 +02:00			`import os`
proxies have issues 2023-08-04 17:29:49 +02:00			`import json`
sleep deprivation caused me to be not productive today 2023-08-06 00:43:36 +02:00			`import dhooks`
some stuff idfk 2023-08-04 03:30:56 +02:00			`import asyncio`
			`import aiohttp`
			`import starlette`
Ratelimit system 🎉 2023-08-14 10:47:03 +02:00			`import datetime`
some stuff idfk 2023-08-04 03:30:56 +02:00
proxies have issues 2023-08-04 17:29:49 +02:00			`from rich import print`
some stuff idfk 2023-08-04 03:30:56 +02:00			`from dotenv import load_dotenv`
some thingies 2023-08-05 02:30:42 +02:00			`from python_socks._errors import ProxyError`
some stuff idfk 2023-08-04 03:30:56 +02:00
			`import proxies`
Added /v1/models and fixed key invalidation 2023-08-09 11:15:49 +02:00			`import provider_auth`
some stuff idfk 2023-08-04 03:30:56 +02:00			`import load_balancing`

Alotta changes. Setup managers and modified other files, cleaning up codebase. Created user.py class for future type usage. 2023-08-14 05:11:15 +02:00			`from db import logs`
			`from db.users import UserManager`
			`from db.stats import StatsManager`
sleep deprivation caused me to be not productive today 2023-08-06 00:43:36 +02:00			`from helpers import network, chat, errors`
Ratelimit system 🎉 2023-08-14 10:47:03 +02:00			`import yaml`

some stuff idfk 2023-08-04 03:30:56 +02:00
			`load_dotenv()`

Ratelimit system 🎉 2023-08-14 10:47:03 +02:00			`## Loads config which contains rate limits`
Commented out rate-limiting etc. 2023-08-16 12:08:58 +02:00			`with open('config/credits.yml', encoding='utf8') as f:`
Ratelimit system 🎉 2023-08-14 10:47:03 +02:00			`config = yaml.safe_load(f)`

			`## Where all rate limit requested data will be stored.`
changes to rate limits, check commit desc - made comments in config better - added todo to explain weird config - removed some bad stuff in streaming.py 2023-08-14 11:06:25 +02:00			`# Rate limit data is not persistent (It will be deleted on server stop/restart).`
Commented out rate-limiting etc. 2023-08-16 12:08:58 +02:00			`# user_last_request_time = {}`
Ratelimit system 🎉 2023-08-14 10:47:03 +02:00
some stuff idfk 2023-08-04 03:30:56 +02:00			`DEMO_PAYLOAD = {`
			`'model': 'gpt-3.5-turbo',`
			`'messages': [`
			`{`
			`'role': 'user',`
			`'content': '1+1='`
			`}`
			`]`
			`}`

Update streaming to work & change stats to class 2023-08-13 18:42:38 +02:00			`async def process_response(response, is_chat, chat_id, model, target_request):`
massive cleanup of streaming (i think this works?) 2023-08-13 18:26:35 +02:00			`"""Proccesses chunks from streaming`

			`Args:`
			`response (_type_): The response`
			`is_chat (bool): If there is 'chat/completions' in path`
			`chat_id (_type_): ID of chat with bot`
			`model (_type_): What AI model it is`
			`"""`
			`async for chunk in response.content.iter_any():`
			`chunk = chunk.decode("utf8").strip()`
			`send = False`

			`if is_chat and '{' in chunk:`
			`data = json.loads(chunk.split('data: ')[1])`
			`chunk = chunk.replace(data['id'], chat_id)`
			`send = True`

			`if target_request['module'] == 'twa' and data.get('text'):`
			`chunk = await chat.create_chat_chunk(chat_id=chat_id, model=model, content=['text'])`

			`if (not data['choices'][0]['delta']) or data['choices'][0]['delta'] == {'role': 'assistant'}:`
			`send = False`

			`if send and chunk:`
			`yield chunk + '\n\n'`

some stuff idfk 2023-08-04 03:30:56 +02:00			`async def stream(`
			`path: str='/v1/chat/completions',`
			`user: dict=None,`
			`payload: dict=None,`
			`credits_cost: int=0,`
			`input_tokens: int=0,`
			`incoming_request: starlette.requests.Request=None,`
			`):`
Codebase changes + a lot of commenting 2023-08-13 17:12:35 +02:00			`"""Stream the completions request. Sends data in chunks`
Ratelimit system 🎉 2023-08-14 10:47:03 +02:00			`If not streaming, it sends the result in its entirety.`
Codebase changes + a lot of commenting 2023-08-13 17:12:35 +02:00
			`Args:`
			`path (str, optional): URL Path. Defaults to '/v1/chat/completions'.`
			`user (dict, optional): User object (dict) Defaults to None.`
			`payload (dict, optional): Payload. Defaults to None.`
			`credits_cost (int, optional): Cost of the credits of the request. Defaults to 0.`
			`input_tokens (int, optional): Total tokens calculated with tokenizer. Defaults to 0.`
			`incoming_request (starlette.requests.Request, optional): Incoming request. Defaults to None.`
			`"""`
Ratelimit system 🎉 2023-08-14 10:47:03 +02:00
changes to rate limits, check commit desc - made comments in config better - added todo to explain weird config - removed some bad stuff in streaming.py 2023-08-14 11:06:25 +02:00			`## Rate limits user.`
			`# If rate limit is exceeded, error code 429. Otherwise, lets the user pass but notes down`
			`# last request time for future requests.`
Commented out rate-limiting etc. 2023-08-16 12:08:58 +02:00			`# if user:`
			`# role = user.get('role', 'default')`
			`# rate_limit = config['roles'][role]['rate_limit'].get(payload['model'], 10)`

			`# last_request_time = user_last_request_time.get(user['api_key'])`
			`# time_since_last_request = datetime.datetime.now() - last_request_time`

			`# if time_since_last_request < datetime.timedelta(seconds=rate_limit):`
			`# yield await errors.yield_error(429, 'Rate limit exceeded', "You are making requests too quickly. Please wait and try again later. Ask a administrator if you think this shouldn't happen.")`
			`# return`
			`# else:`
			`# user_last_request_time[user['_id']] = datetime.datetime.now()`
Ratelimit system 🎉 2023-08-14 10:47:03 +02:00
			`## Setup managers`
Alotta changes. Setup managers and modified other files, cleaning up codebase. Created user.py class for future type usage. 2023-08-14 05:11:15 +02:00			`db = UserManager()`
			`stats = StatsManager()`
Ratelimit system 🎉 2023-08-14 10:47:03 +02:00
proxies have issues 2023-08-04 17:29:49 +02:00			`is_chat = False`
sleep deprivation caused me to be not productive today 2023-08-06 00:43:36 +02:00			`is_stream = payload.get('stream', False)`
some stuff idfk 2023-08-04 03:30:56 +02:00
proxies have issues 2023-08-04 17:29:49 +02:00			`if 'chat/completions' in path:`
			`is_chat = True`
			`model = payload['model']`
some stuff idfk 2023-08-04 03:30:56 +02:00
sleep deprivation caused me to be not productive today 2023-08-06 00:43:36 +02:00			`if is_chat and is_stream:`
			`chat_id = await chat.create_chat_id()`
massive cleanup of streaming (i think this works?) 2023-08-13 18:26:35 +02:00			`yield await chat.create_chat_chunk(chat_id=chat_id, model=model, content=chat.CompletionStart)`
			`yield await chat.create_chat_chunk(chat_id=chat_id, model=model, content=None)`
sleep deprivation caused me to be not productive today 2023-08-06 00:43:36 +02:00
massive cleanup of streaming (i think this works?) 2023-08-13 18:26:35 +02:00			`json_response = {'error': 'No JSON response could be received'}`
some stuff idfk 2023-08-04 03:30:56 +02:00
moderation is done yay 2023-08-06 21:42:07 +02:00			`for _ in range(5):`
massive cleanup of streaming (i think this works?) 2023-08-13 18:26:35 +02:00			`headers = {'Content-Type': 'application/json'}`
Added more documentation 2023-08-12 17:49:31 +02:00
add comments to streaming.py 2023-08-13 18:29:45 +02:00
			`# Load balancing`
			`# If the request is a chat completion, then we need to load balance between chat providers`
			`# If the request is an organic request, then we need to load balance between organic providers`

sleep deprivation caused me to be not productive today 2023-08-06 00:43:36 +02:00			`try:`
			`if is_chat:`
			`target_request = await load_balancing.balance_chat_request(payload)`
			`else:`
add comments to streaming.py 2023-08-13 18:29:45 +02:00
			`# In this case we are doing a organic request. "organic" means that it's not using a reverse engineered front-end, but rather ClosedAI's API directly`
			`# churchless.tech is an example of an organic provider, because it redirects the request to ClosedAI.`
sleep deprivation caused me to be not productive today 2023-08-06 00:43:36 +02:00			`target_request = await load_balancing.balance_organic_request({`
			`'method': incoming_request.method,`
			`'path': path,`
			`'payload': payload,`
			`'headers': headers,`
			`'cookies': incoming_request.cookies`
			`})`
			`except ValueError as exc:`
			`webhook = dhooks.Webhook(os.getenv('DISCORD_WEBHOOK__API_ISSUE'))`
			webhook.send(content=f'API Issue: `{exc}`\nhttps://i.imgflip.com/7uv122.jpg')
massive cleanup of streaming (i think this works?) 2023-08-13 18:26:35 +02:00			`yield await errors.yield_error(500, 'Sorry, the API has no working keys anymore.', 'The admins have been messaged automatically.')`
idk 2023-08-06 12:46:41 +02:00			`return`
some thingies 2023-08-05 02:30:42 +02:00
massive cleanup of streaming (i think this works?) 2023-08-13 18:26:35 +02:00			`target_request['headers'].update(target_request.get('headers', {}))`
Added /v1/models and fixed key invalidation 2023-08-09 11:15:49 +02:00
			`if target_request['method'] == 'GET' and not payload:`
			`target_request['payload'] = None`
proxies have issues 2023-08-04 17:29:49 +02:00
add comments to streaming.py 2023-08-13 18:29:45 +02:00			`# We haven't done any requests as of right now, everything until now was just preparation`
			`# Here, we process the request`
Added more documentation 2023-08-12 17:49:31 +02:00			`async with aiohttp.ClientSession(connector=proxies.get_proxy().connector) as session:`
some thingies 2023-08-05 02:30:42 +02:00			`try:`
			`async with session.request(`
			`method=target_request.get('method', 'POST'),`
			`url=target_request['url'],`
			`data=target_request.get('data'),`
			`json=target_request.get('payload'),`
Added /v1/models and fixed key invalidation 2023-08-09 11:15:49 +02:00			`headers=target_request.get('headers', {}),`
some thingies 2023-08-05 02:30:42 +02:00			`cookies=target_request.get('cookies'),`
			`ssl=False,`
Some fixes? 2023-08-16 15:06:16 +02:00			`timeout=aiohttp.ClientTimeout(`
			`connect=3.0,`
			`total=float(os.getenv('TRANSFER_TIMEOUT', '120'))`
			`),`
some thingies 2023-08-05 02:30:42 +02:00			`) as response:`
Added /v1/models and fixed key invalidation 2023-08-09 11:15:49 +02:00			`if response.content_type == 'application/json':`
			`data = await response.json()`
moderation is done yay 2023-08-06 21:42:07 +02:00
Added /v1/models and fixed key invalidation 2023-08-09 11:15:49 +02:00			`if data.get('code') == 'invalid_api_key':`
			`await provider_auth.invalidate_key(target_request.get('provider_auth'))`
some thingies 2023-08-05 02:30:42 +02:00			`continue`
some stuff idfk 2023-08-04 03:30:56 +02:00
Added /v1/models and fixed key invalidation 2023-08-09 11:15:49 +02:00			`if response.ok:`
			`json_response = data`

sleep deprivation caused me to be not productive today 2023-08-06 00:43:36 +02:00			`if is_stream:`
Added /v1/models and fixed key invalidation 2023-08-09 11:15:49 +02:00			`try:`
			`response.raise_for_status()`
			`except Exception as exc:`
			`if 'Too Many Requests' in str(exc):`
			`continue`

Update streaming to work & change stats to class 2023-08-13 18:42:38 +02:00			`async for chunk in process_response(response, is_chat, chat_id, model, target_request):`
massive cleanup of streaming (i think this works?) 2023-08-13 18:26:35 +02:00			`yield chunk`
proxies have issues 2023-08-04 17:29:49 +02:00
some thingies 2023-08-05 02:30:42 +02:00			`break`
proxies have issues 2023-08-04 17:29:49 +02:00
sleep deprivation caused me to be not productive today 2023-08-06 00:43:36 +02:00			`except ProxyError as exc:`
Added more documentation 2023-08-12 17:49:31 +02:00			`print('[!] Proxy error:', exc)`
some thingies 2023-08-05 02:30:42 +02:00			`continue`
proxies have issues 2023-08-04 17:29:49 +02:00
sleep deprivation caused me to be not productive today 2023-08-06 00:43:36 +02:00			`if is_chat and is_stream:`
massive cleanup of streaming (i think this works?) 2023-08-13 18:26:35 +02:00			`yield await chat.create_chat_chunk(chat_id=chat_id, model=model, content=chat.CompletionStop)`
some thingies 2023-08-05 02:30:42 +02:00			`yield 'data: [DONE]\n\n'`
some stuff idfk 2023-08-04 03:30:56 +02:00
Added /v1/models and fixed key invalidation 2023-08-09 11:15:49 +02:00			`if not is_stream and json_response:`
moderation is done yay 2023-08-06 21:42:07 +02:00			`yield json.dumps(json_response)`

			`if user and incoming_request:`
massive cleanup of streaming (i think this works?) 2023-08-13 18:26:35 +02:00			`await logs.log_api_request(user=user, incoming_request=incoming_request, target_url=target_request['url'])`
moderation is done yay 2023-08-06 21:42:07 +02:00
			`if credits_cost and user:`
Alotta changes. Setup managers and modified other files, cleaning up codebase. Created user.py class for future type usage. 2023-08-14 05:11:15 +02:00			`await db.update_by_id(user['_id'], {'$inc': {'credits': -credits_cost}})`
moderation is done yay 2023-08-06 21:42:07 +02:00
			`ip_address = await network.get_ip(incoming_request)`
Alotta changes. Setup managers and modified other files, cleaning up codebase. Created user.py class for future type usage. 2023-08-14 05:11:15 +02:00			`await stats.add_date()`
			`await stats.add_ip_address(ip_address)`
			`await stats.add_path(path)`
			`await stats.add_target(target_request['url'])`
moderation is done yay 2023-08-06 21:42:07 +02:00			`if is_chat:`
Alotta changes. Setup managers and modified other files, cleaning up codebase. Created user.py class for future type usage. 2023-08-14 05:11:15 +02:00			`await stats.add_model(model)`
			`await stats.add_tokens(input_tokens, model)`
sleep deprivation caused me to be not productive today 2023-08-06 00:43:36 +02:00
some stuff idfk 2023-08-04 03:30:56 +02:00			`if __name__ == '__main__':`
			`asyncio.run(stream())`