Much cheaper API

Added token-based credit cost
2024-11-25 14:43:58 +01:00 · 2023-10-16 21:55:54 +00:00 · 2023-10-16 21:34:54 +00:00
14 changed files with 136 additions and 87 deletions
--- a/README.md
+++ b/README.md
@@ -65,6 +65,7 @@ This one's code can be found in the following repository: [github.com/novaoss/no

 # Setup
 ## Requirements
+- **Rust** (`curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh`)
 - newest **Python** version
 - newest Python **pip** version
 - **MongoDB** database
--- a/api/after_request.py
+++ b/api/after_request.py
@@ -5,21 +5,28 @@ async def after_request(
    incoming_request: dict,
    target_request: dict,
    user: dict,
+    tokens: dict,
    credits_cost: int,
-    input_tokens: int,
    path: str,
    is_chat: bool,
    model: str,
+    provider: str,
 ) -> None:
    """Runs after every request."""

    if user and incoming_request:
-        await logs.log_api_request(user=user, incoming_request=incoming_request, target_url=target_request['url'])
+        await logs.log_api_request(
+            user=user,
+            incoming_request=incoming_request,
+            target_url=target_request['url'],
+            tokens=tokens,
+            provider=provider
+        )

    if credits_cost and user:
        await users.manager.update_by_id(user['_id'], {'$inc': {'credits': -credits_cost}})

-    ip_address = await network.get_ip(incoming_request)
+    ip_address = network.get_ip(incoming_request)

    await stats.manager.add_date()
    # await stats.manager.add_ip_address(ip_address)
@@ -28,4 +35,3 @@ async def after_request(

    if is_chat:
        await stats.manager.add_model(model)
-        await stats.manager.add_tokens(input_tokens, model)
--- a/api/config/config.yml
+++ b/api/config/config.yml
@@ -5,9 +5,9 @@ costs:
  other: 5 # Other endpoints

  chat-models: # chat completions
-    gpt-4-32k: 200
-    gpt-4: 50
-    gpt-3: 10
+    gpt-4-32k: 100
+    gpt-4: 20
+    gpt-3: 5

 ## Roles Explanation

@@ -17,11 +17,11 @@ costs:
 roles:
  owner:
    bonus: 0
-  admin:
+  enterprise:
    bonus: 0.2
-  helper:
+  admin:
    bonus: 0.4
-  booster:
+  helper:
    bonus: 0.6
  default:
    bonus: 1.0
--- a/api/db/logs.py
+++ b/api/db/logs.py
@@ -19,7 +19,7 @@ conn = AsyncIOMotorClient(os.environ['MONGO_URI'])
 async def _get_collection(collection_name: str):
    return conn[os.getenv('MONGO_NAME', 'nova-test')][collection_name]

-async def log_api_request(user: dict, incoming_request, target_url: str):
+async def log_api_request(user: dict, incoming_request, target_url: str, tokens: dict, provider: str) -> dict:
    """Logs the API Request into the database."""

    db = await _get_collection('logs')
@@ -32,20 +32,24 @@ async def log_api_request(user: dict, incoming_request, target_url: str):
            pass

    model = payload.get('model')
-    ip_address = await network.get_ip(incoming_request)
+    ip_address = network.get_ip(incoming_request)
+
+    path = incoming_request.url.path
+    if path == '/v1/chat/completions':
+        path = 'c'

    new_log_item = {
        'timestamp': time.time(),
-        'method': incoming_request.method,
-        'path': incoming_request.url.path,
+        'path': path,
        'user_id': str(user['_id']),
        'security': {
            'ip': ip_address,
        },
        'details': {
            'model': model,
-            'target_url': target_url
-        }
+            'provider': provider,
+        },
+        'tokens': tokens,
    }

    inserted = await db.insert_one(new_log_item)
--- a/api/db/users.py
+++ b/api/db/users.py
@@ -17,7 +17,16 @@ load_dotenv()
 with open(os.path.join(helpers.root, 'api', 'config', 'config.yml'), encoding='utf8') as f:
    credits_config = yaml.safe_load(f)

-## MONGODB Setup
+infix = os.getenv('KEYGEN_INFIX', 'S3LFH0ST')
+
+async def generate_api_key():
+    chars = string.ascii_letters + string.digits
+
+    suffix = ''.join(random.choices(chars, k=20))
+    prefix = ''.join(random.choices(chars, k=20))
+
+    new_api_key = f'nv2-{prefix}{infix}{suffix}'
+    return new_api_key

 class UserManager:
    """
@@ -31,19 +40,13 @@ class UserManager:
        return self.conn[os.getenv('MONGO_NAME', 'nova-test')][collection_name]
    
    async def get_all_users(self):
-        collection = self.conn[os.getenv('MONGO_NAME', 'nova-test')]['users']
+        collection = self.conn['nova-core']['users']
        return collection#.find()

    async def create(self, discord_id: str = '') -> dict:
        db = await self._get_collection('users')
-        chars = string.ascii_letters + string.digits
-
-        infix = os.getenv('KEYGEN_INFIX', 'S3LFH0ST')
-        suffix = ''.join(random.choices(chars, k=20))
-        prefix = ''.join(random.choices(chars, k=20))
-
-        new_api_key = f'nv2-{prefix}{infix}{suffix}'

+        new_api_key = await generate_api_key()
        existing_user = await self.user_by_discord_id(discord_id)
        if existing_user: # just change api key
            await db.update_one({'auth.discord': str(int(discord_id))}, {'$set': {'api_key': new_api_key}})
@@ -73,7 +76,18 @@ class UserManager:

    async def user_by_discord_id(self, discord_id: str):
        db = await self._get_collection('users')
-        return await db.find_one({'auth.discord': str(int(discord_id))})
+
+        user = await db.find_one({'auth.discord': str(discord_id)})
+
+        if not user:
+            return
+
+        if user['api_key'] == '':
+            new_api_key = await generate_api_key()
+            await db.update_one({'auth.discord': str(discord_id)}, {'$set': {'api_key': new_api_key}})
+            user = await db.find_one({'auth.discord': str(discord_id)})
+
+        return user

    async def user_by_api_key(self, key: str):
        db = await self._get_collection('users')
@@ -85,6 +99,7 @@ class UserManager:

    async def update_by_discord_id(self, discord_id: str, update):
        db = await self._get_collection('users')
+
        return await db.update_one({'auth.discord': str(int(discord_id))}, update)

    async def update_by_filter(self, obj_filter, update):
@@ -98,7 +113,7 @@ class UserManager:
 manager = UserManager()

 async def demo():
-    user = await UserManager().create(69420)
+    user = await UserManager().create('1099385227077488700')
    print(user)

 if __name__ == '__main__':
--- a/api/handler.py
+++ b/api/handler.py
@@ -22,9 +22,6 @@ with open(os.path.join('cache', 'models.json'), encoding='utf8') as f:
    models_list = json.load(f)
 models = [model['id'] for model in models_list['data']]

-with open(os.path.join('config', 'config.yml'), encoding='utf8') as f:
-    config = yaml.safe_load(f)
-
 moderation_debug_key = os.getenv('MODERATION_DEBUG_KEY')

 async def handle(incoming_request: fastapi.Request):
@@ -36,10 +33,10 @@ async def handle(incoming_request: fastapi.Request):
    path = incoming_request.url.path
    path = path.replace('/v1/v1', '/v1')

-    ip_address = await network.get_ip(incoming_request)
+    ip_address = network.get_ip(incoming_request)

    if '/dashboard' in path:
-        return errors.error(404, 'You can\'t access /dashboard.', 'This is a private endpoint.')
+        return await errors.error(404, 'You can\'t access /dashboard.', 'This is a private endpoint.')

    if path.startswith('/v1/models'):
        return fastapi.responses.JSONResponse(content=models_list)
@@ -79,25 +76,7 @@ async def handle(incoming_request: fastapi.Request):
    if 'account/credits' in path:
        return fastapi.responses.JSONResponse({'credits': user['credits']})

-    costs = config['costs']
-    cost = costs['other']
-
-    if 'chat/completions' in path:
-        cost = costs['chat-models'].get(payload.get('model'), cost)
-
-    role = user.get('role', 'default')
-
-    if 'enterprise' in role:
-        role_cost_multiplier = 0.1
-    else:
-        try:
-            role_cost_multiplier = config['roles'][role]['bonus']
-        except KeyError:
-            role_cost_multiplier = 1
-
-    cost = round(cost * role_cost_multiplier)
-
-    if user['credits'] < cost:
+    if user['credits'] < 1:
        return await errors.error(429, 'Not enough credits.', 'Wait or earn more credits. Learn more on our website or Discord server.')

    if 'DISABLE_VARS' not in key_tags:
@@ -168,8 +147,6 @@ async def handle(incoming_request: fastapi.Request):
            user=user,
            path=path,
            payload=payload,
-            credits_cost=cost,
-            input_tokens=0,
            incoming_request=incoming_request,
        ),
        media_type=media_type
--- a/api/helpers/network.py
+++ b/api/helpers/network.py
@@ -1,28 +1,19 @@
 import os
 import time
+
 from dotenv import load_dotenv
 from slowapi.util import get_remote_address

 load_dotenv()

-async def get_ip(request) -> str:
+def get_ip(request) -> str:
    """Get the IP address of the incoming request."""

-    xff = None
-    if request.headers.get('x-forwarded-for'):
-        xff, *_ = request.headers['x-forwarded-for'].split(', ')
-
-    possible_ips = [xff, request.headers.get('cf-connecting-ip'), request.client.host]
-    detected_ip = next((i for i in possible_ips if i), None)
-
+    detected_ip = request.headers.get('cf-connecting-ip', get_remote_address(request))
    return detected_ip

 def get_ratelimit_key(request) -> str:
    """Get the IP address of the incoming request."""
-    custom = os.environ('NO_RATELIMIT_IPS')
-    ip = get_remote_address(request)
-
-    if ip in custom:
-        return f'enterprise_{ip}'
-
-    return ip
+    
+    ip = get_ip(request)
+    return ip
--- a/api/helpers/tokens.py
+++ b/api/helpers/tokens.py
@@ -2,7 +2,7 @@ import time
 import asyncio
 import tiktoken

-async def count_for_messages(messages: list, model: str='gpt-3.5-turbo-0613') -> int:
+async def count_tokens_for_messages(messages: list, model: str='gpt-3.5-turbo-0613') -> int:
    """Return the number of tokens used by a list of messages

    Args:
@@ -38,13 +38,13 @@ async def count_for_messages(messages: list, model: str='gpt-3.5-turbo-0613') ->
        tokens_per_name = -1  # if there's a name, the role is omitted

    elif 'gpt-3.5-turbo' in model:
-        return await count_for_messages(messages, model='gpt-3.5-turbo-0613')
+        return await count_tokens_for_messages(messages, model='gpt-3.5-turbo-0613')

    elif 'gpt-4' in model:
-        return await count_for_messages(messages, model='gpt-4-0613')
+        return await count_tokens_for_messages(messages, model='gpt-4-0613')

    else:
-        raise NotImplementedError(f"""count_for_messages() is not implemented for model {model}.
+        raise NotImplementedError(f"""count_tokens_for_messages() is not implemented for model {model}.
 See https://github.com/openai/openai-python/blob/main/chatml.md
 for information on how messages are converted to tokens.""")
    
@@ -66,8 +66,8 @@ if __name__ == '__main__':
    messages = [
        {
            'role': 'user',
-            'content': '1+1='
+            'content': 'Hi'
        }
    ]
-    print(asyncio.run(count_for_messages(messages)))
+    print(asyncio.run(count_tokens_for_messages(messages)))
    print(f'Took {(time.perf_counter() - start) * 1000}ms')
--- a/api/main.py
+++ b/api/main.py
@@ -17,6 +17,8 @@ from slowapi import Limiter, _rate_limit_exceeded_handler
 import core
 import handler

+from helpers.network import get_ratelimit_key
+
 load_dotenv()

 app = fastapi.FastAPI()
@@ -33,7 +35,7 @@ app.include_router(core.router)

 limiter = Limiter(
    swallow_errors=True,
-    key_func=get_remote_address,
+    key_func=get_ratelimit_key,
    default_limits=[
    '2/second',
    '30/minute',
--- a/api/providers/__init__.py
+++ b/api/providers/__init__.py
@@ -1,2 +1,2 @@
-from . import ails, closed, closed4
-MODULES =    [ails, closed, closed4]
+from . import ails
+MODULES =    [ails]
--- a/api/providers/main.py
+++ b/api/providers/main.py
@@ -22,7 +22,10 @@ async def main():

        for file_name in os.listdir(os.path.dirname(__file__)):
            if file_name.endswith('.py') and not file_name.startswith('_'):
-                print(file_name.split('.')[0])
+                name = file_name.split('.')[0]
+                models = importlib.import_module(f'.{file_name.split(".")[0]}', 'providers').MODELS
+                
+                print(f'  {name} @ {", ".join(models)}')

        sys.exit(0)

--- a/api/providers/helpers/utils.py
+++ b/api/providers/helpers/utils.py
@@ -3,6 +3,8 @@ try:
 except ModuleNotFoundError:
    from ...db import providerkeys

+# Sort the models by their value/cost/rarity.
+
 GPT_3 = [
    'gpt-3.5-turbo',
    'gpt-3.5-turbo-16k',
--- a/api/responder.py
+++ b/api/responder.py
@@ -2,6 +2,7 @@

 import os
 import json
+import yaml
 import ujson
 import aiohttp
 import asyncio
@@ -17,15 +18,16 @@ import load_balancing

 from helpers import errors
 from db import providerkeys
+from helpers.tokens import count_tokens_for_messages

 load_dotenv()

 CRITICAL_API_ERRORS = ['invalid_api_key', 'account_deactivated']
-
 keymanager = providerkeys.manager
-
 background_tasks: Set[asyncio.Task[Any]] = set()

+with open(os.path.join('config', 'config.yml'), encoding='utf8') as f:
+    config = yaml.safe_load(f)

 def create_background_task(coro: Coroutine[Any, Any, Any]) -> None:
    """asyncio.create_task, which prevents the task from being garbage collected.
@@ -36,13 +38,10 @@ def create_background_task(coro: Coroutine[Any, Any, Any]) -> None:
    background_tasks.add(task)
    task.add_done_callback(background_tasks.discard)

-
 async def respond(
    path: str='/v1/chat/completions',
    user: dict=None,
    payload: dict=None,
-    credits_cost: int=0,
-    input_tokens: int=0,
    incoming_request: starlette.requests.Request=None,
 ):
    """
@@ -72,6 +71,9 @@ async def respond(
        'timeout': 0
    }

+    input_tokens = 0
+    output_tokens = 0
+
    for _ in range(10):
        try:
            if is_chat:
@@ -161,9 +163,15 @@ async def respond(
                            continue

                        if response.ok:
+                            if is_chat and not is_stream:
+                                input_tokens = client_json_response['usage']['prompt_tokens']
+                                output_tokens = client_json_response['usage']['completion_tokens']
+
                            server_json_response = client_json_response

                    if is_stream:
+                        input_tokens = await count_tokens_for_messages(payload['messages'], model=model)
+
                        chunk_no = 0
                        buffer = ''

@@ -175,7 +183,7 @@ async def respond(
                            if 'azure' in provider_name:
                                chunk = chunk.replace('data: ', '', 1)

-                                if not chunk or chunk_no == 1:
+                                if not chunk.strip() or chunk_no == 1:
                                    continue

                            subchunks = chunk.split('\n\n')
@@ -188,6 +196,8 @@ async def respond(
                                yield subchunk + '\n\n'

                            buffer = subchunks[-1]
+
+                        output_tokens = chunk_no
                    break

            except aiohttp.client_exceptions.ServerTimeoutError:
@@ -198,7 +208,7 @@ async def respond(
        skipped_errors = {k: v for k, v in skipped_errors.items() if v > 0}
        skipped_errors = ujson.dumps(skipped_errors, indent=4)
        yield await errors.yield_error(500,
-            'Sorry, our API seems to have issues connecting to our provider(s).',
+            f'Sorry, our API seems to have issues connecting to "{model}".',
            f'Please send this info to support: {skipped_errors}'
        )
        return
@@ -206,13 +216,51 @@ async def respond(
    if (not is_stream) and server_json_response:
        yield json.dumps(server_json_response)

+
+    role = user.get('role', 'default')
+
+    model_multipliers = config['costs']
+    model_multiplier = model_multipliers['other']
+
+    if is_chat:
+        model_multiplier = model_multipliers['chat-models'].get(payload.get('model'), model_multiplier)
+        total_tokens = input_tokens + output_tokens
+        credits_cost = total_tokens / 60
+        credits_cost = round(credits_cost * model_multiplier)
+
+        if credits_cost < 1:
+            credits_cost = 1
+
+        tokens = {
+            'input': input_tokens,
+            'output': output_tokens,
+            'total': total_tokens
+        }
+    else:
+        credits_cost = 5
+        tokens = {
+            'input': 0,
+            'output': 0,
+            'total': credits_cost
+        }
+
+    try:
+        role_cost_multiplier = config['roles'][role]['bonus']
+    except KeyError:
+        role_cost_multiplier = 1
+
+    credits_cost = round(credits_cost * role_cost_multiplier)
+
+    print(f'[bold]Credits cost[/bold]: {credits_cost}')
+
    create_background_task(
        after_request.after_request(
+            provider=provider_name,
            incoming_request=incoming_request,
            target_request=target_request,
            user=user,
            credits_cost=credits_cost,
-            input_tokens=input_tokens,
+            tokens=tokens,
            path=path,
            is_chat=is_chat,
            model=model,
--- a/rewards/settings.py
+++ b/rewards/settings.py
@@ -1,5 +1,5 @@
 roles = {
-    '': '2500',
+    '': '2000',
    'lvl10': '2800',
    'lvl20': '3100',
    'lvl30': '3400',
Author	SHA1	Message	Date
NovaOSS Admins	a16ba559fb	Much cheaper API	2023-10-16 21:55:54 +00:00
NovaOSS Admins	30bd5616b8	Added token-based credit cost	2023-10-16 21:34:54 +00:00