diff --git a/README.md b/README.md
index 48dc42b..40c19f4 100644
--- a/README.md
+++ b/README.md
@@ -65,6 +65,7 @@ This one's code can be found in the following repository: [github.com/novaoss/no
 # Setup

 ## Requirements
+- **Rust** (`curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh`)
 - newest **Python** version
 - newest Python **pip** version
 - **MongoDB** database
diff --git a/api/after_request.py b/api/after_request.py
index 7dbb3af..1db4cc6 100644
--- a/api/after_request.py
+++ b/api/after_request.py
@@ -5,21 +5,28 @@ async def after_request(
     incoming_request: dict,
     target_request: dict,
     user: dict,
+    tokens: dict,
     credits_cost: int,
-    input_tokens: int,
     path: str,
     is_chat: bool,
     model: str,
+    provider: str,
 ) -> None:
     """Runs after every request."""

     if user and incoming_request:
-        await logs.log_api_request(user=user, incoming_request=incoming_request, target_url=target_request['url'])
+        await logs.log_api_request(
+            user=user,
+            incoming_request=incoming_request,
+            target_url=target_request['url'],
+            tokens=tokens,
+            provider=provider
+        )

     if credits_cost and user:
         await users.manager.update_by_id(user['_id'], {'$inc': {'credits': -credits_cost}})

-    ip_address = await network.get_ip(incoming_request)
+    ip_address = network.get_ip(incoming_request)

     await stats.manager.add_date()
     # await stats.manager.add_ip_address(ip_address)
@@ -28,4 +35,3 @@ async def after_request(

     if is_chat:
         await stats.manager.add_model(model)
-        await stats.manager.add_tokens(input_tokens, model)
diff --git a/api/config/config.yml b/api/config/config.yml
index d12bd09..5a8f5c5 100644
--- a/api/config/config.yml
+++ b/api/config/config.yml
@@ -5,9 +5,9 @@ costs:
   other: 5 # Other endpoints

   chat-models: # chat completions
-    gpt-4-32k: 200
-    gpt-4: 50
-    gpt-3: 10
+    gpt-4-32k: 100
+    gpt-4: 20
+    gpt-3: 5

 ## Roles Explanation

@@ -17,11 +17,11 @@ costs:
 roles:
   owner:
     bonus: 0
-  admin:
+  enterprise:
     bonus: 0.2
-  helper:
+  admin:
     bonus: 0.4
-  booster:
+  helper:
     bonus: 0.6
   default:
     bonus: 1.0
diff --git a/api/db/logs.py b/api/db/logs.py
index e5f5798..edad3d4 100644
--- a/api/db/logs.py
+++ b/api/db/logs.py
@@ -19,7 +19,7 @@ conn = AsyncIOMotorClient(os.environ['MONGO_URI'])
 async def _get_collection(collection_name: str):
     return conn[os.getenv('MONGO_NAME', 'nova-test')][collection_name]

-async def log_api_request(user: dict, incoming_request, target_url: str):
+async def log_api_request(user: dict, incoming_request, target_url: str, tokens: dict, provider: str) -> dict:
     """Logs the API Request into the database."""

     db = await _get_collection('logs')
@@ -32,20 +32,24 @@ async def log_api_request(user: dict, incoming_request, target_url: str):
         pass

     model = payload.get('model')
-    ip_address = await network.get_ip(incoming_request)
+    ip_address = network.get_ip(incoming_request)
+
+    path = incoming_request.url.path
+    if path == '/v1/chat/completions':
+        path = 'c'

     new_log_item = {
         'timestamp': time.time(),
-        'method': incoming_request.method,
-        'path': incoming_request.url.path,
+        'path': path,
         'user_id': str(user['_id']),
         'security': {
             'ip': ip_address,
         },
         'details': {
             'model': model,
-            'target_url': target_url
-        }
+            'provider': provider,
+        },
+        'tokens': tokens,
     }

     inserted = await db.insert_one(new_log_item)
diff --git a/api/db/users.py b/api/db/users.py
index 691da68..36fdde3 100644
--- a/api/db/users.py
+++ b/api/db/users.py
@@ -17,7 +17,16 @@ load_dotenv()
 with open(os.path.join(helpers.root, 'api', 'config', 'config.yml'), encoding='utf8') as f:
     credits_config = yaml.safe_load(f)

-## MONGODB Setup
+infix = os.getenv('KEYGEN_INFIX', 'S3LFH0ST')
+
+async def generate_api_key():
+    chars = string.ascii_letters + string.digits
+
+    suffix = ''.join(random.choices(chars, k=20))
+    prefix = ''.join(random.choices(chars, k=20))
+
+    new_api_key = f'nv2-{prefix}{infix}{suffix}'
+    return new_api_key

 class UserManager:
     """
@@ -31,19 +40,13 @@ class UserManager:
         return self.conn[os.getenv('MONGO_NAME', 'nova-test')][collection_name]

     async def get_all_users(self):
-        collection = self.conn[os.getenv('MONGO_NAME', 'nova-test')]['users']
+        collection = self.conn['nova-core']['users']
         return collection#.find()

     async def create(self, discord_id: str = '') -> dict:
         db = await self._get_collection('users')

-        chars = string.ascii_letters + string.digits
-
-        infix = os.getenv('KEYGEN_INFIX', 'S3LFH0ST')
-        suffix = ''.join(random.choices(chars, k=20))
-        prefix = ''.join(random.choices(chars, k=20))
-
-        new_api_key = f'nv2-{prefix}{infix}{suffix}'
+        new_api_key = await generate_api_key()
         existing_user = await self.user_by_discord_id(discord_id)
         if existing_user: # just change api key
             await db.update_one({'auth.discord': str(int(discord_id))}, {'$set': {'api_key': new_api_key}})
@@ -73,7 +76,18 @@ class UserManager:

     async def user_by_discord_id(self, discord_id: str):
         db = await self._get_collection('users')
-        return await db.find_one({'auth.discord': str(int(discord_id))})
+
+        user = await db.find_one({'auth.discord': str(discord_id)})
+
+        if not user:
+            return
+
+        if user['api_key'] == '':
+            new_api_key = await generate_api_key()
+            await db.update_one({'auth.discord': str(discord_id)}, {'$set': {'api_key': new_api_key}})
+            user = await db.find_one({'auth.discord': str(discord_id)})
+
+        return user

     async def user_by_api_key(self, key: str):
         db = await self._get_collection('users')
@@ -85,6 +99,7 @@ class UserManager:

     async def update_by_discord_id(self, discord_id: str, update):
         db = await self._get_collection('users')
+
         return await db.update_one({'auth.discord': str(int(discord_id))}, update)

     async def update_by_filter(self, obj_filter, update):
@@ -98,7 +113,7 @@ manager = UserManager()

 async def demo():
-    user = await UserManager().create(69420)
+    user = await UserManager().create('1099385227077488700')
     print(user)

 if __name__ == '__main__':
diff --git a/api/handler.py b/api/handler.py
index 168aa09..ff946f6 100644
--- a/api/handler.py
+++ b/api/handler.py
@@ -22,9 +22,6 @@ with open(os.path.join('cache', 'models.json'), encoding='utf8') as f:
     models_list = json.load(f)
 models = [model['id'] for model in models_list['data']]

-with open(os.path.join('config', 'config.yml'), encoding='utf8') as f:
-    config = yaml.safe_load(f)
-
 moderation_debug_key = os.getenv('MODERATION_DEBUG_KEY')

 async def handle(incoming_request: fastapi.Request):
@@ -36,10 +33,10 @@
     path = incoming_request.url.path
     path = path.replace('/v1/v1', '/v1')

-    ip_address = await network.get_ip(incoming_request)
+    ip_address = network.get_ip(incoming_request)

     if '/dashboard' in path:
-        return errors.error(404, 'You can\'t access /dashboard.', 'This is a private endpoint.')
+        return await errors.error(404, 'You can\'t access /dashboard.', 'This is a private endpoint.')

     if path.startswith('/v1/models'):
         return fastapi.responses.JSONResponse(content=models_list)
@@ -79,25 +76,7 @@ async def handle(incoming_request: fastapi.Request):
     if 'account/credits' in path:
         return fastapi.responses.JSONResponse({'credits': user['credits']})

-    costs = config['costs']
-    cost = costs['other']
-
-    if 'chat/completions' in path:
-        cost = costs['chat-models'].get(payload.get('model'), cost)
-
-    role = user.get('role', 'default')
-
-    if 'enterprise' in role:
-        role_cost_multiplier = 0.1
-    else:
-        try:
-            role_cost_multiplier = config['roles'][role]['bonus']
-        except KeyError:
-            role_cost_multiplier = 1
-
-    cost = round(cost * role_cost_multiplier)
-
-    if user['credits'] < cost:
+    if user['credits'] < 1:
         return await errors.error(429, 'Not enough credits.', 'Wait or earn more credits. Learn more on our website or Discord server.')

     if 'DISABLE_VARS' not in key_tags:
@@ -168,8 +147,6 @@ async def handle(incoming_request: fastapi.Request):
             user=user,
             path=path,
             payload=payload,
-            credits_cost=cost,
-            input_tokens=0,
             incoming_request=incoming_request,
         ),
         media_type=media_type
diff --git a/api/helpers/network.py b/api/helpers/network.py
index 91c0cde..5bc8ce4 100644
--- a/api/helpers/network.py
+++ b/api/helpers/network.py
@@ -1,28 +1,19 @@
 import os
 import time
+
 from dotenv import load_dotenv
 from slowapi.util import get_remote_address

 load_dotenv()

-async def get_ip(request) -> str:
+def get_ip(request) -> str:
     """Get the IP address of the incoming request."""
-    xff = None
-    if request.headers.get('x-forwarded-for'):
-        xff, *_ = request.headers['x-forwarded-for'].split(', ')
-
-    possible_ips = [xff, request.headers.get('cf-connecting-ip'), request.client.host]
-    detected_ip = next((i for i in possible_ips if i), None)
-
+    detected_ip = request.headers.get('cf-connecting-ip', get_remote_address(request))
     return detected_ip


 def get_ratelimit_key(request) -> str:
     """Get the IP address of the incoming request."""
-    custom = os.environ('NO_RATELIMIT_IPS')
-    ip = get_remote_address(request)
-
-    if ip in custom:
-        return f'enterprise_{ip}'
-
-    return ip
\ No newline at end of file
+
+    ip = get_ip(request)
+    return ip
diff --git a/api/helpers/tokens.py b/api/helpers/tokens.py
index 86fe04b..74f129a 100644
--- a/api/helpers/tokens.py
+++ b/api/helpers/tokens.py
@@ -2,7 +2,7 @@ import time
 import asyncio
 import tiktoken

-async def count_for_messages(messages: list, model: str='gpt-3.5-turbo-0613') -> int:
+async def count_tokens_for_messages(messages: list, model: str='gpt-3.5-turbo-0613') -> int:
     """Return the number of tokens used by a list of messages

     Args:
@@ -38,13 +38,13 @@ async def count_for_messages(messages: list, model: str='gpt-3.5-turbo-0613') ->
         tokens_per_name = -1 # if there's a name, the role is omitted

     elif 'gpt-3.5-turbo' in model:
-        return await count_for_messages(messages, model='gpt-3.5-turbo-0613')
+        return await count_tokens_for_messages(messages, model='gpt-3.5-turbo-0613')

     elif 'gpt-4' in model:
-        return await count_for_messages(messages, model='gpt-4-0613')
+        return await count_tokens_for_messages(messages, model='gpt-4-0613')

     else:
-        raise NotImplementedError(f"""count_for_messages() is not implemented for model {model}.
+        raise NotImplementedError(f"""count_tokens_for_messages() is not implemented for model {model}.
 See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""")

@@ -66,8 +66,8 @@ if __name__ == '__main__':
     messages = [
         {
             'role': 'user',
-            'content': '1+1='
+            'content': 'Hi'
         }
     ]
-    print(asyncio.run(count_for_messages(messages)))
+    print(asyncio.run(count_tokens_for_messages(messages)))
     print(f'Took {(time.perf_counter() - start) * 1000}ms')
diff --git a/api/main.py b/api/main.py
index 997160e..97f3597 100644
--- a/api/main.py
+++ b/api/main.py
@@ -17,6 +17,8 @@ from slowapi import Limiter, _rate_limit_exceeded_handler
 import core
 import handler

+from helpers.network import get_ratelimit_key
+
 load_dotenv()

 app = fastapi.FastAPI()
@@ -33,7 +35,7 @@ app.include_router(core.router)

 limiter = Limiter(
     swallow_errors=True,
-    key_func=get_remote_address,
+    key_func=get_ratelimit_key,
     default_limits=[
         '2/second',
         '30/minute',
diff --git a/api/providers/__init__.py b/api/providers/__init__.py
index 31462d2..334654b 100644
--- a/api/providers/__init__.py
+++ b/api/providers/__init__.py
@@ -1,2 +1,2 @@
-from . import ails, closed, closed4
-MODULES = [ails, closed, closed4]
+from . import ails
+MODULES = [ails]
diff --git a/api/providers/__main__.py b/api/providers/__main__.py
index c3ce47d..33e09bc 100644
--- a/api/providers/__main__.py
+++ b/api/providers/__main__.py
@@ -22,7 +22,10 @@ async def main():

     for file_name in os.listdir(os.path.dirname(__file__)):
         if file_name.endswith('.py') and not file_name.startswith('_'):
-            print(file_name.split('.')[0])
+            name = file_name.split('.')[0]
+            models = importlib.import_module(f'.{file_name.split(".")[0]}', 'providers').MODELS
+
+            print(f' {name} @ {", ".join(models)}')

     sys.exit(0)
diff --git a/api/providers/helpers/utils.py b/api/providers/helpers/utils.py
index 884ed4f..af7c8bb 100644
--- a/api/providers/helpers/utils.py
+++ b/api/providers/helpers/utils.py
@@ -3,6 +3,8 @@ try:
 except ModuleNotFoundError:
     from ...db import providerkeys

+# Sort the models by their value/cost/rarity.
+
 GPT_3 = [
     'gpt-3.5-turbo',
     'gpt-3.5-turbo-16k',
diff --git a/api/responder.py b/api/responder.py
index ed5ee07..ef33735 100644
--- a/api/responder.py
+++ b/api/responder.py
@@ -2,6 +2,7 @@ import os
 import json
+import yaml
 import ujson
 import aiohttp
 import asyncio

@@ -17,15 +18,16 @@ import load_balancing
 from helpers import errors
 from db import providerkeys
+from helpers.tokens import count_tokens_for_messages

 load_dotenv()

 CRITICAL_API_ERRORS = ['invalid_api_key', 'account_deactivated']
-
 keymanager = providerkeys.manager
-
 background_tasks: Set[asyncio.Task[Any]] = set()

+with open(os.path.join('config', 'config.yml'), encoding='utf8') as f:
+    config = yaml.safe_load(f)

 def create_background_task(coro: Coroutine[Any, Any, Any]) -> None:
     """asyncio.create_task, which prevents the task from being garbage collected.
@@ -36,13 +38,10 @@ def create_background_task(coro: Coroutine[Any, Any, Any]) -> None:
     background_tasks.add(task)
     task.add_done_callback(background_tasks.discard)

-
 async def respond(
     path: str='/v1/chat/completions',
     user: dict=None,
     payload: dict=None,
-    credits_cost: int=0,
-    input_tokens: int=0,
     incoming_request: starlette.requests.Request=None,
 ):
     """
@@ -72,6 +71,9 @@ async def respond(
         'timeout': 0
     }

+    input_tokens = 0
+    output_tokens = 0
+
     for _ in range(10):
         try:
             if is_chat:
@@ -161,9 +163,15 @@ async def respond(
                 continue

             if response.ok:
+                if is_chat and not is_stream:
+                    input_tokens = client_json_response['usage']['prompt_tokens']
+                    output_tokens = client_json_response['usage']['completion_tokens']
+
                 server_json_response = client_json_response

             if is_stream:
+                input_tokens = await count_tokens_for_messages(payload['messages'], model=model)
+
                 chunk_no = 0
                 buffer = ''
@@ -175,7 +183,7 @@ async def respond(
                 if 'azure' in provider_name:
                     chunk = chunk.replace('data: ', '', 1)

-                if not chunk or chunk_no == 1:
+                if not chunk.strip() or chunk_no == 1:
                     continue

                 subchunks = chunk.split('\n\n')
@@ -188,6 +196,8 @@ async def respond(
                     yield subchunk + '\n\n'

                 buffer = subchunks[-1]
+
+                output_tokens = chunk_no

             break
         except aiohttp.client_exceptions.ServerTimeoutError:
@@ -198,7 +208,7 @@ async def respond(
     skipped_errors = {k: v for k, v in skipped_errors.items() if v > 0}
     skipped_errors = ujson.dumps(skipped_errors, indent=4)
     yield await errors.yield_error(500,
-        'Sorry, our API seems to have issues connecting to our provider(s).',
+        f'Sorry, our API seems to have issues connecting to "{model}".',
         f'Please send this info to support: {skipped_errors}'
     )
     return
@@ -206,13 +216,48 @@ async def respond(
     if (not is_stream) and server_json_response:
         yield json.dumps(server_json_response)
+
+    role = user.get('role', 'default')
+
+    model_multipliers = config['costs']
+    model_multiplier = model_multipliers['other']
+
+    if is_chat:
+        model_multiplier = model_multipliers['chat-models'].get(payload.get('model'), model_multiplier)
+        total_tokens = input_tokens + output_tokens
+        credits_cost = total_tokens / 10
+        credits_cost = round(credits_cost * model_multiplier)
+
+        tokens = {
+            'input': input_tokens,
+            'output': output_tokens,
+            'total': total_tokens
+        }
+    else:
+        credits_cost = 5
+        tokens = {
+            'input': 0,
+            'output': 0,
+            'total': credits_cost
+        }
+
+    try:
+        role_cost_multiplier = config['roles'][role]['bonus']
+    except KeyError:
+        role_cost_multiplier = 1
+
+    credits_cost = round(credits_cost * role_cost_multiplier)
+
+    print(f'[bold]Credits cost[/bold]: {credits_cost}')
+
     create_background_task(
         after_request.after_request(
+            provider=provider_name,
             incoming_request=incoming_request,
             target_request=target_request,
             user=user,
             credits_cost=credits_cost,
-            input_tokens=input_tokens,
+            tokens=tokens,
             path=path,
             is_chat=is_chat,
             model=model,
diff --git a/rewards/settings.py b/rewards/settings.py
index f1ce953..f8da5a8 100644
--- a/rewards/settings.py
+++ b/rewards/settings.py
@@ -1,5 +1,5 @@
 roles = {
-    '': '2500',
+    '': '2000',
     'lvl10': '2800',
     'lvl20': '3100',
     'lvl30': '3400',
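For reference, the credit math that replaces the flat per-request charge in api/handler.py now lives at the end of api/responder.py and boils down to the following standalone sketch. The multipliers are copied from api/config/config.yml above; credits_for_chat() and the example token counts are illustrative only and are not part of the patch.

# Standalone sketch of the new token-based pricing added in api/responder.py.
# The config dict mirrors api/config/config.yml above; the helper name and the
# example numbers are made up for illustration.

config = {
    'costs': {
        'other': 5,
        'chat-models': {'gpt-4-32k': 100, 'gpt-4': 20, 'gpt-3': 5},
    },
    'roles': {
        'owner': {'bonus': 0},
        'enterprise': {'bonus': 0.2},
        'admin': {'bonus': 0.4},
        'helper': {'bonus': 0.6},
        'default': {'bonus': 1.0},
    },
}

def credits_for_chat(model: str, input_tokens: int, output_tokens: int, role: str = 'default') -> int:
    """(input + output tokens) / 10, scaled by the model multiplier and the role bonus."""
    multiplier = config['costs']['chat-models'].get(model, config['costs']['other'])
    cost = round((input_tokens + output_tokens) / 10 * multiplier)
    bonus = config['roles'].get(role, {'bonus': 1})['bonus']
    return round(cost * bonus)

# A gpt-4 request with 500 prompt and 250 completion tokens:
print(credits_for_chat('gpt-4', 500, 250))                # 1500 credits (default role)
print(credits_for_chat('gpt-4', 500, 250, 'enterprise'))  # 300 credits (0.2 bonus)

Compared to the old flat charge in api/handler.py (for example, 50 credits per gpt-4 call under the previous config), the cost now scales with how many tokens a request actually consumes, with non-chat endpoints billed at a flat 5 credits.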