diff --git a/api/config/config.yml b/api/config/config.yml new file mode 100644 index 0000000..4d3fefb --- /dev/null +++ b/api/config/config.yml @@ -0,0 +1,50 @@ +max-credits: 100001 +max-credits-owner: 694201337 +start-credits: 1000 + +costs: + other: 10 + + chat-models: + gpt-3: 10 + gpt-4: 30 + gpt-4-32k: 100 + +# bonuses are multiplier for costs: +# final_cost = cost * bonus +roles: + owner: + bonus: 0.1 + rate_limit: + other: 60 + gpt-3: 60 + gpt-4: 35 + gpt-4-32k: 5 + admin: + bonus: 0.3 + rate_limit: + other: 60 + gpt-3: 60 + gpt-4: 30 + gpt-4-32k: 4 + helper: + bonus: 0.4 + rate_limit: + other: 60 + gpt-3: 60 + gpt-4: 25 + gpt-4-32k: 3 + booster: + bonus: 0.5 + rate_limit: + other: 60 + gpt-3: 60 + gpt-4: 20 + gpt-4-32k: 2 + default: + bonus: 1 + rate_limit: + other: 60 + gpt-3: 60 + gpt-4: 15 + gpt-4-32k: 1 \ No newline at end of file diff --git a/api/config/credits.yml b/api/config/credits.yml deleted file mode 100644 index 29eeaa9..0000000 --- a/api/config/credits.yml +++ /dev/null @@ -1,19 +0,0 @@ -max-credits: 100001 -max-credits-owner: 694201337 -start-credits: 1000 - -costs: - other: 10 - - chat-models: - gpt-3: 10 - gpt-4: 30 - gpt-4-32k: 100 - -# bonuses are multiplier for costs: -# final_cost = cost * bonus -bonuses: - owner: 0.1 - admin: 0.3 - helper: 0.4 - booster: 0.5 diff --git a/api/core.py b/api/core.py index 4a652cf..8946516 100644 --- a/api/core.py +++ b/api/core.py @@ -12,7 +12,6 @@ from dotenv import load_dotenv load_dotenv() router = fastapi.APIRouter(tags=['core']) - async def check_core_auth(request): """ diff --git a/api/main.py b/api/main.py index 8bc8810..6815984 100644 --- a/api/main.py +++ b/api/main.py @@ -5,7 +5,6 @@ import fastapi from rich import print from dotenv import load_dotenv from fastapi.middleware.cors import CORSMiddleware - import core import transfer diff --git a/api/streaming.py b/api/streaming.py index 76c3d2e..dc8dfea 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -6,6 +6,7 @@ import dhooks import 
asyncio import aiohttp import starlette +import datetime from rich import print from dotenv import load_dotenv @@ -19,9 +20,19 @@ from db import logs from db.users import UserManager from db.stats import StatsManager from helpers import network, chat, errors +import yaml + load_dotenv() +## Load the config which contains the rate limits +with open('config/config.yml', encoding='utf8') as f: + config = yaml.safe_load(f) + +## Where all rate limit request data will be stored. +# Rate limit data is **not persistent**, i.e. it will be deleted on server stop/restart. +user_last_request_time = {} + DEMO_PAYLOAD = { 'model': 'gpt-3.5-turbo', 'messages': [ @@ -68,6 +79,7 @@ async def stream( incoming_request: starlette.requests.Request=None, ): """Stream the completions request. Sends data in chunks + If not streaming, it sends the result in its entirety. Args: path (str, optional): URL Path. Defaults to '/v1/chat/completions'. @@ -77,8 +89,27 @@ input_tokens (int, optional): Total tokens calculated with tokenizer. Defaults to 0. incoming_request (starlette.requests.Request, optional): Incoming request. Defaults to None. """ + + if user: + role = user.get('role', 'default') + rate_limit = config['roles'][role]['rate_limit'].get(payload['model'], 10)  # allowed requests per minute + + last_request_time = user_last_request_time.get(user['api_key']) + now = datetime.datetime.now() + + if last_request_time is not None and (now - last_request_time) < datetime.timedelta(seconds=60 / rate_limit): + yield await errors.yield_error(429, 'Rate limit exceeded', "You are making requests too quickly. Please wait and try again later. Ask an administrator if you think this shouldn't happen.
") + return + else: + user_last_request_time[user['api_key']] = now + + ## Setup managers db = UserManager() stats = StatsManager() + + ## Rate limit was already checked above + + is_chat = False is_stream = payload.get('stream', False) diff --git a/api/transfer.py b/api/transfer.py index 61483e6..5e643a6 100644 --- a/api/transfer.py +++ b/api/transfer.py @@ -16,8 +16,8 @@ load_dotenv() models_list = json.load(open('models.json')) -with open('config/credits.yml', encoding='utf8') as f: - credits_config = yaml.safe_load(f) +with open('config/config.yml', encoding='utf8') as f: + config = yaml.safe_load(f) async def handle(incoming_request): """ @@ -59,7 +59,7 @@ async def handle(incoming_request): if path_contains_models: return fastapi.responses.JSONResponse(content=models_list) - costs = credits_config['costs'] + costs = config['costs'] cost = costs['other'] if 'chat/completions' in path: @@ -74,7 +74,7 @@ if policy_violation: return await errors.error(400, f'The request contains content which violates this model\'s policies for "{policy_violation}".', 'We currently don\'t support any NSFW models.') - role_cost_multiplier = credits_config['bonuses'].get(user['role'], 1) + role_cost_multiplier = config['roles'].get(user['role'], {}).get('bonus', 1) cost = round(cost * role_cost_multiplier) if user['credits'] < cost: