changes to rate limits, check commit desc

- made comments in config better - added todo to explain weird config - removed some bad stuff in streaming.py
Ratelimit system 🎉
2024-11-25 20:43:56 +01:00 · 2023-08-14 14:06:25 +05:00 · 2023-08-14 13:47:03 +05:00
6 changed files with 94 additions and 25 deletions
--- a/api/config/config.yml
+++ b/api/config/config.yml
@ -0,0 +1,59 @@
 max-credits: 100001
 max-credits-owner: 694201337
 start-credits: 1000
 costs:
  other: 10
  chat-models:
    gpt-3: 10
    gpt-4: 30
    gpt-4-32k: 100
 ## Roles Explanation
 # Bonuses: A multiplier applied to the cost of a request.
 #          final_cost = cost * bonus (so a bonus below 1 is a discount).
 # Rate limits: Limit how often a user can make requests.
 #              Each value is the number of seconds a user must wait before making a new request.
 ## TODO: Set up proper rate limit settings for each role.
 ##       The current settings are placeholders:
 ##       **NOT MEANT FOR PRODUCTION. DO NOT USE WITH THESE SETTINGS.**
 # Per-role settings.
 #   bonus:      cost multiplier for the role (final_cost = cost * bonus).
 #   rate_limit: seconds a user must wait between requests, keyed by model name.
 roles:
  owner:
    bonus: 0.1
    rate_limit:
      other: 60
      gpt-3: 60
      gpt-4: 35
      gpt-4-32k: 5
  admin:
    bonus: 0.3
    rate_limit:
      other: 60
      gpt-3: 60
      gpt-4: 30
      gpt-4-32k: 4
  helper:
    bonus: 0.4
    rate_limit:
      other: 60
      gpt-3: 60
      gpt-4: 25
      gpt-4-32k: 3
  booster:
    bonus: 0.5
    rate_limit:
      other: 60
      gpt-3: 60
      gpt-4: 20
      gpt-4-32k: 2
  default:
    # A multiplier of 1 charges the full price. A value of 0 would make every
    # request free for regular users, because final_cost = cost * bonus.
    bonus: 1
    rate_limit:
      other: 60
      gpt-3: 60
      gpt-4: 15
      gpt-4-32k: 1
--- a/api/config/credits.yml
+++ b/api/config/credits.yml
@ -1,19 +0,0 @@
 max-credits: 100001
 max-credits-owner: 694201337
 start-credits: 1000
 costs:
  other: 10
  chat-models:
    gpt-3: 10
    gpt-4: 30
    gpt-4-32k: 100
 # bonuses are multiplier for costs:
 # final_cost = cost * bonus
 bonuses:
  owner: 0.1
  admin: 0.3
  helper: 0.4
  booster: 0.5
--- a/api/core.py
+++ b/api/core.py
@ -12,7 +12,6 @@ from dotenv import load_dotenv
 load_dotenv()
 router = fastapi.APIRouter(tags=['core'])
 async def check_core_auth(request):
    """
--- a/api/main.py
+++ b/api/main.py
@ -5,7 +5,6 @@ import fastapi
 from rich import print
 from dotenv import load_dotenv
 from fastapi.middleware.cors import CORSMiddleware
 import core
 import transfer
--- a/api/streaming.py
+++ b/api/streaming.py
@ -6,6 +6,7 @@ import dhooks
 import asyncio
 import aiohttp
 import starlette
 import datetime
 from rich import print
 from dotenv import load_dotenv
@ -19,9 +20,19 @@ from db import logs
 from db.users import UserManager
 from db.stats import StatsManager
 from helpers import network, chat, errors
 import yaml
 load_dotenv()
 ## Loads config which contains rate limits
 with open('config/config.yml', encoding='utf8') as f:
    config = yaml.safe_load(f)
 ## Stores the time of each user's last request, which the rate limiter checks against.
 # Rate limit data is **not persistent** (it is lost when the server stops or restarts).
 user_last_request_time = {}
 DEMO_PAYLOAD = {
    'model': 'gpt-3.5-turbo',
    'messages': [
@ -68,6 +79,7 @@ async def stream(
    incoming_request: starlette.requests.Request=None,
 ):
    """Stream the completions request. Sends data in chunks
    If not streaming, it sends the result in its entirety.
    Args:
        path (str, optional): URL Path. Defaults to '/v1/chat/completions'.
@ -77,8 +89,27 @@ async def stream(
        input_tokens (int, optional): Total tokens calculated with tokenizer. Defaults to 0.
        incoming_request (starlette.requests.Request, optional): Incoming request. Defaults to None.
    """
    ## Rate limits user.
    # If rate limit is exceeded, error code 429. Otherwise, lets the user pass but notes down
    # last request time for future requests.
    if user:
        role = user.get('role', 'default')
        rate_limit = config['roles'][role]['rate_limit'].get(payload['model'], 10)
        last_request_time = user_last_request_time.get(user['api_key'])
        time_since_last_request = datetime.now() - last_request_time
        if time_since_last_request < datetime.timedelta(seconds=rate_limit):
            yield await errors.yield_error(429, "Rate limit exceeded', 'You are making requests too quickly. Please wait and try again later. Ask a administrator if you think this shouldn't happen. ")
            return
        else:
            user_last_request_time[user['_id']] = datetime.now()
    ## Setup managers
    db = UserManager()
    stats = StatsManager()
    is_chat = False
    is_stream = payload.get('stream', False)
--- a/api/transfer.py
+++ b/api/transfer.py
@ -16,8 +16,8 @@ load_dotenv()
 models_list = json.load(open('models.json'))
-with open('config/credits.yml', encoding='utf8') as f:
+with open('config/config.yml', encoding='utf8') as f:
-    credits_config = yaml.safe_load(f)
+    config = yaml.safe_load(f)
 async def handle(incoming_request):
    """
@ -59,7 +59,7 @@ async def handle(incoming_request):
    if path_contains_models:
        return fastapi.responses.JSONResponse(content=models_list)
-    costs = credits_config['costs']
+    costs = config['costs']
    cost = costs['other']
    if 'chat/completions' in path:
@ -74,7 +74,7 @@ async def handle(incoming_request):
    if policy_violation:
        return await errors.error(400, f'The request contains content which violates this model\'s policies for "{policy_violation}".', 'We currently don\'t support any NSFW models.')
-    role_cost_multiplier = credits_config['bonuses'].get(user['role'], 1)
+    role_cost_multiplier = config['roles'][user['role']]['bonus']
    cost = round(cost * role_cost_multiplier)
    if user['credits'] < cost:
Author	SHA1	Message	Date
Game_Time	69bc2e33de	changes to rate limits, check commit desc - made comments in config better - added todo to explain weird config - removed some bad stuff in streaming.py	2023-08-14 14:06:25 +05:00
Game_Time	de954998d2	Ratelimit system 🎉	2023-08-14 13:47:03 +05:00