Ratelimit system 🎉

Game_Time 2023-08-14 13:47:03 +05:00
parent f896b18968
commit de954998d2
6 changed files with 85 additions and 25 deletions

api/config/config.yml (new file, +50 lines)

@@ -0,0 +1,50 @@
max-credits: 100001
max-credits-owner: 694201337
start-credits: 1000

costs:
  other: 10
  chat-models:
    gpt-3: 10
    gpt-4: 30
    gpt-4-32k: 100

# bonuses are multipliers for costs:
# final_cost = cost * bonus
# rate_limit values are per-model cooldowns: the minimum number of seconds between requests
roles:
  owner:
    bonus: 0.1
    rate_limit:
      other: 60
      gpt-3: 60
      gpt-4: 35
      gpt-4-32k: 5
  admin:
    bonus: 0.3
    rate_limit:
      other: 60
      gpt-3: 60
      gpt-4: 30
      gpt-4-32k: 4
  helper:
    bonus: 0.4
    rate_limit:
      other: 60
      gpt-3: 60
      gpt-4: 25
      gpt-4-32k: 3
  booster:
    bonus: 0.5
    rate_limit:
      other: 60
      gpt-3: 60
      gpt-4: 20
      gpt-4-32k: 2
  default:
    bonus: 0
    rate_limit:
      other: 60
      gpt-3: 60
      gpt-4: 15
      gpt-4-32k: 1
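
To make the new config concrete, here is a short worked example (illustrative only, not part of the commit) of how a role's bonus and per-model rate_limit are read, following final_cost = cost * bonus from the comment above and the lookups this commit performs; the chat-models cost lookup is implied by the config layout:

# Illustrative sketch; mirrors how the commit reads config.yml
import yaml

with open('config/config.yml', encoding='utf8') as f:
    config = yaml.safe_load(f)

role = 'admin'
model = 'gpt-4'

base_cost = config['costs']['chat-models'][model]       # 30 credits for a gpt-4 request
bonus = config['roles'][role]['bonus']                   # 0.3 for the admin role
final_cost = round(base_cost * bonus)                    # round(30 * 0.3) = 9 credits charged

cooldown = config['roles'][role]['rate_limit'][model]    # admins wait at least 30s between gpt-4 requests
print(final_cost, cooldown)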


@@ -1,19 +0,0 @@
max-credits: 100001
max-credits-owner: 694201337
start-credits: 1000

costs:
  other: 10
  chat-models:
    gpt-3: 10
    gpt-4: 30
    gpt-4-32k: 100

# bonuses are multiplier for costs:
# final_cost = cost * bonus
bonuses:
  owner: 0.1
  admin: 0.3
  helper: 0.4
  booster: 0.5


@@ -12,7 +12,6 @@ from dotenv import load_dotenv
load_dotenv()

router = fastapi.APIRouter(tags=['core'])

async def check_core_auth(request):
    """


@@ -5,7 +5,6 @@ import fastapi
from rich import print
from dotenv import load_dotenv
from fastapi.middleware.cors import CORSMiddleware

import core
import transfer


@@ -6,6 +6,7 @@ import dhooks
import asyncio
import aiohttp
import starlette
+import datetime

from rich import print
from dotenv import load_dotenv
@@ -19,9 +20,19 @@ from db import logs
from db.users import UserManager
from db.stats import StatsManager
from helpers import network, chat, errors
+import yaml

load_dotenv()

+## Load the config, which contains the rate limits
+with open('config/config.yml', encoding='utf8') as f:
+    config = yaml.safe_load(f)
+
+## Tracks the time of each user's most recent request, used for rate limiting.
+# This data is **not persistent**, i.e. it is cleared whenever the server stops or restarts.
+user_last_request_time = {}

DEMO_PAYLOAD = {
    'model': 'gpt-3.5-turbo',
    'messages': [
@@ -68,6 +79,7 @@ async def stream(
    incoming_request: starlette.requests.Request=None,
):
    """Stream the completions request. Sends data in chunks.
+    If not streaming, it sends the result in its entirety.

    Args:
        path (str, optional): URL Path. Defaults to '/v1/chat/completions'.
@@ -77,8 +89,27 @@
        input_tokens (int, optional): Total tokens calculated with tokenizer. Defaults to 0.
        incoming_request (starlette.requests.Request, optional): Incoming request. Defaults to None.
    """
+    ## Check if breaching rate limit
+    if user:
+        role = user.get('role', 'default')
+        rate_limit = config['roles'][role]['rate_limit'].get(payload['model'], 10)
+
+        # Time of this user's previous request, if any (kept in memory only)
+        last_request_time = user_last_request_time.get(user['_id'])
+        now = datetime.datetime.now()
+
+        # Reject the request if the role's per-model cooldown has not elapsed yet
+        if last_request_time and (now - last_request_time) < datetime.timedelta(seconds=rate_limit):
+            yield await errors.yield_error(429, 'Rate limit exceeded', 'You are making requests too quickly. Please wait and try again later. Ask an administrator if you think this shouldn\'t happen.')
+            return
+
+        user_last_request_time[user['_id']] = now
+
+    ## Setup managers
    db = UserManager()
    stats = StatsManager()

    is_chat = False
    is_stream = payload.get('stream', False)
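
For readers who want to test the cooldown logic added above in isolation, here is a minimal standalone sketch (not part of the commit; is_rate_limited and the user id are illustrative names) that reproduces the same last-request-time check against an in-memory dict:

import datetime

user_last_request_time = {}

def is_rate_limited(user_id, cooldown_seconds):
    """Return True if the user must still wait; otherwise record this request's time."""
    now = datetime.datetime.now()
    last = user_last_request_time.get(user_id)
    if last and (now - last) < datetime.timedelta(seconds=cooldown_seconds):
        return True
    user_last_request_time[user_id] = now
    return False

print(is_rate_limited('user-1', 35))  # False: the first request always passes
print(is_rate_limited('user-1', 35))  # True: an immediate retry falls inside the 35s owner gpt-4 cooldown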


@@ -16,8 +16,8 @@ load_dotenv()
models_list = json.load(open('models.json'))

-with open('config/credits.yml', encoding='utf8') as f:
-    credits_config = yaml.safe_load(f)
+with open('config/config.yml', encoding='utf8') as f:
+    config = yaml.safe_load(f)

async def handle(incoming_request):
    """
@@ -59,7 +59,7 @@ async def handle(incoming_request):
    if path_contains_models:
        return fastapi.responses.JSONResponse(content=models_list)

-    costs = credits_config['costs']
+    costs = config['costs']

    cost = costs['other']

    if 'chat/completions' in path:
@@ -74,7 +74,7 @@ async def handle(incoming_request):
    if policy_violation:
        return await errors.error(400, f'The request contains content which violates this model\'s policies for "{policy_violation}".', 'We currently don\'t support any NSFW models.')

-    role_cost_multiplier = credits_config['bonuses'].get(user['role'], 1)
+    role_cost_multiplier = config['roles'][user.get('role', 'default')]['bonus']
    cost = round(cost * role_cost_multiplier)

    if user['credits'] < cost:
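
One behavioural difference worth noting: the old credits_config['bonuses'].get(user['role'], 1) fell back to a multiplier of 1 for roles without a bonus, while the new config['roles'][...]['bonus'] lookup expects every role (including default) to be defined under roles: in config.yml. A small illustrative comparison (not part of the commit):

# Illustrative only: contrasts the old and new bonus lookups
old_config = {'bonuses': {'owner': 0.1, 'admin': 0.3}}
new_config = {'roles': {'admin': {'bonus': 0.3}, 'default': {'bonus': 0}}}

print(old_config['bonuses'].get('default', 1))    # 1 -> old code charged full price for unlisted roles
print(new_config['roles']['default']['bonus'])    # 0 -> new config must define a bonus for every role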