Compare commits

...

2 commits

Author SHA1 Message Date
Game_Time 69bc2e33de changes to rate limits, check commit desc
- made comments in config better
- added todo to explain weird config
- removed some bad stuff in streaming.py
2023-08-14 14:06:25 +05:00
Game_Time de954998d2 Ratelimit system 🎉 2023-08-14 13:47:03 +05:00
6 changed files with 94 additions and 25 deletions

59
api/config/config.yml Normal file
View file

@ -0,0 +1,59 @@
max-credits: 100001
max-credits-owner: 694201337
start-credits: 1000
costs:
other: 10
chat-models:
gpt-3: 10
gpt-4: 30
gpt-4-32k: 100
## Roles Explanation
# Bonuses: A multiplier applied to the cost of a request.
# They work like: final_cost = cost * bonus
# (a bonus below 1 is a discount; a bonus of 0 makes the request free)
# Rate limits: Limit how often a user can make requests.
# Each rate limit is the number of seconds a user must wait before making a new request.
## TODO: Set up proper rate limit settings for each role
## Current settings are:
## **NOT MEANT FOR PRODUCTION. DO NOT USE WITH THESE SETTINGS.**
roles:
owner:
bonus: 0.1
rate_limit:
other: 60
gpt-3: 60
gpt-4: 35
gpt-4-32k: 5
admin:
bonus: 0.3
rate_limit:
other: 60
gpt-3: 60
gpt-4: 30
gpt-4-32k: 4
helper:
bonus: 0.4
rate_limit:
other: 60
gpt-3: 60
gpt-4: 25
gpt-4-32k: 3
booster:
bonus: 0.5
rate_limit:
other: 60
gpt-3: 60
gpt-4: 20
gpt-4-32k: 2
default:
bonus: 0
rate_limit:
other: 60
gpt-3: 60
gpt-4: 15
gpt-4-32k: 1

View file

@ -1,19 +0,0 @@
max-credits: 100001
max-credits-owner: 694201337
start-credits: 1000
costs:
other: 10
chat-models:
gpt-3: 10
gpt-4: 30
gpt-4-32k: 100
# bonuses are multiplier for costs:
# final_cost = cost * bonus
bonuses:
owner: 0.1
admin: 0.3
helper: 0.4
booster: 0.5

View file

@ -12,7 +12,6 @@ from dotenv import load_dotenv
load_dotenv() load_dotenv()
router = fastapi.APIRouter(tags=['core']) router = fastapi.APIRouter(tags=['core'])
async def check_core_auth(request): async def check_core_auth(request):
""" """

View file

@ -5,7 +5,6 @@ import fastapi
from rich import print from rich import print
from dotenv import load_dotenv from dotenv import load_dotenv
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
import core import core
import transfer import transfer

View file

@ -6,6 +6,7 @@ import dhooks
import asyncio import asyncio
import aiohttp import aiohttp
import starlette import starlette
import datetime
from rich import print from rich import print
from dotenv import load_dotenv from dotenv import load_dotenv
@ -19,9 +20,19 @@ from db import logs
from db.users import UserManager from db.users import UserManager
from db.stats import StatsManager from db.stats import StatsManager
from helpers import network, chat, errors from helpers import network, chat, errors
import yaml
load_dotenv() load_dotenv()
## Loads config which contains rate limits
with open('config/config.yml', encoding='utf8') as f:
config = yaml.safe_load(f)
## Stores the rate limit data (the time of each user's last request).
# Rate limit data is **not persistent** (it is lost when the server stops or restarts).
user_last_request_time = {}
DEMO_PAYLOAD = { DEMO_PAYLOAD = {
'model': 'gpt-3.5-turbo', 'model': 'gpt-3.5-turbo',
'messages': [ 'messages': [
@ -68,6 +79,7 @@ async def stream(
incoming_request: starlette.requests.Request=None, incoming_request: starlette.requests.Request=None,
): ):
"""Stream the completions request. Sends data in chunks """Stream the completions request. Sends data in chunks
If not streaming, it sends the result in its entirety.
Args: Args:
path (str, optional): URL Path. Defaults to '/v1/chat/completions'. path (str, optional): URL Path. Defaults to '/v1/chat/completions'.
@ -77,8 +89,27 @@ async def stream(
input_tokens (int, optional): Total tokens calculated with tokenizer. Defaults to 0. input_tokens (int, optional): Total tokens calculated with tokenizer. Defaults to 0.
incoming_request (starlette.requests.Request, optional): Incoming request. Defaults to None. incoming_request (starlette.requests.Request, optional): Incoming request. Defaults to None.
""" """
## Rate limits the user.
# If the rate limit is exceeded, responds with error code 429. Otherwise, lets the request
# through and records the time of this request for checking future requests.
if user:
role = user.get('role', 'default')
rate_limit = config['roles'][role]['rate_limit'].get(payload['model'], 10)
last_request_time = user_last_request_time.get(user['api_key'])
time_since_last_request = datetime.now() - last_request_time
if time_since_last_request < datetime.timedelta(seconds=rate_limit):
yield await errors.yield_error(429, "Rate limit exceeded', 'You are making requests too quickly. Please wait and try again later. Ask a administrator if you think this shouldn't happen. ")
return
else:
user_last_request_time[user['_id']] = datetime.now()
## Setup managers
db = UserManager() db = UserManager()
stats = StatsManager() stats = StatsManager()
is_chat = False is_chat = False
is_stream = payload.get('stream', False) is_stream = payload.get('stream', False)

View file

@ -16,8 +16,8 @@ load_dotenv()
models_list = json.load(open('models.json')) models_list = json.load(open('models.json'))
with open('config/credits.yml', encoding='utf8') as f: with open('config/config.yml', encoding='utf8') as f:
credits_config = yaml.safe_load(f) config = yaml.safe_load(f)
async def handle(incoming_request): async def handle(incoming_request):
""" """
@ -59,7 +59,7 @@ async def handle(incoming_request):
if path_contains_models: if path_contains_models:
return fastapi.responses.JSONResponse(content=models_list) return fastapi.responses.JSONResponse(content=models_list)
costs = credits_config['costs'] costs = config['costs']
cost = costs['other'] cost = costs['other']
if 'chat/completions' in path: if 'chat/completions' in path:
@ -74,7 +74,7 @@ async def handle(incoming_request):
if policy_violation: if policy_violation:
return await errors.error(400, f'The request contains content which violates this model\'s policies for "{policy_violation}".', 'We currently don\'t support any NSFW models.') return await errors.error(400, f'The request contains content which violates this model\'s policies for "{policy_violation}".', 'We currently don\'t support any NSFW models.')
role_cost_multiplier = credits_config['bonuses'].get(user['role'], 1) role_cost_multiplier = config['roles'][user['role']]['bonus']
cost = round(cost * role_cost_multiplier) cost = round(cost * role_cost_multiplier)
if user['credits'] < cost: if user['credits'] < cost: