Compare commits

..

2 commits

Author SHA1 Message Date
Game_Time 69bc2e33de changes to rate limits, check commit desc
- made comments in config better
- added todo to explain weird config
- removed some bad stuff in streaming.py
2023-08-14 14:06:25 +05:00
Game_Time de954998d2 Ratelimit system 🎉 2023-08-14 13:47:03 +05:00
6 changed files with 94 additions and 25 deletions

59
api/config/config.yml Normal file
View file

@ -0,0 +1,59 @@
max-credits: 100001
max-credits-owner: 694201337
start-credits: 1000

costs:
  other: 10
  chat-models:
    gpt-3: 10
    gpt-4: 30
    gpt-4-32k: 100

## Roles Explanation
# Bonuses: a multiplier applied to the base cost of a request:
#   final_cost = cost * bonus
# Rate limits: limit how often a user can make requests.
#   The value is the number of seconds that must pass before the
#   user is allowed to make another request.
## TODO: Set up proper rate limit settings for each role.
## Current settings are:
## **NOT MEANT FOR PRODUCTION. DO NOT USE WITH THESE SETTINGS.**
roles:
  owner:
    bonus: 0.1
    rate_limit:
      other: 60
      gpt-3: 60
      gpt-4: 35
      gpt-4-32k: 5
  admin:
    bonus: 0.3
    rate_limit:
      other: 60
      gpt-3: 60
      gpt-4: 30
      gpt-4-32k: 4
  helper:
    bonus: 0.4
    rate_limit:
      other: 60
      gpt-3: 60
      gpt-4: 25
      gpt-4-32k: 3
  booster:
    bonus: 0.5
    rate_limit:
      other: 60
      gpt-3: 60
      gpt-4: 20
      gpt-4-32k: 2
  default:
    # NOTE(review): bonus 0 means final_cost = cost * 0, so every request is
    # free for the default role. The previous config fell back to a multiplier
    # of 1 for unlisted roles — confirm 0 is intentional before production.
    bonus: 0
    rate_limit:
      other: 60
      gpt-3: 60
      gpt-4: 15
      gpt-4-32k: 1

View file

@ -1,19 +0,0 @@
max-credits: 100001
max-credits-owner: 694201337
start-credits: 1000
costs:
other: 10
chat-models:
gpt-3: 10
gpt-4: 30
gpt-4-32k: 100
# bonuses are multiplier for costs:
# final_cost = cost * bonus
bonuses:
owner: 0.1
admin: 0.3
helper: 0.4
booster: 0.5

View file

@ -12,7 +12,6 @@ from dotenv import load_dotenv
load_dotenv()
router = fastapi.APIRouter(tags=['core'])
async def check_core_auth(request):
"""

View file

@ -5,7 +5,6 @@ import fastapi
from rich import print
from dotenv import load_dotenv
from fastapi.middleware.cors import CORSMiddleware
import core
import transfer

View file

@ -6,6 +6,7 @@ import dhooks
import asyncio
import aiohttp
import starlette
import datetime
from rich import print
from dotenv import load_dotenv
@ -19,9 +20,19 @@ from db import logs
from db.users import UserManager
from db.stats import StatsManager
from helpers import network, chat, errors
import yaml
load_dotenv()
## Load the config, which contains the per-role rate limit settings.
with open('config/config.yml', encoding='utf8') as f:
config = yaml.safe_load(f)
## Stores the time of each user's most recent request, for rate limiting.
# Rate limit data is **not persistent** (it is cleared on server stop/restart).
user_last_request_time = {}
DEMO_PAYLOAD = {
'model': 'gpt-3.5-turbo',
'messages': [
@ -68,6 +79,7 @@ async def stream(
incoming_request: starlette.requests.Request=None,
):
"""Stream the completions request. Sends data in chunks
If not streaming, it sends the result in its entirety.
Args:
path (str, optional): URL Path. Defaults to '/v1/chat/completions'.
@ -77,8 +89,27 @@ async def stream(
input_tokens (int, optional): Total tokens calculated with tokenizer. Defaults to 0.
incoming_request (starlette.requests.Request, optional): Incoming request. Defaults to None.
"""
## Rate-limit the user.
# If the rate limit is exceeded, respond with error code 429. Otherwise let the
# request through and record the request time for future checks.
# NOTE(review): the lookup below reads user_last_request_time[user['api_key']]
# but the store writes user_last_request_time[user['_id']] — these keys must
# match or the limit never triggers. Also, with `import datetime` the calls
# must be `datetime.datetime.now()`, and `last_request_time` is None on a
# user's first request, which would make the subtraction raise — confirm.
if user:
role = user.get('role', 'default')
rate_limit = config['roles'][role]['rate_limit'].get(payload['model'], 10)
last_request_time = user_last_request_time.get(user['api_key'])
time_since_last_request = datetime.now() - last_request_time
if time_since_last_request < datetime.timedelta(seconds=rate_limit):
yield await errors.yield_error(429, "Rate limit exceeded', 'You are making requests too quickly. Please wait and try again later. Ask a administrator if you think this shouldn't happen. ")
return
else:
user_last_request_time[user['_id']] = datetime.now()
## Setup managers
db = UserManager()
stats = StatsManager()
is_chat = False
is_stream = payload.get('stream', False)

View file

@ -16,8 +16,8 @@ load_dotenv()
models_list = json.load(open('models.json'))
with open('config/credits.yml', encoding='utf8') as f:
credits_config = yaml.safe_load(f)
with open('config/config.yml', encoding='utf8') as f:
config = yaml.safe_load(f)
async def handle(incoming_request):
"""
@ -59,7 +59,7 @@ async def handle(incoming_request):
if path_contains_models:
return fastapi.responses.JSONResponse(content=models_list)
costs = credits_config['costs']
costs = config['costs']
cost = costs['other']
if 'chat/completions' in path:
@ -74,7 +74,7 @@ async def handle(incoming_request):
if policy_violation:
return await errors.error(400, f'The request contains content which violates this model\'s policies for "{policy_violation}".', 'We currently don\'t support any NSFW models.')
role_cost_multiplier = credits_config['bonuses'].get(user['role'], 1)
role_cost_multiplier = config['roles'][user['role']]['bonus']
cost = round(cost * role_cost_multiplier)
if user['credits'] < cost: