diff --git a/api/config/config.yml b/api/config/config.yml index 4d3fefb..593392c 100644 --- a/api/config/config.yml +++ b/api/config/config.yml @@ -10,8 +10,17 @@ costs: gpt-4: 30 gpt-4-32k: 100 -# bonuses are multiplier for costs: -# final_cost = cost * bonus +## Roles Explanation + +# Bonuses: A multiplier applied to costs. +# They work like: final_cost = cost * bonus +# Rate limits: Limit how often a user can make requests. +# The rate limit is the number of seconds a user must wait before making a new request. + +## TODO: Set up proper rate limit settings for each role +## Current settings are: +## **NOT MEANT FOR PRODUCTION. DO NOT USE WITH THESE SETTINGS.** + roles: owner: bonus: 0.1 diff --git a/api/streaming.py b/api/streaming.py index dc8dfea..aa70a33 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -30,7 +30,7 @@ with open('config/config.yml', encoding='utf8') as f: config = yaml.safe_load(f) ## Where all rate limit requested data will be stored. -# Rate limit data is **not persistent**. I.E It will be deleted on server stop/restart. +# Rate limit data is **not persistent** (it will be deleted on server stop/restart). user_last_request_time = {} DEMO_PAYLOAD = { @@ -90,6 +90,9 @@ async def stream( incoming_request (starlette.requests.Request, optional): Incoming request. Defaults to None. """ + ## Rate-limit the user. + # If the rate limit is exceeded, fail with error code 429. Otherwise, let the user pass but note down + # the last request time for future requests. if user: role = user.get('role', 'default') rate_limit = config['roles'][role]['rate_limit'].get(payload['model'], 10) @@ -107,9 +110,6 @@ async def stream( db = UserManager() stats = StatsManager() - ## Check if breaching rate limit - - is_chat = False is_stream = payload.get('stream', False)