diff --git a/api/config/config.yml b/api/config/config.yml deleted file mode 100644 index 593392c..0000000 --- a/api/config/config.yml +++ /dev/null @@ -1,59 +0,0 @@ -max-credits: 100001 -max-credits-owner: 694201337 -start-credits: 1000 - -costs: - other: 10 - - chat-models: - gpt-3: 10 - gpt-4: 30 - gpt-4-32k: 100 - -## Roles Explanation - -# Bonuses: They are a multiplier for costs -# They work like: final_cost = cost * bonus -# Rate limits: Limit the requests of the user -# The rate limit is by how many seconds until a new request can be done. - -## TODO: Setup proper rate limit settings for each role -## Current settings are: -## **NOT MEANT FOR PRODUCTION. DO NOT USE WITH THESE SETTINGS.** - -roles: - owner: - bonus: 0.1 - rate_limit: - other: 60 - gpt-3: 60 - gpt-4: 35 - gpt-4-32k: 5 - admin: - bonus: 0.3 - rate_limit: - other: 60 - gpt-3: 60 - gpt-4: 30 - gpt-4-32k: 4 - helper: - bonus: 0.4 - rate_limit: - other: 60 - gpt-3: 60 - gpt-4: 25 - gpt-4-32k: 3 - booster: - bonus: 0.5 - rate_limit: - other: 60 - gpt-3: 60 - gpt-4: 20 - gpt-4-32k: 2 - default: - bonus: 0 - rate_limit: - other: 60 - gpt-3: 60 - gpt-4: 15 - gpt-4-32k: 1 \ No newline at end of file diff --git a/api/config/credits.yml b/api/config/credits.yml new file mode 100644 index 0000000..29eeaa9 --- /dev/null +++ b/api/config/credits.yml @@ -0,0 +1,19 @@ +max-credits: 100001 +max-credits-owner: 694201337 +start-credits: 1000 + +costs: + other: 10 + + chat-models: + gpt-3: 10 + gpt-4: 30 + gpt-4-32k: 100 + +# bonuses are multiplier for costs: +# final_cost = cost * bonus +bonuses: + owner: 0.1 + admin: 0.3 + helper: 0.4 + booster: 0.5 diff --git a/api/db/users.py b/api/db/users.py index 962b752..6f2b862 100644 --- a/api/db/users.py +++ b/api/db/users.py @@ -9,7 +9,7 @@ from motor.motor_asyncio import AsyncIOMotorClient load_dotenv() -with open('config/config.yml', encoding='utf8') as f: +with open('config/credits.yml', encoding='utf8') as f: credits_config = yaml.safe_load(f) ## MONGODB Setup diff --git a/api/streaming.py b/api/streaming.py index aa70a33..7c3e9ba 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -26,12 +26,12 @@ import yaml load_dotenv() ## Loads config which contains rate limits -with open('config/config.yml', encoding='utf8') as f: +with open('config/credits.yml', encoding='utf8') as f: config = yaml.safe_load(f) ## Where all rate limit requested data will be stored. # Rate limit data is **not persistent** (It will be deleted on server stop/restart). -user_last_request_time = {} +# user_last_request_time = {} DEMO_PAYLOAD = { 'model': 'gpt-3.5-turbo', @@ -93,18 +93,18 @@ async def stream( ## Rate limits user. # If rate limit is exceeded, error code 429. Otherwise, lets the user pass but notes down # last request time for future requests. - if user: - role = user.get('role', 'default') - rate_limit = config['roles'][role]['rate_limit'].get(payload['model'], 10) + # if user: + # role = user.get('role', 'default') + # rate_limit = config['roles'][role]['rate_limit'].get(payload['model'], 10) - last_request_time = user_last_request_time.get(user['api_key']) - time_since_last_request = datetime.now() - last_request_time + # last_request_time = user_last_request_time.get(user['api_key']) + # time_since_last_request = datetime.datetime.now() - last_request_time - if time_since_last_request < datetime.timedelta(seconds=rate_limit): - yield await errors.yield_error(429, "Rate limit exceeded', 'You are making requests too quickly. Please wait and try again later. Ask a administrator if you think this shouldn't happen. ") - return - else: - user_last_request_time[user['_id']] = datetime.now() + # if time_since_last_request < datetime.timedelta(seconds=rate_limit): + # yield await errors.yield_error(429, 'Rate limit exceeded', "You are making requests too quickly. Please wait and try again later. Ask a administrator if you think this shouldn't happen.") + # return + # else: + # user_last_request_time[user['_id']] = datetime.datetime.now() ## Setup managers db = UserManager() diff --git a/api/transfer.py b/api/transfer.py index 835da23..77e586f 100644 --- a/api/transfer.py +++ b/api/transfer.py @@ -16,8 +16,8 @@ load_dotenv() models_list = json.load(open('models.json')) -with open('config/config.yml', encoding='utf8') as f: - config = yaml.safe_load(f) +with open('config/credits.yml', encoding='utf8') as f: + credits_config = yaml.safe_load(f) async def handle(incoming_request): """ @@ -62,7 +62,7 @@ async def handle(incoming_request): if path_contains_models: return fastapi.responses.JSONResponse(content=models_list) - costs = config['costs'] + costs = credits_config['costs'] cost = costs['other'] if 'chat/completions' in path: @@ -77,7 +77,7 @@ async def handle(incoming_request): if policy_violation: return await errors.error(400, f'The request contains content which violates this model\'s policies for "{policy_violation}".', 'We currently don\'t support any NSFW models.') - role_cost_multiplier = config['roles'][user['role']]['bonus'] + role_cost_multiplier = credits_config['bonuses'].get(user['role'], 1) cost = round(cost * role_cost_multiplier) if user['credits'] < cost: