mirror of
https://github.com/NovaOSS/nova-api.git
synced 2024-11-25 21:23:56 +01:00
Compare commits
2 commits
f896b18968
...
69bc2e33de
Author | SHA1 | Date | |
---|---|---|---|
69bc2e33de | |||
de954998d2 |
59
api/config/config.yml
Normal file
59
api/config/config.yml
Normal file
|
@ -0,0 +1,59 @@
|
||||||
|
max-credits: 100001
|
||||||
|
max-credits-owner: 694201337
|
||||||
|
start-credits: 1000
|
||||||
|
|
||||||
|
costs:
|
||||||
|
other: 10
|
||||||
|
|
||||||
|
chat-models:
|
||||||
|
gpt-3: 10
|
||||||
|
gpt-4: 30
|
||||||
|
gpt-4-32k: 100
|
||||||
|
|
||||||
|
## Roles Explanation
|
||||||
|
|
||||||
|
# Bonuses: They are a multiplier for costs
|
||||||
|
# They work like: final_cost = cost * bonus (e.g. a bonus of 0.5 halves the cost, 1 charges the full cost, and 0 makes requests free)
|
||||||
|
# Rate limits: Limit the requests of the user
|
||||||
|
# Each rate limit is the minimum number of seconds a user must wait between two requests.
|
||||||
|
|
||||||
|
## TODO: Set up proper rate limit settings for each role
|
||||||
|
## Current settings are:
|
||||||
|
## **NOT MEANT FOR PRODUCTION. DO NOT USE WITH THESE SETTINGS.**
|
||||||
|
|
||||||
|
roles:
|
||||||
|
owner:
|
||||||
|
bonus: 0.1
|
||||||
|
rate_limit:
|
||||||
|
other: 60
|
||||||
|
gpt-3: 60
|
||||||
|
gpt-4: 35
|
||||||
|
gpt-4-32k: 5
|
||||||
|
admin:
|
||||||
|
bonus: 0.3
|
||||||
|
rate_limit:
|
||||||
|
other: 60
|
||||||
|
gpt-3: 60
|
||||||
|
gpt-4: 30
|
||||||
|
gpt-4-32k: 4
|
||||||
|
helper:
|
||||||
|
bonus: 0.4
|
||||||
|
rate_limit:
|
||||||
|
other: 60
|
||||||
|
gpt-3: 60
|
||||||
|
gpt-4: 25
|
||||||
|
gpt-4-32k: 3
|
||||||
|
booster:
|
||||||
|
bonus: 0.5
|
||||||
|
rate_limit:
|
||||||
|
other: 60
|
||||||
|
gpt-3: 60
|
||||||
|
gpt-4: 20
|
||||||
|
gpt-4-32k: 2
|
||||||
|
default:
|
||||||
|
bonus: 0
|
||||||
|
rate_limit:
|
||||||
|
other: 60
|
||||||
|
gpt-3: 60
|
||||||
|
gpt-4: 15
|
||||||
|
gpt-4-32k: 1
|
|
@ -1,19 +0,0 @@
|
||||||
max-credits: 100001
|
|
||||||
max-credits-owner: 694201337
|
|
||||||
start-credits: 1000
|
|
||||||
|
|
||||||
costs:
|
|
||||||
other: 10
|
|
||||||
|
|
||||||
chat-models:
|
|
||||||
gpt-3: 10
|
|
||||||
gpt-4: 30
|
|
||||||
gpt-4-32k: 100
|
|
||||||
|
|
||||||
# bonuses are multiplier for costs:
|
|
||||||
# final_cost = cost * bonus
|
|
||||||
bonuses:
|
|
||||||
owner: 0.1
|
|
||||||
admin: 0.3
|
|
||||||
helper: 0.4
|
|
||||||
booster: 0.5
|
|
|
@ -12,7 +12,6 @@ from dotenv import load_dotenv
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
router = fastapi.APIRouter(tags=['core'])
|
router = fastapi.APIRouter(tags=['core'])
|
||||||
|
|
||||||
|
|
||||||
async def check_core_auth(request):
|
async def check_core_auth(request):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,6 @@ import fastapi
|
||||||
from rich import print
|
from rich import print
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
|
||||||
import core
|
import core
|
||||||
import transfer
|
import transfer
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,7 @@ import dhooks
|
||||||
import asyncio
|
import asyncio
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import starlette
|
import starlette
|
||||||
|
import datetime
|
||||||
|
|
||||||
from rich import print
|
from rich import print
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
@ -19,9 +20,19 @@ from db import logs
|
||||||
from db.users import UserManager
|
from db.users import UserManager
|
||||||
from db.stats import StatsManager
|
from db.stats import StatsManager
|
||||||
from helpers import network, chat, errors
|
from helpers import network, chat, errors
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
|
## Load the config, which contains the rate limits.
|
||||||
|
with open('config/config.yml', encoding='utf8') as f:
|
||||||
|
config = yaml.safe_load(f)
|
||||||
|
|
||||||
|
## Stores the time of each user's last request; this is the data the rate limiter checks against.
|
||||||
|
# Rate limit data is **not persistent** (It will be deleted on server stop/restart).
|
||||||
|
user_last_request_time = {}
|
||||||
|
|
||||||
DEMO_PAYLOAD = {
|
DEMO_PAYLOAD = {
|
||||||
'model': 'gpt-3.5-turbo',
|
'model': 'gpt-3.5-turbo',
|
||||||
'messages': [
|
'messages': [
|
||||||
|
@ -68,6 +79,7 @@ async def stream(
|
||||||
incoming_request: starlette.requests.Request=None,
|
incoming_request: starlette.requests.Request=None,
|
||||||
):
|
):
|
||||||
"""Stream the completions request. Sends data in chunks
|
"""Stream the completions request. Sends data in chunks
|
||||||
|
If not streaming, it sends the result in its entirety.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
path (str, optional): URL Path. Defaults to '/v1/chat/completions'.
|
path (str, optional): URL Path. Defaults to '/v1/chat/completions'.
|
||||||
|
@ -77,8 +89,27 @@ async def stream(
|
||||||
input_tokens (int, optional): Total tokens calculated with tokenizer. Defaults to 0.
|
input_tokens (int, optional): Total tokens calculated with tokenizer. Defaults to 0.
|
||||||
incoming_request (starlette.requests.Request, optional): Incoming request. Defaults to None.
|
incoming_request (starlette.requests.Request, optional): Incoming request. Defaults to None.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
## Rate limits user.
|
||||||
|
# If the rate limit is exceeded, yields a 429 error and stops. Otherwise, lets the user pass but notes down
|
||||||
|
# last request time for future requests.
|
||||||
|
if user:
|
||||||
|
role = user.get('role', 'default')
|
||||||
|
rate_limit = config['roles'][role]['rate_limit'].get(payload['model'], 10)
|
||||||
|
|
||||||
|
last_request_time = user_last_request_time.get(user['api_key'])
|
||||||
|
time_since_last_request = datetime.now() - last_request_time
|
||||||
|
|
||||||
|
if time_since_last_request < datetime.timedelta(seconds=rate_limit):
|
||||||
|
yield await errors.yield_error(429, "Rate limit exceeded', 'You are making requests too quickly. Please wait and try again later. Ask a administrator if you think this shouldn't happen. ")
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
user_last_request_time[user['_id']] = datetime.now()
|
||||||
|
|
||||||
|
## Setup managers
|
||||||
db = UserManager()
|
db = UserManager()
|
||||||
stats = StatsManager()
|
stats = StatsManager()
|
||||||
|
|
||||||
is_chat = False
|
is_chat = False
|
||||||
is_stream = payload.get('stream', False)
|
is_stream = payload.get('stream', False)
|
||||||
|
|
||||||
|
|
|
@ -16,8 +16,8 @@ load_dotenv()
|
||||||
|
|
||||||
models_list = json.load(open('models.json'))
|
models_list = json.load(open('models.json'))
|
||||||
|
|
||||||
with open('config/credits.yml', encoding='utf8') as f:
|
with open('config/config.yml', encoding='utf8') as f:
|
||||||
credits_config = yaml.safe_load(f)
|
config = yaml.safe_load(f)
|
||||||
|
|
||||||
async def handle(incoming_request):
|
async def handle(incoming_request):
|
||||||
"""
|
"""
|
||||||
|
@ -59,7 +59,7 @@ async def handle(incoming_request):
|
||||||
if path_contains_models:
|
if path_contains_models:
|
||||||
return fastapi.responses.JSONResponse(content=models_list)
|
return fastapi.responses.JSONResponse(content=models_list)
|
||||||
|
|
||||||
costs = credits_config['costs']
|
costs = config['costs']
|
||||||
cost = costs['other']
|
cost = costs['other']
|
||||||
|
|
||||||
if 'chat/completions' in path:
|
if 'chat/completions' in path:
|
||||||
|
@ -74,7 +74,7 @@ async def handle(incoming_request):
|
||||||
if policy_violation:
|
if policy_violation:
|
||||||
return await errors.error(400, f'The request contains content which violates this model\'s policies for "{policy_violation}".', 'We currently don\'t support any NSFW models.')
|
return await errors.error(400, f'The request contains content which violates this model\'s policies for "{policy_violation}".', 'We currently don\'t support any NSFW models.')
|
||||||
|
|
||||||
role_cost_multiplier = credits_config['bonuses'].get(user['role'], 1)
|
role_cost_multiplier = config['roles'][user['role']]['bonus']
|
||||||
cost = round(cost * role_cost_multiplier)
|
cost = round(cost * role_cost_multiplier)
|
||||||
|
|
||||||
if user['credits'] < cost:
|
if user['credits'] < cost:
|
||||||
|
|
Loading…
Reference in a new issue