Compare commits

...

2 commits

Author SHA1 Message Date
NovaOSS Admins a16ba559fb Much cheaper API 2023-10-16 21:55:54 +00:00
NovaOSS Admins 30bd5616b8 Added token-based credit cost 2023-10-16 21:34:54 +00:00
14 changed files with 136 additions and 87 deletions

View file

@@ -65,6 +65,7 @@ This one's code can be found in the following repository: [github.com/novaoss/no
 # Setup
 ## Requirements
+- **Rust** (`curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh`)
 - newest **Python** version
 - newest Python **pip** version
 - **MongoDB** database

View file

@@ -5,21 +5,28 @@ async def after_request(
     incoming_request: dict,
     target_request: dict,
     user: dict,
+    tokens: dict,
     credits_cost: int,
-    input_tokens: int,
     path: str,
     is_chat: bool,
     model: str,
+    provider: str,
 ) -> None:
     """Runs after every request."""
 
     if user and incoming_request:
-        await logs.log_api_request(user=user, incoming_request=incoming_request, target_url=target_request['url'])
+        await logs.log_api_request(
+            user=user,
+            incoming_request=incoming_request,
+            target_url=target_request['url'],
+            tokens=tokens,
+            provider=provider
+        )
 
     if credits_cost and user:
         await users.manager.update_by_id(user['_id'], {'$inc': {'credits': -credits_cost}})
 
-    ip_address = await network.get_ip(incoming_request)
+    ip_address = network.get_ip(incoming_request)
 
     await stats.manager.add_date()
     # await stats.manager.add_ip_address(ip_address)
@@ -28,4 +35,3 @@ async def after_request(
     if is_chat:
         await stats.manager.add_model(model)
-        await stats.manager.add_tokens(input_tokens, model)

View file

@@ -5,9 +5,9 @@ costs:
   other: 5 # Other endpoints
   chat-models: # chat completions
-    gpt-4-32k: 200
-    gpt-4: 50
-    gpt-3: 10
+    gpt-4-32k: 100
+    gpt-4: 20
+    gpt-3: 5
 
 ## Roles Explanation
@@ -17,11 +17,11 @@ costs:
 roles:
   owner:
     bonus: 0
-  admin:
+  enterprise:
     bonus: 0.2
-  helper:
+  admin:
     bonus: 0.4
-  booster:
+  helper:
     bonus: 0.6
   default:
     bonus: 1.0

View file

@@ -19,7 +19,7 @@ conn = AsyncIOMotorClient(os.environ['MONGO_URI'])
 async def _get_collection(collection_name: str):
     return conn[os.getenv('MONGO_NAME', 'nova-test')][collection_name]
 
-async def log_api_request(user: dict, incoming_request, target_url: str):
+async def log_api_request(user: dict, incoming_request, target_url: str, tokens: dict, provider: str) -> dict:
     """Logs the API Request into the database."""
     db = await _get_collection('logs')
@@ -32,20 +32,24 @@ async def log_api_request(user: dict, incoming_request, target_url: str):
         pass
 
     model = payload.get('model')
-    ip_address = await network.get_ip(incoming_request)
+    ip_address = network.get_ip(incoming_request)
+
+    path = incoming_request.url.path
+    if path == '/v1/chat/completions':
+        path = 'c'
 
     new_log_item = {
         'timestamp': time.time(),
-        'method': incoming_request.method,
-        'path': incoming_request.url.path,
+        'path': path,
         'user_id': str(user['_id']),
         'security': {
             'ip': ip_address,
         },
         'details': {
             'model': model,
-            'target_url': target_url
-        }
+            'provider': provider,
+        },
+        'tokens': tokens,
     }
 
     inserted = await db.insert_one(new_log_item)
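For reference, a chat-completions log document written by the new code would look roughly like this; every field value below is invented for illustration only:

# Illustrative shape of a log entry after this change; values are made up.
new_log_item = {
    'timestamp': 1697491200.0,
    'path': 'c',                                   # '/v1/chat/completions' is shortened to 'c'
    'user_id': '6523b1e0c0ffee0012345678',
    'security': {'ip': '203.0.113.7'},
    'details': {'model': 'gpt-3.5-turbo', 'provider': 'ails'},
    'tokens': {'input': 124, 'output': 87, 'total': 211},
}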

View file

@@ -17,7 +17,16 @@ load_dotenv()
 with open(os.path.join(helpers.root, 'api', 'config', 'config.yml'), encoding='utf8') as f:
     credits_config = yaml.safe_load(f)
 
-## MONGODB Setup
+infix = os.getenv('KEYGEN_INFIX', 'S3LFH0ST')
+
+async def generate_api_key():
+    chars = string.ascii_letters + string.digits
+    suffix = ''.join(random.choices(chars, k=20))
+    prefix = ''.join(random.choices(chars, k=20))
+    new_api_key = f'nv2-{prefix}{infix}{suffix}'
+    return new_api_key
 
 class UserManager:
     """
@@ -31,19 +40,13 @@ class UserManager:
         return self.conn[os.getenv('MONGO_NAME', 'nova-test')][collection_name]
 
     async def get_all_users(self):
-        collection = self.conn[os.getenv('MONGO_NAME', 'nova-test')]['users']
+        collection = self.conn['nova-core']['users']
         return collection#.find()
 
     async def create(self, discord_id: str = '') -> dict:
         db = await self._get_collection('users')
-        chars = string.ascii_letters + string.digits
-        infix = os.getenv('KEYGEN_INFIX', 'S3LFH0ST')
-        suffix = ''.join(random.choices(chars, k=20))
-        prefix = ''.join(random.choices(chars, k=20))
-        new_api_key = f'nv2-{prefix}{infix}{suffix}'
+        new_api_key = await generate_api_key()
 
         existing_user = await self.user_by_discord_id(discord_id)
         if existing_user: # just change api key
             await db.update_one({'auth.discord': str(int(discord_id))}, {'$set': {'api_key': new_api_key}})
@@ -73,7 +76,18 @@ class UserManager:
     async def user_by_discord_id(self, discord_id: str):
         db = await self._get_collection('users')
-        return await db.find_one({'auth.discord': str(int(discord_id))})
+
+        user = await db.find_one({'auth.discord': str(discord_id)})
+
+        if not user:
+            return
+
+        if user['api_key'] == '':
+            new_api_key = await generate_api_key()
+            await db.update_one({'auth.discord': str(discord_id)}, {'$set': {'api_key': new_api_key}})
+            user = await db.find_one({'auth.discord': str(discord_id)})
+
+        return user
 
     async def user_by_api_key(self, key: str):
         db = await self._get_collection('users')
@@ -85,6 +99,7 @@ class UserManager:
     async def update_by_discord_id(self, discord_id: str, update):
         db = await self._get_collection('users')
         return await db.update_one({'auth.discord': str(int(discord_id))}, update)
 
     async def update_by_filter(self, obj_filter, update):
@@ -98,7 +113,7 @@ class UserManager:
 manager = UserManager()
 
 async def demo():
-    user = await UserManager().create(69420)
+    user = await UserManager().create('1099385227077488700')
     print(user)
 
 if __name__ == '__main__':
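The new generate_api_key() helper near the top of this file produces keys of the form nv2-<20 alphanumerics><infix><20 alphanumerics>, with the infix taken from KEYGEN_INFIX (default 'S3LFH0ST'). A minimal sketch that checks that shape; the sample key below is fabricated:

import re

# Fabricated sample key, purely to illustrate the layout generate_api_key() produces.
sample_key = 'nv2-' + 'Ab3dEf9hIjKlMnOpQr5t' + 'S3LFH0ST' + 'Uv2wXy4zAb6cDe8fGh0i'
assert re.fullmatch(r'nv2-[A-Za-z0-9]{20}S3LFH0ST[A-Za-z0-9]{20}', sample_key)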

View file

@@ -22,9 +22,6 @@ with open(os.path.join('cache', 'models.json'), encoding='utf8') as f:
     models_list = json.load(f)
 models = [model['id'] for model in models_list['data']]
 
-with open(os.path.join('config', 'config.yml'), encoding='utf8') as f:
-    config = yaml.safe_load(f)
-
 moderation_debug_key = os.getenv('MODERATION_DEBUG_KEY')
 
 async def handle(incoming_request: fastapi.Request):
@@ -36,10 +33,10 @@ async def handle(incoming_request: fastapi.Request):
     path = incoming_request.url.path
     path = path.replace('/v1/v1', '/v1')
 
-    ip_address = await network.get_ip(incoming_request)
+    ip_address = network.get_ip(incoming_request)
 
     if '/dashboard' in path:
-        return errors.error(404, 'You can\'t access /dashboard.', 'This is a private endpoint.')
+        return await errors.error(404, 'You can\'t access /dashboard.', 'This is a private endpoint.')
 
     if path.startswith('/v1/models'):
         return fastapi.responses.JSONResponse(content=models_list)
@@ -79,25 +76,7 @@ async def handle(incoming_request: fastapi.Request):
     if 'account/credits' in path:
         return fastapi.responses.JSONResponse({'credits': user['credits']})
 
-    costs = config['costs']
-    cost = costs['other']
-
-    if 'chat/completions' in path:
-        cost = costs['chat-models'].get(payload.get('model'), cost)
-
-    role = user.get('role', 'default')
-
-    if 'enterprise' in role:
-        role_cost_multiplier = 0.1
-    else:
-        try:
-            role_cost_multiplier = config['roles'][role]['bonus']
-        except KeyError:
-            role_cost_multiplier = 1
-
-    cost = round(cost * role_cost_multiplier)
-
-    if user['credits'] < cost:
+    if user['credits'] < 1:
         return await errors.error(429, 'Not enough credits.', 'Wait or earn more credits. Learn more on our website or Discord server.')
 
     if 'DISABLE_VARS' not in key_tags:
@@ -168,8 +147,6 @@ async def handle(incoming_request: fastapi.Request):
             user=user,
             path=path,
             payload=payload,
-            credits_cost=cost,
-            input_tokens=0,
             incoming_request=incoming_request,
         ),
         media_type=media_type

View file

@@ -1,28 +1,19 @@
 import os
 import time
 
 from dotenv import load_dotenv
 from slowapi.util import get_remote_address
 
 load_dotenv()
 
-async def get_ip(request) -> str:
+def get_ip(request) -> str:
     """Get the IP address of the incoming request."""
-    xff = None
-    if request.headers.get('x-forwarded-for'):
-        xff, *_ = request.headers['x-forwarded-for'].split(', ')
-
-    possible_ips = [xff, request.headers.get('cf-connecting-ip'), request.client.host]
-    detected_ip = next((i for i in possible_ips if i), None)
+    detected_ip = request.headers.get('cf-connecting-ip', get_remote_address(request))
 
     return detected_ip
 
 def get_ratelimit_key(request) -> str:
     """Get the IP address of the incoming request."""
-    custom = os.environ('NO_RATELIMIT_IPS')
-    ip = get_remote_address(request)
-
-    if ip in custom:
-        return f'enterprise_{ip}'
-
+    ip = get_ip(request)
     return ip

View file

@@ -2,7 +2,7 @@ import time
 import asyncio
 import tiktoken
 
-async def count_for_messages(messages: list, model: str='gpt-3.5-turbo-0613') -> int:
+async def count_tokens_for_messages(messages: list, model: str='gpt-3.5-turbo-0613') -> int:
     """Return the number of tokens used by a list of messages
 
     Args:
@@ -38,13 +38,13 @@ async def count_for_messages(messages: list, model: str='gpt-3.5-turbo-0613') ->
         tokens_per_name = -1 # if there's a name, the role is omitted
 
     elif 'gpt-3.5-turbo' in model:
-        return await count_for_messages(messages, model='gpt-3.5-turbo-0613')
+        return await count_tokens_for_messages(messages, model='gpt-3.5-turbo-0613')
 
     elif 'gpt-4' in model:
-        return await count_for_messages(messages, model='gpt-4-0613')
+        return await count_tokens_for_messages(messages, model='gpt-4-0613')
 
     else:
-        raise NotImplementedError(f"""count_for_messages() is not implemented for model {model}.
+        raise NotImplementedError(f"""count_tokens_for_messages() is not implemented for model {model}.
 See https://github.com/openai/openai-python/blob/main/chatml.md
 for information on how messages are converted to tokens.""")
@@ -66,8 +66,8 @@ if __name__ == '__main__':
     messages = [
         {
             'role': 'user',
-            'content': '1+1='
+            'content': 'Hi'
         }
     ]
 
-    print(asyncio.run(count_for_messages(messages)))
+    print(asyncio.run(count_tokens_for_messages(messages)))
     print(f'Took {(time.perf_counter() - start) * 1000}ms')

View file

@@ -17,6 +17,8 @@ from slowapi import Limiter, _rate_limit_exceeded_handler
 import core
 import handler
 
+from helpers.network import get_ratelimit_key
+
 load_dotenv()
 
 app = fastapi.FastAPI()
@@ -33,7 +35,7 @@ app.include_router(core.router)
 limiter = Limiter(
     swallow_errors=True,
-    key_func=get_remote_address,
+    key_func=get_ratelimit_key,
     default_limits=[
         '2/second',
         '30/minute',

View file

@@ -1,2 +1,2 @@
-from . import ails, closed, closed4
-MODULES = [ails, closed, closed4]
+from . import ails
+MODULES = [ails]

View file

@@ -22,7 +22,10 @@ async def main():
     for file_name in os.listdir(os.path.dirname(__file__)):
         if file_name.endswith('.py') and not file_name.startswith('_'):
-            print(file_name.split('.')[0])
+            name = file_name.split('.')[0]
+            models = importlib.import_module(f'.{file_name.split(".")[0]}', 'providers').MODELS
+            print(f' {name} @ {", ".join(models)}')
 
     sys.exit(0)

View file

@@ -3,6 +3,8 @@ try:
 except ModuleNotFoundError:
     from ...db import providerkeys
 
+# Sort the models by their value/cost/rarity.
+
 GPT_3 = [
     'gpt-3.5-turbo',
     'gpt-3.5-turbo-16k',

View file

@@ -2,6 +2,7 @@
 import os
 import json
+import yaml
 import ujson
 import aiohttp
 import asyncio
@@ -17,15 +18,16 @@ import load_balancing
 from helpers import errors
 from db import providerkeys
+from helpers.tokens import count_tokens_for_messages
 
 load_dotenv()
 
 CRITICAL_API_ERRORS = ['invalid_api_key', 'account_deactivated']
 keymanager = providerkeys.manager
 background_tasks: Set[asyncio.Task[Any]] = set()
 
+with open(os.path.join('config', 'config.yml'), encoding='utf8') as f:
+    config = yaml.safe_load(f)
+
 def create_background_task(coro: Coroutine[Any, Any, Any]) -> None:
     """asyncio.create_task, which prevents the task from being garbage collected.
@@ -36,13 +38,10 @@ def create_background_task(coro: Coroutine[Any, Any, Any]) -> None:
     background_tasks.add(task)
     task.add_done_callback(background_tasks.discard)
 
 async def respond(
     path: str='/v1/chat/completions',
     user: dict=None,
     payload: dict=None,
-    credits_cost: int=0,
-    input_tokens: int=0,
     incoming_request: starlette.requests.Request=None,
 ):
     """
@@ -72,6 +71,9 @@ async def respond(
         'timeout': 0
     }
 
+    input_tokens = 0
+    output_tokens = 0
+
     for _ in range(10):
         try:
             if is_chat:
@@ -161,9 +163,15 @@ async def respond(
                 continue
 
             if response.ok:
+                if is_chat and not is_stream:
+                    input_tokens = client_json_response['usage']['prompt_tokens']
+                    output_tokens = client_json_response['usage']['completion_tokens']
+
                 server_json_response = client_json_response
 
             if is_stream:
+                input_tokens = await count_tokens_for_messages(payload['messages'], model=model)
+
                 chunk_no = 0
                 buffer = ''
@@ -175,7 +183,7 @@ async def respond(
                     if 'azure' in provider_name:
                         chunk = chunk.replace('data: ', '', 1)
 
-                    if not chunk or chunk_no == 1:
+                    if not chunk.strip() or chunk_no == 1:
                         continue
 
                     subchunks = chunk.split('\n\n')
@@ -188,6 +196,8 @@ async def respond(
                         yield subchunk + '\n\n'
 
                     buffer = subchunks[-1]
+
+                output_tokens = chunk_no
 
             break
 
         except aiohttp.client_exceptions.ServerTimeoutError:
@@ -198,7 +208,7 @@ async def respond(
     skipped_errors = {k: v for k, v in skipped_errors.items() if v > 0}
     skipped_errors = ujson.dumps(skipped_errors, indent=4)
 
     yield await errors.yield_error(500,
-        'Sorry, our API seems to have issues connecting to our provider(s).',
+        f'Sorry, our API seems to have issues connecting to "{model}".',
         f'Please send this info to support: {skipped_errors}'
     )
     return
@@ -206,13 +216,51 @@ async def respond(
     if (not is_stream) and server_json_response:
         yield json.dumps(server_json_response)
 
+    role = user.get('role', 'default')
+
+    model_multipliers = config['costs']
+    model_multiplier = model_multipliers['other']
+
+    if is_chat:
+        model_multiplier = model_multipliers['chat-models'].get(payload.get('model'), model_multiplier)
+        total_tokens = input_tokens + output_tokens
+
+        credits_cost = total_tokens / 60
+        credits_cost = round(credits_cost * model_multiplier)
+
+        if credits_cost < 1:
+            credits_cost = 1
+
+        tokens = {
+            'input': input_tokens,
+            'output': output_tokens,
+            'total': total_tokens
+        }
+    else:
+        credits_cost = 5
+        tokens = {
+            'input': 0,
+            'output': 0,
+            'total': credits_cost
+        }
+
+    try:
+        role_cost_multiplier = config['roles'][role]['bonus']
+    except KeyError:
+        role_cost_multiplier = 1
+
+    credits_cost = round(credits_cost * role_cost_multiplier)
+
+    print(f'[bold]Credits cost[/bold]: {credits_cost}')
+
     create_background_task(
         after_request.after_request(
+            provider=provider_name,
            incoming_request=incoming_request,
            target_request=target_request,
            user=user,
            credits_cost=credits_cost,
-            input_tokens=input_tokens,
+            tokens=tokens,
            path=path,
            is_chat=is_chat,
            model=model,
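Taken together with the cheaper per-model values in the config.yml diff above, the price of a request is now derived from token usage instead of a flat per-endpoint charge. A rough worked example of the new formula, with made-up numbers for a short gpt-4 chat request by a default-role user:

# Hedged sketch of the new pricing math; the token counts are illustrative only.
model_multiplier = 20                      # config.yml: costs.chat-models.gpt-4
role_bonus = 1.0                           # config.yml: roles.default.bonus
input_tokens, output_tokens = 90, 30

credits_cost = round((input_tokens + output_tokens) / 60 * model_multiplier)
credits_cost = max(credits_cost, 1)        # never less than one credit
credits_cost = round(credits_cost * role_bonus)
print(credits_cost)                        # -> 40 (the old flat gpt-4 price was 50)

Short requests therefore become cheaper than the old flat prices, while very long conversations can now cost more, since the charge scales with input plus output tokens.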

View file

@@ -1,5 +1,5 @@
 roles = {
-    '': '2500',
+    '': '2000',
     'lvl10': '2800',
     'lvl20': '3100',
     'lvl30': '3400',