mirror of
https://github.com/NovaOSS/nova-api.git
synced 2024-11-25 16:43:58 +01:00
Compare commits
2 commits
4444fc1315
...
a16ba559fb
Author | SHA1 | Date | |
---|---|---|---|
a16ba559fb | |||
30bd5616b8 |
|
@ -65,6 +65,7 @@ This one's code can be found in the following repository: [github.com/novaoss/no
|
||||||
|
|
||||||
# Setup
|
# Setup
|
||||||
## Requirements
|
## Requirements
|
||||||
|
- **Rust** (`curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh`)
|
||||||
- newest **Python** version
|
- newest **Python** version
|
||||||
- newest Python **pip** version
|
- newest Python **pip** version
|
||||||
- **MongoDB** database
|
- **MongoDB** database
|
||||||
|
|
|
@ -5,21 +5,28 @@ async def after_request(
|
||||||
incoming_request: dict,
|
incoming_request: dict,
|
||||||
target_request: dict,
|
target_request: dict,
|
||||||
user: dict,
|
user: dict,
|
||||||
|
tokens: dict,
|
||||||
credits_cost: int,
|
credits_cost: int,
|
||||||
input_tokens: int,
|
|
||||||
path: str,
|
path: str,
|
||||||
is_chat: bool,
|
is_chat: bool,
|
||||||
model: str,
|
model: str,
|
||||||
|
provider: str,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Runs after every request."""
|
"""Runs after every request."""
|
||||||
|
|
||||||
if user and incoming_request:
|
if user and incoming_request:
|
||||||
await logs.log_api_request(user=user, incoming_request=incoming_request, target_url=target_request['url'])
|
await logs.log_api_request(
|
||||||
|
user=user,
|
||||||
|
incoming_request=incoming_request,
|
||||||
|
target_url=target_request['url'],
|
||||||
|
tokens=tokens,
|
||||||
|
provider=provider
|
||||||
|
)
|
||||||
|
|
||||||
if credits_cost and user:
|
if credits_cost and user:
|
||||||
await users.manager.update_by_id(user['_id'], {'$inc': {'credits': -credits_cost}})
|
await users.manager.update_by_id(user['_id'], {'$inc': {'credits': -credits_cost}})
|
||||||
|
|
||||||
ip_address = await network.get_ip(incoming_request)
|
ip_address = network.get_ip(incoming_request)
|
||||||
|
|
||||||
await stats.manager.add_date()
|
await stats.manager.add_date()
|
||||||
# await stats.manager.add_ip_address(ip_address)
|
# await stats.manager.add_ip_address(ip_address)
|
||||||
|
@ -28,4 +35,3 @@ async def after_request(
|
||||||
|
|
||||||
if is_chat:
|
if is_chat:
|
||||||
await stats.manager.add_model(model)
|
await stats.manager.add_model(model)
|
||||||
await stats.manager.add_tokens(input_tokens, model)
|
|
||||||
|
|
|
@ -5,9 +5,9 @@ costs:
|
||||||
other: 5 # Other endpoints
|
other: 5 # Other endpoints
|
||||||
|
|
||||||
chat-models: # chat completions
|
chat-models: # chat completions
|
||||||
gpt-4-32k: 200
|
gpt-4-32k: 100
|
||||||
gpt-4: 50
|
gpt-4: 20
|
||||||
gpt-3: 10
|
gpt-3: 5
|
||||||
|
|
||||||
## Roles Explanation
|
## Roles Explanation
|
||||||
|
|
||||||
|
@ -17,11 +17,11 @@ costs:
|
||||||
roles:
|
roles:
|
||||||
owner:
|
owner:
|
||||||
bonus: 0
|
bonus: 0
|
||||||
admin:
|
enterprise:
|
||||||
bonus: 0.2
|
bonus: 0.2
|
||||||
helper:
|
admin:
|
||||||
bonus: 0.4
|
bonus: 0.4
|
||||||
booster:
|
helper:
|
||||||
bonus: 0.6
|
bonus: 0.6
|
||||||
default:
|
default:
|
||||||
bonus: 1.0
|
bonus: 1.0
|
||||||
|
|
|
@ -19,7 +19,7 @@ conn = AsyncIOMotorClient(os.environ['MONGO_URI'])
|
||||||
async def _get_collection(collection_name: str):
|
async def _get_collection(collection_name: str):
|
||||||
return conn[os.getenv('MONGO_NAME', 'nova-test')][collection_name]
|
return conn[os.getenv('MONGO_NAME', 'nova-test')][collection_name]
|
||||||
|
|
||||||
async def log_api_request(user: dict, incoming_request, target_url: str):
|
async def log_api_request(user: dict, incoming_request, target_url: str, tokens: dict, provider: str) -> dict:
|
||||||
"""Logs the API Request into the database."""
|
"""Logs the API Request into the database."""
|
||||||
|
|
||||||
db = await _get_collection('logs')
|
db = await _get_collection('logs')
|
||||||
|
@ -32,20 +32,24 @@ async def log_api_request(user: dict, incoming_request, target_url: str):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
model = payload.get('model')
|
model = payload.get('model')
|
||||||
ip_address = await network.get_ip(incoming_request)
|
ip_address = network.get_ip(incoming_request)
|
||||||
|
|
||||||
|
path = incoming_request.url.path
|
||||||
|
if path == '/v1/chat/completions':
|
||||||
|
path = 'c'
|
||||||
|
|
||||||
new_log_item = {
|
new_log_item = {
|
||||||
'timestamp': time.time(),
|
'timestamp': time.time(),
|
||||||
'method': incoming_request.method,
|
'path': path,
|
||||||
'path': incoming_request.url.path,
|
|
||||||
'user_id': str(user['_id']),
|
'user_id': str(user['_id']),
|
||||||
'security': {
|
'security': {
|
||||||
'ip': ip_address,
|
'ip': ip_address,
|
||||||
},
|
},
|
||||||
'details': {
|
'details': {
|
||||||
'model': model,
|
'model': model,
|
||||||
'target_url': target_url
|
'provider': provider,
|
||||||
}
|
},
|
||||||
|
'tokens': tokens,
|
||||||
}
|
}
|
||||||
|
|
||||||
inserted = await db.insert_one(new_log_item)
|
inserted = await db.insert_one(new_log_item)
|
||||||
|
|
|
@ -17,7 +17,16 @@ load_dotenv()
|
||||||
with open(os.path.join(helpers.root, 'api', 'config', 'config.yml'), encoding='utf8') as f:
|
with open(os.path.join(helpers.root, 'api', 'config', 'config.yml'), encoding='utf8') as f:
|
||||||
credits_config = yaml.safe_load(f)
|
credits_config = yaml.safe_load(f)
|
||||||
|
|
||||||
## MONGODB Setup
|
infix = os.getenv('KEYGEN_INFIX', 'S3LFH0ST')
|
||||||
|
|
||||||
|
async def generate_api_key():
|
||||||
|
chars = string.ascii_letters + string.digits
|
||||||
|
|
||||||
|
suffix = ''.join(random.choices(chars, k=20))
|
||||||
|
prefix = ''.join(random.choices(chars, k=20))
|
||||||
|
|
||||||
|
new_api_key = f'nv2-{prefix}{infix}{suffix}'
|
||||||
|
return new_api_key
|
||||||
|
|
||||||
class UserManager:
|
class UserManager:
|
||||||
"""
|
"""
|
||||||
|
@ -31,19 +40,13 @@ class UserManager:
|
||||||
return self.conn[os.getenv('MONGO_NAME', 'nova-test')][collection_name]
|
return self.conn[os.getenv('MONGO_NAME', 'nova-test')][collection_name]
|
||||||
|
|
||||||
async def get_all_users(self):
|
async def get_all_users(self):
|
||||||
collection = self.conn[os.getenv('MONGO_NAME', 'nova-test')]['users']
|
collection = self.conn['nova-core']['users']
|
||||||
return collection#.find()
|
return collection#.find()
|
||||||
|
|
||||||
async def create(self, discord_id: str = '') -> dict:
|
async def create(self, discord_id: str = '') -> dict:
|
||||||
db = await self._get_collection('users')
|
db = await self._get_collection('users')
|
||||||
chars = string.ascii_letters + string.digits
|
|
||||||
|
|
||||||
infix = os.getenv('KEYGEN_INFIX', 'S3LFH0ST')
|
|
||||||
suffix = ''.join(random.choices(chars, k=20))
|
|
||||||
prefix = ''.join(random.choices(chars, k=20))
|
|
||||||
|
|
||||||
new_api_key = f'nv2-{prefix}{infix}{suffix}'
|
|
||||||
|
|
||||||
|
new_api_key = await generate_api_key()
|
||||||
existing_user = await self.user_by_discord_id(discord_id)
|
existing_user = await self.user_by_discord_id(discord_id)
|
||||||
if existing_user: # just change api key
|
if existing_user: # just change api key
|
||||||
await db.update_one({'auth.discord': str(int(discord_id))}, {'$set': {'api_key': new_api_key}})
|
await db.update_one({'auth.discord': str(int(discord_id))}, {'$set': {'api_key': new_api_key}})
|
||||||
|
@ -73,7 +76,18 @@ class UserManager:
|
||||||
|
|
||||||
async def user_by_discord_id(self, discord_id: str):
|
async def user_by_discord_id(self, discord_id: str):
|
||||||
db = await self._get_collection('users')
|
db = await self._get_collection('users')
|
||||||
return await db.find_one({'auth.discord': str(int(discord_id))})
|
|
||||||
|
user = await db.find_one({'auth.discord': str(discord_id)})
|
||||||
|
|
||||||
|
if not user:
|
||||||
|
return
|
||||||
|
|
||||||
|
if user['api_key'] == '':
|
||||||
|
new_api_key = await generate_api_key()
|
||||||
|
await db.update_one({'auth.discord': str(discord_id)}, {'$set': {'api_key': new_api_key}})
|
||||||
|
user = await db.find_one({'auth.discord': str(discord_id)})
|
||||||
|
|
||||||
|
return user
|
||||||
|
|
||||||
async def user_by_api_key(self, key: str):
|
async def user_by_api_key(self, key: str):
|
||||||
db = await self._get_collection('users')
|
db = await self._get_collection('users')
|
||||||
|
@ -85,6 +99,7 @@ class UserManager:
|
||||||
|
|
||||||
async def update_by_discord_id(self, discord_id: str, update):
|
async def update_by_discord_id(self, discord_id: str, update):
|
||||||
db = await self._get_collection('users')
|
db = await self._get_collection('users')
|
||||||
|
|
||||||
return await db.update_one({'auth.discord': str(int(discord_id))}, update)
|
return await db.update_one({'auth.discord': str(int(discord_id))}, update)
|
||||||
|
|
||||||
async def update_by_filter(self, obj_filter, update):
|
async def update_by_filter(self, obj_filter, update):
|
||||||
|
@ -98,7 +113,7 @@ class UserManager:
|
||||||
manager = UserManager()
|
manager = UserManager()
|
||||||
|
|
||||||
async def demo():
|
async def demo():
|
||||||
user = await UserManager().create(69420)
|
user = await UserManager().create('1099385227077488700')
|
||||||
print(user)
|
print(user)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -22,9 +22,6 @@ with open(os.path.join('cache', 'models.json'), encoding='utf8') as f:
|
||||||
models_list = json.load(f)
|
models_list = json.load(f)
|
||||||
models = [model['id'] for model in models_list['data']]
|
models = [model['id'] for model in models_list['data']]
|
||||||
|
|
||||||
with open(os.path.join('config', 'config.yml'), encoding='utf8') as f:
|
|
||||||
config = yaml.safe_load(f)
|
|
||||||
|
|
||||||
moderation_debug_key = os.getenv('MODERATION_DEBUG_KEY')
|
moderation_debug_key = os.getenv('MODERATION_DEBUG_KEY')
|
||||||
|
|
||||||
async def handle(incoming_request: fastapi.Request):
|
async def handle(incoming_request: fastapi.Request):
|
||||||
|
@ -36,10 +33,10 @@ async def handle(incoming_request: fastapi.Request):
|
||||||
path = incoming_request.url.path
|
path = incoming_request.url.path
|
||||||
path = path.replace('/v1/v1', '/v1')
|
path = path.replace('/v1/v1', '/v1')
|
||||||
|
|
||||||
ip_address = await network.get_ip(incoming_request)
|
ip_address = network.get_ip(incoming_request)
|
||||||
|
|
||||||
if '/dashboard' in path:
|
if '/dashboard' in path:
|
||||||
return errors.error(404, 'You can\'t access /dashboard.', 'This is a private endpoint.')
|
return await errors.error(404, 'You can\'t access /dashboard.', 'This is a private endpoint.')
|
||||||
|
|
||||||
if path.startswith('/v1/models'):
|
if path.startswith('/v1/models'):
|
||||||
return fastapi.responses.JSONResponse(content=models_list)
|
return fastapi.responses.JSONResponse(content=models_list)
|
||||||
|
@ -79,25 +76,7 @@ async def handle(incoming_request: fastapi.Request):
|
||||||
if 'account/credits' in path:
|
if 'account/credits' in path:
|
||||||
return fastapi.responses.JSONResponse({'credits': user['credits']})
|
return fastapi.responses.JSONResponse({'credits': user['credits']})
|
||||||
|
|
||||||
costs = config['costs']
|
if user['credits'] < 1:
|
||||||
cost = costs['other']
|
|
||||||
|
|
||||||
if 'chat/completions' in path:
|
|
||||||
cost = costs['chat-models'].get(payload.get('model'), cost)
|
|
||||||
|
|
||||||
role = user.get('role', 'default')
|
|
||||||
|
|
||||||
if 'enterprise' in role:
|
|
||||||
role_cost_multiplier = 0.1
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
role_cost_multiplier = config['roles'][role]['bonus']
|
|
||||||
except KeyError:
|
|
||||||
role_cost_multiplier = 1
|
|
||||||
|
|
||||||
cost = round(cost * role_cost_multiplier)
|
|
||||||
|
|
||||||
if user['credits'] < cost:
|
|
||||||
return await errors.error(429, 'Not enough credits.', 'Wait or earn more credits. Learn more on our website or Discord server.')
|
return await errors.error(429, 'Not enough credits.', 'Wait or earn more credits. Learn more on our website or Discord server.')
|
||||||
|
|
||||||
if 'DISABLE_VARS' not in key_tags:
|
if 'DISABLE_VARS' not in key_tags:
|
||||||
|
@ -168,8 +147,6 @@ async def handle(incoming_request: fastapi.Request):
|
||||||
user=user,
|
user=user,
|
||||||
path=path,
|
path=path,
|
||||||
payload=payload,
|
payload=payload,
|
||||||
credits_cost=cost,
|
|
||||||
input_tokens=0,
|
|
||||||
incoming_request=incoming_request,
|
incoming_request=incoming_request,
|
||||||
),
|
),
|
||||||
media_type=media_type
|
media_type=media_type
|
||||||
|
|
|
@ -1,28 +1,19 @@
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from slowapi.util import get_remote_address
|
from slowapi.util import get_remote_address
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
async def get_ip(request) -> str:
|
def get_ip(request) -> str:
|
||||||
"""Get the IP address of the incoming request."""
|
"""Get the IP address of the incoming request."""
|
||||||
|
|
||||||
xff = None
|
detected_ip = request.headers.get('cf-connecting-ip', get_remote_address(request))
|
||||||
if request.headers.get('x-forwarded-for'):
|
|
||||||
xff, *_ = request.headers['x-forwarded-for'].split(', ')
|
|
||||||
|
|
||||||
possible_ips = [xff, request.headers.get('cf-connecting-ip'), request.client.host]
|
|
||||||
detected_ip = next((i for i in possible_ips if i), None)
|
|
||||||
|
|
||||||
return detected_ip
|
return detected_ip
|
||||||
|
|
||||||
def get_ratelimit_key(request) -> str:
|
def get_ratelimit_key(request) -> str:
|
||||||
"""Get the IP address of the incoming request."""
|
"""Get the IP address of the incoming request."""
|
||||||
custom = os.environ('NO_RATELIMIT_IPS')
|
|
||||||
ip = get_remote_address(request)
|
ip = get_ip(request)
|
||||||
|
return ip
|
||||||
if ip in custom:
|
|
||||||
return f'enterprise_{ip}'
|
|
||||||
|
|
||||||
return ip
|
|
||||||
|
|
|
@ -2,7 +2,7 @@ import time
|
||||||
import asyncio
|
import asyncio
|
||||||
import tiktoken
|
import tiktoken
|
||||||
|
|
||||||
async def count_for_messages(messages: list, model: str='gpt-3.5-turbo-0613') -> int:
|
async def count_tokens_for_messages(messages: list, model: str='gpt-3.5-turbo-0613') -> int:
|
||||||
"""Return the number of tokens used by a list of messages
|
"""Return the number of tokens used by a list of messages
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -38,13 +38,13 @@ async def count_for_messages(messages: list, model: str='gpt-3.5-turbo-0613') ->
|
||||||
tokens_per_name = -1 # if there's a name, the role is omitted
|
tokens_per_name = -1 # if there's a name, the role is omitted
|
||||||
|
|
||||||
elif 'gpt-3.5-turbo' in model:
|
elif 'gpt-3.5-turbo' in model:
|
||||||
return await count_for_messages(messages, model='gpt-3.5-turbo-0613')
|
return await count_tokens_for_messages(messages, model='gpt-3.5-turbo-0613')
|
||||||
|
|
||||||
elif 'gpt-4' in model:
|
elif 'gpt-4' in model:
|
||||||
return await count_for_messages(messages, model='gpt-4-0613')
|
return await count_tokens_for_messages(messages, model='gpt-4-0613')
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError(f"""count_for_messages() is not implemented for model {model}.
|
raise NotImplementedError(f"""count_tokens_for_messages() is not implemented for model {model}.
|
||||||
See https://github.com/openai/openai-python/blob/main/chatml.md
|
See https://github.com/openai/openai-python/blob/main/chatml.md
|
||||||
for information on how messages are converted to tokens.""")
|
for information on how messages are converted to tokens.""")
|
||||||
|
|
||||||
|
@ -66,8 +66,8 @@ if __name__ == '__main__':
|
||||||
messages = [
|
messages = [
|
||||||
{
|
{
|
||||||
'role': 'user',
|
'role': 'user',
|
||||||
'content': '1+1='
|
'content': 'Hi'
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
print(asyncio.run(count_for_messages(messages)))
|
print(asyncio.run(count_tokens_for_messages(messages)))
|
||||||
print(f'Took {(time.perf_counter() - start) * 1000}ms')
|
print(f'Took {(time.perf_counter() - start) * 1000}ms')
|
||||||
|
|
|
@ -17,6 +17,8 @@ from slowapi import Limiter, _rate_limit_exceeded_handler
|
||||||
import core
|
import core
|
||||||
import handler
|
import handler
|
||||||
|
|
||||||
|
from helpers.network import get_ratelimit_key
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
app = fastapi.FastAPI()
|
app = fastapi.FastAPI()
|
||||||
|
@ -33,7 +35,7 @@ app.include_router(core.router)
|
||||||
|
|
||||||
limiter = Limiter(
|
limiter = Limiter(
|
||||||
swallow_errors=True,
|
swallow_errors=True,
|
||||||
key_func=get_remote_address,
|
key_func=get_ratelimit_key,
|
||||||
default_limits=[
|
default_limits=[
|
||||||
'2/second',
|
'2/second',
|
||||||
'30/minute',
|
'30/minute',
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
from . import ails, closed, closed4
|
from . import ails
|
||||||
MODULES = [ails, closed, closed4]
|
MODULES = [ails]
|
||||||
|
|
|
@ -22,7 +22,10 @@ async def main():
|
||||||
|
|
||||||
for file_name in os.listdir(os.path.dirname(__file__)):
|
for file_name in os.listdir(os.path.dirname(__file__)):
|
||||||
if file_name.endswith('.py') and not file_name.startswith('_'):
|
if file_name.endswith('.py') and not file_name.startswith('_'):
|
||||||
print(file_name.split('.')[0])
|
name = file_name.split('.')[0]
|
||||||
|
models = importlib.import_module(f'.{file_name.split(".")[0]}', 'providers').MODELS
|
||||||
|
|
||||||
|
print(f' {name} @ {", ".join(models)}')
|
||||||
|
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
|
|
|
@ -3,6 +3,8 @@ try:
|
||||||
except ModuleNotFoundError:
|
except ModuleNotFoundError:
|
||||||
from ...db import providerkeys
|
from ...db import providerkeys
|
||||||
|
|
||||||
|
# Sort the models by their value/cost/rarity.
|
||||||
|
|
||||||
GPT_3 = [
|
GPT_3 = [
|
||||||
'gpt-3.5-turbo',
|
'gpt-3.5-turbo',
|
||||||
'gpt-3.5-turbo-16k',
|
'gpt-3.5-turbo-16k',
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
|
import yaml
|
||||||
import ujson
|
import ujson
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import asyncio
|
import asyncio
|
||||||
|
@ -17,15 +18,16 @@ import load_balancing
|
||||||
|
|
||||||
from helpers import errors
|
from helpers import errors
|
||||||
from db import providerkeys
|
from db import providerkeys
|
||||||
|
from helpers.tokens import count_tokens_for_messages
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
CRITICAL_API_ERRORS = ['invalid_api_key', 'account_deactivated']
|
CRITICAL_API_ERRORS = ['invalid_api_key', 'account_deactivated']
|
||||||
|
|
||||||
keymanager = providerkeys.manager
|
keymanager = providerkeys.manager
|
||||||
|
|
||||||
background_tasks: Set[asyncio.Task[Any]] = set()
|
background_tasks: Set[asyncio.Task[Any]] = set()
|
||||||
|
|
||||||
|
with open(os.path.join('config', 'config.yml'), encoding='utf8') as f:
|
||||||
|
config = yaml.safe_load(f)
|
||||||
|
|
||||||
def create_background_task(coro: Coroutine[Any, Any, Any]) -> None:
|
def create_background_task(coro: Coroutine[Any, Any, Any]) -> None:
|
||||||
"""asyncio.create_task, which prevents the task from being garbage collected.
|
"""asyncio.create_task, which prevents the task from being garbage collected.
|
||||||
|
@ -36,13 +38,10 @@ def create_background_task(coro: Coroutine[Any, Any, Any]) -> None:
|
||||||
background_tasks.add(task)
|
background_tasks.add(task)
|
||||||
task.add_done_callback(background_tasks.discard)
|
task.add_done_callback(background_tasks.discard)
|
||||||
|
|
||||||
|
|
||||||
async def respond(
|
async def respond(
|
||||||
path: str='/v1/chat/completions',
|
path: str='/v1/chat/completions',
|
||||||
user: dict=None,
|
user: dict=None,
|
||||||
payload: dict=None,
|
payload: dict=None,
|
||||||
credits_cost: int=0,
|
|
||||||
input_tokens: int=0,
|
|
||||||
incoming_request: starlette.requests.Request=None,
|
incoming_request: starlette.requests.Request=None,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
|
@ -72,6 +71,9 @@ async def respond(
|
||||||
'timeout': 0
|
'timeout': 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
input_tokens = 0
|
||||||
|
output_tokens = 0
|
||||||
|
|
||||||
for _ in range(10):
|
for _ in range(10):
|
||||||
try:
|
try:
|
||||||
if is_chat:
|
if is_chat:
|
||||||
|
@ -161,9 +163,15 @@ async def respond(
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if response.ok:
|
if response.ok:
|
||||||
|
if is_chat and not is_stream:
|
||||||
|
input_tokens = client_json_response['usage']['prompt_tokens']
|
||||||
|
output_tokens = client_json_response['usage']['completion_tokens']
|
||||||
|
|
||||||
server_json_response = client_json_response
|
server_json_response = client_json_response
|
||||||
|
|
||||||
if is_stream:
|
if is_stream:
|
||||||
|
input_tokens = await count_tokens_for_messages(payload['messages'], model=model)
|
||||||
|
|
||||||
chunk_no = 0
|
chunk_no = 0
|
||||||
buffer = ''
|
buffer = ''
|
||||||
|
|
||||||
|
@ -175,7 +183,7 @@ async def respond(
|
||||||
if 'azure' in provider_name:
|
if 'azure' in provider_name:
|
||||||
chunk = chunk.replace('data: ', '', 1)
|
chunk = chunk.replace('data: ', '', 1)
|
||||||
|
|
||||||
if not chunk or chunk_no == 1:
|
if not chunk.strip() or chunk_no == 1:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
subchunks = chunk.split('\n\n')
|
subchunks = chunk.split('\n\n')
|
||||||
|
@ -188,6 +196,8 @@ async def respond(
|
||||||
yield subchunk + '\n\n'
|
yield subchunk + '\n\n'
|
||||||
|
|
||||||
buffer = subchunks[-1]
|
buffer = subchunks[-1]
|
||||||
|
|
||||||
|
output_tokens = chunk_no
|
||||||
break
|
break
|
||||||
|
|
||||||
except aiohttp.client_exceptions.ServerTimeoutError:
|
except aiohttp.client_exceptions.ServerTimeoutError:
|
||||||
|
@ -198,7 +208,7 @@ async def respond(
|
||||||
skipped_errors = {k: v for k, v in skipped_errors.items() if v > 0}
|
skipped_errors = {k: v for k, v in skipped_errors.items() if v > 0}
|
||||||
skipped_errors = ujson.dumps(skipped_errors, indent=4)
|
skipped_errors = ujson.dumps(skipped_errors, indent=4)
|
||||||
yield await errors.yield_error(500,
|
yield await errors.yield_error(500,
|
||||||
'Sorry, our API seems to have issues connecting to our provider(s).',
|
f'Sorry, our API seems to have issues connecting to "{model}".',
|
||||||
f'Please send this info to support: {skipped_errors}'
|
f'Please send this info to support: {skipped_errors}'
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
@ -206,13 +216,51 @@ async def respond(
|
||||||
if (not is_stream) and server_json_response:
|
if (not is_stream) and server_json_response:
|
||||||
yield json.dumps(server_json_response)
|
yield json.dumps(server_json_response)
|
||||||
|
|
||||||
|
|
||||||
|
role = user.get('role', 'default')
|
||||||
|
|
||||||
|
model_multipliers = config['costs']
|
||||||
|
model_multiplier = model_multipliers['other']
|
||||||
|
|
||||||
|
if is_chat:
|
||||||
|
model_multiplier = model_multipliers['chat-models'].get(payload.get('model'), model_multiplier)
|
||||||
|
total_tokens = input_tokens + output_tokens
|
||||||
|
credits_cost = total_tokens / 60
|
||||||
|
credits_cost = round(credits_cost * model_multiplier)
|
||||||
|
|
||||||
|
if credits_cost < 1:
|
||||||
|
credits_cost = 1
|
||||||
|
|
||||||
|
tokens = {
|
||||||
|
'input': input_tokens,
|
||||||
|
'output': output_tokens,
|
||||||
|
'total': total_tokens
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
credits_cost = 5
|
||||||
|
tokens = {
|
||||||
|
'input': 0,
|
||||||
|
'output': 0,
|
||||||
|
'total': credits_cost
|
||||||
|
}
|
||||||
|
|
||||||
|
try:
|
||||||
|
role_cost_multiplier = config['roles'][role]['bonus']
|
||||||
|
except KeyError:
|
||||||
|
role_cost_multiplier = 1
|
||||||
|
|
||||||
|
credits_cost = round(credits_cost * role_cost_multiplier)
|
||||||
|
|
||||||
|
print(f'[bold]Credits cost[/bold]: {credits_cost}')
|
||||||
|
|
||||||
create_background_task(
|
create_background_task(
|
||||||
after_request.after_request(
|
after_request.after_request(
|
||||||
|
provider=provider_name,
|
||||||
incoming_request=incoming_request,
|
incoming_request=incoming_request,
|
||||||
target_request=target_request,
|
target_request=target_request,
|
||||||
user=user,
|
user=user,
|
||||||
credits_cost=credits_cost,
|
credits_cost=credits_cost,
|
||||||
input_tokens=input_tokens,
|
tokens=tokens,
|
||||||
path=path,
|
path=path,
|
||||||
is_chat=is_chat,
|
is_chat=is_chat,
|
||||||
model=model,
|
model=model,
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
roles = {
|
roles = {
|
||||||
'': '2500',
|
'': '2000',
|
||||||
'lvl10': '2800',
|
'lvl10': '2800',
|
||||||
'lvl20': '3100',
|
'lvl20': '3100',
|
||||||
'lvl30': '3400',
|
'lvl30': '3400',
|
||||||
|
|
Loading…
Reference in a new issue