mirror of
https://github.com/NovaOSS/nova-api.git
synced 2024-11-25 14:23:57 +01:00
some stuff
This commit is contained in:
parent
23a904f3ce
commit
d52aadd034
|
@ -167,7 +167,7 @@ You can also just add the *beginning* of an API address, like `12.123.` (without
|
||||||
|
|
||||||
### Core Keys
|
### Core Keys
|
||||||
`CORE_API_KEY` specifies the **very secret key** which is needed to access the entire user database etc.
|
`CORE_API_KEY` specifies the **very secret key** which is needed to access the entire user database etc.
|
||||||
`TEST_NOVA_KEY` is the API key which is used in tests. It should be one with tons of credits.
|
`NOVA_KEY` is the API key which is used in tests. It should be one with tons of credits.
|
||||||
|
|
||||||
### Webhooks
|
### Webhooks
|
||||||
`DISCORD_WEBHOOK__USER_CREATED` is the Discord webhook URL for when a user is created.
|
`DISCORD_WEBHOOK__USER_CREATED` is the Discord webhook URL for when a user is created.
|
||||||
|
|
|
@ -130,7 +130,7 @@ async def run_checks(incoming_request: fastapi.Request):
|
||||||
checks.client.test_chat_non_stream_gpt4,
|
checks.client.test_chat_non_stream_gpt4,
|
||||||
checks.client.test_chat_stream_gpt3,
|
checks.client.test_chat_stream_gpt3,
|
||||||
checks.client.test_function_calling,
|
checks.client.test_function_calling,
|
||||||
checks.client.test_image_generation,
|
# checks.client.test_image_generation,
|
||||||
# checks.client.test_speech_to_text,
|
# checks.client.test_speech_to_text,
|
||||||
checks.client.test_models
|
checks.client.test_models
|
||||||
]
|
]
|
||||||
|
|
|
@ -38,7 +38,10 @@ async def handle(incoming_request: fastapi.Request):
|
||||||
|
|
||||||
ip_address = await network.get_ip(incoming_request)
|
ip_address = await network.get_ip(incoming_request)
|
||||||
|
|
||||||
if '/models' in path:
|
if '/dashboard' in path:
|
||||||
|
return errors.error(404, 'You can\'t access /dashboard.', 'This is a private endpoint.')
|
||||||
|
|
||||||
|
if path.startswith('/v1/models'):
|
||||||
return fastapi.responses.JSONResponse(content=models_list)
|
return fastapi.responses.JSONResponse(content=models_list)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -94,7 +97,6 @@ async def handle(incoming_request: fastapi.Request):
|
||||||
if user['credits'] < cost:
|
if user['credits'] < cost:
|
||||||
return await errors.error(429, 'Not enough credits.', 'Wait or earn more credits. Learn more on our website or Discord server.')
|
return await errors.error(429, 'Not enough credits.', 'Wait or earn more credits. Learn more on our website or Discord server.')
|
||||||
|
|
||||||
|
|
||||||
if 'DISABLE_VARS' not in key_tags:
|
if 'DISABLE_VARS' not in key_tags:
|
||||||
payload_with_vars = json.dumps(payload)
|
payload_with_vars = json.dumps(payload)
|
||||||
|
|
||||||
|
|
|
@ -38,10 +38,10 @@ async def count_for_messages(messages: list, model: str='gpt-3.5-turbo-0613') ->
|
||||||
tokens_per_name = -1 # if there's a name, the role is omitted
|
tokens_per_name = -1 # if there's a name, the role is omitted
|
||||||
|
|
||||||
elif 'gpt-3.5-turbo' in model:
|
elif 'gpt-3.5-turbo' in model:
|
||||||
return count_for_messages(messages, model='gpt-3.5-turbo-0613')
|
return await count_for_messages(messages, model='gpt-3.5-turbo-0613')
|
||||||
|
|
||||||
elif 'gpt-4' in model:
|
elif 'gpt-4' in model:
|
||||||
return count_for_messages(messages, model='gpt-4-0613')
|
return await count_for_messages(messages, model='gpt-4-0613')
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError(f"""count_for_messages() is not implemented for model {model}.
|
raise NotImplementedError(f"""count_for_messages() is not implemented for model {model}.
|
||||||
|
|
|
@ -7,10 +7,10 @@ from rich import print
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
from bson.objectid import ObjectId
|
from bson.objectid import ObjectId
|
||||||
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
|
||||||
from slowapi.errors import RateLimitExceeded
|
from slowapi.errors import RateLimitExceeded
|
||||||
from slowapi.middleware import SlowAPIMiddleware
|
from slowapi.middleware import SlowAPIMiddleware
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
|
||||||
from slowapi.util import get_remote_address
|
from slowapi.util import get_remote_address
|
||||||
from slowapi import Limiter, _rate_limit_exceeded_handler
|
from slowapi import Limiter, _rate_limit_exceeded_handler
|
||||||
|
|
||||||
|
|
|
@ -1,12 +1,2 @@
|
||||||
from . import \
|
from . import azure, webraft
|
||||||
azure \
|
MODULES = [azure, webraft]
|
||||||
# closed, \
|
|
||||||
# closed4
|
|
||||||
# closed432
|
|
||||||
|
|
||||||
MODULES = [
|
|
||||||
azure,
|
|
||||||
# closed,
|
|
||||||
# closed4,
|
|
||||||
# closed432,
|
|
||||||
]
|
|
||||||
|
|
|
@ -2,7 +2,6 @@ from .helpers import utils
|
||||||
|
|
||||||
AUTH = True
|
AUTH = True
|
||||||
ORGANIC = False
|
ORGANIC = False
|
||||||
CONTEXT = True
|
|
||||||
STREAMING = True
|
STREAMING = True
|
||||||
MODERATIONS = False
|
MODERATIONS = False
|
||||||
ENDPOINT = 'https://nova-00001.openai.azure.com'
|
ENDPOINT = 'https://nova-00001.openai.azure.com'
|
||||||
|
@ -12,7 +11,7 @@ MODELS = [
|
||||||
'gpt-4',
|
'gpt-4',
|
||||||
'gpt-4-32k'
|
'gpt-4-32k'
|
||||||
]
|
]
|
||||||
# MODELS = [f'{model}-azure' for model in MODELS]
|
MODELS += [f'{model}-azure' for model in MODELS]
|
||||||
|
|
||||||
AZURE_API = '2023-08-01-preview'
|
AZURE_API = '2023-08-01-preview'
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,6 @@ from .helpers import utils
|
||||||
|
|
||||||
AUTH = True
|
AUTH = True
|
||||||
ORGANIC = True
|
ORGANIC = True
|
||||||
CONTEXT = True
|
|
||||||
STREAMING = True
|
STREAMING = True
|
||||||
MODERATIONS = True
|
MODERATIONS = True
|
||||||
ENDPOINT = 'https://api.openai.com'
|
ENDPOINT = 'https://api.openai.com'
|
||||||
|
|
|
@ -2,7 +2,6 @@ from .helpers import utils
|
||||||
|
|
||||||
AUTH = True
|
AUTH = True
|
||||||
ORGANIC = False
|
ORGANIC = False
|
||||||
CONTEXT = True
|
|
||||||
STREAMING = True
|
STREAMING = True
|
||||||
MODERATIONS = True
|
MODERATIONS = True
|
||||||
ENDPOINT = 'https://api.openai.com'
|
ENDPOINT = 'https://api.openai.com'
|
||||||
|
|
|
@ -2,7 +2,6 @@ from .helpers import utils
|
||||||
|
|
||||||
AUTH = True
|
AUTH = True
|
||||||
ORGANIC = False
|
ORGANIC = False
|
||||||
CONTEXT = True
|
|
||||||
STREAMING = True
|
STREAMING = True
|
||||||
MODERATIONS = False
|
MODERATIONS = False
|
||||||
ENDPOINT = 'https://api.openai.com'
|
ENDPOINT = 'https://api.openai.com'
|
||||||
|
|
|
@ -23,15 +23,5 @@ GPT_4_32K = GPT_4 + [
|
||||||
'gpt-4-32k-0613',
|
'gpt-4-32k-0613',
|
||||||
]
|
]
|
||||||
|
|
||||||
async def conversation_to_prompt(conversation: list) -> str:
|
|
||||||
text = ''
|
|
||||||
|
|
||||||
for message in conversation:
|
|
||||||
text += f'<|{message["role"]}|>: {message["content"]}\n'
|
|
||||||
|
|
||||||
text += '<|assistant|>:'
|
|
||||||
|
|
||||||
return text
|
|
||||||
|
|
||||||
async def random_secret_for(name: str) -> str:
|
async def random_secret_for(name: str) -> str:
|
||||||
return await providerkeys.manager.get_key(name)
|
return await providerkeys.manager.get_key(name)
|
||||||
|
|
|
@ -2,7 +2,6 @@ from .helpers import utils
|
||||||
|
|
||||||
AUTH = True
|
AUTH = True
|
||||||
ORGANIC = False
|
ORGANIC = False
|
||||||
CONTEXT = True
|
|
||||||
STREAMING = True
|
STREAMING = True
|
||||||
MODELS = ['llama-2-7b-chat']
|
MODELS = ['llama-2-7b-chat']
|
||||||
|
|
||||||
|
@ -12,7 +11,7 @@ async def chat_completion(**kwargs):
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'method': 'POST',
|
'method': 'POST',
|
||||||
'url': f'https://api.mandrillai.tech/v1/chat/completions',
|
'url': 'https://api.mandrillai.tech/v1/chat/completions',
|
||||||
'payload': payload,
|
'payload': payload,
|
||||||
'headers': {
|
'headers': {
|
||||||
'Authorization': f'Bearer {key}'
|
'Authorization': f'Bearer {key}'
|
||||||
|
|
25
api/providers/webraft.py
Normal file
25
api/providers/webraft.py
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
from .helpers import utils
|
||||||
|
|
||||||
|
AUTH = True
|
||||||
|
ORGANIC = False
|
||||||
|
STREAMING = True
|
||||||
|
MODELS = [
|
||||||
|
'gpt-3.5-turbo-0613',
|
||||||
|
'gpt-3.5-turbo-0301',
|
||||||
|
'gpt-3.5-turbo-16k-0613'
|
||||||
|
]
|
||||||
|
|
||||||
|
async def chat_completion(**kwargs):
|
||||||
|
payload = kwargs
|
||||||
|
key = await utils.random_secret_for('webraft')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'method': 'POST',
|
||||||
|
'url': 'https://thirdparty.webraft.in/v1/chat/completions',
|
||||||
|
'payload': payload,
|
||||||
|
'headers': {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Authorization': f'Bearer {key}'
|
||||||
|
},
|
||||||
|
'provider_auth': f'webraft>{key}'
|
||||||
|
}
|
|
@ -49,7 +49,14 @@ async def respond(
|
||||||
'Content-Type': 'application/json'
|
'Content-Type': 'application/json'
|
||||||
}
|
}
|
||||||
|
|
||||||
for i in range(5):
|
skipped_errors = {
|
||||||
|
'insufficient_quota': 0,
|
||||||
|
'billing_not_active': 0,
|
||||||
|
'critical_provider_error': 0,
|
||||||
|
'timeout': 0
|
||||||
|
}
|
||||||
|
|
||||||
|
for _ in range(5):
|
||||||
try:
|
try:
|
||||||
if is_chat:
|
if is_chat:
|
||||||
target_request = await load_balancing.balance_chat_request(payload)
|
target_request = await load_balancing.balance_chat_request(payload)
|
||||||
|
@ -116,11 +123,13 @@ async def respond(
|
||||||
if error_code == 'insufficient_quota':
|
if error_code == 'insufficient_quota':
|
||||||
print('[!] insufficient quota')
|
print('[!] insufficient quota')
|
||||||
await keymanager.rate_limit_key(provider_name, provider_key, 86400)
|
await keymanager.rate_limit_key(provider_name, provider_key, 86400)
|
||||||
|
skipped_errors['insufficient_quota'] += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if error_code == 'billing_not_active':
|
if error_code == 'billing_not_active':
|
||||||
print('[!] billing not active')
|
print('[!] billing not active')
|
||||||
await keymanager.deactivate_key(provider_name, provider_key, 'billing_not_active')
|
await keymanager.deactivate_key(provider_name, provider_key, 'billing_not_active')
|
||||||
|
skipped_errors['billing_not_active'] += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
critical_error = False
|
critical_error = False
|
||||||
|
@ -128,25 +137,16 @@ async def respond(
|
||||||
if error in str(client_json_response):
|
if error in str(client_json_response):
|
||||||
await keymanager.deactivate_key(provider_name, provider_key, error)
|
await keymanager.deactivate_key(provider_name, provider_key, error)
|
||||||
critical_error = True
|
critical_error = True
|
||||||
|
|
||||||
if critical_error:
|
if critical_error:
|
||||||
print('[!] critical error')
|
print('[!] critical provider error')
|
||||||
|
skipped_errors['critical_provider_error'] += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if response.ok:
|
if response.ok:
|
||||||
server_json_response = client_json_response
|
server_json_response = client_json_response
|
||||||
|
|
||||||
else:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if is_stream:
|
if is_stream:
|
||||||
try:
|
|
||||||
response.raise_for_status()
|
|
||||||
except Exception as exc:
|
|
||||||
if 'Too Many Requests' in str(exc):
|
|
||||||
print('[!] too many requests')
|
|
||||||
continue
|
|
||||||
|
|
||||||
chunk_no = 0
|
chunk_no = 0
|
||||||
buffer = ''
|
buffer = ''
|
||||||
|
|
||||||
|
@ -156,7 +156,7 @@ async def respond(
|
||||||
chunk = chunk.decode('utf8')
|
chunk = chunk.decode('utf8')
|
||||||
|
|
||||||
if 'azure' in provider_name:
|
if 'azure' in provider_name:
|
||||||
chunk = chunk.replace('data: ', '')
|
chunk = chunk.replace('data: ', '', 1)
|
||||||
|
|
||||||
if not chunk or chunk_no == 1:
|
if not chunk or chunk_no == 1:
|
||||||
continue
|
continue
|
||||||
|
@ -164,19 +164,26 @@ async def respond(
|
||||||
subchunks = chunk.split('\n\n')
|
subchunks = chunk.split('\n\n')
|
||||||
buffer += subchunks[0]
|
buffer += subchunks[0]
|
||||||
|
|
||||||
yield buffer + '\n\n'
|
for subchunk in [buffer] + subchunks[1:-1]:
|
||||||
buffer = subchunks[-1]
|
if not subchunk.startswith('data: '):
|
||||||
|
subchunk = 'data: ' + subchunk
|
||||||
|
|
||||||
for subchunk in subchunks[1:-1]:
|
|
||||||
yield subchunk + '\n\n'
|
yield subchunk + '\n\n'
|
||||||
|
|
||||||
|
buffer = subchunks[-1]
|
||||||
break
|
break
|
||||||
|
|
||||||
except aiohttp.client_exceptions.ServerTimeoutError:
|
except aiohttp.client_exceptions.ServerTimeoutError:
|
||||||
|
skipped_errors['timeout'] += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
else:
|
else:
|
||||||
yield await errors.yield_error(500, 'Sorry, our API seems to have issues connecting to our provider(s).', 'This most likely isn\'t your fault. Please try again later.')
|
skipped_errors = {k: v for k, v in skipped_errors.items() if v > 0}
|
||||||
|
skipped_errors = ujson.dumps(skipped_errors, indent=4)
|
||||||
|
yield await errors.yield_error(500,
|
||||||
|
'Sorry, our API seems to have issues connecting to our provider(s).',
|
||||||
|
f'Please send this info to support: {skipped_errors}'
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
if (not is_stream) and server_json_response:
|
if (not is_stream) and server_json_response:
|
||||||
|
|
|
@ -100,7 +100,7 @@ async def test_chat_stream_gpt3() -> float:
|
||||||
|
|
||||||
async for chunk in response.aiter_text():
|
async for chunk in response.aiter_text():
|
||||||
for subchunk in chunk.split('\n\n'):
|
for subchunk in chunk.split('\n\n'):
|
||||||
chunk = subchunk.replace('data: ', '').strip()
|
chunk = subchunk.replace('data: ', '', 1).strip()
|
||||||
|
|
||||||
if chunk == '[DONE]':
|
if chunk == '[DONE]':
|
||||||
break
|
break
|
||||||
|
|
|
@ -14,7 +14,6 @@ Runs for production on the specified port.
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import time
|
|
||||||
|
|
||||||
port = sys.argv[1] if len(sys.argv) > 1 else 2332
|
port = sys.argv[1] if len(sys.argv) > 1 else 2332
|
||||||
dev = True
|
dev = True
|
||||||
|
|
Loading…
Reference in a new issue