some stuff

This commit is contained in:
nsde 2023-10-09 19:09:01 +02:00
parent 23a904f3ce
commit d52aadd034
16 changed files with 64 additions and 56 deletions

View file

@ -167,7 +167,7 @@ You can also just add the *beginning* of an API address, like `12.123.` (without
### Core Keys ### Core Keys
`CORE_API_KEY` specifies the **very secret key** which is needed to access the entire user database etc. `CORE_API_KEY` specifies the **very secret key** which is needed to access the entire user database etc.
`TEST_NOVA_KEY` is the API key which is used in tests. It should be one with tons of credits. `NOVA_KEY` is the API key which is used in tests. It should be one with tons of credits.
### Webhooks ### Webhooks
`DISCORD_WEBHOOK__USER_CREATED` is the Discord webhook URL for when a user is created. `DISCORD_WEBHOOK__USER_CREATED` is the Discord webhook URL for when a user is created.

View file

@ -130,7 +130,7 @@ async def run_checks(incoming_request: fastapi.Request):
checks.client.test_chat_non_stream_gpt4, checks.client.test_chat_non_stream_gpt4,
checks.client.test_chat_stream_gpt3, checks.client.test_chat_stream_gpt3,
checks.client.test_function_calling, checks.client.test_function_calling,
checks.client.test_image_generation, # checks.client.test_image_generation,
# checks.client.test_speech_to_text, # checks.client.test_speech_to_text,
checks.client.test_models checks.client.test_models
] ]

View file

@ -38,7 +38,10 @@ async def handle(incoming_request: fastapi.Request):
ip_address = await network.get_ip(incoming_request) ip_address = await network.get_ip(incoming_request)
if '/models' in path: if '/dashboard' in path:
return errors.error(404, 'You can\'t access /dashboard.', 'This is a private endpoint.')
if path.startswith('/v1/models'):
return fastapi.responses.JSONResponse(content=models_list) return fastapi.responses.JSONResponse(content=models_list)
try: try:
@ -94,7 +97,6 @@ async def handle(incoming_request: fastapi.Request):
if user['credits'] < cost: if user['credits'] < cost:
return await errors.error(429, 'Not enough credits.', 'Wait or earn more credits. Learn more on our website or Discord server.') return await errors.error(429, 'Not enough credits.', 'Wait or earn more credits. Learn more on our website or Discord server.')
if 'DISABLE_VARS' not in key_tags: if 'DISABLE_VARS' not in key_tags:
payload_with_vars = json.dumps(payload) payload_with_vars = json.dumps(payload)

View file

@ -38,10 +38,10 @@ async def count_for_messages(messages: list, model: str='gpt-3.5-turbo-0613') ->
tokens_per_name = -1 # if there's a name, the role is omitted tokens_per_name = -1 # if there's a name, the role is omitted
elif 'gpt-3.5-turbo' in model: elif 'gpt-3.5-turbo' in model:
return count_for_messages(messages, model='gpt-3.5-turbo-0613') return await count_for_messages(messages, model='gpt-3.5-turbo-0613')
elif 'gpt-4' in model: elif 'gpt-4' in model:
return count_for_messages(messages, model='gpt-4-0613') return await count_for_messages(messages, model='gpt-4-0613')
else: else:
raise NotImplementedError(f"""count_for_messages() is not implemented for model {model}. raise NotImplementedError(f"""count_for_messages() is not implemented for model {model}.

View file

@ -7,10 +7,10 @@ from rich import print
from dotenv import load_dotenv from dotenv import load_dotenv
from bson.objectid import ObjectId from bson.objectid import ObjectId
from fastapi.middleware.cors import CORSMiddleware
from slowapi.errors import RateLimitExceeded from slowapi.errors import RateLimitExceeded
from slowapi.middleware import SlowAPIMiddleware from slowapi.middleware import SlowAPIMiddleware
from fastapi.middleware.cors import CORSMiddleware
from slowapi.util import get_remote_address from slowapi.util import get_remote_address
from slowapi import Limiter, _rate_limit_exceeded_handler from slowapi import Limiter, _rate_limit_exceeded_handler

View file

@ -1,12 +1,2 @@
from . import \ from . import azure, webraft
azure \ MODULES = [azure, webraft]
# closed, \
# closed4
# closed432
MODULES = [
azure,
# closed,
# closed4,
# closed432,
]

View file

@ -2,7 +2,6 @@ from .helpers import utils
AUTH = True AUTH = True
ORGANIC = False ORGANIC = False
CONTEXT = True
STREAMING = True STREAMING = True
MODERATIONS = False MODERATIONS = False
ENDPOINT = 'https://nova-00001.openai.azure.com' ENDPOINT = 'https://nova-00001.openai.azure.com'
@ -12,7 +11,7 @@ MODELS = [
'gpt-4', 'gpt-4',
'gpt-4-32k' 'gpt-4-32k'
] ]
# MODELS = [f'{model}-azure' for model in MODELS] MODELS += [f'{model}-azure' for model in MODELS]
AZURE_API = '2023-08-01-preview' AZURE_API = '2023-08-01-preview'

View file

@ -2,7 +2,6 @@ from .helpers import utils
AUTH = True AUTH = True
ORGANIC = True ORGANIC = True
CONTEXT = True
STREAMING = True STREAMING = True
MODERATIONS = True MODERATIONS = True
ENDPOINT = 'https://api.openai.com' ENDPOINT = 'https://api.openai.com'

View file

@ -2,7 +2,6 @@ from .helpers import utils
AUTH = True AUTH = True
ORGANIC = False ORGANIC = False
CONTEXT = True
STREAMING = True STREAMING = True
MODERATIONS = True MODERATIONS = True
ENDPOINT = 'https://api.openai.com' ENDPOINT = 'https://api.openai.com'

View file

@ -2,7 +2,6 @@ from .helpers import utils
AUTH = True AUTH = True
ORGANIC = False ORGANIC = False
CONTEXT = True
STREAMING = True STREAMING = True
MODERATIONS = False MODERATIONS = False
ENDPOINT = 'https://api.openai.com' ENDPOINT = 'https://api.openai.com'

View file

@ -23,15 +23,5 @@ GPT_4_32K = GPT_4 + [
'gpt-4-32k-0613', 'gpt-4-32k-0613',
] ]
async def conversation_to_prompt(conversation: list) -> str:
    """Flatten a chat conversation into a single text prompt.

    Each message is rendered as ``<|role|>: content`` followed by a newline,
    in the order given, and the prompt ends with an open ``<|assistant|>:``
    tag.

    Args:
        conversation: Messages, each a mapping with ``"role"`` and
            ``"content"`` keys.

    Returns:
        The rendered prompt. For an empty conversation this is just
        ``'<|assistant|>:'``.

    Raises:
        KeyError: If a message lacks a ``"role"`` or ``"content"`` key.
    """
    # Join once instead of growing the string with += on every message.
    rendered = ''.join(
        f'<|{message["role"]}|>: {message["content"]}\n'
        for message in conversation
    )
    return rendered + '<|assistant|>:'
async def random_secret_for(name: str) -> str: async def random_secret_for(name: str) -> str:
return await providerkeys.manager.get_key(name) return await providerkeys.manager.get_key(name)

View file

@ -2,7 +2,6 @@ from .helpers import utils
AUTH = True AUTH = True
ORGANIC = False ORGANIC = False
CONTEXT = True
STREAMING = True STREAMING = True
MODELS = ['llama-2-7b-chat'] MODELS = ['llama-2-7b-chat']
@ -12,7 +11,7 @@ async def chat_completion(**kwargs):
return { return {
'method': 'POST', 'method': 'POST',
'url': f'https://api.mandrillai.tech/v1/chat/completions', 'url': 'https://api.mandrillai.tech/v1/chat/completions',
'payload': payload, 'payload': payload,
'headers': { 'headers': {
'Authorization': f'Bearer {key}' 'Authorization': f'Bearer {key}'

25
api/providers/webraft.py Normal file
View file

@ -0,0 +1,25 @@
from .helpers import utils
AUTH = True
ORGANIC = False
STREAMING = True
MODELS = [
'gpt-3.5-turbo-0613',
'gpt-3.5-turbo-0301',
'gpt-3.5-turbo-16k-0613'
]
async def chat_completion(**kwargs):
    """Describe the upstream request for a chat completion via webraft.

    The keyword arguments are forwarded unchanged as the request payload.
    A key is obtained through ``utils.random_secret_for('webraft')`` and is
    used both in the ``Authorization`` header and in the ``provider_auth``
    entry.

    Returns:
        A dict with the ``method``, ``url``, ``payload``, ``headers`` and
        ``provider_auth`` entries of the request to send.
    """
    key = await utils.random_secret_for('webraft')

    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {key}'
    }

    return {
        'method': 'POST',
        'url': 'https://thirdparty.webraft.in/v1/chat/completions',
        'payload': kwargs,
        'headers': headers,
        'provider_auth': f'webraft>{key}'
    }

View file

@ -49,7 +49,14 @@ async def respond(
'Content-Type': 'application/json' 'Content-Type': 'application/json'
} }
for i in range(5): skipped_errors = {
'insufficient_quota': 0,
'billing_not_active': 0,
'critical_provider_error': 0,
'timeout': 0
}
for _ in range(5):
try: try:
if is_chat: if is_chat:
target_request = await load_balancing.balance_chat_request(payload) target_request = await load_balancing.balance_chat_request(payload)
@ -116,11 +123,13 @@ async def respond(
if error_code == 'insufficient_quota': if error_code == 'insufficient_quota':
print('[!] insufficient quota') print('[!] insufficient quota')
await keymanager.rate_limit_key(provider_name, provider_key, 86400) await keymanager.rate_limit_key(provider_name, provider_key, 86400)
skipped_errors['insufficient_quota'] += 1
continue continue
if error_code == 'billing_not_active': if error_code == 'billing_not_active':
print('[!] billing not active') print('[!] billing not active')
await keymanager.deactivate_key(provider_name, provider_key, 'billing_not_active') await keymanager.deactivate_key(provider_name, provider_key, 'billing_not_active')
skipped_errors['billing_not_active'] += 1
continue continue
critical_error = False critical_error = False
@ -128,25 +137,16 @@ async def respond(
if error in str(client_json_response): if error in str(client_json_response):
await keymanager.deactivate_key(provider_name, provider_key, error) await keymanager.deactivate_key(provider_name, provider_key, error)
critical_error = True critical_error = True
if critical_error: if critical_error:
print('[!] critical error') print('[!] critical provider error')
skipped_errors['critical_provider_error'] += 1
continue continue
if response.ok: if response.ok:
server_json_response = client_json_response server_json_response = client_json_response
else:
continue
if is_stream: if is_stream:
try:
response.raise_for_status()
except Exception as exc:
if 'Too Many Requests' in str(exc):
print('[!] too many requests')
continue
chunk_no = 0 chunk_no = 0
buffer = '' buffer = ''
@ -156,7 +156,7 @@ async def respond(
chunk = chunk.decode('utf8') chunk = chunk.decode('utf8')
if 'azure' in provider_name: if 'azure' in provider_name:
chunk = chunk.replace('data: ', '') chunk = chunk.replace('data: ', '', 1)
if not chunk or chunk_no == 1: if not chunk or chunk_no == 1:
continue continue
@ -164,19 +164,26 @@ async def respond(
subchunks = chunk.split('\n\n') subchunks = chunk.split('\n\n')
buffer += subchunks[0] buffer += subchunks[0]
yield buffer + '\n\n' for subchunk in [buffer] + subchunks[1:-1]:
buffer = subchunks[-1] if not subchunk.startswith('data: '):
subchunk = 'data: ' + subchunk
for subchunk in subchunks[1:-1]:
yield subchunk + '\n\n' yield subchunk + '\n\n'
buffer = subchunks[-1]
break break
except aiohttp.client_exceptions.ServerTimeoutError: except aiohttp.client_exceptions.ServerTimeoutError:
skipped_errors['timeout'] += 1
continue continue
else: else:
yield await errors.yield_error(500, 'Sorry, our API seems to have issues connecting to our provider(s).', 'This most likely isn\'t your fault. Please try again later.') skipped_errors = {k: v for k, v in skipped_errors.items() if v > 0}
skipped_errors = ujson.dumps(skipped_errors, indent=4)
yield await errors.yield_error(500,
'Sorry, our API seems to have issues connecting to our provider(s).',
f'Please send this info to support: {skipped_errors}'
)
return return
if (not is_stream) and server_json_response: if (not is_stream) and server_json_response:

View file

@ -100,7 +100,7 @@ async def test_chat_stream_gpt3() -> float:
async for chunk in response.aiter_text(): async for chunk in response.aiter_text():
for subchunk in chunk.split('\n\n'): for subchunk in chunk.split('\n\n'):
chunk = subchunk.replace('data: ', '').strip() chunk = subchunk.replace('data: ', '', 1).strip()
if chunk == '[DONE]': if chunk == '[DONE]':
break break

View file

@ -14,7 +14,6 @@ Runs for production on the specified port.
import os import os
import sys import sys
import time
port = sys.argv[1] if len(sys.argv) > 1 else 2332 port = sys.argv[1] if len(sys.argv) > 1 else 2332
dev = True dev = True