Compare commits

...

4 commits

Author SHA1 Message Date
nsde 2f2e8512eb Removed unneccessary stuff 2023-09-14 22:45:57 +02:00
nsde ed9ecceb77 Fixed checks 2023-09-14 22:37:29 +02:00
nsde e06073fba5 Fixed ObjectId issue 2023-09-14 20:43:24 +02:00
nsde 8b325d6b81 Fixed function calling 2023-09-14 18:18:19 +02:00
11 changed files with 231 additions and 152 deletions

View file

@ -20,8 +20,6 @@ We aim to fix that! NovaAI provides several AI models for you to use for free.
###### *I founded FoxGPT (called *NovaGPT* back then) ###### *I founded FoxGPT (called *NovaGPT* back then)
Old, slow, deprecated* FoxGPT vs new NovaAI repository star count:
<a href="https://star-history.com/#NovaOSS/nova-api&FoxGPT/gpt&Date"> <a href="https://star-history.com/#NovaOSS/nova-api&FoxGPT/gpt&Date">
<picture> <picture>
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=NovaOSS/nova-api,FoxGPT/gpt&type=Date&theme=dark" /> <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=NovaOSS/nova-api,FoxGPT/gpt&type=Date&theme=dark" />
@ -30,8 +28,6 @@ Old, slow, deprecated* FoxGPT vs new NovaAI repository star count:
</picture> </picture>
</a> </a>
<img alt="'Emotional damage' meme, with a man with a worried face and the yellow caption 'emotional damage'" src="https://external-content.duckduckgo.com/iu/?u=https%3A%2F%2Findianmemetemplates.com%2Fwp-content%2Fuploads%2Femotional-damage-1024x575.jpg&f=1&nofb=1&ipt=b325721ee0a7b9e11603a9bd484c8042b82e1704e639887107c6ce3e0d9b389e&ipo=images" height=100>
## NovaOSS APIs ## NovaOSS APIs
Our infrastructure might seem a bit confusing, but it's actually quite simple. Just the first one really matters for you, if you want to access our AI API. The other ones are just for the team. Our infrastructure might seem a bit confusing, but it's actually quite simple. Just the first one really matters for you, if you want to access our AI API. The other ones are just for the team.

View file

@ -1,30 +0,0 @@
import json
from helpers import chat
async def process_chunks(
    chunks,
    is_chat: bool,
    chat_id: str,
    target_request: dict,
    model: str = None,
):
    """Filter and rewrite streamed provider chunks, yielding the ones to forward.

    Args:
        chunks: Async iterable of raw ``bytes`` chunks coming from the provider.
        is_chat: Whether the request is a chat completion. Chunks of non-chat
            requests are never yielded by this function.
        chat_id: ID substituted for the provider's own ``id`` in every chunk.
            It is passed to ``str.replace``, so it has to be a string.
        target_request: Description of the provider request; only
            ``target_request['module']`` is read here.
        model: Model name handed to ``chat.create_chat_chunk`` for ``twa`` chunks.

    Yields:
        Each chunk that should reach the client, as text followed by a blank line.
    """
    async for raw_chunk in chunks:
        chunk = raw_chunk.decode("utf8").strip()
        send = False

        # Only chunks that carry a JSON payload are candidates for forwarding.
        if is_chat and '{' in chunk:
            data = json.loads(chunk.split('data: ')[1])
            # Hide the provider's completion ID behind our own chat ID.
            chunk = chunk.replace(data['id'], chat_id)
            send = True

            if target_request['module'] == 'twa' and data.get('text'):
                # Fixed: forward the text received from the provider. The
                # previous code passed the literal list ['text'] as content.
                chunk = await chat.create_chat_chunk(chat_id=chat_id, model=model, content=data['text'])

            # NOTE(review): presumably `twa` payloads also contain `choices`;
            # if they don't, this lookup raises KeyError - confirm with the provider format.
            delta = data['choices'][0]['delta']

            # Empty deltas and the role-only opening delta are not forwarded.
            if (not delta) or delta == {'role': 'assistant'}:
                send = False

        if send and chunk:
            yield chunk + '\n\n'

View file

@ -50,6 +50,9 @@ async def get_users(discord_id: int, incoming_request: fastapi.Request):
if not user: if not user:
return await errors.error(404, 'Discord user not found in the API database.', 'Check the `discord_id` parameter.') return await errors.error(404, 'Discord user not found in the API database.', 'Check the `discord_id` parameter.')
# turn the ObjectId into a string
user['_id'] = str(user['_id'])
return user return user
async def new_user_webhook(user: dict) -> None: async def new_user_webhook(user: dict) -> None:
@ -90,6 +93,8 @@ async def create_user(incoming_request: fastapi.Request):
user = await manager.create(discord_id) user = await manager.create(discord_id)
await new_user_webhook(user) await new_user_webhook(user)
user['_id'] = str(user['_id'])
return user return user
@router.put('/users') @router.put('/users')
@ -126,24 +131,23 @@ async def run_checks(incoming_request: fastapi.Request):
if auth_error: if auth_error:
return auth_error return auth_error
try: results = {}
chat = await checks.client.test_chat()
except Exception as exc:
print(exc)
chat = None
try: funcs = [
moderation = await checks.client.test_api_moderation() checks.client.test_chat_non_stream_gpt4,
except Exception: checks.client.test_chat_stream_gpt3,
moderation = None checks.client.test_function_calling,
checks.client.test_image_generation,
# checks.client.test_speech_to_text,
checks.client.test_models
]
try: for func in funcs:
models = await checks.client.test_models() try:
except Exception: result = await func()
models = None except Exception as exc:
results[func.__name__] = str(exc)
else:
results[func.__name__] = result
return { return results
'chat/completions': chat,
'models': models,
'moderations': moderation,
}

View file

@ -124,7 +124,14 @@ async def handle(incoming_request: fastapi.Request):
inp = payload.get('input', payload.get('prompt', '')) inp = payload.get('input', payload.get('prompt', ''))
if isinstance(payload.get('messages'), list): if isinstance(payload.get('messages'), list):
inp = '\n'.join([message['content'] for message in payload['messages']]) inp = ''
for message in payload.get('messages', []):
if message.get('role') == 'user':
inp += message.get('content', '') + '\n'
if 'functions' in payload:
inp += '\n'.join([function.get('description', '') for function in payload.get('functions', [])])
if inp and len(inp) > 2 and not inp.isnumeric(): if inp and len(inp) > 2 and not inp.isnumeric():
policy_violation = await moderation.is_policy_violated(inp) policy_violation = await moderation.is_policy_violated(inp)
@ -148,7 +155,7 @@ async def handle(incoming_request: fastapi.Request):
path=path, path=path,
payload=payload, payload=payload,
credits_cost=cost, credits_cost=cost,
input_tokens=-1, input_tokens=0,
incoming_request=incoming_request, incoming_request=incoming_request,
), ),
media_type=media_type media_type=media_type

View file

@ -2,12 +2,12 @@
import fastapi import fastapi
import pydantic import pydantic
import functools
from rich import print from rich import print
from dotenv import load_dotenv from dotenv import load_dotenv
from json import JSONDecodeError
from bson.objectid import ObjectId from bson.objectid import ObjectId
from slowapi.errors import RateLimitExceeded from slowapi.errors import RateLimitExceeded
from slowapi.middleware import SlowAPIMiddleware from slowapi.middleware import SlowAPIMiddleware
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
@ -17,7 +17,6 @@ from helpers import network
import core import core
import handler import handler
import moderation
load_dotenv() load_dotenv()
@ -66,17 +65,5 @@ async def root():
@app.route('/v1/{path:path}', methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH']) @app.route('/v1/{path:path}', methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH'])
async def v1_handler(request: fastapi.Request): async def v1_handler(request: fastapi.Request):
res = await handler.handle(request) res = await handler.handle(incoming_request=request)
return res return res
@functools.lru_cache()
@app.post('/moderate')
async def moderate(request: fastapi.Request):
try:
prompt = await request.json()
prompt = prompt['text']
except (KeyError, JSONDecodeError):
return fastapi.Response(status_code=400)
result = await moderation.is_policy_violated__own_model(prompt)
return result or ''

View file

@ -11,7 +11,6 @@ import starlette
from rich import print from rich import print
from dotenv import load_dotenv from dotenv import load_dotenv
import chunks
import proxies import proxies
import provider_auth import provider_auth
import after_request import after_request
@ -21,24 +20,6 @@ from helpers import network, chat, errors
load_dotenv() load_dotenv()
## Loads config which contains rate limits
with open('config/config.yml', encoding='utf8') as f:
config = yaml.safe_load(f)
## Where all rate limit requested data will be stored.
# Rate limit data is **not persistent** (It will be deleted on server stop/restart).
user_last_request_time = {}
DEMO_PAYLOAD = {
'model': 'gpt-3.5-turbo',
'messages': [
{
'role': 'user',
'content': '1+1='
}
]
}
async def respond( async def respond(
path: str='/v1/chat/completions', path: str='/v1/chat/completions',
user: dict=None, user: dict=None,
@ -52,27 +33,22 @@ async def respond(
""" """
is_chat = False is_chat = False
is_stream = payload.get('stream', False)
model = None model = None
is_stream = False
if 'chat/completions' in path: if 'chat/completions' in path:
is_chat = True is_chat = True
model = payload['model'] model = payload['model']
if is_chat and is_stream:
chat_id = await chat.create_chat_id()
yield await chat.create_chat_chunk(chat_id=chat_id, model=model, content=chat.CompletionStart)
yield await chat.create_chat_chunk(chat_id=chat_id, model=model, content=None)
json_response = {} json_response = {}
headers = { headers = {
'Content-Type': 'application/json', 'Content-Type': 'application/json',
'User-Agent': 'null' 'User-Agent': 'axios/0.21.1',
} }
for _ in range(5): for _ in range(10):
# Load balancing: randomly selecting a suitable provider # Load balancing: randomly selecting a suitable provider
# If the request is a chat completion, then we need to load balance between chat providers # If the request is a chat completion, then we need to load balance between chat providers
# If the request is an organic request, then we need to load balance between organic providers # If the request is an organic request, then we need to load balance between organic providers
@ -115,10 +91,11 @@ async def respond(
cookies=target_request.get('cookies'), cookies=target_request.get('cookies'),
ssl=False, ssl=False,
timeout=aiohttp.ClientTimeout( timeout=aiohttp.ClientTimeout(
connect=0.5, connect=0.3,
total=float(os.getenv('TRANSFER_TIMEOUT', '500')) total=float(os.getenv('TRANSFER_TIMEOUT', '500'))
), ),
) as response: ) as response:
is_stream = response.content_type == 'text/event-stream'
if response.status == 429: if response.status == 429:
continue continue
@ -144,35 +121,27 @@ async def respond(
if 'Too Many Requests' in str(exc): if 'Too Many Requests' in str(exc):
continue continue
async for chunk in chunks.process_chunks( async for chunk in response.content.iter_any():
chunks=response.content.iter_any(), chunk = chunk.decode('utf8').strip()
is_chat=is_chat, yield chunk + '\n\n'
chat_id=chat_id,
model=model,
target_request=target_request
):
yield chunk
break break
except Exception as exc: except Exception as exc:
# print(f'[!] {type(exc)} - {exc}')
continue continue
if (not json_response) and is_chat: if (not json_response) and is_chat:
print('[!] chat response is empty') print('[!] chat response is empty')
continue continue
else: else:
yield await errors.yield_error(500, 'Sorry, the API is not responding.', 'Please try again later.') yield await errors.yield_error(500, 'Sorry, the provider is not responding. We\'re possibly getting rate-limited.', 'Please try again later.')
return return
if is_chat and is_stream:
yield await chat.create_chat_chunk(chat_id=chat_id, model=model, content=chat.CompletionStop)
yield 'data: [DONE]\n\n'
if (not is_stream) and json_response: if (not is_stream) and json_response:
yield json.dumps(json_response) yield json.dumps(json_response)
print(f'[+] {path} -> {model or ""}')
await after_request.after_request( await after_request.after_request(
incoming_request=incoming_request, incoming_request=incoming_request,
target_request=target_request, target_request=target_request,
@ -183,5 +152,3 @@ async def respond(
is_chat=is_chat, is_chat=is_chat,
model=model, model=model,
) )
print(f'[+] {path} -> {model or ""}')

View file

@ -2,6 +2,7 @@
import os import os
import time import time
import json
import httpx import httpx
import openai import openai
import asyncio import asyncio
@ -10,6 +11,7 @@ import traceback
from rich import print from rich import print
from typing import List from typing import List
from dotenv import load_dotenv from dotenv import load_dotenv
from pydantic import BaseModel
load_dotenv() load_dotenv()
@ -18,7 +20,7 @@ MODEL = 'gpt-3.5-turbo'
MESSAGES = [ MESSAGES = [
{ {
'role': 'user', 'role': 'user',
'content': '1+1=', 'content': 'Just respond with the number "1337", nothing else.'
} }
] ]
@ -43,12 +45,12 @@ async def test_server():
else: else:
return time.perf_counter() - request_start return time.perf_counter() - request_start
async def test_chat_non_stream(model: str=MODEL, messages: List[dict]=None) -> dict: async def test_chat_non_stream_gpt4() -> float:
"""Tests an API api_endpoint.""" """Tests non-streamed chat completions with the GPT-4 model."""
json_data = { json_data = {
'model': model, 'model': 'gpt-4',
'messages': messages or MESSAGES, 'messages': MESSAGES,
'stream': False 'stream': False
} }
@ -63,10 +65,52 @@ async def test_chat_non_stream(model: str=MODEL, messages: List[dict]=None) -> d
) )
response.raise_for_status() response.raise_for_status()
assert '2' in response.json()['choices'][0]['message']['content'], 'The API did not return a correct response.' assert '1337' in response.json()['choices'][0]['message']['content'], 'The API did not return a correct response.'
return time.perf_counter() - request_start return time.perf_counter() - request_start
async def test_sdxl(): async def test_chat_stream_gpt3() -> float:
"""Tests the text stream endpoint with the GPT-3.5-Turbo model."""
json_data = {
'model': 'gpt-3.5-turbo',
'messages': MESSAGES,
'stream': True,
}
request_start = time.perf_counter()
async with httpx.AsyncClient() as client:
response = await client.post(
url=f'{api_endpoint}/chat/completions',
headers=HEADERS,
json=json_data,
timeout=10,
)
response.raise_for_status()
chunks = []
resulting_text = ''
async for chunk in response.aiter_text():
for subchunk in chunk.split('\n\n'):
chunk = subchunk.replace('data: ', '').strip()
if chunk == '[DONE]':
break
if chunk:
chunks.append(json.loads(chunk))
try:
resulting_text += json.loads(chunk)['choices'][0]['delta']['content']
except KeyError:
pass
assert '1337' in resulting_text, 'The API did not return a correct response.'
return time.perf_counter() - request_start
async def test_image_generation() -> float:
"""Tests the image generation endpoint with the SDXL model.""" """Tests the image generation endpoint with the SDXL model."""
json_data = { json_data = {
@ -89,6 +133,48 @@ async def test_sdxl():
assert '://' in response.json()['data'][0]['url'] assert '://' in response.json()['data'][0]['url']
return time.perf_counter() - request_start return time.perf_counter() - request_start
class StepByStepAIResponse(BaseModel):
    """Demo response structure for the function calling test.

    The function calling test sends this model's schema as the function
    ``parameters`` and then checks that both fields come back non-empty.
    """
    # Must be present and non-empty in the arguments returned by the model.
    title: str
    # Must be present and non-empty in the arguments returned by the model.
    steps: List[str]
async def test_function_calling():
    """Check that the chat completions endpoint honours a forced function call.

    A single user message is sent together with one function definition whose
    parameters are the ``StepByStepAIResponse`` schema, and the model is told
    to call exactly that function.

    Returns:
        Seconds elapsed between sending the request and validating the answer.

    Raises:
        httpx.HTTPStatusError: If the API answers with an error status.
        AssertionError: If the returned arguments lack a title or steps.
    """
    function_name = 'get_answer_for_user_query'

    payload = {
        'stream': False,
        'model': 'gpt-3.5-turbo-0613',
        'messages': [
            {'role': 'user', 'content': 'Explain how to assemble a PC'},
        ],
        'functions': [
            {
                'name': function_name,
                'description': 'Get user answer in series of steps',
                'parameters': StepByStepAIResponse.schema(),
            },
        ],
        # Force the model to call the function instead of answering in text.
        'function_call': {'name': function_name},
    }

    started_at = time.perf_counter()

    async with httpx.AsyncClient() as http:
        response = await http.post(
            url=f'{api_endpoint}/chat/completions',
            headers=HEADERS,
            json=payload,
            timeout=10,
        )
        response.raise_for_status()

    # The function arguments arrive as a JSON-encoded string inside the message.
    message = response.json()['choices'][0]['message']
    arguments = json.loads(message['function_call']['arguments'])
    print(arguments)

    assert arguments.get('title') and arguments.get('steps'), 'The API did not return a correct response.'
    return time.perf_counter() - started_at
async def test_models(): async def test_models():
"""Tests the models endpoint.""" """Tests the models endpoint."""
@ -122,17 +208,20 @@ async def demo():
else: else:
raise ConnectionError('API Server is not running.') raise ConnectionError('API Server is not running.')
print('Checking non-streamed chat completions...') # print('[lightblue]Checking if function calling works...')
print(await test_chat_non_stream()) # print(await test_function_calling())
# print('[lightblue]Checking if SDXL image generation works...') # print('Checking non-streamed chat completions...')
# print(await test_sdxl()) # print(await test_chat_non_stream_gpt4())
# print('[lightblue]Checking if the moderation endpoint works...') # print('Checking streamed chat completions...')
# print(await test_api_moderation()) # print(await test_chat_stream_gpt3())
print('Checking the models endpoint...') # print('[lightblue]Checking if image generation works...')
print(await test_models()) # print(await test_image_generation())
# print('Checking the models endpoint...')
# print(await test_models())
except Exception as exc: except Exception as exc:
print('[red]Error: ' + str(exc)) print('[red]Error: ' + str(exc))

Binary file not shown.

Before

Width:  |  Height:  |  Size: 566 KiB

BIN
image.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 566 KiB

View file

@ -0,0 +1,82 @@
import os
import json
import openai
from dotenv import load_dotenv
load_dotenv()
openai.api_base = 'http://localhost:2332/v1'
openai.api_key = os.environ['NOVA_KEY']
# Example dummy function hard coded to return the same weather
# In production, this could be your backend API or an external API
def get_current_weather(location, unit='fahrenheit'):
    """Return a canned weather report for ``location`` as a JSON string.

    The temperature and forecast are fixed; only ``location`` and ``unit``
    are taken from the arguments and echoed back.
    """
    report = dict(location=location, temperature='72', unit=unit)
    report['forecast'] = ['sunny', 'windy']
    return json.dumps(report)
def run_conversation():
    """Run one function-calling round trip against the chat completions API.

    The user question and the ``get_current_weather`` function definition are
    sent to the model. If the model asks for the function to be called, it is
    called locally and its result is sent back in a second request.

    Returns:
        The second completion when the model requested a function call,
        otherwise the first completion (previously nothing was returned in
        that case, so a direct answer from the model was lost).
    """
    # Step 1: send the conversation and available functions to GPT
    messages = [{'role': 'user', 'content': 'What\'s the weather like in Boston?'}]
    functions = [
        {
            'name': 'get_current_weather',
            'description': 'Get the current weather in a given location',
            'parameters': {
                'type': 'object',
                'properties': {
                    'location': {
                        'type': 'string',
                        'description': 'The city and state, e.g. San Francisco, CA',
                    },
                    'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']},
                },
                'required': ['location'],
            },
        }
    ]
    response = openai.ChatCompletion.create(
        model='gpt-3.5-turbo-0613',
        messages=messages,
        functions=functions,
        function_call='auto',  # auto is default, but we'll be explicit
    )
    response_message = response['choices'][0]['message']

    # Step 2: check if GPT wanted to call a function
    if not response_message.get('function_call'):
        # The model answered directly; hand that answer back to the caller.
        return response

    # Step 3: call the function
    # Note: the JSON response may not always be valid; be sure to handle errors
    available_functions = {
        'get_current_weather': get_current_weather,
    }  # only one function in this example, but you can have multiple
    function_name = response_message['function_call']['name']
    function_to_call = available_functions[function_name]
    function_args = json.loads(response_message['function_call']['arguments'])

    # `unit` is optional in the schema. Only pass it on when the model supplied
    # one, so the function's own default is not overridden with None.
    call_kwargs = {'location': function_args.get('location')}
    if function_args.get('unit') is not None:
        call_kwargs['unit'] = function_args['unit']
    function_response = function_to_call(**call_kwargs)

    # Step 4: send the info on the function call and function response to GPT
    messages.append(response_message)  # extend conversation with assistant's reply
    messages.append(
        {
            'role': 'function',
            'name': function_name,
            'content': function_response,
        }
    )  # extend conversation with function response

    # get a new response from GPT where it can see the function response
    return openai.ChatCompletion.create(
        model='gpt-3.5-turbo-0613',
        messages=messages,
    )
print(run_conversation())

View file

@ -1,23 +0,0 @@
from fastapi import FastAPI
from fastapi.responses import PlainTextResponse
from fastapi.requests import Request
from fastapi.responses import Response
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.util import get_remote_address
from slowapi.errors import RateLimitExceeded
limiter = Limiter(key_func=lambda: "test", default_limits=["5/minute"])
app = FastAPI()
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
# Note: the route decorator must be above the limit decorator, not below it
@app.get("/home")
@limiter.limit("5/minute")
async def homepage(request: Request):
    """Answer the rate-limited ``/home`` test route with a plain-text body."""
    body = "test"
    return PlainTextResponse(body)
@app.get("/mars")
@limiter.limit("5/minute")
async def homepage(request: Request, response: Response):
    """Answer the rate-limited ``/mars`` test route with a small dict.

    NOTE(review): this handler reuses the name `homepage` of the `/home`
    handler, so the module-level name ends up bound to this function.
    Presumably the limiter also identifies handlers by function name -
    confirm, and consider a distinct name.
    """
    # NOTE(review): `response` is unused here; presumably slowapi needs it in
    # the signature when the handler returns a dict - confirm.
    payload = {"key": "value"}
    return payload