From 8b325d6b811d7eaa5dea01695c637ded9c5c335e Mon Sep 17 00:00:00 2001
From: nsde
Date: Thu, 14 Sep 2023 18:18:19 +0200
Subject: [PATCH] Fixed function calling

---
 api/chunks.py                 | 30 ------------
 api/core.py                   | 35 +++++++-------
 api/handler.py                | 11 ++++-
 api/main.py                   | 17 +------
 api/responder.py              | 55 +++++----------------
 checks/client.py              | 90 +++++++++++++++++++++++++++++++----
 playground/functioncalling.py | 82 +++++++++++++++++++++++++++++++
 7 files changed, 200 insertions(+), 120 deletions(-)
 delete mode 100644 api/chunks.py
 create mode 100644 playground/functioncalling.py

diff --git a/api/chunks.py b/api/chunks.py
deleted file mode 100644
index 7ad2752..0000000
--- a/api/chunks.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import json
-
-from helpers import chat
-
-async def process_chunks(
-    chunks,
-    is_chat: bool,
-    chat_id: int,
-    target_request: dict,
-    model: str=None,
-):
-    """This function processes the response chunks from the providers and yields them.
-    """
-    async for chunk in chunks:
-        chunk = chunk.decode("utf8").strip()
-        send = False
-
-        if is_chat and '{' in chunk:
-            data = json.loads(chunk.split('data: ')[1])
-            chunk = chunk.replace(data['id'], chat_id)
-            send = True
-
-            if target_request['module'] == 'twa' and data.get('text'):
-                chunk = await chat.create_chat_chunk(chat_id=chat_id, model=model, content=['text'])
-
-            if (not data['choices'][0]['delta']) or data['choices'][0]['delta'] == {'role': 'assistant'}:
-                send = False
-
-        if send and chunk:
-            yield chunk + '\n\n'
diff --git a/api/core.py b/api/core.py
index 64c1ef3..abaac92 100644
--- a/api/core.py
+++ b/api/core.py
@@ -126,24 +126,23 @@ async def run_checks(incoming_request: fastapi.Request):
     if auth_error:
         return auth_error
 
-    try:
-        chat = await checks.client.test_chat()
-    except Exception as exc:
-        print(exc)
-        chat = None
+    results = {}
 
-    try:
-        moderation = await checks.client.test_api_moderation()
-    except Exception:
-        moderation = None
+    funcs = [
+        checks.client.test_chat_non_stream_gpt4,
+        checks.client.test_chat_stream_gpt3,
+        checks.client.test_function_calling,
+        checks.client.test_image_generation,
+        checks.client.test_speech_to_text,
+        checks.client.test_models
+    ]
 
-    try:
-        models = await checks.client.test_models()
-    except Exception:
-        models = None
+    for func in funcs:
+        try:
+            result = await func()
+        except Exception as exc:
+            results[func.__name__] = str(exc)
+        else:
+            results[func.__name__] = result
 
-    return {
-        'chat/completions': chat,
-        'models': models,
-        'moderations': moderation,
-    }
+    return results
diff --git a/api/handler.py b/api/handler.py
index b829da6..101d2d9 100644
--- a/api/handler.py
+++ b/api/handler.py
@@ -124,7 +124,14 @@ async def handle(incoming_request: fastapi.Request):
     inp = payload.get('input', payload.get('prompt', ''))
 
     if isinstance(payload.get('messages'), list):
-        inp = '\n'.join([message['content'] for message in payload['messages']])
+        inp = ''
+
+        for message in payload.get('messages', []):
+            if message.get('role') == 'user':
+                inp += message.get('content', '') + '\n'
+
+        if 'functions' in payload:
+            inp += '\n'.join([function.get('description', '') for function in payload.get('functions', [])])
 
     if inp and len(inp) > 2 and not inp.isnumeric():
         policy_violation = await moderation.is_policy_violated(inp)
@@ -148,7 +155,7 @@
             path=path,
             payload=payload,
             credits_cost=cost,
-            input_tokens=-1,
+            input_tokens=0,
             incoming_request=incoming_request,
         ),
         media_type=media_type
diff --git a/api/main.py b/api/main.py
index 36c4e55..c7f95e3 100644
--- a/api/main.py
+++ b/api/main.py
@@ -2,11 +2,9 @@
 
 import fastapi
 import pydantic
-import functools
 
 from rich import print
 from dotenv import load_dotenv
-from json import JSONDecodeError
 from bson.objectid import ObjectId
 from slowapi.errors import RateLimitExceeded
 from slowapi.middleware import SlowAPIMiddleware
@@ -17,7 +15,6 @@ from helpers import network
 
 import core
 import handler
-import moderation
 
 load_dotenv()
 
@@ -66,17 +63,5 @@ async def root():
 
 @app.route('/v1/{path:path}', methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH'])
 async def v1_handler(request: fastapi.Request):
-    res = await handler.handle(request)
+    res = await handler.handle(incoming_request=request)
     return res
-
-@functools.lru_cache()
-@app.post('/moderate')
-async def moderate(request: fastapi.Request):
-    try:
-        prompt = await request.json()
-        prompt = prompt['text']
-    except (KeyError, JSONDecodeError):
-        return fastapi.Response(status_code=400)
-
-    result = await moderation.is_policy_violated__own_model(prompt)
-    return result or ''
diff --git a/api/responder.py b/api/responder.py
index 961c16f..047083b 100644
--- a/api/responder.py
+++ b/api/responder.py
@@ -11,7 +11,6 @@ import starlette
 from rich import print
 from dotenv import load_dotenv
 
-import chunks
 import proxies
 import provider_auth
 import after_request
@@ -21,24 +20,6 @@ from helpers import network, chat, errors
 
 load_dotenv()
 
-## Loads config which contains rate limits
-with open('config/config.yml', encoding='utf8') as f:
-    config = yaml.safe_load(f)
-
-## Where all rate limit requested data will be stored.
-# Rate limit data is **not persistent** (It will be deleted on server stop/restart).
-user_last_request_time = {}
-
-DEMO_PAYLOAD = {
-    'model': 'gpt-3.5-turbo',
-    'messages': [
-        {
-            'role': 'user',
-            'content': '1+1='
-        }
-    ]
-}
-
 async def respond(
     path: str='/v1/chat/completions',
     user: dict=None,
@@ -52,27 +33,22 @@
     """
 
     is_chat = False
-    is_stream = payload.get('stream', False)
     model = None
+    is_stream = False
 
     if 'chat/completions' in path:
         is_chat = True
         model = payload['model']
 
-    if is_chat and is_stream:
-        chat_id = await chat.create_chat_id()
-        yield await chat.create_chat_chunk(chat_id=chat_id, model=model, content=chat.CompletionStart)
-        yield await chat.create_chat_chunk(chat_id=chat_id, model=model, content=None)
-
     json_response = {}
 
     headers = {
         'Content-Type': 'application/json',
-        'User-Agent': 'null'
+        'User-Agent': 'axios/0.21.1',
     }
 
-    for _ in range(5):
+    for _ in range(10):
         # Load balancing: randomly selecting a suitable provider
         # If the request is a chat completion, then we need to load balance between chat providers
         # If the request is an organic request, then we need to load balance between organic providers
@@ -115,10 +91,11 @@
                     cookies=target_request.get('cookies'),
                     ssl=False,
                     timeout=aiohttp.ClientTimeout(
-                        connect=0.5,
+                        connect=0.3,
                         total=float(os.getenv('TRANSFER_TIMEOUT', '500'))
                     ),
                 ) as response:
+                    is_stream = response.content_type == 'text/event-stream'
 
                     if response.status == 429:
                         continue
@@ -144,35 +121,27 @@
                         if 'Too Many Requests' in str(exc):
                             continue
 
-                    async for chunk in chunks.process_chunks(
-                        chunks=response.content.iter_any(),
-                        is_chat=is_chat,
-                        chat_id=chat_id,
-                        model=model,
-                        target_request=target_request
-                    ):
-                        yield chunk
+                    async for chunk in response.content.iter_any():
+                        chunk = chunk.decode('utf8').strip()
+                        yield chunk + '\n\n'
 
             break
         except Exception as exc:
-            # print(f'[!] {type(exc)} - {exc}')
             continue
 
         if (not json_response) and is_chat:
             print('[!] chat response is empty')
             continue
 
     else:
-        yield await errors.yield_error(500, 'Sorry, the API is not responding.', 'Please try again later.')
+        yield await errors.yield_error(500, 'Sorry, the provider is not responding. We\'re possibly getting rate-limited.', 'Please try again later.')
         return
 
-    if is_chat and is_stream:
-        yield await chat.create_chat_chunk(chat_id=chat_id, model=model, content=chat.CompletionStop)
-        yield 'data: [DONE]\n\n'
-
     if (not is_stream) and json_response:
         yield json.dumps(json_response)
 
+    print(f'[+] {path} -> {model or ""}')
+
     await after_request.after_request(
         incoming_request=incoming_request,
         target_request=target_request,
@@ -183,5 +152,3 @@
         is_chat=is_chat,
         model=model,
     )
-
-    print(f'[+] {path} -> {model or ""}')
diff --git a/checks/client.py b/checks/client.py
index 5a29264..c624204 100644
--- a/checks/client.py
+++ b/checks/client.py
@@ -2,6 +2,7 @@
 
 import os
 import time
+import json
 import httpx
 import openai
 import asyncio
@@ -10,6 +11,7 @@ import traceback
 from rich import print
 from typing import List
 from dotenv import load_dotenv
+from pydantic import BaseModel
 
 load_dotenv()
 
@@ -43,12 +45,12 @@ async def test_server():
     else:
         return time.perf_counter() - request_start
 
-async def test_chat_non_stream(model: str=MODEL, messages: List[dict]=None) -> dict:
-    """Tests an API api_endpoint."""
+async def test_chat_non_stream_gpt4() -> float:
+    """Tests non-streamed chat completions with the GPT-4 model."""
 
     json_data = {
-        'model': model,
-        'messages': messages or MESSAGES,
+        'model': 'gpt-4',
+        'messages': MESSAGES,
         'stream': False
     }
 
@@ -66,7 +68,30 @@ async def test_chat_non_stream(model: str=MODEL, messages: List[dict]=None) -> d
     assert '2' in response.json()['choices'][0]['message']['content'], 'The API did not return a correct response.'
     return time.perf_counter() - request_start
 
-async def test_sdxl():
+async def test_chat_stream_gpt3() -> float:
+    """Tests the text stream endpoint with the GPT-3.5-Turbo model."""
+
+    json_data = {
+        'model': 'gpt-3.5-turbo',
+        'messages': MESSAGES,
+        'stream': True,
+    }
+
+    request_start = time.perf_counter()
+
+    async with httpx.AsyncClient() as client:
+        response = await client.post(
+            url=f'{api_endpoint}/chat/completions',
+            headers=HEADERS,
+            json=json_data,
+            timeout=10,
+        )
+        response.raise_for_status()
+
+    assert '2' in response.json()['choices'][0]['message']['content'], 'The API did not return a correct response.'
+    return time.perf_counter() - request_start
+
+async def test_image_generation() -> float:
     """Tests the image generation endpoint with the SDXL model."""
 
     json_data = {
@@ -89,6 +114,48 @@
     assert '://' in response.json()['data'][0]['url']
     return time.perf_counter() - request_start
 
+class StepByStepAIResponse(BaseModel):
+    """Demo response structure for the function calling test."""
+    title: str
+    steps: List[str]
+
+async def test_function_calling():
+    """Tests function calling functionality with newer GPT models."""
+
+    json_data = {
+        'stream': False,
+        'model': 'gpt-3.5-turbo-0613',
+        'messages': [
+            {"role": "user", "content": "Explain how to assemble a PC"}
+        ],
+        'functions': [
+            {
+                'name': 'get_answer_for_user_query',
+                'description': 'Get user answer in series of steps',
+                'parameters': StepByStepAIResponse.schema()
+            }
+        ],
+        'function_call': {'name': 'get_answer_for_user_query'}
+    }
+
+    request_start = time.perf_counter()
+
+    async with httpx.AsyncClient() as client:
+        response = await client.post(
+            url=f'{api_endpoint}/chat/completions',
+            headers=HEADERS,
+            json=json_data,
+            timeout=10,
+        )
+        response.raise_for_status()
+
+    res = response.json()
+    output = json.loads(res['choices'][0]['message']['function_call']['arguments'])
+    print(output)
+
+    assert output.get('title') and output.get('steps'), 'The API did not return a correct response.'
+    return time.perf_counter() - request_start
+
 async def test_models():
     """Tests the models endpoint."""
     json_data = {
@@ -122,14 +189,17 @@ async def demo():
     else:
         raise ConnectionError('API Server is not running.')
 
+    # print('[lightblue]Checking if function calling works...')
+    # print(await test_function_calling())
+
     print('Checking non-streamed chat completions...')
-    print(await test_chat_non_stream())
+    print(await test_chat_non_stream_gpt4())
 
-    # print('[lightblue]Checking if SDXL image generation works...')
-    # print(await test_sdxl())
+    print('Checking streamed chat completions...')
+    print(await test_chat_stream_gpt3())
 
-    # print('[lightblue]Checking if the moderation endpoint works...')
-    # print(await test_api_moderation())
+    print('[lightblue]Checking if image generation works...')
+    print(await test_image_generation())
 
     print('Checking the models endpoint...')
     print(await test_models())
diff --git a/playground/functioncalling.py b/playground/functioncalling.py
new file mode 100644
index 0000000..854ec3b
--- /dev/null
+++ b/playground/functioncalling.py
@@ -0,0 +1,82 @@
+import os
+import json
+import openai
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+openai.api_base = 'http://localhost:2332/v1'
+openai.api_key = os.environ['NOVA_KEY']
+
+# Example dummy function hard coded to return the same weather
+# In production, this could be your backend API or an external API
+def get_current_weather(location, unit='fahrenheit'):
+    """Get the current weather in a given location"""
+    weather_info = {
+        'location': location,
+        'temperature': '72',
+        'unit': unit,
+        'forecast': ['sunny', 'windy'],
+    }
+    return json.dumps(weather_info)
+
+def run_conversation():
+    # Step 1: send the conversation and available functions to GPT
+    messages = [{'role': 'user', 'content': 'What\'s the weather like in Boston?'}]
+    functions = [
+        {
+            'name': 'get_current_weather',
+            'description': 'Get the current weather in a given location',
+            'parameters': {
+                'type': 'object',
+                'properties': {
+                    'location': {
+                        'type': 'string',
+                        'description': 'The city and state, e.g. San Francisco, CA',
+                    },
+                    'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']},
+                },
+                'required': ['location'],
+            },
+        }
+    ]
+    response = openai.ChatCompletion.create(
+        model='gpt-3.5-turbo-0613',
+        messages=messages,
+        functions=functions,
+        function_call='auto', # auto is default, but we'll be explicit
+    )
+    response_message = response['choices'][0]['message']
+
+    # Step 2: check if GPT wanted to call a function
+    if response_message.get('function_call'):
+        # Step 3: call the function
+        # Note: the JSON response may not always be valid; be sure to handle errors
+        available_functions = {
+            'get_current_weather': get_current_weather,
+        } # only one function in this example, but you can have multiple
+        function_name = response_message['function_call']['name']
+        fuction_to_call = available_functions[function_name]
+        function_args = json.loads(response_message['function_call']['arguments'])
+        function_response = fuction_to_call(
+            location=function_args.get('location'),
+            unit=function_args.get('unit'),
+        )
+
+        # Step 4: send the info on the function call and function response to GPT
+        messages.append(response_message) # extend conversation with assistant's reply
+        messages.append(
+            {
+                'role': 'function',
+                'name': function_name,
+                'content': function_response,
+            }
+        ) # extend conversation with function response
+        second_response = openai.ChatCompletion.create(
+            model='gpt-3.5-turbo-0613',
+            messages=messages,
+        ) # get a new response from GPT where it can see the function response
+        return second_response
+
+print(run_conversation())
\ No newline at end of file