diff --git a/api/config/config.yml b/api/config/config.yml new file mode 100644 index 0000000..4d3fefb --- /dev/null +++ b/api/config/config.yml @@ -0,0 +1,50 @@ +max-credits: 100001 +max-credits-owner: 694201337 +start-credits: 1000 + +costs: + other: 10 + + chat-models: + gpt-3: 10 + gpt-4: 30 + gpt-4-32k: 100 + +# bonuses are multiplier for costs: +# final_cost = cost * bonus +roles: + owner: + bonus: 0.1 + rate_limit: + other: 60 + gpt-3: 60 + gpt-4: 35 + gpt-4-32k: 5 + admin: + bonus: 0.3 + rate_limit: + other: 60 + gpt-3: 60 + gpt-4: 30 + gpt-4-32k: 4 + helper: + bonus: 0.4 + rate_limit: + other: 60 + gpt-3: 60 + gpt-4: 25 + gpt-4-32k: 3 + booster: + bonus: 0.5 + rate_limit: + other: 60 + gpt-3: 60 + gpt-4: 20 + gpt-4-32k: 2 + default: + bonus: 1 + rate_limit: + other: 60 + gpt-3: 60 + gpt-4: 15 + gpt-4-32k: 1 \ No newline at end of file diff --git a/api/config/credits.yml b/api/config/credits.yml deleted file mode 100644 index 29eeaa9..0000000 --- a/api/config/credits.yml +++ /dev/null @@ -1,19 +0,0 @@ -max-credits: 100001 -max-credits-owner: 694201337 -start-credits: 1000 - -costs: - other: 10 - - chat-models: - gpt-3: 10 - gpt-4: 30 - gpt-4-32k: 100 - -# bonuses are multiplier for costs: -# final_cost = cost * bonus -bonuses: - owner: 0.1 - admin: 0.3 - helper: 0.4 - booster: 0.5 diff --git a/api/core.py b/api/core.py index 4a652cf..8946516 100644 --- a/api/core.py +++ b/api/core.py @@ -12,7 +12,6 @@ from dotenv import load_dotenv load_dotenv() router = fastapi.APIRouter(tags=['core']) - async def check_core_auth(request): """ diff --git a/api/main.py b/api/main.py index 8bc8810..6815984 100644 --- a/api/main.py +++ b/api/main.py @@ -5,7 +5,6 @@ import fastapi from rich import print from dotenv import load_dotenv from fastapi.middleware.cors import CORSMiddleware - import core import transfer diff --git a/api/streaming.py b/api/streaming.py index 76c3d2e..dc8dfea 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -6,6 +6,7 @@ import dhooks import 
asyncio import aiohttp import starlette +import datetime from rich import print from dotenv import load_dotenv @@ -19,9 +20,19 @@ from db import logs from db.users import UserManager from db.stats import StatsManager from helpers import network, chat, errors +import yaml + load_dotenv() +## Load the config which contains the rate limits +with open('config/config.yml', encoding='utf8') as f: + config = yaml.safe_load(f) + +## Where all rate limit request data will be stored. +# Rate limit data is **not persistent**, i.e. it will be deleted on server stop/restart. +user_last_request_time = {} + DEMO_PAYLOAD = { 'model': 'gpt-3.5-turbo', 'messages': [ @@ -68,6 +79,7 @@ async def stream( incoming_request: starlette.requests.Request=None, ): """Stream the completions request. Sends data in chunks + If not streaming, it sends the result in its entirety. Args: path (str, optional): URL Path. Defaults to '/v1/chat/completions'. @@ -77,8 +89,27 @@ input_tokens (int, optional): Total tokens calculated with tokenizer. Defaults to 0. incoming_request (starlette.requests.Request, optional): Incoming request. Defaults to None. """ + + if user: + role = user.get('role', 'default') + rate_limit = config['roles'][role]['rate_limit'].get(payload['model'], 10)  # allowed requests per minute + + last_request_time = user_last_request_time.get(user['api_key']) + now = datetime.datetime.now() + + if last_request_time is not None and (now - last_request_time) < datetime.timedelta(seconds=60 / rate_limit): + yield await errors.yield_error(429, 'Rate limit exceeded', "You are making requests too quickly. Please wait and try again later. Ask an administrator if you think this shouldn't happen.
") + return + else: + user_last_request_time[user['api_key']] = now + + ## Setup managers db = UserManager() stats = StatsManager() + + ## Rate limit was already checked above + + is_chat = False is_stream = payload.get('stream', False) diff --git a/api/transfer.py b/api/transfer.py index 61483e6..5e643a6 100644 --- a/api/transfer.py +++ b/api/transfer.py @@ -16,8 +16,8 @@ load_dotenv() models_list = json.load(open('models.json')) -with open('config/credits.yml', encoding='utf8') as f: - credits_config = yaml.safe_load(f) +with open('config/config.yml', encoding='utf8') as f: + config = yaml.safe_load(f) async def handle(incoming_request): """ @@ -59,7 +59,7 @@ async def handle(incoming_request): if path_contains_models: return fastapi.responses.JSONResponse(content=models_list) - costs = credits_config['costs'] + costs = config['costs'] cost = costs['other'] if 'chat/completions' in path: @@ -74,7 +74,7 @@ if policy_violation: return await errors.error(400, f'The request contains content which violates this model\'s policies for "{policy_violation}".', 'We currently don\'t support any NSFW models.') - role_cost_multiplier = credits_config['bonuses'].get(user['role'], 1) + role_cost_multiplier = config['roles'].get(user['role'], {}).get('bonus', 1) cost = round(cost * role_cost_multiplier) if user['credits'] < cost: