From a7bd57c5a80580c2a5aebf7b1d6c8a229a00dbc5 Mon Sep 17 00:00:00 2001 From: nsde Date: Sat, 12 Aug 2023 17:49:31 +0200 Subject: [PATCH 1/2] Added more documentation --- .gitignore | 3 --- api/core.py | 11 --------- api/helpers/chat.py | 4 +++ api/helpers/errors.py | 6 ++++- api/helpers/exceptions.py | 2 -- api/helpers/network.py | 4 +++ api/helpers/tokens.py | 6 ++--- api/load_balancing.py | 7 ++++-- api/moderation.py | 13 +++++++++- api/provider_auth.py | 10 +++++++- api/proxies.py | 52 ++++++++++++++++++++++++++------------- api/streaming.py | 41 +++++++++++++++++++++--------- api/transfer.py | 2 +- setup.md | 30 ++++++++++++++++++++-- 14 files changed, 136 insertions(+), 55 deletions(-) delete mode 100644 api/helpers/exceptions.py diff --git a/.gitignore b/.gitignore index 9abd447..08e41d1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,5 @@ last_update.txt -!api/providers/__main__.py -!api/providers/helpers/utils.py - *.log.json /logs /log diff --git a/api/core.py b/api/core.py index f266147..fe16cd9 100644 --- a/api/core.py +++ b/api/core.py @@ -63,14 +63,3 @@ async def create_user(incoming_request: fastapi.Request): await new_user_webhook(user) return user - -if __name__ == '__main__': - # new_user_webhook({ - # '_id': 'JUST_A_TEST_IGNORE_ME', - # 'auth': { - # 'discord': 123, - # 'github': 'abc' - # } - # }) - - pass diff --git a/api/helpers/chat.py b/api/helpers/chat.py index a1002fc..9cd35aa 100644 --- a/api/helpers/chat.py +++ b/api/helpers/chat.py @@ -12,12 +12,16 @@ class CompletionStop: """End of a chat""" async def create_chat_id() -> str: + """Generates a random chat ID""" + chars = string.ascii_letters + string.digits chat_id = ''.join(random.choices(chars, k=32)) return f'chatcmpl-{chat_id}' async def create_chat_chunk(chat_id: str, model: str, content=None) -> dict: + """Creates a new chat chunk""" + content = content or {} delta = {} diff --git a/api/helpers/errors.py b/api/helpers/errors.py index de4f3e5..a4bee00 100644 --- a/api/helpers/errors.py +++ b/api/helpers/errors.py @@ -1,7 +1,9 @@ import json import starlette -async def error(code: int, message: str, tip: str) -> starlette.responses.Response: +async def error(code: int, message: str, tip: str) -> starlette.responses.Response: + """Returns a starlette response JSON with the given error code, message and tip.""" + info = {'error': { 'code': code, 'message': message, @@ -13,6 +15,8 @@ async def error(code: int, message: str, tip: str) -> starlette.responses.Respon return starlette.responses.Response(status_code=code, content=json.dumps(info)) async def yield_error(code: int, message: str, tip: str) -> str: + """Returns a dumped JSON response with the given error code, message and tip.""" + return json.dumps({ 'code': code, 'message': message, diff --git a/api/helpers/exceptions.py b/api/helpers/exceptions.py deleted file mode 100644 index 1fcff8b..0000000 --- a/api/helpers/exceptions.py +++ /dev/null @@ -1,2 +0,0 @@ -class Retry(Exception): - """The server should retry the request.""" diff --git a/api/helpers/network.py b/api/helpers/network.py index 76a3bad..ce67410 100644 --- a/api/helpers/network.py +++ b/api/helpers/network.py @@ -2,6 +2,8 @@ import base64 import asyncio async def get_ip(request) -> str: + """Get the IP address of the incoming request.""" + xff = None if request.headers.get('x-forwarded-for'): xff, *_ = request.headers['x-forwarded-for'].split(', ') @@ -17,6 +19,8 @@ async def get_ip(request) -> str: return detected_ip async def add_proxy_auth_to_headers(username: str, password: str, headers: dict) -> dict: + """Add proxy authentication to the headers. This is useful if the proxy authentication doesn't work as it should.""" + proxy_auth = base64.b64encode(f'{username}:{password}'.encode()).decode() headers['Proxy-Authorization'] = f'Basic {proxy_auth}' return headers diff --git a/api/helpers/tokens.py b/api/helpers/tokens.py index e1aede6..10b3dd0 100644 --- a/api/helpers/tokens.py +++ b/api/helpers/tokens.py @@ -25,13 +25,13 @@ async def count_for_messages(messages: list, model: str='gpt-3.5-turbo-0613') -> tokens_per_name = -1 # if there's a name, the role is omitted elif 'gpt-3.5-turbo' in model: - return num_tokens_from_messages(messages, model='gpt-3.5-turbo-0613') + return count_for_messages(messages, model='gpt-3.5-turbo-0613') elif 'gpt-4' in model: - return num_tokens_from_messages(messages, model='gpt-4-0613') + return count_for_messages(messages, model='gpt-4-0613') else: - raise NotImplementedError(f"""num_tokens_from_messages() is not implemented for model {model}. + raise NotImplementedError(f"""count_for_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""") diff --git a/api/load_balancing.py b/api/load_balancing.py index 3fb6f54..cfcfe72 100644 --- a/api/load_balancing.py +++ b/api/load_balancing.py @@ -10,7 +10,8 @@ async def _get_module_name(module) -> str: return name async def balance_chat_request(payload: dict) -> dict: - """Load balance the chat completion request between chat providers.""" + """Load balance the chat completion request between chat providers. +""" providers_available = [] @@ -35,7 +36,9 @@ async def balance_chat_request(payload: dict) -> dict: return target async def balance_organic_request(request: dict) -> dict: - """Load balnace to non-chat completion request between other "organic" providers which respond in the desired format already.""" + """Load balnace to non-chat completion request between other "organic" providers which respond in the desired format already. +Organic providers are used for non-chat completions, such as moderation and other paths. +""" providers_available = [] diff --git a/api/moderation.py b/api/moderation.py index d0c8144..bdc9ff3 100644 --- a/api/moderation.py +++ b/api/moderation.py @@ -1,3 +1,5 @@ +"""This module contains functions for checking if a message violates the moderation policy.""" + import asyncio import aiohttp @@ -5,7 +7,16 @@ import proxies import provider_auth import load_balancing -async def is_policy_violated(inp) -> bool: +from typing import Union + +async def is_policy_violated(inp: Union[str, list]) -> bool: + """Check if a message violates the moderation policy. +You can either pass a list of messages consisting of dicts with "role" and "content", as used in the API parameter, +or just a simple string. + +Returns True if the message violates the policy, False otherwise. +""" + text = inp if isinstance(inp, list): diff --git a/api/provider_auth.py b/api/provider_auth.py index 9393109..bc499d7 100644 --- a/api/provider_auth.py +++ b/api/provider_auth.py @@ -1,6 +1,14 @@ +"""This module contains functions for authenticating with providers.""" + import asyncio -async def invalidate_key(provider_and_key): +async def invalidate_key(provider_and_key: str) -> none: + """Invalidates a key stored in the secret/ folder by storing it in the associated .invalid.txt file. +The schmea in which should be passed is: +, e.g. +closed4>sk-... +""" + if not provider_and_key: return diff --git a/api/proxies.py b/api/proxies.py index 639607a..930b5f9 100644 --- a/api/proxies.py +++ b/api/proxies.py @@ -12,8 +12,14 @@ from dotenv import load_dotenv load_dotenv() +USE_PROXY_LIST = os.getenv('USE_PROXY_LIST', 'False').lower() == 'true' + class Proxy: - """Represents a proxy. The type can be either http, https, socks4 or socks5.""" + """Represents a proxy. The type can be either http, https, socks4 or socks5. +You can also pass a url, which will be parsed into the other attributes. +URL format: + [type]://[username:password@]host:port +""" def __init__(self, url: str=None, @@ -36,7 +42,7 @@ class Proxy: self.proxy_type = proxy_type self.host_or_ip = host_or_ip - self.ip_address = socket.gethostbyname(self.host_or_ip) #if host_or_ip.replace('.', '').isdigit() else host_or_ip + self.ip_address = socket.gethostbyname(self.host_or_ip) # get ip address from host self.host = self.host_or_ip self.port = port self.username = username @@ -54,6 +60,8 @@ class Proxy: @property def connector(self): + """Returns an aiohttp_socks.ProxyConnector object. Which can be used in aiohttp.ClientSession.""" + proxy_types = { 'http': aiohttp_socks.ProxyType.HTTP, 'https': aiohttp_socks.ProxyType.HTTP, @@ -65,11 +73,13 @@ class Proxy: proxy_type=proxy_types[self.proxy_type], host=self.ip_address, port=self.port, - rdns=False, + rdns=False, # remote DNS username=self.username, password=self.password ) +# load proxies from files + proxies_in_files = [] try: @@ -84,27 +94,19 @@ try: except FileNotFoundError: pass -class ProxyChain: +# Proxy lists support + +class ProxyLists: def __init__(self): random_proxy = random.choice(proxies_in_files) self.get_random = Proxy(url=random_proxy) self.connector = aiohttp_socks.ChainProxyConnector.from_urls(proxies_in_files) -try: - default_chain = ProxyChain() - random_proxy = ProxyChain().get_random -except IndexError: - pass - -default_proxy = Proxy( - proxy_type=os.getenv('PROXY_TYPE', 'http'), - host_or_ip=os.getenv('PROXY_HOST', '127.0.0.1'), - port=int(os.getenv('PROXY_PORT', '8080')), - username=os.getenv('PROXY_USER'), - password=os.getenv('PROXY_PASS') -) +# ================================================================================================================================ # +# Proxy tests +# Can be useful if you want to troubleshoot your proxies def test_httpx_workaround(): import httpx @@ -153,6 +155,22 @@ async def text_httpx_socks(): res = await client.get('https://checkip.amazonaws.com') return res.text +# ================================================================================================================================ # + +def get_proxy() -> Proxy: + """Returns a Proxy object. The proxy is either from the proxy list or from the environment variables. +""" + if USE_PROXY_LIST: + return ProxyLists().get_random + + return Proxy( + proxy_type=os.getenv('PROXY_TYPE', 'http'), + host_or_ip=os.getenv('PROXY_HOST', '127.0.0.1'), + port=int(os.getenv('PROXY_PORT', '8080')), + username=os.getenv('PROXY_USER'), + password=os.getenv('PROXY_PASS') + ) + if __name__ == '__main__': # print(test_httpx()) # print(test_requests()) diff --git a/api/streaming.py b/api/streaming.py index 09dc000..295b06b 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -1,3 +1,5 @@ +"""This module contains the streaming logic for the API.""" + import os import json import dhooks @@ -33,7 +35,6 @@ async def stream( user: dict=None, payload: dict=None, credits_cost: int=0, - demo_mode: bool=False, input_tokens: int=0, incoming_request: starlette.requests.Request=None, ): @@ -44,6 +45,7 @@ async def stream( is_chat = True model = payload['model'] + # Chat completions always have the same beginning if is_chat and is_stream: chat_id = await chat.create_chat_id() @@ -66,15 +68,22 @@ async def stream( 'error': 'No JSON response could be received' } + # Try to get a response from the API for _ in range(5): headers = { 'Content-Type': 'application/json' } + # Load balancing + # If the request is a chat completion, then we need to load balance between chat providers + # If the request is an organic request, then we need to load balance between organic providers + try: if is_chat: target_request = await load_balancing.balance_chat_request(payload) else: + # "organic" means that it's not using a reverse engineered front-end, but rather ClosedAI's API directly + # churchless.tech is an example of an organic provider, because it redirects the request to ClosedAI. target_request = await load_balancing.balance_organic_request({ 'method': incoming_request.method, 'path': path, @@ -83,9 +92,10 @@ async def stream( 'cookies': incoming_request.cookies }) except ValueError as exc: + # Error load balancing? Send a webhook to the admins webhook = dhooks.Webhook(os.getenv('DISCORD_WEBHOOK__API_ISSUE')) - webhook.send(content=f'API Issue: **`{exc}`**\nhttps://i.imgflip.com/7uv122.jpg') + error = await errors.yield_error( 500, 'Sorry, the API has no working keys anymore.', @@ -100,7 +110,9 @@ async def stream( if target_request['method'] == 'GET' and not payload: target_request['payload'] = None - async with aiohttp.ClientSession(connector=proxies.default_proxy.connector) as session: + # We haven't done any requests as of right now, everything until now was just preparation + # Here, we process the request + async with aiohttp.ClientSession(connector=proxies.get_proxy().connector) as session: try: async with session.request( method=target_request.get('method', 'POST'), @@ -116,9 +128,11 @@ async def stream( timeout=aiohttp.ClientTimeout(total=float(os.getenv('TRANSFER_TIMEOUT', '120'))), ) as response: + # if the answer is JSON if response.content_type == 'application/json': data = await response.json() + # Invalidate the key if it's not working if data.get('code') == 'invalid_api_key': await provider_auth.invalidate_key(target_request.get('provider_auth')) continue @@ -126,37 +140,40 @@ async def stream( if response.ok: json_response = data + # if the answer is a stream if is_stream: try: response.raise_for_status() except Exception as exc: + # Rate limit? Balance again if 'Too Many Requests' in str(exc): continue try: + # process the response chunks async for chunk in response.content.iter_any(): send = False chunk = f'{chunk.decode("utf8")}\n\n' - chunk = chunk.replace(os.getenv('MAGIC_WORD', 'novaOSScheckKeyword'), payload['model']) - chunk = chunk.replace(os.getenv('MAGIC_USER_WORD', 'novaOSSuserKeyword'), str(user['_id'])) if is_chat and '{' in chunk: + # parse the JSON data = json.loads(chunk.split('data: ')[1]) chunk = chunk.replace(data['id'], chat_id) send = True + # create a custom chunk if we're using specific providers if target_request['module'] == 'twa' and data.get('text'): chunk = await chat.create_chat_chunk( chat_id=chat_id, model=model, content=['text'] ) - if not data['choices'][0]['delta']: + + # don't send empty/unnecessary messages + if (not data['choices'][0]['delta']) or data['choices'][0]['delta'] == {'role': 'assistant'}: send = False - if data['choices'][0]['delta'] == {'role': 'assistant'}: - send = False - + # send the chunk if send and chunk.strip(): final_chunk = chunk.strip().replace('data: [DONE]', '') + '\n\n' yield final_chunk @@ -174,9 +191,10 @@ async def stream( break except ProxyError as exc: - print('proxy error') + print('[!] Proxy error:', exc) continue + # Chat completions always have the same ending if is_chat and is_stream: chunk = await chat.create_chat_chunk( chat_id=chat_id, @@ -186,10 +204,11 @@ async def stream( yield chunk yield 'data: [DONE]\n\n' + # If the response is JSON, then we need to yield it like this if not is_stream and json_response: yield json.dumps(json_response) - # DONE ========================================================= + # DONE WITH REQUEST, NOW LOGGING ETC. if user and incoming_request: await logs.log_api_request( diff --git a/api/transfer.py b/api/transfer.py index a4f83f6..23011f9 100644 --- a/api/transfer.py +++ b/api/transfer.py @@ -1,4 +1,4 @@ -"""Module for transferring requests to ClosedAI API""" +"""Does quite a few checks and prepares the incoming request for the target endpoint, so it can be streamed""" import json import yaml diff --git a/setup.md b/setup.md index d6ccda3..8835c4f 100644 --- a/setup.md +++ b/setup.md @@ -36,6 +36,9 @@ Set up a MongoDB database and set `MONGO_URI` to the MongoDB database connection - `PROXY_USER` (optional) - `PROXY_PASS` (optional) +Want to use a proxy list? See the according section! +Keep in mind to set `USE_PROXY_LIST` to `True`! Otherwise, the proxy list won't be used. + ### `ACTUAL_IPS` (optional) This is a security measure to make sure a proxy, VPN, Tor or any other IP hiding service is used by the host when accessing "Closed"AI's API. It is a space separated list of IP addresses that are allowed to access the API. @@ -49,13 +52,36 @@ You can also just add the *beginning* of an API address, like `12.123.` (without `CORE_API_KEY` specifies the **very secret key** for which need to access the entire user database etc. `TEST_NOVA_KEY` is the API key the which is used in tests. It should be one with tons of credits. -## Webhooks +### Webhooks `DISCORD_WEBHOOK__USER_CREATED` is the Discord webhook URL for when a user is created. `DISCORD_WEBHOOK__API_ISSUE` is the Discord webhook URL for when an API issue occurs. -## Other +### Other `KEYGEN_INFIX` can be almost any string (avoid spaces or special characters) - this string will be put in the middle of every NovaAI API key which is generated. This is useful for identifying the source of the key using e.g. RegEx. +## Proxy Lists +To use proxy lists, navigate to `api/secret/proxies/` and create the following files: +- `http.txt` +- `socks4.txt` +- `socks5.txt` + +Then, paste your proxies in the following format: + +``` +[username:password@]host:port +``` + +e.g. + +``` +1.2.3.4:8080 +user:pass@127.0.0.1:1337 +``` + +You can use comments just like in Python. + +**Important:** to use the proxy lists, you need to change the `USE_PROXY_LIST` environment variable to `True`! + ## Run > **Warning:** read the according section for production usage! From ea68a638e9d7264a55ba5213efb61d5d0f7248f2 Mon Sep 17 00:00:00 2001 From: nsde Date: Sat, 12 Aug 2023 17:53:54 +0200 Subject: [PATCH 2/2] Removed unneccesary code --- api/helpers/network.py | 17 - api/sockslib/socks.py | 770 ----------------------------------------- 2 files changed, 787 deletions(-) delete mode 100644 api/sockslib/socks.py diff --git a/api/helpers/network.py b/api/helpers/network.py index ce67410..997f497 100644 --- a/api/helpers/network.py +++ b/api/helpers/network.py @@ -17,20 +17,3 @@ async def get_ip(request) -> str: detected_ip = next((i for i in possible_ips if i), None) return detected_ip - -async def add_proxy_auth_to_headers(username: str, password: str, headers: dict) -> dict: - """Add proxy authentication to the headers. This is useful if the proxy authentication doesn't work as it should.""" - - proxy_auth = base64.b64encode(f'{username}:{password}'.encode()).decode() - headers['Proxy-Authorization'] = f'Basic {proxy_auth}' - return headers - -if __name__ == '__main__': - print(asyncio.run(add_proxy_auth_to_headers( - 'user', - 'pass', - { - 'Authorization': 'Bearer demo', - 'Another-Header': '123' - } - ))) diff --git a/api/sockslib/socks.py b/api/sockslib/socks.py deleted file mode 100644 index 19b2b91..0000000 --- a/api/sockslib/socks.py +++ /dev/null @@ -1,770 +0,0 @@ -""" -THE FOLLOWING CODE WAS TAKEN FROM https://raw.githubusercontent.com/m0rtem/CloudFail/master/socks.py - -SocksiPy - Python SOCKS module. -Version 1.5.7 - -Copyright 2006 Dan-Haim. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -3. Neither the name of Dan Haim nor the names of his contributors may be used - to endorse or promote products derived from this software without specific - prior written permission. - -THIS SOFTWARE IS PROVIDED BY DAN HAIM "AS IS" AND ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO -EVENT SHALL DAN HAIM OR HIS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA -OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT -OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMANGE. - - -This module provides a standard socket-like interface for Python -for tunneling connections through SOCKS proxies. - -=============================================================================== - -Minor modifications made by Christopher Gilbert (http://motomastyle.com/) -for use in PyLoris (http://pyloris.sourceforge.net/) - -Minor modifications made by Mario Vilas (http://breakingcode.wordpress.com/) -mainly to merge bug fixes found in Sourceforge - -Modifications made by Anorov (https://github.com/Anorov) --Forked and renamed to PySocks --Fixed issue with HTTP proxy failure checking (same bug that was in the old ___recvall() method) --Included SocksiPyHandler (sockshandler.py), to be used as a urllib2 handler, - courtesy of e000 (https://github.com/e000): https://gist.github.com/869791#file_socksipyhandler.py --Re-styled code to make it readable - -Aliased PROXY_TYPE_SOCKS5 -> SOCKS5 etc. - -Improved exception handling and output - -Removed irritating use of sequence indexes, replaced with tuple unpacked variables - -Fixed up Python 3 bytestring handling - chr(0x03).encode() -> b"\x03" - -Other general fixes --Added clarification that the HTTP proxy connection method only supports CONNECT-style tunneling HTTP proxies --Various small bug fixes -""" - -__version__ = "1.5.7" - -import socket -import struct -from errno import EOPNOTSUPP, EINVAL, EAGAIN -from io import BytesIO -from os import SEEK_CUR -from base64 import b64encode -try: - from collections.abc import Callable -except ImportError: - from collections import Callable - -PROXY_TYPE_SOCKS4 = SOCKS4 = 1 -PROXY_TYPE_SOCKS5 = SOCKS5 = 2 -PROXY_TYPE_HTTP = HTTP = 3 - -PROXY_TYPES = {"SOCKS4": SOCKS4, "SOCKS5": SOCKS5, "HTTP": HTTP} -PRINTABLE_PROXY_TYPES = dict(zip(PROXY_TYPES.values(), PROXY_TYPES.keys())) - -_orgsocket = _orig_socket = socket.socket - -class ProxyError(IOError): - """ - socket_err contains original socket.error exception. - """ - def __init__(self, msg, socket_err=None): - self.msg = msg - self.socket_err = socket_err - - if socket_err: - self.msg += ": {0}".format(socket_err) - - def __str__(self): - return self.msg - -class GeneralProxyError(ProxyError): pass -class ProxyConnectionError(ProxyError): pass -class SOCKS5AuthError(ProxyError): pass -class SOCKS5Error(ProxyError): pass -class SOCKS4Error(ProxyError): pass -class HTTPError(ProxyError): pass - -SOCKS4_ERRORS = { 0x5B: "Request rejected or failed", - 0x5C: "Request rejected because SOCKS server cannot connect to identd on the client", - 0x5D: "Request rejected because the client program and identd report different user-ids" - } - -SOCKS5_ERRORS = { 0x01: "General SOCKS server failure", - 0x02: "Connection not allowed by ruleset", - 0x03: "Network unreachable", - 0x04: "Host unreachable", - 0x05: "Connection refused", - 0x06: "TTL expired", - 0x07: "Command not supported, or protocol error", - 0x08: "Address type not supported" - } - -DEFAULT_PORTS = { SOCKS4: 1080, - SOCKS5: 1080, - HTTP: 8080 - } - -def set_default_proxy(proxy_type=None, addr=None, port=None, rdns=True, username=None, password=None): - """ - set_default_proxy(proxy_type, addr[, port[, rdns[, username, password]]]) - - Sets a default proxy which all further socksocket objects will use, - unless explicitly changed. All parameters are as for socket.set_proxy(). - """ - socksocket.default_proxy = (proxy_type, addr, port, rdns, - username.encode() if username else None, - password.encode() if password else None) - -setdefaultproxy = set_default_proxy - -def get_default_proxy(): - """ - Returns the default proxy, set by set_default_proxy. - """ - return socksocket.default_proxy - -getdefaultproxy = get_default_proxy - -def wrap_module(module): - """ - Attempts to replace a module's socket library with a SOCKS socket. Must set - a default proxy using set_default_proxy(...) first. - This will only work on modules that import socket directly into the namespace; - most of the Python Standard Library falls into this category. - """ - if socksocket.default_proxy: - module.socket.socket = socksocket - else: - raise GeneralProxyError("No default proxy specified") - -wrapmodule = wrap_module - -def create_connection(dest_pair, proxy_type=None, proxy_addr=None, - proxy_port=None, proxy_rdns=True, - proxy_username=None, proxy_password=None, - timeout=None, source_address=None, - socket_options=None): - """create_connection(dest_pair, *[, timeout], **proxy_args) -> socket object - - Like socket.create_connection(), but connects to proxy - before returning the socket object. - - dest_pair - 2-tuple of (IP/hostname, port). - **proxy_args - Same args passed to socksocket.set_proxy() if present. - timeout - Optional socket timeout value, in seconds. - source_address - tuple (host, port) for the socket to bind to as its source - address before connecting (only for compatibility) - """ - # Remove IPv6 brackets on the remote address and proxy address. - remote_host, remote_port = dest_pair - if remote_host.startswith('['): - remote_host = remote_host.strip('[]') - if proxy_addr and proxy_addr.startswith('['): - proxy_addr = proxy_addr.strip('[]') - - err = None - - # Allow the SOCKS proxy to be on IPv4 or IPv6 addresses. - for r in socket.getaddrinfo(proxy_addr, proxy_port, 0, socket.SOCK_STREAM): - family, socket_type, proto, canonname, sa = r - sock = None - try: - sock = socksocket(family, socket_type, proto) - - if socket_options is not None: - for opt in socket_options: - sock.setsockopt(*opt) - - if isinstance(timeout, (int, float)): - sock.settimeout(timeout) - - if proxy_type is not None: - sock.set_proxy(proxy_type, proxy_addr, proxy_port, proxy_rdns, - proxy_username, proxy_password) - if source_address is not None: - sock.bind(source_address) - - sock.connect((remote_host, remote_port)) - return sock - - except socket.error as e: - err = e - if sock is not None: - sock.close() - sock = None - - if err is not None: - raise err - - raise socket.error("gai returned empty list.") - -class _BaseSocket(socket.socket): - """Allows Python 2's "delegated" methods such as send() to be overridden - """ - def __init__(self, *pos, **kw): - _orig_socket.__init__(self, *pos, **kw) - - self._savedmethods = dict() - for name in self._savenames: - self._savedmethods[name] = getattr(self, name) - delattr(self, name) # Allows normal overriding mechanism to work - - _savenames = list() - -def _makemethod(name): - return lambda self, *pos, **kw: self._savedmethods[name](*pos, **kw) -for name in ("sendto", "send", "recvfrom", "recv"): - method = getattr(_BaseSocket, name, None) - - # Determine if the method is not defined the usual way - # as a function in the class. - # Python 2 uses __slots__, so there are descriptors for each method, - # but they are not functions. - if not isinstance(method, Callable): - _BaseSocket._savenames.append(name) - setattr(_BaseSocket, name, _makemethod(name)) - -class socksocket(_BaseSocket): - """socksocket([family[, type[, proto]]]) -> socket object - - Open a SOCKS enabled socket. The parameters are the same as - those of the standard socket init. In order for SOCKS to work, - you must specify family=AF_INET and proto=0. - The "type" argument must be either SOCK_STREAM or SOCK_DGRAM. - """ - - default_proxy = None - - def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0, *args, **kwargs): - if type not in (socket.SOCK_STREAM, socket.SOCK_DGRAM): - msg = "Socket type must be stream or datagram, not {!r}" - raise ValueError(msg.format(type)) - - _BaseSocket.__init__(self, family, type, proto, *args, **kwargs) - self._proxyconn = None # TCP connection to keep UDP relay alive - - if self.default_proxy: - self.proxy = self.default_proxy - else: - self.proxy = (None, None, None, None, None, None) - self.proxy_sockname = None - self.proxy_peername = None - - def _readall(self, file, count): - """ - Receive EXACTLY the number of bytes requested from the file object. - Blocks until the required number of bytes have been received. - """ - data = b"" - while len(data) < count: - d = file.read(count - len(data)) - if not d: - raise GeneralProxyError("Connection closed unexpectedly") - data += d - return data - - def set_proxy(self, proxy_type=None, addr=None, port=None, rdns=True, username=None, password=None): - """set_proxy(proxy_type, addr[, port[, rdns[, username[, password]]]]) - Sets the proxy to be used. - - proxy_type - The type of the proxy to be used. Three types - are supported: PROXY_TYPE_SOCKS4 (including socks4a), - PROXY_TYPE_SOCKS5 and PROXY_TYPE_HTTP - addr - The address of the server (IP or DNS). - port - The port of the server. Defaults to 1080 for SOCKS - servers and 8080 for HTTP proxy servers. - rdns - Should DNS queries be performed on the remote side - (rather than the local side). The default is True. - Note: This has no effect with SOCKS4 servers. - username - Username to authenticate with to the server. - The default is no authentication. - password - Password to authenticate with to the server. - Only relevant when username is also provided. - """ - self.proxy = (proxy_type, addr, port, rdns, - username.encode() if username else None, - password.encode() if password else None) - - setproxy = set_proxy - - def bind(self, *pos, **kw): - """ - Implements proxy connection for UDP sockets, - which happens during the bind() phase. - """ - proxy_type, proxy_addr, proxy_port, rdns, username, password = self.proxy - if not proxy_type or self.type != socket.SOCK_DGRAM: - return _orig_socket.bind(self, *pos, **kw) - - if self._proxyconn: - raise socket.error(EINVAL, "Socket already bound to an address") - if proxy_type != SOCKS5: - msg = "UDP only supported by SOCKS5 proxy type" - raise socket.error(EOPNOTSUPP, msg) - _BaseSocket.bind(self, *pos, **kw) - - # Need to specify actual local port because - # some relays drop packets if a port of zero is specified. - # Avoid specifying host address in case of NAT though. - _, port = self.getsockname() - dst = ("0", port) - - self._proxyconn = _orig_socket() - proxy = self._proxy_addr() - self._proxyconn.connect(proxy) - - UDP_ASSOCIATE = b"\x03" - _, relay = self._SOCKS5_request(self._proxyconn, UDP_ASSOCIATE, dst) - - # The relay is most likely on the same host as the SOCKS proxy, - # but some proxies return a private IP address (10.x.y.z) - host, _ = proxy - _, port = relay - _BaseSocket.connect(self, (host, port)) - self.proxy_sockname = ("0.0.0.0", 0) # Unknown - - def sendto(self, bytes, *args, **kwargs): - if self.type != socket.SOCK_DGRAM: - return _BaseSocket.sendto(self, bytes, *args, **kwargs) - if not self._proxyconn: - self.bind(("", 0)) - - address = args[-1] - flags = args[:-1] - - header = BytesIO() - RSV = b"\x00\x00" - header.write(RSV) - STANDALONE = b"\x00" - header.write(STANDALONE) - self._write_SOCKS5_address(address, header) - - sent = _BaseSocket.send(self, header.getvalue() + bytes, *flags, **kwargs) - return sent - header.tell() - - def send(self, bytes, flags=0, **kwargs): - if self.type == socket.SOCK_DGRAM: - return self.sendto(bytes, flags, self.proxy_peername, **kwargs) - else: - return _BaseSocket.send(self, bytes, flags, **kwargs) - - def recvfrom(self, bufsize, flags=0): - if self.type != socket.SOCK_DGRAM: - return _BaseSocket.recvfrom(self, bufsize, flags) - if not self._proxyconn: - self.bind(("", 0)) - - buf = BytesIO(_BaseSocket.recv(self, bufsize, flags)) - buf.seek(+2, SEEK_CUR) - frag = buf.read(1) - if ord(frag): - raise NotImplementedError("Received UDP packet fragment") - fromhost, fromport = self._read_SOCKS5_address(buf) - - if self.proxy_peername: - peerhost, peerport = self.proxy_peername - if fromhost != peerhost or peerport not in (0, fromport): - raise socket.error(EAGAIN, "Packet filtered") - - return (buf.read(), (fromhost, fromport)) - - def recv(self, *pos, **kw): - bytes, _ = self.recvfrom(*pos, **kw) - return bytes - - def close(self): - if self._proxyconn: - self._proxyconn.close() - return _BaseSocket.close(self) - - def get_proxy_sockname(self): - """ - Returns the bound IP address and port number at the proxy. - """ - return self.proxy_sockname - - getproxysockname = get_proxy_sockname - - def get_proxy_peername(self): - """ - Returns the IP and port number of the proxy. - """ - return _BaseSocket.getpeername(self) - - getproxypeername = get_proxy_peername - - def get_peername(self): - """ - Returns the IP address and port number of the destination - machine (note: get_proxy_peername returns the proxy) - """ - return self.proxy_peername - - getpeername = get_peername - - def _negotiate_SOCKS5(self, *dest_addr): - """ - Negotiates a stream connection through a SOCKS5 server. - """ - CONNECT = b"\x01" - self.proxy_peername, self.proxy_sockname = self._SOCKS5_request(self, - CONNECT, dest_addr) - - def _SOCKS5_request(self, conn, cmd, dst): - """ - Send SOCKS5 request with given command (CMD field) and - address (DST field). Returns resolved DST address that was used. - """ - proxy_type, addr, port, rdns, username, password = self.proxy - - writer = conn.makefile("wb") - reader = conn.makefile("rb", 0) # buffering=0 renamed in Python 3 - try: - # First we'll send the authentication packages we support. - if username and password: - # The username/password details were supplied to the - # set_proxy method so we support the USERNAME/PASSWORD - # authentication (in addition to the standard none). - writer.write(b"\x05\x02\x00\x02") - else: - # No username/password were entered, therefore we - # only support connections with no authentication. - writer.write(b"\x05\x01\x00") - - # We'll receive the server's response to determine which - # method was selected - writer.flush() - chosen_auth = self._readall(reader, 2) - - if chosen_auth[0:1] != b"\x05": - # Note: string[i:i+1] is used because indexing of a bytestring - # via bytestring[i] yields an integer in Python 3 - raise GeneralProxyError("SOCKS5 proxy server sent invalid data") - - # Check the chosen authentication method - - if chosen_auth[1:2] == b"\x02": - # Okay, we need to perform a basic username/password - # authentication. - writer.write(b"\x01" + chr(len(username)).encode() - + username - + chr(len(password)).encode() - + password) - writer.flush() - auth_status = self._readall(reader, 2) - if auth_status[0:1] != b"\x01": - # Bad response - raise GeneralProxyError("SOCKS5 proxy server sent invalid data") - if auth_status[1:2] != b"\x00": - # Authentication failed - raise SOCKS5AuthError("SOCKS5 authentication failed") - - # Otherwise, authentication succeeded - - # No authentication is required if 0x00 - elif chosen_auth[1:2] != b"\x00": - # Reaching here is always bad - if chosen_auth[1:2] == b"\xFF": - raise SOCKS5AuthError("All offered SOCKS5 authentication methods were rejected") - else: - raise GeneralProxyError("SOCKS5 proxy server sent invalid data") - - # Now we can request the actual connection - writer.write(b"\x05" + cmd + b"\x00") - resolved = self._write_SOCKS5_address(dst, writer) - writer.flush() - - # Get the response - resp = self._readall(reader, 3) - if resp[0:1] != b"\x05": - raise GeneralProxyError("SOCKS5 proxy server sent invalid data") - - status = ord(resp[1:2]) - if status != 0x00: - # Connection failed: server returned an error - error = SOCKS5_ERRORS.get(status, "Unknown error") - raise SOCKS5Error("{0:#04x}: {1}".format(status, error)) - - # Get the bound address/port - bnd = self._read_SOCKS5_address(reader) - return (resolved, bnd) - finally: - reader.close() - writer.close() - - def _write_SOCKS5_address(self, addr, file): - """ - Return the host and port packed for the SOCKS5 protocol, - and the resolved address as a tuple object. - """ - host, port = addr - proxy_type, _, _, rdns, username, password = self.proxy - family_to_byte = {socket.AF_INET: b"\x01", socket.AF_INET6: b"\x04"} - - # If the given destination address is an IP address, we'll - # use the IP address request even if remote resolving was specified. - # Detect whether the address is IPv4/6 directly. - for family in (socket.AF_INET, socket.AF_INET6): - try: - addr_bytes = socket.inet_pton(family, host) - file.write(family_to_byte[family] + addr_bytes) - host = socket.inet_ntop(family, addr_bytes) - file.write(struct.pack(">H", port)) - return host, port - except socket.error: - continue - - # Well it's not an IP number, so it's probably a DNS name. - if rdns: - # Resolve remotely - host_bytes = host.encode('idna') - file.write(b"\x03" + chr(len(host_bytes)).encode() + host_bytes) - else: - # Resolve locally - addresses = socket.getaddrinfo(host, port, socket.AF_UNSPEC, socket.SOCK_STREAM, socket.IPPROTO_TCP, socket.AI_ADDRCONFIG) - # We can't really work out what IP is reachable, so just pick the - # first. - target_addr = addresses[0] - family = target_addr[0] - host = target_addr[4][0] - - addr_bytes = socket.inet_pton(family, host) - file.write(family_to_byte[family] + addr_bytes) - host = socket.inet_ntop(family, addr_bytes) - file.write(struct.pack(">H", port)) - return host, port - - def _read_SOCKS5_address(self, file): - atyp = self._readall(file, 1) - if atyp == b"\x01": - addr = socket.inet_ntoa(self._readall(file, 4)) - elif atyp == b"\x03": - length = self._readall(file, 1) - addr = self._readall(file, ord(length)) - elif atyp == b"\x04": - addr = socket.inet_ntop(socket.AF_INET6, self._readall(file, 16)) - else: - raise GeneralProxyError("SOCKS5 proxy server sent invalid data") - - port = struct.unpack(">H", self._readall(file, 2))[0] - return addr, port - - def _negotiate_SOCKS4(self, dest_addr, dest_port): - """ - Negotiates a connection through a SOCKS4 server. - """ - proxy_type, addr, port, rdns, username, password = self.proxy - - writer = self.makefile("wb") - reader = self.makefile("rb", 0) # buffering=0 renamed in Python 3 - try: - # Check if the destination address provided is an IP address - remote_resolve = False - try: - addr_bytes = socket.inet_aton(dest_addr) - except socket.error: - # It's a DNS name. Check where it should be resolved. - if rdns: - addr_bytes = b"\x00\x00\x00\x01" - remote_resolve = True - else: - addr_bytes = socket.inet_aton(socket.gethostbyname(dest_addr)) - - # Construct the request packet - writer.write(struct.pack(">BBH", 0x04, 0x01, dest_port)) - writer.write(addr_bytes) - - # The username parameter is considered userid for SOCKS4 - if username: - writer.write(username) - writer.write(b"\x00") - - # DNS name if remote resolving is required - # NOTE: This is actually an extension to the SOCKS4 protocol - # called SOCKS4A and may not be supported in all cases. - if remote_resolve: - writer.write(dest_addr.encode('idna') + b"\x00") - writer.flush() - - # Get the response from the server - resp = self._readall(reader, 8) - if resp[0:1] != b"\x00": - # Bad data - raise GeneralProxyError("SOCKS4 proxy server sent invalid data") - - status = ord(resp[1:2]) - if status != 0x5A: - # Connection failed: server returned an error - error = SOCKS4_ERRORS.get(status, "Unknown error") - raise SOCKS4Error("{0:#04x}: {1}".format(status, error)) - - # Get the bound address/port - self.proxy_sockname = (socket.inet_ntoa(resp[4:]), struct.unpack(">H", resp[2:4])[0]) - if remote_resolve: - self.proxy_peername = socket.inet_ntoa(addr_bytes), dest_port - else: - self.proxy_peername = dest_addr, dest_port - finally: - reader.close() - writer.close() - - def _negotiate_HTTP(self, dest_addr, dest_port): - """ - Negotiates a connection through an HTTP server. - NOTE: This currently only supports HTTP CONNECT-style proxies. - """ - proxy_type, addr, port, rdns, username, password = self.proxy - - # If we need to resolve locally, we do this now - addr = dest_addr if rdns else socket.gethostbyname(dest_addr) - - http_headers = [ - b"CONNECT " + addr.encode('idna') + b":" + str(dest_port).encode() + b" HTTP/1.1", - b"Host: " + dest_addr.encode('idna') - ] - - if username and password: - http_headers.append(b"Proxy-Authorization: basic " + b64encode(username + b":" + password)) - - http_headers.append(b"\r\n") - - self.sendall(b"\r\n".join(http_headers)) - - # We just need the first line to check if the connection was successful - fobj = self.makefile() - status_line = fobj.readline() - fobj.close() - - if not status_line: - raise GeneralProxyError("Connection closed unexpectedly") - - try: - proto, status_code, status_msg = status_line.split(" ", 2) - except ValueError: - raise GeneralProxyError("HTTP proxy server sent invalid response") - - if not proto.startswith("HTTP/"): - raise GeneralProxyError("Proxy server does not appear to be an HTTP proxy") - - try: - status_code = int(status_code) - except ValueError: - raise HTTPError("HTTP proxy server did not return a valid HTTP status") - - if status_code != 200: - error = "{0}: {1}".format(status_code, status_msg) - if status_code in (400, 403, 405): - # It's likely that the HTTP proxy server does not support the CONNECT tunneling method - error += ("\n[*] Note: The HTTP proxy server may not be supported by PySocks" - " (must be a CONNECT tunnel proxy)") - raise HTTPError(error) - - self.proxy_sockname = (b"0.0.0.0", 0) - self.proxy_peername = addr, dest_port - - _proxy_negotiators = { - SOCKS4: _negotiate_SOCKS4, - SOCKS5: _negotiate_SOCKS5, - HTTP: _negotiate_HTTP - } - - - def connect(self, dest_pair): - """ - Connects to the specified destination through a proxy. - Uses the same API as socket's connect(). - To select the proxy server, use set_proxy(). - - dest_pair - 2-tuple of (IP/hostname, port). - """ - if len(dest_pair) != 2 or dest_pair[0].startswith("["): - # Probably IPv6, not supported -- raise an error, and hope - # Happy Eyeballs (RFC6555) makes sure at least the IPv4 - # connection works... - raise socket.error("PySocks doesn't support IPv6") - - dest_addr, dest_port = dest_pair - - if self.type == socket.SOCK_DGRAM: - if not self._proxyconn: - self.bind(("", 0)) - dest_addr = socket.gethostbyname(dest_addr) - - # If the host address is INADDR_ANY or similar, reset the peer - # address so that packets are received from any peer - if dest_addr == "0.0.0.0" and not dest_port: - self.proxy_peername = None - else: - self.proxy_peername = (dest_addr, dest_port) - return - - proxy_type, proxy_addr, proxy_port, rdns, username, password = self.proxy - - # Do a minimal input check first - if (not isinstance(dest_pair, (list, tuple)) - or len(dest_pair) != 2 - or not dest_addr - or not isinstance(dest_port, int)): - raise GeneralProxyError("Invalid destination-connection (host, port) pair") - - - if proxy_type is None: - # Treat like regular socket object - self.proxy_peername = dest_pair - _BaseSocket.connect(self, (dest_addr, dest_port)) - return - - proxy_addr = self._proxy_addr() - - try: - # Initial connection to proxy server - _BaseSocket.connect(self, proxy_addr) - - except socket.error as error: - # Error while connecting to proxy - self.close() - proxy_addr, proxy_port = proxy_addr - proxy_server = "{0}:{1}".format(proxy_addr, proxy_port) - printable_type = PRINTABLE_PROXY_TYPES[proxy_type] - - msg = "Error connecting to {0} proxy {1}".format(printable_type, - proxy_server) - raise ProxyConnectionError(msg, error) - - else: - # Connected to proxy server, now negotiate - try: - # Calls negotiate_{SOCKS4, SOCKS5, HTTP} - negotiate = self._proxy_negotiators[proxy_type] - negotiate(self, dest_addr, dest_port) - except socket.error as error: - # Wrap socket errors - self.close() - raise GeneralProxyError("Socket error", error) - except ProxyError: - # Protocol error while negotiating with proxy - self.close() - raise - - def _proxy_addr(self): - """ - Return proxy address to connect to as tuple object - """ - proxy_type, proxy_addr, proxy_port, rdns, username, password = self.proxy - proxy_port = proxy_port or DEFAULT_PORTS.get(proxy_type) - if not proxy_port: - raise GeneralProxyError("Invalid proxy type") - return proxy_addr, proxy_port