from abc import ABC, abstractmethod
from typing import TypedDict, Optional
class InvalidProxyConfig(Exception):
    """
    Raised when a proxy config is created with settings that cannot be used, e.g.
    a `GenericProxyConfig` that was given neither an HTTP nor an HTTPS proxy URL.
    """
class RequestsProxyConfigDict(TypedDict):
    """
    Typed view of the `proxies` Dict that the requests library accepts. It maps a
    URL scheme to the proxy URL that should be used for requests of that scheme.
    See the official requests documentation for details:
    https://requests.readthedocs.io/en/latest/user/advanced/#proxies
    """

    # proxy URL for plain HTTP requests
    http: str
    # proxy URL for HTTPS requests
    https: str
class ProxyConfig(ABC):
    """
    Abstract base class shared by all proxy configs. Anything can act as a proxy
    config, as long as `to_requests_dict` can turn it into a
    `RequestsProxyConfigDict`.

    The two properties below provide conservative defaults (no retries, connections
    may be kept alive) that subclasses can override.
    """

    @abstractmethod
    def to_requests_dict(self) -> RequestsProxyConfigDict:
        """
        Converts this proxy config into the Dict format the requests library expects
        for its proxy settings. Must be implemented by every concrete subclass.
        See the official requests documentation for details:
        https://requests.readthedocs.io/en/latest/user/advanced/#proxies

        :return: the Dict mapping "http" and "https" to the respective proxy URL.
        """

    @property
    def retries_when_blocked(self) -> int:
        """
        The number of times a request should be retried after it has been blocked.
        With rotating residential proxies backed by a large IP pool a few retries
        can make sense, because each retry triggers an IP rotation and the next IP
        might not be blocked.

        :return: 0 by default, i.e. blocked requests are not retried.
        """
        return 0

    @property
    def prevent_keeping_connections_alive(self) -> bool:
        """
        Whether the HTTP client should be prevented from keeping TCP connections
        alive. This is useful for rotating proxies, since the IP won't be rotated
        on every request for as long as the connection stays open.

        :return: False by default, i.e. connections may be kept alive.
        """
        return False
class GenericProxyConfig(ProxyConfig):
    """
    A proxy config for any generic HTTP/HTTPS/SOCKS proxy. As the requests library
    is used under the hood, its documentation describes in more detail how proxies
    can be set up:
    https://requests.readthedocs.io/en/latest/user/advanced/#proxies

    At least one of the two proxy URLs has to be provided. If only one of them is
    given, it is used for both HTTP and HTTPS connections.
    """

    def __init__(self, http_url: Optional[str] = None, https_url: Optional[str] = None):
        """
        At least one of the two proxy URLs has to be provided. If only one of them
        is given, it is used for both HTTP and HTTPS connections.

        :param http_url: the proxy URL used for HTTP requests. Falls back to
            `https_url` if None.
        :param https_url: the proxy URL used for HTTPS requests. Falls back to
            `http_url` if None.
        :raises InvalidProxyConfig: if neither `http_url` nor `https_url` is set
            (None and empty strings both count as not set).
        """
        if not (http_url or https_url):
            raise InvalidProxyConfig(
                "GenericProxyConfig requires you to define at least one of the two: "
                "http or https"
            )
        self.http_url, self.https_url = http_url, https_url

    def to_requests_dict(self) -> RequestsProxyConfigDict:
        """
        Builds the proxies Dict expected by the requests library, letting each
        scheme fall back to the other scheme's proxy URL if its own is not set.

        :return: the Dict mapping "http" and "https" to the respective proxy URL.
        """
        http_proxy = self.http_url or self.https_url
        https_proxy = self.https_url or self.http_url
        return {"http": http_proxy, "https": https_proxy}
class WebshareProxyConfig(GenericProxyConfig):
    """
    Webshare is a provider of rotating residential proxies, which is the most
    reliable way to work around being blocked by YouTube.

    If you don't have a Webshare account yet, you will have to create one
    at https://www.webshare.io/?referral_code=w0xno53eb50g and purchase a
    "Residential" proxy package that suits your workload (make sure NOT to purchase
    "Proxy Server" or "Static Residential"!).

    After that, all this config class needs is the "Proxy Username" and
    "Proxy Password" listed in your Webshare settings
    at https://dashboard.webshare.io/proxy/settings. It takes care of setting up
    your proxies as needed, by defaulting to rotating proxies.

    Note that referral links are used here and any purchases made through these
    links will support this Open Source project, which is very much appreciated! :)
    However, you can of course integrate your own proxy solution by using the
    `GenericProxyConfig` class, if that's what you prefer.
    """

    DEFAULT_DOMAIN_NAME = "p.webshare.io"
    DEFAULT_PORT = 80

    def __init__(
        self,
        proxy_username: str,
        proxy_password: str,
        retries_when_blocked: int = 10,
        domain_name: str = DEFAULT_DOMAIN_NAME,
        proxy_port: int = DEFAULT_PORT,
    ):
        """
        Sets up a config for a Webshare "Residential" package (make sure NOT to
        purchase "Proxy Server" or "Static Residential"!), which can be purchased
        after creating an account at
        https://www.webshare.io/?referral_code=w0xno53eb50g. The config defaults to
        the most reliable proxy settings (rotating residential proxies).

        :param proxy_username: "Proxy Username" found at
            https://dashboard.webshare.io/proxy/settings
        :param proxy_password: "Proxy Password" found at
            https://dashboard.webshare.io/proxy/settings
        :param retries_when_blocked: how many times a request should be retried if
            it is blocked. With rotating residential proxies and a large IP pool it
            makes sense to retry a couple of times when a blocked IP is
            encountered, since a retry will trigger an IP rotation and the next IP
            might not be blocked. Defaults to 10.
        :param domain_name: host name of the proxy endpoint put into the proxy URL.
            Defaults to `DEFAULT_DOMAIN_NAME`.
        :param proxy_port: port of the proxy endpoint put into the proxy URL.
            Defaults to `DEFAULT_PORT`.
        """
        # The parent __init__ is deliberately not called: `http_url` and
        # `https_url` are read-only properties on this class, so they cannot be
        # assigned the way GenericProxyConfig.__init__ does.
        self.proxy_username = proxy_username
        self.proxy_password = proxy_password
        self.domain_name = domain_name
        self.proxy_port = proxy_port
        self._retries_when_blocked = retries_when_blocked

    @property
    def url(self) -> str:
        """
        The proxy URL assembled from the credentials and the endpoint. "-rotate" is
        appended to the username (presumably this is what selects Webshare's
        rotating proxies - confirm against the Webshare documentation).
        """
        credentials = f"{self.proxy_username}-rotate:{self.proxy_password}"
        endpoint = f"{self.domain_name}:{self.proxy_port}"
        return f"http://{credentials}@{endpoint}/"

    @property
    def http_url(self) -> str:
        """The proxy URL used for HTTP requests (identical to `url`)."""
        return self.url

    @property
    def https_url(self) -> str:
        """The proxy URL used for HTTPS requests (identical to `url`)."""
        return self.url

    @property
    def prevent_keeping_connections_alive(self) -> bool:
        """
        Always True, so that connections are not kept alive and the IP is not
        prevented from rotating by a connection that stays open.
        """
        return True

    @property
    def retries_when_blocked(self) -> int:
        """The number of retries for blocked requests, as passed to `__init__`."""
        return self._retries_when_blocked