from pathlib import Path
from typing import Iterable, Optional, List
from requests import HTTPError
from ._settings import WATCH_URL
from .proxies import ProxyConfig, GenericProxyConfig, WebshareProxyConfig
class YouTubeTranscriptApiException(Exception):
pass
class CookieError(YouTubeTranscriptApiException):
pass
class CookiePathInvalid(CookieError):
def __init__(self, cookie_path: Path):
super().__init__(f"Can't load the provided cookie file: {cookie_path}")
class CookieInvalid(CookieError):
def __init__(self, cookie_path: Path):
super().__init__(
f"The cookies provided are not valid (may have expired): {cookie_path}"
)
class CouldNotRetrieveTranscript(YouTubeTranscriptApiException):
"""
Raised if a transcript could not be retrieved.
"""
ERROR_MESSAGE = "\nCould not retrieve a transcript for the video {video_url}!"
CAUSE_MESSAGE_INTRO = " This is most likely caused by:\n\n{cause}"
CAUSE_MESSAGE = ""
GITHUB_REFERRAL = (
"\n\nIf you are sure that the described cause is not responsible for this error "
"and that a transcript should be retrievable, please create an issue at "
"https://github.com/jdepoix/youtube-transcript-api/issues. "
"Please add which version of youtube_transcript_api you are using "
"and provide the information needed to replicate the error. "
"Also make sure that there are no open issues which already describe your problem!"
)
def __init__(self, video_id: str):
self.video_id = video_id
super().__init__()
def _build_error_message(self) -> str:
error_message = self.ERROR_MESSAGE.format(
video_url=WATCH_URL.format(video_id=self.video_id)
)
cause = self.cause
if cause:
error_message += (
self.CAUSE_MESSAGE_INTRO.format(cause=cause) + self.GITHUB_REFERRAL
)
return error_message
@property
def cause(self) -> str:
return self.CAUSE_MESSAGE
def __str__(self) -> str:
return self._build_error_message()
class YouTubeDataUnparsable(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = (
"The data required to fetch the transcript is not parsable. This should "
"not happen, please open an issue (make sure to include the video ID)!"
)
class YouTubeRequestFailed(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = "Request to YouTube failed: {reason}"
def __init__(self, video_id: str, http_error: HTTPError):
self.reason = str(http_error)
super().__init__(video_id)
@property
def cause(self) -> str:
return self.CAUSE_MESSAGE.format(
reason=self.reason,
)
class VideoUnplayable(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = "The video is unplayable for the following reason: {reason}"
SUBREASON_MESSAGE = "\n\nAdditional Details:\n{sub_reasons}"
def __init__(self, video_id: str, reason: Optional[str], sub_reasons: List[str]):
self.reason = reason
self.sub_reasons = sub_reasons
super().__init__(video_id)
@property
def cause(self):
reason = "No reason specified!" if self.reason is None else self.reason
if self.sub_reasons:
sub_reasons = "\n".join(
f" - {sub_reason}" for sub_reason in self.sub_reasons
)
reason = f"{reason}{self.SUBREASON_MESSAGE.format(sub_reasons=sub_reasons)}"
return self.CAUSE_MESSAGE.format(
reason=reason,
)
class VideoUnavailable(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = "The video is no longer available"
class InvalidVideoId(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = (
"You provided an invalid video id. Make sure you are using the video id and NOT the url!\n\n"
'Do NOT run: `YouTubeTranscriptApi.get_transcript("https://www.youtube.com/watch?v=1234")`\n'
'Instead run: `YouTubeTranscriptApi.get_transcript("1234")`'
)
class RequestBlocked(CouldNotRetrieveTranscript):
BASE_CAUSE_MESSAGE = (
"YouTube is blocking requests from your IP. This usually is due to one of the "
"following reasons:\n"
"- You have done too many requests and your IP has been blocked by YouTube\n"
"- You are doing requests from an IP belonging to a cloud provider (like AWS, "
"Google Cloud Platform, Azure, etc.). Unfortunately, most IPs from cloud "
"providers are blocked by YouTube.\n\n"
)
CAUSE_MESSAGE = (
f"{BASE_CAUSE_MESSAGE}"
"There are two things you can do to work around this:\n"
'1. Use proxies to hide your IP address, as explained in the "Working around '
'IP bans" section of the README '
"(https://github.com/jdepoix/youtube-transcript-api"
"?tab=readme-ov-file"
"#working-around-ip-bans-requestblocked-or-ipblocked-exception).\n"
"2. (NOT RECOMMENDED) If you authenticate your requests using cookies, you "
"will be able to continue doing requests for a while. However, YouTube will "
"eventually permanently ban the account that you have used to authenticate "
"with! So only do this if you don't mind your account being banned!"
)
WITH_GENERIC_PROXY_CAUSE_MESSAGE = (
"YouTube is blocking your requests, despite you using proxies. Keep in mind "
"a proxy is just a way to hide your real IP behind the IP of that proxy, but "
"there is no guarantee that the IP of that proxy won't be blocked as well.\n\n"
"The only truly reliable way to prevent IP blocks is rotating through a large "
"pool of residential IPs, by using a provider like Webshare "
"(https://www.webshare.io/?referral_code=w0xno53eb50g), which provides you "
"with a pool of >30M residential IPs (make sure to purchase "
'"Residential" proxies, NOT "Proxy Server" or "Static Residential"!).\n\n'
"You will find more information on how to easily integrate Webshare here: "
"https://github.com/jdepoix/youtube-transcript-api"
"?tab=readme-ov-file#using-webshare"
)
WITH_WEBSHARE_PROXY_CAUSE_MESSAGE = (
"YouTube is blocking your requests, despite you using Webshare proxies. "
'Please make sure that you have purchased "Residential" proxies and '
'NOT "Proxy Server" or "Static Residential", as those won\'t work as '
'reliably! The free tier also uses "Proxy Server" and will NOT work!\n\n'
'The only reliable option is using "Residential" proxies (not "Static '
'Residential"), as this allows you to rotate through a pool of over 30M IPs, '
"which means you will always find an IP that hasn't been blocked by YouTube "
"yet!\n\n"
"You can support the development of this open source project by making your "
"Webshare purchases through this affiliate link: "
"https://www.webshare.io/?referral_code=w0xno53eb50g \n\n"
"Thank you for your support! <3"
)
def __init__(self, video_id: str):
self._proxy_config = None
super().__init__(video_id)
def with_proxy_config(
self, proxy_config: Optional[ProxyConfig]
) -> "RequestBlocked":
self._proxy_config = proxy_config
return self
@property
def cause(self) -> str:
if isinstance(self._proxy_config, WebshareProxyConfig):
return self.WITH_WEBSHARE_PROXY_CAUSE_MESSAGE
if isinstance(self._proxy_config, GenericProxyConfig):
return self.WITH_GENERIC_PROXY_CAUSE_MESSAGE
return super().cause
class IpBlocked(RequestBlocked):
CAUSE_MESSAGE = (
f"{RequestBlocked.BASE_CAUSE_MESSAGE}"
'Ways to work around this are explained in the "Working around IP '
'bans" section of the README (https://github.com/jdepoix/youtube-transcript-api'
"?tab=readme-ov-file"
"#working-around-ip-bans-requestblocked-or-ipblocked-exception).\n"
)
class TranscriptsDisabled(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = "Subtitles are disabled for this video"
class AgeRestricted(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = (
"This video is age-restricted. Therefore, you will have to authenticate to be "
"able to retrieve transcripts for it. You will have to provide a cookie to "
'authenticate yourself, as explained in the "Cookie Authentication" section of '
"the README (https://github.com/jdepoix/youtube-transcript-api"
"?tab=readme-ov-file#cookie-authentication)"
)
class NotTranslatable(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = "The requested language is not translatable"
class TranslationLanguageNotAvailable(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = "The requested translation language is not available"
class FailedToCreateConsentCookie(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = "Failed to automatically give consent to saving cookies"
class NoTranscriptFound(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = (
"No transcripts were found for any of the requested language codes: {requested_language_codes}\n\n"
"{transcript_data}"
)
def __init__(
self,
video_id: str,
requested_language_codes: Iterable[str],
transcript_data: "TranscriptList", # noqa: F821
):
self._requested_language_codes = requested_language_codes
self._transcript_data = transcript_data
super().__init__(video_id)
@property
def cause(self) -> str:
return self.CAUSE_MESSAGE.format(
requested_language_codes=self._requested_language_codes,
transcript_data=str(self._transcript_data),
)