import argparse from typing import List from .proxies import GenericProxyConfig, WebshareProxyConfig from .formatters import FormatterLoader from ._api import YouTubeTranscriptApi, FetchedTranscript, TranscriptList class YouTubeTranscriptCli: def __init__(self, args: List[str]): self._args = args def run(self) -> str: parsed_args = self._parse_args() if parsed_args.exclude_manually_created and parsed_args.exclude_generated: return "" proxy_config = None if parsed_args.http_proxy != "" or parsed_args.https_proxy != "": proxy_config = GenericProxyConfig( http_url=parsed_args.http_proxy, https_url=parsed_args.https_proxy, ) if ( parsed_args.webshare_proxy_username is not None or parsed_args.webshare_proxy_password is not None ): proxy_config = WebshareProxyConfig( proxy_username=parsed_args.webshare_proxy_username, proxy_password=parsed_args.webshare_proxy_password, ) cookie_path = parsed_args.cookies transcripts = [] exceptions = [] ytt_api = YouTubeTranscriptApi( proxy_config=proxy_config, cookie_path=cookie_path, ) for video_id in parsed_args.video_ids: try: transcript_list = ytt_api.list(video_id) if parsed_args.list_transcripts: transcripts.append(transcript_list) else: transcripts.append( self._fetch_transcript( parsed_args, transcript_list, ) ) except Exception as exception: exceptions.append(exception) print_sections = [str(exception) for exception in exceptions] if transcripts: if parsed_args.list_transcripts: print_sections.extend( str(transcript_list) for transcript_list in transcripts ) else: print_sections.append( FormatterLoader() .load(parsed_args.format) .format_transcripts(transcripts) ) return "\n\n".join(print_sections) def _fetch_transcript( self, parsed_args, transcript_list: TranscriptList, ) -> FetchedTranscript: if parsed_args.exclude_manually_created: transcript = transcript_list.find_generated_transcript( parsed_args.languages ) elif parsed_args.exclude_generated: transcript = transcript_list.find_manually_created_transcript( parsed_args.languages ) else: transcript = transcript_list.find_transcript(parsed_args.languages) if parsed_args.translate: transcript = transcript.translate(parsed_args.translate) return transcript.fetch() def _parse_args(self): parser = argparse.ArgumentParser( description=( "This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. " "It also works for automatically generated subtitles and it does not require a headless browser, like " "other selenium based solutions do!" ) ) parser.add_argument( "--list-transcripts", action="store_const", const=True, default=False, help="This will list the languages in which the given videos are available in.", ) parser.add_argument( "video_ids", nargs="+", type=str, help="List of YouTube video IDs." ) parser.add_argument( "--languages", nargs="*", default=[ "en", ], type=str, help=( 'A list of language codes in a descending priority. For example, if this is set to "de en" it will ' "first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails " "to do so. As I can't provide a complete list of all working language codes with full certainty, you " "may have to play around with the language codes a bit, to find the one which is working for you!" ), ) parser.add_argument( "--exclude-generated", action="store_const", const=True, default=False, help="If this flag is set transcripts which have been generated by YouTube will not be retrieved.", ) parser.add_argument( "--exclude-manually-created", action="store_const", const=True, default=False, help="If this flag is set transcripts which have been manually created will not be retrieved.", ) parser.add_argument( "--format", type=str, default="pretty", choices=tuple(FormatterLoader.TYPES.keys()), ) parser.add_argument( "--translate", default="", help=( "The language code for the language you want this transcript to be translated to. Use the " "--list-transcripts feature to find out which languages are translatable and which translation " "languages are available." ), ) parser.add_argument( "--webshare-proxy-username", default=None, type=str, help='Specify your Webshare "Proxy Username" found at https://dashboard.webshare.io/proxy/settings', ) parser.add_argument( "--webshare-proxy-password", default=None, type=str, help='Specify your Webshare "Proxy Password" found at https://dashboard.webshare.io/proxy/settings', ) parser.add_argument( "--http-proxy", default="", metavar="URL", help="Use the specified HTTP proxy.", ) parser.add_argument( "--https-proxy", default="", metavar="URL", help="Use the specified HTTPS proxy.", ) parser.add_argument( "--cookies", default=None, help="The cookie file that will be used for authorization with youtube.", ) return self._sanitize_video_ids(parser.parse_args(self._args)) def _sanitize_video_ids(self, args): args.video_ids = [video_id.replace("\\", "") for video_id in args.video_ids] return args