"""Fetch a YouTube video's subtitles and print them as summarisation prompts.

Usage: python <script> <video-url> <subtitle-language>

The subtitles are saved as ``output/<video id>.<language>.vtt``, reduced to
plain text, split into chunks of at most ``MAX_WORDS_PER_CHUNK`` words, and
printed to standard output wrapped in instructions asking for a summary.
"""
import os
import sys

# Example inputs:
#   https://youtu.be/dxt7w5NUub8
#   https://www.youtube.com/watch?v=456456
SHORT_URL_PREFIX = "youtu.be/"
LONG_URL_PREFIX = "www.youtube.com/watch?v="

OUTPUT_DIR = "output"
MAX_WORDS_PER_CHUNK = 2000
SUMMARY_REQUEST = (
    "write a summary with a list of relevant points with a title and few "
    "sentences for each point"
)


def normalize_url(url: str) -> str:
    """Rewrite a short ``youtu.be/<id>`` link to the ``watch?v=<id>`` form.

    URLs that do not contain ``youtu.be/`` are returned unchanged. Any query
    string already attached to the short link (``?t=42``) is appended with
    ``&`` so the rewritten URL keeps a single, well-formed query.
    """
    if SHORT_URL_PREFIX not in url:
        return url
    head, _, tail = url.partition(SHORT_URL_PREFIX)
    # "watch?v=" already opens the query string, so the short link's own "?"
    # has to become a parameter separator.
    return head + LONG_URL_PREFIX + tail.replace("?", "&", 1)


def extract_transcript(lines) -> str:
    """Reduce the lines of a ``.vtt`` subtitle file to one line of text.

    A line is kept only when it is non-blank, contains neither ``>`` nor
    ``:``, and is immediately followed by a blank line. The last line of the
    input is never kept because it has no following line. Note that caption
    text which itself contains ``:`` or ``>`` is dropped by this rule.

    Newlines in the kept lines are replaced by spaces, so a non-empty result
    normally ends with a trailing space.
    """
    kept = [
        line
        for line, following in zip(lines, lines[1:])
        if ">" not in line
        and ":" not in line
        and line.strip()
        and not following.strip()
    ]
    return "".join(kept).replace("\n", " ")


def chunk_words(words, size: int = MAX_WORDS_PER_CHUNK) -> list:
    """Group *words* into strings of at most *size* words each.

    Every word is followed by a single space, so each chunk ends with a
    trailing space. An empty *words* sequence yields an empty list.
    """
    return [
        " ".join(words[start:start + size]) + " "
        for start in range(0, len(words), size)
    ]


def build_prompts(title: str, chunks, language_suffix: str = "") -> list:
    """Return the lines to print for a transcript split into *chunks*.

    One chunk produces a single transcript line; several chunks produce one
    numbered "part i/n" line each. In both cases the final element is the
    summary request, with *language_suffix* appended to it.
    """
    total = len(chunks)
    if total == 1:
        return [
            "This is the transcript of a video titled: « " + title + " » : "
            + chunks[0],
            "Based on this transcript, " + SUMMARY_REQUEST + language_suffix,
        ]
    prompts = [
        "This is part " + str(index) + "/" + str(total)
        + " of the transcript of a video titled: « " + title + " » : " + chunk
        for index, chunk in enumerate(chunks, start=1)
    ]
    prompts.append(
        "Based on the " + str(total) + " parts of the transcript, "
        + SUMMARY_REQUEST + language_suffix
    )
    return prompts


def fetch_video_info(url: str, lang: str) -> dict:
    """Write the *lang* subtitles of *url* to ``output/`` and return its info.

    The video itself is not downloaded (``skip_download``). Errors raised by
    youtube_dl are propagated to the caller.
    """
    # Imported here so the text helpers above can be imported and tested
    # without youtube_dl installed.
    import youtube_dl

    ydl_opts = {
        "writesubtitles": True,
        "writeautomaticsub": True,
        "subtitlesformat": "vtt",
        "subtitleslangs": [lang],
        "outtmpl": OUTPUT_DIR + "/%(id)s",
        "skip_download": True,
        "writethumbnail": False,
        "quiet": True,
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        # One call both processes the video (writing the subtitle file) and
        # returns its metadata, instead of extracting the page twice.
        return ydl.extract_info(url, download=True)


def main(argv=None) -> None:
    """Run the script on *argv* (defaults to ``sys.argv[1:]``).

    Exits with an error message when arguments are missing, when the
    subtitle file was not produced, or when no transcript text is found.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.exit("usage: {} <video-url> <subtitle-language>".format(sys.argv[0]))
    url, lang = args[0], args[1]
    language_suffix = " in French" if lang == "fr" else ""

    info = fetch_video_info(normalize_url(url), lang)
    title = info["title"]
    video_id = info["id"]

    subtitle_path = os.path.join(OUTPUT_DIR, "{}.{}.vtt".format(video_id, lang))
    try:
        # Read explicitly as UTF-8 rather than the platform default encoding.
        with open(subtitle_path, encoding="utf-8") as subtitle_file:
            lines = subtitle_file.readlines()
    except FileNotFoundError:
        sys.exit(
            "No '{}' subtitles were written for this video (expected {}).".format(
                lang, subtitle_path
            )
        )

    transcript = extract_transcript(lines)
    chunks = chunk_words(transcript.split())
    if not chunks:
        sys.exit("No transcript text found in {}.".format(subtitle_path))

    print(
        "Here is the full transcript of the video titled: « " + title + " » - "
        + transcript
    )
    for prompt in build_prompts(title, chunks, language_suffix):
        print(prompt)


if __name__ == "__main__":
    main()