"""Fetch a YouTube video's subtitles and print the transcript in parts.

Invoked as ``<script> <video-url> <subtitle-lang>``. The subtitles are
downloaded as WebVTT via youtube_dl, reduced to plain caption text, split
into parts of at most ``MAX_WORDS_PER_PART`` words, and each part is printed
to stdout preceded by a one-line header naming the video.
"""

import sys
from typing import List, Optional, Sequence

# Maximum number of words printed per transcript part.
MAX_WORDS_PER_PART = 1000


def fetch_subtitles(url: str, lang: str) -> dict:
    """Write the subtitle (and thumbnail) files for *url* and return its info.

    Files are named after the video id (``outtmpl`` is ``%(id)s``), so the
    subtitles end up in ``<id>.<lang>.vtt``. The video itself is not
    downloaded (``skip_download``).

    Args:
        url: Video URL handed to youtube_dl.
        lang: Subtitle language code to request.

    Returns:
        The info dict produced by ``YoutubeDL.extract_info``.
    """
    # Imported here rather than at module top so the pure text helpers below
    # stay importable on machines where youtube_dl is not installed.
    import youtube_dl

    ydl_opts = {
        'writesubtitles': True,
        'writeautomaticsub': True,
        'subtitlesformat': 'vtt',
        'subtitleslangs': [lang],
        'outtmpl': '%(id)s',
        'skip_download': True,
        'writethumbnail': True,
        'quiet': True,
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        # One call both writes the requested files and returns the metadata;
        # calling download() and then extract_info() would fetch everything twice.
        return ydl.extract_info(url, download=True)


def extract_caption_text(lines: Sequence[str]) -> List[str]:
    """Return the caption words found in the lines of a WebVTT file.

    A line is kept when it is non-blank, contains neither ``>`` nor ``:``
    (which filters cue timings, header fields and inline-tagged lines), and
    is the last line of its cue, i.e. it is followed by a blank line or by
    the end of the file.

    NOTE: caption text that itself contains ``:`` or ``>`` is dropped by
    this heuristic.

    Args:
        lines: The file's lines, with or without trailing newlines.

    Returns:
        The kept lines split into whitespace-separated words, in order.
    """
    words: List[str] = []
    last_index = len(lines) - 1
    for index, line in enumerate(lines):
        if ">" in line or ":" in line or not line.strip():
            continue
        # End of file terminates a cue just like a blank line does, so the
        # final caption is kept even without a trailing blank line.
        if index == last_index or not lines[index + 1].strip():
            words.extend(line.split())
    return words


def chunk_words(words: Sequence[str],
                max_words: int = MAX_WORDS_PER_PART) -> List[str]:
    """Join *words* into strings of at most *max_words* words each.

    Every word is followed by a single space, so each returned string ends
    with a trailing space (kept so the printed output stays unchanged).

    Args:
        words: Words to group, in order.
        max_words: Upper bound on words per returned string; must be >= 1.

    Returns:
        The parts in order; an empty list when *words* is empty.

    Raises:
        ValueError: If *max_words* is smaller than 1.
    """
    if max_words < 1:
        raise ValueError("max_words must be at least 1")
    return [
        " ".join(words[start:start + max_words]) + " "
        for start in range(0, len(words), max_words)
    ]


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the script and return a process exit status.

    Args:
        argv: Argument vector laid out like ``sys.argv``; defaults to
            ``sys.argv`` when omitted.

    Returns:
        0 on success, 2 when the URL or language argument is missing, and
        1 when the expected subtitle file was not written.
    """
    args = sys.argv if argv is None else argv
    print("hello from y2t.py")
    if len(args) < 3:
        print("usage: " + (args[0] if args else "script")
              + " <video-url> <subtitle-lang>", file=sys.stderr)
        return 2
    url, lang = args[1], args[2]
    print("python script will now process url:" + url + "in lang:" + lang)

    info = fetch_subtitles(url, lang)
    title = info['title']
    sub_path = info['id'] + "." + lang + ".vtt"

    try:
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # default encoding.
        with open(sub_path, "r", encoding="utf-8") as sub_file:
            lines = sub_file.readlines()
    except FileNotFoundError:
        print("no subtitle file " + sub_path + " was written for language "
              + lang, file=sys.stderr)
        return 1

    parts = chunk_words(extract_caption_text(lines))
    total = len(parts)
    for number, part in enumerate(parts, start=1):
        print('This is part ' + str(number) + '/' + str(total)
              + " of the transcript of a video named: " + title)
        print(part)
    return 0


if __name__ == "__main__":
    sys.exit(main())