import youtube_dl
import sys

# Command-line usage: y2t.py <video-url> <subtitle-lang>
#
# Downloads the subtitles of a video with youtube_dl, reduces the WebVTT file
# to plain caption words and prints the transcript to stdout in numbered parts
# of at most MAX_WORDS_PER_PART words each.

# Upper bound on the number of words printed per transcript part.
MAX_WORDS_PER_PART = 1000


def extract_caption_words(lines):
    """Return the caption words found in the lines of a WebVTT file.

    A line is kept only when all of the following hold:

    * it is not blank,
    * it contains neither ``>`` (cue timing arrows, inline ``<c>`` tags) nor
      ``:`` (timestamps, ``Kind:``/``Language:`` headers),
    * it is the last line of its paragraph, i.e. the next line is blank or
      the file ends there.

    Known limitation (kept from the original heuristic): genuine caption text
    that contains ``:`` or ``>`` is dropped as well.

    Args:
        lines: The subtitle file content, one string per line (trailing
            newlines are allowed).

    Returns:
        A list of whitespace-separated words, in file order.
    """
    words = []
    last_index = len(lines) - 1
    for index, line in enumerate(lines):
        text = line.strip()
        if not text or ">" in line or ":" in line:
            continue
        # The "WEBVTT" signature is file metadata, not caption text; without
        # this check it leaks into the transcript whenever it is directly
        # followed by a blank line.
        if index == 0 and text.lstrip("\ufeff").startswith("WEBVTT"):
            continue
        # Only the final line of a paragraph is kept. End-of-file counts as a
        # terminator so the last caption is not lost when the file has no
        # trailing blank line.
        if index < last_index and lines[index + 1].strip():
            continue
        words.extend(text.split())
    return words


def split_into_parts(words, max_words=MAX_WORDS_PER_PART):
    """Group *words* into strings of at most *max_words* words each.

    Every word is followed by a single space, so each part ends with a
    trailing space (this keeps the printed output identical to what the
    script has always produced).

    Args:
        words: Sequence of words to group.
        max_words: Maximum number of words per part; must be positive.

    Returns:
        A list of strings; empty when *words* is empty.
    """
    return [
        " ".join(words[start:start + max_words]) + " "
        for start in range(0, len(words), max_words)
    ]


def download_captions(url, lang):
    """Fetch subtitles and thumbnail for *url* and return the info dict.

    The video itself is not downloaded (``skip_download``). Files are written
    to the current directory using the video id as the base name.

    Args:
        url: Address of the video to process.
        lang: Subtitle language code to request.

    Returns:
        The info dictionary produced by ``YoutubeDL.extract_info``.
    """
    options = {
        'writesubtitles': True,
        'writeautomaticsub': True,
        'subtitlesformat': 'vtt',
        'subtitleslangs': [lang],
        'outtmpl': '%(id)s',
        'skip_download': True,
        'writethumbnail': True,
        'quiet': True,
    }
    with youtube_dl.YoutubeDL(options) as ydl:
        # A single extraction both writes the subtitle/thumbnail files and
        # returns the metadata, instead of contacting the site twice.
        return ydl.extract_info(url, download=True)


def main(argv=None):
    """Run the script: download captions and print the transcript parts.

    Args:
        argv: Optional argument list ``[url, lang]``; defaults to
            ``sys.argv[1:]``.

    Raises:
        SystemExit: When arguments are missing or the expected subtitle file
            was not written.
    """
    print("hello from y2t.py")

    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.exit("usage: y2t.py <video-url> <subtitle-lang>")
    url, lang = args[0], args[1]

    # Message text is kept verbatim; a calling process may read stdout.
    print("python script will now process url:" + url + "in lang:" + lang)

    info = download_captions(url, lang)
    title = info['title']

    # With the '%(id)s' output template the subtitles are expected at
    # "<id>.<lang>.vtt" in the current directory.
    subtitle_path = "{}.{}.vtt".format(info['id'], lang)
    try:
        # WebVTT files are UTF-8; do not rely on the platform default.
        with open(subtitle_path, "r", encoding="utf-8") as handle:
            lines = handle.readlines()
    except FileNotFoundError:
        sys.exit(
            "subtitle file not found: {} (are '{}' subtitles available?)".format(
                subtitle_path, lang
            )
        )

    parts = split_into_parts(extract_caption_words(lines))
    total = len(parts)
    for number, part in enumerate(parts, start=1):
        print(
            "This is part {}/{} of the transcript of a video named: {}".format(
                number, total, title
            )
        )
        print(part)


if __name__ == "__main__":
    main()