# File-viewer header (not code): Python, 86 lines, 2.2 KiB.
import os
import sys

import yt_dlp as youtube_dl


# Directory the subtitle files are written to (also used in the yt-dlp
# output template below).
OUTPUT_DIR = "output"

# Maximum number of words placed in a single transcript part.
MAX_WORDS_PER_PART = 2000

# Short-link marker and the long-form prefix it is rewritten to, e.g.
#   https://youtu.be/dxt7w5NUub8  ->  https://www.youtube.com/watch?v=dxt7w5NUub8
SHORT_URL_MARKER = "youtu.be/"
LONG_URL_PREFIX = "www.youtube.com/watch?v="

# Instruction shared by the single-part and multi-part closing prompts.
SUMMARY_REQUEST = (
    "write a summary with a list of relevant points "
    "with a title and few sentences for each point"
)


def normalize_url(url):
    """Return *url* with a ``youtu.be/<id>`` short link expanded.

    URLs that do not contain ``youtu.be/`` are returned unchanged.  A query
    string following the video id (``youtu.be/<id>?t=30``) is re-attached
    with ``&`` so that the rewritten URL has a single well-formed query
    (``watch?v=<id>&t=30``) instead of two ``?`` separators.
    """
    head, marker, tail = url.partition(SHORT_URL_MARKER)
    if not marker:
        return url
    video_id, _, query = tail.partition("?")
    rebuilt = head + LONG_URL_PREFIX + video_id
    return rebuilt + "&" + query if query else rebuilt


def fetch_video_info(url, lang):
    """Write the *lang* subtitles for *url* under OUTPUT_DIR and return the info dict.

    The media itself is not downloaded (``skip_download``); only the VTT
    subtitle file (manual or automatic) is requested.
    """
    options = {
        'writesubtitles': True,
        'writeautomaticsub': True,
        'subtitlesformat': 'vtt',
        'subtitleslangs': [lang],
        'outtmpl': OUTPUT_DIR + '/%(id)s',
        'skip_download': True,
        'writethumbnail': False,
        'quiet': True,
    }
    with youtube_dl.YoutubeDL(options) as ydl:
        # One extraction both performs the (subtitle-only) download and
        # returns the metadata, instead of resolving the URL twice.
        return ydl.extract_info(url, download=True)


def extract_transcript(lines):
    """Collapse the lines of a VTT file into one space-separated string.

    A line is kept only when it contains neither ``>`` nor ``:``, is not
    blank, and is immediately followed by a blank line.  The final line of
    *lines* has no successor and is therefore never kept.  Every newline in
    the kept text is replaced by a space, so a non-empty result ends with a
    trailing space.
    """
    kept = [
        line
        for line, following in zip(lines, lines[1:])
        if ">" not in line
        and ":" not in line
        and line.strip()
        and not following.strip()
    ]
    return "".join(kept).replace("\n", " ")


def split_into_parts(words, max_words=MAX_WORDS_PER_PART):
    """Group *words* into strings of at most *max_words* words each.

    Words are joined with single spaces and every part ends with a trailing
    space.  An empty *words* sequence yields an empty list.

    Raises:
        ValueError: if *max_words* is smaller than 1.
    """
    if max_words < 1:
        raise ValueError("max_words must be at least 1")
    return [
        " ".join(words[start:start + max_words]) + " "
        for start in range(0, len(words), max_words)
    ]


def build_prompts(title, parts, language_suffix=""):
    """Return the prompt lines to print for the transcript *parts*.

    With one part, the result is the transcript line followed by the summary
    request.  With several parts, each part is numbered ``i/total`` and the
    summary request comes last.  With no parts, the result is empty.
    *language_suffix* is appended verbatim to the summary request.
    """
    if not parts:
        return []
    if len(parts) == 1:
        return [
            "This is the transcript of a video titled: « " + title + " » : " + parts[0],
            "Based on this transcript, " + SUMMARY_REQUEST + language_suffix,
        ]
    total = len(parts)
    prompts = [
        f"This is part {number}/{total} of the transcript of a video titled: « {title} » : {part}"
        for number, part in enumerate(parts, start=1)
    ]
    prompts.append(
        f"Based on the {total} parts of the transcript, {SUMMARY_REQUEST}{language_suffix}"
    )
    return prompts


def main(argv=None):
    """Fetch a video's subtitles and print summarisation prompts.

    Args:
        argv: ``[url, lang]``; defaults to ``sys.argv[1:]``.  Extra
            arguments are ignored.

    Exits with an explanatory message when arguments are missing, when the
    expected subtitle file was not written, or when it holds no transcript
    text.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.exit("usage: <script> <video-url> <subtitle-language>")
    url, lang = args[0], args[1]

    language_suffix = " in French" if lang == "fr" else ""

    info = fetch_video_info(normalize_url(url), lang)
    title = info['title']
    subtitle_path = os.path.join(OUTPUT_DIR, f"{info['id']}.{lang}.vtt")

    try:
        # Explicit UTF-8 so accented captions do not depend on the locale default.
        with open(subtitle_path, "r", encoding="utf-8") as handle:
            lines = handle.readlines()
    except FileNotFoundError:
        sys.exit(f"No '{lang}' subtitle file was written (expected {subtitle_path}).")

    transcript = extract_transcript(lines)
    parts = split_into_parts(transcript.split())
    if not parts:
        # Avoid emitting a "Based on the 0 parts" prompt with nothing to summarise.
        sys.exit(f"No transcript text found in {subtitle_path}.")

    print("Here is the full transcript of the video titled: « " + title + " » - " + transcript)
    for prompt in build_prompts(title, parts, language_suffix):
        print(prompt)


if __name__ == "__main__":
    main()