66 lines
1.8 KiB
Python
66 lines
1.8 KiB
Python
import yt_dlp as youtube_dl
|
|
import os
|
|
|
|
# Maximum number of words placed in one transcript part by GetVideo.
MAXWORDCOUNT = 2000
# Subtitle language code. It is used both to request subtitles from yt-dlp
# and to build the name of the subtitle file that GetVideo reads back.
LANG = "en"

# Options handed to the shared YoutubeDL instance below: fetch subtitles
# (uploaded or automatic) as VTT plus the thumbnail into "output/", without
# downloading the video itself and without console output.
ydl_opts = {
    'writesubtitles': True,
    'writeautomaticsub': True,
    'subtitlesformat': 'vtt',
    # Derived from LANG so the requested language always matches the
    # "<id>.<LANG>.vtt" file name GetVideo looks for.
    'subtitleslangs': [LANG],
    'outtmpl': 'output/%(id)s',
    'skip_download': True,
    'writethumbnail': True,
    # NOTE(review): 'progress' does not seem to be a documented YoutubeDL
    # option ('noprogress' below is) -- kept as-is, confirm before removing.
    'progress': False,
    'noprogress': True,
    'quiet': True,
}

# Single downloader instance shared by every GetVideo call.
ydl = youtube_dl.YoutubeDL(ydl_opts)
|
|
|
|
class Video:
    """Plain record describing one video and its transcript.

    The constructor stores each argument unchanged in a public attribute of
    the same name (``url``, ``creator``, ``title``, ``transcript``,
    ``parts``).  ``ytgenerated`` is not a constructor argument: it always
    starts as ``False`` and is left for the caller to change afterwards.
    """

    def __init__(self, url, creator, title, transcript, parts):
        # Where the video lives and who/what it is.
        self.url, self.creator, self.title = url, creator, title
        # Transcript text and the list of parts that accompanies it.
        self.transcript, self.parts = transcript, parts
        # Off by default; callers switch it on after construction.
        self.ytgenerated = False
|
|
|
|
def _caption_text(lines):
    """Return the caption text contained in the lines of a VTT file.

    A line is kept only if it is non-blank, is immediately followed by a
    blank line, and contains neither ``>`` nor ``:``.  That test skips cue
    timing lines (they contain ``-->`` and colons); as a side effect it also
    drops genuine caption lines that happen to contain one of those
    characters.  The very last line of the file is never kept because it has
    no following line.  Newlines in the result are replaced by spaces.
    """
    kept = [
        line
        for line, following in zip(lines, lines[1:])
        if ">" not in line
        and ":" not in line
        and line.strip()
        and not following.strip()
    ]
    return "".join(kept).replace("\n", " ")


def _split_words(words, size):
    """Group *words* into strings of at most *size* words each.

    Words inside a part are separated by one space and every part ends with
    a trailing space.  An empty word list yields an empty list.
    """
    return [
        " ".join(words[start:start + size]) + " "
        for start in range(0, len(words), size)
    ]


def GetVideo(url):
    """Fetch metadata and subtitles for *url* and wrap them in a ``Video``.

    The shared ``ydl`` instance extracts the video information and writes the
    files requested in its options.  The subtitle file
    ``output/<id>.<LANG>.vtt`` is then read and reduced to plain text, which
    is additionally split into parts of at most ``MAXWORDCOUNT`` words.

    Args:
        url: Address of the video to process.

    Returns:
        A ``Video``.  When no subtitle file was written, its transcript is a
        fixed "no subtitles" message, ``parts`` is empty and ``ytgenerated``
        stays ``False``; otherwise the transcript and parts hold the caption
        text and ``ytgenerated`` is ``True``.

    Raises:
        KeyError: If the extracted info has no ``uploader``, ``title`` or
            ``id`` entry.
        Any exception raised by the yt-dlp call or by reading the file is
        propagated unchanged.
    """
    # A single extraction with download=True both returns the info dict and
    # writes the subtitle/thumbnail files, so the URL is resolved only once.
    info = ydl.extract_info(url, download=True)
    creator = info['uploader']
    title = info['title']
    video_id = info['id']

    sub_path = f"output/{video_id}.{LANG}.vtt"
    if not os.path.exists(sub_path):
        return Video(url, creator, title, "This video doesn't have available subtitles :-/", [])

    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # default encoding.
    with open(sub_path, "r", encoding="utf-8") as f:
        lines = f.readlines()

    txt = _caption_text(lines)
    video = Video(url, creator, title, txt, _split_words(txt.split(), MAXWORDCOUNT))
    video.ytgenerated = True
    return video