flask-brz9-backend/tldw.py

66 lines
1.8 KiB
Python

import yt_dlp as youtube_dl
import os
# Maximum number of words placed in one transcript part by GetVideo.
MAXWORDCOUNT = 2000

# Subtitle language. It is used both for the yt-dlp request below and by
# GetVideo to build the name of the subtitle file it reads back, so the two
# must always agree.
LANG = "en"

# Options for the shared downloader: write subtitles (uploaded or automatic)
# as WebVTT plus the thumbnail into output/, skip the media download, and
# keep the console quiet.
ydl_opts = {
    'writesubtitles': True,
    'writeautomaticsub': True,
    'subtitlesformat': 'vtt',
    # Derived from LANG so the requested language and the file name that
    # GetVideo looks for cannot drift apart.
    'subtitleslangs': [LANG],
    'outtmpl': 'output/%(id)s',
    'skip_download': True,
    'writethumbnail': True,
    # NOTE(review): 'progress' does not appear to be a documented YoutubeDL
    # option ('noprogress' is) -- kept as-is, confirm whether it is needed.
    'progress': False,
    'noprogress': True,
    'quiet': True,
}

# Single downloader instance reused by every GetVideo call.
ydl = youtube_dl.YoutubeDL(ydl_opts)
class Video:
    """Metadata and transcript text of a single video.

    Attributes:
        url: Address the video was requested with.
        creator: Name of the uploader.
        title: Title of the video.
        transcript: Full transcript as one string (or a placeholder message
            when no subtitles were available).
        parts: The transcript split into a list of strings.
        ytgenerated: Flag that is False by default; GetVideo sets it to True
            once a subtitle file was found and parsed.
    """

    def __init__(self, url, creator, title, transcript, parts, *,
                 ytgenerated=False):
        """Store the given values unchanged on the instance.

        Args:
            url: Address of the video.
            creator: Name of the uploader.
            title: Title of the video.
            transcript: Full transcript text.
            parts: List of transcript parts.
            ytgenerated: Optional keyword-only initial value of the flag;
                defaults to False, which matches the previous behaviour.
        """
        self.url = url
        self.creator = creator
        self.title = title
        self.transcript = transcript
        self.parts = parts
        self.ytgenerated = ytgenerated

    def __repr__(self):
        # The transcript can be very long, so only the number of parts is
        # shown rather than the text itself.
        return (
            f"{type(self).__name__}(url={self.url!r}, "
            f"creator={self.creator!r}, title={self.title!r}, "
            f"parts={len(self.parts)}, ytgenerated={self.ytgenerated!r})"
        )
def _vtt_to_text(lines):
    """Collapse the lines of a WebVTT subtitle file into one transcript string.

    A line is kept when it comes after the first cue timing line, is not
    blank, contains no ">" (which rules out timing lines and lines carrying
    inline timing tags) and is directly followed by a blank line or by the
    end of the file. Kept lines are concatenated with their line breaks
    turned into spaces.
    """
    kept = []
    in_cues = False
    # Pair every line with its successor. The trailing "" makes end-of-file
    # count as a blank line so a final caption without a trailing blank line
    # is not lost.
    for line, following in zip(lines, lines[1:] + [""]):
        if "-->" in line:
            # Cue timing line: everything before the first one is the file
            # header (e.g. "WEBVTT", "Kind: ...", "Language: ...").
            in_cues = True
            continue
        if not in_cues:
            continue
        if ">" in line or not line.strip():
            continue
        if not following.strip():
            kept.append(line)
    return "".join(kept).replace("\n", " ")


def _chunk_words(words, size):
    """Join *words* into strings of at most *size* words each.

    Every word is followed by a single space, so each returned string ends
    with a trailing space. *size* must be a positive integer.
    """
    return [
        " ".join(words[start:start + size]) + " "
        for start in range(0, len(words), size)
    ]


def GetVideo(url):
    """Fetch metadata and subtitles for *url* and return them as a Video.

    The shared downloader writes the subtitle file (and thumbnail) under
    output/ and returns the video's metadata. The subtitle file for LANG is
    then read back, reduced to plain text and split into parts of at most
    MAXWORDCOUNT words.

    Args:
        url: Address of the video to process.

    Returns:
        A Video. When no subtitle file exists for the video, its transcript
        is a fixed placeholder message, its parts list is empty and
        ytgenerated stays False; otherwise ytgenerated is set to True.

    Raises:
        KeyError: If the metadata lacks 'title' or 'id'.
        Errors raised by the downloader itself propagate unchanged.
    """
    # A single call both writes the subtitle/thumbnail files and returns the
    # metadata, instead of resolving the video twice over the network.
    info = ydl.extract_info(url, download=True)
    # 'uploader' is an optional metadata field; fall back to None rather
    # than failing when it is absent.
    creator = info.get('uploader')
    title = info['title']
    video_id = info['id']

    subtitle_path = f"output/{video_id}.{LANG}.vtt"
    try:
        # WebVTT files are UTF-8; do not depend on the platform default.
        with open(subtitle_path, encoding="utf-8") as f:
            lines = f.readlines()
    except FileNotFoundError:
        return Video(url, creator, title,
                     "This video doesn't have available subtitles :-/", [])

    transcript = _vtt_to_text(lines)
    parts = _chunk_words(transcript.split(), MAXWORDCOUNT)
    video = Video(url, creator, title, transcript, parts)
    video.ytgenerated = True
    return video