From 5e3d90247167a894e4578e3378738127e8ebd6bd Mon Sep 17 00:00:00 2001 From: edbrz9 Date: Thu, 29 Dec 2022 12:22:48 +0100 Subject: [PATCH] y2t added --- proj/y2t/handler.php | 0 proj/y2t/index.php | 27 +++++++++++++++++++ proj/y2t/y2t.py | 63 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+) create mode 100644 proj/y2t/handler.php create mode 100644 proj/y2t/index.php create mode 100644 proj/y2t/y2t.py diff --git a/proj/y2t/handler.php b/proj/y2t/handler.php new file mode 100644 index 0000000..e69de29 diff --git a/proj/y2t/index.php b/proj/y2t/index.php new file mode 100644 index 0000000..d372ebf --- /dev/null +++ b/proj/y2t/index.php @@ -0,0 +1,27 @@ + + + +Y2T + + + + + + +
+ +
+
+
+
+

+ +
+ + +
# File: proj/y2t/y2t.py (added by the "y2t added" patch)
"""Fetch a video's subtitles with youtube_dl and print the transcript in parts.

Usage:
    python y2t.py <video-url> <subtitle-language>

The script asks youtube_dl to write the subtitles (VTT) for the requested
language next to the script, reduces the subtitle file to plain words, and
prints the transcript split into parts of at most ``MAX_WORDS_PER_PART``
words, each preceded by a "This is part i/n ..." header line.
"""
import sys

# Maximum number of words printed per transcript part.
MAX_WORDS_PER_PART = 1000


def extract_caption_words(lines):
    """Return the caption words found in the lines of a subtitle file.

    A line is kept only when all of the following hold:

    * it is not the last line of the file,
    * it contains neither ``">"`` nor ``":"``,
    * it is not blank, and
    * the line right after it is blank.

    NOTE(review): this presumably exists to skip cue timings, header lines
    and inline-tagged cue text; it also drops any caption line that happens
    to contain a colon, and a final line with no blank line after it.

    Args:
        lines: The subtitle file content as a list of lines.

    Returns:
        The kept lines split on whitespace, as a flat list of words.
    """
    kept = []
    # Pairing each line with its successor naturally excludes the last line.
    for current, following in zip(lines, lines[1:]):
        if ">" in current or ":" in current:
            continue
        if current.strip() and not following.strip():
            kept.append(current)
    return " ".join(kept).split()


def split_into_parts(words, max_words=MAX_WORDS_PER_PART):
    """Group *words* into space-joined strings of at most *max_words* words.

    Args:
        words: Sequence of words to group.
        max_words: Maximum number of words per part; must be at least 1.

    Returns:
        A list of strings; empty when *words* is empty.

    Raises:
        ValueError: If *max_words* is smaller than 1.
    """
    if max_words < 1:
        raise ValueError("max_words must be at least 1")
    return [
        " ".join(words[start:start + max_words])
        for start in range(0, len(words), max_words)
    ]


def format_parts(parts, title):
    """Return the output lines: a header line followed by the text of each part.

    Args:
        parts: The transcript parts, in order.
        title: The video title inserted into every header line.

    Returns:
        A list with two entries per part (header, then the part's text).
    """
    total = len(parts)
    output = []
    for number, part in enumerate(parts, start=1):
        output.append(
            "This is part " + str(number) + "/" + str(total)
            + " of the transcript of a video named: " + title
        )
        output.append(part)
    return output


def fetch_video_info(url, lang):
    """Have youtube_dl write the subtitles for *url* and return its info dict.

    Args:
        url: The video URL handed to youtube_dl.
        lang: The subtitle language code requested from youtube_dl.

    Returns:
        The dictionary returned by ``YoutubeDL.extract_info``; the caller
        reads its ``'id'`` and ``'title'`` entries.
    """
    # Imported here so the text helpers above stay usable (and testable)
    # on machines where the third-party package is not installed.
    import youtube_dl

    ydl_opts = {
        'writesubtitles': True,
        'writeautomaticsub': True,
        'subtitlesformat': 'vtt',
        'subtitleslangs': [lang],
        'outtmpl': '%(id)s',
        'skip_download': True,
        'writethumbnail': True,
        'quiet': True,
    }
    ydl = youtube_dl.YoutubeDL(ydl_opts)
    ydl.download([url])
    return ydl.extract_info(url, download=False)


def main(argv=None):
    """Run the script: fetch subtitles, then print the transcript in parts.

    Args:
        argv: Command-line arguments without the program name
            (``[url, lang]``); defaults to ``sys.argv[1:]``.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.exit("usage: y2t.py <video-url> <subtitle-language>")
    url, lang = args[0], args[1]

    info = fetch_video_info(url, lang)
    title = info['title']

    # Path built from the video id and language, matching 'outtmpl' above.
    sub_path = info['id'] + "." + lang + ".vtt"
    try:
        # Explicit encoding so non-ASCII captions do not depend on the locale.
        with open(sub_path, "r", encoding="utf-8") as sub_file:
            lines = sub_file.readlines()
    except FileNotFoundError:
        sys.exit("subtitle file not found: " + sub_path
                 + " (no '" + lang + "' subtitles written for this video?)")

    parts = split_into_parts(extract_caption_words(lines))
    for line in format_parts(parts, title):
        print(line)


if __name__ == "__main__":
    main()