// ==UserScript==
// @name Youtube Subtitle Downloader v36
// @description Download Subtitles
// @include https://*youtube.com/*
// @author Cheng Zheng
// @copyright 2009 Tim Smart; 2011 gw111zz; 2014~2023 Cheng Zheng;
// @license GNU GPL v3.0 or later. http://www.gnu.org/copyleft/gpl.html
// @require https://code.jquery.com/jquery-1.12.4.min.js
// @version 36
// @grant GM_xmlhttpRequest
// @grant unsafeWindow
// @namespace https://greasyfork.org/users/5711
// ==/UserScript==
/*
[What is this?]
This Tampermonkey script allows you to download Youtube "Automatic subtitle" and "closed subtitle".
[Note]
If it doesn't work (rarely), try to refresh the page.
If problem still exists after refreshing, send an email to [email protected].
[Who built this?]
Author : Cheng Zheng (郑诚)
Email : [email protected]
Github : https://github.com/1c7/Youtube-Auto-Subtitle-Download
[Note for Developers]
1. Some comments are written in Chinese.
2. This code handles both "Auto" and "Closed" subtitles.
[Test Video]
https://www.youtube.com/watch?v=bkVsus8Ehxs
This video only has a closed English subtitle, with no auto subtitles.
https://www.youtube.com/watch?v=-WEqFzyrbbs
no subtitle at all
https://www.youtube.com/watch?v=9AzNEG1GB-k
have a lot of subtitles
https://www.youtube.com/watch?v=tqGkOvrKGfY
1:36:33 super long subtitle
[How does it work?]
The code can be roughly divided into three parts:
1. Add a button on the page. (UI)
2. Detect if subtitle exists.
3. Convert subtitle format, then download.
[Test Environment]
Works best on Chrome + Tampermonkey.
There are plenty of Chromium-based browsers; I do not guarantee this works on all of them.
备注:
有时候不能用,是因为 jQuery 的 CDN 无法载入,解决办法是修改这一行
// @require https://code.jquery.com/jquery-1.12.4.min.js
改成一个别的 jQuery 地址,比如
https://cdn.bootcdn.net/ajax/libs/jquery/1.12.4/jquery.js
https://cdn.staticfile.org/jquery/1.12.4/jquery.min.js
更新日志
## 2022年12月23号:升级到 v35
常规升级。把下载框挪到标题下面,之前太靠下了(放到了描述的下面)现在挪上去一点。
*/
;(function () {
// ---- Configuration ----
// Labels shown in the first <option> of the dropdown.
const NO_SUBTITLE = 'No Subtitle'
const HAVE_SUBTITLE = 'Download Subtitles'
const TEXT_LOADING = 'Loading...'
// DOM id of the wrapper <div> that holds the dropdown.
const BUTTON_ID =
  'youtube-subtitle-downloader-by-1c7-latest-update-2022-decemeber-23'
// Same id, written as a selector for jQuery lookups.
const HASH_BUTTON_ID = '#' + BUTTON_ID

// ---- State ----
// true until init_UI() has run once on this page load
let first_load = true
// player response taken from the latest "yt-navigate-finish" event (for auto subtitle)
let youtube_playerResponse_1c7 = null
// every subtitle offered in the dropdown, in the same order as the <option>s
unsafeWindow.caption_array = []

// Start once the DOM is ready.
$(document).ready(() => {
  make_sure_it_load_properly_before_continue()
})
// Poll the DOM until an element matching `element_identifier` exists.
//
// element_identifier: CSS selector passed to document.querySelector
// interval_ms:        delay between polls (default 330, the original value)
// retry_limit:        number of misses tolerated before giving up (default 50)
//
// Resolves with `true` as soon as the element is found.
// Rejects with `false` when the retry limit is exceeded or the lookup throws.
// The polling timer is always stopped before the promise settles; previously
// it kept firing forever after a successful lookup.
async function wait_until_element_exists(
  element_identifier,
  interval_ms = 330,
  retry_limit = 50
) {
  return new Promise(function (resolve, reject) {
    var retry_count = 0
    var intervalID = setInterval(function () {
      try {
        if (document.querySelector(element_identifier) != null) {
          clearInterval(intervalID)
          resolve(true)
          return
        }
        retry_count = retry_count + 1
        if (retry_count > retry_limit) {
          clearInterval(intervalID)
          reject(false)
        }
      } catch (error) {
        // e.g. an invalid selector: stop polling instead of throwing on every tick
        clearInterval(intervalID)
        reject(false)
      }
    }, interval_ms)
  })
}
// Wait for the element that identifies the 2022 watch-page layout, then build
// the download UI.
// wait_until_element_exists() rejects when the element never shows up; that
// outcome is treated as "nothing to do" here so it does not surface as an
// unhandled promise rejection.
async function make_sure_it_load_properly_before_continue() {
  var id = new_Youtube_2022_UI_element_identifier()
  var found = false
  try {
    found = await wait_until_element_exists(id)
  } catch (reason) {
    found = false
  }
  if (found) {
    init_UI()
  }
}
// Triggered when a new page finishes loading.
// (This also fires on the very first load, which is not what we want; that is
// why the handler only rebuilds the UI when first_load === false.)
// (Author's note: the new Material design version fires "yt-navigate-finish";
// the old version does not.)
var body = document.getElementsByTagName('body')[0]
body.addEventListener('yt-navigate-finish', function (event) {
  // Author's test on 2021-8-9: yt-navigate-finish fires normally.
  if (current_page_is_video_page() === false) {
    return
  }
  // for auto subtitle; optional chaining so an event without
  // detail/response cannot abort the re-initialisation below
  youtube_playerResponse_1c7 = event.detail?.response?.playerResponse ?? null
  // clean up (important, otherwise the list keeps growing and causes errors)
  unsafeWindow.caption_array = []
  // if the user navigated to another page, init again to get the correct subtitles
  if (first_load === false) {
    remove_subtitle_download_button()
    init_UI()
  }
})
// This element is used to decide whether the page uses the 2022 UI.
// Returns whatever document.querySelector yields for the 2022-UI selector.
function new_Youtube_2022_UI_element() {
  var selector = new_Youtube_2022_UI_element_identifier()
  return document.querySelector(selector)
}
// CSS selector of the element used to recognise the 2022 watch-page UI.
function new_Youtube_2022_UI_element_identifier() {
  return '#owner.item.style-scope.ytd-watch-metadata'
}
// Detect [new version UI (material design)] vs [old version UI].
// Returns false when the old layout's #watch7-headline element is present,
// true otherwise. (Author's note: tested, accurate.)
function new_material_design_version() {
  return document.getElementById('watch7-headline') ? false : true
}
// true when the current URL carries a video id, false otherwise
function current_page_is_video_page() {
  var video_id = get_url_video_id()
  return video_id !== null
}
// Value of the "v" query parameter, e.g. "RW1ChiWyiZQ" for
// "https://www.youtube.com/watch?v=RW1ChiWyiZQ"; null when it is absent.
function get_url_video_id() {
  var video_id = getURLParameter('v')
  return video_id
}
// Based on https://stackoverflow.com/questions/11582512/how-to-get-url-parameters-with-javascript/11582513#11582513
//
// Read one query-string parameter from location.search.
// name: parameter name (matched literally)
// Returns the decoded value, or null when the parameter is missing or empty.
function getURLParameter(name) {
  // Escape regex metacharacters so a name such as "a.b" only matches itself.
  var escaped_name = String(name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  // A parameter starts after "?" or "&" (the old class "[?|&]" also accepted "|").
  var pattern = new RegExp('[?&]' + escaped_name + '=([^&;]+?)(&|#|;|$)')
  var match = pattern.exec(location.search)
  if (match === null) {
    return null
  }
  // "+" encodes a space in query strings
  var raw = match[1].replace(/\+/g, '%20')
  try {
    return decodeURIComponent(raw) || null
  } catch (error) {
    // Malformed percent-escape (URIError): fall back to the undecoded text.
    return raw || null
  }
}
// Remove the download widget (looked up by HASH_BUTTON_ID) from the page.
function remove_subtitle_download_button() {
  var $button = $(HASH_BUTTON_ID)
  $button.remove()
}
// Initialise: build the download widget and attach it to the page.
function init_UI() {
  var widget = get_main_UI_element()
  // Candidate hosts, tried in this order; the widget is appended to each one found.
  var host_lookups = [
    // old UI
    function () {
      return document.getElementById('watch7-headline')
    },
    // new UI
    function () {
      return document.querySelector('#above-the-fold #title')
    },
  ]
  host_lookups.forEach(function (find_host) {
    var host = find_host()
    if (host) {
      host.appendChild(widget)
    }
  })
  first_load = false
}
// Build the download widget: a green <div> wrapping a <select>.
// The <select> starts disabled with a single "Loading..." option;
// load_language_list() fills it in, and choosing an option triggers
// download_subtitle().
// Side effects: inserts the widget after the old-layout title element when
// that element exists, and makes sure one hidden <a id="ForSubtitleDownload">
// is present in <body>.
// Returns the wrapper <div>.
function get_main_UI_element() {
  var div = document.createElement('div')
  var select = document.createElement('select')
  var option = document.createElement('option')

  var css_div = [
    'display: table;',
    'margin-top:4px;',
    'border: 1px solid rgb(0, 183, 90);',
    'cursor: pointer; color: rgb(255, 255, 255);',
    'border-top-left-radius: 3px;',
    'border-top-right-radius: 3px;',
    'border-bottom-right-radius: 3px;',
    'border-bottom-left-radius: 3px;',
    'background-color: #00B75A;',
    '',
  ].join('\n')
  div.setAttribute('style', css_div)
  div.id = BUTTON_ID

  select.id = 'captions_selector'
  select.disabled = true
  var css_select = [
    'display:block;',
    'border: 1px solid rgb(0, 183, 90);',
    'cursor: pointer;',
    'color: rgb(255, 255, 255);',
    'background-color: #00B75A;',
    'padding: 4px;',
    '',
  ].join('\n')
  select.setAttribute('style', css_select)

  option.textContent = TEXT_LOADING
  option.selected = true
  select.appendChild(option)

  // choosing an entry in the dropdown triggers the download
  select.addEventListener(
    'change',
    function () {
      download_subtitle(this)
    },
    false
  )
  div.appendChild(select) // put <select> into <div>

  // put the div into page: new material design
  // (querySelector returns null when nothing matches; the previous
  // querySelectorAll result was always truthy, so its check never failed)
  var title_element = document.querySelector(
    '.title.style-scope.ytd-video-primary-info-renderer'
  )
  if (title_element) {
    $(title_element).after(div)
  }

  // load_language_list is async; report a failure instead of leaving an
  // unhandled rejection behind
  Promise.resolve(load_language_list(select)).catch(function (error) {
    console.error('Youtube Subtitle Downloader: failed to load subtitle list', error)
  })

  // <a> element is for download.
  // This function runs again on every in-page navigation, so only add the
  // anchor when it is not there yet; otherwise ids would be duplicated.
  if (document.getElementById('ForSubtitleDownload') == null) {
    var a = document.createElement('a')
    a.style.cssText = 'display:none;'
    a.setAttribute('id', 'ForSubtitleDownload')
    var body = document.getElementsByTagName('body')[0]
    body.appendChild(a)
  }
  return div
}
// Triggered when the user selects an <option>.
// selector: the <select> element; option N (N >= 1) corresponds to
//           caption_array[N - 1] because the first <option> is only a label.
// Fetches the chosen subtitle, converts it to SRT and starts the download.
// Whatever happens, the dropdown is put back on its first <option>.
async function download_subtitle(selector) {
  // if user select first <option>, we just return, do nothing.
  if (selector.selectedIndex == 0) {
    return
  }
  try {
    var caption = caption_array[selector.selectedIndex - 1]
    if (!caption) {
      return
    }
    // 'AUTO' marks the automatic subtitle entry
    var is_auto = caption.lang_code == 'AUTO'
    var result = is_auto
      ? await get_auto_subtitle()
      : await get_closed_subtitle(caption.lang_code)
    // both fetchers return false when no subtitle could be obtained
    if (!result) {
      console.error('Youtube Subtitle Downloader: subtitle could not be fetched')
      return
    }
    var srt = parse_youtube_XML_to_SRT(result)
    if (typeof srt !== 'string') {
      return
    }
    var label = is_auto ? get_auto_subtitle_name() : caption.lang_name
    downloadString(srt, 'text/plain', get_file_name(label))
  } catch (error) {
    console.error('Youtube Subtitle Downloader: download failed', error)
  } finally {
    // After download (or failure), select first <option>
    selector.options[0].selected = true
  }
}
// Build the download file name from the subtitle label `x`, the video title
// and the video id, in the form "(<x>)<title>_video_id_<id>.srt".
// document.title is deliberately not used: when there are notifications it
// carries the notification count, which would end up in the file name.
function get_file_name(x) {
  var title = get_title()
  var video_id = get_video_id()
  return `(${x})${title}_video_id_${video_id}.srt`
}
// 拿完整字幕的 XML
// async function get_closed_subtitles() {
// var list_url = 'https://video.google.com/timedtext?hl=en&v=' + get_url_video_id() + '&type=list';
// // Example: https://video.google.com/timedtext?hl=en&v=if36bqHypqk&type=list
// return new Promise(function (resolve, reject) {
// GM_xmlhttpRequest({
// method: 'GET',
// url: list_url,
// onload: function (xhr) {
// resolve(xhr.responseText)
// }
// })
// })
// }
// Detect whether an "auto subtitle" and/or "closed subtitles" exist and add
// one <option> per subtitle to `select`.
// Every added subtitle is also pushed onto caption_array (a global), in the
// same order as the <option>s, because the download step looks it up there.
// Returns false when there is no subtitle at all; otherwise returns nothing.
async function load_language_list(select) {
  var has_auto = get_auto_subtitle_xml_url() != false
  var tracks = get_captionTracks()
  var has_closed =
    tracks != undefined && typeof tracks === 'object' && tracks.length > 0

  // if no subtitle at all, just say no and stop
  if (!has_auto && !has_closed) {
    select.options[0].textContent = NO_SUBTITLE
    disable_download_button()
    return false
  }

  // at least one type of subtitle exists
  select.options[0].textContent = HAVE_SUBTITLE
  select.disabled = false

  // record one subtitle and show it in the dropdown
  var add_entry = function (info) {
    caption_array.push(info)
    var entry = document.createElement('option')
    entry.textContent = info.lang_name
    select.appendChild(entry)
  }

  if (has_auto) {
    add_entry({
      lang_code: 'AUTO', // later we use this to know if it's auto subtitle
      lang_name: get_auto_subtitle_name(), // for display only
    })
  }

  if (has_closed) {
    Array.from(tracks).forEach(function (track) {
      // the automatic track was already handled above
      if (track.kind == 'asr') {
        return
      }
      var code = track.languageCode
      var translated = track.name.simpleText
      add_entry({
        lang_code: code,
        lang_name: lang_code_to_local_name(code, translated),
      })
    })
  }
}
// Grey out the widget and its dropdown and show a "not-allowed" cursor.
// The wrapper's padding depends on which layout is detected.
function disable_download_button() {
  var GREY = '#95a5a6'
  var targets = [HASH_BUTTON_ID, '#captions_selector']
  targets.forEach(function (selector) {
    $(selector)
      .css('border', GREY)
      .css('cursor', 'not-allowed')
      .css('background-color', GREY)
  })
  var padding = new_material_design_version() ? '6px' : '5px'
  $(HASH_BUTTON_ID).css('padding', padding)
}
// Convert a time in seconds (a number such as 671.33, 37.64, 12 or 23.029)
// into an SRT timestamp such as 00:00:00,090 / 00:00:08,460 / 00:10:29,350.
function process_time(s) {
  // toFixed(3) always yields three decimals (671.33 -> "671.330",
  // 671 -> "671.000"); note that it rounds.
  var parts = s.toFixed(3).split('.')
  var seconds = parts[0] // whole seconds, e.g. "671"
  var millis = parts[1] // e.g. "330"
  var minutes = 0
  var hours = 0

  if (seconds >= 60) {
    // split seconds into minutes + seconds, e.g. 121 s -> 2 min 1 s
    minutes = Math.floor(seconds / 60)
    seconds = seconds - minutes * 60
    // split minutes into hours + minutes, e.g. 700 min -> 11 h 40 min
    hours = Math.floor(minutes / 60)
    minutes = minutes - hours * 60
  }

  // prefix a "0" to values below 10 so each field has at least two digits
  var two_digits = function (value) {
    return value < 10 ? '0' + value : value
  }

  return (
    two_digits(hours) +
    ':' +
    two_digits(minutes) +
    ':' +
    two_digits(seconds) +
    ',' +
    millis
  )
}
// Adapted from https://gist.github.com/danallison/3ec9d5314788b337b682
// Thanks! https://github.com/danallison
// (Author's note: works in Chrome 66, test passed 2018-5-19.)
//
// Offer `text` to the user as a file download.
// text:     file contents
// fileType: MIME type used for the Blob
// fileName: suggested name of the downloaded file
function downloadString(text, fileType, fileName) {
  var link = document.createElement('a')
  link.download = fileName
  link.href = URL.createObjectURL(new Blob([text], { type: fileType }))
  link.dataset.downloadurl = [fileType, link.download, link.href].join(':')
  link.style.display = 'none'

  // the link is added to the page only for the duration of the click
  document.body.appendChild(link)
  link.click()
  document.body.removeChild(link)

  // release the object URL a little later
  setTimeout(function () {
    URL.revokeObjectURL(link.href)
  }, 1500)
}
// Based on https://css-tricks.com/snippets/javascript/unescape-html-in-js/
// Turn HTML entities back into text, e.g. "&quot;" becomes a double quote.
// input: string that may contain HTML entities
// Returns the decoded text ('' for empty input).
//
// A detached <textarea> is used as the decoder: its content is parsed as
// text with character references, so entities are decoded but nothing in the
// input becomes an element, and the whole text is returned through `.value`
// (the earlier <div> version only returned the first child node's value).
function htmlDecode(input) {
  var decoder = document.createElement('textarea')
  decoder.innerHTML = input
  return decoder.value
}
// Return the baseUrl of the automatic ("asr") caption track, or false when
// there is none or the lookup fails.
// Later we can send an AJAX request to that URL and get the XML subtitle.
// ASR – A caption track generated using automatic speech recognition.
// https://developers.google.com/youtube/v3/docs/captions
function get_auto_subtitle_xml_url() {
  try {
    for (const track of get_captionTracks() ?? []) {
      if (track.kind === 'asr') {
        return track.baseUrl
      }
    }
  } catch (error) {
    // any failure while reading the tracks counts as "no auto subtitle"
  }
  return false
}
// Fetch the automatic subtitle.
// Resolves with false when no automatic track URL is available, otherwise
// with whatever get(url) resolves to.
async function get_auto_subtitle() {
  var auto_url = get_auto_subtitle_xml_url()
  // loose comparison kept on purpose: it matches the `false` sentinel
  return auto_url == false ? false : await get(auto_url)
}
// Fetch the closed (non-automatic) subtitle for `lang_code`.
// Resolves with the response of get(baseUrl) for the first matching track,
// or with false when no track matches or anything fails.
async function get_closed_subtitle(lang_code) {
  try {
    for (const track of get_captionTracks() ?? []) {
      // The `kind != 'asr'` check is required: a code such as "en" can match
      // two records, the automatic subtitle (kind=asr) and the closed one
      // (no kind property); without the check two files would be downloaded.
      var is_wanted = track.languageCode === lang_code && track.kind != 'asr'
      if (is_wanted) {
        // awaited inside the try so a failed request also ends up as false
        return await get(track.baseUrl)
      }
    }
  } catch (error) {
    // lookup or request failed: report "no subtitle"
  }
  return false
}
// Youtube returns XML, we want SRT.
// input:  the Youtube subtitle XML, either as a parsed XML document (anything
//         offering getElementsByTagName) or as an XML string
// output: the subtitle in SRT format (prefixed with a BOM), or false when the
//         input is empty / a failed-fetch value (false, null, undefined, '')
//
// SRT format produced:
//   1
//   00:00:01,939 --> 00:00:04,350
//   everybody Craig Adams here I'm a
//
//   2
//   00:00:04,350 --> 00:00:06,720
//   filmmaker on YouTube who's digging
function parse_youtube_XML_to_SRT(youtube_xml_string) {
  // the fetch helpers return false on failure; do not try to parse that
  if (!youtube_xml_string) {
    return false
  }
  var xml = youtube_xml_string
  if (typeof xml === 'string') {
    xml = new DOMParser().parseFromString(xml, 'text/xml')
  }
  var cues = xml.getElementsByTagName('text')
  var BOM = '\uFEFF'
  var new_line = '\n'
  var blocks = []
  for (var i = 0; i < cues.length; i++) {
    var cue = cues[i]
    var content = cue.textContent.toString().replace(/(<([^>]+)>)/gi, '') // remove all html tag.
    var start = parseFloat(cue.getAttribute('start'))
    var duration = parseFloat(cue.getAttribute('dur'))
    if (!Number.isFinite(duration)) {
      // no usable "dur" attribute: end the cue where it starts instead of
      // writing a NaN timestamp
      duration = 0
    }
    // To make each cue end exactly where the next one starts, use the next
    // cue's "start" attribute as the end time instead (last cue keeps start + dur).
    var end = start + duration
    // turn HTML entity back to text. example: &#39; back to apostrophe (')
    content = htmlDecode(content)
    blocks.push(
      String(i + 1) +
        new_line +
        process_time(start) +
        ' --> ' +
        process_time(end) +
        new_line +
        content +
        new_line +
        new_line
    )
  }
  return BOM + blocks.join('')
}
// Display name of the automatic ("asr") caption track, for example
// "English (auto-generated)"; 'Auto Subtitle' when there is no such track or
// the lookup fails.
function get_auto_subtitle_name() {
  var DEFAULT_NAME = 'Auto Subtitle'
  try {
    for (const track of get_captionTracks() ?? []) {
      if (track.kind === 'asr') {
        return track.name.simpleText
      }
    }
  } catch (error) {
    // fall back to the default name below
  }
  return DEFAULT_NAME
}
// Read the player response stored on the page's first <ytd-app> element
// (its `data.playerResponse` property).
// Returns undefined, instead of throwing, when the element or its data is
// not available.
function get_youtube_data() {
  return document.getElementsByTagName('ytd-app')[0]?.data?.playerResponse
}
// Caption track list from the player response
// (captions.playerCaptionsTracklistRenderer.captionTracks), or undefined when
// any part of that path is missing.
function get_captionTracks() {
  var renderer = get_youtube_data()?.captions?.playerCaptionsTracklistRenderer
  return renderer?.captionTracks
}
// Input a language code, output that language's display name as listed in
// the caption tracks (which is in the current locale).
// If the current locale is Simplified Chinese, input "de" gives 德语;
// if the current locale is English (US), input "de" gives "German".
//
// languageCode:  code to look up, e.g. "de"
// fallback_name: returned when no track matches, the matching track has no
//                name, or the lookup fails
//
// A language can appear twice, as a closed track and as an automatic
// (kind "asr") track; the closed track's name is preferred so a closed
// subtitle is not labelled with the auto-generated name.
function lang_code_to_local_name(languageCode, fallback_name) {
  try {
    var matches = (get_captionTracks() ?? []).filter(function (track) {
      return track.languageCode === languageCode
    })
    var preferred =
      matches.find(function (track) {
        return track.kind !== 'asr'
      }) ?? matches[0]
    return preferred?.name?.simpleText || fallback_name
  } catch (error) {
    return fallback_name
  }
}
// Get the video title.
// Method 1: read it from the title <h1> on the page.
// Method 2 (used when method 1 finds nothing): read it from
// ytplayer.bootstrapPlayerResponse. Author's note: that object lags behind,
// so after an in-page navigation it can still hold the previous video's title.
function get_title() {
  var title_element = document.querySelector(
    'h1.title.style-scope.ytd-video-primary-info-renderer'
  )
  if (title_element != null) {
    var title = title_element.innerText
    // return it when it is usable
    if (title != undefined && title != null && title != '') {
      return title
    }
  }
  // the fallback must be the title; it used to return the video id
  return ytplayer.bootstrapPlayerResponse.videoDetails.title
}
// Id of the video currently shown.
// The "v" parameter of the page URL is preferred; the
// ytplayer.bootstrapPlayerResponse value is only a fallback because, per the
// author's note in get_title, that object lags behind after an in-page
// navigation.
function get_video_id() {
  var id_from_url = get_url_video_id()
  if (id_from_url) {
    return id_from_url
  }
  return ytplayer.bootstrapPlayerResponse.videoDetails.videoId
}
// Send a GET request with jQuery.
// Usage: var result = await get(url)
// Returns the object produced by $.ajax, which can be awaited; awaiting it
// yields the response data, and a failed request makes the await throw.
// (The former `success` / `fail` callbacks only returned their argument, which
// has no effect, and `fail` is not a $.ajax setting, so they were dropped.)
function get(url) {
  return $.ajax({
    url: url,
    type: 'get',
  })
}
// Promise that resolves (with no value) after `ms` milliseconds.
const wait = (ms) =>
  new Promise((done) => {
    setTimeout(done, ms)
  })
// Wait for an element to exist.
// https://stackoverflow.com/questions/5525071/how-to-wait-until-an-element-exists
// selector: CSS selector passed to document.querySelector
// Returns a promise that resolves with the matching element, immediately if
// it is already present, otherwise once a mutation under document.body makes
// it appear. The promise never rejects.
function waitForElm(selector) {
  return new Promise((resolve) => {
    const existing = document.querySelector(selector)
    if (existing) {
      resolve(existing)
      return
    }
    const observer = new MutationObserver(() => {
      const found = document.querySelector(selector)
      if (!found) {
        return
      }
      resolve(found)
      observer.disconnect()
    })
    observer.observe(document.body, { childList: true, subtree: true })
  })
}
})()