HowTo: Выдрать субтитры SRT из видеофайла на YouTube. tt2srt

Dec 21, 2011 05:04

Скрипт для выдирания субтитров в формате SRT из видеофайла на YouTube
Использование: tt2srt <идентификатор видео> [язык (напр. ru, по умолчанию en)]
Вывод полученных субтитров идет на stdout
Если кому пригодится и нужны дополнения, пишите сюда.

#!/usr/bin/ruby
# coding:utf-8
require 'open-uri'
# Table of named HTML entities understood by the script.
#
# `subst` maps the replacement character to a regexp matching the entity
# that stands for it (e.g. 'é' => /&eacute;/m). Insertion order is kept the
# same as the original hand-written literal because the consumer iterates
# the hash in order.
#
# The two invisible characters (no-break space and soft hyphen) are written
# as \u escapes so they cannot be confused with an ordinary space or an
# empty string, and survive copy/paste.
subst = {}
[
  ['quot', '"'], ['apos', "'"], ['amp', '&'], ['lt', '<'], ['gt', '>'],
  ['nbsp', "\u00A0"], ['iexcl', '¡'], ['cent', '¢'], ['pound', '£'],
  ['curren', '¤'], ['yen', '¥'], ['brvbar', '¦'], ['sect', '§'],
  ['uml', '¨'], ['copy', '©'], ['ordf', 'ª'], ['laquo', '«'],
  ['not', '¬'], ['shy', "\u00AD"], ['reg', '®'], ['macr', '¯'],
  ['deg', '°'], ['plusmn', '±'], ['sup2', '²'], ['sup3', '³'],
  ['acute', '´'], ['micro', 'µ'], ['para', '¶'], ['middot', '·'],
  ['cedil', '¸'], ['sup1', '¹'], ['ordm', 'º'], ['raquo', '»'],
  ['frac14', '¼'], ['frac12', '½'], ['frac34', '¾'], ['iquest', '¿'],
  ['times', '×'], ['divide', '÷'], ['Agrave', 'À'], ['Aacute', 'Á'],
  ['Acirc', 'Â'], ['Atilde', 'Ã'], ['Auml', 'Ä'], ['Aring', 'Å'],
  ['AElig', 'Æ'], ['Ccedil', 'Ç'], ['Egrave', 'È'], ['Eacute', 'É'],
  ['Ecirc', 'Ê'], ['Euml', 'Ë'], ['Igrave', 'Ì'], ['Iacute', 'Í'],
  ['Icirc', 'Î'], ['Iuml', 'Ï'], ['ETH', 'Ð'], ['Ntilde', 'Ñ'],
  ['Ograve', 'Ò'], ['Oacute', 'Ó'], ['Ocirc', 'Ô'], ['Otilde', 'Õ'],
  ['Ouml', 'Ö'], ['Oslash', 'Ø'], ['Ugrave', 'Ù'], ['Uacute', 'Ú'],
  ['Ucirc', 'Û'], ['Uuml', 'Ü'], ['Yacute', 'Ý'], ['THORN', 'Þ'],
  ['szlig', 'ß'], ['agrave', 'à'], ['aacute', 'á'], ['acirc', 'â'],
  ['atilde', 'ã'], ['auml', 'ä'], ['aring', 'å'], ['aelig', 'æ'],
  ['ccedil', 'ç'], ['egrave', 'è'], ['eacute', 'é'], ['ecirc', 'ê'],
  ['euml', 'ë'], ['igrave', 'ì'], ['iacute', 'í'], ['icirc', 'î'],
  ['iuml', 'ï'], ['eth', 'ð'], ['ntilde', 'ñ'], ['ograve', 'ò'],
  ['oacute', 'ó'], ['ocirc', 'ô'], ['otilde', 'õ'], ['ouml', 'ö'],
  ['oslash', 'ø'], ['ugrave', 'ù'], ['uacute', 'ú'], ['ucirc', 'û'],
  ['uuml', 'ü'], ['yacute', 'ý'], ['thorn', 'þ'], ['yuml', 'ÿ']
].each do |name, char|
  # Same pattern and flag as the original literals: /&name;/m
  subst[char] = Regexp.new("&#{name};", Regexp::MULTILINE)
end

# Formats a time offset as an SRT timestamp "HH:MM:SS,mmm".
#
# t - offset in seconds (any Numeric).
#
# The value is rounded to whole milliseconds first so that carries
# propagate correctly (59.9996 s becomes 00:01:00,000, not 00:00:60,000).
def tt(t)
  total_ms = (Float(t) * 1000).round
  total_s, ms = total_ms.divmod(1000)
  total_min, s = total_s.divmod(60)
  h, min = total_min.divmod(60)
  "%02d:%02d:%02d,%03d" % [h, min, s, ms]
end

# Decodes character references in +text+ in a single pass.
#
# text    - raw text taken from one <text> element.
# table   - Hash of replacement character => Regexp; each Regexp's source is
#           expected to be the literal entity (e.g. "&quot;"), as in the
#           `subst` table of this script.
# unknown - Array that receives the names of entities that could not be
#           decoded (they are left untouched in the result).
#
# Returns the decoded String. Because every reference is decoded exactly
# once, "&amp;lt;" yields the text "&lt;" rather than "<".
def decode_entities(text, table, unknown = [])
  named = {}
  table.each { |char, pattern| named[pattern.source] = char }

  text.gsub(/&(#[xX]\h+|#\d+|\w+);/) do |entity|
    ref = $1
    case ref
    when /\A#[xX](\h+)\z/
      Integer($1, 16).chr(Encoding::UTF_8)
    when /\A#(\d+)\z/
      # Explicit base 10: Integer("039") would otherwise be parsed as octal.
      Integer($1, 10).chr(Encoding::UTF_8)
    else
      if named.key?(entity)
        named[entity]
      else
        unknown << ref
        entity
      end
    end
  end
end

# Converts a timedtext XML document into SRT cues.
#
# xml   - String holding <text start="S" dur="D">...</text> elements.
# table - entity table passed through to decode_entities.
#
# Returns [cues, unknown]: cues is an Array of ready-to-print SRT blocks
# (each ends with a blank line), unknown maps every undecodable entity name
# to the cue text it was last seen in. Elements without a start attribute
# are skipped; a missing dur is treated as 0.
def timedtext_to_srt(xml, table)
  cues = []
  unknown = {}

  xml.scan(/<text\b([^>\/]*)>(.*?)<\/text>/m) do |attrs, body|
    start = attrs[/\bstart="([^"]*)"/, 1]
    next if start.nil?

    from = Float(start)
    to = from + Float(attrs[/\bdur="([^"]*)"/, 1] || 0)

    bad = []
    text = decode_entities(body, table, bad).strip
    bad.each { |name| unknown[name] = text }

    cues << "%d\n%s --> %s\n%s\n\n" % [cues.size + 1, tt(from), tt(to), text]
  end

  [cues, unknown]
end

# Downloads the timedtext document for a video and language.
#
# Uses URI#read from open-uri instead of Kernel#open, which no longer
# accepts URIs on Ruby 3.0+. Returns the stripped response body.
def fetch_timedtext(video_id, lang)
  query = "lang=#{URI.encode_www_form_component(lang)}" \
          "&v=#{URI.encode_www_form_component(video_id)}"
  URI.parse("http://video.google.com/timedtext?#{query}").read.strip
end

if ARGV.count < 1 || ARGV.count > 2
  puts "tt2srt SRT subtitles downloader from youtube"
  puts "usage: tt2srt <video id> [language]"
else
  video_id, lang = ARGV
  f = fetch_timedtext(video_id, lang || 'en')
  # An explicitly requested language with no track falls back to English.
  f = fetch_timedtext(video_id, 'en') if f.empty? && lang && lang != 'en'

  if f.empty?
    puts "!!!ERROR: No subtitles for video"
  else
    cues, unknown = timedtext_to_srt(f, subst)
    cues.each { |cue| puts cue }
    unknown.each { |k, v| puts("!!!ERROR: Unknown entity: %s in <<%s>>" % [k, v]) }
  end
end

Previous post Next post
Up