aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJames Bottomley <James.Bottomley@HansenPartnership.com>2013-06-08 08:14:49 -0700
committerJames Bottomley <JBottomley@Parallels.com>2013-06-08 19:22:40 -0700
commitd145c2c471afc7817e5750e176f75d80a92142d9 (patch)
treeee9c9f72231b2c01f8669ad367d11659cc946a8d
parent802bd6d0284d20bddf3f9a68a118e051b8ff902e (diff)
downloadget-flash-videos-d145c2c471afc7817e5750e176f75d80a92142d9.tar.gz
Add F4V fragment downloader (also known as F4F)
The F4V format and manifest specifications are documented by Adobe (http://download.macromedia.com/f4v/video_file_format_spec_v10_1.pdf and http://osmf.org/dev/osmf/specpdfs/FlashMediaManifestFileFormatSpecification.pdf). They contain just enough information to work out how to do a fragment downloader, so this is the first pass at such a beast. It has only really been tested with www.destiny.com and www.syfy.com. It takes the manifest file and does the download. Since the manifest is the only way to get the stream resolutions, the F4VDownloader also works out the best resolution to choose. The known problems are: 1. There is currently no way to resume downloads. In theory, since the video could be restarted with each fragment, this should be easily possible. However, the fragments are exact time intervals apart (usually around 10s) but of variable size, so given a current file size it is very difficult to say how many fragments it contains. 2. There is no support for DRM streams. 3. There are no internal restarts: if a fragment download fails for some reason, the whole thing just stops. Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
-rwxr-xr-xget_flash_videos5
-rw-r--r--lib/FlashVideo/F4VDownloader.pm413
2 files changed, 418 insertions, 0 deletions
diff --git a/get_flash_videos b/get_flash_videos
index 6f70dcf..5914030 100755
--- a/get_flash_videos
+++ b/get_flash_videos
@@ -42,6 +42,7 @@ use FlashVideo::Mechanize;
use FlashVideo::Downloader;
use FlashVideo::RTMPDownloader;
use FlashVideo::FFmpegDownloader;
+use FlashVideo::F4VDownloader;
use FlashVideo::Search;
use FlashVideo::Utils;
use FlashVideo::VideoPreferences;
@@ -411,6 +412,10 @@ sub download {
if (defined($data->{downloader}) && $data->{downloader} eq "ffmpeg") {
$downloader = FlashVideo::FFmpegDownloader->new;
$file ||= $data->{flv};
+ } elsif (defined($data->{downloader}) && $data->{downloader} eq "f4m") {
+ $downloader = FlashVideo::F4VDownloader->new;
+ $file ||= $data->{flv};
+ $data->{prefs} = $prefs;
} else {
# RTMP data
$downloader = FlashVideo::RTMPDownloader->new;
diff --git a/lib/FlashVideo/F4VDownloader.pm b/lib/FlashVideo/F4VDownloader.pm
new file mode 100644
index 0000000..0ca6a46
--- /dev/null
+++ b/lib/FlashVideo/F4VDownloader.pm
@@ -0,0 +1,413 @@
+##
+# Downloader for F4V files using the documented fragment format.
+#
+# Copyright (c) 2013 James Bottomley <James.Bottomley@HansenPartnership.com>
+#
+# Released as part of get-flash-videos under the Apache Licence version 2.0
+#
+# The F4V specification is available at
+# http://download.macromedia.com/f4v/video_file_format_spec_v10_1.pdf
+#
+# And the manifest xml format at
+# http://osmf.org/dev/osmf/specpdfs/FlashMediaManifestFileFormatSpecification.pdf
+#
+# And the
+##
+# A short intro is that a Flash Fragment file (F4F) consists of segments and
+# fragments. Each segment is a complete video file and is built from a set of
+# fragments. Fragments are incomplete video files, but may be rendered
+# complete by the addition of the metadata for the segment. The downloader
+# code tries to download all segments and fragments, but it's only ever been
+# tested on single segment downloads (because one download is usually one
+# video file).
+##
+
+package FlashVideo::F4VDownloader;
+
+use strict;
+use base 'FlashVideo::Downloader';
+use IPC::Open3;
+use Fcntl ();
+use Symbol qw(gensym);
+use File::Temp qw(tempfile tempdir);
+use Storable qw(dclone);
+use FlashVideo::Utils;
+use MIME::Base64;
+use Data::Dumper;
+use Data::AMF::Parser::AMF0;
+
+##
+# Parse the 8 byte header found at the start of every F4V box.
+#
+# $s is the raw data beginning at the box.  $a is a hash ref which receives
+# the box size in {len} (32 bit big endian; the size counts the header
+# itself) and the four character box type in {type}.
+#
+# Returns everything in $s after the 8 byte header, i.e. the rest of the
+# input and not just the payload of this box.
+#
+# Dies if $s is shorter than a header or shorter than the declared box size.
+##
+sub read_box_header {
+    my ($self, $s, $a) = @_;
+
+    # unpack silently yields undef for both fields on short input, which
+    # callers walking a list of boxes would then never get past
+    die "string not big enough for a box header (".length($s)." but should be >= 8)" if (length($s) < 8);
+
+    ($a->{len}, $a->{type}) = unpack('L>A4', $s);
+    debug "box header, size ".$a->{len}." type ".$a->{type};
+    die "string not big enough (".length($s)." but should be >= ".$a->{len}.")" if ($a->{len} > length($s));
+    return substr($s, 8);
+}
+
+##
+# Parse a Segment Run Table ('asrt') box.
+#
+# $s is the raw data beginning at the box header.  $a is a hash ref which is
+# filled with the parsed fields:
+#   len, type                   from the box header
+#   version                     integer
+#   flags                       24 character string of '0'/'1'
+#   qualityentrycount           integer
+#   qualitysegmenturlmodifiers  array ref of strings (only set if count > 0)
+#   segmentrunentrycount        integer
+#   segmentrunentrytable        array ref of { firstsegment,
+#                                              fragmentspersegment }
+#
+# Returns the data following the box.  Dies on a wrong box type or if the
+# number of bytes consumed does not match the size in the box header.
+##
+sub read_segment_runtable {
+    my ($self, $s, $a) = @_;
+    my ($len, $totlen);
+
+    $s = $self->read_box_header($s, $a);
+    die "found wrong box ".$a->{type}." expecting asrt" if ($a->{type} ne 'asrt');
+    $totlen = 8;
+
+    # the trailing '.*' makes unpack return the number of bytes consumed
+    ($a->{version},
+     $a->{flags},
+     $a->{qualityentrycount},
+     $len,
+    ) = unpack('CB24C.*', $s);
+    $totlen += $len;
+    $s = substr($s, $len);
+
+    if ($a->{qualityentrycount} > 0) {
+        # one NUL terminated string per entry ('Z<n>' would instead read a
+        # single fixed width string of n bytes)
+        my @modifiers = unpack(('Z*' x $a->{qualityentrycount}).'.*', $s);
+        $len = pop @modifiers;
+        $a->{qualitysegmenturlmodifiers} = \@modifiers;
+        $s = substr($s, $len);
+        $totlen += $len;
+    }
+
+    ($a->{segmentrunentrycount}, $len) = unpack('L>.*', $s);
+    $s = substr($s, $len);
+    $totlen += $len;
+
+    $a->{segmentrunentrytable} = [];
+    for (my $i = 0; $i < $a->{segmentrunentrycount}; $i++) {
+        my $t = {};
+        ($t->{firstsegment},
+         $t->{fragmentspersegment},
+         $len
+        ) = unpack('L>L>.*', $s);
+        # explicit dereference: push on a bare reference is a fatal error
+        # from perl 5.24 onwards
+        push @{$a->{segmentrunentrytable}}, $t;
+        $s = substr($s, $len);
+        $totlen += $len;
+    }
+
+    die("Length mismatch in segment runtable ".$totlen." != ".$a->{len}) if ($totlen != $a->{len});
+    return $s;
+}
+
+##
+# Parse a Fragment Run Table ('afrt') box.
+#
+# $s is the raw data beginning at the box header.  $a is a hash ref which is
+# filled with the parsed fields:
+#   len, type                   from the box header
+#   version                     integer
+#   flags                       24 character string of '0'/'1'
+#   timescale                   integer
+#   qualityentrycount           integer
+#   qualitysegmenturlmodifiers  array ref of strings (only set if count > 0)
+#   fragmentrunentrycount       integer
+#   fragmentrunentrytable       array ref of { firstfragment,
+#                                 fragmenttimestamp, fragmentduration,
+#                                 discontinuityindicator (only present when
+#                                 fragmentduration is 0) }
+#
+# Each entry also carries 'firstsegment', the name this field was originally
+# stored under; it holds the same value as 'firstfragment'.
+#
+# Returns the data following the box.  Dies on a wrong box type or if the
+# number of bytes consumed does not match the size in the box header.
+##
+sub read_fragment_runtable {
+    my ($self, $s, $a) = @_;
+    my ($len, $totlen);
+
+    $s = $self->read_box_header($s, $a);
+    die "found wrong box ".$a->{type}." expecting afrt" if ($a->{type} ne 'afrt');
+    $totlen = 8;
+
+    # the trailing '.*' makes unpack return the number of bytes consumed
+    ($a->{version},
+     $a->{flags},
+     $a->{timescale},
+     $a->{qualityentrycount},
+     $len,
+    ) = unpack('CB24L>C.*', $s);
+    $totlen += $len;
+    $s = substr($s, $len);
+
+    if ($a->{qualityentrycount} > 0) {
+        # one NUL terminated string per entry ('Z<n>' would instead read a
+        # single fixed width string of n bytes)
+        my @modifiers = unpack(('Z*' x $a->{qualityentrycount}).'.*', $s);
+        $len = pop @modifiers;
+        $a->{qualitysegmenturlmodifiers} = \@modifiers;
+        $s = substr($s, $len);
+        $totlen += $len;
+    }
+
+    ($a->{fragmentrunentrycount}, $len) = unpack('L>.*', $s);
+    $s = substr($s, $len);
+    $totlen += $len;
+
+    $a->{fragmentrunentrytable} = [];
+    for (my $i = 0; $i < $a->{fragmentrunentrycount}; $i++) {
+        my $t = {};
+        ($t->{firstfragment},
+         $t->{fragmenttimestamp},
+         $t->{fragmentduration},
+         $len
+        ) = unpack('L>Q>L>.*', $s);
+        # old, misleading key for the first fragment number; kept so that
+        # anything reading the previous name continues to work
+        $t->{firstsegment} = $t->{firstfragment};
+        $s = substr($s, $len);
+        $totlen += $len;
+
+        # a zero duration entry is followed by a one byte discontinuity
+        # indicator
+        if ($t->{fragmentduration} == 0) {
+            ($t->{discontinuityindicator}, $len) = unpack('C.*', $s);
+            $s = substr($s, $len);
+            $totlen += $len;
+        }
+
+        # explicit dereference: push on a bare reference is a fatal error
+        # from perl 5.24 onwards
+        push @{$a->{fragmentrunentrytable}}, $t;
+    }
+
+    die("Length mismatch in fragment runtable ".$totlen." != ".$a->{len}) if ($totlen != $a->{len});
+    return $s;
+}
+
+##
+# Parse a Bootstrap Info ('abst') box, including the segment and fragment
+# run tables nested inside it.
+#
+# $s is the raw (already base64 decoded) data beginning at the box header.
+#
+# Returns a hash ref with the parsed fields:
+#   len, type                     from the box header
+#   version, bootstrapinfoversion, timescale, currentmediatime,
+#   smptetimecodeoffset           integers
+#   flags                         24 character string of '0'/'1'
+#   profile, live, update         unpacked from the byte following
+#                                 bootstrapinfoversion (2 + 1 + 1 bits)
+#   movieidentifier, drmdata, metadata   strings
+#   serverentrycount / serverentrytable    count / array ref of strings
+#   qualityentrycount / qualityentrytable  count / array ref of strings
+#   segmentruntablecount / segmentruntable    count / array ref of tables
+#   fragmentruntablecount / fragmentruntable  count / array ref of tables
+# The two string tables are only set when their count is non-zero.
+#
+# Dies on a wrong box type or if the number of bytes consumed does not match
+# the size in the box header.
+##
+sub read_bootstrap_box {
+    my ($self, $s) = @_;
+
+    my $bb = {};
+    $s = $self->read_box_header($s, $bb);
+    die "found wrong box ".$bb->{type}." expecting abst" if ($bb->{type} ne 'abst');
+
+    my ($len, $totlen, $packedbit);
+    $totlen = 8;
+
+    # the trailing '.*' makes unpack return the number of bytes consumed
+    ($bb->{version},
+     $bb->{flags},
+     $bb->{bootstrapinfoversion},
+     $packedbit,
+     $bb->{timescale},
+     $bb->{currentmediatime},
+     $bb->{smptetimecodeoffset},
+     $bb->{movieidentifier},
+     $bb->{serverentrycount},
+     $len,
+    ) = unpack('CB24L>B8L>Q>Q>Z*C.*', $s);
+    # $packedbit is an 8 character bit string, most significant bit first
+    $bb->{profile} = oct('0b'.substr($packedbit, 0, 2));
+    $bb->{live} = oct('0b'.substr($packedbit, 2, 1));
+    $bb->{update} = oct('0b'.substr($packedbit, 3, 1));
+    $s = substr($s, $len);
+    $totlen += $len;
+
+    # the guard has to test the count: the table itself does not exist yet
+    if ($bb->{serverentrycount} > 0) {
+        # one NUL terminated string per entry
+        my @servers = unpack(('Z*' x $bb->{serverentrycount}).'.*', $s);
+        $len = pop @servers;
+        $bb->{serverentrytable} = \@servers;
+        $s = substr($s, $len);
+        $totlen += $len;
+    }
+
+    ($bb->{qualityentrycount}) = unpack('C', $s);
+    $s = substr($s, 1);
+    $totlen += 1;
+    if ($bb->{qualityentrycount} > 0) {
+        my @qualities = unpack(('Z*' x $bb->{qualityentrycount}).'.*', $s);
+        $len = pop @qualities;
+        $bb->{qualityentrytable} = \@qualities;
+        $s = substr($s, $len);
+        $totlen += $len;
+    }
+
+    # 'Z*' reads up to the terminating NUL; a bare 'Z' would read exactly
+    # one byte and only works when both strings are empty
+    ($bb->{drmdata},
+     $bb->{metadata},
+     $bb->{segmentruntablecount},
+     $len
+    ) = unpack('Z*Z*C.*', $s);
+    $s = substr($s, $len);
+    $totlen += $len;
+
+    $bb->{segmentruntable} = [];
+    for (my $i = 0; $i < $bb->{segmentruntablecount}; $i++) {
+        my $seg = {};
+        $s = $self->read_segment_runtable($s, $seg);
+        push @{$bb->{segmentruntable}}, $seg;
+        $totlen += $seg->{len};
+    }
+
+    ($bb->{fragmentruntablecount}, $len) = unpack('C.*', $s);
+    $s = substr($s, $len);
+    $totlen += $len;
+
+    $bb->{fragmentruntable} = [];
+    # bounded by the fragment table count, not the segment table count
+    for (my $i = 0; $i < $bb->{fragmentruntablecount}; $i++) {
+        my $frt = {};
+        $s = $self->read_fragment_runtable($s, $frt);
+        push @{$bb->{fragmentruntable}}, $frt;
+        $totlen += $frt->{len};
+    }
+
+    die("Length mismatch in bootstrap box ".$totlen." != ".$bb->{len}) if ($totlen != $bb->{len});
+
+    return $bb;
+}
+
+##
+# Download an F4V fragmented stream described by a manifest.
+#
+# Parameters:
+#   $data     hash ref; reads {manifest} (manifest URL), {prefs} (used for
+#             the maximum resolution), {live} and the optional {auth}
+#             string appended to every fragment URL; {flv} is set to the
+#             output filename
+#   $file     requested output filename
+#   $browser  object used for all HTTP requests (get, success, content,
+#             response are called on it)
+#
+# The manifest is fetched, the highest resolution stream not exceeding the
+# user's preference is selected (highest bitrate among equal resolutions),
+# and the mdat payload of every fragment is appended to an FLV file whose
+# header and onMetaData tag are synthesised from the manifest.
+#
+# Returns the filesize recorded in the stream metadata.  Dies on any
+# download or parse failure, for DRM or live streams, and when asked to
+# resume an existing file.
+##
+sub download {
+    my ($self, $data, $file, $browser) = @_;
+
+    $self->{printable_filename} = $file;
+
+    $file = $data->{flv} = $self->get_filename($file);
+
+    if (-s $file && !$data->{live}) {
+        info "F4V output filename '$self->{printable_filename}' already " .
+            "exists, asking to resume...";
+        $data->{resume} = '';
+        ##
+        # this is a bit of a bitch.  We have all the information necessary to
+        # resume a stream at any timestamp.  What we lack is the knowledge of
+        # how many bytes go with each fragment (fragments are fixed durations
+        # but not fixed sizes).  So, given a resume file of size $x, we have
+        # no idea which fragment to resume from without downloading all the
+        # prior fragments to get their sizes.
+        #
+        # FIXME: could we do something clever by only downloading the http
+        # headers of each of the prior fragments and working out the sizes
+        # from them?
+        ##
+        die "resuming f4v streams is currently unimplemented";
+    }
+
+    my ($r_fh, $w_fh); # So Perl doesn't close them behind our back..
+
+    if ($data->{live} && $self->action eq 'play') {
+        # Playing live stream, we pipe this straight to the player, rather
+        # than saving on disk.
+        # NOTE(review): live streams are rejected further down, so this
+        # branch cannot currently deliver any data to the player.
+
+        pipe($r_fh, $w_fh);
+
+        my $pid = fork;
+        die "Fork failed" unless defined $pid;
+        if (!$pid) {
+            fcntl $r_fh, Fcntl::F_SETFD(), ~Fcntl::FD_CLOEXEC();
+            exec $self->replace_filename($self->player, "/dev/fd/" . fileno $r_fh);
+            die "Exec failed\n";
+        }
+
+        fcntl $w_fh, Fcntl::F_SETFD(), ~Fcntl::FD_CLOEXEC();
+        $data->{flv} = "/dev/fd/" . fileno $w_fh;
+
+        $self->{stream} = undef;
+    }
+
+    if ($self->debug) {
+        $data->{verbose} = undef;
+    }
+
+    my $url = $data->{manifest};
+    $browser->get($url);
+    if (!$browser->success) {
+        die "Couldn't download manifest $url: ".$browser->response->status_line;
+    }
+
+    my $manifest = from_xml($browser->content);
+    my $baseurl;
+    if (defined($manifest->{baseURL})) {
+        $baseurl = $manifest->{baseURL};
+    } else {
+        # kill any post data
+        info "data manifest is ".$data->{manifest};
+        ($baseurl) = split /\?/, $data->{manifest};
+        # now strip to the base dir the manifest was in; substituting in
+        # place avoids picking up a stale $1 when there is no '/' at all
+        $baseurl =~ s,/[^/]*$,,;
+    }
+
+    info "Manifest id \"".$manifest->{id}."\"" if (defined($manifest->{id}));
+
+    # standard says this must be present, but it often isn't, sigh
+    die "This download isn't streaming media" if (defined($manifest->{deliveryType}) && $manifest->{deliveryType} ne 'streaming');
+
+    die "F4VDownloader can't currently handle DRM encoded files" if (defined($manifest->{drmAdditionalHeader}));
+
+    # can't do http streaming without bootstrap information
+    die ("manifest has no bootstrapbox") if (!defined($manifest->{bootstrapInfo}));
+
+    ##
+    # select the media element: the highest resolution allowed by the
+    # quality preference and, within that resolution, the highest bitrate
+    ##
+    my $bitrate = 0;
+    my $res = 0;
+    my $maxres = $data->{prefs}->quality->quality_to_resolution($data->{prefs}->{quality});
+    my $media;
+    foreach my $candidate (@{$manifest->{media}}) {
+        my $metadata = decode_base64($candidate->{metadata});
+        my @md = Data::AMF::Parser::AMF0->parse($metadata);
+        die "wrong metadata, expecting onMetaData, found ".$md[0] if ($md[0] ne 'onMetaData');
+        my $md = $md[1];
+        # too big for the user's quality preference
+        next if ($maxres->[1] < $md->{height});
+        # never trade resolution for bitrate
+        next if ($md->{height} < $res);
+        if ($md->{height} > $res) {
+            $res = $md->{height};
+            # reset bitrate: lower resolution may have higher bitrate
+            $bitrate = 0;
+        }
+        my $br = defined($candidate->{bitrate}) ? $candidate->{bitrate} : 0;
+        if ($br >= $bitrate) {
+            $media = $candidate;
+            $bitrate = $br;
+            # keep the decoded forms: the raw metadata goes into the FLV
+            $media->{md} = $md;
+            $media->{metadata} = $metadata;
+        }
+    }
+    die "No stream in the manifest fits the requested quality" if (!defined($media));
+
+    info "Selected stream of resolution ".$media->{md}->{width}."x".$media->{md}->{height}." and bitrate ".$bitrate;
+    $media->{bootstrapInfo} = $manifest->{bootstrapInfo}->{$media->{bootstrapInfoId}};
+
+    my $bb = $self->read_bootstrap_box(decode_base64($media->{bootstrapInfo}->{content}));
+
+    debug 'Parsed media bootstrap data is '.Dumper($bb);
+    debug 'Parsed media metadata is '.Dumper($media);
+
+    if ($media->{url} =~ m,^http[s]?://,) {
+        $baseurl = $media->{url};
+    } else {
+        $baseurl .= '/'.$media->{url};
+    }
+
+    ##
+    # fragment count, used for the progress message only
+    ##
+    my $totfrags = 0;
+    my $prev;
+
+    foreach my $table (@{$bb->{segmentruntable}}) {
+        foreach my $entry (@{$table->{segmentrunentrytable}}) {
+            $totfrags += $entry->{firstsegment};
+            if (defined($prev)) {
+                $totfrags += ($entry->{firstsegment} - $prev->{firstsegment} - 1) * $prev->{fragmentspersegment};
+            }
+            $totfrags += $entry->{fragmentspersegment};
+            $prev = $entry;
+        }
+    }
+    die "Invalid fragment count ".$totfrags if ($totfrags < 0);
+    # NOTE(review): segment run entries carry no 'firstfragment' key, so
+    # this evaluates to -1, which cancels the firstsegment added above for
+    # the usual single entry starting at 1.  Left as is; confirm intent.
+    $totfrags += $bb->{segmentruntable}->[0]->{segmentrunentrytable}->[0]->{firstfragment} - 1;
+
+    info "downloading $totfrags Fragments from $baseurl to $file";
+
+    die "Currently cannot process live media (requires bootstrap info recomputes)" if ($bb->{live});
+
+    my $fh;
+    for (my $seg = 1; $seg <= $bb->{segmentruntablecount}; $seg++) {
+        my $segtable = $bb->{segmentruntable}->[$seg - 1];
+        my $segent = $segtable->{segmentrunentrytable}->[0];
+        die "Can't handle split segmentrunentry tables" if ($segtable->{segmentrunentrycount} != 1);
+
+        if (!defined($fh)) {
+            # opened once only: reopening with '>' for every segment would
+            # throw away the segments already written
+            open($fh, ">", $file) || die "can't open file $file for writing: $!";
+            # FLV is binary data; stop any newline translation
+            binmode($fh);
+
+            my $mdl = length($media->{metadata});
+            ##
+            # most of the Flash header is the metadata, but need to pad with
+            # global file headers first
+            ##
+            # Flash header for audio + video file
+            print $fh pack('H*', '464c5601050000000900000000');
+            # now the metadata introduction
+            print $fh pack('CCCCCCCL>', 0x12, ($mdl >> 16) & 0xff, ($mdl >> 8) & 0xff,
+                           $mdl & 0xff, 0, 0, 0, 0);
+            # the actual metadata from the manifest
+            print $fh $media->{metadata};
+            # finally the previous tag length
+            print $fh pack('L>', $mdl + 11);
+        }
+
+        ##
+        # now just glue the mdat boxes of the fragments together
+        ##
+        my $filebytecount = 0;
+        for (my $frag = $segent->{firstsegment};
+             $frag < $segent->{firstsegment} + $segent->{fragmentspersegment};
+             $frag++) {
+            my $dl = $baseurl.'Seg'.$seg.'-Frag'.$frag;
+            $dl .= $data->{auth} if (defined $data->{auth});
+            debug "downloading $dl";
+            $browser->get($dl);
+            if (!$browser->success) {
+                die "Couldn't download fragment $dl: ".$browser->response->status_line;
+            }
+            my $s = $browser->content;
+            my $dat;
+            # A fragment may carry its own bootstrap box.  It is parsed (and
+            # so validated) here but kept away from $bb, which still drives
+            # the segment loop.
+            my $fragbb;
+            while (length($s) > 0) {
+                my $box = {};
+                $self->read_box_header($s, $box);
+                # sizes below 8 (including the special values 0 and 1) would
+                # stop this loop from ever advancing
+                die "Fragment $frag has an unsupported box size" if (!defined($box->{len}) || $box->{len} < 8);
+                die "Fragment $frag too short" if (length($s) < $box->{len});
+                $fragbb = $self->read_bootstrap_box($s) if ($box->{type} eq 'abst');
+                $dat = substr($s, 8, $box->{len} - 8) if ($box->{type} eq 'mdat');
+                $s = substr($s, $box->{len});
+            }
+            die "Didn't find the movie data box in fragment $frag" if (!defined($dat));
+            $filebytecount += length($dat);
+            my $filesize = $media->{md}->{filesize};
+            if ($filesize) {
+                print STDERR sprintf("%s: %d (Frag %d: %.2f%%)\r", $file, $filebytecount, $frag, ($filebytecount/$filesize)*100);
+            } else {
+                # no size in the metadata, so no percentage can be shown
+                print STDERR sprintf("%s: %d (Frag %d)\r", $file, $filebytecount, $frag);
+            }
+            print $fh $dat;
+        }
+        print STDERR "\n";
+    }
+    if (defined($fh)) {
+        # buffered write errors only surface here
+        close($fh) || die "can't close file $file: $!";
+    }
+    return $media->{md}->{filesize};
+}
+
+# Probe hook for checking whether a stream is usable.  No sample is fetched
+# here: the probe reports success unconditionally.
+sub try_download {
+    my $probe_ok = 1;
+    return $probe_ok;
+}
+
+1;