diff options
author | H. Peter Anvin <hpa@zytor.com> | 2011-09-26 16:20:58 -0700 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2011-09-26 16:20:58 -0700 |
commit | 737af4895f2b11324e328a2421d76f1e1564f5a0 (patch) | |
tree | 7d32795617cfd0cc1f927443172c8a1bae9ddf25 | |
parent | c07d2757d8b0f128aca6242bcc56da525d7a687c (diff) | |
download | kup-737af4895f2b11324e328a2421d76f1e1564f5a0.tar.gz |
Clean up the verification of valid UTF-8 in filenames
-rw-r--r-- | data-upload.pl | 15 |
1 files changed, 10 insertions, 5 deletions
```diff
diff --git a/data-upload.pl b/data-upload.pl
index 657bd2c..875174c 100644
--- a/data-upload.pl
+++ b/data-upload.pl
@@ -35,6 +35,7 @@
 use strict;
 use warnings;
 use bytes;
+use Encode qw(encode decode);
 
 use File::Temp qw(tempdir);
 use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ;
@@ -90,7 +91,7 @@ sub url_unescape($)
         if ($c eq '%') {
             $c = substr($s, $i+1, 2);
             return undef if (length($c) != 2);
-            $o .= chr(hex $c);
+            $o .= pack("C", hex $c);
             $i += 2;
         } else {
             $o .= $c;
@@ -123,14 +124,18 @@ sub parse_line($)
 }
 
 # This returns true if the given argument is a valid filename in its
-# canonical form. Double slashes, relative paths, and control
-# characters are not permitted.
+# canonical form. Double slashes, relative paths, control characters,
+# and malformed UTF-8 is not permitted.
 sub is_valid_file_name($)
 {
-    my($f) = @_;
+    no bytes;
+    use feature 'unicode_strings';
+
+    my($b) = @_;
+    my $f = decode('UTF-8', $b, Encode::FB_DEFAULT);
 
     return 0 if ($f !~ m:^/:);
-    return 0 if ($f =~ m:[\0-\x1f\x7f-\x9f]:);
+    return 0 if ($f =~ m:[\x{0000}-\x{001f}\x{007f}-\x{00a0}\x{fffd}-\x{ffff}]:);
     return 0 if ($f =~ m:/$:);
     return 0 if ($f =~ m://:);
     return 0 if ($f =~ m:/(\.|\.\.)(/|$):);
```