aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@zytor.com>2011-09-26 16:20:58 -0700
committerH. Peter Anvin <hpa@zytor.com>2011-09-26 16:20:58 -0700
commit737af4895f2b11324e328a2421d76f1e1564f5a0 (patch)
tree7d32795617cfd0cc1f927443172c8a1bae9ddf25
parentc07d2757d8b0f128aca6242bcc56da525d7a687c (diff)
downloadkup-737af4895f2b11324e328a2421d76f1e1564f5a0.tar.gz
Clean up the verification of valid UTF-8 in filenames
-rw-r--r--data-upload.pl15
1 files changed, 10 insertions, 5 deletions
diff --git a/data-upload.pl b/data-upload.pl
index 657bd2c..875174c 100644
--- a/data-upload.pl
+++ b/data-upload.pl
@@ -35,6 +35,7 @@
use strict;
use warnings;
use bytes;
+use Encode qw(encode decode);
use File::Temp qw(tempdir);
use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ;
@@ -90,7 +91,7 @@ sub url_unescape($)
if ($c eq '%') {
$c = substr($s, $i+1, 2);
return undef if (length($c) != 2);
- $o .= chr(hex $c);
+ $o .= pack("C", hex $c);
$i += 2;
} else {
$o .= $c;
@@ -123,14 +124,18 @@ sub parse_line($)
}
# This returns true if the given argument is a valid filename in its
-# canonical form. Double slashes, relative paths, and control
-# characters are not permitted.
+# canonical form. Double slashes, relative paths, control characters,
+# and malformed UTF-8 are not permitted.
sub is_valid_file_name($)
{
- my($f) = @_;
+ no bytes;
+ use feature 'unicode_strings';
+
+ my($b) = @_;
+ my $f = decode('UTF-8', $b, Encode::FB_DEFAULT);
return 0 if ($f !~ m:^/:);
- return 0 if ($f =~ m:[\0-\x1f\x7f-\x9f]:);
+ return 0 if ($f =~ m:[\x{0000}-\x{001f}\x{007f}-\x{00a0}\x{fffd}-\x{ffff}]:);
return 0 if ($f =~ m:/$:);
return 0 if ($f =~ m://:);
return 0 if ($f =~ m:/(\.|\.\.)(/|$):);