aboutsummaryrefslogtreecommitdiffstats
path: root/t
diff options
context:
space:
mode:
authorThalia Archibald <thalia@archibald.dev>2024-04-14 01:11:37 +0000
committerJunio C Hamano <gitster@pobox.com>2024-04-15 10:06:17 -0700
commit0df86b66899f9d6f1c09cceb4743c8cef733836a (patch)
tree656c4f83a2c4c4e69ba43b753f5eb8be51136a92 /t
parent91ec36f2cca02d33ab0ed6e87195c6fe801debae (diff)
downloadgit-0df86b66899f9d6f1c09cceb4743c8cef733836a.tar.gz
fast-import: tighten path unquoting
Path parsing in fast-import is inconsistent and many unquoting errors are suppressed or not checked. <path> appears in the grammar in these places: filemodify ::= 'M' SP <mode> (<dataref> | 'inline') SP <path> LF filedelete ::= 'D' SP <path> LF filecopy ::= 'C' SP <path> SP <path> LF filerename ::= 'R' SP <path> SP <path> LF ls ::= 'ls' SP <dataref> SP <path> LF ls-commit ::= 'ls' SP <path> LF and fast-import.c parses them in five different ways: 1. For filemodify and filedelete: Try to unquote <path>. If it unquotes without errors, use the unquoted version; otherwise, treat it as literal bytes to the end of the line (including any number of SP). 2. For filecopy (source) and filerename (source): Try to unquote <path>. If it unquotes without errors, use the unquoted version; otherwise, treat it as literal bytes up to, but not including, the next SP. 3. For filecopy (dest) and filerename (dest): Like 1., but an unquoted empty string is forbidden. 4. For ls: If <path> starts with `"`, unquote it and report parse errors; otherwise, treat it as literal bytes to the end of the line (including any number of SP). 5. For ls-commit: Unquote <path> and report parse errors. (It must start with `"` to disambiguate from ls.) In the first three, any errors from trying to unquote a string are suppressed, so a quoted string that contains invalid escapes would be interpreted as literal bytes. For example, `"\xff"` would fail to unquote (because hex escapes are not supported), and it would instead be interpreted as the byte sequence '"', '\\', 'x', 'f', 'f', '"', which is certainly not intended. Some front-ends erroneously use their language's standard quoting routine instead of matching Git's, which could silently introduce escapes that would be incorrectly parsed due to this and lead to data corruption. The documentation states “To use a source path that contains SP the path must be quoted.”, so it is expected that some implementations depend on spaces being allowed in paths in the final position. Thus we have two documented ways to parse paths, so simplify the implementation to that. Now we have: 1. `parse_path_eol` for filemodify, filedelete, filecopy (dest), filerename (dest), ls, and ls-commit: If <path> starts with `"`, unquote it and report parse errors; otherwise, treat it as literal bytes to the end of the line (including any number of SP). 2. `parse_path_space` for filecopy (source) and filerename (source): If <path> starts with `"`, unquote it and report parse errors; otherwise, treat it as literal bytes up to, but not including, the next SP. It must be followed by SP. There remain two special cases: The dest <path> in filecopy and rename cannot be an unquoted empty string (this will be addressed subsequently) and <path> in ls-commit must be quoted to disambiguate it from ls. Signed-off-by: Thalia Archibald <thalia@archibald.dev> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 't')
-rwxr-xr-xt/t9300-fast-import.sh258
1 files changed, 257 insertions, 1 deletions
diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh
index 60e30fed3c..de2f1304e8 100755
--- a/t/t9300-fast-import.sh
+++ b/t/t9300-fast-import.sh
@@ -2142,6 +2142,7 @@ test_expect_success 'Q: deny note on empty branch' '
EOF
test_must_fail git fast-import <input
'
+
###
### series R (feature and option)
###
@@ -2790,7 +2791,7 @@ test_expect_success 'R: blob appears only once' '
'
###
-### series S
+### series S (mark and path parsing)
###
#
# Make sure missing spaces and EOLs after mark references
@@ -3060,6 +3061,261 @@ test_expect_success 'S: ls with garbage after sha1 must fail' '
test_grep "space after tree-ish" err
'
+#
+# Path parsing
+#
+# There are two sorts of ways a path can be parsed, depending on whether it is
+# the last field on the line. Additionally, ls without a <dataref> has a special
+# case. Test every occurrence of <path> in the grammar against every error case.
+#
+
+#
+# Valid paths at the end of a line: filemodify, filedelete, filecopy (dest),
+# filerename (dest), and ls.
+#
+# commit :301 from root -- modify hello.c (for setup)
+# commit :302 from :301 -- modify $path
+# commit :303 from :302 -- delete $path
+# commit :304 from :301 -- copy hello.c $path
+# commit :305 from :301 -- rename hello.c $path
+# ls :305 $path
+#
+test_path_eol_success () {
+ local test="$1" path="$2" unquoted_path="$3"
+ test_expect_success "S: paths at EOL with $test must work" '
+ test_when_finished "git branch -D S-path-eol" &&
+
+ git fast-import --export-marks=marks.out <<-EOF >out 2>err &&
+ blob
+ mark :401
+ data <<BLOB
+ hello world
+ BLOB
+
+ blob
+ mark :402
+ data <<BLOB
+ hallo welt
+ BLOB
+
+ commit refs/heads/S-path-eol
+ mark :301
+ committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+ data <<COMMIT
+ initial commit
+ COMMIT
+ M 100644 :401 hello.c
+
+ commit refs/heads/S-path-eol
+ mark :302
+ committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+ data <<COMMIT
+ commit filemodify
+ COMMIT
+ from :301
+ M 100644 :402 $path
+
+ commit refs/heads/S-path-eol
+ mark :303
+ committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+ data <<COMMIT
+ commit filedelete
+ COMMIT
+ from :302
+ D $path
+
+ commit refs/heads/S-path-eol
+ mark :304
+ committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+ data <<COMMIT
+ commit filecopy dest
+ COMMIT
+ from :301
+ C hello.c $path
+
+ commit refs/heads/S-path-eol
+ mark :305
+ committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+ data <<COMMIT
+ commit filerename dest
+ COMMIT
+ from :301
+ R hello.c $path
+
+ ls :305 $path
+ EOF
+
+ commit_m=$(grep :302 marks.out | cut -d\ -f2) &&
+ commit_d=$(grep :303 marks.out | cut -d\ -f2) &&
+ commit_c=$(grep :304 marks.out | cut -d\ -f2) &&
+ commit_r=$(grep :305 marks.out | cut -d\ -f2) &&
+ blob1=$(grep :401 marks.out | cut -d\ -f2) &&
+ blob2=$(grep :402 marks.out | cut -d\ -f2) &&
+
+ (
+ printf "100644 blob $blob2\t$unquoted_path\n" &&
+ printf "100644 blob $blob1\thello.c\n"
+ ) | sort >tree_m.exp &&
+ git ls-tree $commit_m | sort >tree_m.out &&
+ test_cmp tree_m.exp tree_m.out &&
+
+ printf "100644 blob $blob1\thello.c\n" >tree_d.exp &&
+ git ls-tree $commit_d >tree_d.out &&
+ test_cmp tree_d.exp tree_d.out &&
+
+ (
+ printf "100644 blob $blob1\t$unquoted_path\n" &&
+ printf "100644 blob $blob1\thello.c\n"
+ ) | sort >tree_c.exp &&
+ git ls-tree $commit_c | sort >tree_c.out &&
+ test_cmp tree_c.exp tree_c.out &&
+
+ printf "100644 blob $blob1\t$unquoted_path\n" >tree_r.exp &&
+ git ls-tree $commit_r >tree_r.out &&
+ test_cmp tree_r.exp tree_r.out &&
+
+ test_cmp out tree_r.exp
+ '
+}
+
+test_path_eol_success 'quoted spaces' '" hello world.c "' ' hello world.c '
+test_path_eol_success 'unquoted spaces' ' hello world.c ' ' hello world.c '
+
+#
+# Valid paths before a space: filecopy (source) and filerename (source).
+#
+# commit :301 from root -- modify $path (for setup)
+# commit :302 from :301 -- copy $path hello2.c
+# commit :303 from :301 -- rename $path hello2.c
+#
+test_path_space_success () {
+ local test="$1" path="$2" unquoted_path="$3"
+ test_expect_success "S: paths before space with $test must work" '
+ test_when_finished "git branch -D S-path-space" &&
+
+ git fast-import --export-marks=marks.out <<-EOF 2>err &&
+ blob
+ mark :401
+ data <<BLOB
+ hello world
+ BLOB
+
+ commit refs/heads/S-path-space
+ mark :301
+ committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+ data <<COMMIT
+ initial commit
+ COMMIT
+ M 100644 :401 $path
+
+ commit refs/heads/S-path-space
+ mark :302
+ committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+ data <<COMMIT
+ commit filecopy source
+ COMMIT
+ from :301
+ C $path hello2.c
+
+ commit refs/heads/S-path-space
+ mark :303
+ committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+ data <<COMMIT
+ commit filerename source
+ COMMIT
+ from :301
+ R $path hello2.c
+
+ EOF
+
+ commit_c=$(grep :302 marks.out | cut -d\ -f2) &&
+ commit_r=$(grep :303 marks.out | cut -d\ -f2) &&
+ blob=$(grep :401 marks.out | cut -d\ -f2) &&
+
+ (
+ printf "100644 blob $blob\t$unquoted_path\n" &&
+ printf "100644 blob $blob\thello2.c\n"
+ ) | sort >tree_c.exp &&
+ git ls-tree $commit_c | sort >tree_c.out &&
+ test_cmp tree_c.exp tree_c.out &&
+
+ printf "100644 blob $blob\thello2.c\n" >tree_r.exp &&
+ git ls-tree $commit_r >tree_r.out &&
+ test_cmp tree_r.exp tree_r.out
+ '
+}
+
+test_path_space_success 'quoted spaces' '" hello world.c "' ' hello world.c '
+test_path_space_success 'no unquoted spaces' 'hello_world.c' 'hello_world.c'
+
+#
+# Test a single commit change with an invalid path. Run it with all occurrences
+# of <path> in the grammar against all error kinds.
+#
+test_path_fail () {
+ local change="$1" what="$2" prefix="$3" path="$4" suffix="$5" err_grep="$6"
+ test_expect_success "S: $change with $what must fail" '
+ test_must_fail git fast-import <<-EOF 2>err &&
+ blob
+ mark :1
+ data <<BLOB
+ hello world
+ BLOB
+
+ commit refs/heads/S-path-fail
+ mark :2
+ committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+ data <<COMMIT
+ commit setup
+ COMMIT
+ M 100644 :1 hello.c
+
+ commit refs/heads/S-path-fail
+ committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+ data <<COMMIT
+ commit with bad path
+ COMMIT
+ from :2
+ $prefix$path$suffix
+ EOF
+
+ test_grep "$err_grep" err
+ '
+}
+
+test_path_base_fail () {
+ local change="$1" prefix="$2" field="$3" suffix="$4"
+ test_path_fail "$change" 'unclosed " in '"$field" "$prefix" '"hello.c' "$suffix" "Invalid $field"
+ test_path_fail "$change" "invalid escape in quoted $field" "$prefix" '"hello\xff"' "$suffix" "Invalid $field"
+}
+test_path_eol_quoted_fail () {
+ local change="$1" prefix="$2" field="$3"
+ test_path_base_fail "$change" "$prefix" "$field" ''
+ test_path_fail "$change" "garbage after quoted $field" "$prefix" '"hello.c"' 'x' "Garbage after $field"
+ test_path_fail "$change" "space after quoted $field" "$prefix" '"hello.c"' ' ' "Garbage after $field"
+}
+test_path_eol_fail () {
+ local change="$1" prefix="$2" field="$3"
+ test_path_eol_quoted_fail "$change" "$prefix" "$field"
+}
+test_path_space_fail () {
+ local change="$1" prefix="$2" field="$3"
+ test_path_base_fail "$change" "$prefix" "$field" ' world.c'
+ test_path_fail "$change" "missing space after quoted $field" "$prefix" '"hello.c"' 'x world.c' "Missing space after $field"
+ test_path_fail "$change" "missing space after unquoted $field" "$prefix" 'hello.c' '' "Missing space after $field"
+}
+
+test_path_eol_fail filemodify 'M 100644 :1 ' path
+test_path_eol_fail filedelete 'D ' path
+test_path_space_fail filecopy 'C ' source
+test_path_eol_fail filecopy 'C hello.c ' dest
+test_path_space_fail filerename 'R ' source
+test_path_eol_fail filerename 'R hello.c ' dest
+test_path_eol_fail 'ls (in commit)' 'ls :2 ' path
+
+# When 'ls' has no <dataref>, the <path> must be quoted.
+test_path_eol_quoted_fail 'ls (without dataref in commit)' 'ls ' path
+
###
### series T (ls)
###