From 96b50cc19003d54f5962d65597c94e2c52eb22e7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 24 Nov 2014 13:37:56 -0500 Subject: read-tree: add tests for confusing paths like ".." and ".git" We should prevent nonsense paths from entering the index in the first place, as they can cause confusing results if they are ever checked out into the working tree. We already do so, but we never tested it. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t1014-read-tree-confusing.sh | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100755 t/t1014-read-tree-confusing.sh (limited to 't') diff --git a/t/t1014-read-tree-confusing.sh b/t/t1014-read-tree-confusing.sh new file mode 100755 index 000000000..7b31d5319 --- /dev/null +++ b/t/t1014-read-tree-confusing.sh @@ -0,0 +1,32 @@ +#!/bin/sh + +test_description='check that read-tree rejects confusing paths' +. ./test-lib.sh + +test_expect_success 'create base tree' ' + echo content >file && + git add file && + git commit -m base && + blob=$(git rev-parse HEAD:file) && + tree=$(git rev-parse HEAD^{tree}) +' + +while read path; do + test_expect_success "reject $path at end of path" ' + printf "100644 blob %s\t%s" "$blob" "$path" >tree && + bogus=$(git mktree tree && + bogus=$(git mktree Date: Mon, 24 Nov 2014 13:39:12 -0500 Subject: verify_dotfile(): reject .git case-insensitively We do not allow ".git" to enter into the index as a path component, because checking out the result to the working tree may causes confusion for subsequent git commands. However, on case-insensitive file systems, ".Git" or ".GIT" is the same. We should catch and prevent those, too. Note that technically we could allow this for repos on case-sensitive filesystems. But there's not much point. It's unlikely that anybody cares, and it creates a repository that is unexpectedly non-portable to other systems. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t1014-read-tree-confusing.sh | 1 + 1 file changed, 1 insertion(+) (limited to 't') diff --git a/t/t1014-read-tree-confusing.sh b/t/t1014-read-tree-confusing.sh index 7b31d5319..eff8aedf7 100755 --- a/t/t1014-read-tree-confusing.sh +++ b/t/t1014-read-tree-confusing.sh @@ -27,6 +27,7 @@ done <<-\EOF . .. .git +.GIT EOF test_done -- cgit v1.2.1 From 450870cba7a9bac94b5527021800bd8bf037c99c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 24 Nov 2014 13:40:11 -0500 Subject: t1450: refactor ".", "..", and ".git" fsck tests We check that fsck notices and complains about confusing paths in trees. However, there are a few shortcomings: 1. We check only for these paths as file entries, not as intermediate paths (so ".git" and not ".git/foo"). 2. We check "." and ".." together, so it is possible that we notice only one and not the other. 3. We repeat a lot of boilerplate. Let's use some loops to be more thorough in our testing, and still end up with shorter code. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t1450-fsck.sh | 57 +++++++++++++++++++++++++++------------------------------ 1 file changed, 27 insertions(+), 30 deletions(-) (limited to 't') diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index d730734fd..4d8a4fe3c 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -237,35 +237,32 @@ test_expect_success 'fsck notices submodule entry pointing to null sha1' ' ) ' -test_expect_success 'fsck notices "." and ".." in trees' ' - ( - git init dots && - cd dots && - blob=$(echo foo | git hash-object -w --stdin) && - tab=$(printf "\\t") && - git mktree <<-EOF && - 100644 blob $blob$tab. - 100644 blob $blob$tab.. - EOF - git fsck 2>out && - cat out && - grep "warning.*\\." out - ) -' - -test_expect_success 'fsck notices ".git" in trees' ' - ( - git init dotgit && - cd dotgit && - blob=$(echo foo | git hash-object -w --stdin) && - tab=$(printf "\\t") && - git mktree <<-EOF && - 100644 blob $blob$tab.git - EOF - git fsck 2>out && - cat out && - grep "warning.*\\.git" out - ) -' +while read name path; do + while read mode type; do + test_expect_success "fsck notices $path as $type" ' + ( + git init $name-$type && + cd $name-$type && + echo content >file && + git add file && + git commit -m base && + blob=$(git rev-parse :file) && + tree=$(git rev-parse HEAD^{tree}) && + value=$(eval "echo \$$type") && + printf "$mode $type %s\t%s" "$value" "$path" >bad && + git mktree out && + cat out && + grep "warning.*\\." out + )' + done <<-\EOF + 100644 blob + 040000 tree + EOF +done <<-\EOF +dot . +dotdot .. +dotgit .git +EOF test_done -- cgit v1.2.1 From 76e86fc6e3523d28e8db00e7b10c33c553d996b8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 24 Nov 2014 13:40:44 -0500 Subject: fsck: notice .git case-insensitively We complain about ".git" in a tree because it cannot be loaded into the index or checked out. Since we now also reject ".GIT" case-insensitively, fsck should notice the same, so that errors do not propagate. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t1450-fsck.sh | 1 + 1 file changed, 1 insertion(+) (limited to 't') diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index 4d8a4fe3c..043871255 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -263,6 +263,7 @@ done <<-\EOF dot . dotdot .. dotgit .git +dotgit-case .GIT EOF test_done -- cgit v1.2.1 From a42643aa8d88a2278acad2da6bc702e426476e9b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 15 Dec 2014 18:15:20 -0500 Subject: read-cache: optionally disallow HFS+ .git variants The point of disallowing ".git" in the index is that we would never want to accidentally overwrite files in the repository directory. But this means we need to respect the filesystem's idea of when two paths are equal. The prior commit added a helper to make such a comparison for HFS+; let's use it in verify_path. We make this check optional for two reasons: 1. It restricts the set of allowable filenames, which is unnecessary for people who are not on HFS+. In practice this probably doesn't matter, though, as the restricted names are rather obscure and almost certainly would never come up in practice. 2. It has a minor performance penalty for every path we insert into the index. This patch ties the check to the core.protectHFS config option. Though this is expected to be most useful on OS X, we allow it to be set everywhere, as HFS+ may be mounted on other platforms. The variable does default to on for OS X, though. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t1014-read-tree-confusing.sh | 24 ++++++++++++++++++++---- t/test-lib.sh | 6 +++++- 2 files changed, 25 insertions(+), 5 deletions(-) (limited to 't') diff --git a/t/t1014-read-tree-confusing.sh b/t/t1014-read-tree-confusing.sh index eff8aedf7..ec310d593 100755 --- a/t/t1014-read-tree-confusing.sh +++ b/t/t1014-read-tree-confusing.sh @@ -11,23 +11,39 @@ test_expect_success 'create base tree' ' tree=$(git rev-parse HEAD^{tree}) ' -while read path; do - test_expect_success "reject $path at end of path" ' +test_expect_success 'enable core.protectHFS for rejection tests' ' + git config core.protectHFS true +' + +while read path pretty; do + : ${pretty:=$path} + test_expect_success "reject $pretty at end of path" ' printf "100644 blob %s\t%s" "$blob" "$path" >tree && bogus=$(git mktree tree && bogus=$(git mktree tree && + ok=$(git mktree Date: Mon, 15 Dec 2014 18:21:57 -0500 Subject: fsck: complain about HFS+ ".git" aliases in trees Now that the index can block pathnames that case-fold to ".git" on HFS+, it would be helpful for fsck to notice such problematic paths. This lets servers which use receive.fsckObjects block them before the damage spreads. Note that the fsck check is always on, even for systems without core.protectHFS set. This is technically more restrictive than we need to be, as a set of users on ext4 could happily use these odd filenames without caring about HFS+. However, on balance, it's helpful for all servers to block these (because the paths can be used for mischief, and servers which bother to fsck would want to stop the spread whether they are on HFS+ themselves or not), and hardly anybody will be affected (because the blocked names are variants of .git with invisible Unicode code-points mixed in, meaning mischief is almost certainly what the tree author had in mind). Ideally these would be controlled by a separate "fsck.protectHFS" flag. However, it would be much nicer to be able to enable/disable _any_ fsck flag individually, and any scheme we choose should match such a system. Given the likelihood of anybody using such a path in practice, it is not unreasonable to wait until such a system materializes. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t1450-fsck.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 't') diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index 043871255..8158b98e6 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -237,9 +237,10 @@ test_expect_success 'fsck notices submodule entry pointing to null sha1' ' ) ' -while read name path; do +while read name path pretty; do while read mode type; do - test_expect_success "fsck notices $path as $type" ' + : ${pretty:=$path} + test_expect_success "fsck notices $pretty as $type" ' ( git init $name-$type && cd $name-$type && @@ -259,11 +260,12 @@ while read name path; do 100644 blob 040000 tree EOF -done <<-\EOF +done <<-EOF dot . dotdot .. dotgit .git dotgit-case .GIT +dotgit-unicode .gI${u200c}T .gI{u200c}T EOF test_done -- cgit v1.2.1 From 2b4c6efc82119ba8f4169717473d95d1a89e4c69 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 16 Dec 2014 23:46:59 +0100 Subject: read-cache: optionally disallow NTFS .git variants The point of disallowing ".git" in the index is that we would never want to accidentally overwrite files in the repository directory. But this means we need to respect the filesystem's idea of when two paths are equal. The prior commit added a helper to make such a comparison for NTFS and FAT32; let's use it in verify_path(). We make this check optional for two reasons: 1. It restricts the set of allowable filenames, which is unnecessary for people who are not on NTFS nor FAT32. In practice this probably doesn't matter, though, as the restricted names are rather obscure and almost certainly would never come up in practice. 2. It has a minor performance penalty for every path we insert into the index. This patch ties the check to the core.protectNTFS config option. Though this is expected to be most useful on Windows, we allow it to be set everywhere, as NTFS may be mounted on other platforms. The variable does default to on for Windows, though. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t1014-read-tree-confusing.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 't') diff --git a/t/t1014-read-tree-confusing.sh b/t/t1014-read-tree-confusing.sh index ec310d593..2f5a25d50 100755 --- a/t/t1014-read-tree-confusing.sh +++ b/t/t1014-read-tree-confusing.sh @@ -15,8 +15,17 @@ test_expect_success 'enable core.protectHFS for rejection tests' ' git config core.protectHFS true ' +test_expect_success 'enable core.protectNTFS for rejection tests' ' + git config core.protectNTFS true +' + while read path pretty; do : ${pretty:=$path} + case "$path" in + *SPACE) + path="${path%SPACE} " + ;; + esac test_expect_success "reject $pretty at end of path" ' printf "100644 blob %s\t%s" "$blob" "$path" >tree && bogus=$(git mktree Date: Wed, 10 Dec 2014 22:28:27 +0100 Subject: fsck: complain about NTFS ".git" aliases in trees Now that the index can block pathnames that can be mistaken to mean ".git" on NTFS and FAT32, it would be helpful for fsck to notice such problematic paths. This lets servers which use receive.fsckObjects block them before the damage spreads. Note that the fsck check is always on, even for systems without core.protectNTFS set. This is technically more restrictive than we need to be, as a set of users on ext4 could happily use these odd filenames without caring about NTFS. However, on balance, it's helpful for all servers to block these (because the paths can be used for mischief, and servers which bother to fsck would want to stop the spread whether they are on NTFS themselves or not), and hardly anybody will be affected (because the blocked names are variants of .git or git~1, meaning mischief is almost certainly what the tree author had in mind). Ideally these would be controlled by a separate "fsck.protectNTFS" flag. However, it would be much nicer to be able to enable/disable _any_ fsck flag individually, and any scheme we choose should match such a system. Given the likelihood of anybody using such a path in practice, it is not unreasonable to wait until such a system materializes. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t1450-fsck.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 't') diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index 8158b98e6..6edd99a81 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -251,10 +251,10 @@ while read name path pretty; do tree=$(git rev-parse HEAD^{tree}) && value=$(eval "echo \$$type") && printf "$mode $type %s\t%s" "$value" "$path" >bad && - git mktree out && cat out && - grep "warning.*\\." out + grep "warning.*tree $bad_tree" out )' done <<-\EOF 100644 blob @@ -266,6 +266,11 @@ dotdot .. dotgit .git dotgit-case .GIT dotgit-unicode .gI${u200c}T .gI{u200c}T +dotgit-case2 .Git +git-tilde1 git~1 +dotgitdot .git. +dot-backslash-case .\\\\.GIT\\\\foobar +dotgit-case-backslash .git\\\\foobar EOF test_done -- cgit v1.2.1