aboutsummaryrefslogtreecommitdiff
path: root/dir.h
diff options
context:
space:
mode:
authorNguyễn Thái Ngọc Duy <pclouds@gmail.com>2015-03-08 17:12:25 +0700
committerJunio C Hamano <gitster@pobox.com>2015-03-12 13:45:14 -0700
commit0dcb8d7fe0ec2687d4a6ae201ae72907d862437c (patch)
treea30fd21cac44e671c2204c923f23b07db1920def /dir.h
parent55fe6f51f41f254d3d87994d18bff04664aa013b (diff)
downloadgit-0dcb8d7fe0ec2687d4a6ae201ae72907d862437c.tar.gz
git-0dcb8d7fe0ec2687d4a6ae201ae72907d862437c.tar.xz
untracked cache: record .gitignore information and dir hierarchy
The idea is that if we can capture all input and (non-recursive) output of read_directory_recursive(), and can verify later that all the input is the same, then the second r_d_r() should produce the same output as in the first run. The requirement for this to work is stat info of a directory MUST change if an entry is added to or removed from that directory (and should not change often otherwise). If your OS and filesystem do not meet this requirement, untracked cache is not for you. Most file systems on *nix should be fine. On Windows, NTFS is fine while FAT may not be [1] even though FAT on Linux seems to be fine. The list of input of r_d_r() is in the big comment block in dir.h. In short, the output of a directory (not counting subdirs) mainly depends on stat info of the directory in question, all .gitignore leading to it and the check_only flag when r_d_r() is called recursively. This patch records all this info (and the output) as r_d_r() runs. Two hash_sha1_file() are required for $GIT_DIR/info/exclude and core.excludesfile unless their stat data matches. hash_sha1_file() is only needed when .gitignore files in the worktree are modified, otherwise their SHA-1 in index is used (see the previous patch). We could store stat data for .gitignore files so we don't have to rehash them if their content is different from index, but I think .gitignore files are rarely modified, so not worth extra cache data (and hashing penalty in read-cache.c:verify_hdr(), as we will be storing this as an index extension). The implication is, if you change .gitignore, you had better add it to the index soon or you lose all the benefit of untracked cache because a modified .gitignore invalidates all subdirs recursively. This is especially bad for .gitignore at root. This cached output is about untracked files only, not ignored files because the number of untracked files is usually small, so small cache overhead, while the number of ignored files could go really high (e.g. 
*.o files mixing with source code). [1] "Description of NTFS date and time stamps for files and folders" http://support.microsoft.com/kb/299648 Helped-by: Torsten Bögershausen <tboegi@web.de> Helped-by: David Turner <dturner@twopensource.com> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'dir.h')
-rw-r--r--dir.h60
1 files changed, 60 insertions, 0 deletions
diff --git a/dir.h b/dir.h
index cdca71b3b..9ab74b4c1 100644
--- a/dir.h
+++ b/dir.h
@@ -66,6 +66,7 @@ struct exclude_stack {
struct exclude_stack *prev; /* the struct exclude_stack for the parent directory */
int baselen;
int exclude_ix; /* index of exclude_list within EXC_DIRS exclude_list_group */
+ struct untracked_cache_dir *ucd;
};
struct exclude_list_group {
@@ -79,6 +80,60 @@ struct sha1_stat {
int valid;
};
+/*
+ * Untracked cache
+ *
+ * The following inputs are sufficient to determine what files in a
+ * directory are excluded:
+ *
+ * - The list of files and directories of the directory in question
+ * - The $GIT_DIR/index
+ * - dir_struct flags
+ * - The content of $GIT_DIR/info/exclude
+ * - The content of core.excludesfile
+ * - The content (or the lack) of .gitignore of all parent directories
+ * from $GIT_WORK_TREE
+ * - The check_only flag in read_directory_recursive (for
+ * DIR_HIDE_EMPTY_DIRECTORIES)
+ *
+ * The first input can be checked using directory mtime. In many
+ * filesystems, directory mtime (stat_data field) is updated when its
+ * files or direct subdirs are added or removed.
+ *
+ * The second one can be hooked from cache_tree_invalidate_path().
+ * Whenever a file (or a submodule) is added or removed from a
+ * directory, we invalidate that directory.
+ *
+ * The remaining inputs are easy, their SHA-1 could be used to verify
+ * their contents (exclude_sha1[], info_exclude_sha1[] and
+ * excludes_file_sha1[])
+ */
+struct untracked_cache_dir { /* per-directory record of the untracked cache */
+ struct untracked_cache_dir **dirs; /* presumably sub-directory records, counted by dirs_nr -- confirm in dir.c */
+ char **untracked; /* presumably names of untracked entries, counted by untracked_nr -- confirm in dir.c */
+ struct stat_data stat_data; /* directory stat info; a change is taken to mean entries were added or removed */
+ unsigned int untracked_alloc, dirs_nr, dirs_alloc;
+ unsigned int untracked_nr;
+ unsigned int check_only : 1; /* the check_only flag of read_directory_recursive() */
+ /* null SHA-1 means this directory does not have .gitignore */
+ unsigned char exclude_sha1[20];
+ char name[FLEX_ARRAY]; /* trailing flexible array; presumably the directory name -- confirm */
+};
+
+struct untracked_cache { /* top-level untracked cache state */
+ struct sha1_stat ss_info_exclude; /* used to verify the content of $GIT_DIR/info/exclude */
+ struct sha1_stat ss_excludes_file; /* used to verify the content of core.excludesfile */
+ const char *exclude_per_dir; /* presumably the per-directory exclude file name (e.g. .gitignore) -- confirm */
+ /*
+ * dir_struct#flags must match dir_flags or the untracked
+ * cache is ignored.
+ */
+ unsigned dir_flags;
+ struct untracked_cache_dir *root; /* presumably the record for the top of the worktree -- confirm */
+ /* Statistics */
+ int dir_created;
+};
+
struct dir_struct {
int nr, alloc;
int ignored_nr, ignored_alloc;
@@ -126,6 +181,11 @@ struct dir_struct {
struct exclude_stack *exclude_stack;
struct exclude *exclude;
struct strbuf basebuf;
+
+ /* Enable untracked file cache if set */
+ struct untracked_cache *untracked;
+ struct sha1_stat ss_info_exclude;
+ struct sha1_stat ss_excludes_file;
};
/*