aboutsummaryrefslogtreecommitdiff
path: root/symlinks.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2008-05-09 09:21:07 -0700
committer Junio C Hamano <gitster@pobox.com> 2008-05-10 18:16:31 -0700
commit c40641b77b0274186fd1b327d5dc3246f814aaaf (patch)
tree 9d455fe976c1d08a3ea0cab763bb45c312e0aaec /symlinks.c
parent d177cab0480f9fa172103071203fed1bff95f5c2 (diff)
download git-c40641b77b0274186fd1b327d5dc3246f814aaaf.tar.gz
git-c40641b77b0274186fd1b327d5dc3246f814aaaf.tar.xz
Optimize symlink/directory detection
This is the base for making symlink detection in the middle of a pathname saner and (much) more efficient. Under various loads, we want to verify that the full path leading up to a filename is a real directory tree, and that when we successfully do an 'lstat()' on a filename, we don't get a false positive due to a symlink in the middle of the path that git should have seen as a symlink, not as a normal path component. The 'has_symlink_leading_path()' function already did this, and cached a single level of symlink information, but didn't cache the _lack_ of a symlink, so the normal behaviour was actually the wrong way around, and we ended up doing an 'lstat()' on each path component to check that it was a real directory. This caches the last detected full directory and symlink entries, and speeds things up a lot, especially for deep directory structures, by avoiding an 'lstat()' of every directory leading up to each entry in the index. [ This can - and should - probably be extended so that we eventually never do a bare 'lstat()' on any path entries at *all* when checking the index, but always check the full path carefully. Right now we do not generally check the whole path for all our normal quick index revalidation. We should also make sure that we're careful about all the invalidation, i.e. when we remove a link and replace it by a directory we should invalidate the symlink cache if it matches (and vice versa for the directory cache). But regardless, the basic function needs to be sane to do that. The old 'has_symlink_leading_path()' was not capable enough - or indeed the code readable enough - to really do that sanely. So I'm pushing this as not just an optimization, but as a base for further work. ] Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'symlinks.c')
-rw-r--r-- symlinks.c | 82
1 files changed, 49 insertions, 33 deletions
diff --git a/symlinks.c b/symlinks.c
index be9ace6c0..5a5e781a1 100644
--- a/symlinks.c
+++ b/symlinks.c
@@ -1,48 +1,64 @@
#include "cache.h"
-int has_symlink_leading_path(const char *name, char *last_symlink)
-{
+struct pathname {
+ int len;
char path[PATH_MAX];
- const char *sp, *ep;
- char *dp;
-
- sp = name;
- dp = path;
-
- if (last_symlink && *last_symlink) {
- size_t last_len = strlen(last_symlink);
- size_t len = strlen(name);
- if (last_len < len &&
- !strncmp(name, last_symlink, last_len) &&
- name[last_len] == '/')
- return 1;
- *last_symlink = '\0';
+};
+
+/* Return matching pathname prefix length, or zero if not matching */
+static inline int match_pathname(int len, const char *name, struct pathname *match)
+{
+ int match_len = match->len;
+ return (len > match_len &&
+ name[match_len] == '/' &&
+ !memcmp(name, match->path, match_len)) ? match_len : 0;
+}
+
+static inline void set_pathname(int len, const char *name, struct pathname *match)
+{
+ if (len < PATH_MAX) {
+ match->len = len;
+ memcpy(match->path, name, len);
+ match->path[len] = 0;
}
+}
+
+int has_symlink_leading_path(int len, const char *name)
+{
+ static struct pathname link, nonlink;
+ char path[PATH_MAX];
+ struct stat st;
+ char *sp;
+ int known_dir;
- while (1) {
- size_t len;
- struct stat st;
+ /*
+ * See if the last known symlink cache matches.
+ */
+ if (match_pathname(len, name, &link))
+ return 1;
- ep = strchr(sp, '/');
- if (!ep)
- break;
- len = ep - sp;
- if (PATH_MAX <= dp + len - path + 2)
- return 0; /* new name is longer than that??? */
- memcpy(dp, sp, len);
- dp[len] = 0;
+ /*
+ * Get rid of the last known directory part
+ */
+ known_dir = match_pathname(len, name, &nonlink);
+
+ while ((sp = strchr(name + known_dir + 1, '/')) != NULL) {
+ int thislen = sp - name ;
+ memcpy(path, name, thislen);
+ path[thislen] = 0;
if (lstat(path, &st))
return 0;
+ if (S_ISDIR(st.st_mode)) {
+ set_pathname(thislen, path, &nonlink);
+ known_dir = thislen;
+ continue;
+ }
if (S_ISLNK(st.st_mode)) {
- if (last_symlink)
- strcpy(last_symlink, path);
+ set_pathname(thislen, path, &link);
return 1;
}
-
- dp[len++] = '/';
- dp = dp + len;
- sp = ep + 1;
+ break;
}
return 0;
}