From ef1286d3c0ba714c6c2ae87e14edf3c462aef114 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:06:42 -0400 Subject: use xsnprintf for generating git object headers We generally use 32-byte buffers to format git's "type size" header fields. These should not generally overflow unless you can produce some truly gigantic objects (and our types come from our internal array of constant strings). But it is a good idea to use xsnprintf to make sure this is the case. Note that we slightly modify the interface to write_sha1_file_prepare, which now uses "hdrlen" as an "in" parameter as well as an "out" (on the way in it stores the allocated size of the header, and on the way out it returns the ultimate size of the header). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_file.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'sha1_file.c') diff --git a/sha1_file.c b/sha1_file.c index d295a3225..f10609147 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1464,7 +1464,7 @@ int check_sha1_signature(const unsigned char *sha1, void *map, return -1; /* Generate the header */ - hdrlen = sprintf(hdr, "%s %lu", typename(obj_type), size) + 1; + hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", typename(obj_type), size) + 1; /* Sha1.. */ git_SHA1_Init(&c); @@ -2930,7 +2930,7 @@ static void write_sha1_file_prepare(const void *buf, unsigned long len, git_SHA_CTX c; /* Generate the header */ - *hdrlen = sprintf(hdr, "%s %lu", type, len)+1; + *hdrlen = xsnprintf(hdr, *hdrlen, "%s %lu", type, len)+1; /* Sha1.. 
*/ git_SHA1_Init(&c); @@ -2993,7 +2993,7 @@ int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1) { char hdr[32]; - int hdrlen; + int hdrlen = sizeof(hdr); write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen); return 0; } @@ -3139,7 +3139,7 @@ static int freshen_packed_object(const unsigned char *sha1) int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1) { char hdr[32]; - int hdrlen; + int hdrlen = sizeof(hdr); /* Normally if we have it in the pack then we do not bother writing * it out into .git/objects/??/?{38} file. @@ -3157,7 +3157,8 @@ int hash_sha1_file_literally(const void *buf, unsigned long len, const char *typ int hdrlen, status = 0; /* type string, SP, %lu of the length plus NUL must fit this */ - header = xmalloc(strlen(type) + 32); + hdrlen = strlen(type) + 32; + header = xmalloc(hdrlen); write_sha1_file_prepare(buf, len, type, sha1, header, &hdrlen); if (!(flags & HASH_WRITE_OBJECT)) @@ -3185,7 +3186,7 @@ int force_object_loose(const unsigned char *sha1, time_t mtime) buf = read_packed_sha1(sha1, &type, &len); if (!buf) return error("cannot read sha1_file for %s", sha1_to_hex(sha1)); - hdrlen = sprintf(hdr, "%s %lu", typename(type), len) + 1; + hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", typename(type), len) + 1; ret = write_loose_object(sha1, hdr, hdrlen, buf, len, mtime); free(buf); -- cgit v1.2.1 From 48bcc1c3cc09db1a6da0ce47460fae6e5f7edd4b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:06:55 -0400 Subject: add_packed_git: convert strcpy into xsnprintf We have the path "foo.idx", and we create a buffer big enough to hold "foo.pack" and "foo.keep", and then strcpy straight into it. This isn't a bug (we have enough space), but it's very hard to tell from the strcpy that this is so. Let's instead use strip_suffix to take off the ".idx", record the size of our allocation, and use xsnprintf to make sure we don't violate our assumptions. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_file.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'sha1_file.c') diff --git a/sha1_file.c b/sha1_file.c index f10609147..592226eb7 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1146,11 +1146,12 @@ static void try_to_free_pack_memory(size_t size) release_pack_memory(size); } -struct packed_git *add_packed_git(const char *path, int path_len, int local) +struct packed_git *add_packed_git(const char *path, size_t path_len, int local) { static int have_set_try_to_free_routine; struct stat st; - struct packed_git *p = alloc_packed_git(path_len + 2); + size_t alloc; + struct packed_git *p; if (!have_set_try_to_free_routine) { have_set_try_to_free_routine = 1; @@ -1161,18 +1162,22 @@ struct packed_git *add_packed_git(const char *path, int path_len, int local) * Make sure a corresponding .pack file exists and that * the index looks sane. */ - path_len -= strlen(".idx"); - if (path_len < 1) { - free(p); + if (!strip_suffix_mem(path, &path_len, ".idx")) return NULL; - } + + /* + * ".pack" is long enough to hold any suffix we're adding (and + * the use xsnprintf double-checks that) + */ + alloc = path_len + strlen(".pack") + 1; + p = alloc_packed_git(alloc); memcpy(p->pack_name, path, path_len); - strcpy(p->pack_name + path_len, ".keep"); + xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep"); if (!access(p->pack_name, F_OK)) p->pack_keep = 1; - strcpy(p->pack_name + path_len, ".pack"); + xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack"); if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) { free(p); return NULL; -- cgit v1.2.1 From 9ae97018fb2e7f30ab92fdc2965d1dcff2c5c296 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:09 -0400 Subject: use strip_suffix and xstrfmt to replace suffix When we want to convert "foo.pack" to "foo.idx", we do it by duplicating the original string and then munging the bytes in place. 
Let's use strip_suffix and xstrfmt instead, which has several advantages: 1. It's more clear what the intent is. 2. It does not implicitly rely on the fact that strlen(".idx") <= strlen(".pack") to avoid an overflow. 3. We communicate the assumption that the input file ends with ".pack" (and get a run-time check that this is so). 4. We drop calls to strcpy, which makes auditing the code base easier. Likewise, we can do this to convert ".pack" to ".bitmap", avoiding some manual memory computation. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'sha1_file.c') diff --git a/sha1_file.c b/sha1_file.c index 592226eb7..2be1afdde 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -671,13 +671,15 @@ static int check_packed_git_idx(const char *path, struct packed_git *p) int open_pack_index(struct packed_git *p) { char *idx_name; + size_t len; int ret; if (p->index_data) return 0; - idx_name = xstrdup(p->pack_name); - strcpy(idx_name + strlen(idx_name) - strlen(".pack"), ".idx"); + if (!strip_suffix(p->pack_name, ".pack", &len)) + die("BUG: pack_name does not end in .pack"); + idx_name = xstrfmt("%.*s.idx", (int)len, p->pack_name); ret = check_packed_git_idx(idx_name, p); free(idx_name); return ret; -- cgit v1.2.1 From ac5190cc48bd75586566ccc052304d40bbc63147 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:34 -0400 Subject: sha1_get_pack_name: use a strbuf We do some manual memory computation here, and there's no check that our 60 is not overflowed by the raw sprintf (it isn't, because the "which" parameter is never longer than "pack"). We can simplify this greatly with a strbuf. Technically the end result is not identical, as the original took care not to rewrite the object directory on each call for performance reasons. 
We could do that here, too (by saving the baselen and resetting to it), but it's not worth the complexity; this function is not called a lot (generally once per packfile that we open). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_file.c | 39 ++++++++++----------------------------- 1 file changed, 10 insertions(+), 29 deletions(-) (limited to 'sha1_file.c') diff --git a/sha1_file.c b/sha1_file.c index 2be1afdde..c26fdcbd8 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -208,44 +208,25 @@ const char *sha1_file_name(const unsigned char *sha1) * provided by the caller. which should be "pack" or "idx". */ static char *sha1_get_pack_name(const unsigned char *sha1, - char **name, char **base, const char *which) + struct strbuf *buf, + const char *which) { - static const char hex[] = "0123456789abcdef"; - char *buf; - int i; - - if (!*base) { - const char *sha1_file_directory = get_object_directory(); - int len = strlen(sha1_file_directory); - *base = xmalloc(len + 60); - sprintf(*base, "%s/pack/pack-1234567890123456789012345678901234567890.%s", - sha1_file_directory, which); - *name = *base + len + 11; - } - - buf = *name; - - for (i = 0; i < 20; i++) { - unsigned int val = *sha1++; - *buf++ = hex[val >> 4]; - *buf++ = hex[val & 0xf]; - } - - return *base; + strbuf_reset(buf); + strbuf_addf(buf, "%s/pack/pack-%s.%s", get_object_directory(), + sha1_to_hex(sha1), which); + return buf->buf; } char *sha1_pack_name(const unsigned char *sha1) { - static char *name, *base; - - return sha1_get_pack_name(sha1, &name, &base, "pack"); + static struct strbuf buf = STRBUF_INIT; + return sha1_get_pack_name(sha1, &buf, "pack"); } char *sha1_pack_index_name(const unsigned char *sha1) { - static char *name, *base; - - return sha1_get_pack_name(sha1, &name, &base, "idx"); + static struct strbuf buf = STRBUF_INIT; + return sha1_get_pack_name(sha1, &buf, "idx"); } struct alternate_object_database *alt_odb_list; -- cgit v1.2.1 From d4b3d11a03c5733a37656ca2f23171be6efad7d3 Mon 
Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:07:49 -0400 Subject: write_loose_object: convert to strbuf When creating a loose object tempfile, we use a fixed PATH_MAX-sized buffer, and strcpy directly into it. This isn't buggy, because we do a rough check of the size, but there's no verification that our guesstimate of the required space is enough (in fact, it's several bytes too big for the current naming scheme). Let's switch to a strbuf, which makes this much easier to verify. The allocation overhead should be negligible, since we are replacing a static buffer with a static strbuf, and we'll only need to allocate on the first call. While we're here, we can also document a subtle interaction with mkstemp that would be easy to overlook. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_file.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) (limited to 'sha1_file.c') diff --git a/sha1_file.c b/sha1_file.c index c26fdcbd8..4211af1d8 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -3011,29 +3011,31 @@ static inline int directory_size(const char *filename) * We want to avoid cross-directory filename renames, because those * can have problems on various filesystems (FAT, NFS, Coda). 
*/ -static int create_tmpfile(char *buffer, size_t bufsiz, const char *filename) +static int create_tmpfile(struct strbuf *tmp, const char *filename) { int fd, dirlen = directory_size(filename); - if (dirlen + 20 > bufsiz) { - errno = ENAMETOOLONG; - return -1; - } - memcpy(buffer, filename, dirlen); - strcpy(buffer + dirlen, "tmp_obj_XXXXXX"); - fd = git_mkstemp_mode(buffer, 0444); + strbuf_reset(tmp); + strbuf_add(tmp, filename, dirlen); + strbuf_addstr(tmp, "tmp_obj_XXXXXX"); + fd = git_mkstemp_mode(tmp->buf, 0444); if (fd < 0 && dirlen && errno == ENOENT) { - /* Make sure the directory exists */ - memcpy(buffer, filename, dirlen); - buffer[dirlen-1] = 0; - if (mkdir(buffer, 0777) && errno != EEXIST) + /* + * Make sure the directory exists; note that the contents + * of the buffer are undefined after mkstemp returns an + * error, so we have to rewrite the whole buffer from + * scratch. + */ + strbuf_reset(tmp); + strbuf_add(tmp, filename, dirlen - 1); + if (mkdir(tmp->buf, 0777) && errno != EEXIST) return -1; - if (adjust_shared_perm(buffer)) + if (adjust_shared_perm(tmp->buf)) return -1; /* Try again */ - strcpy(buffer + dirlen - 1, "/tmp_obj_XXXXXX"); - fd = git_mkstemp_mode(buffer, 0444); + strbuf_addstr(tmp, "/tmp_obj_XXXXXX"); + fd = git_mkstemp_mode(tmp->buf, 0444); } return fd; } @@ -3046,10 +3048,10 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen, git_zstream stream; git_SHA_CTX c; unsigned char parano_sha1[20]; - static char tmp_file[PATH_MAX]; + static struct strbuf tmp_file = STRBUF_INIT; const char *filename = sha1_file_name(sha1); - fd = create_tmpfile(tmp_file, sizeof(tmp_file), filename); + fd = create_tmpfile(&tmp_file, filename); if (fd < 0) { if (errno == EACCES) return error("insufficient permission for adding an object to repository database %s", get_object_directory()); @@ -3098,12 +3100,12 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen, struct utimbuf utb; utb.actime = 
mtime; utb.modtime = mtime; - if (utime(tmp_file, &utb) < 0) + if (utime(tmp_file.buf, &utb) < 0) warning("failed utime() on %s: %s", - tmp_file, strerror(errno)); + tmp_file.buf, strerror(errno)); } - return finalize_object_file(tmp_file, filename); + return finalize_object_file(tmp_file.buf, filename); } static int freshen_loose_object(const unsigned char *sha1) -- cgit v1.2.1 From c7ab0ba3405dc6bc8ade1296ef070a5a89660e76 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 Sep 2015 17:08:12 -0400 Subject: avoid sprintf and strcpy with flex arrays When we are allocating a struct with a FLEX_ARRAY member, we generally compute the size of the array and then sprintf or strcpy into it. Normally we could improve a dynamic allocation like this by using xstrfmt, but it doesn't work here; we have to account for the size of the rest of the struct. But we can improve things a bit by storing the length that we use for the allocation, and then feeding it to xsnprintf or memcpy, which makes it more obvious that we are not writing more than the allocated number of bytes. It would be nice if we had some kind of helper for allocating generic flex arrays, but it doesn't work that well: - the call signature is a little bit unwieldy: d = flex_struct(sizeof(*d), offsetof(d, path), fmt, ...); You need offsetof here instead of just writing to the end of the base size, because we don't know how the struct is packed (partially this is because FLEX_ARRAY might not be zero, though we can account for that; but the size of the struct may actually be rounded up for alignment, and we can't know that). - some sites do clever things, like over-allocating because they know they will write larger things into the buffer later (e.g., struct packed_git here). So we're better off to just write out each allocation (or add type-specific helpers, though many of these are one-off allocations anyway). 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_file.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'sha1_file.c') diff --git a/sha1_file.c b/sha1_file.c index 4211af1d8..cc3de244e 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1180,9 +1180,10 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local) struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path) { const char *path = sha1_pack_name(sha1); - struct packed_git *p = alloc_packed_git(strlen(path) + 1); + int alloc = strlen(path) + 1; + struct packed_git *p = alloc_packed_git(alloc); - strcpy(p->pack_name, path); + memcpy(p->pack_name, path, alloc); /* includes NUL */ hashcpy(p->sha1, sha1); if (check_packed_git_idx(idx_path, p)) { free(p); -- cgit v1.2.1