From 90321c106ca6e36c0e884ca677c9a52dea47bdde Mon Sep 17 00:00:00 2001 From: Peter Eriksen Date: Mon, 3 Apr 2006 19:30:46 +0100 Subject: Replace xmalloc+memset(0) with xcalloc. Signed-off-by: Peter Eriksen Signed-off-by: Junio C Hamano --- blob.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'blob.c') diff --git a/blob.c b/blob.c index 84ec1212e..c1fdd861c 100644 --- a/blob.c +++ b/blob.c @@ -8,8 +8,7 @@ struct blob *lookup_blob(const unsigned char *sha1) { struct object *obj = lookup_object(sha1); if (!obj) { - struct blob *ret = xmalloc(sizeof(struct blob)); - memset(ret, 0, sizeof(struct blob)); + struct blob *ret = xcalloc(1, sizeof(struct blob)); created_object(sha1, &ret->object); ret->object.type = blob_type; return ret; -- cgit v1.2.1 From 885a86abe2e9f7b96a4e2012183c6751635840aa Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 14 Jun 2006 16:45:13 -0700 Subject: Shrink "struct object" a bit This shrinks "struct object" by a small amount, by getting rid of the "struct type *" pointer and replacing it with a 3-bit bitfield instead. In addition, we merge the bitfields and the "flags" field, which incidentally should also remove a useless 4-byte padding from the object when in 64-bit mode. Now, our "struct object" is still too damn large, but it's now less obviously bloated, and of the remaining fields, only the "util" (which is not used by most things) is clearly something that should be eventually discarded. This shrinks the "git-rev-list --all" memory use by about 2.5% on the kernel archive (and, perhaps more importantly, on the larger mozilla archive). That may not sound like much, but I suspect it's more on a 64-bit platform. There are other remaining inefficiencies (the parent lists, for example, probably have horrible malloc overhead), but this was pretty obvious. 
Most of the patch is just changing the comparison of the "type" pointer from one of the constant string pointers to the appropriate new TYPE_xxx small integer constant. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- blob.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'blob.c') diff --git a/blob.c b/blob.c index c1fdd861c..737700874 100644 --- a/blob.c +++ b/blob.c @@ -10,14 +10,14 @@ struct blob *lookup_blob(const unsigned char *sha1) if (!obj) { struct blob *ret = xcalloc(1, sizeof(struct blob)); created_object(sha1, &ret->object); - ret->object.type = blob_type; + ret->object.type = TYPE_BLOB; return ret; } if (!obj->type) - obj->type = blob_type; - if (obj->type != blob_type) { - error("Object %s is a %s, not a blob", - sha1_to_hex(sha1), obj->type); + obj->type = TYPE_BLOB; + if (obj->type != TYPE_BLOB) { + error("Object %s is a %s, not a blob", + sha1_to_hex(sha1), typename(obj->type)); return NULL; } return (struct blob *) obj; -- cgit v1.2.1 From 855419f764a65e92f1d5dd1b3d50ee987db1d9de Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 19 Jun 2006 10:44:15 -0700 Subject: Add specialized object allocator This creates a simple specialized object allocator for basic objects. This avoids wasting space with malloc overhead (metadata and extra alignment), since the specialized allocator knows the alignment, and that objects, once allocated, are never freed. It also allows us to track some basic statistics about object allocations. For example, for the mozilla import, it shows object usage as follows: blobs: 627629 (14710 kB) trees: 1119035 (34969 kB) commits: 196423 (8440 kB) tags: 1336 (46 kB) and the simpler allocator shaves off about 2.5% off the memory footprint off a "git-rev-list --all --objects", and is a bit faster too. [ Side note: this concludes the series of "save memory in object storage". The thing is, there simply isn't much more to be saved on the objects. 
Doing "git-rev-list --all --objects" on the mozilla archive has a final total RSS of 131498 pages for me: that's about 513MB. Of that, the object overhead is now just 56MB, the rest is going somewhere else (put another way: the fact that this patch shaves off 2.5% of the total memory overhead, considering that objects are now not much more than 10% of the total shows how big the wasted space really was: this makes object allocations much more memory- and time-efficient). I haven't looked at where the rest is, but I suspect the bulk of it is just the pack-file loading. It may be that we should pack the tree objects separately from the blob objects: for git-rev-list --objects, we don't actually ever need to even look at the blobs, but since trees and blobs are interspersed in the pack-file, we end up not being dense in the tree accesses, so we end up looking at more pages than we strictly need to. So with a 535MB pack-file, it's entirely possible - even likely - that most of the remaining RSS is just the mmap of the pack-file itself. We don't need to map in _all_ of it, but we do end up mapping a fair amount. ] Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- blob.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'blob.c') diff --git a/blob.c b/blob.c index 737700874..496f27004 100644 --- a/blob.c +++ b/blob.c @@ -8,7 +8,7 @@ struct blob *lookup_blob(const unsigned char *sha1) { struct object *obj = lookup_object(sha1); if (!obj) { - struct blob *ret = xcalloc(1, sizeof(struct blob)); + struct blob *ret = alloc_blob_node(); created_object(sha1, &ret->object); ret->object.type = TYPE_BLOB; return ret; -- cgit v1.2.1 From 1974632c664c2d573b36a00fa993c1c13dd8a967 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 11 Jul 2006 20:45:31 -0700 Subject: Remove TYPE_* constant macros and use object_type enums consistently. 
This updates the type-enumeration constants introduced to reduce the memory footprint of "struct object" to match the type bits already used in the packfile format, by removing the former (i.e. TYPE_* constant macros) and using the latter (i.e. enum object_type) throughout the code for consistency. Eventually we can stop passing around the "type strings" entirely, and this will help - no confusion about two different integer enumerations. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- blob.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'blob.c') diff --git a/blob.c b/blob.c index 496f27004..d1af2e62f 100644 --- a/blob.c +++ b/blob.c @@ -10,12 +10,12 @@ struct blob *lookup_blob(const unsigned char *sha1) if (!obj) { struct blob *ret = alloc_blob_node(); created_object(sha1, &ret->object); - ret->object.type = TYPE_BLOB; + ret->object.type = OBJ_BLOB; return ret; } if (!obj->type) - obj->type = TYPE_BLOB; - if (obj->type != TYPE_BLOB) { + obj->type = OBJ_BLOB; + if (obj->type != OBJ_BLOB) { error("Object %s is a %s, not a blob", sha1_to_hex(sha1), typename(obj->type)); return NULL; -- cgit v1.2.1 From 85023577a8f4b540aa64aa37f6f44578c0c305a3 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 19 Dec 2006 14:34:12 -0800 Subject: simplify inclusion of system header files. This is a mechanical clean-up of the way *.c files include system header files. (1) sources under compat/, platform sha-1 implementations, and xdelta code are exempt from the following rules; (2) the first #include must be "git-compat-util.h" or one of our own header files that includes it first (e.g. config.h, builtin.h, pkt-line.h); (3) system headers that are included in "git-compat-util.h" need not be included in individual C source files. (4) "git-compat-util.h" does not have to include subsystem-specific header files (e.g. expat.h). 
Signed-off-by: Junio C Hamano --- blob.c | 1 - 1 file changed, 1 deletion(-) (limited to 'blob.c') diff --git a/blob.c b/blob.c index d1af2e62f..9776beac5 100644 --- a/blob.c +++ b/blob.c @@ -1,6 +1,5 @@ #include "cache.h" #include "blob.h" -#include const char *blob_type = "blob"; -- cgit v1.2.1 From 21666f1aae4e890d8f50924f9e80763b27e6a45d Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 26 Feb 2007 14:55:59 -0500 Subject: convert object type handling from a string to a number We currently have two parallel notations for dealing with object types in the code: a string and a numerical value. One of them is obviously redundant, and the most used one requires more stack space and a bunch of strcmp() all over the place. This is an initial step for the removal of the version using a char array found in object reading code paths. The patch is unfortunately large but there is no sane way to split it into smaller parts without breaking the system. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- blob.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'blob.c') diff --git a/blob.c b/blob.c index 9776beac5..0a9ea417b 100644 --- a/blob.c +++ b/blob.c @@ -30,18 +30,18 @@ int parse_blob_buffer(struct blob *item, void *buffer, unsigned long size) int parse_blob(struct blob *item) { - char type[20]; + enum object_type type; void *buffer; unsigned long size; int ret; if (item->object.parsed) return 0; - buffer = read_sha1_file(item->object.sha1, type, &size); + buffer = read_sha1_file(item->object.sha1, &type, &size); if (!buffer) return error("Could not read %s", sha1_to_hex(item->object.sha1)); - if (strcmp(type, blob_type)) + if (type != OBJ_BLOB) return error("Object %s not a blob", sha1_to_hex(item->object.sha1)); ret = parse_blob_buffer(item, buffer, size); -- cgit v1.2.1 From 100c5f3b0b27ec6617de1a785c4ff481e92636c1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 16 Apr 2007 22:11:43 -0700 Subject: Clean up object creation 
to use more common code This replaces the fairly odd "created_object()" function that did _most_ of the object setup with a more complete "create_object()" function that also has a more natural calling convention. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- blob.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'blob.c') diff --git a/blob.c b/blob.c index 0a9ea417b..bd7d078e1 100644 --- a/blob.c +++ b/blob.c @@ -6,12 +6,8 @@ const char *blob_type = "blob"; struct blob *lookup_blob(const unsigned char *sha1) { struct object *obj = lookup_object(sha1); - if (!obj) { - struct blob *ret = alloc_blob_node(); - created_object(sha1, &ret->object); - ret->object.type = OBJ_BLOB; - return ret; - } + if (!obj) + return create_object(sha1, OBJ_BLOB, alloc_blob_node()); if (!obj->type) obj->type = OBJ_BLOB; if (obj->type != OBJ_BLOB) { -- cgit v1.2.1 From 837d395a5c0b98ab938d71db8e2b6b9f69ddcc4d Mon Sep 17 00:00:00 2001 From: Daniel Barkalow Date: Mon, 18 Jan 2010 13:06:28 -0500 Subject: Replace parse_blob() with an explanatory comment parse_blob() has never actually been used; it has served simply to avoid having a confusing gap in the API. Instead of leaving it, put in a comment that explains what "parsing a blob" entails (making sure the object is actually readable), and why code might care whether a blob has been parsed or not. 
Signed-off-by: Daniel Barkalow Signed-off-by: Junio C Hamano --- blob.c | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'blob.c') diff --git a/blob.c b/blob.c index bd7d078e1..ae320bd8f 100644 --- a/blob.c +++ b/blob.c @@ -23,24 +23,3 @@ int parse_blob_buffer(struct blob *item, void *buffer, unsigned long size) item->object.parsed = 1; return 0; } - -int parse_blob(struct blob *item) -{ - enum object_type type; - void *buffer; - unsigned long size; - int ret; - - if (item->object.parsed) - return 0; - buffer = read_sha1_file(item->object.sha1, &type, &size); - if (!buffer) - return error("Could not read %s", - sha1_to_hex(item->object.sha1)); - if (type != OBJ_BLOB) - return error("Object %s not a blob", - sha1_to_hex(item->object.sha1)); - ret = parse_blob_buffer(item, buffer, size); - free(buffer); - return ret; -} -- cgit v1.2.1