aboutsummaryrefslogtreecommitdiff
path: root/vcs-svn
diff options
context:
space:
mode:
authorDavid Barr <david.barr@cordelta.com>2010-08-09 17:34:42 -0500
committerJunio C Hamano <gitster@pobox.com>2010-08-14 19:35:37 -0700
commit1d73b52f5ba4184de6acf474f14668001304a10c (patch)
treeeab339299282709e45101c011ba57b565278a18e /vcs-svn
parent951f316470acc7c785c460a4e40735b22822349f (diff)
downloadgit-1d73b52f5ba4184de6acf474f14668001304a10c.tar.gz
git-1d73b52f5ba4184de6acf474f14668001304a10c.tar.xz
Add string-specific memory pool
Intern strings so they can be compared by address and stored without wasting space. This library uses the macros in the obj_pool.h and trp.h to create a memory pool for strings and expose an API for handling them. [rr: added API docs] [jn: with some API simplifications, new documentation and tests] Signed-off-by: David Barr <david.barr@cordelta.com> Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'vcs-svn')
-rw-r--r--vcs-svn/string_pool.c102
-rw-r--r--vcs-svn/string_pool.h11
-rw-r--r--vcs-svn/string_pool.txt43
3 files changed, 156 insertions, 0 deletions
diff --git a/vcs-svn/string_pool.c b/vcs-svn/string_pool.c
new file mode 100644
index 000000000..f5b1da836
--- /dev/null
+++ b/vcs-svn/string_pool.c
@@ -0,0 +1,102 @@
+/*
+ * Licensed under a two-clause BSD-style license.
+ * See LICENSE for details.
+ */
+
+#include "git-compat-util.h"
+#include "trp.h"
+#include "obj_pool.h"
+#include "string_pool.h"
+
+/*
+ * Root of the treap that indexes every interned string.
+ * NOTE(review): ~0 is the "no entry" value used throughout this file, so
+ * this presumably starts the tree out empty -- confirm against trp.h.
+ */
+static struct trp_root tree = { ~0 };
+
+/* One interned string: a treap node that records where its text is stored. */
+struct node {
+ uint32_t offset; /* passed to string_pointer() to locate the string's bytes */
+ struct trp_node children; /* treap linkage; this field name is handed to trp_gen */
+};
+
+/*
+ * Two memory pools: one for struct node, and another for strings.
+ * The node_* helpers (alloc, free, pointer, offset, reset) and string_*
+ * helpers (alloc, free, pointer, reset) used in this file are not defined
+ * here by hand; they come from these two expansions.
+ * NOTE(review): 4096 is presumably an initial capacity -- confirm in obj_pool.h.
+ */
+obj_pool_gen(node, struct node, 4096)
+obj_pool_gen(string, char, 4096)
+
+/*
+ * Return the string that a node refers to, looked up in the string pool
+ * through the node's offset.  A NULL node yields NULL.
+ */
+static char *node_value(struct node *node)
+{
+	if (!node)
+		return NULL;
+	return string_pointer(node->offset);
+}
+
+/*
+ * Ordering callback for the treap: compares two nodes by the strcmp()
+ * ordering of the strings they refer to.
+ */
+static int node_cmp(struct node *a, struct node *b)
+{
+	const char *lhs = node_value(a);
+	const char *rhs = node_value(b);
+
+	return strcmp(lhs, rhs);
+}
+
+/*
+ * Build a Treap from the node structure (a trp_node w/ offset).
+ * Nodes are ordered by node_cmp, i.e. by strcmp() of their strings; the
+ * tree_search() and tree_insert() calls in pool_intern come from this
+ * expansion.
+ */
+trp_gen(static, tree_, struct node, children, node, node_cmp);
+
+/*
+ * Look up the string stored under the key `entry`.
+ * The key is resolved to a node with node_pointer(); whatever node_value()
+ * returns for that node (NULL for a NULL node) is passed straight back.
+ */
+const char *pool_fetch(uint32_t entry)
+{
+	struct node *found = node_pointer(entry);
+
+	return node_value(found);
+}
+
+/*
+ * Intern `key` and return its key in the pool.
+ *
+ * Returns ~0 for a NULL key.  Otherwise the string is first copied into a
+ * freshly allocated node so that it can be looked up in the treap like any
+ * other node; if an equal string is already present, the fresh allocations
+ * are handed back and the existing node's key is returned instead.
+ */
+uint32_t pool_intern(const char *key)
+{
+	struct node *candidate, *existing;
+	uint32_t size;
+
+	if (!key)
+		return ~0;
+
+	/* Tentatively store the key (including its terminating NUL). */
+	size = strlen(key) + 1;
+	candidate = node_pointer(node_alloc(1));
+	candidate->offset = string_alloc(size);
+	strcpy(node_value(candidate), key);
+
+	existing = tree_search(&tree, candidate);
+	if (existing) {
+		/* Already interned: give back the tentative node and bytes. */
+		node_free(1);
+		string_free(size);
+		return node_offset(existing);
+	}
+
+	tree_insert(&tree, candidate);
+	return node_offset(candidate);
+}
+
+/*
+ * strtok_r() wrapper that returns the interned key of the next token.
+ * The arguments are forwarded to strtok_r() unchanged; when it finds no
+ * further token the result is ~0.
+ */
+uint32_t pool_tok_r(char *str, const char *delim, char **saveptr)
+{
+	char *next = strtok_r(str, delim, saveptr);
+
+	if (!next)
+		return ~0;
+	return pool_intern(next);
+}
+
+/*
+ * Write the strings named by the keys in `seq` to `stream`, with `delim`
+ * between consecutive strings.  At most `len` keys are read, and output
+ * stops at the first ~0 key.  No delimiter is written after the last string.
+ */
+void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream)
+{
+	uint32_t pos;
+
+	for (pos = 0; pos < len; pos++) {
+		if (!~seq[pos])
+			break;
+		/* Separate this string from the one printed before it. */
+		if (pos > 0)
+			fputc(delim, stream);
+		fputs(pool_fetch(seq[pos]), stream);
+	}
+}
+
+/*
+ * Split `str` on `delim` and store the interned key of each token in `seq`,
+ * which has room for `sz` entries.
+ *
+ * Returns ~0 without touching `seq` when sz is 0.  Otherwise `seq` is always
+ * terminated by ~0: if every token fits, the return value is the index of
+ * that terminator; if not, the last slot is overwritten with ~0 and sz is
+ * returned.  A NULL `str` produces an empty sequence.
+ */
+uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str)
+{
+	char *state = NULL;
+	uint32_t key;
+	uint32_t count = 0;
+
+	if (!sz)
+		return ~0;
+
+	key = str ? pool_tok_r(str, delim, &state) : ~0;
+	while (count < sz) {
+		seq[count] = key;
+		if (key == ~0)
+			return count;
+		/* Fetch the following token before advancing to its slot. */
+		key = pool_tok_r(NULL, delim, &state);
+		count++;
+	}
+
+	/* Ran out of room: sacrifice the last slot for the terminator. */
+	seq[sz - 1] = ~0;
+	return sz;
+}
+
+/*
+ * Reset both backing pools (nodes and string bytes).
+ * NOTE(review): the treap root `tree` is not touched here -- confirm that
+ * the pool is not expected to be reused after a reset.
+ */
+void pool_reset(void)
+{
+ node_reset();
+ string_reset();
+}
diff --git a/vcs-svn/string_pool.h b/vcs-svn/string_pool.h
new file mode 100644
index 000000000..222fb66e6
--- /dev/null
+++ b/vcs-svn/string_pool.h
@@ -0,0 +1,11 @@
+#ifndef STRING_POOL_H_
+#define STRING_POOL_H_
+
+/* Intern key and return its key in the pool; a NULL key yields ~0. */
+uint32_t pool_intern(const char *key);
+/* Return the string stored under the key entry. */
+const char *pool_fetch(uint32_t entry);
+/* strtok_r-style tokenizer: key of the next token, or ~0 when there is none. */
+uint32_t pool_tok_r(char *str, const char *delim, char **saveptr);
+/* Write up to len strings named by seq to stream, separated by delim; stops at the first ~0. */
+void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream);
+/*
+ * Tokenize str into seq (sz entries, ~0-terminated).  Returns the index
+ * after the last token, sz if the tokens did not fit, or ~0 if sz is 0.
+ */
+uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str);
+/* Reset the node and string pools that back the string pool. */
+void pool_reset(void);
+
+#endif
diff --git a/vcs-svn/string_pool.txt b/vcs-svn/string_pool.txt
new file mode 100644
index 000000000..1b41f1562
--- /dev/null
+++ b/vcs-svn/string_pool.txt
@@ -0,0 +1,43 @@
+string_pool API
+===============
+
+The string_pool API provides facilities for replacing strings
+with integer keys that can be more easily compared and stored.
+The facilities are designed so that one could teach Git without
+too much trouble to store the information needed for these keys to
+remain valid over multiple executions.
+
+Functions
+---------
+
+pool_intern::
+ Include a string in the string pool and get its key.
+ If that string is already in the pool, retrieves its
+ existing key.
+
+pool_fetch::
+ Retrieve the string associated with a given key.
+
+pool_tok_r::
+ Extract the key of the next token from a string.
+ Interface mimics strtok_r.
+
+pool_print_seq::
+ Print a sequence of strings named by key to a file, using the
+ specified delimiter to separate them.
+
+ If NULL (key ~0) appears in the sequence, the sequence ends
+ early.
+
+pool_tok_seq::
+ Split a string into tokens, storing the keys of segments
+ into a caller-provided array.
+
+ Unless sz is 0, the array will always be ~0-terminated.
+ If there is not enough room for all the tokens, the
+ array holds as many tokens as fit in the entries before
+ the terminating ~0. Return value is the index after the
+ last token, or sz if the tokens did not fit. If sz is 0,
+ nothing is stored and the return value is ~0.
+
+pool_reset::
+ Deallocate storage for the string pool.