From 147394470c4be34038e520e74a017da6a3745e90 Mon Sep 17 00:00:00 2001 From: Heiko Voigt Date: Wed, 16 Nov 2016 16:11:04 +0100 Subject: serialize collection of changed submodules To check whether a submodule needs to be pushed we need to collect all changed submodules. Let's collect them first and then execute the possibly expensive test whether certain revisions are already pushed only once per submodule. There is further potential for optimization since we can assemble one command and only issue that instead of one call for each remote ref in the submodule. Signed-off-by: Heiko Voigt Signed-off-by: Junio C Hamano --- submodule.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 4 deletions(-) (limited to 'submodule.c') diff --git a/submodule.c b/submodule.c index 2de06a335..d2b29944e 100644 --- a/submodule.c +++ b/submodule.c @@ -554,19 +554,34 @@ static int submodule_needs_pushing(const char *path, const unsigned char sha1[20 return 0; } +static struct sha1_array *submodule_commits(struct string_list *submodules, + const char *path) +{ + struct string_list_item *item; + + item = string_list_insert(submodules, path); + if (item->util) + return (struct sha1_array *) item->util; + + /* NEEDSWORK: should we have sha1_array_init()? 
*/ + item->util = xcalloc(1, sizeof(struct sha1_array)); + return (struct sha1_array *) item->util; +} + static void collect_submodules_from_diff(struct diff_queue_struct *q, struct diff_options *options, void *data) { int i; - struct string_list *needs_pushing = data; + struct string_list *submodules = data; for (i = 0; i < q->nr; i++) { struct diff_filepair *p = q->queue[i]; + struct sha1_array *commits; if (!S_ISGITLINK(p->two->mode)) continue; - if (submodule_needs_pushing(p->two->path, p->two->oid.hash)) - string_list_insert(needs_pushing, p->two->path); + commits = submodule_commits(submodules, p->two->path); + sha1_array_append(commits, p->two->oid.hash); } } @@ -582,6 +597,30 @@ static void find_unpushed_submodule_commits(struct commit *commit, diff_tree_combined_merge(commit, 1, &rev); } +struct collect_submodule_from_sha1s_data { + char *submodule_path; + struct string_list *needs_pushing; +}; + +static int collect_submodules_from_sha1s(const unsigned char sha1[20], + void *data) +{ + struct collect_submodule_from_sha1s_data *me = data; + + if (submodule_needs_pushing(me->submodule_path, sha1)) + string_list_insert(me->needs_pushing, me->submodule_path); + + return 0; +} + +static void free_submodules_sha1s(struct string_list *submodules) +{ + struct string_list_item *item; + for_each_string_list_item(item, submodules) + sha1_array_clear((struct sha1_array *) item->util); + string_list_clear(submodules, 1); +} + int find_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_name, struct string_list *needs_pushing) { @@ -590,6 +629,8 @@ int find_unpushed_submodules(unsigned char new_sha1[20], const char *argv[] = {NULL, NULL, "--not", "NULL", NULL}; int argc = ARRAY_SIZE(argv) - 1; char *sha1_copy; + struct string_list submodules = STRING_LIST_INIT_DUP; + struct string_list_item *submodule; struct strbuf remotes_arg = STRBUF_INIT; @@ -603,12 +644,22 @@ int find_unpushed_submodules(unsigned char new_sha1[20], die("revision walk setup failed"); 
while ((commit = get_revision(&rev)) != NULL) - find_unpushed_submodule_commits(commit, needs_pushing); + find_unpushed_submodule_commits(commit, &submodules); reset_revision_walk(); free(sha1_copy); strbuf_release(&remotes_arg); + for_each_string_list_item(submodule, &submodules) { + struct collect_submodule_from_sha1s_data data; + data.submodule_path = submodule->string; + data.needs_pushing = needs_pushing; + sha1_array_for_each_unique((struct sha1_array *) submodule->util, + collect_submodules_from_sha1s, + &data); + } + free_submodules_sha1s(&submodules); + return needs_pushing->nr; } -- cgit v1.2.1 From 9cfa1c260fd10dadb2dfbb62f8e120a10cabfd06 Mon Sep 17 00:00:00 2001 From: Heiko Voigt Date: Wed, 16 Nov 2016 16:11:05 +0100 Subject: serialize collection of refs that contain submodule changes We are iterating over each pushed ref and want to check whether it contains changes to submodules. Instead of immediately checking each ref, let's first collect them and then do the check for all of them in one revision walk. 
Signed-off-by: Heiko Voigt Signed-off-by: Junio C Hamano --- submodule.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) (limited to 'submodule.c') diff --git a/submodule.c b/submodule.c index d2b29944e..d07390bc5 100644 --- a/submodule.c +++ b/submodule.c @@ -522,6 +522,13 @@ static int has_remote(const char *refname, const struct object_id *oid, return 1; } +static int append_sha1_to_argv(const unsigned char sha1[20], void *data) +{ + struct argv_array *argv = data; + argv_array_push(argv, sha1_to_hex(sha1)); + return 0; +} + static int submodule_needs_pushing(const char *path, const unsigned char sha1[20]) { if (add_submodule_odb(path) || !lookup_commit_reference(sha1)) @@ -621,25 +628,24 @@ static void free_submodules_sha1s(struct string_list *submodules) string_list_clear(submodules, 1); } -int find_unpushed_submodules(unsigned char new_sha1[20], +int find_unpushed_submodules(struct sha1_array *commits, const char *remotes_name, struct string_list *needs_pushing) { struct rev_info rev; struct commit *commit; - const char *argv[] = {NULL, NULL, "--not", "NULL", NULL}; - int argc = ARRAY_SIZE(argv) - 1; - char *sha1_copy; struct string_list submodules = STRING_LIST_INIT_DUP; struct string_list_item *submodule; + struct argv_array argv = ARGV_ARRAY_INIT; - struct strbuf remotes_arg = STRBUF_INIT; - - strbuf_addf(&remotes_arg, "--remotes=%s", remotes_name); init_revisions(&rev, NULL); - sha1_copy = xstrdup(sha1_to_hex(new_sha1)); - argv[1] = sha1_copy; - argv[3] = remotes_arg.buf; - setup_revisions(argc, argv, &rev, NULL); + + /* argv.argv[0] will be ignored by setup_revisions */ + argv_array_push(&argv, "find_unpushed_submodules"); + sha1_array_for_each_unique(commits, append_sha1_to_argv, &argv); + argv_array_push(&argv, "--not"); + argv_array_pushf(&argv, "--remotes=%s", remotes_name); + + setup_revisions(argv.argc, argv.argv, &rev, NULL); if (prepare_revision_walk(&rev)) die("revision walk setup failed"); @@ -647,8 
+653,7 @@ int find_unpushed_submodules(unsigned char new_sha1[20], find_unpushed_submodule_commits(commit, &submodules); reset_revision_walk(); - free(sha1_copy); - strbuf_release(&remotes_arg); + argv_array_clear(&argv); for_each_string_list_item(submodule, &submodules) { struct collect_submodule_from_sha1s_data data; @@ -685,12 +690,12 @@ static int push_submodule(const char *path) return 1; } -int push_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_name) +int push_unpushed_submodules(struct sha1_array *commits, const char *remotes_name) { int i, ret = 1; struct string_list needs_pushing = STRING_LIST_INIT_DUP; - if (!find_unpushed_submodules(new_sha1, remotes_name, &needs_pushing)) + if (!find_unpushed_submodules(commits, remotes_name, &needs_pushing)) return 1; for (i = 0; i < needs_pushing.nr; i++) { -- cgit v1.2.1 From 5b6607d23f8a262e1c0ede954f0477664934eed8 Mon Sep 17 00:00:00 2001 From: Heiko Voigt Date: Wed, 16 Nov 2016 16:11:06 +0100 Subject: batch check whether submodule needs pushing into one call We run a command for each sha1 change in a submodule. This is unnecessary since we can simply batch all sha1s we want to check into one command. Let's do it so we can speed up the check when many submodule changes are in need of checking. 
Signed-off-by: Heiko Voigt Signed-off-by: Junio C Hamano --- submodule.c | 62 ++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 33 insertions(+), 29 deletions(-) (limited to 'submodule.c') diff --git a/submodule.c b/submodule.c index d07390bc5..85287a1d1 100644 --- a/submodule.c +++ b/submodule.c @@ -529,27 +529,49 @@ static int append_sha1_to_argv(const unsigned char sha1[20], void *data) return 0; } -static int submodule_needs_pushing(const char *path, const unsigned char sha1[20]) +static int check_has_commit(const unsigned char sha1[20], void *data) { - if (add_submodule_odb(path) || !lookup_commit_reference(sha1)) + int *has_commit = data; + + if (!lookup_commit_reference(sha1)) + *has_commit = 0; + + return 0; +} + +static int submodule_has_commits(const char *path, struct sha1_array *commits) +{ + int has_commit = 1; + + if (add_submodule_odb(path)) + return 0; + + sha1_array_for_each_unique(commits, check_has_commit, &has_commit); + return has_commit; +} + +static int submodule_needs_pushing(const char *path, struct sha1_array *commits) +{ + if (!submodule_has_commits(path, commits)) return 0; if (for_each_remote_ref_submodule(path, has_remote, NULL) > 0) { struct child_process cp = CHILD_PROCESS_INIT; - const char *argv[] = {"rev-list", NULL, "--not", "--remotes", "-n", "1" , NULL}; struct strbuf buf = STRBUF_INIT; int needs_pushing = 0; - argv[1] = sha1_to_hex(sha1); - cp.argv = argv; + argv_array_push(&cp.args, "rev-list"); + sha1_array_for_each_unique(commits, append_sha1_to_argv, &cp.args); + argv_array_pushl(&cp.args, "--not", "--remotes", "-n", "1" , NULL); + prepare_submodule_repo_env(&cp.env_array); cp.git_cmd = 1; cp.no_stdin = 1; cp.out = -1; cp.dir = path; if (start_command(&cp)) - die("Could not run 'git rev-list %s --not --remotes -n 1' command in submodule %s", - sha1_to_hex(sha1), path); + die("Could not run 'git rev-list --not --remotes -n 1' command in submodule %s", + path); if (strbuf_read(&buf, cp.out, 41)) 
needs_pushing = 1; finish_command(&cp); @@ -604,22 +626,6 @@ static void find_unpushed_submodule_commits(struct commit *commit, diff_tree_combined_merge(commit, 1, &rev); } -struct collect_submodule_from_sha1s_data { - char *submodule_path; - struct string_list *needs_pushing; -}; - -static int collect_submodules_from_sha1s(const unsigned char sha1[20], - void *data) -{ - struct collect_submodule_from_sha1s_data *me = data; - - if (submodule_needs_pushing(me->submodule_path, sha1)) - string_list_insert(me->needs_pushing, me->submodule_path); - - return 0; -} - static void free_submodules_sha1s(struct string_list *submodules) { struct string_list_item *item; @@ -656,12 +662,10 @@ int find_unpushed_submodules(struct sha1_array *commits, argv_array_clear(&argv); for_each_string_list_item(submodule, &submodules) { - struct collect_submodule_from_sha1s_data data; - data.submodule_path = submodule->string; - data.needs_pushing = needs_pushing; - sha1_array_for_each_unique((struct sha1_array *) submodule->util, - collect_submodules_from_sha1s, - &data); + struct sha1_array *commits = (struct sha1_array *) submodule->util; + + if (submodule_needs_pushing(submodule->string, commits)) + string_list_insert(needs_pushing, submodule->string); } free_submodules_sha1s(&submodules); -- cgit v1.2.1 From 250ab24ab3a35d5857855a2e00483dcd8867fdca Mon Sep 17 00:00:00 2001 From: Heiko Voigt Date: Wed, 16 Nov 2016 16:11:07 +0100 Subject: submodule_needs_pushing(): explain the behaviour when we cannot answer When we do not have commits that are involved in the update of the superproject in our copy of submodule, we cannot tell if the remote end needs to acquire these commits to be able to check out the superproject tree. Explain why we answer "no there is no need/point in pushing from our submodule repository" in this case. 
Signed-off-by: Heiko Voigt Signed-off-by: Junio C Hamano --- submodule.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'submodule.c') diff --git a/submodule.c b/submodule.c index 85287a1d1..b50948818 100644 --- a/submodule.c +++ b/submodule.c @@ -553,6 +553,17 @@ static int submodule_has_commits(const char *path, struct sha1_array *commits) static int submodule_needs_pushing(const char *path, struct sha1_array *commits) { if (!submodule_has_commits(path, commits)) + /* + * NOTE: We do consider it safe to return "no" here. The + * correct answer would be "We do not know" instead of + * "No push needed", but it is quite hard to change + * the submodule pointer without having the submodule + * around. If a user did however change the submodules + * without having the submodule around, this indicates + * an expert who knows what they are doing or a + * maintainer integrating work from other people. In + * both cases it should be safe to skip this check. + */ return 0; if (for_each_remote_ref_submodule(path, has_remote, NULL) > 0) { -- cgit v1.2.1