diff options
author | Linus Torvalds <torvalds@g5.osdl.org> | 2005-07-16 10:05:26 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-07-16 10:05:26 -0700 |
commit | c5f7674a97e621bfab5544165098b4860ee6e247 (patch) | |
tree | 5653f607a0af70840636086cb3f7571f88ba0b76 /tools/mailsplit.c | |
parent | fc7ef1e8ae8c19c06a4fa3d574c13d793e66186a (diff) | |
download | git-c5f7674a97e621bfab5544165098b4860ee6e247.tar.gz git-c5f7674a97e621bfab5544165098b4860ee6e247.tar.xz |
Prepare git-tools for merging into the main git archive
Rename into a "tools" subdirectory, and change name of "dotest" to "applymbox".
Remove stripspace (which was already copied into git) and cvs2git (which
was likewise already copied into git, and then replaced by a much better
perl version).
All of this was brought on by Ryan Anderson shaming me into it. Thanks.
I guess.
Diffstat (limited to 'tools/mailsplit.c')
-rw-r--r-- | tools/mailsplit.c | 144 |
1 files changed, 144 insertions, 0 deletions
diff --git a/tools/mailsplit.c b/tools/mailsplit.c new file mode 100644 index 000000000..9379fbc5e --- /dev/null +++ b/tools/mailsplit.c @@ -0,0 +1,144 @@ +/* + * Totally braindamaged mbox splitter program. + * + * It just splits a mbox into a list of files: "0001" "0002" .. + * so you can process them further from there. + */ +#include <unistd.h> +#include <stdlib.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <string.h> +#include <stdio.h> +#include <ctype.h> +#include <assert.h> + +static int usage(void) +{ + fprintf(stderr, "mailsplit <mbox> <directory>\n"); + exit(1); +} + +static int linelen(const char *map, unsigned long size) +{ + int len = 0, c; + + do { + c = *map; + map++; + size--; + len++; + } while (size && c != '\n'); + return len; +} + +static int is_from_line(const char *line, int len) +{ + const char *colon; + + if (len < 20 || memcmp("From ", line, 5)) + return 0; + + colon = line + len - 2; + line += 5; + for (;;) { + if (colon < line) + return 0; + if (*--colon == ':') + break; + } + + if (!isdigit(colon[-4]) || + !isdigit(colon[-2]) || + !isdigit(colon[-1]) || + !isdigit(colon[ 1]) || + !isdigit(colon[ 2])) + return 0; + + /* year */ + if (strtol(colon+3, NULL, 10) <= 90) + return 0; + + /* Ok, close enough */ + return 1; +} + +static int parse_email(const void *map, unsigned long size) +{ + unsigned long offset; + + if (size < 6 || memcmp("From ", map, 5)) + goto corrupt; + + /* Make sure we don't trigger on this first line */ + map++; size--; offset=1; + + /* + * Search for a line beginning with "From ", and + * having smething that looks like a date format. + */ + do { + int len = linelen(map, size); + if (is_from_line(map, len)) + return offset; + map += len; + size -= len; + offset += len; + } while (size); + return offset; + +corrupt: + fprintf(stderr, "corrupt mailbox\n"); + exit(1); +} + +int main(int argc, char **argv) +{ + int fd, nr; + struct stat st; + unsigned long size; + void *map; + + if (argc != 3) + usage(); + fd = open(argv[1], O_RDONLY); + if (fd < 0) { + perror(argv[1]); + exit(1); + } + if (chdir(argv[2]) < 0) + usage(); + if (fstat(fd, &st) < 0) { + perror("stat"); + exit(1); + } + size = st.st_size; + map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + if (-1 == (int)(long)map) { + perror("mmap"); + exit(1); + } + close(fd); + nr = 0; + do { + char name[10]; + unsigned long len = parse_email(map, size); + assert(len <= size); + sprintf(name, "%04d", ++nr); + fd = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600); + if (fd < 0) { + perror(name); + exit(1); + } + if (write(fd, map, len) != len) { + perror("write"); + exit(1); + } + close(fd); + map += len; + size -= len; + } while (size > 0); + return 0; +} |