From: Volodymyr Poltavets Date: Wed, 19 May 2021 18:39:07 +0000 (+0200) Subject: Initial commit X-Git-Url: https://git.packet-gain.de/?a=commitdiff_plain;h=refs%2Fheads%2Fmaster;p=tfv.git Initial commit --- c525762efb4bdff5d3f17d97b18dd7c3ca1d0e50 diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..62c0328 --- /dev/null +++ b/Makefile @@ -0,0 +1,21 @@ +CFLAGS=-Wall -Werror -O2 +CFLAGS_G=-Wall -Werror -O0 -DDEBUG -g -ggdb +LDFLAGS=-lcrypto + +.PHONY: all fmt release debug clean + +all: release debug + +release: tfv +debug: tfv_g + +tfv: tfv.c bencode.c + +tfv_g: tfv.c bencode.c + $(CC) $(CFLAGS_G) -o $@ $^ $(LDFLAGS) + +fmt: tfv.c + indent -kr -brf -i4 -as -nut $^ + +clean: + rm -rf tfv tfv_g *.c~ diff --git a/bencode.c b/bencode.c new file mode 100644 index 0000000..4dd3162 --- /dev/null +++ b/bencode.c @@ -0,0 +1,2683 @@ +/* + * libbencodetools + * + * Written by Heikki Orsila and + * Janne Kulmala in 2011. + */ + +#include "bencode.h" + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define die(fmt, args...) do { fprintf(stderr, "bencode(%u): fatal error: " fmt, __LINE__, ## args); abort(); } while (0) +#define warn(fmt, args...) do { fprintf(stderr, "bencode: warning: " fmt, ## args); } while (0) + +#define MAX_ALLOC (((size_t) -1) / sizeof(struct bencode *) / 2) +#define DICT_MAX_ALLOC (((size_t) -1) / sizeof(struct bencode_dict_node) / 2) + +struct ben_decode_ctx { + const char *data; + const size_t len; + size_t off; + int error; + int level; + char c; + int line; + struct bencode_type **types; +}; + +struct ben_encode_ctx { + char *data; + size_t size; + size_t pos; +}; + +/* + * Buffer size for fitting all unsigned long long and long long integers, + * assuming it is at most 64 bits. If long long is larger than 64 bits, + * an error is produced when too large an integer is converted. 
+ */ +#define LONGLONGSIZE 21 + +static struct bencode *decode_printed(struct ben_decode_ctx *ctx); +static void inplace_ben_str(struct bencode_str *b, const char *s, size_t len); +static int resize_dict(struct bencode_dict *d, size_t newalloc); +static int resize_list(struct bencode_list *list, size_t newalloc); +static int unpack(const struct bencode *b, struct ben_decode_ctx *ctx, + va_list *vl); +static struct bencode *pack(struct ben_decode_ctx *ctx, va_list *vl); + +static size_t type_size(int type) +{ + switch (type) { + case BENCODE_BOOL: + return sizeof(struct bencode_bool); + case BENCODE_DICT: + return sizeof(struct bencode_dict); + case BENCODE_INT: + return sizeof(struct bencode_int); + case BENCODE_LIST: + return sizeof(struct bencode_list); + case BENCODE_STR: + return sizeof(struct bencode_str); + default: + die("Unknown type: %d\n", type); + } +} + +static void *alloc(int type) +{ + struct bencode *b = calloc(1, type_size(type)); + if (b == NULL) + return NULL; + b->type = type; + return b; +} + +void *ben_alloc_user(struct bencode_type *type) +{ + struct bencode_user *user = calloc(1, type->size); + if (user == NULL) + return NULL; + user->type = BENCODE_USER; + user->info = type; + return user; +} + +static int insufficient(struct ben_decode_ctx *ctx) +{ + ctx->error = BEN_INSUFFICIENT; + return -1; +} + +static int invalid(struct ben_decode_ctx *ctx) +{ + ctx->error = BEN_INVALID; + return -1; +} + +static int mismatch(struct ben_decode_ctx *ctx) +{ + ctx->error = BEN_MISMATCH; + return -1; +} + +void *ben_insufficient_ptr(struct ben_decode_ctx *ctx) +{ + ctx->error = BEN_INSUFFICIENT; + return NULL; +} + +void *ben_invalid_ptr(struct ben_decode_ctx *ctx) +{ + ctx->error = BEN_INVALID; + return NULL; +} + +void *ben_oom_ptr(struct ben_decode_ctx *ctx) +{ + ctx->error = BEN_NO_MEMORY; + return NULL; +} + +int ben_need_bytes(const struct ben_decode_ctx *ctx, size_t n) +{ + return ((ctx->off + n) <= ctx->len) ? 
0 : -1; +} + +char ben_current_char(const struct ben_decode_ctx *ctx) +{ + return ctx->data[ctx->off]; +} + +const char *ben_current_buf(const struct ben_decode_ctx *ctx, size_t n) +{ + return ben_need_bytes(ctx, n) ? NULL : ctx->data + ctx->off; +} + +void ben_skip(struct ben_decode_ctx *ctx, size_t n) +{ + ctx->off += n; +} + +static struct bencode *internal_blob(void *data, size_t len) +{ + struct bencode_str *b = alloc(BENCODE_STR); + if (b == NULL) + return NULL; + b->s = data; + b->len = len; + assert(b->s[len] == 0); + return (struct bencode *) b; +} + +static void skip_to_next_line(struct ben_decode_ctx *ctx) +{ + for (; ctx->off < ctx->len; ctx->off++) { + if (ben_current_char(ctx) == '\n') { + ctx->line++; + ctx->off++; + break; + } + } +} + +static int seek_char(struct ben_decode_ctx *ctx) +{ + while (ctx->off < ctx->len) { + char c = ben_current_char(ctx); + if (isspace(c)) { + if (c == '\n') + ctx->line++; + ctx->off++; + } else if (c == '#') { + /* Skip comment */ + ctx->off++; + skip_to_next_line(ctx); + } else { + return 0; + } + } + return insufficient(ctx); +} + +/* + * Test if string 's' is located at current position. + * Increment current position and return 0 if the string matches. + * Returns -1 otherwise. The function avoids buffer overflow. 
+ */ +static int try_match(struct ben_decode_ctx *ctx, const char *s) +{ + size_t n = strlen(s); + if (ben_need_bytes(ctx, n)) + return -1; + if (memcmp(ctx->data + ctx->off, s, n) != 0) + return -1; + ctx->off += n; + return 0; +} + +static int try_match_with_errors(struct ben_decode_ctx *ctx, const char *s) +{ + size_t n = strlen(s); + size_t left = ctx->len - ctx->off; + + assert(ctx->off <= ctx->len); + + if (left == 0) + return insufficient(ctx); + + if (left < n) { + if (memcmp(ctx->data + ctx->off, s, left) != 0) + return invalid(ctx); + return insufficient(ctx); + } + + if (memcmp(ctx->data + ctx->off, s, n) != 0) + return invalid(ctx); + + ctx->off += n; + return 0; +} + +int ben_allocate(struct bencode *b, size_t n) +{ + switch (b->type) { + case BENCODE_DICT: + return resize_dict(ben_dict_cast(b), n); + case BENCODE_LIST: + return resize_list(ben_list_cast(b), n); + default: + die("ben_allocate(): Unknown type %d\n", b->type); + } +} + +static struct bencode *clone_dict(const struct bencode_dict *d) +{ + struct bencode *key; + struct bencode *value; + struct bencode *newkey; + struct bencode *newvalue; + size_t pos; + struct bencode *newdict = ben_dict(); + if (newdict == NULL) + return NULL; + ben_dict_for_each(key, value, pos, (const struct bencode *) d) { + newkey = ben_clone(key); + newvalue = ben_clone(value); + if (newkey == NULL || newvalue == NULL) { + ben_free(newkey); + ben_free(newvalue); + goto error; + } + if (ben_dict_set(newdict, newkey, newvalue)) { + ben_free(newkey); + ben_free(newvalue); + goto error; + } + newkey = NULL; + newvalue = NULL; + } + return newdict; + +error: + ben_free(newdict); + return NULL; +} + +static struct bencode *clone_list(const struct bencode_list *list) +{ + struct bencode *value; + struct bencode *newvalue; + size_t pos; + struct bencode *newlist = ben_list(); + if (newlist == NULL) + return NULL; + ben_list_for_each(value, pos, (const struct bencode *) list) { + newvalue = ben_clone(value); + if (newvalue == 
NULL) + goto error; + if (ben_list_append(newlist, newvalue)) { + ben_free(newvalue); + goto error; + } + newvalue = NULL; + } + return newlist; + +error: + ben_free(newlist); + return NULL; +} + +static struct bencode *clone_str(const struct bencode_str *s) +{ + return ben_blob(s->s, s->len); +} + +static struct bencode *share_dict(const struct bencode_dict *d) +{ + struct bencode *newdict = ben_dict(); + if (newdict == NULL) + return NULL; + memcpy(newdict, d, sizeof(*d)); + ((struct bencode_dict *) newdict)->shared = 1; + return newdict; +} + +static struct bencode *share_list(const struct bencode_list *list) +{ + struct bencode *newlist = ben_list(); + if (newlist == NULL) + return NULL; + memcpy(newlist, list, sizeof(*list)); + ((struct bencode_list *) newlist)->shared = 1; + return newlist; +} + +struct bencode *ben_clone(const struct bencode *b) +{ + switch (b->type) { + case BENCODE_BOOL: + return ben_bool(ben_bool_const_cast(b)->b); + case BENCODE_DICT: + return clone_dict(ben_dict_const_cast(b)); + case BENCODE_INT: + return ben_int(ben_int_const_cast(b)->ll); + case BENCODE_LIST: + return clone_list(ben_list_const_cast(b)); + case BENCODE_STR: + return clone_str(ben_str_const_cast(b)); + default: + die("Invalid type %c\n", b->type); + } +} + +struct bencode *ben_shared_clone(const struct bencode *b) +{ + switch (b->type) { + case BENCODE_DICT: + return share_dict(ben_dict_const_cast(b)); + break; + case BENCODE_LIST: + return share_list(ben_list_const_cast(b)); + break; + default: + return ben_clone(b); + } +} + +static int cmp_dict(const struct bencode *a, const struct bencode *b) +{ + size_t len = ben_dict_len(a); + size_t pos; + struct bencode *key; + struct bencode *va; + struct bencode *vb; + int ret = 0; + struct bencode_keyvalue *pairs; + + if (len != ben_dict_len(b)) { + /* Returning any non-zero value is allowed */ + return (len < ben_dict_len(b)) ? 
-1 : 1; + } + + pairs = ben_dict_ordered_items(a); + for (pos = 0; pos < len; pos++) { + key = pairs[pos].key; + va = pairs[pos].value; + vb = ben_dict_get(b, key); + if (vb == NULL) { + /* Returning any non-zero value is allowed */ + ret = (a < b) ? -1 : 1; + break; + } + ret = ben_cmp(va, vb); + if (ret) + break; + } + + free(pairs); + return ret; +} + +static int cmp_list(const struct bencode *a, const struct bencode *b) +{ + const struct bencode_list *la; + const struct bencode_list *lb; + struct bencode *va; + struct bencode *vb; + size_t cmplen; + size_t i; + int ret; + + la = ben_list_const_cast(a); + lb = ben_list_const_cast(b); + cmplen = (la->n <= lb->n) ? la->n : lb->n; + + for (i = 0; i < cmplen; ++i) { + va = ben_list_get(a, i); + vb = ben_list_get(b, i); + ret = ben_cmp(va, vb); + if (ret) + return ret; + } + if (la->n != lb->n) + return (la->n < lb->n) ? -1 : 1; + return 0; +} + +int ben_cmp(const struct bencode *a, const struct bencode *b) +{ + size_t cmplen; + int ret; + const struct bencode_int *ia; + const struct bencode_int *ib; + const struct bencode_str *sa; + const struct bencode_str *sb; + const struct bencode_user *ua; + const struct bencode_user *ub; + + if (a->type != b->type) + return (a->type == BENCODE_INT) ? -1 : 1; + + switch (a->type) { + case BENCODE_INT: + ia = ben_int_const_cast(a); + ib = ben_int_const_cast(b); + if (ia->ll < ib->ll) + return -1; + if (ib->ll < ia->ll) + return 1; + return 0; + case BENCODE_STR: + sa = ben_str_const_cast(a); + sb = ben_str_const_cast(b); + cmplen = (sa->len <= sb->len) ? sa->len : sb->len; + ret = memcmp(sa->s, sb->s, cmplen); + if (ret) + return ret < 0 ? -1 : 1; + if (sa->len != sb->len) + return (sa->len < sb->len) ? -1 : 1; + return 0; + case BENCODE_DICT: + return cmp_dict(a, b); + case BENCODE_LIST: + return cmp_list(a, b); + case BENCODE_USER: + ua = ben_user_const_cast(a); + ub = ben_user_const_cast(b); + if (ua->info != ub->info) + return (a < b) ? 
-1 : 1; + return ua->info->cmp(a, b); + default: + die("Invalid type %c\n", b->type); + } +} + +int ben_cmp_with_str(const struct bencode *a, const char *s) +{ + struct bencode_str b; + inplace_ben_str(&b, s, strlen(s)); + return ben_cmp(a, (struct bencode *) &b); +} + +int ben_cmp_qsort(const void *a, const void *b) +{ + const struct bencode *akey = ((const struct bencode_keyvalue *) a)->key; + const struct bencode *bkey = ((const struct bencode_keyvalue *) b)->key; + return ben_cmp(akey, bkey); +} + +static struct bencode *decode_bool(struct ben_decode_ctx *ctx) +{ + struct bencode_bool *b; + char value; + char c; + if (ben_need_bytes(ctx, 2)) + return ben_insufficient_ptr(ctx); + ctx->off++; + + c = ben_current_char(ctx); + if (c != '0' && c != '1') + return ben_invalid_ptr(ctx); + + value = (c == '1'); + b = alloc(BENCODE_BOOL); + if (b == NULL) + return ben_oom_ptr(ctx); + + b->b = value; + ctx->off++; + return (struct bencode *) b; +} + +static size_t hash_bucket(long long hash, const struct bencode_dict *d) +{ + return hash & (d->alloc - 1); +} + +static size_t hash_bucket_head(long long hash, const struct bencode_dict *d) +{ + if (d->buckets == NULL) + return -1; + return d->buckets[hash_bucket(hash, d)]; +} + +static int resize_dict(struct bencode_dict *d, size_t newalloc) +{ + size_t *newbuckets; + struct bencode_dict_node *newnodes;; + size_t pos; + + if (newalloc == -1) { + if (d->alloc >= DICT_MAX_ALLOC) + return -1; + + if (d->alloc == 0) + newalloc = 4; + else + newalloc = d->alloc * 2; + } else { + size_t x; + if (newalloc < d->n || newalloc > DICT_MAX_ALLOC) + return -1; + /* Round to next power of two */ + x = 1; + while (x < newalloc) + x <<= 1; + assert(x >= newalloc); + newalloc = x; + if (newalloc > DICT_MAX_ALLOC) + return -1; + } + + /* size must be a power of two */ + assert((newalloc & (newalloc - 1)) == 0); + + newbuckets = realloc(d->buckets, sizeof(newbuckets[0]) * newalloc); + newnodes = realloc(d->nodes, sizeof(newnodes[0]) * 
newalloc); + if (newnodes == NULL || newbuckets == NULL) { + free(newnodes); + free(newbuckets); + return -1; + } + + d->alloc = newalloc; + d->buckets = newbuckets; + d->nodes = newnodes; + + /* Clear all buckets */ + memset(d->buckets, -1, d->alloc * sizeof(d->buckets[0])); + + /* Reinsert nodes into buckets */ + for (pos = 0; pos < d->n; pos++) { + struct bencode_dict_node *node = &d->nodes[pos]; + size_t bucket = hash_bucket(node->hash, d); + node->next = d->buckets[bucket]; + d->buckets[bucket] = pos; + } + + return 0; +} + +/* The string/binary object hash is copied from Python */ +static long long str_hash(const unsigned char *s, size_t len) +{ + long long hash; + size_t i; + if (len == 0) + return 0; + hash = s[0] << 7; + for (i = 0; i < len; i++) + hash = (1000003 * hash) ^ s[i]; + hash ^= len; + if (hash == -1) + hash = -2; + return hash; +} + +long long ben_str_hash(const struct bencode *b) +{ + const struct bencode_str *bstr = ben_str_const_cast(b); + const unsigned char *s = (unsigned char *) bstr->s; + return str_hash(s, bstr->len); +} + +long long ben_int_hash(const struct bencode *b) +{ + long long x = ben_int_const_cast(b)->ll; + return (x == -1) ? 
-2 : x; +} + +long long ben_hash(const struct bencode *b) +{ + switch (b->type) { + case BENCODE_INT: + return ben_int_hash(b); + case BENCODE_STR: + return ben_str_hash(b); + default: + die("hash: Invalid type: %d\n", b->type); + } +} + +static struct bencode *decode_dict(struct ben_decode_ctx *ctx) +{ + struct bencode *key; + struct bencode *lastkey = NULL; + struct bencode *value; + struct bencode_dict *d; + + d = alloc(BENCODE_DICT); + if (d == NULL) { + warn("Not enough memory for dict\n"); + return ben_oom_ptr(ctx); + } + + ctx->off++; + + while (ctx->off < ctx->len && ben_current_char(ctx) != 'e') { + key = ben_ctx_decode(ctx); + if (key == NULL) + goto error; + if (key->type != BENCODE_INT && key->type != BENCODE_STR) { + ben_free(key); + key = NULL; + ctx->error = BEN_INVALID; + warn("Invalid dict key type\n"); + goto error; + } + + if (lastkey != NULL && ben_cmp(lastkey, key) >= 0) { + ben_free(key); + key = NULL; + ctx->error = BEN_INVALID; + goto error; + } + + value = ben_ctx_decode(ctx); + if (value == NULL) { + ben_free(key); + key = NULL; + goto error; + } + + if (ben_dict_set((struct bencode *) d, key, value)) { + ben_free(key); + ben_free(value); + key = NULL; + value = NULL; + ctx->error = BEN_NO_MEMORY; + goto error; + } + + lastkey = key; + } + if (ctx->off >= ctx->len) { + ctx->error = BEN_INSUFFICIENT; + goto error; + } + + ctx->off++; + + return (struct bencode *) d; + +error: + ben_free((struct bencode *) d); + return NULL; +} + +static size_t find(const struct ben_decode_ctx *ctx, char c) +{ + char *match = memchr(ctx->data + ctx->off, c, ctx->len - ctx->off); + if (match == NULL) + return -1; + return (size_t) (match - ctx->data); +} + +/* off is the position of first number in */ +static int read_long_long(long long *ll, struct ben_decode_ctx *ctx, int c) +{ + char buf[LONGLONGSIZE]; /* fits all 64 bit integers */ + char *endptr; + size_t slen; + size_t pos = find(ctx, c); + + if (pos == -1) + return insufficient(ctx); + + slen = pos - 
ctx->off; + if (slen == 0 || slen >= sizeof buf) + return invalid(ctx); + + assert(slen < sizeof buf); + memcpy(buf, ctx->data + ctx->off, slen); + buf[slen] = 0; + + if (buf[0] != '-' && !isdigit(buf[0])) + return invalid(ctx); + + errno = 0; + *ll = strtoll(buf, &endptr, 10); + if (errno == ERANGE || *endptr != 0) + return invalid(ctx); + + /* + * Demand a unique encoding for all integers. + * Zero may not begin with a (minus) sign. + * Non-zero integers may not have leading zeros in the encoding. + */ + if (buf[0] == '-' && buf[1] == '0') + return invalid(ctx); + if (buf[0] == '0' && pos != (ctx->off + 1)) + return invalid(ctx); + + ctx->off = pos + 1; + return 0; +} + +static struct bencode *decode_int(struct ben_decode_ctx *ctx) +{ + struct bencode_int *b; + long long ll; + ctx->off++; + if (read_long_long(&ll, ctx, 'e')) + return NULL; + b = alloc(BENCODE_INT); + if (b == NULL) + return ben_oom_ptr(ctx); + b->ll = ll; + return (struct bencode *) b; +} + +static int resize_list(struct bencode_list *list, size_t newalloc) +{ + struct bencode **newvalues; + size_t newsize; + + if (newalloc == -1) { + if (list->alloc >= MAX_ALLOC) + return -1; + if (list->alloc == 0) + newalloc = 4; + else + newalloc = list->alloc * 2; + } else { + if (newalloc < list->n || newalloc > MAX_ALLOC) + return -1; + } + + newsize = sizeof(list->values[0]) * newalloc; + newvalues = realloc(list->values, newsize); + if (newvalues == NULL) + return -1; + list->alloc = newalloc; + list->values = newvalues; + return 0; +} + +static struct bencode *decode_list(struct ben_decode_ctx *ctx) +{ + struct bencode_list *l = alloc(BENCODE_LIST); + if (l == NULL) + return ben_oom_ptr(ctx); + + ctx->off++; + + while (ctx->off < ctx->len && ben_current_char(ctx) != 'e') { + struct bencode *b = ben_ctx_decode(ctx); + if (b == NULL) + goto error; + if (ben_list_append((struct bencode *) l, b)) { + ben_free(b); + ctx->error = BEN_NO_MEMORY; + goto error; + } + } + + if (ctx->off >= ctx->len) { + 
ctx->error = BEN_INSUFFICIENT; + goto error; + } + + ctx->off++; + return (struct bencode *) l; + +error: + ben_free((struct bencode *) l); + return NULL; +} + +static size_t read_size_t(struct ben_decode_ctx *ctx, int c) +{ + long long ll; + size_t s; + if (read_long_long(&ll, ctx, c)) + return -1; + if (ll < 0) + return invalid(ctx); + /* + * Test that information is not lost when converting from long long + * to size_t + */ + s = (size_t) ll; + if (ll != (long long) s) + return invalid(ctx); + return s; +} + +static struct bencode *decode_str(struct ben_decode_ctx *ctx) +{ + struct bencode *b; + size_t datalen = read_size_t(ctx, ':'); /* Read the string length */ + if (datalen == -1) + return NULL; + + if (ben_need_bytes(ctx, datalen)) + return ben_insufficient_ptr(ctx); + + /* Allocate string structure and copy data into it */ + b = ben_blob(ctx->data + ctx->off, datalen); + ctx->off += datalen; + return b; +} + +struct bencode *ben_ctx_decode(struct ben_decode_ctx *ctx) +{ + char c; + struct bencode_type *type; + struct bencode *b; + ctx->level++; + if (ctx->level > 256) + return ben_invalid_ptr(ctx); + + if (ctx->off == ctx->len) + return ben_insufficient_ptr(ctx); + + assert (ctx->off < ctx->len); + c = ben_current_char(ctx); + switch (c) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + b = decode_str(ctx); + break; + case 'b': + b = decode_bool(ctx); + break; + case 'd': + b = decode_dict(ctx); + break; + case 'i': + b = decode_int(ctx); + break; + case 'l': + b = decode_list(ctx); + break; + default: + if (ctx->types && (unsigned char) c < 128) { + type = ctx->types[(unsigned char) c]; + if (type) { + ctx->off++; + b = type->decode(ctx); + } else + return ben_invalid_ptr(ctx); + } else + return ben_invalid_ptr(ctx); + } + ctx->level--; + return b; +} + +struct bencode *ben_decode(const void *data, size_t len) +{ + struct ben_decode_ctx ctx = {.data = data, .len = len}; + struct 
bencode *b = ben_ctx_decode(&ctx); + if (b != NULL && ctx.off != len) { + ben_free(b); + return NULL; + } + return b; +} + +struct bencode *ben_decode2(const void *data, size_t len, size_t *off, int *error) +{ + struct ben_decode_ctx ctx = {.data = data, .len = len, .off = *off}; + struct bencode *b = ben_ctx_decode(&ctx); + *off = ctx.off; + if (error != NULL) { + assert((b != NULL) ^ (ctx.error != 0)); + *error = ctx.error; + } + return b; +} + +struct bencode *ben_decode3(const void *data, size_t len, size_t *off, int *error, struct bencode_type *types[128]) +{ + struct ben_decode_ctx ctx = {.data = data, .len = len, .off = *off, + .types = types}; + struct bencode *b = ben_ctx_decode(&ctx); + *off = ctx.off; + if (error != NULL) { + assert((b != NULL) ^ (ctx.error != 0)); + *error = ctx.error; + } + return b; +} + +static struct bencode *decode_printed_bool(struct ben_decode_ctx *ctx) +{ + struct bencode *b; + int bval = -1; + + if (try_match(ctx, "True")) { + if (ben_need_bytes(ctx, 4)) + return ben_insufficient_ptr(ctx); + } else { + bval = 1; + } + + if (bval < 0) { + /* It's not 'True', so it can only be 'False'. Verify it. 
*/ + if (try_match_with_errors(ctx, "False")) + return NULL; + bval = 0; + } + + assert(bval == 0 || bval == 1); + b = ben_bool(bval); + if (b == NULL) + return ben_oom_ptr(ctx); + return b; +} + +static struct bencode *decode_printed_dict(struct ben_decode_ctx *ctx) +{ + struct bencode *d = ben_dict(); + struct bencode *key = NULL; + struct bencode *value = NULL; + + if (d == NULL) + return ben_oom_ptr(ctx); + + ctx->off++; + + while (1) { + if (seek_char(ctx)) + goto nullpath; + if (ben_current_char(ctx) == '}') { + ctx->off++; + break; + } + + key = decode_printed(ctx); + if (key == NULL) + goto nullpath; + + if (seek_char(ctx)) + goto nullpath; + if (ben_current_char(ctx) != ':') + goto invalidpath; + ctx->off++; + + value = decode_printed(ctx); + if (value == NULL) + goto nullpath; + + if (ben_dict_set(d, key, value)) { + ben_free(key); + ben_free(value); + ben_free(d); + return ben_oom_ptr(ctx); + } + key = NULL; + value = NULL; + + if (seek_char(ctx)) + goto nullpath; + if (ben_current_char(ctx) == ',') + ctx->off++; + else if (ben_current_char(ctx) != '}') + goto invalidpath; + } + return d; + +invalidpath: + ben_free(key); + ben_free(value); + ben_free(d); + return ben_invalid_ptr(ctx); + +nullpath: + ben_free(key); + ben_free(value); + ben_free(d); + return NULL; +} + +static struct bencode *decode_printed_int(struct ben_decode_ctx *ctx) +{ + long long ll; + char buf[LONGLONGSIZE]; + char *end; + size_t pos = 0; + struct bencode *b; + int gotzero = 0; + int base = 10; + int neg = 0; + + if (ben_current_char(ctx) == '-') { + neg = 1; + ctx->off++; + } + if (ctx->off == ctx->len) + return ben_insufficient_ptr(ctx); + + if (ben_current_char(ctx) == '0') { + buf[pos] = '0'; + pos++; + ctx->off++; + gotzero = 1; + } + + if (gotzero) { + if (ctx->off == ctx->len) { + ll = 0; + goto returnwithval; + } + if (ben_current_char(ctx) == 'x') { + pos = 0; + base = 16; + ctx->off++; + if (ctx->off == ctx->len) + return ben_insufficient_ptr(ctx); + } else if 
(isdigit(ben_current_char(ctx))) { + base = 8; + } + } else { + if (ctx->off == ctx->len) + return ben_insufficient_ptr(ctx); + } + + while (ctx->off < ctx->len && pos < sizeof buf) { + char c = ben_current_char(ctx); + if (base == 16) { + if (!isxdigit(c)) + break; + } else { + if (!isdigit(c)) + break; + } + buf[pos] = c; + pos++; + ctx->off++; + } + if (pos == 0 || pos == sizeof buf) + return ben_invalid_ptr(ctx); + buf[pos] = 0; + ll = strtoll(buf, &end, base); + if (*end != 0) + return ben_invalid_ptr(ctx); + +returnwithval: + if (neg) + ll = -ll; + b = ben_int(ll); + if (b == NULL) + return ben_oom_ptr(ctx); + return b; +} + +static struct bencode *decode_printed_list(struct ben_decode_ctx *ctx) +{ + struct bencode *l = ben_list(); + struct bencode *b = NULL; + + if (l == NULL) + return ben_oom_ptr(ctx); + + ctx->off++; + + while (1) { + if (seek_char(ctx)) + goto nullpath; + if (ben_current_char(ctx) == ']') { + ctx->off++; + break; + } + b = decode_printed(ctx); + if (b == NULL) + goto nullpath; + if (ben_list_append(l, b)) { + ben_free(b); + ben_free(l); + return ben_oom_ptr(ctx); + } + b = NULL; + + if (seek_char(ctx)) + goto nullpath; + if (ben_current_char(ctx) == ',') + ctx->off++; + else if (ben_current_char(ctx) != ']') { + ben_free(l); + return ben_invalid_ptr(ctx); + } + } + return l; + +nullpath: + ben_free(b); + ben_free(l); + return NULL; +} + +static struct bencode *decode_printed_str(struct ben_decode_ctx *ctx) +{ + size_t pos; + char *s = NULL; + size_t len = 0; + char initial = ben_current_char(ctx); + struct bencode *b; + + ctx->off++; + pos = ctx->off; + while (pos < ctx->len) { + char c = ctx->data[pos]; + if (!isprint(c)) + return ben_invalid_ptr(ctx); + if (c == initial) + break; + len++; + pos++; + if (c != '\\') + continue; /* Normal printable char, e.g. 
'a' */ + /* Handle '\\' */ + if (pos == ctx->len) + return ben_insufficient_ptr(ctx); + + c = ctx->data[pos]; + pos++; + if (c == 'x') { + /* hexadecimal value: \xHH */ + pos += 2; + } + } + if (pos >= ctx->len) + return ben_insufficient_ptr(ctx); + + s = malloc(len + 1); + if (s == NULL) + return ben_oom_ptr(ctx); + + pos = 0; + while (ctx->off < ctx->len) { + char c = ben_current_char(ctx); + assert(isprint(c)); + if (c == initial) + break; + assert(pos < len); + ctx->off++; + if (c != '\\') { + s[pos] = c; + pos++; + continue; /* Normal printable char, e.g. 'a' */ + } + /* Handle '\\' */ + + /* + * Note, we do assert because we have already verified in the + * previous loop that there is sufficient data. + */ + assert(ctx->off != ctx->len); + c = ben_current_char(ctx); + ctx->off++; + if (c == 'x') { + /* hexadecimal value: \xHH */ + char *end; + unsigned long x; + char buf[3]; + assert((ctx->off + 1) < ctx->len); + buf[0] = ctx->data[ctx->off + 0]; + buf[1] = ctx->data[ctx->off + 1]; + buf[2] = 0; + ctx->off += 2; + x = strtoul(buf, &end, 16); + if (*end != 0) + goto invalid; + assert(x < 256); + c = (char) x; + } + s[pos] = c; + pos++; + } + assert(pos == len); + if (ctx->off >= ctx->len) + return ben_insufficient_ptr(ctx); + ctx->off++; + + s[pos] = 0; /* the area must always be zero terminated! 
*/ + + b = internal_blob(s, len); + if (b == NULL) { + free(s); + return ben_oom_ptr(ctx); + } + return b; + +invalid: + free(s); + return ben_invalid_ptr(ctx); +} + +static struct bencode *decode_printed(struct ben_decode_ctx *ctx) +{ + struct bencode *b; + + ctx->level++; + if (ctx->level > 256) + return ben_invalid_ptr(ctx); + + if (seek_char(ctx)) + return NULL; + + switch (ben_current_char(ctx)) { + case '\'': + case '"': + b = decode_printed_str(ctx); + break; + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + b = decode_printed_int(ctx); + break; + case 'F': + case 'T': + b = decode_printed_bool(ctx); + break; + case '[': + b = decode_printed_list(ctx); + break; + case '{': + b = decode_printed_dict(ctx); + break; + default: + return ben_invalid_ptr(ctx); + } + ctx->level--; + return b; +} + +struct bencode *ben_decode_printed(const void *data, size_t len) +{ + struct ben_decode_ctx ctx = {.data = data, .len = len}; + return decode_printed(&ctx); +} + +struct bencode *ben_decode_printed2(const void *data, size_t len, size_t *off, struct bencode_error *error) +{ + struct ben_decode_ctx ctx = {.data = data, .len = len, .off = *off}; + struct bencode *b = decode_printed(&ctx); + *off = ctx.off; + if (error != NULL) { + assert((b != NULL) ^ (ctx.error != 0)); + error->error = ctx.error; + if (b != NULL) { + error->off = 0; + error->line = 0; + } else { + error->off = ctx.off; + error->line = ctx.line; + } + } + return b; +} + +static void free_dict(struct bencode_dict *d) +{ + size_t pos; + if (d->shared) + return; + for (pos = 0; pos < d->n; pos++) { + ben_free(d->nodes[pos].key); + ben_free(d->nodes[pos].value); + d->nodes[pos].key = NULL; + d->nodes[pos].value = NULL; + } + free(d->buckets); + free(d->nodes); +} + +static void free_list(struct bencode_list *list) +{ + size_t pos; + if (list->shared) + return; + for (pos = 0; pos < list->n; pos++) { + 
ben_free(list->values[pos]); + list->values[pos] = NULL; + } + free(list->values); +} + +int ben_put_char(struct ben_encode_ctx *ctx, char c) +{ + if (ctx->pos >= ctx->size) + return -1; + ctx->data[ctx->pos] = c; + ctx->pos++; + return 0; +} + +int ben_put_buffer(struct ben_encode_ctx *ctx, const void *buf, size_t len) +{ + if ((ctx->pos + len) > ctx->size) + return -1; + memcpy(ctx->data + ctx->pos, buf, len); + ctx->pos += len; + return 0; +} + +static int puthexchar(struct ben_encode_ctx *ctx, unsigned char hex) +{ + char buf[5]; + int len = snprintf(buf, sizeof buf, "\\x%.2x", hex); + assert(len == 4); + return ben_put_buffer(ctx, buf, len); +} + +static int putlonglong(struct ben_encode_ctx *ctx, long long ll) +{ + char buf[LONGLONGSIZE]; + int len = snprintf(buf, sizeof buf, "%lld", ll); + assert(len > 0); + return ben_put_buffer(ctx, buf, len); +} + +static int putunsignedlonglong(struct ben_encode_ctx *ctx, unsigned long long llu) +{ + char buf[LONGLONGSIZE]; + int len = snprintf(buf, sizeof buf, "%llu", llu); + assert(len > 0); + return ben_put_buffer(ctx, buf, len); +} + +static int putstr(struct ben_encode_ctx *ctx, char *s) +{ + return ben_put_buffer(ctx, s, strlen(s)); +} + +static int print(struct ben_encode_ctx *ctx, const struct bencode *b) +{ + const struct bencode_bool *boolean; + const struct bencode_int *integer; + const struct bencode_list *list; + const struct bencode_str *s; + size_t i; + size_t len; + struct bencode_keyvalue *pairs; + + switch (b->type) { + case BENCODE_BOOL: + boolean = ben_bool_const_cast(b); + return putstr(ctx, boolean->b ? 
"True" : "False"); + + case BENCODE_DICT: + if (ben_put_char(ctx, '{')) + return -1; + + pairs = ben_dict_ordered_items(b); + if (pairs == NULL) { + warn("No memory for dict serialization\n"); + return -1; + } + + len = ben_dict_len(b); + for (i = 0; i < len; i++) { + if (print(ctx, pairs[i].key)) + break; + if (putstr(ctx, ": ")) + break; + if (print(ctx, pairs[i].value)) + break; + if (i < (len - 1)) { + if (putstr(ctx, ", ")) + break; + } + } + free(pairs); + pairs = NULL; + if (i < len) + return -1; + + return ben_put_char(ctx, '}'); + + case BENCODE_INT: + integer = ben_int_const_cast(b); + + if (putlonglong(ctx, integer->ll)) + return -1; + + return 0; + + case BENCODE_LIST: + if (ben_put_char(ctx, '[')) + return -1; + list = ben_list_const_cast(b); + for (i = 0; i < list->n; i++) { + if (print(ctx, list->values[i])) + return -1; + if (i < (list->n - 1) && putstr(ctx, ", ")) + return -1; + } + return ben_put_char(ctx, ']'); + + case BENCODE_STR: + s = ben_str_const_cast(b); + if (ben_put_char(ctx, '\'')) + return -1; + for (i = 0; i < s->len; i++) { + if (!isprint(s->s[i])) { + if (puthexchar(ctx, s->s[i])) + return -1; + continue; + } + + switch (s->s[i]) { + case '\'': + case '\\': + /* Need escape character */ + if (ben_put_char(ctx, '\\')) + return -1; + default: + if (ben_put_char(ctx, s->s[i])) + return -1; + break; + } + } + return ben_put_char(ctx, '\''); + default: + die("serialization type %d not implemented\n", b->type); + } +} + +static size_t get_printed_size(const struct bencode *b) +{ + size_t pos; + const struct bencode_bool *boolean; + const struct bencode_dict *d; + const struct bencode_int *i; + const struct bencode_list *l; + const struct bencode_str *s; + size_t size = 0; + char buf[1]; + + switch (b->type) { + case BENCODE_BOOL: + boolean = ben_bool_const_cast(b); + return boolean->b ? 
4 : 5; /* "True" and "False" */ + case BENCODE_DICT: + size++; /* "{" */ + d = ben_dict_const_cast(b); + for (pos = 0; pos < d->n; pos++) { + size += get_printed_size(d->nodes[pos].key); + size += 2; /* ": " */ + size += get_printed_size(d->nodes[pos].value); + if (pos < (d->n - 1)) + size += 2; /* ", " */ + } + size++; /* "}" */ + return size; + case BENCODE_INT: + i = ben_int_const_cast(b); + return snprintf(buf, 0, "%lld", i->ll); + case BENCODE_LIST: + size++; /* "[" */ + l = ben_list_const_cast(b); + for (pos = 0; pos < l->n; pos++) { + size += get_printed_size(l->values[pos]); + if (pos < (l->n - 1)) + size += 2; /* ", " */ + } + size++; /* "]" */ + return size; + case BENCODE_STR: + s = ben_str_const_cast(b); + size++; /* ' */ + for (pos = 0; pos < s->len; pos++) { + if (!isprint(s->s[pos])) { + size += 4; /* "\xDD" */ + continue; + } + switch (s->s[pos]) { + case '\'': + case '\\': + size += 2; /* escaped characters */ + break; + default: + size++; + break; + } + } + size++; /* ' */ + return size; + default: + die("Unknown type: %c\n", b->type); + } +} + +int ben_ctx_encode(struct ben_encode_ctx *ctx, const struct bencode *b) +{ + const struct bencode_bool *boolean; + const struct bencode_int *integer; + const struct bencode_list *list; + const struct bencode_str *s; + const struct bencode_user *u; + size_t i; + size_t len; + struct bencode_keyvalue *pairs; + + switch (b->type) { + case BENCODE_BOOL: + boolean = ben_bool_const_cast(b); + return putstr(ctx, boolean->b ? 
"b1" : "b0"); + + case BENCODE_DICT: + if (ben_put_char(ctx, 'd')) + return -1; + + pairs = ben_dict_ordered_items(b); + if (pairs == NULL) { + warn("No memory for dict serialization\n"); + return -1; + } + + len = ben_dict_len(b); + for (i = 0; i < len; i++) { + if (ben_ctx_encode(ctx, pairs[i].key)) + break; + if (ben_ctx_encode(ctx, pairs[i].value)) + break; + } + free(pairs); + pairs = NULL; + if (i < len) + return -1; + + return ben_put_char(ctx, 'e'); + + case BENCODE_INT: + if (ben_put_char(ctx, 'i')) + return -1; + integer = ben_int_const_cast(b); + if (putlonglong(ctx, integer->ll)) + return -1; + return ben_put_char(ctx, 'e'); + + case BENCODE_LIST: + if (ben_put_char(ctx, 'l')) + return -1; + + list = ben_list_const_cast(b); + for (i = 0; i < list->n; i++) { + if (ben_ctx_encode(ctx, list->values[i])) + return -1; + } + + return ben_put_char(ctx, 'e'); + + case BENCODE_STR: + s = ben_str_const_cast(b); + if (putunsignedlonglong(ctx, ((long long) s->len))) + return -1; + if (ben_put_char(ctx, ':')) + return -1; + return ben_put_buffer(ctx, s->s, s->len); + + case BENCODE_USER: + u = ben_user_const_cast(b); + return u->info->encode(ctx, b); + + default: + die("serialization type %d not implemented\n", b->type); + } +} + +static size_t get_size(const struct bencode *b) +{ + size_t pos; + const struct bencode_dict *d; + const struct bencode_int *i; + const struct bencode_list *l; + const struct bencode_str *s; + const struct bencode_user *u; + size_t size = 0; + char buf[1]; + + switch (b->type) { + case BENCODE_BOOL: + return 2; + case BENCODE_DICT: + d = ben_dict_const_cast(b); + for (pos = 0; pos < d->n; pos++) { + size += get_size(d->nodes[pos].key); + size += get_size(d->nodes[pos].value); + } + return size + 2; + case BENCODE_INT: + i = ben_int_const_cast(b); + return 2 + snprintf(buf, 0, "%lld", i->ll); + case BENCODE_LIST: + l = ben_list_const_cast(b); + for (pos = 0; pos < l->n; pos++) + size += get_size(l->values[pos]); + return size + 2; + case 
BENCODE_STR: + s = ben_str_const_cast(b); + return snprintf(buf, 0, "%zu", s->len) + 1 + s->len; + case BENCODE_USER: + u = ben_user_const_cast(b); + return u->info->get_size(b); + default: + die("Unknown type: %c\n", b->type); + } +} + +size_t ben_encoded_size(const struct bencode *b) +{ + return get_size(b); +} + +void *ben_encode(size_t *len, const struct bencode *b) +{ + size_t size = get_size(b); + void *data = malloc(size); + struct ben_encode_ctx ctx = {.data = data, .size = size}; + if (data == NULL) { + warn("No memory to encode\n"); + return NULL; + } + if (ben_ctx_encode(&ctx, b)) { + free(ctx.data); + return NULL; + } + assert(ctx.pos == size); + *len = ctx.pos; + return data; +} + +size_t ben_encode2(char *data, size_t maxlen, const struct bencode *b) +{ + struct ben_encode_ctx ctx = {.data = data, .size = maxlen, .pos = 0}; + if (ben_ctx_encode(&ctx, b)) + return -1; + return ctx.pos; +} + +void ben_free(struct bencode *b) +{ + struct bencode_str *s; + struct bencode_user *u; + size_t size; + if (b == NULL) + return; + switch (b->type) { + case BENCODE_BOOL: + break; + case BENCODE_DICT: + free_dict(ben_dict_cast(b)); + break; + case BENCODE_INT: + break; + case BENCODE_LIST: + free_list(ben_list_cast(b)); + break; + case BENCODE_STR: + s = ben_str_cast(b); + free(s->s); + break; + case BENCODE_USER: + u = ben_user_cast(b); + if (u->info->free) + u->info->free(b); + break; + default: + die("invalid type: %d\n", b->type); + } + + if (b->type == BENCODE_USER) + size = ((struct bencode_user *) b)->info->size; + else + size = type_size(b->type); + memset(b, -1, size); /* data poison */ + free(b); +} + +struct bencode *ben_blob(const void *data, size_t len) +{ + struct bencode_str *b = alloc(BENCODE_STR); + if (b == NULL) + return NULL; + /* Allocate one extra byte for zero termination for convenient use */ + b->s = malloc(len + 1); + if (b->s == NULL) { + free(b); + return NULL; + } + memcpy(b->s, data, len); + b->len = len; + b->s[len] = 0; + return 
(struct bencode *) b; +} + +struct bencode *ben_bool(int boolean) +{ + struct bencode_bool *b = alloc(BENCODE_BOOL); + if (b == NULL) + return NULL; + b->b = boolean ? 1 : 0; + return (struct bencode *) b; +} + +struct bencode *ben_dict(void) +{ + return alloc(BENCODE_DICT); +} + +struct bencode *ben_dict_get(const struct bencode *dict, const struct bencode *key) +{ + const struct bencode_dict *d = ben_dict_const_cast(dict); + long long hash = ben_hash(key); + size_t pos = hash_bucket_head(hash, d); + while (pos != -1) { + assert(pos < d->n); + if (d->nodes[pos].hash == hash && + ben_cmp(d->nodes[pos].key, key) == 0) + return d->nodes[pos].value; + pos = d->nodes[pos].next; + } + return NULL; +} + +/* + * Note, we do not re-allocate memory, so one may not call ben_free for these + * instances. These are only used to optimize speed. + */ +static void inplace_ben_str(struct bencode_str *b, const char *s, size_t len) +{ + b->type = BENCODE_STR; + b->len = len; + b->s = (char *) s; +} + +static void inplace_ben_int(struct bencode_int *i, long long ll) +{ + i->type = BENCODE_INT; + i->ll = ll; +} + +struct bencode *ben_dict_get_by_str(const struct bencode *dict, const char *key) +{ + struct bencode_str s; + inplace_ben_str(&s, key, strlen(key)); + return ben_dict_get(dict, (struct bencode *) &s); +} + +struct bencode *ben_dict_get_by_int(const struct bencode *dict, long long key) +{ + struct bencode_int i; + inplace_ben_int(&i, key); + return ben_dict_get(dict, (struct bencode *) &i); +} + +/* + * Return a newly allocated array holding the dict->n key-value pairs of the + * dictionary, sorted with ben_cmp_qsort(). Returns NULL if the dictionary + * cast fails or if memory can not be allocated. The array itself is + * allocated with malloc(); the key and value pointers are copied from the + * dictionary nodes, not cloned. + */ +struct bencode_keyvalue *ben_dict_ordered_items(const struct bencode *b) +{ + struct bencode_keyvalue *pairs; + size_t i; + const struct bencode_dict *dict = ben_dict_const_cast(b); + if (dict == NULL) + return NULL; + /* + * malloc(0) may legally return NULL, which callers would mistake for + * an allocation failure. Always request room for at least one pair so + * that an empty dictionary yields a valid (empty) array. + */ + pairs = malloc((dict->n ? dict->n : 1) * sizeof(pairs[0])); + if (pairs == NULL) + return NULL; + for (i = 0; i < dict->n; i++) { + pairs[i].key = dict->nodes[i].key; + pairs[i].value = dict->nodes[i].value; + } + qsort(pairs, dict->n, sizeof(pairs[0]), ben_cmp_qsort); + return pairs;
+} + +static size_t dict_find_pos(struct bencode_dict *d, + const struct bencode *key, long long hash) +{ + size_t pos = hash_bucket_head(hash, d); + while (pos != -1) { + assert(pos < d->n); + if (d->nodes[pos].hash == hash && + ben_cmp(d->nodes[pos].key, key) == 0) + break; + pos = d->nodes[pos].next; + } + return pos; +} + +static void dict_unlink(struct bencode_dict *d, size_t bucket, size_t unlinkpos) +{ + size_t pos = d->buckets[bucket]; + size_t next; + size_t nextnext; + + assert(unlinkpos < d->n); + + if (pos == unlinkpos) { + next = d->nodes[unlinkpos].next; + assert(next < d->n || next == -1); + d->buckets[bucket] = next; + return; + } + while (pos != -1) { + assert(pos < d->n); + next = d->nodes[pos].next; + if (next == unlinkpos) { + nextnext = d->nodes[next].next; + assert(nextnext < d->n || nextnext == -1); + d->nodes[pos].next = nextnext; + return; + } + pos = next; + } + die("Key should have been found. Can not unlink position %zu.\n", unlinkpos); +} + +/* + * Remove the node matching 'key' (with precomputed 'hash') from 'd'. + * Returns the value that was stored for the key, or NULL if the key is not + * found. The key object stored in the dictionary is freed. + */ +static struct bencode *dict_pop(struct bencode_dict *d, + const struct bencode *key, long long hash) +{ + struct bencode *value; + size_t removebucket; + size_t tailpos; + size_t tailhash; + size_t tailbucket; + size_t removepos; + + /* + * Look the key up before touching the tail node: an empty dictionary + * has no tail, so d->nodes[d->n - 1] would be an out of bounds read. + */ + removepos = dict_find_pos(d, key, hash); + if (removepos == -1) + return NULL; + key = NULL; /* avoid using the pointer again, it may not be valid */ + + /* The key was found, so d->n >= 1 and the tail index is valid */ + removebucket = hash_bucket(hash, d); + tailpos = d->n - 1; + tailhash = d->nodes[tailpos].hash; + tailbucket = hash_bucket(tailhash, d); + + /* + * WARNING: complicated code follows. + * + * First, unlink the node to be removed and the tail node. + * We will actually later swap the positions of removed node and + * tail node inside the d->nodes array. We want to preserve + * d->nodes array in a state where positions from 0 to (d->n - 1) + * are always occupied with a valid node. This is done to make + * dictionary walk fast by simply walking positions 0 to (d->n - 1) + * in a for loop.
+ */ + dict_unlink(d, removebucket, removepos); + if (removepos != tailpos) + dict_unlink(d, tailbucket, tailpos); + + /* Then read the removed node and free its key */ + value = d->nodes[removepos].value; + ben_free(d->nodes[removepos].key); + + /* Then re-insert the unliked tail node in the place of removed node */ + d->nodes[removepos] = d->nodes[tailpos]; + memset(&d->nodes[tailpos], 0, sizeof d->nodes[tailpos]); /* poison */ + d->nodes[tailpos].next = ((size_t) -1) / 2; + + /* + * Then re-link the tail node to its bucket, unless the tail node + * was the one to be removed. + */ + if (removepos != tailpos) { + d->nodes[removepos].next = d->buckets[tailbucket]; + d->buckets[tailbucket] = removepos; + } + + d->n--; + + if (d->n <= (d->alloc / 4) && d->alloc >= 8) + resize_dict(d, d->alloc / 2); + + return value; +} + +struct bencode *ben_dict_pop(struct bencode *dict, const struct bencode *key) +{ + struct bencode_dict *d = ben_dict_cast(dict); + return dict_pop(d, key, ben_hash(key)); +} + +struct bencode *ben_dict_pop_by_str(struct bencode *dict, const char *key) +{ + struct bencode_str s; + inplace_ben_str(&s, key, strlen(key)); + return ben_dict_pop(dict, (struct bencode *) &s); +} + +struct bencode *ben_dict_pop_by_int(struct bencode *dict, long long key) +{ + struct bencode_int i; + inplace_ben_int(&i, key); + return ben_dict_pop(dict, (struct bencode *) &i); +} + +/* This can be used from the ben_dict_for_each() iterator */ +struct bencode *ben_dict_pop_current(struct bencode *dict, size_t *pos) +{ + struct bencode_dict *d = ben_dict_cast(dict); + struct bencode *value = ben_dict_pop(dict, d->nodes[*pos].key); + (*pos)--; + return value; +} + +int ben_dict_set(struct bencode *dict, struct bencode *key, struct bencode *value) +{ + struct bencode_dict *d = ben_dict_cast(dict); + long long hash = ben_hash(key); + size_t bucket; + size_t pos; + + assert(value != NULL); + + pos = hash_bucket_head(hash, d); + for (; pos != -1; pos = d->nodes[pos].next) { + 
assert(pos < d->n); + if (d->nodes[pos].hash != hash || ben_cmp(d->nodes[pos].key, key) != 0) + continue; + ben_free(d->nodes[pos].key); + ben_free(d->nodes[pos].value); + d->nodes[pos].key = key; + d->nodes[pos].value = value; + /* 'hash' and 'next' members stay the same */ + return 0; + } + + assert(d->n <= d->alloc); + if (d->n == d->alloc && resize_dict(d, -1)) + return -1; + + bucket = hash_bucket(hash, d); + pos = d->n; + d->nodes[pos] = (struct bencode_dict_node) {.hash = hash, + .key = key, + .value = value, + .next = d->buckets[bucket]}; + d->n++; + d->buckets[bucket] = pos; + return 0; +} + +int ben_dict_set_by_str(struct bencode *dict, const char *key, struct bencode *value) +{ + struct bencode *bkey = ben_str(key); + if (bkey == NULL) + return -1; + if (ben_dict_set(dict, bkey, value)) { + ben_free(bkey); + return -1; + } + return 0; +} + +int ben_dict_set_str_by_str(struct bencode *dict, const char *key, const char *value) +{ + struct bencode *bkey = ben_str(key); + struct bencode *bvalue = ben_str(value); + if (bkey == NULL || bvalue == NULL) { + ben_free(bkey); + ben_free(bvalue); + return -1; + } + if (ben_dict_set(dict, bkey, bvalue)) { + ben_free(bkey); + ben_free(bvalue); + return -1; + } + return 0; +} + +struct bencode *ben_int(long long ll) +{ + struct bencode_int *b = alloc(BENCODE_INT); + if (b == NULL) + return NULL; + b->ll = ll; + return (struct bencode *) b; +} + +struct bencode *ben_list(void) +{ + return alloc(BENCODE_LIST); +} + +int ben_list_append(struct bencode *list, struct bencode *b) +{ + struct bencode_list *l = ben_list_cast(list); + /* NULL pointer de-reference if the cast fails */ + assert(l->n <= l->alloc); + if (l->n == l->alloc && resize_list(l, -1)) + return -1; + assert(b != NULL); + l->values[l->n] = b; + l->n++; + return 0; +} + +/* + * Create a bencode string from the C string 's' and append it to 'list'. + * Returns 0 on success, -1 if the string can not be created or appended. + */ +int ben_list_append_str(struct bencode *list, const char *s) +{ + struct bencode *bs = ben_str(s); + if (bs == NULL) + return -1; + if (ben_list_append(list, bs)) { + /* The list did not store the string, so free it here */ + ben_free(bs); + return -1; + } + return 0; +} + +int 
ben_list_append_int(struct bencode *list, long long ll) +{ + struct bencode *bll = ben_int(ll); + if (bll == NULL) + return -1; + return ben_list_append(list, bll); +} + +struct bencode *ben_list_pop(struct bencode *list, size_t pos) +{ + struct bencode_list *l = ben_list_cast(list); + struct bencode *value; + + assert(pos < l->n); + + value = ben_list_get(list, pos); + + for (; (pos + 1) < l->n; pos++) + l->values[pos] = l->values[pos + 1]; + + l->values[l->n - 1] = NULL; + l->n--; + return value; +} + +void ben_list_set(struct bencode *list, size_t i, struct bencode *b) +{ + struct bencode_list *l = ben_list_cast(list); + if (i >= l->n) + die("ben_list_set() out of bounds: %zu\n", i); + + ben_free(l->values[i]); + assert(b != NULL); + l->values[i] = b; +} + +char *ben_print(const struct bencode *b) +{ + size_t size = get_printed_size(b); + char *data = malloc(size + 1); + struct ben_encode_ctx ctx = {.data = data, .size = size, .pos = 0}; + if (data == NULL) { + warn("No memory to print\n"); + return NULL; + } + if (print(&ctx, b)) { + free(data); + return NULL; + } + assert(ctx.pos == size); + data[ctx.pos] = 0; + return data; +} + +struct bencode *ben_str(const char *s) +{ + return ben_blob(s, strlen(s)); +} + +const char *ben_strerror(int error) +{ + switch (error) { + case BEN_OK: + return "OK (no error)"; + case BEN_INVALID: + return "Invalid data"; + case BEN_INSUFFICIENT: + return "Insufficient amount of data (need more data)"; + case BEN_NO_MEMORY: + return "Out of memory"; + case BEN_MISMATCH: + return "A given structure did not match unpack format"; + default: + fprintf(stderr, "Unknown error code: %d\n", error); + return NULL; + } +} + +static int unpack_pointer(const struct bencode *b, struct ben_decode_ctx *ctx, + va_list *vl) +{ + const char **str; + const struct bencode **ptr; + + ctx->off++; + + if (ctx->off >= ctx->len) + return insufficient(ctx); + + switch (ben_current_char(ctx)) { + case 's': /* %ps */ + ctx->off++; + if (b->type != 
BENCODE_STR) + return mismatch(ctx); + str = va_arg(*vl, const char **); + *str = ben_str_val(b); + return 0; + + case 'b': /* %pb */ + ctx->off++; + ptr = va_arg(*vl, const struct bencode **); + *ptr = b; + return 0; + + default: + return invalid(ctx); + } +} + +static int unpack_value(const struct bencode *b, struct ben_decode_ctx *ctx, + va_list *vl) +{ + long long val; + long long *ll; + long *l; + int *i; + unsigned long long *ull; + unsigned long *ul; + unsigned int *ui; + int longflag = 0; + + ctx->off++; + + while (ctx->off < ctx->len) { + switch (ben_current_char(ctx)) { + case 'l': + ctx->off++; + longflag++; + break; + case 'L': + case 'q': + ctx->off++; + longflag = 2; + break; + + case 'p': + return unpack_pointer(b, ctx, vl); + + /* signed */ + case 'd': + ctx->off++; + if (b->type != BENCODE_INT) + return mismatch(ctx); + val = ben_int_val(b); + switch (longflag) { + case 0: + i = va_arg(*vl, int *); + *i = val; + /* Test that no information was lost in conversion */ + if ((long long) *i != val) + return mismatch(ctx); + break; + case 1: + l = va_arg(*vl, long *); + *l = val; + if ((long long) *l != val) + return mismatch(ctx); + break; + case 2: + ll = va_arg(*vl, long long *); + *ll = val; + break; + } + return 0; + + /* unsigned */ + case 'u': + ctx->off++; + if (b->type != BENCODE_INT) + return mismatch(ctx); + val = ben_int_val(b); + if (val < 0) + return mismatch(ctx); + switch (longflag) { + case 0: + ui = va_arg(*vl, unsigned int *); + *ui = val; + if ((long long) *ui != val) + return mismatch(ctx); + break; + case 1: + ul = va_arg(*vl, unsigned long *); + *ul = val; + if ((long long) *ul != val) + return mismatch(ctx); + break; + case 2: + ull = va_arg(*vl, unsigned long long *); + *ull = val; + break; + } + return 0; + + default: + return invalid(ctx); + } + } + return insufficient(ctx); +} + +static int unpack_dict(const struct bencode *b, struct ben_decode_ctx *ctx, + va_list *vl) +{ + struct bencode *key = NULL; + const struct bencode 
*val; + + if (b->type != BENCODE_DICT) + return mismatch(ctx); + + ctx->off++; + + while (1) { + if (seek_char(ctx)) + return -1; + + if (ben_current_char(ctx) == '}') { + ctx->off++; + break; + } + switch (ben_current_char(ctx)) { + case '\'': + case '"': + key = decode_printed_str(ctx); + break; + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + key = decode_printed_int(ctx); + break; + default: + return invalid(ctx); + } + if (key == NULL) + return -1; + val = ben_dict_get(b, key); + ben_free(key); + if (val == NULL) + return mismatch(ctx); + + if (seek_char(ctx)) + return -1; + if (ben_current_char(ctx) != ':') + return invalid(ctx); + ctx->off++; + + if (unpack(val, ctx, vl)) + return -1; + + if (seek_char(ctx)) + return -1; + if (ben_current_char(ctx) == ',') + ctx->off++; + else if (ben_current_char(ctx) != '}') + return invalid(ctx); + } + return 0; +} + +static int unpack_list(const struct bencode *b, struct ben_decode_ctx *ctx, + va_list *vl) +{ + const struct bencode_list *list; + size_t i = 0; + + if (b->type != BENCODE_LIST) + return mismatch(ctx); + list = ben_list_const_cast(b); + + ctx->off++; + + while (1) { + if (seek_char(ctx)) + return -1; + + if (ben_current_char(ctx) == ']') { + ctx->off++; + break; + } + if (i >= list->n) + return mismatch(ctx); + if (unpack(list->values[i], ctx, vl)) + return -1; + i++; + + if (seek_char(ctx)) + return -1; + if (ben_current_char(ctx) == ',') + ctx->off++; + else if (ben_current_char(ctx) != ']') + return invalid(ctx); + } + if (i != list->n) + return mismatch(ctx); + return 0; +} + +static int unpack(const struct bencode *b, struct ben_decode_ctx *ctx, + va_list *vl) +{ + if (seek_char(ctx)) + return insufficient(ctx); + + switch (ben_current_char(ctx)) { + case '{': + return unpack_dict(b, ctx, vl); + case '[': + return unpack_list(b, ctx, vl); + case '%': + return unpack_value(b, ctx, vl); + default: + break; + } + return 
-1; +} + +static int unpack_all(const struct bencode *b, struct ben_decode_ctx *ctx, + va_list *vl) +{ + if (unpack(b, ctx, vl)) + return -1; + /* check for left over characters */ + seek_char(ctx); + ctx->error = 0; + if (ctx->off < ctx->len) + return invalid(ctx); + return 0; +} + +int ben_unpack(const struct bencode *b, const char *fmt, ...) +{ + struct ben_decode_ctx ctx = {.data = fmt, .len = strlen(fmt)}; + int ret; + va_list vl; + va_start(vl, fmt); + ret = unpack_all(b, &ctx, &vl); + va_end(vl); + return ret; +} + +int ben_unpack2(const struct bencode *b, size_t *off, struct bencode_error *error, const char *fmt, ...) +{ + struct ben_decode_ctx ctx = {.data = fmt, .len = strlen(fmt)}; + int ret; + va_list vl; + va_start(vl, fmt); + ret = unpack_all(b, &ctx, &vl); + va_end(vl); + + *off = ctx.off; + if (error != NULL) { + assert((ret == 0) ^ (ctx.error != 0)); + error->error = ctx.error; + if (ret != 0) { + error->off = 0; + error->line = 0; + } else { + error->off = ctx.off; + error->line = ctx.line; + } + } + return 0; +} + +static struct bencode *pack_pointer(struct ben_decode_ctx *ctx, va_list *vl) +{ + struct bencode *b = NULL; + + ctx->off++; + + if (ctx->off >= ctx->len) + return ben_insufficient_ptr(ctx); + + switch (ben_current_char(ctx)) { + case 'b': /* %pb */ + ctx->off++; + b = va_arg(*vl, struct bencode *); + break; + default: + return ben_invalid_ptr(ctx); + } + return b; +} + +static struct bencode *pack_value(struct ben_decode_ctx *ctx, va_list *vl) +{ + struct bencode *b = NULL; + unsigned long long ull; + long long val; + int longflag = 0; + + ctx->off++; + + while (ctx->off < ctx->len) { + switch (ben_current_char(ctx)) { + case 'l': + ctx->off++; + longflag++; + break; + case 'L': + case 'q': + ctx->off++; + longflag = 2; + break; + + case 's': + ctx->off++; + b = ben_str(va_arg(*vl, const char *)); + if (b == NULL) + return ben_oom_ptr(ctx); + break; + + case 'p': + b = pack_pointer(ctx, vl); + break; + + /* signed */ + case 'd': + 
ctx->off++; + switch (longflag) { + case 0: + val = va_arg(*vl, int); + break; + case 1: + val = va_arg(*vl, long); + break; + case 2: + val = va_arg(*vl, long long); + break; + default: + return ben_invalid_ptr(ctx); + } + b = ben_int(val); + if (b == NULL) + return ben_oom_ptr(ctx); + break; + + /* unsigned */ + case 'u': + ctx->off++; + switch (longflag) { + case 0: + ull = va_arg(*vl, unsigned int); + break; + case 1: + ull = va_arg(*vl, unsigned long); + break; + case 2: + ull = va_arg(*vl, unsigned long long); + break; + default: + return ben_invalid_ptr(ctx); + } + /* + * Check that no information is lost: the value is stored as + * a long long, so reject anything in the upper half of the + * unsigned long long range instead of storing it as a + * negative integer. + */ + if (ull > (~0ULL >> 1)) + return ben_invalid_ptr(ctx); + val = (long long) ull; + b = ben_int(val); + if (b == NULL) + return ben_oom_ptr(ctx); + break; + + default: + return ben_invalid_ptr(ctx); + } + if (b) + return b; + } + return ben_insufficient_ptr(ctx); +} + +static struct bencode *pack_dict(struct ben_decode_ctx *ctx, va_list *vl) +{ + struct bencode *d = ben_dict(); + struct bencode *key = NULL; + struct bencode *value = NULL; + + if (d == NULL) + return ben_oom_ptr(ctx); + + ctx->off++; + + while (1) { + if (seek_char(ctx)) + goto nullpath; + + if (ben_current_char(ctx) == '}') { + ctx->off++; + break; + } + key = pack(ctx, vl); + if (key == NULL) + goto nullpath; + + if (seek_char(ctx)) + goto nullpath; + if (ben_current_char(ctx) != ':') + goto invalidpath; + ctx->off++; + + value = pack(ctx, vl); + if (value == NULL) + goto nullpath; + + if (ben_dict_set(d, key, value)) { + ben_free(key); + ben_free(value); + ben_free(d); + return ben_oom_ptr(ctx); + } + key = NULL; + value = NULL; + + if (seek_char(ctx)) + goto nullpath; + if (ben_current_char(ctx) == ',') + ctx->off++; + else if (ben_current_char(ctx) != '}') + goto invalidpath; + } + return d; + +nullpath: + ben_free(d); + ben_free(key); + ben_free(value); + return NULL; + +invalidpath: + ben_free(d); + ben_free(key); + ben_free(value); + return ben_invalid_ptr(ctx); +} + +static struct bencode 
*pack_list(struct ben_decode_ctx *ctx, va_list *vl) +{ + struct bencode *l = ben_list(); + struct bencode *val = NULL; + + if (l == NULL) + return ben_oom_ptr(ctx); + + ctx->off++; + + while (1) { + if (seek_char(ctx)) + goto nullpath; + + if (ben_current_char(ctx) == ']') { + ctx->off++; + break; + } + val = pack(ctx, vl); + if (val == NULL) + goto nullpath; + + if (ben_list_append(l, val)) { + ben_free(val); + ben_free(l); + return ben_oom_ptr(ctx); + } + val = NULL; + + if (seek_char(ctx)) + goto nullpath; + if (ben_current_char(ctx) == ',') + ctx->off++; + else if (ben_current_char(ctx) != ']') { + ben_free(l); + return ben_invalid_ptr(ctx); + } + } + + return l; + +nullpath: + ben_free(l); + ben_free(val); + return NULL; +} + +static struct bencode *pack(struct ben_decode_ctx *ctx, va_list *vl) +{ + if (seek_char(ctx)) + return ben_insufficient_ptr(ctx); + + switch (ben_current_char(ctx)) { + case '\'': + case '"': + return decode_printed_str(ctx); + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return decode_printed_int(ctx); + case 'F': + case 'T': + return decode_printed_bool(ctx); + case '{': + return pack_dict(ctx, vl); + case '[': + return pack_list(ctx, vl); + case '%': + return pack_value(ctx, vl); + default: + return ben_invalid_ptr(ctx); + } + return NULL; +} + +struct bencode *ben_pack(const char *fmt, ...) +{ + struct ben_decode_ctx ctx = {.data = fmt, .len = strlen(fmt)}; + struct bencode *b; + va_list vl; + va_start(vl, fmt); + b = pack(&ctx, &vl); + va_end(vl); + + /* check for left over characters */ + seek_char(&ctx); + if (ctx.off < ctx.len) { + ben_free(b); + return NULL; + } + return b; +} diff --git a/bencode.h b/bencode.h new file mode 100644 index 0000000..a908bf6 --- /dev/null +++ b/bencode.h @@ -0,0 +1,730 @@ +/* + * libbencodetools + * + * Written by Heikki Orsila and + * Janne Kulmala in 2011. 
+ */ + +#ifndef TYPEVALIDATOR_BENCODE_H +#define TYPEVALIDATOR_BENCODE_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* Used to verify format strings in compile time */ +#ifdef __GNUC__ +#define BEN_CHECK_FORMAT(args...) __attribute__ ((format( args ))) +#else +#define BEN_CHECK_FORMAT(args...) +#endif + +enum { + BENCODE_BOOL = 1, + BENCODE_DICT, + BENCODE_INT, + BENCODE_LIST, + BENCODE_STR, + BENCODE_USER, +}; + +enum { + BEN_OK = 0, /* No errors. Set to zero. Non-zero implies an error. */ + BEN_INVALID, /* Invalid data was given to decoder */ + BEN_INSUFFICIENT, /* Insufficient amount of data for decoding */ + BEN_NO_MEMORY, /* Memory allocation failed */ + BEN_MISMATCH, /* A given structure did not match unpack format */ +}; + +struct bencode { + char type; +}; + +struct bencode_bool { + char type; + char b; +}; + +struct bencode_dict_node { + long long hash; + struct bencode *key; + struct bencode *value; + size_t next; +}; + +struct bencode_dict { + char type; + char shared; /* non-zero means that the internal data is shared with + other instances and should not be freed */ + size_t n; + size_t alloc; + size_t *buckets; + struct bencode_dict_node *nodes; +}; + +struct bencode_int { + char type; + long long ll; +}; + +struct bencode_list { + char type; + char shared; /* non-zero means that the internal data is shared with + other instances and should not be freed */ + size_t n; + size_t alloc; + struct bencode **values; +}; + +struct bencode_str { + char type; + size_t len; + char *s; +}; + +struct ben_decode_ctx; +struct ben_encode_ctx; + +struct bencode_type { + size_t size; + struct bencode *(*decode) (struct ben_decode_ctx *ctx); + int (*encode) (struct ben_encode_ctx *ctx, const struct bencode *b); + size_t (*get_size) (const struct bencode *b); + void (*free) (struct bencode *b); + int (*cmp) (const struct bencode *a, const struct bencode *b); +}; + +struct bencode_user { + char type; + struct bencode_type *info; +}; + +struct 
bencode_error { + int error; /* 0 if no errors */ + int line; /* Error line: 0 is the first line */ + size_t off; /* Error offset in bytes from the start */ +}; + +/* Allocate an instance of a user-defined type */ +void *ben_alloc_user(struct bencode_type *type); + +/* + * Try to set capacity of a list or a dict to 'n' objects. + * The function does nothing if 'n' is less than or equal to the number of + * objects in 'b'. That is, nothing happens if n <= ben_{dict|list}_len(b). + * + * This function is used only for advice. The implementation need not obey it. + * + * The function returns 0 if the new capacity is used, otherwise -1. + * + * Note: This can be used to make construction of lists and dicts + * more efficient when the number of inserted items is known in advance. + */ +int ben_allocate(struct bencode *b, size_t n); + +/* + * Returns an identical but a separate copy of structure b. Returns NULL if + * there is no memory to make a copy. The copy is made recursively. + */ +struct bencode *ben_clone(const struct bencode *b); + +/* + * Returns a weak reference copy of structure b. Only a minimum amount of + * data is copied because the returned structure references to the same + * internal data as the original structure. As a result, the original + * structure must remain valid until the copy is destroyed. + * + * This function is used for optimization for special cases. + */ +struct bencode *ben_shared_clone(const struct bencode *b); + +/* + * ben_cmp() is similar to strcmp(). It compares integers, strings and lists + * similar to Python. User-defined types can be also compared. + * Note: an integer is always less than a string. + * + * ben_cmp(a, b) returns a negative value if "a < b", 0 if "a == b", + * or a positive value if "a > b". + * + * Algorithm for comparing dictionaries is: + * If 'a' and 'b' have different number of keys or keys have different values, + * a non-zero value is returned. 
Otherwise, they have the exact same keys + * and comparison is done in ben_cmp() order of keys. The value for each key + * is compared, and the first inequal value (ben_cmp() != 0) defines the + * return value of the comparison. + * + * Note: recursive dictionaries in depth have the same issues. + */ +int ben_cmp(const struct bencode *a, const struct bencode *b); + +/* Same as ben_cmp(), but the second argument is a C string */ +int ben_cmp_with_str(const struct bencode *a, const char *s); + +/* + * Comparison function suitable for qsort(). Uses ben_cmp(), so this can be + * used to order both integer and string arrays. + */ +int ben_cmp_qsort(const void *a, const void *b); + +/* + * Decode 'data' with 'len' bytes of data. Returns NULL on error. + * The encoded data must be exactly 'len' bytes (not less), otherwise NULL + * is returned. ben_decode2() function supports partial decoding ('len' is + * larger than actual decoded message) and gives more accurate error reports. + */ +struct bencode *ben_decode(const void *data, size_t len); + +/* + * Same as ben_decode(), but allows one to set start offset for decoding with + * 'off' and reports errors more accurately. + * + * '*off' must point to decoding start offset inside 'data'. + * If decoding is successful, '*off' is updated to point to the next byte + * after the decoded message. + * + * If 'error != NULL', it is updated according to the success and error of + * the decoding. BEN_OK is success, BEN_INVALID means invalid data. + * BEN_INSUFFICIENT means data is invalid but could be valid if more data + * was given for decoding. BEN_NO_MEMORY means decoding ran out of memory. + */ +struct bencode *ben_decode2(const void *data, size_t len, size_t *off, int *error); + +/* + * Same as ben_decode2(), but allows one to define user types. 
+ */ +struct bencode *ben_decode3(const void *data, size_t len, size_t *off, int *error, struct bencode_type *types[128]); + +/* + * Same as ben_decode(), but decodes data encoded with ben_print(). This is + * whitespace tolerant, so indented Python syntax can also be read. + * The decoder skips comments that begin with a '#' character. + * The comment starts from '#' character and ends at the end of the same line. + * + * For example, this can be used to read in config files written as a Python + * dictionary. + * + * ben_decode_printed2() fills information about the error in + * struct bencode_error. + * error->error is 0 on success, otherwise it is an error code + * (see ben_decode2()). + * error->line is the line number where the error occurred. + * error->off is the byte offset of the error (approximation). + */ +struct bencode *ben_decode_printed(const void *data, size_t len); +struct bencode *ben_decode_printed2(const void *data, size_t len, size_t *off, struct bencode_error *error); + +/* Get the serialization size of bencode structure 'b' */ +size_t ben_encoded_size(const struct bencode *b); + +/* encode 'b'. Return encoded data with a pointer, and length in '*len' */ +void *ben_encode(size_t *len, const struct bencode *b); + +/* + * encode 'b' into 'data' buffer with at most 'maxlen' bytes. + * Returns the size of encoded data, or (size_t) -1 if encoding fails. + */ +size_t ben_encode2(char *data, size_t maxlen, const struct bencode *b); + +/* + * You must use ben_free() for all allocated bencode structures after use. + * If b == NULL, ben_free does nothing. + * + * ben_free() frees all the objects contained within the bencoded structure. + * It recursively iterates over lists and dictionaries and frees objects. 
+ */ +void ben_free(struct bencode *b); + +long long ben_str_hash(const struct bencode *b); +long long ben_int_hash(const struct bencode *b); +long long ben_hash(const struct bencode *b); + +/* Create a string from binary data with len bytes */ +struct bencode *ben_blob(const void *data, size_t len); + +/* Create a boolean from integer */ +struct bencode *ben_bool(int b); + +/* Create an empty dictionary */ +struct bencode *ben_dict(void); + +/* + * Try to locate 'key' in dictionary. Returns the associated value, if found. + * Returns NULL if the key does not exist. + */ +struct bencode *ben_dict_get(const struct bencode *d, const struct bencode *key); + +struct bencode *ben_dict_get_by_str(const struct bencode *d, const char *key); +struct bencode *ben_dict_get_by_int(const struct bencode *d, long long key); + +struct bencode_keyvalue { + struct bencode *key; + struct bencode *value; +}; + +/* + * Returns an array of key-value pairs in key order as defined by ben_cmp(). + * Array elements are struct bencode_keyvalue members. Returns NULL if + * the array can not be allocated or the bencode object is not a dictionary. + * The returned array must be freed by using free(). The length of the + * array can be determined with ben_dict_len(d). + * + * Warning: key and value pointers in the array are pointers to exact same + * objects in the dictionary. Therefore, the dictionary and its key-values + * must exist while the same keys and values are accessed from the array. + */ +struct bencode_keyvalue *ben_dict_ordered_items(const struct bencode *d); + +/* + * Try to locate 'key' in dictionary. Returns the associated value, if found. + * The value must be later freed with ben_free(). Returns NULL if the key + * does not exist. 
 */
struct bencode *ben_dict_pop(struct bencode *d, const struct bencode *key);

struct bencode *ben_dict_pop_by_str(struct bencode *d, const char *key);
struct bencode *ben_dict_pop_by_int(struct bencode *d, long long key);

/*
 * Set 'key' in dictionary to be 'value'. If an old value exists for the key,
 * it is freed. 'key' and 'value' are owned by the dictionary
 * after a successful call (one may not call ben_free() for 'key' or
 * 'value'). One may free 'key' and 'value' if the call is unsuccessful.
 *
 * Returns 0 on success, -1 on failure (no memory).
 */
int ben_dict_set(struct bencode *d, struct bencode *key, struct bencode *value);

/* Same as ben_dict_set(), but the key is a C string */
int ben_dict_set_by_str(struct bencode *d, const char *key, struct bencode *value);

/* Same as ben_dict_set(), but the key and value are C strings */
int ben_dict_set_str_by_str(struct bencode *d, const char *key, const char *value);

/* Create an integer from a long long */
struct bencode *ben_int(long long ll);

/* Create an empty list */
struct bencode *ben_list(void);

/*
 * Append 'b' to 'list'. Returns 0 on success, -1 on failure (no memory).
 * One may not call ben_free(b) after a successful call, because the list owns
 * the object 'b'.
 */
int ben_list_append(struct bencode *list, struct bencode *b);

int ben_list_append_str(struct bencode *list, const char *s);
int ben_list_append_int(struct bencode *list, long long ll);

/* Remove and return value at position 'pos' in list */
struct bencode *ben_list_pop(struct bencode *list, size_t pos);

/*
 * Returns a Python formatted C string representation of 'b' on success,
 * NULL on failure. The returned string should be freed with free().
 *
 * Note: The string is terminated with '\0'. All instances of '\0' bytes in
 * the bencoded data are escaped so that there is only one '\0' byte
 * in the generated string at the end.
 */
char *ben_print(const struct bencode *b);

/* Create a string from a C string (note: a bencode string may contain '\0') */
struct bencode *ben_str(const char *s);

/* Return a human readable explanation of error returned with ben_decode2() */
const char *ben_strerror(int error);

/*
 * Unpack a Bencoded structure similar to scanf(). Takes a format string and
 * a list of pointers as variable arguments. The given b structure is checked
 * against the format and values are unpacked using the given specifiers.
 * A specifier begins with a percent (%) followed by a string of specifier
 * characters documented below.
 * The syntax is similar to Python format for recursive data structures, and
 * consists of tokens {, }, [, ] with any number of spaces between them.
 * The keys of a dictionary are given as literal strings or integers and
 * matched against the keys of the Bencoded structure.
 *
 * Unpack modifiers:
 *     l    The integer is of type long or unsigned long, and the type of the
 *          argument is expected to be long * or unsigned long *.
 *     ll   The integer is a long long or an unsigned long long, and the
 *          argument is long long * or unsigned long long *.
 *     L    Same as ll.
 *     q    Same as ll.
 *
 * Unpack specifiers:
 *     %ps  The Bencode value must be a string and a pointer to a string
 *          (char **) is expected to be given as arguments. Note, returns a
 *          reference to the internal string buffer. The returned memory should
 *          not be freed and it has the same life time as the Bencode string.
 *
 *     %pb  Takes any structure and writes a pointer given as an argument.
 *          The argument is expected to be "struct bencode **". Note, returns a
 *          reference to the value inside the structure passed to ben_unpack().
 *          The returned memory should not be freed and it has the same life
 *          time as the original structure.
 *
 *     %d   The bencode value is expected to be a (signed) integer.
 *          The preceding conversion modifiers define the type of the given
 *          pointer.
 *
 *     %u   The bencode value is expected to be an unsigned integer. The
 *          preceding conversion modifiers define the type of the given
 *          pointer.
 */
int ben_unpack(const struct bencode *b, const char *fmt, ...)
	BEN_CHECK_FORMAT(scanf, 2, 3);

int ben_unpack2(const struct bencode *b, size_t *off, struct bencode_error *error, const char *fmt, ...)
	BEN_CHECK_FORMAT(scanf, 4, 5);

/*
 * Pack a Bencoded structure similar to printf(). Takes a format string and
 * a list of values as variable arguments.
 * Works similarly to ben_decode_printed(), but allows the string to contain
 * value specifiers which are replaced with values given as arguments.
 * A specifier begins with a percent (%) followed by a string of specifier
 * characters documented below.
 *
 * Value modifiers:
 *     l    The integer is of type long or unsigned long.
 *     ll   The integer is a long long or an unsigned long long.
 *     L    Same as ll.
 *     q    Same as ll.
 *
 * Value specifiers:
 *     %s   A string pointer (char *) expected to be given as argument. A new
 *          Bencode string is constructed from the given string.
 *
 *     %pb  A Bencode structure (struct bencode *) is expected to be given as
 *          argument. Note, takes ownership of the structure, even when an
 *          error is returned.
 *
 *     %d   Constructs a new integer from the given (signed) integer. The
 *          preceding conversion modifiers define the type of the value.
 *
 *     %u   Constructs a new integer from the given unsigned integer. The
 *          preceding conversion modifiers define the type of the value.
 */
struct bencode *ben_pack(const char *fmt, ...)
	BEN_CHECK_FORMAT(printf, 1, 2);

/*
 * Type predicates. Each ben_is_X(b) returns 1 iff b is of type X, 0 otherwise.
 * 'b' is dereferenced unconditionally, so it must not be NULL.
 */
/* ben_is_bool() returns 1 iff b is a boolean, 0 otherwise */
static inline int ben_is_bool(const struct bencode *b)
{
	return b->type == BENCODE_BOOL;
}
static inline int ben_is_dict(const struct bencode *b)
{
	return b->type == BENCODE_DICT;
}
static inline int ben_is_int(const struct bencode *b)
{
	return b->type == BENCODE_INT;
}
static inline int ben_is_list(const struct bencode *b)
{
	return b->type == BENCODE_LIST;
}
static inline int ben_is_str(const struct bencode *b)
{
	return b->type == BENCODE_STR;
}
static inline int ben_is_user(const struct bencode *b)
{
	return b->type == BENCODE_USER;
}

/*
 * ben_bool_const_cast(b) returns "(const struct bencode_bool *) b" if the
 * underlying object is a boolean, NULL otherwise.
 */
static inline const struct bencode_bool *ben_bool_const_cast(const struct bencode *b)
{
	return b->type == BENCODE_BOOL ? ((const struct bencode_bool *) b) : NULL;
}

/*
 * ben_bool_cast(b) returns "(struct bencode_bool *) b" if the
 * underlying object is a boolean, NULL otherwise.
 */
static inline struct bencode_bool *ben_bool_cast(struct bencode *b)
{
	return b->type == BENCODE_BOOL ? ((struct bencode_bool *) b) : NULL;
}

/*
 * The remaining ben_X_const_cast() / ben_X_cast() helpers follow the same
 * pattern as the boolean casts above: they return the argument converted to
 * the concrete struct type when its type tag matches, NULL otherwise.
 */
static inline const struct bencode_dict *ben_dict_const_cast(const struct bencode *b)
{
	return b->type == BENCODE_DICT ? ((const struct bencode_dict *) b) : NULL;
}
static inline struct bencode_dict *ben_dict_cast(struct bencode *b)
{
	return b->type == BENCODE_DICT ? ((struct bencode_dict *) b) : NULL;
}

static inline const struct bencode_int *ben_int_const_cast(const struct bencode *i)
{
	return i->type == BENCODE_INT ? ((const struct bencode_int *) i) : NULL;
}
static inline struct bencode_int *ben_int_cast(struct bencode *i)
{
	return i->type == BENCODE_INT ? ((struct bencode_int *) i) : NULL;
}

static inline const struct bencode_list *ben_list_const_cast(const struct bencode *list)
{
	return list->type == BENCODE_LIST ? ((const struct bencode_list *) list) : NULL;
}
static inline struct bencode_list *ben_list_cast(struct bencode *list)
{
	return list->type == BENCODE_LIST ? ((struct bencode_list *) list) : NULL;
}

static inline const struct bencode_str *ben_str_const_cast(const struct bencode *str)
{
	return str->type == BENCODE_STR ? ((const struct bencode_str *) str) : NULL;
}
static inline struct bencode_str *ben_str_cast(struct bencode *str)
{
	return str->type == BENCODE_STR ? ((struct bencode_str *) str) : NULL;
}

static inline const struct bencode_user *ben_user_const_cast(const struct bencode *user)
{
	return user->type == BENCODE_USER ? ((const struct bencode_user *) user) : NULL;
}
static inline struct bencode_user *ben_user_cast(struct bencode *user)
{
	return user->type == BENCODE_USER ? ((struct bencode_user *) user) : NULL;
}

/*
 * ben_is_user_type(b, type) returns non-zero iff b is a user-defined object
 * whose 'info' pointer equals 'type', 0 otherwise.
 */
static inline int ben_is_user_type(const struct bencode *b, struct bencode_type *type)
{
	return b->type == BENCODE_USER ? ((const struct bencode_user *) b)->info == type : 0;
}

/*
 * ben_user_type_const_cast() / ben_user_type_cast() return 'b' when it is a
 * user-defined object whose 'info' pointer equals 'type', NULL otherwise.
 */
static inline const void *ben_user_type_const_cast(const struct bencode *b, struct bencode_type *type)
{
	return (b->type == BENCODE_USER && ((const struct bencode_user *) b)->info == type) ? b : NULL;
}
static inline void *ben_user_type_cast(struct bencode *b, struct bencode_type *type)
{
	return (b->type == BENCODE_USER && ((const struct bencode_user *) b)->info == type) ? b : NULL;
}

/*
 * Note for all accessors below: the result of the type cast is dereferenced
 * without a check, and the cast yields NULL for an object of another type.
 * Callers must therefore verify the type (e.g. with ben_is_dict()) first.
 */

/* Return the number of keys in a dictionary 'b' */
static inline size_t ben_dict_len(const struct bencode *b)
{
	return ben_dict_const_cast(b)->n;
}

/* Return the number of items in a list 'b' */
static inline size_t ben_list_len(const struct bencode *b)
{
	return ben_list_const_cast(b)->n;
}

/*
 * ben_list_get(list, i) returns object at position i in list.
 * The program aborts if position i is out of bounds.
 */
static inline struct bencode *ben_list_get(const struct bencode *list, size_t i)
{
	const struct bencode_list *l = ben_list_const_cast(list);
	if (i >= l->n) {
		fprintf(stderr, "bencode: List index out of bounds\n");
		abort();
	}
	return l->values[i];
}

/*
 * ben_list_set(list, i, b) sets object b to list at position i.
 * The old value at position i is freed.
 * The program aborts if position i is out of bounds.
 */
void ben_list_set(struct bencode *list, size_t i, struct bencode *b);

/* Return the number of bytes in a string 'b' */
static inline size_t ben_str_len(const struct bencode *b)
{
	return ben_str_const_cast(b)->len;
}

/* Return boolean value (0 or 1) of 'b' */
static inline int ben_bool_val(const struct bencode *b)
{
	return ben_bool_const_cast(b)->b ? 1 : 0;
}

/* Return integer value of 'b' */
static inline long long ben_int_val(const struct bencode *b)
{
	return ben_int_const_cast(b)->ll;
}

/*
 * Return a pointer to the bytes of string 'b'.
 *
 * Note: the string is always zero terminated. Also, the string may
 * contain more than one zero.
 * bencode strings are not compatible with C strings.
 */
static inline const char *ben_str_val(const struct bencode *b)
{
	return ben_str_const_cast(b)->s;
}

/*
 * ben_list_for_each() is an iterator macro for bencoded lists.
 *
 * Note, it is not allowed to change the list while iterating except by
 * using ben_list_pop_current().
 *
 * pos is a size_t.
 *
 * The macro arguments are evaluated more than once, so they must not have
 * side effects. Iteration also stops at the first NULL value.
 *
 * Example:
 *
 * size_t pos;
 * struct bencode *list = xxx;
 * struct bencode *value;
 * ben_list_for_each(value, pos, list) {
 *         inspect(value);
 * }
 */
#define ben_list_for_each(value, pos, l) \
	for ((pos) = (size_t) 0; \
	     (pos) < (ben_list_const_cast(l))->n && \
	     ((value) = ((const struct bencode_list *) (l))->values[(pos)]) != NULL ; \
	     (pos)++)

/*
 * ben_list_pop_current() returns and removes the current item at 'pos'
 * while iterating the list with ben_list_for_each().
 * It can be used more than once per walk, but only once per item.
 * Example below:
 *
 * Filter out all items from list whose string value does not begin with "foo".
 *
 * ben_list_for_each(value, pos, list) {
 *         if (strncmp(ben_str_val(value), "foo", 3) != 0)
 *                 ben_free(ben_list_pop_current(list, &pos));
 * }
 */
static inline struct bencode *ben_list_pop_current(struct bencode *list,
						   size_t *pos)
{
	struct bencode *value = ben_list_pop(list, *pos);
	/*
	 * Step back so that the (pos)++ of ben_list_for_each() lands on the item
	 * that moved into the freed slot. At position 0 the unsigned value wraps
	 * around and the following increment brings it back to 0.
	 */
	(*pos)--;
	return value;
}

/*
 * ben_dict_for_each() is an iterator macro for bencoded dictionaries.
 *
 * Note, it is not allowed to change the dictionary while iterating except
 * by using ben_dict_pop_current().
 *
 * The macro arguments are evaluated more than once, so they must not have
 * side effects. Iteration also stops at the first NULL key or value.
 *
 * struct bencode *dict = ben_dict();
 * size_t pos;
 * struct bencode *key;
 * struct bencode *value;
 * ben_dict_set_str_by_str(dict, "foo", "bar");
 *
 * ben_dict_for_each(key, value, pos, dict) {
 *         use(key, value);
 * }
 *
 * pos is a size_t.
 */
#define ben_dict_for_each(bkey, bvalue, pos, d) \
	for ((pos) = 0; \
	     (pos) < (ben_dict_const_cast(d))->n && \
	     ((bkey) = ((const struct bencode_dict *) (d))->nodes[(pos)].key) != NULL && \
	     ((bvalue) = ((const struct bencode_dict *) (d))->nodes[(pos)].value) != NULL; \
	     (pos)++)

/*
 * ben_dict_pop_current() deletes the current item at 'pos' while iterating
 * the dictionary with ben_dict_for_each(). It can be used more than once
 * per walk, but only once per item.
 * Example below:
 *
 * Filter out all items from dictionary whose key does not begin with "foo".
 *
 * ben_dict_for_each(key, value, pos, dict) {
 *         if (strncmp(ben_str_val(key), "foo", 3) != 0)
 *                 ben_free(ben_dict_pop_current(dict, &pos));
 * }
 */
struct bencode *ben_dict_pop_current(struct bencode *dict, size_t *pos);

/*
 * Report an error while decoding. Each function records the matching error
 * code (BEN_INSUFFICIENT, BEN_INVALID or BEN_NO_MEMORY) in 'ctx' and
 * returns NULL.
 */
void *ben_insufficient_ptr(struct ben_decode_ctx *ctx);
void *ben_invalid_ptr(struct ben_decode_ctx *ctx);
void *ben_oom_ptr(struct ben_decode_ctx *ctx);

/*
 * Decode from the current position of 'ctx'.
 *
 * This function is used to implement decoders for user-defined types.
 */
struct bencode *ben_ctx_decode(struct ben_decode_ctx *ctx);

/*
 * Test whether the input of 'ctx' has at least n bytes left.
 * Returns 0 when there are enough bytes left and -1 when there aren't.
 *
 * This function is used to implement decoders for user-defined types.
 */
int ben_need_bytes(const struct ben_decode_ctx *ctx, size_t n);

/*
 * Returns the character in current position of 'ctx'.
 *
 * This function is used to implement decoders for user-defined types.
 */
char ben_current_char(const struct ben_decode_ctx *ctx);

/*
 * Get the next n bytes from input.
 * Returns pointer to the data or NULL when there aren't enough bytes left.
 *
 * This function is used to implement decoders for user-defined types.
 */
const char *ben_current_buf(const struct ben_decode_ctx *ctx, size_t n);

/*
 * Increments current position by n.
 *
 * This function is used to implement decoders for user-defined types.
 */
void ben_skip(struct ben_decode_ctx *ctx, size_t n);

/*
 * Encode to the output of 'ctx'. The size of the encoded data can be obtained
 * with ben_encoded_size().
 *
 * This function is used to implement encoders for user-defined types.
 */
int ben_ctx_encode(struct ben_encode_ctx *ctx, const struct bencode *b);

/*
 * Append one character to output of 'ctx'.
The amount of bytes written to the + * output must be the same as returned by get_size(). + * + * This function is used to implement encoders for user-defined types. + */ +int ben_put_char(struct ben_encode_ctx *ctx, char c); + +/* + * Append data to output of 'ctx'. The amount of bytes written to the output + * must be the same as returned by get_size(). + * + * This function is used to implement encoders for user-defined types. + */ +int ben_put_buffer(struct ben_encode_ctx *ctx, const void *buf, size_t len); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tfv.c b/tfv.c new file mode 100644 index 0000000..09e5923 --- /dev/null +++ b/tfv.c @@ -0,0 +1,537 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "bencode.h" + +//#define DEBUG + +#ifdef DEBUG +#define dprintf printf +#else +#define dprintf(...) +#endif + +struct file_t { + char *path; + size_t length; + int valid; + int first_piece; + int last_piece; +}; + +static struct torrent_t { + char *name; + size_t length; + size_t piece_length; + int files_count; + struct file_t **files; + void *pieces; +} meta; + +static int process_object(const struct bencode *torrent) { + if (!ben_is_dict(torrent)) + return -1; + + struct bencode *info; + info = ben_dict_get_by_str(torrent, "info"); + if (info && ben_is_dict(info)) { + struct bencode *piece_length = + ben_dict_get_by_str(info, "piece length"); + if (piece_length && ben_is_int(piece_length)) + meta.piece_length = ben_int_val(piece_length); + struct bencode *pieces = ben_dict_get_by_str(info, "pieces"); + if (pieces && ben_is_str(pieces)) { + meta.pieces = malloc(ben_str_len(pieces)); + memcpy(meta.pieces, ben_str_val(pieces), ben_str_len(pieces)); + } + + struct bencode *files = ben_dict_get_by_str(info, "files"); + + if (files && ben_is_list(files)) { + int i; + size_t n; + n = ben_list_len(files); + dprintf("Have %lu files\n", n); + meta.files_count 
= n; + meta.files = calloc(n, sizeof(struct file_t)); + off_t total = 0; + for (i = 0; i < n; i++) { + struct file_t *fi = malloc(sizeof(struct file_t)); + struct bencode *file = ben_list_get(files, i); + struct bencode *path = ben_dict_get_by_str(file, "path"); + struct bencode *length = + ben_dict_get_by_str(file, "length"); + size_t pn; + int j; + char *fullname = NULL; + pn = ben_list_len(path); + //printf("%p\n", fi); + for (j = 0; j < pn; j++) { + struct bencode *pelem = ben_list_get(path, j); + size_t sn = ben_str_len(pelem); + size_t len; + if (fullname) + strcat(fullname, "/"); + len = strlen(fullname ? fullname : ""); + fullname = realloc(fullname, len + sn + 2); + fullname[len] = '\0'; + strcat(fullname, ben_str_val(pelem)); + } + fi->path = fullname; + fi->valid = -1; + fi->length = ben_int_val(length); + fi->first_piece = total / meta.piece_length; + total += fi->length; + fi->last_piece = total / meta.piece_length; + meta.files[i] = fi; + dprintf("Added %s with %lu bytes [%u] -> [%u]\n", fi->path, + fi->length, fi->first_piece, fi->last_piece); + } + struct bencode *name = ben_dict_get_by_str(info, "name"); + meta.name = strdup(ben_str_val(name)); + } else { + struct bencode *name = ben_dict_get_by_str(info, "name"); + struct bencode *length = ben_dict_get_by_str(info, "length"); + meta.files = NULL; + meta.name = strdup(ben_str_val(name)); + meta.length = ben_int_val(length); + meta.files_count = 0; + } + } else { + //not torrent? + } + + return 0; +} + +/* START code borrowed from bencode-tools: https://github.com/heikkiorsila/bencode-tools.git */ +#define BUFFER_SIZE 4096 + +/* + * xread() is the same as the read(), but it automatically restarts read() + * operations with a recoverable error (EAGAIN and EINTR). xread() + * DOES NOT GUARANTEE that "len" bytes is read even if the data is available. 
+ */ +static ssize_t xread(int fd, void *buf, size_t count) { + ssize_t nr; + while (1) { + nr = read(fd, buf, count); + if ((nr < 0) && (errno == EAGAIN || errno == EINTR)) + continue; + return nr; + } +} + +static int reallocarray(char **buf, size_t *size) { + size_t newsize = BUFFER_SIZE; + char *p; + if (*buf != NULL) { + if (*size > 0) + newsize = *size; + if (newsize >= (((size_t) -1) / 2)) + return -1; + newsize *= 2; + } + p = realloc(*buf, newsize); + if (p != NULL) { + *buf = p; + *size = newsize; + } + return (*buf != NULL) ? 0 : -1; +} + +static void shift_buffer(char *buf, size_t *size, size_t off) { + assert(off <= *size); + memmove(buf, buf + off, *size - off); + *size -= off; +} + +static int process_buffer(char *buf, size_t *size, size_t len) { + shift_buffer(buf, size, len); + return 0; +} + +static int process(int fd) { + char *buf = NULL; + size_t size = 0; + size_t alloc = 0; + ssize_t ret; + int error; + int needmore = 1; + size_t off = 0; + struct bencode *b; + + while (1) { + if ((alloc - size) == 0 && reallocarray(&buf, &alloc)) { + fprintf(stderr, "bencat: Out of memory\n"); + goto error; + } + + if (needmore) { + ret = xread(fd, buf + size, alloc - size); + if (ret < 0) { + fprintf(stderr, + "bencat: Unexpected error in data stream (%s)\n", + strerror(errno)); + goto error; + } + if (ret == 0) + break; + size += ret; + needmore = 0; + } + + off = 0; + b = ben_decode2(buf, size, &off, &error); + if (b == NULL) { + if (error == BEN_INSUFFICIENT) { + needmore = 1; + continue; + } + fprintf(stderr, + "bencat: Invalid data stream at offset %zu\n", off); + goto error; + } + + assert(off > 0); + + ret = process_buffer(buf, &size, off) | process_object(b); + + ben_free(b); + b = NULL; + + if (ret) + goto error; + } + + if (size > 0) { + fprintf(stderr, "bencat: Incomplete data in stream\n"); + goto error; + } + + free(buf); + return 0; + + error: + free(buf); + return -1; +} + +static int xclose(int fd) { + while (close(fd)) { + if (errno == 
EINTR) + continue; + return 1; + } + return 0; +} + +/* END code borrowed from bencode-tools */ + +static int check_open(const char *name, off_t length) { + struct stat st; + int err; + + errno = 0; + dprintf("Trying %s\n", name); + err = stat(name, &st); + if (0 == err) + err = (st.st_size != length) ? -2 : open(name, O_RDONLY); + dprintf("check_open(): %d\n", err); + return err; +} + +/* TODO: expand reported reason for failure (e.g. "file not found") + instead of one "bad crc" fits all +*/ +static void log_result(const char *name, int valid) { + const char *erm = "bad crc"; + if (errno) + erm = strerror(errno); + printf("\r%s : %-40s\n", name, valid ? "ok" : erm); +} + +static void log_progress(const char *name, off_t offset, off_t length) { + printf("\r%s : %lu%%", name, offset * 100 / length); + fflush(stdout); +} + +static int validate_single(struct torrent_t *meta) { + int err, i, fd; + unsigned char *pbuf, *phash; + ssize_t bread; + + fd = check_open(meta->name, meta->length); + err = 0; + if (err < 0) + err = fd; + i = 0; + pbuf = malloc(meta->piece_length); + if (!pbuf) + err = -5; + while (!err) { + unsigned char rhash[SHA_DIGEST_LENGTH]; + phash = meta->pieces + i++ * SHA_DIGEST_LENGTH; + bread = read(fd, pbuf, meta->piece_length); + if (bread) { + SHA1(pbuf, bread, rhash); + if (0 != memcmp(phash, rhash, SHA_DIGEST_LENGTH)) + err = -3; + log_progress(meta->name, lseek(fd, 0L, SEEK_CUR), + meta->length); + } else { + if (lseek(fd, 0L, SEEK_CUR) != meta->length) + err = -4; + break; + } + } + + if (pbuf) + free(pbuf); + + return err; + +} + +static int verify(void) { + unsigned char *pbuf; + int si, i, err; + int pix, fix; + int fd; + int pvalid; + off_t poff; + int sok, sbad; + int eof; + int next; + char cwd[1024]; + + err = 0; + + dprintf("Piece length: %lu\n", meta.piece_length); + + if (0 == meta.files_count) { + err = validate_single(&meta); + log_result(meta.name, !err); + return err; + } + + pbuf = malloc(meta.piece_length); + if (!pbuf) { + for 
(i = 0; i < meta.files_count; i++) { + free(meta.files[i]->path); + free(meta.files[i]); + } + free(meta.files); + return -5; + } + + getcwd(cwd, sizeof cwd - 1); + if (meta.files_count && 0 == access(meta.name, X_OK)) + chdir(meta.name); +//----------------------------------------- + pix = poff = next = fix = 0; + fd = -1; + off_t total = 0; + while (fix < meta.files_count) { + pvalid = -1; //initial state: unknown +#if 1 + if (0 == meta.files[fix]->valid) { //skip directly to the next piece + //corner case: it's the last file so we can and things here + if (fix == meta.files_count - 1) + break; + pix = meta.files[fix]->last_piece; + dprintf("Skipping to piece %u\n", pix); + if (fd >= 0) { + off_t roff; + roff = total % meta.piece_length; + dprintf + ("We have open file, length=%lu, poff=%lu, roff=%lu, rpos=%lu\n", + meta.files[fix]->length, poff, roff, lseek(fd, 0, + SEEK_CUR)); + lseek(fd, -roff, SEEK_END); + poff = 0; + dprintf("We have open file, poff=%lu, rpos=%lu\n", poff, + lseek(fd, 0, SEEK_CUR)); + } + } +#endif + if (next) { + fix++; + next = 0; + continue; + } + + dprintf("Validating piece %u\n", pix); + poff %= meta.piece_length; + si = fix; + while ((pix >= meta.files[fix]->first_piece + && pix <= meta.files[fix]->last_piece)) { + + + dprintf("[%u]-[%u]-[%u]\n", meta.files[fix]->first_piece, pix, + meta.files[fix]->last_piece); + + if (pix == meta.files[fix]->first_piece) { + if (fd >= 0) { + close(fd); + fd = -1; + } + fd = check_open(meta.files[fix]->path, + meta.files[fix]->length); + + if (poff) { //Have some piece data from a previous file + /* + This is a no-op if all files are present and have correct length + but will fix offset into a full piece if some files are missing, + have incorrect length or cannot be opened for any other reason. + The resulting frankenpiece will never validate, of course. 
+ */ + off_t toff = total % meta.piece_length; + dprintf("Adjusted poff [%lu -> %lu]\n", poff, toff); + poff = toff; + } + + total += meta.files[fix]->length; + + if (fd < 0) { + meta.files[fix]->valid = 0; + log_result(meta.files[fix]->path, 0); + next = 1; + break; + } + + } + + off_t rem, bread; + rem = meta.piece_length - poff; + dprintf("Reading %lu bytes @ %lu pos\n", rem, poff); + bread = read(fd, pbuf + poff, rem); + dprintf("Got %lu\n", bread); + if (0 == bread) { + //panic(); + } + log_progress(meta.files[fix]->path, lseek(fd, 0L, SEEK_CUR), + meta.files[fix]->length); + + if (bread < 0) { + meta.files[fix]->valid = 0; + log_result(meta.files[fix]->path, 0); + next = 1; + break; + } + + poff += bread; + eof = (lseek(fd, 0L, SEEK_CUR) == meta.files[fix]->length); + + if (poff >= meta.piece_length + || (fix == meta.files_count - 1 && eof)) { + + unsigned char rhash[SHA_DIGEST_LENGTH]; + unsigned char *phash = + meta.pieces + (pix * SHA_DIGEST_LENGTH); + SHA1(pbuf, poff, rhash); + dprintf("%02X%02X..%02X%02X vs. 
", phash[0], phash[1], + phash[18], phash[19]); + dprintf("%02X%02X..%02X%02X\n", rhash[0], rhash[1], + rhash[18], rhash[19]); + pvalid = (0 == memcmp(phash, rhash, SHA_DIGEST_LENGTH)); + + for (i = si; i <= fix; i++) { + //for any file for which the judgement has not been passed + if (-1 == meta.files[i]->valid) { + switch (pvalid) { + case 0: //bad piece invalidates anywhere + if ((pix >= meta.files[i]->first_piece + || pix <= meta.files[i]->last_piece)) + meta.files[i]->valid = 0; + break; + case 1: //good piece only validates if it's the last + if (meta.files[i]->last_piece == pix) + meta.files[i]->valid = 1; + break; + default: + dprintf("wtf?\n"); + break; + } + //report the change in file's validity + if (-1 != meta.files[i]->valid) + log_result(meta.files[i]->path, + meta.files[i]->valid); + } + } + + } //end if have full piece data + + if (eof) + fix++; + + if (pvalid != -1) + break; + } + dprintf("Finished piece %u with status %d, file index: %u\n", pix, + pvalid, fix); + pix++; + } + +//----------------------------------------- + + if (fd >= 0) { + close(fd); + fd = -1; + } + + for (sok = sbad = i = 0; i < meta.files_count; i++) { + if (1 == meta.files[i]->valid) + sok++; + else + sbad++; + free(meta.files[i]->path); + free(meta.files[i]); + } + free(meta.files); + free(pbuf); + printf("%u files, %u ok, %u bad crc\n", meta.files_count, sok, sbad); + chdir(cwd); + return err; +} + +int main(void) { + + int err; + int i; + glob_t g; + + err = glob("*.torrent", 0, 0, &g); + if (0 == err) + for (i = 0; i < g.gl_pathc; ++i) { + printf("Using %s\n", g.gl_pathv[i]); + err = -1; + int fd = open(g.gl_pathv[i], O_RDONLY); + if (fd < 0) + break; + meta.name = meta.pieces = NULL; + err = process(fd); + xclose(fd); + if (0 == err) + verify(); + if (meta.name) + free(meta.name); + if (meta.pieces) + free(meta.pieces); + } + globfree(&g); + return err; +}