From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.2.5 (2008-06-10) on sa.int.altlinux.org X-Spam-Level: X-Spam-Status: No, score=-1.5 required=5.0 tests=BAYES_00,DNS_FROM_OPENWHOIS autolearn=no version=3.2.5 From: "Kirill A. Shutsemov" To: "Dmitry V. Levin" , Alexey Tourbin Date: Tue, 16 Nov 2010 17:56:38 +0200 Message-Id: <1289923002-14132-5-git-send-email-kirill@shutemov.name> X-Mailer: git-send-email 1.7.2.3 In-Reply-To: <1289923002-14132-1-git-send-email-kirill@shutemov.name> References: <1289923002-14132-1-git-send-email-kirill@shutemov.name> Cc: "Alexey I. Froloff" , devel@lists.altlinux.org, Alexey Gladkov , "Kirill A. Shutemov" Subject: [devel] [PATCH 4/8] set.c: slightly reformat code to increase its readability X-BeenThere: devel@lists.altlinux.org X-Mailman-Version: 2.1.12 Precedence: list Reply-To: ALT Linux Team development discussions List-Id: ALT Linux Team development discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 16 Nov 2010 15:57:04 -0000 Archived-At: List-Archive: List-Post: From: Kirill A. Shutemov Signed-off-by: Kirill A. Shutemov --- lib/set.c | 212 +++++++++++++++++++++++++++++++++++++++++++------------------ 1 files changed, 149 insertions(+), 63 deletions(-) diff --git a/lib/set.c b/lib/set.c index 03c8149..9dd3eb4 100644 --- a/lib/set.c +++ b/lib/set.c @@ -61,48 +61,54 @@ int encode_base62(int bitc, const char *bitv, char *base62) int bits2 = 0; /* number of high bits set */ int bits6 = 0; /* number of regular bits set */ int num6b = 0; /* pending 6-bit number */ + while (bitc-- > 0) { assert(bits6 + bits2 < 6); num6b |= (*bitv++ << bits6++); - if (bits6 + bits2 == 6) { - switch (num6b) { - case 61: - /* escape */ - base62 = put_digit(base62, 61); - /* extra "00...." high bits (in the next character) */ - bits2 = 2; - bits6 = 0; - num6b = 0; - break; - case 62: - base62 = put_digit(base62, 61); - /* extra "01...." hight bits */ - bits2 = 2; - bits6 = 0; - num6b = 16; - break; - case 63: - base62 = put_digit(base62, 61); - /* extra "10...." hight bits */ - bits2 = 2; - bits6 = 0; - num6b = 32; - break; - default: - assert(num6b < 61); - base62 = put_digit(base62, num6b); - bits2 = 0; - bits6 = 0; - num6b = 0; - break; - } + + if (bits6 + bits2 != 6) + continue; + + switch (num6b) { + case 61: + /* escape */ + base62 = put_digit(base62, 61); + /* extra "00...." high bits (in the next character) */ + bits2 = 2; + bits6 = 0; + num6b = 0; + break; + case 62: + base62 = put_digit(base62, 61); + /* extra "01...." hight bits */ + bits2 = 2; + bits6 = 0; + num6b = 16; + break; + case 63: + base62 = put_digit(base62, 61); + /* extra "10...." hight bits */ + bits2 = 2; + bits6 = 0; + num6b = 32; + break; + default: + assert(num6b < 61); + base62 = put_digit(base62, num6b); + bits2 = 0; + bits6 = 0; + num6b = 0; + break; } } + if (bits6 + bits2) { assert(num6b < 61); base62 = put_digit(base62, num6b); } + *base62 = '\0'; + return base62 - base62_start; } @@ -111,6 +117,7 @@ static inline int decode_base62_size(const char *base62) { int len = strlen(base62); + /* Each character will fill at most 6 bits. */ return (len << 2) + (len << 1); } @@ -124,6 +131,7 @@ int char_to_num(int c) return c - 'a' + 10; if (c >= 'A' && c <= 'Z') return c - 'A' + 36; + return -1; } @@ -157,37 +165,44 @@ int decode_base62(const char *base62, char *bitv) { char *bitv_start = bitv; int c; + while ((c = *base62++)) { int num6b = char_to_num(c); + if (num6b < 0) return -1; - if (num6b == 61) { - c = *base62++; - if (c == 0) - return -2; - num6b = char_to_num(c); - if (num6b < 0) - return -3; - switch (num6b & (16 + 32)) { - case 0: - bitv = put6bits(bitv, 61); - break; - case 16: - bitv = put6bits(bitv, 62); - break; - case 32: - bitv = put6bits(bitv, 63); - break; - default: - return -4; - break; - } - bitv = put4bits(bitv, num6b); - } - else { + + if (num6b != 61) { bitv = put6bits(bitv, num6b); + continue; + } + + c = *base62++; + if (c == 0) + return -2; + + num6b = char_to_num(c); + if (num6b < 0) + return -3; + + switch (num6b & (16 + 32)) { + case 0: + bitv = put6bits(bitv, 61); + break; + case 16: + bitv = put6bits(bitv, 62); + break; + case 32: + bitv = put6bits(bitv, 63); + break; + default: + return -4; + break; } + + bitv = put4bits(bitv, num6b); } + return bitv - bitv_start; } @@ -254,8 +269,10 @@ static inline int log2i(int n) { int m = 0; + while (n >>= 1) m++; + return m; } @@ -268,11 +285,14 @@ int encode_golomb_Mshift(int c, int bpp) * Recheck "Compression and coding algorithms" by Moffat & Turpin. */ int Mshift = bpp - log2i(c) - 1; + /* Adjust out-of-range values. */ if (Mshift < 7) Mshift = 7; + if (Mshift > 31) Mshift = 31; + assert(Mshift < bpp); return Mshift; } @@ -294,20 +314,25 @@ int encode_golomb(int c, const unsigned *v, int Mshift, char *bitv) { char *bitv_start = bitv; const unsigned mask = (1 << Mshift) - 1; + while (c > 0) { c--; unsigned v0 = *v++; int i; /* first part: variable-length sequence */ unsigned q = v0 >> Mshift; + for (i = 0; i < (int)q; i++) *bitv++ = 0; *bitv++ = 1; + /* second part: lower Mshift bits */ unsigned r = v0 & mask; + for (i = 0; i < Mshift; i++) *bitv++ = (r >> i) & 1; } + return bitv - bitv_start; } @@ -327,11 +352,13 @@ static int decode_golomb(int bitc, const char *bitv, int Mshift, unsigned *v) { unsigned *v_start = v; + /* next value */ while (bitc > 0) { /* first part */ unsigned q = 0; char bit = 0; + while (bitc > 0) { bitc--; bit = *bitv++; @@ -340,15 +367,19 @@ int decode_golomb(int bitc, const char *bitv, int Mshift, unsigned *v) else break; } + /* trailing zero bits in the input are okay */ if (bitc == 0 && bit == 0) break; + /* otherwise, incomplete value is not okay */ if (bitc < Mshift) return -1; + /* second part */ unsigned r = 0; int i; + for (i = 0; i < Mshift; i++) { bitc--; if (*bitv++) @@ -357,6 +388,7 @@ int decode_golomb(int bitc, const char *bitv, int Mshift, unsigned *v) /* the value */ *v++ = (q << Mshift) | r; } + return v - v_start; } @@ -412,8 +444,10 @@ void test_golomb(void) static void encode_delta(int c, unsigned *v) { - assert(c > 0); unsigned int v0 = *v++; + + assert(c > 0); + while (--c > 0) { *v -= v0; v0 += *v++; @@ -423,8 +457,10 @@ void encode_delta(int c, unsigned *v) static void decode_delta(int c, unsigned *v) { - assert(c > 0); unsigned int v0 = *v++; + + assert(c > 0); + while (--c > 0) { *v += v0; v0 = *v++; @@ -469,10 +505,12 @@ int cmpv(const void *arg1, const void *arg2) { unsigned v1 = *(unsigned *) arg1; unsigned v2 = *(unsigned *) arg2; + if (v1 > v2) return 1; if (v1 < v2) return -1; + return 0; } @@ -486,11 +524,13 @@ static int uniqv(int c, unsigned *v) { int i, j; + for (i = 0, j = 0; i < c; i++) { while (i + 1 < c && v[i] == v[i+1]) i++; v[j++] = v[i]; } + assert(j <= c); return j; } @@ -535,6 +575,7 @@ int encode_set_size(int c, int bpp) { int Mshift = encode_golomb_Mshift(c, bpp); int bitc = encode_golomb_size(c, Mshift); + /* two leading characters are special */ return 2 + encode_base62_size(bitc); } @@ -546,27 +587,35 @@ int encode_set(int c, unsigned *v, int bpp, char *base62) int Mshift = encode_golomb_Mshift(c, bpp); int bitc = encode_golomb_size(c, Mshift); char bitv[bitc]; + /* bpp */ if (bpp < 10 || bpp > 32) return -1; *base62++ = bpp - 7 + 'a'; + /* golomb parameter */ if (Mshift < 7 || Mshift > 31) return -2; *base62++ = Mshift - 7 + 'a'; + /* delta */ encode_delta(c, v); + /* golomb */ bitc = encode_golomb(c, v, Mshift, bitv); + #ifdef SELF_TEST decode_delta(c, v); #endif + if (bitc < 0) return -3; + /* base62 */ int len = encode_base62(bitc, bitv, base62); if (len < 0) return -4; + return 2 + len; } @@ -575,17 +624,23 @@ int decode_set_init(const char *str, int *pbpp, int *pMshift) { /* 7..32 values encoded with 'a'..'z' */ int bpp = *str++ + 7 - 'a'; + if (bpp < 10 || bpp > 32) return -1; + /* golomb parameter */ int Mshift = *str++ + 7 - 'a'; + if (Mshift < 7 || Mshift > 31) return -2; + if (Mshift >= bpp) return -3; + /* no empty sets for now */ if (*str == '\0') return -4; + *pbpp = bpp; *pMshift = Mshift; return 0; @@ -606,12 +661,15 @@ int decode_set(const char *str, int Mshift, unsigned *v) /* base62 */ char bitv[decode_base62_size(str)]; int bitc = decode_base62(str, bitv); + if (bitc < 0) return -1; + /* golomb */ int c = decode_golomb(bitc, bitv, Mshift, v); if (c < 0) return -2; + /* delta */ decode_delta(c, v); return c; @@ -621,8 +679,10 @@ static int downsample_set(int c, unsigned *v, int bpp) { unsigned mask = (1 << bpp) - 1; + maskv(c, v, mask); sortv(c, v); + return uniqv(c, v); } @@ -675,23 +735,29 @@ int rpmsetcmp(const char *str1, const char *str2) str1 += 4; if (strncmp(str2, "set:", 4) == 0) str2 += 4; + /* initialize decoding */ int bpp1, Mshift1; int bpp2, Mshift2; + if (decode_set_init(str1, &bpp1, &Mshift1) < 0) return -3; if (decode_set_init(str2, &bpp2, &Mshift2) < 0) return -4; + /* make room for hash values */ unsigned v1[decode_set_size(str1, Mshift1)]; unsigned v2[decode_set_size(str2, Mshift2)]; + /* decode hash values */ int c1 = decode_set(str1, Mshift1, v1); if (c1 < 0) return -3; + int c2 = decode_set(str2, Mshift2, v2); if (c2 < 0) return -4; + /* adjust for comparison */ if (bpp1 > bpp2) { bpp1 = bpp2; @@ -701,28 +767,30 @@ int rpmsetcmp(const char *str1, const char *str2) bpp2 = bpp1; c2 = downsample_set(c2, v2, bpp2); } + /* compare */ int ge = 1; int le = 1; int i1 = 0, i2 = 0; - while (i1 < c1 && i2 < c2) + while (i1 < c1 && i2 < c2) { if (v1[i1] < v2[i2]) { le = 0; i1++; - } - else if (v1[i1] > v2[i2]) { + } else if (v1[i1] > v2[i2]) { ge = 0; i2++; - } - else { + } else { i1++; i2++; } + } + /* return */ if (i1 < c1) le = 0; if (i2 < c2) ge = 0; + if (le && ge) return 0; if (ge) @@ -751,16 +819,20 @@ struct set { struct set *set_new() { struct set *set = xmalloc(sizeof *set); + set->c = 0; set->sv = NULL; + return set; } void set_add(struct set *set, const char *sym) { const int delta = 1024; + if ((set->c & (delta - 1)) == 0) set->sv = xrealloc(set->sv, sizeof(*set->sv) * (set->c + delta)); + set->sv[set->c].s = xstrdup(sym); set->sv[set->c].v = 0; set->c++; @@ -770,8 +842,10 @@ struct set *set_free(struct set *set) { if (set) { int i; + for (i = 0; i < set->c; i++) set->sv[i].s = _free(set->sv[i].s); + set->sv = _free(set->sv); } return NULL; @@ -783,14 +857,17 @@ unsigned int jenkins_hash(const char *str) { unsigned int hash = 0x9e3779b9; const unsigned char *p = (const unsigned char *) str; + while (*p) { hash += *p++; hash += (hash << 10); hash ^= (hash >> 6); } + hash += (hash << 3); hash ^= (hash >> 11); hash += (hash << 15); + return hash; } @@ -799,10 +876,13 @@ int cmp_sv(const void *arg1, const void *arg2) { struct sv *sv1 = (struct sv *) arg1; struct sv *sv2 = (struct sv *) arg2; + if (sv1->v > sv2->v) return 1; + if (sv2->v > sv1->v) return -1; + return 0; } @@ -816,12 +896,15 @@ const char *set_fini(struct set *set, int bpp) if (bpp > 32) return NULL; unsigned mask = (bpp < 32) ? (1u << bpp) - 1 : ~0u; + /* hash sv strings */ int i; for (i = 0; i < set->c; i++) set->sv[i].v = jenkins_hash(set->sv[i].s) & mask; + /* sort by hash value */ qsort(set->sv, set->c, sizeof *set->sv, cmp_sv); + /* warn on hash collisions */ for (i = 0; i < set->c - 1; i++) { if (set->sv[i].v != set->sv[i+1].v) @@ -831,15 +914,18 @@ const char *set_fini(struct set *set, int bpp) fprintf(stderr, "warning: hash collision: %s %s\n", set->sv[i].s, set->sv[i+1].s); } + /* encode */ unsigned v[set->c]; for (i = 0; i < set->c; i++) v[i] = set->sv[i].v; + int c = uniqv(set->c, v); char base62[encode_set_size(c, bpp)]; int len = encode_set(c, v, bpp, base62); if (len < 0) return NULL; + return xstrdup(base62); } -- 1.7.3.2