From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Date: Tue, 10 Aug 2010 14:50:11 +0400 From: Alexey Tourbin To: ALT Linux Team development discussions Message-ID: <20100810105011.GB21522@imap.altlinux.org> References: <20090421133755.GB28945@snowwhite.immo> <20090421142453.GC28945@snowwhite.immo> <20090503175514.GF6103@wrars-comp.wrarsdomain> <20100805170000.GA21853@wo.int.altlinux.org> <20100807032908.GA11133@shutemov.name> <20100809231453.GA8352@shutemov.name> Mime-Version: 1.0 Content-Type: text/plain; charset=koi8-r Content-Disposition: inline Content-Transfer-Encoding: 8bit In-Reply-To: <20100806154328.GA6231@atlas.home> Subject: [devel] base2 <-> base62 X-BeenThere: devel@lists.altlinux.org X-Mailman-Version: 2.1.12 Precedence: list Reply-To: ALT Linux Team development discussions List-Id: ALT Linux Team development discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 10 Aug 2010 10:50:11 -0000 Archived-At: List-Archive: List-Post: vsu: > > Нужно учитывать, что значение 61 (Z) может идти два или более раза > > подряд. Например, следующий bitv[] кодируется в "ZZ1". Это значит, > > что мы не можем сразу же распаковать "Z", посмотрев на следующую букву. > > На самом деле можем, если упаковывать биты в буквы, начиная со > старшего; тогда, прочитав после "Z" ещё "Z", которое в данном случае > обозначает 1111(01|10|11), можно использовать старшие 11, не > заглядывая далее. I didn't quite get your idea. Can't you elaborate please (possibly using the code below)? But I've got another (possibly similar) idea. You see, for values 61, 62, and 63 two hight bits are set (to "11"). Other high bits combinations, which are "00", "01", and "10", can be used to amend previous value, which is what we need. So the code goes like this. #include #include #include #define xrealloc realloc #define AUTO_REALLOC(ptr, size, delta) \ do { \ if (((size) & ((delta) - 1)) == 0) \ ptr = xrealloc((ptr), sizeof(*(ptr)) * ((size) + (delta))); \ } while (0) struct uintcv { unsigned int c; unsigned int *v; }; struct charcv { unsigned int c; char *v; }; const char *bits_to_base62(int bitc, char bitv[]) { int base62c = 0; char *base62v = NULL; void put_char(int c) { AUTO_REALLOC(base62v, base62c, 1024); base62v[base62c++] = c; } void put_digit(int c) { if (c < 10) put_char(c + '0'); else if (c < 36) put_char(c - 10 + 'a'); else if (c < 62) put_char(c - 36 + 'A'); } int i; int bits6 = 0; int num6b = 0; for (i = 0; i < bitc; i++) { if (bits6 < 6) num6b |= bitv[i] << (5 - bits6++); if (bits6 == 6) { switch (num6b) { case 61: put_digit(61); // extra "00" high bits bits6 = 2; num6b = 0; break; case 62: put_digit(61); // extra "01" hight bits bits6 = 2; num6b = 16; break; case 63: put_digit(61); // extra "10" hight bits bits6 = 2; num6b = 32; break; default: assert(num6b < 61); put_digit(num6b); bits6 = 0; num6b = 0; break; } } } if (bits6) { assert(num6b < 61); put_digit(num6b); } put_char(0); return base62v; } void base62_to_bits(const char *base62, struct charcv *bits) { int c2d(int c) { if (c >= '0' && c <= '9') return c - '0'; if (c >= 'a' && c <= 'z') return c - 'a' + 10; if (c >= 'A' && c <= 'Z') return c - 'A' + 36; return -1; } void put_bit(char bit) { AUTO_REALLOC(bits->v, bits->c, 1024); bits->v[bits->c++] = bit; } void put6bits(int c) { put_bit((c >> 5) & 1); put_bit((c >> 4) & 1); put_bit((c >> 3) & 1); put_bit((c >> 2) & 1); put_bit((c >> 1) & 1); put_bit((c >> 0) & 1); } void put4bits(int c) { put_bit((c >> 3) & 1); put_bit((c >> 2) & 1); put_bit((c >> 1) & 1); put_bit((c >> 0) & 1); } int c; while ((c = *base62++)) { int d = c2d(c); assert(d >= 0); if (d == 61) { c = *base62++; assert(c); d = c2d(c); assert(d >= 0); switch (d & 48) { case 0: put6bits(61); break; case 16: put6bits(62); break; case 32: put6bits(63); break; default: assert(0); break; } put4bits(d); } else { put6bits(d); } } return; } int main() { char bitv[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, }; int bitc = sizeof(bitv)/sizeof(*bitv); const char *base62 = bits_to_base62(bitc, bitv); fprintf(stderr, "base62=%s\n", base62); int i; struct charcv bits = {0}; base62_to_bits(base62, &bits); fprintf(stderr, "bitc=%d bits.c=%d\n", bitc, bits.c); assert(bitc <= bits.c); for (i = 0; i < bitc; i++) assert(bitv[i] == bits.v[i]); return 0; } Note that there is still a possibility not to deal with high bits, e.g. 1) Let Z = 11111111; or 2) Let special value be 0 = 000000. So, after all, what do you think? Isn't there a better way to handle base62 somehow? Or shall we resort to base64? We'll need it soon. The main requirement is that the decoder be simple and robust (since it is going to be executed countless times during e.g. unmets check).