ALT Linux Team development discussions
 help / color / mirror / Atom feed
From: Alexey Tourbin <at@altlinux.ru>
To: ALT Linux Team development discussions <devel@lists.altlinux.org>
Subject: [devel]  base2 <-> base62
Date: Tue, 10 Aug 2010 14:50:11 +0400
Message-ID: <20100810105011.GB21522@imap.altlinux.org> (raw)
In-Reply-To: <20100806154328.GA6231@atlas.home>

vsu:
> > Нужно учитывать, что значение 61 (Z) может идти два или более раза
> > подряд.  Например, следующий bitv[] кодируется в "ZZ1".  Это значит,
> > что мы не можем сразу же распаковать "Z", посмотрев на следующую букву.
> 																	
> На самом деле можем, если упаковывать биты в буквы, начиная со
> старшего; тогда, прочитав после "Z" ещё "Z", которое в данном случае
> обозначает 1111(01|10|11), можно использовать старшие 11, не
> заглядывая далее.

I didn't quite get your idea.  Can't you elaborate please (possibly
using the code below)?

But I've got another (possibly similar) idea.  You see, for values
61, 62, and 63 two hight bits are set (to "11").  Other high bits
combinations, which are "00", "01", and "10", can be used to amend
previous value, which is what we need.

So the code goes like this.

    #include <stdio.h>
    #include <stdlib.h>
    #include <assert.h>

    #define xrealloc realloc
    #define AUTO_REALLOC(ptr, size, delta) \
	do { \
	    if (((size) & ((delta) - 1)) == 0) \
		ptr = xrealloc((ptr), sizeof(*(ptr)) * ((size) + (delta))); \
	} while (0)


    struct uintcv {
	unsigned int c;
	unsigned int *v;
    };

    struct charcv {
	unsigned int c;
	char *v;
    };

    const char *bits_to_base62(int bitc, char bitv[])
    {
	int base62c = 0;
	char *base62v = NULL;
	void put_char(int c)
	{
	    AUTO_REALLOC(base62v, base62c, 1024);
	    base62v[base62c++] = c;
	}
	void put_digit(int c)
	{
	    if (c < 10)
		put_char(c + '0');
	    else if (c < 36)
		put_char(c - 10 + 'a');
	    else if (c < 62)
		put_char(c - 36 + 'A');
	}
	int i;
	int bits6 = 0;
	int num6b = 0;
	for (i = 0; i < bitc; i++) {
	    if (bits6 < 6)
		num6b |= bitv[i] << (5 - bits6++);
	    if (bits6 == 6) {
		switch (num6b) {
		case 61:
		    put_digit(61);
		    // extra "00" high bits
		    bits6 = 2;
		    num6b = 0;
		    break;
		case 62:
		    put_digit(61);
		    // extra "01" hight bits
		    bits6 = 2;
		    num6b = 16;
		    break;
		case 63:
		    put_digit(61);
		    // extra "10" hight bits
		    bits6 = 2;
		    num6b = 32;
		    break;
		default:
		    assert(num6b < 61);
		    put_digit(num6b);
		    bits6 = 0;
		    num6b = 0;
		    break;
		}
	    }
	}
	if (bits6) {
	    assert(num6b < 61);
	    put_digit(num6b);
	}
	put_char(0);

	return base62v;
    }

    void base62_to_bits(const char *base62, struct charcv *bits)
    {
	int c2d(int c)
	{
	    if (c >= '0' && c <= '9')
		return c - '0';
	    if (c >= 'a' && c <= 'z')
		return c - 'a' + 10;
	    if (c >= 'A' && c <= 'Z')
		return c - 'A' + 36;
	    return -1;
	}

	void put_bit(char bit)
	{
	    AUTO_REALLOC(bits->v, bits->c, 1024);
	    bits->v[bits->c++] = bit;
	}

	void put6bits(int c)
	{
	    put_bit((c >> 5) & 1);
	    put_bit((c >> 4) & 1);
	    put_bit((c >> 3) & 1);
	    put_bit((c >> 2) & 1);
	    put_bit((c >> 1) & 1);
	    put_bit((c >> 0) & 1);
	}

	void put4bits(int c)
	{
	    put_bit((c >> 3) & 1);
	    put_bit((c >> 2) & 1);
	    put_bit((c >> 1) & 1);
	    put_bit((c >> 0) & 1);
	}

	int c;
	while ((c = *base62++)) {
	    int d = c2d(c);
	    assert(d >= 0);
	    if (d == 61) {
		c = *base62++;
		assert(c);
		d = c2d(c);
		assert(d >= 0);
		switch (d & 48) {
		case 0:
		    put6bits(61);
		    break;
		case 16:
		    put6bits(62);
		    break;
		case 32:
		    put6bits(63);
		    break;
		default:
		    assert(0);
		    break;
		}
		put4bits(d);
	    }
	    else {
		put6bits(d);
	    }
	}
	return;

    }

    int main()
    {
	char bitv[] = {
	    1, 1, 1, 1, 1, 1,
	    1, 1, 1, 1, 1, 0,
	};
	int bitc = sizeof(bitv)/sizeof(*bitv);
	const char *base62 = bits_to_base62(bitc, bitv);
	fprintf(stderr, "base62=%s\n", base62);

	int i;
	struct charcv bits = {0};
	base62_to_bits(base62, &bits);
	fprintf(stderr, "bitc=%d bits.c=%d\n", bitc, bits.c);
	assert(bitc <= bits.c);
	for (i = 0; i < bitc; i++)
	    assert(bitv[i] == bits.v[i]);

	return 0;
    }

Note that there is still a possibility not to deal with high bits, e.g.
1) Let Z = 11111111; or
2) Let special value be 0 = 000000.

So, after all, what do you think?  Isn't there a better way to handle
base62 somehow?  Or shall we resort to base64?  We'll need it soon.
The main requirement is that the decoder be simple and robust (since
it is going to be executed countless times during e.g. unmets check).


  reply	other threads:[~2010-08-10 10:50 UTC|newest]

Thread overview: 61+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-08-04 21:18 Alexey Tourbin
2010-08-04 21:26 ` Dmitry V. Levin
2010-08-04 21:38   ` Alexey Tourbin
2010-08-04 22:05     ` Dmitry V. Levin
2010-08-05  7:41       ` Alexey Tourbin
2010-08-05 12:44         ` Денис Смирнов
2010-08-06 13:17         ` Alexey Tourbin
2010-08-06 14:26           ` Alexey Tourbin
2010-08-06 15:43             ` Sergey Vlasov
2010-08-10 10:50               ` Alexey Tourbin [this message]
2010-08-05 13:07 ` Alexander Bokovoy
2010-08-05 14:48   ` Alexey Tourbin
  -- strict thread matches above, loose matches on Subject: below --
2009-04-16 12:27 [devel] *-gdb пакеты Max Ivanov
2009-04-21 13:37 ` Pavlov Konstantin
2009-04-21 14:24   ` [devel] автогенерация debug-пакетов в rpm Pavlov Konstantin
2009-04-21 18:18     ` Alexey Tourbin
2009-04-21 18:28       ` Alexey I. Froloff
2009-04-21 18:32       ` Pavlov Konstantin
2009-04-21 20:28         ` Alexey Tourbin
2009-05-02 17:24           ` Andrey Rahmatullin
2009-04-21 20:34       ` Mikhail Gusarov
2009-04-27  8:58     ` Денис Смирнов
2009-04-27 22:01       ` Хихин Руслан
2009-04-28  6:23       ` Slava Semushin
2009-05-03 17:55     ` Andrey Rahmatullin
2010-08-05 17:00       ` Dmitry V. Levin
2010-08-07  3:29         ` Kirill A. Shutemov
2010-08-07  9:36           ` Andrey Rahmatullin
2010-08-07 11:05             ` Kirill A. Shutemov
2010-08-07 14:11               ` Andrey Rahmatullin
2010-08-08  4:56                 ` Денис Смирнов
2010-08-08  7:27                   ` Andrey Rahmatullin
2010-08-08 16:32                     ` Денис Смирнов
2010-08-07 16:49               ` Alexey Tourbin
2010-08-07 18:38               ` Michael Shigorin
2010-08-07 18:41                 ` Andrey Rahmatullin
2010-08-07 18:50                   ` Michael Shigorin
2010-08-07 18:55                     ` Andrey Rahmatullin
2010-08-07 19:29                     ` Led
2010-08-08  4:32                       ` Денис Смирнов
2010-08-07 19:35                     ` Kirill A. Shutemov
2010-08-07 19:44                       ` Michael Shigorin
2010-08-07 19:45                         ` Andrey Rahmatullin
2010-08-07 19:48                           ` Michael Shigorin
2010-08-07 13:57             ` Dmitry V. Levin
2010-08-07 14:09               ` Andrey Rahmatullin
2010-08-07 14:12                 ` Andrey Rahmatullin
2010-08-09 23:14           ` Kirill A. Shutemov
2010-08-11  0:28             ` Dmitry V. Levin
2010-08-11  1:11               ` Kirill A. Shutemov
2010-08-11  1:35                 ` Kirill A. Shutemov
2010-08-11 13:52                 ` Dmitry V. Levin
2010-08-12  0:09                   ` Kirill A. Shutemov
2010-08-12  0:35                     ` Dmitry V. Levin
2010-08-11 15:02           ` [devel] перевод rpm на свежий beecrypt Dmitry V. Levin
2010-08-11 19:08             ` Kirill A. Shutemov
2010-08-11 19:34               ` Dmitry V. Levin
2010-08-12  0:06                 ` Kirill A. Shutemov
2010-08-12 16:22                   ` Dmitry V. Levin
2010-08-12 21:07                     ` Dmitry V. Levin
2010-08-12 21:48                       ` Kirill A. Shutemov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100810105011.GB21522@imap.altlinux.org \
    --to=at@altlinux.ru \
    --cc=devel@lists.altlinux.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

ALT Linux Team development discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror http://lore.altlinux.org/devel/0 devel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 devel devel/ http://lore.altlinux.org/devel \
		devel@altlinux.org devel@altlinux.ru devel@lists.altlinux.org devel@lists.altlinux.ru devel@linux.iplabs.ru mandrake-russian@linuxteam.iplabs.ru sisyphus@linuxteam.iplabs.ru
	public-inbox-index devel

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://lore.altlinux.org/org.altlinux.lists.devel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git