ALT Linux Team development discussions
 help / color / mirror / Atom feed
* [devel] [PATCH 0/3] optimize rpmsetcmp()
@ 2010-11-25 22:04 Kirill A. Shutsemov
  2010-11-25 22:04 ` [devel] [PATCH 1/3] set.c: use packed bitmap for bit vector Kirill A. Shutsemov
                   ` (4 more replies)
  0 siblings, 5 replies; 29+ messages in thread
From: Kirill A. Shutsemov @ 2010-11-25 22:04 UTC (permalink / raw)
  To: Dmitry V. Levin, Alexey Tourbin
  Cc: Alexey I. Froloff, devel, Alexey Gladkov, Kirill A. Shutemov

From: Kirill A. Shutemov <kirill@shutemov.name>

Tested on Intel Core2 Duo P9500, 3GiB RAM. i586.

rpm-4.0.4-alt100.4:

 Performance counter stats for 'apt-cache unmet' (10 runs):

        3396.569863  task-clock-msecs         #      1.000 CPUs    ( +-   0.018% )
                 12  context-switches         #      0.000 M/sec   ( +-   9.296% )
                  5  CPU-migrations           #      0.000 M/sec   ( +-  21.414% )
               9680  page-faults              #      0.003 M/sec   ( +-   0.002% )
         8525321008  cycles                   #   2509.980 M/sec   ( +-   0.016% )  (scaled from 33.34%)
         7937229883  instructions             #      0.931 IPC     ( +-   0.041% )  (scaled from 50.00%)
         1468168069  branches                 #    432.250 M/sec   ( +-   0.014% )  (scaled from 49.99%)
          257179182  branch-misses            #     17.517 %       ( +-   0.047% )  (scaled from 50.01%)
           26275740  cache-references         #      7.736 M/sec   ( +-   0.114% )  (scaled from 33.35%)
             350852  cache-misses             #      0.103 M/sec   ( +-   1.127% )  (scaled from 33.35%)

        3.398038183  seconds time elapsed   ( +-   0.018% )

rpm-4.0.4-alt100.4 + patchset:

 Performance counter stats for 'apt-cache unmet' (10 runs):

        2010.112427  task-clock-msecs         #      1.000 CPUs    ( +-   0.038% )
                  8  context-switches         #      0.000 M/sec   ( +-  17.232% )
                  4  CPU-migrations           #      0.000 M/sec   ( +-  21.237% )
               9675  page-faults              #      0.005 M/sec   ( +-   0.005% )
         5043579686  cycles                   #   2509.103 M/sec   ( +-   0.041% )  (scaled from 33.32%)
         5567840605  instructions             #      1.104 IPC     ( +-   0.021% )  (scaled from 50.00%)
         1028369972  branches                 #    511.598 M/sec   ( +-   0.016% )  (scaled from 50.00%)
           94986026  branch-misses            #      9.237 %       ( +-   0.080% )  (scaled from 50.03%)
           16227132  cache-references         #      8.073 M/sec   ( +-   0.303% )  (scaled from 33.36%)
             323117  cache-misses             #      0.161 M/sec   ( +-   1.241% )  (scaled from 33.34%)

        2.011050788  seconds time elapsed   ( +-   0.044% )

Kirill A. Shutemov (3):
  set.c: use packed bitmap for bit vector
  set.c: optimize putbits()
  set.c: optimize decode_golomb()

 lib/set.c |  186 ++++++++++++++++++++++++++++++++++++++----------------------
 1 files changed, 118 insertions(+), 68 deletions(-)

-- 
1.7.3.2



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [devel] [PATCH 1/3] set.c: use packed bitmap for bit vector
  2010-11-25 22:04 [devel] [PATCH 0/3] optimize rpmsetcmp() Kirill A. Shutsemov
@ 2010-11-25 22:04 ` Kirill A. Shutsemov
  2010-11-25 22:04 ` [devel] [PATCH 2/3] set.c: optimize putbits() Kirill A. Shutsemov
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 29+ messages in thread
From: Kirill A. Shutsemov @ 2010-11-25 22:04 UTC (permalink / raw)
  To: Dmitry V. Levin, Alexey Tourbin
  Cc: Alexey I. Froloff, devel, Alexey Gladkov, Kirill A. Shutemov

From: Kirill A. Shutemov <kirill@shutemov.name>

Currently, set.c uses array of chars to store bit vector. Each char
stores one bit: 0 or 1.

Let's use packed bitmap instead. It creates room for optimizations.

Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
---
 lib/set.c |  152 ++++++++++++++++++++++++++++++++++++++-----------------------
 1 files changed, 95 insertions(+), 57 deletions(-)

diff --git a/lib/set.c b/lib/set.c
index 68bcc8c..4b91dab 100644
--- a/lib/set.c
+++ b/lib/set.c
@@ -16,6 +16,24 @@
 #include <stdlib.h>
 #include <assert.h>
 
+#define DIV_ROUND_UP(n,d)	(((n) + (d) - 1) / (d))
+#define BITS_PER_BYTE           8
+#define BITS_PER_LONG		(sizeof(long) * BITS_PER_BYTE)
+#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_LONG)
+#define BIT(nr)			(1UL << (nr))
+
+static inline
+int get_bit(const unsigned long *bitmap, int offset)
+{
+    return !!(bitmap[offset / BITS_PER_LONG] & BIT(offset % BITS_PER_LONG));
+}
+
+static inline
+void set_bit(unsigned long *bitmap, int offset)
+{
+    bitmap[offset / BITS_PER_LONG] |= BIT(offset % BITS_PER_LONG);
+}
+
 /*
  * Base62 routines - encode bits with alnum characters.
  *
@@ -55,19 +73,20 @@ void put_digit(char **base62, int c)
     (*base62)++;
 }
 
-/* Main base62 encoding routine: pack bitv into base62 string. */
+/* Main base62 encoding routine: pack bitmap into base62 string. */
 static
-int encode_base62(int bitc, const char *bitv, char *base62)
+int encode_base62(char *base62, const unsigned long *bitmap, size_t nbits)
 {
     char *base62_start = base62;
     int bits2 = 0; /* number of high bits set */
     int bits6 = 0; /* number of regular bits set */
     int num6b = 0; /* pending 6-bit number */
+    unsigned int i;
 
-    while (bitc-- > 0) {
+    for(i = 0; i < nbits; i++) {
 	assert(bits6 + bits2 < 6);
-	num6b |= (*bitv++ << bits6++);
 
+	num6b |= (get_bit(bitmap, i) << bits6++);
 	if (bits6 + bits2 != 6)
 	    continue;
 
@@ -138,21 +157,20 @@ int char_to_num(int c)
 }
 
 static inline
-void putbits(char **bitv, int n, int c)
+void putbits(unsigned long *bitmap, int *offset, int c, int nbits)
 {
     int i;
 
-    for(i = 0; i < n; i++) {
-	**bitv = (c >> i) & 1;
-	(*bitv)++;
-    }
+    for (i = 0; i < nbits; i++, (*offset)++)
+	if (c & BIT(i))
+	    set_bit(bitmap, *offset);
 }
 
-/* Main base62 decoding routine: unpack base62 string into bitv. */
+/* Main base62 decoding routine: unpack base62 string into bitmap. */
 static
-int decode_base62(const char *base62, char *bitv)
+int decode_base62(unsigned long *bitmap, const char *base62)
 {
-    char *bitv_start = bitv;
+    int offset = 0;
     int c;
 
     while ((c = *base62++)) {
@@ -162,7 +180,7 @@ int decode_base62(const char *base62, char *bitv)
 	    return -1;
 
 	if (num6b != 61) {
-	    putbits(&bitv, 6, num6b);
+	    putbits(bitmap, &offset, num6b, 6);
 	    continue;
 	}
 
@@ -177,27 +195,37 @@ int decode_base62(const char *base62, char *bitv)
 
 	switch (num6b & (16 + 32)) {
 	case 0:
-	    putbits(&bitv, 6, 61);
+	    putbits(bitmap, &offset, 61, 6);
 	    break;
 	case 16:
-	    putbits(&bitv, 6, 62);
+	    putbits(bitmap, &offset, 62, 6);
 	    break;
 	case 32:
-	    putbits(&bitv, 6, 63);
+	    putbits(bitmap, &offset, 63, 6);
 	    break;
 	default:
 	    return -4;
 	    break;
 	}
 
-	putbits(&bitv, 4, num6b);
+	putbits(bitmap, &offset, num6b, 4);
     }
 
-    return bitv - bitv_start;
+    return offset;
 }
 
 #ifdef SELF_TEST
 static
+void bitv_to_bitmap(unsigned long *bitmap, const char *bitv, size_t n)
+{
+    unsigned int i;
+
+    for (i = 0; i < n; i++)
+	if (bitv[i])
+	    set_bit(bitmap, i);
+}
+
+static
 void test_base62(void)
 {
     const char rnd_bitv[] = {
@@ -208,15 +236,20 @@ void test_base62(void)
 	/* trigger some 'Z' */
 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     };
-    const int rnd_bitc = sizeof(rnd_bitv);
-    char *base62, *bitv;
-    int i, len, bitc;
+    const unsigned int rnd_bitc = sizeof(rnd_bitv);
+    unsigned long bitmap[BITS_TO_LONGS(rnd_bitc)];
+    unsigned long *new_bitmap;
+    char *base62;
+    unsigned int i, len, bitc;
+
+    memset(&bitmap, 0, sizeof(bitmap));
+    bitv_to_bitmap(bitmap, rnd_bitv, rnd_bitc);
 
     /* encode */
     base62 = alloca(encode_base62_size(rnd_bitc));
-    len = encode_base62(rnd_bitc, rnd_bitv, base62);
+    len = encode_base62(base62, bitmap, rnd_bitc);
     assert(len > 0);
-    assert(len == (int) strlen(base62));
+    assert(len == strlen(base62));
     fprintf(stderr, "len=%d base62=%s\n", len, base62);
 
     /* The length cannot be shorter than 6 bits per symbol. */
@@ -226,17 +259,17 @@ void test_base62(void)
     assert(len <= rnd_bitc / 2 / 4 + rnd_bitc / 2 / 6 + 1);
 
     /* decode */
-    bitv = alloca(decode_base62_size(base62));
-    bitc = decode_base62(base62, bitv);
+    bitc = decode_base62_size(base62);
+    new_bitmap = alloca(BITS_TO_LONGS(bitc) * sizeof(long));
+    memset(new_bitmap, 0, BITS_TO_LONGS(bitc) * sizeof(long));
+    bitc = decode_base62(new_bitmap, base62);
     fprintf(stderr, "rnd_bitc=%d bitc=%d\n", rnd_bitc, bitc);
     assert(bitc >= rnd_bitc);
 
     /* Decoded bits must match. */
-    for (i = 0; i < rnd_bitc; i++)
-	assert(rnd_bitv[i] == bitv[i]);
-    /* The remaining bits must be zero bits. */
-    for (i = rnd_bitc; i < bitc; i++)
-	assert(bitv[i] == 0);
+    for (i = 0; i < sizeof(bitmap) / sizeof(*bitmap); i++)
+	assert(bitmap[i] == new_bitmap[i]);
+
     fprintf(stderr, "%s: base62 test OK\n", __FILE__);
 }
 #endif
@@ -306,32 +339,32 @@ int encode_golomb_size(int c, int Mshift)
     return (Mshift << 1) * c + 16;
 }
 
-/* Main golomb encoding routine: package integers into bits. */
+/* Main golomb encoding routine: package integers into bitmap. */
 static
-int encode_golomb(int c, const unsigned *v, int Mshift, char *bitv)
+int encode_golomb(int c, const unsigned *v, int Mshift, unsigned long *bitmap)
 {
-    char *bitv_start = bitv;
+    int offset = 0;
     const unsigned mask = (1 << Mshift) - 1;
 
     while (c-- > 0) {
-	unsigned v0, q, r;
+	unsigned v0, r;
 	int i;
 
 	v0 = *v++;
 
 	/* first part: variable-length sequence */
-	q = v0 >> Mshift;
-	for (i = 0; i < (int) q; i++)
-	    *bitv++ = 0;
-	*bitv++ = 1;
+	offset += v0 >> Mshift;
+	set_bit(bitmap, offset++);
 
 	/* second part: lower Mshift bits */
 	r = v0 & mask;
-	for (i = 0; i < Mshift; i++)
-	    *bitv++ = (r >> i) & 1;
+	for (i = 0; i < Mshift; i++, offset++) {
+	    if (r & BIT(i))
+		    set_bit(bitmap, offset);
+	}
     }
 
-    return bitv - bitv_start;
+    return offset;
 }
 
 /* Estimate how many values will emerge. */
@@ -345,11 +378,12 @@ int decode_golomb_size(int bitc, int Mshift)
     return bitc / (Mshift + 1);
 }
 
-/* Main golomb decoding routine: unpackage bits into values. */
+/* Main golomb decoding routine: unpackage bitmap into values. */
 static
-int decode_golomb(int bitc, const char *bitv, int Mshift, unsigned *v)
+int decode_golomb(int bitc, const unsigned long *bitmap, int Mshift, unsigned *v)
 {
     unsigned *v_start = v;
+    int offset = 0;
 
     /* next value */
     while (bitc > 0) {
@@ -362,7 +396,7 @@ int decode_golomb(int bitc, const char *bitv, int Mshift, unsigned *v)
 	bit = 0;
 	while (bitc > 0) {
 	    bitc--;
-	    bit = *bitv++;
+	    bit = get_bit(bitmap, offset++);
 	    if (bit == 0)
 		q++;
 	    else
@@ -381,8 +415,8 @@ int decode_golomb(int bitc, const char *bitv, int Mshift, unsigned *v)
 	r = 0;
 	for (i = 0; i < Mshift; i++) {
 	    bitc--;
-	    if (*bitv++)
-		r |= (1 << i);
+	    if (get_bit(bitmap, offset++))
+		r |= BIT(i);
 	}
 
 	/* the value */
@@ -401,7 +435,7 @@ void test_golomb(void)
 	7, 6, 5, 4, 3, 2, 1,
     };
     const int rnd_c = sizeof(rnd_v) / sizeof(*rnd_v);
-    char *bitv;
+    unsigned long *bitmap;
     unsigned *v;
     int bpp, Mshift;
     int alloc_bitc, bitc;
@@ -417,8 +451,9 @@ void test_golomb(void)
     /* encode */
     alloc_bitc = encode_golomb_size(rnd_c, Mshift);
     assert(alloc_bitc > rnd_c);
-    bitv = alloca(alloc_bitc);
-    bitc = encode_golomb(rnd_c, rnd_v, Mshift, bitv);
+    bitmap = alloca(BITS_TO_LONGS(alloc_bitc) * sizeof(long));
+    memset(bitmap, 0, BITS_TO_LONGS(alloc_bitc) * sizeof(long));
+    bitc = encode_golomb(rnd_c, rnd_v, Mshift, bitmap);
     fprintf(stderr, "alloc_bitc=%d bitc=%d\n", alloc_bitc, bitc);
     assert(bitc > rnd_c);
     assert(bitc <= alloc_bitc);
@@ -427,7 +462,7 @@ void test_golomb(void)
     alloc_c = decode_golomb_size(bitc, Mshift);
     assert(alloc_c >= rnd_c);
     v = alloca(sizeof(*v) * alloc_c);
-    c = decode_golomb(bitc, bitv, Mshift, v);
+    c = decode_golomb(bitc, bitmap, Mshift, v);
     fprintf(stderr, "rnd_c=%d alloc_c=%d c=%d\n", rnd_c, alloc_c, c);
     assert(alloc_c >= c);
 
@@ -597,8 +632,9 @@ int encode_set(int c, unsigned *v, int bpp, char *base62)
     int Mshift = encode_golomb_Mshift(c, bpp);
     int bitc = encode_golomb_size(c, Mshift);
     int len;
-    char bitv[bitc];
+    unsigned long bitmap[BITS_TO_LONGS(bitc)];
 
+    memset(bitmap, 0, sizeof(bitmap));
     /* bpp */
     if (bpp < 10 || bpp > 32)
 	return -1;
@@ -613,7 +649,7 @@ int encode_set(int c, unsigned *v, int bpp, char *base62)
     encode_delta(c, v);
 
     /* golomb */
-    bitc = encode_golomb(c, v, Mshift, bitv);
+    bitc = encode_golomb(c, v, Mshift, bitmap);
 
 #ifdef SELF_TEST
     decode_delta(c, v);
@@ -623,7 +659,7 @@ int encode_set(int c, unsigned *v, int bpp, char *base62)
 	return -3;
 
     /* base62 */
-    len = encode_base62(bitc, bitv, base62);
+    len = encode_base62(base62, bitmap, bitc);
     if (len < 0)
 	return -4;
 
@@ -671,21 +707,23 @@ int decode_set_size(const char *str, int Mshift)
 static
 int decode_set(const char *str, int Mshift, unsigned *v)
 {
-    char *bitv;
+    unsigned long *bitmap;
     int bitc;
     int c;
 
     str += 2;
 
     /* base62 */
-    bitv = alloca(decode_base62_size(str));
-    bitc = decode_base62(str, bitv);
+    bitc = decode_base62_size(str);
+    bitmap = alloca(BITS_TO_LONGS(bitc) * sizeof(long));
+    memset(bitmap, 0, BITS_TO_LONGS(bitc) * sizeof(long));
+    bitc = decode_base62(bitmap, str);
 
     if (bitc < 0)
 	return -1;
 
     /* golomb */
-    c = decode_golomb(bitc, bitv, Mshift, v);
+    c = decode_golomb(bitc, bitmap, Mshift, v);
     if (c < 0)
 	return -2;
 
-- 
1.7.3.2



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [devel] [PATCH 2/3] set.c: optimize putbits()
  2010-11-25 22:04 [devel] [PATCH 0/3] optimize rpmsetcmp() Kirill A. Shutsemov
  2010-11-25 22:04 ` [devel] [PATCH 1/3] set.c: use packed bitmap for bit vector Kirill A. Shutsemov
@ 2010-11-25 22:04 ` Kirill A. Shutsemov
  2010-11-25 22:04 ` [devel] [PATCH 3/3] set.c: optimize decode_golomb() Kirill A. Shutsemov
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 29+ messages in thread
From: Kirill A. Shutsemov @ 2010-11-25 22:04 UTC (permalink / raw)
  To: Dmitry V. Levin, Alexey Tourbin
  Cc: Alexey I. Froloff, devel, Alexey Gladkov, Kirill A. Shutemov

From: Kirill A. Shutemov <kirill@shutemov.name>

Use bit operations instead of cycles.

Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
---
 lib/set.c |   22 ++++++++++++++++------
 1 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/lib/set.c b/lib/set.c
index 4b91dab..02a6c31 100644
--- a/lib/set.c
+++ b/lib/set.c
@@ -21,6 +21,7 @@
 #define BITS_PER_LONG		(sizeof(long) * BITS_PER_BYTE)
 #define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_LONG)
 #define BIT(nr)			(1UL << (nr))
+#define MASK(nbits)		(BIT(nbits) - 1)
 
 static inline
 int get_bit(const unsigned long *bitmap, int offset)
@@ -157,13 +158,22 @@ int char_to_num(int c)
 }
 
 static inline
-void putbits(unsigned long *bitmap, int *offset, int c, int nbits)
+void putbits(unsigned long *bitmap, int *offset, unsigned long c, int nbits)
 {
-    int i;
+    int quot, rem;
+
+    assert(!(c & ~MASK(nbits)));
+
+    quot = *offset / BITS_PER_LONG;
+    rem = *offset % BITS_PER_LONG;
+
+    bitmap[quot] |= c << rem;
+    c >>= BITS_PER_LONG - rem;
+
+    if (nbits + rem > (int) BITS_PER_LONG)
+	bitmap[quot + 1] = c;
 
-    for (i = 0; i < nbits; i++, (*offset)++)
-	if (c & BIT(i))
-	    set_bit(bitmap, *offset);
+    *offset += nbits;
 }
 
 /* Main base62 decoding routine: unpack base62 string into bitmap. */
@@ -208,7 +218,7 @@ int decode_base62(unsigned long *bitmap, const char *base62)
 	    break;
 	}
 
-	putbits(bitmap, &offset, num6b, 4);
+	putbits(bitmap, &offset, num6b & MASK(4), 4);
     }
 
     return offset;
-- 
1.7.3.2



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [devel] [PATCH 3/3] set.c: optimize decode_golomb()
  2010-11-25 22:04 [devel] [PATCH 0/3] optimize rpmsetcmp() Kirill A. Shutsemov
  2010-11-25 22:04 ` [devel] [PATCH 1/3] set.c: use packed bitmap for bit vector Kirill A. Shutsemov
  2010-11-25 22:04 ` [devel] [PATCH 2/3] set.c: optimize putbits() Kirill A. Shutsemov
@ 2010-11-25 22:04 ` Kirill A. Shutsemov
  2010-11-26  8:35 ` [devel] [PATCH 0/3] optimize rpmsetcmp() Kirill A. Shutemov
  2010-12-05  1:24 ` Alexey Tourbin
  4 siblings, 0 replies; 29+ messages in thread
From: Kirill A. Shutsemov @ 2010-11-25 22:04 UTC (permalink / raw)
  To: Dmitry V. Levin, Alexey Tourbin
  Cc: Alexey I. Froloff, devel, Alexey Gladkov, Kirill A. Shutemov

From: Kirill A. Shutemov <kirill@shutemov.name>

Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
---
 lib/set.c |   26 ++++++++++++++------------
 1 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/lib/set.c b/lib/set.c
index 02a6c31..8a425b7 100644
--- a/lib/set.c
+++ b/lib/set.c
@@ -396,16 +396,16 @@ int decode_golomb(int bitc, const unsigned long *bitmap, int Mshift, unsigned *v
     int offset = 0;
 
     /* next value */
-    while (bitc > 0) {
+    while (offset < bitc) {
 	unsigned q, r;
 	char bit;
-	int i;
+	int quot, rem;
+	int k;
 
 	/* first part */
 	q = 0;
 	bit = 0;
-	while (bitc > 0) {
-	    bitc--;
+	while (offset < bitc) {
 	    bit = get_bit(bitmap, offset++);
 	    if (bit == 0)
 		q++;
@@ -414,20 +414,22 @@ int decode_golomb(int bitc, const unsigned long *bitmap, int Mshift, unsigned *v
 	}
 
 	/* trailing zero bits in the input are okay */
-	if (bitc == 0 && bit == 0)
+	if (offset == bitc && bit == 0)
 	    break;
 
 	/* otherwise, incomplete value is not okay */
-	if (bitc < Mshift)
+	if (bitc - offset < Mshift)
 	    return -1;
 
+	quot = offset / BITS_PER_LONG;
+	rem = offset % BITS_PER_LONG;
+	k = rem + Mshift - BITS_PER_LONG;
+
 	/* second part */
-	r = 0;
-	for (i = 0; i < Mshift; i++) {
-	    bitc--;
-	    if (get_bit(bitmap, offset++))
-		r |= BIT(i);
-	}
+	r = (bitmap[quot] & (MASK(Mshift) << rem)) >> rem;
+	if (k > 0)
+	    r |= (bitmap[quot + 1] & MASK(k)) << (Mshift - k);
+	offset += Mshift;
 
 	/* the value */
 	*v++ = (q << Mshift) | r;
-- 
1.7.3.2



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-11-25 22:04 [devel] [PATCH 0/3] optimize rpmsetcmp() Kirill A. Shutsemov
                   ` (2 preceding siblings ...)
  2010-11-25 22:04 ` [devel] [PATCH 3/3] set.c: optimize decode_golomb() Kirill A. Shutsemov
@ 2010-11-26  8:35 ` Kirill A. Shutemov
  2010-11-30  0:35   ` Dmitry V. Levin
  2010-12-05  1:24 ` Alexey Tourbin
  4 siblings, 1 reply; 29+ messages in thread
From: Kirill A. Shutemov @ 2010-11-26  8:35 UTC (permalink / raw)
  To: Dmitry V. Levin, Alexey Tourbin; +Cc: Alexey I. Froloff, devel, Alexey Gladkov

On Fri, Nov 26, 2010 at 12:04:23AM +0200, Kirill A. Shutsemov wrote:
> From: Kirill A. Shutemov <kirill@shutemov.name>
> 
> Tested on Intel Core2 Duo P9500, 3GiB RAM. i586.

apt-shell < /dev/null

rpm-4.0.4-alt100.4:

  Performance counter stats for 'apt-shell' (5 runs):

        7431.705536  task-clock-msecs         #      1.000 CPUs    ( +-   0.037% )
                 39  context-switches         #      0.000 M/sec   ( +-  14.774% )
                 13  CPU-migrations           #      0.000 M/sec   ( +-  27.252% )
              12135  page-faults              #      0.002 M/sec   ( +-   0.003% )
        18654610617  cycles                   #   2510.139 M/sec   ( +-   0.034% )  (scaled from 33.34%)
        17117123518  instructions             #      0.918 IPC     ( +-   0.014% )  (scaled from 50.00%)
         3151838786  branches                 #    424.107 M/sec   ( +-   0.039% )  (scaled from 50.00%)
          558860303  branch-misses            #     17.731 %       ( +-   0.063% )  (scaled from 50.00%)
           52927655  cache-references         #      7.122 M/sec   ( +-   0.316% )  (scaled from 33.34%)
             930111  cache-misses             #      0.125 M/sec   ( +-   1.324% )  (scaled from 33.34%)

        7.434342478  seconds time elapsed   ( +-   0.049% )

rpm-4.0.4-alt100.4 + patches:

  Performance counter stats for 'apt-shell' (5 runs):

        4364.794613  task-clock-msecs         #      1.000 CPUs    ( +-   0.066% )
                 33  context-switches         #      0.000 M/sec   ( +-  20.124% )
                 12  CPU-migrations           #      0.000 M/sec   ( +-   6.780% )
              11681  page-faults              #      0.003 M/sec   ( +-   0.005% )
        10954135791  cycles                   #   2509.657 M/sec   ( +-   0.063% )  (scaled from 33.32%)
        11905100223  instructions             #      1.087 IPC     ( +-   0.020% )  (scaled from 49.99%)
         2181001479  branches                 #    499.680 M/sec   ( +-   0.035% )  (scaled from 49.99%)
          200182757  branch-misses            #      9.178 %       ( +-   0.104% )  (scaled from 50.01%)
           31301034  cache-references         #      7.171 M/sec   ( +-   0.377% )  (scaled from 33.36%)
             940816  cache-misses             #      0.216 M/sec   ( +-   2.534% )  (scaled from 33.34%)

        4.366582986  seconds time elapsed   ( +-   0.067% )

Around 41% of speed up.

-- 
 Kirill A. Shutemov


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-11-26  8:35 ` [devel] [PATCH 0/3] optimize rpmsetcmp() Kirill A. Shutemov
@ 2010-11-30  0:35   ` Dmitry V. Levin
  2010-12-02 21:48     ` Dmitry V. Levin
                       ` (2 more replies)
  0 siblings, 3 replies; 29+ messages in thread
From: Dmitry V. Levin @ 2010-11-30  0:35 UTC (permalink / raw)
  To: ALT Devel discussion list

[-- Attachment #1: Type: text/plain, Size: 1817 bytes --]

On Fri, Nov 26, 2010 at 10:35:37AM +0200, Kirill A. Shutemov wrote:
> On Fri, Nov 26, 2010 at 12:04:23AM +0200, Kirill A. Shutsemov wrote:
> > 
> > Tested on Intel Core2 Duo P9500, 3GiB RAM. i586.
> 
> apt-shell < /dev/null
[...]
> Around 41% of speed up.

On AMD Opteron Processor 275:
without patches:
2.98user 0.19system 0:03.18elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+10142minor)pagefaults 0swaps
with patches:
2.20user 0.21system 0:02.42elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9978minor)pagefaults 0swaps
speedup is around 24%

On AMD Opteron Processor 2216:
without patches:
2.42user 0.02system 0:02.44elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9784minor)pagefaults 0swaps
with patches:
1.86user 0.02system 0:01.88elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9601minor)pagefaults 0swaps
speedup is around 23%

On Intel Xeon 5110
without patches:
2.98user 0.04system 0:03.02elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9789minor)pagefaults 0swaps
with patches:
2.54user 0.01system 0:02.55elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9608minor)pagefaults 0swaps
speedup is around 16%

On Intel Xeon E5520
without patches:
2.02user 0.00system 0:02.02elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9730minor)pagefaults 0swaps
with patches:
1.77user 0.00system 0:01.77elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9565minor)pagefaults 0swaps
speedup is around 12%

Not so amazing as on your test platform, but awesome anyway.
I'm going to apply these patches.
If anybody has objections, please speak up now.


-- 
ldv

[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-11-30  0:35   ` Dmitry V. Levin
@ 2010-12-02 21:48     ` Dmitry V. Levin
  2010-12-04 13:02     ` Alexey Tourbin
  2010-12-04 15:06     ` Alexey Tourbin
  2 siblings, 0 replies; 29+ messages in thread
From: Dmitry V. Levin @ 2010-12-02 21:48 UTC (permalink / raw)
  To: ALT Devel discussion list; +Cc: Alexey M. Tourbin

[-- Attachment #1: Type: text/plain, Size: 2143 bytes --]

On Tue, Nov 30, 2010 at 03:35:42AM +0300, Dmitry V. Levin wrote:
> On Fri, Nov 26, 2010 at 10:35:37AM +0200, Kirill A. Shutemov wrote:
> > On Fri, Nov 26, 2010 at 12:04:23AM +0200, Kirill A. Shutsemov wrote:
> > > 
> > > Tested on Intel Core2 Duo P9500, 3GiB RAM. i586.
> > 
> > apt-shell < /dev/null
> [...]
> > Around 41% of speed up.
> 
> On AMD Opteron Processor 275:
> without patches:
> 2.98user 0.19system 0:03.18elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+10142minor)pagefaults 0swaps
> with patches:
> 2.20user 0.21system 0:02.42elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+9978minor)pagefaults 0swaps
> speedup is around 24%
> 
> On AMD Opteron Processor 2216:
> without patches:
> 2.42user 0.02system 0:02.44elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+9784minor)pagefaults 0swaps
> with patches:
> 1.86user 0.02system 0:01.88elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+9601minor)pagefaults 0swaps
> speedup is around 23%
> 
> On Intel Xeon 5110
> without patches:
> 2.98user 0.04system 0:03.02elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+9789minor)pagefaults 0swaps
> with patches:
> 2.54user 0.01system 0:02.55elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+9608minor)pagefaults 0swaps
> speedup is around 16%
> 
> On Intel Xeon E5520
> without patches:
> 2.02user 0.00system 0:02.02elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+9730minor)pagefaults 0swaps
> with patches:
> 1.77user 0.00system 0:01.77elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+9565minor)pagefaults 0swaps
> speedup is around 12%
> 
> Not so amazing as on your test platform, but awesome anyway.
> I'm going to apply these patches.
> If anybody has objections, please speak up now.

These patches were made on top of your cleanup patches.  Looks like
I'll have to override unreasoned NAK from Alexey Tourbin and apply
these cleanups as well.


-- 
ldv

[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-11-30  0:35   ` Dmitry V. Levin
  2010-12-02 21:48     ` Dmitry V. Levin
@ 2010-12-04 13:02     ` Alexey Tourbin
  2010-12-04 16:30       ` Dmitry V. Levin
  2010-12-04 15:06     ` Alexey Tourbin
  2 siblings, 1 reply; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-04 13:02 UTC (permalink / raw)
  To: ALT Devel discussion list

On Tue, Nov 30, 2010 at 03:35:42AM +0300, Dmitry V. Levin wrote:
> On Intel Xeon E5520
> without patches:
> 2.02user 0.00system 0:02.02elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+9730minor)pagefaults 0swaps
> with patches:
> 1.77user 0.00system 0:01.77elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+9565minor)pagefaults 0swaps
> speedup is around 12%
> 
> Not so amazing as on your test platform, but awesome anyway.
> I'm going to apply these patches.
> If anybody has objections, please speak up now.

I think I've made more amazing a change.
http://git.altlinux.org/gears/r/rpm.git?a=commitdiff;h=fc5b5a5f

> -- 
> ldv


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-11-30  0:35   ` Dmitry V. Levin
  2010-12-02 21:48     ` Dmitry V. Levin
  2010-12-04 13:02     ` Alexey Tourbin
@ 2010-12-04 15:06     ` Alexey Tourbin
  2010-12-04 16:29       ` Dmitry V. Levin
  2 siblings, 1 reply; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-04 15:06 UTC (permalink / raw)
  To: ALT Devel discussion list

On Tue, Nov 30, 2010 at 03:35:42AM +0300, Dmitry V. Levin wrote:
> On Fri, Nov 26, 2010 at 10:35:37AM +0200, Kirill A. Shutemov wrote:
> > apt-shell < /dev/null

> Not so amazing as on your test platform, but awesome anyway.
> I'm going to apply these patches.
> If anybody has objections, please speak up now.

The objection is, why do you think the problem is with here, not with apt?
Go yonder and optimize apt!

> -- 
> ldv


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-12-04 15:06     ` Alexey Tourbin
@ 2010-12-04 16:29       ` Dmitry V. Levin
  2010-12-04 16:42         ` Alexey Tourbin
  0 siblings, 1 reply; 29+ messages in thread
From: Dmitry V. Levin @ 2010-12-04 16:29 UTC (permalink / raw)
  To: ALT Devel discussion list

[-- Attachment #1: Type: text/plain, Size: 660 bytes --]

On Sat, Dec 04, 2010 at 06:06:18PM +0300, Alexey Tourbin wrote:
> On Tue, Nov 30, 2010 at 03:35:42AM +0300, Dmitry V. Levin wrote:
> > On Fri, Nov 26, 2010 at 10:35:37AM +0200, Kirill A. Shutemov wrote:
> > > apt-shell < /dev/null
> 
> > Not so amazing as on your test platform, but awesome anyway.
> > I'm going to apply these patches.
> > If anybody has objections, please speak up now.
> 
> The objection is, why do you think the problem is with here, not with apt?
> Go yonder and optimize apt!

It was rpm that was changed to work 10x times slower, not apt.
But apt certainly could be made better, so feel free to optimize it.


-- 
ldv

[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-12-04 13:02     ` Alexey Tourbin
@ 2010-12-04 16:30       ` Dmitry V. Levin
  2010-12-04 16:55         ` Alexey Tourbin
  0 siblings, 1 reply; 29+ messages in thread
From: Dmitry V. Levin @ 2010-12-04 16:30 UTC (permalink / raw)
  To: ALT Devel discussion list

[-- Attachment #1: Type: text/plain, Size: 848 bytes --]

On Sat, Dec 04, 2010 at 04:02:17PM +0300, Alexey Tourbin wrote:
> On Tue, Nov 30, 2010 at 03:35:42AM +0300, Dmitry V. Levin wrote:
> > On Intel Xeon E5520
> > without patches:
> > 2.02user 0.00system 0:02.02elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
> > 0inputs+0outputs (0major+9730minor)pagefaults 0swaps
> > with patches:
> > 1.77user 0.00system 0:01.77elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
> > 0inputs+0outputs (0major+9565minor)pagefaults 0swaps
> > speedup is around 12%
> > 
> > Not so amazing as on your test platform, but awesome anyway.
> > I'm going to apply these patches.
> > If anybody has objections, please speak up now.
> 
> I think I've made more amazing a change.
> http://git.altlinux.org/gears/r/rpm.git?a=commitdiff;h=fc5b5a5f

Have you tried to combine both optimizations?


-- 
ldv

[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-12-04 16:29       ` Dmitry V. Levin
@ 2010-12-04 16:42         ` Alexey Tourbin
  2010-12-04 16:52           ` Dmitry V. Levin
  0 siblings, 1 reply; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-04 16:42 UTC (permalink / raw)
  To: ALT Devel discussion list

On Sat, Dec 04, 2010 at 07:29:11PM +0300, Dmitry V. Levin wrote:
> On Sat, Dec 04, 2010 at 06:06:18PM +0300, Alexey Tourbin wrote:
> > On Tue, Nov 30, 2010 at 03:35:42AM +0300, Dmitry V. Levin wrote:
> > > On Fri, Nov 26, 2010 at 10:35:37AM +0200, Kirill A. Shutemov wrote:
> > > > apt-shell < /dev/null
> > 
> > > Not so amazing as on your test platform, but awesome anyway.
> > > I'm going to apply these patches.
> > > If anybody has objections, please speak up now.
> > 
> > The objection is, why do you think the problem is with here, not with apt?
> > Go yonder and optimize apt!
> 
> It was rpm that was changed to work 10x times slower, not apt.
> But apt certainly could be made better, so feel free to optimize it.

RPM goes reasonably fast, and it's not exactly "slower".
Something that might be called unreasonable is behind the RPM. 

I can't see why rpm is now 10x times slower.
Slower compared to... what?

> -- 
> ldv


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-12-04 16:42         ` Alexey Tourbin
@ 2010-12-04 16:52           ` Dmitry V. Levin
  2010-12-04 17:05             ` Alexey Tourbin
  0 siblings, 1 reply; 29+ messages in thread
From: Dmitry V. Levin @ 2010-12-04 16:52 UTC (permalink / raw)
  To: ALT Devel discussion list

[-- Attachment #1: Type: text/plain, Size: 1351 bytes --]

On Sat, Dec 04, 2010 at 07:42:40PM +0300, Alexey Tourbin wrote:
> On Sat, Dec 04, 2010 at 07:29:11PM +0300, Dmitry V. Levin wrote:
> > On Sat, Dec 04, 2010 at 06:06:18PM +0300, Alexey Tourbin wrote:
> > > On Tue, Nov 30, 2010 at 03:35:42AM +0300, Dmitry V. Levin wrote:
> > > > On Fri, Nov 26, 2010 at 10:35:37AM +0200, Kirill A. Shutemov wrote:
> > > > > apt-shell < /dev/null
> > > 
> > > > Not so amazing as on your test platform, but awesome anyway.
> > > > I'm going to apply these patches.
> > > > If anybody has objections, please speak up now.
> > > 
> > > The objection is, why do you think the problem is with here, not with apt?
> > > Go yonder and optimize apt!
> > 
> > It was rpm that was changed to work 10x times slower, not apt.
> > But apt certainly could be made better, so feel free to optimize it.
> 
> RPM goes reasonably fast, and it's not exactly "slower".
> Something that might be called unreasonable is behind the RPM. 
> 
> I can't see why rpm is now 10x times slower.
> Slower compared to... what?

You cannot dismiss arguments so freely.  The abovementioned command
"apt-shell < /dev/null" from apt-0.5.15lorg2-alt33 works 10x times slower
after update from librpm-4.0.4-alt98.45 to librpm-4.0.4-alt98.49.
No matter how unreasonable apt is, it was rpm update that slowed it down.


-- 
ldv

[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-12-04 16:30       ` Dmitry V. Levin
@ 2010-12-04 16:55         ` Alexey Tourbin
  0 siblings, 0 replies; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-04 16:55 UTC (permalink / raw)
  To: ALT Devel discussion list

On Sat, Dec 04, 2010 at 07:30:49PM +0300, Dmitry V. Levin wrote:
> > I think I've made more amazing a change.
> > http://git.altlinux.org/gears/r/rpm.git?a=commitdiff;h=fc5b5a5f
> 
> Have you tried to combine both optimizations?

What's the criteria of doing good enough?
I always thought that 'char bitv[]' is good enough.
Let's call it a reference implementation.

Kirill argued that changing bitv to bitmap could actually win both
some space and some time, as there's a possibility to handle 6 bits
at a time, and even Mshift bits at a time.  

The problem is that, well, *bitv++ is replaced with something else.
The code will be no longer a reference implementation.


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-12-04 16:52           ` Dmitry V. Levin
@ 2010-12-04 17:05             ` Alexey Tourbin
  2010-12-04 17:52               ` Dmitry V. Levin
  0 siblings, 1 reply; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-04 17:05 UTC (permalink / raw)
  To: ALT Devel discussion list

On Sat, Dec 04, 2010 at 07:52:44PM +0300, Dmitry V. Levin wrote:
> > RPM goes reasonably fast, and it's not exactly "slower".
> > Something that might be called unreasonable is behind the RPM. 
> > 
> > I can't see why rpm is now 10x times slower.
> > Slower compared to... what?
> 
> You cannot dismiss arguments so freely.  The abovementioned command
> "apt-shell < /dev/null" from apt-0.5.15lorg2-alt33 works 10x times slower
> after update from librpm-4.0.4-alt98.45 to librpm-4.0.4-alt98.49.
> No matter how unreasonable apt is, it was rpm update that slowed it down.

Okay, what's the exact reason everything went 10x times slower?

> -- 
> ldv


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-12-04 17:05             ` Alexey Tourbin
@ 2010-12-04 17:52               ` Dmitry V. Levin
  2010-12-04 21:28                 ` Alexey Tourbin
  0 siblings, 1 reply; 29+ messages in thread
From: Dmitry V. Levin @ 2010-12-04 17:52 UTC (permalink / raw)
  To: ALT Devel discussion list

[-- Attachment #1: Type: text/plain, Size: 806 bytes --]

On Sat, Dec 04, 2010 at 08:05:57PM +0300, Alexey Tourbin wrote:
> On Sat, Dec 04, 2010 at 07:52:44PM +0300, Dmitry V. Levin wrote:
> > > RPM goes reasonably fast, and it's not exactly "slower".
> > > Something that might be called unreasonable is behind the RPM. 
> > > 
> > > I can't see why rpm is now 10x times slower.
> > > Slower compared to... what?
> > 
> > You cannot dismiss arguments so freely.  The abovementioned command
> > "apt-shell < /dev/null" from apt-0.5.15lorg2-alt33 works 10x times slower
> > after update from librpm-4.0.4-alt98.45 to librpm-4.0.4-alt98.49.
> > No matter how unreasonable apt is, it was rpm update that slowed it down.
> 
> Okay, what's the exact reason everything went 10x times slower?

Huge amount of new set-versioned dependencies.


-- 
ldv

[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-12-04 17:52               ` Dmitry V. Levin
@ 2010-12-04 21:28                 ` Alexey Tourbin
  2010-12-04 23:26                   ` Dmitry V. Levin
  0 siblings, 1 reply; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-04 21:28 UTC (permalink / raw)
  To: ALT Devel discussion list

On Sat, Dec 04, 2010 at 08:52:00PM +0300, Dmitry V. Levin wrote:
> On Sat, Dec 04, 2010 at 08:05:57PM +0300, Alexey Tourbin wrote:
> > On Sat, Dec 04, 2010 at 07:52:44PM +0300, Dmitry V. Levin wrote:
> > > > RPM goes reasonably fast, and it's not exactly "slower".
> > > > Something that might be called unreasonable is behind the RPM. 
> > > > 
> > > > I can't see why rpm is now 10x times slower.
> > > > Slower compared to... what?
> > > 
> > > You cannot dismiss arguments so freely.  The abovementioned command
> > > "apt-shell < /dev/null" from apt-0.5.15lorg2-alt33 works 10x times slower
> > > after update from librpm-4.0.4-alt98.45 to librpm-4.0.4-alt98.49.
> > > No matter how unreasonable apt is, it was rpm update that slowed it down.
> > 
> > Okay, what's the exact reason everything went 10x times slower?
> Huge amount of new set-versioned dependencies.

It's not exactly "huge", as it is predictably associated with the number
of shared libraries in use.  Also, I remember someone wondering about
"gosh, such a long version".  And he felt not that the library was
exporting too many symbols, though.  You see the pattern: the library
was okay.

There is indeed a considerable amount of new set-version dependencies.
And I never promised them to be a free lunch, or as cheap as strcmp().
Special compression routines (simplified Golomb-Rice encoding/decoding)
have been crafted to make set-versions shorter.  They are very cheap
(as compared to general-purpose compression routines such as found in
zlib), but they are still not free.

So the whole talk is that there's a price.  I believe that the price
is very affordable (or, at least, it could've been much worse).

Now as to apt misbehavior.  "You cannot dismiss arguments so freely."
What you urge to do is to cut down the price even more.  But there is
clearly something wrong with apt, as it tries to resolve every single
dependency upon every startup.  And, fortunately, it takes only a few
seconds!

> -- 
> ldv


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-12-04 21:28                 ` Alexey Tourbin
@ 2010-12-04 23:26                   ` Dmitry V. Levin
  2010-12-04 23:41                     ` Alexey Tourbin
  0 siblings, 1 reply; 29+ messages in thread
From: Dmitry V. Levin @ 2010-12-04 23:26 UTC (permalink / raw)
  To: ALT Devel discussion list

[-- Attachment #1: Type: text/plain, Size: 700 bytes --]

On Sun, Dec 05, 2010 at 12:28:31AM +0300, Alexey Tourbin wrote:
[...]
> What you urge to do is to cut down the price even more.  But there is
> clearly something wrong with apt, as it tries to resolve every single
> dependency upon every startup.  And, fortunately, it takes only a few
> seconds!

It will take twice or trice longer time when _all_ packages with ELF
files will get their set-versioned dependencies, so I expect apt-get
startup will eventually be about 10x slower again (compared to 5.1 branch).

apt-rpm indeed calls rpmRangesOverlap() too many times, it's common
knowledge.  Unfortunately, stating this fact isn't sufficient to make
apt-rpm work faster.


-- 
ldv

[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-12-04 23:26                   ` Dmitry V. Levin
@ 2010-12-04 23:41                     ` Alexey Tourbin
  2010-12-05  0:03                       ` Dmitry V. Levin
  0 siblings, 1 reply; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-04 23:41 UTC (permalink / raw)
  To: ALT Devel discussion list

On Sun, Dec 05, 2010 at 02:26:44AM +0300, Dmitry V. Levin wrote:
> On Sun, Dec 05, 2010 at 12:28:31AM +0300, Alexey Tourbin wrote:
> [...]
> > What you urge to do is to cut down the price even more.  But there is
> > clearly something wrong with apt, as it tries to resolve every single
> > dependency upon every startup.  And, fortunately, it takes only a few
> > seconds!
> 
> It will take twice or trice longer time when _all_ packages with ELF
> files will get their set-versioned dependencies, so I expect apt-get
> startup will eventually be about 10x slower again (compared to 5.1 branch).
> 
> apt-rpm indeed calls rpmRangesOverlap() too many times, it's common
> knowledge.  Unfortunately, stating this fact isn't sufficient to make
> apt-rpm work faster.

So what do you think?  There's a possibility to change bitv[] to bitmap,
per Kirill's proposal.  That mighit yield about, say 30% user time cutdown.
However, note that apt is not only eager for user time.  System time is
being spent with splendor as well.

Personally, I like bitv[], and I don't like something like <sys/select.h> stuff.

> -- 
> ldv


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-12-04 23:41                     ` Alexey Tourbin
@ 2010-12-05  0:03                       ` Dmitry V. Levin
  2010-12-05  0:21                         ` Alexey Tourbin
  0 siblings, 1 reply; 29+ messages in thread
From: Dmitry V. Levin @ 2010-12-05  0:03 UTC (permalink / raw)
  To: ALT Devel discussion list

[-- Attachment #1: Type: text/plain, Size: 1501 bytes --]

On Sun, Dec 05, 2010 at 02:41:31AM +0300, Alexey Tourbin wrote:
> On Sun, Dec 05, 2010 at 02:26:44AM +0300, Dmitry V. Levin wrote:
> > On Sun, Dec 05, 2010 at 12:28:31AM +0300, Alexey Tourbin wrote:
> > [...]
> > > What you urge to do is to cut down the price even more.  But there is
> > > clearly something wrong with apt, as it tries to resolve every single
> > > dependency upon every startup.  And, fortunately, it takes only a few
> > > seconds!
> > 
> > It will take twice or trice longer time when _all_ packages with ELF
> > files will get their set-versioned dependencies, so I expect apt-get
> > startup will eventually be about 10x slower again (compared to 5.1 branch).
> > 
> > apt-rpm indeed calls rpmRangesOverlap() too many times, it's common
> > knowledge.  Unfortunately, stating this fact isn't sufficient to make
> > apt-rpm work faster.
> 
> So what do you think?  There's a possibility to change bitv[] to bitmap,
> per Kirill's proposal.  That mighit yield about, say 30% user time cutdown.

30% is a bit optimistic, according to my measurements.

> However, note that apt is not only eager for user time.  System time is
> being spent with splendor as well.

System time takes only about 15% of elapsed time, according to my
measurements.

> Personally, I like bitv[], and I don't like something like <sys/select.h> stuff.

When performance is the issue, there should be a good rationale to choose
the approach that works slower.


-- 
ldv

[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-12-05  0:03                       ` Dmitry V. Levin
@ 2010-12-05  0:21                         ` Alexey Tourbin
  2010-12-05 12:49                           ` Michael Shigorin
  2010-12-07 17:50                           ` Dmitry V. Levin
  0 siblings, 2 replies; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-05  0:21 UTC (permalink / raw)
  To: ALT Devel discussion list

On Sun, Dec 05, 2010 at 03:03:16AM +0300, Dmitry V. Levin wrote:
> > > apt-rpm indeed calls rpmRangesOverlap() too many times, it's common
> > > knowledge.  Unfortunately, stating this fact isn't sufficient to make
> > > apt-rpm work faster.
> > 
> > So what do you think?  There's a possibility to change bitv[] to bitmap,
> > per Kirill's proposal.  That mighit yield about, say 30% user time cutdown.
> 
> 30% is a bit optimistic, according to my measurements.
> 
> > However, note that apt is not only eager for user time.  System time is
> > being spent with splendor as well.
> 
> System time takes only about 15% of elapsed time, according to my
> measurements.

Under VirtualBox, which seems to become a common hardware nowadays,
system time is somewhat more noticable.

> > Personally, I like bitv[], and I don't like something like <sys/select.h> stuff.
> 
> When performance is the issue, there should be a good rationale to choose
> the approach that works slower.

There are other reasons, such as keeping the code comprehensible (at the
expense of some runtime penalties).  I think the concept of bitv[] is
crucial for understanding the code, and it helps to keep the code clean
and self-evident.  Replacing bitv[] with bitmap might be a major trade-off
for the sake of performance.  Or might not.


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-11-25 22:04 [devel] [PATCH 0/3] optimize rpmsetcmp() Kirill A. Shutsemov
                   ` (3 preceding siblings ...)
  2010-11-26  8:35 ` [devel] [PATCH 0/3] optimize rpmsetcmp() Kirill A. Shutemov
@ 2010-12-05  1:24 ` Alexey Tourbin
  2010-12-05 11:18   ` Dmitry V. Levin
  2010-12-05 12:39   ` Michael Shigorin
  4 siblings, 2 replies; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-05  1:24 UTC (permalink / raw)
  To: ALT Linux Team development discussions
  Cc: Alexey I. Froloff, Kirill A. Shutemov, Alexey Gladkov, Dmitry V. Levin

The answer is: no.
The reason is: compilcated.
The explanation is:
  Don't try to improve my code.

On Fri, Nov 26, 2010 at 12:04:23AM +0200, Kirill A. Shutsemov wrote:
> From: Kirill A. Shutemov <kirill@shutemov.name>
> 
> Tested on Intel Core2 Duo P9500, 3GiB RAM. i586.
> 
> rpm-4.0.4-alt100.4:
> 
>  Performance counter stats for 'apt-cache unmet' (10 runs):
> 
>         3396.569863  task-clock-msecs         #      1.000 CPUs    ( +-   0.018% )
>                  12  context-switches         #      0.000 M/sec   ( +-   9.296% )
>                   5  CPU-migrations           #      0.000 M/sec   ( +-  21.414% )
>                9680  page-faults              #      0.003 M/sec   ( +-   0.002% )
>          8525321008  cycles                   #   2509.980 M/sec   ( +-   0.016% )  (scaled from 33.34%)
>          7937229883  instructions             #      0.931 IPC     ( +-   0.041% )  (scaled from 50.00%)
>          1468168069  branches                 #    432.250 M/sec   ( +-   0.014% )  (scaled from 49.99%)
>           257179182  branch-misses            #     17.517 %       ( +-   0.047% )  (scaled from 50.01%)
>            26275740  cache-references         #      7.736 M/sec   ( +-   0.114% )  (scaled from 33.35%)
>              350852  cache-misses             #      0.103 M/sec   ( +-   1.127% )  (scaled from 33.35%)
> 
>         3.398038183  seconds time elapsed   ( +-   0.018% )
> 
> rpm-4.0.4-alt100.4 + patchset:
> 
>  Performance counter stats for 'apt-cache unmet' (10 runs):
> 
>         2010.112427  task-clock-msecs         #      1.000 CPUs    ( +-   0.038% )
>                   8  context-switches         #      0.000 M/sec   ( +-  17.232% )
>                   4  CPU-migrations           #      0.000 M/sec   ( +-  21.237% )
>                9675  page-faults              #      0.005 M/sec   ( +-   0.005% )
>          5043579686  cycles                   #   2509.103 M/sec   ( +-   0.041% )  (scaled from 33.32%)
>          5567840605  instructions             #      1.104 IPC     ( +-   0.021% )  (scaled from 50.00%)
>          1028369972  branches                 #    511.598 M/sec   ( +-   0.016% )  (scaled from 50.00%)
>            94986026  branch-misses            #      9.237 %       ( +-   0.080% )  (scaled from 50.03%)
>            16227132  cache-references         #      8.073 M/sec   ( +-   0.303% )  (scaled from 33.36%)
>              323117  cache-misses             #      0.161 M/sec   ( +-   1.241% )  (scaled from 33.34%)
> 
>         2.011050788  seconds time elapsed   ( +-   0.044% )
> 
> Kirill A. Shutemov (3):
>   set.c: use packed bitmap for bit vector
>   set.c: optimize putbits()
>   set.c: optimize decode_golomb()
> 
>  lib/set.c |  186 ++++++++++++++++++++++++++++++++++++++----------------------
>  1 files changed, 118 insertions(+), 68 deletions(-)
> 
> -- 
> 1.7.3.2


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-12-05  1:24 ` Alexey Tourbin
@ 2010-12-05 11:18   ` Dmitry V. Levin
  2010-12-05 12:39     ` Alexey Tourbin
  2010-12-05 12:39   ` Michael Shigorin
  1 sibling, 1 reply; 29+ messages in thread
From: Dmitry V. Levin @ 2010-12-05 11:18 UTC (permalink / raw)
  To: ALT Linux Team development discussions

[-- Attachment #1: Type: text/plain, Size: 408 bytes --]

On Sun, Dec 05, 2010 at 04:24:51AM +0300, Alexey Tourbin wrote:
> The answer is: no.
> The reason is: compilcated.
> The explanation is:
>   Don't try to improve my code.

Your pride makes you blind: you cannot grasp the idea that your code
has a room for optimization :(.  That weakens all your reasoning.
Note that somebody has to support your code when you are missing for
months.


-- 
ldv

[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-12-05 11:18   ` Dmitry V. Levin
@ 2010-12-05 12:39     ` Alexey Tourbin
  0 siblings, 0 replies; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-05 12:39 UTC (permalink / raw)
  To: ALT Linux Team development discussions

On Sun, Dec 05, 2010 at 02:18:10PM +0300, Dmitry V. Levin wrote:
> On Sun, Dec 05, 2010 at 04:24:51AM +0300, Alexey Tourbin wrote:
> > The answer is: no.
> > The reason is: compilcated.
> > The explanation is:
> >   Don't try to improve my code.
> 
> Your pride makes you blind: you cannot grasp the idea that your code
> has a room for optimization :(.  That weakens all your reasoning.

Indeed, as for set.c, I exercise djb-like approach: either show me
a major problem, or, basically, no way.  However, the code is released
under GPL and LGPL - you can make changes on your own (and I can revert
them later).

> Note that somebody has to support your code when you are missing for
> months.

I am now tempted to say that there is no need to support my code.
And I'm not missing for months - rather, I can't speak in a low mood.

> -- 
> ldv


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-12-05  1:24 ` Alexey Tourbin
  2010-12-05 11:18   ` Dmitry V. Levin
@ 2010-12-05 12:39   ` Michael Shigorin
  2010-12-05 12:58     ` Alexey Tourbin
  1 sibling, 1 reply; 29+ messages in thread
From: Michael Shigorin @ 2010-12-05 12:39 UTC (permalink / raw)
  To: ALT Linux Team development discussions

On Sun, Dec 05, 2010 at 04:24:51AM +0300, Alexey Tourbin wrote:
> The explanation is:
>   Don't try to improve my code.

Дык тормозит же ж.  И на сборочнице, и на localhost.
Приходится дольше сидеть в коридоре у розетки,
чтоб собрать тестовую исошку.

-- 
 ---- WBR, Michael Shigorin <mike@altlinux.ru>
  ------ Linux.Kiev http://www.linux.kiev.ua/


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-12-05  0:21                         ` Alexey Tourbin
@ 2010-12-05 12:49                           ` Michael Shigorin
  2010-12-07 17:50                           ` Dmitry V. Levin
  1 sibling, 0 replies; 29+ messages in thread
From: Michael Shigorin @ 2010-12-05 12:49 UTC (permalink / raw)
  To: ALT Devel discussion list

On Sun, Dec 05, 2010 at 03:21:15AM +0300, Alexey Tourbin wrote:
> I think the concept of bitv[] is crucial for understanding the code

README.set?

-- 
 ---- WBR, Michael Shigorin <mike@altlinux.ru>
  ------ Linux.Kiev http://www.linux.kiev.ua/


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-12-05 12:39   ` Michael Shigorin
@ 2010-12-05 12:58     ` Alexey Tourbin
  2010-12-05 15:27       ` Dmitry V. Levin
  0 siblings, 1 reply; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-05 12:58 UTC (permalink / raw)
  To: ALT Linux Team development discussions

On Sun, Dec 05, 2010 at 02:39:48PM +0200, Michael Shigorin wrote:
> On Sun, Dec 05, 2010 at 04:24:51AM +0300, Alexey Tourbin wrote:
> > The explanation is:
> >   Don't try to improve my code.
> 
> Дык тормозит же ж.  И на сборочнице, и на localhost.
> Приходится дольше сидеть в коридоре у розетки,
> чтоб собрать тестовую исошку.

4.0.4-alt100.3 -> 4.0.4-alt100.5 примерно в 4 раза быстрее.
Меркантильные соображения тут не нужны.  Вопрос по сути кода
остается - стоит ли переходить от 'char bitv[]' к битовой шкале.
С точки зрения прозрачности кода - не стоит.  С точки зрения
скорости исполнения - выгода заметная, но не офигительная.

К тому же Кирилл там использует целочисленное деление, а оно
выполняется не везде дёшево.  Хотя, с константой в знаменателе,
gcc скорее всего заменяет деление на сдвиг.  Но это уже детали.

> -- 
>  ---- WBR, Michael Shigorin <mike@altlinux.ru>
>   ------ Linux.Kiev http://www.linux.kiev.ua/


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-12-05 12:58     ` Alexey Tourbin
@ 2010-12-05 15:27       ` Dmitry V. Levin
  0 siblings, 0 replies; 29+ messages in thread
From: Dmitry V. Levin @ 2010-12-05 15:27 UTC (permalink / raw)
  To: ALT Linux Team development discussions

[-- Attachment #1: Type: text/plain, Size: 1060 bytes --]

On Sun, Dec 05, 2010 at 03:58:49PM +0300, Alexey Tourbin wrote:
> On Sun, Dec 05, 2010 at 02:39:48PM +0200, Michael Shigorin wrote:
> > On Sun, Dec 05, 2010 at 04:24:51AM +0300, Alexey Tourbin wrote:
> > > The explanation is:
> > >   Don't try to improve my code.
> > 
> > Дык тормозит же ж.  И на сборочнице, и на localhost.
> > Приходится дольше сидеть в коридоре у розетки,
> > чтоб собрать тестовую исошку.
> 
> 4.0.4-alt100.3 -> 4.0.4-alt100.5 примерно в 4 раза быстрее.
> Меркантильные соображения тут не нужны.

Да нет же, по сравнению с 5.1 пользователь видит не ускорение в 4 раза, а
замедление в 4 раза, и по мере пересборки Сизифа с set-versioned deps это
замедление будет ещё заметнее.  Например, во время тестовой пересборки
это замедление выливается в несколько часов машинного времени.
Не надо делать вид, что проблемы нет.  Вот только одной лишь
оптимизацией lib/set.c эту проблему не решить.

> Вопрос по сути кода
> остается - стоит ли переходить от 'char bitv[]' к битовой шкале.

А вот это уже детали.


-- 
ldv

[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
  2010-12-05  0:21                         ` Alexey Tourbin
  2010-12-05 12:49                           ` Michael Shigorin
@ 2010-12-07 17:50                           ` Dmitry V. Levin
  1 sibling, 0 replies; 29+ messages in thread
From: Dmitry V. Levin @ 2010-12-07 17:50 UTC (permalink / raw)
  To: ALT Devel discussion list

[-- Attachment #1: Type: text/plain, Size: 513 bytes --]

On Sun, Dec 05, 2010 at 03:21:15AM +0300, Alexey Tourbin wrote:
> There are other reasons, such as keeping the code comprehensible (at the
> expense of some runtime penalties).  I think the concept of bitv[] is
> crucial for understanding the code, and it helps to keep the code clean
> and self-evident.  Replacing bitv[] with bitmap might be a major trade-off
> for the sake of performance.  Or might not.

Storing one byte per bit is crucial for understanding the code?
What a nonsense!


-- 
ldv

[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2010-12-07 17:50 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-11-25 22:04 [devel] [PATCH 0/3] optimize rpmsetcmp() Kirill A. Shutsemov
2010-11-25 22:04 ` [devel] [PATCH 1/3] set.c: use packed bitmap for bit vector Kirill A. Shutsemov
2010-11-25 22:04 ` [devel] [PATCH 2/3] set.c: optimize putbits() Kirill A. Shutsemov
2010-11-25 22:04 ` [devel] [PATCH 3/3] set.c: optimize decode_golomb() Kirill A. Shutsemov
2010-11-26  8:35 ` [devel] [PATCH 0/3] optimize rpmsetcmp() Kirill A. Shutemov
2010-11-30  0:35   ` Dmitry V. Levin
2010-12-02 21:48     ` Dmitry V. Levin
2010-12-04 13:02     ` Alexey Tourbin
2010-12-04 16:30       ` Dmitry V. Levin
2010-12-04 16:55         ` Alexey Tourbin
2010-12-04 15:06     ` Alexey Tourbin
2010-12-04 16:29       ` Dmitry V. Levin
2010-12-04 16:42         ` Alexey Tourbin
2010-12-04 16:52           ` Dmitry V. Levin
2010-12-04 17:05             ` Alexey Tourbin
2010-12-04 17:52               ` Dmitry V. Levin
2010-12-04 21:28                 ` Alexey Tourbin
2010-12-04 23:26                   ` Dmitry V. Levin
2010-12-04 23:41                     ` Alexey Tourbin
2010-12-05  0:03                       ` Dmitry V. Levin
2010-12-05  0:21                         ` Alexey Tourbin
2010-12-05 12:49                           ` Michael Shigorin
2010-12-07 17:50                           ` Dmitry V. Levin
2010-12-05  1:24 ` Alexey Tourbin
2010-12-05 11:18   ` Dmitry V. Levin
2010-12-05 12:39     ` Alexey Tourbin
2010-12-05 12:39   ` Michael Shigorin
2010-12-05 12:58     ` Alexey Tourbin
2010-12-05 15:27       ` Dmitry V. Levin

ALT Linux Team development discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror http://lore.altlinux.org/devel/0 devel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 devel devel/ http://lore.altlinux.org/devel \
		devel@altlinux.org devel@altlinux.ru devel@lists.altlinux.org devel@lists.altlinux.ru devel@linux.iplabs.ru mandrake-russian@linuxteam.iplabs.ru sisyphus@linuxteam.iplabs.ru
	public-inbox-index devel

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://lore.altlinux.org/org.altlinux.lists.devel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git