* [devel] [PATCH 0/3] optimize rpmsetcmp()
@ 2010-11-25 22:04 Kirill A. Shutsemov
2010-11-25 22:04 ` [devel] [PATCH 1/3] set.c: use packed bitmap for bit vector Kirill A. Shutsemov
` (4 more replies)
0 siblings, 5 replies; 29+ messages in thread
From: Kirill A. Shutsemov @ 2010-11-25 22:04 UTC (permalink / raw)
To: Dmitry V. Levin, Alexey Tourbin
Cc: Alexey I. Froloff, devel, Alexey Gladkov, Kirill A. Shutemov
From: Kirill A. Shutemov <kirill@shutemov.name>
Tested on Intel Core2 Duo P9500, 3GiB RAM. i586.
rpm-4.0.4-alt100.4:
Performance counter stats for 'apt-cache unmet' (10 runs):
3396.569863 task-clock-msecs # 1.000 CPUs ( +- 0.018% )
12 context-switches # 0.000 M/sec ( +- 9.296% )
5 CPU-migrations # 0.000 M/sec ( +- 21.414% )
9680 page-faults # 0.003 M/sec ( +- 0.002% )
8525321008 cycles # 2509.980 M/sec ( +- 0.016% ) (scaled from 33.34%)
7937229883 instructions # 0.931 IPC ( +- 0.041% ) (scaled from 50.00%)
1468168069 branches # 432.250 M/sec ( +- 0.014% ) (scaled from 49.99%)
257179182 branch-misses # 17.517 % ( +- 0.047% ) (scaled from 50.01%)
26275740 cache-references # 7.736 M/sec ( +- 0.114% ) (scaled from 33.35%)
350852 cache-misses # 0.103 M/sec ( +- 1.127% ) (scaled from 33.35%)
3.398038183 seconds time elapsed ( +- 0.018% )
rpm-4.0.4-alt100.4 + patchset:
Performance counter stats for 'apt-cache unmet' (10 runs):
2010.112427 task-clock-msecs # 1.000 CPUs ( +- 0.038% )
8 context-switches # 0.000 M/sec ( +- 17.232% )
4 CPU-migrations # 0.000 M/sec ( +- 21.237% )
9675 page-faults # 0.005 M/sec ( +- 0.005% )
5043579686 cycles # 2509.103 M/sec ( +- 0.041% ) (scaled from 33.32%)
5567840605 instructions # 1.104 IPC ( +- 0.021% ) (scaled from 50.00%)
1028369972 branches # 511.598 M/sec ( +- 0.016% ) (scaled from 50.00%)
94986026 branch-misses # 9.237 % ( +- 0.080% ) (scaled from 50.03%)
16227132 cache-references # 8.073 M/sec ( +- 0.303% ) (scaled from 33.36%)
323117 cache-misses # 0.161 M/sec ( +- 1.241% ) (scaled from 33.34%)
2.011050788 seconds time elapsed ( +- 0.044% )
Kirill A. Shutemov (3):
set.c: use packed bitmap for bit vector
set.c: optimize putbits()
set.c: optimize decode_golomb()
lib/set.c | 186 ++++++++++++++++++++++++++++++++++++++----------------------
1 files changed, 118 insertions(+), 68 deletions(-)
--
1.7.3.2
^ permalink raw reply [flat|nested] 29+ messages in thread
* [devel] [PATCH 1/3] set.c: use packed bitmap for bit vector
2010-11-25 22:04 [devel] [PATCH 0/3] optimize rpmsetcmp() Kirill A. Shutsemov
@ 2010-11-25 22:04 ` Kirill A. Shutsemov
2010-11-25 22:04 ` [devel] [PATCH 2/3] set.c: optimize putbits() Kirill A. Shutsemov
` (3 subsequent siblings)
4 siblings, 0 replies; 29+ messages in thread
From: Kirill A. Shutsemov @ 2010-11-25 22:04 UTC (permalink / raw)
To: Dmitry V. Levin, Alexey Tourbin
Cc: Alexey I. Froloff, devel, Alexey Gladkov, Kirill A. Shutemov
From: Kirill A. Shutemov <kirill@shutemov.name>
Currently, set.c uses array of chars to store bit vector. Each char
stores one bit: 0 or 1.
Let's use packed bitmap instead. It creates room for optimizations.
Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
---
lib/set.c | 152 ++++++++++++++++++++++++++++++++++++++-----------------------
1 files changed, 95 insertions(+), 57 deletions(-)
diff --git a/lib/set.c b/lib/set.c
index 68bcc8c..4b91dab 100644
--- a/lib/set.c
+++ b/lib/set.c
@@ -16,6 +16,24 @@
#include <stdlib.h>
#include <assert.h>
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+#define BITS_PER_BYTE 8
+#define BITS_PER_LONG (sizeof(long) * BITS_PER_BYTE)
+#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG)
+#define BIT(nr) (1UL << (nr))
+
+static inline
+int get_bit(const unsigned long *bitmap, int offset)
+{
+ return !!(bitmap[offset / BITS_PER_LONG] & BIT(offset % BITS_PER_LONG));
+}
+
+static inline
+void set_bit(unsigned long *bitmap, int offset)
+{
+ bitmap[offset / BITS_PER_LONG] |= BIT(offset % BITS_PER_LONG);
+}
+
/*
* Base62 routines - encode bits with alnum characters.
*
@@ -55,19 +73,20 @@ void put_digit(char **base62, int c)
(*base62)++;
}
-/* Main base62 encoding routine: pack bitv into base62 string. */
+/* Main base62 encoding routine: pack bitmap into base62 string. */
static
-int encode_base62(int bitc, const char *bitv, char *base62)
+int encode_base62(char *base62, const unsigned long *bitmap, size_t nbits)
{
char *base62_start = base62;
int bits2 = 0; /* number of high bits set */
int bits6 = 0; /* number of regular bits set */
int num6b = 0; /* pending 6-bit number */
+ unsigned int i;
- while (bitc-- > 0) {
+ for(i = 0; i < nbits; i++) {
assert(bits6 + bits2 < 6);
- num6b |= (*bitv++ << bits6++);
+ num6b |= (get_bit(bitmap, i) << bits6++);
if (bits6 + bits2 != 6)
continue;
@@ -138,21 +157,20 @@ int char_to_num(int c)
}
static inline
-void putbits(char **bitv, int n, int c)
+void putbits(unsigned long *bitmap, int *offset, int c, int nbits)
{
int i;
- for(i = 0; i < n; i++) {
- **bitv = (c >> i) & 1;
- (*bitv)++;
- }
+ for (i = 0; i < nbits; i++, (*offset)++)
+ if (c & BIT(i))
+ set_bit(bitmap, *offset);
}
-/* Main base62 decoding routine: unpack base62 string into bitv. */
+/* Main base62 decoding routine: unpack base62 string into bitmap. */
static
-int decode_base62(const char *base62, char *bitv)
+int decode_base62(unsigned long *bitmap, const char *base62)
{
- char *bitv_start = bitv;
+ int offset = 0;
int c;
while ((c = *base62++)) {
@@ -162,7 +180,7 @@ int decode_base62(const char *base62, char *bitv)
return -1;
if (num6b != 61) {
- putbits(&bitv, 6, num6b);
+ putbits(bitmap, &offset, num6b, 6);
continue;
}
@@ -177,27 +195,37 @@ int decode_base62(const char *base62, char *bitv)
switch (num6b & (16 + 32)) {
case 0:
- putbits(&bitv, 6, 61);
+ putbits(bitmap, &offset, 61, 6);
break;
case 16:
- putbits(&bitv, 6, 62);
+ putbits(bitmap, &offset, 62, 6);
break;
case 32:
- putbits(&bitv, 6, 63);
+ putbits(bitmap, &offset, 63, 6);
break;
default:
return -4;
break;
}
- putbits(&bitv, 4, num6b);
+ putbits(bitmap, &offset, num6b, 4);
}
- return bitv - bitv_start;
+ return offset;
}
#ifdef SELF_TEST
static
+void bitv_to_bitmap(unsigned long *bitmap, const char *bitv, size_t n)
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ if (bitv[i])
+ set_bit(bitmap, i);
+}
+
+static
void test_base62(void)
{
const char rnd_bitv[] = {
@@ -208,15 +236,20 @@ void test_base62(void)
/* trigger some 'Z' */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
- const int rnd_bitc = sizeof(rnd_bitv);
- char *base62, *bitv;
- int i, len, bitc;
+ const unsigned int rnd_bitc = sizeof(rnd_bitv);
+ unsigned long bitmap[BITS_TO_LONGS(rnd_bitc)];
+ unsigned long *new_bitmap;
+ char *base62;
+ unsigned int i, len, bitc;
+
+ memset(&bitmap, 0, sizeof(bitmap));
+ bitv_to_bitmap(bitmap, rnd_bitv, rnd_bitc);
/* encode */
base62 = alloca(encode_base62_size(rnd_bitc));
- len = encode_base62(rnd_bitc, rnd_bitv, base62);
+ len = encode_base62(base62, bitmap, rnd_bitc);
assert(len > 0);
- assert(len == (int) strlen(base62));
+ assert(len == strlen(base62));
fprintf(stderr, "len=%d base62=%s\n", len, base62);
/* The length cannot be shorter than 6 bits per symbol. */
@@ -226,17 +259,17 @@ void test_base62(void)
assert(len <= rnd_bitc / 2 / 4 + rnd_bitc / 2 / 6 + 1);
/* decode */
- bitv = alloca(decode_base62_size(base62));
- bitc = decode_base62(base62, bitv);
+ bitc = decode_base62_size(base62);
+ new_bitmap = alloca(BITS_TO_LONGS(bitc) * sizeof(long));
+ memset(new_bitmap, 0, BITS_TO_LONGS(bitc) * sizeof(long));
+ bitc = decode_base62(new_bitmap, base62);
fprintf(stderr, "rnd_bitc=%d bitc=%d\n", rnd_bitc, bitc);
assert(bitc >= rnd_bitc);
/* Decoded bits must match. */
- for (i = 0; i < rnd_bitc; i++)
- assert(rnd_bitv[i] == bitv[i]);
- /* The remaining bits must be zero bits. */
- for (i = rnd_bitc; i < bitc; i++)
- assert(bitv[i] == 0);
+ for (i = 0; i < sizeof(bitmap) / sizeof(*bitmap); i++)
+ assert(bitmap[i] == new_bitmap[i]);
+
fprintf(stderr, "%s: base62 test OK\n", __FILE__);
}
#endif
@@ -306,32 +339,32 @@ int encode_golomb_size(int c, int Mshift)
return (Mshift << 1) * c + 16;
}
-/* Main golomb encoding routine: package integers into bits. */
+/* Main golomb encoding routine: package integers into bitmap. */
static
-int encode_golomb(int c, const unsigned *v, int Mshift, char *bitv)
+int encode_golomb(int c, const unsigned *v, int Mshift, unsigned long *bitmap)
{
- char *bitv_start = bitv;
+ int offset = 0;
const unsigned mask = (1 << Mshift) - 1;
while (c-- > 0) {
- unsigned v0, q, r;
+ unsigned v0, r;
int i;
v0 = *v++;
/* first part: variable-length sequence */
- q = v0 >> Mshift;
- for (i = 0; i < (int) q; i++)
- *bitv++ = 0;
- *bitv++ = 1;
+ offset += v0 >> Mshift;
+ set_bit(bitmap, offset++);
/* second part: lower Mshift bits */
r = v0 & mask;
- for (i = 0; i < Mshift; i++)
- *bitv++ = (r >> i) & 1;
+ for (i = 0; i < Mshift; i++, offset++) {
+ if (r & BIT(i))
+ set_bit(bitmap, offset);
+ }
}
- return bitv - bitv_start;
+ return offset;
}
/* Estimate how many values will emerge. */
@@ -345,11 +378,12 @@ int decode_golomb_size(int bitc, int Mshift)
return bitc / (Mshift + 1);
}
-/* Main golomb decoding routine: unpackage bits into values. */
+/* Main golomb decoding routine: unpackage bitmap into values. */
static
-int decode_golomb(int bitc, const char *bitv, int Mshift, unsigned *v)
+int decode_golomb(int bitc, const unsigned long *bitmap, int Mshift, unsigned *v)
{
unsigned *v_start = v;
+ int offset = 0;
/* next value */
while (bitc > 0) {
@@ -362,7 +396,7 @@ int decode_golomb(int bitc, const char *bitv, int Mshift, unsigned *v)
bit = 0;
while (bitc > 0) {
bitc--;
- bit = *bitv++;
+ bit = get_bit(bitmap, offset++);
if (bit == 0)
q++;
else
@@ -381,8 +415,8 @@ int decode_golomb(int bitc, const char *bitv, int Mshift, unsigned *v)
r = 0;
for (i = 0; i < Mshift; i++) {
bitc--;
- if (*bitv++)
- r |= (1 << i);
+ if (get_bit(bitmap, offset++))
+ r |= BIT(i);
}
/* the value */
@@ -401,7 +435,7 @@ void test_golomb(void)
7, 6, 5, 4, 3, 2, 1,
};
const int rnd_c = sizeof(rnd_v) / sizeof(*rnd_v);
- char *bitv;
+ unsigned long *bitmap;
unsigned *v;
int bpp, Mshift;
int alloc_bitc, bitc;
@@ -417,8 +451,9 @@ void test_golomb(void)
/* encode */
alloc_bitc = encode_golomb_size(rnd_c, Mshift);
assert(alloc_bitc > rnd_c);
- bitv = alloca(alloc_bitc);
- bitc = encode_golomb(rnd_c, rnd_v, Mshift, bitv);
+ bitmap = alloca(BITS_TO_LONGS(alloc_bitc) * sizeof(long));
+ memset(bitmap, 0, BITS_TO_LONGS(alloc_bitc) * sizeof(long));
+ bitc = encode_golomb(rnd_c, rnd_v, Mshift, bitmap);
fprintf(stderr, "alloc_bitc=%d bitc=%d\n", alloc_bitc, bitc);
assert(bitc > rnd_c);
assert(bitc <= alloc_bitc);
@@ -427,7 +462,7 @@ void test_golomb(void)
alloc_c = decode_golomb_size(bitc, Mshift);
assert(alloc_c >= rnd_c);
v = alloca(sizeof(*v) * alloc_c);
- c = decode_golomb(bitc, bitv, Mshift, v);
+ c = decode_golomb(bitc, bitmap, Mshift, v);
fprintf(stderr, "rnd_c=%d alloc_c=%d c=%d\n", rnd_c, alloc_c, c);
assert(alloc_c >= c);
@@ -597,8 +632,9 @@ int encode_set(int c, unsigned *v, int bpp, char *base62)
int Mshift = encode_golomb_Mshift(c, bpp);
int bitc = encode_golomb_size(c, Mshift);
int len;
- char bitv[bitc];
+ unsigned long bitmap[BITS_TO_LONGS(bitc)];
+ memset(bitmap, 0, sizeof(bitmap));
/* bpp */
if (bpp < 10 || bpp > 32)
return -1;
@@ -613,7 +649,7 @@ int encode_set(int c, unsigned *v, int bpp, char *base62)
encode_delta(c, v);
/* golomb */
- bitc = encode_golomb(c, v, Mshift, bitv);
+ bitc = encode_golomb(c, v, Mshift, bitmap);
#ifdef SELF_TEST
decode_delta(c, v);
@@ -623,7 +659,7 @@ int encode_set(int c, unsigned *v, int bpp, char *base62)
return -3;
/* base62 */
- len = encode_base62(bitc, bitv, base62);
+ len = encode_base62(base62, bitmap, bitc);
if (len < 0)
return -4;
@@ -671,21 +707,23 @@ int decode_set_size(const char *str, int Mshift)
static
int decode_set(const char *str, int Mshift, unsigned *v)
{
- char *bitv;
+ unsigned long *bitmap;
int bitc;
int c;
str += 2;
/* base62 */
- bitv = alloca(decode_base62_size(str));
- bitc = decode_base62(str, bitv);
+ bitc = decode_base62_size(str);
+ bitmap = alloca(BITS_TO_LONGS(bitc) * sizeof(long));
+ memset(bitmap, 0, BITS_TO_LONGS(bitc) * sizeof(long));
+ bitc = decode_base62(bitmap, str);
if (bitc < 0)
return -1;
/* golomb */
- c = decode_golomb(bitc, bitv, Mshift, v);
+ c = decode_golomb(bitc, bitmap, Mshift, v);
if (c < 0)
return -2;
--
1.7.3.2
^ permalink raw reply [flat|nested] 29+ messages in thread
* [devel] [PATCH 2/3] set.c: optimize putbits()
2010-11-25 22:04 [devel] [PATCH 0/3] optimize rpmsetcmp() Kirill A. Shutsemov
2010-11-25 22:04 ` [devel] [PATCH 1/3] set.c: use packed bitmap for bit vector Kirill A. Shutsemov
@ 2010-11-25 22:04 ` Kirill A. Shutsemov
2010-11-25 22:04 ` [devel] [PATCH 3/3] set.c: optimize decode_golomb() Kirill A. Shutsemov
` (2 subsequent siblings)
4 siblings, 0 replies; 29+ messages in thread
From: Kirill A. Shutsemov @ 2010-11-25 22:04 UTC (permalink / raw)
To: Dmitry V. Levin, Alexey Tourbin
Cc: Alexey I. Froloff, devel, Alexey Gladkov, Kirill A. Shutemov
From: Kirill A. Shutemov <kirill@shutemov.name>
Use bit operations instead of cycles.
Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
---
lib/set.c | 22 ++++++++++++++++------
1 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/lib/set.c b/lib/set.c
index 4b91dab..02a6c31 100644
--- a/lib/set.c
+++ b/lib/set.c
@@ -21,6 +21,7 @@
#define BITS_PER_LONG (sizeof(long) * BITS_PER_BYTE)
#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG)
#define BIT(nr) (1UL << (nr))
+#define MASK(nbits) (BIT(nbits) - 1)
static inline
int get_bit(const unsigned long *bitmap, int offset)
@@ -157,13 +158,22 @@ int char_to_num(int c)
}
static inline
-void putbits(unsigned long *bitmap, int *offset, int c, int nbits)
+void putbits(unsigned long *bitmap, int *offset, unsigned long c, int nbits)
{
- int i;
+ int quot, rem;
+
+ assert(!(c & ~MASK(nbits)));
+
+ quot = *offset / BITS_PER_LONG;
+ rem = *offset % BITS_PER_LONG;
+
+ bitmap[quot] |= c << rem;
+ c >>= BITS_PER_LONG - rem;
+
+ if (nbits + rem > (int) BITS_PER_LONG)
+ bitmap[quot + 1] = c;
- for (i = 0; i < nbits; i++, (*offset)++)
- if (c & BIT(i))
- set_bit(bitmap, *offset);
+ *offset += nbits;
}
/* Main base62 decoding routine: unpack base62 string into bitmap. */
@@ -208,7 +218,7 @@ int decode_base62(unsigned long *bitmap, const char *base62)
break;
}
- putbits(bitmap, &offset, num6b, 4);
+ putbits(bitmap, &offset, num6b & MASK(4), 4);
}
return offset;
--
1.7.3.2
^ permalink raw reply [flat|nested] 29+ messages in thread
* [devel] [PATCH 3/3] set.c: optimize decode_golomb()
2010-11-25 22:04 [devel] [PATCH 0/3] optimize rpmsetcmp() Kirill A. Shutsemov
2010-11-25 22:04 ` [devel] [PATCH 1/3] set.c: use packed bitmap for bit vector Kirill A. Shutsemov
2010-11-25 22:04 ` [devel] [PATCH 2/3] set.c: optimize putbits() Kirill A. Shutsemov
@ 2010-11-25 22:04 ` Kirill A. Shutsemov
2010-11-26 8:35 ` [devel] [PATCH 0/3] optimize rpmsetcmp() Kirill A. Shutemov
2010-12-05 1:24 ` Alexey Tourbin
4 siblings, 0 replies; 29+ messages in thread
From: Kirill A. Shutsemov @ 2010-11-25 22:04 UTC (permalink / raw)
To: Dmitry V. Levin, Alexey Tourbin
Cc: Alexey I. Froloff, devel, Alexey Gladkov, Kirill A. Shutemov
From: Kirill A. Shutemov <kirill@shutemov.name>
Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
---
lib/set.c | 26 ++++++++++++++------------
1 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/lib/set.c b/lib/set.c
index 02a6c31..8a425b7 100644
--- a/lib/set.c
+++ b/lib/set.c
@@ -396,16 +396,16 @@ int decode_golomb(int bitc, const unsigned long *bitmap, int Mshift, unsigned *v
int offset = 0;
/* next value */
- while (bitc > 0) {
+ while (offset < bitc) {
unsigned q, r;
char bit;
- int i;
+ int quot, rem;
+ int k;
/* first part */
q = 0;
bit = 0;
- while (bitc > 0) {
- bitc--;
+ while (offset < bitc) {
bit = get_bit(bitmap, offset++);
if (bit == 0)
q++;
@@ -414,20 +414,22 @@ int decode_golomb(int bitc, const unsigned long *bitmap, int Mshift, unsigned *v
}
/* trailing zero bits in the input are okay */
- if (bitc == 0 && bit == 0)
+ if (offset == bitc && bit == 0)
break;
/* otherwise, incomplete value is not okay */
- if (bitc < Mshift)
+ if (bitc - offset < Mshift)
return -1;
+ quot = offset / BITS_PER_LONG;
+ rem = offset % BITS_PER_LONG;
+ k = rem + Mshift - BITS_PER_LONG;
+
/* second part */
- r = 0;
- for (i = 0; i < Mshift; i++) {
- bitc--;
- if (get_bit(bitmap, offset++))
- r |= BIT(i);
- }
+ r = (bitmap[quot] & (MASK(Mshift) << rem)) >> rem;
+ if (k > 0)
+ r |= (bitmap[quot + 1] & MASK(k)) << (Mshift - k);
+ offset += Mshift;
/* the value */
*v++ = (q << Mshift) | r;
--
1.7.3.2
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-11-25 22:04 [devel] [PATCH 0/3] optimize rpmsetcmp() Kirill A. Shutsemov
` (2 preceding siblings ...)
2010-11-25 22:04 ` [devel] [PATCH 3/3] set.c: optimize decode_golomb() Kirill A. Shutsemov
@ 2010-11-26 8:35 ` Kirill A. Shutemov
2010-11-30 0:35 ` Dmitry V. Levin
2010-12-05 1:24 ` Alexey Tourbin
4 siblings, 1 reply; 29+ messages in thread
From: Kirill A. Shutemov @ 2010-11-26 8:35 UTC (permalink / raw)
To: Dmitry V. Levin, Alexey Tourbin; +Cc: Alexey I. Froloff, devel, Alexey Gladkov
On Fri, Nov 26, 2010 at 12:04:23AM +0200, Kirill A. Shutsemov wrote:
> From: Kirill A. Shutemov <kirill@shutemov.name>
>
> Tested on Intel Core2 Duo P9500, 3GiB RAM. i586.
apt-shell < /dev/null
rpm-4.0.4-alt100.4:
Performance counter stats for 'apt-shell' (5 runs):
7431.705536 task-clock-msecs # 1.000 CPUs ( +- 0.037% )
39 context-switches # 0.000 M/sec ( +- 14.774% )
13 CPU-migrations # 0.000 M/sec ( +- 27.252% )
12135 page-faults # 0.002 M/sec ( +- 0.003% )
18654610617 cycles # 2510.139 M/sec ( +- 0.034% ) (scaled from 33.34%)
17117123518 instructions # 0.918 IPC ( +- 0.014% ) (scaled from 50.00%)
3151838786 branches # 424.107 M/sec ( +- 0.039% ) (scaled from 50.00%)
558860303 branch-misses # 17.731 % ( +- 0.063% ) (scaled from 50.00%)
52927655 cache-references # 7.122 M/sec ( +- 0.316% ) (scaled from 33.34%)
930111 cache-misses # 0.125 M/sec ( +- 1.324% ) (scaled from 33.34%)
7.434342478 seconds time elapsed ( +- 0.049% )
rpm-4.0.4-alt100.4 + patches:
Performance counter stats for 'apt-shell' (5 runs):
4364.794613 task-clock-msecs # 1.000 CPUs ( +- 0.066% )
33 context-switches # 0.000 M/sec ( +- 20.124% )
12 CPU-migrations # 0.000 M/sec ( +- 6.780% )
11681 page-faults # 0.003 M/sec ( +- 0.005% )
10954135791 cycles # 2509.657 M/sec ( +- 0.063% ) (scaled from 33.32%)
11905100223 instructions # 1.087 IPC ( +- 0.020% ) (scaled from 49.99%)
2181001479 branches # 499.680 M/sec ( +- 0.035% ) (scaled from 49.99%)
200182757 branch-misses # 9.178 % ( +- 0.104% ) (scaled from 50.01%)
31301034 cache-references # 7.171 M/sec ( +- 0.377% ) (scaled from 33.36%)
940816 cache-misses # 0.216 M/sec ( +- 2.534% ) (scaled from 33.34%)
4.366582986 seconds time elapsed ( +- 0.067% )
Around 41% of speed up.
--
Kirill A. Shutemov
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-11-26 8:35 ` [devel] [PATCH 0/3] optimize rpmsetcmp() Kirill A. Shutemov
@ 2010-11-30 0:35 ` Dmitry V. Levin
2010-12-02 21:48 ` Dmitry V. Levin
` (2 more replies)
0 siblings, 3 replies; 29+ messages in thread
From: Dmitry V. Levin @ 2010-11-30 0:35 UTC (permalink / raw)
To: ALT Devel discussion list
[-- Attachment #1: Type: text/plain, Size: 1817 bytes --]
On Fri, Nov 26, 2010 at 10:35:37AM +0200, Kirill A. Shutemov wrote:
> On Fri, Nov 26, 2010 at 12:04:23AM +0200, Kirill A. Shutsemov wrote:
> >
> > Tested on Intel Core2 Duo P9500, 3GiB RAM. i586.
>
> apt-shell < /dev/null
[...]
> Around 41% of speed up.
On AMD Opteron Processor 275:
without patches:
2.98user 0.19system 0:03.18elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+10142minor)pagefaults 0swaps
with patches:
2.20user 0.21system 0:02.42elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9978minor)pagefaults 0swaps
speedup is around 24%
On AMD Opteron Processor 2216:
without patches:
2.42user 0.02system 0:02.44elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9784minor)pagefaults 0swaps
with patches:
1.86user 0.02system 0:01.88elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9601minor)pagefaults 0swaps
speedup is around 23%
On Intel Xeon 5110
without patches:
2.98user 0.04system 0:03.02elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9789minor)pagefaults 0swaps
with patches:
2.54user 0.01system 0:02.55elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9608minor)pagefaults 0swaps
speedup is around 16%
On Intel Xeon E5520
without patches:
2.02user 0.00system 0:02.02elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9730minor)pagefaults 0swaps
with patches:
1.77user 0.00system 0:01.77elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9565minor)pagefaults 0swaps
speedup is around 12%
Not so amazing as on your test platform, but awesome anyway.
I'm going to apply these patches.
If anybody has objections, please speak up now.
--
ldv
[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-11-30 0:35 ` Dmitry V. Levin
@ 2010-12-02 21:48 ` Dmitry V. Levin
2010-12-04 13:02 ` Alexey Tourbin
2010-12-04 15:06 ` Alexey Tourbin
2 siblings, 0 replies; 29+ messages in thread
From: Dmitry V. Levin @ 2010-12-02 21:48 UTC (permalink / raw)
To: ALT Devel discussion list; +Cc: Alexey M. Tourbin
[-- Attachment #1: Type: text/plain, Size: 2143 bytes --]
On Tue, Nov 30, 2010 at 03:35:42AM +0300, Dmitry V. Levin wrote:
> On Fri, Nov 26, 2010 at 10:35:37AM +0200, Kirill A. Shutemov wrote:
> > On Fri, Nov 26, 2010 at 12:04:23AM +0200, Kirill A. Shutsemov wrote:
> > >
> > > Tested on Intel Core2 Duo P9500, 3GiB RAM. i586.
> >
> > apt-shell < /dev/null
> [...]
> > Around 41% of speed up.
>
> On AMD Opteron Processor 275:
> without patches:
> 2.98user 0.19system 0:03.18elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+10142minor)pagefaults 0swaps
> with patches:
> 2.20user 0.21system 0:02.42elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+9978minor)pagefaults 0swaps
> speedup is around 24%
>
> On AMD Opteron Processor 2216:
> without patches:
> 2.42user 0.02system 0:02.44elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+9784minor)pagefaults 0swaps
> with patches:
> 1.86user 0.02system 0:01.88elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+9601minor)pagefaults 0swaps
> speedup is around 23%
>
> On Intel Xeon 5110
> without patches:
> 2.98user 0.04system 0:03.02elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+9789minor)pagefaults 0swaps
> with patches:
> 2.54user 0.01system 0:02.55elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+9608minor)pagefaults 0swaps
> speedup is around 16%
>
> On Intel Xeon E5520
> without patches:
> 2.02user 0.00system 0:02.02elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+9730minor)pagefaults 0swaps
> with patches:
> 1.77user 0.00system 0:01.77elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+9565minor)pagefaults 0swaps
> speedup is around 12%
>
> Not so amazing as on your test platform, but awesome anyway.
> I'm going to apply these patches.
> If anybody has objections, please speak up now.
These patches were made on top of your cleanup patches. Looks like
I'll have to override unreasoned NAK from Alexey Tourbin and apply
these cleanups as well.
--
ldv
[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-11-30 0:35 ` Dmitry V. Levin
2010-12-02 21:48 ` Dmitry V. Levin
@ 2010-12-04 13:02 ` Alexey Tourbin
2010-12-04 16:30 ` Dmitry V. Levin
2010-12-04 15:06 ` Alexey Tourbin
2 siblings, 1 reply; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-04 13:02 UTC (permalink / raw)
To: ALT Devel discussion list
On Tue, Nov 30, 2010 at 03:35:42AM +0300, Dmitry V. Levin wrote:
> On Intel Xeon E5520
> without patches:
> 2.02user 0.00system 0:02.02elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+9730minor)pagefaults 0swaps
> with patches:
> 1.77user 0.00system 0:01.77elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+9565minor)pagefaults 0swaps
> speedup is around 12%
>
> Not so amazing as on your test platform, but awesome anyway.
> I'm going to apply these patches.
> If anybody has objections, please speak up now.
I think I've made more amazing a change.
http://git.altlinux.org/gears/r/rpm.git?a=commitdiff;h=fc5b5a5f
> --
> ldv
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-11-30 0:35 ` Dmitry V. Levin
2010-12-02 21:48 ` Dmitry V. Levin
2010-12-04 13:02 ` Alexey Tourbin
@ 2010-12-04 15:06 ` Alexey Tourbin
2010-12-04 16:29 ` Dmitry V. Levin
2 siblings, 1 reply; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-04 15:06 UTC (permalink / raw)
To: ALT Devel discussion list
On Tue, Nov 30, 2010 at 03:35:42AM +0300, Dmitry V. Levin wrote:
> On Fri, Nov 26, 2010 at 10:35:37AM +0200, Kirill A. Shutemov wrote:
> > apt-shell < /dev/null
> Not so amazing as on your test platform, but awesome anyway.
> I'm going to apply these patches.
> If anybody has objections, please speak up now.
The objection is, why do you think the problem is with here, not with apt?
Go yonder and optimize apt!
> --
> ldv
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-12-04 15:06 ` Alexey Tourbin
@ 2010-12-04 16:29 ` Dmitry V. Levin
2010-12-04 16:42 ` Alexey Tourbin
0 siblings, 1 reply; 29+ messages in thread
From: Dmitry V. Levin @ 2010-12-04 16:29 UTC (permalink / raw)
To: ALT Devel discussion list
[-- Attachment #1: Type: text/plain, Size: 660 bytes --]
On Sat, Dec 04, 2010 at 06:06:18PM +0300, Alexey Tourbin wrote:
> On Tue, Nov 30, 2010 at 03:35:42AM +0300, Dmitry V. Levin wrote:
> > On Fri, Nov 26, 2010 at 10:35:37AM +0200, Kirill A. Shutemov wrote:
> > > apt-shell < /dev/null
>
> > Not so amazing as on your test platform, but awesome anyway.
> > I'm going to apply these patches.
> > If anybody has objections, please speak up now.
>
> The objection is, why do you think the problem is with here, not with apt?
> Go yonder and optimize apt!
It was rpm that was changed to work 10x times slower, not apt.
But apt certainly could be made better, so feel free to optimize it.
--
ldv
[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-12-04 13:02 ` Alexey Tourbin
@ 2010-12-04 16:30 ` Dmitry V. Levin
2010-12-04 16:55 ` Alexey Tourbin
0 siblings, 1 reply; 29+ messages in thread
From: Dmitry V. Levin @ 2010-12-04 16:30 UTC (permalink / raw)
To: ALT Devel discussion list
[-- Attachment #1: Type: text/plain, Size: 848 bytes --]
On Sat, Dec 04, 2010 at 04:02:17PM +0300, Alexey Tourbin wrote:
> On Tue, Nov 30, 2010 at 03:35:42AM +0300, Dmitry V. Levin wrote:
> > On Intel Xeon E5520
> > without patches:
> > 2.02user 0.00system 0:02.02elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
> > 0inputs+0outputs (0major+9730minor)pagefaults 0swaps
> > with patches:
> > 1.77user 0.00system 0:01.77elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
> > 0inputs+0outputs (0major+9565minor)pagefaults 0swaps
> > speedup is around 12%
> >
> > Not so amazing as on your test platform, but awesome anyway.
> > I'm going to apply these patches.
> > If anybody has objections, please speak up now.
>
> I think I've made more amazing a change.
> http://git.altlinux.org/gears/r/rpm.git?a=commitdiff;h=fc5b5a5f
Have you tried to combine both optimizations?
--
ldv
[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-12-04 16:29 ` Dmitry V. Levin
@ 2010-12-04 16:42 ` Alexey Tourbin
2010-12-04 16:52 ` Dmitry V. Levin
0 siblings, 1 reply; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-04 16:42 UTC (permalink / raw)
To: ALT Devel discussion list
On Sat, Dec 04, 2010 at 07:29:11PM +0300, Dmitry V. Levin wrote:
> On Sat, Dec 04, 2010 at 06:06:18PM +0300, Alexey Tourbin wrote:
> > On Tue, Nov 30, 2010 at 03:35:42AM +0300, Dmitry V. Levin wrote:
> > > On Fri, Nov 26, 2010 at 10:35:37AM +0200, Kirill A. Shutemov wrote:
> > > > apt-shell < /dev/null
> >
> > > Not so amazing as on your test platform, but awesome anyway.
> > > I'm going to apply these patches.
> > > If anybody has objections, please speak up now.
> >
> > The objection is, why do you think the problem is with here, not with apt?
> > Go yonder and optimize apt!
>
> It was rpm that was changed to work 10x times slower, not apt.
> But apt certainly could be made better, so feel free to optimize it.
RPM goes reasonably fast, and it's not exactly "slower".
Something that might be called unreasonable is behind the RPM.
I can't see why rpm is now 10x times slower.
Slower compared to... what?
> --
> ldv
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-12-04 16:42 ` Alexey Tourbin
@ 2010-12-04 16:52 ` Dmitry V. Levin
2010-12-04 17:05 ` Alexey Tourbin
0 siblings, 1 reply; 29+ messages in thread
From: Dmitry V. Levin @ 2010-12-04 16:52 UTC (permalink / raw)
To: ALT Devel discussion list
[-- Attachment #1: Type: text/plain, Size: 1351 bytes --]
On Sat, Dec 04, 2010 at 07:42:40PM +0300, Alexey Tourbin wrote:
> On Sat, Dec 04, 2010 at 07:29:11PM +0300, Dmitry V. Levin wrote:
> > On Sat, Dec 04, 2010 at 06:06:18PM +0300, Alexey Tourbin wrote:
> > > On Tue, Nov 30, 2010 at 03:35:42AM +0300, Dmitry V. Levin wrote:
> > > > On Fri, Nov 26, 2010 at 10:35:37AM +0200, Kirill A. Shutemov wrote:
> > > > > apt-shell < /dev/null
> > >
> > > > Not so amazing as on your test platform, but awesome anyway.
> > > > I'm going to apply these patches.
> > > > If anybody has objections, please speak up now.
> > >
> > > The objection is, why do you think the problem is with here, not with apt?
> > > Go yonder and optimize apt!
> >
> > It was rpm that was changed to work 10x times slower, not apt.
> > But apt certainly could be made better, so feel free to optimize it.
>
> RPM goes reasonably fast, and it's not exactly "slower".
> Something that might be called unreasonable is behind the RPM.
>
> I can't see why rpm is now 10x times slower.
> Slower compared to... what?
You cannot dismiss arguments so freely. The abovementioned command
"apt-shell < /dev/null" from apt-0.5.15lorg2-alt33 works 10x times slower
after update from librpm-4.0.4-alt98.45 to librpm-4.0.4-alt98.49.
No matter how unreasonable apt is, it was rpm update that slowed it down.
--
ldv
[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-12-04 16:30 ` Dmitry V. Levin
@ 2010-12-04 16:55 ` Alexey Tourbin
0 siblings, 0 replies; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-04 16:55 UTC (permalink / raw)
To: ALT Devel discussion list
On Sat, Dec 04, 2010 at 07:30:49PM +0300, Dmitry V. Levin wrote:
> > I think I've made more amazing a change.
> > http://git.altlinux.org/gears/r/rpm.git?a=commitdiff;h=fc5b5a5f
>
> Have you tried to combine both optimizations?
What's the criteria of doing good enough?
I always thought that 'char bitv[]' is good enough.
Let's call it a reference implementation.
Kirill argued that changing bitv to bitmap could actually win both
some space and some time, as there's a possibility to handle 6 bits
at a time, and even Mshift bits at a time.
The problem is that, well, *bitv++ is replaced with something else.
The code will be no longer a reference implementation.
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-12-04 16:52 ` Dmitry V. Levin
@ 2010-12-04 17:05 ` Alexey Tourbin
2010-12-04 17:52 ` Dmitry V. Levin
0 siblings, 1 reply; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-04 17:05 UTC (permalink / raw)
To: ALT Devel discussion list
On Sat, Dec 04, 2010 at 07:52:44PM +0300, Dmitry V. Levin wrote:
> > RPM goes reasonably fast, and it's not exactly "slower".
> > Something that might be called unreasonable is behind the RPM.
> >
> > I can't see why rpm is now 10x times slower.
> > Slower compared to... what?
>
> You cannot dismiss arguments so freely. The abovementioned command
> "apt-shell < /dev/null" from apt-0.5.15lorg2-alt33 works 10x times slower
> after update from librpm-4.0.4-alt98.45 to librpm-4.0.4-alt98.49.
> No matter how unreasonable apt is, it was rpm update that slowed it down.
Okay, what's the exact reason everything went 10x times slower?
> --
> ldv
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-12-04 17:05 ` Alexey Tourbin
@ 2010-12-04 17:52 ` Dmitry V. Levin
2010-12-04 21:28 ` Alexey Tourbin
0 siblings, 1 reply; 29+ messages in thread
From: Dmitry V. Levin @ 2010-12-04 17:52 UTC (permalink / raw)
To: ALT Devel discussion list
[-- Attachment #1: Type: text/plain, Size: 806 bytes --]
On Sat, Dec 04, 2010 at 08:05:57PM +0300, Alexey Tourbin wrote:
> On Sat, Dec 04, 2010 at 07:52:44PM +0300, Dmitry V. Levin wrote:
> > > RPM goes reasonably fast, and it's not exactly "slower".
> > > Something that might be called unreasonable is behind the RPM.
> > >
> > > I can't see why rpm is now 10x times slower.
> > > Slower compared to... what?
> >
> > You cannot dismiss arguments so freely. The abovementioned command
> > "apt-shell < /dev/null" from apt-0.5.15lorg2-alt33 works 10x times slower
> > after update from librpm-4.0.4-alt98.45 to librpm-4.0.4-alt98.49.
> > No matter how unreasonable apt is, it was rpm update that slowed it down.
>
> Okay, what's the exact reason everything went 10x times slower?
Huge amount of new set-versioned dependencies.
--
ldv
[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-12-04 17:52 ` Dmitry V. Levin
@ 2010-12-04 21:28 ` Alexey Tourbin
2010-12-04 23:26 ` Dmitry V. Levin
0 siblings, 1 reply; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-04 21:28 UTC (permalink / raw)
To: ALT Devel discussion list
On Sat, Dec 04, 2010 at 08:52:00PM +0300, Dmitry V. Levin wrote:
> On Sat, Dec 04, 2010 at 08:05:57PM +0300, Alexey Tourbin wrote:
> > On Sat, Dec 04, 2010 at 07:52:44PM +0300, Dmitry V. Levin wrote:
> > > > RPM goes reasonably fast, and it's not exactly "slower".
> > > > Something that might be called unreasonable is behind the RPM.
> > > >
> > > > I can't see why rpm is now 10x times slower.
> > > > Slower compared to... what?
> > >
> > > You cannot dismiss arguments so freely. The abovementioned command
> > > "apt-shell < /dev/null" from apt-0.5.15lorg2-alt33 works 10x times slower
> > > after update from librpm-4.0.4-alt98.45 to librpm-4.0.4-alt98.49.
> > > No matter how unreasonable apt is, it was rpm update that slowed it down.
> >
> > Okay, what's the exact reason everything went 10x times slower?
> Huge amount of new set-versioned dependencies.
It's not exactly "huge", as it is predictably associated with the number
of shared libraries in use. Also, I remember someone wondering about
"gosh, such a long version". And he felt not that the library was
exporting too many symbols, though. You see the pattern: the library
was okay.
There is indeed a considerable amount of new set-version dependencies.
And I never promised them to be a free lunch, or as cheap as strcmp().
Special compression routines (simplified Golomb-Rice encoding/decoding)
have been crafted to make set-versions shorter. They are very cheap
(as compared to general-purpose compression routines such as found in
zlib), but they are still not free.
So the whole talk is that there's a price. I believe that the price
is very affordable (or, at least, it could've been much worse).
Now as to apt misbehavior. "You cannot dismiss arguments so freely."
What you urge to do is to cut down the price even more. But there is
clearly something wrong with apt, as it tries to resolve every single
dependency upon every startup. And, fortunately, it takes only a few
seconds!
> --
> ldv
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-12-04 21:28 ` Alexey Tourbin
@ 2010-12-04 23:26 ` Dmitry V. Levin
2010-12-04 23:41 ` Alexey Tourbin
0 siblings, 1 reply; 29+ messages in thread
From: Dmitry V. Levin @ 2010-12-04 23:26 UTC (permalink / raw)
To: ALT Devel discussion list
[-- Attachment #1: Type: text/plain, Size: 700 bytes --]
On Sun, Dec 05, 2010 at 12:28:31AM +0300, Alexey Tourbin wrote:
[...]
> What you urge to do is to cut down the price even more. But there is
> clearly something wrong with apt, as it tries to resolve every single
> dependency upon every startup. And, fortunately, it takes only a few
> seconds!
It will take twice or trice longer time when _all_ packages with ELF
files will get their set-versioned dependencies, so I expect apt-get
startup will eventually be about 10x slower again (compared to 5.1 branch).
apt-rpm indeed calls rpmRangesOverlap() too many times, it's common
knowledge. Unfortunately, stating this fact isn't sufficient to make
apt-rpm work faster.
--
ldv
[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-12-04 23:26 ` Dmitry V. Levin
@ 2010-12-04 23:41 ` Alexey Tourbin
2010-12-05 0:03 ` Dmitry V. Levin
0 siblings, 1 reply; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-04 23:41 UTC (permalink / raw)
To: ALT Devel discussion list
On Sun, Dec 05, 2010 at 02:26:44AM +0300, Dmitry V. Levin wrote:
> On Sun, Dec 05, 2010 at 12:28:31AM +0300, Alexey Tourbin wrote:
> [...]
> > What you urge to do is to cut down the price even more. But there is
> > clearly something wrong with apt, as it tries to resolve every single
> > dependency upon every startup. And, fortunately, it takes only a few
> > seconds!
>
> It will take twice or trice longer time when _all_ packages with ELF
> files will get their set-versioned dependencies, so I expect apt-get
> startup will eventually be about 10x slower again (compared to 5.1 branch).
>
> apt-rpm indeed calls rpmRangesOverlap() too many times, it's common
> knowledge. Unfortunately, stating this fact isn't sufficient to make
> apt-rpm work faster.
So what do you think? There's a possibility to change bitv[] to bitmap,
per Kirill's proposal. That mighit yield about, say 30% user time cutdown.
However, note that apt is not only eager for user time. System time is
being spent with splendor as well.
Personally, I like bitv[], and I don't like something like <sys/select.h> stuff.
> --
> ldv
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-12-04 23:41 ` Alexey Tourbin
@ 2010-12-05 0:03 ` Dmitry V. Levin
2010-12-05 0:21 ` Alexey Tourbin
0 siblings, 1 reply; 29+ messages in thread
From: Dmitry V. Levin @ 2010-12-05 0:03 UTC (permalink / raw)
To: ALT Devel discussion list
[-- Attachment #1: Type: text/plain, Size: 1501 bytes --]
On Sun, Dec 05, 2010 at 02:41:31AM +0300, Alexey Tourbin wrote:
> On Sun, Dec 05, 2010 at 02:26:44AM +0300, Dmitry V. Levin wrote:
> > On Sun, Dec 05, 2010 at 12:28:31AM +0300, Alexey Tourbin wrote:
> > [...]
> > > What you urge to do is to cut down the price even more. But there is
> > > clearly something wrong with apt, as it tries to resolve every single
> > > dependency upon every startup. And, fortunately, it takes only a few
> > > seconds!
> >
> > It will take twice or trice longer time when _all_ packages with ELF
> > files will get their set-versioned dependencies, so I expect apt-get
> > startup will eventually be about 10x slower again (compared to 5.1 branch).
> >
> > apt-rpm indeed calls rpmRangesOverlap() too many times, it's common
> > knowledge. Unfortunately, stating this fact isn't sufficient to make
> > apt-rpm work faster.
>
> So what do you think? There's a possibility to change bitv[] to bitmap,
> per Kirill's proposal. That mighit yield about, say 30% user time cutdown.
30% is a bit optimistic, according to my measurements.
> However, note that apt is not only eager for user time. System time is
> being spent with splendor as well.
System time takes only about 15% of elapsed time, according to my
measurements.
> Personally, I like bitv[], and I don't like something like <sys/select.h> stuff.
When performance is the issue, there should be a good rationale to choose
the approach that works slower.
--
ldv
[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-12-05 0:03 ` Dmitry V. Levin
@ 2010-12-05 0:21 ` Alexey Tourbin
2010-12-05 12:49 ` Michael Shigorin
2010-12-07 17:50 ` Dmitry V. Levin
0 siblings, 2 replies; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-05 0:21 UTC (permalink / raw)
To: ALT Devel discussion list
On Sun, Dec 05, 2010 at 03:03:16AM +0300, Dmitry V. Levin wrote:
> > > apt-rpm indeed calls rpmRangesOverlap() too many times, it's common
> > > knowledge. Unfortunately, stating this fact isn't sufficient to make
> > > apt-rpm work faster.
> >
> > So what do you think? There's a possibility to change bitv[] to bitmap,
> > per Kirill's proposal. That mighit yield about, say 30% user time cutdown.
>
> 30% is a bit optimistic, according to my measurements.
>
> > However, note that apt is not only eager for user time. System time is
> > being spent with splendor as well.
>
> System time takes only about 15% of elapsed time, according to my
> measurements.
Under VirtualBox, which seems to become a common hardware nowadays,
system time is somewhat more noticable.
> > Personally, I like bitv[], and I don't like something like <sys/select.h> stuff.
>
> When performance is the issue, there should be a good rationale to choose
> the approach that works slower.
There are other reasons, such as keeping the code comprehensible (at the
expense of some runtime penalties). I think the concept of bitv[] is
crucial for understanding the code, and it helps to keep the code clean
and self-evident. Replacing bitv[] with bitmap might be a major trade-off
for the sake of performance. Or might not.
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-11-25 22:04 [devel] [PATCH 0/3] optimize rpmsetcmp() Kirill A. Shutsemov
` (3 preceding siblings ...)
2010-11-26 8:35 ` [devel] [PATCH 0/3] optimize rpmsetcmp() Kirill A. Shutemov
@ 2010-12-05 1:24 ` Alexey Tourbin
2010-12-05 11:18 ` Dmitry V. Levin
2010-12-05 12:39 ` Michael Shigorin
4 siblings, 2 replies; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-05 1:24 UTC (permalink / raw)
To: ALT Linux Team development discussions
Cc: Alexey I. Froloff, Kirill A. Shutemov, Alexey Gladkov, Dmitry V. Levin
The answer is: no.
The reason is: compilcated.
The explanation is:
Don't try to improve my code.
On Fri, Nov 26, 2010 at 12:04:23AM +0200, Kirill A. Shutsemov wrote:
> From: Kirill A. Shutemov <kirill@shutemov.name>
>
> Tested on Intel Core2 Duo P9500, 3GiB RAM. i586.
>
> rpm-4.0.4-alt100.4:
>
> Performance counter stats for 'apt-cache unmet' (10 runs):
>
> 3396.569863 task-clock-msecs # 1.000 CPUs ( +- 0.018% )
> 12 context-switches # 0.000 M/sec ( +- 9.296% )
> 5 CPU-migrations # 0.000 M/sec ( +- 21.414% )
> 9680 page-faults # 0.003 M/sec ( +- 0.002% )
> 8525321008 cycles # 2509.980 M/sec ( +- 0.016% ) (scaled from 33.34%)
> 7937229883 instructions # 0.931 IPC ( +- 0.041% ) (scaled from 50.00%)
> 1468168069 branches # 432.250 M/sec ( +- 0.014% ) (scaled from 49.99%)
> 257179182 branch-misses # 17.517 % ( +- 0.047% ) (scaled from 50.01%)
> 26275740 cache-references # 7.736 M/sec ( +- 0.114% ) (scaled from 33.35%)
> 350852 cache-misses # 0.103 M/sec ( +- 1.127% ) (scaled from 33.35%)
>
> 3.398038183 seconds time elapsed ( +- 0.018% )
>
> rpm-4.0.4-alt100.4 + patchset:
>
> Performance counter stats for 'apt-cache unmet' (10 runs):
>
> 2010.112427 task-clock-msecs # 1.000 CPUs ( +- 0.038% )
> 8 context-switches # 0.000 M/sec ( +- 17.232% )
> 4 CPU-migrations # 0.000 M/sec ( +- 21.237% )
> 9675 page-faults # 0.005 M/sec ( +- 0.005% )
> 5043579686 cycles # 2509.103 M/sec ( +- 0.041% ) (scaled from 33.32%)
> 5567840605 instructions # 1.104 IPC ( +- 0.021% ) (scaled from 50.00%)
> 1028369972 branches # 511.598 M/sec ( +- 0.016% ) (scaled from 50.00%)
> 94986026 branch-misses # 9.237 % ( +- 0.080% ) (scaled from 50.03%)
> 16227132 cache-references # 8.073 M/sec ( +- 0.303% ) (scaled from 33.36%)
> 323117 cache-misses # 0.161 M/sec ( +- 1.241% ) (scaled from 33.34%)
>
> 2.011050788 seconds time elapsed ( +- 0.044% )
>
> Kirill A. Shutemov (3):
> set.c: use packed bitmap for bit vector
> set.c: optimize putbits()
> set.c: optimize decode_golomb()
>
> lib/set.c | 186 ++++++++++++++++++++++++++++++++++++++----------------------
> 1 files changed, 118 insertions(+), 68 deletions(-)
>
> --
> 1.7.3.2
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-12-05 1:24 ` Alexey Tourbin
@ 2010-12-05 11:18 ` Dmitry V. Levin
2010-12-05 12:39 ` Alexey Tourbin
2010-12-05 12:39 ` Michael Shigorin
1 sibling, 1 reply; 29+ messages in thread
From: Dmitry V. Levin @ 2010-12-05 11:18 UTC (permalink / raw)
To: ALT Linux Team development discussions
[-- Attachment #1: Type: text/plain, Size: 408 bytes --]
On Sun, Dec 05, 2010 at 04:24:51AM +0300, Alexey Tourbin wrote:
> The answer is: no.
> The reason is: compilcated.
> The explanation is:
> Don't try to improve my code.
Your pride makes you blind: you cannot grasp the idea that your code
has a room for optimization :(. That weakens all your reasoning.
Note that somebody has to support your code when you are missing for
months.
--
ldv
[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-12-05 11:18 ` Dmitry V. Levin
@ 2010-12-05 12:39 ` Alexey Tourbin
0 siblings, 0 replies; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-05 12:39 UTC (permalink / raw)
To: ALT Linux Team development discussions
On Sun, Dec 05, 2010 at 02:18:10PM +0300, Dmitry V. Levin wrote:
> On Sun, Dec 05, 2010 at 04:24:51AM +0300, Alexey Tourbin wrote:
> > The answer is: no.
> > The reason is: compilcated.
> > The explanation is:
> > Don't try to improve my code.
>
> Your pride makes you blind: you cannot grasp the idea that your code
> has a room for optimization :(. That weakens all your reasoning.
Indeed, as for set.c, I exercise djb-like approach: either show me
a major problem, or, basically, no way. However, the code is released
under GPL and LGPL - you can make changes on your own (and I can revert
them later).
> Note that somebody has to support your code when you are missing for
> months.
I am now tempted to say that there is no need to support my code.
And I'm not missing for months - rather, I can't speak in a low mood.
> --
> ldv
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-12-05 1:24 ` Alexey Tourbin
2010-12-05 11:18 ` Dmitry V. Levin
@ 2010-12-05 12:39 ` Michael Shigorin
2010-12-05 12:58 ` Alexey Tourbin
1 sibling, 1 reply; 29+ messages in thread
From: Michael Shigorin @ 2010-12-05 12:39 UTC (permalink / raw)
To: ALT Linux Team development discussions
On Sun, Dec 05, 2010 at 04:24:51AM +0300, Alexey Tourbin wrote:
> The explanation is:
> Don't try to improve my code.
Дык тормозит же ж. И на сборочнице, и на localhost.
Приходится дольше сидеть в коридоре у розетки,
чтоб собрать тестовую исошку.
--
---- WBR, Michael Shigorin <mike@altlinux.ru>
------ Linux.Kiev http://www.linux.kiev.ua/
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-12-05 0:21 ` Alexey Tourbin
@ 2010-12-05 12:49 ` Michael Shigorin
2010-12-07 17:50 ` Dmitry V. Levin
1 sibling, 0 replies; 29+ messages in thread
From: Michael Shigorin @ 2010-12-05 12:49 UTC (permalink / raw)
To: ALT Devel discussion list
On Sun, Dec 05, 2010 at 03:21:15AM +0300, Alexey Tourbin wrote:
> I think the concept of bitv[] is crucial for understanding the code
README.set?
--
---- WBR, Michael Shigorin <mike@altlinux.ru>
------ Linux.Kiev http://www.linux.kiev.ua/
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-12-05 12:39 ` Michael Shigorin
@ 2010-12-05 12:58 ` Alexey Tourbin
2010-12-05 15:27 ` Dmitry V. Levin
0 siblings, 1 reply; 29+ messages in thread
From: Alexey Tourbin @ 2010-12-05 12:58 UTC (permalink / raw)
To: ALT Linux Team development discussions
On Sun, Dec 05, 2010 at 02:39:48PM +0200, Michael Shigorin wrote:
> On Sun, Dec 05, 2010 at 04:24:51AM +0300, Alexey Tourbin wrote:
> > The explanation is:
> > Don't try to improve my code.
>
> Дык тормозит же ж. И на сборочнице, и на localhost.
> Приходится дольше сидеть в коридоре у розетки,
> чтоб собрать тестовую исошку.
4.0.4-alt100.3 -> 4.0.4-alt100.5 примерно в 4 раза быстрее.
Меркантильные соображения тут не нужны. Вопрос по сути кода
остается - стоит ли переходить от 'char bitv[]' к битовой шкале.
С точки зрения прозрачности кода - не стоит. С точки зрения
скорости исполнения - выгода заметная, но не офигительная.
К тому же Кирилл там использует целочисленное деление, а оно
выполняется не везде дёшево. Хотя, с константой в знаменателе,
gcc скорее всего заменяет деление на сдвиг. Но это уже детали.
> --
> ---- WBR, Michael Shigorin <mike@altlinux.ru>
> ------ Linux.Kiev http://www.linux.kiev.ua/
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-12-05 12:58 ` Alexey Tourbin
@ 2010-12-05 15:27 ` Dmitry V. Levin
0 siblings, 0 replies; 29+ messages in thread
From: Dmitry V. Levin @ 2010-12-05 15:27 UTC (permalink / raw)
To: ALT Linux Team development discussions
[-- Attachment #1: Type: text/plain, Size: 1060 bytes --]
On Sun, Dec 05, 2010 at 03:58:49PM +0300, Alexey Tourbin wrote:
> On Sun, Dec 05, 2010 at 02:39:48PM +0200, Michael Shigorin wrote:
> > On Sun, Dec 05, 2010 at 04:24:51AM +0300, Alexey Tourbin wrote:
> > > The explanation is:
> > > Don't try to improve my code.
> >
> > Дык тормозит же ж. И на сборочнице, и на localhost.
> > Приходится дольше сидеть в коридоре у розетки,
> > чтоб собрать тестовую исошку.
>
> 4.0.4-alt100.3 -> 4.0.4-alt100.5 примерно в 4 раза быстрее.
> Меркантильные соображения тут не нужны.
Да нет же, по сравнению с 5.1 пользователь видит не ускорение в 4 раза, а
замедление в 4 раза, и по мере пересборки Сизифа с set-versioned deps это
замедление будет ещё заметнее. Например, во время тестовой пересборки
это замедление выливается в несколько часов машинного времени.
Не надо делать вид, что проблемы нет. Вот только одной лишь
оптимизацией lib/set.c эту проблему не решить.
> Вопрос по сути кода
> остается - стоит ли переходить от 'char bitv[]' к битовой шкале.
А вот это уже детали.
--
ldv
[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [devel] [PATCH 0/3] optimize rpmsetcmp()
2010-12-05 0:21 ` Alexey Tourbin
2010-12-05 12:49 ` Michael Shigorin
@ 2010-12-07 17:50 ` Dmitry V. Levin
1 sibling, 0 replies; 29+ messages in thread
From: Dmitry V. Levin @ 2010-12-07 17:50 UTC (permalink / raw)
To: ALT Devel discussion list
[-- Attachment #1: Type: text/plain, Size: 513 bytes --]
On Sun, Dec 05, 2010 at 03:21:15AM +0300, Alexey Tourbin wrote:
> There are other reasons, such as keeping the code comprehensible (at the
> expense of some runtime penalties). I think the concept of bitv[] is
> crucial for understanding the code, and it helps to keep the code clean
> and self-evident. Replacing bitv[] with bitmap might be a major trade-off
> for the sake of performance. Or might not.
Storing one byte per bit is crucial for understanding the code?
What a nonsense!
--
ldv
[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]
^ permalink raw reply [flat|nested] 29+ messages in thread
end of thread, other threads:[~2010-12-07 17:50 UTC | newest]
Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-11-25 22:04 [devel] [PATCH 0/3] optimize rpmsetcmp() Kirill A. Shutsemov
2010-11-25 22:04 ` [devel] [PATCH 1/3] set.c: use packed bitmap for bit vector Kirill A. Shutsemov
2010-11-25 22:04 ` [devel] [PATCH 2/3] set.c: optimize putbits() Kirill A. Shutsemov
2010-11-25 22:04 ` [devel] [PATCH 3/3] set.c: optimize decode_golomb() Kirill A. Shutsemov
2010-11-26 8:35 ` [devel] [PATCH 0/3] optimize rpmsetcmp() Kirill A. Shutemov
2010-11-30 0:35 ` Dmitry V. Levin
2010-12-02 21:48 ` Dmitry V. Levin
2010-12-04 13:02 ` Alexey Tourbin
2010-12-04 16:30 ` Dmitry V. Levin
2010-12-04 16:55 ` Alexey Tourbin
2010-12-04 15:06 ` Alexey Tourbin
2010-12-04 16:29 ` Dmitry V. Levin
2010-12-04 16:42 ` Alexey Tourbin
2010-12-04 16:52 ` Dmitry V. Levin
2010-12-04 17:05 ` Alexey Tourbin
2010-12-04 17:52 ` Dmitry V. Levin
2010-12-04 21:28 ` Alexey Tourbin
2010-12-04 23:26 ` Dmitry V. Levin
2010-12-04 23:41 ` Alexey Tourbin
2010-12-05 0:03 ` Dmitry V. Levin
2010-12-05 0:21 ` Alexey Tourbin
2010-12-05 12:49 ` Michael Shigorin
2010-12-07 17:50 ` Dmitry V. Levin
2010-12-05 1:24 ` Alexey Tourbin
2010-12-05 11:18 ` Dmitry V. Levin
2010-12-05 12:39 ` Alexey Tourbin
2010-12-05 12:39 ` Michael Shigorin
2010-12-05 12:58 ` Alexey Tourbin
2010-12-05 15:27 ` Dmitry V. Levin
ALT Linux Team development discussions
This inbox may be cloned and mirrored by anyone:
git clone --mirror http://lore.altlinux.org/devel/0 devel/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 devel devel/ http://lore.altlinux.org/devel \
devel@altlinux.org devel@altlinux.ru devel@lists.altlinux.org devel@lists.altlinux.ru devel@linux.iplabs.ru mandrake-russian@linuxteam.iplabs.ru sisyphus@linuxteam.iplabs.ru
public-inbox-index devel
Example config snippet for mirrors.
Newsgroup available over NNTP:
nntp://lore.altlinux.org/org.altlinux.lists.devel
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git