From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Date: Mon, 5 Jun 2006 06:41:47 +0400 From: Alexey Tourbin To: sisyphus@lists.altlinux.org Message-ID: <20060605024147.GD18120@localhost.localdomain> Mail-Followup-To: sisyphus@lists.altlinux.org References: <20060604210546.GA18120@localhost.localdomain> <200606041804.54270.iadzhubey@rics.bwh.harvard.edu> <20060604222630.GB18120@localhost.localdomain> <20060605011709.GC18120@localhost.localdomain> Mime-Version: 1.0 Content-Type: multipart/signed; micalg=pgp-sha1; protocol="application/pgp-signature"; boundary="DrWhICOqskFTAXiy" Content-Disposition: inline In-Reply-To: <20060605011709.GC18120@localhost.localdomain> Subject: Re: [sisyphus] ATLAS vs BLAS performance X-BeenThere: sisyphus@lists.altlinux.org X-Mailman-Version: 2.1.7 Precedence: list Reply-To: ALT Linux Sisyphus discussion list List-Id: ALT Linux Sisyphus discussion list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 05 Jun 2006 02:41:39 -0000 Archived-At: List-Archive: List-Post: --DrWhICOqskFTAXiy Content-Type: text/plain; charset=koi8-r Content-Disposition: inline Content-Transfer-Encoding: quoted-printable On Mon, Jun 05, 2006 at 05:17:09AM +0400, Alexey Tourbin wrote: > > > > ATLAS w/ SSE2: > > > > > mm <- matrix(rnorm(10^6), ncol =3D 10^3) > > > > > system.time(crossprod(mm)) > > > > > > > > [1] 0.344 0.020 0.369 0.000 0.000 ^^^^^^^^^^^^^^^^^ > > > =E1 =D7=CF=D4 =D3 GotoBLAS 1.2 (P4 2.8GHz, R 2.3.0): > > >=20 > > > > mm <- matrix(rnorm(10^6), ncol =3D 10^3) > > > > system.time(crossprod(mm)) > > > [1] 0.232 0.012 0.270 0.000 0.000 > >=20 > > model name : AMD Athlon(tm) 64 Processor 3200+ > > cpu MHz : 2050.186 > > cache size : 512 KB >=20 > > =F7 =CF=C2=DD=C5=CD, =C5=D3=CC=C9 =DC=D4=CF =C2=D5=C4=C5=D4 =D2=C1=C2= =CF=D4=C1=D4=D8 =C9=DA =CB=CF=D2=CF=C2=CB=C9 =CB=C1=CB =CE=C1=C4=CF =C9 =C5= =D3=CC=C9 =D0=CF =D3=D2=C1=D7=CE=C5=CE=C9=C0 > > =D3 GotoBLAS =D2=C1=DA=CE=C9=C3=C1 =C2=D5=C4=C5=D4 =CE=C5 =C8=D5=D6=C5,= =DE=C5=CD =D7 =D0=CF=CC=D4=CF=D2=C1 =D2=C1=DA=C1, =D4=CF=C7=C4=C1 =D1 =C2= =D5=C4=D5 > > =C4=CF=D7=CF=CC=C5=CC=C5=CE. =F0=CF=D4=CF=CD=D5 =DE=D4=CF =CF=D3=CE=CF= =D7=CE=CF=CA =D2=C1=DA=D2=D9=D7 =D0=CF =D3=D2=C1=D7=CE=C5=CE=C9=C0 =D3 =C6= =CF=D2=D4=D2=C1=CE=CF=CD =D7=D3=A3 > > =D2=C1=D7=CE=CF =CC=C9=CB=D7=C9=C4=C9=D2=CF=D7=C1=CE, =C1 10-20% =CE=C1= =D0=D2=C1=CB=D4=C9=CB=C5 =D2=C5=C4=CB=CF =DE=D4=CF =D2=C5=DB=C1=C0=D4. >=20 > =E1 =D7=CF=D4 ATLAS =C2=C5=DA =CF=D0=D4=C9=CD=C9=DA=C1=C3=C9=C9 =D0=CF=C4= P4SSE2: >=20 > > mm <- matrix(rnorm(10^6), ncol =3D 10^3) > > system.time(crossprod(mm)) > [1] 0.584 0.012 0.624 0.000 0.000 =E1 =D7=CF=D4 =D3=CF=C2=D2=C1=CE=CE=D9=CA =D0=CF=C4 Athlon64 (HAMMER32SSE2,= =D4.=C5. =C2=CC=C9=D6=C5 =D7=D3=C5=C7=CF =CB =D4=CF=CD=D5 =D0=D2=CF=C3=C5=D3=D3=CF=D2=D5, =DE=D4=CF =D5 =CD=C5=CE=D1). > mm <- matrix(rnorm(10^6), ncol =3D 10^3) > system.time(crossprod(mm)) [1] 0.312 0.016 0.330 0.000 0.000 =F4.=C5. =CE=C5=CD=CE=CF=C7=CF =CC=D5=DE=DB=C5, =DE=C5=CD =C4=CC=D1 P4SSE2.= (=EE=C1=C2=CF=D2 =C9=CE=D3=D4=D2=D5=CB=C3=C9=CA =D7 =C4=C1=CE=CE=CF=CD = =D3=CC=D5=DE=C1=C5 =CE=C5 =CF=D4=CC=C9=DE=C1=C5=D4=D3=D1, =CF=D4=CC=C9=DE=C1=C0=D4=D3=D1 =D4= =CF=CC=D8=CB=CF =CE=C5=CB=CF=D4=CF=D2=D9=C5 =D0=C1=D2=C1=CD=C5=D4=D2=D9 =D5= =D0=D2=C1=D7=CC=C5=CE=C9=D1 L1/L2 =CB=C5=DB=C5=CD, =C9=CC=C9 =DE=D4=CF =D4=C1=CD =C5=DD=A3). SSE2 =D3=C5=CA=DE=C1=D3 =D1=D7=CC=D1=C5=D4=D3=D1 =CF=C2=DD=C9=CD =DA=CE=C1= =CD=C5=CE=C1=D4=C5=CC=C5=CD =C4=CC=D1 P4 =C9 =D3=CF=D7=D2=C5=CD=C5=CE=CE=D9= =C8 =C1=D4=CC=CF=CE=CF=D7. =F0=CF=D3=CB=CF=CC=D8=CB=D5 x86 =C5=DD=A3 =CE=C5=CB=CF=D4=CF=D2=CF=C5 =D7= =D2=C5=CD=D1 =C2=D5=C4=C5=D4 =D3=D5=DD=C5=D3=D4=D7=CF=D7=C1=D4=D8, =D4=CF = =D3=CF=C6=D4, =C4=CC=D1 =CB=CF=D4=CF=D2=CF=C7=CF =D3=D0=C5=C3=C9=C6=C9=DE=C5=D3=CB=C1=D1 =CF=D0=D4= =C9=CD=C9=DA=C1=C3=C9=D1 =C9=CD=C5=C5=D4 =D3=CD=D9=D3=CC (=C9 =C4=C1=C5=D4 = =D3=D5=DD=C5=D3=D4=D7=C5=CE=CE=D9=CA =D7=D9=C9=C7=D2=D9=DB, =CB=C1=CB =D7 =D3=CC=D5=DE=C1=C5 =D3 =DE=C9=D3=CC=CF= =C4=D2=CF=C2=C9=CC=D8=CE=D9=CD=C9 =C2=C9=C2=CC=C9=CF=D4=C5=CB=C1=CD=C9), = =D3=D4=CF=C9=D4 =D3=CF=C2=C9=D2=C1=D4=D8 =D0=D2=C1=CC=CC=C5=CC=D8=CE=CF =C9 =D3=D4=C1=D7=C9= =D4=D8 =D7 /usr/lib/sse2. =F0=D2=C1=D7=C4=C1, =CB=D2=CF=C5=CD=C5 ATLAS =D3=C8=CF=C4=D5 =CE=C1 =D5=CD =CE=C9=DE=C5=C7=CF =CE=C5 =D0=D2=C9=C8=CF=C4= =C9=D4. =E1 =D7=CF=CF=C2=DD=C5 =CE=C1=C4=CF =D0=C5=D2=C5=C8=CF=C4=C9=D4=D8 =CE=C1 x= 86_64, =CB=C1=CB =D1 =D0=CF=CE=C9=CD=C1=C0 =D0=D2=CF=C2=CC=C5=CD =D3 =CF=D0= =D4=C9=CD=C9=DA=C1=C3=C9=C5=CA "=D0=CF=C4 =C1=D2=C8=C9=D4=C5=CB=D4=D5=D2=D5" =D4=C1=CD =C2=D5=C4=C5=D4 =C7= =CF=D2=C1=DA=C4=CF =CD=C5=CE=D8=DB=C5 (=D4.=C5. =D7 =D0=C5=D2=D7=D9=C8 Athl= on64 =CE=C5 =C2=D9=CC=CF SSE3, =C1 =D7 P4 =C5=D3=D3-=CE=CF =CE=C5=D4 3dNow, =CE=CF =CF= =D3=D4=C1=CC=D8=CE=CF=C5 =D7=D3=A3 =D3=CF=D7=D0=C1=C4=C1=C5=D4). --DrWhICOqskFTAXiy Content-Type: application/pgp-signature Content-Disposition: inline -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.2.2 (GNU/Linux) iD8DBQFEg5nrfBKgtDjnu0YRAiLbAKCd3iiQtwnKuXHLp9OphRJyJ1+xmgCg0IAT +07kW8dmOKgDXLyo2RZwnoQ= =UPIx -----END PGP SIGNATURE----- --DrWhICOqskFTAXiy--