Another point in favor of Euphoria!
- Posted by rforno at tutopia.com
Oct 05, 2002
This is a multi-part message in MIME format.
------=_NextPart_000_0010_01C26CA1.0A5E93A0
charset="iso-8859-1"
Rob:
I posed to the students at the University the classic problem of finding
duplicated and missing records in a huge file. Please see the resulting
Euphoria and C/C++ programs. The Euphoria program ran without trouble.
The C/C++ program, under Borland 5.02, worked up to 1000000 characters
in the vector, and when I tried it with ten million characters, it
crashed. I have 256 MB RAM. I was unable to find a fix. Perhaps the fix
exists, but it is hidden in the guts of the Borland compiler.
Another point in favor of Euphoria! Euphoria is more dependable than
Borland!
Regards.
// C/C++ program
// Program to find duplicated and missing records (simulation)
#include <iostream.h>
#include <stdlib.h>
#include <conio.h>
#define MAX 10000000
#define LEN (MAX / 8)
// Draw a pseudo-random value in [0, limit), for limit up to 2^30.
// Two rand() calls contribute 15 bits each, because the C standard only
// guarantees RAND_MAX >= 32767; a single call cannot be relied on to cover
// ten million distinct record numbers.
// NOTE(review): this replaces the non-standard Borland random(n) call; the
// exact definition of random() in the 5.02 headers was not checked.
static long draw(long limit)
{
    long hi = rand() & 0x7FFF;
    long lo = rand() & 0x7FFF;
    return ((hi << 15) | lo) % limit;
}

// Simulation: draw MAX record numbers in [0, MAX), report every number that
// is drawn more than once ("Duplicated") and afterwards every number that
// was never drawn ("Missing"). One bit per record number is kept in dat[].
int main()
{
    // The bitmap is LEN = MAX / 8 bytes (1.25 MB for MAX = 10000000). As an
    // automatic (stack) array this very likely exceeds the default stack and
    // crashes at start-up; static storage avoids that and is also
    // zero-initialised, so no explicit clearing loop is needed.
    static unsigned char dat[LEN];
    unsigned char mask[8];      // mask[b] has only bit b set
    int i, r;
    long x, z, k;

    for (i = 0; i < 8; i++)
        mask[i] = (unsigned char)(1 << i);

    for (x = 0; x < MAX; x++)
    {
        z = draw(MAX);
        k = z >> 3;             // byte that holds the bit for record z
        r = (int)(z & 7);       // bit position inside that byte
        if (dat[k] & mask[r])
            cout << "Duplicated: " << z << endl;
        else
            dat[k] |= mask[r];
    }

    // All 8 bits of every byte must be inspected; stopping at bit 6 would
    // silently skip every record number whose value is 7 modulo 8.
    for (x = 0; x < LEN; x++)
        for (i = 0; i < 8; i++)
            if (0 == (mask[i] & dat[x]))
                cout << "Missing: " << ((x << 3) + i) << endl;

    getch();                    // keep the console window open until a key is pressed
    return 0;
}
--Euphoria program
--Program to find duplicated and missing records (simulation)
-- Simulation: draw MAX random record numbers in 0..MAX-1, print each number
-- that is drawn again after it was already seen ("Duplicated"), then print
-- each number that was never drawn ("Missing").
-- One bit per record number is stored, SIZE bits per element of dat.

sequence mask, dat
integer k, r, z, m, d

-- The trailing comment holds the zeros that were stripped for a quick test run.
constant MAX = 10 --00000000
-- Bits used per element: masks 2^0..2^29 sum to 2^30 - 1, which still fits
-- in a Euphoria integer.
constant SIZE = 30
-- Ceiling of MAX / SIZE, written as a negated floor of the negated quotient.
constant LEN = - floor(- MAX / SIZE)

-- mask[b] = 2^(b-1), i.e. a single set bit per entry.
mask = repeat(1, SIZE)
for i = 1 to SIZE - 1 do
    mask[i + 1] = mask[i] * 2
end for

dat = repeat(0, LEN)

-- Pass 1: mark every drawn number, reporting repeats.
for i = 0 to MAX - 1 do
    z = rand(MAX) - 1           -- rand() is 1-based; shift to 0..MAX-1
    k = floor(z / SIZE)
    r = z - k * SIZE + 1        -- 1-based bit index inside the element
    k += 1                      -- 1-based element index
    d = dat[k]
    m = mask[r]
    if and_bits(d, m) then
        printf(1, "Duplicated: %d\n", z)
    else
        dat[k] = or_bits(d, m)
    end if
end for

-- Pass 2: every number whose bit is still clear was never drawn.
for i = 0 to MAX - 1 do
    k = floor(i / SIZE)
    r = i - k * SIZE + 1
    k += 1
    if and_bits(dat[k], mask[r]) = 0 then
        printf(1, "Missing: %d\n", i)
    end if
end for
PS: Please see next how I use my strange technique to generate
pseudo-random (very very pseudo!) non-repeating integers. The above
program was modified in order to use this technique, and no duplicate
nor missing numbers are reported.
--Euphoria program, modified to use non-repeating pseudo-random integers
--Program to find duplicated and missing records (simulation)
-- Return the smallest power of 3 (starting from 3^1) that is >= upto.
-- For upto <= 3 the result is 3.
-- The result is held in an integer, so upto must be small enough that the
-- answer stays inside Euphoria's integer range.
function calcpot3(integer upto)
    integer pot3
    pot3 = 3
    while pot3 < upto do
        pot3 = pot3 * 3
    end while
    return pot3
end function
-- Same duplicate/missing simulation, but the record numbers come from a
-- non-repeating sequence instead of rand(): u is advanced by
--     u = remainder(x - 2 * u, pot3)
-- where pot3 is a power of 3 >= MAX, and values >= MAX are skipped.

sequence mask, dat
integer k, r, m, d, x, u

constant MAX = 100000000 --now, this constant cannot be 1000000000
constant SIZE = 30 --size of Euphoria integers
-- Ceiling of MAX / SIZE.
constant LEN = - floor(- MAX / SIZE)

-- mask[b] = 2^(b-1), i.e. a single set bit per entry.
mask = repeat(1, SIZE)
for i = 1 to SIZE - 1 do
    mask[i + 1] = mask[i] * 2
end for

integer pot3
pot3 = calcpot3(MAX)

-- Random additive constant. Writing x0 for the value drawn here,
-- x0 + floor(x0 / 2) is 3j or 3j + 1, and 3 * pot3 - 2 is 1 modulo 3, so the
-- final x is never a multiple of 3. It is also >= 3 * pot3 - 2, which keeps
-- x - 2 * u positive for every u in 0..pot3-1.
x = rand(floor(pot3 / 3) * 2) - 1
x += 3 * pot3 - 2 + floor(x / 2)

-- Random starting point in 0..pot3-1.
u = rand(pot3) - 1

dat = repeat(0, LEN)

-- Pass 1: take MAX values below MAX from the sequence and mark each one.
for i = 1 to MAX do
    -- Skip sequence members that fall outside 0..MAX-1.
    while u >= MAX do
        u = remainder(x - 2 * u, pot3)
    end while
    k = floor(u / SIZE)
    r = u - k * SIZE + 1        -- 1-based bit index inside the element
    k += 1                      -- 1-based element index
    d = dat[k]
    m = mask[r]
    if and_bits(d, m) then
        printf(1, "Duplicated: %d\n", u)
    else
        dat[k] = or_bits(d, m)
    end if
    u = remainder(x - 2 * u, pot3)
end for

-- Pass 2: every number whose bit is still clear was never produced.
for i = 0 to MAX - 1 do
    k = floor(i / SIZE)
    r = i - k * SIZE + 1
    k += 1
    if and_bits(dat[k], mask[r]) = 0 then
        printf(1, "Missing: %d\n", i)
    end if
end for
------=_NextPart_000_0010_01C26CA1.0A5E93A0
Content-Type: text/html;
charset="iso-8859-1"
Content-Transfer-Encoding: quoted-printable
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML><HEAD>
<META content=3D"text/html; charset=3Diso-8859-1" =
http-equiv=3DContent-Type>
<META content=3D"MSHTML 5.00.2314.1000" name=3DGENERATOR>
<STYLE></STYLE>
</HEAD>
<BODY bgColor=3D#ffffff>
<DIV><FONT face=3DArial size=3D3>Rob:</FONT></DIV>
<DIV><FONT face=3DArial>I posed to the students at the University the =
classic=20
problem of finding duplicated and missing records in a huge file. Please =
see the=20
resulting Euphoria and C/C++ programs. The Euphoria program ran without =
trouble.=20
The C/C++ program, under Borland 5.02, worked up to 1000000 characters =
in the=20
vector, and when I tried it with ten million characters, it =
crashed. I=20
have 256Mb RAM. I was unable to find a fix. Perhaps the fix exists, =
but it=20
is hidden in the guts of the Borland compiler.</FONT></DIV>
<DIV><FONT face=3DArial>Another point in favor of Euphoria! Euphoria is =
more=20
dependable than Borland!</FONT></DIV>
<DIV><FONT face=3DArial>Regards.</FONT></DIV>
<DIV> </DIV>
<DIV><FONT face=3DArial>// C/C++ program</FONT></DIV>
<DIV><FONT face=3DArial>// Program to find duplicated and missing =
records=20
(simulation)<BR>#include <iostream.h><BR>#include=20
<stdlib.h><BR>#include <conio.h><BR>#define MAX =
10000000<BR>#define=20
LEN (MAX / 8)<BR>void main()<BR>{<BR> char mask[8], =
dat[LEN];<BR> int=20
i, r;<BR> long x, z, k;<BR> mask[0] =3D 1;<BR> for (i =
=3D 0; i=20
< 7; i++)<BR> mask[i + 1] =3D mask[i] << =
1;<BR> for (x=20
=3D 0; x < LEN; x++)<BR> dat[x] =3D 0;<BR> for (x =
=3D 0; x <=20
MAX; x++)<BR> {<BR> z =3D =
random(MAX);<BR> k =3D=20
z >> 3;<BR> r =3D z & =
7;<BR> if=20
(dat[k] & mask[r])<BR> cout << =
"Duplicated: "=20
<< z << endl;<BR> =
else<BR> =20
dat[k] |=3D mask[r];<BR> }<BR> for (x =3D 0; x < =
LEN;=20
x++)<BR> for (i =3D 0; i < 7; i++)<BR> =
if=20
(0 =3D=3D (mask[i] & dat[x]))<BR> =
cout=20
<< "Missing: " << ((x << 3) + i) << =
endl;<BR> =20
getch();<BR>}</FONT></DIV>
<DIV> </DIV>
<DIV><FONT face=3DArial size=3D3>--Euphoria program</FONT></DIV>
<DIV><FONT face=3DArial size=3D3>--Program to find duplicated and =
missing records=20
(simulation)<BR>sequence mask, dat<BR>integer k, r, z, m, d<BR>constant =
MAX =3D 10=20
--00000000<BR>constant SIZE =3D 30<BR>constant LEN =3D - floor(- MAX / =
SIZE)<BR>mask=20
=3D repeat(1, SIZE)<BR>for i =3D 1 to SIZE - 1 do<BR> =
mask[i + 1]=20
=3D mask[i] * 2<BR>end for<BR>dat =3D repeat(0, LEN)<BR>for i =3D 0 to =
MAX - 1=20
do<BR> z =3D rand(MAX) - 1<BR> k =3D =
floor(z /=20
SIZE)<BR> r =3D z - k * SIZE + 1<BR> =
k +=3D=20
1<BR> d =3D dat[k]<BR> m =3D=20
mask[r]<BR> if and_bits(d, m) then<BR> printf(1,=20
"Duplicated: %d\n", z)<BR> else<BR> dat[k] =3D =
or_bits(d,=20
m)<BR> end if<BR>end for<BR>for i =3D 0 to MAX - 1=20
do<BR> k =3D floor(i / SIZE)<BR> r =
=3D i - k *=20
SIZE + 1<BR> k +=3D 1<BR> if =
and_bits(dat[k],=20
mask[r]) =3D 0 then<BR> printf(1, "Missing: %d\n", =
i)<BR> =20
end if<BR>end for</FONT></DIV>
<DIV> </DIV>
<DIV><FONT face=3DArial size=3D3>PS: Please see next how I use my =
strange technique=20
to generate pseudo-random (very very pseudo!) non-repeating integers. =
The above=20
program was modified in order to use this technique, and no duplicate =
nor=20
missing numbers are reported.</FONT></DIV>
<DIV> </DIV>
<DIV><FONT face=3DArial size=3D3>--Euphoria program, modified to use =
non-repeating=20
pseudo-random integers</FONT></DIV>
<DIV><FONT face=3DArial size=3D3>--Program to find duplicated and =
missing records=20
(simulation)<BR>function calcpot3(integer upto)<BR> =
integer=20
pot3<BR> pot3 =3D 3<BR> while pot3 =
< upto=20
do<BR> pot3 +=3D pot3 + pot3<BR> end=20
while<BR> return pot3<BR>end function</FONT></DIV>
<DIV> </DIV>
<DIV><FONT face=3DArial size=3D3>sequence mask, dat<BR>integer k, r, m, =
d, x,=20
u<BR>constant MAX =3D 100000000 --now, this constant cannot be=20
1000000000<BR>constant SIZE =3D 30 --size of Euphoria =
integers<BR>constant LEN =3D -=20
floor(- MAX / SIZE)<BR>mask =3D repeat(1, SIZE)<BR>for i =3D 1 to SIZE - =
1=20
do<BR> mask[i + 1] =3D mask[i] * 2<BR>end =
for<BR>integer=20
pot3<BR>pot3 =3D calcpot3(MAX)<BR>x =3D rand(floor(pot3 / 3) * 2) - =
1<BR>x +=3D 3 *=20
pot3 - 2 + floor(x / 2)<BR>u =3D rand(pot3) - 1<BR>dat =3D repeat(0, =
LEN)<BR>for i =3D=20
1 to MAX do<BR> while u >=3D MAX do<BR> u =3D =
remainder(x=20
- 2 * u, pot3)<BR> end while<BR> k =
=3D floor(u=20
/ SIZE)<BR> r =3D u - k * SIZE + =
1<BR> k +=3D=20
1<BR> d =3D dat[k]<BR> m =3D=20
mask[r]<BR> if and_bits(d, m) then<BR> printf(1,=20
"Duplicated: %d\n", u)<BR> else<BR> dat[k] =3D =
or_bits(d,=20
m)<BR> end if<BR> u =3D remainder(x =
- 2 * u,=20
pot3)<BR>end for<BR>for i =3D 0 to MAX - 1 do<BR> k =
=3D floor(i /=20
SIZE)<BR> r =3D i - k * SIZE + 1<BR> =
k +=3D=20
1<BR> if and_bits(dat[k], mask[r]) =3D 0 =
then<BR> printf(1,=20
"Missing: %d\n", i)<BR> end if<BR>end for</FONT></DIV>
<DIV> </DIV>
------=_NextPart_000_0010_01C26CA1.0A5E93A0--
|
Not Categorized, Please Help
|
|