Another point in favor of Euphoria!

new topic     » topic index » view thread      » older message » newer message

This is a multi-part message in MIME format.

------=_NextPart_000_0010_01C26CA1.0A5E93A0
	charset="iso-8859-1"

Rob:
I posed to the students at the University the classic problem of finding
duplicated and missing records in a huge file. Please see the resulting
Euphoria and C/C++ programs. The Euphoria program ran without trouble.
The C/C++ program, under Borland 5.02, worked up to 1000000 characters
in the vector, and when I tried it with ten million characters, it
crashed. I have 256 MB of RAM. I was unable to find a fix. Perhaps the fix
exists, but it is hidden in the guts of the Borland compiler.
Another point in favor of Euphoria! Euphoria is more dependable than
Borland!
Regards.

// C/C++ program
// Program to find duplicated and missing records (simulation)
#include <iostream.h>
#include <stdlib.h>
#include <conio.h>
#define MAX 10000000
#define LEN (MAX / 8)
// Simulates reading MAX record numbers and reports every number that is
// drawn more than once ("Duplicated") or never drawn at all ("Missing").
// One bit per record number is kept in dat[]: record z lives in bit (z & 7)
// of byte (z >> 3).
int main()
{
  // Single-bit masks. Unsigned, so that bit 7 (value 128) is representable
  // without depending on how a plain char handles 1 << 7.
  static const unsigned char mask[8] = { 1, 2, 4, 8, 16, 32, 64, 128 };
  // Static storage instead of an automatic array: with MAX at ten million
  // this is 1,250,000 bytes, which is larger than a typical default stack
  // and is the likely cause of the crash at that size. Objects with static
  // storage duration start out zeroed, so no clearing loop is needed.
  // LEN is MAX / 8, so MAX must be a multiple of 8 for every record number
  // to have a bit of its own.
  static unsigned char dat[LEN];
  int i, r;
  long x, z, k;

  for (x = 0; x < MAX; x++)
  {
    // NOTE(review): random() is a Borland extension, not standard C++;
    // confirm that it can actually produce every value below MAX.
    z = random(MAX);
    k = z >> 3;          // byte that holds the record's bit
    r = (int)(z & 7);    // bit position inside that byte
    if (dat[k] & mask[r])
      cout << "Duplicated: " << z << endl;
    else
      dat[k] |= mask[r];
  }

  // Check all 8 bits of every byte. Stopping at bit 6 would silently skip
  // every record number whose remainder modulo 8 is 7.
  for (x = 0; x < LEN; x++)
    for (i = 0; i < 8; i++)
      if (0 == (mask[i] & dat[x]))
        cout << "Missing: " << ((x << 3) + i) << endl;

  getch();   // keep the console window open until a key is pressed
  return 0;
}

--Euphoria program
--Program to find duplicated and missing records (simulation)
-- Bitmap bookkeeping: record number n (0-based) is stored as bit
-- remainder(n, SIZE) of element floor(n / SIZE) + 1 of dat.
sequence mask, dat
integer k, r, z, m, d
constant MAX = 10 -- 00000000 (presumably the trailing zeros were commented out for a quick test run)
constant SIZE = 30 -- number of bits used in each element of dat
constant LEN = - floor(- MAX / SIZE) -- MAX / SIZE rounded up

-- mask[j] has only bit j-1 set: 1, 2, 4, ...
mask = repeat(1, SIZE)
for i = 1 to SIZE - 1 do
    mask[i + 1] = mask[i] * 2
end for

dat = repeat(0, LEN)

-- Draw MAX random record numbers in the range 0 .. MAX-1 and mark each one.
for i = 0 to MAX - 1 do
    z = rand(MAX) - 1
    k = floor(z / SIZE)
    r = z - k * SIZE + 1 -- 1-based bit index inside the element
    k += 1 -- 1-based element index
    d = dat[k]
    m = mask[r]
    if and_bits(d, m) then
        -- the bit was already set, so z has been drawn before
        printf(1, "Duplicated: %d\n", z)
    else
        dat[k] = or_bits(d, m)
    end if
end for

-- Report every record number whose bit was never set.
for i = 0 to MAX - 1 do
    k = floor(i / SIZE)
    r = i - k * SIZE + 1
    k += 1
    if and_bits(dat[k], mask[r]) = 0 then
        printf(1, "Missing: %d\n", i)
    end if
end for

PS: Please see below how I use my strange technique to generate
pseudo-random (very, very pseudo!) non-repeating integers. The above
program was modified to use this technique, and neither duplicate
nor missing numbers are reported.

--Euphoria program, modified to use non-repeating pseudo-random integers
--Program to find duplicated and missing records (simulation)
-- Returns the smallest power of 3 (starting from 3 itself) that is not
-- less than upto. For upto <= 3 the result is 3.
function calcpot3(integer upto)
    integer pot3
    pot3 = 3
    while pot3 < upto do
        pot3 += pot3 + pot3 -- triple the current power
    end while
    return pot3
end function

-- Same bitmap bookkeeping as the random version, but the record numbers come
-- from the recurrence u -> remainder(x - 2 * u, pot3), skipping any value
-- that is not below MAX. Per the author's note, this run reports neither
-- duplicates nor missing numbers.
sequence mask, dat
integer k, r, m, d, x, u
-- NOTE(review): the author states MAX cannot be 1000000000 here; presumably
-- the power of 3 and the offset x derived from it would no longer fit in a
-- Euphoria integer -- confirm against the integer range.
constant MAX = 100000000
constant SIZE = 30 -- size of Euphoria integers (bits used per element of dat)
constant LEN = - floor(- MAX / SIZE) -- MAX / SIZE rounded up

-- mask[j] has only bit j-1 set: 1, 2, 4, ...
mask = repeat(1, SIZE)
for i = 1 to SIZE - 1 do
    mask[i + 1] = mask[i] * 2
end for

integer pot3
pot3 = calcpot3(MAX) -- modulus of the recurrence: a power of 3 not below MAX

-- Random additive constant for the recurrence. It is at least 3 * pot3 - 2,
-- so x - 2 * u stays positive for every u below pot3.
x = rand(floor(pot3 / 3) * 2) - 1
x += 3 * pot3 - 2 + floor(x / 2)

u = rand(pot3) - 1 -- random starting point in 0 .. pot3-1
dat = repeat(0, LEN)

for i = 1 to MAX do
    -- step past values that lie outside the record range 0 .. MAX-1
    while u >= MAX do
        u = remainder(x - 2 * u, pot3)
    end while
    k = floor(u / SIZE)
    r = u - k * SIZE + 1 -- 1-based bit index inside the element
    k += 1 -- 1-based element index
    d = dat[k]
    m = mask[r]
    if and_bits(d, m) then
        printf(1, "Duplicated: %d\n", u)
    else
        dat[k] = or_bits(d, m)
    end if
    u = remainder(x - 2 * u, pot3) -- advance to the next value
end for

-- Report every record number whose bit was never set.
for i = 0 to MAX - 1 do
    k = floor(i / SIZE)
    r = i - k * SIZE + 1
    k += 1
    if and_bits(dat[k], mask[r]) = 0 then
        printf(1, "Missing: %d\n", i)
    end if
end for



------=_NextPart_000_0010_01C26CA1.0A5E93A0
Content-Type: text/html;
	charset="iso-8859-1"
Content-Transfer-Encoding: quoted-printable

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML><HEAD>
<META content=3D"text/html; charset=3Diso-8859-1" =
http-equiv=3DContent-Type>
<META content=3D"MSHTML 5.00.2314.1000" name=3DGENERATOR>
<STYLE></STYLE>
</HEAD>
<BODY bgColor=3D#ffffff>
<DIV><FONT face=3DArial size=3D3>Rob:</FONT></DIV>
<DIV><FONT face=3DArial>I posed to the students at the University the =
classic=20
problem of finding duplicated and missing records in a huge file. Please =
see the=20
resulting Euphoria and C/C++ programs. The Euphoria program ran without =
trouble.=20
The C/C++ program, under Borland 5.02, worked up to 1000000 characters =
in the=20
vector, and&nbsp;when I tried it with ten million characters, it =
crashed. I=20
have&nbsp;256Mb RAM. I was unable to find a fix. Perhaps the fix exists, =
but it=20
is hidden in the guts of the Borland compiler.</FONT></DIV>
<DIV><FONT face=3DArial>Another point in favor of Euphoria! Euphoria is =
more=20
dependable than Borland!</FONT></DIV>
<DIV><FONT face=3DArial>Regards.</FONT></DIV>
<DIV>&nbsp;</DIV>
<DIV><FONT face=3DArial>// C/C++ program</FONT></DIV>
<DIV><FONT face=3DArial>// Program to find duplicated and missing =
records=20
(simulation)<BR>#include &lt;iostream.h&gt;<BR>#include=20
&lt;stdlib.h&gt;<BR>#include &lt;conio.h&gt;<BR>#define MAX =
10000000<BR>#define=20
LEN (MAX / 8)<BR>void main()<BR>{<BR>&nbsp;char mask[8], =
dat[LEN];<BR>&nbsp; int=20
i, r;<BR>&nbsp; long x, z, k;<BR>&nbsp; mask[0] =3D 1;<BR>&nbsp; for (i =
=3D 0; i=20
&lt; 7; i++)<BR>&nbsp; &nbsp;mask[i + 1] =3D mask[i] &lt;&lt; =
1;<BR>&nbsp; for (x=20
=3D 0; x &lt; LEN; x++)<BR>&nbsp; &nbsp;dat[x] =3D 0;<BR>&nbsp; for (x =
=3D 0; x &lt;=20
MAX; x++)<BR>&nbsp; {<BR>&nbsp; &nbsp;z =3D =
random(MAX);<BR>&nbsp;&nbsp;&nbsp; k =3D=20
z &gt;&gt; 3;<BR>&nbsp;&nbsp;&nbsp; r =3D z &amp; =
7;<BR>&nbsp;&nbsp;&nbsp; if=20
(dat[k] &amp; mask[r])<BR>&nbsp;&nbsp;&nbsp; &nbsp;cout &lt;&lt; =
"Duplicated: "=20
&lt;&lt; z &lt;&lt; endl;<BR>&nbsp;&nbsp;&nbsp; =
else<BR>&nbsp;&nbsp;&nbsp;=20
&nbsp;dat[k] |=3D mask[r];<BR>&nbsp; }<BR>&nbsp; for (x =3D 0; x &lt; =
LEN;=20
x++)<BR>&nbsp; &nbsp;for (i =3D 0; i &lt; 7; i++)<BR>&nbsp;&nbsp;&nbsp; =
&nbsp;if=20
(0 =3D=3D (mask[i] &amp; dat[x]))<BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; =
&nbsp;cout=20
&lt;&lt; "Missing: " &lt;&lt; ((x &lt;&lt; 3) + i) &lt;&lt; =
endl;<BR>&nbsp;=20
getch();<BR>}</FONT></DIV>
<DIV>&nbsp;</DIV>
<DIV><FONT face=3DArial size=3D3>--Euphoria program</FONT></DIV>
<DIV><FONT face=3DArial size=3D3>--Program to find duplicated and =
missing records=20
(simulation)<BR>sequence mask, dat<BR>integer k, r, z, m, d<BR>constant =
MAX =3D 10=20
--00000000<BR>constant SIZE =3D 30<BR>constant LEN =3D - floor(- MAX / =
SIZE)<BR>mask=20
=3D repeat(1, SIZE)<BR>for i =3D 1 to SIZE - 1 do<BR>&nbsp;&nbsp;&nbsp; =
mask[i + 1]=20
=3D mask[i] * 2<BR>end for<BR>dat =3D repeat(0, LEN)<BR>for i =3D 0 to =
MAX - 1=20
do<BR>&nbsp;&nbsp;&nbsp; z =3D rand(MAX) - 1<BR>&nbsp;&nbsp;&nbsp; k =3D =
floor(z /=20
SIZE)<BR>&nbsp;&nbsp;&nbsp; r =3D z - k * SIZE + 1<BR>&nbsp;&nbsp;&nbsp; =
k +=3D=20
1<BR>&nbsp;&nbsp;&nbsp; d =3D dat[k]<BR>&nbsp;&nbsp;&nbsp; m =3D=20
mask[r]<BR>&nbsp;&nbsp;&nbsp; if and_bits(d, m) then<BR>&nbsp;printf(1,=20
"Duplicated: %d\n", z)<BR>&nbsp;&nbsp;&nbsp; else<BR>&nbsp;dat[k] =3D =
or_bits(d,=20
m)<BR>&nbsp;&nbsp;&nbsp; end if<BR>end for<BR>for i =3D 0 to MAX - 1=20
do<BR>&nbsp;&nbsp;&nbsp; k =3D floor(i / SIZE)<BR>&nbsp;&nbsp;&nbsp; r =
=3D i - k *=20
SIZE + 1<BR>&nbsp;&nbsp;&nbsp; k +=3D 1<BR>&nbsp;&nbsp;&nbsp; if =
and_bits(dat[k],=20
mask[r]) =3D 0 then<BR>&nbsp;printf(1, "Missing: %d\n", =
i)<BR>&nbsp;&nbsp;&nbsp;=20
end if<BR>end for</FONT></DIV>
<DIV>&nbsp;</DIV>
<DIV><FONT face=3DArial size=3D3>PS: Please see next how I use my =
strange technique=20
to generate pseudo-random (very very pseudo!) non-repeating integers. =
The above=20
program was modified in order to use this technique, and no duplicate =
nor=20
missing numbers are reported.</FONT></DIV>
<DIV>&nbsp;</DIV>
<DIV><FONT face=3DArial size=3D3>--Euphoria program, modified to use =
non-repeating=20
pseudo-random integers</FONT></DIV>
<DIV><FONT face=3DArial size=3D3>--Program to find duplicated and =
missing records=20
(simulation)<BR>function calcpot3(integer upto)<BR>&nbsp;&nbsp;&nbsp; =
integer=20
pot3<BR>&nbsp;&nbsp;&nbsp; pot3 =3D 3<BR>&nbsp;&nbsp;&nbsp; while pot3 =
&lt; upto=20
do<BR>&nbsp;pot3 +=3D pot3 + pot3<BR>&nbsp;&nbsp;&nbsp; end=20
while<BR>&nbsp;&nbsp;&nbsp; return pot3<BR>end function</FONT></DIV>
<DIV>&nbsp;</DIV>
<DIV><FONT face=3DArial size=3D3>sequence mask, dat<BR>integer k, r, m, =
d, x,=20
u<BR>constant MAX =3D 100000000 --now, this constant cannot be=20
1000000000<BR>constant SIZE =3D 30 --size of Euphoria =
integers<BR>constant LEN =3D -=20
floor(- MAX / SIZE)<BR>mask =3D repeat(1, SIZE)<BR>for i =3D 1 to SIZE - =
1=20
do<BR>&nbsp;&nbsp;&nbsp; mask[i + 1] =3D mask[i] * 2<BR>end =
for<BR>integer=20
pot3<BR>pot3 =3D calcpot3(MAX)<BR>x =3D rand(floor(pot3 / 3) * 2) - =
1<BR>x +=3D 3 *=20
pot3 - 2 + floor(x / 2)<BR>u =3D rand(pot3) - 1<BR>dat =3D repeat(0, =
LEN)<BR>for i =3D=20
1 to MAX do<BR>&nbsp;&nbsp;&nbsp; while u &gt;=3D MAX do<BR>&nbsp;u =3D =
remainder(x=20
- 2 * u, pot3)<BR>&nbsp;&nbsp;&nbsp; end while<BR>&nbsp;&nbsp;&nbsp; k =
=3D floor(u=20
/ SIZE)<BR>&nbsp;&nbsp;&nbsp; r =3D u - k * SIZE + =
1<BR>&nbsp;&nbsp;&nbsp; k +=3D=20
1<BR>&nbsp;&nbsp;&nbsp; d =3D dat[k]<BR>&nbsp;&nbsp;&nbsp; m =3D=20
mask[r]<BR>&nbsp;&nbsp;&nbsp; if and_bits(d, m) then<BR>&nbsp;printf(1,=20
"Duplicated: %d\n", u)<BR>&nbsp;&nbsp;&nbsp; else<BR>&nbsp;dat[k] =3D =
or_bits(d,=20
m)<BR>&nbsp;&nbsp;&nbsp; end if<BR>&nbsp;&nbsp;&nbsp; u =3D remainder(x =
- 2 * u,=20
pot3)<BR>end for<BR>for i =3D 0 to MAX - 1 do<BR>&nbsp;&nbsp;&nbsp; k =
=3D floor(i /=20
SIZE)<BR>&nbsp;&nbsp;&nbsp; r =3D i - k * SIZE + 1<BR>&nbsp;&nbsp;&nbsp; =
k +=3D=20
1<BR>&nbsp;&nbsp;&nbsp; if and_bits(dat[k], mask[r]) =3D 0 =
then<BR>&nbsp;printf(1,=20
"Missing: %d\n", i)<BR>&nbsp;&nbsp;&nbsp; end if<BR>end for</FONT></DIV>
<DIV>&nbsp;</DIV>

------=_NextPart_000_0010_01C26CA1.0A5E93A0--

new topic     » topic index » view thread      » older message » newer message

Search



Quick Links

User menu

Not signed in.

Misc Menu