Re: getc( ) and gets( ) speed

new topic     » goto parent     » topic index » view thread      » older message » newer message

Euman wrote:

> ----- Original Message ----- 
> From: "Juergen Luethje"
>
>> Tommy wrote:

<snip>

>>> Robert, couldn't you consider making get_bytes a
>>> low-level, builtin routine to dramatically improve its performance? I
>>> think get_bytes is a really standard routine that is probably quite
>>> frequently used.
>>
>> I would appreciate that _very_ much.
>
> If you're using windows, try this:

Yes, at the moment, I need it for Windows, so your code is very useful
for me. Thank you!
However, this Windows API stuff is neither simple nor cross-platform.
So I agree with Tommy that get_bytes() is really a standard routine
that should be considerably faster.

[code snipped]

> This is FAST....be careful, I think Rob C. says Gets( ) reads 1024 bytes
> at any one time. This routine can read as many as you supply, you could get
> into trouble or have a slow routine if virtual memory is needed to store
> the data.

Thanks for the warning. My program reads chunks of 32 KB each, so this
shouldn't be a problem.

I wrote a small program that compares the speed of gets(), get_bytes(),
and your Windows API code. On my Pentium 2, 400 MHz, 64 MB RAM (under
Win 98/1st ed.), the results for reading a 10 MB text file, consisting
of 150000 lines are as follows:
1.13 sec. using gets()                -- 2 times faster than get_bytes()
2.25 sec. using get_bytes()
0.70 sec. using Windows API routines  -- 3 times faster than get_bytes()


--====================================================================--
include file.e
include get.e
include dll.e
include machine.e

constant MAX_CHUNK = 32*1024    -- 32 KB

-----------=-----------=------------=------------=----------=-----------

global function read_file1 (sequence fileName)
   -- Read the text file 'fileName' one line at a time with gets().
   -- Returns 0 when the whole file was read, -1 if it could not be opened.
   integer handle
   object line

   handle = open(fileName, "r")
   if handle != -1 then
      while 1 do
         line = gets(handle)
         if atom(line) then
            exit             -- gets() delivered an atom: no more lines
         end if
         --** do something with the stuff in the buffer
      end while
      close(handle)
      return 0               -- success
   end if

   return -1                 -- error
end function

-----------=-----------=------------=------------=----------=-----------

global function read_file2 (sequence fileName, atom fileSize)
   -- Read 'fileName' in chunks of at most MAX_CHUNK bytes with get_bytes().
   --   fileName : path of the file to read
   --   fileSize : number of bytes to read (normally the size reported by dir())
   -- Returns 0 on success, -1 if the file could not be opened.
   sequence buffer
   atom remaining
   integer fn, buffSize

   -- get_bytes() is a binary-mode routine: in text mode ("r") Windows
   -- translates CR/LF pairs and stops at Ctrl-Z, so the number of bytes
   -- delivered would not match the on-disk size passed in 'fileSize'.
   fn = open(fileName, "rb")
   if fn = -1 then
      return -1        -- error
   end if

   buffSize = MAX_CHUNK
   remaining = fileSize
   while remaining > 0 do
      if remaining < buffSize then
         buffSize = remaining
      end if
      buffer = get_bytes(fn, buffSize)
      if length(buffer) = 0 then
         exit          -- end of file reached before 'fileSize' bytes were read
      end if
      --** do something with the stuff in the buffer
      -- count what was actually delivered, not what was requested
      remaining -= length(buffer)
   end while

   close(fn)
   return 0            -- success
end function

-----------=-----------=------------=------------=----------=-----------
-- Euman's API code

-- Handle of kernel32.dll and the routine ids of the three API functions
-- that the code below calls through c_func().
constant
   kernel32 = open_dll("kernel32.dll"),
   -- CreateFileA: seven arguments, result declared as C_LONG
   xCreateFile = define_c_func(kernel32,"CreateFileA",{C_POINTER,C_LONG,
                               C_LONG,C_POINTER,C_LONG,C_LONG,C_INT},
                               C_LONG),
   -- ReadFile: five arguments, result declared as C_LONG
   xReadFile = define_c_func(kernel32,"ReadFile",{C_INT,C_POINTER,C_UINT,
                               C_POINTER,C_POINTER},C_LONG),
   -- CloseHandle: one argument, result declared as C_LONG
   xCloseHandle = define_c_func(kernel32,"CloseHandle",{C_LONG},C_LONG)

-- Argument values handed to CreateFileA by OpenFile_rb().
constant
   GENERIC_READ              = #80000000,
   FILE_ATTRIBUTE_NORMAL     = #80,
   FILE_FLAG_SEQUENTIAL_SCAN = #8000000,
   OPEN_EXISTING             = 3

function OpenFile_rb (sequence fname)
   -- Open the existing file 'fname' for reading through CreateFileA.
   -- Returns whatever CreateFileA returns: the file handle, or -1 (the
   -- value the caller tests for) when the file could not be opened.
   atom handle, FileName

   -- CreateFileA needs the name as a 0-terminated string in memory
   FileName = allocate_string(fname)
   handle = c_func(xCreateFile,{FileName,
                               GENERIC_READ,
                               0,
                               NULL,
                               OPEN_EXISTING,
                               FILE_ATTRIBUTE_NORMAL+FILE_FLAG_SEQUENTIAL_SCAN,
                               NULL})
   -- the name buffer is only needed for the duration of the call;
   -- release it so that repeated calls do not leak memory
   free(FileName)
   return handle
end function

-- Address handed to the API ReadFile as its 4th argument; read_file3()
-- points it at a 4-byte block before reading and frees it afterwards.
atom lpNumberOfBytesRead

function ReadFile (atom hFile, atom lpBuffer, atom nNumberOfBytesToRead)
   -- Thin wrapper around the API ReadFile: request 'nNumberOfBytesToRead'
   -- bytes from 'hFile' into the memory at 'lpBuffer' and hand back the
   -- API's own return value. The last argument is always passed as 0.
   atom result

   result = c_func(xReadFile, {hFile,
                               lpBuffer,
                               nNumberOfBytesToRead,
                               lpNumberOfBytesRead,
                               0})
   return result
end function


global function read_file3 (sequence fileName, atom fileSize)
   -- Read 'fileName' in chunks of at most MAX_CHUNK bytes using the
   -- Windows API (CreateFileA / ReadFile / CloseHandle).
   --   fileName : path of the file to read
   --   fileSize : number of bytes to read (normally the size reported by dir())
   -- Returns 0 on success, -1 if the file could not be opened or a
   -- ReadFile call reported failure.
   sequence buffer
   -- the handle is kept in an atom: API handle values are full 32-bit
   -- quantities and need not fit into Euphoria's 31-bit integer type
   atom hFile, lpBuffer, remaining, bytesRead
   integer buffSize, void, status

   hFile = OpenFile_rb(fileName)
   if hFile = -1 then
      return -1        -- error
   end if

   buffSize = MAX_CHUNK
   lpBuffer = allocate(buffSize)
   lpNumberOfBytesRead = allocate(4)
   status = 0
   remaining = fileSize
   while remaining > 0 do
      if remaining < buffSize then
         buffSize = remaining
      end if
      if ReadFile(hFile, lpBuffer, buffSize) = 0 then
         status = -1   -- the API reported a read error
         exit
      end if
      -- ReadFile stores the number of bytes it really delivered here;
      -- only that many bytes of the buffer are valid
      bytesRead = peek4u(lpNumberOfBytesRead)
      if bytesRead = 0 then
         exit          -- end of file reached before 'fileSize' bytes were read
      end if
      buffer = peek({lpBuffer, bytesRead})
      --** do something with the stuff in the buffer
      remaining -= bytesRead
   end while

   -- release both memory blocks and the handle on every exit path
   free(lpBuffer)
   free(lpNumberOfBytesRead)
   void = c_func(xCloseHandle, {hFile})
   return status       -- 0 = success
end function

-----------=-----------=------------=------------=----------=-----------

-- Compare speed of the 3 functions.

procedure wait_abort (sequence msg, integer code)
   -- Show 'msg' followed by a "press any key" prompt on the screen, wait
   -- for a keystroke, then terminate the program with exit code 'code'.
   integer key

   puts(1, msg)
   puts(1, "\n\nPress any key ...")
   key = wait_key()    -- the key itself is of no interest
   abort(code)
end procedure

-- Benchmark driver: looks up the size of the test file, then times each of
-- the three read functions once and prints the elapsed seconds.

object temp            -- result of dir()
sequence file          -- name of the file to read
atom fileSize, t1, t2, t3   -- size in bytes; elapsed time of each method
integer err            -- return code of the read function just called

file = "test.txt"      -- Text file

-- an atom result from dir() is treated as "file not found"
temp = dir(file)
if atom(temp) then
   wait_abort("File '" & file & "' not found.", 1)
end if
fileSize = temp[1][D_SIZE]
printf(1, "Results of reading '%s' (%.2f MB):\n",
          {file, fileSize/(1024*1024)})

-- Method 1: gets()
t1 = time()
err = read_file1(file)
t1 = time()-t1
if err != 0 then
   wait_abort("\n\nerror using gets().", 1)
end if
printf(1, "   %.2f sec. using gets()\n", {t1})

-- Method 2: get_bytes()
t2 = time()
err = read_file2(file, fileSize)
t2 = time()-t2
if err != 0 then
   wait_abort("\n\nerror using get_bytes().", 1)
end if
printf(1, "   %.2f sec. using get_bytes()\n", {t2})

-- Method 3: Windows API routines
t3 = time()
err = read_file3(file, fileSize)
t3 = time()-t3
if err != 0 then
   wait_abort("\n\nerror using Windows API routines.", 1)
end if
printf(1, "   %.2f sec. using Windows API routines", {t3})

-- normal end: wait for a key, then exit with code 0
wait_abort("", 0)
--====================================================================--


Regards,
   Juergen

-- 
 /"\  ASCII ribbon campain  |  This message has been ROT-13 encrypted
 \ /  against HTML in       |  twice for higher security.
  X   e-mail and news,      |
 / \  and unneeded MIME     |  http://home.arcor.de/luethje/prog/

new topic     » goto parent     » topic index » view thread      » older message » newer message

Search



Quick Links

User menu

Not signed in.

Misc Menu