Re: getc( ) and gets( ) speed
- Posted by "Juergen Luethje" <j.lue at gmx.de> Jan 03, 2004
- 515 views
Euman wrote: > ----- Original Message ----- > From: "Juergen Luethje" > >> Tommy wrote: <snip> >>> Robert, couldn't you consider making get_bytes a >>> low-level, builtin routine to dramatically improve its performance? I >>> think get_bytes is a really standard routine that is probably quite >>> frequently used. >> >> I would appreciate that _very_ much. > > If you're using windows, try this: Yes, at the moment, I need it for Windows, so your code is very useful for me. Thank you! However, this Windows API stuff is neither simple nor cross-platform. So I agree with Tommy that get_bytes() is really a standard routine, which should be considerably faster. [code snipped] > This is FAST....be careful, I think Rob C. says Gets( ) reads 1024 bytes > at any one time. This routine can read as many as you supply, you could get > into trouble or have a slow routine if virtual memory is needed to store > the data. Thanks for the warning. My program reads chunks of 32 KB each, so this shouldn't be a problem. I wrote a small program that compares the speed of gets(), get_bytes(), and your Windows API code. On my Pentium 2, 400 MHz, 64 MB RAM (under Win 98/1st ed.), the results for reading a 10 MB text file, consisting of 150000 lines are as follows: 1.13 sec. using gets() -- 2 times faster than get_bytes() 2.25 sec. using get_bytes() 0.70 sec. 
using Windows API routines -- 3 times faster than get_bytes()

--====================================================================--
-- Speed comparison of three ways to read a whole file:
--   read_file1: line by line with gets()
--   read_file2: in chunks with get_bytes()
--   read_file3: in chunks with the Windows API (CreateFileA/ReadFile)

include file.e
include get.e
include dll.e
include machine.e

constant MAX_CHUNK = 32*1024   -- 32 KB; chunk size used by read_file2/3

-----------=-----------=------------=------------=----------=-----------

global function read_file1 (sequence fileName)
   -- Read 'fileName' line by line using gets().
   -- Returns 0 on success, -1 if the file cannot be opened.
   object buffer   -- a sequence for each line, an atom once gets() hits EOF
   integer fn

   fn = open(fileName, "r")
   if fn = -1 then
      return -1   -- error
   end if

   buffer = gets(fn)
   while sequence(buffer) do
      --** do something with the stuff in the buffer
      buffer = gets(fn)
   end while
   close(fn)
   return 0       -- success
end function

-----------=-----------=------------=------------=----------=-----------

global function read_file2 (sequence fileName, atom fileSize)
   -- Read up to 'fileSize' bytes of 'fileName' in chunks of at most
   -- MAX_CHUNK bytes using get_bytes().
   -- Returns 0 on success, -1 if the file cannot be opened.
   sequence buffer
   atom remaining
   integer fn, buffSize

   -- Binary mode: 'fileSize' is the size on disk, and in text mode the
   -- CR LF translation would deliver fewer bytes than that.
   fn = open(fileName, "rb")
   if fn = -1 then
      return -1   -- error
   end if

   buffSize = MAX_CHUNK
   remaining = fileSize
   while remaining > 0 do
      if remaining < buffSize then
         buffSize = remaining
      end if
      buffer = get_bytes(fn, buffSize)
      if length(buffer) = 0 then
         exit     -- end of file reached before 'fileSize' bytes were read
      end if
      --** do something with the stuff in the buffer
      remaining -= length(buffer)   -- count only what was really read
   end while
   close(fn)
   return 0       -- success
end function

-----------=-----------=------------=------------=----------=-----------
-- Euman's API code

constant
   kernel32 = open_dll("kernel32.dll"),
   xCreateFile = define_c_func(kernel32,"CreateFileA",{C_POINTER,C_LONG,
                 C_LONG,C_POINTER,C_LONG,C_LONG,C_INT}, C_LONG),
   xReadFile = define_c_func(kernel32,"ReadFile",{C_INT,C_POINTER,C_UINT,
               C_POINTER,C_POINTER},C_LONG),
   xCloseHandle = define_c_func(kernel32,"CloseHandle",{C_LONG},C_LONG)

constant
   GENERIC_READ = #80000000,
   FILE_ATTRIBUTE_NORMAL = #80,
   FILE_FLAG_SEQUENTIAL_SCAN = #8000000,
   OPEN_EXISTING = 3

function OpenFile_rb (sequence fname)
   -- Open the existing file 'fname' for reading via CreateFileA.
   -- Returns the value CreateFileA returned (the caller compares it
   -- against -1 to detect failure).
   atom handle, FileName

   FileName = allocate_string(fname)
   handle = c_func(xCreateFile,{FileName, GENERIC_READ, 0, NULL,
            OPEN_EXISTING,
            FILE_ATTRIBUTE_NORMAL+FILE_FLAG_SEQUENTIAL_SCAN, NULL})
   free(FileName)   -- the name buffer is not needed after the call
   return handle
end function

atom lpNumberOfBytesRead   -- address of a 4-byte cell that receives the
                           -- actual No. of bytes read by routine;
                           -- allocated and freed by read_file3

function ReadFile (atom hFile, atom lpBuffer, atom nNumberOfBytesToRead)
   -- Thin wrapper around the ReadFile API. The number of bytes read is
   -- stored at lpNumberOfBytesRead, which must already be allocated.
   -- Returns the API's result (0 means failure).
   return c_func(xReadFile,{hFile,lpBuffer,nNumberOfBytesToRead,lpNumberOfBytesRead,0})
end function

global function read_file3 (sequence fileName, atom fileSize)
   -- Read up to 'fileSize' bytes of 'fileName' in chunks of at most
   -- MAX_CHUNK bytes using the Windows API.
   -- Returns 0 on success, -1 if the file cannot be opened or a read fails.
   sequence buffer
   atom hFile, lpBuffer, remaining, ok, void
   integer buffSize, bytesRead, status

   -- A handle is kept in an atom: it is not guaranteed to fit into
   -- Euphoria's 31-bit integer type.
   hFile = OpenFile_rb(fileName)
   if hFile = -1 then
      return -1   -- error
   end if

   buffSize = MAX_CHUNK
   lpBuffer = allocate(buffSize)
   lpNumberOfBytesRead = allocate(4)
   remaining = fileSize
   status = 0
   while remaining > 0 do
      if remaining < buffSize then
         buffSize = remaining
      end if
      ok = ReadFile(hFile, lpBuffer, buffSize)
      if ok = 0 then
         status = -1   -- read error; fall through to the clean-up below
         exit
      end if
      bytesRead = peek4u(lpNumberOfBytesRead)
      if bytesRead = 0 then
         exit          -- end of file reached before 'fileSize' bytes were read
      end if
      -- Only the bytes actually delivered are valid in the buffer.
      buffer = peek({lpBuffer, bytesRead})
      --** do something with the stuff in the buffer
      remaining -= bytesRead
   end while
   free(lpBuffer)
   free(lpNumberOfBytesRead)
   void = c_func(xCloseHandle, {hFile})
   return status       -- 0 = success
end function

-----------=-----------=------------=------------=----------=-----------
-- Compare speed of the 3 functions.

procedure wait_abort (sequence msg, integer code)
   -- Print 'msg', wait for a key press, then terminate with exit 'code'.
   puts(1, msg & "\n\nPress any key ...")
   if wait_key() then end if   -- result of wait_key() is deliberately ignored
   abort(code)
end procedure

object temp
sequence file
atom fileSize, t1, t2, t3
integer err

file = "test.txt"       -- Text file
temp = dir(file)
if atom(temp) then
   wait_abort("File '" & file & "' not found.", 1)
end if
fileSize = temp[1][D_SIZE]
printf(1, "Results of reading '%s' (%.2f MB):\n", {file, fileSize/(1024*1024)})

t1 = time()
err = read_file1(file)
t1 = time()-t1
if err != 0 then
   wait_abort("\n\nerror using gets().", 1)
end if
printf(1, " %.2f sec. using gets()\n", {t1})

t2 = time()
err = read_file2(file, fileSize)
t2 = time()-t2
if err != 0 then
   wait_abort("\n\nerror using get_bytes().", 1)
end if
printf(1, " %.2f sec. using get_bytes()\n", {t2})

t3 = time()
err = read_file3(file, fileSize)
t3 = time()-t3
if err != 0 then
   wait_abort("\n\nerror using Windows API routines.", 1)
end if
printf(1, " %.2f sec. using Windows API routines", {t3})

wait_abort("", 0)
--====================================================================--

Regards, Juergen -- /"\ ASCII ribbon campain | This message has been ROT-13 encrypted \ / against HTML in | twice for higher security. X e-mail and news, | / \ and unneeded MIME | http://home.arcor.de/luethje/prog/