Re: Could this be faster?
- Posted by ags <eu at 531pi.co.nz> Dec 14, 2005
- 592 views
C Bouzy wrote: > > ags wrote: > > I found using Greg's win_walkdir to be much faster than walk_dir. > > I tried using Greg's win_walkdir and I could not get it to work at all. > It just returned the directory name and thats it. > > ----If you continue to do what you have always done, > you will get what you have always gotten.---- I had to modify it, I think, for the way I was using it. Here's the code (thanks again, Greg :) followed by a short example of how I use it. Oh, and if it finds a new file, it has to open it (using BASS) to get the length, which makes the initial database creation very slow -- hence the 10 minutes mentioned in my previous post.
-- win_dir() function
-- by Greg Haberek <ghaberek at gmail.com>
--
-- walk_win_dir()
-- modified RDS Euphoria walk_dir in file.e to deal with trailing slash quirks
-- Windows only function, naturally.
-- by Gary Shingles <eu at 531pi.co.nz>
--
-- Works just like dir() but specifically for Windows. Uses Unicode and supports
-- any and all available characters, and up to 32,767 character path names.
--
-- The idea for this goes to Euman who devised a similar function in his
-- Euphoria Free File Manager (EFFM), in the Archive.
--
-- Note:
--   I know my structure access is a little crude, but I didn't
--   want this library to be dependant on anything like Win32Lib.

include dll.e
include get.e
include file.e
include machine.e

without warning

global constant
    kernel32_dll          = open_dll( "kernel32.dll" ),
    xlstrlenW             = define_c_func( kernel32_dll, "lstrlenW", {C_POINTER}, C_LONG ),
    xFindFirstFile        = define_c_func( kernel32_dll, "FindFirstFileW", {C_POINTER, C_POINTER}, C_LONG ),
    xFindNextFile         = define_c_func( kernel32_dll, "FindNextFileW", {C_LONG, C_POINTER}, C_INT ),
    xFindClose            = define_c_proc( kernel32_dll, "FindClose", {C_LONG} ),
    xFileTimeToSystemTime = define_c_func( kernel32_dll, "FileTimeToSystemTime", {C_POINTER, C_POINTER}, C_INT )

global function allocate_unicode( sequence string )
-- Allocates a null-terminated 16-bit (little-endian) copy of string.
-- Returns the address of the new buffer; the caller must free() it.
    integer len, byte0, byte1
    atom mem

    len = length( string )
    mem = allocate( (len*2) + 2 )

    for i = 1 to len do
        byte0 = and_bits( string[i], #FF )
        byte1 = floor( string[i] / #100 )
        poke( mem + ((i-1)*2)+0, byte0 )
        poke( mem + ((i-1)*2)+1, byte1 )
    end for

    -- two zero bytes = one 16-bit null terminator
    poke( mem + (len*2)+0, 0 )
    poke( mem + (len*2)+1, 0 )

    return mem
end function

global function peek_unicode( atom mem )
-- Reads a null-terminated 16-bit (little-endian) string starting at mem.
-- Returns a sequence of character codes, without the terminator.
-- Reading stops after 32768 characters even if no terminator was found.
    integer byte0, byte1
    sequence string

    string = ""
    while length(string) <= 32767 do
        byte0 = peek( mem + 0 )
        byte1 = peek( mem + 1 )
        if byte0 = 0 and byte1 = 0 then
            -- null terminator
            exit
        end if
        string &= (byte1 * #100) + byte0
        mem += 2
    end while

    return string
end function

function peek2( atom mem )
-- read a 2-byte value (Word) from memory
    return (peek(mem+1) * #100) + peek(mem+0)
end function

procedure poke2( atom mem, atom word )
-- write a 2-byte value (Word) to memory, low byte first
    poke( mem+0, and_bits( word, #FF ) )
    poke( mem+1, floor( word / #100 ) )
end procedure

procedure poke_unicode( atom mem, sequence string, integer max_chars )
-- Writes string as a null-terminated 16-bit string into a fixed-size field
-- of max_chars characters, truncating so that the terminator always fits.
    integer len

    len = length( string )
    if len > max_chars - 1 then
        len = max_chars - 1
    end if
    for i = 1 to len do
        poke2( mem + (i-1)*2, string[i] )
    end for
    poke2( mem + len*2, 0 )
end procedure

--global constant
--  -- FILETIME Structure
--  ft_dwLowDateTime    = w32allot( DWord ),   -- The FILETIME structure is a 64-bit value representing the
--  ft_dwHighDateTime   = w32allot( DWord ),   -- number of 100-nanosecond intervals since January 1, 1601.
--  SIZEOF_FILETIME     = w32allotted_size()
constant FILETIME_SIZE = 8

function FILETIME( object ft )
-- ft is an atom   : read the structure at that address, returns {low, high}
-- ft is a sequence: allocate a new structure holding {low, high} and return
--                   its address (the caller must free() it)
    atom ptr, dwLowDateTime, dwHighDateTime

    if atom( ft ) then
        -- read structure (both halves are unsigned DWORDs)
        ptr = ft
        dwLowDateTime = peek4u( ptr + 0 )
        dwHighDateTime = peek4u( ptr + 4 )
        return {dwLowDateTime, dwHighDateTime}
    else
        -- write structure
        ptr = allocate( FILETIME_SIZE )
        poke4( ptr + 0, ft[1] )
        poke4( ptr + 4, ft[2] )
        return ptr
    end if
end function

--global constant
--  -- SYSTEMTIME Structure
--  st_wYear            = w32allot( Word ),
--  st_wMonth           = w32allot( Word ),
--  st_wDayOfWeek       = w32allot( Word ),
--  st_wDay             = w32allot( Word ),
--  st_wHour            = w32allot( Word ),
--  st_wMinute          = w32allot( Word ),
--  st_wSecond          = w32allot( Word ),
--  st_wMilliseconds    = w32allot( Word ),
--  SIZEOF_SYSTEMTIME   = w32allotted_size()
constant SYSTEMTIME_SIZE = 16

function SYSTEMTIME( object st )
-- st is an atom   : read the structure at that address, returns
--                   {year, month, dayofweek, day, hour, minute, second, ms}
-- st is a sequence: allocate a new structure from those eight values and
--                   return its address (the caller must free() it)
    atom ptr, wYear, wMonth, wDayOfWeek, wDay, wHour, wMinute, wSecond, wMilliseconds

    if atom( st ) then
        -- read structure
        ptr = st
        wYear         = peek2( ptr + 0 )
        wMonth        = peek2( ptr + 2 )
        wDayOfWeek    = peek2( ptr + 4 )
        wDay          = peek2( ptr + 6 )
        wHour         = peek2( ptr + 8 )
        wMinute       = peek2( ptr + 10 )
        wSecond       = peek2( ptr + 12 )
        wMilliseconds = peek2( ptr + 14 )
        return {wYear, wMonth, wDayOfWeek, wDay, wHour, wMinute, wSecond, wMilliseconds}
    else
        -- write structure
        ptr = allocate( SYSTEMTIME_SIZE )
        poke2( ptr + 0, st[1] )
        poke2( ptr + 2, st[2] )
        poke2( ptr + 4, st[3] )
        poke2( ptr + 6, st[4] )
        poke2( ptr + 8, st[5] )
        poke2( ptr + 10, st[6] )
        poke2( ptr + 12, st[7] )
        poke2( ptr + 14, st[8] )
        return ptr
    end if
end function

--global constant
--  -- WIN32_FIND_DATA Structure
--  wfd_dwFileAttributes    = w32allot( DWord ),
--  wfd_ftCreationTime      = w32allot( SIZEOF_FILETIME ),
--  wfd_ftLastAccessTime    = w32allot( SIZEOF_FILETIME ),
--  wfd_ftLastWriteTime     = w32allot( SIZEOF_FILETIME ),
--  wfd_nFileSizeHigh       = w32allot( DWord ),
--  wfd_nFileSizeLow        = w32allot( DWord ),
--  wfd_dwReserved0         = w32allot( DWord ),
--  wfd_dwReserved1         = w32allot( DWord ),
--  wfd_cFileName           = w32allot({ Lpsz, MAX_SIZE }),
--  wfd_cAlternateFileName  = w32allot({ Lpsz, 14 }),
--  SIZEOF_WIN32_FIND_DATA  = allotted_size()

-- Deliberately generous buffer size; it is larger than the fixed-size
-- WIN32_FIND_DATAW layout described by the offsets below.
constant WIN32_FIND_DATA_SIZE = 40 + 32767 + 12

-- Field layout of WIN32_FIND_DATAW. Both name fields are fixed-size arrays of
-- 16-bit characters, so the alternate name always starts at the same offset
-- no matter how long the file name is.
constant
    WFD_FILENAME_OFFSET = 44,
    WFD_FILENAME_CHARS  = 260,      -- MAX_PATH
    WFD_ALTNAME_OFFSET  = WFD_FILENAME_OFFSET + (WFD_FILENAME_CHARS * 2),
    WFD_ALTNAME_CHARS   = 14

function WIN32_FIND_DATA( object wfd )
-- wfd is an atom   : read the structure at that address, returns
--                    {attributes, creation, lastaccess, lastwrite,
--                     sizehigh, sizelow, reserved0, reserved1,
--                     filename, alternatefilename}
--                    where the three times are {low, high} pairs
-- wfd is a sequence: allocate a new structure from a sequence of that same
--                    shape and return its address (the caller must free() it)
    atom ptr, dwFileAttributes, nFileSizeHigh, nFileSizeLow, dwReserved0, dwReserved1
    sequence ftCreationTime, ftLastAccessTime, ftLastWriteTime, cFileName, cAlternateFileName

    if atom( wfd ) then
        -- read structure (all DWORD fields are unsigned)
        ptr = wfd
        dwFileAttributes   = peek4u( ptr + 0 )
        ftCreationTime     = FILETIME( ptr + 4 )
        ftLastAccessTime   = FILETIME( ptr + 12 )
        ftLastWriteTime    = FILETIME( ptr + 20 )
        nFileSizeHigh      = peek4u( ptr + 28 )
        nFileSizeLow       = peek4u( ptr + 32 )
        dwReserved0        = peek4u( ptr + 36 )
        dwReserved1        = peek4u( ptr + 40 )
        cFileName          = peek_unicode( ptr + WFD_FILENAME_OFFSET )
        cAlternateFileName = peek_unicode( ptr + WFD_ALTNAME_OFFSET )
        return {dwFileAttributes, ftCreationTime, ftLastAccessTime, ftLastWriteTime,
                nFileSizeHigh, nFileSizeLow, dwReserved0, dwReserved1,
                cFileName, cAlternateFileName}
    else
        -- write structure
        ptr = allocate( WIN32_FIND_DATA_SIZE )
        poke4( ptr + 0, wfd[1] )
        poke4( ptr + 4, wfd[2] )    -- {low, high}: poke4 writes both DWORDs
        poke4( ptr + 12, wfd[3] )
        poke4( ptr + 20, wfd[4] )
        poke4( ptr + 28, wfd[5] )
        poke4( ptr + 32, wfd[6] )
        poke4( ptr + 36, wfd[7] )
        poke4( ptr + 40, wfd[8] )
        poke_unicode( ptr + WFD_FILENAME_OFFSET, wfd[9], WFD_FILENAME_CHARS )
        poke_unicode( ptr + WFD_ALTNAME_OFFSET, wfd[10], WFD_ALTNAME_CHARS )
        return ptr
    end if
end function

constant -- File Attributes
    zFILE_ATTRIBUTE_READONLY            = #00000001,
    zFILE_ATTRIBUTE_HIDDEN              = #00000002,
    zFILE_ATTRIBUTE_SYSTEM              = #00000004,
    zFILE_ATTRIBUTE_DIRECTORY           = #00000010,
    zFILE_ATTRIBUTE_ARCHIVE             = #00000020,
    zFILE_ATTRIBUTE_DEVICE              = #00000040,
    zFILE_ATTRIBUTE_NORMAL              = #00000080,
    zFILE_ATTRIBUTE_TEMPORARY           = #00000100,
    zFILE_ATTRIBUTE_SPARSE_FILE         = #00000200,
    zFILE_ATTRIBUTE_REPARSE_POINT       = #00000400,
    zFILE_ATTRIBUTE_COMPRESSED          = #00000800,
    zFILE_ATTRIBUTE_OFFLINE             = #00001000,
    zFILE_ATTRIBUTE_NOT_CONTENT_INDEXED = #00002000,
    zFILE_ATTRIBUTE_ENCRYPTED           = #00004000

constant INVALID_HANDLE_VALUE = -1

constant POSSIBLE_ATTRIBUTES = {
    { zFILE_ATTRIBUTE_DIRECTORY, 'd' },
    { zFILE_ATTRIBUTE_READONLY,  'r' },
    { zFILE_ATTRIBUTE_HIDDEN,    'h' },
    { zFILE_ATTRIBUTE_SYSTEM,    's' },
    { zFILE_ATTRIBUTE_DEVICE,    'v' }, -- just guessing here
    { zFILE_ATTRIBUTE_ARCHIVE,   'a' }}

-- must include file.e for these constants:
--global constant
--  D_NAME = 1,
--  D_ATTRIBUTES = 2,
--  D_SIZE = 3,
--
--  D_YEAR = 4,
--  D_MONTH = 5,
--  D_DAY = 6,
--
--  D_HOUR = 7,
--  D_MINUTE = 8,
--  D_SECOND = 9

function check_path( sequence path )
-- Turns a path into the search pattern handed to FindFirstFileW:
-- a trailing '\' gets '*' appended, and the result is prefixed with "\\?\".
-- An empty path is returned unchanged (win_dir() then simply fails).
    if length(path) = 0 then
        return path
    end if
    if path[$] = '\\' then
        path &= '*'
    end if
    return "\\\\?\\" & path
end function

function append_file( sequence wfd )
-- Converts one WIN32_FIND_DATA sequence (as returned by WIN32_FIND_DATA() in
-- read mode) into a dir()-style entry. Returns {entry} so the caller can
-- simply concatenate it onto its list.
    sequence entry, systemtime
    atom lpFileTime, lpSystemTime

    entry = repeat(0,9)
    entry[D_NAME] = wfd[9]
    -- 64-bit size = high DWORD * 2^32 + low DWORD
    entry[D_SIZE] = (wfd[5] * 4294967296) + wfd[6]
    entry[D_ATTRIBUTES] = ""

    for i = 1 to length(POSSIBLE_ATTRIBUTES) do
        if and_bits( wfd[1], POSSIBLE_ATTRIBUTES[i][1] ) then
            entry[D_ATTRIBUTES] &= POSSIBLE_ATTRIBUTES[i][2]
        end if
    end for

    -- last-write time -> broken-down date and time
    -- NOTE(review): FileTimeToSystemTime does no time-zone conversion, so the
    -- values are presumably UTC rather than local time - confirm if it matters.
    lpFileTime = FILETIME( wfd[4] )
    lpSystemTime = allocate( SYSTEMTIME_SIZE )
    if c_func( xFileTimeToSystemTime, {lpFileTime, lpSystemTime} ) then
        systemtime = SYSTEMTIME( lpSystemTime )
        -- wYear is already the full year and wDay is already 1..31,
        -- which is what dir() reports, so no adjustment is applied
        entry[D_YEAR]   = systemtime[1]
        entry[D_MONTH]  = systemtime[2]
        entry[D_DAY]    = systemtime[4]
        entry[D_HOUR]   = systemtime[5]
        entry[D_MINUTE] = systemtime[6]
        entry[D_SECOND] = systemtime[7]
    end if
    -- both buffers are temporaries; release them for every entry
    free( lpFileTime )
    free( lpSystemTime )

    return {entry}
end function

global function win_dir( sequence name )
-- returns directory information, given the name
-- of a file or directory. Format returned is:
--  {
--      {"name1", attributes, size, year, month, day, hour, minute, second},
--      {"name2", ... },
--  }
-- Returns 0 if nothing matches. To list the contents of a directory the
-- name must end in '\' (a '*' is appended) or already contain a wildcard.
-- Entries are in the order the system reports them; they are not sorted.
    atom lpName, lpWfd, handle
    sequence wfd, files

    name = check_path( name )
    files = {}

    lpName = allocate_unicode( name )
    lpWfd = allocate( WIN32_FIND_DATA_SIZE )

    handle = c_func( xFindFirstFile, {lpName, lpWfd} )
    if handle = INVALID_HANDLE_VALUE then
        -- failed!
        free( lpName )
        free( lpWfd )
        return 0
    end if

    wfd = WIN32_FIND_DATA( lpWfd )
    files &= append_file( wfd )

    while c_func( xFindNextFile, {handle, lpWfd} ) do
        wfd = WIN32_FIND_DATA( lpWfd )
        files &= append_file( wfd )
    end while

    c_proc( xFindClose, {handle} )
    free( lpName )
    free( lpWfd )

    return files
end function

-- ags
-- win_dir() not compatible with walk_dir, see new walk_win_dir
--global function walk_win_dir(sequence path_name, integer your_function,
--                             integer scan_subdirs)
--  object nothing
--  trace(1)
--  my_dir = routine_id("win_dir")  -- use win_dir() please
--                                  -- my_dir is global atom in file.e
--  nothing = walk_dir(path_name & "\\", your_function, scan_subdirs)
--  my_dir = -2                     -- constant DEFAULT=-2 in file.e
--  return nothing
--end function

-- check_slash(): appends a '\' to the path if needed so that win_dir can append '*'
-- NOTE: since this is windows specific, slash is only '\'
-- won't cope with "\\*" though
function check_slash(sequence path)
    -- compare against the character '\\', not the string "\\":
    -- equal(char, "\\") is never true, which used to double the slash.
    -- An empty path is left alone so that win_dir() reports it as bad.
    if length(path) > 0 and path[$] != '\\' then
        path &= '\\'    -- to be compatible with win_dir()
    end if
    return path
end function

global function walk_win_dir(sequence path_name, integer your_function, integer scan_subdirs)
-- Generalized Directory Walker
-- modified by Gary Shingles <eu at 531pi.co.nz> for win_dir()
-- Walk through a directory and (optionally) its subdirectories,
-- "visiting" each file and subdirectory. Your function will be called
-- via its routine id. The visits occur in the order win_dir() returns
-- the entries (they are not sorted here).
-- Your function should accept the path name and dir() entry for
-- each file and subdirectory. It should return 0 to keep going,
-- or an error code (greater than 0) to quit, or it can return
-- any sequence or atom other than 0 as a useful diagnostic value.
-- Returns W_BAD_PATH if path_name cannot be listed, 0 when the walk
-- completes, or whatever non-zero value your function returned.
    object d, abort_now, path

    -- get the full directory information
    path = check_slash(path_name)
    d = win_dir(path)
    if atom(d) then
        return W_BAD_PATH
    end if

    -- trim any trailing blanks or slashes from the path
    while length(path) > 0 and find(path[$], {' ', '\\'}) do
        path = path[1..$-1]
    end while

    for i = 1 to length(d) do
        if find('d', d[i][D_ATTRIBUTES]) then
            -- a directory
            if not find(d[i][D_NAME], {".", ".."}) then
                abort_now = call_func(your_function, {path, d[i]})
                if not equal(abort_now, 0) then
                    return abort_now
                end if
                if scan_subdirs then
                    abort_now = walk_win_dir(path & '\\' & d[i][D_NAME],
                                             your_function, scan_subdirs)
                    if not equal(abort_now, 0) and
                       not equal(abort_now, W_BAD_PATH) then
                        -- allow BAD PATH, user might delete a file or directory
                        return abort_now
                    end if
                end if
            end if
        else
            -- a file: pass the same trimmed path (no trailing slash) that
            -- directories get, so the callback always sees one form
            abort_now = call_func(your_function, {path, d[i]})
            if not equal(abort_now, 0) then
                return abort_now
            end if
        end if
    end for

    return 0
end function
Example use (essential bits only):
constant MODE_UPDATE = 1, MODE_INSERT = 2 function real_indexer(sequence pathname, sequence entry) real_pathname = pathname & "\\" & entry[D_NAME] rec = blank_record
str = lower(reverse(entry[D_NAME])) FOLDERS if equal(str,".") or equal(str, "..") then return 0
" & entry[D_NAME])
if length(real_pathname) = 0 then msg(sprintf("Broken Link %s", {entry[D_NAME]})) return 0
if equal(str[1..4], "3pm.") then files = {real_pathname} elsif equal(str[1..4], "trc.") then files = get_cart_files(real_pathname) else return 0
rec[TRK_KEY] = pathname & "
" & entry[D_NAME] always rec[TRK_PATH] = pathname rec[TRK_FILE] = files
idx = db_find_key(rec[TRK_KEY]) if idx >= 0 then orig_rec = db_record_data(idx) orig_ts = orig_rec[TRK_TS] comp = compare(entry[3..9], orig_ts) size and/or time changed if comp = 0 then return 0 no change else cart_seq = orig_rec[TRK_CART] mode = MODE_UPDATE end if else mode = MODE_INSERT end if
parent eg: C:\MUSIC, parent = C: C:\MUSIC\MODERN parent = C:\MUSIC bits = split(pathname, '
') if length(bits) = 0 then
if i != length(bits) - 1 then str &= '
' end if end for cats = append(cats, bits[length(bits)]) end if pdep = length(bits) - 1 TRK_PDEP path depth rec[TRK_PDEP] = pdep TRK_PARENT if find(pathname, rootfols) then this is a root folder, parent is {} rec[TRK_PARENT] = {} else rec[TRK_PARENT] = str end if
TRK_CATS rec[TRK_CATS] = cats
if mode = MODE_UPDATE then msg(sprintf("Updated: %s", {rec[TRK_KEY]})) db_replace_data(idx, rec) else msg(sprintf("Inserting: %s", {rec[TRK_KEY]})) junk = db_insert(rec[TRK_KEY], rec) if junk != DB_OK then msg(sprintf("Insert failed! %s (err: %d)", {entry[D_NAME], junk})) end if end if return 0 doEvents(0) end function
procedure index_folder(sequence folname)
-- Walks folname and all of its subdirectories, handing every entry to
-- real_indexer(). The walker's result is stored in junk and otherwise ignored.
    integer visitor

    visitor = routine_id("real_indexer")
    junk = walk_win_dir(folname, visitor, 1)
end procedure
gary