Re: Could this be faster?

new topic     » goto parent     » topic index » view thread      » older message » newer message

C Bouzy wrote:
> 
> ags wrote:
> > I found using Greg's win_walkdir to be much faster than walk_dir.
> 
> I tried using Greg's win_walkdir and I could not get it to work at all.
> It just returned the directory name and thats it.
> 
>  ----If you continue to do what you have always done,
> you will get what you have always gotten.----

I had to modify it, I think, to suit the way I was using it.  Here's the code
(thanks again Greg :) followed by a short example of how I use it:

Oh and if it finds a new file, it has to open it (using BASS) and get the
length, which makes the initial database creation very slow, hence the
10 mins mentioned in my previous post.

--	win_dir() function
--	by Greg Haberek <ghaberek at gmail.com>
--
--      walk_win_dir()
--      modified RDS Euphoria walk_dir in file.e to deal with trailing slash quirks
--      Windows only function, naturally.
--      by Gary Shingles <eu at 531pi.co.nz>
--
--	Works just like dir() but specifically for Windows. Uses Unicode and supports
--	any and all available characters, and up to 32,767 character path names.
--
--	The idea for this goes to Euman who devised a similar function in his
--	Euphoria Free File Manager (EFFM), in the Archive.
--
--	Note:
--	  I know my structure access is a little crude, but I didn't
--	  want this library to be dependent on anything like Win32Lib.

include dll.e
include get.e
include file.e
include machine.e
without warning

-- Handle returned by open_dll() for kernel32.dll; every routine id below is
-- looked up through it.
global constant kernel32_dll = open_dll( "kernel32.dll" )

-- lstrlenW( pointer ) -> C long
global constant xlstrlenW = define_c_func( kernel32_dll, "lstrlenW",
	{C_POINTER}, C_LONG )

-- FindFirstFileW( pointer, pointer ) -> C long (the search handle)
global constant xFindFirstFile = define_c_func( kernel32_dll, "FindFirstFileW",
	{C_POINTER, C_POINTER}, C_LONG )

-- FindNextFileW( search handle, pointer ) -> C int
global constant xFindNextFile = define_c_func( kernel32_dll, "FindNextFileW",
	{C_LONG, C_POINTER}, C_INT )

-- FindClose( search handle ), called as a procedure (result ignored)
global constant xFindClose = define_c_proc( kernel32_dll, "FindClose",
	{C_LONG} )

-- FileTimeToSystemTime( pointer, pointer ) -> C int
global constant xFileTimeToSystemTime = define_c_func( kernel32_dll,
	"FileTimeToSystemTime", {C_POINTER, C_POINTER}, C_INT )

global function allocate_unicode( sequence string )
-- Stores 'string' in freshly allocated memory as 2-byte characters
-- (low byte first) followed by a 2-byte zero terminator, and returns the
-- address.  Nothing here frees the block; that is left to the caller.

	integer lo, hi
	atom addr
	sequence bytes

	-- build the complete byte image first, then write it with one poke
	bytes = {}
	for k = 1 to length( string ) do
		lo = and_bits( string[k], #FF )
		hi = floor( string[k] / #100 )
		bytes &= {lo, hi}
	end for

	-- double null terminator
	bytes &= {0, 0}

	addr = allocate( length( bytes ) )
	poke( addr, bytes )

	return addr
end function

global function peek_unicode( atom mem )
-- Reads 2-byte characters (low byte first) starting at 'mem' until a
-- zero character is found, and returns them as a sequence of character
-- codes.  Reading also stops once more than 32767 characters have been
-- collected.

	atom addr
	sequence text, pair

	addr = mem
	text = ""
	while length( text ) <= 32767 do
		pair = peek( {addr, 2} )
		if pair[1] = 0 and pair[2] = 0 then
			-- null terminator
			exit
		end if
		text &= (pair[2] * #100) + pair[1]
		addr += 2
	end while

	return text
end function

function peek2( atom mem )
-- Returns the 2-byte value (Word) stored at 'mem', low byte first.
	sequence pair
	pair = peek( {mem, 2} )
	return pair[1] + (pair[2] * #100)
end function

procedure poke2( atom mem, atom word )
-- Stores 'word' at 'mem' as two bytes, low byte first.
	poke( mem, {and_bits( word, #FF ), floor( word / #100 )} )
end procedure

--global constant
--	-- FILETIME Structure
--	ft_dwLowDateTime		= w32allot( DWord ),	-- The FILETIME structure is a 64-bit value representing the
--	ft_dwHighDateTime		= w32allot( DWord ),	-- number of 100-nanosecond intervals since January 1, 1601.
--	SIZEOF_FILETIME			= w32allotted_size()

constant FILETIME_SIZE = 8
function FILETIME( object ft )
-- Two-way accessor for an 8-byte FILETIME structure.
--   atom 'ft'     : treated as an address; returns {low, high}, each dword
--                   read as a signed 32-bit value.
--   sequence 'ft' : {low, high} is written to a newly allocated
--                   FILETIME_SIZE-byte block whose address is returned.
--                   The block is not freed here.

	atom mem

	if sequence( ft ) then
		-- write structure
		mem = allocate( FILETIME_SIZE )
		poke4( mem, ft[1] )
		poke4( mem + 4, ft[2] )
		return mem
	end if

	-- read structure
	return {peek4s( ft ), peek4s( ft + 4 )}

end function

--global constant
--	-- SYSTEMTIME Structure
--	st_wYear				= w32allot( Word ),
--	st_wMonth				= w32allot( Word ),
--	st_wDayOfWeek			= w32allot( Word ),
--	st_wDay					= w32allot( Word ),
--	st_wHour				= w32allot( Word ),
--	st_wMinute				= w32allot( Word ),
--	st_wSecond				= w32allot( Word ),
--	st_wMilliseconds		= w32allot( Word ),
--	SIZEOF_SYSTEMTIME		= w32allotted_size()

constant SYSTEMTIME_SIZE = 16

function SYSTEMTIME( object st )
-- Two-way accessor for a 16-byte SYSTEMTIME structure made of eight
-- consecutive 2-byte fields: year, month, day of week, day, hour, minute,
-- second, milliseconds.
--   atom 'st'     : treated as an address; returns the eight fields as a
--                   sequence in the order above.
--   sequence 'st' : the first eight elements are written to a newly
--                   allocated block whose address is returned.  The block
--                   is not freed here.

	atom mem
	sequence fields

	if sequence( st ) then
		-- write structure
		mem = allocate( SYSTEMTIME_SIZE )
		for n = 1 to 8 do
			poke2( mem + (n-1)*2, st[n] )
		end for
		return mem
	end if

	-- read structure
	fields = repeat( 0, 8 )
	for n = 1 to 8 do
		fields[n] = peek2( st + (n-1)*2 )
	end for
	return fields

end function

--global constant
--	-- WIN32_FIND_DATA Structure
--	wfd_dwFileAttributes	= w32allot( DWord ),
--	wfd_ftCreationTime		= w32allot( SIZEOF_FILETIME ),
--	wfd_ftLastAccessTime	= w32allot( SIZEOF_FILETIME ),
--	wfd_ftLastWriteTime		= w32allot( SIZEOF_FILETIME ),
--	wfd_nFileSizeHigh		= w32allot( DWord ),
--	wfd_nFileSizeLow		= w32allot( DWord ),
--	wfd_dwReserved0			= w32allot( DWord ),
--	wfd_dwReserved1			= w32allot( DWord ),
--	wfd_cFileName			= w32allot({ Lpsz, MAX_SIZE }),
--	wfd_cAlternateFileName	= w32allot({ Lpsz, 14 }),
--	SIZEOF_WIN32_FIND_DATA	= allotted_size()

-- Size used when allocating a find-data buffer.  The Win32 WIN32_FIND_DATAW
-- structure itself is 592 bytes (44 bytes of fixed fields, then
-- WCHAR cFileName[260] and WCHAR cAlternateFileName[14]); this value
-- over-allocates, which is harmless, and is kept unchanged.
constant WIN32_FIND_DATA_SIZE = 40 + 32767 + 12

-- Layout of the two fixed-size name fields (counts are in 2-byte characters).
constant WFD_MAX_PATH = 260		-- capacity of cFileName
constant WFD_ALT_LEN = 14		-- capacity of cAlternateFileName
constant WFD_NAME_OFFSET = 44	-- byte offset of cFileName
constant WFD_ALT_OFFSET = WFD_NAME_OFFSET + (WFD_MAX_PATH * 2)	-- byte offset of cAlternateFileName

procedure wfd_poke_wide( atom mem, sequence text, integer capacity )
-- Writes 'text' at 'mem' as 2-byte characters, truncated to capacity-1
-- characters so that the 2-byte zero terminator always fits in the field.
	if length( text ) > capacity - 1 then
		text = text[1..capacity-1]
	end if
	for i = 1 to length( text ) do
		poke2( mem + ((i-1)*2), text[i] )
	end for
	poke2( mem + (length( text )*2), 0 )
end procedure

function WIN32_FIND_DATA( object wfd )
-- Two-way accessor for a WIN32_FIND_DATA (wide-character) structure.
--   atom 'wfd'     : treated as an address; returns
--                    {dwFileAttributes, ftCreationTime, ftLastAccessTime,
--                     ftLastWriteTime, nFileSizeHigh, nFileSizeLow,
--                     dwReserved0, dwReserved1, cFileName,
--                     cAlternateFileName}
--                    where each time is the {low, high} pair produced by
--                    FILETIME() and each name is a sequence of character
--                    codes.
--   sequence 'wfd' : a sequence in the same format is written to a newly
--                    allocated block whose address is returned.  The block
--                    is not freed here.

	atom ptr, dwFileAttributes, nFileSizeHigh, nFileSizeLow,
		dwReserved0, dwReserved1
	sequence ftCreationTime, ftLastAccessTime, ftLastWriteTime,
		cFileName, cAlternateFileName

	if atom( wfd ) then
		-- read structure
		ptr = wfd

		-- DWORD fields are unsigned; reading them signed would turn a
		-- low size dword of 2 GB or more into a negative number.
		dwFileAttributes		= peek4u( ptr +  0 )
		ftCreationTime			= FILETIME( ptr +  4 )
		ftLastAccessTime		= FILETIME( ptr + 12 )
		ftLastWriteTime			= FILETIME( ptr + 20 )
		nFileSizeHigh			= peek4u( ptr + 28 )
		nFileSizeLow			= peek4u( ptr + 32 )
		dwReserved0				= peek4u( ptr + 36 )
		dwReserved1				= peek4u( ptr + 40 )

		-- Both names live in fixed-size arrays, so the alternate (8.3)
		-- name starts at a fixed offset and not right after the
		-- characters of the long name.
		cFileName				= peek_unicode( ptr + WFD_NAME_OFFSET )
		cAlternateFileName		= peek_unicode( ptr + WFD_ALT_OFFSET )

		return {dwFileAttributes, ftCreationTime, ftLastAccessTime,
			ftLastWriteTime, nFileSizeHigh, nFileSizeLow, dwReserved0,
			dwReserved1, cFileName, cAlternateFileName}

	else
		-- write structure (same offsets as the read branch)
		ptr = allocate( WIN32_FIND_DATA_SIZE )

		poke4( ptr +  0, wfd[1] )
		poke4( ptr +  4, wfd[2] )	-- {low, high}: fills offsets 4 and 8
		poke4( ptr + 12, wfd[3] )	-- {low, high}: fills offsets 12 and 16
		poke4( ptr + 20, wfd[4] )	-- {low, high}: fills offsets 20 and 24
		poke4( ptr + 28, wfd[5] )
		poke4( ptr + 32, wfd[6] )
		poke4( ptr + 36, wfd[7] )
		poke4( ptr + 40, wfd[8] )

		wfd_poke_wide( ptr + WFD_NAME_OFFSET, wfd[9], WFD_MAX_PATH )
		wfd_poke_wide( ptr + WFD_ALT_OFFSET, wfd[10], WFD_ALT_LEN )
		return ptr

	end if

end function

-- File attribute bit flags, one constant per bit
constant zFILE_ATTRIBUTE_READONLY            = #0001
constant zFILE_ATTRIBUTE_HIDDEN              = #0002
constant zFILE_ATTRIBUTE_SYSTEM              = #0004
constant zFILE_ATTRIBUTE_DIRECTORY           = #0010
constant zFILE_ATTRIBUTE_ARCHIVE             = #0020
constant zFILE_ATTRIBUTE_DEVICE              = #0040
constant zFILE_ATTRIBUTE_NORMAL              = #0080
constant zFILE_ATTRIBUTE_TEMPORARY           = #0100
constant zFILE_ATTRIBUTE_SPARSE_FILE         = #0200
constant zFILE_ATTRIBUTE_REPARSE_POINT       = #0400
constant zFILE_ATTRIBUTE_COMPRESSED          = #0800
constant zFILE_ATTRIBUTE_OFFLINE             = #1000
constant zFILE_ATTRIBUTE_NOT_CONTENT_INDEXED = #2000
constant zFILE_ATTRIBUTE_ENCRYPTED           = #4000

-- Value win_dir() compares the FindFirstFile result against to detect failure
constant INVALID_HANDLE_VALUE = -1

-- {attribute bit, letter} pairs; append_file() adds the letter to
-- D_ATTRIBUTES for every bit that is set, in this order.
constant POSSIBLE_ATTRIBUTES = {
	{ zFILE_ATTRIBUTE_DIRECTORY, 'd' },
	{ zFILE_ATTRIBUTE_READONLY,  'r' },
	{ zFILE_ATTRIBUTE_HIDDEN,    'h' },
	{ zFILE_ATTRIBUTE_SYSTEM,    's' },
	{ zFILE_ATTRIBUTE_DEVICE,    'v' },	-- just guessing here
	{ zFILE_ATTRIBUTE_ARCHIVE,   'a' }
}

-- must include file.e for these constants:
--global constant 
--	D_NAME = 1,
--	D_ATTRIBUTES = 2,
--	D_SIZE = 3,
--
--	D_YEAR = 4,
--	D_MONTH = 5,
--	D_DAY = 6,
--
--	D_HOUR = 7,
--	D_MINUTE = 8,
--	D_SECOND = 9

function check_path( sequence path )
-- Prepares 'path' for FindFirstFileW:
--   * a path ending in '\' gets '*' appended, so the directory's contents
--     are listed;
--   * the "\\?\" prefix is added unless the path already starts with it.
-- An empty path is returned unchanged; previously the function ended
-- without a return statement in that case, which is a run-time error.

	if length(path) = 0 then
		return path
	end if

	if path[$] = '\\' then
		path &= '*'
	end if

	-- do not stack a second prefix on a path that already carries one
	if match( "\\\\?\\", path ) != 1 then
		path = "\\\\?\\" & path
	end if

	return path

end function

function append_file( sequence wfd )
-- Converts one WIN32_FIND_DATA() result into a dir()-style entry
--   {name, attributes, size, year, month, day, hour, minute, second}
-- and returns it wrapped in a one-element sequence so the caller can
-- concatenate it onto its list with &=.
-- The date/time fields stay 0 when FileTimeToSystemTime reports failure.

	sequence entry, st
	atom lpFileTime, lpSystemTime, size_high, size_low

	entry = repeat(0,9)
	entry[D_NAME] = wfd[9]

	-- The size is a 64-bit value split into two unsigned dwords.  Accept
	-- dwords that were read as signed values by mapping negatives back
	-- into the unsigned range, then combine with 2^32 (not #10000).
	size_high = wfd[5]
	size_low = wfd[6]
	if size_high < 0 then
		size_high += 4294967296
	end if
	if size_low < 0 then
		size_low += 4294967296
	end if
	entry[D_SIZE] = (size_high * 4294967296) + size_low

	entry[D_ATTRIBUTES] = ""
	for i = 1 to length(POSSIBLE_ATTRIBUTES) do
		if and_bits( wfd[1], POSSIBLE_ATTRIBUTES[i][1] ) then
			entry[D_ATTRIBUTES] &= POSSIBLE_ATTRIBUTES[i][2]
		end if
	end for

	-- wfd[4] is the last-write time as a {low, high} pair; FILETIME()
	-- writes it into a newly allocated structure for the API call.
	lpFileTime = FILETIME( wfd[4] )
	lpSystemTime = allocate( SYSTEMTIME_SIZE )
	if c_func( xFileTimeToSystemTime, {lpFileTime, lpSystemTime} ) then
		st = SYSTEMTIME( lpSystemTime )
		-- dir() reports the full year, and SYSTEMTIME's day of month is
		-- already 1-based, so both values are used as they are.
		entry[D_YEAR] = st[1]
		entry[D_MONTH] = st[2]
		entry[D_DAY] = st[4]
		entry[D_HOUR] = st[5]
		entry[D_MINUTE] = st[6]
		entry[D_SECOND] = st[7]
		-- NOTE(review): the Win32 docs describe this conversion as UTC,
		-- while dir() presumably reports local time -- confirm whether a
		-- local-time conversion is wanted.
	end if

	-- both buffers were allocated for this one entry only; release them
	free( lpFileTime )
	free( lpSystemTime )

	return {entry}
end function


global function win_dir( sequence name )
-- Lists the directory entries matching 'name' (a file or directory name,
-- passed through check_path() first).  On success the result is
-- {
--  {"name1", attributes, size, year, month, day, hour, minute, second},
--  {"name2", ...                                                     },
-- }
-- in the order the FindFirstFile/FindNextFile calls deliver the entries.
-- Returns the atom 0 when FindFirstFile reports INVALID_HANDLE_VALUE.

	atom pattern_mem, data_mem, search
	integer found
	sequence listing

	name = check_path( name )

	pattern_mem = allocate_unicode( name )
	data_mem = allocate( WIN32_FIND_DATA_SIZE )

	search = c_func( xFindFirstFile, {pattern_mem, data_mem} )
	found = ( search != INVALID_HANDLE_VALUE )

	listing = {}
	if found then
		-- data_mem already holds the first entry; convert it, then keep
		-- fetching until FindNextFile returns 0
		while 1 do
			listing &= append_file( WIN32_FIND_DATA( data_mem ) )
			if not c_func( xFindNextFile, {search, data_mem} ) then
				exit
			end if
		end while
		c_proc( xFindClose, {search} )
	end if

	-- the two buffers are released on both the success and failure paths
	free( pattern_mem )
	free( data_mem )

	if found then
		return listing
	end if
	return 0
end function

-- ags -- win_dir() not compatible with walk_dir, see new walk_win_dir
--global function walk_win_dir(sequence path_name, integer your_function, 
--                               integer scan_subdirs)
--    object nothing
--    trace(1)
--    my_dir = routine_id("win_dir") -- use win_dir() please
--                                   -- my_dir is global atom in file.e
--    nothing = walk_dir(path_name & "\\", your_function, scan_subdirs)
--    my_dir = -2 	               -- constant DEFAULT=-2 in file.e
--    return nothing
--end function

-- check_slash(): appends a '\' to the path if needed so that win_dir can
-- append '*'.
-- NOTE: since this is Windows specific, the slash is only '\'.
-- Won't cope with "\\*" though.
-- An empty path simply gets the slash appended.
function check_slash(sequence path)
	-- path[$] is a single character, so it must be compared with the
	-- character '\\'; comparing it with the one-element string "\\" never
	-- matches and a second slash was appended to paths that already had one.
	if length(path) = 0 or path[$] != '\\' then
		path = path & "\\" -- to be compatible with win_dir()
	end if
	return path
end function

global function walk_win_dir(sequence path_name, integer your_function, 
			 integer scan_subdirs)
-- Generalized Directory Walker
--  modified by Gary Shingles <eu at 531pi.co.nz> for win_dir()
-- Walk through a directory and (optionally) its subdirectories,
-- "visiting" each file and subdirectory. Your function will be called
-- via its routine id. Entries are visited in the order win_dir() returns
-- them; nothing here sorts them.
-- Your function should accept the path name and dir() entry for
-- each file and subdirectory. The path name it receives never has a
-- trailing slash or blank, for files and subdirectories alike.
-- It should return 0 to keep going,
-- or an error code (greater than 0) to quit, or it can return
-- any sequence or atom other than 0 as a useful diagnostic value.
-- Returns W_BAD_PATH when win_dir() cannot list path_name, the first
-- non-zero value returned by your function, or 0 when the walk completes.
    object d, abort_now, path
    
    -- get the full directory information
    path = check_slash(path_name)
    
    d = win_dir(path)
    if atom(d) then
	return W_BAD_PATH
    end if
    
    -- trim any trailing blanks or slashes from the path
    while length(path) > 0 and 
	  find(path[$], {' ', '\\'}) do
	path = path[1..$-1]
    end while
    
    for i = 1 to length(d) do
	if find('d', d[i][D_ATTRIBUTES]) then
	    -- a directory
	    if not find(d[i][D_NAME], {".", ".."}) then
		abort_now = call_func(your_function, {path, d[i]})
		if not equal(abort_now, 0) then
		    return abort_now
		end if
		if scan_subdirs then
		    abort_now = walk_win_dir(path & '\\' & d[i][D_NAME],
					 your_function, scan_subdirs)
		    
		    if not equal(abort_now, 0) and 
		       not equal(abort_now, W_BAD_PATH) then
			-- allow BAD PATH, user might delete a file or directory 
			return abort_now
		    end if
		end if
	    end if
	else
	    -- a file: pass the trimmed path, exactly as for directories, so
	    -- the callback sees the same slash-less form either way
	    abort_now = call_func(your_function, {path, d[i]})
	    if not equal(abort_now, 0) then
		return abort_now
	    end if
	end if
    end for
    return 0
end function



Example use (essential bits only):

constant MODE_UPDATE = 1,
         MODE_INSERT = 2

-- NOTE: abridged example ("essential bits only").  The variable
-- declarations and the helper routines it calls are not part of this
-- excerpt.
function real_indexer(sequence pathname, sequence entry)
-- Callback for walk_win_dir(): receives the folder path and one dir()-style
-- entry.  Always returns 0 so that the walk continues.

    real_pathname = pathname & "\\" & entry[D_NAME]
    rec = blank_record

    -- the name is reversed so its extension can be tested as a prefix
    str = lower(reverse(entry[D_NAME]))

    -- FOLDERS
    if equal(str, ".") or equal(str, "..") then
        return 0

    -- SHORT
    elsif length(str) < 5 then
        -- too short
        return 0

    -- LNK FILE
    elsif equal(str[1..4], "knl.") then
        -- .lnk file
        real_pathname = deref_lnk(pathname & "\\" & entry[D_NAME])

        if length(real_pathname) = 0 then
            msg(sprintf("Broken Link %s", {entry[D_NAME]}))
            return 0 -- broken link
        end if
        str = reverse(real_pathname) -- update str

        if equal(str[1..4], "3pm.") then
            files = {real_pathname}
        elsif equal(str[1..4], "trc.") then
            files = get_cart_files(real_pathname)
        else
            return 0 -- not interested
        end if

    elsif equal(str[1..4], "3pm.") then
        -- MP3
        files = {real_pathname}

    elsif equal(str[1..4], "trc.") then
        -- CRT
        files = get_cart_files(real_pathname)

    else
        return 0
    end if

    rec[TRK_KEY] = pathname & "\\" & entry[D_NAME] -- always
    rec[TRK_PATH] = pathname
    rec[TRK_FILE] = files

    idx = db_find_key(rec[TRK_KEY])
    if idx >= 0 then
        orig_rec = db_record_data(idx)
        orig_ts = orig_rec[TRK_TS]
        comp = compare(entry[3..9], orig_ts) -- size and/or time changed
        if comp = 0 then
            return 0 -- no change
        else
            cart_seq = orig_rec[TRK_CART]
            mode = MODE_UPDATE
        end if
    else
        mode = MODE_INSERT
    end if

    -- parent
    -- eg: C:\MUSIC, parent = C:
    --     C:\MUSIC\MODERN parent = C:\MUSIC
    bits = split(pathname, '\\')
    if length(bits) = 0 then
        -- weird
        msg("Weirdness in real_indexer")
        return 0
    elsif length(bits) = 1 then
        str = bits[1]
        cats = {}
    else
        str = {}
        cats = {}
        for i = 1 to length(bits) - 1 do
            str &= bits[i]
            if i > 3 then
                cats = append(cats, bits[i]) -- never a "C:" category or "MUSIC" category of MUSIC
            end if

            if i != length(bits) - 1 then
                str &= '\\'
            end if
        end for
        cats = append(cats, bits[length(bits)])
    end if
    pdep = length(bits) - 1

    -- TRK_PDEP path depth
    rec[TRK_PDEP] = pdep

    -- TRK_PARENT
    if find(pathname, rootfols) then
        -- this is a root folder, parent is {}
        rec[TRK_PARENT] = {}
    else
        rec[TRK_PARENT] = str
    end if

    -- TRK_CATS
    rec[TRK_CATS] = cats

    -- TRK_NAME
    rec[TRK_NAME] = get_track_name(real_pathname)

    -- TRK_TS
    rec[TRK_TS] = entry[3..9]

    han = BASS_StreamCreateFile(BASS_FALSE, rec[TRK_FILE][cart_seq], 0, 0, 0)
    len = BASS_StreamGetLength(han)
    sec = BASS_ChannelBytes2Seconds(han, len)
    BASS_StreamFree(han)

    -- TRK_LEN
    rec[TRK_LEN] = {len}

    -- TRK_SEC
    rec[TRK_SEC] = {s2hms(sec)}

    -- TRK_CART
    rec[TRK_CART] = cart_seq

    -- COMMIT
    if mode = MODE_UPDATE then
        msg(sprintf("Updated: %s", {rec[TRK_KEY]}))
        db_replace_data(idx, rec)
    else
        msg(sprintf("Inserting: %s", {rec[TRK_KEY]}))
        junk = db_insert(rec[TRK_KEY], rec)
        if junk != DB_OK then
            msg(sprintf("Insert failed! %s (err: %d)", {entry[D_NAME], junk}))
        end if
    end if
    return 0
    -- NOTE(review): the call below follows the return, as in the original
    -- post, and is therefore never executed -- confirm whether it was
    -- meant to run before returning.
    doEvents(0)
end function

procedure index_folder(sequence folname)
-- Walks 'folname' with subdirectory scanning switched on, passing every
-- entry to real_indexer(); the walk's result is stored in 'junk'.
    junk = walk_win_dir(folname, routine_id("real_indexer"),1)
end procedure

gary

new topic     » goto parent     » topic index » view thread      » older message » newer message

Search



Quick Links

User menu

Not signed in.

Misc Menu