Pastey Mike Sort

-- This module manages a general purpose sorting system
-- Data can be sorted directly, as an index - either optionally by column(s). Or by a custom routine 
 
 
--sort(data [,col]) 
 
--sort_index(index, keys [,col]) 
 
--custom_sort(s, rtn) 
 
 
-------------------- natural order sorting 
 
-- compares any two objects using the natural order algorithm 
export function nat_compare(object x1, object x2) 
integer x1pt, x2pt, x1len, x2len, number, comp, String 
atom x1num, x2num 
object x1char, x2char 
 
	if sequence(x1) and sequence(x2) then 
 
		x1len = length(x1) 
		x2len = length(x2) 
 
		-- if both are strings 
		-- inline String type-checking for speed 
		String = 1 
		for i = 1 to x1len do 
			x1char = x1[i] 
			if not integer(x1char) 
			or x1char > 255 
			or (x1char < ' ' and not find(x1char,"\t\n\r")) then 
				-- not a String 
				String = 0 
				exit 
			end if 
		end for 
 
		if String then 
			String = 1 
			for i = 1 to x2len do 
				x2char = x2[i] 
				if not integer(x2char) 
				or x2char > 255 
				or (x2char < ' ' and not find(x2char,"\t\n\r")) then 
					-- not a String 
					String = 0 
					exit 
				end if 
			end for 
		end if 
 
		if not String then 
			-- recurse 
			for i = 1 to x1len do 
				if i > x2len then 
					return 1 
				end if 
				comp = nat_compare(x1[i],x2[i]) 
				if comp != 0 then 
					return comp 
				end if 
			end for 
			return compare(x1len,x2len) 
		end if 
 
		-- both are strings 
		x1pt = 1 
		x2pt = 1 
 
		-- number flag 
		number = 0 
 
		while x1pt <= x1len do 
 
			if number then 
				-- build numbers 
				x1num = x1char - '0' 
				while x1pt < x1len do 
					x1pt += 1 
					x1char = x1[x1pt] 
					if (x1char >= '0' and x1char <= '9') then 
						x1num = (x1num * 10) + (x1char - '0') 
					else 
						x1pt -= 1 
						exit 
					end if 
				end while 
 
				x2num = x2char - '0' 
				while x2pt < x2len do 
					x2pt += 1 
					x2char = x2[x2pt] 
					if (x2char >= '0' and x2char <= '9') then 
						x2num = (x2num * 10) + (x2char - '0') 
					else 
						x2pt -= 1 
						exit 
					end if 
				end while 
 
				-- compare as atoms 
				comp = compare(x1num,x2num) 
				if comp != 0 then 
					return comp 
				end if 
 
				-- still equal, keep going 
				number = 0 
				x1pt += 1 
				x2pt += 1 
 
			else 
 
				if x2pt > x2len then 
					-- equal up to point x2 stops 
					-- x1 is longer, therefore greater 
					return 1 
				end if 
 
				x1char = x1[x1pt] 
				x2char = x2[x2pt] 
 
				if 
				(x1char >= '0' and x1char <= '9') 
				and 
				(x2char >= '0' and x2char <= '9') 
				then 
					-- first digit of number found, set flag 
					number = 1 
				else 
					comp = compare(x1char,x2char) 
					if comp != 0 then 
						return comp 
					end if 
					x1pt += 1 
					x2pt += 1 
				end if 
			end if 
		end while 
		-- equal up to point x1 stops, longer (if either) is greater 
		return compare(x1len,x2len) 
	else 
		-- one or both top-level elements are atoms 
		-- compare normally 
		return compare(x1,x2) 
	end if 
 
end function 
 
export constant NAT = routine_id( "nat_compare" ) 
 
 
-------------------------- mike sort 
integer SortType 
object Column 
sequence Keys 
integer CompareRtn 
 
 
constant 
 
	DIRECT=1, 
	COLUMN=2, 
	COLUMNS=3, 
 
	INDEX=4, 
	INDEXCOLUMNS=5, 
	 
	CUSTOM=6 
 
 
procedure init() 
	SortType = 0 
	Column = 0 
	Keys = {} 
	CompareRtn = -1 
end procedure 
 
 
function IsIndex( sequence s, object sortby ) 
 
	-- ignore this case - sortby MUST BE a sequence for index sorting 
	if not sequence(sortby) then 
		return 0 
	end if 
 
 
	-- check if is sequence of integers 
	for i = 1 to length(s) do 
		if not integer( s[i] ) then 
			return 0 -- early out 
		end if 
	end for 
 
	-- all elements integers at this point 
 
	-- find the min/max 
	integer Min = s[1] 
	integer Max = Min 
	for i = 2 to length(s) do 
		integer x = s[i] 
		if x > Max then 
			Max = x 
		elsif x < Min then 
			Min = x 
		end if 
	end for 
 
	-- check min/max 
	integer try = Min >= 1 and Max <= length(sortby) 
 
	-- exit 
	return try 
 
end function 
 
 
function compare_columns(sequence a, sequence b, sequence cols) 
 
	integer cmp, col 
 
	-- loop through all columns 
	for i = length(cols) to 1 by -1 do 
 
		-- get the next column number 
		col = cols[i] 
 
		-- do the compare 
		cmp = compare(a[col], b[col]) 
 
		-- early out? 
		if cmp then exit end if 
 
	end for 
 
	-- exit with the result 
	return cmp 
 
end function 
 
 
 
function Kompare(object a, object b) 
 
	if SortType = DIRECT then 
		return compare(a, b) 
 
	elsif SortType = COLUMN then 
		return compare(a[Column], b[Column]) 
 
	elsif SortType = COLUMNS then 
		return compare_columns(a, b, Column) 
 
	elsif SortType = INDEX then 
		return compare(Keys[a], Keys[b]) 
 
	elsif SortType = INDEXCOLUMNS then 
		return compare_columns(Keys[a], Keys[b], Column) 
 
	elsif SortType = CUSTOM then 
		return call_func(CompareRtn, {a, b}) 
 
	else 
		? 9/0 -- DEV error 
 
	end if 
 
 
end function 
 
 
 
 
 
function insertion_sortXX(sequence x) -- insertion sort on steroids 
	object temp 
	integer insert, hi, mid 
 
	-- loop along UNsorted list 
	for i = 2 to length(x) do 
 
		-- get the first element in the unsorted section 
		temp = x[i] 
 
		insert = 1 -- start of sorted section 
		hi = i-1 -- end of sorted section 
 
		-- find the insertion point 
 
		-- binary search? 
		if i > 15 then 
			while insert <= hi do -- starts at top of sorted list and go to the bottom 
				mid = floor((insert + hi) / 2) 
				if Kompare(temp, x[mid]) < 0 then 
					hi = mid - 1 
				else 
					insert = mid + 1 
				end if 
			end while 
 
		-- linear search? 
		else 
			while hi do 
				if Kompare( temp, x[hi] ) >= 0 then 
					insert = hi+1 
					exit 
				end if 
				hi -= 1 
			end while 
 
		end if 
 
		-- shuffle & insert, but only if needed 
		if insert < i then 
			x[insert+1 .. i] = x[insert .. i-1] 
			x[insert] = temp 
		end if 
 
	end for 
 
	-- exit 
	return x 
 
end function 
 
 
function merge_sort(sequence x) 
 
	-- put x into ascending order using recursive merge sort 
	-- best for medium to large sequences 
	-- optimized with insertion_sort for the shortest sequences 
	-- importantly, it is a *stable* sort 
 
	-- init 
	integer n = length(x) 
 
	-- fast sort for small sequences 
	if n <= 100 then 
		return insertion_sortXX(x) 
	end if 
 
	-- recurse	 
	integer mid = floor(n / 2) 
	sequence a = merge_sort(x[1 .. mid]) -- sort the first half 
	sequence b = merge_sort(x[mid + 1 .. n]) -- sort the second half 
	 
	-- merge the two sorted halves into one 
	sequence merged = {} 
	while length(a) and length(b) do 
		if Kompare(a[1], b[1]) <= 0 then -- this is the KEY to stability since the earlier 'a' element always leads when keys are equal 
			merged = append(merged, a[1]) 
			a = a[2 .. $] 
		else 
			merged = append(merged, b[1]) 
			b = b[2 .. $] 
		end if 
	end while 
 
	-- exit with merged data + remnants 
	if length(a) then 
		return merged & a 
	end if 
	return merged & b 
 
end function 
 
	 
function go(sequence data) 
 
	-- return var 
	sequence ret 
 
	-- fast sort? 
	if length(data) < 200 then 
		ret = insertion_sortXX( data ) 
 
	-- std sort using merge sort 
	else 
		ret = merge_sort( data ) 
 
	end if 
 
	-- exit 
	return ret 
 
end function 
 
 
function get_col(sequence s, integer col) 
 
	-- init 
	integer len = length(s) 
	sequence ret = repeat(0, len) 
	 
	-- pull out the column 
	for i = 1 to len do 
		ret[i] = s[i][col] 
	end for 
 
	-- exit 
	return ret 
 
end function 
 
 
function optimise(object col) 
	if sequence(col) and length(col) = 1 then 
		col = col[1] 
	end if 
	return col 
end function 
 
 
global function sort(sequence data, object col=0) 
 
	-- early out? 
	if not length(data) then return data end if 
 
	-- init 
	init() 
 
	-- get column(s) 
	Column = optimise(col) 
 
	-- columns 
	if sequence(col) then 
		SortType = COLUMNS 
 
	-- direct 
	elsif not col then 
		SortType = DIRECT 
 
	-- column 
	else 
		SortType = COLUMN 
 
	end if 
 
	-- sort 
	return go(data) 
 
 
end function 
 
 
 
global function sort_index( sequence index, sequence sortby, object col=0 ) 
 
	-- early out? 
	if not length(index) then return index end if 
 
	-- check that really is an index 
	if not IsIndex( index, sortby ) then 
		puts(1, "Bad data sent to sort_index()\n") 
		return index -- user error, should we provoke a crash? 
	end if 
 
	-- init 
	init() 
 
	-- get column(s) 
	Column = optimise(col) 
 
	-- work out the type & keys 
	if sequence(Column) then 
		SortType = INDEXCOLUMNS 
		Keys = sortby 
 
	elsif Column then 
		SortType = INDEX 
		Keys = get_col(sortby, Column) 
 
	else 
		SortType = INDEX 
		Keys = sortby 
 
	end if 
 
	-- sort 
	return go( index ) 
 
end function 
 
 
 
global function custom_sort( sequence data, integer rtn ) 
 
	-- early out? 
	if not length(data) then return data end if 
 
	-- init 
	SortType = CUSTOM 
	Column = 0 
	Keys = data 
	CompareRtn = rtn 
 
	-- sort 
	return go(data) 
 
end function 
 
 
----------- testing 
 
procedure show( sequence title, sequence data ) 
    printf(1, "\n%s\n", {title} ) 
    for i=1 to length(data) do 
        printf(1, "%s\n", {data[i]} ) 
    end for 
end procedure 
 
 
sequence data =  {"1A","Item10","Item1A","Item100","Item20","Item1","100","Item10B","10A"} 
show( "unsorted", data ) 
 
sequence number = sort(data) 
show( "`number order` sorted", number ) 
 
 
sequence natural = custom_sort( data, NAT ) 
show( "`natural order` sorted", natural ) 

1. Comment by _tom Jul 20, 2017

Note that the original "natural sorting" includes a second comparison for objects known to be string sequences -- not included here to save space. _tom