Re: Better median function?
Tor wrote:
> Here is my current median function;
>
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> include sort.e
> global function median(sequence s)
> integer l
> l = length(s)
> if l = 0 then
> return 0
> elsif l = 1 then
> return s[1]
> end if
> s = sort(s)
> return s[floor(l / 2) + 1]
> end function
>
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>
> Is the sorting really necessary? Surely there must
> be a more efficient way to find the median value
> of a one-dimensional sequence ?!
No, the sort is not necessary, and yes, there are *many* more
efficient ways to do it. Just one of them is shown below, but I can
think of at least one more potentially much faster!
The presented solution is rather long and cumbersome, because I have
not bothered to streamline or optimize it terribly much, but it is
still typically considerably faster than the function with the sort
(and I optimized that one!). And it gets better as the sequences get
longer!
Enjoy. jiri
-- median.ex
-- jiri babor
-- jbabor at paradise.net.nz
-- 00-06-12
include sort.e
function odd(integer x)
return and_bits(x,1)
end function
function min(sequence s)
atom a
integer len
len = length(s)
if not len then
puts(1, "min error: empty sequence")
abort(1)
elsif len = 1 then
return s[1]
end if
a = s[1]
for i=2 to len do
if s[i]<a then
a = s[i]
end if
end for
return a
end function
function max(sequence s)
atom a
integer len
len = length(s)
if not len then
puts(1, "max error: empty sequence")
abort(1)
elsif len = 1 then
return s[1]
end if
a = s[1]
for i=2 to len do
if s[i]>a then
a = s[i]
end if
end for
return a
end function
function split(sequence s)
sequence s1,s2
atom a,si
integer n
n = 1
s1 = {}
s2 = {}
a = s[1]
for i=2 to length(s) do
si = s[i]
if si<a then
s1 &= si
elsif si>a then
s2 &= si
else
n += 1
end if
end for
return {s1,s2,a,n}
end function
function median_with_sort(sequence s)
integer len,n
len = length(s)
n = floor((len+1)/2)
if not len then return 0 end if
if len > 2 then s = sort(s) end if
if and_bits(len,1) then -- odd
return s[n]
end if
return (s[n] + s[n+1])/2
end function
function median(sequence s)
sequence r
integer len,l1,l2,n1,n2,halflength
len = length(s)
halflength = floor(len/2)
l1 = 0
l2 = 0
r = split(s)
n1 = length(r[1])
n2 = length(r[2])
while 1 do
if (n1+l1) > halflength then
l2 += n2 + r[4]
r = split(r[1])
elsif (n2+l2) > halflength then
l1 += n1 + r[4]
r = split(r[2])
else
exit
end if
n1 = length(r[1])
n2 = length(r[2])
end while
if odd(len) then return r[3] end if
if l1+length(r[1]) = halflength then
return (r[3] + max(r[1]))/2
elsif l2+length(r[2]) = halflength then
return (r[3] + min(r[2]))/2
end if
return r[3]
end function -- median
-- test --------------------------------------------------------------
include machine.e
sequence n,N,s
atom dt,t,x
integer m,M
n = {10,11,100,101,1000,1001}
N = {100000,100000,10000,10000,1000,1000}
tick_rate(200)
for k=1 to length(n) do
m = n[k]
M = N[k]
s = repeat(0,m)
for i=1 to m do
s[i] = rand(m)-1
end for
t = time()
for i=1 to M do
x = median_with_sort(s)
end for
dt = time()-t
? dt
t = time()
for i=1 to M do
x = median(s)
end for
dt = time()-t
? dt
puts(1,'\n')
end for
|
Not Categorized, Please Help
|
|