Re: Better median function?

new topic     » goto parent     » topic index » view thread      » older message » newer message

Tor wrote:

> Here is my current median function;
>
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> include sort.e
> global function median(sequence s)
>     integer l
>     l = length(s)
>     if l = 0 then
>             return 0
>     elsif l = 1 then
>             return s[1]
>     end if
>     s = sort(s)
>     return s[floor(l / 2) + 1]
> end function
>
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>
> Is the sorting really necessary? Surely there must
> be a more efficient way to find the median value
> of a one-dimensional sequence ?!

No, the sort is not necessary, and yes, there are *many* more
efficient ways to do it. Just one of them is shown below, but I can
think of at least one more potentially much faster!

The presented solution is rather long and cumbersome, because I have
not bothered to streamline or optimize it terribly much, but it is
still typically considerably faster than the function with the sort
(and I optimized that one!). And it gets better as the sequences get
longer!

Enjoy. jiri


--  median.ex
--  jiri babor
--  jbabor at paradise.net.nz
--  00-06-12

include sort.e

function odd(integer x)
    return and_bits(x,1)
end function

function min(sequence s)
    atom a
    integer len

    len = length(s)
    if not len then
        puts(1, "min error: empty sequence")
        abort(1)
    elsif len = 1 then
        return s[1]
    end if
    a = s[1]
    for i=2 to len do
        if s[i]<a then
            a = s[i]
        end if
    end for
    return a
end function

function max(sequence s)
    atom a
    integer len

    len = length(s)
    if not len then
        puts(1, "max error: empty sequence")
        abort(1)
    elsif len = 1 then
        return s[1]
    end if
    a = s[1]
    for i=2 to len do
        if s[i]>a then
            a = s[i]
        end if
    end for
    return a
end function

function split(sequence s)
    sequence s1,s2
    atom a,si
    integer n

    n = 1
    s1 = {}
    s2 = {}
    a = s[1]
    for i=2 to length(s) do
        si = s[i]
        if si<a then
            s1 &= si
        elsif si>a then
            s2 &= si
        else
            n += 1
        end if
    end for
    return {s1,s2,a,n}
end function

function median_with_sort(sequence s)
    integer len,n

    len = length(s)
    n = floor((len+1)/2)
    if not len then return 0 end if
    if len > 2 then s = sort(s) end if
    if and_bits(len,1) then  -- odd
        return s[n]
    end if
    return (s[n] + s[n+1])/2
end function

function median(sequence s)
    sequence r
    integer len,l1,l2,n1,n2,halflength

    len = length(s)
    halflength = floor(len/2)
    l1 = 0
    l2 = 0
    r = split(s)
    n1 = length(r[1])
    n2 = length(r[2])
    while 1 do
        if (n1+l1) > halflength then
            l2 += n2 + r[4]
            r = split(r[1])
        elsif (n2+l2) > halflength then
            l1 += n1 + r[4]
            r = split(r[2])
        else
            exit
        end if
        n1 = length(r[1])
        n2 = length(r[2])
    end while

    if odd(len) then return r[3] end if
    if l1+length(r[1]) = halflength then
        return (r[3] + max(r[1]))/2
    elsif l2+length(r[2]) = halflength then
        return (r[3] + min(r[2]))/2
    end if
    return r[3]
end function -- median

-- test --------------------------------------------------------------

include machine.e

sequence n,N,s
atom dt,t,x
integer m,M

n = {10,11,100,101,1000,1001}
N = {100000,100000,10000,10000,1000,1000}
tick_rate(200)

for k=1 to length(n) do
    m = n[k]
    M = N[k]
    s = repeat(0,m)
    for i=1 to m do
        s[i] = rand(m)-1
    end for

    t = time()
    for i=1 to M do
        x = median_with_sort(s)
    end for
    dt = time()-t
    ? dt

    t = time()
    for i=1 to M do
        x = median(s)
    end for
    dt = time()-t
    ? dt
    puts(1,'\n')
end for

new topic     » goto parent     » topic index » view thread      » older message » newer message

Search



Quick Links

User menu

Not signed in.

Misc Menu