Defeating Murphy's laws and a new challenge

new topic     » topic index » view thread      » older message » newer message

Pete and all:
I've made some more optimizations to the program for splitting a series of
numbers into two having the closest sums.
In the previous version, if all the numbers were multiples of 7, for
example, the convergence was incredibly slow. Now, I use the maximum common
divisor to compute the best possible results, and it goes really fast in
these cases. I added another small optimization.
What perplexes me is that this algorithm defeats Murphy's laws. In fact, it
is nearly impossible to find a data set to make it run slow.
This is the new challenge for all: find this kind of data set.
As this problem is closely related to what is called the knapsack problem, I
wonder if this solution can be applied to it. When I have some more spare
time, I'll give it a try. Years ago I wrote a program in APL for filling to
brim a number of diskettes. I'm thinking of translating it into Euphoria.
Regards.

The new algorithm:

--Group numbers into two groups such that their sums are as close as
possible
--R. M. Forno 2003/04/17
include sort.e
include get.e
include misc.e
include machine.e

integer Last, Lendata, Bestlast, Lowind
atom Lowgoal, Highgoal, Val, Bestval, Totval, Limit, Bound
sequence Data, Index, Accum, Bestind

set_rand(2)
with profile_time

function Mcd2(integer d, integer r)
    integer x
    while r do
    x = remainder(d, r)
    d = r
    r = x
    end while
    return d
end function

function Mcd(sequence s)
    integer len, r
    len = length(s)
    r = s[len]
    for i = len - 1 to 1 by -1 do
    if r = 1 then
        return r
    end if
    r = Mcd2(r, s[i])
    end for
    return r
end function

function readseq()
--Read a sequence of integers from the keyboard or from a redirected file
    sequence s, r
    s = {}
    while 1 do
    r = get(0)
    if r[1] != GET_SUCCESS or not integer(r[2]) then
        exit
    end if
    s &= r[2]
    end while
    return s
end function

procedure outputresult()
--Output resulting partitions
    sequence aux
    aux = repeat(0, Lendata)
    for i = 1 to Lendata do
    aux[i] = i
    end for
    for i = 1 to Bestlast do
    aux[Bestind[i]] = 0
    end for
    printf(1, "Best solution values: %d %d\n", {Bestval, Totval - Bestval})
    puts(1, "Partition 1:\n")
    for i = 1 to Bestlast do
    printf(1, "%d ", Data[Bestind[i]])
    end for
    puts(1, '\n')
    puts(1, "Partition 2:\n")
    for i = 1 to Lendata do
    if aux[i] then
        printf(1, "%d ", Data[aux[i]])
    end if
    end for
    puts(1, '\n')
end procedure

function recurse()
    integer ind, r, dat, i1
    if Val > Highgoal then --Impossible to get better values
    return 0
    elsif Val < Lowgoal then --Try to go ahead
    if Val >= Bound then
        if Val > Bestval then --Test for better value
        Bestval = Val --Update best value and corresponding indexes
        Bestlast = Last
        if Lowind <= Last then
            Bestind[Lowind..Last] = Index[Lowind..Last]
        end if
        Lowind = Last
        end if
    end if
    if Val > Limit then --Impossible to get better values
        return 0
    end if
    ind = Index[Last] --Go to next data item
    while 1 do
        i1 = ind --Previous
        ind += 1
        if ind > Lendata then --End of data vector
        return 0
        end if
        if Val + Accum[ind] <= Bestval then --Impossible to get better value
        return 0
        end if
        dat = Data[ind]
        if dat != Data[i1] or Index[Last] = i1 then --Skip repetition
        Last += 1 --Try next index
        Index[Last] = ind
        Val += dat
        r = recurse()
        if r then --Best possible value found
            return 1
        end if
        Val -= dat --Restore previous value
        Last -= 1
        if Lowind < Last then --Update updating start
            Lowind = Last
        end if
        end if
    end while
    else
    Bestval = Val --Either Val = Lowgoal or Val = Highgoal
    Bestlast = Last
    Bestind = Index
    return 1 --Indicate best possible value found
    end if
end function

procedure optim()
    integer r
    Bestval = 0
    Last = 1
    Bestlast = 1
    Index[1] = 1
    Val = Data[1]
    Limit = Highgoal - Data[Lendata]
    Bound = Lowgoal - Data[1]
    Bestind = Index
    Lowind = 1
    r = recurse()
end procedure

function genrand(integer n, integer m)
    sequence s
    s = 7 * rand(repeat(n, m))
    return s
end function

procedure main()
    atom t
    integer a, b, mcd, r
    a = 10000
    b = 1000000
--  Data = readseq()
    Data = genrand(a, b)
    t = time()
    Lendata = length(Data)
    Index = repeat(0, Lendata)
    if Lendata = 0 then
    Bestlast = 0
    Totval = 0
    Bestval = 0
    elsif Lendata = 1 then
    Bestlast = 1
    Bestind = {1}
    Totval = Data[1]
    Bestval = Totval
    else
    Data = reverse(sort(Data))
    Accum = Data
    for i = Lendata to 2 by -1 do
        Accum[i - 1] += Accum[i]
    end for
    Totval = Accum[1]
    Lowgoal = floor(Totval * 0.5)
    Highgoal = - floor(- Totval * 0.5)
    mcd = Mcd(Data)
    Lowgoal -= remainder(Lowgoal, mcd)
    r = remainder(Highgoal, mcd)
    if r then
        Highgoal += mcd - r
    end if
    optim()
    end if
    printf(2, "n = %d m = %d Sum1 = %d Sum2 = %d Time: %f\n",
        {a, b, Bestval, Totval - Bestval, time() - t})
    outputresult()
end procedure

main()

new topic     » topic index » view thread      » older message » newer message

Search



Quick Links

User menu

Not signed in.

Misc Menu