1. Massive file sort

The description is as follows.

Process 1
  Create an empty temp file
  Load as many lines from the input file as you feel you can sort at a time.
    call it max_lines
  sort those lines and append them to the temp file.
  Continue this process until the input file is exhausted
  chunks = the number of times you had to load a set of lines for sorting

Process 2
  Open the temp file
  Create an empty input file
  Load half of max_lines (half as many lines as you can sort at a time) from the temp file.
  Directly output those to the input file

Load max_lines from the temp file
sort those lines and append to input file
repeat the last two (2) steps until the temp file is exhausted

Process 1 and Process 2 together make up a single pass.
Repeat the passes the same number of times as you have chunks.

---------------------------
include sort.e

-- Sorts the lines of in_file in place using a bounded amount of memory.
--
-- Each pass has two phases:
--   1. Read in_file in chunks of max_lines, sort each chunk, and write the
--      chunks to out_file (the temp file).
--   2. Copy the first max_lines/2 lines of the temp file straight back to
--      in_file, then sort the rest in chunks of max_lines.  The half-chunk
--      shift makes the chunk boundaries of phase 2 straddle those of
--      phase 1, so lines can migrate between neighbouring chunks.
-- Repeating the pass once per chunk leaves the whole file sorted.
-- The sorted result always ends up in in_file; out_file is scratch space.

integer max_lines, total_lines, pass, required_passes
integer in_fn, out_fn, half
sequence in_file, out_file
object line

in_file = "file.txt"
out_file = "temp.txt"

-- Max number of lines to sort at a time.
-- Should be an even number, and must be at least 2 so that the half-chunk
-- shift in phase 2 is non-zero.
max_lines = 2000

-- Opens a file and returns its handle; reports the error and aborts the
-- program if the file cannot be opened.
function open_or_die(sequence name, sequence mode)
  integer fn
  fn = open(name, mode)
  if fn = -1 then
    puts(2, "Cannot open " & name & "\n")
    abort(1)
  end if
  return fn
end function

-- Returns s with a trailing '\n' guaranteed.  The last line of a file may
-- lack one; without this it would be glued to the following line once the
-- sort moves it away from the end of the file.
function terminate_line(sequence s)
  if length(s) = 0 or s[length(s)] != '\n' then
    return s & '\n'
  end if
  return s
end function

-- Reads lines from src in groups of at most chunk lines, sorts each group,
-- and writes it to dst, until src reaches end of file.
procedure sort_chunks(integer src, integer dst, integer chunk)
  sequence buf
  object ln
  integer n

  while 1 do
    buf = repeat(0, chunk)
    n = 0
    while n < chunk do
      ln = gets(src)
      if atom(ln) then
        exit   -- gets() returns -1 at end of file
      end if
      n += 1
      buf[n] = terminate_line(ln)
    end while

    buf = sort(buf[1..n])
    for i = 1 to n do
      puts(dst, buf[i])
    end for

    if n < chunk then
      exit     -- short (or empty) group: the source is exhausted
    end if
  end while
end procedure

-- Count the lines so we know how many chunks (and hence passes) are needed.
total_lines = 0
in_fn = open_or_die(in_file, "r")
while 1 do
  line = gets(in_fn)
  if atom(line) then
    exit
  end if
  total_lines += 1
end while
close(in_fn)

-- Ceiling of total_lines / max_lines: one pass per chunk.
required_passes = -floor(-(total_lines / max_lines))
half = floor(max_lines / 2)

pass = 1
while pass <= required_passes do
  -- Phase 1: chunk-sort in_file into the temp file.
  in_fn = open_or_die(in_file, "r")
  out_fn = open_or_die(out_file, "w")
  sort_chunks(in_fn, out_fn, max_lines)
  close(in_fn)
  close(out_fn)

  -- Phase 2: chunk-sort the temp file back into in_file, shifted by half a
  -- chunk.  The first half chunk is copied through unchanged.
  in_fn = open_or_die(out_file, "r")
  out_fn = open_or_die(in_file, "w")
  for i = 1 to half do
    line = gets(in_fn)
    if atom(line) then
      exit   -- file is shorter than half a chunk
    end if
    puts(out_fn, line)
  end for
  sort_chunks(in_fn, out_fn, max_lines)
  close(in_fn)
  close(out_fn)

  pass += 1
end while

--delete "temp.txt"

new topic     » topic index » view message » categorize

Search



Quick Links

User menu

Not signed in.

Misc Menu