1. Massive file sort
The description is as follows.
Process 1
Create an empty temp file
Load as many lines from the input file as you feel you can sort at a time.
call it max_lines
sort those lines and append them to the temp file.
Continue this process until the input file is exhausted
chunks = the number of times you had to load a set of lines for sorting
Process 2
Open the temp file
Create an empty input file
Load half of max_lines lines from the temp file (these are not sorted in this step).
Directly output those to the input file
Load max_lines from the temp file
sort those lines and append to input file
Repeat the last two (2) steps until the temp file is exhausted.
The above consists of a single pass.
Repeat the passes the same number of times as you have chunks.
---------------------------
include sort.e
-- Massive file sort.
--
-- Sorts the lines of in_file even when the whole file cannot be held in
-- memory at once.  Each pass consists of two phases:
--   Phase 1: read in_file in chunks of max_lines, sort each chunk, and
--            write the chunks to out_file (the temp file).
--   Phase 2: copy the first max_lines/2 lines of the temp file unchanged
--            back to in_file, then sort the remainder in chunks of
--            max_lines.  The half-chunk offset lets lines migrate across
--            the chunk boundaries used in phase 1.
-- The pass is repeated once per chunk.  The sorted result always ends up
-- in in_file; out_file is only scratch space.

integer max_lines, total_lines, pass, required_passes
integer fn_in, fn_out, half_lines
sequence in_file, out_file, lines
object line

-- Open path in the given mode; print a message to stderr and abort if
-- the file cannot be opened (open() returns -1 on failure).
function open_or_abort(sequence path, sequence mode)
    integer fn
    fn = open(path, mode)
    if fn = -1 then
        puts(2, "Cannot open " & path & "\n")
        abort(1)
    end if
    return fn
end function

-- Read up to count lines from file number fn.
-- Returns a sequence of lines; it is shorter than count (possibly empty)
-- when end of file is reached first.  Every returned line is guaranteed
-- to end in '\n' so that a final unterminated line cannot get glued to
-- its neighbour after sorting.
function read_chunk(integer fn, integer count)
    sequence chunk
    object ln
    chunk = {}
    for i = 1 to count do
        ln = gets(fn)
        if atom(ln) then
            -- gets() returns the atom -1 at end of file
            exit
        end if
        if length(ln) = 0 then
            ln = "\n"
        elsif ln[length(ln)] != '\n' then
            ln &= '\n'
        end if
        chunk = append(chunk, ln)
    end for
    return chunk
end function

-- Write every line of chunk to file number fn, in order.
procedure write_lines(integer fn, sequence chunk)
    for i = 1 to length(chunk) do
        puts(fn, chunk[i])
    end for
end procedure

-- Read the rest of src in chunks of count lines, sort each chunk
-- independently, and append the sorted chunks to dst.
procedure sort_chunks(integer src, integer dst, integer count)
    sequence chunk
    while 1 do
        chunk = read_chunk(src, count)
        if length(chunk) = 0 then
            exit
        end if
        write_lines(dst, sort(chunk))
        if length(chunk) < count then
            -- short chunk means end of file was reached
            exit
        end if
    end while
end procedure

in_file = "file.txt"
out_file = "temp.txt"

-- Max number of lines to sort at a time.
-- Should be an even number
max_lines = 2000

if max_lines < 2 then
    -- with a half-chunk of 0 lines phase 2 could never move anything
    puts(2, "max_lines must be at least 2\n")
    abort(1)
end if
half_lines = floor(max_lines / 2)

-- Count the lines of the input file to determine how many chunks exist.
total_lines = 0
fn_in = open_or_abort(in_file, "r")
line = gets(fn_in)
while sequence(line) do
    total_lines += 1
    line = gets(fn_in)
end while
close(fn_in)

-- ceiling(total_lines / max_lines): number of chunks, and hence passes
required_passes = -floor(-(total_lines / max_lines))

if (required_passes = 1) then
    -- Everything fits in one chunk: sort in memory and write the result
    -- straight back to in_file (same place the multi-pass path leaves it).
    fn_in = open_or_abort(in_file, "r")
    lines = read_chunk(fn_in, total_lines)
    close(fn_in)

    lines = sort(lines)

    fn_out = open_or_abort(in_file, "w")
    write_lines(fn_out, lines)
    close(fn_out)
    -- end of program
else
    -- required_passes is 0 for an empty file, in which case the loop
    -- body never runs and the file is left untouched.
    pass = 1
    while (pass <= required_passes) do
        -- Phase 1: in_file -> out_file, chunks aligned at line 1
        fn_in = open_or_abort(in_file, "r")
        fn_out = open_or_abort(out_file, "w")
        sort_chunks(fn_in, fn_out, max_lines)
        close(fn_in)
        close(fn_out)

        -- Phase 2: out_file -> in_file, chunks offset by half a chunk
        fn_in = open_or_abort(out_file, "r")
        fn_out = open_or_abort(in_file, "w")
        lines = read_chunk(fn_in, half_lines)
        write_lines(fn_out, lines)
        sort_chunks(fn_in, fn_out, max_lines)
        close(fn_in)
        close(fn_out)

        pass += 1
    end while
end if
--delete "temp.txt"