Re: EDS database speed questions

new topic     » goto parent     » topic index » view thread      » older message » newer message

Here is the procedure that reads the data in, after each line has been validated:

 
--------------------------------------------------------------- 
procedure prep_data() -- reformat data for the database
    -- Walks every record of the file-level sequence ctt_data, slices the
    -- fixed-column fields out of it and builds an 18-element record:
    --   * records whose user data starts with "ARCH2VTS" go to done_list
    --     (done_vols / done_bytes are updated),
    --   * all other records go to cleaned_data, get a sortable key in
    --     ctt_index, and are accumulated per volume in track_data / cbv.
    -- On exit ctt_data is emptied and ctt_index is sorted.
    --
    -- Reads/writes file-level variables: ctt_data, cleaned_data, ctt_index,
    -- cbv, track_data, highdate, done_list, done_vols, done_bytes.
    -- NOTE(review): track_data is appended to cbv before this routine ever
    -- assigns it, so it must already hold a value on entry - confirm caller.
    sequence s1, s2, s5, s7, s8
    sequence Dslabel,  Volser,  Dsname,   Recfm
    sequence Firstvol, Dscsize, Volseq,   Volsnum, Media
    sequence Dsexpdt,  Credt,   Cretm,    Crejbn,  Dusrdata
    sequence Newdsn,   size_scale
    integer  i2, i3
    integer  dslabel, lrecl, blksize, volsnum
    atom     dscsize

    -- multipliers matching the suffix letters "K", "M", "G" (same order)
    size_scale = {1000, 1000000, 1000000000}

    cleaned_data = {}
    ctt_index = {}
    -- whole_list = {}
    cbv = {"junk first record"} -- total bytes by volume

    for recno = 1 to length(ctt_data) do
       s1 = drop_cr(ctt_data[recno])
       -- ensure every record is the same length (243 columns), so every
       -- slice below is guaranteed to be in range
       if length(s1) < 243 then
          s1 = s1 & repeat(' ', 243 - length(s1))
       end if

       -- DSLABEL
       Dslabel = s1[3..7]  -- support 5 digits, not 3
       s2 = value(Dslabel)
       dslabel = s2[2]

       -- VOLSER
       Volser = s1[12..17]

       -- DSNAME
       Dsname = drop_trailing_blanks(s1[22..65])

       -- RECFM    with following spaces
       Recfm = drop_trailing_blanks(s1[70..72])

       -- LRECL
       s2 = value(drop_trailing_blanks(s1[81..87]))
       lrecl = s2[2]

       -- BLKSIZE
       s2 = value(drop_trailing_blanks(s1[92..98]))
       blksize = s2[2]

       -- FIRSTVOL
       Firstvol = s1[103..108]
       -- The slice is 6 columns wide, so it has to be compared with 6
       -- blanks; comparing with a longer blank string can never match.
       if compare(Firstvol, repeat(' ', 6)) = 0 then
          Firstvol = Volser
       end if

       -- DSCSIZE
       -- field is suffixed with a M or K or G
       Dscsize = drop_trailing_blanks(s1[113..120])
       dscsize = 0 -- a blank field counts as zero bytes
       i2 = length(Dscsize)
       if i2 > 0 then
          i3 = find(Dscsize[i2], "KMG")
          if i3 then
             -- recognised suffix: parse the digits in front of it and scale
             s2 = value(Dscsize[1..i2-1])
             dscsize = s2[2] * size_scale[i3]
            else
             -- no recognised suffix: the whole field is the number, so do
             -- not strip its last character
             s2 = value(Dscsize)
             dscsize = s2[2]
          end if
       end if

       -- VOLSEQ
       Volseq = s1[130..135]

       -- VOLSNUM
       Volsnum = s1[146..149]
       s2 = value(Volsnum)
       volsnum = s2[2]

       -- MEDIA
       Media = drop_trailing_blanks(s1[154..161])
       if length(Media) < 2 then
          Media = "*blank*"
       end if

       -- DSEXPDT
       Dsexpdt = s1[166..173]

       -- CREDT
       Credt = s1[180..187]
       if compare(Credt,highdate) = 1 then
          highdate = Credt  -- keep highest date found for licence check
       end if

       -- CRETM
       Cretm = s1[192..196]

       -- CREJBN
       Crejbn = drop_cr(s1[204..211]) -- might be a trailing "\n"
       Crejbn = drop_trailing_blanks(Crejbn)
       if length(Crejbn) < 2 then
          Crejbn = "*blank* "
       end if

       -- DUSRDATA may be blank; the record was padded to 243 columns above,
       -- so the slice always exists and a missing field is simply blanks.
       -- NOTE(review): 223..243 is 21 columns although the field is
       -- described as 20 char - confirm the layout.
       Dusrdata = s1[223..243]

       Newdsn = Dsname -- default

       s5 = {Volser,    -- 1  6 char tape name
             dslabel,   -- 2  5 char filenumber
             Dsname,    -- 3  44 char filename
             Recfm,     -- 4  3 char record format
             lrecl,     -- 5  integer, record length
             blksize,   -- 6  integer, block size
             Firstvol,  -- 7  6 char, first tape in chain
             dscsize,   -- 8  atom, bytes in file
             Volseq,    -- 9  6 char, volume sequence number
             volsnum,   -- 10 integer, number of tapes in chain
             Media,     -- 11 8 char, tape type
             Dsexpdt,   -- 12 8 char, file expiry type
             Credt,     -- 13 8 char, file creation date
             Cretm,     -- 14 5 char, file creation time
             Crejbn,    -- 15 8 char, file creation jobname
             Dusrdata,  -- 16 20 char, user freeform data
             Newdsn,    -- 17 44 char, new filename
             recno}     -- 18 integer, this list counter

       -- check that the copy is not already done...
       -- the "ARCH2VTS" is inserted by executing the copy process,
       -- by a optional JCL step
       if compare("ARCH2VTS",Dusrdata[1..8]) = 0 then
          done_list = append(done_list,s5)
          done_vols = done_vols + 1
          done_bytes = done_bytes + dscsize
         else
          cleaned_data = append(cleaned_data,s5)
          -- keep track of files and bytes per volume
          if dslabel = 1 then
             -- cbv keeps totals of files, bytes per volume
             -- save prev
             cbv = append(cbv,track_data) -- total bytes by volume
             track_data = {Volser, dslabel, dscsize}
            else
             track_data[2] = dslabel
             track_data[3] = track_data[3] + dscsize
          end if
          -- construct index record too
          s7 = sprintf("%06d",{recno}) -- max 1 million records
          s8 = Volser & Dslabel & Volseq & Volsnum & s7 & Dsname
          ctt_index = append(ctt_index,s8)
       end if
    end for

    ctt_data = {} -- drop data not needed anymore
    -- sort ctt_index by volser, label, volseq
    ctt_index = sort(ctt_index)
    -- save final record
    cbv = append(cbv,track_data)
end procedure
---------------------------------------------------------------- 

The proc reads in the sequence ctt_data and creates ctt_index. Each ctt_index record is much shorter than the corresponding ctt_data record. My program does lookups against ctt_index, and then, when required, gets the rest of the fields by using the record counter embedded in the index entry (the same counter that is stored as field 18 of the full record).

But anyway, I think you have helped already - if I have fixed length records, then seek() against a file of ctt_data is the way to go!

Regards Alan

new topic     » goto parent     » topic index » view thread      » older message » newer message

Search



Quick Links

User menu

Not signed in.

Misc Menu