Re: UTF-8 in Windows
- Posted by jacquesd Sep 14, 2008
- 1247 views
Here is a program I wrote for my own needs:
--NAME: OemAnsi.exw
--DESCRIPTION: converts an OEM text file to an ANSI text file (or the reverse). It can also be used as a command line filter.
--TARGET_PLATFORM: windows 9x+
--DATE: 2007-09-14
--AUTHOR: Jacques Deschênes, Baie-Comeau, Canada
--DETAIL: This program is only of interest to those who use Windows in languages with accented characters, such as French.
-- In French, when one sends the output of a console command to a text file, the accented characters are
-- incorrect because the console uses the OEM character set while Windows applications like Notepad use the ANSI character set.
-- This is annoying if, like me, you often use command scripts to gather information in text files.
--
-- This program can be used with given input and output file names as parameters or as a filter.
-- examples:
-- to convert Oem_file.txt to ansi_file.txt type on command line: exwc.exe OemAnsi.exw -i oem_file.txt -o ansi_file.txt
-- to convert from ansi to oem type: exwc.exe OemAnsi.exw -r -i ansi_file.txt -o oem_file.txt
-- to use as a command line filter type: for /? | exwc.exe OemAnsi.exw > for_help_ansi.txt
-- then you get the help for the "for" command in an ansi text file.
-- file names with spaces must be quoted.
-- NOTES:
-- if you intend to bind this program or translate it to C, use the -con option if you want it to work as a filter.
-- 1) When translated with Open Watcom version 1.7, and oemansi.exe is executed without any option,
--    accented characters are ignored. This problem doesn't exist with the Borland 5.5.1 compiler.
-- 2) When translated with Borland 5.5.1, CTRL-Z is not recognized as end of file when reading from STDIN.
-- REF: http://msdn.microsoft.com/en-us/library/ms647473(VS.85).aspx
--      http://msdn.microsoft.com/en-us/library/ms647493(VS.85).aspx

include machine.e
include dll.e
include wildcard.e
with trace

-- win32 api calls
constant user32=open_dll("user32.dll")
constant iOemToChar = define_c_func(user32,"OemToCharA",{C_POINTER,C_POINTER},C_INT)
constant iCharToOem = define_c_func(user32,"CharToOemA",{C_POINTER,C_POINTER},C_INT)

-- Run one of the user32 translation routines (iOemToChar or iCharToOem) over
-- 'line' and return the translated bytes. The result has the same length as
-- 'line'.
function Translate(integer api_func, sequence line)
    atom pString, fnVal
    sequence converted

    -- allocate_string() copies 'line' and appends the terminating 0 that the
    -- API needs to find the end of the text.
    pString = allocate_string(line)
    -- Source and destination are the same buffer: the translation is done in
    -- place. The API result is not used but must be assigned.
    fnVal = c_func(api_func, {pString, pString})
    converted = peek({pString, length(line)})
    free(pString)
    return converted
end function

-- Convert 'line' from the ANSI character set to the OEM character set.
function CharToOem(sequence line)
    return Translate(iCharToOem, line)
end function

-- Convert 'line' from the OEM character set to the ANSI character set.
function OemToChar(sequence line)
    return Translate(iOemToChar, line)
end function

----------------
integer fBinded, fReverse, fFilterStdIn
sequence InpFile, OutFile

constant STDIN=0, STDOUT=1, STDERR=2, BAD_FILE_HANDLE = -1

-- Print the help text on STDOUT and terminate the program with 'exit_code'
-- as the process status (0 = success, non-zero = failure).
procedure usage(integer exit_code)
    puts(STDOUT,"oemansi convert oem text to ansi text or reverse.\n"&
                "USAGE: oemansi [-r] [-i input_file] [-o output_file]\n"&
                " -r option to convert from ansi to oem\n"&
                " -i input_file indicate file name to convert.\n"&
                " If this option is missing, input is read from STDIN\n"&
                " -o out_file indicate name of output file.\n"&
                " If this option is missing output to STDOUT\n"&
                " filter mode usage example: type oem.txt | oemansi > ansi.txt\n\n")
    abort(exit_code)
end procedure

-- Report a command line error on STDERR, show the help text and terminate
-- with a failure status.
procedure error()
    puts(STDERR,"oemansi bad usage.\n\n")
    usage(1)
end procedure

-- Return 1 when 'name' begins with a double quote that has not been closed
-- yet (more command line words belong to it), 0 otherwise.
function QuoteIsOpen(sequence name)
    if length(name) = 0 or name[1] != '"' then
        return 0
    end if
    -- A lone '"' is an opening quote, not an empty quoted name.
    if length(name) >= 2 and name[$] = '"' then
        return 0
    end if
    return 1
end function

-- Remove one pair of enclosing double quotes from 'name', if present.
function StripQuotes(sequence name)
    if length(name) >= 2 and name[1] = '"' and name[$] = '"' then
        return name[2..$-1]
    end if
    return name
end function

-- Parse the command line into fBinded, fReverse, fFilterStdIn, InpFile and
-- OutFile. Calls error() (which terminates the program) on a malformed
-- command line.
procedure ParseCommandLine()
    sequence argv, arg
    -- Parser state:
    --    0          expecting an option
    --   'I' / 'O'   expecting the file name of -i / -o
    --  -'I' / -'O'  inside a quoted file name that spans several arguments
    -- (named 'pending' because 'switch' is a reserved word in Euphoria 4)
    integer pending

    --trace(1)
    fBinded = 0
    fReverse = 0
    fFilterStdIn = 0
    InpFile = ""
    OutFile = ""
    pending = 0
    argv = command_line()
    if equal(argv[1], argv[2]) then
        fBinded = 1
    end if
    for i = 3 to length(argv) do
        arg = argv[i]
        if pending = 0 then
            -- The length test guards against an empty argument ("").
            -- Arguments that do not start with '-' are ignored here.
            if length(arg) and arg[1] = '-' then
                if length(arg) < 2 then
                    error()
                end if
                pending = upper(arg[2])
                if pending = 'R' then
                    fReverse = 1
                    pending = 0
                elsif pending = '?' then
                    usage(0)
                elsif pending != 'I' and pending != 'O' then
                    -- Unknown option: reject it now, even if it is the
                    -- last argument.
                    error()
                end if
            end if
        elsif pending = 'I' then
            if length(arg) = 0 then
                error()
            end if
            InpFile = arg
            if QuoteIsOpen(InpFile) then
                pending = -'I'
            else
                InpFile = StripQuotes(InpFile)
                pending = 0
            end if
        elsif pending = -'I' then
            InpFile &= ' ' & arg
            if not QuoteIsOpen(InpFile) then
                InpFile = StripQuotes(InpFile)
                pending = 0
            end if
        elsif pending = 'O' then
            if length(arg) = 0 then
                error()
            end if
            OutFile = arg
            if QuoteIsOpen(OutFile) then
                pending = -'O'
            else
                OutFile = StripQuotes(OutFile)
                pending = 0
            end if
        elsif pending = -'O' then
            OutFile &= ' ' & arg
            if not QuoteIsOpen(OutFile) then
                OutFile = StripQuotes(OutFile)
                pending = 0
            end if
        else
            error()
        end if
    end for
    -- -i / -o without a file name, or a quoted name that was never closed.
    if pending != 0 then
        error()
    end if
    if length(InpFile) = 0 then
        fFilterStdIn = 1
    end if
end procedure

-- Copy the input (InpFile, or STDIN in filter mode) to the output (OutFile,
-- or STDOUT when no output file was given), translating each line with
-- OemToChar(), or with CharToOem() when fReverse is set. Terminates with a
-- failure status if a file cannot be opened.
procedure Convert()
    integer fi, fo
    object line

    if fFilterStdIn then
        fi = STDIN
    else
        fi = open(InpFile,"r")
        if fi = BAD_FILE_HANDLE then
            printf(STDERR,"Failed to open %s\n",{InpFile})
            abort(1)
        end if
    end if
    if length(OutFile) then
        fo = open(OutFile,"w")
        if fo = BAD_FILE_HANDLE then
            printf(STDERR,"failed to open %s\n",{OutFile})
            if fi != STDIN then
                close(fi)
            end if
            abort(1)
        end if
    else
        fo = STDOUT
    end if
    line = gets(fi)
    -- gets() returns an atom instead of a sequence at end of file.
    while sequence(line) do
        if fReverse then
            puts(fo, CharToOem(line))
        else
            puts(fo, OemToChar(line))
        end if
        line = gets(fi)
    end while
    -- Use '!=' here: 'not fo = STDOUT' parses as '(not fo) = STDOUT' because
    -- unary 'not' binds tighter than '=', which left the output file unclosed.
    if fi != STDIN then
        close(fi)
    end if
    if fo != STDOUT then
        close(fo)
    end if
end procedure

ParseCommandLine()
Convert()
abort(0) -- 0 = successful completion