EuroAssembler Index Manual Download Source Macros


Sitemap Links Forum Tests Projects

textling.htm
Data
CodePages
Data
Unicode
Procedures
AutodetectEncoding
CreateOutputFile
ReadFileIni
Main
MakeIndex
WindowPaint
WindowProc
WindowResize
WindowTitle

This is a module of EuroTool program EuroText for Linux.


         EUROASM CPU=X64, Unicode=No, DumpWidth=36
textling PROGRAM Format=COFF, Width=64
           %DROPMACRO *
           INCLUDEHEAD argument.htm
           INCLUDE1 linabi.htm, cpuext64.htm, cpuext.htm, linsfile.htm, linf64.htm, \
                    ansi.htm,string64.htm, memory64.htm, time.htm, status32.htm
↑ Unicode
declares Unicode Points and their categories. They are tossed by the macro UCP into sections of [.rodata] segment.
These sections are named [CodePoint] (WORD) and [Relevance] (BYTE).
Sections [Translit] (DWORD) and [Entity] (QWORD) are not used in EuroText program.
All sections have the same number of members.
; Unicode database. Macro UCP fills two parallel sections of segment [.rodata]:
; the N-th WORD of [CodePoint] and the N-th BYTE of [Relevance] describe the same character.
; Main derives the number of members from (CodePointEnd-CodePoint)/2.
[.rodata]                    ; Declare global symbol at the beginning of each section:
 [CodePoint]                 ; Switch to section [CodePoint].
   CodePoint:                ; Label indicating the start of the section.
 [Relevance]                 ; Switch to section [Relevance].
   Relevance:                ; Label indicating the start of the section.

; Declare macroinstruction UCP which interprets the lines of UnicodePoints and populates sections [CodePoint] and [Relevance].
; Operands Translit and Entity are accepted for compatibility with the table format but nothing is emitted for them here.
UCP %MACRO CodePoint, Relevance, Translit, Entity
 [CodePoint]                 ; Switch to section [CodePoint].
     DW 0x%CodePoint         ; Define code point (unsigned word), e. g. 0x0000, 0x0001,..0xFFFF.
 [Relevance]                 ; Switch to section [Relevance].
     DB %Relevance           ; Define relevance of each code point (signed byte), e. g. Cc (-32), Nd (+16) etc.
 %ENDMACRO UCP

  INCLUDE unicode.htm        ; Expand the macro UCP with each line of the table UnicodePoints.

 [CodePoint]                 ; Return to section [CodePoint] after all its members were emitted.
   CodePointEnd:             ; Label indicating the end of the section.
↑ CodePages
declares translation tables. Fields of the table CodePages:
CPid is numeric identifier 0..65535 in Microsoft assignment;
CPname is encoding display name, e.g. "ISO-8859-2";
CPaltName defines alternative names (AlsoKnownAs) and other remarks, e.g. "Latin 2 (Central European)";
CPurl is URL of authoritative source, e.g. https://en.wikipedia.org/wiki/Windows-1250 (not used in this program);
CPtable is 0 or 128 words (4 hexadecimal digits) with corresponding BMP codepoint of upper 128 characters. CPtable is omitted in ASCII and Unicode encodings.
Members CPid are tossed by the macro CP into section [CPid].
Members CPname and CPaltName are zero-terminated and stored in section [CPname] one after another.
Translation tables are stored in section [CPtable] one after another. The first six tables are omitted.
; Code page database. Macro CP fills three sections of segment [.rodata]:
;   [CPid]    one WORD identifier per encoding,
;   [CPname]  two zero-terminated strings (name, alternative name) per encoding,
;   [CPtable] 128 WORDs per 8-bit encoding; encodings without a table contribute nothing.
; Main derives the number of encodings from (CPidEnd-CPid)/2.
[.rodata]                    ; Declare symbol at the beginning of each section of segment [.rodata]
 [CPid]
   CPid:
 [CPname]
   CPname:
 [CPtable]
   CPtable:
; Declare macroinstruction CP which interprets the lines of CodePages and populates sections [CPid], [CPname] and [CPtable].
; Operand CPurl is accepted but nothing is emitted for it.
; The translation table starts at ordinal operand 5; the loop below always emits that operand
; and then shifts the operand list, so each pass sees the next table member.
CP %MACRO CPid, CPname, CPaltName, CPurl, CPtable
 [CPid]                      ; Switch to section [CPid].
   DW %CPid                  ; Define encoding identifier, e. g. 437, 667, .. 28606.
 [CPname]                    ; Switch to section [CPname].
   DB %CPname,0,%CPaltName,0 ; Join name and alternative name.
 [CPtable]                   ; Switch to section [CPtable].
  %IF "%CPtable" !=== ""     ; The first six encodings don't have translation table, omit them.
i %FOR 1..128                ; Define 128 words of translation table for each encoding.
    DW 0x%5                  ; Member of CPtable, e. g. 00C7, 00FC etc.
    %SHIFT 1                 ; Take the next member of the table.
  %ENDFOR i                  ; Repeat the definition 128 times.
  %ENDIF
 %ENDMACRO CP

  INCLUDE codepage.htm       ; Expand the macro CP with each line of the table CodePages.
 [CPid]                      ; Return to section [CPid] after all its members were emitted.
   CPidEnd:                  ; Declare symbol at the end of section [CPid]. Size of the section is CPidEnd-CPid.
LST_INDEX  STRUC               ; Structure of the index records, one record (8 bytes) per displayed row.
.FA   D DWORD                  ; Offset of the start of line from OutputPtr (file address).
.Size D DWORD                  ; Size of the displayable line in bytes, plus Flags.
  ENDSTRUC LST_INDEX
; .Size does not exceed TermWidth. If FlagWrap is set, it may be less by 1 than TermWidth,
;   because the last character may be a french quote signalling the wrap.
; FlagWrap and NoFlags are complementary masks of LST_INDEX.Size:
;   the most significant bit is the flag, the remaining 31 bits are the size.
FlagWrap         = 0x8000_0000 ; This line is word-wrapped; put french quote at TermWidth-1.
NoFlags          = 0x7FFF_FFFF ; Other bits contain the actual line size.
TabSpaces        = 8           ; Number of columns counted for one tabulator.
MinTermHeight    = 8           ; The smallest number of rows requested from the terminal.
MinTermWidth     = 16          ; The smallest number of columns requested from the terminal.
MaxTermWidth     = 800         ; The largest number of columns requested; it is also the size of Spaces.
[.rodata]          ; Constant data.
EuroText            DB 'EuroText',0    ; Program name; printed in the banner and copied (8 bytes) to the window title.
Version             DB " %^DATE",0     ; Version suffix of the banner. NOTE(review): presumably the date of assembly - confirm.
Help:                                  ; Zero-terminated usage text printed by ErrorHelp and on wrong configuration.
 DB "is a viewer of text files, which may be written in",10
 DB "UTF-8, UTF-16, UTF-32 or in one of 75 OEM or ANSI 8-bit encodings.",10
 DB "Specify  ./eurotext.x -InputEncoding=? for the list of supported encodings.",10,10
 DB "Name of the viewed file can be put on the command line, for instance",10
 DB "  ./eurotext.x InputFile.txt",10
 DB "or it may be specified as command-line arguments:",10
 DB "  -InputFile=InputFile.txt",10
 DB "  -InputEncoding=UTF-8",10
 DB "  -WrapLines=no",10
 DB 0
; Byte order marks as they appear at the start of a file; AutodetectEncoding compares the input with them.
BOM_UTF32BE         DB 0x00,0x00,0xFE,0xFF
BOM_UTF32LE         DB 0xFF,0xFE,0x00,0x00
BOM_UTF16BE         DB 0xFE,0xFF
BOM_UTF16LE         DB 0xFF,0xFE
BOM_UTF8            DB 0xEF,0xBB,0xBF
; Zero-terminated UTF-8 strings used by WindowPaint for the two-column scroll box.
MediumShadeX2       DB 0xE2,0x96,0x92,0xE2,0x96,0x92,0  ; Twice U+2592 (scroll box background).
FullBlockX2         DB 0xE2,0x96,0x88,0xE2,0x96,0x88,0  ; Twice U+2588 (scroll box slider).
; Wrap marker: set foreground to RGB 192,0,0, emit U+00BB (french quote), set foreground to RGB 0,0,0.
WrapSymbol          DB 27,'[38;2;192;0;0m',0xC2,0xBB,27,'[38;2;0;0;0m',0

[.data]
; AnsiResize and ValResize form one string: WindowResize overwrites ValResize with "rows;columns" + 't',0
; and then prints the whole sequence starting at AnsiResize.
AnsiResize          DB 27,'[8;'
ValResize           DB 'xxxxxxxxx'  ; To be replaced with '8;16t',0
                                    ; NOTE(review): only 9 bytes are reserved; rows above 999 together with
                                    ;  3-digit columns would not fit - confirm that this cannot happen.
Spaces              DB MaxTermWidth * BYTE ' ' ; Padding source used by WindowTitle to complete the title row.
[.bss]
FileIni             DS FILE64          ; Configuration file /etc/eurotool/eurotext.ini.
FileInput           DS FILE64          ; The viewed file in its original encoding.
FileOutput          DS FILE64          ; The viewed file converted to UTF-8.
FileIndex           DS FILE64          ; Index of rows (LST_INDEX records) of FileOutput.
TtySize             DS TTYSIZE         ; Terminal size returned by ioctl TIOCGWINSZ.
TermIO              DS TERMIO
InputPtr            D QWORD            ; Start of the loaded input file.
InputSize           D QWORD            ; Size of the loaded input file in bytes.
InputEnd            D QWORD            ; InputPtr+InputSize.
OutputPtr           D QWORD            ; Start of the loaded UTF-8 text.
OutputSize          D QWORD            ; Size of the UTF-8 text in bytes.
OutputEnd           D QWORD            ; OutputPtr+OutputSize.
IndexPtr            D QWORD            ; Start of the loaded array of LST_INDEX records.
IndexEnd            D QWORD            ; End of the loaded array of LST_INDEX records.
CPnamePtr           D QWORD            ; Pointer to encoding name, e.g. "UTF-8",0.
CPtablePtr          D QWORD            ; Pointer to 8-bit encoding table.
TermWidth           D DWORD            ; Width of displayable window in characters.
TermHeight          D DWORD            ; Height of displayable window in characters.
CodePagesLength     D DWORD            ; Number of code pages (1+5+75).
CodePointLength     D DWORD            ; Number of characters in unicode.htm (1227).
ListPos             D DWORD            ; Ordinal number of index whose row is displayed on the top of terminal.
ListMax             D DWORD            ; Number of index rows. It may be changed by switching wrap|unwrap.
DocRow              D DWORD            ; Which row was clicked on by right mouse button.
DocCol              D DWORD            ; Which column was clicked on by right mouse button.
ThisRowWrap         D DWORD            ; 0 when nowrap, 1 when wrap.
ArgNr               D DWORD            ; Ordinal number of the command-line argument being parsed by Main.
KeyBuffer           D 32*BYTE
WorkMemory          D 800*BYTE         ; Scratch buffer shared by ReadFileIni, CreateOutputFile, WindowTitle and WindowPaint.
[.text]
Main
Main:: PROC
; Program entry point.
;  1. Print the banner and compute the number of members of the code page and code point tables.
;  2. Parse arguments from the optional configuration file and then from the command line.
;  3. Load the input file; autodetect its encoding if none was specified.
;  4. Resolve the translation table and the display name of the encoding.
;  5. Convert the text to UTF-8, index it and run the viewer until WindowProc returns.
; The program is terminated with errorlevel 4 (input file not found) or 8 (wrong arguments, file or memory error).
    StdOutput EuroText, Version, Eol=yes
    ; Prepare database sections.
    LEA RSI,[CPid:]
    LEA RCX,[CPidEnd:]
    SUB RCX,RSI
    SHR ECX,1                ; Members of [CPid] are WORDs.
    MOV [CodePagesLength],ECX
    LEA RSI,[CodePoint:]
    LEA RCX,[CodePointEnd:]
    SUB RCX,RSI
    SHR ECX,1                ; Members of [CodePoint] are WORDs.
    MOV [CodePointLength],ECX
    ; Try to load arguments from the configuration file "/etc/eurotool/eurotext.ini".
    FileAssign FileIni,=B'/etc/eurotool/eurotext.ini'
    FileExists? FileIni      ; CF=0 when the file exists (the same convention is relied on for FileInput below).
    JC .10:                  ; No configuration file; continue with the command line only.
    CALL ReadFileIni
.10:MOV EAX,[ArgNr]          ; Read arguments from the command-line.
    INC EAX
    MOV [ArgNr],EAX
    GetArg RAX, Frame=RSP    ; Returns the line in RSI,RCX with one argument, e. g. -IF="~/file.obj"
    JC .20:                  ; If there're no more arguments.
    CALL ArgParse::          ; Use ArgParse to translate it to a public symbol Arg***.
    JNC .10:
    StdOutput ErrorMessage::
    JMP ErrorHelp:
.20:LEA RSI,[ArgInputFile::]
    CMPB [RSI],0
    JZ ErrorHelp:            ; No input file was specified.
    LEA RBX,[FileInput]
    FileAssign RBX,RSI
    FileExists? RBX
    JNC .30:
    LEA RSI,[RBX+FILE64.Name]
    StdOutput =B"File """,RSI,=B""" was not found.",Eol=yes
    TerminateProgram 4
.30:; Input file was specified and exists.
    FileLoad RBX
    JC ErrorFileInput:
    MOV [InputPtr],RSI
    MOV [InputSize],RAX
    ADD RAX,RSI
    MOV [InputEnd],RAX
    MOV EAX,[ArgInputEncoding::]
    TEST EAX
    JNZ .40:                 ; The encoding was specified explicitly.
    CALL AutodetectEncoding
    MOV [ArgInputEncoding::],EAX
.40:LEA RDI,[CPid]                     ; Array of WORD encoding numbers: 20127, 65001, 1200, 1201 etc.
    MOV RSI,RDI
    MOV ECX,[CodePagesLength]
    REPNE SCASW                        ; Search for the encoding AX; RDI stops behind the matching member.
    SUB RDI,RSI
    SAR EDI,1
    MOV EDX,EDI                        ; EDX=1 (ASCII), 2 (UTF-8), 3 (UTF-16LE), ,,
    DEC EDI
    SAL EDI,8                          ; Each translation table occupies 128 WORDs = 256 bytes.
    LEA RSI,[CPtable-6*256+RDI]        ; RSI=address of 8-bit translation table (the first six encodings have none).
    MOV [CPtablePtr],RSI
    LEA RDI,[CPname]
    LEA RCX,[CPtable]
    SUB RCX,RDI                        ; RCX limits the scan; it assumes that [CPtable] follows [CPname].
    XOR EAX,EAX
.50:DEC EDX
    JZ .60:
    REPNE SCASB                        ; Skip CPname.
    REPNE SCASB                        ; Skip CPaltName.
    JMP .50:
.60:MOV [CPnamePtr],RDI
    CALL CreateOutputFile
    CALL WindowResize
    CALL MakeIndex
    CALL WindowPaint
    CALL WindowProc
    FileClose FileInput
    JSt [Status::],ArgLeaveTemporary,.90:
    FileClose FileOutput,FileIndex
.90:TerminateProgram
    RET
; Error exits shared by the procedures of this module.
ErrorHelp:
     StdOutput Help:,Eol=yes
     TerminateProgram 8
ErrorFileOutput:
     LEA RSI,[FileOutput.Name]
     JMP ErrorFile:
ErrorFileInput:
     LEA RSI,[FileInput.Name]
     JMP ErrorFile:
ErrorFileIndex:
     LEA RSI,[FileIndex.Name]
ErrorFile:                   ; RSI points to the name of the failing file.
     StdOutput =B'Error in file "',RSI,=B'"',Eol=yes
     JMP .Terminate:
ErrorAlloc::
     StdOutput =B"Error on memory allocation.",Eol=yes
.Terminate: TerminateProgram 8
  ENDP Main
ReadFileIni
Procedure ReadFileIni reads lines from FileIni in UTF-8 and parses each of them into configuration variables Arg***, which are kept in UTF-8, too.
Input
FileIni is assigned with the name.
Clobbers
RAX,RCX,RDX,RSI,RDI
ReadFileIni PROC
; Reads FileIni line by line and lets ArgParse translate each line to a configuration variable.
; A status message is composed in WorkMemory: Configuration "name" was accepted. | was not found.
; The closing quote of the file name is supplied by the second part of the message.
; When ArgParse rejects a line, its error message and the help are printed and the program terminates with errorlevel 8.
    SetSt [Status::],ArgFromFile       ; Tell ArgParse that arguments may not begin with / or -.
    LEA RDI,[WorkMemory]
    Concat$ RDI, RDI,=B'Configuration "',FileIni.Name
.10:FileStreamOpen FileIni,BufSize=4K
    JNC .20:
    Concat$ RDI,RDI,=B'" was not found.',=B(10)
    JMP .90:
.20:FileStreamReadLn FileIni           ; The first line: RSI=pointer, RAX=size.
    JBE .80:                           ; Error or end of file.
    MOV ECX,EAX
    ; The first line may begin with BOM. All three bytes 0xEF,0xBB,0xBF are required.
    CMP ECX,3
    JB .50:                            ; Too short to contain a BOM.
    CMPW [RSI],0xBBEF                  ; UTF-8 BOM, bytes 0 and 1?
    JNE .50:
    CMPB [RSI+2],0xBF                  ; UTF-8 BOM, byte 2?
    JNE .50:
    ADD RSI,3                          ; Skip the BOM.
    SUB ECX,3
    JMP .50:
.30:FileStreamReadLn FileIni           ; The following lines.
    JBE .80:
    MOV ECX,EAX
.50:CALL ArgParse::                    ; Parse the line RSI,RCX.
    JNC .30:
    StdOutput ErrorMessage::, Help
    TerminateProgram 8
.80:FileClose FileIni
    LEA RDI,[WorkMemory]
    Concat$ RDI,RDI,=B'" was accepted.',=B(10)
.90:RstSt [Status::],ArgFromFile
    RET
  ENDP ReadFileIni
AutodetectEncoding
OS-independent procedure AutodetectEncoding reads the first 256 KB of input file, tries all encodings one after another and summarizes relevances of its characters.
The encoding with highest sum of relevances is returned.
Input
[InputPtr] is pointer to the start of text.
[InputSize] is size of the data.
Output
RAX is the autodetected encoding, e. g. 65001, 1250 etc.
R10=relevance
Clobbers
RBX,RCX,RDX,RSI,RDI,R8,R9,R10,R11,R12,R13
AutodetectEncoding PROC
; Guesses the encoding of the text at [InputPtr], size [InputSize].
; At most the first 256 KB are examined. The sample is interpreted as ASCII, UTF-32LE, UTF-32BE,
; UTF-16LE, UTF-16BE, UTF-8 and as each 8-bit code page; relevances of the decoded characters
; are summed (the saldo) and the encoding with the highest saldo wins.
; Output: RAX=encoding identifier. Inputs shorter than 16 bytes are not examined and return 65001 (UTF-8).
;         When the best saldo is negative, 20127 (ASCII) is returned.
; Register roles: R8=start of sample, R9=size of sample, R10=best saldo so far, R11=saldo of the
;   current trial, R12=best encoding so far, R13=shift count applied to each relevance (0,1,2).
    MOV R8,[InputPtr]
    MOV R9,[InputSize]
    MOV R12,65001            ; UTF-8.
    MOV RAX,R12              ; It is also the result for inputs too short to be autodetected.
    CMP R9,16
    JB .90:                  ; Shorter files cannot be autodetected.
    CMP R9,256K
    JB .10:
    MOV R9D,256K
.10:MOV R10,0x8000_0000_0000_0000 ; R10 is the best relevance saldo so far. R12 is its code page.
    XOR R13,R13              ; R13 is 0,1,2 corresponding with character size 1,2,4.
                             ; Try encoding 20127 ASCII.
    SUB R11,R11              ; R11=relevance saldo.
    MOV RSI,R8               ; Restore pointer to text from R8.
    MOV RCX,R9               ; Restore size of text from R9.
    XOR EAX,EAX
.15:LODSB
    CMP AL,0x7F
    JA  .20:
    CALL .StoreRelevance:
    JMPS .22:
.20:ADD R11,??               ; Any byte above 0x7F deteriorates the relevance by ??=-128.
.22:LOOP .15:
    CMP R11,R10              ; Compare with the best saldo so far in R10.
    JLE .23:                 ; Continue with the next trial; no trial may be skipped.
    MOV R12,20127
    MOV R10,R11
.23:                         ; Try encoding 12000 UTF-32LE.
    MOV R13B,2               ; Character size=4.
    SUB R11,R11              ; R11=relevance saldo.
    MOV RSI,R8               ; Restore pointer to text from R8.
    MOV RCX,R9               ; Restore size of text from R9.
    SHR ECX,2                ; Number of DWORD characters; at least 4 because R9>=16.
    MOV EAX,[BOM_UTF32LE]
    CMP EAX,[RSI]
    JNE .25:
    ADD R11,Bm               ; Reward the byte order mark.
    ADD RSI,4
    DEC ECX
.25:LODSD
    CALL .StoreRelevance:
    LOOP .25:
    CMP R11,R10              ; Compare with the best saldo so far in R10.
    JLE .30:
    MOV R12,12000
    MOV R10,R11
.30:                         ; Try encoding 12001 UTF-32BE.
    SUB R11,R11              ; R11=relevance saldo.
    MOV RSI,R8               ; Restore pointer to text from R8.
    MOV RCX,R9               ; Restore size of text from R9.
    SHR ECX,2
    MOV EAX,[BOM_UTF32BE]
    CMP EAX,[RSI]
    JNE .35:
    ADD R11,Bm
    ADD RSI,4
    DEC ECX
.35:LODSD
    BSWAP EAX                ; Big endian to little endian.
    CALL .StoreRelevance:
    LOOP .35:
    CMP R11,R10              ; Compare with the best saldo so far in R10.
    JLE .40:
    MOV R12,12001
    MOV R10,R11
.40:                         ; Try encoding 1200 UTF-16LE.
    MOV R13B,1               ; Character size=2.
    SUB R11,R11              ; R11=relevance saldo.
    MOV RSI,R8               ; Restore pointer to text from R8.
    MOV RCX,R9               ; Restore size of text from R9.
    SHR ECX,1
    XOR EAX,EAX              ; LODSW below fills AX only; the upper half of EAX must stay zero.
    MOV AX,[BOM_UTF16LE]
    CMP AX,[RSI]
    JNE .45:
    ADD R11,Bm
    ADD RSI,2
    DEC ECX
.45:LODSW
    CALL .StoreRelevance:
    LOOP .45:
    CMP R11,R10              ; Compare with the best saldo so far in R10.
    JLE .50:
    MOV R12,1200
    MOV R10,R11
.50:                         ; Try encoding 1201 UTF-16BE.
    SUB R11,R11              ; R11=relevance saldo.
    MOV RSI,R8               ; Restore pointer to text from R8.
    MOV RCX,R9               ; Restore size of text from R9.
    SHR ECX,1
    XOR EAX,EAX
    MOV AX,[BOM_UTF16BE]
    CMP AX,[RSI]
    JNE .55:
    ADD R11,Bm
    ADD RSI,2
    DEC ECX
.55:LODSW
    XCHG AL,AH               ; Big endian to little endian.
    CALL .StoreRelevance:
    LOOP .55:
    CMP R11,R10              ; Compare with the best saldo so far in R10.
    JLE .60:
    MOV R12,1201
    MOV R10,R11
.60:                         ; Try encoding 65001 UTF-8.
    SUB R11,R11              ; R11=relevance saldo.
    MOV RSI,R8               ; Restore pointer to text from R8.
    MOV RCX,R9               ; Restore size of text from R9.
    MOV EAX,[BOM_UTF8]
    MOV EDX,[RSI]
    AND EAX,0x00FFFFFF       ; The BOM of UTF-8 has three bytes only.
    AND EDX,0x00FFFFFF
    CMP EAX,EDX
    JNE .65:
    ADD R11,Bm
    ADD RSI,3
    SUB RCX,3
.65:DecodeUTF8 RSI,.StoreRelevanceUTF8,Size=RCX,Width=32 ; Use macro from string64.htm.
    CMP R11,R10              ; Compare with the best saldo so far in R10.
    JLE .70:
    MOV R12,65001
    MOV R10,R11
.70:                         ; Try 8bit encoding OEM or WIDE according to CodePages 437..28606.
    XOR EDX,EDX              ; RDX is index to [CPtable]; DH is the ordinal of the table, DL stays 0.
    XOR R13,R13              ; Character size=1.
.72:LEA RBX,[RDX+CPtable:]   ; RBX=translation table of the tried encoding.
    SUB R11,R11              ; R11=relevance saldo.
    MOV RSI,R8               ; Restore pointer to text from R8.
    MOV RCX,R9               ; Restore size of text from R9.
.75:XOR EAX,EAX
    LODSB
    CMP AL,0x7F
    JBE .80:
    MOV AX,[RBX+2*RAX-256]   ; Translate AL (128..255) to unicode point in AX by this table.
.80:CALL .StoreRelevance:    ; Add relevance of unicode point EAX to R11.
    LOOP .75:                ; The next character from the sample.
    CMP R11,R10              ; Compare with the best saldo so far in R10.
    JLE .85:                 ; Skip when poor.
    MOV R10,R11              ; R10 is the best saldo so far.
    LEA RCX,[CPid:]
    MOVZX EAX,DH
    MOVZXW R12,[RCX+2*RAX+2*6]; R12 is the best encoding so far (the first six members of CPid have no table).
.85:INC DH                   ; Try the next encoding.
    LEA EAX,[EDX+256*6]      ; AH=ordinal of the next table + 6 = its ordinal in CPid.
    CMP AH,[CodePagesLength] ; Each OEM/ANSI table is 2*128 bytes long.
    JB .72:
    MOV EAX,20127            ; ASCII.
    TEST R10                 ; Best saldo.
    JS .90:                  ; If negative, it is not a text file. Simulate ASCII.
    MOV RAX,R12              ; Autodetected encoding is returned in RAX.
.90:RET
.StoreRelevanceUTF8: PROC    ; This subprocedure will add relevance of a character decoded from UTF-8
     XOR R13,R13             ;  with codepoint RAX to the saldo in R11. Clobbers: RAX,RDI,R13.
     CMP EAX,80h
     JB .4:
     INC R13
     CMP EAX,800h
     JB .4:
     INC R13
  .4:                        ; Continue with .StoreRelevance:.  R13=0,1,2.
   ENDP .StoreRelevanceUTF8: ; No RET here: the execution falls through to .StoreRelevance.
.StoreRelevance: PROC        ; This subprocedure will add relevance of a character
    PUSH RCX,RSI             ;  with codepoint EAX to the saldo in R11. Clobbers: RAX,RDI.
     XOR ESI,ESI
     CMP EAX,0x0000_FFFF
     JA .9:                  ; Characters above BMP (asian, emojis) do not influence the relevance.
     LEA RDI,[CodePoint]
     LEA RSI,[RDI+2]         ; SCASW stops behind the match; basing at CodePoint+2 yields a zero-based offset.
     MOV ECX,[CodePointLength]
     REPNE SCASW
     JE .6:
     MOV RSI,??              ; Deteriorate the relevance when this character is not in our Unicode table.
     JMP .8:
.6:  SUB RDI,RSI             ; Otherwise find the corresponding relevance.
     LEA RSI,[Relevance:]
     SHR RDI,1               ; Byte offset in [CodePoint] to ordinal number of the member.
     MOVSXB RSI,[RSI+RDI]
.8:  MOV RCX,R13
     SAL RSI,CL              ; Double (UTF-16) or quadruple (UTF-32) the relevance.
     ADD R11,RSI             ; Add it to the saldo R11.
.9: POP RSI,RCX
    CLC
    RET
   ENDP .StoreRelevance:
 ENDP AutodetectEncoding
CreateOutputFile
Assign and create the stream of ArgOutputFile (FileOutput).
Input
ArgOutputFile is the output file name. When it's 0, it will be created as /tmp/InputFile.lst.
FileInput must have been assigned with InputFile name and loaded between InputPtr..InputEnd.
Output
FileInput is closed and released.
FileOutput is mapped between OutputPtr..OutputEnd.
Error
CF=1 Error writing the FileOutput cancels the program.
CreateOutputFile PROC
; Converts the loaded input to UTF-8, writes it to FileOutput and loads FileOutput back to memory.
; FileOutput is named by ArgOutputFile, or /tmp/<input file name>.lst when ArgOutputFile is empty.
; The conversion is selected by [ArgInputEncoding]: UTF-8 and ASCII are copied as they are,
; UTF-16 and UTF-32 are decoded to code points and re-encoded, any other encoding is treated
; as 8-bit and its bytes 0x80..0xFF are translated by the table at [CPtablePtr].
; Output: [OutputPtr], [OutputSize], [OutputEnd]. Any file error continues at ErrorFileOutput.
; Register roles in the conversion loops: RSI=input pointer, RCX=remaining input characters,
;   RDX=WorkMemory (room for one UTF-8 character), RBX=translation table (8-bit encodings).
    LEA RSI,[ArgOutputFile::]
    LEA RBX,[FileOutput]
    CMPB [RSI],0
    JZ .10:
    FileAssign RBX,RSI
    JMP .20:
.10:MOV EAX,[FileInput.NameOffs]       ; Offset of the file name without path.
    LEA RSI,[FileInput.Name]
    ADD RSI,RAX
    FileAssign RBX,=B'/tmp/',RSI,=B'.lst'
.20:FileMkDir RBX
    JC ErrorFileOutput:
    FileStreamCreate RBX
    JC ErrorFileOutput:
    ; Convert the input to UTF-8 and store it to FileOutput.
    MOV RSI,[InputPtr]
    MOV RCX,[InputSize]
    LEA RDX,[WorkMemory]
    MOV EAX,[ArgInputEncoding::]
    Dispatch AX, 65001, 20127, 1200, 1201, 12000, 12001 ; Input encodings UTF or ASCII.
    ; Not dispatched: 8-bit encoding.
    MOV RBX,[CPtablePtr]
    LEA RDX,[WorkMemory]
    MOV RSI,[InputPtr]
.50:CMP RSI,[InputEnd]
    JNB .90:
    XOR EAX,EAX                        ; RAX indexes the table below; clear the remains of the previous code point.
    LODSB
    CMP AL,0x80
    JAE .60:
    FileStreamWriteByte FileOutput     ; ASCII character is stored as it is.
    JC ErrorFileOutput
    JMP .50:
.60:MOV AX,[RBX+2*RAX-256]             ; Translate AL (128..255) to code point.
    CALL .EncodeEAX:
    JMP .50:
.EncodeEAX:                            ; Subprocedure to encode codepoint EAX to UTF-8 and store the character to RDI.
    MOV RDI,RDX                        ; Input: RDX points to an empty string in memory.
    EncodeUTF8                         ; Encode EAX to UTF-8 and store it to RDI.
    SUB RDI,RDX                        ; Get size of UTF-8 character (1..4 bytes).
    FileStreamWrite FileOutput,RDX,RDI
    JC ErrorFileOutput
    RET
.20127:                                ; ASCII.
.65001:                                ; UTF-8.
    FileStreamWrite FileOutput,RSI,RCX ; Store the entire file UTF-8 or ASCII as is.
    JC ErrorFileOutput
    JMP .90
.12001:                                ; UTF-32BE.
    SHR RCX,2                          ; Number of characters; the size is unsigned.
    JZ .90:
.12001L:
    LODSD
    BSWAP EAX
    CALL .EncodeEAX:
    LOOP .12001L:
    JMP .90:
.12000:                                ; UTF-32LE.
    SHR RCX,2
    JZ .90:
.12000L:
    LODSD
    CALL .EncodeEAX:
    LOOP .12000L:
    JMP .90:
.1201:                                 ; UTF-16BE.
    SHR RCX,1                          ; Number of 16-bit words.
    JZ .90:
.1201L:
    XOR EAX,EAX                        ; LODSW fills AX only; clear the remains of the previous code point.
    LODSW
    XCHG AH,AL
    CMP AX,0xD800
    JB .1201X:
    CMP AX,0xDBFF
    JA .1201X:
    CMP RCX,1
    JBE .1201X:                        ; High surrogate is the last word; there is nothing to pair it with.
    SUB EAX,0xD800                     ; EAX is the high surrogate.
    SHL EAX,10
    MOV EDI,EAX
    XOR EAX,EAX
    LODSW                              ; The low surrogate expected.
    XCHG AH,AL
    DEC RCX                            ; Account for the second word taken from the input.
    CMP AX,0xDC00
    JB .1201X:
    CMP AX,0xDFFF
    JA .1201X:
    SUB EAX,0xDC00
    LEA EAX,[RDI+RAX+0x1_0000]
.1201X:
    CALL .EncodeEAX:
    LOOP .1201L:
    JMP .90:
.1200:                                 ; UTF-16LE.
    SHR RCX,1
    JZ .90:
.1200L:
    XOR EAX,EAX                        ; LODSW fills AX only; clear the remains of the previous code point.
    LODSW
    CMP AX,0xD800
    JB .1200X:
    CMP AX,0xDBFF
    JA .1200X:
    CMP RCX,1
    JBE .1200X:                        ; High surrogate is the last word; there is nothing to pair it with.
    SUB EAX,0xD800                     ; EAX is the high surrogate.
    SHL EAX,10
    MOV EDI,EAX
    XOR EAX,EAX
    LODSW                              ; The low surrogate expected.
    DEC RCX                            ; Account for the second word taken from the input.
    CMP AX,0xDC00
    JB .1200X:
    CMP AX,0xDFFF
    JA .1200X:
    SUB EAX,0xDC00
    LEA EAX,[RDI+RAX+0x1_0000]
.1200X:
    CALL .EncodeEAX:
    LOOP .1200L:
.90:FileClose FileInput, FileOutput
    FileLoad FileOutput                ; Reopen the converted text and load it.
    JC ErrorFileOutput
    MOV [OutputPtr],RSI
    MOV [OutputSize],RAX
    ADD RAX,RSI
    MOV [OutputEnd],RAX
    RET
  ENDP CreateOutputFile
MakeIndex
Create index (LST_INDEX records) of the UTF-8 text mapped between OutputPtr..OutputEnd.
MakeIndex is invoked whenever TermWidth changed or word-wrap was changed.
Input
OutputPtr..OutputEnd is the text in UTF-8.
Status:ArgWrapLines specifies whether the lines are wrapped at [TermWidth].
Output
FileIndex is written and mapped between
IndexPtr..IndexEnd.
MakeIndex PROC
; Creates FileIndex (named as FileOutput + ".index") with one LST_INDEX record per displayed row
; of the UTF-8 text at [OutputPtr]..[OutputEnd], then loads it to [IndexPtr]..[IndexEnd] and sets [ListMax].
; The first and the last records are empty (FA=0, Size=0).
; A row ends at line feed, at the end of text, or when the counted columns reach the limit R13;
; in wrap mode the rest of the line continues in the next record (marked FlagWrap),
; in nonwrap mode the rest of the line is skipped.
; Register roles: RBX=FileIndex, R14=OutputPtr, R15=OutputEnd, R13=column limit,
;   RSI=read pointer, RDI=start of the current row, EDX=counted columns.
; Any file error continues at ErrorFileIndex.
    LEA RBX,[FileIndex]
    FileClose RBX            ; If it was created before.
    LEA RSI,[FileOutput.Name]
    FileAssign RBX,RSI,=B'.index'
    FileStreamCreate RBX
    JC ErrorFileIndex:       ; Abort on error.
    MOV R14,[OutputPtr]
    MOV R15,[OutputEnd]
    MOV R13D,[TermWidth]
    MOV EAX,3
    JSt [Status::],ArgWrapLines,.10:
    DEC EAX
.10:SUB R13,RAX              ; R13 is TermWidth-3 when wrapping, otherwise TermWidth-2.
    ; Virtualy copy the index of source (FileOutput) to destination (FileIndex).
    XOR EAX,EAX
    FileStreamWriteDword RBX
    FileStreamWriteDword RBX ; Store 0-th index as an empty line first.
    MOV RSI,R14
.30: ; RSI points at the start of line.
    CMP RSI,R15              ; OutputEnd.
    JNB .70:
    MOV RAX,RSI
    SUB RAX,R14              ; RAX is FA of this line.
    FileStreamWriteDword RBX ; LST_INDEX.FA
    JC ErrorFileIndex
    XOR EDX,EDX              ; Counter of displayed characters in destination (0..TermWidth-2).
    MOV RDI,RSI              ; Pointer to the start of this source line.
.40:CMP RSI,R15              ; Never read behind OutputEnd, the last line may lack its line feed.
    JNB .0x0A:               ; Close the row; .60 below then detects the end of text.
    LODSB                    ; Read one character after another.
    Dispatch AL,0x0A, 0x08, 0x09 ; Other controls will be replaced with space (1 byte).
    INC EDX                  ; Count one character (ASCII or UTF-8).
    CMP AL,0x80
    JB .50:                  ; Jump when it's ASCII character 1 byte width.
    OR EAX,0xFFFF_FF00       ; Otherwise it's UTF-8 character 2-4 bytes width.
    NOT EAX                  ; Inverted lead byte: its highest set bit marks the first zero bit of the lead.
    BSR ECX,EAX              ; ECX=5,4,3 for 2,3,4 bytes of UTF-8 character.
    JZ .40:                  ; Byte 0xFF is not valid in UTF-8, ignore.
    CMP ECX,5
    JA .40:                  ; Unexpected continuation byte, ignore.
    CMP ECX,3
    JB .40:                  ; Leads 0xF8..0xFE are not valid in UTF-8, ignore.
    MOV EAX,6
    SUB EAX,ECX
    ADD RSI,RAX              ; RAX are remaining UTF-8 bytes (1,2,3).
    CMP RSI,R15
    JBE .50:
    MOV RSI,R15              ; Truncated character at the end of text: do not index behind OutputEnd.
.50:CMP RDX,R13              ; Out of terminal columns?
    JB .40:
    JNSt [Status::],ArgWrapLines, .0x0A:
    ; This line will be marked FlagWrap in wrap mode because on display it's longer than R13.
    MOV RAX,RSI
    SUB RAX,RDI              ; Subtract the start of this line (RDI).
    OR EAX,FlagWrap
    FileStreamWriteDword RBX ; LST_INDEX.Size
    JC ErrorFileIndex
    JMP .30:
.0x0A:                       ; End of row: line feed, end of text, or column limit in nonwrap mode.
    PUSH RAX                 ; AL is the last character read; it is tested at .60.
     MOV RAX,RSI
     SUB RAX,RDI             ; Subtract the start of this line (RDI).
     FileStreamWriteDword RBX ; LST_INDEX.Size
     JC ErrorFileIndex
    POP RAX
.60:CMP RSI,R15              ; OutputEnd?
    JNB .70:
    CMP AL,0x0A
    JE .30:
    LODSB
    JMP .60:                 ; Skip until EOL.
.0x08:                       ; Backspace takes back one counted column.
    CMP EDX,1
    JNA .40:
    DEC EDX
    JMP .40:
.0x09:                       ; Tabulator is counted as TabSpaces columns.
    ADD EDX,TabSpaces
    JMP .50:
.70:XOR EAX,EAX
    FileStreamWriteDword RBX ; Store the last index as empty.
    FileStreamWriteDword RBX
.80:FileClose RBX
    FileLoad RBX             ; Reopen and load the index.
    JC ErrorFileIndex        ; Without the index there is nothing valid to display.
    ADD RAX,RSI
    MOV [IndexPtr],RSI
    MOV [IndexEnd],RAX
    SUB RAX,RSI
    SHR EAX,3                ; SIZE# LST_INDEX=8
    MOV [ListMax],EAX
.90:RET
  ENDP MakeIndex
WindowResize
Resize the terminal window.
WindowResize PROC
; Asks the terminal for its size, requests a resize to that size saturated to
; MinTermHeight rows and MinTermWidth..MaxTermWidth columns (sequence ESC[8;rows;columns t),
; then asks for the size again and stores it to [TermHeight] and [TermWidth].
; The stored width is saturated to MinTermWidth..MaxTermWidth even when the terminal did not obey,
; because Spaces has MaxTermWidth bytes and the painting loops are limited by TermWidth.
; NOTE(review): the results of ioctl are not checked; a failure presumably leaves TtySize unchanged - confirm.
    LinABI ioctl,0,TIOCGWINSZ,TtySize
    MOVZXW EDX,[TtySize.ts_lines]
    MOVZXW ECX,[TtySize.ts_cols]
    MOV EAX,MinTermHeight              ; Minimal acceptable lines.
    CMP EDX,EAX
    JB .10:
    MOV EAX,EDX
.10:LEA RDI,[ValResize]
    StoD RDI                           ; Store the number of rows as decimal digits.
    MOV AL,';'
    STOSB
    MOV EAX,MinTermWidth               ; Minimal acceptable columns.
    CMP ECX,EAX
    JB .20:
    MOV EAX,ECX
.20:MOV EDX,MaxTermWidth
    CMP EAX,EDX
    JB .30:
    MOV EAX,EDX
.30:StoD RDI                           ; Store the number of columns as decimal digits.
    MOV AX,'t'
    STOSW                              ; Store 't' and the terminating zero.
    StdOutput AnsiResize
    LinABI ioctl,0,TIOCGWINSZ,TtySize  ; Read the size which was really set.
    MOVZXW EDX,[TtySize.ts_lines]
    MOV [TermHeight],EDX
    MOVZXW ECX,[TtySize.ts_cols]
    MOV EAX,MinTermWidth               ; Saturate the width to MinTermWidth..MaxTermWidth.
    CMP ECX,EAX
    JB .40:
    MOV EAX,ECX
.40:MOV ECX,MaxTermWidth
    CMP EAX,ECX
    JB .50:
    MOV EAX,ECX
.50:MOV [TermWidth],EAX
    RET
 ENDP WindowResize
WindowTitle
Construct the one-line title of the terminal window, which contains EuroText "file"{row}[column], encoding name, wrap|nowrap.
Input
ArgInputFile::, [DocRow], [DocCol], [CPnamePtr], Status:ArgWrapLines, [TermWidth]
Output
Title line is written to the terminal window.
WindowTitle PROC
; Composes the title in WorkMemory as
;   "  EuroText " + quoted ArgInputFile + {DocRow}[DocCol], encoding name, wrap|nowrap
; and prints it at the home position, cut or padded with spaces to [TermWidth].
; NOTE(review): the length of ArgInputFile is not limited against the size of WorkMemory.
; NOTE(review): GetLengthUTF8 is used here as a byte count for REP MOVSB and StdOutput Size=;
;   confirm that it returns bytes and not characters, otherwise non-ASCII names are truncated.
    LEA RDI,[WorkMemory::]     ; Title is constructed in WorkMemory.
    MOV RDX,RDI                ; RDX keeps the start of the title.
    MOV AX,'  '
    STOSW
    LEA RSI,[EuroText]
    MOV ECX,8                  ; Size of 'EuroText'.
    REP MOVSB
    MOV AX,' "'
    STOSW
    LEA RSI,[ArgInputFile::]
    GetLengthUTF8 RSI
    REP MOVSB
    MOV AX,'"{'
    STOSW
    MOV EAX,[DocRow]
    StoD RDI
    MOV AX,'}['
    STOSW
    MOV EAX,[DocCol]
    StoD RDI
    MOV EAX,'],  '
    STOSD
    DEC RDI                    ; Keep three characters only.
    MOV RSI,[CPnamePtr]
    TEST RSI
    JZ .20:                    ; Encoding name is not known.
    GetLength$ RSI
    REP MOVSB
.20:JNSt [Status::],ArgWrapLines,.30:
    MOV AX,', '                ; Loaded here, so that both ways to .20 store the same separator.
    STOSW
    JMPS .40:
.30:MOV EAX,', no'
    STOSD
.40:MOV EAX,'wrap'
    STOSD
    XOR EAX,EAX
    STOSB                      ; Terminate the title.
.50:StdOutput AnsiClrBgFFFFFF, AnsiClrFg800000, AnsiKeyHome
    GetLengthUTF8 RDX                  ; Get size to RCX.
    MOV ESI,[TermWidth]
    CMP ECX,ESI
    JBE .60:
    StdOutput RDX,Size=RSI             ; The title is longer than the window, cut it.
    JMP .90:
.60:StdOutput RDX,Size=RCX
    SUB ESI,ECX
    StdOutput Spaces,Size=RSI          ; Complete the title row with spaces.
.90:RET
  ENDP WindowTitle
WindowPaint
Procedure WindowPaint will draw the entire terminal window with its title and with the indexed contents of ListMax rows of the listing file (FileOutput).

The following example shows the listing with ListMax=22 rows, viewed by the terminal window with TermHeight=11 rows.
First line in terminal (title row) is fixed, it shows the program name, file name, file coordinates {row}[column], encoding and wrap status.
Vertical position of the displayed part of listing in the terminal window is specified by ListPos, which is the ordinal number of this listing row, which is displayed in terminal row 1 (just below the title row).

Cursor keys and the mouse wheel manipulate only the variable ListPos, which is saturated to 0..ListMax.

The last two columns in terminal window are dedicated to scroll box. The height of its slider is ScrollHeight=(TermHeight-1)*(TermHeight-1)/ListMax=10*10/22=4 saturated to 1..TermHeight-1=1..10.
Position of the topmost character of the slider is ScrollPos=(TermHeight-1)*(ListPos)/(ListMax)+1=10*7/22+1=4, saturated to 1..TermHeight-1=1..10.

┌──────────────────────────────┐ │ListRow=1 │ │ListRow=2 │ │ListRow=3 │ │ListRow=4 │ ╔════════════════════════╗ │ ║░EuroText░UTF-8░L░"file"║<title ║ListRow=7 TermRow=1 ▒║ │ ║ListRow=8 TermRow=2 ▒║ │ ║ListRow=9 TermRow=3 █║┐<─3 │ ║ListRow=10 TermRow=4 █║│ │ ║ListRow=11 TermRow=5 █║├──4 │ ║ListRow=12 TermRow=6 █║┘ │ ║ListRow=13 TermRow=7 ▒║ │ ║ListRow=14 TermRow=8 ▒║ │ ║ListRow=15 TermRow=9 ▒║ │ ║ListRow=16 TermRow=10 ▒║ │ ╚════════════════════════╝ │ │ListRow=18 │ │ListRow=19 │ │ListRow=20 │ │ListRow=21 │ │ListRow=22 │ └──────────────────────────────┘ TermHeight=11, ListMax=22, ListPos=7, ScrollPos=3, ScrollHeight=4.
Input
[ListPos], [ListMax], [TermWidth], [TermHeight], [OutputPtr]..[OutputEnd], [IndexPtr]..[IndexEnd]
Output
Terminal window is painted.
WindowPaint PROC
; Paints the title row, then terminal rows 2..TermHeight with the indexed rows of the UTF-8 text
; starting at row ListPos, and finally the two-column scroll box at the right edge.
; Each text row is composed in WorkMemory: control characters are replaced with space,
; tabulator with TabSpaces spaces, backspace removes the previously stored character,
; wrapped rows get WrapSymbol appended, and the row is padded with spaces.
; Register roles in the text loop: R12=terminal row, R15=TermWidth-1, RSI=source pointer,
;   R13=end of the source row, RDI=write pointer, R8/R9=write pointer before the current/previous
;   character, EDX=number of composed characters.
; NOTE(review): the composed row is not limited against the size of WorkMemory; multibyte
;   characters on a wide terminal may need more than its 800 bytes.
    MOV R15D,[TermWidth]
    DEC R15                  ; R15=TermWidth-1 character for the scroll column.
    MOV R12D,1               ; R12=row number: 2,3,4,,,TermHeight
    CALL WindowTitle
    StdOutput AnsiClrFg000000,AnsiClrBgFFFFD0
    MOV EAX,[ListPos]        ; Ordinal number of ListRow displayed in TermRow 1 (0,1,2,3,,,ListMax)
    CMP EAX,[ListMax]        ; Check its saturation.
    JBE .15:
    MOV EAX,[ListMax]
    MOV [ListPos],EAX
.15:INC R12                  ; Current row in terminal (2..TermHeight).
    CMP R12D,[TermHeight]    ; TermHeight is DWORD, compare 32 bits only.
    JA .45:                  ; Jump when TermHeight lines was displayed.
    LEA RDI,[AnsiAtRowCol+2]
    MOV RAX,R12              ; R12=TermRow number: 2,3,4,,,TermHeight
    StoD RDI                 ; Store row number 2,3,4,,,TermHeight
    MOV EAX,';1H'
    STOSD
    StdOutput AnsiAtRowCol   ; Place the cursor at TermRow R12, column 1.
    LEA RDI,[WorkMemory]     ; The line is constructed here.
    MOV R8,RDI               ; R8=Second cache of RDI.
    MOV R9,RDI               ; R9=First cache of RDI.
    XOR EDX,EDX              ; Counter of characters in this row.
    MOV EAX,[ListPos]        ; 0=empty row; 1=first row, 2=second row .. ListMax.
    LEA RAX,[RAX+R12-2]      ; R12=TermRow number: 2,3,4,,,TermHeight
    CMP EAX,[ListMax]
    JA .0x0A:                ; Behind the last index row: paint an empty row.
    MOV RBX,[IndexPtr]
    LEA RBX,[RBX+8*RAX]      ; SIZE# LST_INDEX=8
    CMP RBX,[IndexEnd]
    JAE .0x0A:
    MOV ESI,[RBX+LST_INDEX.FA]
    MOV ECX,[RBX+LST_INDEX.Size]
    ADD RSI,[OutputPtr]
    MOV EAX,ECX
    SHR EAX,31               ; Extract FlagWrap.
    MOV [ThisRowWrap],EAX    ; 0 or 1.
    AND ECX,NoFlags
    LEA R13,[RSI+RCX]        ; R13=pointer to the end of (wrapped) line.
.20:CMP RSI,R13              ; RSI points at the start of line.
    JAE .0x0A:
    LODSB                    ; RDI is at the beginning of UTF-8 character.
    MOV R9,R8                ; R9 is previous cache.
    MOV R8,RDI               ; R8 is current cache (for the case of backspace).
    Dispatch AL,0x0A,0x08,0x09 ; Other control will be replaced with space.
    CMP AL,0x20
    JAE .25:
    MOV AL,0x20              ; Replace other controls with space.
.25:INC EDX                  ; Count one character.
    STOSB                    ; The first byte of UTF-8 character.
    CMP AL,0x80
    JB .30:                  ; If it was ASCII character.
    OR EAX,0xFFFF_FF00
    NOT EAX                  ; Inverted lead byte: its highest set bit marks the first zero bit of the lead.
    BSR ECX,EAX              ; ECX=5,4,3 for 2,3,4 bytes of UTF-8 character.
    JZ .20:                  ; Byte 0xFF is not valid in UTF-8; nothing more to copy.
    CMP ECX,5
    JA .20:                  ; Unexpected continuation byte.
    CMP ECX,3
    JB .20:                  ; Leads 0xF8..0xFE are not valid in UTF-8.
    MOV EAX,6
    SUB EAX,ECX              ; EAX=1,2,3 (remaining bytes of UTF-8 character).
    MOV ECX,EAX
    REP MOVSB
.30:CMP RDX,R15
    JBE .20:
.0x0A:                       ; End of line.
    MOV ECX,[ThisRowWrap]    ; End of input bytes for this row. Complete the line at RDI with spaces, if necessary.
    JRCXZ .35:
    PUSH RSI
      LEA RSI,[WrapSymbol]
      MOV ECX,SIZE# WrapSymbol
      REP MOVSB              ; Store RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK alias french quote.
    POP RSI
.35:LEA RCX,[R15-1]          ; TermWidth-2 - the last displayed column.
    SUB ECX,EDX              ; Subtract the displayed characters EDX.
    JBE .40:
    MOV AL,' '
    REP STOSB
.40:LEA RAX,[WorkMemory]
    SUB RDI,RAX
    StdOutput RAX,Size=RDI
    CMP R12D,[TermHeight]
    JB  .15:
.45:; Scroll box.
    MOV EDI,[TermHeight]     ; Calculate ScrollHeight.
    DEC EDI
    MOV EAX,EDI
    MUL RAX                  ; (TermHeight-1)*(TermHeight-1)
    MOV ECX,[ListMax]
    TEST ECX
    JZ .90:                  ; Nothing is indexed; avoid division by zero.
    DIV RCX
    CMP EAX,0                ; Saturate ScrollHeight.
    JA  .50:
    MOV EAX,1
.50:CMP EAX,EDI
    JNA .55:
    MOV EAX,EDI
.55:MOV ESI,EAX              ; Let ESI=ScrollHeight.
    MOV EAX,[ListPos]        ; Calculate ScrollPos.
    MUL RDI                  ; RDI=TermHeight-1.
    MOV ECX,[ListMax]
    DIV RCX
    INC EAX
    INC EDI
    CMP EAX,EDI              ; Saturate ScrollPos.
    JNA .60:
    MOV EAX,EDI
.60:MOV EDX,EAX              ; EDX=ScrollPos, ESI=ScrollHeight.
    LEA RSI,[RSI+RDX+1]      ; ESI=position below the slider.
    StdOutput AnsiClrBgFEFEFF, AnsiClrFg000080
    MOV R12D,1               ; R12=row number: 2,3,4,,,TermHeight
.70:INC R12
    MOV RAX,R12              ; R12=row number: 2,3,4,,,TermHeight
    CMP EAX,[TermHeight]
    JA  .90:
    LEA RDI,[AnsiAtRowCol+2]
    StoD RDI                 ; Store row number 2,3,4,,,TermHeight
    MOV AX,';'
    STOSB
    MOV RAX,R15              ; TermWidth-1.
    StoD RDI
    MOV AX,'H'
    STOSW
    StdOutput AnsiAtRowCol   ; Place the cursor at row R12, col=TermWidth-1.
    CMP R12,RDX
    JB .80:                  ; Above the slider.
    CMP R12,RSI
    JAE .80:                 ; Below the slider.
    StdOutput FullBlockX2
    JMP .70:
.80:StdOutput MediumShadeX2
    JMP .70:
.0x08:                       ; Backspace.
    MOV RDI,R9               ; Restore position from previous cache R9.
    TEST EDX
    JZ .20:                  ; Nothing was composed yet; the counter must not wrap below zero.
    DEC EDX                  ; One composed character was removed.
    JMP .20:
.0x09:                       ; Tabulator.
    MOV ECX,TabSpaces
    ADD EDX,ECX
    MOV AL,' '
    REP STOSB
    JMP .20:
.90:RET
 ENDP WindowPaint
WindowProc
This procedure reads keyboard and mouse input and updates ListPos (the scroll position of the listing) accordingly.
WindowProc PROC
; Interactive event loop of the viewer.
; On entry it clears ICANON+ECHO in the terminal settings of file descriptor 0
; and emits AnsiMouseEnable. Then it loops:
;   1. query the terminal size; when it differs from TermHeight/TermWidth,
;      call WindowResize, MakeIndex and WindowPaint;
;   2. read one chunk of input into KeyBuffer;
;   3. compare it with the known key sequences and jump to the matching handler,
;      otherwise try to parse a mouse report  Esc [ < button ; column ; row m.
; Handlers that move the view put the proposed position to EBX and go through
; .Saturate, which clamps it, stores it to [ListPos] and repaints.
; The procedure returns when AnsiKeyEscape is matched, after emitting
; AnsiMouseDisable and setting ICANON+ECHO again.
; Registers written here: RAX,RBX,RCX,RDX,RSI,RDI,R8 (plus whatever the invoked
; macros and procedures change - not visible in this procedure).
    LEA RBX,[TermIO]
    LinABI ioctl,0,TCGETS,RBX          ; Load current local terminal status.
    RstSt [RBX+TERMIO.c_lflag],ICANON+ECHO ; Reset both flags in the loaded status.
    LinABI ioctl,0,TCSETS,RBX          ; Disable canonical mode and terminal echo.
    StdOutput AnsiMouseEnable
    JMP .ReadKeyboard:
.Resized:                              ; Terminal dimensions changed: recompute the layout and repaint.
    CALL WindowResize
    CALL MakeIndex
    CALL WindowPaint
.ReadKeyboard:
    LinABI ioctl,0,TIOCGWINSZ,TtySize  ; Check if terminal dimension changed.
    MOVZXW EDX,[TtySize.ts_lines]
    MOVZXW ECX,[TtySize.ts_cols]
    CMP EDX,[TermHeight]
    JNE .Resized:
    CMP ECX,[TermWidth]
    JNE .Resized:
    XOR EDI,EDI                        ; RDI=0 serves as the zero filler and as file descriptor 0.
    LEA RSI,[KeyBuffer]
    MOV R8,RSI                         ; R8 keeps the start of KeyBuffer for all comparisons below.
    MOV [RSI],RDI                      ; Pre-clear the first 8 bytes of the keyboard buffer.
    MOV EDX,SIZE# KeyBuffer
    LinABI read,RDI,RSI,RDX            ; Read the keyboard in the main window.
    TEST EAX
    JLE .ReadKeyboard:                 ; Nothing was read (0) or read failed (negative -errno): poll again.
                                       ; Signed test is required; TEST clears CF, so JNA would catch zero only.
    XOR ECX,ECX                        ; Clear RCX; only CL is loaded in the loop below.
    MOV EBX,[ListPos]                  ; EBX=current position, the handlers adjust it relatively.
; Compare the start of KeyBuffer with each known key sequence (prefix match).
; REPE CMPSB leaves ZF=1 only when all SIZE# %key bytes were equal.
key %FOR AnsiKeyUp,AnsiKeyDn,AnsiKeyPgUp,AnsiKeyPgDn,AnsiKeyHome,AnsiKeyEnd, \
         AnsiKeyCtrlHome,AnsiKeyCtrlEnd,AnsiKeyCtrlPgUp,AnsiKeyCtrlPgDn,     \
         AnsiKeyCtrlUp,AnsiKeyCtrlDn,AnsiKeyAltUp,AnsiKeyAltDn,              \
         AnsiKeyShiftUp,AnsiKeyShiftDn,AnsiKeyShiftTab,AnsiKeyTab,           \
         AnsiKeyW,AnsiKeyw,AnsiKeyAltW,AnsiKeyAltw,AnsiKeyCtrlW,AnsiKeyEscape
       LEA RDI,[%key]
       MOV CL,SIZE# %key
       MOV RSI,R8
       REPE CMPSB
       JE .%key:
    %ENDFOR key
; No key matched. Try to parse a mouse report: Esc [ < button ; column ; row m
; NOTE(review): this looks like the xterm SGR mouse encoding, presumably switched on by AnsiMouseEnable - confirm.
    MOV RSI,R8               ; Read the mouse.
    LODSW
    CMP AX,0x5B1B            ; Esc [
    JNE .ReadKeyboard:
    LODSB
    CMP AL,0x3C              ; Less-than character?
    JNE .ReadKeyboard:
    LodD                     ; Mouse button number to EAX. CF signals that no number was loaded (as used here).
    JC  .ReadKeyboard:
    CMP AL,64                ; Scroll wheel up?
    JE .AnsiKeyCtrlUp:       ; Wheel is handled like Ctrl+Up (EBX=ListPos is still valid).
    CMP AL,65                ; Scroll wheel down?
    JE .AnsiKeyCtrlDn:
    MOV ECX,EAX              ; Not scrolled. CL=0 for left button; CL=2 for right button.
    LODSB
    CMP AL,';'
    JNE .ReadKeyboard:
    LodD
    JC .ReadKeyboard:
    MOV EBX,EAX              ; X-coordinate of mouse cursor.
    SHL EBX,16               ; Keep the column in the upper word of EBX.
    LODSB
    CMP AL,';'
    JNE .ReadKeyboard:
    LodD
    JC .ReadKeyboard:
    MOV BX,AX                ; Y-coordinate of mouse cursor.
    LODSB
    CMP AL,'m'               ; Only reports terminated with lowercase 'm' are accepted, others are ignored.
    JNE .ReadKeyboard:
    ; Mouse cursor coordinates are in EBX. Row number is in BX, column in the upper word, button in CL.
    MOV EDX,EBX
    SHR EDX,16
    MOV EAX,[TermWidth]
    SUB EAX,3                ; AX is column of the scrollbox.
    CMP DX,AX                ; DX is the column where the mouse clicked.
    JA .Scroll:              ; Clicked at the scrollbox.
    TEST CL,CL
    JZ .MouseLeftButton:
    CMP CL,2
    JE .MouseRightButton:
    JMP .ReadKeyboard:
.Scroll:                     ; Clicked at the scroll box, row BX.
    MOVZX EAX,BX
    TEST EAX
    JZ .30:
    DEC EAX                  ; EAX=zero-based row of the click.
.30:MOV ECX,[ListMax]
    MUL RCX                  ; RDX:RAX=row*ListMax.
    MOV ECX,[TermHeight]
    DIV RCX                  ; EAX=row*ListMax/TermHeight is the proposed position.
    MOV EBX,EAX
    JMP .Saturate:           ; Clamp it like every other movement, then store and repaint.
.AnsiKeyCtrlUp:
.AnsiKeyAltUp:
.AnsiKeyShiftUp:
    SUB EBX,3                ; Modified Up moves by 4 rows in total (3 here + 1 below).
.AnsiKeyUp:
    DEC EBX
.Saturate: ; Saturate the proposed EBX=ListPos to the range 0..ListMax-2   (if ListMax>TermHeight),
           ; else to the range 0..1. Then store it to [ListPos] and repaint.
    TEST EBX
    JNS .50:
    XOR EBX,EBX              ; If EBX was lower than 0.
.50:MOV EAX,[ListMax]
    CMP EAX,[TermHeight]
    JA .60:
    CMP EBX,1                ; ListMax<=TermHeight: allow positions 0 and 1 only.
    JBE .70:
    MOV EBX,1
    JMP .70:
.60:SUB EAX,2                ; ListMax>TermHeight: the highest allowed position is ListMax-2.
    CMP EBX,EAX
    JBE .70:
    MOV EBX,EAX
.70:MOV [ListPos],EBX
    CALL WindowPaint
    JMP .ReadKeyboard:
.AnsiKeyCtrlDn:
.AnsiKeyAltDn:
.AnsiKeyShiftDn:
    ADD EBX,3                ; Modified Down moves by 4 rows in total (3 here + 1 below).
.AnsiKeyDn:
    INC EBX
    JMP .Saturate:
.AnsiKeyPgUp:
    SUB EBX,[TermHeight]     ; Move up by TermHeight-2 rows.
    ADD EBX,2
    JMP .Saturate:
.AnsiKeyPgDn:
    ADD EBX,[TermHeight]     ; Move down by TermHeight-2 rows.
    SUB EBX,2
    JMP .Saturate:
.AnsiKeyCtrlHome:
.AnsiKeyCtrlPgUp:
.AnsiKeyHome:
    XOR EBX,EBX              ; Go to the position 0.
    JMP .Saturate:
.AnsiKeyCtrlEnd:
.AnsiKeyCtrlPgDn:
.AnsiKeyEnd:
    MOV EBX,[ListMax]        ; Propose ListMax-TermHeight+2; a negative result is zeroed in .Saturate.
    SUB EBX,[TermHeight]
    ADD EBX,2
    JMP .Saturate:
.MouseLeftButton:
.MouseRightButton:           ; Both buttons: remember the clicked position and refresh the title.
    MOVZXW EAX,BX            ; Clicked terminal row.
    ADD EAX,[ListPos]
    SUB EAX,2
    MOV [DocRow],EAX         ; DocRow=row+ListPos-2.
    SHR EBX,16
    MOV [DocCol],EBX         ; DocCol=clicked terminal column.
    CALL WindowTitle
    JMP .ReadKeyboard:
.AnsiKeyTab:
.AnsiKeyShiftTab:            ; Tab keys are recognized but have no action.
    JMP .ReadKeyboard:
.AnsiKeyW:
.AnsiKeyw:
.AnsiKeyAltW:
.AnsiKeyAltw:
.AnsiKeyCtrlW:
    InvSt [Status::],ArgWrapLines      ; Toggle the line-wrapping option, then rebuild and repaint.
    CALL WindowTitle
    CALL MakeIndex
    CALL WindowPaint
    JMP .ReadKeyboard:
.AnsiKeyEscape:                        ; Leave the event loop.
    StdOutput AnsiMouseDisable
    LEA RBX,[TermIO]
    LinABI ioctl,0,TCGETS,RBX          ; Get current terminal status.
    SetSt [RBX+TERMIO.c_lflag],ICANON+ECHO
    LinABI ioctl,0,TCSETS,RBX          ; Restore canonical mode and terminal echo.
    RET
  ENDP WindowProc
  ENDPROGRAM textling

▲Back to the top▲