This is a CON module of EuroTool program EuroSort.
; Assembler options valid for the whole source: 64-bit CPU, SIMD instructions allowed, automatic alignment on.
EUROASM CPU=X64,SIMD=yes,AutoAlign=yes,Unicode=no
; Start with a clean macro namespace.
%DROPMACRO *
; The program is built as a 64-bit COFF module. The MaxExpansions option is commented out.
sortwinc PROGRAM Format=COFF, Width=64, ; MaxExpansions=120K
; Interface (HEAD block) of module "argument", which supplies the Arg*** symbols used below - presumably; confirm in argument.htm.
INCLUDEHEAD1 argument.htm
; Macro libraries; INCLUDE1 includes each of them only once.
INCLUDE1 winabi.htm, cpuext.htm, wins.htm, winscon.htm, winf64.htm, winsfile.htm, \
memory64.htm,cpuext64.htm,time.htm,string64.htm,status32.htm,sort64.htm
[.rodata] segment.
[CodePoint] (WORD) and [Relevance] (BYTE).
[Translit] (DWORD) and [Entity] (QWORD) are not used in EuroSort program.
[.rodata] ; Declare sections [CodePoint] and [Relevance] by Unicode database.
; Both sections are filled in parallel: the n-th WORD of [CodePoint] and the n-th BYTE of [Relevance]
; describe the same character, so an index found in one section is valid in the other.
[CodePoint] ; Switch to section [CodePoint] of WORDs with code points 0x0000..0xFFFF.
CodePoint: ; Label indicating the start of the section.
[Relevance] ; Switch to section [Relevance] of BYTEs with relevance values -128..+127.
Relevance: ; Label indicating the start of the section.
; Declare macroinstruction UCP which interprets the lines of UnicodePoints and populates sections [CodePoint] and [Relevance].
; Operands Translit and Entity are accepted but not emitted by this program.
UCP %MACRO CodePoint, Relevance, Translit, Entity
[CodePoint] ; Switch to section [CodePoint].
    DW WORD 0x%CodePoint ; Define code point (unsigned word), e. g. 0x0000, 0x0001,,,0xFFFF.
[Relevance] ; Switch to section [Relevance].
    DB BYTE %Relevance ; Define relevance of each code point (signed byte), e. g. Cc=-32, Nd=+16 etc.
%ENDMACRO UCP
INCLUDE "unicode.htm" ; Expand the macro UCP with each line of the table UnicodePoints.
[CodePoint] ; Switch to section [CodePoint].
CodePointEnd: ; Label indicating the end of the section.
[Relevance] ; Switch to section [Relevance].
RelevanceEnd: ; Label indicating the end of the section.
"ISO-8859-2";
"Latin 2 (Central European)";
https://en.wikipedia.org/wiki/Windows-1250 (not used in this program);
[CPid].
[CPname] one after another.
[CPtable] one after another. The first six tables are omitted.
[.rodata] ; Declare global symbol at the beginning of each section:
[CPid] ; Switch to section [CPid] of WORDs with code page identifiers.
CPid: ; Label indicating the start of the section.
[CPtable] ; Switch to section [CPtable] with translation table 128*WORD of each code page.
CPtable: ; Label indicating the start of the section.
; Declare macroinstruction CP which interprets the lines of CodePages and populates sections [CPid] and [CPtable].
; The tables are emitted to the same section [CPtable] which carries the label CPtable:,
; so the label marks the first table regardless of how the linker orders other sections.
CP %MACRO CPid, CPname, CPaltName, CPurl, CPtable
[CPid] ; Switch to section [CPid].
    DW %CPid ; Define encoding identifier, e. g. 437, 667,,,28606.
[CPtable] ; Switch to section [CPtable].
    %IF "%CPtable" !=== "" ; The first six encodings don't have translation table, omit them.
i     %FOR 1..128 ; Define 128 words of translation table for each encoding.
        DW 0x%5 ; Member of CPtable, e. g. 00C7, 00FC etc.
        %SHIFT 1 ; Take the next member of the table (%6, %7, %8,..%134).
      %ENDFOR i ; Repeat the definition 128 times.
    %ENDIF
%ENDMACRO CP
INCLUDE "codepage.htm" ; Expand the macro CP with each line of the table CodePages.
[CPid] ; Switch to section [CPid]
CPidEnd: ; Behind the last word of the section.
; Declarations shared with the other modules of EuroSort.
HEAD ; Block HEAD..ENDHEAD will be included to other sources.
; One SORT_INDEX record is kept per sorted line; the index file is an array of them (two DWORDs each).
SORT_INDEX STRUC ; A structure which describes one line|record of the sorted file.
.Ptr D DWORD ; Offset related to the start of InputFile (to [HeaderInMemEnd]).
.Size D DWORD ; Size of the record in bytes.
ENDSTRUC SORT_INDEX
; Code page identifier handed to WideCharToMultiByte and MultiByteToWideChar in MainCon.
CP_UTF8 EQU 65001 ; A constant for WinABI functions.
ENDHEAD
[.rodata] ; Text constants.
; Program banner in UTF-16, printed first by MainCon.
Version:: DU "EuroSort version %^Date",13,10,0
; Help is both the online help and the default contents of eurosort.ini (MainCon stores it with FileStore).
; It starts with the UTF-8 byte order mark; MainCon prints it to the console from Help+3 to skip those three bytes.
Help DB 0xEF,0xBB,0xBF
DB "; EuroSort default configuration (UTF-8):",13,10
DB "/InputFile= ; Input file name to be sorted.",13,10
DB "/OutputFile= ; Output file name where the sorted input will be saved.",13,10
DB "/InputEncoding= ; Encoding of InputFile; autodetect when not specified. /IE=? for help.",13,10
DB "/Locale= ; Use national sorting preferences. /Locale=? for help.",13,10
DB "/HeaderSize=0 ; Number of bytes to omit from sort at the begining.",13,10
DB "/HeaderLength=0 ; Number of lines to omit from sort at the begining.",13,10
DB "/FooterSize=0 ; Number of bytes to omit from sort at the end.",13,10
DB "/FooterLength=0 ; Number of lines to omit from sort at the end.",13,10
DB "/RecordSize=0 ; Fixed size in bytes if >0; otherwise variable size ended by EOL.",13,10
DB "/KeyOffset=0 ; Offset of the sorting key in chars from the beginning of the record.",13,10
DB "/KeyLength=-1 ; Key size in chars; up to the end of record when -1.",13,10
DB "/KeyReverse=false ; Sort direction descending.",13,10,
DB "/MergeSpaces=false ; Treat multiple white spaces as one space.",13,10
DB "/DigitFirst=true ; Sort digits before letters.",13,10
DB "/UpperFirst=false ; Sort uppercase letters before lowercase ones.",13,10
DB "/LeaveTemporary=no ; Do not erase temporary index file when the program ends.",13,10
DB 0
; NOTE(review): MainCon also reports the flag PunctuationFirst, which has no line in this help text - confirm whether that is intended.
; Byte order marks which AutodetectEncoding compares with the beginning of the input.
BOM_UTF32BE DB 0x00,0x00,0xFE,0xFF
BOM_UTF32LE DB 0xFF,0xFE,0x00,0x00
BOM_UTF16BE DB 0xFE,0xFF
BOM_UTF16LE DB 0xFF,0xFE
; AutodetectEncoding loads BOM_UTF8 as a DWORD and masks off the fourth byte, so some data must follow these three bytes.
BOM_UTF8 DB 0xEF,0xBB,0xBF
Ext_index DU ".index",0 ; Extension of temporary index file.
FiveBack DB 5*B(8),0 ; Backspaces used for progress indicator.
[.bss] ; Working memory variables.
; The three weight arrays have one DWORD per byte of section [Relevance], i.e. one per character of the Unicode table.
PrimaryWeights:: DD RelevanceEnd-Relevance * DWORD ; One DWORD for each UnicodePoints.
SecondaryWeights:: DD RelevanceEnd-Relevance * DWORD ; One DWORD for each UnicodePoints.
TertiaryWeights:: DD RelevanceEnd-Relevance * DWORD ; One DWORD for each UnicodePoints.
; EuroSort declares five data sections at assembly time; each section has the same number of elements.
; The elements are WORD for [CodePoint], BYTE for [Relevance], DWORD for Primary, Secondary and Tertiary weights.
SortIndex DS SORT_INDEX
File_ini DS FILE64 ; Configuration FILE64.
File_index DS FILE64 ; Temporary FILE64 with SORT_INDEX records.
FileInput DS FILE64 ; Input FILE64.
FileOutput DS FILE64 ; Output FILE64.
; MainCon initializes the four pointers below from the mapped input (header and footer empty) before it calls HeaderAndFooter.
HeaderInMemPtr D QWORD ; Pointer to the FileInput, also header mapped in memory.
HeaderInMemEnd D QWORD ; End of FileInput header, mapped in memory.
FooterInMemPtr D QWORD ; Pointer to the footer mapped in memory.
FooterInMemEnd D QWORD ; Pointer to the end of footer, also end of FileInput mapped in memory.
IndexInMem D QWORD ; Pointer to the indexes mapped in memory.
TranslationTable D QWORD ; Pointer to the table, one WORD per each of chars 128..255.
; Each of the four buffers below is allocated in MainCon with 4*[MaxRecordSize] bytes.
RecordPtrA D QWORD ; Pointer to the first dynamically allocated record. Its used length is RecordLenA.
RecordPtrB D QWORD ; Pointer to the second dynamically allocated record. Its used length is RecordLenB.
WeightPtrA D QWORD ; Pointer to the first dynamically allocated weighed record. Its used length is WeightLenA.
WeightPtrB D QWORD ; Pointer to the second dynamically allocated weighed record. Its used length is WeightLenB.
KeyRecPtrA D QWORD ; Pointer to the start of key in RecordPtrA.
KeyRecPtrB D QWORD ; Pointer to the start of key in RecordPtrB.
KeyRecEndA D QWORD ; Pointer to the end of key in RecordPtrA.
KeyRecEndB D QWORD ; Pointer to the end of key in RecordPtrB.
ProgressDivisor D QWORD ; BitScanReversed of ShellSort's %ShellSortNrOfRecords.
RecordLenA D DWORD ; Length of RecordPtrA in DWORDs.
RecordLenB D DWORD ; Length of RecordPtrB in DWORDs.
WeightLenA D DWORD ; Length of WeightPtrA in DWORDs.
WeightLenB D DWORD ; Length of WeightPtrB in DWORDs.
ArgNr D DWORD ; Binary number of the current cmd-line argument.
Encoding D DWORD ; Input encoding, perhaps autodetected. Never zero.
CodePointLength D DWORD ; Number of members in UnicodePoints table (1227).
CodePagesLength D DWORD ; Number of members in CodePages table (1+5+75=81).
; AutodetectEncoding uses TableWordIndex as its loop counter and does not reset it; it relies on the initial zero.
TableWordIndex D DWORD ; Ordinal number of the translation table in section [Table] (0=437, 1=667, 2=737,, 74=28606).
MaxRecordSize D DWORD ; Maximum of variable or fixed record size in bytes used in this InputFile.
NrOfRecords D DWORD ; Number of records in the sorted text.
NrOfRecords$ D 24*BYTE ; Decimal number of records.
Number$ D 24*BYTE ; Decimal general number.
NameUTF16 D MAX_PATH_SIZE * UNICHAR ; Temporary working space for file names in UTF-16.
NameUTF8 D MAX_PATH_SIZE * UNICHAR ; Temporary working space for file names in UTF-8.
Work16 D MAX_PATH_SIZE * UNICHAR ; Temporary working space for reports in UTF-16 (MainCon prints it with Unicode=yes).
[.text]
; MainCon is the console entry of EuroSort. It never returns; it ends with TerminateProgram.
; Steps: 1. print the banner and compute the lengths of the Unicode and code page tables,
;        2. load arguments from the configuration file in the AppData directory (the file is created from Help when missing),
;        3. parse the command line; when input or output file is missing, ask for them in the GUI,
;        4. map the input file, detect its encoding, apply header and footer, recap the arguments,
;        5. create and map the index, sort it with ShellSort and write header, sorted records and footer.
; Errorlevel is 0 on success and 8 on any error.
MainCon:: PROC
    StdOutput Version, Eol=yes, Unicode=yes, Console=yes
    LEA RAX,[Relevance] ; Prepare database sections.
    LEA RDX,[RelevanceEnd]
    SUB RDX,RAX ; [Relevance] has one byte per code point, so the byte size is the number of members.
    MOV [CodePointLength],EDX
    LEA RAX,[CPid]
    LEA RDX,[CPidEnd]
    SUB RDX,RAX
    SHR EDX,1 ; [CPid] has one word per code page.
    MOV [CodePagesLength],EDX
    ; Try to store and load arguments from the configuration file "%AppData%\eurotool\eurosort.ini".
    ; GetEnvironmentVariableW expects the buffer size in characters, hence SIZE# (bytes) is halved.
    WinABI GetEnvironmentVariableW,=U'AppData',NameUTF16,SIZE# NameUTF16/2
    TEST RAX
    JZ .20: ; The variable is not available, go on without the configuration file.
    FileAssign File_ini,NameUTF16,=U'\eurotool\eurosort.ini',Unicode=yes
    FileExists? File_ini
    JNC .13:
    FileMkDir File_ini ; The configuration does not exist yet, create it with default contents.
    JC .10:
    LEA RDI,[Help] ; Online help and default configuration.
    GetLength$ RDI, Unicode=no
    FileStore File_ini, RDI,RCX
    JNC .13:
.10:StdOutput =U'Configuration "',File_ini.Name,=U'" could not be saved.',Eol=yes, Unicode=yes
    JMP .20
.13:CALL ReadFile_ini
.20:; And finally read arguments from the command-line.
    MOV EAX,[ArgNr]
    INC EAX ; The next argument.
    MOV [ArgNr],EAX
    GetArg RAX,Unicode=yes ; RSI,RCX is the line with one argument in UTF-16, e. g. /InputFile="C:\TEMP\Sometext.txt"
    JC .27: ; When they are no more arguments.
    SHR ECX,1 ; Convert the size in bytes to the number of UTF-16 characters.
    LEA RDI,[NameUTF8]
    WinABI WideCharToMultiByte,CP_UTF8,0,RSI,RCX,RDI,SIZE#NameUTF8/2,0,0 ; Convert to UTF-8.
    MOVB [RDI+RAX],0 ; RAX is the number of bytes written; terminate the string.
    MOV RSI,RDI
    GetLengthUTF8 RSI
    CALL ArgParse:: ; Use ArgParse to translate UTF-8 string RSI,RCX to a public symbol Arg***.
    JNC .20:
    StdOutput Work16,Console=yes,Unicode=yes
    StdOutput ErrorMessage::,Unicode=no ; On error write help and exit.
    JSt [Status::],ArgEnc?|ArgLoc?|ArgFF?,.23: ; Skip the general help when one of these flags is set.
    StdOutput Help+3,Unicode=no ; On error write help and exit. +3 skips the UTF-8 BOM.
.23:TerminateProgram 8
.27:CMPB [ArgInputFile::],0
    JZ .Gui:
    CMPB [ArgOutputFile::],0
    JNZ .Cui: ; Both file names are known, GUI is not necessary.
.Gui:CALL MainGui:: ; Open the GUI window for arguments and wait until it ends.
    JSt [Status::],ArgCancel,ErrorCancel:
.Cui:LEA RSI,[ArgInputFile::] ; Convert string arguments Arg*** from UTF-8 to FileInput/Output.Name in UTF-16.
    LEA RDI,[FileInput.Name]
    WinABI MultiByteToWideChar,CP_UTF8,0,RSI,-1,RDI,SIZE# FILE64.Name/2
    LEA RSI,[ArgOutputFile::]
    LEA RDI,[FileOutput.Name]
    WinABI MultiByteToWideChar,CP_UTF8,0,RSI,-1,RDI,SIZE# FILE64.Name/2
    ; Check and recap arguments.
    CMPB [ArgInputFile::],0
    JZ ErrorInput:
    StdOutput =B'Input file: "',Unicode=off
    StdOutput FileInput.Name,Unicode=on,Console=on
    StdOutput =B'"',Eol=yes,Unicode=off
    CMPB [ArgOutputFile::],0
    JZ ErrorOutput
    StdOutput =B'Output file: "',Unicode=off
    StdOutput FileOutput.Name, Unicode=on, Console=on
    StdOutput =B'"',Eol=yes, Unicode=off
    LEA RBX,[FileInput]
    FileAssign RBX,FileInput.Name, Unicode=yes
    JC ErrorInput
    FileMapOpen RBX ; The code below takes RSI as the pointer to the mapped contents and RAX as its size.
    JC ErrorMapFile
    ADD RAX,RSI ; RAX is now the end of the mapped input.
    MOV [HeaderInMemPtr],RSI ; Header and footer are empty until HeaderAndFooter is called.
    MOV [HeaderInMemEnd],RSI
    MOV [FooterInMemPtr],RAX
    MOV [FooterInMemEnd],RAX
    ; Check the encoding and autodetect when 0.
    MOV EAX,[ArgInputEncoding::]
    LEA RBX,[=B(0)] ; RBX is the suffix of the report: empty or " autodetected."
    TEST EAX
    JNZ .30: ; Jump when the input encoding was explicitly specified.
    CALL AutodetectEncoding
    LEA RBX,[=B" autodetected."]
.30:MOV [Encoding],EAX
    ; The expansions below leave in RDX the pointer to the name of the encoding which equals EAX.
%DROPMACRO CP
CP %MACRO Value,Name ; Redeclare the macro CP to obtain the name corresponding with [Encoding].
CMP EAX,%Value ; E. g. CMP EAX,737
JNE .CP%.:
LEA RDX,[=B'(%Name[2..%&-1])'] ; E. g. LEA RDX,[=B'(IBM737)']
.CP%.:
%ENDMACRO CP
INCLUDE "codepage.htm"
    LEA RDI,[NameUTF8] ; Working space.
    MOV RSI,RDI
    StoD RDI ; Decimal identifier of the encoding.
    XOR EAX,EAX
    STOSB ; Zero-terminate the number.
    StdOutput =B'Encoding: ',RSI,=B' ',RDX,RBX,Eol=yes,Unicode=off
    ; Find the ordinal number of [Encoding] in [CPid] and derive the pointer to its translation table.
    ; NOTE(review): the offset is computed from the ordinal in [CPid], although the first six encodings
    ; have no table in [CPtable]. AutodetectEncoding accounts for this with +2*6. Verify that the users
    ; of [TranslationTable] compensate for six tables, or correct the offset here.
    MOV EAX,[Encoding]
    LEA RDI,[CPid]
    LEA RSI,[RDI+2]
    LEA RCX,[CPidEnd]
    SUB RCX,RDI
    SHR ECX,1
    REPNE SCASW
    SUB RDI,RSI ; RDI is 2 * ordinal number of the encoding.
    LEA RSI,[CPtable]
    SHL EDI,7 ; 256 bytes per table.
    LEA RAX,[RSI+RDI]
    MOV [TranslationTable],RAX
    MOV EAX,[ArgLocale::]
    CMP EAX,0
    JZ .33:
    LEA RDI,[NameUTF16] ; Working space for the locale stored as text in one DWORD.
    MOV RSI,RDI
    STOSD
    StdOutput =B'Locale: ',RSI,Eol=yes,Unicode=off
.33:; Apply header and footer size and length.
    CALL HeaderAndFooter
    JC ErrorSize:
    ; Recap nonzero header and footer.
    LEA RSI,[NrOfRecords$] ; Used as a scratch buffer for decimal numbers in the recap.
    LEA RBX,[=B' bytes']
    MOV EAX,[ArgHeaderSize::]
    TEST EAX
    JZ .37:
    StoD RSI
    XOR EAX,EAX
    STOSB
    StdOutput =B'Header: ',RSI,RBX,Eol=yes,Unicode=off
.37:MOV EAX,[ArgFooterSize::]
    TEST EAX
    JZ .40:
    StoD RSI
    XOR EAX,EAX
    STOSB
    StdOutput =B'Footer: ',RSI,RBX,Eol=yes,Unicode=off
.40:LEA RBX,[=B' lines']
    MOV EAX,[ArgHeaderLength::]
    TEST EAX
    JZ .43:
    StoD RSI
    XOR EAX,EAX
    STOSB
    StdOutput =B'Header: ',RSI,RBX,Eol=yes,Unicode=off
.43:MOV EAX,[ArgFooterLength::]
    TEST EAX
    JZ .47:
    StoD RSI
    XOR EAX,EAX
    STOSB
    StdOutput =B'Footer: ',RSI,RBX,Eol=yes,Unicode=off
.47:LEA RBX,[=B' characters']
    MOV EAX,[ArgKeyOffset::]
    TEST EAX
    JZ .50:
    StoD RSI
    XOR EAX,EAX
    STOSB
    StdOutput =B'Key offset: ',RSI,RBX,Eol=Yes,Unicode=off
.50:MOV EAX,[ArgKeyLength::]
    CMP EAX,-1 ; -1 means up to the end of record, which is not reported.
    JE .53:
    StoD RSI
    XOR EAX,EAX
    STOSB
    StdOutput =B'Key length: ',RSI,RBX,Eol=Yes,Unicode=off
.53:; Recap the boolean arguments kept as flags in [Status].
arg %FOR KeyReverse,DigitFirst,PunctuationFirst,UpperFirst,MergeSpaces
    LEA RDX,[=B'yes']
    JSt [Status::],Arg%arg,.4%arg:
    LEA RDX,[=B'no ']
.4%arg:
    StdOutput =B'%arg: ',RDX,Eol=yes,Unicode=off
%ENDFOR arg
    CALL CreateIndex
    LEA RBX,[File_index]
    FileMapCreate RBX ; Returns RAX=file size, RDI=pointer to the contents.
    JC ErrorMapFile:
    MOV [IndexInMem],RDI
    XOR EDX,EDX
    MOV ECX,SIZE# SORT_INDEX
    DIV RCX ; Number of records = size of index file / size of one index record.
    MOV [NrOfRecords],EAX
    LEA RDX,[NrOfRecords$]
    StoD RDX
    XOR EAX,EAX
    STOSB
    StdOutput =B"Indexed ",RDX,=B' records of ',Unicode=off
    MOV EAX,[ArgRecordSize::]
    TEST EAX
    JZ .57:
    LEA RDI,[Number$] ; Convert the fixed record size to decimal text before it is reported.
    StoD RDI
    XOR EAX,EAX
    STOSB
    StdOutput =B'fixed size ',Number$,=B' bytes.',Eol=yes,Unicode=off
    JMP .60:
.57:StdOutput =B'variable size.',Eol=yes,Unicode=off
.60:MOV ESI,[MaxRecordSize] ; Dynamically allocated buffers for two records.
    SHL ESI,2 ; Four bytes per each byte of the longest record.
    MemAlloc RSI
    JC ErrorAlloc
    MOV [RecordPtrA],RAX
    MemAlloc RSI
    JC ErrorAlloc
    MOV [RecordPtrB],RAX
    MemAlloc RSI
    JC ErrorAlloc
    MOV [WeightPtrA],RAX
    MemAlloc RSI
    JC ErrorAlloc
    MOV [WeightPtrB],RAX
    CALL SetWeights
    ; Apply local options.
    MOV EAX,[Encoding]
    CMP AX,12001 ; UTF-32BE.
    JE .63
    CMP AX,1201 ; UTF-16BE.
    JNE .67:
.63:SetSt [Status::],ArgBigEndian
.67:; Sort the index.
    MOV EAX,[NrOfRecords]
    BSR ECX,EAX
    MOV [ProgressDivisor],RCX
    StdOutput =B"Sorted xxx %%",Unicode=no
    MOV RDI,[IndexInMem]
    ShellSort RDI,RAX,SIZE# SORT_INDEX,Compare
    ; Write the output file.
    LEA RBX,[FileOutput]
    FileAssign RBX,FileOutput.Name,Unicode=yes
    JC ErrorWriteFile
    FileCreate RBX
    JC ErrorWriteFile:
    ; Write Header without change.
    MOV RSI,[HeaderInMemPtr]
    MOV RCX,[HeaderInMemEnd]
    SUB RCX,RSI
    JZ .70:
    FileWrite RBX,RSI,RCX
    JC ErrorWriteFile
.70:; Write the sorted input mapped between [HeaderInMemEnd]..[FooterInMemPtr] by the sorted index.
    MOV ECX,[NrOfRecords]
    JRCXZ .77:
.73:MOV ESI,[RDI+SORT_INDEX.Ptr] ; RDI still points to the index mapped in memory.
    MOV EDX,[RDI+SORT_INDEX.Size]
    ADD RSI,[HeaderInMemEnd] ; Record offsets are related to the end of header.
    FileWrite RBX,RSI,RDX
    JC ErrorWriteFile
    ADD RDI,SIZE# SORT_INDEX
    LOOP .73:
.77:StdOutput FiveBack,=B'100 %%.',Eol=yes,Unicode=no
    ; Write Footer without change.
    XOR EDI,EDI ; Errorlevel when no error.
    MOV RSI,[FooterInMemPtr]
    MOV RCX,[FooterInMemEnd]
    SUB RCX,RSI
    JZ .90:
    FileWrite RBX,RSI,RCX
    JNC .90:
ErrorWriteFile: ; RBX=^FILE64
    LEA RDX,[=B'Error writing to the file "']
.80:StdOutput RDX,Unicode=no
    LEA RAX,[RBX+FILE64.Name]
    StdOutput RAX,=U'"',Eol=yes,Console=yes,Unicode=yes
    JMP .87:
ErrorMapFile: ; RBX=^FILE64
    LEA RDX,[=B'Error memory-mapping the file "']
    JMP .80:
ErrorAlloc:
    LEA RAX,[=B"Error on memory allocation."]
    JMP .83:
ErrorSize:
    LEA RAX,[=B"Error: nothing to sort after removing header and footer."]
    JMP .83:
ErrorCancel:
    LEA RAX,[=B"Cancelled."]
    JMP .83:
ErrorInput:
    LEA RAX,[=B'Error: /InputFile= was not specified.']
    JMP .83:
ErrorOutput:
    LEA RAX,[=B'Error: /OutputFile= was not specified.']
.83:StdOutput RAX,Eol=yes,Unicode=no
.87:MOV EDI,8 ; Errorlevel on error.
.90:; RBX is not closed here: on several error paths it is not a pointer to FILE64,
    ; and whenever it is, it points to one of the three files listed below.
    FileClose File_index,FileInput,FileOutput
    JSt [Status::],ArgLeaveTemporary,.93:
    FileDelete File_index
.93:TerminateProgram Errorlevel=RDI
ENDP MainCon::
; ReadFile_ini reads the configuration file assigned to File_ini line by line
; and gives each line to ArgParse as RSI=pointer, RCX=size.
; The flag ArgFromFile is set in [Status] during the parsing and reset before return.
; A UTF-8 byte order mark at the beginning of the first line is skipped.
; When ArgParse reports an error (CF), the error message and help are printed and the program terminates with errorlevel 8.
; When the file cannot be opened, the procedure only reports it and returns.
ReadFile_ini:: PROC
    SetSt [Status::],ArgFromFile ; Tell ArgParse that arguments may not begin with / or -.
    StdOutput =U'Configuration "',File_ini.Name, Unicode=yes, Console=yes
.10:FileStreamOpen File_ini,BufSize=4K
    JNC .20:
    StdOutput =B'" was not found.',Eol=yes, Unicode=no
    JMP .90:
.20:FileStreamReadLn File_ini ; The first line is read separately because of the possible BOM.
    JBE .80: ; Nothing was read - presumably end of file or a read error.
    MOV ECX,EAX ; Size of the line.
    ; The first line may begin with BOM.
    MOV AX,[RSI]
    CMPW AX,0xBBEF ; UTF-8 BOM? Only its first two bytes are compared.
    JNE .50:
    ADD RSI,3 ; Skip the BOM.
    SUB ECX,3
    JB .80: ; The line was shorter than the BOM.
    JMP .50:
.30:FileStreamReadLn File_ini
    JBE .80:
    MOV ECX,EAX
.50:CALL ArgParse::
    JNC .30: ; The line was accepted, read the next one.
    StdOutput ErrorMessage::,Eol=yes, Unicode=no
    StdOutput Help+3,Unicode=no ; +3 skips the UTF-8 BOM in front of the help text, as MainCon does.
    TerminateProgram 8
.80:FileClose File_ini
    StdOutput =B'" was accepted.',Eol=yes, Unicode=no
.90:RstSt [Status::],ArgFromFile
    RET
ENDP ReadFile_ini
; AutodetectEncoding guesses the encoding of the text mapped between [HeaderInMemEnd] and [FooterInMemPtr].
; At most 256 KB of the text is interpreted in each candidate encoding (ASCII, UTF-32LE/BE, UTF-16LE/BE, UTF-8
; and each 8bit code page which has a table in [CPtable]). The relevances of all decoded characters
; are summed to a saldo; the encoding with the highest saldo wins.
; Input:  [HeaderInMemEnd], [FooterInMemPtr], [CodePointLength], [CodePagesLength], [TableWordIndex].
; Output: RAX=identifier of the detected code page. Texts shorter than 16 bytes return 65001 (UTF-8).
;         [TableWordIndex] is left at [CodePagesLength]-6.
; Clobbers: RBX,RCX,RDX,RSI,RDI,R8,R9,R10,R11,R12,R13.
; Register roles: R8=start of text, R9=size of sample, R10=best saldo so far, R11=saldo of the current encoding,
;         R12=best code page so far, R13=shift count applied to each relevance (0,1,2).
AutodetectEncoding:: PROC
    MOV R8,[HeaderInMemEnd]
    MOV R9,[FooterInMemPtr]
    SUB R9,R8
    MOV R12,65001 ; UTF-8.
    CMP R9D,16
    JB .90: ; Short files cannot be autodetected.
    CMP R9D,256K
    JB .10:
    MOV R9D,256K ; Limit the size of the investigated sample.
.10:MOV R10,0x8000_0000_0000_0000 ; R10 is the best relevance saldo so far. R12 is its code page.
    XOR R13,R13 ; R13 is 0,1,2 corresponding with character size 1,2,4.
    ; Try encoding 20127 ASCII.
    SUB R11,R11 ; R11=relevance saldo.
    MOV RSI,R8 ; Restore pointer to text from R8.
    MOV RCX,R9 ; Restore size of text from R9.
    XOR EAX,EAX
.15:LODSB
    CMP AL,0x7F
    JA .20:
    CALL .StoreRelevance:
    JMPS .22:
.20:ADD R11,?? ; Any byte above 0x7F deteriorates the relevance by ??=-32.
.22:LOOP .15:
    CMP R11,R10 ; Compare with the best saldo so far in R10.
    JLE .23: ; Continue with the next encoding; the UTF-32LE probe must not be skipped.
    MOV R12,20127
    MOV R10,R11
.23:; Try encoding 12000 UTF-32LE.
    MOV R13B,2 ; Character size=4.
    SUB R11,R11 ; R11=relevance saldo.
    MOV RSI,R8 ; Restore pointer to text from R8.
    MOV RCX,R9 ; Restore size of text from R9.
    SHR ECX,2 ; Number of 32bit characters.
    MOV EAX,[BOM_UTF32LE]
    CMP EAX,[RSI]
    JNE .25:
    ADD R11,Bm ; Byte order mark found: add its relevance and skip it.
    ADD RSI,4
    DEC ECX
.25:LODSD
    CALL .StoreRelevance:
    LOOP .25:
    CMP R11,R10 ; Compare with the best saldo so far in R10.
    JLE .30:
    MOV R12,12000
    MOV R10,R11
.30: ; Try encoding 12001 UTF-32BE.
    SUB R11,R11 ; R11=relevance saldo.
    MOV RSI,R8 ; Restore pointer to text from R8.
    MOV RCX,R9 ; Restore size of text from R9.
    SHR ECX,2
    MOV EAX,[BOM_UTF32BE]
    CMP EAX,[RSI]
    JNE .35:
    ADD R11,Bm
    ADD RSI,4
    DEC ECX
.35:LODSD
    BSWAP EAX ; Convert big endian to the native byte order.
    CALL .StoreRelevance:
    LOOP .35:
    CMP R11,R10 ; Compare with the best saldo so far in R10.
    JLE .40:
    MOV R12,12001
    MOV R10,R11
.40: ; Try encoding 1200 UTF-16LE.
    MOV R13B,1 ; Character size=2.
    SUB R11,R11 ; R11=relevance saldo.
    MOV RSI,R8 ; Restore pointer to text from R8.
    MOV RCX,R9 ; Restore size of text from R9.
    SHR ECX,1 ; Number of 16bit characters.
    XOR EAX,EAX ; Keep the upper half of EAX zero for LODSW below.
    MOV AX,[BOM_UTF16LE]
    CMP AX,[RSI]
    JNE .45:
    ADD R11,Bm
    ADD RSI,2
    DEC ECX
.45:LODSW
    CALL .StoreRelevance:
    LOOP .45:
    CMP R11,R10 ; Compare with the best saldo so far in R10.
    JLE .50:
    MOV R12,1200
    MOV R10,R11
.50: ; Try encoding 1201 UTF-16BE.
    SUB R11,R11 ; R11=relevance saldo.
    MOV RSI,R8 ; Restore pointer to text from R8.
    MOV RCX,R9 ; Restore size of text from R9.
    SHR ECX,1
    XOR EAX,EAX
    MOV AX,[BOM_UTF16BE]
    CMP AX,[RSI]
    JNE .55:
    ADD R11,Bm
    ADD RSI,2
    DEC ECX
.55:LODSW
    XCHG AL,AH ; Convert big endian to the native byte order.
    CALL .StoreRelevance:
    LOOP .55:
    CMP R11,R10 ; Compare with the best saldo so far in R10.
    JLE .60:
    MOV R12,1201
    MOV R10,R11
.60: ; Try encoding 65001 UTF-8.
    SUB R11,R11 ; R11=relevance saldo.
    MOV RSI,R8 ; Restore pointer to text from R8.
    MOV RCX,R9 ; Restore size of text from R9.
    MOV EAX,[BOM_UTF8] ; The BOM has three bytes only, the fourth loaded byte is masked off.
    MOV EDX,[RSI]
    AND EAX,0x00FFFFFF
    AND EDX,0x00FFFFFF
    CMP EAX,EDX
    JNE .65:
    ADD R11,Bm
    ADD RSI,3
    SUB RCX,3
.65:DecodeUTF8 RSI,.StoreRelevanceUTF8,Size=RCX,Width=32 ; The callback gets each decoded character.
    CMP R11,R10 ; Compare with the best saldo so far in R10.
    JLE .70:
    MOV R12,65001
    MOV R10,R11
.70: ; Try 8bit encoding OEM or WIDE according to CodePages 437..28606.
    XOR R13,R13 ; Character size=1.
    MOV EDX,[TableWordIndex] ; 0,1,2,,80-6
    SHL EDX,8 ; Each Table is 2*128 bytes long.
    LEA RBX,[CPtable+RDX] ; Start of the table.
    SUB R11,R11 ; R11=relevance saldo.
    MOV RSI,R8
    MOV RCX,R9
.75:XOR EAX,EAX
    LODSB
    CMP AL,0x7F
    JBE .80: ; Characters 0..127 are not translated.
    MOV AX,[RBX+2*RAX-256] ; Translate AL (128..255) to unicode point in AX by this table.
.80:CALL .StoreRelevance: ; Add relevance of unicode point EAX to R11.
    LOOP .75: ; The next character from the sample.
    CMP R11,R10 ; Compare with the best saldo so far in R10.
    MOV EDX,[TableWordIndex] ; MOV does not change the flags of the previous CMP.
    JLE .85: ; Skip when poor.
    MOV R10,R11 ; This is the best so far.
    MOVZXW R12,[2*RDX+2*6+CPid:] ; R12W is the best encoding so far. The first six identifiers have no table.
.85:INC EDX ; Try the next encoding.
    MOV [TableWordIndex],EDX
    MOV EAX,[CodePagesLength]
    SUB EAX,6
    CMP EDX,EAX
    JB .70: ; If no more supported encodings.
.90:MOV RAX,R12 ; Autodetected encoding is returned in RAX.
    RET
.StoreRelevanceUTF8:: PROC ; This subprocedure will add relevance of a character decoded from UTF-8
    XOR R13,R13 ; with codepoint RAX to the saldo in R11. Clobbers: RAX,RDI,R13.
    CMP EAX,80h ; R13 becomes 0 for codepoints below 0x80, 1 below 0x800, otherwise 2.
    JB .4:
    INC R13
    CMP EAX,800h
    JB .4:
    INC R13
.4: ; Continue with .StoreRelevance:
 ENDP .StoreRelevanceUTF8:
.StoreRelevance: PROC ; This subprocedure will add relevance of a character
    PUSH RCX,RSI ; with codepoint RAX to the saldo in R11. Clobbers: RAX,RDI.
    CMP EAX,0x0000_FFFF
    MOV RSI,?? ; Negative relevance. ?? EQU -32.
    JA .8: ; Characters above BMP will decrease the relevance.
    LEA RDI,[+CodePoint]
    MOV ECX,[CodePointLength]
    LEA RSI,[RDI+2] ; After a successful scan RDI-RSI is 2 * ordinal number of the codepoint.
    REPNE SCASW
    JE .6:
    MOV RSI,?? ; Deteriorate the relevance when this character is not in our Unicode table.
    JMP .8:
.6: SUB RDI,RSI ; Otherwise find the corresponding relevance.
    LEA RSI,[Relevance]
    SHR RDI,1
    MOVSXB RSI,[RSI+RDI]
.8: MOV RCX,R13
    SAL RSI,CL ; Multiply the relevance by 1, 2 or 4 according to R13.
    ADD R11,RSI
    POP RSI,RCX
    CLC
    RET
 ENDP .StoreRelevance:
ENDP AutodetectEncoding
Procedure SetWeights populates arrays PrimaryWeights, SecondaryWeights, TertiaryWeights
with 32bit sorting weights, taking arguments ArgLocale and flags ArgUpperFirst, ArgDigitFirst into account.
Each DWORD of weights corresponds with one unichar defined as 16bit WORD in the table
UnicodePoints.
The table must be sorted, but it is not continuous.
Weight of unichars not declared in the table virtually corresponds with their Unicode value
(0x0000_0080..0x0001_FFFF) increased by WeightCatAboveFFFF (0xF000_0000).
Primary, secondary and tertiary weight of each letter in EAX is prepared into EBX,ECX,EDX, respectively.
Primary weight (EBX) is the Unicode value of the corresponding uppercase letter shifted left by 12 bits.
Secondary weight (ECX) is copied from EBX and completed with diacritic signs, if any, in the low 7 bits.
Tertiary weight (EDX) is copied from ECX and completed with WeightAddCapital when the original letter in EAX was capital.
Then those three registers go to the corresponding category .Control, .Digit, .Letter, .Punctuation, where they are tossed into arrays PrimaryWeights, SecondaryWeights, TertiaryWeights.
SetWeights:: PROC
WeightAddCapital EQU 0x0000_0001 ; A capital letter.
WeightDiaAcute EQU 0x0000_0004
WeightDiaGrave EQU 0x0000_0006
WeightDiaTilde EQU 0x0000_0008
WeightDiaCedilla EQU 0x0000_000A
WeightDiaOgonek EQU 0x0000_000C
WeightDiaMacron EQU 0x0000_000E
WeightDiaCaron EQU 0x0000_0010
WeightDiaBreve EQU 0x0000_0012
WeightDiaCircumflex EQU 0x0000_0014
WeightDiaLigature EQU 0x0000_0016
WeightDiaStroke EQU 0x0000_0018
WeightDiaRingAbove EQU 0x0000_001A
WeightDiaDiaeresis EQU 0x0000_001C
WeightDiaDoubleAcute EQU 0x0000_001E
WeightDiaApostrophe EQU 0x0000_0020
WeightDiaDotless EQU 0x0000_0022
WeightDiaDotAbove EQU 0x0000_0024
WeightDiaDotMiddle EQU 0x0000_0026
WeightDiaCommaBelow EQU 0x0000_0028
WeightDiaTonos EQU 0x0000_002C
WeightDiaDialytika EQU 0x0000_002E
WeightDiaDialytTonos EQU 0x0000_0030
WeightDiaHorn EQU 0x0000_0032
WeightDiaHook EQU 0x0000_0034
WeightDiaShort EQU 0x0000_0036
WeightDiaLong EQU 0x0000_0038
WeightDiaUpturn EQU 0x0000_003A
WeightDiaDescender EQU 0x0000_003C
WeightAddPrimary EQU 0x0000_0200 ; 'C','S','R','Z' with macron in the primary view.
WeightAddOne EQU 0x0000_0400 ;
WeightAddTwo EQU 0x0000_0800
WeightAddThree EQU WeightAddOne+WeightAddTwo
; Base capital weight EQU 0x0xxx_x000 ; Original capital character left shifted by 12.
WeightCatControls EQU 0x8000_0000 ; A white-space control character.
WeightCatPunctFirst EQU 0x9000_0000 ; Nonalphanumeric character when /PunctuationFirst=true.
WeightCatDigitFirst EQU 0xB000_0000 ; A digit when /DigitFirst=true.
WeightCatLetters EQU 0xC000_0000 ; A letter.
WeightCatDigitLast EQU 0xD000_0000 ; A digit when /DigitFirst=false.
WeightCatPunctLast EQU 0xE000_0000 ; Nonalphanumeric character when /PunctuationFirst=false.
WeightCatAboveFFFF EQU 0xF000_0000 ; Other characters above 0x0000_FFFF.
XOR EBX,EBX ; The weight will be constructed in EBX,ECX,EDX.
LEA RSI,[CodePoint]
.A1:MOVZXW EAX,[RSI] ; Load the next codepoint, one after another. EBX is partially kept.
CMP AX,0xFFFF ; Detect the end of table.
JE .Z9:
CMP AX,0x0021 ; Toss the codepoints to categories.
JB .Control: ; 0x0000..0x0020.
CMP AX,0x0030
JB .Punctuation: ; 0x0021..0x002F.
CMP AX,0x003A
JB .Digit: ; DIGIT ZERO..NINE 0x0030..0x0039.
CMP AX,0x0041
JB .Punctuation: ; 0x003A..0x0040.
CMP AX,0x005B ; 'Z'?
JAE .C1:
CMPW [ArgLocale::],ArgLocaleLT
JNE .A5:
CMP AL,'Y'
JNE .B3:
MOV EBX,'I'<<12+WeightAddTwo ; >>
MOV ECX,EDX
.A3:LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.A5:CMPW [ArgLocale::],ArgLocaleEE
JNE .B3:
CMP AL,'S'
JA .A7:
MOV EBX,EAX
SHL EBX,12
MOV ECX,EBX
JMP .A3:
.A7:MOV EBX,'X'<<12 ; >>
CMP AL,'T'
JE .B1:
MOV EBX,'Y'<<12 ; >>
CMP AL,'U'
JE .B1:
MOV EBX,'Z'<<12 ; >>
CMP AL,'V'
JE .B1:
MOV EBX,0x0005_B000
CMP AL,'W'
JE .B1:
MOV EBX,0x0006_0000
CMP AL,'X'
JE .B1:
MOV EBX,0x0006_1000
CMP AL,'Y'
JE .B1:
MOV EBX,0x0005_6000
.B1:MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.B3:MOV EBX,EAX ; LATIN CAPITAL LETTER A..Z 0x0041..0x005A.
SHL EBX,12
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMPW [ArgLocale::],ArgLocaleAZ
JNE .Letter:
CMP AX,0x0051 ; LATIN CAPITAL LETTER Q
JNE .B5:
MOV EBX,'K'<<12+WeightAddOne ; >> 'Q' in Azerbaijan is sorted after 'K'.
JMP .B7:
.B5:CMP AX,0x0058 ; LATIN CAPITAL LETTER X
JNE .Letter:
MOV EBX,'H'<<12+WeightAddOne ; >> 'X' in Azerbaijan is sorted after 'H'.
.B7:MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.C1:CMP AX,0x0061
JB .Punctuation: ; 0x005B..0060.
CMP AX,0x007B
JAE .D7:
CMPW [ArgLocale::],ArgLocaleLT
JNE .C3:
CMP AL,'y'
JNE .D1:
MOV EBX,'I'<<12+WeightAddTwo ; >>
MOV ECX,EBX
MOV EDX,ECX
JMP .Letter:
.C3:CMPW [ArgLocale::],ArgLocaleEE
JNE .D1:
CMP AL,'s'
JA .C5:
MOV EBX,EAX
SUB BL,'a'^'A'
SHL EBX,12
JMP .C7:
.C5:MOV EBX,0x0005_8000
CMP AL,'t'
JE .C7:
MOV EBX,0x0005_9000
CMP AL,'u'
JE .C7:
MOV EBX,0x0005_A000
CMP AL,'v'
JE .C7:
MOV EBX,0x0005_B000
CMP AL,'w'
JE .C7:
MOV EBX,0x0006_0000
CMP AL,'x'
JE .C7:
MOV EBX,0x0006_1000
CMP AL,'y'
JE .C7:
MOV EBX,0x0005_6000
.C7:MOV ECX,EBX
MOV EDX,ECX
JMP .Letter:
.D1:CMP AX,0x007B
JAE .D7:
MOV EBX,EAX ; LATIN SMALL LETTER A..Z 0x0061..0x007A.
XOR BL,'a'^'A' ; Convert to base uppercase letter.
SHL EBX,12
MOV ECX,EBX
MOV EDX,ECX
CMPW [ArgLocale::],ArgLocaleAZ
JNE .Letter:
CMP AX,0x0071 ; LATIN SMALL LETTER Q
JNE .D3:
MOV EBX,'K'<<12+WeightAddOne ; >> 'q' in Azerbaijan is sorted after 'k'.
JMP .D5:
.D3:CMP AX,0x0078 ; LATIN SMALL LETTER X
JNE .Letter:
MOV EBX,'H'<<12+WeightAddOne ; >> 'x' in Azerbaijan is sorted after 'h'.
.D5:MOV ECX,EBX
MOV EDX,ECX
JMP .Letter:
.D7:CMP AX,0x007F
JB .Punctuation: ; 0x007B..0x007E.
CMP AX,0x00A1
JB .Control: ; 0x007F..0x00A0.
CMP AX,0x00C0
JB .Punctuation: ; 0x00A1..0x00BF.
MOV EBX,'A'<<12 ; >>
LEA ECX,[EBX+WeightDiaGrave]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00C0 ; LATIN CAPITAL LETTER A WITH GRAVE
JE .Letter:
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00C1 ; LATIN CAPITAL LETTER A WITH ACUTE
JNE .E1:
CMPW [ArgLocale::],ArgLocaleIS
JNE .Letter:
MOV EBX,'A'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.E1:MOV EBX,'A'<<12 ; >>
LEA ECX,[EBX+WeightDiaCircumflex]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00C2 ; LATIN CAPITAL LETTER A WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaTilde]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00C3 ; LATIN CAPITAL LETTER A WITH TILDE
JE .Letter:
LEA ECX,[EBX+WeightDiaDiaeresis]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00C4 ; LATIN CAPITAL LETTER A WITH DIAERESIS
JNE .E7:
CMPW [ArgLocale::],ArgLocaleDE
JNE .E3:
MOV EBX,'A'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaDiaeresis]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.E3:CMPW [ArgLocale::],ArgLocaleEE
JNE .E5:
MOV EBX,0x0005_D000
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.E5:CALL .Scandinavian?
JNE .Letter:
MOV EBX,('Z'+4)<<12 ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.E7:MOV EBX,'A'<<12 ; >>
LEA ECX,[EBX+WeightDiaRingAbove]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00C5 ; LATIN CAPITAL LETTER A WITH RING ABOVE
JNE .F1:
CALL .Scandinavian?
JNE .Letter:
MOV EBX,('Z'+3)<<12 ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.F1:MOV EBX,'A'<<12 ; >>
LEA ECX,[EBX+WeightDiaLigature]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00C6 ; LATIN CAPITAL LETTER AE
JNE .F3:
CALL .Scandinavian?
JNE .Letter:
MOV EBX,('Z'+1)<<12 ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.F3:MOV EBX,'C'<<12 ; >>
LEA ECX,[EBX+WeightDiaCedilla]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00C7 ; LATIN CAPITAL LETTER C WITH CEDILLA
JNE .F5:
CMPW [ArgLocale::],ArgLocaleTR
JNE .Letter:
MOV EBX,'C'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaCedilla]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.F5:MOV EBX,'E'<<12 ; >>
LEA ECX,[EBX+WeightDiaGrave]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00C8 ; LATIN CAPITAL LETTER E WITH GRAVE
JE .Letter:
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00C9 ; LATIN CAPITAL LETTER E WITH ACUTE
JNE .F7:
CMPW [ArgLocale::],ArgLocaleIS
JNE .Letter:
MOV EBX,'E'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter
.F7:MOV EBX,'E'<<12 ; >>
LEA ECX,[EBX+WeightDiaCircumflex]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00CA ; LATIN CAPITAL LETTER E WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaDiaeresis]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00CB ; LATIN CAPITAL LETTER E WITH DIAERESIS
JE .Letter:
MOV EBX,'I'<<12 ; >>
LEA ECX,[EBX+WeightDiaGrave]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00CC ; LATIN CAPITAL LETTER I WITH GRAVE
JE .Letter:
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00CD ; LATIN CAPITAL LETTER I WITH ACUTE
JNE .G1:
CMPW [ArgLocale::],ArgLocaleIS
JNE .Letter:
MOV EBX,'I'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.G1:MOV EBX,'I'<<12 ; >>
LEA ECX,[EBX+WeightDiaCircumflex]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00CE ; LATIN CAPITAL LETTER I WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaDiaeresis]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00CF ; LATIN CAPITAL LETTER I WITH DIAERESIS
JE .Letter:
MOV EBX,'D'<<12+WeightDiaStroke ; >>
MOV ECX,EDX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00D0 ; LATIN CAPITAL LETTER ETH
JE .Letter:
MOV EBX,'N'<<12 ; >>
LEA ECX,[EBX+WeightDiaTilde]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00D1 ; LATIN CAPITAL LETTER N WITH TILDE
JNE .G3:
CMPW [ArgLocale::],ArgLocaleES
JNE .Letter:
MOV EBX,'N'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaTilde]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.G3:MOV EBX,'O'<<12 ; >>
LEA ECX,[EBX+WeightDiaGrave]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00D2 ; LATIN CAPITAL LETTER O WITH GRAVE
JE .Letter:
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00D3 ; LATIN CAPITAL LETTER O WITH ACUTE
JNE .G7:
CMPW [ArgLocale::],ArgLocalePL
JE .G5:
CMPW [ArgLocale::],ArgLocaleIS
JNE .Letter:
.G5:MOV EBX,'O'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.G7:MOV EBX,'O'<<12 ; >>
LEA ECX,[EBX+WeightDiaCircumflex]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00D4 ; LATIN CAPITAL LETTER O WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaTilde]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00D5 ; LATIN CAPITAL LETTER O WITH TILDE
JNE .H1:
CMPW [ArgLocale::],ArgLocaleEE
JNE .Letter:
MOV EBX,0x0005_C000
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.H1:MOV EBX,'O'<<12 ; >>
LEA ECX,[EBX+WeightDiaDiaeresis]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00D6 ; LATIN CAPITAL LETTER O WITH DIAERESIS
JNE .I1:
CMPW [ArgLocale::],ArgLocaleDE
JE .H3:
CMPW [ArgLocale::],ArgLocaleTR
JNE .H5:
.H3:MOV EBX,'O'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaDiaeresis]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.H5:CMPW [ArgLocale::],ArgLocaleEE
JNE .H7:
MOV EBX,0x0005_E000
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.H7:CALL .Scandinavian?
JNE .Letter:
MOV EBX,('Z'+5)<<12 ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.I1:CMP AX,0x00D7 ; MULTIPLICATION SIGN
JE .Punctuation:
MOV EBX,'O'<<12 ; >>
LEA ECX,[EBX+WeightDiaStroke]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00D8 ; LATIN CAPITAL LETTER O WITH STROKE
JNE .I3:
CALL .Scandinavian?
JNE .Letter:
MOV EBX,('Z'+2)<<12 ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.I3:MOV EBX,'U'<<12 ; >>
LEA ECX,[EBX+WeightDiaGrave]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00D9 ; LATIN CAPITAL LETTER U WITH GRAVE
JE .Letter:
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00DA ; LATIN CAPITAL LETTER U WITH ACUTE
JNE .I5:
CMPW [ArgLocale::],ArgLocaleIS
JNE .Letter:
MOV EBX,'U'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.I5:MOV EBX,'U'<<12 ; >>
LEA ECX,[EBX+WeightDiaCircumflex]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00DB ; LATIN CAPITAL LETTER U WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaDiaeresis]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00DC ; LATIN CAPITAL LETTER U WITH DIAERESIS
JNE .J3:
CMPW [ArgLocale::],ArgLocaleDE
JE .I7:
CMPW [ArgLocale::],ArgLocaleTR
JNE .J1:
.I7:MOV EBX,'U'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaDiaeresis]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.J1:CMPW [ArgLocale::],ArgLocaleEE
JNE .Letter:
MOV EBX,0x0005_F000
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.J3:MOV EBX,'Y'<<12 ; >>
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00DD ; LATIN CAPITAL LETTER Y WITH ACUTE
JNE .J5:
CMPW [ArgLocale::],ArgLocaleIS
JNE .Letter:
MOV EBX,'Y'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.J5:MOV EBX,'Z'<<12+WeightAddTwo ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x00DE ; LATIN CAPITAL LETTER THORN
JE .Letter:
MOV EBX,'S'<<12 ; >>
LEA ECX,[EBX+WeightDiaLong]
MOV EDX,ECX
CMP AX,0x00DF ; LATIN SMALL LETTER SHARP S
JNE .J7:
CMPW [ArgLocale::],ArgLocaleDE
JNE .Letter:
MOV EBX,'S'<<12+WeightAddOne ; >>
LEA ECX,[EBX+WeightDiaLong]
MOV EDX,ECX
JMP .Letter:
.J7:MOV EBX,'A'<<12 ; >>
LEA ECX,[EBX+WeightDiaGrave]
MOV EDX,ECX
CMP AX,0x00E0 ; LATIN SMALL LETTER A WITH GRAVE
JE .Letter:
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
CMP AX,0x00E1 ; LATIN SMALL LETTER A WITH ACUTE
JNE .K1:
CMPW [ArgLocale::],ArgLocaleIS
JNE .Letter:
MOV EBX,'A'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
JMP .Letter:
.K1:MOV EBX,'A'<<12 ; >>
LEA ECX,[EBX+WeightDiaCircumflex]
MOV EDX,ECX
CMP AX,0x00E2 ; LATIN SMALL LETTER A WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaTilde]
MOV EDX,ECX
CMP AX,0x00E3 ; LATIN SMALL LETTER A WITH TILDE
JE .Letter:
LEA ECX,[EBX+WeightDiaDiaeresis]
MOV EDX,ECX
CMP AX,0x00E4 ; LATIN SMALL LETTER A WITH DIAERESIS
JNE .L1:
CMPW [ArgLocale::],ArgLocaleDE
JNE .K3:
MOV EBX,'A'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaDiaeresis]
MOV EDX,ECX
JMP .Letter:
.K3:CMPW [ArgLocale::],ArgLocaleEE
JNE .K7:
MOV EBX,0x0005_D000
.K5:MOV ECX,EBX
MOV EDX,ECX
JMP .Letter:
.K7:CALL .Scandinavian?
JNE .Letter:
MOV EBX,('Z'+4)<<12 ; >>
JMP .K5:
.L1:MOV EBX,'A'<<12 ; >>
LEA ECX,[EBX+WeightDiaRingAbove]
MOV EDX,ECX
CMP AX,0x00E5 ; LATIN SMALL LETTER A WITH RING ABOVE
JNE .L3:
CALL .Scandinavian?
JNE .Letter:
MOV EBX,('Z'+3)<<12 ; >>
JMP .K5:
.L3:MOV EBX,'A'<<12 ; >>
LEA ECX,[EBX+WeightDiaLigature]
MOV EDX,ECX
CMP AX,0x00E6 ; LATIN SMALL LETTER AE
JNE .L5:
CALL .Scandinavian?
JNE .Letter:
MOV EBX,('Z'+1)<<12 ; >>
JMP .K5:
.L5:MOV EBX,'C'<<12 ; >>
LEA ECX,[EBX+WeightDiaCedilla]
MOV EDX,ECX
CMP AX,0x00E7 ; LATIN SMALL LETTER C WITH CEDILLA
JNE .L7:
CMPW [ArgLocale::],ArgLocaleTR
JNE .Letter:
MOV EBX,'C'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaCedilla]
MOV EDX,ECX
JMP .Letter:
.L7:MOV EBX,'E'<<12 ; >>
LEA ECX,[EBX+WeightDiaGrave]
MOV EDX,ECX
CMP AX,0x00E8 ; LATIN SMALL LETTER E WITH GRAVE
JE .Letter:
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
CMP AX,0x00E9 ; LATIN SMALL LETTER E WITH ACUTE
JNE .M1:
CMPW [ArgLocale::],ArgLocaleIS
JNE .Letter:
MOV EBX,'E'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
JMP .Letter:
.M1:MOV EBX,'E'<<12 ; >>
LEA ECX,[EBX+WeightDiaCircumflex]
MOV EDX,ECX
CMP AX,0x00EA ; LATIN SMALL LETTER E WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaDiaeresis]
MOV EDX,ECX
CMP AX,0x00EB ; LATIN SMALL LETTER E WITH DIAERESIS
JE .Letter:
MOV EBX,'I'<<12 ; >>
LEA ECX,[EBX+WeightDiaGrave]
MOV EDX,ECX
CMP AX,0x00EC ; LATIN SMALL LETTER I WITH GRAVE
JE .Letter:
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
CMP AX,0x00ED ; LATIN SMALL LETTER I WITH ACUTE
JNE .M3:
CMPW [ArgLocale::],ArgLocaleIS
JNE .Letter:
MOV EBX,'I'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
JMP .Letter:
.M3:MOV EBX,'I'<<12 ; >>
LEA ECX,[EBX+WeightDiaCircumflex]
MOV EDX,ECX
CMP AX,0x00EE ; LATIN SMALL LETTER I WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaDiaeresis]
MOV EDX,ECX
CMP AX,0x00EF ; LATIN SMALL LETTER I WITH DIAERESIS
JE .Letter:
MOV EBX,'D'<<12+WeightDiaStroke ; >>
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x00F0 ; LATIN SMALL LETTER ETH
JE .Letter:
MOV EBX,'N'<<12 ; >>
LEA ECX,[EBX+WeightDiaTilde]
MOV EDX,ECX
CMP AX,0x00F1 ; LATIN SMALL LETTER N WITH TILDE
JNE .M5:
CMPW [ArgLocale::],ArgLocaleES
JNE .Letter:
MOV EBX,'N'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaTilde]
MOV EDX,ECX
JMP .Letter:
.M5:MOV EBX,'O'<<12 ; >>
LEA ECX,[EBX+WeightDiaGrave]
MOV EDX,ECX
CMP AX,0x00F2 ; LATIN SMALL LETTER O WITH GRAVE
JE .Letter:
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
CMP AX,0x00F3 ; LATIN SMALL LETTER O WITH ACUTE
JNE .N1:
CMPW [ArgLocale::],ArgLocalePL
JE .M7:
CMPW [ArgLocale::],ArgLocaleIS
JNE .Letter:
.M7:MOV EBX,'O'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
JMP .Letter:
.N1:MOV EBX,'O'<<12 ; >>
LEA ECX,[EBX+WeightDiaCircumflex]
MOV EDX,ECX
CMP AX,0x00F4 ; LATIN SMALL LETTER O WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaTilde]
MOV EDX,ECX
CMP AX,0x00F5 ; LATIN SMALL LETTER O WITH TILDE
JNE .N5:
CMPW [ArgLocale::],ArgLocaleEE
JNE .Letter:
MOV EBX,0x0005_C000
.N3:MOV ECX,EBX
MOV EDX,ECX
JMP .Letter:
.N5:MOV EBX,'O'<<12 ; >>
LEA ECX,[EBX+WeightDiaDiaeresis]
MOV EDX,ECX
CMP AX,0x00F6 ; LATIN SMALL LETTER O WITH DIAERESIS
JNE .P5:
CMPW [ArgLocale::],ArgLocaleDE
JE .N7:
CMPW [ArgLocale::],ArgLocaleTR
JNE .P1:
.N7:MOV EBX,'O'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaDiaeresis]
MOV EDX,ECX
JMP .Letter:
.P1:CMPW [ArgLocale::],ArgLocaleEE
JNE .P3:
MOV EBX,0x0005_E000
JMP .N3:
.P3:CALL .Scandinavian?
JNE .Letter:
MOV EBX,('Z'+5)<<12 ; >>
JMP .N3:
.P5:CMP AX,0x00F7 ; DIVISION SIGN
JE .Punctuation:
MOV EBX,'O'<<12 ; >>
LEA ECX,[EBX+WeightDiaStroke]
MOV EDX,ECX
CMP AX,0x00F8 ; LATIN SMALL LETTER O WITH STROKE
JNE .P7:
CALL .Scandinavian?
JNE .Letter:
MOV EBX,('Z'+2)<<12 ; >>
JMP .N3:
.P7:MOV EBX,'U'<<12 ; >>
LEA ECX,[EBX+WeightDiaGrave]
MOV EDX,ECX
CMP AX,0x00F9 ; LATIN SMALL LETTER U WITH GRAVE
JE .Letter:
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
CMP AX,0x00FA ; LATIN SMALL LETTER U WITH ACUTE
JNE .Q1:
CMPW [ArgLocale::],ArgLocaleIS
JNE .Letter:
MOV EBX,'U'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
JMP .Letter:
.Q1:MOV EBX,'U'<<12 ; >>
LEA ECX,[EBX+WeightDiaCircumflex]
MOV EDX,ECX
CMP AX,0x00FB ; LATIN SMALL LETTER U WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaDiaeresis]
MOV EDX,ECX
CMP AX,0x00FC ; LATIN SMALL LETTER U WITH DIAERESIS
JNE .Q7:
CMPW [ArgLocale::],ArgLocaleDE
JE .Q3:
CMPW [ArgLocale::],ArgLocaleTR
JNE .Q5:
.Q3:MOV EBX,'U'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaDiaeresis]
MOV EDX,ECX
JMP .Letter:
.Q5:CMPW [ArgLocale::],ArgLocaleEE
JNE .Letter:
MOV EBX,0x0005_F000
JMP .N3:
.Q7:MOV EBX,'Y'<<12 ; >>
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
CMP AX,0x00FD ; LATIN SMALL LETTER Y WITH ACUTE
JNE .R1:
CMPW [ArgLocale::],ArgLocaleIS
JNE .Letter:
MOV EBX,'Y'<<12+WeightAddTwo ; >> Override applied only when [ArgLocale::] equals ArgLocaleIS.
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
JMP .Letter:
.R1:MOV EBX,'Z'<<12+WeightAddTwo ; >>
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x00FE ; LATIN SMALL LETTER THORN
JE .Letter:
MOV EBX,'Y'<<12 ; >>
LEA ECX,[EBX+WeightDiaDiaeresis]
MOV EDX,ECX
CMP AX,0x00FF ; LATIN SMALL LETTER Y WITH DIAERESIS
JE .Letter:
MOV EBX,'A'<<12 ; >>
LEA ECX,[EBX+WeightDiaMacron]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0100 ; LATIN CAPITAL LETTER A WITH MACRON
JE .Letter:
LEA ECX,[EBX+WeightDiaMacron]
MOV EDX,ECX
CMP AX,0x0101 ; LATIN SMALL LETTER A WITH MACRON
JE .Letter:
LEA ECX,[EBX+WeightDiaBreve]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0102 ; LATIN CAPITAL LETTER A WITH BREVE
JE .Letter:
LEA ECX,[EBX+WeightDiaBreve]
MOV EDX,ECX
CMP AX,0x0103 ; LATIN SMALL LETTER A WITH BREVE
JE .Letter:
LEA ECX,[EBX+WeightDiaOgonek]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0104 ; LATIN CAPITAL LETTER A WITH OGONEK
JNE .R3:
CMPW [ArgLocale::],ArgLocalePL
JNE .Letter:
MOV EBX,'A'<<12+WeightAddOne ; >>
LEA ECX,[EBX+WeightDiaOgonek]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
; U+0105: default weights are 'A' plus the ogonek diacritic, no capital increment.
.R3:MOV EBX,'A'<<12 ; >>
LEA ECX,[EBX+WeightDiaOgonek]
MOV EDX,ECX
CMP AX,0x0105 ; LATIN SMALL LETTER A WITH OGONEK
JNE .R5:
CMPW [ArgLocale::],ArgLocalePL
JNE .Letter: ; Locale is not ArgLocalePL: keep the default weights set above.
MOV EBX,'A'<<12+WeightAddOne ; >> ArgLocalePL override: EBX gets WeightAddOne, same as the capital U+0104 group.
LEA ECX,[EBX+WeightDiaOgonek] ; Keep the ogonek diacritic weight, matching the capital U+0104 override.
MOV EDX,ECX
JMP .Letter:
.R5:MOV EBX,'C'<<12 ; >>
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0106 ; LATIN CAPITAL LETTER C WITH ACUTE
JNE .R7:
CMPW [ArgLocale::],ArgLocalePL
JNE .Letter:
MOV EBX,'C'<<12+WeightAddOne ; >>
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.R7:MOV EBX,'C'<<12 ; >>
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
CMP AX,0x0107 ; LATIN SMALL LETTER C WITH ACUTE
JNE .S1:
CMPW [ArgLocale::],ArgLocalePL
JNE .Letter:
MOV EBX,'C'<<12+WeightAddOne ; >>
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
JMP .Letter:
.S1:MOV EBX,'C'<<12 ; >>
LEA ECX,[EBX+WeightDiaCircumflex]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0108 ; LATIN CAPITAL LETTER C WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaCircumflex]
MOV EDX,ECX
CMP AX,0x0109 ; LATIN SMALL LETTER C WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaDotAbove]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x010A ; LATIN CAPITAL LETTER C WITH DOT ABOVE
JE .Letter:
LEA ECX,[EBX+WeightDiaDotAbove]
MOV EDX,ECX
CMP AX,0x010B ; LATIN SMALL LETTER C WITH DOT ABOVE
JE .Letter:
MOV EBX,'C'<<12+WeightAddPrimary ; >>
LEA ECX,[EBX+WeightDiaCaron]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x010C ; LATIN CAPITAL LETTER C WITH CARON
JE .Letter:
MOV EBX,'C'<<12+WeightAddPrimary ; >>
LEA ECX,[EBX+WeightDiaCaron]
MOV EDX,ECX
CMP AX,0x010D ; LATIN SMALL LETTER C WITH CARON
JE .Letter:
MOV EBX,'D'<<12 ; >>
LEA ECX,[EBX+WeightDiaCaron]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x010E ; LATIN CAPITAL LETTER D WITH CARON
JE .Letter:
LEA ECX,[EBX+WeightDiaCaron]
MOV EDX,ECX
CMP AX,0x010F ; LATIN SMALL LETTER D WITH CARON
JE .Letter:
LEA ECX,[EBX+WeightDiaStroke]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0110 ; LATIN CAPITAL LETTER D WITH STROKE
JE .Letter:
LEA ECX,[EBX+WeightDiaStroke]
MOV EDX,ECX
CMP AX,0x0111 ; LATIN SMALL LETTER D WITH STROKE
JE .Letter:
MOV EBX,'E'<<12 ; >>
LEA ECX,[EBX+WeightDiaMacron]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0112 ; LATIN CAPITAL LETTER E WITH MACRON
JE .Letter:
LEA ECX,[EBX+WeightDiaMacron]
MOV EDX,ECX
CMP AX,0x0113 ; LATIN SMALL LETTER E WITH MACRON
JE .Letter:
LEA ECX,[EBX+WeightDiaDotAbove]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0116 ; LATIN CAPITAL LETTER E WITH DOT ABOVE
JE .Letter:
LEA ECX,[EBX+WeightDiaDotAbove]
MOV EDX,ECX
CMP AX,0x0117 ; LATIN SMALL LETTER E WITH DOT ABOVE
JE .Letter:
LEA ECX,[EBX+WeightDiaOgonek]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0118 ; LATIN CAPITAL LETTER E WITH OGONEK
JNE .S3:
CMPW [ArgLocale::],ArgLocalePL
JNE .Letter:
MOV EBX,'E'<<12+WeightAddOne ; >>
LEA ECX,[EBX+WeightDiaOgonek]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.S3:MOV EBX,'E'<<12 ; >>
LEA ECX,[EBX+WeightDiaOgonek]
MOV EDX,ECX
CMP AX,0x0119 ; LATIN SMALL LETTER E WITH OGONEK
JNE .S5:
CMPW [ArgLocale::],ArgLocalePL
JNE .Letter:
MOV EBX,'E'<<12+WeightAddOne ; >>
LEA ECX,[EBX+WeightDiaOgonek]
MOV EDX,ECX
JMP .Letter:
.S5:MOV EBX,'E'<<12 ; >>
LEA ECX,[EBX+WeightDiaCaron]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x011A ; LATIN CAPITAL LETTER E WITH CARON
JE .Letter:
LEA ECX,[EBX+WeightDiaCaron]
MOV EDX,ECX
CMP AX,0x011B ; LATIN SMALL LETTER E WITH CARON
JE .Letter:
MOV EBX,'G'<<12 ; >>
LEA ECX,[EBX+WeightDiaCircumflex]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x011C ; LATIN CAPITAL LETTER G WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaCircumflex]
MOV EDX,ECX
CMP AX,0x011D ; LATIN SMALL LETTER G WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaBreve]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x011E ; LATIN CAPITAL LETTER G WITH BREVE
JNE .S7:
MOV EBX,'G'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaBreve]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.S7:LEA ECX,[EBX+WeightDiaBreve]
MOV EDX,ECX
CMP AX,0x011F ; LATIN SMALL LETTER G WITH BREVE
JNE .T1:
MOV EBX,'G'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaBreve]
MOV EDX,ECX
JMP .Letter:
.T1:LEA ECX,[EBX+WeightDiaDotAbove]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0120 ; LATIN CAPITAL LETTER G WITH DOT ABOVE
JE .Letter:
LEA ECX,[EBX+WeightDiaDotAbove]
MOV EDX,ECX
CMP AX,0x0121 ; LATIN SMALL LETTER G WITH DOT ABOVE
JE .Letter:
LEA ECX,[EBX+WeightDiaCedilla]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0122 ; LATIN CAPITAL LETTER G WITH CEDILLA
JE .Letter:
LEA ECX,[EBX+WeightDiaCedilla]
MOV EDX,ECX
CMP AX,0x0123 ; LATIN SMALL LETTER G WITH CEDILLA
JE .Letter:
MOV EBX,'H'<<12 ; >>
LEA ECX,[EBX+WeightDiaCircumflex]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0124 ; LATIN CAPITAL LETTER H WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaCircumflex]
MOV EDX,ECX
CMP AX,0x0125 ; LATIN SMALL LETTER H WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaStroke]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0126 ; LATIN CAPITAL LETTER H WITH STROKE
JE .Letter:
LEA ECX,[EBX+WeightDiaStroke]
MOV EDX,ECX
CMP AX,0x0127 ; LATIN SMALL LETTER H WITH STROKE
JE .Letter:
MOV EBX,'I'<<12 ; >>
LEA ECX,[EBX+WeightDiaTilde]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0128 ; LATIN CAPITAL LETTER I WITH TILDE
JE .Letter:
LEA ECX,[EBX+WeightDiaTilde]
MOV EDX,ECX
CMP AX,0x0129 ; LATIN SMALL LETTER I WITH TILDE
JE .Letter:
LEA ECX,[EBX+WeightDiaMacron]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x012A ; LATIN CAPITAL LETTER I WITH MACRON
JE .Letter:
LEA ECX,[EBX+WeightDiaMacron]
MOV EDX,ECX
CMP AX,0x012B ; LATIN SMALL LETTER I WITH MACRON
JE .Letter:
LEA ECX,[EBX+WeightDiaOgonek]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x012E ; LATIN CAPITAL LETTER I WITH OGONEK
JE .Letter:
LEA ECX,[EBX+WeightDiaOgonek]
MOV EDX,ECX
CMP AX,0x012F ; LATIN SMALL LETTER I WITH OGONEK
JE .Letter:
LEA ECX,[EBX+WeightDiaDotAbove]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0130 ; LATIN CAPITAL LETTER I WITH DOT ABOVE
JNE .T3:
CMPW [ArgLocale::],ArgLocaleTR
JNE .Letter:
MOV EBX,'I'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaDotAbove]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.T3:LEA ECX,[EBX+WeightDiaDotless]
MOV EDX,ECX
CMP AX,0x0131 ; LATIN SMALL LETTER DOTLESS I
JNE .T5:
CMPW [ArgLocale::],ArgLocaleTR
JNE .Letter:
MOV EBX,('I'-1)<<12+WeightAddThree ; >> Sort dotless i before ordinary i in Turkish.
LEA ECX,[EBX+WeightDiaDotless]
MOV EDX,ECX
JMP .Letter:
.T5:MOV EBX,'J'<<12 ; >>
LEA ECX,[EBX+WeightDiaCircumflex]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0134 ; LATIN CAPITAL LETTER J WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaCircumflex]
MOV EDX,ECX
CMP AX,0x0135 ; LATIN SMALL LETTER J WITH CIRCUMFLEX
JE .Letter:
MOV EBX,'K'<<12 ; >>
LEA ECX,[EBX+WeightDiaCedilla]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0136 ; LATIN CAPITAL LETTER K WITH CEDILLA
JE .Letter:
LEA ECX,[EBX+WeightDiaCedilla]
MOV EDX,ECX
CMP AX,0x0137 ; LATIN SMALL LETTER K WITH CEDILLA
JE .Letter:
LEA ECX,[EBX+WeightDiaShort]
LEA EDX,[ECX+WeightAddCapital] ; NOTE(review): WeightAddCapital is added although U+0138 is a small letter; other small letters use MOV EDX,ECX - confirm this is intended.
CMP AX,0x0138 ; LATIN SMALL LETTER KRA
JE .Letter:
MOV EBX,'L'<<12 ; >>
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0139 ; LATIN CAPITAL LETTER L WITH ACUTE
JE .Letter:
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
CMP AX,0x013A ; LATIN SMALL LETTER L WITH ACUTE
JE .Letter:
LEA ECX,[EBX+WeightDiaCedilla]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x013B ; LATIN CAPITAL LETTER L WITH CEDILLA
JE .Letter:
LEA ECX,[EBX+WeightDiaCedilla]
MOV EDX,ECX
CMP AX,0x013C ; LATIN SMALL LETTER L WITH CEDILLA
JE .Letter:
LEA ECX,[EBX+WeightDiaCaron]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x013D ; LATIN CAPITAL LETTER L WITH CARON
JE .Letter:
LEA ECX,[EBX+WeightDiaCaron]
MOV EDX,ECX
CMP AX,0x013E ; LATIN SMALL LETTER L WITH CARON
JE .Letter:
LEA ECX,[EBX+WeightDiaDotMiddle]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x013F ; LATIN CAPITAL LETTER L WITH MIDDLE DOT
JE .Letter:
LEA ECX,[EBX+WeightDiaDotMiddle]
MOV EDX,ECX
CMP AX,0x0140 ; LATIN SMALL LETTER L WITH MIDDLE DOT
JE .Letter:
LEA ECX,[EBX+WeightDiaStroke]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0141 ; LATIN CAPITAL LETTER L WITH STROKE
JNE .T7:
CMPW [ArgLocale::],ArgLocalePL
JNE .Letter:
MOV EBX,'L'<<12+WeightAddOne ; >>
LEA ECX,[EBX+WeightDiaStroke]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.T7:MOV EBX,'L'<<12 ; >>
LEA ECX,[EBX+WeightDiaStroke]
MOV EDX,ECX
CMP AX,0x0142 ; LATIN SMALL LETTER L WITH STROKE
JNE .U1:
CMPW [ArgLocale::],ArgLocalePL
JNE .Letter:
MOV EBX,'L'<<12+WeightAddOne ; >>
LEA ECX,[EBX+WeightDiaStroke]
MOV EDX,ECX
JMP .Letter:
.U1:MOV EBX,'N'<<12 ; >>
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0143 ; LATIN CAPITAL LETTER N WITH ACUTE
JNE .U3:
CMPW [ArgLocale::],ArgLocalePL
JNE .Letter:
MOV EBX,'N'<<12+WeightAddOne ; >>
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.U3:MOV EBX,'N'<<12 ; >>
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
CMP AX,0x0144 ; LATIN SMALL LETTER N WITH ACUTE
JNE .U5:
CMPW [ArgLocale::],ArgLocalePL
JNE .Letter:
MOV EBX,'N'<<12+WeightAddOne ; >>
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
JMP .Letter:
.U5:LEA ECX,[EBX+WeightDiaCedilla]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0145 ; LATIN CAPITAL LETTER N WITH CEDILLA
JE .Letter:
LEA ECX,[EBX+WeightDiaCedilla]
MOV EDX,ECX
CMP AX,0x0146 ; LATIN SMALL LETTER N WITH CEDILLA
JE .Letter:
LEA ECX,[EBX+WeightDiaCaron]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0147 ; LATIN CAPITAL LETTER N WITH CARON
JE .Letter:
LEA ECX,[EBX+WeightDiaCaron]
MOV EDX,ECX
CMP AX,0x0148 ; LATIN SMALL LETTER N WITH CARON
JE .Letter:
LEA ECX,[EBX+WeightDiaApostrophe]
LEA EDX,[ECX+WeightAddCapital] ; NOTE(review): WeightAddCapital is added although U+0149 is a small letter; other small letters use MOV EDX,ECX - confirm this is intended.
CMP AX,0x0149 ; LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
JE .Letter:
LEA ECX,[EBX+WeightDiaOgonek]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x014A ; LATIN CAPITAL LETTER ENG
JE .Letter:
LEA ECX,[EBX+WeightDiaOgonek]
MOV EDX,ECX
CMP AX,0x014B ; LATIN SMALL LETTER ENG
JE .Letter:
MOV EBX,'O'<<12 ; >>
LEA ECX,[EBX+WeightDiaMacron]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x014C ; LATIN CAPITAL LETTER O WITH MACRON
JE .Letter:
LEA ECX,[EBX+WeightDiaMacron]
MOV EDX,ECX
CMP AX,0x014D ; LATIN SMALL LETTER O WITH MACRON
JE .Letter:
LEA ECX,[EBX+WeightDiaDoubleAcute]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0150 ; LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
JNE .U7:
CALL .Scandinavian?
JNE .Letter:
MOV EBX,('Z'+6)<<12 ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.U7:MOV EBX,'O'<<12 ; >>
LEA ECX,[EBX+WeightDiaDoubleAcute]
MOV EDX,ECX
CMP AX,0x0151 ; LATIN SMALL LETTER O WITH DOUBLE ACUTE
JNE .V1:
CALL .Scandinavian?
JNE .Letter:
MOV EBX,('Z'+6)<<12 ; >>
MOV ECX,EBX
MOV EDX,ECX
JMP .Letter:
.V1:MOV EBX,'O'<<12 ; >>
LEA ECX,[EBX+WeightDiaLigature]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0152 ; LATIN CAPITAL LIGATURE OE
JE .Letter:
LEA ECX,[EBX+WeightDiaLigature]
MOV EDX,ECX
CMP AX,0x0153 ; LATIN SMALL LIGATURE OE
JE .Letter:
MOV EBX,'R'<<12 ; >>
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0154 ; LATIN CAPITAL LETTER R WITH ACUTE
JE .Letter:
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
CMP AX,0x0155 ; LATIN SMALL LETTER R WITH ACUTE
JE .Letter:
LEA ECX,[EBX+WeightDiaCedilla]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0156 ; LATIN CAPITAL LETTER R WITH CEDILLA
JE .Letter:
LEA ECX,[EBX+WeightDiaCedilla]
MOV EDX,ECX
CMP AX,0x0157 ; LATIN SMALL LETTER R WITH CEDILLA
JE .Letter:
MOV EBX,'R'<<12+WeightAddPrimary ; >>
LEA ECX,[EBX+WeightDiaCaron]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0158 ; LATIN CAPITAL LETTER R WITH CARON
JE .Letter:
MOV EBX,'R'<<12+WeightAddPrimary ; >>
LEA ECX,[EBX+WeightDiaCaron]
MOV EDX,ECX
CMP AX,0x0159 ; LATIN SMALL LETTER R WITH CARON
JE .Letter:
MOV EBX,'S'<<12 ; >>
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x015A ; LATIN CAPITAL LETTER S WITH ACUTE
JNE .V3:
CMPW [ArgLocale::],ArgLocalePL
JNE .Letter:
MOV EBX,'S'<<12+WeightAddOne ; >>
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.V3:LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
CMP AX,0x015B ; LATIN SMALL LETTER S WITH ACUTE
JNE .V5:
CMPW [ArgLocale::],ArgLocalePL
JNE .Letter:
MOV EBX,'S'<<12+WeightAddOne ; >>
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
JMP .Letter:
.V5:LEA ECX,[EBX+WeightDiaCircumflex]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x015C ; LATIN CAPITAL LETTER S WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaCircumflex]
MOV EDX,ECX
CMP AX,0x015D ; LATIN SMALL LETTER S WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaCedilla]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x015E ; LATIN CAPITAL LETTER S WITH CEDILLA
JNE .V7:
CMPW [ArgLocale::],ArgLocaleTR
JNE .Letter:
MOV EBX,'S'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaCedilla]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.V7:LEA ECX,[EBX+WeightDiaCedilla]
MOV EDX,ECX
CMP AX,0x015F ; LATIN SMALL LETTER S WITH CEDILLA
JNE .W1:
CMPW [ArgLocale::],ArgLocaleTR
JNE .Letter:
MOV EBX,'S'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaCedilla]
MOV EDX,ECX
JMP .Letter:
.W1:MOV EBX,'S'<<12+WeightAddPrimary ; >>
LEA ECX,[EBX+WeightDiaCaron]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0160 ; LATIN CAPITAL LETTER S WITH CARON
JNE .W3:
;JE .Letter:
CMPW [ArgLocale::],ArgLocaleEE
JNE .Letter:
MOV EBX,0x0005_5000
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.W3:MOV EBX,'S'<<12+WeightAddPrimary ; >>
LEA ECX,[EBX+WeightDiaCaron]
MOV EDX,ECX
CMP AX,0x0161 ; LATIN SMALL LETTER S WITH CARON
JNE .W5:
CMPW [ArgLocale::],ArgLocaleEE
JNE .Letter:
MOV EBX,0x0005_5000
MOV ECX,EBX
MOV EDX,ECX
JMP .Letter:
.W5:MOV EBX,'T'<<12 ; >>
LEA ECX,[EBX+WeightDiaCedilla]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0162 ; LATIN CAPITAL LETTER T WITH CEDILLA
JE .Letter:
LEA ECX,[EBX+WeightDiaCedilla]
MOV EDX,ECX
CMP AX,0x0163 ; LATIN SMALL LETTER T WITH CEDILLA
JE .Letter:
LEA ECX,[EBX+WeightDiaCaron]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0164 ; LATIN CAPITAL LETTER T WITH CARON
JE .Letter:
LEA ECX,[EBX+WeightDiaCaron]
MOV EDX,ECX
CMP AX,0x0165 ; LATIN SMALL LETTER T WITH CARON
JE .Letter:
LEA ECX,[EBX+WeightDiaStroke]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0166 ; LATIN CAPITAL LETTER T WITH STROKE
JE .Letter:
LEA ECX,[EBX+WeightDiaStroke]
MOV EDX,ECX
CMP AX,0x0167 ; LATIN SMALL LETTER T WITH STROKE
JE .Letter:
MOV EBX,'U'<<12 ; >>
LEA ECX,[EBX+WeightDiaTilde]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0168 ; LATIN CAPITAL LETTER U WITH TILDE
JE .Letter:
LEA ECX,[EBX+WeightDiaTilde]
MOV EDX,ECX
CMP AX,0x0169 ; LATIN SMALL LETTER U WITH TILDE
JE .Letter:
LEA ECX,[EBX+WeightDiaMacron]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x016A ; LATIN CAPITAL LETTER U WITH MACRON
JE .Letter:
LEA ECX,[EBX+WeightDiaMacron]
MOV EDX,ECX
CMP AX,0x016B ; LATIN SMALL LETTER U WITH MACRON
JE .Letter:
LEA ECX,[EBX+WeightDiaBreve]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x016C ; LATIN CAPITAL LETTER U WITH BREVE
JE .Letter:
LEA ECX,[EBX+WeightDiaBreve]
MOV EDX,ECX
CMP AX,0x016D ; LATIN SMALL LETTER U WITH BREVE
JE .Letter:
LEA ECX,[EBX+WeightDiaRingAbove]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x016E ; LATIN CAPITAL LETTER U WITH RING ABOVE
JE .Letter:
LEA ECX,[EBX+WeightDiaRingAbove]
MOV EDX,ECX
CMP AX,0x016F ; LATIN SMALL LETTER U WITH RING ABOVE
JE .Letter:
LEA ECX,[EBX+WeightDiaDoubleAcute]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0170 ; LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
JE .Letter:
LEA ECX,[EBX+WeightDiaDoubleAcute]
MOV EDX,ECX
CMP AX,0x0171 ; LATIN SMALL LETTER U WITH DOUBLE ACUTE
JE .Letter:
LEA ECX,[EBX+WeightDiaOgonek]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0172 ; LATIN CAPITAL LETTER U WITH OGONEK
JE .Letter:
LEA ECX,[EBX+WeightDiaOgonek]
MOV EDX,ECX
CMP AX,0x0173 ; LATIN SMALL LETTER U WITH OGONEK
JE .Letter:
MOV EBX,'W'<<12 ; >>
LEA ECX,[EBX+WeightDiaCircumflex]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0174 ; LATIN CAPITAL LETTER W WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaCircumflex]
MOV EDX,ECX
CMP AX,0x0175 ; LATIN SMALL LETTER W WITH CIRCUMFLEX
JE .Letter:
MOV EBX,'Y'<<12 ; >>
LEA ECX,[EBX+WeightDiaCircumflex]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0176 ; LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaCircumflex]
MOV EDX,ECX
CMP AX,0x0177 ; LATIN SMALL LETTER Y WITH CIRCUMFLEX
JE .Letter:
LEA ECX,[EBX+WeightDiaDiaeresis]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0178 ; LATIN CAPITAL LETTER Y WITH DIAERESIS
JE .Letter:
MOV EBX,'Z'<<12 ; >>
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0179 ; LATIN CAPITAL LETTER Z WITH ACUTE
JNE .W7:
CMPW [ArgLocale::],ArgLocalePL
JNE .Letter:
MOV EBX,'Z'<<12+WeightAddOne ; >>
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.W7:LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
CMP AX,0x017A ; LATIN SMALL LETTER Z WITH ACUTE
JNE .X1:
CMPW [ArgLocale::],ArgLocalePL
JNE .Letter:
MOV EBX,'Z'<<12+WeightAddOne ; >>
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
JMP .Letter:
.X1:LEA ECX,[EBX+WeightDiaDotAbove]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x017B ; LATIN CAPITAL LETTER Z WITH DOT ABOVE
JNE .X3:
CMPW [ArgLocale::],ArgLocalePL
JNE .Letter:
MOV EBX,'Z'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaDotAbove]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.X3:LEA ECX,[EBX+WeightDiaDotAbove]
MOV EDX,ECX
CMP AX,0x017C ; LATIN SMALL LETTER Z WITH DOT ABOVE
JNE .X5:
CMPW [ArgLocale::],ArgLocalePL
JNE .Letter:
MOV EBX,'Z'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaDotAbove]
MOV EDX,ECX
JMP .Letter:
.X5:MOV EBX,'Z'<<12+WeightAddPrimary ; >>
LEA ECX,[EBX+WeightDiaCaron]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x017D ; LATIN CAPITAL LETTER Z WITH CARON
JNE .X7:
CMPW [ArgLocale::],ArgLocaleEE
JNE .Letter:
MOV EBX,0x0005_7000
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.X7:MOV EBX,'Z'<<12+WeightAddPrimary ; >>
LEA ECX,[EBX+WeightDiaCaron]
MOV EDX,ECX
CMP AX,0x017E ; LATIN SMALL LETTER Z WITH CARON
JNE .Y1:
CMPW [ArgLocale::],ArgLocaleEE
JNE .Letter:
MOV EBX,0x0005_7000
MOV ECX,EBX
MOV EDX,EBX
JMP .Letter:
.Y1:MOV EBX,'S'<<12 ; >>
LEA ECX,[EBX+WeightDiaLong]
MOV EDX,ECX
CMP AX,0x017F ; LATIN SMALL LETTER LONG S
JE .Letter:
MOV EBX,'E'<<12+WeightAddOne ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x018F ; LATIN CAPITAL LETTER SCHWA
JE .Letter:
MOV EBX,'F'<<12 ; >>
LEA ECX,[EBX+WeightDiaHook]
MOV EDX,ECX
CMP AX,0x0192 ; LATIN SMALL LETTER F WITH HOOK
JE .Letter:
MOV EBX,'O'<<12 ; >>
LEA ECX,[EBX+WeightDiaHorn]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x01A0 ; LATIN CAPITAL LETTER O WITH HORN
JE .Letter:
LEA ECX,[EBX+WeightDiaHorn]
MOV EDX,ECX
CMP AX,0x01A1 ; LATIN SMALL LETTER O WITH HORN
JE .Letter:
MOV EBX,'U'<<12 ; >>
LEA ECX,[EBX+WeightDiaHorn]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x01AF ; LATIN CAPITAL LETTER U WITH HORN
JE .Letter:
LEA ECX,[EBX+WeightDiaHorn]
MOV EDX,ECX
CMP AX,0x01B0 ; LATIN SMALL LETTER U WITH HORN
JE .Letter:
MOV EBX,'D'<<12+WeightAddTwo+WeightAddPrimary ; >>
LEA ECX,[EBX+WeightDiaCaron]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x01C4 ; LATIN CAPITAL LETTER DZ WITH CARON
JE .Letter:
MOV EBX,'D'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaCaron]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x01C5 ; LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
JE .Letter:
MOV EBX,'D'<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaCaron]
MOV EDX,ECX
CMP AX,0x01C6 ; LATIN SMALL LETTER DZ WITH CARON
JE .Letter:
MOV EBX,'D'<<12+WeightAddOne+WeightAddPrimary ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x01F1 ; LATIN CAPITAL LETTER DZ
JE .Letter:
MOV EBX,'D'<<12+WeightAddOne ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x01F2 ; LATIN CAPITAL LETTER D WITH SMALL LETTER Z
JE .Letter:
MOV EBX,'D'<<12+WeightAddOne ; >>
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x01F3 ; LATIN SMALL LETTER DZ
JE .Letter:
MOV EBX,'G'<<12 ; >>
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
CMP AX,0x01F5 ; LATIN SMALL LETTER G WITH ACUTE
JE .Letter:
MOV EBX,'S'<<12 ; >>
LEA ECX,[EBX+WeightDiaCommaBelow]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0218 ; LATIN CAPITAL LETTER S WITH COMMA BELOW
JE .Letter:
LEA ECX,[EBX+WeightDiaCommaBelow]
MOV EDX,ECX
CMP AX,0x0219 ; LATIN SMALL LETTER S WITH COMMA BELOW
JE .Letter:
MOV EBX,'T'<<12 ; >>
LEA ECX,[EBX+WeightDiaCommaBelow]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x021A ; LATIN CAPITAL LETTER T WITH COMMA BELOW
JE .Letter:
LEA ECX,[EBX+WeightDiaCommaBelow]
MOV EDX,ECX
CMP AX,0x021B ; LATIN SMALL LETTER T WITH COMMA BELOW
JE .Letter:
MOV EBX,'J'<<12 ; >>
LEA ECX,[EBX+WeightDiaDotless]
MOV EDX,ECX
CMP AX,0x0237 ; LATIN SMALL LETTER DOTLESS J
JE .Letter:
MOV EBX,'E'<<12+WeightAddOne ; >>
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x0259 ; LATIN SMALL LETTER SCHWA
JE .Letter:
MOV EBX,'R'<<12 ; >>
LEA ECX,[EBX+WeightDiaLong]
MOV EDX,ECX
CMP AX,0x027C ; LATIN SMALL LETTER R WITH LONG LEG
JE .Letter:
CMP AX,0x0386
JB .Punctuation: ; 0x027D..0x0385.
MOV EBX,0x0391<<12 ; >>
LEA ECX,[EBX+WeightDiaTonos]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0386 ; GREEK CAPITAL LETTER ALPHA WITH TONOS
JE .Letter:
CMP AX,0x0387 ; GREEK ANO TELEIA
JE .Punctuation:
MOV EBX,0x0395<<12 ; >>
LEA ECX,[EBX+WeightDiaTonos]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0388 ; GREEK CAPITAL LETTER EPSILON WITH TONOS
JE .Letter:
MOV EBX,0x0397<<12 ; >>
LEA ECX,[EBX+WeightDiaTonos]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0389 ; GREEK CAPITAL LETTER ETA WITH TONOS
JE .Letter:
MOV EBX,0x0399<<12 ; >>
LEA ECX,[EBX+WeightDiaTonos]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x038A ; GREEK CAPITAL LETTER IOTA WITH TONOS
JE .Letter:
MOV EBX,0x039F<<12 ; >>
LEA ECX,[EBX+WeightDiaTonos]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x038C ; GREEK CAPITAL LETTER OMICRON WITH TONOS
JE .Letter:
MOV EBX,0x03A5<<12 ; >>
LEA ECX,[EBX+WeightDiaTonos]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x038E ; GREEK CAPITAL LETTER UPSILON WITH TONOS
JE .Letter:
MOV EBX,0x03A9<<12 ; >>
LEA ECX,[EBX+WeightDiaTonos]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x038F ; GREEK CAPITAL LETTER OMEGA WITH TONOS
JE .Letter:
MOV EBX,0x0399<<12 ; >>
LEA ECX,[EBX+WeightDiaDialytTonos]
MOV EDX,ECX
CMP AX,0x0390 ; GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
JE .Letter:
; Code points that reach here and are below 0x03AA use the code point itself, shifted left by 12, as the base weight.
MOV EBX,EAX ; NOTE(review): copies all 32 bits of EAX although only AX is tested; presumably the upper half is zero here - confirm.
SHL EBX,12
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x03AA
JB .Letter: ; GREEK CAPITAL LETTER ALPHA..OMEGA 0x0391..0x03A9.
MOV EBX,0x0399<<12 ; >>
LEA ECX,[EBX+WeightDiaDialytika]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x03AA ; GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
JE .Letter:
MOV EBX,0x03A5<<12 ; >>
LEA ECX,[EBX+WeightDiaDialytika]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x03AB ; GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
JE .Letter:
MOV EBX,0x0391<<12 ; >>
LEA ECX,[EBX+WeightDiaTonos]
MOV EDX,ECX
CMP AX,0x03AC ; GREEK SMALL LETTER ALPHA WITH TONOS
JE .Letter:
MOV EBX,0x0395<<12 ; >>
LEA ECX,[EBX+WeightDiaTonos]
MOV EDX,ECX
CMP AX,0x03AD ; GREEK SMALL LETTER EPSILON WITH TONOS
JE .Letter:
MOV EBX,0x0397<<12 ; >>
LEA ECX,[EBX+WeightDiaTonos]
MOV EDX,ECX
CMP AX,0x03AE ; GREEK SMALL LETTER ETA WITH TONOS
JE .Letter:
MOV EBX,0x0399<<12 ; >>
LEA ECX,[EBX+WeightDiaTonos]
MOV EDX,ECX
CMP AX,0x03AF ; GREEK SMALL LETTER IOTA WITH TONOS
JE .Letter:
MOV EBX,0x03A5<<12 ; >>
LEA ECX,[EBX+WeightDiaDialytTonos]
MOV EDX,ECX
CMP AX,0x03B0 ; GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
JE .Letter:
MOV EBX,EAX
SUB BX,0x0020 ; Convert Greek small letters to capital.
SHL EBX,12
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x03CA
JB .Letter: ; GREEK SMALL LETTER ALPHA..OMEGA 0x03B1..0x03C9.
MOV EBX,0x0399<<12 ; >>
LEA ECX,[EBX+WeightDiaDialytika]
MOV EDX,ECX
CMP AX,0x03CA ; GREEK SMALL LETTER IOTA WITH DIALYTIKA
JE .Letter:
MOV EBX,0x03A5<<12 ; >>
LEA ECX,[EBX+WeightDiaDialytika]
MOV EDX,ECX
CMP AX,0x03CB ; GREEK SMALL LETTER UPSILON WITH DIALYTIKA
JE .Letter:
MOV EBX,0x039F<<12 ; >>
LEA ECX,[EBX+WeightDiaTonos]
MOV EDX,ECX
CMP AX,0x03CC ; GREEK SMALL LETTER OMICRON WITH TONOS
JE .Letter:
MOV EBX,0x03A5<<12 ; >>
LEA ECX,[EBX+WeightDiaTonos]
MOV EDX,ECX
CMP AX,0x03CD ; GREEK SMALL LETTER UPSILON WITH TONOS
JE .Letter:
MOV EBX,0x03A9<<12 ; >>
LEA ECX,[EBX+WeightDiaTonos]
MOV EDX,ECX
CMP AX,0x03CE ; GREEK SMALL LETTER OMEGA WITH TONOS
JE .Letter:
MOV EBX,(0x03A9+10)<<12 ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x03CF ; GREEK CAPITAL KAI SYMBOL
JE .Letter:
MOV EBX,0x0392<<12 ; >>
LEA ECX,[EBX+WeightDiaShort]
MOV EDX,ECX
CMP AX,0x03D0 ; GREEK BETA SYMBOL
JE .Letter:
MOV EBX,0x0398<<12 ; >>
LEA ECX,[EBX+WeightDiaShort]
MOV EDX,ECX
CMP AX,0x03D1 ; GREEK THETA SYMBOL
JE .Letter:
MOV EBX,0x03A5<<12 ; >>
LEA ECX,[EBX+WeightDiaHook]
MOV EDX,ECX
CMP AX,0x03D2 ; GREEK UPSILON WITH HOOK SYMBOL
JE .Letter:
MOV EBX,0x03A5<<12 ; >>
LEA ECX,[EBX+WeightDiaShort]
LEA EDX,[ECX+WeightDiaAcute]
CMP AX,0x03D3 ; GREEK UPSILON WITH ACUTE AND HOOK SYMBOL
JE .Letter:
MOV EBX,0x03A5<<12 ; >>
LEA ECX,[EBX+WeightDiaShort]
LEA EDX,[ECX+WeightDiaDiaeresis]
CMP AX,0x03D4 ; GREEK UPSILON WITH DIAERESIS AND HOOK SYMBOL
JE .Letter:
MOV EBX,0x03A6<<12 ; >>
LEA ECX,[EBX+WeightAddTwo]
MOV EDX,ECX
CMP AX,0x03D5 ; GREEK PHI SYMBOL
JE .Letter:
MOV EBX,0x03A0<<12 ; >>
LEA ECX,[EBX+WeightAddTwo]
MOV EDX,ECX
CMP AX,0x03D6 ; GREEK PI SYMBOL
JE .Letter:
MOV EBX,(0x03A9+10)<<12 ; >>
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x03D7 ; GREEK KAI SYMBOL
JE .Letter:
MOV EBX,(0x03A9+15)<<12 ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x03D8 ; GREEK LETTER QOPPA
JE .Letter:
MOV EBX,(0x03A9+15)<<12 ; >>
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x03D9 ; GREEK SMALL LETTER QOPPA
JE .Letter:
MOV EBX,(0x03A9+20)<<12 ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x03DA ; GREEK LETTER STIGMA
JE .Letter:
MOV EBX,(0x03A9+20)<<12 ; >>
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x03DB ; GREEK SMALL LETTER STIGMA
JE .Letter:
MOV EBX,(0x03A9+25)<<12 ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x03DC ; GREEK CAPITAL LETTER DIGAMMA
JE .Letter:
MOV EBX,(0x03A9+25)<<12 ; >>
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x03DD ; GREEK SMALL LETTER DIGAMMA
JE .Letter:
MOV EBX,(0x03A9+30)<<12 ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x03DE ; GREEK LETTER KOPPA
JE .Letter:
MOV EBX,(0x03A9+30)<<12 ; >>
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x03DF ; GREEK SMALL LETTER KOPPA
JE .Letter:
MOV EBX,(0x03A9+35)<<12 ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x03E0 ; GREEK LETTER SAMPI
JE .Letter:
MOV EBX,(0x03A9+35)<<12 ; >>
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x03E1 ; GREEK SMALL LETTER SAMPI
JE .Letter:
MOV EBX,0x0415<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaDiaeresis]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0401 ; CYRILLIC CAPITAL LETTER IO
JE .Letter:
MOV EBX,0x0414<<12+WeightAddTwo ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0402 ; CYRILLIC CAPITAL LETTER DJE
JE .Letter:
MOV EBX,0x0414<<12+WeightAddThree ; >>
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0403 ; CYRILLIC CAPITAL LETTER GJE
JE .Letter:
MOV EBX,0x0415<<12+WeightAddTwo ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0404 ; CYRILLIC CAPITAL LETTER UKRAINIAN IE
JE .Letter:
MOV EBX,0x0417<<12+WeightAddTwo ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0405 ; CYRILLIC CAPITAL LETTER DZE
JE .Letter:
MOV EBX,0x0418<<12+WeightAddOne ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0406 ; CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
JE .Letter:
MOV EBX,0x0418<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaDiaeresis]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0407 ; CYRILLIC CAPITAL LETTER YI
JE .Letter:
MOV EBX,0x0418<<12+WeightAddThree ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0408 ; CYRILLIC CAPITAL LETTER JE
JE .Letter:
MOV EBX,0x041B<<12+WeightAddTwo ; >>
MOV ECX,EBX
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0409 ; CYRILLIC CAPITAL LETTER LJE
JE .Letter:
MOV EBX,0x041D<<12+WeightAddTwo ; >>
MOV ECX,EBX
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x040A ; CYRILLIC CAPITAL LETTER NJE
JE .Letter:
MOV EBX,0x0422<<12+WeightAddOne ; >>
MOV ECX,EBX
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x040B ; CYRILLIC CAPITAL LETTER TSHE
JE .Letter:
MOV EBX,0x0422<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x040C ; CYRILLIC CAPITAL LETTER KJE
JE .Letter:
MOV EBX,0x0418<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaGrave]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x040D ; CYRILLIC CAPITAL LETTER I WITH GRAVE
JE .Letter:
MOV EBX,0x0423<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaShort]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x040E ; CYRILLIC CAPITAL LETTER SHORT U
JE .Letter:
MOV EBX,0x0427<<12+WeightAddTwo ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x040F ; CYRILLIC CAPITAL LETTER DZHE
JE .Letter:
MOV EBX,EAX
SHL EBX,12
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0430
JB .Letter: ; CYRILLIC CAPITAL LETTER A..YA 0x0410..0x042F
MOV EBX,EAX
SUB BX,0x0020 ; Convert regular cyrillic small letters to capital.
SHL EBX,12
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x0450
JB .Letter: ; CYRILLIC SMALL LETTER A..YA 0x0430..0x044F
MOV EBX,0x0415<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaDiaeresis]
MOV EDX,ECX
CMP AX,0x0451 ; CYRILLIC SMALL LETTER IO
JE .Letter:
MOV EBX,0x0414<<12+WeightAddTwo ; >>
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x0452 ; CYRILLIC SMALL LETTER DJE
JE .Letter:
MOV EBX,0x0414<<12+WeightAddThree ; >>
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
CMP AX,0x0453 ; CYRILLIC SMALL LETTER GJE
JE .Letter:
MOV EBX,0x0415<<12+WeightAddTwo ; >>
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x0454 ; CYRILLIC SMALL LETTER UKRAINIAN IE
JE .Letter:
MOV EBX,0x0417<<12+WeightAddTwo ; >>
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x0455 ; CYRILLIC SMALL LETTER DZE
JE .Letter:
MOV EBX,0x0418<<12+WeightAddOne ; >>
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x0456 ; CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
JE .Letter:
MOV EBX,0x0418<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaDiaeresis]
MOV EDX,ECX
CMP AX,0x0457 ; CYRILLIC SMALL LETTER YI
JE .Letter:
MOV EBX,0x0418<<12+WeightAddThree ; >>
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x0458 ; CYRILLIC SMALL LETTER JE
JE .Letter:
MOV EBX,0x041B<<12+WeightAddTwo ; >>
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x0459 ; CYRILLIC SMALL LETTER LJE
JE .Letter:
MOV EBX,0x041D<<12+WeightAddTwo ; >>
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x045A ; CYRILLIC SMALL LETTER NJE
JE .Letter:
MOV EBX,0x0422<<12+WeightAddOne ; >>
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x045B ; CYRILLIC SMALL LETTER TSHE
JE .Letter:
MOV EBX,0x0422<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
CMP AX,0x045C ; CYRILLIC SMALL LETTER KJE
JE .Letter:
MOV EBX,0x0418<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaGrave]
MOV EDX,ECX
CMP AX,0x045D ; CYRILLIC SMALL LETTER I WITH GRAVE
JE .Letter:
MOV EBX,0x0423<<12+WeightAddTwo ; >>
LEA ECX,[EBX+WeightDiaShort]
MOV EDX,ECX
CMP AX,0x045E ; CYRILLIC SMALL LETTER SHORT U
JE .Letter:
MOV EBX,0x0427<<12+WeightAddTwo ; >>
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x045F ; CYRILLIC SMALL LETTER DZHE
JE .Letter:
MOV EBX,0x0413<<12 ; >>
LEA ECX,[EBX+WeightDiaUpturn]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0490 ; CYRILLIC CAPITAL LETTER GHE WITH UPTURN
JE .Letter:
LEA ECX,[EBX+WeightDiaUpturn]
MOV EDX,ECX
CMP AX,0x0491 ; CYRILLIC SMALL LETTER GHE WITH UPTURN
JE .Letter:
LEA ECX,[EBX+WeightDiaStroke]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x0492 ; CYRILLIC CAPITAL LETTER GHE WITH STROKE
JE .Letter:
LEA ECX,[EBX+WeightDiaStroke]
MOV EDX,ECX
CMP AX,0x0493 ; CYRILLIC SMALL LETTER GHE WITH STROKE
JE .Letter:
MOV EBX,0x041A<<12 ; >>
LEA ECX,[EBX+WeightDiaDescender]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x049A ; CYRILLIC CAPITAL LETTER KA WITH DESCENDER
JE .Letter:
LEA ECX,[EBX+WeightDiaDescender]
MOV EDX,ECX
CMP AX,0x049B ; CYRILLIC SMALL LETTER KA WITH DESCENDER
JE .Letter:
MOV EBX,0x0425<<12 ; >>
LEA ECX,[EBX+WeightDiaDescender]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x04B2 ; CYRILLIC CAPITAL LETTER HA WITH DESCENDER
JE .Letter:
LEA ECX,[EBX+WeightDiaDescender]
MOV EDX,ECX
CMP AX,0x04B3 ; CYRILLIC SMALL LETTER HA WITH DESCENDER
JE .Letter:
MOV EBX,0x0427<<12 ; >>
LEA ECX,[EBX+WeightDiaDescender]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x04B6 ; CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
JE .Letter:
LEA ECX,[EBX+WeightDiaDescender]
MOV EDX,ECX
CMP AX,0x04B7 ; CYRILLIC SMALL LETTER CHE WITH DESCENDER
JE .Letter:
MOV EBX,0x0417<<12 ; >>
MOV ECX,EBX
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x04E0 ; CYRILLIC SMALL LETTER ABKHASIAN DZE
JE .Letter:
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x04E1 ; CYRILLIC SMALL LETTER ABKHASIAN DZE
JE .Letter:
MOV EBX,0x0419<<12 ; >>
LEA ECX,[EBX+WeightDiaMacron]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x04E2 ; CYRILLIC CAPITAL LETTER I WITH MACRON
JE .Letter:
LEA ECX,[EBX+WeightDiaMacron]
MOV EDX,ECX
CMP AX,0x04E3 ; CYRILLIC SMALL LETTER I WITH MACRON
JE .Letter:
MOV EBX,0x0423<<12 ; >>
LEA ECX,[EBX+WeightDiaMacron]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x04EE ; CYRILLIC CAPITAL LETTER U WITH MACRON
JE .Letter:
LEA ECX,[EBX+WeightDiaMacron]
MOV EDX,ECX
CMP AX,0x04EF ; CYRILLIC SMALL LETTER U WITH MACRON
JE .Letter:
CMP AX,0x05D0
JB .Punctuation: ; 0x04F0..0x05CF.
MOV EBX,EAX
SHL EBX,12
MOV ECX,EBX
MOV EDX,EBX
CMP AX,0x05F0
JB .Letter: ; HEBREW LETTER ALEF..TAV 0x05D0..0x05EA.
CMP AX,0x0621
JB .Punctuation: ; 0x05F0..0x0620.
MOV EBX,EAX
SHL EBX,12
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x0660
JB .Letter: ; ARABIC LETTER HAMZA..YEH 0x0621..0x064A.
CMP AX,0x066A
JB .Digit: ; ARABIC-INDIC DIGIT ZERO..NINE 0x0660..0x0669.
CMP AX,0x066A
JE .Punctuation: ; 0x066A..0x0678.
MOV EBX,EAX
SHL EBX,12
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x06F0
JB .Letter: ; ARABIC LETTER TTEH..AE 0x0679..0x06D5.
CMP AX,0x06FA
JB .Digit: ; EXTENDED ARABIC-INDIC DIGIT ZERO..NINE 0x06F0..0x06F9.
MOV EBX,EAX
SHL EBX,12
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x0E50
JB .Letter: ; THAI CHARACTER KO KAI..FONGMAN 0x06FA..0x0E4F.
CMP AX,0x0E5A
JB .Digit: ; THAI DIGIT ZERO..NINE 0x0E50..0x0E59.
MOV EBX,EAX
SHL EBX,12
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0x1E02
JB .Letter: ; CANADIAN SYLLABICS I..NNGAA 0x0E5A..0x1E01.
MOV EBX,'B'<<12 ; >>
LEA ECX,[EBX+WeightDiaDotAbove]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x1E02 ; LATIN CAPITAL LETTER B WITH DOT ABOVE
JE .Letter:
LEA ECX,[EBX+WeightDiaDotAbove]
MOV EDX,ECX
CMP AX,0x1E03 ; LATIN SMALL LETTER B WITH DOT ABOVE
JE .Letter:
MOV EBX,'D'<<12 ; >>
LEA ECX,[EBX+WeightDiaDotAbove]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x1E0A ; LATIN CAPITAL LETTER D WITH DOT ABOVE
JE .Letter:
LEA ECX,[EBX+WeightDiaDotAbove]
MOV EDX,ECX
CMP AX,0x1E0B ; LATIN SMALL LETTER D WITH DOT ABOVE
JE .Letter:
MOV EBX,'F'<<12 ; >>
LEA ECX,[EBX+WeightDiaDotAbove]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x1E1E ; LATIN CAPITAL LETTER F WITH DOT ABOVE
JE .Letter:
LEA ECX,[EBX+WeightDiaDotAbove]
MOV EDX,ECX
CMP AX,0x1E1F ; LATIN SMALL LETTER F WITH DOT ABOVE
JE .Letter:
MOV EBX,'M'<<12 ; >>
LEA ECX,[EBX+WeightDiaDotAbove]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x1E40 ; LATIN CAPITAL LETTER M WITH DOT ABOVE
JE .Letter:
LEA ECX,[EBX+WeightDiaDotAbove]
MOV EDX,ECX
CMP AX,0x1E41 ; LATIN SMALL LETTER M WITH DOT ABOVE
JE .Letter:
MOV EBX,'P'<<12 ; >>
LEA ECX,[EBX+WeightDiaDotAbove]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x1E56 ; LATIN CAPITAL LETTER P WITH DOT ABOVE
JE .Letter:
LEA ECX,[EBX+WeightDiaDotAbove]
MOV EDX,ECX
CMP AX,0x1E57 ; LATIN SMALL LETTER P WITH DOT ABOVE
JE .Letter:
MOV EBX,'S'<<12 ; >>
LEA ECX,[EBX+WeightDiaDotAbove]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x1E60 ; LATIN CAPITAL LETTER S WITH DOT ABOVE
JE .Letter:
LEA ECX,[EBX+WeightDiaDotAbove]
MOV EDX,ECX
CMP AX,0x1E61 ; LATIN SMALL LETTER S WITH DOT ABOVE
JE .Letter:
MOV EBX,'T'<<12 ; >>
LEA ECX,[EBX+WeightDiaDotAbove]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x1E6A ; LATIN CAPITAL LETTER T WITH DOT ABOVE
JE .Letter:
LEA ECX,[EBX+WeightDiaDotAbove]
MOV EDX,ECX
CMP AX,0x1E6B ; LATIN SMALL LETTER T WITH DOT ABOVE
JE .Letter:
MOV EBX,'W'<<12 ; >>
LEA ECX,[EBX+WeightDiaGrave]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x1E80 ; LATIN CAPITAL LETTER W WITH GRAVE
JE .Letter:
LEA ECX,[EBX+WeightDiaGrave]
MOV EDX,ECX
CMP AX,0x1E81 ; LATIN SMALL LETTER W WITH GRAVE
JE .Letter:
LEA ECX,[EBX+WeightDiaAcute]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x1E82 ; LATIN CAPITAL LETTER W WITH ACUTE
JE .Letter:
LEA ECX,[EBX+WeightDiaAcute]
MOV EDX,ECX
CMP AX,0x1E83 ; LATIN SMALL LETTER W WITH ACUTE
JE .Letter:
LEA ECX,[EBX+WeightDiaDiaeresis]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x1E84 ; LATIN CAPITAL LETTER W WITH DIAERESIS
JE .Letter:
LEA ECX,[EBX+WeightDiaDiaeresis]
MOV EDX,ECX
CMP AX,0x1E85 ; LATIN SMALL LETTER W WITH DIAERESIS
JE .Letter:
MOV EBX,'S'<<12 ; >>
LEA ECX,[EBX+WeightDiaDotAbove]
MOV EDX,ECX
CMP AX,0x1E9B ; LATIN SMALL LETTER LONG S WITH DOT ABOVE
JE .Letter:
MOV EBX,'S'<<12 ; >>
LEA ECX,[EBX+WeightDiaLong]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x1E9E ; LATIN CAPITAL LETTER SHARP S
JNE .Y3:
CMPW [ArgLocale::],ArgLocaleDE
JNE .Letter:
MOV EBX,'S'<<12+WeightAddOne ; >>
LEA ECX,[EBX+WeightDiaLong]
LEA EDX,[ECX+WeightAddCapital]
JMP .Letter:
.Y3:MOV EBX,'Y'<<12 ; >>
LEA ECX,[EBX+WeightDiaGrave]
LEA EDX,[ECX+WeightAddCapital]
CMP AX,0x1EF2 ; LATIN CAPITAL LETTER Y WITH GRAVE
JE .Letter:
LEA ECX,[EBX+WeightDiaGrave]
MOV EDX,ECX
CMP AX,0x1EF3 ; LATIN SMALL LETTER Y WITH GRAVE
JE .Letter:
CMP AX,0xF8FF
JB .Punctuation: ; 0x1EF4..0xF8FF.
MOV EBX,EAX
SHL EBX,12
MOV ECX,EBX
MOV EDX,ECX
CMP AX,0xFEFD
JB .Letter: ; ARABIC LETTER 0xF900..0xFEFC.
; JMP .Control: ; 0xFEFD..0xFFFF.
.Control:
LEA EDX,[EAX+WeightCatControls]
JMP .Z1:
.Punctuation:
MOV ECX,WeightCatPunctFirst
JSt [Status::],ArgPunctuationFirst,.Y5:
MOV ECX,WeightCatPunctLast
.Y5:LEA EDX,[EAX+ECX]
JMP .Z1:
.Digit:
MOV ECX,WeightCatDigitFirst
JSt [Status::],ArgDigitFirst,.Y7:
MOV ECX,WeightCatDigitLast
.Y7:LEA EDX,[EAX+ECX]
.Z1:MOV ECX,EDX ; Primary, secondary and tertiary weights are the same.
MOV EBX,EDX
JMP .Z5:
.Letter:
; Expected registers at the entry .Letter:
; EAX is the original codepoint 0x0000_xxxx.
; RSI is the position of the letter in UnicodePoints table.
; EBX (primary weight) is the base letter bbbb (upper case without diacritics) shift left by 12 (0x0bbb_b000).
; ECX (secondary weight) is EBX plus weight of diacritics dd (0x0bbb_bddd).
; EDX (tertiary weight) is ECX plus WeightAddCapital c if CAPITAL LETTER (0x0bbb_bddc).
OR EBX,WeightCatLetters ; Complete the weight with its category g (0xgbbb_bcdd).
OR ECX,WeightCatLetters
OR EDX,WeightCatLetters
JNSt [Status::],ArgUpperFirst, .Z5:
XOR EDX,WeightAddCapital ; Exchange capital and small letters.
.Z5:MOV RDI,RSI
LEA RAX,[CodePoint] ; Store weights RBX,RCX,RDX to the position RSI corresponding with codepoint value.
SUB RDI,RAX
LEA RAX,[PrimaryWeights]
MOV [RAX+2*RDI],EBX
LEA RAX,[SecondaryWeights]
MOV [RAX+2*RDI],ECX
LEA RAX,[TertiaryWeights]
MOV [RAX+2*RDI],EDX
AND EBX,0x0FFF_F000 ; Keep bits 12..27 of EBX from the previous run
XOR ECX,ECX ; and return ECX=EDX=0.
XOR EDX,EDX
ADD RSI,2 ; Go to load the next Unicode character.
JMP .A1:
.Z9:RET
; .Scandinavian? tests whether the locale stored in [ArgLocale::] is a Nordic one.
; Input:  word at [ArgLocale::].
; Output: ZF=1 when the locale is ArgLocaleDK, ArgLocaleFI, ArgLocaleNO, ArgLocaleIS or ArgLocaleSE,
;         ZF=0 otherwise. All general-purpose registers are preserved.
.Scandinavian? PROC
    PUSH RAX                     ; Borrow RAX; PUSH and POP do not touch the flags.
    MOVZXW EAX,[ArgLocale::]     ; Read the locale identifier only once.
    CMP AX,ArgLocaleDK
    JE .9:
    CMP AX,ArgLocaleFI
    JE .9:
    CMP AX,ArgLocaleNO
    JE .9:
    CMP AX,ArgLocaleIS
    JE .9:
    CMP AX,ArgLocaleSE           ; The last comparison delivers the final ZF itself.
.9: POP RAX
    RET
  ENDP .Scandinavian?
ENDP SetWeights
; HeaderAndFooter narrows the memory range of the input so that the header and footer are not sorted.
; Input:  [HeaderInMemPtr] start of the input in memory, [FooterInMemEnd] its end,
;         [ArgHeaderSize::], [ArgFooterSize::] header/footer sizes in bytes,
;         [ArgHeaderLength::], [ArgFooterLength::] header/footer sizes in lines (0 = none),
;         [Encoding] decides the width and byte order of the LineFeed character which is searched for.
; Output: [HeaderInMemEnd] and [FooterInMemPtr] delimit the data which remain to be sorted.
;         CF=1 when the footer would begin below the end of the header.
; RAX,RBX,RCX,RDX,RSI,RDI and flags may be modified. DF is set only temporarily during the backward scan.
HeaderAndFooter:: PROC
    ; Restrict the input by ArgHeaderSize and ArgFooterSize.
    MOV EAX,[ArgHeaderSize::]
    ADD RAX,[HeaderInMemPtr]
    MOV [HeaderInMemEnd],RAX
    MOV RAX,[FooterInMemEnd]
    MOV ECX,[ArgFooterSize::]
    SUB RAX,RCX
    MOV [FooterInMemPtr],RAX
    CMP RAX,[HeaderInMemEnd]
    JC .90:                      ; Return CF if Footer is below Header.
    ; Restrict the input by ArgHeaderLength.
    MOV EAX,10                   ; LineFeed is a terminating character of records with variable size.
    MOV RDI,[HeaderInMemEnd]
    MOV RCX,[FooterInMemPtr]
    SUB RCX,RDI                  ; RCX=number of bytes between header and footer.
    MOV EDX,[Encoding]           ; EDX is used by both Dispatch invocations below.
    MOV EBX,[ArgHeaderLength::]
    TEST EBX
    JZ .40:                      ; No header lines requested.
    ; EAX=10, EBX=header lines, EDX=encoding, RCX=data size in bytes, RDI=^data in memory.
    Dispatch DX,1200,1201,12000,12001 ; UTF-16 and UTF-32 encodings.
.10: REPNE SCASB                 ; All other encodings keep the byte 10 as LineFeed.
    JNE .35:                     ; LineFeed not found, the data are exhausted.
    DEC EBX                      ; Header length.
    JNZ .10:
    JMP .35:
.1200: SHR RCX,1                 ; UTF-16LE. Convert bytes to words; the whole 64bit RCX counts in REPNE.
.15: REPNE SCASW
    JNE .35:
    DEC EBX                      ; Header length.
    JNZ .15:
    JMP .35:
.1201: SHR RCX,1                 ; UTF-16BE.
    XCHG AL,AH                   ; LineFeed 0x000A is seen as 0x0A00 when loaded as little-endian word.
.20: REPNE SCASW
    JNE .35:
    DEC EBX                      ; Header length.
    JNZ .20:
    JMP .35:
.12000: SHR RCX,2                ; UTF-32LE. Convert bytes to dwords.
.25: REPNE SCASD
    JNE .35:
    DEC EBX                      ; Header length.
    JNZ .25:
    JMP .35:
.12001: SHR RCX,2                ; UTF-32BE.
    BSWAP EAX                    ; LineFeed is seen as 0x0A00_0000 when loaded as little-endian dword.
.30: REPNE SCASD
    JNE .35:
    DEC EBX                      ; Header length.
    JNZ .30:
.35: MOV [HeaderInMemEnd],RDI    ; RDI points behind the last LineFeed of the header (or at the end of scanned data).
.40: ; Restrict the input by ArgFooterLength.
    MOV EAX,10                   ; LineFeed is a terminating character of records with variable size.
    MOV RSI,[HeaderInMemEnd]
    MOV RCX,[FooterInMemPtr]
    MOV RDI,RCX
    SUB RCX,RSI                  ; RCX=number of bytes between header and footer.
    MOV EBX,[ArgFooterLength::]
    TEST EBX                     ; TEST also clears CF for the successful return at .90.
    JZ .90:
    INC EBX                      ; One more LineFeed is searched for: the one which precedes the footer.
    ; EAX=10, EBX=footer lines+1, RCX=data size in bytes, RDI=^end of data in memory.
    STD                          ; Scan backward.
    Dispatch DX,1200d,1201d,12000d,12001d
    DEC RDI                      ; All other encodings keep the byte 10 as LineFeed.
.45: REPNE SCASB
    JNE .80:
    DEC EBX                      ; Footer length.
    JNZ .45:
    LEA RDI,[RDI+2]              ; Skip back over the found LineFeed; RDI=^first byte of the footer.
    JMP .80:
.1200d: SHR RCX,1                ; UTF-16LE.
    LEA RDI,[RDI-2]              ; The last word of data.
.50: REPNE SCASW
    JNE .80:
    DEC EBX                      ; Footer length.
    JNZ .50:
    LEA RDI,[RDI+2+2]
    JMP .80:
.1201d: SHR RCX,1                ; UTF-16BE.
    LEA RDI,[RDI-2]
    XCHG AL,AH
.55: REPNE SCASW
    JNE .80:
    DEC EBX                      ; Footer length.
    JNZ .55:
    LEA RDI,[RDI+4]
    JMP .80:
.12000d: SHR RCX,2               ; UTF-32LE.
    LEA RDI,[RDI-4]              ; The last dword of data.
.60: REPNE SCASD
    JNE .80:
    DEC EBX                      ; Footer length.
    JNZ .60:
    LEA RDI,[RDI+4+4]
    JMP .80:
.12001d: SHR RCX,2               ; UTF-32BE.
    LEA RDI,[RDI-4]
    BSWAP EAX
.70: REPNE SCASD
    JNE .80:
    DEC EBX                      ; Footer length.
    JNZ .70:
    LEA RDI,[RDI+4+4]
.80: CLD                         ; Restore the default direction.
    MOV [FooterInMemPtr],RDI
    CMP RDI,[HeaderInMemEnd]     ; Return CF if Footer is below Header.
.90: RET
  ENDP HeaderAndFooter
.index.
; CreateIndex writes one SORT_INDEX structure (.Ptr and .Size) per input record to the file File_index.
; Input:  [HeaderInMemEnd]..[FooterInMemPtr] is the net input to sort,
;         [ArgRecordSize::] is the fixed record size, or 0 when records are terminated with LineFeed,
;         [Encoding] decides the width and byte order of the LineFeed character.
; Output: the index file is written and closed, [MaxRecordSize] is the size of the longest record in bytes.
;         SORT_INDEX.Ptr is the offset of the record relative to [HeaderInMemEnd].
; Errors: jumps to ErrorWriteFile when the index file cannot be assigned, created or written.
CreateIndex:: PROC
    LEA RBX,[File_index]
    FileAssign RBX,FileOutput.Name,Ext_index,Unicode=yes
    JC ErrorWriteFile
    FileCreate RBX
    JC ErrorWriteFile:
    MOV R12,[HeaderInMemEnd]
    MOV R13,[FooterInMemPtr]     ; R12..R13 specifies the netto file size to sort.
    MOV EAX,[ArgRecordSize::]
    TEST EAX
    JZ .20:                      ; Zero size means variable record size.
    MOV ESI,EAX                  ; Create index of records with fixed size ESI.
    MOV R15,RSI                  ; All records have the same size, which is also the maximal one.
    LEA RDX,[Number$]
    StoD RDX                     ; Store the record size as a decimal number to Number$.
    XOR EAX,EAX
    STOSB                        ; Terminate the number; presumably StoD left RDI behind the last digit.
    LEA RDX,[SortIndex]          ; Working space.
    ; R12..R13 specify the netto file to sort. RBX=^File_index. RDX=^SORT_INDEX
    XOR EDI,EDI
    MOV [RDX+SORT_INDEX.Size],ESI
    CMP R12,R13                  ; Nothing to index when the net input is empty,
    JNB .90:                     ;  the same way as variable-size indexing behaves.
.10: MOV [RDX+SORT_INDEX.Ptr],EDI
    FileWrite RBX,RDX,SIZE# SORT_INDEX
    JC ErrorWriteFile:
    ADD EDI,ESI                  ; Add record size.
    ADD R12,RSI
    CMP R12,R13
    JB .10:
    JMP .90:                     ; Done with records of fixed size.
.20: LEA RDX,[SortIndex]         ; Create index of records with variable size.
    ; R12..R13 specifies the netto file contents. RBX=^File_index. RDX=^SORT_INDEX
    MOV RCX,R13
    SUB RCX,R12
    XOR ESI,ESI                  ; ESI is the offset of the current record from R12.
    XOR R15,R15                  ; MaxRecordSize.
    MOV EAX,[Encoding]
    Dispatch AX,1200,1201,12000,12001
.30: MOV RDI,R12                 ; Create index for UTF-8 and all 8bit encodings.
    ADD RDI,RSI                  ; Start at previous offset from R12.
    MOV RCX,R13
    SUB RCX,RDI                  ; RCX=remaining bytes.
    JNA .90:
    MOV AL,10                    ; Line Feed marks the end of record.
    REPNE SCASB                  ; RDI stops behind the LineFeed or at the end of input.
    MOV [RDX+SORT_INDEX.Ptr],ESI
    SUB RDI,R12
    SUB EDI,ESI                  ; EDI=size of this record including its LineFeed.
    MOV [RDX+SORT_INDEX.Size],EDI
    CMP RDI,R15
    JB .40:
    MOV R15,RDI                  ; Remember the longest record.
.40: ADD ESI,EDI
    FileWrite RBX,RDX,SIZE# SORT_INDEX
    JC ErrorWriteFile
    JMP .30:
.1200:                           ; Create index for UTF-16LE encoding.
    MOV RDI,R12
    ADD RDI,RSI
    MOV RCX,R13
    SUB RCX,RDI
    JNA .90:
    SHR RCX,1                    ; Number of remaining 16bit characters.
    JZ .90:
    MOV AX,0x000A                ; Line Feed marks the end of record.
    REPNE SCASW
    MOV [RDX+SORT_INDEX.Ptr],ESI
    SUB RDI,R12
    SUB EDI,ESI
    MOV [RDX+SORT_INDEX.Size],EDI
    CMP RDI,R15
    JB .50:
    MOV R15,RDI
.50: ADD ESI,EDI
    FileWrite RBX,RDX,SIZE# SORT_INDEX
    JC ErrorWriteFile:
    JMP .1200:
.1201:                           ; Create index for UTF-16BE encoding.
    MOV RDI,R12
    ADD RDI,RSI
    MOV RCX,R13
    SUB RCX,RDI
    JNA .90:
    SHR RCX,1
    JZ .90:
    MOV AX,0x0A00                ; Line Feed (byte-swapped) marks the end of record.
    REPNE SCASW
    MOV [RDX+SORT_INDEX.Ptr],ESI
    SUB RDI,R12
    SUB EDI,ESI
    MOV [RDX+SORT_INDEX.Size],EDI
    CMP RDI,R15
    JB .60:
    MOV R15,RDI
.60: ADD ESI,EDI
    FileWrite RBX,RDX,SIZE# SORT_INDEX
    JC ErrorWriteFile:
    JMP .1201:
.12000:                          ; Create index for UTF-32LE encoding.
    MOV RDI,R12
    ADD RDI,RSI
    MOV RCX,R13
    SUB RCX,RDI
    JNA .90:
    SHR RCX,2                    ; Number of remaining 32bit characters.
    JZ .90:
    MOV EAX,0x0000_000A          ; Line Feed U+000A marks the end of record.
    REPNE SCASD
    MOV [RDX+SORT_INDEX.Ptr],ESI
    SUB RDI,R12
    SUB EDI,ESI
    MOV [RDX+SORT_INDEX.Size],EDI
    CMP RDI,R15
    JB .70:
    MOV R15,RDI
.70: ADD ESI,EDI
    FileWrite RBX,RDX,SIZE# SORT_INDEX
    JC ErrorWriteFile:
    JMP .12000:
.12001:                          ; Create index for UTF-32BE encoding.
    MOV RDI,R12
    ADD RDI,RSI
    MOV RCX,R13
    SUB RCX,RDI
    JNA .90:
    SHR RCX,2
    JZ .90:
    MOV EAX,0x0A00_0000          ; Line Feed U+000A stored big-endian, as seen by a little-endian dword load.
    REPNE SCASD
    MOV [RDX+SORT_INDEX.Ptr],ESI
    SUB RDI,R12
    SUB EDI,ESI
    MOV [RDX+SORT_INDEX.Size],EDI
    CMP RDI,R15
    JB .80:
    MOV R15,RDI
.80: ADD ESI,EDI
    FileWrite RBX,RDX,SIZE# SORT_INDEX
    JC ErrorWriteFile:
    JMP .12001:
.90: FileClose RBX
    MOV [MaxRecordSize],R15D
    RET
  ENDP CreateIndex
ConvertEncoding converts the input string from [Encoding] to UTF-32LE characters, which are stored in the dynamically allocated output memory RecordPtrA or RecordPtrB.
; ConvertEncoding converts one input record from [Encoding] to UTF-32LE characters.
; Input:  RSI..RDX is the source record (RSI=^first byte, RDX=^behind the last byte),
;         RDI=^output memory for the converted dwords,
;         [Encoding] is the code page number, [TranslationTable] points to 128 words
;         with code points of the characters 0x80..0xFF (used by 8bit code pages only),
;         flag ArgBigEndian in [Status::] tells that UTF-16/UTF-32 input is big-endian.
; Output: RDI=^behind the last stored dword, RSI is advanced.
; RAX and RCX are modified.
ConvertEncoding:: PROC
    MOV EAX,[Encoding]
    Dispatch AX,20127,65001,1200,1201,12000,12001 ; Dispatch to ASCII and to UTF encodings.
    ; Other 8bit encodings need to translate by the [TranslationTable].
    MOV RCX,[TranslationTable]
.10: CMP RSI,RDX
    JNB .90:
    MOVZXB EAX,[RSI]
    CMP AL,0x80
    JB .15:                      ; Characters 0x00..0x7F are identical with ASCII.
    MOVZXW EAX,[RCX+2*RAX-256]   ; The table starts with the character 0x80, hence -2*0x80.
.15: STOSD
    INC RSI
    JMP .10:
.20127:                          ; ASCII
    XOR EAX,EAX                  ; LODSB changes only AL, the upper bits stay zero.
.20: CMP RSI,RDX
    JNB .90:
    LODSB
    STOSD
    JMP .20:
.12000:                          ; UTF-32LE
.12001:                          ; UTF-32BE
    CMP RSI,RDX
    JNB .90:
    LODSD
    JNSt [Status::],ArgBigEndian,.30:
    BSWAP EAX
.30: STOSD
    JMP .12000:
.1200:                           ; UTF-16LE
.1201:                           ; UTF-16BE
    CMP RSI,RDX
    JNB .90:
    XOR EAX,EAX
    LODSW
    JNSt [Status::],ArgBigEndian,.40:
    XCHG AL,AH
.40: CMP AX,0xD800               ; Surrogates occupy the range 0xD800..0xDFFF.
    JB .70:
    CMP AX,0xE000
    JAE .70:
    ; It is a surrogate.
    MOV ECX,EAX                  ; First (high) surrogate 0xD800..0xDBFF expected.
    SUB CX,0xD800
    TEST CH,0xFC
    JNZ .70:                     ; If not a valid high surrogate, save it as is.
    CMP RSI,RDX                  ; Is there any input left for the second surrogate?
    JNB .70:                     ; If not, save the unpaired high surrogate as is.
    SHL ECX,10                   ; Upper 10 bits of the code point.
    LODSW                        ; Second (low) surrogate 0xDC00..0xDFFF expected.
    JNSt [Status::],ArgBigEndian,.50:
    XCHG AL,AH
.50: SUB AX,0xDC00
    TEST AH,0xFC
    JNZ .80:                     ; Wrong character, rollback.
    OR EAX,ECX                   ; Both surrogates are correct.
    ADD EAX,0x0001_0000
.70: STOSD
    JMP .1200:
.80: SUB RSI,2                   ; Rollback the LODSW instruction; the high surrogate is not stored.
    JMP .1200:
.65001:                          ; UTF-8
    MOV RCX,RDX
    SUB RCX,RSI                  ; Size of the record in bytes.
    DecodeUTF8 RSI,.StoreDword,Size=RCX,Width=32
.90: RET
.StoreDword PROC                 ; Callback handed over to DecodeUTF8; it stores EAX to the output.
    STOSD
    RET
  ENDP .StoreDword
  ENDP ConvertEncoding
Find the sorting weights for the characters between RSI..RDX and store them to a memory at RDI.
The string RSI..RDX is a subkey, i. e. the input record, possibly decreased by /KeyOffset and /KeyLength.
; Weigh translates UTF-32LE characters of a subkey to their 32bit sorting weights.
; Input:  RSI..RDX is the subkey (RSI=^first dword, RDX=^behind the last dword),
;         RDI=^output memory for the weights,
;         RBX=^table of dword weights (PrimaryWeights, SecondaryWeights or TertiaryWeights)
;             which is parallel to the table of words at CodePoint,
;         [ArgLocale::] selects the digraph rules (HU, CZ, SK), flag ArgMergeSpaces is in [Status::].
; Output: RDI=^behind the last stored weight. Nothing is stored when RSI>=RDX.
; RAX,RCX,RSI,R8,R9,R12,R13 are modified; RBX and RDX are kept.
Weigh:: PROC
    LEA R12,[RDI+4]              ; R12 is not used any further inside this procedure.
    XOR R13,R13                  ; There is no previous character yet.
.10: CMP RSI,RDX                 ; Test the end of subkey.
    JNB .90:
    LODSD
    SHL R13,32                   ; R13 bits 32..63 is previous character.
    OR R13,RAX                   ; R13 bits 0..31 is this character.
    CMP EAX,0x0000_FFFF          ; Test on characters outside BMP.
    JA .80:
    CMP AX,0x0020                ; Test on white spaces.
    JA .20:
    JNSt [Status::],ArgMergeSpaces,.20:
    MOV RCX,R13
    SHR RCX,32
    CMP ECX,0x0020               ; Was the previous character white, too? Compare the whole code point.
    JBE .10:                     ; If yes, ignore this character (white spaces will merge).
.20: MOV R8,RDI                  ; Temporary save RDI.
    LEA RDI,[CodePoint]          ; Array of WORD codepoints
    MOV ECX,(CodePointEnd-CodePoint)/2 ; and its length.
    LEA R9,[RDI+2]               ; Start of codepoint array+2.
    REPNE SCASW                  ; Find the codepoint.
    SUB RDI,R9                   ; RDI is now the offset in 16bit UnicodePoints table.
    MOV EAX,[RBX+2*RDI]          ; Address of the corresponding weight in the 32bit Weights table.
    MOV RDI,R8                   ; Restore RDI at the output memory.
    STOSD                        ; Store the weight.
    MOV ECX,[ArgLocale::]        ; Modify the stored weight by locale?
    JRCXZ .10:
    CMP CX,'HU'
    JE .50:
    CMP CX,'CZ'
    JE .30:
    CMP CX,'SK'
    JNE .10:
.30: ; Letters 'CH','Ch','ch' (but not 'cH') are digraphs in CZ,SK locale, which sort between 'H' and 'I'.
    MOV RCX,R13
    SHR RCX,32
    CMP R13D,'h'                 ; Current letter is in R13D.
    JE .35:
    CMP R13D,'H'
    JE .40:
    JNE .10:
.35: CMP ECX,'c'                 ; Previous letter is in ECX.
    JE .45:
.40: CMP ECX,'C'
    JNE .10:
.45: MOV ECX,[RDI-2*4]           ; Weight of the previous letter 'C' or 'c'.
    MOVD [RDI-2*4],0xC004_8100   ; will change to 'H' or 'h'.
    TEST CX,WeightAddCapital
    JZ .10:
    ORD [RDI-2*4],WeightAddCapital ; Keep the capital flag in the replaced weight of the previous letter.
    JMP .10:                     ; The next UTF-32LE character.
.50: ; Digraphs Cs, Dz, Gy, Ly, Ny, Sz, Ty, Zs sort after C, D, G, L, N, S, T, Z.
    MOV RCX,R13
    SHR RCX,32
    MOV EAX,R13D                 ; Work with a copy, R13 must keep the genuine character for the next run.
    OR AL,'s'^'S'                ; Convert this character to lower case.
    OR CL,'c'^'C'                ; Convert previous character to lower case.
    CMP EAX,'s'                  ; Current letter is 's'?
    JE .60:
    CMP EAX,'z'                  ; Current letter is 'z'?
    JE .75:
    CMP EAX,'y'                  ; Current letter is 'y'?
    JNE .10:                     ; The next UTF-32LE character.
    CMP ECX,'g'                  ; Previous letter was 'g'?
    JE .70:                      ; Detected digraph 'gy',
    CMP ECX,'l'                  ; Previous letter was 'l'?
    JE .70:                      ; Detected digraph 'ly',
    CMP ECX,'n'                  ; Previous letter was 'n'?
    JE .70:                      ; Detected digraph 'ny',
    CMP ECX,'t'                  ; Previous letter was 't'?
    JE .70:                      ; Detected digraph 'ty'.
    JMP .10:
.60: CMP ECX,'c'                 ; Previous letter was 'c'?
    JE .70:                      ; Detected digraph 'cs'.
    CMP ECX,'z'                  ; Digraph 'zs'?
    JNE .10:                     ; The next UTF-32LE character.
.70: ORD [RDI-2*4],WeightAddTwo  ; Weight of the previous letter will increase.
    JMP .10:
.75: CMP ECX,'d'                 ; Previous letter was 'd'?
    JE .70:                      ; Detected digraph 'dz'.
    CMP ECX,'s'                  ; Previous letter was 's'?
    JE .70:                      ; Detected digraph 'sz'.
    JMP .10:
.80: ADD EAX,WeightCatAboveFFFF  ; Characters above 0xFFFF are not looked up in the tables.
    STOSD
    JMP .10:
.90: RET
  ENDP Weigh
Procedure ShowProgress is called from Compare to display the percentage of data sorted so far, overwriting the previously displayed number.
; ShowProgress prints the progress of sorting in percents.
; Input:  EDX=number whose highest set bit position (BSR) measures the remaining work,
;         [ProgressDivisor]=divisor which corresponds with 100 %; assumed nonzero -- TODO confirm at the caller.
; Output: StdOutput writes FiveBack, the number right-aligned to 3 characters, and " %%".
;         The percentage is 100*([ProgressDivisor]-BSR(EDX))/[ProgressDivisor].
; RCX and RDI are preserved, RAX and RDX are modified. NameUTF16 is used as a scratch buffer.
ShowProgress:: PROC
    PUSH RCX,RDI
    MOV ECX,[ProgressDivisor]
    BSR EAX,EDX                  ; EAX=index of the highest set bit of EDX.
    NEG EAX
    ADD EAX,ECX                  ; EAX=[ProgressDivisor]-EAX.
    MOV EDX,100
    MUL EDX                      ; EDX:EAX=100*([ProgressDivisor]-bit index).
    DIV ECX                      ; EAX=percents.
    LEA RDX,[NameUTF16]          ; RDX remembers the start of the text for StdOutput.
    MOV RDI,RDX
    StoD RDI,Size=3,Align=right
    XOR EAX,EAX
    STOSB                        ; Zero-terminate the number.
    StdOutput FiveBack,RDX,=B" %%",Unicode=no
    POP RDI,RCX
    RET
  ENDP ShowProgress
Procedure Compare is called as a callback from ShellSort.
First it compares two records by Key1,Length1,Reverse1,
and only if they're equal by this key, it compares by Key2,Length2,Reverse2 etc.
Steps of the procedure:
; Compare is the callback of ShellSort which compares two records and swaps their indexes when necessary.
; Input:  RSI,RDI=pointers to two SORT_INDEX structures,
;         R11=counter of invocations, R10=value of RDX at the last displayed progress,
;         RDX=value handed over to ShowProgress.
; Output: CF=1 when the two SORT_INDEXes were swapped, CF=0 otherwise.
;         RBX and RCX are preserved. RSI and RDI return exchanged. RAX,RDX,R8,R9,R10,R11,R12,R13 may be modified.
; The record whose index came in RDI is called A, the other one is B. Flags convention used below:
; CF=1 means key A is lower than key B, ZF=1 means both keys are equal.
; A is swapped with B when A<B, or when A>B and ArgKeyReverse is set. Equal keys are never swapped.
; A key which starts behind the end of its record is treated as an empty key.
; NOTE(review): the end of key is computed as ArgKeyLength characters from the start of record,
;               not from ArgKeyOffset -- confirm that this is intended.
Compare:: PROC
    PUSH RBX,RCX
    INC R11
    TEST R11B
    JNZ .10:                     ; No matter what was in R11, reduce the frequency by 256.
    CMP RDX,R10                  ; Did RDX change?
    JE .10:
    MOV R10,RDX                  ; Show progress only when RDX changed.
    CALL ShowProgress            ; Called once per 256 Compare when RDX changed.
.10: XCHG RSI,RDI
    PUSH RSI,RDI                 ; Pointers to indexes.
    MOV EDX,[RSI+SORT_INDEX.Size]
    MOV ESI,[RSI+SORT_INDEX.Ptr]
    ADD RSI,[HeaderInMemEnd]
    ADD RDX,RSI                  ; First input record is at RSI..RDX.
    MOV RDI,[RecordPtrA]
    CALL ConvertEncoding
    SUB RDI,[RecordPtrA]
    SHR EDI,2                    ; Convert bytes to the number of UTF-32 characters.
    MOV [RecordLenA],EDI
    MOV RDI,[RSP]                ; Restore pointer to the second SORT_INDEX.
    MOV EDX,[RDI+SORT_INDEX.Size]
    MOV ESI,[RDI+SORT_INDEX.Ptr]
    ADD RSI,[HeaderInMemEnd]
    ADD RDX,RSI                  ; Second input record is at RSI..RDX.
    MOV RDI,[RecordPtrB]
    CALL ConvertEncoding
    SUB RDI,[RecordPtrB]
    SHR EDI,2
    MOV [RecordLenB],EDI
    ; Encoding of both records A and B is now converted to UTF-32LE.
    MOV EAX,[RecordLenA]         ; EAX is the length of record A.
    MOV ESI,[ArgKeyOffset::]
    CMP ESI,EAX
    JBE .15:
    MOV ESI,EAX                  ; The key of record A is out, treat it as an empty key.
.15: MOV EDX,[ArgKeyLength::]
    CMP EDX,EAX                  ; Negative (unlimited) or too long key length
    JBE .20:                     ;  is limited to the length of record.
    MOV EDX,EAX
.20: SHL EDX,2
    ADD RDX,[RecordPtrA]         ; Convert to the end pointer.
    SHL ESI,2
    ADD RSI,[RecordPtrA]         ; Convert to the start pointer.
    MOV [KeyRecEndA],RDX
    MOV [KeyRecPtrA],RSI         ; Save pointers to keyed record A.
    MOV RDI,[WeightPtrA]
    LEA RBX,[PrimaryWeights]
    CALL Weigh                   ; Weigh the keyed record A.
    SUB RDI,[WeightPtrA]
    SHR EDI,2
    MOV [WeightLenA],EDI         ; Number of weights of key A.
    ; Prepare keyed records to KeyRecPtr..KeyRecEnd.
    MOV EAX,[RecordLenB]         ; EAX is the length of record B.
    MOV ESI,[ArgKeyOffset::]
    CMP ESI,EAX
    JBE .25:
    MOV ESI,EAX                  ; The key of record B is out, treat it as an empty key.
.25: MOV EDX,[ArgKeyLength::]
    CMP EDX,EAX
    JBE .30:
    MOV EDX,EAX
.30: SHL EDX,2
    ADD RDX,[RecordPtrB]         ; Convert to the end pointer.
    SHL ESI,2
    ADD RSI,[RecordPtrB]         ; Convert to the start pointer.
    MOV [KeyRecEndB],RDX
    MOV [KeyRecPtrB],RSI         ; Save pointers to keyed record B.
    MOV RDI,[WeightPtrB]
    LEA RBX,[PrimaryWeights]
    CALL Weigh                   ; Weigh the keyed record B.
    SUB RDI,[WeightPtrB]
    SHR EDI,2
    MOV [WeightLenB],EDI         ; Weights of both records are ready.
    MOV RSI,[WeightPtrA]         ; Compare using the PrimaryWeights.
    MOV RDI,[WeightPtrB]
    MOV ECX,[WeightLenA]
    MOV EDX,[WeightLenB]
    CMP ECX,EDX
    JBE .40:
    MOV ECX,EDX
.40: REPE CMPSD                  ; ECX=length of the shorter string. When ECX=0, flags of CMP ECX,EDX survive.
    JNE .70:                     ; Jump if decided.
    MOV RSI,[KeyRecPtrA]         ; Compare with the PrimaryWeights did not decide. Re-weigh using SecondaryWeights.
    MOV RDX,[KeyRecEndA]
    MOV RDI,[WeightPtrA]
    LEA RBX,[SecondaryWeights]
    CALL Weigh                   ; Weigh the keyed record A.
    MOV RSI,[KeyRecPtrB]
    MOV RDX,[KeyRecEndB]
    MOV RDI,[WeightPtrB]
    CALL Weigh                   ; Weigh the keyed record B. RBX is kept by Weigh.
    MOV RSI,[WeightPtrA]         ; Compare using the SecondaryWeights.
    MOV RDI,[WeightPtrB]
    MOV ECX,[WeightLenA]
    MOV EDX,[WeightLenB]
    CMP ECX,EDX
    JBE .50:
    MOV ECX,EDX
.50: REPE CMPSD
    JNE .70:                     ; Jump if decided.
    MOV RSI,[KeyRecPtrA]         ; Compare with the SecondaryWeights did not decide. Re-weigh using TertiaryWeights.
    MOV RDX,[KeyRecEndA]
    MOV RDI,[WeightPtrA]
    LEA RBX,[TertiaryWeights]
    CALL Weigh                   ; Weigh the keyed record A.
    MOV RSI,[KeyRecPtrB]
    MOV RDX,[KeyRecEndB]
    MOV RDI,[WeightPtrB]
    CALL Weigh                   ; Weigh the keyed record B.
    MOV RSI,[WeightPtrA]         ; Compare using the TertiaryWeights.
    MOV RDI,[WeightPtrB]
    MOV ECX,[WeightLenA]
    MOV EDX,[WeightLenB]
    CMP ECX,EDX
    JBE .60:
    MOV ECX,EDX
.60: REPE CMPSD
    JNE .70:                     ; Jump if decided.
    MOV EAX,[WeightLenA]         ; Compare with Primary, Secondary and Tertiary weights did not find a difference.
    CMP EAX,EDX                  ; In that case the longer key is greater: CF=1 when A is shorter than B.
    ; ZF=1, both records A and B under Key1 are the same. Try the next key.
.70: POP RDI,RSI                 ; Restore indexes. POP does not change flags.
    JE .88:                      ; Equal keys stay where they are.
    JC .80:
    JSt [Status::],ArgKeyReverse,.85: ; A>B is swapped only in reversed order.
    JMP .88:
.80: JSt [Status::],ArgKeyReverse,.88: ; A<B is swapped unless the order is reversed.
.85: MOV RAX,[RSI]               ; Swap two SORT_INDEXes.
    XCHG RAX,[RDI]
    MOV [RSI],RAX
    STC                          ; Signalize the swap to ShellSort.
    JMP .90:
.88: CLC                         ; Signalize that nothing was swapped.
.90: POP RCX,RBX                 ; Restore callee-saved registers. POP keeps CF.
    RET
  ENDP Compare:
ENDPROGRAM sortwinc