char-scr.lua /size: 6959 b    last modification: 2021-10-28 13:50
1if not modules then modules = { } end modules ['char-scr'] = {
2    version   = 1.001,
3    comment   = "companion to char-ini.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9local tonumber = tonumber
10
-- Presets that map a unicode code point (number key) onto a script category
-- name (string value). Code points that are not listed here are classified on
-- demand by the index function that is hooked into this table right after the
-- constructor. Each key occurs only once: a repeated key in a table constructor
-- is silently overridden by the last occurrence, which hides later edits.

characters.scripthash = { -- we could put these presets in char-def.lua
    --
    -- half width opening parenthesis
    --
    [0x0028] = "half_width_open", -- (   left parenthesis
    [0x005B] = "half_width_open", -- [   left square bracket
    [0x007B] = "half_width_open", -- {   left curly bracket
    [0x2018] = "half_width_open", -- ‘   left single quotation mark
    [0x201C] = "half_width_open", -- “   left double quotation mark
    --
    -- full width opening parenthesis
    --
    [0x3008] = "full_width_open", -- 〈   left angle bracket
    [0x300A] = "full_width_open", -- 《   left double angle bracket
    [0x300C] = "full_width_open", -- 「   left corner bracket
    [0x300E] = "full_width_open", -- 『   left white corner bracket
    [0x3010] = "full_width_open", -- 【   left black lenticular bracket
    [0x3014] = "full_width_open", -- 〔   left tortoise shell bracket
    [0x3016] = "full_width_open", -- 〖   left white lenticular bracket
    [0x3018] = "full_width_open", -- 〘   left white tortoise shell bracket
    [0x301A] = "full_width_open", -- 〚   left white square bracket
    [0x301D] = "full_width_open", -- 〝   reversed double prime quotation mark
    [0xFF08] = "full_width_open", -- （   fullwidth left parenthesis
    [0xFF3B] = "full_width_open", -- ［   fullwidth left square bracket
    [0xFF5B] = "full_width_open", -- ｛   fullwidth left curly bracket
    --
    -- half width closing parenthesis
    --
    [0x0029] = "half_width_close", -- )   right parenthesis
    [0x005D] = "half_width_close", -- ]   right square bracket
    [0x007D] = "half_width_close", -- }   right curly bracket
    [0x2019] = "half_width_close", -- ’   right single quotation mark
    [0x201D] = "half_width_close", -- ”   right double quotation mark
    --
    -- full width closing parenthesis
    --
    [0x3009] = "full_width_close", -- 〉   right angle bracket
    [0x300B] = "full_width_close", -- 》   right double angle bracket
    [0x300D] = "full_width_close", -- 」   right corner bracket
    [0x300F] = "full_width_close", -- 』   right white corner bracket
    [0x3011] = "full_width_close", -- 】   right black lenticular bracket
    [0x3015] = "full_width_close", -- 〕   right tortoise shell bracket
    [0x3017] = "full_width_close", -- 〗   right white lenticular bracket
    [0x3019] = "full_width_close", -- 〙   right white tortoise shell bracket
    [0x301B] = "full_width_close", -- 〛   right white square bracket
    [0x301E] = "full_width_close", -- 〞   double prime quotation mark
    [0x301F] = "full_width_close", -- 〟   low double prime quotation mark
    [0xFF09] = "full_width_close", -- ）   fullwidth right parenthesis
    [0xFF3D] = "full_width_close", -- ］   fullwidth right square bracket
    [0xFF5D] = "full_width_close", -- ｝   fullwidth right curly bracket
    --
    -- halfwidth forms of the corner brackets
    --
    [0xFF62] = "half_width_open",  -- ｢   halfwidth left corner bracket
    [0xFF63] = "half_width_close", -- ｣   halfwidth right corner bracket
    --
    -- vertical opening (not enabled)
    --
    -- 0xFE35, 0xFE37, 0xFE39,  0xFE3B,  0xFE3D,  0xFE3F,  0xFE41,  0xFE43,  0xFE47,
    --
    -- vertical closing (not enabled)
    --
    -- 0xFE36, 0xFE38, 0xFE3A,  0xFE3C,  0xFE3E,  0xFE40,  0xFE42,  0xFE44,  0xFE48,
    --
    -- half width opening punctuation
    --
    -- <empty>
    --
    -- full width opening punctuation (not enabled, 0xFF0C is a closing one below)
    --
    --  0x2236, -- ∶   ratio
    --  0xFF0C, -- ，   fullwidth comma
    --
    -- half width closing punctuation
    --
    [0x0021] = "half_width_close", -- !
    [0x002C] = "half_width_close", -- ,
    [0x002E] = "half_width_close", -- .
    [0x003A] = "half_width_close", -- :
    [0x003B] = "half_width_close", -- ;
    [0x003F] = "half_width_close", -- ?
    [0xFF61] = "half_width_close", -- ｡   halfwidth ideographic full stop
    --
    -- full width closing punctuation
    --
    [0x3001] = "full_width_close", -- 、   ideographic comma
    [0x3002] = "full_width_close", -- 。   ideographic full stop
    [0xFF0C] = "full_width_close", -- ，   fullwidth comma
    [0xFF0E] = "full_width_close", -- ．   fullwidth full stop
    --
    -- depends on font
    --
    [0xFF01] = "full_width_close", -- ！   fullwidth exclamation mark
    [0xFF1F] = "full_width_close", -- ？   fullwidth question mark
    --
    [0xFF1A] = "full_width_punct", -- ：   fullwidth colon
    [0xFF1B] = "full_width_punct", -- ；   fullwidth semicolon
    --
    -- non starter
    --
    [0x3005] = "non_starter", [0x3041] = "non_starter", [0x3043] = "non_starter", [0x3045] = "non_starter", [0x3047] = "non_starter",
    [0x3049] = "non_starter", [0x3063] = "non_starter", [0x3083] = "non_starter", [0x3085] = "non_starter", [0x3087] = "non_starter",
    [0x308E] = "non_starter", [0x3095] = "non_starter", [0x3096] = "non_starter", [0x309B] = "non_starter", [0x309C] = "non_starter",
    [0x309D] = "non_starter", [0x309E] = "non_starter", [0x30A0] = "non_starter", [0x30A1] = "non_starter", [0x30A3] = "non_starter",
    [0x30A5] = "non_starter", [0x30A7] = "non_starter", [0x30A9] = "non_starter", [0x30C3] = "non_starter", [0x30E3] = "non_starter",
    [0x30E5] = "non_starter", [0x30E7] = "non_starter", [0x30EE] = "non_starter", [0x30F5] = "non_starter", [0x30F6] = "non_starter",
    [0x30FC] = "non_starter", [0x30FD] = "non_starter", [0x30FE] = "non_starter", [0x31F0] = "non_starter", [0x31F1] = "non_starter",
    [0x31F2] = "non_starter", [0x31F3] = "non_starter", [0x31F4] = "non_starter", [0x31F5] = "non_starter", [0x31F6] = "non_starter",
    [0x31F7] = "non_starter", [0x31F8] = "non_starter", [0x31F9] = "non_starter", [0x31FA] = "non_starter", [0x31FB] = "non_starter",
    [0x31FC] = "non_starter", [0x31FD] = "non_starter", [0x31FE] = "non_starter", [0x31FF] = "non_starter",
    --
    -- more non starters (0x309B and 0x30FE are already part of the list above)
    --
    [0x301C] = "non_starter", [0x303B] = "non_starter", [0x303C] = "non_starter", [0x30FB] = "non_starter",
    --
    -- hyphenation
    --
    [0x2026] = "hyphen", -- …   horizontal ellipsis
    [0x2014] = "hyphen", -- —   em dash
    --
    -- ethiopic
    --
    [0x1361] = "ethiopic_word",     -- ethiopic wordspace
    [0x1362] = "ethiopic_sentence", -- ethiopic full stop
    --
    -- tibetan:
    --
    [0x0F0B] = "breaking_tsheg",    -- tibetan mark intersyllabic tsheg
    [0x0F0C] = "nonbreaking_tsheg", -- tibetan mark delimiter tsheg bstar

}
136
-- Fallback classifier for keys that have no preset in characters.scripthash.
-- It is handed to table.setmetatableindex (a helper defined elsewhere,
-- presumably installing it as the __index handler) and receives the table and
-- the missing key.
--
-- t : the scripthash table itself
-- k : the key that was looked up, normally a code point (number)
--
-- The result is a category name or false when the key cannot be classified.
-- It is stored in t[k], so each key is classified only once; false (and not
-- nil) is stored for unknown keys so that the entry really exists afterwards.
--
-- Keys that tonumber accepts but that are not numbers (numeric strings) used
-- to pass the guard and then fail in the range tests with "attempt to compare
-- string with number". They are now resolved via the equivalent numeric key,
-- which gives them the same answer (preset or range based) as the number.

table.setmetatableindex(characters.scripthash, function(t,k)
    local n = tonumber(k)
    local v
    if not n                               then v = false
    elseif type(k) ~= "number"             then v = t[n] -- numeric string: reuse the numeric entry
    elseif (n >= 0x03040 and n <= 0x030FF)
        or (n >= 0x031F0 and n <= 0x031FF)
        or (n >= 0x032D0 and n <= 0x032FE)
        or (n >= 0x0FF00 and n <= 0x0FFEF) then v = "katakana"
    elseif (n >= 0x03400 and n <= 0x04DFF)
        or (n >= 0x04E00 and n <= 0x09FFF)
        or (n >= 0x0F900 and n <= 0x0FAFF)
        or (n >= 0x20000 and n <= 0x2A6DF)
        or (n >= 0x2F800 and n <= 0x2FA1F) then v = "chinese"
    elseif (n >= 0x0AC00 and n <= 0x0D7A3) then v = "korean"
    elseif (n >= 0x01100 and n <= 0x0115F) then v = "jamo_initial"
    elseif (n >= 0x01160 and n <= 0x011A7) then v = "jamo_medial"
    elseif (n >= 0x011A8 and n <= 0x011FF) then v = "jamo_final"
    elseif (n >= 0x01200 and n <= 0x0139F) then v = "ethiopic_syllable"
    elseif (n >= 0x00F00 and n <= 0x00FFF) then v = "tibetan"
                                           else v = false
    end
    -- cache under the key that was asked for, also when the answer is false
    t[k] = v
    return v
end)
160
161-- storage.register("characters/scripthash", hash, "characters.scripthash")
162