phys-dim.lmt /size: 42 Kb    last modification: 2024-01-16 10:22
1if not modules then modules = { } end modules ['phys-dim'] = {
2    version   = 1.001,
3    comment   = "companion to phys-dim.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9-- This is pretty old code that I found back, but let's give it a try
10-- in practice. It started out as m-units.lua but as we want to keep that
11-- module around we moved the code to the dimensions module.
12--
13-- todo: maybe also an sciunit command that converts to si units (1 inch -> 0.0254 m)
-- etc .. typical something to do when listening to a news show or b-movie
15--
16-- todo: collect used units for logging (and list of units, but then we need
17-- associations too).
18
19-- The lists have been checked and completed by Robin Kirkham.
20
21-- dubious/wrong
22
23--  Atom                        = [[u]], -- should be amu (atomic mass unit)
24--  Bell                        = [[B]], -- should be bel
25--  Sterant                     = [[sr]], -- should be steradian
26--  Equivalent                  = [[eql]], -- qualifier?
27--  At                          = [[at]], -- qualifier?
28--  Force                       = [[f]], -- qualifier?
29--  eVolt                       = [[eV]],
30--  -- AC or DC voltages should be qualified in the text
31--  VoltAC                      = [[V\unitsbackspace\unitslower{ac}]],
32--  VoltDC                      = [[V\unitsbackspace\unitslower{dc}]],
33--  AC                          = [[V\unitsbackspace\unitslower{ac}]],
34--  DC                          = [[V\unitsbackspace\unitslower{dc}]],
35--  -- probably not harmful but there are better alternatives
36--  -- e.g., revolution per second (rev/s)
37--  RPS                         = [[RPS]],
38--  RPM                         = [[RPM]],
39--  RevPerSec                   = [[RPS]],
40--  RevPerMin                   = [[RPM]],
41
42local rawset, next = rawset, next
43local V, P, S, R, C, Cc, Cs, matchlpeg = lpeg.V, lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.match
44local format, lower, gsub = string.format, string.lower, string.gsub
45local appendlpeg = lpeg.append
46local utfchartabletopattern = lpeg.utfchartabletopattern
47local mergetable, mergedtable, keys, loweredkeys, sortedhash = table.merge, table.merged, table.keys, table.loweredkeys, table.sortedhash
48local setmetatablenewindex = table.setmetatablenewindex
49local utfchar = utf.char
50
-- Shared namespace: an existing global physics table (and its units subtable)
-- is reused when present, otherwise a fresh one is created.
physics            = physics or { }
physics.units      = physics.units or { }

local allocate     = utilities.storage.allocate

local context      = context
local commands     = commands
local implement    = interfaces.implement

-- Tracing is off by default; the "physics.units" tracker callback sets the flag
-- to whatever value it is called with.
local trace_units  = false
local report_units = logs.reporter("units")

trackers.register("physics.units", function(v) trace_units = v end)
64
65-- digits parser (todo : use patterns)
66
-- Inline math escapes: the delimiters are replaced by nothing inside the
-- substitution capture, and the remaining text is handed to context.m.
local math_one       = Cs((P("$")    /"") * (1-P("$"))^1 * (P("$")/"")) / context.m
local math_two       = Cs((P("\\m {")/"") * (1-P("}"))^1 * (P("}")/"")) / context.m -- watch the space after \m

-- Single character (or short literal) matchers used by the digits grammar.
local digit          = R("09")
local plus           = P("+")
local minus          = P("-")
local plusminus      = P("±")
local sign           = plus + minus
local power          = S("^e")  -- either character introduces an exponent
local digitspace     = S("~@_") -- placeholders handled by context.digitsspace
local comma          = P(",")
local period         = P(".")
local semicolon      = P(";")
local colon          = P(":")
local signspace      = P("/")
local positive       = P("++") -- was p
local negative       = P("--") -- was n
local highspace      = P("//") -- was s
local padding        = P("=")
local space          = P(" ")
local lparent        = P("(")
local rparent        = P(")")

local lbrace         = P("{")
local rbrace         = P("}")

local digits         = digit^1

-- Exponent digits: the optional sign selects the handler, the digits themselves
-- are captured and passed on as the argument.
local powerdigits    = plus  * C(digits) / context.digitspowerplus
                     + minus * C(digits) / context.digitspowerminus
                     +         C(digits) / context.digitspower

-- In the definitions below "patt / '' / handler" first turns the match into an
-- empty string capture, so the handler is called with "" instead of the matched
-- text; only ddigit passes the matched digits through.
local ddigitspace    = digitspace  / "" / context.digitsspace
local ddigit         = digits           / context.digitsdigit
local dsemicomma     = semicolon   / "" / context.digitsseparatorspace
local dsemiperiod    = colon       / "" / context.digitsseparatorspace
local dfinalcomma    = comma       / "" / context.digitsfinalcomma
local dfinalperiod   = period      / "" / context.digitsfinalperiod
local dintercomma    = comma       / "" / context.digitsintermediatecomma
local dinterperiod   = period      / "" / context.digitsintermediateperiod
local dskipcomma     = comma       / "" / context.digitsseparatorspace
local dskipperiod    = period      / "" / context.digitsseparatorspace
local dsignspace     = signspace   / "" / context.digitssignspace
local dpositive      = positive    / "" / context.digitspositive
local dnegative      = negative    / "" / context.digitsnegative
local dhighspace     = highspace   / "" / context.digitshighspace
local dsomesign      = plus        / "" / context.digitsplus
                     + minus       / "" / context.digitsminus
                     + plusminus   / "" / context.digitsplusminus
-- An exponent is the power marker followed by (optionally braced) power digits.
local dpower         = power       / "" * ( powerdigits + lbrace * powerdigits * rbrace )

local dpadding       = padding     / "" / context.digitszeropadding -- todo

-- Any number of leading sign / spacing markers; the two-character markers are
-- tried before the single "+", "-" and "/" so that "++", "--" and "//" win.
local dleader        = (dpositive + dnegative + dhighspace + dsomesign + dsignspace)^0
local dtrailer       = dpower^0
-- A number ends at the end of the input or right before a character that is
-- not one of the separators , . ; : (lookahead only, nothing is consumed).
local dfinal         = P(-1) + #P(1 - comma - period - semicolon - colon)
local dnumber        = (ddigitspace + ddigit)^1
124
125-- ___,000,000  ___,___,000  ___,___,__0  000,000,000  000.00  000,000,000.00  000,000,000.==
126
127-- : ; for the moment not used, maybe for invisible fraction . , when no leading number
128
129-- local c_p = (ddigitspace^1 * dskipcomma)^0            -- ___,
130--           * (ddigitspace^0 * ddigit * dintercomma)^0  -- _00, 000,
131--           * ddigitspace^0  * ddigit^0                 -- _00 000
132--           * (
133--              dfinalperiod * ddigit                    -- .00
134--            + dskipperiod  * dpadding^1                -- .==
135--            + dsemiperiod  * ddigit                    -- :00
136--            + dsemiperiod  * dpadding^1                -- :==
137--             )^0
138--           + ddigit                                    -- 00
139--
140-- local p_c = (ddigitspace^1 * dskipperiod)^0           -- ___.
141--           * (ddigitspace^0 * ddigit * dinterperiod)^0 -- _00. 000.
142--           * ddigitspace^0  * ddigit^0                 -- _00 000
143--           * (
144--              dfinalcomma * ddigit                     -- ,00
145--            + dskipcomma  * dpadding^1                 -- ,==
146--            + dsemicomma  * ddigit                     -- :00
147--            + dsemicomma  * dpadding^1                 -- :==
148--             )^0
149--           + ddigit                                    -- 00
150--
151-- fix by WS/SB (needs further testing)
152
-- c_p: commas separate digit groups, a period (or colon) introduces the fraction.
-- NOTE(review): every factor of the first alternative is optional (^0), so it
-- can match the empty string and the trailing "+ ddigit" alternative looks
-- unreachable -- confirm before relying on it.
local c_p = (ddigitspace^1 * dskipcomma)^0                    -- ___,
          * (ddigitspace^0 * ddigit * dintercomma)^0          -- _00, 000,
          * ddigitspace^0  * ddigit^0                         -- _00 000
          * (
             dfinalperiod * ddigit^1 * dpadding^1             -- .0=
           + dfinalperiod * ddigit * (dintercomma * ddigit)^0 -- .00
           + dskipperiod  * dpadding^1                        -- .==
           + dsemiperiod  * ddigit * (dintercomma * ddigit)^0 -- :00
           + dsemiperiod  * dpadding^1                        -- :==
            )^0
          + ddigit                                            -- 00

-- p_c: the mirrored variant, periods separate digit groups and a comma (or
-- semicolon) introduces the fraction.
local p_c = (ddigitspace^1 * dskipperiod)^0                   -- ___.
          * (ddigitspace^0 * ddigit * dinterperiod)^0         -- _00. 000.
          * ddigitspace^0  * ddigit^0                         -- _00 000
          * (
             dfinalcomma * ddigit^1 * dpadding^1              -- ,0=
           + dfinalcomma * ddigit * (dinterperiod * ddigit)^0 -- ,00
           + dskipcomma  * dpadding^1                         -- ,==
           + dsemicomma  * ddigit * (dinterperiod * ddigit)^0 -- ;00
           + dsemicomma  * dpadding^1                         -- ;==
            )^0
          + ddigit                                            -- 00

-- Complete parsers: inline math is tried first, otherwise leader, number body,
-- optional exponent and the end-of-number check.
local p_c_dparser = math_one + math_two + dleader * p_c * dtrailer * dfinal
local c_p_dparser = math_one + math_two + dleader * c_p * dtrailer * dfinal
179
-- Runs a number string through one of the two digit parsers. With reverse set
-- the period-grouped / comma-fraction parser is used, otherwise the
-- comma-grouped / period-fraction one. All output happens through the handlers
-- attached to the patterns; nothing is returned.
local function makedigits(str,reverse)
    local parser = reverse and p_c_dparser or c_p_dparser
    matchlpeg(parser,str)
end
187
188-- tables:
189
-- Registries for user supplied additions, all empty at load time. In
-- update_parsers the long, symbol and packaged ones are copied over the
-- built-in tables (with lowercased aliases unless keepcase is set).
local user_long_prefixes   = { }
local user_long_units      = { }
local user_long_operators  = { }
local user_long_suffixes   = { }
local user_symbol_units    = { }
local user_packaged_units  = { }

-- NOTE(review): the short variants are presumably merged the same way further
-- down in update_parsers; that part is not visible here.
local user_short_prefixes  = { }
local user_short_units     = { }
local user_short_operators = { }
local user_short_suffixes  = { }
201
-- Long (spelled out) prefix names mapped to the keys used in labels.prefixes.
-- Each key occurs once: the order of assignments in a table constructor is
-- undefined, so repeated keys are best avoided even when the values agree.
local long_prefixes = {

    -- Le Système international d'unités (SI) 8e édition (Table 5)

    Yocto = "yocto",  -- 10^{-24}
    Zepto = "zepto",  -- 10^{-21}
    Atto  = "atto",   -- 10^{-18}
    Femto = "femto",  -- 10^{-15}
    Pico  = "pico",   -- 10^{-12}
    Nano  = "nano",   -- 10^{-9}
    Micro = "micro",  -- 10^{-6}
    Milli = "milli",  -- 10^{-3}
    Centi = "centi",  -- 10^{-2}
    Deci  = "deci",   -- 10^{-1}

    Deca  = "deca",   -- 10^{1}
    Hecto = "hecto",  -- 10^{2}
    Kilo  = "kilo",   -- 10^{3}
    Mega  = "mega",   -- 10^{6}
    Giga  = "giga",   -- 10^{9}
    Tera  = "tera",   -- 10^{12}
    Peta  = "peta",   -- 10^{15}
    Exa   = "exa",    -- 10^{18}
    Zetta = "zetta",  -- 10^{21}
    Yotta = "yotta",  -- 10^{24}

    -- IEC 60027-2: 2005, third edition, Part 2

    Kibi  = "kibi", -- 2^{10} (not ki)
    Mebi  = "mebi", -- 2^{20}
    Gibi  = "gibi", -- 2^{30}
    Tebi  = "tebi", -- 2^{40}
    Pebi  = "pebi", -- 2^{50}
    Exbi  = "exbi", -- 2^{60}

    -- not standard

    Zebi  = "zebi", -- binary
    Yobi  = "yobi", -- binary

    -- not a prefix in the SI sense (Micro is already listed above)

    Root  = "root",
}
245
-- Long (spelled out) unit names mapped to the keys used in labels.units.
-- Several spellings may map to the same key (synonyms, variants with spaces).
local long_units = {

    -- Le Système international d'unités (SI) 8e édition (except synonyms)
    -- SI base units (Table 1)

    Meter                       = "meter",
    Gram                        = "gram",
    Second                      = "second",
    Ampere                      = "ampere",
    Kelvin                      = "kelvin",
    Mole                        = "mole",
    Candela                     = "candela",

    -- synonyms

    Mol                         = "mole",
    Metre                       = "meter",

    -- SI derived units with special names (Table 3)

    Radian                      = "radian",
    Steradian                   = "steradian",
    Hertz                       = "hertz",
    Newton                      = "newton",
    Pascal                      = "pascal",
    Joule                       = "joule",
    Watt                        = "watt",
    Coulomb                     = "coulomb",
    Volt                        = "volt",
    Farad                       = "farad",
    Ohm                         = "ohm",
    Siemens                     = "siemens",
    Weber                       = "weber",
    Tesla                       = "tesla",
    Henry                       = "henry",
    Celsius                     = "celsius",
    Lumen                       = "lumen",
    Lux                         = "lux",
    Becquerel                   = "becquerel",
    Gray                        = "gray",
    Sievert                     = "sievert",
    Katal                       = "katal",

    -- non SI units accepted for use with SI (Table 6)

    Minute                      = "minute",
    Hour                        = "hour",
    Day                         = "day",

    -- (degree, minute, second of arc are treated specially later)

    Gon                         = "gon",
    Grad                        = "grad",
    Hectare                     = "hectare",
    Liter                       = "liter",

    Tonne                       = "tonne",

    -- synonyms

    MetricTon                   = "tonne",
    Litre                       = "liter",

    ["Metric Ton"]              = "tonne",

    -- non-SI units whose values must be obtained experimentally (Table 7)

    AtomicMassUnit              = "atomicmassunit",
    AstronomicalUnit            = "astronomicalunit",
    ElectronVolt                = "electronvolt",
    Dalton                      = "dalton",

    ["Atomic Mass Unit"]        = "atomicmassunit",
    ["Astronomical Unit"]       = "astronomicalunit",
    ["Electron Volt"]           = "electronvolt",

    -- special cases (catch doubles, okay, a bit over the top)

    DegreesCelsius              = "celsius",
    DegreesFahrenheit           = "fahrenheit",
    DegreeCelsius               = "celsius",
    DegreeFahrenheit            = "fahrenheit",

    ["Degrees Celsius"]         = "celsius",
    ["Degrees Fahrenheit"]      = "fahrenheit",
    ["Degree Celsius"]          = "celsius",
    ["Degree Fahrenheit"]       = "fahrenheit",

 -- too late as we already have connected symbols caught:
 --
 -- ["° Celsius"]               = "celsius",
 -- ["° Fahrenheit"]            = "fahrenheit",
 -- ["°Celsius"]                = "celsius",
 -- ["°Fahrenheit"]             = "fahrenheit",

    -- the "natural units" and "atomic units" are omitted for now
    -- synonyms

    eV                          = "electronvolt",
    AMU                         = "atomicmassunit",

    -- other non-SI units (Table 8)

    Bar                         = "bar",
    Hg                          = "mercury",
 -- ["Millimetre Of Mercury"]   = [[mmHg]],
    Angstrom                    = "angstrom", -- strictly Ångström
    NauticalMile                = "nauticalmile",
    Barn                        = "barn",
    Knot                        = "knot",
    Neper                       = "neper",
    Bel                         = "bel", -- in practice only decibel used

    ["Nautical Mile"]           = "nauticalmile",

    -- other non-SI units from CGS system (Table 9)

    Erg                         = "erg",
    Dyne                        = "dyne",
    Poise                       = "poise",
    Stokes                      = "stokes",
    Stilb                       = "stilb",
    Phot                        = "phot",
    Gal                         = "gal",
    Maxwell                     = "maxwell",
    Gauss                       = "gauss",
    Oersted                     = "oersted",

    -- end of SI

    -- data: for use with the binary prefixes (except Erlang)

    Bit                         = "bit",
    Byte                        = "byte" ,
    Baud                        = "baud",
    Erlang                      = "erlang",

    -- common units, not part of SI

    Atmosphere                  = "atmosphere",
    Revolution                  = "revolution",

    -- synonyms

    Atm                         = "atmosphere",
    Rev                         = "revolution",

    -- imperial units (very incomplete)

    Fahrenheit                  = "fahrenheit",
    Foot                        = "foot",
    Inch                        = "inch",
    Calorie                     = "calorie",

    -- synonyms

    Cal                         = "calorie",

}
405
-- Long operator names mapped to the keys used in labels.operators.
local long_operators = {

    Times   = "times",
    Solidus = "solidus",
    Per     = "per",
    OutOf   = "outof",

}
414
-- Long suffix (power) names mapped to the keys used in labels.suffixes; the
-- I-prefixed names select the entries whose labels carry a minus sign.
local long_suffixes = {

    Linear     = "linear",
    Square     = "square",
    Cubic      = "cubic",
    Quadratic  = "quadratic",
    Inverse    = "inverse",
    ILinear    = "ilinear",
    ISquare    = "isquare",
    ICubic     = "icubic",
    IQuadratic = "iquadratic",

}
428
-- Short (symbol) prefixes mapped to the keys used in labels.prefixes. Keys are
-- case sensitive: lowercase z/y are the 10^{-21}/10^{-24} prefixes, uppercase
-- Z/Y the 10^{21}/10^{24} ones.
local short_prefixes = {

    y  = "yocto",
    z  = "zepto", -- was "zetto", which is not a key of labels.prefixes
    a  = "atto",
    f  = "femto",
    p  = "pico",
    n  = "nano",
    u  = "micro",
    m  = "milli",
    c  = "centi",
    d  = "deci",
    da = "deca",
    h  = "hecto",
    k  = "kilo",
    M  = "mega",
    G  = "giga",
    T  = "tera",
    P  = "peta",
    E  = "exa",
    Z  = "zetta",
    Y  = "yotta",

}
453
-- Short (symbol) units mapped to the keys used in labels.units. Some symbols
-- are left out (commented) because they coincide with a short prefix.
local short_units = { -- I'm not sure about casing

    m  = "meter",
    Hz = "hertz",
    hz = "hertz",
    B  = "bel",
    b  = "bel",
    lx = "lux",
 -- da = "dalton",
    h  = "hour",
    s  = "second",
    g  = "gram",
    n  = "newton",
    V  = "volt",
    t  = "tonne",
    l  = "liter",
 -- w  = "watt",
    W  = "watt",
 -- a  = "ampere",
    A  = "ampere",

    -- NOTE(review): a bare non-ASCII key; stock Lua would need ["Ω"] here, so
    -- this relies on the host interpreter accepting UTF-8 identifiers.
    Ω  = "ohm",

--  C  = "coulomb", -- needs checking with (c)enti
--  K  = "kelvin",  -- needs checking with (k)ilo
--  N  = "newton",  -- needs checking with (n)ewton

    min = "minute",

    -- keyed by the UTF-8 form of the two code points
    [utfchar(0x2103)] = "celsius",
    [utfchar(0x2109)] = "fahrenheit",
}
486
-- Single character operators mapped to the keys used in labels.operators.
local short_operators = {
    ["."] = "times",
    ["*"] = "times",
    ["/"] = "solidus",
    [":"] = "outof",
}
493
-- Short power notations (with optional "^" and sign) mapped to the keys used in
-- labels.suffixes. The "-1" and "^-1" keys used to be assigned twice ("inverse"
-- and then "ilinear"); the order of assignments in a constructor is undefined
-- for repeated keys, so only the entry that took effect in practice is kept.
-- Both names share the same label in labels.suffixes.
local short_suffixes = { -- maybe just raw digit match
    ["1"]   = "linear",
    ["2"]   = "square",
    ["3"]   = "cubic",
    ["4"]   = "quadratic",
    ["+1"]  = "linear",
    ["+2"]  = "square",
    ["+3"]  = "cubic",
    ["+4"]  = "quadratic",
    ["-1"]  = "ilinear",
    ["-2"]  = "isquare",
    ["-3"]  = "icubic",
    ["-4"]  = "iquadratic",
    ["^1"]  = "linear",
    ["^2"]  = "square",
    ["^3"]  = "cubic",
    ["^4"]  = "quadratic",
    ["^+1"] = "linear",
    ["^+2"] = "square",
    ["^+3"] = "cubic",
    ["^+4"] = "quadratic",
    ["^-1"] = "ilinear",
    ["^-2"] = "isquare",
    ["^-3"] = "icubic",
    ["^-4"] = "iquadratic",
}
522
-- Units that are rendered as symbols (degree, arc minute/second, percent,
-- permille), keyed by name as well as by the symbol itself. The prime and
-- double prime keys had been lost (both were the empty string, so they also
-- collided); they are restored from the code points given in the comments.
local symbol_units = {
    Degrees    = "degree",
    Degree     = "degree",
 -- Deg        = "degree",
    ["°"]      = "degree",
    ArcMinute  = "arcminute",
    ["′"]      = "arcminute", -- 0x2032
    ArcSecond  = "arcsecond",
    ["″"]      = "arcsecond", -- 0x2033
    Percent    = "percent",
    ["%"]      = "percent",
    Promille   = "permille",
    Permille   = "permille",
}
537
-- Names that stand for a complete prefix plus unit combination and map
-- directly to a key in labels.units.
local packaged_units = {
    Micron = "micron",
    mmHg   = "millimetermercury",
}
542
543-- rendering:
544
545local ctx_unitsPUS    = context.unitsPUS
546local ctx_unitsPU     = context.unitsPU
547local ctx_unitsPS     = context.unitsPS
548local ctx_unitsP      = context.unitsP
549local ctx_unitsUS     = context.unitsUS
550local ctx_unitsU      = context.unitsU
551local ctx_unitsS      = context.unitsS
552local ctx_unitsO      = context.unitsO
553local ctx_unitsN      = context.unitsN
554local ctx_unitsC      = context.unitsC
555local ctx_unitsQ      = context.unitsQ
556local ctx_unitsRPM    = context.unitsRPM
557local ctx_unitsRTO    = context.unitsRTO
558local ctx_unitsRabout = context.unitsRabout
559local ctx_unitsNstart = context.unitsNstart
560local ctx_unitsNstop  = context.unitsNstop
561local ctx_unitsNspace = context.unitsNspace
562local ctx_unitsPopen  = context.unitsPopen
563local ctx_unitsPclose = context.unitsPclose
564
565local labels = languages.data.labels
566
-- Typeset labels for the prefixes, keyed by the values of the prefix tables.
-- "micro" used to be listed twice (first \mu, later µ); only the later entry
-- took effect in practice, so that one is kept. The "root" label had lost its
-- character and is restored from the code point given in its comment.
labels.prefixes = allocate {
    yocto = { labels = { en = [[y]]   } }, -- 10^{-24}
    zepto = { labels = { en = [[z]]   } }, -- 10^{-21}
    atto  = { labels = { en = [[a]]   } }, -- 10^{-18}
    femto = { labels = { en = [[f]]   } }, -- 10^{-15}
    pico  = { labels = { en = [[p]]   } }, -- 10^{-12}
    nano  = { labels = { en = [[n]]   } }, -- 10^{-9}
    micro = { labels = { en = [[µ]]   } }, -- 10^{-6}, 0x00B5 \textmu
    milli = { labels = { en = [[m]]   } }, -- 10^{-3}
    centi = { labels = { en = [[c]]   } }, -- 10^{-2}
    deci  = { labels = { en = [[d]]   } }, -- 10^{-1}
    deca  = { labels = { en = [[da]]  } }, -- 10^{1}
    hecto = { labels = { en = [[h]]   } }, -- 10^{2}
    kilo  = { labels = { en = [[k]]   } }, -- 10^{3}
    mega  = { labels = { en = [[M]]   } }, -- 10^{6}
    giga  = { labels = { en = [[G]]   } }, -- 10^{9}
    tera  = { labels = { en = [[T]]   } }, -- 10^{12}
    peta  = { labels = { en = [[P]]   } }, -- 10^{15}
    exa   = { labels = { en = [[E]]   } }, -- 10^{18}
    zetta = { labels = { en = [[Z]]   } }, -- 10^{21}
    yotta = { labels = { en = [[Y]]   } }, -- 10^{24}
    kibi  = { labels = { en = [[Ki]]  } }, -- 2^{10} (not ki)
    mebi  = { labels = { en = [[Mi]]  } }, -- 2^{20}
    gibi  = { labels = { en = [[Gi]]  } }, -- 2^{30}
    tebi  = { labels = { en = [[Ti]]  } }, -- 2^{40}
    pebi  = { labels = { en = [[Pi]]  } }, -- 2^{50}
    exbi  = { labels = { en = [[Ei]]  } }, -- 2^{60}
    zebi  = { labels = { en = [[Zi]]  } }, -- binary
    yobi  = { labels = { en = [[Yi]]  } }, -- binary
    root  = { labels = { en = [[√]]   } }, -- 0x221A
}
599
-- Typeset labels for the units, keyed by the values of the unit tables. The
-- "ohm" label had lost its character and is restored from the code point given
-- in its comment (0x2126, OHM SIGN).
labels.units = allocate {
    meter                       = { labels = { en = [[m]]                        } },
    gram                        = { labels = { en = [[g]]                        } }, -- strictly kg is the base unit
    second                      = { labels = { en = [[s]]                        } },
    ampere                      = { labels = { en = [[A]]                        } },
    kelvin                      = { labels = { en = [[K]]                        } },
    mole                        = { labels = { en = [[mol]]                      } },
    candela                     = { labels = { en = [[cd]]                       } },
    mol                         = { labels = { en = [[mol]]                      } },
    radian                      = { labels = { en = [[rad]]                      } },
    steradian                   = { labels = { en = [[sr]]                       } },
    hertz                       = { labels = { en = [[Hz]]                       } },
    newton                      = { labels = { en = [[N]]                        } },
    pascal                      = { labels = { en = [[Pa]]                       } },
    joule                       = { labels = { en = [[J]]                        } },
    watt                        = { labels = { en = [[W]]                        } },
    coulomb                     = { labels = { en = [[C]]                        } },
    volt                        = { labels = { en = [[V]]                        } },
    farad                       = { labels = { en = [[F]]                        } },
    ohm                         = { labels = { en = [[Ω]]                        } }, -- 0x2126 \textohm
    siemens                     = { labels = { en = [[S]]                        } },
    weber                       = { labels = { en = [[Wb]]                       } },
    mercury                     = { labels = { en = [[Hg]]                       } },
    millimetermercury           = { labels = { en = [[mmHg]]                     } }, -- connected
    tesla                       = { labels = { en = [[T]]                        } },
    henry                       = { labels = { en = [[H]]                        } },
    celsius                     = { labels = { en = [[\checkedtextcelsius]]      } }, -- 0x2103
    lumen                       = { labels = { en = [[lm]]                       } },
    lux                         = { labels = { en = [[lx]]                       } },
    becquerel                   = { labels = { en = [[Bq]]                       } },
    gray                        = { labels = { en = [[Gy]]                       } },
    sievert                     = { labels = { en = [[Sv]]                       } },
    katal                       = { labels = { en = [[kat]]                      } },
    minute                      = { labels = { en = [[min]]                      } },
    hour                        = { labels = { en = [[h]]                        } },
    day                         = { labels = { en = [[d]]                        } },
    gon                         = { labels = { en = [[gon]]                      } },
    grad                        = { labels = { en = [[grad]]                     } },
    hectare                     = { labels = { en = [[ha]]                       } },
    liter                       = { labels = { en = [[l]]                        } }, -- symbol l or L
    tonne                       = { labels = { en = [[t]]                        } },
    electronvolt                = { labels = { en = [[eV]]                       } },
    dalton                      = { labels = { en = [[Da]]                       } },
    atomicmassunit              = { labels = { en = [[u]]                        } },
    astronomicalunit            = { labels = { en = [[au]]                       } },
    bar                         = { labels = { en = [[bar]]                      } },
    angstrom                    = { labels = { en = [[Å]]                        } }, -- strictly Ångström
    nauticalmile                = { labels = { en = [[M]]                        } },
    barn                        = { labels = { en = [[b]]                        } },
    knot                        = { labels = { en = [[kn]]                       } },
    neper                       = { labels = { en = [[Np]]                       } },
    bel                         = { labels = { en = [[B]]                        } }, -- in practice only decibel used
    erg                         = { labels = { en = [[erg]]                      } },
    dyne                        = { labels = { en = [[dyn]]                      } },
    poise                       = { labels = { en = [[P]]                        } },
    stokes                      = { labels = { en = [[St]]                       } },
    stilb                       = { labels = { en = [[sb]]                       } },
    phot                        = { labels = { en = [[phot]]                     } },
    gal                         = { labels = { en = [[gal]]                      } },
    maxwell                     = { labels = { en = [[Mx]]                       } },
    gauss                       = { labels = { en = [[G]]                        } },
    oersted                     = { labels = { en = [[Oe]]                       } }, -- strictly Œrsted
    bit                         = { labels = { en = [[bit]]                      } },
    byte                        = { labels = { en = [[B]]                        } },
    baud                        = { labels = { en = [[Bd]]                       } },
    erlang                      = { labels = { en = [[E]]                        } },
    atmosphere                  = { labels = { en = [[atm]]                      } },
    revolution                  = { labels = { en = [[rev]]                      } },
    fahrenheit                  = { labels = { en = [[\checkedtextfahrenheit]]   } }, -- 0x2109
    foot                        = { labels = { en = [[ft]]                       } },
    inch                        = { labels = { en = [[inch]]                     } },
    calorie                     = { labels = { en = [[cal]]                      } },
    --
    degree                      = { labels = { en = [[°]]} },
    arcminute                   = { labels = { en = [[\checkedtextprime]]        } }, -- ′ 0x2032
    arcsecond                   = { labels = { en = [[\checkedtextdoubleprime]]  } }, -- ″ 0x2033
    percent                     = { labels = { en = [[\percent]]                 } },
    permille                    = { labels = { en = [[\promille]]                } },
    --
    micron                      = { labels = { en = [[\textmu m]]                } },
}
681
-- Typeset labels for the operators; "solidus" and "per" share the same macro.
labels.operators = allocate {
    times   = { labels = { en = [[\unitsTIMES]]   } },
    solidus = { labels = { en = [[\unitsSOLIDUS]] } },
    per     = { labels = { en = [[\unitsSOLIDUS]] } },
    outof   = { labels = { en = [[\unitsOUTOF]]   } },
}
688
-- Typeset labels for the suffixes (powers); "inverse" and "ilinear" share the
-- same label.
labels.suffixes = allocate {
    linear     = { labels = { en = [[1]]  } },
    square     = { labels = { en = [[2]]  } },
    cubic      = { labels = { en = [[3]]  } },
    quadratic  = { labels = { en = [[4]]  } },
    inverse    = { labels = { en = [[\mathminus1]] } },
    ilinear    = { labels = { en = [[\mathminus1]] } },
    isquare    = { labels = { en = [[\mathminus2]] } },
    icubic     = { labels = { en = [[\mathminus3]] } },
    iquadratic = { labels = { en = [[\mathminus4]] } },
}
700
-- Emits one unit component. Which of the unitsP/U/S macros is called depends
-- on which of prefix (p), unit (u) and suffix (s) differ from the empty string;
-- only those parts are passed on. When all three are empty, or only the prefix
-- is set, unitsP is called with p.
local function dimpus(p,u,s)
    if trace_units then
        report_units("prefix %a, unit %a, suffix %a",p,u,s)
    end
    local hasprefix = p ~= ""
    local hasunit   = u ~= ""
    local hassuffix = s ~= ""
    if hasprefix and hasunit and hassuffix then
        ctx_unitsPUS(p,u,s)
    elseif hasprefix and hasunit then
        ctx_unitsPU(p,u)
    elseif hasprefix and hassuffix then
        ctx_unitsPS(p,s)
    elseif hasunit and hassuffix then
        ctx_unitsUS(u,s)
    elseif hasunit then
        ctx_unitsU(u)
    elseif hassuffix then
        ctx_unitsS(s)
    else
        -- prefix only, or nothing at all (p is then the empty string)
        ctx_unitsP(p)
    end
end
733
-- Same as dimpus but takes the suffix first: the arguments arrive as
-- (suffix, prefix, unit) and are reordered before being passed on.
local function dimspu(s,p,u)
    return dimpus(p,u,s)
end
737
-- Emits an operator through unitsO; a nil or false operator is only traced.
local function dimop(o)
    if trace_units then
        report_units("operator %a",o)
    end
    if not o then
        return
    end
    ctx_unitsO(o)
end
746
-- Emits a symbol unit through unitsC. The argument is first looked up in
-- symbol_units; when it is not found there it is passed on unchanged.
local function dimsym(s)
    if trace_units then
        report_units("symbol %a",s)
    end
    local symbol = symbol_units[s] or s
    if symbol then
        ctx_unitsC(symbol)
    end
end
756
-- Emits a packaged unit through unitsU. The argument is first looked up in
-- packaged_units; when it is not found there it is passed on unchanged.
local function dimpre(p)
    if trace_units then
        -- the message used to read "prefix [%a": a stray bracket and a label
        -- that did not match what is being looked up here
        report_units("packaged unit %a",p)
    end
    p = packaged_units[p] or p
    if p then
        ctx_unitsU(p)
    end
end
766
767-- patterns:
768--
769-- space inside Cs else funny captures and args to function
770--
771-- square centi meter per square kilo seconds
772
773-- todo 0x -> rm
774
-- Builds the unit parsers from the current contents of the built-in and user
-- tables and returns two of them: combine(p_c_dparser) and combine(c_p_dparser),
-- in that order.
--
-- keepcase : when true, user defined long names, symbols and packaged units are
--            only known under the key they were registered with; otherwise a
--            lowercased alias is added as well. Built-in long names always get
--            a lowercased alias, short names never do.
local function update_parsers(keepcase) -- todo: don't remap utf sequences

    -- Merges a built-in table with its user counterpart into a fresh table. Keys
    -- are visited in sorted order and user entries are applied last, so they
    -- win. With aliased set, lowercased keys are added too (for user entries
    -- only when keepcase is not set).
    local function merged(builtin,user,aliased)
        local all = { }
        for k, v in sortedhash(builtin) do
            all[k] = v
            if aliased then
                all[lower(k)] = v
            end
        end
        local useralias = aliased and not keepcase
        for k, v in sortedhash(user) do
            all[k] = v
            if useralias then
                all[lower(k)] = v
            end
        end
        return all
    end

    local all_long_prefixes   = merged(long_prefixes,  user_long_prefixes,  true)
    local all_long_units      = merged(long_units,     user_long_units,     true)
    local all_long_operators  = merged(long_operators, user_long_operators, true)
    local all_long_suffixes   = merged(long_suffixes,  user_long_suffixes,  true)
    local all_symbol_units    = merged(symbol_units,   user_symbol_units,   true)
    local all_packaged_units  = merged(packaged_units, user_packaged_units, true)

    local all_short_prefixes  = merged(short_prefixes,  user_short_prefixes,  false)
    local all_short_units     = merged(short_units,     user_short_units,     false)
    local all_short_operators = merged(short_operators, user_short_operators, false)
    local all_short_suffixes  = merged(short_suffixes,  user_short_suffixes,  false)

    local somespace        = P(" ")^0/""

    local p_long_prefix    = appendlpeg(all_long_prefixes,nil,true)
    local p_long_unit      = appendlpeg(all_long_units,nil,true)
    local p_long_operator  = appendlpeg(all_long_operators,nil,true)
    local p_long_suffix    = appendlpeg(all_long_suffixes,nil,true)
    local p_symbol         = appendlpeg(all_symbol_units,nil,true)
    local p_packaged       = appendlpeg(all_packaged_units,nil,true)

    local p_short_prefix   = appendlpeg(all_short_prefixes)
    local p_short_unit     = appendlpeg(all_short_units)
    local p_short_operator = appendlpeg(all_short_operators)
    local p_short_suffix   = appendlpeg(all_short_suffixes)

    -- more efficient but needs testing

--     local p_long_prefix    = utfchartabletopattern(all_long_prefixes)  / all_long_prefixes
--     local p_long_unit      = utfchartabletopattern(all_long_units)     / all_long_units
--     local p_long_operator  = utfchartabletopattern(all_long_operators) / all_long_operators
--     local p_long_suffix    = utfchartabletopattern(all_long_suffixes)  / all_long_suffixes
--     local p_symbol         = utfchartabletopattern(all_symbol_units)   / all_symbol_units
--     local p_packaged       = utfchartabletopattern(all_packaged_units) / all_packaged_units

--     local p_short_prefix   = utfchartabletopattern(all_short_prefixes)  / all_short_prefixes
--     local p_short_unit     = utfchartabletopattern(all_short_units)     / all_short_units
--     local p_short_operator = utfchartabletopattern(all_short_operators) / all_short_operators
--     local p_short_suffix   = utfchartabletopattern(all_short_suffixes)  / all_short_suffixes

    -- we can clean up some space issues here (todo)

    local unitparser = P { "unit",
        --
        longprefix    = Cs(V("somespace") * p_long_prefix),
        shortprefix   = Cs(V("somespace") * p_short_prefix),
        longsuffix    = Cs(V("somespace") * p_long_suffix),
        shortsuffix   = Cs(V("somespace") * p_short_suffix),
        shortunit     = Cs(V("somespace") * p_short_unit),
        longunit      = Cs(V("somespace") * p_long_unit),
        longoperator  = Cs(V("somespace") * p_long_operator),
        shortoperator = Cs(V("somespace") * p_short_operator),
        packaged      = Cs(V("somespace") * p_packaged),
        --
        nothing       = Cc(""),
        somespace     = somespace,
        nospace       = (1-somespace)^1, -- was 0 (only referenced by the disabled someunknown rule)
     -- ignore        = P(-1),
        --
        qualifier     = Cs(V("somespace") * (lparent/"") * (1-rparent)^1 * (rparent/"")),
        --
        somesymbol    = V("somespace")
                      * (p_symbol/dimsym)
                      * V("somespace"),
        somepackaged  = V("somespace")
                      * (V("packaged") / dimpre)
                      * V("somespace"),
     -- someunknown   = V("somespace")
     --               * (V("nospace")/ctx_unitsU)
     --               * V("somespace"),
        --
        combination   = V("longprefix")  * V("longunit")   -- centi meter
                      + V("nothing")     * V("longunit")
                      + V("shortprefix") * V("shortunit")  -- c m
                      + V("nothing")     * V("shortunit")
                      + V("longprefix")  * V("shortunit")  -- centi m
                      + V("shortprefix") * V("longunit"),  -- c meter

--         combination   = (   V("longprefix")   -- centi meter
--                           + V("nothing")
--                         ) * V("longunit")
--                       + (   V("shortprefix")  -- c m
--                           + V("nothing")
--                           + V("longprefix")
--                         ) * V("shortunit")    -- centi m
--                       + (   V("shortprefix")  -- c meter
--                         ) * V("longunit"),


        dimension     = V("somespace")
                      * (
                            V("packaged") / dimpre
                          + (V("longsuffix") * V("combination")) / dimspu
                          + (V("combination") * (V("shortsuffix") + V("nothing"))) / dimpus
                        )
                      * (V("qualifier") / ctx_unitsQ)^-1
                      * V("somespace"),
        operator      = V("somespace")
                      * ((V("longoperator") + V("shortoperator")) / dimop)
                      * V("somespace"),
        snippet       = V("dimension")
                      + V("somesymbol"),
        unit          = (   V("snippet") * (V("operator") * V("snippet"))^0
                          + V("somepackaged")
                        )^1,
    }

    -- todo: avoid \ctx_unitsNstart\ctx_unitsNstop (weird that it can happen .. now caught at tex end)

    local letter = R("az","AZ")
    local bound  = #(1-letter) -- lookahead: a keyword must not be followed by a letter
 -- local number = lpeg.patterns.number
    local number = Cs( P("$")     * (1-P("$"))^1 * P("$")
                     + P([[\m{]]) * (1-P("}"))^1 * P("}")
                     + (1-letter-P(" "))^1 -- todo: catch { } -- not ok
                   ) / ctx_unitsN

    local start   = Cc(nil) / ctx_unitsNstart
    local stop    = Cc(nil) / ctx_unitsNstop
    local space   = P(" ") * Cc(nil) / ctx_unitsNspace
    local open    = P("(") * Cc(nil) / ctx_unitsPopen
    local close   = P(")") * Cc(nil) / ctx_unitsPclose

    -- The "to" range accepts an en-dash or the word "to". An empty P("") in
    -- the first slot would always succeed without consuming input, which makes
    -- the "to" alternative unreachable and lets the range match on nothing.
    local range   = somespace
                  * ( (P("±") + P("pm") * bound) / "" / ctx_unitsRPM
                    + (P("–") + P("to") * bound) / "" / ctx_unitsRTO )
                  * somespace

    local about   = (P("±") + P("pm") * bound) / "" / ctx_unitsRabout
                  * somespace

    -- todo: start / stop

    -- Wraps the unit grammar in a grammar that also accepts leading numbers,
    -- ranges and parenthesized numbers; parser is the digit parser to try
    -- before falling back to the generic number pattern above.
    local function combine(parser)
        return P { "start",
            number  = start * dleader * (parser + number) * stop,
            anumber = space
                    * open
                    * V("about")^-1
                    * V("number")
                    * close,
            rule    = V("number")^-1
                    * (V("range") * V("number") + V("anumber"))^-1,
            unit    = unitparser,
            about   = about,
            range   = range,
            space   = space,
            start   = V("rule")
                    * V("unit")
                    * (V("space") * V("rule") * V("unit"))^0
                    + open
                    * V("number")
                    * (V("range") * V("number"))^-1
                    * close
                    * dtrailer^-1
                    * V("unit")
                    + V("number")
        }
    end

    return combine(p_c_dparser), combine(c_p_dparser)
end
963
-- Parsers are built on demand and cached per case mode. The dirty flags are
-- set again whenever one of the unit tables gets a new entry, which forces a
-- rebuild on the next use.

local p_c_parser_lowercase = nil
local c_p_parser_lowercase = nil
local p_c_parser_keepcase  = nil
local c_p_parser_keepcase  = nil

local dirty_lowercase = true
local dirty_keepcase  = true

local v_reverse = interfaces.variables.reverse
local v_keep    = interfaces.variables.keep

-- Parses and typesets a unit specification.
--
-- order  : when equal to the "reverse" interface variable the p_c parser is
--          used, otherwise the c_p parser
-- option : when equal to the "keep" interface variable the parser built with
--          keepcase enabled is used
-- str    : the specification; when it cannot be parsed this is reported and the
--          string is passed to context as it is
local function makeunit(order,option,str)
    local reverse = order  == v_reverse
    local keep    = option == v_keep
    local parser
    if keep then
        if dirty_keepcase then
            if trace_units then
                report_units("initializing case %s parser","sensitive")
            end
            p_c_parser_keepcase, c_p_parser_keepcase = update_parsers(true)
            dirty_keepcase = false
        end
        parser = reverse and p_c_parser_keepcase or c_p_parser_keepcase
    else
        if dirty_lowercase then
            if trace_units then
                report_units("initializing case %s parser","insensitive")
            end
            p_c_parser_lowercase, c_p_parser_lowercase = update_parsers()
            dirty_lowercase = false
        end
        parser = reverse and p_c_parser_lowercase or c_p_parser_lowercase
    end
    if not matchlpeg(parser,str) then
        report_units("unable to parse: %s",str)
        context(str)
    end
end
1003
-- Handler for new entries in the unit tables: stores the value without
-- invoking metamethods and marks both cached parser sets as outdated.
local function trigger(t,k,v)
    rawset(t,k,v)
    dirty_lowercase, dirty_keepcase = true, true
end
1009
-- The built-in and user tables, grouped per category and exposed as
-- physics.units.tables. Each table is passed through setmetatablenewindex
-- together with trigger (presumably installing trigger as its __newindex
-- handler, so that additions invalidate the cached parsers -- confirm).

local t_units, t_shortcuts, t_userunits, t_usershortcuts

do

    local function watched(t)
        return setmetatablenewindex(t,trigger)
    end

    t_units = {
        prefixes  = watched(long_prefixes),
        units     = watched(long_units),
        operators = watched(long_operators),
        suffixes  = watched(long_suffixes),
        symbols   = watched(symbol_units),
        packaged  = watched(packaged_units),
    }

    t_shortcuts = {
        prefixes  = watched(short_prefixes),
        units     = watched(short_units),
        operators = watched(short_operators),
        suffixes  = watched(short_suffixes),
    }

    t_userunits = {
        prefixes  = watched(user_long_prefixes),
        units     = watched(user_long_units),
        operators = watched(user_long_operators),
        suffixes  = watched(user_long_suffixes),
        symbols   = watched(user_symbol_units),
        packaged  = watched(user_packaged_units),
    }

    t_usershortcuts = {
        prefixes  = watched(user_short_prefixes),
        units     = watched(user_short_units),
        operators = watched(user_short_operators),
        suffixes  = watched(user_short_suffixes),
    }

end

physics.units.tables = allocate {
    units         = t_units,
    shortcuts     = t_shortcuts,
    userunits     = t_userunits,
    usershortcuts = t_usershortcuts,
}
1048
-- Maps the singular category names accepted by the register commands onto
-- the field names used in the grouped tables.
local mapping = {
    prefix   = "prefixes",
    unit     = "units",
    operator = "operators",
    suffix   = "suffixes",
    symbol   = "symbols",
    packaged = "packaged",
}

-- Adds the key/value pairs of a settings list to one category of target.
--
-- category : one of the keys of mapping
-- list     : settings string that is split by settings_to_hash; when it is
--            missing or empty, the first argument is taken to be the list and
--            the category becomes "unit"
-- target   : one of the grouped tables (a field per category)
--
-- Nothing happens when target has no table for the category.
local function register(category,list,target)
    local nolist = not list or list == ""
    if nolist then
        category, list = "unit", category
    end
    local destination = target[mapping[category]]
    if not destination then
        return
    end
    local settings = utilities.parsers.settings_to_hash(list or "")
    for key, value in next, settings do
        destination[key] = value
    end
 -- inspect(tables)
end
1071
-- Registers user defined long names (units, prefixes, ...) in t_userunits.
local function registerunit(category,list)
    register(category,list,t_userunits)
end

-- Registers user defined short names in t_usershortcuts.
local function registershortcut(category,list)
    register(category,list,t_usershortcuts)
end

physics.units.registerunit     = registerunit
physics.units.registershortcut = registershortcut
1077
-- Interface registrations: each entry binds a name to one of the functions
-- defined in this file, together with a description of its arguments.

-- digits, one string argument
implement {
    name      = "digits_normal",
    arguments = "string",
    actions   = makedigits,
}

-- digits, with an additional constant true in the argument list (presumably
-- handed to makedigits as its second argument -- confirm)
implement {
    name      = "digits_reverse",
    arguments = { "string", true },
    actions   = makedigits,
}

-- units: order, option and specification, see makeunit
implement {
    name      = "unit",
    arguments = "3 strings",
    actions   = makeunit
}

-- user defined long names: category and settings list
implement {
    name      = "registerunit",
    arguments = "2 strings",
    actions   = registerunit,
}

-- user defined short names: category and settings list
implement {
    name      = "registerunitshortcut",
    arguments = "2 strings",
    actions   = registershortcut,
}
1107
-- Registers "hyphenateddigits": takes an optional filler and a string of
-- digits, appends \digitsbreak (plus a space) to every digit, applies the
-- filler substitution and passes the result to context.
implement {
    name      = "hyphenateddigits",
    public    = true,
    protected = true,
    arguments = { "optional", "string" },
    actions   = function(filler, digits)
        -- the space is needed for following letters
        local broken = gsub(digits,"(%d)","%1\\digitsbreak ")
        -- NOTE(review): in a Lua pattern the backslash is not an escape, so
        -- this reads as "shortest run of backslashes at the end of the string"
        -- and not as a literal \- ; in practice it matches the (possibly empty)
        -- tail and so puts the filler at the end -- confirm that this is meant.
        local finished = gsub(broken,"\\-$",filler)
        context(finished)
    end
}
1119