#] 
#] *********************
#] loaddefs link d_Qndfs 'numberBase conversions.ndf'
# www.BillHowell.ca
# 25May2018 this only applies to positive [ints,hexes]
# 26May2018 convert to signed (which is what QNial shows)
# 26May2018 unwind_recurPair should go into a general file on array operators ...
# 27Jun2018 transferred to its own .ndf file
# 08Feb2021 added [TOC, loading] markers
#
# Layout note : a remark starts with # and runs to the next blank line,
# so every remark below is separated from the code that follows it by a blank line.

f_numberBaseConversions := 'numberBase conversions.ndf' ;
loaddefs_start f_numberBaseConversions ;

#****************************
# List of operators, generated with :
# $ cat "$d_Qndfs""numberBase conversions.ndf" | grep "^#]" | sed 's/^#\]/ /'
# *********************
#	loaddefs link d_Qndfs 'numberBase conversions.ndf'
#	baseChr_to_decInt IS OP baseChr - 
#	decInt_to_baseChr IS OP decInt - 
#	basePre_to_basePwr IS OP basePre - 
#	baseStr_to_basePwr IS OP baseStr - 
#	baseStr_to_baseChrs IS OP baseStr - 
#	basePre_baseChrs IS OP baseStr - 
#	baseStr_to_basePwrSeq IS OP baseStr - 
#	wind_dwn_recurPair IS OP a - 
#	baseStr_to_decIntUnSigned IS OP baseStr - 
#	decInt_to_baseChrs_raw IS OP pwr decInt - 
#	decInt_to_baseStr IS OP decInt basePre - 
#	decInt_to_baseStr_len IS OP decInt basePre len - 
#	baseStr_to_baseStr_len IS OP baseStr basePre len - 
#	baseStr_to_decIntSigned_len IS OP baseStr len - 
#	baseStr_to_decIntSigned IS OP baseStr - 
#	baseStr_to_decIntSigned_time IS - 
#	build_hexTable IS - 

#***************************
# numChr_conversion IS OP base1Chrs base2Chrs base1Chr - convert signed 2-byte integer to hex character
# numStr_conversion IS OP base1Chrs base2Chrs base1Str - convert signed 2-byte integer to hex string
# int_to_hexStr IS OP int hexSize - convert signed integer (any magnitude) to hex string
# numChr_conversion2 IS OP chr - convert hex character to signed 2-byte integer notation
# hexStr_to_int IS OP str - 
# unwind_recurPair IS OP a - pick out (first of pair) only down a pair-recursive array, save to global wind_upp_recurPair
# hex_chr = nibble (4 bits)
# treat all [oct,dec,hex] ints as signed
#
# baseChrsALL : the digit alphabet. The position of an entry in this list is its numeric value
# (baseChr_to_decInt looks an entry up with find, decInt_to_baseChr indexes the list).
# Each entry is written as a one-character quoted string.
# NOTE(review): 'Σ' appears twice in a row after 'Ρ'. find can only ever return the position of the
# first one, so the second position cannot round-trip. The lowercase run has two distinct
# sigmas ('ς' 'σ'), which may be where the duplicate came from - confirm before changing, because
# removing it shifts the value of every later entry.
# NOTE(review): the Greek entries are multi-byte in UTF-8; whether QNial treats each one as a
# single character is not visible from this file - confirm before using bases above 62.

baseChrsALL := '0' '1' '2' '3' '4' '5' '6' '7' '8' '9'
	'A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' 'X' 'Y' 'Z'
	'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z'
	'Α' 'Β' 'Γ' 'Δ' 'Ε' 'Ζ' 'Η' 'Θ' 'Ι' 'Κ' 'Λ' 'Μ' 'Ν' 'Ξ' 'Ο' 'Π' 'Ρ' 'Σ' 'Σ' 'Τ' 'Υ' 'Φ' 'Χ' 'Ψ' 'Ω'
	'α' 'β' 'γ' 'δ' 'ε' 'ζ' 'η' 'θ' 'ι' 'κ' 'λ' 'μ' 'ν' 'ξ' 'ο' 'π' 'ρ' 'ς' 'σ' 'τ' 'υ' 'φ' 'χ' 'ψ' 'ω' ;

# Parallel lists : position i of basePres (the two-character prefix) corresponds to position i of
# basePwrs (the numeric base). basePre_to_basePwr and baseStr_to_basePwr rely on that pairing.
# baseMaxs and baseMins are not referenced by any operator visible in this file.
# NOTE(review): baseMaxs lists 10 for '0d' although the largest decimal digit is 9, and the meaning
# of baseMins (0 -10 -16 -8) is not documented - confirm intent before using either list.

basePres := '0b' '0d' '0x' '0o' ;

basePwrs := 2 10 16 8 ;

baseMaxs := 1 10 15 7 ;

baseMins := 0 -10 -16 -8 ;

IF flag_debug THEN write 'loading baseChr_to_decInt' ; ENDIF ;

#] baseChr_to_decInt IS OP baseChr - 
# Returns the result of (find baseChr baseChrsALL), i.e. where baseChr occurs in baseChrsALL.
# baseChr is expected in the same form as the entries of baseChrsALL (a one-character string).

baseChr_to_decInt IS OP baseChr
{ find baseChr baseChrsALL }

IF flag_debug THEN write 'loading decInt_to_baseChr' ; ENDIF ;

#] decInt_to_baseChr IS OP decInt - 
# Returns the entry of baseChrsALL at position decInt (inverse of baseChr_to_decInt for unique entries).

decInt_to_baseChr IS OP decInt
{ baseChrsALL@decInt }

IF flag_debug THEN write 'loading basePre_to_basePwr' ; ENDIF ;

#] basePre_to_basePwr IS OP basePre - 
# Looks basePre (e.g. '0x') up in basePres and selects the entry of basePwrs at the same position.
# Behaviour for a prefix that is not in basePres is whatever find/choose give for a miss - not handled here.

basePre_to_basePwr IS OP basePre
{ (find basePre basePres) choose basePwrs }

IF flag_debug THEN write 'loading baseStr_to_basePwr' ; ENDIF ;

#] baseStr_to_basePwr IS OP baseStr - 
# Same lookup as basePre_to_basePwr, using the first two items of baseStr as the prefix.

baseStr_to_basePwr IS OP baseStr
{ (find (baseStr#(0 1)) basePres) choose basePwrs }

IF flag_debug THEN write 'loading baseStr_to_baseChrs' ; ENDIF ;

#] baseStr_to_baseChrs IS OP baseStr - 
# Drops the first two items of baseStr (the prefix) and applies solitary to each remaining item,
# giving the digits in the same form as the entries of baseChrsALL.

baseStr_to_baseChrs IS OP baseStr
{ EACH solitary rest rest baseStr }

IF flag_debug THEN write 'loading basePre_baseChrs' ; ENDIF ;

#] basePre_baseChrs IS OP baseStr - 
# Splits baseStr into a pair : (its first two items) (everything after them).

basePre_baseChrs IS OP baseStr
{ baseStr#(0 1) (rest rest baseStr) }

IF flag_debug THEN write 'loading baseStr_to_basePwrSeq' ; ENDIF ;

#] baseStr_to_basePwrSeq IS OP baseStr - 
# Place values for the digits of baseStr : the base given by the prefix raised to the exponents
# (number of digits - 1) down to 0, where number of digits = length of baseStr minus the 2 prefix items.

baseStr_to_basePwrSeq IS OP baseStr
{ (baseStr_to_basePwr baseStr) EACHRIGHT power (reverse tell (gage shape baseStr - 2)) }

# all calculations are done in decimal - handy & familiar
# base[X]Str must be a list of strings, NOT a string!!!
# wind_upp_recurPair : global accumulator shared by wind_dwn_recurPair and decInt_to_baseChrs_raw.
# The decInt_to_baseStr* operators reset it to null before and after they use it.

wind_upp_recurPair := null ;

IF flag_debug THEN write 'loading wind_dwn_recurPair' ; ENDIF ;

#] wind_dwn_recurPair IS OP a - 
# Walks a pair-recursive array : while a is not a fault, links (first a) in front of the global
# wind_upp_recurPair and recurses on (second a). Results are left in wind_upp_recurPair;
# the caller is responsible for resetting that global beforehand.

wind_dwn_recurPair IS OP a
{ NONLOCAL wind_upp_recurPair ;
  IF (NOT isfault a) THEN
     wind_upp_recurPair := link (first a) wind_upp_recurPair ;
     wind_dwn_recurPair second a ;
  ENDIF }

# defined in "setup.ndf"
# floor_mod IS OP a b - convenient return of BOTH floor and mod!

IF flag_debug THEN write 'loading baseStr_to_decIntUnSigned' ; ENDIF ;

#] baseStr_to_decIntUnSigned IS OP baseStr - 
# Unsigned value of a prefixed number string (e.g. '0x7F') :
# sum of (place value * digit value) over the digits after the two-character prefix.
# Unused LOCAL names (basePre baseChrs) have been dropped from the declaration.

baseStr_to_decIntUnSigned IS OP baseStr
{ LOCAL decInts decSeq pwrSeq ;
  pwrSeq := baseStr_to_basePwrSeq baseStr ;
  decSeq := EACH baseChr_to_decInt (baseStr_to_baseChrs baseStr) ;
  decInts := pwrSeq EACHBOTH * decSeq ;
  sum decInts }

# baseStr_to_decIntUnSigned '0b01111111'
# EACH baseStr_to_decIntUnSigned '0x7F' '0x80' '0xFE' '0xFF'

IF flag_debug THEN write 'loading decInt_to_baseChrs_raw' ; ENDIF ;

#] decInt_to_baseChrs_raw IS OP pwr decInt - 
# Repeated division of decInt by pwr : each remainder is appended to the global wind_upp_recurPair,
# so the digit values accumulate least-significant first (callers reverse them).
# Recursion continues on the quotient until it is 0. No useful value is returned.
# Assumes decInt >= 0 and that floor_mod (setup.ndf) returns the pair (quotient remainder) -
# NOTE(review): with a floor quotient a negative decInt would never reach 0; confirm callers never pass one.
# Unused LOCAL names (baseChrs result) have been dropped from the declaration.

decInt_to_baseChrs_raw IS OP pwr decInt
{ LOCAL div_flr div_mod ;
  NONLOCAL wind_upp_recurPair ;
  div_flr div_mod := floor_mod decInt pwr ;
  wind_upp_recurPair := link wind_upp_recurPair div_mod ;
  IF (~= 0 div_flr) THEN
     decInt_to_baseChrs_raw pwr div_flr ;
  ELSE
     null ;
  ENDIF ; }

# decInt_to_baseChrs_raw 10 5230

IF flag_debug THEN write 'loading decInt_to_baseStr' ; ENDIF ;

#] decInt_to_baseStr IS OP decInt basePre - 
# Converts a non-negative integer to its digits in the base named by basePre ('0b' '0d' '0x' '0o'),
# with basePre linked in front. Uses (and clears, before and after) the global wind_upp_recurPair.
# Callers in this file apply link to the result before treating it as a plain string.
# The LOCAL list now names the variables actually assigned (pwr result); pwr was previously undeclared.

decInt_to_baseStr IS OP decInt basePre
{ LOCAL pwr result ;
  NONLOCAL wind_upp_recurPair ;
  wind_upp_recurPair := null ;
  pwr := basePre_to_basePwr basePre ;
  decInt_to_baseChrs_raw pwr decInt ;
  result := link basePre link baseChrsALL#(reverse wind_upp_recurPair) ;
  wind_upp_recurPair := null ;
  result }

IF flag_debug THEN write 'loading decInt_to_baseStr_len' ; ENDIF ;

#] decInt_to_baseStr_len IS OP decInt basePre len - 
# As decInt_to_baseStr, but left-pads the digits with '0' so that exactly len digits follow the prefix.
# Returns the fault '?decInt_to_baseStr_len ERROR : len too short for conversion' when the number
# needs more than len digits.
# The LOCAL list now names the variables actually assigned (pwr result zeros); pwr was previously undeclared.

decInt_to_baseStr_len IS OP decInt basePre len
{ LOCAL pwr result zeros ;
  NONLOCAL wind_upp_recurPair ;
  wind_upp_recurPair := null ;
  pwr := basePre_to_basePwr basePre ;
  decInt_to_baseChrs_raw pwr decInt ;
  zeros := len - (gage shape wind_upp_recurPair) ;
  IF (0 <= zeros) THEN
     result := link basePre (zeros reshape '0') link baseChrsALL#(reverse wind_upp_recurPair) ;
  ELSE
     result := fault '?decInt_to_baseStr_len ERROR : len too short for conversion' ;
  ENDIF ;
  wind_upp_recurPair := null ;
  result }

IF flag_debug THEN write 'loading baseStr_to_baseStr_len' ; ENDIF ;

#] baseStr_to_baseStr_len IS OP baseStr basePre len - 
# Re-expresses the unsigned value of baseStr in the base named by basePre, padded to len digits
# (see decInt_to_baseStr_len for the padding and the too-short fault).

baseStr_to_baseStr_len IS OP baseStr basePre len
{ LOCAL decInt ;
  decInt := baseStr_to_decIntUnSigned baseStr ;
  decInt_to_baseStr_len decInt basePre len }

# link baseStr_to_baseStr_len '0x7F' '0b' 8
# 01Jun2018 baseStr_to_decIntSigned_len is brutish - need [subtle, efficient] code
# Also, at present it has ONLY been exercised on two-character hex notation (one byte, 8-bit binary)

IF flag_debug THEN write 'loading baseStr_to_decIntSigned_len' ; ENDIF ;

#] baseStr_to_decIntSigned_len IS OP baseStr len - 
# Signed (two's-complement) value of baseStr read as a len-bit binary number.
# The value is first rewritten as a len-digit binary string; the leading bit is the sign bit :
#    sign bit 0 -> value of the remaining bits
#    sign bit 1 -> value of the remaining bits minus 2^(len - 1)
# e.g. ('0x7F' 8) -> 127, ('0x80' 8) -> -128, ('0xFF' 8) -> -1, ('0xEE' 8) -> -18
# NOTE: the earlier version negated the value of the remaining bits instead (sign-magnitude :
# ('0x80' 8) -> 0, ('0xFF' 8) -> -127). The byte values cited in this file's notes (-62, [-18,-30])
# correspond to the UTF-8 lead bytes 0xC2, 0xEE, 0xE2 only under two's complement.
# Outputs recorded in notes dated 01Jun2018 reflect the earlier behaviour.
# The LOCAL list now includes binStr, which was previously assigned without a declaration.

baseStr_to_decIntSigned_len IS OP baseStr len
{ LOCAL binStr binPre binSgn binDigits binStrUnsigned offset ;
  binStr := link baseStr_to_baseStr_len baseStr '0b' len ;
  binPre binSgn binDigits := [2 take, 2 pick, rest rest rest] binStr ;
  IF (binSgn = `0) THEN
     offset := 0 ;
  ELSE
     offset := 2 power (len - 1) ;
  ENDIF ;
  binStrUnsigned := link link binPre binDigits ;
  (baseStr_to_decIntUnSigned binStrUnsigned) - offset }

# sum ((2 2 2 2 2 2 2) EACHBOTH power (6 5 4 3 2 1 0))
# sum ((2 2 2 2 2 2 2 2) EACHBOTH power (7 6 5 4 3 2 1 0))
# baseStr_to_decIntUnSigned '0b1111111'
# EACH baseStr_to_decIntSigned_len ('0x70' 8) ('0x7F' 8) ('0x80' 8) ('0x81' 8) ('0xFE' 8) ('0xFF' 8) ('0xEE' 8) ('0xEF' 8) ('0xAE' 8) ('0xAF' 8) ('0x8E' 8) ('0x8F' 8)
# baseStr_to_decIntSigned IS OP baseStr - more efficient, general form

IF flag_debug THEN write 'loading baseStr_to_decIntSigned' ; ENDIF ;

#] baseStr_to_decIntSigned IS OP baseStr - 
# Signed (radix-complement, i.e. two's-complement for bases 2, 8, 16) value of baseStr,
# where the width is the number of digits actually present after the prefix.
#    leading digit <  ceiling (base / 2) -> the unsigned value
#    leading digit >= ceiling (base / 2) -> the unsigned value minus base^(number of digits)
# The sign test is the same one the earlier version used; only the negative magnitudes change.
# e.g. '0x70' '0x7F' '0x80' '0x81' '0xFE' '0xFF' '0xEE' '0xEF' -> 112 127 -128 -127 -2 -1 -18 -17
# NOTE: the earlier version stripped the top half of the leading digit and negated the rest
# (sign-magnitude : '0x80' -> 0, '0xFF' -> -127). The byte values cited in this file's notes
# (-62, [-18,-30]) correspond to the UTF-8 lead bytes 0xC2, 0xEE, 0xE2 only under two's complement.
# Outputs recorded in notes dated 01Jun2018 reflect the earlier behaviour.
# The LOCAL list now names the variables actually assigned; the earlier list was copied from
# baseStr_to_decIntSigned_len and left every working variable of this operator undeclared.

baseStr_to_decIntSigned IS OP baseStr
{ LOCAL basePre baseChrs basePwr sgnInt halfPwr unsignedInt result ;
  basePre baseChrs := basePre_baseChrs baseStr ;
  basePwr := basePre_to_basePwr basePre ;
  sgnInt := baseChr_to_decInt (string first baseChrs) ;
  halfPwr := ceiling (basePwr / 2) ;
  unsignedInt := baseStr_to_decIntUnSigned baseStr ;
  IF (sgnInt < halfPwr) THEN
     result := unsignedInt ;
  ELSE
     result := unsignedInt - (basePwr power (gage shape baseChrs)) ;
  ENDIF ;
  result }

# EACH baseStr_to_decIntSigned '0x70' '0x7F' '0x80' '0x81' '0xFE' '0xFF' '0xEE' '0xEF' '0xAE' '0xAF' '0x8E' '0x8F'

IF flag_debug THEN write 'loading baseStr_to_decIntSigned_time' ; ENDIF ;

#] baseStr_to_decIntSigned_time IS - 
# Timing comparison : runs each signed converter 100000 times over the same twelve hex bytes and
# writes the elapsed time of each run. sec_from_timestamp and hhmmss_elapsed are defined outside this file.
# Takes a long time to run (see the recorded timings below) - not meant to be called during loading.

baseStr_to_decIntSigned_time IS
{ LOCAL i t_start ;
  t_start := sec_from_timestamp ;
  FOR i WITH tell 100000 DO
     EACH baseStr_to_decIntSigned_len ('0x70' 8) ('0x7F' 8) ('0x80' 8) ('0x81' 8) ('0xFE' 8) ('0xFF' 8) ('0xEE' 8) ('0xEF' 8) ('0xAE' 8) ('0xAF' 8) ('0x8E' 8) ('0x8F' 8) ;
  ENDFOR ;
  write link 'baseStr_to_decIntSigned_len : ' (string hhmmss_elapsed t_start) ;
  t_start := sec_from_timestamp ;
  FOR i WITH tell 100000 DO
     EACH baseStr_to_decIntSigned '0x70' '0x7F' '0x80' '0x81' '0xFE' '0xFF' '0xEE' '0xEF' '0xAE' '0xAF' '0x8E' '0x8F' ;
  ENDFOR ;
  write link 'baseStr_to_decIntSigned : ' (string hhmmss_elapsed t_start) ; }

# 01Jun2018 100,000 loops each (timings of the implementations as they were on that date)
# qnial> baseStr_to_decIntSigned_time
# baseStr_to_decIntSigned_len : 00:00:46
# baseStr_to_decIntSigned : 00:00:11
# >> four-fold faster with simpler approach
# >> I'm sure it could still be improved a lot!!

#**************************
# build_hexTable IS
# ?dates?
# 01Jun2018 I am focussing ONLY on charsets that I know & use!
# first byte -62 -> 2*2-byte sequences
# first byte -120,
# most of my unicodes are 3 byte, so the first conditional should use that assumption, then [2,4,1]
# but if most unicodes are 3-byte, I might as well start assuming possibly 4-byte unicode ;

IF flag_debug THEN write 'loading build_hexTable' ; ENDIF ;

#] build_hexTable IS - 
# Builds and returns a table (8 rows by 5 columns) of byte ranges and their interpretation :
#    rows 0-1   : column headings
#    rows 2-7   : hexLo, hexHi, decLo, decHi, description
# The decLo / decHi cells are written as null and then filled in by applying
# baseStr_to_decIntSigned to the hexLo / hexHi cells of the same row.
# The number of rows is derived from the item count, so rows can be added to the list freely
# as long as each row has tbl_cols (5) items.
# The working variables (r tbl_cols tbl_rows) are now declared LOCAL; previously they were
# assigned without a declaration.
# NOTE(review): two data rows disagree with the standard UTF-8 layout (0x80..0xBF continuation
# bytes only, 0xC0..0xDF first byte of a 2-byte encoding, 0xE0..0xEF first byte of a 3-byte encoding) :
#    '0x80' '0x9F' is described as the first byte of a 3-byte encoding
#    '0xA0' '0xDF' is described as the first byte of a 2-byte encoding
# The byte values they cite ([-18,-30], [-62]) are 0xEE, 0xE2, 0xC2 when read as two's-complement
# bytes, which would fit the standard layout. The text is kept as recorded - confirm before relying on it.

build_hexTable IS
{ LOCAL hexTable r tbl_cols tbl_rows ;
  hexTable :=
     '' '' 'Signed' '-Int' 'Howell''s interpretation'
     'hexLo' 'hexHi' 'decLo' 'decHi' ' '
     '0x00' '0x7F' null null 'One-byte standard characters '
     '0x80' '0x9F' null null 'First byte of a 3-byte character encoding (I assume because of [-18,-30]!!)'
     '0xA0' '0xDF' null null 'First byte of a 2-byte character encoding (I assume because of [-62]!!)'
     '0xE0' '0xEF' null null 'First byte of a 3-byte character encoding '
     '0xF0' '0xF4' null null 'First byte of a 4-byte character encoding '
     '0x80' '0xBF' null null 'Continuation bytes that follow first bytes, (can also be first byte)'
  ;
  tbl_cols := 5 ;
  tbl_rows := floor ((gage shape hexTable) / tbl_cols) ;
  hexTable := tbl_rows tbl_cols reshape hexTable ;
  FOR r WITH (2 + tell (tbl_rows - 2)) DO
     hexTable@(r 2) := baseStr_to_decIntSigned hexTable@(r 0) ;
     hexTable@(r 3) := baseStr_to_decIntSigned hexTable@(r 1) ;
  ENDFOR ;
  hexTable }

#https://stackoverflow.com/questions/5290182/how-many-bytes-does-one-unicode-character-take
#How many bytes does one Unicode character take?
#Strangely enough, nobody pointed out how to calculate how many bytes is taking one Unicode char.
# Here is the rule for UTF-8 encoded strings:
#
#Binary      Hex          Comments
#0xxxxxxx    0x00..0x7F   Only byte of a 1-byte character encoding
#10xxxxxx    0x80..0xBF   Continuation bytes (1-3 continuation bytes)
#110xxxxx    0xC0..0xDF   First byte of a 2-byte character encoding
#1110xxxx    0xE0..0xEF   First byte of a 3-byte character encoding
#11110xxx    0xF0..0xF4   First byte of a 4-byte character encoding
#
#So the quick answer is: it takes 1 to 4 bytes, depending on the first one which will indicate how many bytes it'll take up.
#As prewett pointed out, this rule only applies to UTF-8
#edited Nov 7 '16 at 6:51
#answered Oct 26 '15 at 15:38
#paul.ago
#
# int_lo int_hi := EACH baseStr_to_decIntUnSigned '0x00' '0x7F'
# EACH char (int_lo + tell (int_hi - int_lo))
#  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
#
# qnial> build_hexTable
# +-----+-----+------+-----+---------------------------------------------------------------------------+
# |     |     |Signed|-Int |Howell's interpretation                                                    |
# +-----+-----+------+-----+---------------------------------------------------------------------------+
# |hexLo|hexHi|decLo |decHi|                                                                           |
# +-----+-----+------+-----+---------------------------------------------------------------------------+
# |0x00 |0x7F |     0|  127|One-byte standard characters                                               |
# +-----+-----+------+-----+---------------------------------------------------------------------------+
# |0x80 |0x9F |     0|  -31|First byte of a 3-byte character encoding (I assume because of [-18,-30]!!)|
# +-----+-----+------+-----+---------------------------------------------------------------------------+
# |0xA0 |0xDF |   -32|  -95|First byte of a 2-byte character encoding (I assume because of [-62]!!)    |
# +-----+-----+------+-----+---------------------------------------------------------------------------+
# |0xE0 |0xEF |   -96| -111|First byte of a 3-byte character encoding                                  |
# +-----+-----+------+-----+---------------------------------------------------------------------------+
# |0xF0 |0xF4 |  -112| -116|First byte of a 4-byte character encoding                                  |
# +-----+-----+------+-----+---------------------------------------------------------------------------+
# |0x80 |0xBF |     0|  -63|Continuation bytes that follow first bytes, (can also be first byte)       |
# +-----+-----+------+-----+---------------------------------------------------------------------------+
#
# NOTE : the decLo / decHi values in this recorded output are sign-magnitude readings of the hex
# bytes (top bit = sign, remaining 7 bits = magnitude), e.g. 0x80 -> 0 and 0x9F -> -31.
# Read as two's-complement bytes the same values would be 0x80 -> -128 and 0x9F -> -97.
# Treat the table above as historical output.
#
# 01Jun2018 for detailed lists of charcodes :
# see "/media/bill/SWAPPER/Qnial/MY_NDFS/strings- unicode chars by decInt combinations.txt"
# 01Jun2018 A NEGATIVE direction means starting with the character str@i_str and going back (useful for substitutions)
#
# loaddefs_ended f_numberBaseConversions ;
# enddoc