#] #] ********************* #] loaddefs link d_Qndfs 'dictionaries.ndf' - here, dictionaries are actually sorted lists of words #] this is very simplistic, as the pdftotext conversion is good, not like mishmashed columnar format! # www.BillHowell.ca ??date?? initial # view this file in a text editor, with [constant width font, tab = 3 spaces], no line-wrap # 26Jun2021 "word list" is the phrase I should use, rather than "dictionary" f_load := 'dictionaries.ndf' ; loaddefs_start f_load ; #**************************** # List of operators, generated with : # $ grep "^#]" "$d_Qndfs""dictionaries.ndf" | sed 's/^#\]/ /' # ********************* loaddefs link d_Qndfs 'dictionaries.ndf' - here, dictionaries are actually sorted lists of words this is very simplistic, as the pdftotext conversion is good, not like mishmashed columnar format! +-----+ [create, process] dictionaries pTxt_make_pWrd IS OP pTxt pWrd - generate a sorted list of words from a text file pTxt_make_pWrd_test IS - res ipsa loquitor pDicInn_removeApoLines_pDicOut IS OP pDicInn pDicOut - remove lines with apostrophes (apos) pDicL_merge_pDic IS OP pDicL pdic - [cat, sort unique] multiple dictionaries pDicL_merge_pDic_test IS - res ipsa loquitor pDwd_pDic_extract_pDif IS OP pDwd pDic pDif - create a specialized dictionary not in old dictionaries +-----+ Dictionary.com wordList (dictionary) DictionaryCom_create_pDicCom IS - rip [ante, post]cedent, word, hase, acronym]s from Dictionary.com use this to call up definition page from Dictionary.com pDicCom_clean IS - testbed for cleanup of downloaded DictionaryCom pDicCom_pDicLinux_diff IS - testbed for cleanup of downloaded DictionaryCom +-----+ Vaccine wordList (dictionary) The approach here is manual cy-paste from urls, then build vaccineDic dicPathL_create_pDicVaccine IS - rip [ante, post]cedent, word, phrase, acronym]s from Dictionary.com pVaccine_pDicLinux_diff IS - testbed for cleanup of downloaded DictionaryCom +-----+ Combined list of words (dictionary) 
#   pathL_create_pDicAll IS - [select, combine] dictionaries
# +-----+ repair p_text using dictionaries (eg pdf files), kind of like a spell check
#   pTxt_pDic_fixFrags IS OP ptxt pdic -
#   pClean_replace_pFragL_pSubFragL IS OP pFragL pSubFragL - file read of variables

#***********
# Setup (kind of like header info)
# header file usually loaded by 'email analysis - Fauci corona virus.ndf' :
# loaddefs link d_Qndfs 'email analysis - Fauci corona virus header.ndf' ;
# standard word lists : the stock Linux dictionary, plus a copy with apostrophe lines removed
p_dicUSA_Linux := '/usr/share/dict/american-english' ;
p_dicUSA_noApos := link d_Qndfs 'dictionary Linux american-english noApos.txt' ;
# keep a dated backup of this file each time it is loaded
path_backupDatedTo_zArchive (link d_Qndfs 'dictionaries.ndf') ;

#*****************************
#] +-----+
#] [create, process] dictionaries
IF flag_debug THEN write '+-----+' ; ENDIF ;
IF flag_debug THEN write '[create, process] dictionaries' ; ENDIF ;

# loaddefs link d_Qndfs 'dictionaries.ndf'
IF flag_debug THEN write 'loading pTxt_make_pWrd' ; ENDIF ;
#] pTxt_make_pWrd IS OP pTxt pWrd - generate a sorted list of words from a text file
# pTxt : existing text file to scan ; pWrd : output word list, overwritten
# rips \w+ tokens via grep, drops tokens containing digits, writes a sorted unique list
pTxt_make_pWrd IS OP pTxt pWrd
   {
   IF (NAND (EACH path_exists ("p_old pTxt) ("p_new pWrd)))
      THEN EACH write '?pTxt_make_pWrd error, file unknown, one of : ' pTxt pWrd ;
      ELSE host link 'grep -o -E ' chr_apo '\w+' chr_apo ' "' ptxt '" | grep --invert-match "[0-9]\+" | sort -u >"' pwrd '"' ;
   ENDIF ;
   }

# loaddefs link d_Qndfs 'dictionaries.ndf'
IF flag_debug THEN write 'loading pTxt_make_pWrd_test' ; ENDIF ;
#] pTxt_make_pWrd_test IS - res ipsa loquitor
# smoke-test : run pTxt_make_pWrd on fixed files in d_temp
pTxt_make_pWrd_test IS
   { LOCAL p_1stClean pWrd ;
   NONLOCAL d_temp ;
   p_1stClean := link d_temp '1stClean temp.txt' ;
   pwrd := link d_temp 'extract_pFragsAndSubs dicWrds.txt' ;
   pTxt_make_pWrd p_1stClean pWrd ;
   }

# loaddefs link d_Qndfs 'dictionaries.ndf'
IF flag_debug THEN write 'loading pDicInn_removeApoLines_pDicOut' ; ENDIF ;
#] pDicInn_removeApoLines_pDicOut IS OP pDicInn pDicOut - remove lines with apostrophes (apos)
# pDicInn is typically a Linux-style dictionary that denotes nouns by appending 's
#
# usually only has to be run once, result saved to d_Qndfs for use in context of many dictionaries
# 22Jun2021 initial
# pDicInn : existing input word list ; pDicOut : output file, overwritten
pDicInn_removeApoLines_pDicOut IS OP pDicInn pDicOut
   {
   IF (NAND (EACH path_exists ("p_old pDicInn) ("p_new pDicOut)))
      THEN EACH write '?pDicInn_removeApoLines_pDicOut error, file unknown, one of : ' pDicInn pDicOut ;
      ELSE host link 'grep --invert-match "' chr_apo '" "' pDicInn '" >"' pDicOut '" ' ;
   ENDIF ;
   }

# loaddefs link d_Qndfs 'dictionaries.ndf'
IF flag_debug THEN write 'loading pDicL_merge_pDic' ; ENDIF ;
#] pDicL_merge_pDic IS OP pDicL pdic - [cat, sort unique] multiple dictionaries
# pDicL : list of existing word-list paths ; pdic : merged output file
# NOTE(review) : '>>' APPENDS the sorted merge to pdic - if pdic already exists the combined
#   file is no longer globally [sorted, unique] ; presumably '>' was intended - confirm
pDicL_merge_pDic IS OP pDicL pdic
   { LOCAL strQuotedListOfDicL ;
   IF (NAND (AND (EACHRIGHT path_exists "p_old pDicL)) (path_exists "p_new pdic))
      THEN write '?pDicL_merge_pDic error, file unknown, one of : ' ;
         EACH write pDicL ;
         EACH write pdic '' ;
      ELSE strQuotedListOfDicL := strL_eachQuoted_strOut pDicL ;
         host link 'cat ' strQuotedListOfDicL ' | sort -u >>"' pdic '" ' ;
   ENDIF ;
   }

# loaddefs link d_Qndfs 'dictionaries.ndf'
IF flag_debug THEN write 'loading pDicL_merge_pDic_test' ; ENDIF ;
#] pDicL_merge_pDic_test IS - res ipsa loquitor
# smoke-test : merge a temp word list with the Fauci dictionary
pDicL_merge_pDic_test IS
   { LOCAL pdic pDicL ;
   NONLOCAL d_Fauci d_temp ;
   pDicL := (link d_temp 'extract_pFragsAndSubs dicWrds.txt') (link d_Fauci '7_Fauci dictionary.txt') ;
   pdic := link d_temp 'pDicL_merge_pDic_test result.txt' ;
   pDicL_merge_pDic pDicL pdic ;
   }

# loaddefs link d_Qndfs 'dictionaries.ndf'
IF flag_debug THEN write 'loading pDwd_pDic_extract_pDif' ; ENDIF ;
#] pWrd_pDic_extract_pDif IS OP pWrd pDic pDif - create a specialized dictionary not in old dictionaries
# NOTE(review) : header previously said pDwd_pDic_extract_pDif ; the definition below is
#   pWrd_pDic_extract_pDif, and the 'loading' message above still says pDwd - confirm intended name
# should be [automatic, manu]ally cleaned up (I'm too lazy - huge job!)
# pWrd : word list ripped from a text ; pDic : reference dictionary ;
# pDif : output - words of pWrd absent from pDic (tokens containing underscores are culled)
pWrd_pDic_extract_pDif IS OP pWrd pDic pDif
   {
   IF (NAND (EACH path_exists ("p_old pWrd) ("p_old pdic) ("p_new pdif)))
      THEN EACH write '?pWrd_pDic_extract_pDif error, file unknown, one of : ' pWrd pdic pdif ;
      ELSE host link 'diff "' pdic '" "' pWrd '" | grep "^>" | grep --invert-match "[_]\+" | sed "s/^> //" | sort -u -f >"' pdif '"' ;
   ENDIF ;
   }

#*****************************
#] +-----+
#] Dictionary.com wordList (dictionary)
IF flag_debug THEN write '+-----+' ; ENDIF ;
IF flag_debug THEN write 'Dictionary.com wordList (dictionary)' ; ENDIF ;

# loaddefs link d_Qndfs 'dictionaries.ndf'
IF flag_debug THEN write 'loading DictionaryCom_create_pDicCom' ; ENDIF ;
#] DictionaryCom_create_pDicCom IS - rip [ante, post]cedent, [word, phrase, acronym]s from Dictionary.com
#] use this to call up definition page from Dictionary.com
# 28Jun2021 see also : 'wmctrl notes.txt'
#   link d_Qndfs 'windows [open, close, ID, title], [get,set,move] cart [posn,size].ndf' ;
#   link d_Qndfs 'Linux signals applications non-window.ndf' ;
#   link d_Qndfs 'economics, markets/options data [download, process].ndf'
# WARNING make sure that [webPage, text file]s are UNIQUE before running!!!
# WARNING make sure that ALL Dictionary.com [webPage, text file]s are CLOSED before running!!!
# downloads every 'browse' index page of Dictionary.com and accumulates the raw text into pDicCom
# fixed here : d_dic was assigned but not declared LOCAL, [d_Qndfs, d_temp] were used without
#   NONLOCAL declarations (both leaked through to the global scope) ; unused LOCAL url_dicCom removed
DictionaryCom_create_pDicCom IS
   { LOCAL d_dic i_page letter letters nPages pDicCom p_temp1 p_temp2 url_dicCom_base winName ;
   NONLOCAL d_Qndfs d_temp ;
   % ;
   url_dicCom_base := 'https://www.dictionary.com/list/' ;
   winName := 'Browse Dictionary' ;
   d_dic := link d_Qndfs 'dictionaries/' ;
   pDicCom := link d_dic 'Dictionary.com.txt' ;
   p_temp1 := link d_temp 'Dic temp1.txt' ;
   p_temp2 := link d_temp 'Dic temp2.txt' ;
   % ;
   IF (NAND (EACH path_exists ("p_new pDicCom) ("p_new p_temp1)))
      THEN EACH write '?DictionaryCom_create_pDicCom error, file unknown : ' pDicCom ;
      ELSE
         % rip text of webPages ;
         letters := '0' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' 'p' 'q' 'r' ;
         nPages := 3 49 46 67 36 27 29 27 32 25 8 11 28 43 20 19 59 4 29 ;
         letters := link letters 's' 't' 'u' 'v' 'w' 'x' 'y' 'z' ;
         nPages := link nPages 76 39 13 12 19 1 3 3 ;
         write link 'sum NPages : ' (string (sum nPages)) ;
         % write mix letters nPages ;
         host link 'echo " " >"' pDicCom '"' ;
         FOR letter WITH letters DO
            % NOTE(review) : tell yields page indices (0 .. n-1) - confirm Dictionary.com page numbering ;
            FOR i_page WITH (tell ((find_Howell letter letters) pick npages)) DO
               write link 'letter : ' letter ', page : ' (string i_page) ;
               url_downloadTxtTo_path (link url_dicCom_base letter '/' (string i_page)) 'Browse Dictionary' p_temp1 ;
               % append the freshly-downloaded page text to the accumulating pDicCom ;
               host link 'cat "' pDicCom '" "' p_temp1 '" >"' p_temp2 '"' ;
               host link 'mv "' p_temp2 '" "' pDicCom '"' ;
            ENDFOR ;
         ENDFOR ;
   ENDIF ;
   }
# qnial> path_extract_fname '/media/bill/ramdisk/Dictionary.com temp1.txt'
#   Dictionary.com temp1.txt
#   >> OK
# full dictionary
letters := 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' 'p' 'q' 'r' ;
nPages := 67 36 27 29 27 32 25 8 11 28 43 20 19 59 4 29 ;
letters := link letters 's' 't' 'u' 'v' 'w' 'x' 'y' 'z' ;
nPages := link nPages 76 39 13 12 19 1 3 3 ;
# small test set
letters := ['0'] ;
nPages := [ 3 ] ;
# done '0' 'a' 'b' 3 49 46

# loaddefs link d_Qndfs 'dictionaries.ndf'
IF flag_debug THEN write 'loading pDicCom_clean' ; ENDIF ;
#] pDicCom_clean IS - testbed for cleanup of downloaded DictionaryCom
# 28Jun2021 initial
# strips webPage [head, foot]er sections, leading spaces, [phrase, startHyphen, period, apo]
#   entries and blank lines, then QNial-sortups the result into pDicCln
# fixed here : pDicCln was assigned but missing from the LOCAL list (leaked to global scope)
pDicCom_clean IS
   { LOCAL d_dic finn fout p_temp1 p_temp2 pDicCln pDicCom line ;
   NONLOCAL d_Qndfs d_temp ;
   % ;
   d_dic := link d_Qndfs 'dictionaries/' ;
   % pDicCom := link d_dic 'Dictionary.com test clean.txt' ;
   pDicCom := link d_dic 'Dictionary.com.txt' ;
   pDicCln := link d_dic 'Dictionary.com clean.txt' ;
   p_temp1 := link d_temp 'Dic temp1.txt' ;
   p_temp2 := link d_temp 'Dic temp2.txt' ;
   % ;
   % remove [head, foot]er of webPages ;
   finn := open pDicCom "r ;
   fout := open p_temp1 "w ;
   WHILE (NOT isfault (line := readfile finn)) DO
      IF (line = ' DICTIONARY.COM')
         THEN WHILE (NOT isfault (line := readfile finn)) DO
            % write line ;
            IF (line = ' z') THEN EXIT 'done' ; ENDIF ;
         ENDWHILE ;
      ELSEIF (OR (line EACHRIGHT =
            'Tired of Typos? Get Help Now!'
            (link 'See Today' chr_apo 's Synonym')
            'Book Your Online Tutor Now'
            ) )
         THEN WHILE (NOT isfault (line := readfile finn)) DO
            % write line ;
            IF (line = '© 2021 Dictionary.com, LLC') THEN EXIT 'done' ; ENDIF ;
         ENDWHILE ;
      ELSEIF (str_isBlank line) THEN null ;
      ELSE writefile fout line ;
      ENDIF ;
   ENDWHILE ;
   EACH close finn fout ;
   % ;
   % remove leading spaces, cull [??[pre, ante]fix??, phrase, startHyphen, period, apo]s, blanklines, sortup ;
   host link 'sed "s/^ //;s/^-\(.*\)//;s/\(.*\) \(.*\)//g;s/“//;s/”//;s/’//" "' p_temp1 '" >"' p_temp2 '"' ;
   host link 'sed s/\' chr_apo '/zzz/ "' p_temp2 '" | sed -e "s/\./zzz/" | grep --invert-match "zzz" >"' p_temp1 '"' ;
   host link 'grep "\S" "' p_temp1 '" >"' p_temp2 '"' ;
   host link 'sort -u "' p_temp2 '" >"' p_temp1 '"' ;
   % Unix sort is NOT the same as QNial sort, need QNial for searches, but Unix faster & helps QNial ;
   pinn_sortupTo_pout p_temp1 pDicCln ;
   }
# host link 'sed s/\' chr_apo '/zzz/ "' (link d_Qndfs 'dictionaries/Dictionary.com.txt') '" | sed -e "s/\./zzz/" | grep --invert-match "zzz" >"' (link d_temp 'Dic temp1.txt') '"'

# loaddefs link d_Qndfs 'dictionaries.ndf'
IF flag_debug THEN write 'loading pDicCom_pDicLinux_diff' ; ENDIF ;
#] pDicCom_pDicLinux_diff IS - testbed for cleanup of downloaded DictionaryCom
# 28Jun2021 initial
# diffs the cleaned Dictionary.com list against the Linux word list into a temp file
# fixed here : d_temp was used for pDiff but not declared NONLOCAL
pDicCom_pDicLinux_diff IS
   { LOCAL d_dic pDicCom pDiff pLinux ;
   NONLOCAL d_Qndfs d_temp ;
   % ;
   d_dic := link d_Qndfs 'dictionaries/' ;
   pDicCom := link d_dic 'Dictionary.com clean.txt' ;
   pLinux := link d_dic 'dictionary Linux american-english noApos.txt' ;
   pDiff := link d_temp 'pDicCom_pDicLinux_diff temp.txt' ;
   % ;
   EACH write d_dic d_Qndfs pDicCom pDiff pLinux ;
   host link 'diff "' pLinux '" "' pDicCom '" --suppress-common-lines >"' pDiff '"' ;
   }
# diff "/media/bill/Dell2/Website - raw/Qnial/MY_NDFS/dictionaries/dictionary Linux american-english noApos.txt" "/media/bill/Dell2/Website - raw/Qnial/MY_NDFS/dictionaries/Dictionary.com clean.txt" --suppress-common-lines >"/media/bill/ramdisk/pDicCom_pDicLinux_diff temp.txt"

#*****************************
#] +-----+
#] Vaccine wordList (dictionary)
#] The approach here is manual copy-paste from urls, then build vaccineDic
IF flag_debug THEN write '+-----+' ; ENDIF ;
IF flag_debug THEN write 'Vaccine wordList (dictionary)' ; ENDIF ;

# loaddefs link d_Qndfs 'dictionaries.ndf'
IF flag_debug THEN write 'loading dicPathL_create_pDicVaccine' ; ENDIF ;
#] dicPathL_create_pDicVaccine IS - rip [ante, post]cedent, [word, phrase, acronym]s from Dictionary.com
# 29Jun2021 initial
# merges the [CDC glossary, wikipedia] vaccine texts, rips words, sortups into pCDCWikip
# fixed here : error message previously named the wrong op (?DictionaryCom_create_pVaccine) ;
#   [d_Qndfs, d_temp] now declared NONLOCAL
dicPathL_create_pDicVaccine IS
   { LOCAL d_dic pCDC pCDCWikip pWikipedia p_temp1 p_temp2 ;
   NONLOCAL d_Qndfs d_temp ;
   % ;
   d_dic := link d_Qndfs 'dictionaries/' ;
   pCDC := link d_dic 'vaccine CDC glossary.txt' ;
   pWikipedia := link d_dic 'vaccine wikipedia.txt' ;
   pCDCWikip := link d_dic 'vaccine CDC and wikipedia.txt' ;
   p_temp1 := link d_temp 'vaccine temp1.txt' ;
   p_temp2 := link d_temp 'vaccine tem2.txt' ;
   % ;
   IF (NAND (EACH path_exists ("p_old pCDC) ("p_old pWikipedia) ("p_new pCDCWikip)))
      THEN EACH write '?dicPathL_create_pDicVaccine error, file unknown : ' pCDC pCDCWikip pWikipedia ;
      ELSE
         host link 'cat "' pCDC '" "' pWikipedia '" >"' p_temp1 '"' ;
         host link 'grep -o -E ' chr_apo '\w+' chr_apo ' "' p_temp1 '" | grep --invert-match "_" | sed "s/[0-9]*//;s/^[A-Za-z]$//" | sort -u -f >"' p_temp2 '"' ;
         % Unix sort is NOT the same as QNial sort, need QNial for searches, but Unix faster & helps QNial ;
         pinn_sortupTo_pout p_temp2 pCDCWikip ;
   ENDIF ;
   }

# loaddefs link d_Qndfs 'dictionaries.ndf'
IF flag_debug THEN write 'loading pVaccine_pDicLinux_diff' ; ENDIF ;
#] pVaccine_pDicLinux_diff IS - testbed for cleanup of downloaded DictionaryCom
# 29Jun2021 initial - incomplete, hasn't been tried yet
pVaccine_pDicLinux_diff IS
   { LOCAL d_dic pVaccine pDiff pLinux ;
   NONLOCAL d_Qndfs d_temp ;
   % ;
   d_dic := link d_Qndfs 'dictionaries/' ;
   % NOTE - review : pVaccine points at the Dictionary.com list, not the vaccine list - ;
   %   presumably should be 'vaccine CDC and wikipedia.txt' - confirm before first use ;
   pVaccine := link d_dic 'Dictionary.com clean.txt' ;
   pLinux := link d_dic 'dictionary Linux american-english noApos.txt' ;
   pDiff := link d_temp 'pVaccine_pDicLinux_diff temp.txt' ;
   % ;
   EACH write d_dic d_Qndfs pVaccine pDiff pLinux ;
   host link 'diff "' pLinux '" "' pVaccine '" --suppress-common-lines >"' pDiff '"' ;
   }
# diff "/media/bill/Dell2/Website - raw/Qnial/MY_NDFS/dictionaries/dictionary Linux american-english noApos.txt" "/media/bill/Dell2/Website - raw/Qnial/MY_NDFS/dictionaries/Dictionary.com clean.txt" --suppress-common-lines >"/media/bill/ramdisk/pVaccine_pDicLinux_diff temp.txt"

#*****************************
#] +-----+
#] Combined list of words (dictionary)
IF flag_debug THEN write '+-----+' ; ENDIF ;
IF flag_debug THEN write 'Combined list of words (dictionary)' ; ENDIF ;

# loaddefs link d_Qndfs 'dictionaries.ndf'
IF flag_debug THEN write 'loading pathL_create_pDicAll' ; ENDIF ;
#] pathL_create_pDicAll IS - [select, combine] dictionaries
# cats the [Linux, Dictionary.com, vaccine] word lists, Unix sort -u -f, then QNial sortup into pDicAll
# fixed here : unused LOCAL cmd removed ; [d_Qndfs, d_temp] now declared NONLOCAL
pathL_create_pDicAll IS
   { LOCAL d_dic pDicAll pDicCom pLinux pVaccine p_temp1 p_temp2 ;
   NONLOCAL d_Qndfs d_temp ;
   % ;
   d_dic := link d_Qndfs 'dictionaries/' ;
   pLinux := link d_dic 'dictionary Linux american-english noApos.txt' ;
   pDicCom := link d_dic 'Dictionary.com clean.txt' ;
   pVaccine := link d_dic 'vaccine CDC and wikipedia.txt' ;
   pDicAll := link d_dic 'combined [Linux, Dictionary.com, vaccine [CDC, Wikipedia]].txt' ;
   p_temp1 := link d_temp 'vaccine temp1.txt' ;
   p_temp2 := link d_temp 'vaccine temp2.txt' ;
   % ;
   IF (NAND (EACH path_exists ("p_old pLinux) ("p_old pDicCom) ("p_old pVaccine) ("p_new pDicAll) ) )
      THEN EACH write '?pathL_create_pDicAll error, one of files unknown : ' pDicAll pDicCom pLinux pVaccine ;
      ELSE
         host link 'cat "' pLinux '" "' pDicCom '" "' pVaccine '" | sort -u -f >"' p_temp1 '"' ;
         % Unix sort is NOT the same as QNial sort, need QNial for searches, but Unix faster & helps QNial ;
         pinn_sortupTo_pout p_temp1 pDicAll ;
   ENDIF ;
   }

#*****************************
#] +-----+
#] repair p_text using dictionaries (eg pdf files), kind of like a spell check
IF flag_debug THEN write '+-----+' ; ENDIF ;
IF flag_debug THEN write 'repair p_text using dictionaries (eg pdf files), kind of like a spell check' ; ENDIF ;

# loaddefs link d_Qndfs 'dictionaries.ndf'
IF flag_debug THEN write 'loading pTxt_pDic_fixFrags' ; ENDIF ;
#] pTxt_pDic_fixFrags IS OP ptxt pdic -
# pdic = dictionary of words, merged from [standard Linux words, other dictionaries], always sortuped
# ptxt = text version of emailPdf
# pfr[*] = various stages of sed markups
# pgp[*] = various stages of grep extractions to files of sequential frags
# pwrd = sorted list of words from ptxt
# 01Jul2021 initial, ripped up earlier version : see 'dictionaries - how NOT to process.ndf' (started 19Jun2021)
# fixed here : [pmrk, ptm1, ptm2, ptx1, frag, frag[2-5], frgs[2-5]] were assigned but missing from
#   the LOCAL list while many declared LOCALs were unused ; the four final greps now redirect (>)
#   to the pgp[2-5] files - previously the pgp path was passed to grep as a second INPUT file
pTxt_pDic_fixFrags IS OP ptxt pdic
   { LOCAL frag fragL frag2 frag3 frag4 frag5 frgs2 frgs3 frgs4 frgs5
      pfrg pfr1 pfr2 pfr3 pfr4 pfr5 pgp1 pgp2 pgp3 pgp4 pgp5 pmrk ptm1 ptm2 ptx1 pwrd ;
   NONLOCAL d_temp ;
   % ;
   pfrg := link d_temp 'frags.txt' ;
   pfr1 := link d_temp 'frag1.txt' ;
   pfr2 := link d_temp 'frag2.txt' ;
   pfr3 := link d_temp 'frag3.txt' ;
   pfr4 := link d_temp 'frag4.txt' ;
   pfr5 := link d_temp 'frag5.txt' ;
   pgp1 := link d_temp 'grep1.txt' ;
   pgp2 := link d_temp 'grep2.txt' ;
   pgp3 := link d_temp 'grep3.txt' ;
   pgp4 := link d_temp 'grep4.txt' ;
   pgp5 := link d_temp 'grep5.txt' ;
   pmrk := link d_temp 'emails - markup frags.txt' ;
   ptm1 := link d_temp 'temp1.txt' ;
   ptm2 := link d_temp 'temp2.txt' ;
   ptx1 := link d_temp 'emails - chain frags.txt' ;
   pwrd := link d_temp 'words.txt' ;
   % ;
   IF (NAND (EACH path_exists ("p_old ptxt) ("p_old pdic) ))
      THEN EACH write '?pTxt_pDic_fixFrags error, file unknown, one of : ' pdic ptxt '' ;
      ELSE
         write link timestamp_YYMMDD_HMS '-> generate a sorted list of words from the text' ;
         % output to pwrd - could not get the grep to work directly from QNial ;
         host 'bash "$d_bin""emails - convert pdfCompilation to text, list all words.sh" ' ;
         % ;
         % I need to deal with capitalized first letter of each sentence ;
         % ;
         write link timestamp_YYMMDD_HMS '-> diff pdic and pwrd, sort to pfrg' ;
         % find lines of SORTED pwrd not in pdic, 29Jun2021 cd "[_]\+" to "_", might be a problem ;
         % 29Jun2021 (NYET? later) to reduce [data, computation]s, remove pdic elements to length >1 ? ;
         host link 'diff "' pdic '" "' pwrd '" | grep "^>" | sed "s/^> //" | sort -u -f >"' pfrg '"' ;
         fragL := pinn_read_strL pfrg ;
         write '(gage shape fragL) = ' (gage shape fragL) ;
         % ;
         write link timestamp_YYMMDD_HMS '-> markup all "frag"s in ptxt with "(|frag|)"' ;
         % excessively long, not optimized! could use split multi-greps for ~10,000 frags? ;
         host link 'cp -p "' ptxt '" "' ptm1 '"' ;
         FOR frag WITH fragL DO
            host link 'sed "s/ ' frag ' / (|' frag '|) /g" "' ptm1 '" >"' ptm2 '"' ;
            host link 'mv "' ptm2 '" "' ptm1 '"' ;
         ENDFOR ;
         host link 'mv "' ptm1 '" "' pmrk '"' ;
         % ;
         write link timestamp_YYMMDD_HMS '-> sed frag sequences of (1-5) frags lengths' ;
         % 01Jul2021 currently will miss frags across \n, multiple (space, tab), tabs, etc) ;
         frag5 := ' (|\([A-Za-z0-9]*\)|) (|\([A-Za-z0-9]*\)|) (|\([A-Za-z0-9]*\)|) (|\([A-Za-z0-9]*\)|) (|\([A-Za-z0-9]*\)|) ' ;
         frag4 := ' (|\([A-Za-z0-9]*\)|) (|\([A-Za-z0-9]*\)|) (|\([A-Za-z0-9]*\)|) (|\([A-Za-z0-9]*\)|) ' ;
         frag3 := ' (|\([A-Za-z0-9]*\)|) (|\([A-Za-z0-9]*\)|) (|\([A-Za-z0-9]*\)|) ' ;
         frag2 := ' (|\([A-Za-z0-9]*\)|) (|\([A-Za-z0-9]*\)|) ' ;
         frgs5 := ' (|\1|)(|\2|)(|\3|)(|\4|)(|\5|) ' ;
         frgs4 := ' (|\1|)(|\2|)(|\3|)(|\4|) ' ;
         frgs3 := ' (|\1|)(|\2|)(|\3|) ' ;
         frgs2 := ' (|\1|)(|\2|) ' ;
         host link 'sed "s/' frag5 '/' frgs5 '/" "' pmrk '" >"' pfr5 '"' ;
         host link 'sed "s/' frag4 '/' frgs4 '/" "' pfr5 '" >"' pfr4 '"' ;
         host link 'sed "s/' frag3 '/' frgs3 '/" "' pfr4 '" >"' pfr3 '"' ;
         host link 'sed "s/' frag2 '/' frgs2 '/" "' pfr3 '" >"' ptx1 '"' ;
         % ;
         write link timestamp_YYMMDD_HMS '-> grep frag sequences of (1-5) frags lengths' ;
         host link 'grep " (|[A-Za-z0-9]*|)(|[A-Za-z0-9]*|)(|[A-Za-z0-9]*|)(|[A-Za-z0-9]*|)(|[A-Za-z0-9]*|) " "' ptx1 '" >"' pgp5 '"' ;
         host link 'grep " (|[A-Za-z0-9]*|)(|[A-Za-z0-9]*|)(|[A-Za-z0-9]*|)(|[A-Za-z0-9]*|) " "' ptx1 '" >"' pgp4 '"' ;
         host link 'grep " (|[A-Za-z0-9]*|)(|[A-Za-z0-9]*|)(|[A-Za-z0-9]*|) " "' ptx1 '" >"' pgp3 '"' ;
         host link 'grep " (|[A-Za-z0-9]*|)(|[A-Za-z0-9]*|) " "' ptx1 '" >"' pgp2 '"' ;
   ENDIF ;
   }
# markup_test IS { str := host link 'sed "s/ \(^[|]\+\)' frag '\(^[|)]\+\) / (|' frag '|) /g" "' ptm1 '" >"' ptm2 '"' ; }
# $ echo "bother but I wanted to not ify the Coronavirus Task force about potential blood shortages if blood" | sed "s/ not / (|not|) /g"
# $ echo "bother but I wanted to (|not|) ify the Coronavirus Task force
about potential blood shortages if blood" | sed "s/ not / (|not|) /g" # $ echo "bother but I wanted to not ify the Coronavirus Task force about potential blood shortages if blood" | sed "s/ \(^[(|]*\)not\(^[|)]*\) / (|not|) /g" # later fragL := strList_readFrom_path pfrg ; % ; % 01Jul2021 this will allow easy insertion of higher-order linguistics : [context, grammar, etc] ; words in_dic EACHLEFT := (5 reshape [null]) ; % ; write link timestamp_YYMMDD_HMS '-> process "[||]"s, ' ; % 01Jul2021 this will allow easy insertion of higher-order linguistics : [context, grammar, etc] ; words in_dic EACHLEFT := (5 reshape [null]) ; % ; write link timestamp_YYMMDD_HMS '-> strL_write_pout [, sub]FragL' ; % getconf LINE_MAX maximum length of lines for files (24Jun2021 2048 bytes for my LMDE) ; maxStrLenForFiles := 2048 ; write 'strLen_max = ' (strLen_max := max (EACH (gage shape) goodSubFragL)) ; IF (maxStrLenForFiles < strLen_max) THEN write '?pTxt_pDic_extract_pfrag_pFragSubs error : maxStrLenForFiles ~< (max (EACH (gage shape) goodSubFragL) : ' maxStrLenForFiles strLen_max ; ELSE write link timestamp_YYMMDD_HMS '-> strL_write_pout' ; fragL subFragL := goodfragL goodSubFragL ; strSubFragL := EACH listOfStrL_to_strExecuteMirror subFragL ; strL_write_pout FragL pfrg ; strL_write_pout SubFragL psub ; ENDIF ; # '<<:| ' = (6 take '<<:| µ') # tonumber first (host_result link 'wc -l "' (link d_temp 'extract_pFragsAndSubs raweFrags.txt') '" | sed "s/^\([0-9]\+\)\(.*\)/\1/" ') # write (2 3 reshape ('fragName' 'n_frags' 'n_subFrags' 'rawe' n_raweFlagL ((gage shape) raweSupFragL) ) ; # 10 take raweFragL # post (10 take raweSupFragL) # EACH write (rawefragL@10000) (raweSupFragL@10000) #**************************** # Instructions # loaddefs_ended f_load ; # enddoc