# /media/bill/SWAPPER/Qnial/MY_NDFS/email Thunderbird - delete binary attachments.ndf
# simple work to remove binary files from Thunderbird mailboxes
# AFTER they have been saved as separate files
# Reload helper: d_emailDelAttach / f_emailDelAttach give the directory and name of THIS file,
# and evaluating lq_emailDelAttach re-reads it with loaddefs.
# f_emailDelAttach used to name a different definition file, so the helper reloaded the wrong
# definitions and the "loading" message showed the wrong path. With the name corrected the first
# message is identical to the old hard-coded second one, so only one message is written.

d_emailDelAttach := '/media/bill/SWAPPER/Qnial/MY_NDFS/' ;
f_emailDelAttach := 'email Thunderbird - delete binary attachments.ndf' ;
write link 'loading ' d_emailDelAttach f_emailDelAttach ;

lq_emailDelAttach IS loaddefs link d_emailDelAttach f_emailDelAttach

#*************************
# Status
# extracted with Attachment Extracted, or saved by "Save As"
Operation: Thunderbird_delete_binary_attachments
The operation is not foolproof. Definitions such as "f is g" would not be
found; nor would ones in which the items sought are on different lines.
Assumes that a filename includes path if not in nialroot, and that only one `. occurs!!
#*************************
# DEBUGGING tools
# Debugging - problem with trapping faults:
From: /media/bill/SWAPPER/Qnial/v6testing/0a_notebook.txt
7. Command line options
OK -s suppresses fault triggering
# IF flag_writescreen THEN Writescreen routine ; ENDIF ;
# Debug trace state. Several operators below store their own name in "routine" and, when
# flag_writescreen is true, echo it together with line_count. flag_break is tested in
# clen_filenames before each host rename.

routine := 'routine' ;

# Snippets to paste into an operator while debugging:
# IF flag_break THEN BREAK ; ENDIF ;
# IF flag_writescreen THEN writescreen link routine ' ' (string line_count); ENDIF ;

flag_writescreen := o ;
flag_break := l ; % Set this to track things or not at break point ;
#####################################################################################################
# SETUP, initial assignments
# Libraries (not needed here?...)
# Loads the stripblk and findstri libraries, then loads strings.ndf only when the phrase
# string_split_by_string is not already among "EACH first symbols 0"
# (presumably the names currently defined in the workspace - confirm).

library "stripblk

library "findstri

IF not in "string_split_by_string (EACH first symbols 0) THEN
set "sketch;
loaddefs '/media/bill/SWAPPER/Qnial/MY_NDFS/strings.ndf' 0;
ENDIF;
# From - Tue Jun 02 09:01:50 2009
# Month_convert - translate a three-letter month abbreviation (such as the Jun in the sample line above)
# into a two-digit code from 01 to 12. The argument is converted with "phrase" before the lookup.
# Any other name produces the fault built from 'ERROR Month_convert'.

Month_convert IS OP Month_3name
{ month_names := "Jan "Feb "Mar "Apr "May "Jun "Jul "Aug "Sep "Oct "Nov "Dec ;
  month_codes := '01' '02' '03' '04' '05' '06' '07' '08' '09' '10' '11' '12' ;
  wanted := phrase Month_3name ;
  IF wanted in month_names THEN
    % position of the name in the table selects the matching code ;
    code := (wanted find month_names) pick month_codes ;
  ELSE
    code := fault 'ERROR Month_convert' ;
  ENDIF ;
  code
}
#####################################################################################################
# Character codings
# uses MIME Decoder for PHP: http://www.php.net/manual/en/function.base64-decode.php
# MIME encode / decode
Base64 - http://www.fourmilab.ch/webtools/base64/
list contents first: tar -tvf base64-1.5.tar.gz
extract: tar -x base64-1.5.tar.gz
to run: base64 -d -i "/home/bill/Attachments - Thunderbird/Inbox/filename" "/home/bill/Attachments - Thunderbird/Binary/filename"
# Decimal character codes for single characters used below (apostrophe, comma, carriage return, dash,
# line feed, period, slash, space, tab, underscore). Names and values are paired by position.

chrcd_apost chrcd_comma chrcd_CR chrcd_dash chrcd_LF := 39 44 13 45 10 ;
chrcd_period chrcd_slash chrcd_space chrcd_tab chrcd_undscr := 46 47 32 9 95 ;
# Linux & Windows filename constraints - avoid characters that will cause problems!
# Linux - prohibited -> /" Permitted - *\?<> etc (not many restrictions) also is case-sensitive
# Windows - prohibited -> \/:*?<> Permitted -
# Howell - prohibited -> \/:*?<> plus @%{|}~(127)
# Character-code classes, and from them the sets of characters allowed in file names and path names.
# Each contiguous class is written as  start + tell (end - start + 1).
# chrcd_intl previously started at 165 (giving 165..202): it overlapped the graphical range that is
# deliberately excluded and left out 128..164. It now covers 128..165 as its own expression states.
# charcodes_for_pathname is the filename set plus the slash.

chrcd_pun1 := EACH charrep (` `! `# `$ `% `& `' `( `) `+ `, `- `.) ; %Punctuation 1 space !"#$%&'()*+,-./ CAREFUL - *"/ are NOT legitimate in filenames!! Cannot use 32 + tell ( 47 - 32 + 1) ;
chrcd_digt := 48 + tell ( 57 - 48 + 1) ; %ten digits ;
chrcd_pun2 := EACH charrep (`= `@) ; %Punctuation 2 =@ CAREFUL - : semi-colon ? `< `> are NOT legitimate in filenames!! Cannot use 58 + tell ( 64 - 58 + 1) Also <=>@ not at start of filename! ;
chrcd_caps := 65 + tell ( 90 - 65 + 1) ; %Capital letters ;
chrcd_pun3 := EACH charrep (`[ `] `^ `_) ; %Punctuation 3 []^_ CAREFUL - \ is NOT legitimate in filenames!! Cannot use 91 + tell ( 96 - 91 + 1) ;
chrcd_smal := 97 + tell (122 - 97 + 1) ; %Small letters ;
chrcd_pun4 := EACH charrep (`{ `} `~) ; %Punctuation 4 {}~ CAREFUL - | are NOT legitimate in filenames!! Cannot use 123 + tell (127 - 123 + 1) ;
chrcd_intl := 128 + tell (165 - 128 + 1) ; %International letters & symbols ;
chrcd_graf := 166 + tell (223 - 166 + 1) ; %Graphical and special symbols - Don't use for now ;
chrcd_grek := 224 + tell (238 - 224 + 1) ; %Greek & Latin - OK ;
chrcd_math := 239 + tell (254 - 239 + 1) ; %Mathematical - don't use for now ;
charcodes_for_filename := link chrcd_pun1 chrcd_digt chrcd_pun2 chrcd_caps chrcd_pun3 chrcd_smal chrcd_pun4 chrcd_intl chrcd_grek ;
chars_for_filename := EACH char charcodes_for_filename ;
charcodes_for_pathname := link chrcd_pun1 chrcd_digt chrcd_pun2 chrcd_caps chrcd_pun3 chrcd_slash chrcd_smal chrcd_pun4 chrcd_intl chrcd_grek ;
chars_for_pathname := EACH char charcodes_for_pathname ;
# Will there be a problem with the loaddefs of the `[space]?
# Character values built from the codes above.
# list_LF is a one-item list holding the line-feed character; it was previously built from
# chrcd_space, which contradicted its name.

char_CR := char chrcd_CR ;
char_LF := char chrcd_LF ;
list_LF := list char chrcd_LF ;
char_space := char chrcd_space ;
char_tab := char chrcd_tab ;

# old- change from this

CR := char 13 ;
#####################################################################################################
# State descriptions and globals for this workspace "Thunderbird - delete binary attachments.NDF"
# Integer codes for the kinds of mailbox line. The fourteen names take the values 0..13 in the order
# listed; IN_HEADER is then overridden with 99, which is the value in force for the rest of the file.

CONTENT_DESCRIPTION CONTENT_DISPOSITION CONTENT_ID CONTENT_TRANSFER_ENCODING CONTENT_TYPE
DATE_TAG DASH_START EMPTY_LIN FROM_TAG IN_HEADER
MESSAGE_ID SUBJECT_TAG TO_TAG X_MSK := tell 14 ;
IN_HEADER := 99 ;
HDR_ONELINES := CONTENT_DESCRIPTION CONTENT_DISPOSITION CONTENT_ID MESSAGE_ID X_MSK IN_HEADER ;
FROM_ONELINES := CONTENT_DESCRIPTION CONTENT_DISPOSITION CONTENT_ID MESSAGE_ID X_MSK IN_HEADER ;
Attach_name := 'Attach_name none' ; % attachment name for the current message part - set by Find_attachName and read by dash_skip ;
count_files_renamed := 0 ; % per-mailbox counter - incremented in From_process for each extracted attachment ;
count_total_all := 0 ; % running totals over all folders - reset by emailer and fix_filenames ;
count_total_renamed := 0 ;
Date_cur := '100502' ; % date string used in attachment file names - overwritten by Process_date ;
dir_attchs := '/home/bill/Attachments - Thunderbird/' ; % root for extracted attachments, MIME.txt and both log files ;
dir_emails := '/home/bill/.mozilla-thunderbird/n4caryuo.default/Mail/Local Folders/' ; % mailbox files that Attach_del copies and then rewrites ;
dir_emails_old := 'dir_emails_old not set yet' ; % backup directory - assigned in emailer ;
dir_emails_old_root := '/media/IOMEGA_HDD/Thunderbird backups/' ; % parent of the dated backup directory built in emailer ;
mailbox := 'mailbox is undefined' ;
dir_email := 'dir_email is undefined' ;
fault_eof := fault '?eof' ; % compared with readfile results to detect the end of a file ;
fault_ls := fault '?Invalid argument' ;
fault_noexpr := fault '?noexpr' ; % compared with host results - any other value is treated as a failure ;
fil_log_nial := 'log_file_nial.txt' ; % logs effects of nial calls ;
fil_log_host := 'log_file_host.txt' ; % logs effects of host calls ;
attach_count := 0 ;
Fin := 0 ; % input file designator - assigned by open in Attach_del ;
Fot := 0 ; % output file designator - assigned by open in Attach_del ;
fot_log_nial := 0 ; % designator of the nial log - assigned by open in emailer and fix_filenames ;
fot_log_host := 'nothing' ; % PATH of the host log - emailer sets it and it is spliced into shell redirections ;
Fun := 0 ; % designator of the MIME scratch file - assigned by open in dash_skip ;
Fromer := 'Nobody' ; % sender text for attachment names - set by get_Fromer ;
Lins := 60 reshape solitary '' ; % buffer of 60 line slots indexed by Lin_c ;
Lin_c := 0 ;
line_count := 0 ; % incremented for every line read in readBil and dash_skip ;
mime_name := '' ; % path of the MIME scratch file - set in emailer ;
pth_fileList := link dir_attchs 'pth_fileList.txt' ; % file listing written by save_filenames and read by clen_filenames ;
target_folder := '' ; % mailbox currently being processed - set in Attach_del ;
#####################################################################################################
#####################################################################################################
# emailer operators
#
# Process_date - take the date out of a mailbox separator line such as
#    From - Tue Jun 02 09:01:50 2009
# and store it in the global Date_cur as year-month-day digits (090602 for the sample).
# Fixed character positions are used: 29-30 year, 11-13 month name, 15-16 day.

Process_date IS OP Lin
{ NONLOCAL Date_cur ;
  yy := (29 30) choose Lin ;
  mon_name := (11 12 13) choose Lin ;
  dd := (15 16) choose Lin ;
  Date_cur := link yy (Month_convert mon_name) dd ;
}
is_Odd IS OP a
{ % true when a leaves remainder 1 on division by 2 ;
  1 = (a mod 2)
}
is_blankLin IS OP Str
{ NONLOCAL char_tab char_space char_CR fault_noexpr ;
  % Str is matched item by item against tab, space, carriage return and the noexpr fault ;
  % the four match results are combined with OR and the outcome is reduced with AND ;
  blank_items := char_tab char_space char_CR fault_noexpr ;
  hits_per_item := blank_items EACHLEFT EACHRIGHT match Str ;
  AND (OR hits_per_item)
}
# was (each charrep Str)
isempty_Lin IS OP Lin
{ IF Lin = null THEN l
ELSE is_blankLin Lin
ENDIF
}
clean_text IS OP texter
{ NONLOCAL chars_for_filename ;
  % keep only the items of texter that occur in chars_for_filename - order is preserved ;
  keep_mask := texter EACHLEFT in chars_for_filename ;
  keep_mask sublist texter
}
clean_filename IS OP texter
{ NONLOCAL chars_for_filename ;
  % drop every item of texter that is not one of chars_for_filename ;
  allowed_here := texter EACHLEFT in chars_for_filename ;
  allowed_here sublist texter
}
clean_pathname IS OP texter
{ NONLOCAL chars_for_pathname ;
  % same filtering as clean_filename but against chars_for_pathname, which also admits the slash ;
  allowed_here := texter EACHLEFT in chars_for_pathname ;
  allowed_here sublist texter
}
get_Fromer IS OP Lin
{ NONLOCAL Fromer ;
  % Derive the sender text from a From: header line and store it, filtered by clean_filename, in Fromer ;
  % a trailing carriage return is removed first, then the leading six characters are dropped ;
  IF (last Lin = char_CR) THEN Lin := front Lin ; ENDIF ;
  sender_text := ((gage shape Lin) - 6) takeright Lin ;
  % when both angle brackets are present the text is narrowed with string_split_by_string ;
  % NOTE(review): the exact pieces kept depend on what string_split_by_string returns - it is defined outside this file ;
  IF AND (`< `> EACHLEFT in sender_text) THEN
    sender_text := front second string_split_by_string '<' sender_text ;
    sender_text := rest first string_split_by_string '>' sender_text ;
  ENDIF ;
  Fromer := clean_filename sender_text ;
}
readBil IS
{ NONLOCAL Fin list_LF line_count ;
  % Read the next record from Fin, count it, and return it ;
  % a record starting with FROM - updates the current date and one starting with FROM: updates the sender ;
  routine := 'readBil' ;
  IF flag_writescreen THEN writescreen link routine ' ' (string line_count); ENDIF ;
  next_rec := readfile Fin ;
  line_count := 1 + line_count ;
  IF (toupper (7 take next_rec) = 'FROM - ') THEN
    Process_date next_rec ; %Assumes not in dash_skip ;
  ELSE
    IF (toupper (6 take next_rec) = 'FROM: ') THEN
      get_Fromer next_rec ; %Will use attachment From, not the email From ;
    ENDIF ;
  ENDIF ;
  next_rec
}
Process_timestamp IS
{ % Build a stamp of the form space yymmdd space hh h mm from the system timestamp ;
  % fixed character positions are used: 8-9 day, 4-6 month name, 22-23 year, 11-12 hour, 14-15 minute ;
  stamp_now := timestamp ;
  dd := (8 9) choose stamp_now ;
  mon_name := (4 5 6) choose stamp_now ;
  yy := (22 23) choose stamp_now ;
  hh := (11 12) choose stamp_now ;
  mi := (14 15) choose stamp_now ;
  % a day whose first character is a space is meant to get a leading zero instead ;
  IF (first dd = char 32) THEN dd := link `0 (second dd) ; ENDIF ;
  link ' ' yy (Month_convert mon_name) dd ' ' hh 'h' mi
}
Find_attachName IS OP stringer
{ NONLOCAL Attach_name ;
  % Split stringer at double quotes - when exactly three pieces result the name is taken from the middle piece ;
  % otherwise the placeholder text is used. The outcome is filtered by clean_filename and stored in Attach_name ;
  quoted_parts := string_split_by_string '"' stringer ;
  IF gage shape quoted_parts = 3
  THEN found_name := rest first (1 choose quoted_parts) ;
  ELSE found_name := 'Attach_name none' ;
  ENDIF ;
  Attach_name := clean_filename found_name ;
}
dash_skip IS
{ NONLOCAL Attach_name Date_cur dir_emails dir_emails_old
Fat Fin Fot fot_log_host Fromer Fun line_count mime_name target_folder ;
% dash_skip - pull one encoded attachment body out of the input mailbox Fin ;
% 1. leading blank lines are copied to Fot ;
% 2. the following lines are spooled to the scratch file named by mime_name, up to a blank line, a line starting with a dash, or end of file ;
% 3. an output path Fat_name is built from dir_attchs, target_folder, Date_cur, Fromer, a version number and Attach_name cut to 30 characters ;
% 4. the host command base64 -d decodes the scratch file into Fat_name - when its result differs from fault_noexpr the scratch file is copied instead ;
% 5. a Howell link to line naming Fat_name is written to Fot ;
% Result: Record, the first line read from Fin that was not consumed here, or the end-of-file fault ;
% dir_attchs is read here although it is not listed in the NONLOCAL declaration ;
routine := 'dash_skip' ;
IF flag_writescreen THEN writescreen link routine ' ' (string line_count); ENDIF ;
% ;
%writefile empty lines to Fot (not Fat!!) prior to binary code ;
%there may be a mistake here - empty lines should be written AFTER the hyperink below? ;
Record := readfile Fin ;
line_count := line_count + 1 ;
WHILE Record ~= fault_eof DO
IF NOT isempty_Lin Record THEN EXIT 0 ; ENDIF ;
writefile Fot Record ;
Record := readfile Fin ;
line_count := line_count + 1 ;
ENDWHILE ;
% ;
%writefile binary code to Fun (MIME coded) until empty line or line starting with a dash are encountered ;
Fun := open mime_name "w ;
WHILE Record ~= fault_eof DO
IF isempty_Lin Record THEN EXIT 0 ; ENDIF ;
IF Record@0 = `- THEN EXIT 0 ; ENDIF ;
writefile Fun Record ;
Record := readfile Fin ;
line_count := line_count + 1 ;
ENDWHILE ;
close Fun ;
% ;
%construct a name and open final binary output file ;
%Jun10 This must be made more efficient - too time consuming!! ;
folder := last string_cut_by_char `/ target_folder ;
i := 0 ;
IF gage shape Attach_name > 30 THEN Attach_name := 30 take Attach_name ; ENDIF ;
Fat_name := link dir_attchs target_folder '.sbd/' folder ', ' Date_cur ', ' Fromer ', v' (string i) ', ' Attach_name ;
% the loop below is meant to raise the version number i until the ls of Fat_name no longer succeeds ;
% NOTE(review): other host calls in this file are tested against fault_noexpr - if host yields that fault on success then isfault is always true here and i never advances - confirm ;
WHILE (NOT isfault host link 'ls >>"' fot_log_host '" "' Fat_name '"' ) DO
i := i + 1 ;
Fat_name := link dir_attchs target_folder '.sbd/' folder ', ' Date_cur ', ' Fromer ', v' (string i) ', ' Attach_name ;
ENDWHILE ;
% ;
%MIME decode to get binary ;
IF (fault_noexpr ~= host link 'base64 >>"' fot_log_host '" -d "' mime_name '" "' Fat_name '"') THEN
writescreen link 'ERROR: dash_skip - cannot mime_decode ' mime_name ' to ' Fat_name ;
host link 'cp >>"' fot_log_host '" "' mime_name '" "' Fat_name '"' ;
ENDIF ;
% ;
%write hyperlink into Fot ;
% NOTE(review): the literals below contain raw line breaks and may be the remains of lost markup - confirm against the working copy ;
writefile Fot '
' ;
writefile Fot (link '
') ;
writefile Fot (link 'Howell link to: ' Fat_name ' ') ;
writefile Fot '
' ;
% ;
% NOTE(review): Fat is never opened or assigned anywhere in this block - confirm this close is still wanted ;
close Fat ;
Record
}
is_attach_hdr IS OP Lin
{ NONLOCAL CONTENT_TRANSFER_ENCODING CONTENT_TYPE EMPTY_LIN IN_HEADER ;
  % Classify one line and return its state code ;
  % tests run in this order: empty line, leading dash, Content-Transfer-Encoding header, Content-Type header, anything else ;
  routine := 'is_attach_hdr' ;
  IF flag_writescreen THEN writescreen link routine ' ' (string line_count); ENDIF ;
  IF isempty_Lin Lin THEN
    hdr_kind := EMPTY_LIN ;
  ELSEIF Lin@0 = `- THEN
    hdr_kind := DASH_START ;
  ELSEIF toupper (27 take Lin) = 'CONTENT-TRANSFER-ENCODING: ' THEN
    hdr_kind := CONTENT_TRANSFER_ENCODING ;
  ELSEIF toupper (14 take Lin) = 'CONTENT-TYPE: ' THEN
    hdr_kind := CONTENT_TYPE ;
  ELSE
    hdr_kind := IN_HEADER ;
  ENDIF ;
  hdr_kind
}
build_multiLine IS
{ NONLOCAL char_CR IN_HEADER Lins Lin_c line_count ;
  % Join a header that is folded over several lines into one string and return it ;
  % starts with the line at Lins@Lin_c and reads following lines with readBil into the next slots of Lins ;
  % joining goes on while fewer than 4 pieces were taken, the new line starts with a blank character and the new line is not blank ;
  % on return Lin_c addresses the first line that was NOT joined ;
  routine := 'build_multiLine' ;
  IF flag_writescreen THEN writescreen link routine ' ' (string line_count); ENDIF ;
  joined := '' ;
  pieces_done := 0 ;
  REPEAT
    piece := Lins@Lin_c ;
    % a trailing carriage return is not carried into the joined text ;
    IF (last piece = char_CR) THEN piece := front piece ; ENDIF ;
    joined := link joined piece ;
    pieces_done := pieces_done + 1 ;
    Lin_c := Lin_c + 1 ;
    Lins@Lin_c := readBil ;
    more_to_join := AND (pieces_done < 4) (is_blankLin (list first Lins@Lin_c) ) (NOT is_blankLin Lins@Lin_c) ;
  UNTIL NOT more_to_join
  ENDREPEAT ;
  joined
}
From_process IS OP Lin
{ NONLOCAL Attach_name CONTENT_TYPE CONTENT_TRANSFER_ENCODING count_files_renamed EMPTY_LIN Fot Lins Lin_c line_count ;
  % Scan the header lines of one message part, starting at Lins@Lin_c, for at most 60 steps ;
  % a Content-Type header supplies the attachment name and a Content-Transfer-Encoding header ending in BASE64 marks the part for extraction ;
  % scanning stops at an empty line or a line starting with a dash ;
  % the buffered lines 0..Lin_c are then written to Fot and, for a marked part, dash_skip extracts the body ;
  % Result: the line handed back by dash_skip, or the last buffered line when nothing was extracted ;
  % Attach_name is declared NONLOCAL so that the reset below always reaches the same variable that ;
  % Find_attachName sets and dash_skip reads - a part without a name must not inherit the previous name ;
  % NOTE(review): Lins holds 60 slots while Lin_c can grow by more than one per step - confirm long headers cannot overrun it ;
  routine := 'from_process' ;
  IF flag_writescreen THEN writescreen link routine ' ' (string line_count); ENDIF ;
  skip_flag := o ;
  % ;
  Attach_name := 'Attach_name none' ;
  For i with count 60 DO
    hdr_code := is_attach_hdr Lins@Lin_c ;
    IF hdr_code = CONTENT_TYPE THEN multiLine := build_multiLine ; Find_attachName multiLine ;
    ELSEIF hdr_code = CONTENT_TRANSFER_ENCODING THEN multiLine := build_multiLine ; temp := 6 takeright multiLine ;
      IF toupper temp = 'BASE64' THEN skip_flag := l ; ENDIF ;
    ELSEIF OR (hdr_code EACHRIGHT = EMPTY_LIN DASH_START) THEN EXIT 0 ;
    ELSE Lin_c := Lin_c + 1 ; Lins@Lin_c := readBil ;
    ENDIF ;
  ENDFOR ;
  % ;
  % the first buffered line is tagged when the body is about to be extracted ;
  IF skip_flag THEN Lins@0 := link 'Howell - binary extracted from: ' Lins@0 ; ENDIF ;
  FOR i WITH tell (Lin_c + 1) DO writefile Fot Lins@i ; ENDFOR ;
  IF skip_flag THEN
    count_files_renamed := count_files_renamed + 1 ;
    dash_Lin := dash_skip ;
  ELSE dash_Lin := Lins@Lin_c ;
  ENDIF ;
  % ;
  dash_Lin
}
dash_process IS OP Lin
{ NONLOCAL CONTENT_TYPE CONTENT_TRANSFER_ENCODING EMPTY_LIN Fot Lins Lin_c line_count ;
  % Handle a run of lines that start with a dash ;
  % while the current line starts with a dash the next line is read - if that one starts with a dash too ;
  % the current line is simply copied to Fot, otherwise the pair is placed in Lins@0 and Lins@1 ;
  % the first line that does not start with a dash is passed to From_process, whose result is returned ;
  routine := 'dash_process' ;
  IF flag_writescreen THEN writescreen link routine ' ' (string line_count); ENDIF ;
  %Lin_c := 0 ; %This is set in Attach_del ;
  WHILE Lin@0 = `- DO
    following := readBil ;
    IF following@0 ~= `- THEN
      Lins@0 := Lin ;
      Lins@1 := following ;
    ELSE
      writefile Fot Lin ;
    ENDIF ;
    Lin := following ;
  ENDWHILE ;
  %Lin_c := 1 ; % Dont reset - dash_process and Attach_del provide different starting points ;
  From_process Lin
}
# Attach_del - DOn't run this independently!
# target_folder - mailbox pathname (incl filename) (from dirs as below)
# dir_email - directory containing input mailbox file, which will be "trimmed" by removal of attachments
# dir_emails - directory for output mailbox files
# dir_emails_old - IOMEGA dir where original mailbox file will be copied
# Processing order inside Attach_del:
#   1. the mailbox is copied with host cp from dir_emails to dir_emails_old; a result other than
#      fault_noexpr is reported and sets flag_error
#   2. the copy in dir_emails_old is opened for reading as Fin
#   3. only when no error occurred so far is the mailbox in dir_emails opened for WRITING as Fot
#   4. on error the operator executes break; otherwise every line is read with readBil: lines that
#      start with a dash go through dash_process (with Lin_c set to 1), all others are copied to Fot
#   5. the per-mailbox counts are logged, added to the running totals, and Fin and Fot are closed
# Steps under 5 also run after an error (once execution continues past the break).
# fot_log_nial and fault_eof are read here without appearing in the NONLOCAL declaration.

Attach_del IS OP target_foldar
{ NONLOCAL count_files_renamed count_total_all count_total_renamed
dir_email dir_emails dir_emails_old Fin Fot fot_log_host Lin_c
mime_name target_folder ;
flag_error := o ;
target_folder := target_foldar ;
count_files_all := 0 ;
count_files_renamed := 0 ;
% ;
% ;
IF (fault_noexpr ~= (host_return := host link 'cp >>"' fot_log_host '" "' dir_emails target_folder '" '
'"' dir_emails_old target_folder '"' ) )
THEN
writescreen link 'ERROR: Attach_del - cannot copy ' target_folder ;
writescreen link ' host_return =' host_return ;
flag_error := l ;
ENDIF ;
IF (isfault (Fin := open (link dir_emails_old target_folder) "r)) THEN
writescreen link 'ERROR: Attach_del - cannot open ' dir_emails_old target_folder ;
flag_error := l ;
ENDIF ;
IF NOT flag_error THEN
IF (isfault (Fot := open (link dir_emails target_folder) "w)) THEN
writescreen link 'ERROR: Attach_del - cannot open ' target_folder ' in ' dir_emails ;
flag_error := l ;
ENDIF ;
ENDIF ;
% ;
IF flag_error
THEN break ;
ELSE
count_files_all := count_files_all + 1 ;
writescreen link 'Processing attachments in file: ' dir_emails target_folder ;
fot_log_nial EACHRIGHT writefile '' (link 'processing of: ' target_folder) ;
Lin := readBil ;
WHILE Lin ~= fault_eof DO
IF Lin@0 = `-
THEN Lin_c := 1 ; Lin := dash_process Lin ;
ELSE writefile Fot Lin ; Lin := readBil ;
ENDIF ;
ENDWHILE ;
ENDIF ;
% ;
writefile fot_log_nial (link 'count_mlbxs_all: ' (string count_files_all )) ;
writefile fot_log_nial (link 'count_attachs_saved: ' (string count_files_renamed)) ;
count_total_all := count_total_all + count_files_all ;
count_total_renamed := count_total_renamed + count_files_renamed ;
EACH close Fin Fot ;
}
# This creates a list of "descending" hierarchical filepaths, starting with top level
# unfold_dirs is recursive: a null path gives null; otherwise the path is split once with
# string_firstSplit_by_string at '/', the first part is unfolded again and the second part is appended
# to that result.

unfold_dirs IS OP path
{ IF path = null THEN
    null
  ELSE
    split_pair := string_firstSplit_by_string '/' path ;
    (unfold_dirs first split_pair) append (second split_pair)
  ENDIF
}
# Create all sub-directories on the IOMEGA as well right at the start. ;
# DONT trap the error at this point (no ""). it's important that the whole program crash if the directory can't be created ;
# For example, if the IOMEGA isn't turned on! ;
# Do dashed lead-in ONLY for now!
ELSEIF (toupper (7 take Lin) = 'FROM - ') THEN Lin_c := 0 ; Lins@0 := Lin ; Lin := From_process Lin ;
# emailer - top-level driver; target_folders is a list of mailbox paths relative to dir_emails.
#   1. Dater is the stamp from Process_timestamp; mime_name becomes dir_attchs + 'MIME.txt'
#   2. the running totals are reset; the nial log is opened for append as fot_log_nial and
#      fot_log_host is set to the PATH of the host log (it is spliced into shell redirections)
#   3. a banner with the timestamp is written to both logs
#   4. dir_emails_old is set to dir_emails_old_root + stamp + ' old mail folders/' and created with
#      host mkdir; the result is kept in host_return but not examined here
#   5. host mkdir is issued for the sub-directory paths derived from target_folders via unfold_dirs
#   6. Attach_del runs for every entry of target_folders
#   7. the totals are written to the nial log, which is then closed

emailer IS OP target_folders
{ NONLOCAL count_total_all count_total_renamed dir_attchs dir_email dir_emails_old dir_emails_old_root
fil_log_host fil_log_nial fot_log_host fot_log_nial mime_name ;
Dater := Process_timestamp ;
mime_name := link dir_attchs 'MIME.txt' ;
% ;
count_total_all := 0 ;
count_total_renamed := 0 ;
fot_log_nial := open (link dir_attchs fil_log_nial) "a ;
fot_log_host := link dir_attchs fil_log_host ;
fot_log_nial EACHRIGHT writefile ('' '' '******************************************************************' (link 'emailer - run at: ' timestamp ) '' ) ;
EACH host EACH link CART (solitary link ('echo >>"' fot_log_host '" '))
('" "' '" "' '"******************************************************************"' (link '"emailer - run at: ' timestamp '"') '" "') ;
dir_emails_old := link dir_emails_old_root (rest Dater) ' old mail folders/' ;
host_return := host link 'mkdir >>"' fot_log_host '" "' dir_emails_old '"' ;
% ;
EACH host ( EACH link CART (solitary link ('mkdir >>"' fot_log_host '" "' dir_emails_old))
(link EACH front EACH unfold_dirs target_folders)
(solitary '"')
) ;
EACH Attach_del target_folders ;
% ;
fot_log_nial EACHRIGHT writefile
'' 'Totals for all attachments saved: '
(link 'count_total_mailbxs: ' (string count_total_all ))
(link 'count_total_attchSaved: ' (string count_total_renamed)) ;
close fot_log_nial ;
}
#####################################################################################################
# emailer - ERROR TRACKING
# 000 pe22Jun10 probably filename problem....
post target_folders
post EACH host EACH link
22Jun10 worked well!:
must check that email folder created in Thunderbird directories directly, not waste time copying over!!
on mailbox folders do:
search: From - Mon Jan 1 00:00:00 1965
replace: From - Mon Jan 18 00:00:00 2006
# 001 24Jun10 - many filenames (especially from Catherine's emails) have line feed in the middle - due to one-character days
For this I should pull out the key text with split_string_bystring or whatever it's called! (my own code)
See also Error#002 below...
(22Aug10 UNTESTED-FIX only for CR, renaming existing erred files see Error#006)
# 002 24Jun10 - illegal character '<' in [?Jokes of Note?, z_backlog 2009 etc
- this also applies to the creation of "From" and "Attachment" names
(22Aug10 UNTESTED-FIX)
# 003 24Jun10 - mailboxes with repeated names like years like 2005 will be overwritten in the IOMEGA-HDD backup directory!!
This is very dangerous (loss of data) and must be fixed
(22Aug10 UNTESTED-FIX)
# 004 22Aug10 - I need a routine to "clean up" a whole pile of already-existing filenames (illegal characters got through!)
# 005 22Aug10 - Put hyperlinks in Thunderbird format - not my HTML-based screw-up!
To do this - I need to copy a mailbox over to a new name, then detach a file and see what the code looks like
I did this with Travel/MBNA travel insurance, but "detach" doesn't work!!
Put this to the side for now!!
# 006 22Aug10 - I need a routine to go back to old mailbox files & not end up with multiple copies of same files
could do file compare to delete duplicate?
# 007 22Aug10 - I need a routine to identify "missing mailbox files" and list them so that the datasets are corrected!
Actually - it should construct this itself from the actual mailbox! It's too much work (and too many errors)
to specify lists!
# 008 22Aug10 - "From:" name may be incorrect - it may simply be the last name found, and there may NOT be a correct name for
the current email.
(22Aug10 UNTESTED-FIX)
# 009 19Oct10 - LOST DATA from "Vauhan, Paul"! IOMEGA wasn't on when "lf" invoked.
a) Instructions were added to "EXECUTION" section below (20Oct10 - initial draft done)
b) Add fault traps for file opening errors in:
i) "emailer" - IF isfault (host (link 'mkdir "' dir_emails_old '"')) THEN EXIT ; ENDIF ;
21Oct10 changed to (fault_noexpr = host ...
ii) "Attach_del" - similar, but more detail to trap
iii) "dash_skip" - perhaps later
(20Oct10 - Linux error message "cp: cannot create regular file ..."
# 010 20Oct10 - I lost Catherine's emails - and pulled an old 24Jun10 mailbox, so I've lost 5 months of emails!!
# 011 21Oct10 - Attachments list "nobody" often as the "from" contact! - something wrong again with this!
(29Oct10 UNTESTED "Fromer" was set to "nobody" in dash_skip - I deleted this!)
# 012 21Oct10 - Error message from "host cp" related to illegal character "<" or something like that
(24Oct10 - cannot seem to do stdout redirection from within host command for "cp" even if it works for "ls")
(27Oct10 - PSEUDO-FIXED, I was just sloppy with the host text, but there is still a problem)
# 013 24Oct10 - "cp" can't create IOMEGA file? - looks like filename timestamp is wrong?? (30Oct10 - sometime ".sbd"
is still being included in "target_folders" entry for a folder!)
# 014 27Oct10 - Howell's hyperlinks are being removed from emails!! bad news - but it all has to be fixed somehow:
- reposition hyperlinks within each email
- target ULTIMATE directory, not the temporary holding for Attachments before they are cleaned up (deleted garbage)
- but do this much later
# void single solitary
#####################################################################################################
# emailer - DEVELOPMENT ACTIONS AT START/CLOSE OF DAY
# 22Aug10 15:56 run emailer on "Travel.sbd/MBNA Travel Insurance.sbd/"
# 29Aug10 Finished making new directories, now dealing with Fat_name in dash_skip
will have to replace mailbox with target_folder or something like that.
# 21Oct10: There are still numerous "vestigial code" segments remaining from a much larger context to
process far more types of binary coding. For example, rtf type content - which may be the most important
remaining code to process.
Also, the hyperlinks are "ill positioned" (invisible) in the cleaned email, and they don't anticipate the final folder.
# 24Oct10 As per original capabilities in fix_filenames, added fil_log_host & modified operators for that
# 27Oct10 Many outstanding problems:
host-result logging, (30Oct10 - seems partially better)
Nobody sent (30Oct10 FIXED again!),
hyperlinks now being picked out & destroyed, etc
HOWEVER - the key function of saving attachments works!! Good enough until in a couple of months
I have time to revisit.
# 30Oct10 I ran emailer on the Inbox, which is clearly screwed up (many old messages "hidden" in text somehow)
it extracted 301 attachments (+-), and I moved the attachments into /home/bill/inbox
Inbox is still WAAAYYY too big! (???)
#####################################################################################################
#####################################################################################################
# fix_filenames - CLEAN UP existing erroneous filenames in a directory (CRs & other improper characters)
16Oct10 started
# clean_pathname is above in "emailer operators"
# IF flag_break THEN BREAK ; ENDIF ;
# clen_filenames - read the listing in pth_fileList line by line; every name that clean_pathname
# changes is renamed inside target_folder with a host mv command. Counts are logged to fot_log_nial
# and added to the running totals. When flag_break is set, BREAK is executed before each rename.

clen_filenames IS OP target_folder
{ NONLOCAL count_total_all count_total_renamed dir_attchs pth_fileList fot_log_nial ;
  writescreen 'clen_filenames' ;
  count_files_all := 0 ;
  count_files_renamed := 0 ;
  fot_fileList := open pth_fileList "r ;
  Lin := readfile fot_fileList ;
  WHILE Lin ~= fault_eof DO
    count_files_all := count_files_all + 1 ;
    Lin2 := clean_pathname Lin ;
    IF Lin2 ~= Lin THEN
      count_files_renamed := count_files_renamed + 1 ;
      pp := link 'mv "' (link target_folder Lin) '" "' (link target_folder Lin2) '"' ;
      IF flag_break THEN BREAK ; ENDIF ;
      host pp ;
    ENDIF ;
    Lin := readfile fot_fileList ;
  ENDWHILE ;
  writefile fot_log_nial (link 'count_files_all: ' (string count_files_all )) ;
  writefile fot_log_nial (link 'count_files_renamed: ' (string count_files_renamed)) ;
  count_total_all := count_total_all + count_files_all ;
  count_total_renamed := count_total_renamed + count_files_renamed ;
  close fot_fileList ;
}
save_filenames IS OP target_folder
{ NONLOCAL dir_attchs pth_fileList fot_log_nial ;
% Write a listing of target_folder into the file named by pth_fileList for clen_filenames to read ;
% the folder name is echoed to the screen and a processing line is added to the nial log ;
% the listing itself is produced by the host command ls with its output redirected to pth_fileList ;
% NOTE(review): the same file is also opened for writing and closed from Nial around the host call although nothing is written through that designator - confirm it is needed ;
writescreen target_folder ;
fot_fileList := open pth_fileList "w ;
fot_log_nial EACHRIGHT writefile '' (link 'processing of: ' target_folder) ;
host link 'ls >"' pth_fileList '" "' target_folder '"' ;
close fot_fileList ;
}
fix_filenames IS OP target_folders
{ NONLOCAL count_total_all count_total_renamed dir_attchs fot_log_nial ;
  % Driver for cleaning existing attachment file names ;
  % for every entry of target_folders the directory dir_attchs + entry + .sbd/ is listed by save_filenames and cleaned by clen_filenames ;
  % a banner and the final totals go to the nial log, which is opened for append here and closed at the end ;
  count_total_all := 0 ;
  count_total_renamed := 0 ;
  fot_log_nial := open (link dir_attchs fil_log_nial) "a ;
  run_banner := '' '' '******************************************************************'
    (link 'fix_filenames - run at: ' timestamp) ;
  fot_log_nial EACHRIGHT writefile run_banner ;
  FOR folder_name WITH target_folders DO
    folder_path := link dir_attchs folder_name '.sbd/' ;
    save_filenames folder_path ;
    clen_filenames folder_path ;
  ENDFOR ;
  totals_report := '' 'Totals for all attachments saved: '
    (link 'count_total_folders: ' (string count_total_all ))
    (link 'count_total_attchSaved: ' (string count_total_renamed)) ;
  fot_log_nial EACHRIGHT writefile totals_report ;
  close fot_log_nial ;
}
#####################################################################################################
# fix_filenames - ERROR TRACKING
# 009 17Oct10 - doesn't splice in corrected dating (18Oct10 - fixed)
# 010 17Oct10 - trapped in infinite loop! (18Oct10 - fixed)
# 19Oct10 - Things work fairly well now.
# 05Mar11 - Didn't remove line feed type character? in "/home/bill/Attachments - Thunderbird/z_backlog.sbd/2003.sbd"
- "invalid host command" is error - the extra character is 13 (CR) which doesn't work in the Linux command line!!
STILL A PROBLEM (but was fixed before?!?) - probably works with other illegal characters, but need to
fix for the CR
110305 14h39 - fixed - it was an error with the apostrophe...
# 05Mar11 - Also, changed email folder "z_backlog" to "z_Archive"
#####################################################################################################
# fix_filenames - DEVELOPMENT ACTIONS AT START/CLOSE OF DAY
# 16Oct10 trying to get back into this - clean_text should now work with illegal characters in filename
(still might get garbage names)
So - I need a separate routine to clean up existing filenames in directory
(19Oct10 DONE)
# 16Oct10 close of day
- target_folders needs more work (19Oct10 DONE - have changed so that the emailer data format works!)
- fix_filename - needs routine to read through appended pth_fileList and
append first & 2nd row of filename
(19Oct10 NOT NECESSARY - char 13 makes it look like there's a linefeed)
# 24Oct10 - Added fil_log_host & modified operators for that
#####################################################################################################
#####################################################################################################
# file_transfer - move files from /home/bill/Attachments - Thunderbird to permanent storage
AFTER they have been cleaned up, junk and duplicates removed
# 1 Delete junk files in all folders of "/home/bill/Attachments - Thunderbird/" (active ones)
for example, files <5 kbytes
# 2 run the routine below
# 001 21Oct10 - Not drafted yet! let alone test, debug, etc
# 002 problem of '.sbd' in directory names!
# 003 21Oct10 - FOS Holding for Process (and other directories) are NOT arranged the same as email folders
consistency would greatly simplify the code and maintenance
# save_dir - DRAFT (see item 001 above: not drafted or tested yet).
# Builds one host mv command from dir_attchs, target_folder, the pattern '.sbd/*.*', the prefix
# /home/bill/ and the result of string_cut_by_string '.sbd' target_folder, then runs it.
# NOTE(review): the command text opens a double quote after mv that is never closed, and the result
# of string_cut_by_string is passed straight to link - confirm both before first use.

save_dir IS OP target_folder
{ NONLOCAL dir_attchs ;
host link 'mv "' dir_attchs target_folder '.sbd/*.* /home/bill/' (string_cut_by_string '.sbd' target_folder) '/' ;
}
store_files IS OP target_folders
{ NONLOCAL dir_attchs ;
% DRAFT - listed above as not drafted or tested yet ;
% applies save_dir to every string built by the CART expression below: the text cp plus dir_emails_old, a sub-path derived from target_folders via unfold_dirs, and a closing double quote ;
% NOTE(review): save_dir expects a folder name but receives a partial cp command here - confirm the intended argument ;
EACH save_dir (EACH link CART (solitary link ('cp "' dir_emails_old))
(link EACH front EACH unfold_dirs target_folders)
(solitary '"')
) ;
}
#####################################################################################################
#####################################################################################################
# USER-SPECIFIC DATA - including EXAMPLES, TESTING
# user-email-and-directories-specific setup
# - must manually check that the "/home/bill/Attachments - Thunderbird" directories & folders exist!!
#### Processing of a single, specified folder
# target_folders := solitary ('Jokes of note') ; % Special - very large attachements! ;
# target_folders := solitary ('Inbox') ; % Special - I want to keep attachments for a while, periodically clean ;
#### Normal, active folders to process
# target_folders :=
'Climate.sbd/Vaughan, Paul'
'Family'
'Family.sbd/Catherine'
'Family.sbd/Kim'
'Family.sbd/Molnar, Alex'
'Family.sbd/Mom & Dad'
'Family.sbd/Sarah'
'Family.sbd/Steve & Diane'
'Friends'
'Friends.sbd/Ball, Gordon'
'Friends Of Science'
'Jokes of note'
'z_Archive.sbd/2011'
;
# ACTIVE selection: this assignment is live. The list it builds is the one handed to emailer
# by the "emailer target_folders" line in the EXECUTION section.

target_folders :=
'z_Archive.sbd/2005'
'z_Archive.sbd/2006'
'z_Archive.sbd/2007'
'z_Archive.sbd/2008'
;
#### Dormant folders: The following folders are "on hold" for periodic re-processing, or never as the case may be:
# target_folders :=
'Climate.sbd/People.sbd/Climate.sbd/Vaughan, Paul'
'Friends.sbd/Ing'
;
#### Old archives - fairly sure these will not be cleaned again (shouldn't be added to)!!
'Climate.sbd/People.sbd/Salonius, Peter'
'My sports & clubs.sbd/z_archive.sbd/Toastmasters'
'Neural.sbd/z_Archive.sbd/IJCNN09.sbd/Student best papers'
'Neural.sbd/z_Archive.sbd/IJCNN09 Publicity.sbd/Journals & Newsletters'
'Neural.sbd/z_Archive.sbd/IJCNN09 Publicity.sbd/Poster & CFP'
'Purchases.sbd/House - 86 Anderson'
'z_Archive.sbd/2002'
'z_Archive.sbd/2003'
'z_Archive.sbd/2004'
'z_Archive.sbd/2009'
'z_Archive.sbd/2010'
# No longer exist as directories
'Friends.sbd/Arbour'
'Friends Of Science.sbd/_FOS, CCNet - science titbits.sbd/2005'
'Friends Of Science.sbd/_FOS, CCNet - science titbits.sbd/2006'
'Friends Of Science.sbd/_FOS, CCNet - science titbits.sbd/2007'
'Friends Of Science.sbd/_FOS, CCNet - science titbits.sbd/2008'
'Friends Of Science.sbd/__Holding for process.sbd/2007'
'Friends Of Science.sbd/__Holding for process.sbd/2008'
#####################################################################################################
# EXECUTION
# Tests - how do you stop program termination with a fault?
/media/bill/SWAPPER/Qnial/QNial - error messages and experiences.txt - 02May10 THIS WORKED!!: command line nial -s !!!!
# INSTRUCTIONS - before running any of the operators below!! (eg via "lf")
# 1. BACK-UP the directory "/home/bill/.mozilla-thunderbird/n4caryuo.default"
to IOMEGA drive "/media/IOMEGA_HDD/Thunderbird backups/yymmdd n4caryuo.default"
# 2. Check "emailer ERROR TRACKING" and "fix_filenames - ERROR TRACKING" to make sure there are no
outstanding critical issues that will cause problems
# 3. Select or build the appropriate "target_folders" data in section " USER-SPECIFIC DATA" above.
# 4. DESELECT all other "target_folders " that will NOT be used! (comment out with "#")
# 5. TURN ON the external IOMEGA hard drive
# 6. Close Thunderbird if it is running
# 7. Save this file "Thunderbird - delete binary attachments.NDF" to make sure the right code/data will be loaded
# 8. Check launch script:
a) qnial MUST launch without automatic termination by faults ("nial -s")
b) "profile.ndf" must set "f := '/media/bill/SWAPPER/Qnial/MY_NDFS/Thunderbird - delete binary attachments.ndf' ; "
# 9. Finally, launch from qnial via "lf"
# The next line is live code: loading this file runs emailer on the active target_folders list.

emailer target_folders

# Alternative run, left disabled:
# fix_filenames target_folders
# enddoc