#!/usr/bin/env bash
#] 
#] *********************
#] $ bash  "$d_bin"'diff pdftotext.sh'  - run diff on pdf files
# www.BillHowell.ca  23Jul2022 initial, from 'diff [txt, csv].sh'  
# Interpreter is bash (not sh): `source` below is a bash builtin that strict POSIX shells (eg dash) do not have.


#************************

# Load the shared header from the directory named by d_bin.
# NOTE(review): d_web, d_webRawe and d_temp are used below but never set in this file —
#	presumably the header (or the calling environment) defines them; confirm.
source  "$d_bin""standard header.sh"


#08********08
# Rationale for improving this
# should base this in QNial for more power, but host calls to bash

# many diffs are due to [page, reference] number changes, which should be ignored




#************************

#] diff_old()  - older settings : set [d_work, f1, f2] to the two Dobler 'Hormonageddon' documents

	diff_old() {
	# Older comparison settings (per the function name): prints "jump" on stdout, then
	# points the globals [d_work, f1, f2] at the two Dobler 'Hormonageddon' documents.
	# Reads global d_webRawe.  The variables are deliberately NOT local : they are the
	# same globals the other functions of this file read.
	printf '%s\n' "jump"
	f1='Dobler 07Sep2021 Hormonageddon - How Chemical and Electromagnetic Influences weakened the Human Character in three Generations'
	f2='Dobler 19Jun2022 Hormonageddon, revision1'
	d_work="${d_webRawe}References/Health/"
}

	# Active settings : the two INNS ByLaws documents to compare.
	# [f1, f2, f_diff] are file names without extension, all starting with the 'dater' prefix.
	# d_web is not set in this file, it must already be in scope.
	dater='221211 '
	d_work="${d_web}Neural nets/INNS/"
	f1="${dater}ByLaws 1991"
	f2="${dater}ByLaws 2022"
	f_diff="${dater}diff f1-f2"

#] diff_it()  - convert two .pdf files to text, bash "diff" them, manual edit and comment the resulting text

	pdftotext_it() {
	# Extract the text of both pdf files : for each name in [f1, f2], run pdftotext on
	# "$d_work<name>.pdf" and write "$d_temp<name>.txt".
	# Reads globals d_work, d_temp, f1, f2.
	# The exit status is that of the second conversion (the last command run).
	local stem
	for stem in "$f1" "$f2"; do
		pdftotext  "$d_work$stem"'.pdf'  "$d_temp$stem"'.txt'
	done
}


	diff_it()
{
	# Compare the text files "$d_work$f1.txt" and "$d_work$f2.txt" with diff (default output
	# format) and store the result in "$d_work$f_diff.txt".
	# Reads globals d_work, f1, f2, f_diff.
	# Returns diff's own status : 0 = no differences, 1 = differences found, 2 = trouble.
	# Options are placed before the operands, and '--' ends them, so the call does not depend
	# on GNU argument permutation (which is disabled when POSIXLY_CORRECT is set).
	# NOTE: --suppress-common-lines only changes side-by-side (-y) output; it is kept from the
	#	original call and is harmless with the default format.
	local txt_old="$d_work$f1"'.txt'
	local txt_new="$d_work$f2"'.txt'
	diff  --suppress-common-lines  --  "$txt_old"  "$txt_new"  >"$d_work$f_diff"'.txt'
}

	_deNumber_side()
{
	# Helper for deNumber_it : $1 = diff marker ('<' or '>'), $2 = diff file to read, $3 = file to write.
	# Keeps the lines of $2 that start with the marker, then on each line : removes the marker
	# and the spaces after it, empties space-only lines, deletes every run of digits, deletes
	# the first run of dots matched by the last sed expression.  Finally drops empty lines and
	# lines containing "Hormonageddon" or "Chapter".
	grep  "^$1"  "$2"  |  sed  's|^'"$1"'[ ]\+||;s|^[ ]*$||;s|[0-9]\+||g;s|\.[\.]\+||'  |  grep  --invert-match  "Hormonageddon\|Chapter\|^$"  >"$3"
}

	deNumber_it()
{
	# Separate an existing diff into its [f1, f2] sides with the numbers removed, then diff
	# those two sides, so that pure [page, reference] number changes no longer show up.
	# Input  : "$d_work$f_diff.txt", ie the file diff_it writes.  If that is not readable, the
	#	older fixed location "$d_temp"'diff f1-f2.txt' is tried instead.
	# Output : in $d_temp : 'diff f1 no-numbers.txt', 'diff f2 no-numbers.txt' and
	#	'diff f1-f2 no-numbers.txt'.
	# Returns 2 (message on stderr) when no input diff is found, otherwise the status of the
	#	final diff (0 = no differences, 1 = differences found).
	# Reads globals d_work, d_temp, f_diff.
	local diff_in="$d_work$f_diff"'.txt'
	if [ ! -r "$diff_in" ]; then
		diff_in="$d_temp"'diff f1-f2.txt'
	fi
	if [ ! -r "$diff_in" ]; then
		printf 'deNumber_it: no diff file found, tried "%s" and "%s"\n'  "$d_work$f_diff"'.txt'  "$diff_in"  >&2
		return 2
	fi

	_deNumber_side  '<'  "$diff_in"  "$d_temp"'diff f1 no-numbers.txt'
	_deNumber_side  '>'  "$diff_in"  "$d_temp"'diff f2 no-numbers.txt'
	# options before operands, so the call does not rely on GNU argument permutation
	diff  --suppress-common-lines  --  "$d_temp"'diff f1 no-numbers.txt'  "$d_temp"'diff f2 no-numbers.txt'  >"$d_temp"'diff f1-f2 no-numbers.txt'
}

# Only the un-commented step below runs; the other steps are switched on or off by (un)commenting them.

# First, convert to text.  Cleanup is usually required!
# NOTE: pdftotext_it writes its .txt files into $d_temp, whereas diff_it reads them from $d_work
#	- presumably the cleaned-up text files are saved into $d_work by hand; confirm.
#	pdftotext_it

# Diff the cleaned text docs
 diff_it

# Sometimes it helps to remove article numbers where these have changed extensively
#	deNumber_it


# enddoc
