#!/usr/bin/env bash
# bash (not plain sh) is required : this file uses "source", "[[ ]]" and "read -u", which are not POSIX sh
#] 
#] *********************
#] $ bash "$d_bin"'webPage search news.sh' - j
# www.BillHowell.ca  06Mar2022 initial 
# view in text editor, using constant-width font (eg courier), tabWidth = 3


#************************
# List of operators, generated with :
# $ grep  "^#]"  "$d_bin""webPage search news.sh" |  sed "s/^#\]/  /" 

#


#************************
# Setup 

# Load the shared header file located in the directory given by "$d_bin" (must already be set by the caller). 
# NOTE(review): presumably the header defines "$d_temp" & "$d_webRawe" used further down - confirm.
.  "${d_bin}standard header.sh"

# Tests : 
# echo  'I saw a nulclear ship in Kyiv'  |  grep -i  "nuclear\|ship\|kyiv" 
# echo  'I saw a nuclear  ship in Kyiv'  |  grep -i  "nuclear\|ship\|kyiv" 
# echo  'I saw a nuclear  ship in Kyiv'  |  grep -i  "kyiv\|ship\|nuclear" 
# echo  'I saw a yaught  ship in Paris'   |  grep -i  "nuclear\|ship\|kyiv" 
# echo  'I saw a yaught  SHIP in Paris'   |  grep -i  "nuclear\|ship\|kyiv" 

# alternative words
#	Dnipro\|?Dneipre?
#	Zaporizhzhia\|Zaporizhzhya
#	Dnister
#	Danube\|Dunarea(accent on 1st a)


#************
# Ukraine-Russia


#] search_2liner()  - search for terms in 2-line news items, 1st line with "<a href" 
# 07Mar2022 initial from 06Mar2022 search_kyivindependent, need many-line news item improvement in code

	search_2liner()
{  
	# Purpose : scan a news file for 2-line news items, save the items that match a search pattern. 
	#	A news item = a line containing '<a href=' (case-sensitive) + the line right after it. 
	#	An item is kept when EITHER of its two lines matches the pattern (case-insensitive). 
	# Arguments : 
	#	$1 - grep basic-regex search pattern, eg 'nuclear\|ship\|kyiv' 
	#	$2 - path of the news file to read 
	#	$3 - path of the output file : overwritten with one blank line, then for each hit 
	#	     [href line, following line, blank line] 
	# Outputs : 
	#	if $2 is not a regular file, a message is echoed to stdout & $3 is left untouched 
	# Fix : the 2nd line used to be tested via the misspelled "$line2" (always empty), 
	#	so items whose match was only on the 2nd line were silently dropped. 
	local searchStr="$1"
	local p_news="$2"
	local p_item="$3"
	local line lin2

	if  [[ ! -f "$p_news" ]]; then 
		echo  "news file doesnt exist : $p_news"
		return 0
	fi 

	# the output file is opened once for the whole scan, rather than re-opened for each line written
	{
		echo ""
		# fd 9 carries the news file, so stdin stays free for grep's here-strings. 
		# -r keeps backslashes intact;  "|| [[ -n ... ]]" also handles a last line without newline. 
		# IFS is deliberately left at its default : leading/trailing blanks are trimmed, as before. 
		while  read -r -u 9 line  ||  [[ -n "$line" ]]; do 
			[[ "$line" == *'<a href='* ]]  ||  continue

			# the line after the href line belongs to the same item (empty if at end-of-file)
			lin2=''
			read -r -u 9 lin2  ||  true

			# "--" protects against a search pattern that starts with '-'
			if  grep -qi -- "$searchStr" <<<"$line"  ||  grep -qi -- "$searchStr" <<<"$lin2"; then 
				printf '%s\n%s\n\n'  "$line"  "$lin2"
			fi
		done 9< "$p_news"  
	}  >"$p_item"
}  


#] search_kyivindependent()  - search for news on webPage (special 2-line structure)
# 06Mar2022 initial 

	search_kyivindependent()
{
	# Run search_2liner() with the pattern given as $1, reading the news file "$p_news_kyiv" 
	# and writing the matching items to "$p_item_kyiv" (both are script-level path variables). 
	local pattern="$1"
	search_2liner  "${pattern}"  "${p_news_kyiv}"  "${p_item_kyiv}"
}


#] search_ukrinform()  - search for news on webPage (special 2-line structure)
#]      only looks at 2 lines, whereas many stories have many more lines!
# 06Mar2022 initial 

	search_ukrinform()
{
	# Run search_2liner() with the pattern given as $1, reading the news file "$p_news_ukri" 
	# and writing the matching items to "$p_item_ukri" (both are script-level path variables). 
	local pattern="$1"
	search_2liner  "${pattern}"  "${p_news_ukri}"  "${p_item_ukri}"
}


	# news files that are searched, under the "$d_webRawe" directory prefix
	p_news_kyiv="${d_webRawe}History/Ukraine-Russia/kyivindependent.com news log.html"
	p_news_ukri="${d_webRawe}History/Ukraine-Russia/ukrinform.net news log.html"

	# per-source files of matching items, under the "$d_temp" directory prefix
	p_item_kyiv="${d_temp}kyivindependent.com search items.txt"
	p_item_ukri="${d_temp}ukrinform.net search items.txt"

	# concatenation of both item files, written by search_all()
	p_both="${d_temp}[kyiv, ukri] search results.txt"


#] search_all()  - search [kyivindependent, ukrinform]
# 07Mar2022 initial, need to add "other news sources" file

	search_all()
{
	# Search both news sources for the pattern given as $1, then join the two result files. 
	#	search_kyivindependent() & search_ukrinform() each call search_2liner() on their own [news, item] files; 
	#	afterwards the kyiv items followed by the ukrinform items are written to "$p_both" (overwritten). 
	search_kyivindependent  "$1"
	search_ukrinform  "$1"
	cat  "${p_item_kyiv}"  "${p_item_ukri}"  >"${p_both}"
}




#*******************************************************************************
# Procedures -  To run :
# copy-paste https://kyivindependent.com/news-archive/ page source to 
# 	"$d_temp"'kyivindependent.com page source.txt'
# process with bash script 
# 	$ bash  "$d_bin"'news kyivindependent.com scrape.sh'
# 	outputs to "$d_temp"'kyivindependent.com news articles.txt'
# key markers
# 	<div class="new-day">Sunday, March 6</div>
# 	article id="post-
# 	</section><!-- .recent-posts -->
# copy-paste updated parts to this file 


#************
# run $ bash  "$d_bin"'webPage search news.sh'  

#	search_kyivindependent  'nuclear\|hydroelectric\|coal\|power plant'
#	search_ukrinform			'nuclear\|hydroelectric\|coal\|power plant'
#	search_all					'nuclear\|hydroelectric\|coal\|power plant'
#	search_ukrinform			'Kyiv\|Chernihiv\|Chornobyl\|Chernobyl\|Zytomyr\|Bila Tserkva\|Boryspil'


#	search_all  'nuclear\|hydroelectric\|coal\|power plant'
#	search_all  'Kyiv\|Chernihiv\|Chornobyl\|Chernobyl\|Zytomyr\|Bila Tserkva\|Boryspil'
#	search_all  'Odesa\|Dnistrovskyy\|Bilhorod\|Illichivsk\|Chornomorsk\|Lypetske\|Tuzla'
#	search_all  'Mykolayiv\|Ochakiv\|Zhovtneve\|Kherson\|Nova Kakhovka'
#	search_all  'Melitopol\|Tokmak'
	# the one active (un-commented) query : runs whenever this script is executed; 
	# swap with one of the commented-out search_all lines nearby to change the topic
	search_all  'Mariupol\|Novoazovsk\|Kuybyshevo'
#	search_all  'Lviv\|Ternopil\|Ivano-Frankivsk\|Stryy\|Drohobych\|Kamyanka-Buzka'  
#	search_all  'Chernivtsi\|Kolomyya\|Khotya\|Stryy\|Drohobych\|Kamyanka-Buzka'  
#	search_all  'Kamyanets-Podilskyy\|Zalishchyky\|Skala-Podilska\|Mohyliv-Podilskky'
#	search_all  'Khmelnyskyy\|Vinnytsya\|Zhmeynka\|Berdychiv'
#	search_all  'Sumy\|Konotop\|Hlukhiv\|Shostka\|Voronezh\|Krolevets\|Bakhmach\|Romney\|Okhtyrka'

#	search_all  'soccer\|jiu-jitsu\|football'


# 07Mar2022 all of a sudden, basically ALL news items were included.  WHY!?!?!?  I must have somehow corrupted the search pattern :
#	search_kyivindependent  'Lviv\|Ternopil\|Ivano-Frankivsk\|Stryy\|Drohobych\|Kamyanka-Buzka\|'
#	search_kyivindependent  'Chernivtsi\|Kolomyya\|Khotya\|Stryy\|Drohobych\|Kamyanka-Buzka\|'
# >> FUNNY:  I left a trailing '\|' in search pattern!!
# >> OK now




# enddoc
