#!/bin/bash
#################################
#seicor - puts e,i,oi in proper format for bangtex and related indian languages
# Copyright (C) 2005 Somendra M. Bhattacharjee (somen@iopb.res.in)
#
# Written by:
# Somendra M. Bhattacharjee
# Institute of Physics, Bhubaneswar 751 005, India
# email:somen@iopb.res.in
# Home page: http://www.iopb.res.in/~somen
# This is release. v 3.4.
# For the most recent version, check the above URL (under Programs).
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
####################################

numl=5 # hash mark every "numl" line

# NOTE(review): this copy of the script reached review with its line breaks
# collapsed and with every "<...>" span stripped.  As a consequence
#   * both here-documents below had lost their "<<EOF" opener together with
#     the opening paragraphs of the help text (everything up to the first
#     marker tag);
#   * the start/end marker tags appear only as a bare "%";
#   * the bug-report address was empty.
# The "<<EOF" openers are restored so the functions parse again, the
# surviving text is kept word for word (only line breaks were re-inserted),
# and the bug-report address is filled in from the header above.  The
# missing opening paragraphs and the real marker tags must be restored from
# an upstream copy of seicor -- they are NOT reconstructed here.

#### help file in english
# Prints the English help text to stdout and exits the (sub)shell with 0.
# Reads the globals vrsn and dtd, which are expanded inside the
# here-document.  Because the here-document is unquoted, the shell turns
# each "\\" in the text into a single "\" when printing.
function helpfile {
cat <<EOF
" and "%". Also, 'ou' can be used without "e".
"Joubon" will become "\*J*eoubon".
The markups (% and %) can be anywhere in a line but note that
the line containing "%" will be processed but not the one
containing "%" .

 o Comments (including inline comments) marked by % (as in tex/latex)
   are not touched.
 o "{", "}" are treated as "letters", except for "{oi}", "{aa}".
 o Anything from a "\" till a blank space will be left untreated
   like "\quad " or "\verb+Ami+ ", "{\rm{bash}}" etc. Exception to this
   is the special '\*' which remains unchanged.
 o *e, *i, *{oi} are not touched. See the example below.

Works with GNU BASH version 2 and up only.
It will not run with lower versions of BASH.

This version of seicor: v$vrsn
Get current version from http://www.iopb.res.in/~somen/Prog/seicor

OPTIONS:
 -h  : prints this help file and exits
 -hb : prints a help file in bangla (roman fonts) and exits
 -lN or -l=N : puts a hash mark on the screen after every N lines.
       (default: N=5)
 -n or -d : the lines containing '%', '%' NOT printed in the output file.
 -v  : prints the version number and exits
 infile : ".tex" is appended if no ".tex" is supplied
 outfile: output file (if no outfile is given, default is
          infile_sei.tex in the present working directory)

 More options to be added!

EXAMPLE:
A sample input file:
"
Some lines to be ignored \\
% <<<<< starts here \\
\begin{verse}
ANNakhi Jodi Aaj kore Aporadh koriyea kKoma.\\
He nirupoma \\
% kean gan theke neOya?
\end{verse}
Joubonosorosii niire
% <<<<< ends here
More lines to be ignored \\
"
after processing becomes
"
Some lines to be ignored \\
% <<<<< starts here \\
\begin{verse}
ANNa\*kh*i Jo\*d*i Aaj ko\*r*e Aporadh ko\*r*i\*y*ea kKoma.\\
\*H*e \*n*irupoma \\
% kean gan theke neOya?
\end{verse}
\*J*eoubonosorosii nii\*r*e
% <<<<< ends here
More lines to be ignored \\
"

AUTHOR:
Written by Somendra M. Bhattacharjee (SINP, Kolkata/Institute of Physics,
Bhubaneswar 751 005, India).

BUGS:
Could have been faster!

REPORTING BUGS:
Report bugs to <somen@iopb.res.in>.

COPYRIGHT:
Copyright © 2005 Somendra M. Bhattacharjee
This is free software released under GPL. There is NO warranty; not even
for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
$dtd
EOF
exit 0
}

#### help file in bangla (in roman fonts)
# Prints the Bangla (roman transliteration) help text to stdout and exits
# the (sub)shell with 0.  Reads the globals vrsn and dtd.
function help_bang {
cat <<EOF
" diey suru korey sesh kortey hobey "%" diye.
EI bishesh markup ("%" ebong "%") line er je kono jaygay thakte
parey, kintu je line ey "%" thakbe sei line ey poriborton hote parey.
Je line ey "%" thakbe sei line ey poriborton hobe na.

 o Tex/Latex er comment, ja "%" diey suru hoy, ta aporibortito thakbey.
 o "{", "}" borNo hisabe dhora hobey, tai sabdhan. "{oi}" absyo alada.
 o "\" thakle poroborti phank projonto sobta ek rokom thakbey,
   jemon "\quad " ba "\verb+Ami+ ", "{\rm{bash}}" ityadi.
   '\*' abosyo er modhey porbey na. (nicher udahoron dekhun).
 o *e, *i, *{oi} etc aporibortito thakbey.

GNU bash version 2 ba tar porer hole cholte asubidha nei.
tar ager version ey cholbe na.

ei sonskoron: seicor v$vrsn
cholti songskoron songroho korun: http://www.iopb.res.in/~somen/Prog/seicor

OPTIONS:
 -h  : english helpfile ta print kore, beriey ashbey
 -hb : ei helpfile (roman horofe bangla) print kore, beriey ashbey
 -lN or -l=N : screen ey proti N line antor ekta korey hash mark dekhabe.
       (kichu na dile: N=5)
 -n or -d : '%', '%'- wala linegulo output file e likhbe na.
 -v  : songskoron number likhey berie ashbe.
 infile : ".tex" jurhe debe jodi kono ".tex" na thakey sheshe
 outfile: output file (kono outfile ullekh na korle, infile_sei.tex
          file toiri hobey, je directory te chalano hochhey sekhane)

 Aro options pore deoa hote parey!

Udahoron:
eirokom ekta input file theke suru korley:
"
Some lines to be ignored \\
% <<<<< starts here \\
\begin{verse}
ANNakhi Jodi Aaj kore Aporadh koriyea kKoma.\\
He nirupoma \\
% kean gan theke neOya?
\end{verse}
Joubonosorosii niire
% <<<<< ends here
More lines to be ignored \\
"
seicor toiri korbe
"
Some lines to be ignored \\
% <<<<< starts here \\
\begin{verse}
ANNa\*kh*i Jo\*d*i Aaj ko\*r*e Aporadh ko\*r*i\*y*ea kKoma.\\
\*H*e \*n*irupoma \\
% kean gan theke neOya?
\*J*eoubonosorosii nii\*r*e
\end{verse}
% <<<<< ends here
More lines to be ignored \\
"

lekhok:
ei program er lekhok Somendra M. Bhattacharjee (Institute of Physics,
Bhubaneswar 751 005, India).

Gondogol?
Aro jodi taratari cholto!

Anno Gondogol dekhley:
email pathan: <somen@iopb.res.in>.

COPYRIGHT:
Copyright © 2005 Somendra M. Bhattacharjee
This is free software released under GPL. There is NO warranty; not even
for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
$dtd
EOF
exit 0
}

### Main program starts
vrsn='3.4' ## version number
dtd='May 16 2006.'
## release date
##############################
# Option parsing.  Options are consumed from the front of the argument list
# until the first argument that is not a recognised option.
#   -v*        print name, version and release date, then exit
#   -lN, -l=N  print a '#' progress mark every N input lines
#   -n, -d     do not copy the marker lines to the output
#   -h, -hb    show the English / Bangla help through less, then exit
while :; do
  case "$1" in
    -v*)
      printf '%s version %s %s\n' "$(basename -- "$0")" "$vrsn" "$dtd"
      exit 0
      ;;
    -l*)
      numl=${1#-l}
      numl=${numl#=}
      # Anything that is not a positive decimal integer falls back to 5.
      # Forcing base 10 keeps values such as "07" from being read as octal
      # and makes them compare equal to the line counter later on.
      case "$numl" in
        '' | *[!0-9]*)
          numl=5
          ;;
        *)
          numl=$((10#$numl))
          if ((numl == 0)); then
            numl=5
          fi
          ;;
      esac
      ;;
    -[nd])
      no_ei=1 ### marker lines are not written to the output
      ;;
    -h)
      echo " Help page in English "
      helpfile | less
      exit 0
      ;;
    -hb)
      echo " Help in bangla "
      help_bang | less
      exit 0
      ;;
    *)
      break
      ;;
  esac
  shift
done

# Exactly one input file and at most one output file are accepted.
if (($# < 1 || $# > 2)); then
  echo "Usage: $(basename -- "$0") [options...] input[.tex] [output[.tex]]" 1>&2
  exit 1
fi

# check if the program can run: check bash version (major version must be 2+)
tstbsh=${BASH_VERSION%%.*}
case "$tstbsh" in
  '' | *[!0-9]*) tstbsh=0 ;;
esac
if ((tstbsh <= 1)); then
  echo "E: Your bash does not allow susbtituion/array etc. Upgrade your bash" 1>&2
  echo "B: Ei bash ey hobe na - notun bash darkar" 1>&2
  exit 1
fi

# check/fix input output file names
infile=$1
if (($# > 1)); then
  outfile=$2
else
  # Default output: <input basename without a trailing .tex>_sei, written to
  # the current directory.  Only a trailing ".tex" is removed, so a name
  # such as "my.text.tex" is no longer mangled.
  outfile=$(basename -- "$1")
  outfile=${outfile%.tex}_sei
fi
case "$infile" in
  *.tex) : ;;
  *) infile=${infile}.tex ;;
esac
case "$outfile" in
  *.tex) : ;;
  *) outfile=${outfile}.tex ;;
esac
if [ "$outfile" = "$infile" ]; then
  # Never overwrite the input: divert to <input>_sei.tex instead.
  echo '>>>>>>> E: Output file name changed'
  echo '>>>>>>> B: Output file er nam paltano holo'
  outfile=${infile%.tex}_sei.tex
fi
if [ ! -r "$infile" ]; then
  {
    echo " "
    echo " E: Aborting $infile does not exist or not readable"
    echo " B: cherhe dilam: $infile nei ba porha jacche na "
    echo " "
  } 1>&2
  exit 1
fi
# start afresh: truncate/create the output file, and stop if that fails
if ! : >"$outfile"; then
  echo " E: cannot write to $outfile" 1>&2
  exit 1
fi

#######################################
# ei_thik - put \*...* in the right places for bangtex.
#
# Arguments: the text to convert.  Several arguments are joined with single
#            blanks.  Blanks are turned into '^' internally; callers that
#            already use '^' as the word separator should end the text with
#            '^'.
# Outputs:   the converted text, still using '^' as separator, followed by a
#            newline, on stdout.  If a vowel sign has no consonant to attach
#            to, a line " ?ERROR: <text> " is written to file descriptor 3
#            (to stderr when fd 3 is not open) and the text converted up to
#            that point is still printed.
#
# Method:
#   pass 1 - split the text into "units": single characters, except that
#            ii oi ou aa sh Sh NN NNG kK rR RR :/ // \* *e *i *oi *ou *aa are
#            kept together and a backslash command (\ followed by a letter
#            or another backslash) swallows everything up to the next '^'.
#   pass 2 - for every unit e, i, oi, ou, aa look back to the previous
#            barrier (vowels, '^', already starred units, ...), decide from
#            the shape of the consonant cluster where it begins, and mark
#            it:  \*<cluster>*<vowel>.
#######################################
function ei_thik {
  local src="$*" ## original text, kept for the error message
  local xx x1 x2 unit yy yy1 fl_0
  local len len1 wlg eipos j0 indx err_fd
  local noglob_was_on=0
  local oi_out='*{oi}' aa_out='*{aa}'
  local -a wrd
  wrd=()

  # Split on whitespace and re-join with '^'.  Globbing is switched off for
  # the split so that the '*' of constructs like *e or \*k*i can never be
  # expanded to file names of the working directory.
  case $- in
    *f*) noglob_was_on=1 ;;
  esac
  set -f
  # shellcheck disable=SC2086  # word splitting is intended here
  set -- $src
  ((noglob_was_on)) || set +f
  xx="$*"
  xx=${xx// /^}

  # Work with the brace-free spellings; they are put back at the end.
  xx=${xx//\{oi\}/oi}
  xx=${xx//eou/ou}
  xx=${xx//\{aa\}/aa}
  len=${#xx}
  len1=$len

  # ---- pass 1: construct array of letters (units) ----
  # len1 counts the characters still waiting in xx.
  wlg=0
  while ((len1 > 0)); do
    x1=${xx:0:1} ## cut first letter
    xx=${xx:1}
    len1=$((len1 - 1))
    wrd[wlg]=$x1
    # A leading e or i is stored as a plain letter but must not start an
    # "ii" unit; rewriting the selector makes it fall through the case.
    if ((wlg == 0)); then
      case "$x1" in
        [ei]) x1="*$x1" ;;
      esac
    fi
    x2=${xx:0:1} ## look at the next one
    case "$x1" in
      '*') # to identify *i, *e, *oi, *ou, *aa as units
        case "$x2" in
          [ie])
            wrd[wlg]='*'$x2
            xx=${xx:1}
            len1=$((len1 - 1))
            ;;
          [oa])
            case "${xx:0:2}" in
              o[iu] | aa)
                wrd[wlg]='*'${xx:0:2}
                xx=${xx:2}
                len1=$((len1 - 2))
                ;;
              *)
                wrd[wlg]='*'$x2
                xx=${xx:1}
                len1=$((len1 - 1))
                ;;
            esac
            ;;
        esac
        ;;
      i) ## identify ii as a unit
        if [[ $x2 == i ]]; then
          wrd[wlg]=ii
          xx=${xx:1}
          len1=$((len1 - 1))
        fi
        ;;
      o) ## identify oi ou as units
        case "$x2" in
          [iu])
            wrd[wlg]=$x1$x2
            xx=${xx:1}
            len1=$((len1 - 1))
            ;;
        esac
        ;;
      [sS]) ## identify sh as a unit
        if [[ $x2 == h ]]; then
          wrd[wlg]=$x1$x2
          xx=${xx:1}
          len1=$((len1 - 1))
        fi
        ;;
      '\')
        case "$x2" in
          '*') ## identify \* as a unit
            wrd[wlg]='\*'
            xx=${xx:1}
            len1=$((len1 - 1))
            ;;
          [a-zA-Z'\']) ## isolate slash commands
            wrd[wlg]=${wrd[wlg]}$x2
            xx=${xx:1}
            len1=$((len1 - 1))
            # Swallow everything up to (not including) the next '^'.
            # Also stop when the text runs out, so a command that is not
            # followed by '^' cannot loop forever.
            while [[ -n $xx && ${xx:0:1} != '^' ]]; do
              wrd[wlg]=${wrd[wlg]}${xx:0:1}
              xx=${xx:1}
              len1=$((len1 - 1))
            done
            ;;
        esac
        ;;
      N) # for NN NNG
        if [[ $x2 == N ]]; then
          xx=${xx:1}
          len1=$((len1 - 1))
          if [[ ${xx:0:1} == G ]]; then ## identify NNG as a unit
            wrd[wlg]='NNG'
            xx=${xx:1}
            len1=$((len1 - 1))
          else ## identify NN as a unit
            wrd[wlg]='NN'
          fi
        fi
        ;;
      ':' | '/') ## for hosonto: identify :/, // as units
        if [[ $x2 == / ]]; then
          wrd[wlg]=$x1$x2
          xx=${xx:1}
          len1=$((len1 - 1))
        fi
        ;;
      [rR]) ## identify rR, RR as units
        if [[ $x2 == R ]]; then
          wrd[wlg]=$x1$x2
          xx=${xx:1}
          len1=$((len1 - 1))
        fi
        ;;
      k) ## identify kK as a unit
        if [[ $x2 == K ]]; then
          wrd[wlg]="kK"
          xx=${xx:1}
          len1=$((len1 - 1))
        fi
        ;;
      a) ## identify aa as a unit
        if [[ $x2 == a ]]; then
          wrd[wlg]="aa"
          xx=${xx:1}
          len1=$((len1 - 1))
        fi
        ;;
    esac
    wlg=$((wlg + 1))
  done
  wrd[wlg]=' ' ## identify end

  # ---- pass 2: locate e and i then go back ----
  # The loop runs once per input character, which is never fewer than the
  # number of units; indexes past the end read as empty and are ignored.
  # eipos is the index of the last unit that e,i,oi cannot cross.
  wlg=0
  eipos=-1
  len1=$len
  while ((len1 > 0)); do
    case "${wrd[wlg]}" in
      [aouAEIOUVK^] | '*'[ie] | '*'o[iu] | '*'aa | NNG | [rR]R | ':/' | '//')
        ## identify the trivial ones: e,i,oi cannot cross these
        eipos=$wlg
        ;;
      e | i | o[iu] | aa)
        j0=$((eipos + 1)) ## default position but may move forward
        #### error detection: nothing between the barrier and the vowel
        if ((j0 > wlg - 1)); then
          if { : >&3; } 2>/dev/null; then
            err_fd=3
          else
            err_fd=2
          fi
          printf ' ?ERROR: %s \n' "$src" | tr '^' ' ' >&"$err_fd"
          break
        fi
        ## yy: part of the word from the default position up to e,i,oi
        yy=${wrd[j0]}
        while ((j0 < wlg - 1)); do
          j0=$((j0 + 1))
          yy=$yy${wrd[j0]}
        done
        case "$yy" in ## check for patterns
          ?) ## example: ki
            indx=$((wlg - 1))
            ;;
          *) ## or else reduce the cluster to a skeleton of k, F and /
            yy1=${yy//kK/X}                         ## kK -> X (becomes k below)
            yy1=${yy1//[Ss]h/k}                     ## replace sh by k
            yy1=${yy1//[hRMLWYFK]/F}                ## replace all fala's by F
            yy1=${yy1//NN/F}                        ## replace chadrabinduu by F
            yy1=${yy1//N[GJ]/kF}                    ## replace NG, NJ
            yy1=${yy1//[gcjJTDNtdnpbmrlHsSBwX]/k}   ## replace by k
            case "$yy1" in
              #rule 1
              *[^/][^F]) ## baki
                indx=$((wlg - 1))
                ;;
              #rule 2
              kF | *[^/]kF) ## khela, barhi, kosMin
                indx=$((wlg - 2))
                ;;
              #rule 3
              k[F/][kF] | *[^/]k[F/][kF]) ## ShWe, g/Ye, k/ki but not k/khWe
                indx=$((wlg - 3))
                ;;
              #rule 4
              *[^/]k/kF | *[^/]kF/k) ## k/khi, kh/ki, k/kWi
                indx=$((wlg - 4))
                ;;
              #rule 5
              *[^/]kF/kF) ## kh/khi, NJ/chi
                indx=$((wlg - 5))
                ;;
              #rule 6
              *[^/]kF/kFF | *[^/]kFF/kF) ## kh/khWi, khh/khi
                indx=$((wlg - 6))
                ;;
              #rule 7
              *) ## when everything fails
                indx=$((eipos + 1))
                ;;
            esac
            ;;
        esac
        wrd[indx]='\*'${wrd[indx]}
        wrd[wlg]='*'${wrd[wlg]}
        eipos=$wlg
        ;;
    esac
    len1=$((len1 - 1))
    wlg=$((wlg + 1))
  done

  # Glue the units together, drop the end marker and restore the spellings
  # *{oi}, *eou and *{aa} for the starred vowel signs.
  fl_0=
  for unit in "${wrd[@]}"; do
    fl_0=$fl_0$unit
  done
  fl_0=${fl_0// /}
  fl_0=${fl_0//\*oi/$oi_out}
  fl_0=${fl_0//\*ou/*eou}
  fl_0=${fl_0//\*aa/$aa_out}
  printf '%s\n' "$fl_0"
  return
}

test_ei=0 # =1 => convert
klng=0
#be careful about \%, \\, ^ in the file - they should be preserved.
# Main conversion loop.
#
# The input is pre-processed by sed so that the line-level tests below stay
# simple:  \%  becomes  *char25  (so an escaped percent is not mistaken for
# a comment),  ^  becomes  *char94,  and every blank becomes '^' (the word
# separator understood by ei_thik).  put_line undoes all three on output.
# Lines are read with "read -r", so backslashes arrive unchanged and need no
# doubling beforehand.
#
# test_ei is the state:  0 = copy lines unchanged, 1 = convert lines,
# 10 / 01 = a marker line was seen and -n/-d was given: skip that line, then
# continue in state 1 / 0.
#
# NOTE(review): in this copy of the script both marker patterns are the bare
# string '%', so the first case arm always wins and the "end" arm can never
# fire: conversion starts at the first line containing '%' and never stops
# (and with -n/-d every line containing '%' is dropped).  The help text
# describes two distinct markers; their literal text appears to have been
# lost from this copy.  Set start_mark / end_mark to the real markers after
# confirming them against an upstream copy of seicor.
start_mark='%'
end_mark='%'

# Writes one line to the output file, turning '^' back into blanks and
# restoring the protected  \%  and  ^  characters.
function put_line {
  printf '%s\n' "$1" |
    tr '^' ' ' |
    sed -e 's/\*char25/\\%/g' -e "s/\*char94/\^/g" >>"$outfile"
}

sed -e 's/\\%/\*char25/g' -e 's/\^/\*char94/g' -e 's/ /^/g' <"$infile" |
  # The "|| [[ -n ... ]]" keeps a final line that has no trailing newline.
  while read -r AAA || [[ -n $AAA ]]; do
    # progress display: one '#' for every numl lines read
    klng=$((klng + 1))
    if [ "$klng" = "$numl" ]; then
      printf '#'
      klng=0
    fi

    case "$AAA" in
      *"$start_mark"*) ## shuru (start)
        test_ei=1
        if test "$no_ei" = '1'; then
          test_ei=10
        fi
        ;;
      *"$end_mark"*) ## shesh (end)
        test_ei=0
        if test "$no_ei" = '1'; then
          test_ei=01
        fi
        ;;
      *) : ;; ## continue in the current state
    esac

    case "$test_ei" in
      1)
        # Only the text left of the first '%' is converted; the comment,
        # including the '%' itself, is passed through untouched.  A bare
        # '%' at the end of a line is kept as well.
        case "$AAA" in
          *%*)
            ZZZ_1=${AAA%%'%'*}
            ZZZ_2=%${AAA#*'%'}
            ;;
          *)
            ZZZ_1=$AAA
            ZZZ_2=
            ;;
        esac
        ZZZ_1=${ZZZ_1:+$ZZZ_1'^'} ## if not empty add ^ for end
        ZZZ_1=$(ei_thik "$ZZZ_1")
        put_line "$ZZZ_1$ZZZ_2"
        ;;
      0)
        put_line "$AAA"
        ;;
      10) test_ei=1 ;;
      01) test_ei=0 ;;
    esac
  done 3>&1 # fd 3 carries ei_thik's "?ERROR" messages to the terminal

echo " "
echo " done (seicor version number: $vrsn)"
echo "output file: $outfile"
echo "check http://www.iopb.res.in/~somen/Prog/seicor for current version of seicor"
exit 0