#!/bin/bash

#######################################
# Build the list of commands that still have to be run after a pack execution.
#
# Two sources feed the new list:
#   1. every report line of the log (lines starting with
#      "#executed by process") whose 9th field, the result code, is not 0;
#   2. every command of the previous list whose last field (the list path)
#      does not appear anywhere in the log, e.g. because the job was
#      interrupted before the command was started.
# The resulting file is sorted and de-duplicated.
#
# Arguments:
#   $1 - file receiving the new command list (truncated first)
#   $2 - log file written while running the previous command list
#   $3 - file holding the previous command list
# Outputs:
#   Rewrites $1. Nothing is written to stdout.
# Returns:
#   1 if the temporary file used for de-duplication cannot be created.
#######################################
function update_tasks_list {
    local tasksListFile=$1
    local logFileOfPrevPack=$2
    local tasksListFileOfPrevPack=$3
    local cmdReport resCmd cmdToPrint newCmdToPrint cmd list tmpSorted
    local -a fields

    # Create the file if needed and empty it.
    : > "$tasksListFile"

    # Pass 1: commands that were reported with a non-zero result code.
    while IFS= read -r cmdReport || [[ -n "$cmdReport" ]]; do
        # Only report lines are of interest.
        [[ "$cmdReport" == '#executed by process'* ]] || continue

        IFS=$' \t' read -r -a fields <<< "$cmdReport"
        resCmd=${fields[8]:-}                      # 9th field: result code
        [[ "$resCmd" == "0" ]] && continue         # success: nothing to redo
        cmdToPrint=${fields[${#fields[@]}-1]}      # last field: list path
        # Same path with the first "output_ncrcat" replaced by "output_tar".
        newCmdToPrint=${cmdToPrint/output_ncrcat/output_tar}

        case "$resCmd" in
            5)
                echo "./process_list.sh $newCmdToPrint" >> "$tasksListFile"
                ;;
            10)
                echo "./process_list.sh $cmdToPrint" >> "$tasksListFile"
                echo "./process_list.sh $newCmdToPrint" >> "$tasksListFile"
                ;;
            50)
                # Nothing to do: the list that was meant to be concatenated
                # has been tarred instead because the concatenation failed.
                ;;
            *)
                echo "./process_list.sh $cmdToPrint" >> "$tasksListFile"
                ;;
        esac
    done < "$logFileOfPrevPack"

    # Pass 2: commands that were never handled at all (for instance when the
    # machine was interrupted). Walk through the previous command list and
    # put back every command whose list is absent from the log.
    while IFS= read -r cmd || [[ -n "$cmd" ]]; do
        IFS=$' \t' read -r -a fields <<< "$cmd"
        (( ${#fields[@]} > 0 )) || continue        # skip blank lines
        list=${fields[${#fields[@]}-1]}
        # -F: the path is data, not a regular expression.
        if ! grep -F -q -- "$list" "$logFileOfPrevPack"; then
            printf '%s\n' "$cmd" >> "$tasksListFile"
        fi
    done < "$tasksListFileOfPrevPack"

    # Two lists may end up identical: keep a single copy of each line.
    # A private temporary file is used so that nothing is left behind in the
    # current directory.
    tmpSorted=$(mktemp) || return 1
    sort "$tasksListFile" | uniq > "$tmpSorted"
    cat "$tmpSorted" > "$tasksListFile"
    rm -f -- "$tmpSorted"
}

#######################################
# Print the number of the current try.
#
# Globals:
#   USER_OUTPUT_PROGRESS (read) - directory holding numero_current_try.txt
#   badFailureFile (read)       - file receiving fatal error messages
# Outputs:
#   First line of ${USER_OUTPUT_PROGRESS}/numero_current_try.txt on stdout,
#   with leading/trailing blanks removed.
# Exits:
#   1 (after logging to $badFailureFile) when the file does not exist. When
#   called through $( ... ) only the substitution subshell exits; the caller
#   has to test the status.
#######################################
function getNumeroOfCurrentTry {
    local num_try="1"
    local tryNumFile=${USER_OUTPUT_PROGRESS}/numero_current_try.txt

    if [[ ! -e "$tryNumFile" ]]; then
        {
            echo "Le fichier :"
            echo "$tryNumFile"
            echo "doit etre present dans le repertoire :"
            echo "${USER_OUTPUT_PROGRESS}"
            echo "et il doit contenir un numero d'essai"
        } >> "$badFailureFile"
        exit 1
    fi

    # read trims surrounding blanks; it returns non-zero (value still set)
    # when the line has no trailing newline.
    IFS=$' \t' read -r num_try < "$tryNumFile" || true
    printf '%s\n' "$num_try"
}

#######################################
# Print the highest instance number already used for a given try.
#
# Looks in ${USER_OUTPUT_PROGRESS}/TRY__<try> for files named exactly
# inputCmd__try__<try>__instance__<1 or 2 digits>.list and prints the
# largest instance number found, as a plain decimal (0 when there is none).
# The working directory is left untouched.
#
# Globals:
#   USER_OUTPUT_PROGRESS (read), badFailureFile (read)
# Arguments:
#   $1 - try number
# Exits:
#   1 (after logging to $badFailureFile) when the try directory is missing.
#######################################
function getNumeroOfLastInstance {
    local numTry=$1
    local num_instance=0
    local progressDirectory="${USER_OUTPUT_PROGRESS}/TRY__${numTry}"
    local pattern="^inputCmd__try__${numTry}__instance__([0-9]{1,2})\\.list\$"
    local listFile num

    if [[ ! -d "$progressDirectory" ]]; then
        {
            echo "fonction getNumeroOfLastInstance :"
            echo "Le repertoire :"
            echo "$progressDirectory"
            echo "devrait exister. Il n'existe pas."
        } >> "$badFailureFile"
        exit 1
    fi

    # When nothing matches, the glob stays literal and is rejected by the
    # regular expression below.
    for listFile in "$progressDirectory"/inputCmd__try__"${numTry}"__instance__*.list; do
        [[ "${listFile##*/}" =~ $pattern ]] || continue
        num=$(( 10#${BASH_REMATCH[1]} ))   # force base 10 ("08" is not octal)
        if (( num > num_instance )); then
            num_instance=$num
        fi
    done

    echo "$num_instance"
}

#######################################
# Tell whether two command lists differ.
#
# Arguments:
#   $1, $2 - the two files to compare
# Globals:
#   badFailureFile (read) - file receiving fatal error messages
# Outputs:
#   "1" on stdout when the files differ (different number of lines, or a
#   line of $1 that is not a line of $2), "0" otherwise. Line order is not
#   significant.
# Exits:
#   1 (after logging to $badFailureFile) when a name is empty or a file is
#   missing. When called through $( ... ) only the subshell exits.
#######################################
function check_progress {
    local file1=$1
    local file2=$2
    local nbLineFile1 nbLineFile2

    if [[ -z "$file1" || -z "$file2" ]]; then
        echo "check_progress : Le nom d'au moins 1 des 2 fichiers d'entree est vide" >> "$badFailureFile"
        exit 1
    fi

    if [[ ! -e "$file1" || ! -e "$file2" ]]; then
        # Everything goes to the failure file: stdout carries the result and
        # is captured by the caller.
        {
            echo "check_progress : au moins un des 2 fichiers suivants n'existe pas :"
            echo "$file1"
            echo "$file2"
        } >> "$badFailureFile"
        exit 1
    fi

    nbLineFile1=$(wc -l < "$file1")
    nbLineFile2=$(wc -l < "$file2")
    if (( nbLineFile1 != nbLineFile2 )); then
        echo 1
        return
    fi

    # Select the lines of file1 that are not (whole, literal) lines of file2;
    # grep succeeds as soon as one such line exists.
    if grep -F -x -v -q -f "$file2" -- "$file1"; then
        echo 1
    else
        echo 0
    fi
}

#######################################
# Rewrite the report file from the command list and the pack output.
#
# Globals:
#   reportFile (read) - report to overwrite
#   inputCmd (read)   - command list that has been executed
#   output (read)     - log of the execution
#######################################
function update_report {
    {
        echo "Execution of tasks :"
        echo "------------------"
        cat "$inputCmd"
        echo
        echo "Results of tasks :"
        echo "----------------"
        cat "$output"
        echo
        echo
    } > "$reportFile"    # the report is emptied, then rebuilt
}

export RANDOM=$$ # random seed

#######################################
# Print a pseudo-random integer between 1 and the given limit (inclusive).
#
# Arguments:
#   $1 - upper limit, a positive integer
# Outputs:
#   The number on stdout; "0" when the limit is not a positive integer.
# Returns:
#   1 when the limit is not a positive integer, 0 otherwise.
#######################################
function gives_random_number {
    local lim=$1
    local bit

    # Guard against a division by zero in the modulo below.
    if ! [[ "$lim" =~ ^[0-9]+$ ]] || (( 10#$lim == 0 )); then
        echo 0
        return 1
    fi

    bit=$(( RANDOM % 10#$lim + 1 ))
    echo "$bit"
}

# The directive lines below are read by the batch submission tool and are
# therefore kept verbatim.
########## batch directives : begin ##########
#MSUB -r pack_ipsl # Nom du job
### mutable directives ###
#MSUB -o /ccc/dmfbuf/import_data.2/ccrt/dmnfs12/cont003/bacasable/GUILLAUME/PSEUDO_DMNFS_PROGRESS/zIGCM_OUT/detailed_pack_output/pack_ipsl_%I.o
#MSUB -e /ccc/dmfbuf/import_data.2/ccrt/dmnfs12/cont003/bacasable/GUILLAUME/PSEUDO_DMNFS_PROGRESS/zIGCM_OUT/detailed_pack_output/pack_ipsl_%I.e
#MSUB -n 7
#MSUB -T 900
#MSUB -A tgcc0013
#MSUB -q standard
#MSUB -Qos test
########## batch directives : end ##########
# ---------------------------------------------------------------------------
# Main part of the job: build the command list of this instance, run it,
# compute what is left to do, check a sample of the results and, when there
# is still work and progress, resubmit this same script.
# ---------------------------------------------------------------------------

export JOB_DIR=${LS_SUBCWD:-${PWD}}
export EXE_DIR=${JOB_DIR}
# NOTE(review): USER_OUTPUT_PROGRESS, IGCM_DEM, timeEndFile,
# timeLimitBeforeEnd, nbListsToCheck and getDateMilliSeconds are used below
# but not set in this script; presumably they come from this sourced file.
source "${EXE_DIR}/DEM_utilities.sh"

export badFailureFile=${USER_OUTPUT_PROGRESS}/badFailure.txt

# The helpers "exit 1" on error, but inside $( ... ) that only ends the
# substitution subshell: the status has to be propagated explicitly.
numCurrentTry=$( getNumeroOfCurrentTry ) || exit 1
export numCurrentTry
export progressDir="${USER_OUTPUT_PROGRESS}/TRY__${numCurrentTry}"
if [ ! -e "$progressDir" ]; then
    {
        echo "Le repertoire de suivi :"
        echo "$progressDir"
        echo "n'existe pas. STOP."
    } >> "$badFailureFile"
    exit 1
fi

numPrevInstance=$( getNumeroOfLastInstance "$numCurrentTry" ) || exit 1
export numPrevInstance
# 10# forces base 10 so that a value such as "08" is not read as octal.
export numNewInstance=$(( 10#${numPrevInstance:-0} + 1 ))

export inputCmd="${progressDir}/inputCmd__try__${numCurrentTry}__instance__${numNewInstance}.list"
export nextInputCmd="${progressDir}/nextInputCmd__try__${numCurrentTry}__instance__${numNewInstance}.list"
export output="${progressDir}/packOutput__try__${numCurrentTry}__instance__${numNewInstance}.log"
export reportFile="${progressDir}/report__try__${numCurrentTry}__instance__${numNewInstance}.log"
export checkFile="${progressDir}/check__try__${numCurrentTry}__instance__${numNewInstance}.log"
export checkFileTmp="checkTmp__try__${numCurrentTry}__instance__${numNewInstance}.txt"
export noInterruptFile="${progressDir}/noInterrupt__try__${numCurrentTry}__instance__${numNewInstance}.txt"

# debug, to be removed
#echo "inputCmd=$inputCmd" >> $badFailureFile
#echo "nextInputCmd=$nextInputCmd" >> $badFailureFile
#echo "output=$nextInputCmd" >> $badFailureFile
#echo "reportFile=$nextInputCmd" >> $badFailureFile
#echo "noInterruptFile=$noInterruptFile" >> $badFailureFile
# exit 0 # to be removed

# debug, to be removed
#if [ ${numNewInstance} -ge 4 ]
#then
#    echo "inputCmd=$inputCmd" >> $badFailureFile
#    echo "nextInputCmd=$nextInputCmd" >> $badFailureFile
#    echo "output=$nextInputCmd" >> $badFailureFile
#    echo "reportFile=$nextInputCmd" >> $badFailureFile
#    echo "noInterruptFile=$noInterruptFile" >> $badFailureFile
#    echo >> $badFailureFile
#fi

if [ "${numCurrentTry}" -le 1 ] && [ "${numNewInstance}" -le 1 ]; then
    # Very first try: build the task list by collecting the *.list files
    # found in the directories named in "config_card.liste".
    : > "${inputCmd}"
    while read -r CONFIG _ || [ -n "$CONFIG" ]; do
        [ -n "$CONFIG" ] || continue                 # skip blank lines
        PATH_SIMU=$( dirname "$CONFIG" )
        # One "./process_list.sh <file>" line per list file.
        find "$PATH_SIMU" -type f -name "*.list" \
            | sed 's;^;./process_list.sh ;' >> "${inputCmd}"
    done < "${IGCM_DEM}/config_card.liste"
else
    # try > 1 and instance == 1: the command list is built from the files of
    #   try - 1, last instance; an interruption of try - 1 has to be handled.
    # instance > 1: the command list is built from the files of the current
    #   try, previous instance.
    if [ "${numNewInstance}" -ge 2 ]; then
        nextInputCmd_of_PrevInst="${progressDir}/nextInputCmd__try__${numCurrentTry}__instance__${numPrevInstance}.list"
        if [ ! -e "$nextInputCmd_of_PrevInst" ]; then
            {
                echo "Le fichier suivant :"
                echo "$nextInputCmd_of_PrevInst"
                echo "n'existe pas. Il devrait exister. STOP."
            } >> "$badFailureFile"
            exit 1
        fi
        cat "$nextInputCmd_of_PrevInst" > "${inputCmd}"
    else # numNewInstance == 1
        numPrevTry=$(( 10#${numCurrentTry} - 1 ))
        numLastInstInstanceInPrevTry=$( getNumeroOfLastInstance "$numPrevTry" ) || exit 1
        # Marker of the previous try. It is kept in its own variable so that
        # the exported noInterruptFile keeps designating the marker of the
        # CURRENT instance, which is written at the end of this script.
        # NOTE(review): this marker (and nextInputCmd_of_LastInst in the
        # "no interruption" branch) is looked up in the current try
        # directory, not in TRY__${numPrevTry} — confirm this is intended.
        noInterruptFile_of_PrevTry="${progressDir}/noInterrupt__try__${numPrevTry}__instance__${numLastInstInstanceInPrevTry}.txt"
        if [ ! -e "$noInterruptFile_of_PrevTry" ]; then
            # The last try was interrupted unexpectedly: the command list is
            # rebuilt from the results of the previous try, last instance.
            prevProgressDir="${USER_OUTPUT_PROGRESS}/TRY__${numPrevTry}"
            nextInputCmd_of_LastInst="${prevProgressDir}/nextInputCmd__try__${numPrevTry}__instance__${numLastInstInstanceInPrevTry}.list"
            output_of_LastInst="${prevProgressDir}/packOutput__try__${numPrevTry}__instance__${numLastInstInstanceInPrevTry}.log"
            inputCmd_of_LastInst="${prevProgressDir}/inputCmd__try__${numPrevTry}__instance__${numLastInstInstanceInPrevTry}.list"
            if [ ! -e "$output_of_LastInst" ] || [ ! -e "$inputCmd_of_LastInst" ]; then
                {
                    echo "Les fichiers suivants :"
                    echo "$output_of_LastInst"
                    echo "$inputCmd_of_LastInst"
                    echo "n'existent pas. Il devrait exister. STOP."
                } >> "$badFailureFile"
                exit 1
            fi
            update_tasks_list "${nextInputCmd_of_LastInst}" "${output_of_LastInst}" "${inputCmd_of_LastInst}"
            cat "$nextInputCmd_of_LastInst" > "${inputCmd}"
        else
            nextInputCmd_of_LastInst="${progressDir}/nextInputCmd__try__${numPrevTry}__instance__${numLastInstInstanceInPrevTry}.list"
            if [ ! -e "$nextInputCmd_of_LastInst" ]; then
                {
                    echo "Le fichier suivant :"
                    echo "$nextInputCmd_of_LastInst"
                    echo "n'existe pas. Il devrait exister. STOP."
                } >> "$badFailureFile"
                exit 1
            fi
            cat "$nextInputCmd_of_LastInst" > "${inputCmd}"
        fi
    fi
fi

# Report initialisation: by default the computation is assumed interrupted;
# update_report overwrites this once the launcher has returned.
echo "No report. Le computation must have interrupted." > "$reportFile"

#########################################
# Safety net against endless resubmission.
if [ "${numNewInstance}" -ge 10 ]; then
    echo >> "$badFailureFile"
    echo "10eme instance. STOP." >> "$badFailureFile"
    exit 1
fi
###################################################

: > "$timeEndFile"

# Run the command list; the launcher's stderr is the execution log.
# timeLimitBeforeEnd is left unquoted, as before, so that an empty value
# does not become an empty argument.
# shellcheck disable=SC2086
ccc_mprun ./glost_launch -R $timeLimitBeforeEnd "${inputCmd}" 2>"${output}"
### ccc_mprun ./cmd_launch.exe ${inputCmd} 2>${output}
### ./cmd_launch.exe ${inputCmd} 2>${output}
### ccc_mprun -p standard -n ${BRIDGE_MSUB_NPROC} ./cmd_launch.exe ${inputCmd} 2>myIO/output.log
### mpirun -n 4 ./cmd_launch.exe myIO/inputCmd10.list 2>myIO/output.log

endExecutionTime=$( getDateMilliSeconds )
echo "end time:$endExecutionTime" >> "$timeEndFile"

# Restart handling:
# ----------------
update_report
update_tasks_list "$nextInputCmd" "$output" "$inputCmd"

# Checks on a few lists (whose processing looks correct):
# -------------------------------------------------------
if [ -z "${nbListsToCheck}" ]; then
    echo "nbre de listes a checker absent" >> "$checkFile"
    echo "nbre de listes a checker absent" >> "$badFailureFile"
    exit 1
fi

# One check command per list that was concatenated successfully: report
# lines with result code 0 (9th field) whose 12th field contains
# "output_ncrcat".
awk '/^#executed by process/ && $9 == 0 && $12 ~ /output_ncrcat/ {
         print "./check_ncrcat_list.sh " $12
     }' "$output" > "$checkFileTmp"

# Number of lists concatenated successfully.
nb_of_good_lists=$( wc -l < "$checkFileTmp" )

# The number of lists to check cannot exceed the number of lists available.
if [ "$nbListsToCheck" -ge "$nb_of_good_lists" ]; then
    nbListsToCheck=$nb_of_good_lists
fi

nbLstToCheck_tmp=$nbListsToCheck
nbCandidates=$nb_of_good_lists     # lines still present in $checkFileTmp
while [ "$nbLstToCheck_tmp" -gt 0 ]; do
    # Draw among ALL remaining candidates; drawing among 1..(checks left)
    # would only ever reach the first lines of the file.
    random_number=$( gives_random_number "$nbCandidates" )
    checkCmd=$( sed -n "${random_number}p" "$checkFileTmp" )

    # Run the check command; it is deliberately unquoted so that it splits
    # into the script name and its argument.
    # shellcheck disable=SC2086
    $checkCmd
    resCmd=$?
    if [ "x${resCmd}" != "x0" ]; then
        echo "$checkCmd ==> not OK ==> stop everything." >> "$checkFile"
        echo "$checkCmd ==> not OK ==> stop everything." >> "$badFailureFile"
        # exit 1 # TODO: to be restored
    else
        echo "$checkCmd ==> OK" >> "$checkFile"
    fi

    # Remove the command that has just been run from the temporary file.
    sed -i "${random_number}d" "$checkFileTmp"
    nbCandidates=$(( nbCandidates - 1 ))
    nbLstToCheck_tmp=$(( nbLstToCheck_tmp - 1 ))
done

rm -- "$checkFileTmp"
# ----- End of checks -------------------------------------------------------

# Marker telling the next try that this instance ran to completion.
echo "no interruption has occured" > "${noInterruptFile}"

# Everything went fine: nothing is left to do.
# -------------------------------------------
everythingOK=$( wc -l < "$nextInputCmd" )
if [ "${everythingOK}" -eq 0 ]; then
    echo "Tout s'est fini correctement" >> "$badFailureFile"
    exit 0
fi

# Is there any progress?
# ----------------------
# resDiff == 1 : the files differ
# resDiff == 0 : the files are identical
resDiff=$( check_progress "$inputCmd" "$nextInputCmd" ) || exit 1
if [ "x${resDiff}" == "x0" ]; then
    echo "Il n'y pas plus de progression" >> "$badFailureFile"
    exit 1
fi

# Chain with the same script.
ccc_msub launch_and_measureTime.sh