source: trunk/libIGCM/AA_RunChecker @ 760

Last change on this file since 760 was 760, checked in by labetoulle, 12 years ago

RunChecker:

  • Bugfix in error detection for post-treatment jobs ;
  • Cosmetic change in IGCM_config_CommonConfiguration to avoid blank lines in output.
  • Property svn:executable set to *
  • Property svn:keywords set to Revision Author Date
File size: 15.4 KB
Line 
1#!/bin/ksh
2
3#**************************************************************
4# Author: Sonia Labetoulle
5# Contact: sonia.labetoulle__at__ipsl.jussieu.fr
6# $Revision::                                          $ Revision of last commit
7# $Author::                                            $ Author of last commit
8# $Date::                                              $ Date of last commit
9# IPSL (2012)
10#  This software is governed by the CeCILL licence see libIGCM/libIGCM_CeCILL.LIC
11#
12#**************************************************************
13
14#======================================================================#
15# Display a report on how a simulation is faring.
16
17
#----------------------------------------------------------------------#
# ChangeUsr path
#
# Print <path> with the first occurrence of ${CurrentUsr} replaced by
# ${TargetUsr} and the first occurrence of ${CurrentGrp} replaced by
# ${TargetGrp}.
#
# Globals read : CurrentUsr, TargetUsr, CurrentGrp, TargetGrp
# Output       : the translated path on stdout
#
# The user and group names are used as sed regular expressions, exactly
# as before. The group substitution is skipped when ${CurrentGrp} is
# empty: an empty sed pattern means "reuse the previous regex", which
# would otherwise rewrite a second occurrence of the user name.
#----------------------------------------------------------------------#
function ChangeUsr {

  typeset InPath="$1"

  if [ -n "${CurrentGrp}" ] ; then
    printf '%s\n' "${InPath}" | sed -e "s/${CurrentUsr}/${TargetUsr}/" \
                                    -e "s/${CurrentGrp}/${TargetGrp}/"
  else
    printf '%s\n' "${InPath}" | sed -e "s/${CurrentUsr}/${TargetUsr}/"
  fi

}
24
25
#----------------------------------------------------------------------#
# SearchCatalog
#
# Find the submission directory of the simulation ${JobName} and store
# it in SUBMIT_DIR (unset on entry).
#
# Lookup order:
#   1) ${SimuCatalog}, whose lines are "JobName User Host SubmitDir";
#      the first field is matched against ${JobName} as an awk regex.
#   2) if nothing was found and fg_path is true : Job_* files found in
#      ${ConfigPath}.
#   3) if nothing was found and fg_search is true : Job_${JobName}
#      files found under ${WORKDIR} (IGCM_OUT is pruned), translated
#      with ChangeUsr when the target user is not the current user.
#
# When several candidates exist the user is asked to pick one.
# A job found through 2) or 3) is appended to the catalog.
#
# Globals read    : JobName, SimuCatalog, fg_path, fg_search, ConfigPath,
#                   WORKDIR, TargetUsr, CurrentUsr, HostName
# Globals written : SUBMIT_DIR, JobName, NbOcc, fg_new, FileList, ind,
#                   Choice, SEARCH_DIR
# Exits the script when the job cannot be identified.
#
# NOTE(review): when several catalog lines match ${JobName}, FileList
# is not filled from the catalog, so the menu has nothing to offer;
# that case now stops with an error instead of appending a broken line
# to the catalog.
#----------------------------------------------------------------------#
function SearchCatalog {

  typeset CatalogUsr CatalogHost
  unset SUBMIT_DIR

  fg_new=false

  # 1) Number of catalog entries matching the job name
  NbOcc=$( awk -v JobName="${JobName}" \
           'BEGIN {x=0}  $1 ~ JobName {++x} END {print x}' "${SimuCatalog}" )

  # 2) Job files in the directory given with -p
  if ( [ ${NbOcc} -eq 0 ] && ( $fg_path ) ) ; then
    set -A FileList $( ls ${ConfigPath}/Job_* )
    if [ "X${FileList}" = "X" ] ; then
      NbOcc=0
    else
      NbOcc=${#FileList[@]}
      fg_new=true
    fi
  fi

  # 3) Job files found by scanning the work directory (-s)
  if ( [ ${NbOcc} -eq 0 ] && ( ${fg_search} ) ) ; then
    SEARCH_DIR=${WORKDIR}
    if [ "${TargetUsr}" != "${CurrentUsr}" ] ; then
      SEARCH_DIR=$( ChangeUsr ${SEARCH_DIR})
    fi
    echo "${JobName} not in Catalog, we'll try to find it in ${SEARCH_DIR}"

    set -A FileList $( find ${SEARCH_DIR}/ \
                            -path ${SEARCH_DIR}/IGCM_OUT -prune -o \
                            -name Job_${JobName} -print )
    if [ "X${FileList}" = "X" ] ; then
      NbOcc=0
    else
      NbOcc=${#FileList[@]}
      fg_new=true
    fi
  fi

  # Several candidates : let the user choose
  if [ ${NbOcc} -gt 1 ] ; then
    echo "More than one job"
    ind=0
    while [ ${ind} -lt ${NbOcc} ] ; do
      printf '%2i) %-30s\n' "${ind}" "${FileList[${ind}]}"
      (( ind = ind + 1 ))
    done
    echo "Give your choice number or 'q' to quit : "
    read Choice

    # Only 'q' or a plain non-negative integer is accepted; anything
    # else would silently be evaluated as an arithmetic subscript.
    case "${Choice}" in
      q)
        exit ;;
      ''|*[!0-9]*)
        echo "Invalid choice : '${Choice}'"
        exit 1 ;;
    esac
    if [ "${Choice}" -ge "${NbOcc}" ] || [ -z "${FileList[${Choice}]}" ] ; then
      echo "Invalid choice : '${Choice}'"
      exit 1
    fi

    fg_new=true
    FileList=${FileList[${Choice}]}
    NbOcc=1
  fi

  case ${NbOcc} in
    0)
      echo "${JobName} not found."
      echo "You can try : *) '-s' option to automatically search your \$WORKDIR, "
      echo "              *) '-p' option to provide the config.card path, "
      echo "              *)  manually editing your ${SimuCatalog}"
      exit ;;
    1)
      if ( ${fg_new} ) ; then
        # Newly discovered job : register it in the catalog.
        # The job name is the file name without its "Job_" prefix, so
        # that names containing '_' are kept whole.
        if [ -z "${JobName}" ] ; then
          JobName=$( basename "${FileList}" | sed -e 's/^Job_//' )
        fi
        SUBMIT_DIR=$( dirname "${FileList}" )
        echo "${JobName} ${TargetUsr} ${HostName} ${SUBMIT_DIR}"
        echo "${JobName} ${TargetUsr} ${HostName} ${SUBMIT_DIR}" >> "${SimuCatalog}"
      else
        # Known job : the entry must belong to the target user and host.
        CatalogUsr=$( awk -v JobName="${JobName}" \
                          '$1 ~ JobName {print $2}' "${SimuCatalog}" )
        CatalogHost=$( awk -v JobName="${JobName}" \
                           '$1 ~ JobName {print $3}' "${SimuCatalog}" )
        if [ "${TargetUsr}" = "${CatalogUsr}" ] \
           && [ "${HostName}" = "${CatalogHost}" ] ; then
          JobName=$( awk -v JobName="${JobName}" '$1 ~ JobName {print $1}' "${SimuCatalog}" )
          SUBMIT_DIR=$( awk -v JobName="${JobName}" '$1 ~ JobName {print $4}' "${SimuCatalog}" )
        else
          echo "${JobName} not in Catalog."
          exit
        fi
      fi
      ;;
  esac

  return

}
115
116
#----------------------------------------------------------------------#
# AffichResult -f | -j | -l
#
# Print one part of the report table:
#   -f, --first : table header and the simulation status line
#   -j, --job   : one line of post-processing job status, for the
#                 history index held in the global ${ind}
#   -l, --last  : table footer followed by the current date
# When several flags are given, the first of header / job / footer
# that is set is printed.
#
# Globals read : JobName, LastWrite, DateBegin, DateEnd, PeriodState,
#                PeriodDateBegin, PeriodDateEnd, CumulPeriod, NbRebuild,
#                FirstRebuild, LastRebuild, JobType_list, ind,
#                <JobType>_Date / _Status / _Nb, monitoring_Date,
#                atlas_Date, DateFormat
# Globals written : fg_first, fg_last, fg_job, Col*, Color, len, Date,
#                   Status, Nb, JobType
#
# Every value handed to printf is quoted so that an empty field stays
# in its own column instead of shifting the following ones.
#----------------------------------------------------------------------#
function AffichResult {

  fg_first=false
  fg_last=false
  fg_job=false

  while [ $# -ne 0 ] ; do
    case ${1} in
      -f|--first)
        fg_first=true
        shift 1 ;;
      -l|--last)
        fg_last=true
        shift 1 ;;
      -j|--job)
        fg_job=true
        shift 1 ;;
      -*)
        echo "usage: ${0}"
        echo "       options = -f; -l; -j"
        exit ;;
      *)
        break ;;
    esac
  done


  # Define colors
  # =============
  ColEsc="\033["
  ColNon="${ColEsc}0m"       # Return to normal
  ColExp="${ColEsc}1m"       # White - bold
  ColFat="${ColEsc}1;31m"    # Fatal
  ColCpl="${ColEsc}1;32m"    # Completed
  ColAtt="${ColEsc}1;30m"    # Waiting
  ColDef="${ColEsc}1;34m"    # Default
  ColRbl="${ColEsc}31m"      # Rebuild


  # Print header
  # ============
  if ( ${fg_first} ) ; then
    # Blank padding between the job name and the run.card time stamp
    (( len = 67 - ${#JobName} ))
    echo "|===========================================================================================================|"
    printf "| JobName = ${ColExp}%s${ColNon}" "${JobName}"
    if [ ${len} -gt 0 ] ; then
      printf "%${len}s" ""
    fi
    printf "run.card : ${ColExp}%-17s${ColNon}|\n" "${LastWrite} "
    echo "|-------------------------|-------------|-------------------------|-------------|-----:----------:----------|"
    echo "|                         |             |                         |             |     Pending Rebuilds      |"
    echo "| Date Begin - DateEnd    | PeriodState | Current Period          | CumulPeriod | Nb  : from     : to       |"
    echo "|-------------------------|-------------|-------------------------|-------------|-----:----------:----------|"

    printf "| %-10s - %-10s | " \
           "${DateBegin}" "${DateEnd}"

    # Colour of the period state
    case "${PeriodState}" in
      Fatal)
        Color=${ColFat} ;;
      Completed)
        Color=${ColCpl} ;;
      Waiting|OnQueue)
        Color=${ColAtt} ;;
      *)
        Color=${ColDef} ;;
    esac
    printf "${Color}%-11s${ColNon} | " "${PeriodState}"

    printf "%-10s - %-10s | %11s | " \
           "${PeriodDateBegin}" "${PeriodDateEnd}" "${CumulPeriod}"

    # Pending rebuilds are highlighted when there is at least one
    if ( [ "X${NbRebuild}" != "X." ] && [ "X${NbRebuild}" != "X0" ] ) ; then
      printf "${ColRbl}%3s : %-8s : %-8s${ColNon} |\n" \
             "${NbRebuild}" "${FirstRebuild}" "${LastRebuild}"
    else
      printf "%3s : %-8s : %-8s |\n" \
             "${NbRebuild}" "${FirstRebuild}" "${LastRebuild}"
    fi

    printf "|-----------------------------------------------------------------------------------------------------------|\n"
    printf "|                                                      Last                                                 |\n"
    printf "|     Rebuild      |   Pack_Output    |   Pack_Restart   |    Pack_Debug    |  Monitoring  |     Atlas      |\n"
    printf "|------------------|------------------|------------------|------------------|--------------|----------------|\n"

    return
  fi

  # Print Post-processing job status
  # ================================
  if ( ${fg_job} ) ; then
    printf "|"

    # Print rebuild and pack jobs
    # ---------------------------
    for JobType in ${JobType_list[*]} ; do
      eval Date=\${${JobType}_Date[${ind}]}
      eval Status=\${${JobType}_Status[${ind}]}
      eval Nb=\${${JobType}_Nb[${ind}]}

      if [ "X${Status}" = "XOK" ] ; then
        Color=${ColCpl}
      else
        Color=${ColFat}
      fi
      # A job type with no entry at this index is shown with a count of 0
      printf "  ${Color}%-8s${ColNon} : %3i  |" "${Date}" "${Nb:-0}"
    done

    Color=${ColExp}

    # Print monitoring jobs
    # ---------------------
    # Only one monitoring date exists; it is shown on the first line.
    JobType=monitoring
    if [ ${ind} -eq 0 ] ; then
      eval Date=\${${JobType}_Date}
    else
      Date=""
    fi
    printf "     ${Color}%-4s${ColNon}     |" "${Date}"

    # Print atlas jobs
    # ----------------
    JobType=atlas
    eval Date=\${${JobType}_Date[${ind}]}
    printf "  ${Color}%-12s${ColNon}  |" "${Date}"

    printf "\n"

    return
  fi

  # Print footer
  # ============
  if ( ${fg_last} ) ; then
    printf "|===========================================================================================================|\n"
    date +"${DateFormat}"
    return
  fi

}
261
262#======================================================================#
263
#D- Task type (computing or post-processing)
TaskType=post-processing
typeset -i Verbosity=0

# User running the script, and the third word printed by "groups"
# for that user.
CurrentUsr=$( whoami )
CurrentGrp=$( groups $CurrentUsr | awk '{print $3}' )

# Called without any argument : show the help and stop.
[ $# -eq 0 ] && { $0 -h ; exit ; }
275
# Arguments
# =========
# Default argument values
# -----------------------
TargetUsr=${CurrentUsr}
# Host name truncated at its first digit
HostName=$( hostname | sed -e "s/[0-9].*//" )

fg_color=true
fg_search=false
fg_quiet=false
fg_path=false
NbHisto=10

# Get arguments from command line
# -------------------------------
# Options are consumed until the first non-option word, which is left
# in $1 for the code that follows.
while [ $# -ne 0 ] ; do
  case $1 in
    -h|--help|-help)
      echo "usage: $0 [-u user] [-q] [-j n] [-s] job_name"
      echo "       $0 [-u user] [-q] [-j n] -p config.card_path"
      echo ""
      echo "options :"
      echo "  -h : print this help and exit"
      echo "  -u : owner of the job"
      echo "  -q : quiet"
      echo "  -j : print n post-processing jobs (default is 10)"
      echo "  -s : search for a new job in \$WORKDIR and fill in "
      echo "       the catalog before printing information"
      echo "  -p : give the config.card path to a new simulation, "
      echo "       instead of the job name."
      exit ;;
#    -b|-nocolor)
#      fg_color=false
#      shift 1 ;;
    -s|-search)
      fg_search=true
      shift 1 ;;
    -q|-quiet)
      fg_quiet=true
      shift 1 ;;
    -p|-config-path|-j|-job-number|-u|-user)
      # These options take a value : without it "shift 2" would fail
      # and the option would never be consumed.
      if [ $# -lt 2 ] ; then
        echo "Option $1 requires an argument."
        $0 -h
        exit 1
      fi
      case $1 in
        -p|-config-path)
          ConfigPath="$2"
          fg_path=true ;;
        -j|-job-number)
          # NbHisto is later handed to "tail -n" : it must be an integer
          case $2 in
            ''|*[!0-9]*)
              echo "Option $1 requires an integer argument."
              exit 1 ;;
          esac
          NbHisto="$2" ;;
        -u|-user)
          TargetUsr="$2" ;;
      esac
      shift 2 ;;
    -*)
      $0 -h
      exit ;;
    *)
      break ;;
  esac
done
333
334
# A job name is required unless a config.card path was given (-p).
if ! ${fg_path} && [ $# -lt 1 ] ; then
  $0 -h
  exit
fi

# -s and -p are mutually exclusive.
if ${fg_path} && ${fg_search} ; then
  echo "You cannot use -s and -p at the same time"
  exit
fi
344
345
# Load libIGCM library
# ====================
# libIGCM keeps its value when already set; otherwise the default
# below is used.
: ${libIGCM:=::modipsl::/libIGCM}

# debug, card and date modules, each followed by its self-check when
# DEBUG_debug evaluates to a successful command.
. ${libIGCM}/libIGCM_debug/libIGCM_debug.ksh
if ( ${DEBUG_debug} ) ; then IGCM_debug_Check ; fi
. ${libIGCM}/libIGCM_card/libIGCM_card.ksh
if ( ${DEBUG_debug} ) ; then IGCM_card_Check ; fi
. ${libIGCM}/libIGCM_date/libIGCM_date.ksh
if ( ${DEBUG_debug} ) ; then IGCM_date_Check ; fi
#-------
# sys and config modules (no self-check is run for them)
. ${libIGCM}/libIGCM_sys/libIGCM_sys.ksh
. ${libIGCM}/libIGCM_config/libIGCM_config.ksh
359
360
# First remaining word of the command line (empty when only -p was given)
JobName=$1

# Quiet mode : keep a single line of post-processing history
( ${fg_quiet} ) && NbHisto=1

echo "Target user = ${TargetUsr}"

# Third word printed by "groups" for the target user
TargetGrp=$( groups $TargetUsr | awk '{print $3}' )

# Define the catalog in which the known simulations are stored
SimuCatalog="$( ccc_home )/.simucatalog.dat"
# Make sure the catalog file exists before it is read
[ -s ${SimuCatalog} ] || touch ${SimuCatalog}

# Date format
DateFormat="%d/%m/%y %R:%S"

# Find SUBMIT_DIR in catalog
# ==========================
SearchCatalog
383
# Main report : only when SearchCatalog found a submission directory
if [ ! "X${SUBMIT_DIR}" = "X" ] ; then

  echo "Submit:  >${SUBMIT_DIR}<"
  # A catalog entry may point to a directory that no longer exists or
  # cannot be entered : stop instead of reporting from the wrong place.
  cd "${SUBMIT_DIR}" || { echo "Cannot access ${SUBMIT_DIR}" ; exit 1 ; }


  # Extract useful information from run.card and config.card
  # ========================================================

  RunFile="${SUBMIT_DIR}/run.card"
  ConfFile="${SUBMIT_DIR}/config.card"

  IGCM_config_CommonConfiguration ${ConfFile}


  # run.card is read only when it exists and is not empty; otherwise
  # the simulation is reported as waiting, with no cumulated period.
  if [ -s ${RunFile} ] ; then
    IGCM_card_DefineVariableFromOption ${RunFile} Configuration PeriodState
    PeriodState=${run_Configuration_PeriodState}
    IGCM_card_DefineVariableFromOption ${RunFile} Configuration CumulPeriod
    CumulPeriod=${run_Configuration_CumulPeriod}
  else
    PeriodState="Waiting"
    CumulPeriod="."
  fi

  # A job declared Running or OnQueue that does not show up in the
  # "ccc_mstat -f" output is reported as Fatal.
  if ( [ "X${PeriodState}" = "XRunning" ] || [ "X${PeriodState}" = "XOnQueue" ] ) ; then
    NbRun=$( ccc_mstat -f | grep -c ${JobName} )

    if [ ${NbRun} -eq 0 ] ; then
      PeriodState="Fatal"
    fi
  fi

  DateBegin=${config_UserChoices_DateBegin}
  DateEnd=${config_UserChoices_DateEnd}
  TagName=${config_UserChoices_TagName}
  ExperimentName=${config_UserChoices_ExperimentName}
  SpaceName=${config_UserChoices_SpaceName}

  # The current period is only read for states other than Waiting and
  # Completed.
  if ( [ ! "X${PeriodState}" = "XWaiting" ] && [ ! "X${PeriodState}" = "XCompleted" ] ) ; then
    IGCM_card_DefineVariableFromOption ${RunFile} Configuration PeriodDateBegin
    IGCM_card_DefineVariableFromOption ${RunFile} Configuration PeriodDateEnd
    PeriodDateBegin=${run_Configuration_PeriodDateBegin}
    PeriodDateEnd=${run_Configuration_PeriodDateEnd}
  else
    PeriodDateBegin="."
    PeriodDateEnd="."
  fi

  # Directories to inspect
  DATA_DIR=${R_SAVE}
  POST_DIR=${R_BUFR}/Out
  CWORK_DIR=${R_FIGR}
  if [ "X${config_Post_RebuildFromArchive}" = "Xtrue" ] ; then
    REBUILD_DIR=${R_SAVE}/TMP
    RebuildJob="rebuild_fromArchive"
  else
    REBUILD_DIR=${BIG_DIR}/${config_UserChoices_TagName}/${config_UserChoices_JobName}
    RebuildJob="rebuild_fromWorkdir"
  fi

  # Looking at somebody else's simulation : translate the paths
  if [ "${TargetUsr}" != "${CurrentUsr}" ] ; then
    DATA_DIR=$( ChangeUsr ${DATA_DIR})
    POST_DIR=$( ChangeUsr ${POST_DIR} )
    CWORK_DIR=$( ChangeUsr ${CWORK_DIR} )
    REBUILD_DIR=$( ChangeUsr ${REBUILD_DIR} )
  fi

  echo "Data:    >${DATA_DIR}<"
  echo "Rebuild: >${REBUILD_DIR}<"
  echo "Post:    >${POST_DIR}<"
  echo "Work:    >${CWORK_DIR}<"

  if [ "${PeriodState}" != "Waiting" ] ; then

    # Check pending rebuilds
    # ======================

    set -A RebuildList $( find ${REBUILD_DIR}/ -name "REBUILD_*" | sort )
    if [ ${#RebuildList[*]} -gt 0 ] ; then
      NbRebuild=$( IGCM_sys_CountFileArchive ${REBUILD_DIR} )

      # The last entry is taken from the list itself : the count above
      # comes from a different command and may not match the list length.
      (( IndLast = ${#RebuildList[*]} - 1 ))

      FirstRebuild=$( basename ${RebuildList[0]} | cut -f2 -d\_ )
      LastRebuild=$( basename ${RebuildList[${IndLast}]} | cut -f2 -d\_ )
    else
      NbRebuild="."

      FirstRebuild="."
      LastRebuild="."
    fi
  else
    NbRebuild="."

    FirstRebuild="."
    LastRebuild="."
  fi


  # Check last REBUILD and PACK* jobs
  # =================================
  # Define input parameters
  # -----------------------
  # For each job type : the string looked for in the first field of the
  # job output lines, and a field number (<JobType>_Field).
  set -A JobType_list "${RebuildJob}" "pack_output" "pack_restart" "pack_debug"

  for JobType in ${JobType_list[*]} ; do
    if [ "X${JobType}" = "X${RebuildJob}" ] ; then
      eval ${JobType}_String=IGCM_sys_PutBuffer_Out
      eval ${JobType}_Field=4
    else
      eval ${JobType}_String=IGCM_sys_Put_Out
      eval ${JobType}_Field=3
    fi
  done

  # Check jobs
  # ----------
  # NbLines ends up as the largest number of history entries over all
  # job types, i.e. the number of lines of the final table.
  NbLines=0
  for JobType in ${JobType_list[*]} ; do
    eval String=\${${JobType}_String}
    eval Field=\${${JobType}_Field}

    # The NbHisto last output files of this job type
    set -A FileList $( ls ${POST_DIR}/${JobType}.*.out | tail -n ${NbHisto} )

    if [ ${#FileList[*]} -gt ${NbLines} ] ; then
      NbLines=${#FileList[*]}
    fi

    (( ind = 0 ))
    for FileName in ${FileList[*]} ; do
      # Date = last but one dot-separated field of the file name
      LastDate=$( basename ${FileName} | awk -F"." '{ print $(NF-1) }' )

      # Lines whose first field matches String, with / without "error."
      # in the third field. The listed file itself is read.
      Error=$( awk -v String=${String} \
                   'BEGIN { x=0 } ($1~String) && ($3~"error.") { x=x+1 } END { print x }' \
                   ${FileName} )

      Match=$( awk -v String=${String} \
                   'BEGIN { x=0 } ($1~String) && ($3!~"error.") { x=x+1 } END { print x }' \
                   ${FileName} )
      (( Nb = ${Match} - ${Error} ))

      if ( [ ${Error} -eq 0 ] && [ ${Nb} -gt 0 ] ) ; then
        Status=OK
      else
        Status=KO
      fi

      eval ${JobType}_Date[$ind]=${LastDate}
      eval ${JobType}_Status[$ind]=${Status}
      eval ${JobType}_Nb[$ind]=${Nb}

      (( ind = ind + 1 ))
    done
  done

  # Check last MONITORING jobs
  # ==========================
  # Last year found by "cdo showyear" in the monitoring file
  JobType=monitoring
  if [ -d ${CWORK_DIR}/MONITORING ] ; then
    LastDate=$( cdo showyear ${CWORK_DIR}/MONITORING/files/ATM_bils_global_ave.nc 2> /dev/null | \
                    awk '{ print $NF }' )
    eval ${JobType}_Date=${LastDate}
  fi

  # Check last ATLAS jobs
  # =====================
  # The NbHisto last entries of the ATLAS directory
  JobType=atlas
  if [ -d ${CWORK_DIR}/ATLAS ] ; then
    set -A FileList $( ls ${CWORK_DIR}/ATLAS | tail -n ${NbHisto} )

    if [ ${#FileList[*]} -gt ${NbLines} ] ; then
      NbLines=${#FileList[*]}
    fi

    (( ind = 0 ))
    for FileName in ${FileList[*]} ; do
      eval ${JobType}_Date[$ind]=${FileName}
      (( ind = ind + 1 ))
    done
  fi


  # Time of last write on run.card
  # ==============================
  if [ -f ${RunFile} ] ; then
    LastWrite=$( ls -l --time-style=+"${DateFormat}" ${RunFile} | awk '{print $6 " " $7}' )
  else
    LastWrite=""
  fi


  # Print results
  # =============
  # AffichResult -j reads the line number from the global ${ind}
  AffichResult -f
  ind=0
  while [ ${ind} -lt ${NbLines} ] ; do
    AffichResult -j
    (( ind = ind + 1 ))
  done
  AffichResult -l

fi
582
Note: See TracBrowser for help on using the repository browser.