source: trunk/libIGCM/AA_RunChecker @ 835

Last change on this file since 835 was 835, checked in by labetoulle, 11 years ago

Runchecker : add rsh access for monitoring files on Ada. See #128.

  • Property svn:executable set to *
  • Property svn:keywords set to Revision Author Date
File size: 16.8 KB
Line 
1#!/bin/ksh
2
#**************************************************************
# Author: Sonia Labetoulle
# Contact: sonia.labetoulle__at__ipsl.jussieu.fr
# $Revision::                                          $ Revision of last commit
# $Author::                                            $ Author of last commit
# $Date::                                              $ Date of last commit
# IPSL (2012)
#  This software is governed by the CeCILL licence see libIGCM/libIGCM_CeCILL.LIC
#
#**************************************************************

#======================================================================#
# Display a report on how a simulation is faring.
16
17
#----------------------------------------------------------------------#
# ChangeUsr <path>
#
# Print <path> rewritten for another user: the first occurrence of
# ${CurrentUsr} is replaced by ${TargetUsr}, then the first occurrence of
# the current user's primary group is replaced by the target user's
# primary group.
#
# Globals read : CurrentUsr, TargetUsr
# Globals set  : CurrentGrp, TargetGrp (only visible to the caller when the
#                function is not run inside a command substitution)
# Output       : the rewritten path on stdout
#
# A substitution whose search pattern would be empty is skipped: with an
# empty pattern sed re-uses the previous regular expression, which would
# silently remove a second occurrence of the user name from the path.
#----------------------------------------------------------------------#
function ChangeUsr {

  # Primary group of each user (empty when the user cannot be resolved).
  CurrentGrp=$( id -gn "${CurrentUsr}" 2>/dev/null )
  TargetGrp=$( id -gn "${TargetUsr}" 2>/dev/null )

  if [ -z "${CurrentUsr}" ] || [ -z "${TargetUsr}" ] ; then
    # Nothing sensible to substitute: give the path back untouched.
    printf '%s\n' "$1"
  elif [ -n "${CurrentGrp}" ] && [ -n "${TargetGrp}" ] ; then
    printf '%s\n' "$1" | sed -e "s/${CurrentUsr}/${TargetUsr}/" \
                             -e "s/${CurrentGrp}/${TargetGrp}/"
  else
    # Unknown group on either side: only the user name is rewritten.
    printf '%s\n' "$1" | sed -e "s/${CurrentUsr}/${TargetUsr}/"
  fi

}
27
28
#----------------------------------------------------------------------#
# SearchCatalog
#
# Find the submission directory of the simulation to check and store it
# in SUBMIT_DIR.  Sources are tried in this order:
#   1. the catalog ${SimuCatalog} (lines "JobName User Host SubmitDir"),
#      where the first column is matched against ${JobName} as a regex;
#   2. with -p (fg_path)  : the Job_* file(s) found in ${ConfigPath};
#   3. with -s (fg_search): a "find" for Job_${JobName} below ${WORKDIR}
#      (IGCM_OUT is pruned).
# When several candidates remain the user is asked to pick one.  A job
# found through 2. or 3. is appended to the catalog.
#
# Globals read : JobName, SimuCatalog, ConfigPath, WORKDIR, TargetUsr,
#                CurrentUsr, HostName, fg_path, fg_search
# Globals set  : SUBMIT_DIR, JobName, NbOcc, FileList, fg_new, Choice,
#                SEARCH_DIR
# Exits the script when nothing is found or when the user answers 'q'.
#----------------------------------------------------------------------#
function SearchCatalog {

  typeset num base CatUsr CatHost
  unset SUBMIT_DIR

  fg_new=false

  # 1. Number of catalog entries whose name matches ${JobName}
  # ----------------------------------------------------------
  if [ "X${JobName}" != "X" ] ; then
    NbOcc=$( gawk -v JobName="${JobName}" \
             'BEGIN {x=0}  $1 ~ JobName {++x} END {print x}' "${SimuCatalog}" )
  else
    NbOcc=0
  fi
  # Guard against an unreadable catalog (gawk printing nothing).
  NbOcc=${NbOcc:-0}

  # 2. -p : use the Job_* file(s) of the given configuration directory
  # -------------------------------------------------------------------
  if [ ${NbOcc} -eq 0 ] && ${fg_path} ; then
    set -A FileList $( ls "${ConfigPath}"/Job_* )
    if [ "X${FileList[0]}" = "X" ] ; then
      NbOcc=0
    else
      NbOcc=${#FileList[@]}
      fg_new=true
    fi
  fi

  # 3. -s : search the job below the (target user's) work directory
  # ----------------------------------------------------------------
  if [ ${NbOcc} -eq 0 ] && ${fg_search} ; then
    SEARCH_DIR=${WORKDIR}
    if [ "${TargetUsr}" != "${CurrentUsr}" ] ; then
      SEARCH_DIR=$( ChangeUsr "${SEARCH_DIR}" )
    fi
    echo "${JobName} not in Catalog, we'll try to find it in ${SEARCH_DIR}"

    set -A FileList $( find "${SEARCH_DIR}/" \
                            -path "${SEARCH_DIR}/IGCM_OUT" -prune -o \
                            -name "Job_${JobName}" -print )
    if [ "X${FileList[0]}" = "X" ] ; then
      NbOcc=0
    else
      NbOcc=${#FileList[@]}
      fg_new=true
    fi
  fi

  # 4. Several catalog entries match the name
  # -----------------------------------------
  # The matches come from the catalog (fg_new is still false), so FileList
  # has not been filled yet.  Build it from the catalog entries of the
  # target user on this host, as "<SubmitDir>/Job_<JobName>".  An entry
  # whose name is exactly ${JobName} wins over the regex matches, so that
  # e.g. "EXP1" can still be selected when "EXP10" is also registered.
  if [ ${NbOcc} -gt 1 ] && ! ${fg_new} ; then
    set -A FileList $( gawk -v JobName="${JobName}" \
                            -v Usr="${TargetUsr}" -v Host="${HostName}" \
                            '$2 == Usr && $3 == Host {
                               if ($1 == JobName)     { exact[++ne] = $4 "/Job_" $1 }
                               else if ($1 ~ JobName) { fuzzy[++nf] = $4 "/Job_" $1 }
                             }
                             END {
                               if (ne > 0) { for (i = 1; i <= ne; i++) print exact[i] }
                               else        { for (i = 1; i <= nf; i++) print fuzzy[i] }
                             }' "${SimuCatalog}" )
    if [ "X${FileList[0]}" = "X" ] ; then
      NbOcc=0
    else
      NbOcc=${#FileList[@]}
      fg_new=true
      # The full job name is taken from the selected entry below.
      JobName=""
    fi
  fi

  # 5. Let the user choose when several candidates remain
  # -----------------------------------------------------
  if [ ${NbOcc} -gt 1 ] ; then
    echo "More than one job"
    num=0
    while [ ${num} -lt ${NbOcc} ] ; do
      printf '%2i) %-30s\n' ${num} "${FileList[${num}]}"
      (( num = num + 1 ))
    done

    # Ask again until the answer is 'q' or a valid index of the list.
    while true ; do
      echo "Give your choice number or 'q' to quit : "
      if ! read Choice ; then
        # End of input: no choice can be made.
        exit 1
      fi
      case "${Choice}" in
        q)
          exit ;;
        ''|*[!0-9]*)
          echo "Invalid choice : '${Choice}'" ;;
        *)
          if [ ${Choice} -lt ${NbOcc} ] ; then
            break
          fi
          echo "Invalid choice : '${Choice}'" ;;
      esac
    done

    fg_new=true
    FileList=${FileList[${Choice}]}
    NbOcc=1
  fi

  # 6. Use the single remaining candidate
  # -------------------------------------
  case ${NbOcc} in
    0)
      echo "${JobName} not found."
      echo "You can try : *) '-s' option to automatically search your \$WORKDIR, "
      echo "              *) '-p' option to provide the config.card path, "
      echo "              *)  manually editing your ${SimuCatalog}"
      exit ;;
    1)
      if ${fg_new} ; then
        # Candidate given as a path to a Job_<JobName> file.
        if [ "X${JobName}" = "X" ] ; then
          base=$( basename "${FileList[0]}" )
          JobName=${base#Job_}
        fi
        SUBMIT_DIR=$( dirname "${FileList[0]}" )
        echo "${JobName} ${TargetUsr} ${HostName} ${SUBMIT_DIR}"
        # Register the job; sort -u removes the line if it was already there.
        echo "${JobName} ${TargetUsr} ${HostName} ${SUBMIT_DIR}" >> "${SimuCatalog}"
        sort -u "${SimuCatalog}" > "${SimuCatalog}.tmp"
        mv "${SimuCatalog}.tmp" "${SimuCatalog}"
      else
        # Single catalog entry: it must belong to the target user and host.
        CatUsr=$( gawk -v JobName="${JobName}" '$1 ~ JobName {print $2}' "${SimuCatalog}" )
        CatHost=$( gawk -v JobName="${JobName}" '$1 ~ JobName {print $3}' "${SimuCatalog}" )
        if [ "${TargetUsr}" = "${CatUsr}" ] && [ "${HostName}" = "${CatHost}" ] ; then
          # JobName may have been given partially: read the full name first,
          # then the submission directory.
          JobName=$( gawk -v JobName="${JobName}" '$1 ~ JobName {print $1}' "${SimuCatalog}" )
          SUBMIT_DIR=$( gawk -v JobName="${JobName}" '$1 ~ JobName {print $4}' "${SimuCatalog}" )
        else
          echo "${JobName} not in Catalog."
          exit
        fi
      fi
      ;;
    *)
      # Not reachable: NbOcc is 0 or 1 at this point.
      ;;
  esac

  return

}
124
125
#----------------------------------------------------------------------#
# AffichResult -f | -j | -l
#
# Print one part of the report on stdout:
#   -f, --first : header (job name, run.card date, dates, period state,
#                 pending rebuilds) and, when NbLines > 0, the title rows
#                 of the post-processing table;
#   -j, --job   : one row of the post-processing table, for index ${ind};
#   -l, --last  : closing rule followed by the current date.
# When several flags are given only the first applicable part, in the
# order -f, -j, -l, is printed.
#
# Globals read : JobName, LastWrite, DateBegin, DateEnd, PeriodState,
#                PeriodDateBegin, PeriodDateEnd, CumulPeriod, NbRebuild,
#                FirstRebuild, LastRebuild, NbLines, JobType_list, ind,
#                <JobType>_Date/_Status/_Nb, monitoring_Date, atlas_Date,
#                DateFormat
#
# Every value handed to printf is quoted so that an empty value keeps its
# own column instead of shifting the following values to the left.
#----------------------------------------------------------------------#
function AffichResult {

  fg_first=false
  fg_last=false
  fg_job=false

  while [ $# -ne 0 ] ; do
    case ${1} in
      -f|--first)
        fg_first=true
        shift 1 ;;
      -l|--last)
        fg_last=true
        shift 1 ;;
      -j|--job)
        fg_job=true
        shift 1 ;;
      -*)
        echo "usage: ${0}"
        echo "       options = -f; -l; -j"
        exit ;;
      *)
        break ;;
    esac
  done


  # Define colors
  # =============
  ColEsc="\033["
  ColNon="${ColEsc}0m"       # Return to normal
  ColExp="${ColEsc}1m"       # White - bold
  ColFat="${ColEsc}1;31m"    # Fatal
  ColCpl="${ColEsc}1;32m"    # Completed
  ColAtt="${ColEsc}1;30m"    # Waiting
  ColDef="${ColEsc}1;34m"    # Default
  ColRbl="${ColEsc}31m"      # Rebuild



  # Print header
  # ============
  if ( ${fg_first} ) ; then
    # Blank padding that keeps "run.card" in the same column whatever the
    # length of the job name (no padding at all for very long names).
    (( len = 67 - ${#JobName} ))
    echo "|===========================================================================================================|"
    printf "| JobName = ${ColExp}%s${ColNon}" "${JobName}"
    if [ ${len} -gt 0 ] ; then
      printf "%${len}s" ""
    fi
    printf "run.card : ${ColExp}%-18s${ColNon}|\n" "${LastWrite}"
    echo "|-------------------------|-------------|-------------------------|-------------|-----:----------:----------|"
    echo "|                         |             |                         |             |     Pending Rebuilds      |"
    echo "| Date Begin - DateEnd    | PeriodState | Current Period          | CumulPeriod | Nb  : from     : to       |"
    echo "|-------------------------|-------------|-------------------------|-------------|-----:----------:----------|"

    printf "| %-10s - %-10s | " \
           "${DateBegin}" "${DateEnd}"

    # Color of the period state
    case "${PeriodState}" in
      Fatal)
        Color=${ColFat}
        ;;
      Completed)
        Color=${ColCpl}
        ;;
      Waiting|OnQueue)
        Color=${ColAtt}
        ;;
      *)
        Color=${ColDef}
        ;;
    esac
    printf "${Color}%-11s${ColNon} | " "${PeriodState}"

    printf "%-10s - %-10s | %11s | " \
           "${PeriodDateBegin}" "${PeriodDateEnd}" "${CumulPeriod}"

    # Pending rebuilds are highlighted when there is at least one.
    if [ "X${NbRebuild}" != "X." ] && [ "X${NbRebuild}" != "X0" ] ; then
      printf "${ColRbl}%3s : %-8s : %-8s${ColNon} |\n" \
             "${NbRebuild}" "${FirstRebuild}" "${LastRebuild}"
    else
      printf "%3s : %-8s : %-8s |\n" \
             "${NbRebuild}" "${FirstRebuild}" "${LastRebuild}"
    fi

    # Title rows of the post-processing table, only when it has rows.
    if [ "${NbLines:-0}" -gt 0 ] ; then
      printf "|-----------------------------------------------------------------------------------------------------------|\n"
      printf "|                                                      Last                                                 |\n"
      printf "|     Rebuild      |   Pack_Output    |   Pack_Restart   |    Pack_Debug    |  Monitoring  |     Atlas      |\n"
      printf "|------------------|------------------|------------------|------------------|--------------|----------------|\n"
    fi

    return
  fi

  # Print Post-processing job status
  # ================================
  if ( ${fg_job} ) ; then
    printf "|"

    # Print rebuild and pack jobs
    # ---------------------------
    for JobType in ${JobType_list[*]} ; do
      # Indirect read of <JobType>_Date[ind], _Status[ind] and _Nb[ind].
      eval Date=\${${JobType}_Date[${ind}]}
      eval Status=\${${JobType}_Status[${ind}]}
      eval Nb=\${${JobType}_Nb[${ind}]}

      if [ "X${Status}" = "XOK" ] ; then
        Color=${ColCpl}
      else
        Color=${ColFat}
      fi
      printf "  ${Color}%-8s${ColNon} : %3s  |" "${Date}" "${Nb}"
    done

    Color=${ColExp}

    # Print monitoring jobs
    # ---------------------
    # There is a single monitoring date: it is shown on the first row only.
    JobType=monitoring
    if [ ${ind} -eq 0 ] ; then
      eval Date=\${${JobType}_Date}
    else
      Date=""
    fi
    printf "     ${Color}%-4s${ColNon}     |" "${Date}"

    # Print atlas jobs
    # ----------------
    JobType=atlas
    eval Date=\${${JobType}_Date[${ind}]}
    printf "  ${Color}%-12s${ColNon}  |" "${Date}"

    printf "\n"

    return
  fi

  # Print footer
  # ============
  if ( ${fg_last} ) ; then
    printf "|===========================================================================================================|\n"
    date +"${DateFormat}"
    return
  fi

}
272
273#======================================================================#
274
#D- Task type (computing, post-processing or checking)
TaskType=checking
# Verbosity level, declared as an integer and initialised to zero.
typeset -i Verbosity=0

# Login name of the user running this script.
CurrentUsr=$( whoami )

# Called without any argument: show the help of this script and stop.
[ $# -ne 0 ] || { $0 -h ; exit ; }
285
# Arguments
# =========
# Default argument values
# -----------------------
# By default the job belongs to the user running the script.
TargetUsr=${CurrentUsr}
# Host name truncated at its first digit.
HostName=$( hostname | sed -e "s/[0-9].*//" )

fg_color=true
fg_search=false
fg_quiet=false
fg_path=false
NbHisto=20

# Get arguments from command line
# -------------------------------
# Options are consumed until the first non-option word, which is left in $1.
# An option that needs a value (-j, -p, -u) is rejected when the value is
# missing, instead of letting "shift 2" fail on a single remaining argument.
while [ $# -ne 0 ] ; do
  case $1 in
    -h|--help|-help)
      echo "usage: $0 [-u user] [-q] [-j n] [-s] job_name"
      echo "       $0 [-u user] [-q] [-j n] -p config.card_path"
      echo ""
      echo "options :"
      echo "  -h : print this help and exit"
      echo "  -u : owner of the job"
      echo "  -q : quiet"
      echo "  -j : print n post-processing jobs (default is 20)"
      echo "  -s : search for a new job in \$WORKDIR and fill in "
      echo "       the catalog before printing information"
      echo "  -p : give the directory (absolute path) containing "
      echo "       the config.card instead of the job name."
      exit ;;
    -j|-job-number)
      if [ $# -lt 2 ] ; then
        echo "Option $1 requires an argument" >&2
        exit 1
      fi
      # The value is later given to "tail -n": it has to be a number.
      case "$2" in
        ''|*[!0-9]*)
          echo "Option $1 requires a numeric argument, got '$2'" >&2
          exit 1 ;;
      esac
      NbHisto="$2"
      shift 2 ;;
    -p|-config-path)
      if [ $# -lt 2 ] ; then
        echo "Option $1 requires an argument" >&2
        exit 1
      fi
      ConfigPath="$2"
      fg_path=true
      shift 2 ;;
    -q|-quiet)
      fg_quiet=true
      shift 1 ;;
    -s|-search)
      fg_search=true
      shift 1 ;;
    -u|-user)
      if [ $# -lt 2 ] ; then
        echo "Option $1 requires an argument" >&2
        exit 1
      fi
      TargetUsr="$2"
      shift 2 ;;
    -*)
      # Unknown option: show the help and stop.
      "$0" -h
      exit ;;
    *)
      break ;;
  esac
done
340
341
# A job name is mandatory unless the configuration directory was given
# with -p: otherwise show the help and stop.
if ! ${fg_path} && [ $# -lt 1 ] ; then
  $0 -h
  exit
fi

# -p and -s cannot be combined.
if ${fg_path} && ${fg_search} ; then
  echo "You cannot use -s and -p at the same time"
  exit
fi
351
352
# Load libIGCM library
# ====================
# Keep libIGCM when it is already set in the environment, otherwise use the
# default below.
# NOTE(review): "::modipsl::" looks like a placeholder that is presumably
# substituted when this AA_ template is installed - confirm.
libIGCM=${libIGCM:=::modipsl::/libIGCM}

# Both debug flags are switched off before the libraries are sourced.
DEBUG_debug=false
DEBUG_sys=false

# Source the libIGCM modules (debug, card, date, then sys and config).
. ${libIGCM}/libIGCM_debug/libIGCM_debug.ksh
. ${libIGCM}/libIGCM_card/libIGCM_card.ksh
. ${libIGCM}/libIGCM_date/libIGCM_date.ksh
#-------
. ${libIGCM}/libIGCM_sys/libIGCM_sys.ksh
. ${libIGCM}/libIGCM_config/libIGCM_config.ksh
#-------
# The library checks are only run when DEBUG_debug is true.
( ${DEBUG_debug} ) && IGCM_debug_Check
( ${DEBUG_debug} ) && IGCM_card_Check
( ${DEBUG_debug} ) && IGCM_date_Check
370
# First remaining argument: name of the job to check (empty with -p).
JobName=$1

# Quiet mode: a single post-processing job is reported.
${fg_quiet} && NbHisto=1

echo "Target user = ${TargetUsr}"

# Date format
DateFormat="%d/%m/%y %R:%S"

# Define the catalog in which the known simulations are stored
# (the file is touched when it is missing or empty)
SimuCatalog="${HOME}/.simucatalog.dat"
[ -s ${SimuCatalog} ] || touch ${SimuCatalog}

# Find SUBMIT_DIR in catalog
# ==========================
SearchCatalog
391
# Main part: only executed when SearchCatalog found a submission directory.
# It reads the state of the simulation from config.card and run.card,
# collects the status of the post-processing jobs and prints the report.
if [ ! X${SUBMIT_DIR} == X ] ; then

  echo "Submit:  >${SUBMIT_DIR}<"
  IGCM_sys_Cd $SUBMIT_DIR


  # Extract useful information from run.card and config.card
  # ========================================================

  RunFile="${SUBMIT_DIR}/run.card"
  ConfFile="${SUBMIT_DIR}/config.card"

  # NOTE(review): presumably this call defines the config_* and R_*
  # variables used below - confirm in libIGCM_config.
  IGCM_config_CommonConfiguration ${SUBMIT_DIR}/config.card


  # Without a (non-empty) run.card the simulation is reported as "Waiting".
  if [ -s ${RunFile} ] ; then
    IGCM_card_DefineVariableFromOption ${RunFile} Configuration PeriodState
    PeriodState=${run_Configuration_PeriodState}
  else
    PeriodState="Waiting"
  fi

  # A simulation declared Running or OnQueue in run.card with no job counted
  # in the queue is reported as "Fatal".
  if ( [ X${PeriodState} == XRunning ] || [ X${PeriodState} == XOnQueue ] ) ; then
    #NbRun=$( ccc_mstat -f | grep -c ${JobName} )
    # NOTE(review): NbRun is presumably filled in by this helper - confirm.
    IGCM_sys_CountJobInQueue ${JobName} NbRun

    if [ ${NbRun} -eq 0 ] ; then
      PeriodState="Fatal"
    fi
  fi

  DateBegin=${config_UserChoices_DateBegin}
  DateEnd=${config_UserChoices_DateEnd}
  TagName=${config_UserChoices_TagName}
  ExperimentName=${config_UserChoices_ExperimentName}
  SpaceName=${config_UserChoices_SpaceName}


  # The current period is read from run.card unless the simulation is
  # Waiting or Completed, in which case "." is displayed instead.
  if ( [ ! X${PeriodState} == XWaiting ] && [ ! X${PeriodState} == XCompleted ] ) ; then
    IGCM_card_DefineVariableFromOption ${RunFile} Configuration PeriodDateBegin
    IGCM_card_DefineVariableFromOption ${RunFile} Configuration PeriodDateEnd
    IGCM_card_DefineVariableFromOption ${RunFile} Configuration CumulPeriod
    PeriodDateBegin=${run_Configuration_PeriodDateBegin}
    PeriodDateEnd=${run_Configuration_PeriodDateEnd}
    CumulPeriod=${run_Configuration_CumulPeriod}
  else
    PeriodDateBegin="."
    PeriodDateEnd="."
    CumulPeriod="."
  fi

  # Directories used by the checks below.
  DATA_DIR=${R_SAVE}
  POST_DIR=${R_BUFR}/Out
  CWORK_DIR=${R_FIGR}
  # Name of the rebuild job, as used in the post-processing output file names.
  if [ X${config_Post_RebuildFromArchive} = Xtrue ] ; then
    RebuildJob="rebuild_fromArchive"
  else
    RebuildJob="rebuild_fromWorkdir"
  fi

  # Checking another user's simulation: rewrite the paths for that user.
  # NOTE(review): REBUILD_DIR is not assigned in this file; presumably it is
  # set by the sourced libIGCM libraries - confirm.
  if [ ${TargetUsr} != ${CurrentUsr} ] ; then
    DATA_DIR=$( ChangeUsr ${DATA_DIR})
    POST_DIR=$( ChangeUsr ${POST_DIR} )
    CWORK_DIR=$( ChangeUsr ${CWORK_DIR} )
    REBUILD_DIR=$( ChangeUsr ${REBUILD_DIR} )
  fi

  echo "Data:    >${DATA_DIR}<"
  echo "Rebuild: >${REBUILD_DIR}<"
  echo "Post:    >${POST_DIR}<"
  echo "Work:    >${CWORK_DIR}<"

  # Are packs activated or not ?
  # ============================
  # Packs are active when PackFrequency is neither ${NULL_STR} nor NONE.
  if ( [ ! X${config_Post_PackFrequency} = X${NULL_STR} ] && \
       [ ! X${config_Post_PackFrequency} = XNONE ] ) ; then
    Pack=true
  else
    Pack=false
  fi


  if [ $PeriodState != "Waiting" ] ; then

    # Check pending rebuilds
    # ======================

    # Sorted list of the REBUILD_* entries found below REBUILD_DIR.
    set -A RebuildList $( find ${REBUILD_DIR}/ -name "REBUILD_*" | sort )
    if [ ${#RebuildList[*]} -gt 0 ] ; then
      NbRebuild=$( IGCM_sys_CountFileArchive ${REBUILD_DIR} )

      # Second "_"-separated field of the first and last entry names.
      # NOTE(review): the index of the last entry is derived from NbRebuild,
      # not from the size of RebuildList; if the two counts differ the
      # selected element may be empty - confirm.  The assignment is written
      # inside $( ), so it presumably does not change NbRebuild in the
      # calling shell - confirm.
      FirstRebuild=$( basename ${RebuildList[0]} | cut -f2 -d\_ )
      LastRebuild=$( basename ${RebuildList[ (( NbRebuild=${NbRebuild}-1 )) ]} | cut -f2 -d\_ )
    else
      NbRebuild="."

      FirstRebuild="."
      LastRebuild="."
    fi
  else
    NbRebuild="."

    FirstRebuild="."
    LastRebuild="."
  fi


  if [ $PeriodState != "Waiting" ] ; then

    # Check last REBUILD and PACK* jobs
    # =================================
    # Define input parameters
    # -----------------------
    set -A JobType_list "${RebuildJob}" "pack_output" "pack_restart" "pack_debug"

    # For each job type define three variables named after it:
    #   <JobType>_String : pattern searched in the first field of the job output
    #   <JobType>_Field  : set here (4 for rebuild, 3 for packs); not read
    #                      again in this script except into Field below
    #   <JobType>_Activ  : whether this job type has to be checked
    for JobType in ${JobType_list[*]} ; do
      typeset    name1="${JobType}_String"
      typeset    name2="${JobType}_Field"
      typeset    name3="${JobType}_Activ"
      if [ X${JobType} == X${RebuildJob} ] ; then
        # Rebuild jobs are always checked.
        if ( ${Pack} ) ; then
          eval ${name1}=IGCM_sys_PutBuffer_Out
        else
          eval ${name1}=IGCM_sys_Put_Out
        fi
        eval ${name2}=4
        eval ${name3}=true
      else
        # Pack jobs are only checked when packs are activated.
        eval ${name1}=IGCM_sys_Put_Out
        eval ${name2}=3
        if ( ${Pack} ) ; then
          eval ${name3}=true
        else
          eval ${name3}=false
        fi
      fi
    done

    # Check jobs
    # ----------
    # NbLines ends up as the largest number of entries to display for any
    # job type, i.e. the number of rows of the table.
    NbLines=0
    for JobType in ${JobType_list[*]} ; do
      eval String=\${${JobType}_String}
      eval Field=\${${JobType}_Field}
      eval Activ=\${${JobType}_Activ}

      if ( ${Activ} ) ; then

        # Last ${NbHisto} output files of this job type.
        set -A FileList $( ls ${POST_DIR}/${JobType}.*.out | tail -n ${NbHisto} )

        if [ ${#FileList[*]} -gt ${NbLines} ] ; then
          NbLines=${#FileList[*]}
        fi

        (( ind = 0 ))
        for FileName in ${FileList[*]} ; do
          # Date = next-to-last "."-separated field of the file name.
          LastDate=$( basename ${FileName} | gawk -F"." '{ print $(NF-1) }' )

          # Among the lines whose first field matches ${String}, count those
          # whose third field does not match /error./ (nb_ok) and those
          # whose third field does (nb_ko).  The two counts replace the
          # positional parameters.
          set -- $( gawk -v String=${String} \
                         'BEGIN { nb_ok = 0 ; nb_ko = 0 } \
                         ($1 ~ String) { \
                           if ($3 !~ /error./) { \
                             nb_ok = nb_ok + 1 \
                           } else { \
                             nb_ko = nb_ko + 1 \
                           } \
                         } \
                         END { print nb_ok " " nb_ko }' \
                         ${POST_DIR}/${JobType}.${LastDate}.out )
          Match=$1
          Error=$2

          # NOTE(review): Match already holds the count of lines without
          # error (nb_ok), so Nb = nb_ok - nb_ko; when Error is not zero it
          # is unclear that this is the intended figure - confirm.
          (( Nb = ${Match} - ${Error} ))

          # OK only when there is no error line and at least one match.
          if ( [ ${Error} -eq 0 ] && [ ${Nb} -gt 0 ] ) ; then
            Status=OK
          else
            Status=KO
          fi

          eval ${JobType}_Date[$ind]=${LastDate}
          eval ${JobType}_Status[$ind]=${Status}
          eval ${JobType}_Nb[$ind]=${Nb}

          (( ind = ind + 1 ))
        done

      else

          # Job type not checked: a single empty entry.
          eval ${JobType}_Date[0]=""
          eval ${JobType}_Status[0]=""
          eval ${JobType}_Nb[0]=""

      fi

    done

    # Check last MONITORING jobs
    # ==========================
    JobType=monitoring
    IGCM_sys_TestDirArchive ${CWORK_DIR}/MONITORING
    RC=$?
    if [ $RC == 0 ] ; then
      # First NetCDF file of the monitoring directory, listed through
      # IGCM_sys_RshArchive.
      # NOTE(review): LastDate is presumably set by
      # IGCM_sys_GetDate_Monitoring from that file - confirm.
      FileTmp=$( IGCM_sys_RshArchive "ls ${CWORK_DIR}/MONITORING/files/*.nc | head -n 1" )
      IGCM_sys_GetDate_Monitoring ${FileTmp} LastDate
      eval ${JobType}_Date=${LastDate}
    fi

    # Check last ATLAS jobs
    # =====================
    JobType=atlas
    IGCM_sys_TestDirArchive ${CWORK_DIR}/ATLAS
    RC=$?
    if [ $RC == 0 ] ; then
      # Last ${NbHisto} entries of the ATLAS directory.
      set -A FileList $( IGCM_sys_RshArchive "ls ${CWORK_DIR}/ATLAS | tail -n ${NbHisto}" )

      if [ ${#FileList[*]} -gt ${NbLines} ] ; then
        NbLines=${#FileList[*]}
      fi

      (( ind = 0 ))
      for FileName in ${FileList[*]} ; do
        eval ${JobType}_Date[$ind]=${FileName}
        (( ind = ind + 1 ))
      done
    fi


    # Time of last write on run.card
    # ==============================
    # Fields 6 and 7 of "ls -l" are the date and time in ${DateFormat}.
    LastWrite=$( ls -l --time-style=+"${DateFormat}" ${SUBMIT_DIR}/run.card | gawk '{print $6 " " $7}' )

  else

    # Simulation still waiting: no post-processing row to display.
    NbLines=0

  fi


  # Print results
  # =============
  # Header, one row per index from 0 to NbLines-1 (AffichResult reads the
  # global ind), then the footer.
  AffichResult -f
  ind=0
  while [ $ind -lt $NbLines ] ; do
    AffichResult -j
    (( ind = ind + 1 ))
  done
  AffichResult -l

fi
642
Note: See TracBrowser for help on using the repository browser.