source: trunk/libIGCM/libIGCM_debug/libIGCM_debug.ksh @ 1227

Last change on this file since 1227 was 1220, checked in by sdipsl, 9 years ago
  • Any post-processing errors will stop the computing jobs when SpaceName=PROD. See #260
  • Feature was removed some years ago because post-processing errors were legion. More robust now.
  • Property licence set to
    The following licence information concerns ONLY the libIGCM tools
    ==================================================================

    Copyright © Centre National de la Recherche Scientifique CNRS
    Commissariat à l'Énergie Atomique CEA

    libIGCM : Library for Portable Models Computation of IGCM Group.

    IGCM Group is the french IPSL Global Climate Model Group.

    This library is a set of shell scripts and functions whose purpose is
    the management of the initialization, the launch, the transfer of
    output files, the post-processing and the monitoring of datas produce
    by any numerical program on any plateforme.

    This software is governed by the CeCILL license under French law and
    abiding by the rules of distribution of free software. You can use,
    modify and/ or redistribute the software under the terms of the CeCILL
    license as circulated by CEA, CNRS and INRIA at the following URL
    "http://www.cecill.info".

    As a counterpart to the access to the source code and rights to copy,
    modify and redistribute granted by the license, users are provided only
    with a limited warranty and the software's author, the holder of the
    economic rights, and the successive licensors have only limited
    liability.

    In this respect, the user's attention is drawn to the risks associated
    with loading, using, modifying and/or developing or reproducing the
    software by the user in light of its specific status of free software,
    that may mean that it is complicated to manipulate, and that also
    therefore means that it is reserved for developers and experienced
    professionals having in-depth computer knowledge. Users are therefore
    encouraged to load and test the software's suitability as regards their
    requirements in conditions enabling the security of their systems and/or
    data to be ensured and, more generally, to use and operate it in the
    same conditions as regards security.

    The fact that you are presently reading this means that you have had
    knowledge of the CeCILL license and that you accept its terms.
  • Property svn:keywords set to Revision Author Date
File size: 34.2 KB
RevLine 
#!/bin/ksh

#**************************************************************
# Author: Patrick Brockmann, Martial Mancip
# Contact: Patrick.Brockmann__at__cea.fr Martial.Mancip__at__ipsl.jussieu.fr
# $Revision::                                          $ Revision of last commit
# $Author::                                            $ Author of last commit
# $Date::                                              $ Date of last commit
# IPSL (2006)
#  This software is governed by the CeCILL licence see libIGCM/libIGCM_CeCILL.LIC
#
#**************************************************************

#==================================================
# The documentation of this file is generated automatically from
# every comment line that starts with the #D- prefix.
# Extract with command: cat lib* | grep "^#D-" | cut -c "4-"
#==================================================

#==================================================
# Verbosity
# High level verbosity; a value already set by the caller wins, default is 3
typeset -i Verbosity=${Verbosity:-3}

#==================================================
# DEBUG_debug
# Low level verbosity switch (true/false); a value already set by the caller wins
: "${DEBUG_debug:=false}"

#==================================================
# RandomError
# Random error generator switch; only honoured when ${DEBUG_debug} is true
typeset RandomError=false

#==================================================
# NULL_STR
# Read-only placeholder used wherever an "empty" value must still be a word
typeset -r NULL_STR='_0_'

#==================================================
# libIGCM_CurrentTag
# Read-only libIGCM tag, used to check compatibility with *.card files
typeset -r libIGCM_CurrentTag='1.0'

#==================================================
# ExitFlag (internal debug)
# When true, the master loop ends AFTER the files have been saved
ExitFlag=false

#==================================================
# Stack of function calls: three parallel arrays (name, arguments, start time)
# whose index 0 is the most recent call, plus the current depth.
unset IGCM_debug_Stack IGCM_debug_StackArgs IGCM_debug_StackTiming
IGCM_debug_Stack[0]=${NULL_STR}
IGCM_debug_StackArgs[0]=${NULL_STR}
IGCM_debug_StackTiming[0]=${NULL_STR}
IGCM_debug_LenStack=0
57
#D-#==================================================================
#D-function IGCM_debug_getDate_ms
#D- * Purpose: Print the number of milliseconds elapsed since 01-jan-1970
function IGCM_debug_getDate_ms
{
  typeset epochNano

  # Epoch seconds immediately followed by the nanoseconds field,
  # i.e. the number of nanoseconds since 01-jan-1970
  epochNano=$( date +%s%N )

  # Dropping the six trailing digits converts nanoseconds to milliseconds
  echo "${epochNano%??????}"
}
72
#D-#==================================================================
#D-function IGCM_debug_sizeOfTabContent
#D- * Purpose: Give the summed size of a list of files or directories
#D- * Usage: IGCM_debug_sizeOfTabContent entityList destination
#D- *        where entityList is the NAME of a variable holding a list of files or directories
#D- *        (a trailing [*] subscript is accepted for arrays)
#D- *        where destination is either a directory or a file name
#D- * Output: "sizeInKo|sizeInMo" on stdout
function IGCM_debug_sizeOfTabContent
{
  typeset entityListe destination iEntity sizeKo sumSizeKo sumSizeMo
  typeset i baseName

  # ${1} is a variable name: dereference it and let the shell split the
  # content into one array element per blank separated word.
  eval "entityListe=( \${${1}} )"
  destination=${2}
  sumSizeKo=0

  # Here we try to compute the size (file or directory) from a local path and not from the archive.
  for (( i = 0; i < ${#entityListe[*]}; i += 1 )) ; do
    # entity name without its leading /path/to/ part
    baseName=${entityListe[$i]##/*/}
    # start from scratch so that an unresolved entity is never confused with the previous one
    iEntity=""

    if [ -f "${entityListe[$i]}" ] ; then
      # one file or a bunch of files copied without renaming from a visible filesystem
      iEntity=${entityListe[$i]}
    elif [ -f "${baseName}" ] ; then
      # one file or a bunch of files copied without renaming from a non visible filesystem
      iEntity=${baseName}
    elif [ -f "${destination}" ] ; then
      # a file has been copied and renamed
      iEntity=${destination}
    elif [ -f "${destination}/${baseName}" ] ; then
      # a copy in a directory but not in ${PWD}
      iEntity=${destination}/${baseName}
    elif [ -d "${entityListe[$i]}" ] ; then
      # a directory has been copied from a non remote place
      iEntity=${entityListe[$i]}
    elif [ -d "${destination}/${baseName}" ] ; then
      # a directory has been copied from a remote archive and not renamed
      iEntity=${destination}/${baseName}
    elif [ -d "${destination}" ] ; then
      # a directory has been copied from a remote archive and renamed
      iEntity=${destination}
    fi

    # Nothing found locally: contributes 0. Without this guard du would
    # silently measure the current directory (or the previous entity again).
    if [ -z "${iEntity}" ] ; then
      continue
    fi

    # du prints "<size><TAB><path>"; keep the size only
    sizeKo=$( du --apparent-size -skL -- "${iEntity}" | cut -f1 )
    sumSizeKo=$(( sumSizeKo + ${sizeKo:-0} ))
  done

  sumSizeMo=$( echo "scale=6;${sumSizeKo}/1024" | bc )
  echo "${sumSizeKo}|${sumSizeMo}"
}
118
119#D-#==================================================================
[983]120#D-function IGCM_debug_send_AMQP_msg__MAILTUNNEL
121#D- * Purpose: Take over AMQP C client using mail as a message recipient
122#D- * One argument : base64 encoded message
[1051]123#D- * Attach encoded config.card when starting the simulation
124
[983]125function IGCM_debug_send_AMQP_msg__MAILTUNNEL {
126
[987]127  typeset b64_encoded_msg mail_recipient
[1076]128  typeset buffer send_messages mail_frequency
[987]129  typeset last_mail_date__file
130
[983]131  b64_encoded_msg=$1
132
[1072]133  mail_recipient="superviseur@ipsl.jussieu.fr"
[983]134  send_messages=0
135  mail_frequency=3600 # in seconds
136  # use to keep track when was last mail sent (maybe to be replaced with global variable)
[1150]137  last_mail_date__file=${R_BUF}/.stamp.${config_UserChoices_TagName}.${config_UserChoices_JobName}
138  # use to accumulate messages before sending them
139  buffer=${R_BUF}/.buffer.${config_UserChoices_TagName}.${config_UserChoices_JobName}
[983]140
141  # init
142  if [ ! -f "${buffer}" ]; then
[1150]143    touch ${buffer}
[983]144  fi
145
146  if [ ! -f "${last_mail_date__file}" ]; then
[1150]147    touch ${last_mail_date__file}
[983]148  else
[1150]149    # compute last time the file was changed (in seconds)
[983]150    seconds_since_last_mail=$(( $(date +%s) - $(stat -c %Y ${last_mail_date__file}) ))
[997]151    # send message when exceeding threshold
152    [ ${seconds_since_last_mail} -gt ${mail_frequency} ] && send_messages=1
[983]153  fi
154
[997]155  # queue messages in the buffer
156  echo ${b64_encoded_msg} >> ${buffer}
157
158  # send mail
[1051]159
[1053]160  if [ X${initBigBro} = Xtrue ] ; then
[1087]161    #echo $(date +"%Y-%m-%dT%H:%M:%S.%N%z") > ${SUBMIT_DIR}/mail.txt
[1051]162    mailx -s "[TEMPORARY AMQP CHANNEL]" -a ${SUBMIT_DIR}/config.card.base64 ${mail_recipient} < ${buffer} # send buffer
163    rm -f $buffer ; touch ${buffer}                                    # clear buffer
164    touch ${last_mail_date__file}                                      # memorize last mail date
165    initBigBro=false
[1150]166  elif [ ${send_messages} -eq 1 ] ; then
[1087]167    #echo $(date +"%Y-%m-%dT%H:%M:%S.%N%z") >> ${SUBMIT_DIR}/mail.txt
[1051]168    mailx -s "[TEMPORARY AMQP CHANNEL]" ${mail_recipient}  < ${buffer} # send buffer
[1150]169    rm -f ${buffer} ; touch ${buffer}                                  # flush the buffer
[1051]170    touch ${last_mail_date__file}                                      # memorize last mail date
[983]171  fi
[987]172
[1189]173  if ( ${FlushAMQP} ) ; then
[1150]174    mailx -s "[TEMPORARY AMQP CHANNEL]" ${mail_recipient}  < ${buffer} # send buffer
175    rm -f ${buffer}                                                    # cleaning behind us
176    rm -f ${last_mail_date__file}                                      # cleaning behind us
177  fi
178
[983]179  # Allways all good for now.
180  return 0
181}
182
#D-#==================================================================
#D-function IGCM_debug_sendAMQP_Metrics
#D- * Purpose: Take over AMQP C client using mail as a message recipient
#D- * Two arguments : - Directory where metrics.json files can be found
#D- *                 - Metrics Group Name. metrics will be added to this group
#D- * Attach the *json files found in the directory to the mail.
#D- * Does nothing unless ActivateBigBro=true.

function IGCM_debug_sendAMQP_Metrics {

  typeset mail_recipient encodedBody code metricsDir metricsFile

  if [ "X${ActivateBigBro}" = Xtrue ] ; then
    mail_recipient="superviseur@ipsl.jussieu.fr"
    metricsDir=$1
    # Metrics tag on server side
    code=7100
    # Usual AMQP message to route messages on server side
    encodedBody=$( echo "{${genericSimulationID},\"msgCode\":\"${code}\",\"msgUID\":\"$(uuidgen)\",\"metricsGroupName\":\"${2}\",\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}" |  base64 -w 0 )

    # Build the attachment options in the positional parameters of this
    # function: one "-a file" pair per metrics file, each file name kept as a
    # single word whatever characters it contains.
    set --
    for metricsFile in "${metricsDir}"/*json ; do
      # an unmatched pattern is left as is by the shell: skip it
      [ -f "${metricsFile}" ] || continue
      set -- -a "${metricsFile}" "$@"
    done

    IGCM_debug_Print 2 "IGCM_debug_sendAMQP_Metrics "
    # send mail
    printf '%s\n' "${encodedBody}" | mailx -s "[TEMPORARY AMQP CHANNEL]" "$@" "${mail_recipient}"
  fi

  # Always all good for now.
  return 0
}
211
#D-#==================================================================
#D-function IGCM_debug_sendAMQP
#D- * Purpose: Send the global ${Body}, base64 encoded, to rabbitMQ
#D- * Optional argument: "activate" also encodes and sends config.card
function IGCM_debug_sendAMQP {

  typeset decal additionnalOption encodedBody

  # Encode message Body
  encodedBody=$( echo "${Body}" | base64 -w 0 )

  # By default config.card is not sent ...
  additionnalOption=
  initBigBro=false
  # ... unless explicitly requested
  if [ "X${1}" = "Xactivate" ] ; then
    # Encode config.card
    base64 -w 0 < ${SUBMIT_DIR}/config.card > ${SUBMIT_DIR}/config.card.base64
    # Prepare additional option
    additionnalOption="-f ${SUBMIT_DIR}/config.card.base64"
    initBigBro=true
  fi

  # Only cosmetics : stack file
  if [ "X${ActivateStackFilling}" = "Xtrue" ] ; then
    # one blank per nesting level
    for (( decal = 0; decal < IGCM_debug_LenStack; decal++ )) ; do
      printf ' ' >> ${StackFileLocation}/${StackFileName}
    done
    # Log to stack file using human readable format
    echo "${Body}" >> ${StackFileLocation}/${StackFileName}
  fi

  # Log separately encoded AMQP message command for reuse in a mock up
  #echo sendAMQPMsg -h localhost -p 5672 ${additionnalOption} -b ${encodedBody} >> ${RUN_DIR_PATH}/send.AMQP.${config_UserChoices_JobName}.${config_UserChoices_ExperimentName}.${config_UserChoices_SpaceName}.${config_UserChoices_TagName}.${CumulPeriod}.history.txt

  # Send the message, either through the mail tunnel or the AMQP C client
  case "${BigBrotherChannel}" in
  MAIL)
    IGCM_debug_send_AMQP_msg__MAILTUNNEL "${encodedBody}"
    ;;
  *)
    # additionnalOption is left unquoted on purpose: it holds two words or none
    sendAMQPMsg -h localhost -p 5672 ${additionnalOption} -b "${encodedBody}"
    ;;
  esac
  status=$?

  # A failed delivery is fatal
  if [ ${status} -gt 0 ] ; then
    IGCM_debug_Print 2 "IGCM_debug_Push/PopStack/ActivateBigBro : command sendAMQPMsg failed error code ${status}"
    echo sendAMQPMsg -h localhost -p 5672 -b "${Body}"
    exit 1
  fi
}
265
#D-#==================================================================
#D-function IGCM_debug_CallStack
#D-* Purpose: Print the call stack tree from the oldest to the youngest (opposite of the display standard)
#D-
function IGCM_debug_CallStack {
  if ( $DEBUG_debug ) ; then
    typeset depth pad slot

    for (( depth = 0; depth != IGCM_debug_LenStack; depth++ )) ; do
      # Cosmetics: indent each entry by one blank per nesting level
      for (( pad = 0; pad != depth; pad++ )) ; do
        printf -- ' '
      done
      # The stack keeps the youngest call at index 0: read it backwards
      slot=$(( IGCM_debug_LenStack - depth - 1 ))
      echo "${depth} - ${IGCM_debug_Stack[${slot}]}" "(${IGCM_debug_StackArgs[${slot}]})"
    done
  fi
}
286
#D-#==================================================================
#D-function IGCM_debug_PushStack
#D-* Purpose: Push a function name in the stack
#D-* Usage: IGCM_debug_PushStack functionName [argument ...]
#D-* Only active when DEBUG_debug is true.
#D-
function IGCM_debug_PushStack {
  if ( $DEBUG_debug ) ; then
    typeset decal inputs startTime_ms joinedArgs separator k

    # Only cosmetics : stack file
    if [ "X${ActivateStackFilling}" = Xtrue ] ; then
      echo >> "${StackFileLocation}/${StackFileName}"
      decal=0
      while [ ${decal} -lt ${IGCM_debug_LenStack} ]; do
        printf ' ' >> "${StackFileLocation}/${StackFileName}"
        (( decal = decal + 1 ))
      done

      # Fill the stack file
      printf '%s\n' "> ${IGCM_debug_LenStack} : $*" >> "${StackFileLocation}/${StackFileName}"
    fi

    # Save input list in a (local) indexed array.
    # Splitting on blanks is intended: every stack entry must be one single
    # word because the stack arrays are rebuilt by word splitting below.
    inputs=( $@ )

    # Get timing information
    startTime_ms=$( IGCM_debug_getDate_ms )

    # We add function call name on beginning of the stack
    IGCM_debug_Stack=( ${1} ${IGCM_debug_Stack[*]} )

    # Save timing in milliseconds in an indexed array
    IGCM_debug_StackTiming=( ${startTime_ms} ${IGCM_debug_StackTiming[*]} )

    # We include the "null" Args in the beginning of the StackArgs
    IGCM_debug_StackArgs=( ${NULL_STR} ${IGCM_debug_StackArgs[*]} )

    # Then, we replace it by the real arguments when there are some:
    # blank separated list -> list of elements separated by "," (quotes
    # included, except before the first and after the last element).
    # Built without echo so that arguments such as -n or -e are preserved.
    if [ $# -gt 1 ]; then
      separator='","'
      joinedArgs=${inputs[1]}
      for (( k = 2; k < ${#inputs[*]}; k++ )) ; do
        joinedArgs="${joinedArgs}${separator}${inputs[k]}"
      done
      IGCM_debug_StackArgs[0]=${joinedArgs}
    fi

    # Message 4000 (PUSHSTACK) is deliberately not sent to rabbitMQ for now,
    # to ease downstream treatment.

    # Increment LenStack
    (( IGCM_debug_LenStack = IGCM_debug_LenStack + 1 ))

    #IGCM_debug_CallStack
  fi
}
345
#D-#==================================================================
#D-function IGCM_debug_PopStack
#D-* Purpose: Pop a function name from the stack
#D-* Usage: IGCM_debug_PopStack functionName
#D-* For the copy, rebuild and NCO wrappers the size, source, destination and
#D-* timing of the call are handed to IGCM_debug_PrintInfosActions.
#D-* Only active when DEBUG_debug is true.
#D-
function IGCM_debug_PopStack {
  if ( $DEBUG_debug ) ; then
    typeset i decal command arguments startTime_ms endTime_ms
    typeset instrumentation dest prefix
    typeset entitySize nbArgs
    # fileList and source are not typeset because their NAMES are handed to
    # IGCM_debug_sizeOfTabContent which dereferences them.
    # We unset them to avoid "memory effect"
    unset fileList source

    # SIMPLE ERROR GENERATOR TO TEST THE SUPERVISOR
    # As coded, 11 values out of 10000 trigger the error (about 0.0011 per call)
    if ( ${RandomError} ) ; then
      if [ $((RANDOM%10000)) -le 10 ] ; then
        IGCM_debug_Print 1 "Random error has been triggered"
        if [ "X${ActivateStackFilling}" = Xtrue ] ; then
          echo "RANDOM ERROR" >> "${StackFileLocation}/${StackFileName}"
        fi
        ExitFlag=true
      fi
    fi

    if [ "${IGCM_debug_Stack[0]}" = "${1}" ]; then
      # Everything is cool

      # Get timing information
      endTime_ms=$( IGCM_debug_getDate_ms )

      # Save Stack information before popping the stack
      command=${IGCM_debug_Stack[0]}

      # Go from the list of elements separated by "," (as stored by
      # IGCM_debug_PushStack) to unquoted blank separated elements in an array
      arguments=( $( printf '%s\n' "${IGCM_debug_StackArgs[0]}" | sed -e "s/\",\"/\ /g" ) )
      nbArgs=${#arguments[*]}

      # Save Stack information before popping the stack
      startTime_ms=${IGCM_debug_StackTiming[0]}

      # Pop the stack (word splitting is intended: one word per entry)
      (( IGCM_debug_LenStack = IGCM_debug_LenStack - 1 ))
      IGCM_debug_Stack=( ${IGCM_debug_Stack[*]:1} )
      IGCM_debug_StackArgs=( ${IGCM_debug_StackArgs[*]:1} )
      IGCM_debug_StackTiming=( ${IGCM_debug_StackTiming[*]:1} )
    else
      echo 'IGCM_debug_Exit : stack is corrupted ! LenStack =' ${IGCM_debug_LenStack}
      IGCM_debug_Exit "$@"
    fi

    # Special actions depending on command to prepare IGCM_debug_PrintInfosActions call
    # We are interested in:
    #  0. Which command performs the work
    #  1. Size of entity we are working with
    #  2. Where are we reading
    #  3. Where are we writing
    #  4. How long it took

    instrumentation=false

    case "${command}" in
    # Classical copy (only files are given to IGCM_sys_Cp as options)
    IGCM_sys_Cp)
      instrumentation=true
      # All but the latest
      fileList="${arguments[*]:0:nbArgs-1}"
      # just need the first file to get the directory
      source=${arguments[0]}
      # Nothing but the latest
      dest=${arguments[nbArgs-1]}
      # Size of files whose names are stored in a list
      entitySize=$( IGCM_debug_sizeOfTabContent fileList "${dest}" )
      ;;

    # Copy from archive machine or from buffer
    IGCM_sys_Get|IGCM_sys_GetBuffer)
      instrumentation=true
      if [ ${nbArgs} -eq 2 ] ; then
        source=${arguments[0]}
        dest=${arguments[1]}
        # Size of file whose name is stored in a variable
        entitySize=$( IGCM_debug_sizeOfTabContent source "${dest}" )
      elif [ ${nbArgs} -eq 3 ] && [ "${arguments[0]}" = '/l' ] ; then
        # IGCM_sys_Get /l liste_file[*] /ccc/scratch/cont003/dsm/p86denv/RUN_DIR/985998_14754/
        # arguments[1] is the name of the array hosting the whole list: copy it
        eval "fileList=( \${${arguments[1]}} )"
        # just need the first file to get the directory
        source=${fileList[0]}
        dest=${arguments[2]}
        # Size of files whose names are stored in a list
        entitySize=$( IGCM_debug_sizeOfTabContent 'fileList[*]' "${dest}" )
      elif [ ${nbArgs} -ge 3 ] ; then
        # All but the latest
        fileList="${arguments[*]:0:nbArgs-1}"
        # just need the first file to get the directory
        source=${arguments[0]}
        # Nothing but the latest
        dest=${arguments[nbArgs-1]}
        # Size of files whose names are stored in a list
        entitySize=$( IGCM_debug_sizeOfTabContent fileList "${dest}" )
      fi
      ;;

    # Copy from compute node or copy to archive/buffer
    IGCM_sys_Get_Master|IGCM_sys_Get_Dir|IGCM_sys_Put_Out|IGCM_sys_PutBuffer_Out)
      instrumentation=true
      source=${arguments[0]}
      dest=${arguments[1]}
      # Size of file whose name is stored in a variable
      entitySize=$( IGCM_debug_sizeOfTabContent source "${dest}" )
      ;;

    # Rebuild command
    IGCM_sys_rebuild|IGCM_sys_rebuild_station)
      instrumentation=true
      # All but the first
      fileList="${arguments[*]:1:nbArgs-1}"
      # just need a file to get the directory
      source=${arguments[1]}
      # Nothing but the first
      dest=${arguments[0]}
      # Size of files whose names are stored in a list
      entitySize=$( IGCM_debug_sizeOfTabContent fileList "${dest}" )
      ;;

    # NCO commands
    IGCM_sys_ncrcat|IGCM_sys_ncecat|IGCM_sys_ncra|IGCM_sys_ncks|IGCM_sys_cdo)
      # Example of what we want to catch : only filenames in those command lines
      # IGCM_sys_ncrcat -O -v ${list_var_final_ncrcat} ${OUT_SE[*]} ${RESULT_SE}
      # IGCM_sys_ncrcat --hst -v ${liste_coord}${var} ${file1} ${liste_file_tmp[*]} ${file_out}
      # IGCM_sys_ncrcat -p ${dir} ${liste_file_tmp} --output ${output}
      # IGCM_sys_ncrcat -x -v ${list_var} -p ${dir} ${liste_file_tmp} --output ${output}
      instrumentation=true
      prefix=.
      i=0
      # The last argument is the output: it is not scanned.
      # "-lt" (rather than an equality test) guarantees termination even when
      # the jump over a "-p value" pair steps past the last index.
      while [ ${i} -lt $(( nbArgs - 1 )) ] ; do
        if [ "${arguments[${i}]}" = "-p" ] ; then
          # "-p" option: the path prefix is the following argument
          (( i = i + 1 ))
          prefix=${arguments[${i}]}
        elif [ -f "${prefix}/${arguments[${i}]}" ] ; then
          # an existing file: this is an input
          fileList="${fileList} ${prefix}/${arguments[${i}]}"
        fi
        # other options are not interesting
        (( i = i + 1 ))
      done

      # just need one file to get the directory: first word of the list
      source=${fileList# }
      source=${source%% *}
      # Nothing but the latest
      dest=${arguments[nbArgs-1]}
      # Size of files whose names are stored in a list
      entitySize=$( IGCM_debug_sizeOfTabContent fileList "${dest}" )
      ;;
    esac

    # Print information related to instrumentation.
    # Arguments are left unquoted as the callee has always received them that way.
    if ( ${instrumentation} ) ; then
      IGCM_debug_PrintInfosActions ${command} ${entitySize} ${startTime_ms} ${endTime_ms} ${dest} ${source}
    fi

    # Only cosmetics : stack file
    if [ "X${ActivateStackFilling}" = Xtrue ] ; then
      decal=0
      while [ ${decal} -lt ${IGCM_debug_LenStack} ]; do
        printf ' ' >> "${StackFileLocation}/${StackFileName}"
        (( decal = decal + 1 ))
      done
    fi

    if ( ${ExitFlag} ) ; then
      # Inform the stack file
      if [ "X${ActivateStackFilling}" = Xtrue ] ; then
        echo '!!! ExitFlag has been activated !!!' >> "${StackFileLocation}/${StackFileName}"
      fi
      # Message 4900 (ERROR HAS BEEN TRIGGERED) is deliberately not sent to
      # rabbitMQ for now, to ease downstream treatment.
    else
      # Inform the stack file
      if [ "X${ActivateStackFilling}" = Xtrue ] ; then
        printf '%s\n' "< ${IGCM_debug_LenStack} : $*" >> "${StackFileLocation}/${StackFileName}"
      fi
      # Message 4100 (POPSTACK) is deliberately not sent to rabbitMQ for now,
      # to ease downstream treatment.
    fi

    # Reset arrays once the stack is empty
    if [ ${IGCM_debug_LenStack} = 0 ]; then
      unset IGCM_debug_Stack
      unset IGCM_debug_StackArgs
      unset IGCM_debug_StackTiming
      IGCM_debug_Stack[0]=${NULL_STR}
      IGCM_debug_StackArgs[0]=${NULL_STR}
      IGCM_debug_StackTiming[0]=${NULL_STR}
    fi
  fi
  #IGCM_debug_CallStack
}
573
#D-#==================================================================
#D-function IGCM_debug_BigBro_Initialize
#D-* Purpose: switch rabbitMQ on
#D-* Sends the first message of a job (codes 0000, 1000 or 2000) when BigBrother=true
#D-
function IGCM_debug_BigBro_Initialize {
  IGCM_debug_PushStack "IGCM_debug_BigBro_Initialize"

  # Message type standard fields:
  # https://github.com/Prodiguer/prodiguer-docs/wiki/MQ-Standard-Message-Fields

  # Message type dictionary and custom fields:
  # https://github.com/Prodiguer/prodiguer-docs/wiki/Monitoring-Message-Dictionary

  # Body and genericSimulationID are globals on purpose: they are read by
  # IGCM_debug_sendAMQP and by the messages sent later on.
  if [ "X${BigBrother}" = Xtrue ] ; then
    # create a unique ID for this specific job
    jobuid=$(uuidgen)

    case "${TaskType}" in
    computing)
      if ( ${FirstInitialize} ) ; then
        # RabbitMQ message code "BEGIN A SIMULATION"
        code=0000
        # create and persist a unique id for this simulation
        simuid=$(uuidgen)
        IGCM_card_WriteOption ${SUBMIT_DIR}/run.card Configuration simuid ${simuid}
        # Standard fields for the first message (full description of the simulation)
        genericSimulationID="\"msgApplication\":\"monitoring\",\"msgProducer\":\"libigcm\",\"activity\":\"IPSL\",\"name\":\"${config_UserChoices_JobName}\",\"cumulPeriod\":\"${CumulPeriod}\",\"experiment\":\"${config_UserChoices_ExperimentName}\",\"space\":\"${config_UserChoices_SpaceName}\",\"model\":\"${config_UserChoices_TagName}\",\"startDate\":\"${config_UserChoices_DateBegin}\",\"endDate\":\"${config_UserChoices_DateEnd}\",\"login\":\"${LOGIN}\",\"centre\":\"${CENTER}\",\"machine\":\"${MASTER}\",\"simuid\":\"${simuid}\",\"jobuid\":\"${jobuid}\""
        # RabbitMQ message body with the fields specific to this message code
        Body="{${genericSimulationID},\"msgCode\":\"${code}\",\"accountingProject\":\"${PROJECT}\",\"jobWarningDelay\":\"${jobWarningDelay}\",\"msgUID\":\"$(uuidgen)\",\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}"
        # Fill the rabbitMQ queue (the config.card in use will be sent)
        IGCM_debug_sendAMQP activate
      else
        # RabbitMQ message code "A NEW COMPUTING JOB IS RUNNING PART OF A SIMULATION"
        code=1000
        # retrieve this simulation's unique id
        IGCM_card_DefineVariableFromOption ${SUBMIT_DIR}/run.card Configuration simuid
        simuid=${run_Configuration_simuid}
        # Short standard fields for every message but the first one. Still subject to change
        genericSimulationID="\"msgApplication\":\"monitoring\",\"msgProducer\":\"libigcm\",\"cumulPeriod\":\"${CumulPeriod}\",\"simuid\":\"${simuid}\",\"jobuid\":\"${jobuid}\""
        # RabbitMQ message body with the fields specific to this message code
        Body="{${genericSimulationID},\"msgCode\":\"${code}\",\"accountingProject\":\"${PROJECT}\",\"jobWarningDelay\":\"${jobWarningDelay}\",\"msgUID\":\"$(uuidgen)\",\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}"
        # Fill the rabbitMQ queue
        IGCM_debug_sendAMQP
      fi

      # NOT VERY NICE BUT ... IT WORKS
      # Whatever was sent above, the following messages carry the short form only
      genericSimulationID="\"msgApplication\":\"monitoring\",\"msgProducer\":\"libigcm\",\"cumulPeriod\":\"${CumulPeriod}\",\"simuid\":\"${simuid}\",\"jobuid\":\"${jobuid}\""
      ;;

    post-processing)
      # RabbitMQ message code "A NEW POST-PROCESSING JOB IS RUNNING PART OF A SIMULATION"
      code=2000
      # retrieve this simulation's unique id
      IGCM_card_DefineVariableFromOption ${SUBMIT_DIR}/run.card Configuration simuid
      simuid=${run_Configuration_simuid}
      # Short standard fields for every message but the first one. Still subject to change
      genericSimulationID="\"msgApplication\":\"monitoring\",\"msgProducer\":\"libigcm\",\"cumulPeriod\":\"${CumulPeriod}\",\"simuid\":\"${simuid}\",\"jobuid\":\"${jobuid}\""
      # RabbitMQ message body with the fields specific to this message code
      Body="{${genericSimulationID},\"msgCode\":\"${code}\",\"accountingProject\":\"${PROJECT}\",\"jobWarningDelay\":\"${jobWarningDelay}\",\"msgUID\":\"$(uuidgen)\",\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}"
      # Fill the rabbitMQ queue
      IGCM_debug_sendAMQP
      ;;
    esac

    # Turn the flag on
    ActivateBigBro=true
    # Do not flush by default
    FlushAMQP=false
  fi
  IGCM_debug_PopStack "IGCM_debug_BigBro_Initialize"
}
643
#D-#==================================================================
#D-function IGCM_debug_BigBro_Finalize
#D-* Purpose: Finalize rabbitMQ messages exchanges
#D-* Sends the last message of a job; only when DEBUG_debug and ActivateBigBro are true
#D-
function IGCM_debug_BigBro_Finalize {
  IGCM_debug_PushStack "IGCM_debug_BigBro_Finalize"

  # Message type standard fields:
  # https://github.com/Prodiguer/prodiguer-docs/wiki/MQ-Standard-Message-Fields

  # Message type dictionary and custom fields:
  # https://github.com/Prodiguer/prodiguer-docs/wiki/Monitoring-Message-Dictionary

  if ( $DEBUG_debug ) && [ "X${ActivateBigBro}" = Xtrue ] ; then
    case "${TaskType}" in
    computing)
      if ( ${simulationIsOver} ) ; then
        # RabbitMQ message code "SIMULATION ENDS"
        code=0100
        FlushAMQP=true
      elif ( ${ExitFlag} ) ; then
        # RabbitMQ message code "EXIT THE JOBS BECAUSE ERROR(S) HAS BEEN TRIGGERED"
        code=9999
        FlushAMQP=true
      else
        # RabbitMQ message code "COMPUTING JOB ENDS"; queued messages are not flushed
        code=1100
      fi
      ;;
    post-processing)
      if ( ${ExitFlag} ) ; then
        # RabbitMQ message code "POST-PROCESSING JOB FAILS"
        code=2900
      else
        # RabbitMQ message code "POST-PROCESSING JOB ENDS"
        code=2100
      fi
      # a post-processing job always flushes, whatever its outcome
      FlushAMQP=true
      ;;
    esac

    # RabbitMQ message body (global: read by IGCM_debug_sendAMQP)
    Body="{${genericSimulationID},\"msgCode\":\"${code}\",\"msgUID\":\"$(uuidgen)\",\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}"
    # Fill the rabbitMQ queue
    IGCM_debug_sendAMQP
  fi

  IGCM_debug_PopStack "IGCM_debug_BigBro_Finalize"
}
692
#D-#==================================================================
#D-function IGCM_debug_Exit
#D-* Purpose: Print an error banner and the Call Stack, then set ExitFlag to true
#D-
function IGCM_debug_Exit {
  typeset banner_line

  IGCM_debug_PushStack "IGCM_debug_Exit"

  # Caller supplied description of the error
  echo "IGCM_debug_Exit : " "${@}"

  # Error banner, framed by one empty line above and below
  for banner_line in \
    "" \
    "!!!!!!!!!!!!!!!!!!!!!!!!!!" \
    "!!   ERROR TRIGGERED    !!" \
    "!!   EXIT FLAG SET      !!" \
    "!------------------------!" \
    ""
  do
    echo "${banner_line}"
  done

  IGCM_debug_CallStack

  # The flag is only raised here; leaving the job is done by IGCM_debug_Verif_Exit
  ExitFlag=true

  IGCM_debug_PopStack "IGCM_debug_Exit"
}
710
#D-#==================================================
#D-function IGCM_debug_Verif_Exit
#D-* Purpose: when ExitFlag is true, report it and exit with number 1
#D-           (computing jobs, and post-processing jobs when SpaceName is PROD)
#D-
function IGCM_debug_Verif_Exit {
  # No error has been flagged so far: nothing to report.
  ( ${ExitFlag} ) || return 0

  echo "IGCM_debug_Verif_Exit : Something wrong happened previously."
  echo "IGCM_debug_Verif_Exit : ERROR and EXIT keyword will help find out where."

  if [ X${TaskType} = Xcomputing ] ; then
    # --- Computing job: always fatal ---
    IGCM_card_WriteOption ${SUBMIT_DIR}/run.card Configuration PeriodState "Fatal"
    echo "                        EXIT THE JOB."
    echo
    IGCM_debug_CallStack

    # Notify by mail, then tell the rabbitMQ queue
    IGCM_sys_SendMail
    IGCM_debug_BigBro_Finalize

    # Leave the job
    date
    exit 1

  elif [ X${TaskType} = Xpost-processing ] ; then
    # --- Post-processing job: fatal only when SpaceName is PROD ---
    if [ X${config_UserChoices_SpaceName} = XPROD ] ; then
      echo "                        EXIT THE POST-PROCESSING JOB."
      echo
      IGCM_debug_CallStack

      # Flag the failure in run.card so that the computing job is informed
      IGCM_card_WriteOption ${SUBMIT_DIR}/run.card Configuration PeriodState "Fatal"

      # No mail notification for now
      #IGCM_sys_SendMailPost

      # Tell the rabbitMQ queue
      IGCM_debug_BigBro_Finalize

      # Leave the job
      date
      exit 1
    else
      echo "In config.card the variable SpaceName is not in PROD"
      echo "              SO WE DO NOT EXIT THE JOB."
      echo

      # Tell the rabbitMQ queue; the job goes on
      IGCM_debug_BigBro_Finalize

      date
    fi

  elif [ X${TaskType} = Xchecking ] ; then
    # --- Checking task: the error is only mentioned ---
    echo "Nothing will happen for now"
  fi
}
770
#D-#==================================================================
#D-function IGCM_debug_Print
#D-* Purpose: Print arguments according to a level of verbosity.
#D-
# Usage: IGCM_debug_Print level [-e] message [message ...]
#   level   : 1, 2 or 3. Messages are printed only if level <= Verbosity.
#             Any other level prints nothing.
#   -e      : optional, messages are printed with "echo -e".
#   message : each message is printed on its own line, prefixed by a
#             timestamp and a "--DebugN-->" marker.
# Globals read: Verbosity
function IGCM_debug_Print
{
  typeset level=$1
  shift

  typeset cmd_echo="echo"
  if [ X"${1}" = X"-e" ]; then
    cmd_echo="echo -e"
    shift
  fi

  if [ ${level} -le ${Verbosity} ] ; then
    typeset i prefix restore_glob

    # Marker printed in front of every message, deeper levels are shifted right
    case "${level}" in
    1) prefix="--Debug1-->" ;;
    2) prefix="--------Debug2-->" ;;
    3) prefix="--------------Debug3-->" ;;
    *) return 0 ;;
    esac

    # ${i} is deliberately left unquoted below: the historical output format
    # relies on word splitting (runs of blanks are squeezed to one).
    # Pathname expansion must however NOT happen, otherwise a message such as
    # "copy *.nc" would be replaced by the files of the current directory.
    # Globbing is therefore switched off, and switched back on afterwards
    # only if it was on when entering the function.
    case $- in
    *f*) restore_glob=false ;;
    *)   restore_glob=true ; set -f ;;
    esac

    for i in "$@" ; do
      ${cmd_echo} $(date +"%Y-%m-%d %T") "${prefix}" ${i}
    done

    if [ ${restore_glob} = true ] ; then
      set +f
    fi
  fi
}
802
#D-#==================================================================
#D-function IGCM_debug_PrintVariables
#D-* Purpose: Print the shell variables whose name matches a pattern
#D-           according to a level of verbosity.
# Usage: IGCM_debug_PrintVariables level pattern
#   level   : verbosity level handed over to IGCM_debug_Print
#   pattern : regular expression (grep syntax) anchored at the beginning of
#             the "name=value" lines listed by "set"
function IGCM_debug_PrintVariables
{
  typeset level=$1
  shift

  # "list" and "restore_glob" are local so that a caller using the same
  # names for its own variables does not get them overwritten.
  typeset list restore_glob

  # Single quotes added by "set" around values are removed for readability
  list=$( set | grep "^${1}" | sed -e "s/'//g" )

  if [ "X${list}" != X ]  ; then
    # ${list} is left unquoted on purpose: every blank separated word is
    # handed over as one message, as it has always been.
    # Pathname expansion is switched off meanwhile so that a value holding
    # a wildcard is printed as is instead of being replaced by file names.
    case $- in
    *f*) restore_glob=false ;;
    *)   restore_glob=true ; set -f ;;
    esac

    IGCM_debug_Print "${level}" ${list}

    if [ ${restore_glob} = true ] ; then
      set +f
    fi
  fi
}
818
#D-#==================================================================
#D-function IGCM_debug_PrintInfosActions
#D-* Purpose: Print information related to instrumentation
# Usage: IGCM_debug_PrintInfosActions actionType entitySize start_ms end_ms dest source
#   actionType : name of the instrumented action
#   entitySize : "sizeKo|sizeMo", two sizes separated by a pipe
#   start_ms   : start time of the action in milliseconds
#   end_ms     : end time of the action in milliseconds
#   dest       : destination file or directory
#   source     : source file or directory
# Globals read   : ActivateStackFilling StackFileLocation StackFileName
#                  ActivateBigBro genericSimulationID
# Globals written: instrumentationContent, Body (when ActivateBigBro is true)
function IGCM_debug_PrintInfosActions
{
  typeset actionType=$1
  typeset entitySize=$2
  typeset start_ms=$3
  typeset end_ms=$4

  typeset dest=$5
  typeset source=$6

  typeset diff_ms rate_ms entitySizeKo entitySizeMo flux_Mo_s
  typeset dirFrom dirTo

  # Duration of the action
  diff_ms=$(( end_ms - start_ms ))

  # Both sizes are carried by the same argument
  entitySizeKo=$( echo ${entitySize} | gawk -F"|" '{print $1}' )
  entitySizeMo=$( echo ${entitySize} | gawk -F"|" '{print $2}' )

  # An action may last less than one millisecond. The reported duration
  # stays 0 but the throughput is computed over 1 ms to avoid a division
  # by zero: it is then a lower bound of the real throughput.
  rate_ms=${diff_ms}
  if [ "${diff_ms}" -eq 0 ] ; then
    rate_ms=1
  fi

  # Throughput in Mo/s = ( Ko / ms ) * 1000 / 1024
  # Everything is computed by bc: the intermediate values are not integers
  # and not every shell handles floating point numbers in $(( )).
  flux_Mo_s=$( echo "scale=6;(${entitySizeKo:-0}/${rate_ms})*1000/1024" | bc )

  # Directories involved: the path itself when it is a directory,
  # the directory holding it otherwise
  if [ -d "${dest}" ] ; then
    dirTo=$( readlink -f "${dest}" )
  else
    dirTo=$( readlink -f "$( dirname "${dest}" )" )
  fi

  if [ -d "${source}" ] ; then
    dirFrom=$( readlink -f "${source}" )
  else
    dirFrom=$( readlink -f "$( dirname "${source}" )" )
  fi

  instrumentationContent=$( echo "\"actionName\":\"${actionType}\",\"size_Mo\":\"${entitySizeMo}\",\"duration_ms\":\"${diff_ms}\",\"throughput_Mo_s\":\"${flux_Mo_s}\",\"dirFrom\":\"${dirFrom}\",\"dirTo\":\"${dirTo}\"" )

  # Keep track of the action in the stack file
  if [ X${ActivateStackFilling} = Xtrue ] ; then
    echo "{${instrumentationContent}}" >> "${StackFileLocation}/${StackFileName}"
  fi

  # Inform the rabbitMQ queue
  if [ X${ActivateBigBro} = Xtrue ] ; then
    # RabbitMQ message body
    Body=$( echo "{${genericSimulationID},\"msgCode\":\"7000\",\"msgUID\":\"$(uuidgen)\",${instrumentationContent},\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}" )
    # Fill the rabbitMQ queue
    IGCM_debug_sendAMQP
  fi
}
880
#D-#==================================================================
#D-function IGCM_debug_Check
#D- * Purpose: Check the present file by comparison with a reference file
# Runs ${libIGCM}/libIGCM_debug/IGCM_debug_Test.ksh and compares what it
# prints, timestamps removed, with IGCM_debug_Test.ref.
# Exit codes: 2 when libIGCM is not defined, 3 when Verbosity is not defined,
#             4 when the output differs from the reference.
# Globals read   : libIGCM Verbosity
# Globals written: status
function IGCM_debug_Check
{
  typeset testDir differences

  #---------------------
  if [ ! -n "${libIGCM}" ] ; then
    echo "Check libIGCM_debug ..........................................[ FAILED ]"
    echo "--Error--> libIGCM variable is not defined"
    exit 2
  fi

  #---------------------
  if [ ! -n "${Verbosity}" ] ; then
    echo "Check libIGCM_debug ..........................................[ FAILED ]"
    echo "--Error--> Verbosity variable is not defined"
    exit 3
  fi

  #---------------------
  testDir=${libIGCM}/libIGCM_debug

  # The test is run only once and the result of the comparison is kept, so
  # that the differences displayed on failure are the ones that made the
  # check fail. Timestamps are removed since they change at every run.
  differences=$( diff "${testDir}/IGCM_debug_Test.ref" <("${testDir}/IGCM_debug_Test.ksh" | sed -e "s:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] [0-9][0-9]\:[0-9][0-9]\:[0-9][0-9] ::g") 2>&1 )
  status=$?

  if [ ${status} -eq 0 ] ; then
    echo "Check libIGCM_debug ..............................................[ OK ]"
  else
    # No file is written by this check: the differences are printed instead
    echo "Check libIGCM_debug ..........................................[ FAILED ]"
    echo "--Error--> Execution of ${libIGCM}/libIGCM_debug/IGCM_debug_Test.ksh"
    echo "           does not match the reference file"
    echo "           ${libIGCM}/libIGCM_debug/IGCM_debug_Test.ref"
    echo "           Differences are listed below (< reference, > this execution)."
    echo "           Report errors to the author: Patrick.Brockmann@cea.fr"
    printf '%s\n' "${differences}"
    exit 4
  fi
  #---------------------
}
Note: See TracBrowser for help on using the repository browser.