#!/bin/bash
# job_ce0l_interannual-bash
# David Cugnet, 06/2016
# This script builds a bunch of jobs, each one building a limit.nc file, an ozone file (optional)
# and an initial state (optional, first year only).
# All those light sequential jobs can be submitted in parallel.
# The script checks that the files to be used are present, to avoid submitting nonfunctional jobs.
# Currently set up for CURIE and ADA machines, with corresponding paths for AMIP SST/SIC files.
# If you want to use other SST/SIC files, please change TYPE option. You will have to enter
# the path to the SST/SIC files manually, unless you include them in this script.
# BEWARE: $OZODIR, $DEFDIR, $OUTDIR and possibly other paths have probably to be changed !
#
# SINGLE ARGUMENT: the type of SST/SIC files (variable V = sst or sic) you want to use.
#   type   origin                      standard file names    variable names
# * AMIP   AMIP files                  amipbc_${V}_1x1.nc     tosbcs/sicbcs
# * CPL    coupled model outputs       cpl_atm_${V}.nc        SISUTESW/SIICECOV
# * HIST   atmospheric model outputs   histmth_${V}.nc        tsol_oce/pourc_sic
# * DELK   ???                         sstk.nc/ci.nc          sstk/ci            => not supported yet.

#--- INPUT FILES TYPE (FIRST COMMAND-LINE ARGUMENT ; DEFAULT: AMIP)
TYPE='AMIP'
if [ $# -ge 1 ]; then TYPE=$1; fi

#--- FEW PATHS THAT ARE NOT LIKELY TO CHANGE
# The file name templates are single-quoted on purpose: ${V} and ${Y} must stay
# literal here, they are expanded later with "eval" once V and Y are known.
case "$TYPE" in
  AMIP) sstsic_in='amipbc_${V}_360x180_${Y}.nc'; sstsic_ou='amipbc_${V}_1x1.nc' ;;
  CPL)  sstsic_in='cpl_atm_${V}_${Y}.nc';        sstsic_ou='cpl_atm_${V}.nc'    ;;
  HIST) sstsic_in='histmth_${V}_${Y}.nc';        sstsic_ou='histmth_${V}.nc'    ;;
  *)    # Fail early instead of generating jobs with empty SST/SIC file names.
        echo "Unknown SST/SIC files type: $TYPE (expected AMIP, CPL or HIST)." >&2; exit 1 ;;
esac

#--- MACHINE-DEPENDENT SETTINGS
# work:   root of the working storage space (used to build OUTDIR and OZODIR)
# run:    launcher put in front of the executable in the generated jobs
# INIDIR: folder of the climatologies ; ARCHIV: archive holding them ("" if none)
INIDIR=""; ce0l="./ce0lseq.e"
if [ "${HOSTNAME:0:3}" = 'ada' ]; then
  work=$WORKDIR; run=""
  INIDIR='/linkhome/rech/psl/rpsl035/IGCM/INIT/ATM/LMDZ'; ARCHIV=""
elif [ "${HOSTNAME:0:5}" = 'curie' ]; then
  work=$CCCWORKDIR; run="ccc_mprun"
  # NOTE(review): INIDIR is relative here, presumably a path inside $ARCHIV - confirm.
  INIDIR='INIT/ATM'; ARCHIV='/ccc/store/cont003/dsm/p86ipsl/dmf_import/IGCM/STORAGE/INIT.tar'
else
  # Unsupported machine: report on stderr and leave with a failure status.
  echo "Not set up for this machine yet, sorry." >&2; exit 1
fi

#--- ASK FOR THE PATHS THAT ARE STILL UNKNOWN
# Each answer is stored in the variable its prompt is about; SSTDIR defaults to
# the AMIP sub-folder of INIDIR and is only asked for when INIDIR is unknown.
if [ "$INIDIR" = "" ]; then read -r -p ">>> Enter path for climatologies: " INIDIR; fi
SSTDIR=${INIDIR:+$INIDIR/AMIP}
if [ "$SSTDIR" = "" ]; then read -r -p ">>> Enter path for SST/SIC files: " SSTDIR; fi

#===============================================================================
#=== PARAMETERS YOU WILL PROBABLY NEED TO CHANGE ===============================
#===============================================================================
# Yb and Ye are inclusive bounds: one job is generated for each year from Yb to Ye.
Yb=1870 #= start year
Ye=2006 #= end year
# The resolution is part of the executable name and of the output path ; its last
# field (text after the last "x") selects the extra *.def files to be used.
res=144x142x79 #= resolution
#res=96x95x39
#res=280x280x79
# The calendar is written in the "calend" key of run.def ; the text after its
# last "_" is also used in the name of the jobs folder.
cal='earth_360d' #= calendar (earth_366d / julian / ...)
#cal='earth_365d'
#cal='gregorian'
DEFDIR=$PWD/DefLists #= folder for parameters files
ok_etat0=n #= build dyn/phys initial states (y/n ; first year only).
ok_limit=y #= build boundary conditions file (y/n).
GENCI_ID='gen????' #= GENCI project ID (for CURIE)
OUTDIR=$work/LIMIT_$TYPE/$res/$cal #= output files storage folder
OZODIR=$work/O3-CMIP5/14Fields #= ozone forcing files folder
# Single quotes keep ${Y} literal: the name is expanded for each year with "eval".
ozo='Ozone_CMIP5_ACC_SPARC_${Y}_T2Mz_O3.nc' #= ozone forcing files name
# Any non-zero value makes the jobs fetch the ozone file and store climoz_LMDZ.nc.
read_climoz=0 #= build (1/2) or not (0) climoz file
#= 2: using tro3 3: using tro3 and tro3_daylight
#= NOTE(review): the line above numbers the modes 2 and 3 while the previous one
#= lists 1/2 ; the meaning of each value has to be confirmed.
#=list of keys that have to be changed. Syntax: <file>:<key>=<value>
# <file> is the parameters file name without its ".def" suffix. \${Y} is kept
# literal and replaced by the year of each job ; other values are expanded here.
keys="run:anneeref=\${Y} run:ok_limit=${ok_limit} run:ok_etat0=${ok_etat0} run:calend=$cal config:read_climoz=${read_climoz}"

#===============================================================================
#=== SOME MORE PARAMETERS NOT SUPPOSED TO CHANGE EVERY DAY =====================
#===============================================================================
#--- EXECUTABLE NAME, BINARIES FOLDER AND JOBS FOLDER (CREATED IF NEEDED)
exe="ce0l_${res}_phylmd_seq.e"
BINDIR="$PWD/bin"
JOBDIR="jobs-ce0l-$res-${cal##*_}"
[ -d "$JOBDIR" ] || mkdir -p "$JOBDIR"

#--- LIST OF CONFIGURATION FILES (*.def FILES)
# An entry written <name>:<standard name> is stored under <standard name>.
# The extra files depend on the last field of the resolution.
DEFSLIST='physiq.def config.def conv_param.def traceur.def wake_param.def run.def'
if [[ ${res##*x} == 39 ]]; then
  DEFSLIST+=' gcm.def_96x95x39_NPv3.1:gcm.def'
elif [[ ${res##*x} == 79 ]]; then
  DEFSLIST+=' gcm.def vert_L79.def:vert.def'
else
  DEFSLIST+=' gcm.def'
fi

#--- LIST OF CLIMATOLOGIES NEEDED BY CE0L (EXCLUDING SST/SIC ; WITHOUT END ".nc")
# An entry written <name>:<standard name> is stored under <standard name>.
CLIMLIST="Albedo ECDYN ECPHY landiceref Relief Rugos"

#--- MAKE SOME VARIABLES GLOBAL
# Each variable is exported with its current value (an empty one if it is unset).
for _name in DEFDIR OZODIR BINDIR sstsic_in \
             INIDIR SSTDIR OUTDIR sstsic_ou \
             ARCHIV JOBDIR DEFSLIST keys ozo; do
  export "$_name=${!_name}"
done
unset _name


#===============================================================================
#=== FEW FUNCTIONS =============================================================
#===============================================================================
function get_files {
# Purpose: N being the number of arguments, append to a job the commands needed to:
#   * if the Nth argument is an archive (the N-1th is then the job name):
#       extract from the archive local copies of the files named after args 1..N-2 ;
#   * if the Nth argument is the job name:
#       create local links to existing local files named after args 1..N-1
#       or create local copies of existing ERGON files named after args 1..N-1.
# A last argument that expands to an empty string (e.g. '$ARCHIV' with no archive
# defined) is ignored, so that the previous argument is used as the job name.
# Arguments syntax: <m/o>:name_in[:name_out]   (m: mandatory ; o: optional, the default)
# File names are written unexpanded in the job (so that '$INIDIR/...' is resolved
# when the job runs) but are expanded here to check that the files can be reached.
# Returns: 0 on success ; 1 for an invalid archive or a missing mandatory file.
  local arch a f f_in f_ou fin fou job d d1 d2 nsm ns nsp idx typ arg i
  local mand="n" opt="" tared="" ergon="" ren="" nmiss=0
  local -a args=("$@")
  if [ ${#args[@]} -eq 0 ]; then echo "get_files: missing arguments" >&2; return 1; fi
  idx=$((${#args[@]}-1)); arch=${args[$idx]}; a=$(eval echo "$arch")

  #=== LAST ARGUMENT EXPANDS TO NOTHING => THERE IS NO ARCHIVE ; DROP IT
  if [ -z "$a" ] && [ $idx -gt 0 ]; then
    unset "args[$idx]"; idx=$((idx-1)); arch=${args[$idx]}; a=$(eval echo "$arch")
  fi

  #=== LAST ARGUMENT IS A VALID ARCHIVE ?
  # "opt" holds the tar options shared by listing (t) and extraction (x) ; it stays
  # empty when the last argument is not an archive name. Gzipped archives need "z".
  case $a in
    *.tgz*|*.tar.gz*) opt='zvf' ;;
    *.tar*)           opt='vf'  ;;
  esac
  if [ -n "$opt" ]; then
    if ! tar "t$opt" "$a" > /dev/null 2>&1; then echo "Invalid archive $arch" >&2; return 1; fi
    unset "args[$idx]"; idx=$((idx-1))
  fi
  job=${args[$idx]}; unset "args[$idx]"; idx=$((idx-1))

  #=== LOOP ON FILES (FROM THE LAST ONE TO THE FIRST ONE)
  nsm=1000; nsp=0
  while [ $idx -ge 0 ]; do
    arg=${args[$idx]}; unset "args[$idx]"; idx=$((idx-1))
    #=== MANDATORY OR NOT
    case ${arg%%:*} in
      m|o) mand=${arg%%:*}; arg=${arg#*:} ;;
      *)   mand='o' ;;
    esac
    #=== INPUT/OUTPUT FILE NAMES (OUTPUT NAME DEFAULTS TO THE INPUT FILE BASE NAME)
    f_in=${arg%%:*}; f_ou=${arg#*:}; f_ou=${f_ou##*/}
    fin=$(eval echo "$f_in"); fou=$(eval echo "$f_ou")
    #=== CHECK REACHABLE FILE ; THE LAST SUCCESSFUL CHECK WINS
    typ=0
    if [ -f "$fin" ]; then typ=1; fi                                  #=== LOCAL FILE
    if mfls "$fin" > /dev/null 2>&1; then typ=2; fi                   #=== ON ERGON
    # The archive is only searched when there is one: without the "f" option, tar
    # would read its default device (possibly the standard input).
    if [ -n "$opt" ] && tar "t$opt" "$a" "$fin" > /dev/null 2>&1; then typ=3; fi #=== IN ARCHIVE
    #=== REACHABLE FILE/FOUND ON ERGON/FOUND IN THE ARCHIVE => LINK/COPIED/EXTRACTED
    case $typ in
      1) echo " ln -s $f_in $f_ou" >> "$job"; continue ;;
      2) ergon="$ergon $f_in" ;;
      3) tared="$tared $f_in"
         #--- STRIPPED COMPONENTS: current, min, max
         ns=$(grep -o "/" <<< "$fin" | wc -l); nsm=$((nsm<ns?nsm:ns)); nsp=$((nsp>ns?nsp:ns)) ;;
      *) if [ "$mand" = 'm' ]; then echo "Missing file $f_in." >&2; nmiss=$((nmiss+1)); fi
         # An unreachable file is neither fetched nor renamed.
         continue ;;
    esac
    if [[ $fou != '.' && $fou != "${fin##*/}" ]]; then ren="$ren ${f_in##*/}:${f_ou##*/}"; fi
  done
  if [ $nmiss -gt 0 ]; then return 1; fi

  #=== GET ALL FILES FROM ARCHIVE WITH A SINGLE "tar" CALL
  if [ -n "$tared" ]; then
    echo " tar x$opt $arch$tared --strip-components=$nsm" >> "$job"
    if [ $nsm -ne $nsp ]; then
      #--- SOME EXTRACTED FILES STILL HAVE FOLDERS TREE ; GET LIST OF THESE FOLDERS
      d1=""
      for f in $tared; do
        fin=$(eval echo "$f")
        if [ $(grep -o "/" <<< "$fin" | wc -l) -ne $nsm ]; then
          if command -v stride > /dev/null 2>&1; then
            d=$(stride "$fin" "$nsm" /)
          else
            # "stride" is not defined in this script ; this fallback assumes it
            # removes the first $nsm "/"-separated fields - TODO confirm.
            d=$fin; for ((i = 0; i < nsm; i++)); do d=${d#*/}; done
          fi
          d1="$d1 ${d%%/*}"
        fi
      done
      #--- REMOVE REDUNDANT FOLDERS
      d1=$(echo $d1 | xargs -n1 | sort -u | xargs)
      d2=""; for f in $d1; do d2="$d2 $f/*"; done
      echo " mv$d2 . ; rmdir $d1" >> "$job"
    fi
  fi

  #=== GET ALL FILES FOUND ON ERGON WITH A SINGLE "mfget" CALL
  if [ -n "$ergon" ]; then echo " mfget $ergon ." >> "$job"; fi

  #=== RENAME FILES THAT NEED TO BE
  for f in $ren; do echo " mv ${f%%:*} ${f#*:}" >> "$job"; done
  return 0
}

#===============================================================================
function copy_files {
# Purpose: N being the number of arguments, append to the job $N the commands
#          needed to hard copy the file(s) $1..$N-1.
# Arguments syntax: <m/o>:name_in[:name_out]   (m: mandatory ; o: optional, the default)
# name_out defaults to the base name of name_in. name_in is written unexpanded in
# the job, but is expanded here to check that the file exists ; files that do not
# exist are skipped. Files are handled from the last one to the first one.
# Returns: 0 on success ; 1 if arguments or at least one mandatory file are missing.
  local idx job arg mand f_in f_ou fin nmiss=0
  local -a args=("$@")
  if [ ${#args[@]} -eq 0 ]; then echo "copy_files: missing arguments" >&2; return 1; fi
  idx=$((${#args[@]}-1)); job=${args[$idx]}; unset "args[$idx]"; idx=$((idx-1))

  #=== LOOP ON FILES
  while [ $idx -ge 0 ]; do
    arg=${args[$idx]}; unset "args[$idx]"; idx=$((idx-1))
    #=== MANDATORY OR NOT
    case ${arg%%:*} in
      m|o) mand=${arg%%:*}; arg=${arg#*:} ;;
      *)   mand='o' ;;
    esac
    #=== INPUT/OUTPUT FILE NAMES
    f_in=${arg%%:*}; f_ou=${arg#*:}; f_ou=${f_ou##*/}
    fin=$(eval echo "$f_in")
    #=== CHECK REACHABLE FILE
    if [ -f "$fin" ]; then
      echo " cp $f_in $f_ou" >> "$job"
    elif [ "$mand" = 'm' ]; then
      echo "Missing file $fin" >&2; nmiss=$((nmiss+1))
    fi
  done
  if [ $nmiss -gt 0 ]; then return 1; fi
  return 0
}

#===============================================================================
function get_clims {
# Purpose: build the list of the data files needed by ce0l for one year and hand it
#          over to get_files, which writes the matching commands in the job.
# $1: year    $2: job name    $3: archive name (optional)
# A climatology written <name>:<standard name> is stored as <standard name>.nc.
# Used global vars: CLIMLIST, read_climoz, ozof, sstsic_in, sstsic_ou
# Returns: the status of get_files.
  local clim spec this_year prev_year next_year target stem V list=""

  #--- CLIMATOLOGIES: MANDATORY, TAKEN FROM $INIDIR
  for clim in $CLIMLIST; do
    case $clim in
      *:*) spec="m:\$INIDIR/${clim%:*}.nc:${clim##*:}.nc" ;;
      *)   spec="m:\$INIDIR/${clim%:*}.nc:." ;;
    esac
    list="$list $spec"
  done

  #--- OZONE FILE: MANDATORY AS SOON AS read_climoz IS NOT 0
  if [ $read_climoz -ne 0 ]; then
    list="$list m:\$OZODIR/${ozof}:climoz.nc"
  fi

  #--- SST AND SIC: CURRENT YEAR IS MANDATORY, PREVIOUS AND NEXT YEARS ARE OPTIONAL
  # V has to keep this name: it is the one used in the file names templates.
  for V in sic sst; do
    this_year=$(Y=$1; eval echo $sstsic_in)
    prev_year=$(Y=$(($1-1)); eval echo $sstsic_in)
    next_year=$(Y=$(($1+1)); eval echo $sstsic_in)
    target=$(eval echo $sstsic_ou)
    stem=${target%.*}
    list="$list m:\$SSTDIR/$this_year:$target"
    list="$list \$SSTDIR/$prev_year:${stem}_m.nc"
    list="$list \$SSTDIR/$next_year:${stem}_p.nc"
  done

  get_files $list $2 $3
}

#===============================================================================
function change_key {
# Purpose: append to the job $3 the "sed" command which replaces, in the file
#          $2.def, the text going from "<key>=" to the end of line by "<key>=<value>".
# $1: <key>=<value>   $2: parameters file name (without .def suffix)   $3: job name
# Always returns 0.
  local assignment=$1 deffile=$2.def jobfile=$3
  local keyname=${assignment%%=*}
  printf ' sed -i s%%%s=.*%%%s%% %s\n' "$keyname" "$assignment" "$deffile" >> "$jobfile"
  return 0
}

#===============================================================================
function gen_job {
# Purpose: generate the job $JOBDIR/job_ce0l_<year>.bash for a single year.
# $1: year
# The job header depends on the machine (first characters of $HOSTNAME). The job
# then copies and alters the *.def files, gets the executable and the data files,
# runs ce0l and moves its outputs to $OUTDIR.
# Initial states are only built for the first year ($Yb): for any other year
# ok_etat0 is forced to 'n', both in run.def and for the outputs to be stored.
# Used global paths: JOBDIR, DEFDIR, BINDIR, OUTDIR, INIDIR, SSTDIR, OZODIR, ARCHIV
# Used global vars: Yb, ok_limit, read_climoz, ok_etat0, ozo, keys, DEFSLIST, exe,
#                   ce0l, run, GENCI_ID
# Exits with status 1 as soon as a mandatory file is missing (this function is meant
# to be run in a subshell, e.g. in background).
  local Y=$1 ok_etat0=$ok_etat0
  local job_name job ff f key kv
  job_name=job_ce0l_${Y}
  job=$JOBDIR/$job_name.bash
  ozof=$(eval echo $ozo)               #--- ozone file name for this year, read by get_clims
  if [ "$Y" -ne "$Yb" ]; then ok_etat0='n'; fi

  #=== JOB HEADER
  if [ "${HOSTNAME:0:3}" = 'ada' ]; then
    cat > "$job" << ______fin
# @ job_name = $job_name
# @ output = \$(job_name).\$(jobid)
# @ error= \$(job_name).\$(jobid)
# @ job_type = serial
# @ as_limit = 20.0Gb
# @ wall_clock_limit = 0:10:00
# @ queue

set -x
cd \$TMPDIR
______fin
  elif [ "${HOSTNAME:0:5}" = 'curie' ]; then
    cat > "$job" << ______fin
#!/bin/bash
#MSUB -o $job_name.%I.o
#MSUB -e $job_name.%I.e
#MSUB -r $job_name
#MSUB -n 1
#MSUB -T 600
#MSUB -A $GENCI_ID
#MSUB -q standard

set -x
if [ ! -d \$SCRATCHDIR/CE0L_${Y} ]; then mkdir \$SCRATCHDIR/CE0L_${Y}; fi
cd \$SCRATCHDIR/CE0L_${Y}
______fin
  fi

  #=== PATHS USED BY THE JOB
  if [ "$ARCHIV" != "" ]; then echo " ARCHIV=$ARCHIV" >> "$job"; fi
  cat >> "$job" << ______fin
DEFDIR=$DEFDIR
BINDIR=$BINDIR
INIDIR=$INIDIR
SSTDIR=$SSTDIR
OUTDIR=$OUTDIR
if [ ! -d \$OUTDIR ]; then mkdir -p \$OUTDIR; fi
______fin
  if [ $read_climoz -ne 0 ]; then
    echo " OZODIR=$OZODIR" >> "$job"
  fi

  echo "" >> "$job"
  echo " #=== GET AND ALTER CONFIGURATION FILES" >> "$job"
  ff=""; for f in $DEFSLIST; do ff="$ff m:\$DEFDIR/$f"; done
  copy_files $ff "$job" || exit 1
  for key in $keys; do
    kv=$(eval echo ${key#*:})          #--- expands the ${Y} kept literal in $keys
    # $keys was built with the global ok_etat0 ; use the value valid for this year.
    case $kv in ok_etat0=*) kv="ok_etat0=$ok_etat0" ;; esac
    change_key "$kv" "${key%%:*}" "$job"
  done

  echo "" >> "$job"
  echo " #=== LINK THE MODEL" >> "$job"
  get_files m:\$BINDIR/$exe:$ce0l "$job" || exit 1

  echo "" >> "$job"
  echo " #=== LINK MISCELLANOUS DATA" >> "$job"
  # The archive is given as a literal '$ARCHIV' (expanded when the job runs), and
  # only if there is one: an empty name must not be handed over as an argument.
  if [ -n "$ARCHIV" ]; then
    get_clims "$Y" "$job" '$ARCHIV' || exit 1
  else
    get_clims "$Y" "$job" || exit 1
  fi
  cat >> "$job" << ______fin

#=== RUN THE MODEL
$run $ce0l

#=== COPY OUTPUTS
______fin
  if [ "$ok_limit" = 'y' ]; then
    echo " mv limit.nc \$OUTDIR/limit_${Y}.nc" >> "$job"
  fi
  if [ $read_climoz -ne 0 ]; then
    echo " mv climoz_LMDZ.nc \$OUTDIR/climoz_LMDZ_${Y}.nc" >> "$job"
  fi
  if [ "$ok_etat0" = 'y' ]; then
    # One "mv" per file: each initial state gets its own name in $OUTDIR.
    echo " mv start.nc \$OUTDIR/start_${Y}.nc" >> "$job"
    echo " mv startphy.nc \$OUTDIR/startphy_${Y}.nc" >> "$job"
  fi
}

#===============================================================================
#--- GENERATE ONE JOB PER YEAR (FROM Yb TO Ye INCLUDED), ALL IN PARALLEL
for ((Y = Yb; Y <= Ye; Y++)); do gen_job $Y & done
#--- DO NOT RETURN BEFORE ALL THE JOBS ARE WRITTEN
wait