CAMMAC https://cammac.readthedocs.io

S. Sénési, for Météo-France - September 2019 to March 2021

Create a derived variable from a (single) primary one, provided the derivation can be formulated as a pipe of CDO operations

Here, this is applied to the case of creating yearly statistics out of daily precipitation, for selected experiments, models and variants; a data versions dictionary is also created for the derived data

Parameters are set in the first cell, and are commented either there or in the documentation (see above)
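For illustration (a minimal sketch, not an actual cell of this notebook), the "dry days" case defined in the parameters cell below amounts to the following CliMAF call on a single daily-precipitation file; the input file name is a hypothetical example:

from climaf.api import *      # provides fds, ccdo, cfile, ...

one_mm_per_day = "%g" % (1./(24.*3600.))    # 1 mm/day in S.I. units (kg.m-2.s-1)
daily_pr = fds("/path/to/pr_day_SomeModel_ssp126_r1i1p1f1_gr_20150101-21001231.nc",
               simulation="ssp126")         # hypothetical input file
dday     = ccdo(daily_pr, operator="expr,'dday=pr' -yearsum -ltc,"+one_mm_per_day)
print(cfile(dday))                          # path of a NetCDF file holding the yearly number of dry days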

In [ ]:
from IPython.core.display import display, HTML, Image
display(HTML("<style>.container { width:100% !important; }</style>"))
In [ ]:
#
# Define derived variables : output label, input variable and its table, and CDO operation to apply
#
one_mm_per_day="%g"%(1./(24.*3600.)) # 1 mm/day expressed in S.I. units (kg.m-2.s-1)
cases = {
    #
    # Number of dry days per year
    "dry days"   : {"label":"dday"  , "variable":"pr", "table":"day" ,
               "operator_args" :{"operator" :"expr,'dday=pr' -yearsum -ltc,"+one_mm_per_day }},
    #
    # Mean precipitation for non-dry days
    "daily rain" : {"label":"drain", "variable":"pr", "table":"day", 
               "operator_args" : {"operator" :"expr,'drain=pr' -yearmean -setrtomiss,-1,"+one_mm_per_day} } ,
}
#
# Experiments to process, and the period to process for each (None means : the whole available data period)
periods           = {
    "historical" : "1850-2014",
    "ssp126"     : "2015-2100",
    "ssp245"     : "2015-2100",
    "ssp585"     : "2015-2100",
    "piControl"  : None

}
#
excluded_models   = {} # A dict of lists of excluded models, per experiment
included_models   = {} # A dict of lists of included models, per experiment
#
data_versions_tag    = "20200918"
data_versions_dir    = "/home/ssenesi/CAMMAC/select_data_versions"

# Define json output : 
#  - new (i.e. from scratch) or 
#  - add_separate (add to existing secondary dict) or 
#  - add_to_input (combine with input data versions dict, in a distinct file)
output_option        = "add_to_input"
version              = "_derived" # Used as a suffix to input name in output json file name

output_root     = "/data/ssenesi/CMIP6_derived_variables"
output_pattern  = output_root+"/${variable}/${variable}_${table}_${model}_${experiment}_${realization}_${grid}_${version}_${PERIOD}.nc"
#
# Should we recompute existing files ?
recompute         = False
#
do_test           = True
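For illustration, here is a minimal sketch of how output_pattern resolves into an actual file path through string.Template, which is what the processing loop below does; it assumes the parameters cell above has been run, and all facet values are hypothetical examples:

from string import Template

facets = dict(variable="dday", table="yr", model="EC-Earth3", experiment="ssp126",
              realization="r1i1p1f1", grid="gr", version="v20200310", PERIOD="2015-2100")
print(Template(output_pattern).safe_substitute(**facets))
# -> /data/ssenesi/CMIP6_derived_variables/dday/dday_yr_EC-Earth3_ssp126_r1i1p1f1_gr_v20200310_2015-2100.nc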
In [ ]:
if do_test :
    # List experiments to process, and period
    periods           = { "ssp126" : "2015-2017", }
    #periods           = { "historical" : "1850-1852", }
    #periods           = { "piControl"  : None, }
    included_models   = { "ssp126" : [ "EC-Earth3" ] }
    version="_test"
    if "daily rain" in cases : cases.pop("daily rain")
In [ ]:
from __future__ import print_function  # use the print() syntax, for Python 2 / Python 3 compatibility
import sys
from string import Template
import os
import json

# Climaf setup (version >= 1.2.13 - see https://climaf.readthedocs.io)
# climaf_lib must hold the path of the CliMAF installation directory (to adapt)
climaf_lib = "/path/to/climaf"
sys.path.append(climaf_lib) 
from climaf.api import *

# Climaf settings
climaf.cache.stamping=False

# CAMMAClib must hold the path of the directory which contains the CAMMAClib package (to adapt)
CAMMAClib = "/home/ssenesi/CAMMAC"
sys.path.append(CAMMAClib) 
from CAMMAClib.mips_et_al  import read_versions_dictionnary, \
            institute_for_model, mip_for_experiment, models_for_experiments
from CAMMAClib.ancillary  import feed_dic
In [ ]:
# Declare the output file pattern as an additional data location for CliMAF's CMIP6
# project, so that the derived yearly files can later be accessed as regular datasets
climaf.dataloc.dataloc(project='CMIP6', organization='generic', url=output_pattern, table='yr')
In [ ]:
# Load the input data versions dictionary
data_versions=read_versions_dictionnary(data_versions_tag,data_versions_dir)
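For reference, the returned dictionary is keyed by experiment, variable, table, model and variant, each leaf holding a (grid, version, data_period) triplet, as used in the processing loop below. A minimal sketch, with a hypothetical variant label:

# data_versions[experiment][variable][table][model][variant] -> (grid, version, data_period)
g, v, p = data_versions["ssp126"]["pr"]["day"]["EC-Earth3"]["r1i1p1f1"]
print("grid=%s, version=%s, data period=%s" % (g, v, p))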

Set up the output data versions dictionary

In [ ]:
jsfile="%s/Data_versions_selection_%s%s.json" % (data_versions_dir , data_versions_tag, version)
if output_option == "add_separate" :
    if os.path.exists(jsfile):
        with open(jsfile,"r") as f :
            print("Loading data versions dict to complement from %s"%jsfile)
            derived_versions=json.load(f)
    else :
        print("Creating derived data versions dict from scratch")
        derived_versions=dict()
elif output_option == "add_to_input" :
    print("Creating derived data versions dict from input dict %s, in %s"%(data_versions_tag,jsfile))
    derived_versions=data_versions
elif output_option == "new" :
    print("Creating derived data versions dict from scratch, in %s"%jsfile)
    derived_versions=dict()
else :
    raise ValueError("Unknown output_option value : %s"%output_option)

Compute the derived variables, and feed the output data versions dictionary

In [ ]:
for case in cases :
    variable = cases[case]["variable"]
    table    = cases[case]["table"]
    for experiment in periods :
        excluded = excluded_models.get(experiment,[])
        included = included_models.get(experiment,None)
        #
        # Select data versions in the same way as for other processings,
        # maybe with more models 
        models_variants=models_for_experiments(data_versions,variable,table,
                                      [experiment],excluded,included)
        for model,variant in models_variants :
            # Compute list of relevant files for the period to process
            grid,version,data_period=data_versions[experiment][variable][table][model][variant]
            pperiod = periods[experiment]
            if pperiod is None :
                # For piControl, process whole available period
                pperiod=data_period
            print "For %10s, %10s,%20s,%s %20s"%(case, experiment,model,variant,pperiod),
            #continue
            dic=dict(project="CMIP6", experiment=experiment, model=model, institute=institute_for_model(model), 
                     period=pperiod,variable=variable, table=table,mip=mip_for_experiment(experiment),
                     realization=variant, version=version, grid=grid)
            data=ds(**dic)
            files=data.baseFiles().split()
            print "  % 3d file(s) to process"%len(files)
            #
            #
            dic["table"]="yr"
            dic["variable"]=cases[case]["label"]
            all_periods=[] # The list of actually processed periods
            # Compute total period and check if corresponding output file already exists
            for each_file in files :
                actual_period = each_file.split('_')[-1].replace(".nc","")
                all_periods.append(climaf.period.init_period(actual_period))
            total_period=climaf.period.merge_periods(all_periods)
            dic["PERIOD"]=str(total_period[0])
            out_file = Template(output_pattern).safe_substitute(**dic)
            #print "Outfile = %s"%out_file
            if recompute :
                print "First removing %s"%out_file
                os.system("rm -f %s"%(out_file))
            if not os.path.exists(out_file) :
                #
                files_to_merge = "" # The whitespace separated list of processed files, to merge at the end
                for each_file in files :
                    # Compute output filename
                    actual_period = each_file.split('_')[-1].replace(".nc","")
                    dic["PERIOD"]=actual_period
                    one_file = Template(output_pattern).safe_substitute(**dic)
                    if os.path.exists(one_file) and not recompute : 
                        continue
                    else :
                        #
                        # Apply relevant operation
                        print "\tProcessing",each_file
                        fdata=fds(each_file,simulation=experiment)
                        derived=ccdo(fdata,**cases[case]["operator_args"])
                        out_dir  = os.path.dirname(one_file)
                        if not os.path.exists(out_dir) : 
                            os.makedirs(out_dir)
                        cfile(derived,one_file,ln=True)
                        # 
                    # Record filenames to merge
                    files_to_merge += " "+one_file
                    #
                if len(files) > 1 :
                    os.system("cdo mergetime %s %s"%(files_to_merge,out_file))
                    os.system("rm -f %s"%(files_to_merge))
            feed_dic(derived_versions,(grid,version,str(total_period[0])),experiment,cases[case]["label"],"yr",model,variant)
                   
            #
#
with open(jsfile,"w") as f :
    json.dump(derived_versions,f,separators=(',', ': '),indent=3,ensure_ascii=True)
print "Data versions dictionnary written as "+jsfile

#write_versions_dic("201902071814")   
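The entries fed above through feed_dic follow the same layout as the input dictionary, with the derived variable label as variable name and "yr" as table. A minimal sketch of how a downstream notebook could look one up (key values are hypothetical examples):

# derived_versions[experiment][label]["yr"][model][variant] -> (grid, version, period)
g, v, p = derived_versions["ssp126"]["dday"]["yr"]["EC-Earth3"]["r1i1p1f1"]
print("dday for EC-Earth3 : grid=%s, version=%s, period=%s" % (g, v, p))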
In [ ]: