S.Sénési for Météo-France - sept 2019 to march 2021
from IPython.core.display import display, HTML, Image
display(HTML("<style>.container { width:100% !important; }</style>"))
#
# Define derived variables : output label, input variable and its table, and CDO operation to apply
#
one_mm_per_day="%g"%(1./(24.*3600.)) # in S.I.
cases = {
#
# Number of dry days per year
"dry days" : {"label":"dday" , "variable":"pr", "table":"day" ,
"operator_args" :{"operator" :"expr,'dday=pr' -yearsum -ltc,"+one_mm_per_day }},
#
# Mean precipitation for non-dry days
"daily rain" : {"label":"drain", "variable":"pr", "table":"day",
"operator_args" : {"operator" :"expr,'drain=pr' -yearmean -setrtomiss,-1,"+one_mm_per_day} } ,
}
#
# List experiments to process, and period
periods = {
"historical" : "1850-2014",
"ssp126" : "2015-2100",
"ssp245" : "2015-2100",
"ssp585" : "2015-2100",
"piControl" : None
}
#
excluded_models = {} # A dict of list of excluded models, per experiment
included_models = {} # A dict of list of included models, per experiment
#
data_versions_tag = "20200918"
data_versions_dir = "/home/ssenesi/CAMMAC/select_data_versions"
# Define json output :
# - new (i.e. from scratch) or
# - add_separate (add to existing secondary dict) or
# - add_to_input (combine with input data versions dict, in a distinct file)
output_option = "add_to_input"
version = "_derived" # Used as a suffix to input name in output json file name
output_root = "/data/ssenesi/CMIP6_derived_variables"
output_pattern = output_root+"/${variable}/${variable}_${table}_${model}_${experiment}_${realization}_${grid}_${version}_${PERIOD}.nc"
#
# Should we recompute existing files ?
recompute = False
#
do_test = True
if do_test :
# List experiments to process, and period
periods = { "ssp126" : "2015-2017", }
#periods = { "historical" : "1850-1852", }
#periods = { "piControl" : None, }
included_models = { "ssp126" : [ "EC-Earth3" ] }
version="_test"
if "daily rain" in cases : cases.pop("daily rain")
import sys
from string import Template
import os
import json
# Climaf setup (version >= 1.2.13 - see https://climaf.readthedocs.io)
sys.path.append(climaf_lib)
from climaf.api import *
# Climaf settings
climaf.cache.stamping=False
sys.path.append(CAMMAClib )
from CAMMAClib.mips_et_al import read_versions_dictionnary, \
institute_for_model, mip_for_experiment, models_for_experiments
from CAMMAClib.ancillary import feed_dic
climaf.dataloc.dataloc(project='CMIP6', organization='generic', url=output_pattern, table='yr')
data_versions=read_versions_dictionnary(data_versions_tag,data_versions_dir)
jsfile="%s/Data_versions_selection_%s%s.json" % (data_versions_dir , data_versions_tag, version)
if output_option == "add_separate" :
if os.path.exists(jsfile):
with open(jsfile,"r") as f :
print "Loading data versions dict to complement from %s"%jsfile
derived_versions=json.load(f)
else :
print "Creating derived data versions from scratch "
derived_versions=dict()
elif output_option == "add_to_input" :
print "Creating derived data versions dict from input dict %s in %s"%(data_versions_tag,jsfile)
derived_versions=data_versions
elif output_option == "new" :
print "Creating derived data versions from scratch in %s"%jsfile
derived_versions=dict()
else :
raise Error("Unkown output_option value : %s"%output_option)
for case in cases :
variable = cases[case]["variable"]
table = cases[case]["table"]
for experiment in periods :
excluded = excluded_models.get(experiment,[])
included = included_models.get(experiment,None)
#
# Select data versions in the same way as for other processings,
# maybe with more models
models_variants=models_for_experiments(data_versions,variable,table,
[experiment],excluded,included)
for model,variant in models_variants :
# Compute list of relevant files for the period to process
grid,version,data_period=data_versions[experiment][variable][table][model][variant]
pperiod = periods[experiment]
if pperiod is None :
# For piControl, process whole available period
pperiod=data_period
print "For %10s, %10s,%20s,%s %20s"%(case, experiment,model,variant,pperiod),
#continue
dic=dict(project="CMIP6", experiment=experiment, model=model, institute=institute_for_model(model),
period=pperiod,variable=variable, table=table,mip=mip_for_experiment(experiment),
realization=variant, version=version, grid=grid)
data=ds(**dic)
files=data.baseFiles().split()
print " % 3d file(s) to process"%len(files)
#
#
dic["table"]="yr"
dic["variable"]=cases[case]["label"]
all_periods=[] # The list of actually processed periods
# Compute total period and check if corresponding otuput file already exists
for each_file in files :
actual_period = each_file.split('_')[-1].replace(".nc","")
all_periods.append(climaf.period.init_period(actual_period))
total_period=climaf.period.merge_periods(all_periods)
dic["PERIOD"]=str(total_period[0])
out_file = Template(output_pattern).safe_substitute(**dic)
#print "Outfile = %s"%out_file
if recompute :
print "First removing %s"%out_file
os.system("rm -f %s"%(out_file))
if not os.path.exists(out_file) :
#
files_to_merge = "" # The whitespace separated list of processed files, to merge at the end
for each_file in files :
# Compute output filename
actual_period = each_file.split('_')[-1].replace(".nc","")
dic["PERIOD"]=actual_period
one_file = Template(output_pattern).safe_substitute(**dic)
if os.path.exists(one_file) and not recompute :
continue
else :
#
# Apply relevant operation
print "\tProcessing",each_file
fdata=fds(each_file,simulation=experiment)
derived=ccdo(fdata,**cases[case]["operator_args"])
out_dir = os.path.dirname(one_file)
if not os.path.exists(out_dir) :
os.makedirs(out_dir)
cfile(derived,one_file,ln=True)
#
# Record filenames to merge
files_to_merge += " "+one_file
#
if len(files) > 1 :
os.system("cdo mergetime %s %s"%(files_to_merge,out_file))
os.system("rm -f %s"%(files_to_merge))
feed_dic(derived_versions,(grid,version,str(total_period[0])),experiment,cases[case]["label"],"yr",model,variant)
#
#
with open(jsfile,"w") as f :
json.dump(derived_versions,f,separators=(',', ': '),indent=3,ensure_ascii=True)
print "Data versions dictionnary written as "+jsfile
#write_versions_dic("201902071814")