S.Sénési for Météo-France - sept 2019 to march 2021
changes[scenario][variable][time_stat][hseason][ens_stat][period] = value of change
changes[scenario][variable]["models"] = dict with keys=model_names and values= "metadata for the data version used"
Pendergrass, A. G., Knutti, R., Lehner, F., Deser, C., & Sanderson, B. M. (2017). Precipitation variability increases in a warmer climate. Scientific Reports, 7(1), 17966. https://doi.org/10.1038/s41598-017-17966-y
import os
# Version number will be a suffix for data filename. Use e.g. "_V1" for legibility
version = ""
# All models listed through next parameters as providing data for a scenario should be included :
# Also : data_versions_tag is a component of output filename
data_versions_tag = "20201228_derived"
excluded_models = { "mrro":["CAMS-CSM1-0"]} # a dict with keys==variable and value=list of models
included_models = {} # a dict with keys==variable and value=list of models or None
data_versions_dir = os.getenv("CAMMAC")+"/data"
#scenarios,ref_experiment,ref_period,variables,seasons,period_indices,stats_list
# Managed 'variables' actually are a combination of a geophysical variabe and a time statistics
# among 'mean' and 'std' (for inter-annual standard deviation)
variables=[ ["pr","mean"], ["prw","mean"], ["pr","std"], ["mrro","mean"], ["mrro","std"] ]
# Hybrid seasons definitions. Can use any CDO's selseason compliant season label, 'ANN' and 'anm'
# and (for now) region labels 'NH', 'SH', 'tropics' (which all stands for land and the latitude
# limit set by 'latitude_limit') and 'land'.
# You may change function create_masks() below for extending known regions
latitude_limit = 30.
# SH_latitude_limit allows to exclude Antarctica when set to 60; use e.g. 91 to include Antarctica
SH_latitude_limit = 60
hybrid_seasons = {
"tropics_DJF" : [ ("tropics", "DJF") ],
"tropics_JJA" : [ ("tropics", "JJA") ],
"extra_winter" : [ ("NH", "DJF"), ("SH", "JJA") ],
"extra_summer" : [ ("NH", "JJA"), ("SH", "DJF") ],
}
# List of model-ensemble statistics to compute on each variable
# 'nq5' and 'nq95' == 5 and 95% percentiles estimated from a normal law
# 'ens' == just provide a dict of all values (with key=model name)
# 'wmean' and 'iwmean' == mean weighted by some weights (or their inverse), as e.g. temperature
# change; this option is not actually implemented in this version of the code
stats_list = [ "mean","second","butlast","min","max", "wmean","iwmean","ens","nq5","nq95"]
# Computing changes implies defining a reference period
ref_experiment = "historical"
ref_period = "1850-1900"
#ref_period = "1995-2014"
# List of projection experiments to process
scenarios = [ "ssp585", "ssp245","ssp126" ]
# Define the time slicing for projections. It may include years belonging to ref_experiment
# duration, but scenario's begin must match ref_experiment's end.
periods_length = 20
start_year = 2021
last_year = 2081 # i.e. last period's begin
step = 10 # Not necessarily equal to periods_length !
# The directory holding output (json) files
outdir = "./changes"
# To be documented
clean_cache=False
ch=dict(compute=True,house_keeping=True)
# For debug and perf assessment :
do_profile = False
own_cache = False
one_cache_per_process= False
cache_root = None
do_test = True
if do_test :
#data_versions_tag = "20200714-test"
version = "_test"
scenarios = [ "ssp585" ]
included_models = { "mrro": [ "CNRM-CM6-1-HR" ] }#,"IPSL-CM6A-LR", "FGOALS-f3-L" ]
excluded_models = {"mrro" : [ "FGOALS-f3-L" ]}
variables = [ ["mrro","mean"] ]
#hybrid_seasons = { "extra_winter" : [ ("NH", "DJF"), ("SH", "JJA") ],}
hybrid_seasons = { "nh_winter" : [ ("NH", "DJF") ],}
periods_length = 1
step = 70
stats_list = [ "mean" ]
import os,sys, json
import numpy as np
# Climaf setup (version >= 1.2.13 - see https://climaf.readthedocs.io)
if own_cache or one_cache_per_process:
if cache_root is None :
cache_root=os.getcwd()
cache_dir=cache_root+"/tmp_climaf_cache"
if one_cache_per_process :
cache_dir += "_%d"%os.getpid()
os.environ["CLIMAF_CACHE"]=cache_dir
from climaf.api import *
climaf.cache.stamping=False
climaf.driver.dig_hard_into_cache=False
#
from CAMMAClib.ancillary import prettier_label, feed_dic
from CAMMAClib.mips_et_al import read_versions_dictionnary, TSU_metadata, institute_for_model, models_for_experiments
from CAMMAClib.variability import init_trend
# Load some user settings, if available
settings_file=os.getenv("CAMMAC_USER_PYTHON_CODE_DIR",".")+'/cammac_user_settings.py'
if os.path.exists(settings_file) :
exec(compile(open(settings_file).read(),settings_file,'exec'))
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
# NB : This code is half-way between :
# 1 - a regular time slicing of projection duration, with computation of warming level for these slices
# 2 - defining a time slicing for each model and scenario based on the warming level that must be reached for each slice index
# Option 1 allows to build a parametric curve for the dependency of a change with warming, while option 2 allows to
# compute the direct relationship
# So, for now, we implement option 1 but the code shows some un-necessary complexity due to preparing for option 2
# Define a list of indices for the time slicing used
period_indices = range(0,1+(last_year-start_year+1)/step)
def index_to_period(scenario,model,period_index) :
"""
Returns the period during which MODEL reaches the warming level defined for PERIOD_INDEX
in experiment SCENARIO
For now, just handle an even slicing of periods of interest, whatever the model and scenario
"""
slices= [ "%s-%s"%(begin,begin+periods_length-1) for begin in range(start_year,last_year+1,step) ]
return slices[period_index]
def mean_or_std(scenario, model, variant, season, variable, table, time_stat, period, compute=True, house_keeping=False):
"""
Handles seasons : 'ANN' , and all seasons valid for CDO selseason operator
Handles time statistics 'mean' and 'std'
For 'std', the season means are detrended before computing standard deviation
"""
global data_versions, project
#
# Get details of dataset used
#print scenario,variable,table,model,variant
#print "/",
grid,version,_ = data_versions[scenario][variable][table][model][variant]
#
# Define relevant dataset
dic=dict(project=project, experiment=scenario,
model=model, institute=institute_for_model(model),
variable=variable,period=period, grid=grid,
table=table, version=version,realization=variant)
if project=="CMIP5" :
dic.pop("mip")
elif project=="CMIP6" :
if scenario=="historical" : dic.update(mip="CMIP")
else : dic.update(mip="ScenarioMIP")
else :
raise ValueError("Cannot process project %s"%project)
#
base_ds=ds(**dic)
#
if season != "anm" and season != "ANN" :
#
# Compute relevant stat
if time_stat=="mean" :
# Average over years after selecting season
res=ccdo(base_ds,operator="timmean -selseason,%s"%season)
elif time_stat=="std" :
# Compute seasonal mean for each year
cached=ccdo(base_ds,operator="selseason,%s -seasmean"%season)
# Must detrend
a=ctrend(cached)
detrended=csubtrend(cached,a,a.b)
# Compute standard deviation of seasonal values series
res=ccdo_fast(detrended,operator="timstd1")
else:
raise ValueError("stat %s cannot yet be processed by mean_or_std for %s"%(time_stat,variable))
elif time_stat == "mean" :
res=ccdo(base_ds,operator="timmean")
elif time_stat=="std" :
# Compute year means
cached=ccdo(base_ds,operator="yearmean")
# Must detrend
a=ctrend(cached)
detrended=csubtrend(cached,a,a.b)
# Compute standard deviation of detrended yearly values series
res=ccdo_fast(detrended,operator="timstd1")
else :
raise ValueError("stat %s cannot yet be processed by mean_or_std for season %s"+\
"and variable %s"%(time_stat,season,variable))
#
if compute :
cfile(res)
if house_keeping :
cdrop(base_ds)
if time_stat=="std" :
cdrop(a)
cdrop(detrended)
#
return res
def create_masks(model,variable,realization,table) :
global latitude_limit,data_versions
"""
Return masks for : land, land_tropics, land-NH, land-SH
where region of interest has value 0 and other grid cells are set to missing value
"""
try :
grid,_,_= data_versions["piControl"][variable][table][model][realization]
except :
grid,_,_=data_versions["historical"][variable][table][model].values()[0]
dic=dict(project=project, experiment="piControl", variable="sftlf",
model=model, institute=institute_for_model(model),
table="fx", period="fx", version="latest",mip="CMIP",
realization=realization, grid=grid)
#if model=="GFDL-CM4" : dic.update(grid="gr1")
#
land = ds(**dic)
land = ccdo_fast(land,operator="divc,100.")
# TBD : take into account fractional cell coverage by land
#land = ccdo_fast(land,operator="setctomiss,0.")
land = ccdo_fast(land,operator="setrtomiss,-1,0.5")
#
tropics = ccdo_fast(land,operator="expr,'_lat=(abs(clat(sftlf))>%f)?missval(sftlf):1;land_tropics=_lat*sftlf'"%\
latitude_limit)
north = ccdo_fast(land,operator="expr,'_lat=(clat(sftlf)>%f)?1:missval(sftlf);land_north=_lat*sftlf'"%latitude_limit)
south = ccdo_fast(land,operator="expr,'_lat=(clat(sftlf)<-%f && clat(sftlf)>-%f)?1:missval(sftlf);land_south=_lat*sftlf'"%(latitude_limit,SH_latitude_limit))
return {"land":land,"tropics":tropics,"NH":north,"SH":south }
# For tests
#l,t,n,s=masks("CNRM-CM5")
#ncview(n)
def apply_mask(field,mask_field):
"""
Assumes that mask field has non-zero non-missing values on interesting places, and
zero or missing value elsewhere
Result has missing value except on interesting places, where it has input field value
"""
return ccdo2_flip(field,mask_field,operator="ifthen")
def union(field1,field2) :
"""
Returns a field which, on each cell, has field1 value, except if missing,
in which case field2 value is used
"""
# Make a mask with f1, changing its zeroes to non-zero, and its missing to 0
v1=field1.variable
masq=ccdo_fast(field1,operator="setmisstoc,0 -expr,'mask=(%s!=missval(%s))?1:0;'"%(v1,v1))
# Apply mask
rep=ccdo3_flip(field1,field2,masq,operator="ifthenelse")
return(rep)
def hybrid_field(fields,hybrid_season,all_masks):
"""
Computes an hybrid field by combining seasonal fields and geographical masks
FIELDS is a dict of fields indexed by season labels
MASKS is a dict of mask fields which have non-zero value over the region of
interest, and 0 elsewhere. They are supposed to be on the same grid as fields
in FIELDS
HYBRID_SEASON is a list of pairs (region, season), where 'region' is among keys of
dict MASKS, and 'season' is among the season labels of dict FIELDS
However, the case of HYBRID_SEASON = [ ("globe", whatever_season) ] is also handled,
without an entry "globe" in MASKS. The returned value is then FIELDS[whatever_season]
"""
region,season=hybrid_season[0]
if region=="globe" :
return fields[season]
#
rep=apply_mask(fields[season],all_masks[region])
if len(hybrid_season)>1 :
for region,season in hybrid_season[1:] :
rep=union(rep,apply_mask(fields[season],all_masks[region]))
return rep
def fldmean(option,field):
if option=="linear" :
f=ccdo_fast(field,operator="fldmean")
else : # assume quadratic
f=ccdo_fast(field,operator="sqrt -fldmean -sqr")
#print "computing",f
cfile(f)
#print "cfile done"
rep=np.float64(cvalue(f))
#print "cvalue done"
# Avoid creating numerous small files
#if clean_cache :
#cdrop(f)
return(rep)
def weighted_ensemble_stat(ens,weights,option) :
""" Compute mean or quantile on an ensemble using scaling weights """
#
if option in [ "mean", "wmean","iwmean","ens_mean"]:
if weights is None :
weights=dict()
for k in ens : weights[k]=1.
sum=0
wsum=0
for e in ens :
if option in [ "mean" ,"ensmean" ] : weight=1.
elif option == "wmean" : weight=weights[e]
elif option == "iwmean" : weight=1./weights[e]
sum+=weight*ens[e]
wsum+=weight
return sum/wsum
#
elif option in ["min","max","second","butlast" ] :
l=ens.values()
l.sort()
if option=="min" : return np.float64(l[0])
if option=="second" : return np.float64(l[1])
if option=="butlast" : return np.float64(l[-2])
if option=="max" : return np.float64(l[-1])
#
elif option in ["nq5","nq95" ] : #percentiles with gaussian hypothesis
mean=np.mean(ens.values())
std1=np.std(ens.values(),ddof=1)
if option == "nq5" : return mean - 1.645*std1
if option == "nq95" : return mean + 1.645*std1
#
if option == "ens" :
rep=ens.copy()
for model in rep :
rep[model] = np.float64(ens[model])
return rep
return rep
#
elif option in ens : # return a single model, which name is passed with 'option'
return np.float64(ens[option])
else:
raise ValueError("Time to use some library for stats (%s) !"%option)
def compute_stats(scenarios,ref_experiment,ref_period,variables,seasons,period_indices,stats_list):
global excluded_models,included_models
# Compute requested time statistics of fields on ref_period and samples,
# for all variables on seasons, and for tas mean on a per-annual basis
#
projs=dict() # projs[scenario][model][variable][time_stat][period_index][season] : projection values (2d fields)
refs=dict() # refs [scenario][model][variable][time_stat][season] : reference values (2d fields)
models=dict() # models[scenario][variable] == models used for a scenario and a variable
cases=[ (seasons,variable,time_stat) for variable,time_stat in variables ] + \
[ (["ANN"],"tas","mean") ]
#
print "\nComputing time-statistics on basic fields\n"
for seasonl,variable,time_stat in cases :
print variable,time_stat,seasonl,
table="Amon"
if variable in ["mrro","mrso","mrsos"] : table="Lmon"
#
for scenario in scenarios :
excl=excluded_models.get(variable,[])
incl=included_models.get(variable,None)
mods=models_for_experiments(data_versions,variable,table,[ref_experiment,scenario],excl,incl)
feed_dic(models,mods,scenario,variable)
for model,variant in mods :
print model,
for season in seasonl :
print season,
# Values for ref_period
print ref_period,
val=mean_or_std(ref_experiment,model,variant,season,variable,table,time_stat,ref_period)
feed_dic(refs,val,scenario,model,variable,time_stat,season)
#
for period_index in period_indices :
period=index_to_period(scenario,model,period_index)
print period,
val=mean_or_std(scenario,model,variant,season,variable,table,time_stat,period)
feed_dic(projs,val,scenario,model,variable,time_stat,period_index,season)
print
print
#
# Compute aggregates on regions and seasons (for base variables, and global+annual average for tas),
# then derive relative (or absolute) change
#
changes=dict()
print "\nComputing individual changes by integrating over regions and hybrid seasons\n"
for scenario in scenarios :
print scenario
cases=[ (hybrid_seasons,variable,time_stat) for variable,time_stat in variables ] + \
[ ({"globe_year" : [ ("globe","ANN") ] }, "tas","mean") ]
#
for hseasons,variable,time_stat in cases :
print "\t",variable, time_stat,
if time_stat=="std" : option = "quadratic"
else : option = "linear"
table="Amon"
if variable in ["mrro","mrso","mrsos"] : table="Lmon"
#
for model,variant in models[scenario][variable] :
print model,variable,variant,table,
#print model,
all_masks = create_masks(model,variable,variant,table)
#
ref_fields = refs [scenario][model][variable][time_stat]
#
for hseason in hseasons :
#print hseason,
#
ref = fldmean(option,hybrid_field(ref_fields ,hseasons[hseason],all_masks))
#
for period_index in period_indices :
#print period+" ",
# Some (model, scenario) may not reach the warming level corresponding to period_index
if index_to_period(scenario,model,period_index) is None:
continue
#
proj_fields = projs[scenario][model][variable][time_stat][period_index]
pro = fldmean(option,hybrid_field(proj_fields,hseasons[hseason],all_masks))
#
if variable == "tas" :
change = pro - ref
else :
change=100* ( pro - ref ) / ref
feed_dic(changes,change,scenario,variable,time_stat,hseason,period_index,model)
print
print "Saving cache index for ",scenario," ...",
csync()
print "done"
# Compute ensemble stats on each scenario ensemble
stats=dict()
print "\nComputing ensemble statistics\n"
for scenario in changes :
for variable in changes[scenario] :
for time_stat in changes[scenario][variable] :
for hseason in changes[scenario][variable][time_stat] :
for period_index in changes[scenario][variable][time_stat][hseason] :
# Access dict representing the models ensemble
ens = changes[scenario][variable][time_stat][hseason][period_index]
# Compute the relevant statistics on the models ensemble
for ens_stat in stats_list :
res = weighted_ensemble_stat(ens, None,ens_stat)
period=index_to_period("any","any",period_index)
feed_dic(stats,res,scenario,variable,time_stat,hseason,ens_stat,period)
for ens_stat in stats_list :
if ens_stat=="ens" : continue
for period_index in changes[scenario][variable][time_stat][hseason] :
#print_ensemble_values(variable,stat,warming_index,season,changes[variable][warming_index][season]...,c[stat][period],models)
pass
print
# Record the metadata of models used in each case
metadata=dict()
table="Amon"
if variable=="mrro" : table="Lmon"
for model,variant in models[scenario][variable]:
metadata[model]=TSU_metadata([scenario,ref_experiment],[(model,variant)],variable,table,data_versions)
feed_dic(stats,metadata,scenario,variable,"models")
print 40*"@"
return stats,projs,refs
project="CMIP6"
#
init_trend()
#
# Define some CliMAF operators with tricky arguments ordering in order that CliMAF
# do not loose track of variable name for the output of some CDO operators
cscript('ccdo2_flip', 'cdo ${operator} ${in_2} ${in_1} ${out}')
cscript('ccdo3_flip', 'cdo ${operator} ${in_3} ${in_1} ${in_2} ${out}')
#
data_versions=read_versions_dictionnary(data_versions_tag,data_versions_dir)
#
# Gather a list of "real" seasons used in requested hybrid seasons
seasons=[]
for hseason in hybrid_seasons :
for region,season in hybrid_seasons[hseason]:
if season not in seasons :
seasons.append(season)
if do_profile :
import cProfile,io,pstats
pr = cProfile.Profile()
pr.enable()
#
stats,projs,refs=compute_stats(scenarios,ref_experiment,ref_period,variables,seasons,period_indices,stats_list)
#
if do_profile :
pr.disable()
python_version="python2"
if python_version == "python2":
s = io.BytesIO()
else:
s = io.StringIO()
ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
ps.print_stats()
# Just un-comment next line to get the profile on stdout
print s.getvalue()
if not os.path.exists(outdir): os.makedirs(outdir)
stats["ref_period"]=ref_period
with open ("%s/stats_%s_%s%s.json"%(outdir,project,data_versions_tag,version),"w") as f :
json.dump(stats,f,separators=(',', ': '),indent=3)
csync()
test_CMIP5=False
if test_CMIP5 :
fields=dict()
project="CMIP5"
land,tropics,north,south=create_masks("CNRM-CM5")
print "period winter summer"
periods=[ "%d-%d"%(start,start+30) for start in range(2020,2080,10) ]
for period in periods :
#period='2010-2040'
mean_or_std("rcp85", 'CNRM-CM5','JJA','pr_mean',period,fields)
mean_or_std("rcp85", 'CNRM-CM5','DJF','pr_mean',period,fields)
jja=fields['CNRM-CM5']['JJA']['pr_mean'][period]
djf=fields['CNRM-CM5']['DJF']['pr_mean'][period]
winter=fldmean('linear',union(apply_mask(djf,north),apply_mask(jja,south)))*24.*3600.
summer=fldmean('linear',union(apply_mask(jja,north),apply_mask(djf,south)))*24.*3600.
#ncview(u)
print period,"%.3g"%winter,"%.3g"%summer
def print_ensemble_values(variable,stat,period,season,ens,val,models):
if season=="globe_year" : variable="GTAS_"+variable
print "%-9s %-4s %-13s %-9s %5.1f "%(variable[0:9],stat[0:4],season,period,val),
for model in models :
print " %5.1f "%(ens[model]),
print
def print_ensemble_models(models):
print "%-9s %-4s %-13s %-9s %-4s"%("","","","",""),
for model in models :
print "%- 7s "%(model[:7]),
print
def keep_scenario_settings(scenario):
#
#
if scenario == "rcp85" :
project="CMIP5"
ref_period="1976-2005"
models=["bcc-csm1-1-m", "bcc-csm1-1", "CanESM2", # pas de sftlf : "HadGEM2-AO","FIO-ESM", "CMCC-CMS",
"CNRM-CM5", "CSIRO-Mk3-6-0", "IPSL-CM5A-LR", "IPSL-CM5A-MR", "IPSL-CM5B-LR",
"FGOALS-s2", "MIROC5", "MIROC-ESM-CHEM", "MIROC-ESM", "HadGEM2-CC", "HadGEM2-ES",
"MRI-CGCM3", "GISS-E2-H-CC", "GISS-E2-R-CC", "GISS-E2-R",
"CCSM4", "NorESM1-ME", "NorESM1-M", "GFDL-CM3", "GFDL-ESM2G", "GFDL-ESM2M",
"CESM1-BGC", "CESM1-CAM5", ] # pas de precip : "CESM1-WACCM",
# pas de huss : "BNU-ESM","CMCC-CESM","MPI-ESM-LR", "MPI-ESM-MR",
# Define time slices for projections
periods_length=31
ref_year=1990
delta=10
samples_number=9
else :
project="CMIP6"
ref_period="1995-2014"
if scenario == "ssp585" :
models=["BCC-CSM2-MR","CanESM5","CNRM-CM6-1","CNRM-ESM2-1", "EC-Earth3",
"EC-Earth3-Veg","IPSL-CM6A-LR","MIROC6","MIROC-ES2L","UKESM1-0-LL",
"MRI-ESM2-0","CESM2","CESM2-WACCM","GFDL-CM4","GFDL-ESM4",]
elif scenario == "ssp245" :
models=["BCC-CSM2-MR","CanESM5","CNRM-CM6-1","CNRM-ESM2-1", "EC-Earth3",
"EC-Earth3-Veg","IPSL-CM6A-LR","MIROC6","MIROC-ES2L","UKESM1-0-LL",
"MRI-ESM2-0","CESM2","CESM2-WACCM","GFDL-CM4","GFDL-ESM4",]
else :
raise ValueError("cannot process scenario ",scenario)
# Define time slices for projections
periods_length=20
ref_year=2021
delta=10
samples_number=7
begins = [ ref_year + n*delta for n in range(0,samples_number) ]
slices= [ "%s-%s"%(begin,begin+periods_length-1) for begin in begins ]
print slices