S.Sénési for Météo-France - sept 2019 to march 2021
changes[scenario][variable][time_stat][hseason][ens_stat][period] = value of change
changes[scenario][variable]["models"] = dict with keys=model_names and values= "metadata for the data version used"
Pendergrass, A. G., Knutti, R., Lehner, F., Deser, C., & Sanderson, B. M. (2017). Precipitation variability increases in a warmer climate. Scientific Reports, 7(1), 17966. https://doi.org/10.1038/s41598-017-17966-y
import os
# Version number will be a suffix for data filename. Use e.g. "_V1" for legibility
version = ""
# All models listed through next parameters as providing data for a scenario should be included :
# Also : data_versions_tag is a component of output filename
data_versions_tag = "20200918"
excluded_models = { "mrro":["CAMS-CSM1-0"]} # a dict with keys==variable and value=list of models
included_models = {} # a dict with keys==variable and value=list of models or None
data_versions_dir = os.getenv("CAMMAC")+"/data"
#scenarios,ref_experiment,ref_period,variables,seasons,period_indices,stats_list
# Managed 'variables' actually are a combination of a geophysical variabe and a time statistics
# among 'mean' and 'std' (for inter-annual standard deviation)
variables=[ ["pr","mean"], ["prw","mean"], ["pr","std"], ["mrro","mean"], ["mrro","std"] ]
# Hybrid seasons definitions. Can use any CDO's selseason compliant season label, 'ANN' and 'anm'
# and (for now) region labels 'NH', 'SH', 'tropics' (which all stands for land and the latitude
# limit set by 'latitude_limit') and 'land'.
# You may change function create_masks() below for extending known regions
latitude_limit = 30.
# SH_latitude_limit allows to exclude Antarctica when set to 60; use e.g. 91 to include Antarctica
SH_latitude_limit = 60
hybrid_seasons = {
"tropics_DJF" : [ ("tropics", "DJF") ],
"tropics_JJA" : [ ("tropics", "JJA") ],
"extra_winter" : [ ("NH", "DJF"), ("SH", "JJA") ],
"extra_summer" : [ ("NH", "JJA"), ("SH", "DJF") ],
}
# List of model-ensemble statistics to compute on each variable
# 'nq5' and 'nq95' == 5 and 95% percentiles estimated from a normal law
# 'ens' == just provide a dict of all values (with key=model name)
# 'wmean' and 'iwmean' == mean weighted by some weights (or their inverse), as e.g. temperature
# change; this option is not actually implemented in this version of the code
stats_list = [ "mean","second","butlast","min","max", "wmean","iwmean","ens","nq5","nq95"]
stats_list = [ "mean", "ens","nq5","nq95"]
# Computing changes implies defining a reference period
ref_experiment = "historical"
ref_period = "1850-1900"
#ref_period = "1995-2014"
# List of projection experiments to process
scenarios = [ "ssp585", "ssp245","ssp126" ]
# Type of computation for the curve....TBDoc
#option = "parametric"
option = "direct"
# If using an even time slicing (parametric curve), define it.
# It may include years belonging to ref_experiment duration, but scenario's begin must match ref_experiment's end.
periods_length = 20
start_year = 2021
last_year = 2081 # i.e. last period's begin
step = 10 # Not necessarily equal to periods_length !
# If using it, define warming levels and its discretization
max_warming = 6. # °C
min_warming = 2.
warming_step = 0.25
proj_period = "2015-2099" # period investigated for the warming
window_half_size = 10 # (years) half-size of temperature running mean
# The directory holding output (json) files
outdir = "./changes"
# To be documented
clean_cache=False
ch=dict(compute=True,house_keeping=True)
# For debug and perf assessment :
do_profile = False
own_cache = False
one_cache_per_process= False
cache_root = None
do_test = True
if do_test :
#data_versions_tag = "20200714-test"
version = "_test"
scenarios = [ "ssp585", "ssp245" ]
included_models = { "mrro": [ "CNRM-CM6-1-HR", "IPSL-CM6A-LR"] }#,"IPSL-CM6A-LR", "FGOALS-f3-L" ]
excluded_models = {"mrro" : [ "FGOALS-f3-L" ]}
variables = [ ["mrro","mean"] ]
#hybrid_seasons = { "extra_winter" : [ ("NH", "DJF"), ("SH", "JJA") ],}
hybrid_seasons = { "tropics_annual" : [ [ "tropics" , "ANN" ] ] }
#hybrid_seasons = { "nh_winter" : [ ("NH", "DJF") ],}
max_warming = 6
min_warming = 2
warming_step = 2
#periods_length = 1
#step = 70
stats_list = [ "mean", "ens","nq5","nq95"]
import os,sys, json, numpy as np
# Climaf setup (version >= 1.2.13 - see https://climaf.readthedocs.io)
if own_cache or one_cache_per_process:
if cache_root is None :
cache_root=os.getcwd()
cache_dir=cache_root+"/tmp_climaf_cache"
if one_cache_per_process :
cache_dir += "_%d"%os.getpid()
os.environ["CLIMAF_CACHE"]=cache_dir
from climaf.api import *
climaf.cache.stamping=False
climaf.driver.dig_hard_into_cache=False
#
from CAMMAClib.ancillary import prettier_label, feed_dic
from CAMMAClib.mips_et_al import read_versions_dictionnary, TSU_metadata, institute_for_model, \
models_for_experiments, models_for_experiments_multi_var
from CAMMAClib.changes import global_change
from CAMMAClib.variability import init_trend
# Load some user settings, if available
settings_file=os.getenv("CAMMAC_USER_PYTHON_CODE_DIR",".")+'/cammac_user_settings.py'
if os.path.exists(settings_file) :
exec(compile(open(settings_file).read(),settings_file,'exec'))
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
# NB : This code can handle :
# 1 - a regular time slicing of projection duration, with computation of warming level for these slices
# 2 - defining a time slicing for each model and scenario based on the warming level that must be reached
# for each slice index
# Option 1 allows to build a parametric curve for the dependency of a change with warming, while
# option 2 allows tocompute the direct relationship
# So the code shows some un-necessary complexity for either option alone
if option== "parametric" :
# Define a list of indices for the time slicing used
period_indices = range(0,1+(last_year-start_year+1)/step)
else :
# Define a list of warming levels
warming_levels = np.arange(min_warming, max_warming + warming_step, warming_step)
period_indices = warming_levels
warming_periods = dict() # warming_periods[scenario][model] is a list of periods where a model reaches
# the warming levels derived from above parameters
def index_to_period(scenario,model,period_index,option=option) :
"""
Returns the period during which MODEL reaches the warming level defined for PERIOD_INDEX
in experiment SCENARIO
With option=parametric, just implement an even slicing of periods of interest, whatever
the model and scenario
"""
global periods_length
global start_year
global last_year
global step
if option == "parametric" :
slices= [ "%s-%s"%(begin,begin+periods_length-1) for begin in range(start_year,last_year+1,step) ]
return slices[period_index]
else :
return warming_periods[scenario][model][period_index]
def mean_or_std(scenario, model, variant, season, variable, table, time_stat, period, compute=True, house_keeping=False):
"""
Handles seasons : 'ANN' , and all seasons valid for CDO selseason operator
Handles time statistics 'mean' and 'std'
For 'std', the season means are detrended before computing standard deviation
Does not yet handle the combination ('anm','std') (nor ('ANN,'std'))
"""
global data_versions, project
#
# Get details of dataset used
#print scenario,variable,table,model,variant
#print "/",
grid,version,_ = data_versions[scenario][variable][table][model][variant]
#
# Define relevant dataset
dic=dict(project=project, experiment=scenario,
model=model, institute=institute_for_model(model),
variable=variable,period=period, grid=grid,
table=table, version=version,realization=variant)
if project=="CMIP5" :
dic.pop("mip")
elif project=="CMIP6" :
if scenario=="historical" : dic.update(mip="CMIP")
else : dic.update(mip="ScenarioMIP")
else :
raise ValueError("Cannot process project %s"%project)
#
base_ds=ds(**dic)
#
if season != "anm" and season != "ANN" :
#
# Compute relevant stat
if time_stat=="mean" :
# Average over years after selecting season
res=ccdo(base_ds,operator="timmean -selseason,%s"%season)
elif time_stat=="std" :
# Compute seasonal mean for each year
cached=ccdo(base_ds,operator="selseason,%s -seasmean"%season)
# Must detrend
a=ctrend(cached)
detrended=csubtrend(cached,a,a.b)
# Compute standard deviation of seasonal values series
res=ccdo_fast(detrended,operator="timstd1")
else:
raise ValueError("stat %s cannot yet be processed by mean_or_std for %s"%(time_stat,variable))
elif time_stat == "mean" :
res=ccdo(base_ds,operator="timmean")
elif time_stat=="std" :
# Compute year means
cached=ccdo(base_ds,operator="yearmean")
# Must detrend
a=ctrend(cached)
detrended=csubtrend(cached,a,a.b)
# Compute standard deviation of detrended yearly values series
res=ccdo_fast(detrended,operator="timstd1")
else :
raise ValueError("stat %s cannot yet be processed by mean_or_std for season %s"+\
"and variable %s"%(time_stat,season,variable))
#
if compute :
cfile(res)
if house_keeping :
cdrop(base_ds)
if time_stat=="std" :
cdrop(a)
cdrop(detrended)
#
return res
def create_masks(model,variable,realization,table) :
global latitude_limit,data_versions
"""
Return masks for : land, land_tropics, land-NH, land-SH
where region of interest has value 0 and other grid cells are set to missing value
"""
try :
grid,_,_= data_versions["piControl"][variable][table][model][realization]
except :
grid,_,_=data_versions["historical"][variable][table][model].values()[0]
dic=dict(project=project, experiment="piControl", variable="sftlf",
model=model, institute=institute_for_model(model),
table="fx", period="fx", version="latest",mip="CMIP",
realization=realization, grid=grid)
#if model=="GFDL-CM4" : dic.update(grid="gr1")
#
land = ds(**dic)
land = ccdo_fast(land,operator="divc,100.")
# TBD : take into account fractional cell coverage by land
#land = ccdo_fast(land,operator="setctomiss,0.")
land = ccdo_fast(land,operator="setrtomiss,-1,0.5")
#
sftlf='sftlf'
tropics = ccdo_fast(land,operator="expr,'_lat=(abs(clat(%s))>%f)?missval(%s):1;land_tropics=_lat*%s'"%\
(sftlf,latitude_limit,sftlf,sftlf))
north = ccdo_fast(land,operator="expr,'_lat=(clat(%s)>%f)?1:missval(%s);land_north=_lat*%s'"%\
(sftlf,latitude_limit,sftlf,sftlf))
south = ccdo_fast(land,operator=\
"expr,'_lat=(clat(%s)<-%f && clat(%s)>-%f)?1:missval(%s);land_south=_lat*%s'"%\
(sftlf,latitude_limit,sftlf,SH_latitude_limit,sftlf,sftlf))
return {"land":land,"tropics":tropics,"NH":north,"SH":south }
# For tests
#l,t,n,s=masks("CNRM-CM5")
#ncview(n)
def apply_mask(field,mask_field):
"""
Assumes that mask field has non-zero non-missing values on interesting places, and
zero or missing value elsewhere
Result has missing value except on interesting places, where it has input field value
"""
return ccdo2_flip(field,mask_field,operator="ifthen")
def union(field1,field2) :
"""
Returns a field which, on each cell, has field1 value, except if missing,
in which case field2 value is used
"""
# Make a mask with f1, changing its zeroes to non-zero, and its missing to 0
v1=field1.variable
masq=ccdo_fast(field1,operator="setmisstoc,0 -expr,'mask=(%s!=missval(%s))?1:0;'"%(v1,v1))
# Apply mask
rep=ccdo3_flip(field1,field2,masq,operator="ifthenelse")
return(rep)
def hybrid_field(fields,hybrid_season,all_masks):
"""
Computes an hybrid field by combining seasonal fields and geographical masks
FIELDS is a dict of fields indexed by season labels
MASKS is a dict of mask fields which have value 1 over the region of interest, and
0 elsewhere. They are supposed to be on the same grid as fields in FIELDS
HYBRID_SEASON is a list of pairs (region, season), where 'region' is among keys of
dict MASKS, and 'season' is among the season labels of dict FIELDS
However, the case of HYBRID_SEASON = [ ("globe", whatever_season) ] is also handled,
without an entry "globe" in MASKS. The returned value is then FIELDS[whatever_season]
"""
region,season=hybrid_season[0]
if region=="globe" :
return fields[season]
#
rep=apply_mask(fields[season],all_masks[region])
if len(hybrid_season)>1 :
for region,season in hybrid_season[1:] :
rep=union(rep,apply_mask(fields[season],all_masks[region]))
return rep
def fldmean(option,field):
if option=="linear" :
f=ccdo_fast(field,operator="fldmean")
else : # assume quadratic
f=ccdo_fast(field,operator="sqrt -fldmean -sqr")
#print "computing",f
cfile(f)
#print "cfile done"
rep=np.float64(cvalue(f))
#print "cvalue done"
# Avoid creating numerous small files
#if clean_cache :
#cdrop(f)
return(rep)
def weighted_ensemble_stat(ens,weights,option) :
""" Compute mean or quantile on an ensemble using scaling weights """
#
if option in [ "mean", "wmean","iwmean","ens_mean"]:
if weights is None :
weights=dict()
for k in ens : weights[k]=1.
sum=0
wsum=0
for e in ens :
if option in [ "mean" ,"ensmean" ] : weight=1.
elif option == "wmean" : weight=weights[e]
elif option == "iwmean" : weight=1./weights[e]
sum+=weight*ens[e]
wsum+=weight
return sum/wsum
#
elif option in ["min","max","second","butlast" ] :
l=ens.values()
l.sort()
if option=="min" : return np.float64(l[0])
if option=="second" : return np.float64(l[1])
if option=="butlast" : return np.float64(l[-2])
if option=="max" : return np.float64(l[-1])
#
elif option in ["nq5","nq95" ] : #percentiles with gaussian hypothesis
mean=np.mean(ens.values())
std1=np.std(ens.values(),ddof=1)
if option == "nq5" : return mean - 1.645*std1
if option == "nq95" : return mean + 1.645*std1
#
if option == "ens" :
rep=ens.copy()
for model in rep :
rep[model] = np.float64(ens[model])
return rep
return rep
#
elif option in ens : # return a single model, which name is passed with 'option'
return np.float64(ens[option])
else:
raise ValueError("Time to use some library for stats (%s) !"%option)
def compute_warming_periods() :
for scenario in scenarios :
mods=models[scenario]["tas"]
# compute ensemble of warming series along projection period of choosen experiment
GSAT=global_change("tas","Amon",scenario,proj_period,ref_experiment,ref_period,
mods, data_versions,filter_length=2*window_half_size+1)
for model,realization in mods :
# Create a list of periods parallel to the list of warming levels, based on
# the relation : the warming for the model and the period exceeds the list
# level, and doesn't exceed it for earlier periods
fyear=int(proj_period.split("-")[0]) + window_half_size
lyear=int(proj_period.split("-")[1]) - window_half_size
center_years=range(fyear,lyear+1)
warmings=cMA(GSAT[model]).flatten().data
if len(center_years) != len(warmings) :
raise ValueError("Logic error : len(center_years) != len(warmings) : %s != %s, %s"%\
(len(center_years), len(warmings),center_years))
periods=dict()
for wl in warming_levels :
aperiod=None
for warming,cyear in zip(warmings,center_years) :
if aperiod is None :
if warming >= wl and warming < wl + warming_step :
aperiod="%d-%d"%(cyear-window_half_size, cyear+window_half_size)
periods[wl]=aperiod
feed_dic(warming_periods,periods,scenario,model)
#print scenario, model
#print "\twarming_levels :",warming_levels
#print "\twarmings :",warmings
#print "\tcenter_years",center_years
#print "\tperiods",periods
def select_models():
"""
Feed global dict 'models[scenario][variable]' with list of (model,variant) pairs that we will use,
possibly by enforcing selecting same realization for a variable and "tas"
"""
#
global models,scenarios,variables,excluded_models,included_models
#
for variable,_ in variables :
excl=excluded_models.get(variable,[])
incl=included_models.get(variable,None)
table="Amon"
if variable=="mrro" : table="Lmon"
#
for scenario in scenarios :
if option=="parametric" :
vars_list=[(variable,table)]
else:
vars_list=[(variable,table),("tas","Amon")]
mods=models_for_experiments_multi_var(data_versions,vars_list,[ref_experiment,scenario],excl,incl)
#
feed_dic(models,mods,scenario,variable)
# dict models is also used for generating metadata about used data -> must append on 'tas' entry
feed_dic(models,mods,scenario,"tas",extend_list=True)
def compute_stats(scenarios,ref_experiment,ref_period,variables,seasons,period_indices,stats_list):
global excluded_models,included_models
# Compute requested time statistics of fields on ref_period and samples,
# for all variables on seasons, and for tas mean on a per-annual basis
#
projs=dict() # projs[scenario][model][variable][time_stat][period_index][season] : projection values (2d fields)
refs=dict() # refs [scenario][model][variable][time_stat][season] : reference values (2d fields)
cases=[ (seasons,variable,time_stat) for variable,time_stat in variables ]
if option == "parametric" :
cases += [ (["ANN"],"tas","mean") ]
#
print "\nComputing time-statistics on basic fields\n"
for seasonl,variable,time_stat in cases :
print variable,time_stat,seasonl,
table="Amon"
if variable in ["mrro","mrso","mrsos"] : table="Lmon"
#
for scenario in scenarios :
mods=models[scenario][variable]
for model,variant in mods :
print model,
for season in seasonl :
print season,
# Values for ref_period
print ref_period,
val=mean_or_std(ref_experiment,model,variant,season,variable,table,time_stat,ref_period)
feed_dic(refs,val,scenario,model,variable,time_stat,season)
#
for period_index in period_indices :
period=index_to_period(scenario,model,period_index)
print period,
if period is not None :
val=mean_or_std(scenario,model,variant,season,variable,table,time_stat,period)
feed_dic(projs,val,scenario,model,variable,time_stat,period_index,season)
print
print
#
# Compute aggregates on regions and seasons (for base variables, and global+annual average for tas),
# then derive relative (or absolute) change
#
changes=dict()
print "\nComputing individual changes by integrating over regions and hybrid seasons\n"
for scenario in scenarios :
print scenario
cases=[ (hybrid_seasons,variable,time_stat) for variable,time_stat in variables ] + \
[ ({"globe_year" : [ ("globe","ANN") ] }, "tas","mean") ]
#
for hseasons,variable,time_stat in cases :
print "\t",variable, time_stat,
if time_stat=="std" :
opt = "quadratic"
else :
opt = "linear"
table="Amon"
if variable in ["mrro","mrso","mrsos"] : table="Lmon"
#
if variable not in refs[scenario][model] :
# It can be the case for "tas", if option != parametric
continue
for model,variant in models[scenario][variable] :
print model,variable,variant,table,
#print model,
all_masks = create_masks(model,variable,variant,table)
#
ref_fields = refs [scenario][model][variable][time_stat]
#
for hseason in hseasons :
#print hseason,
#
ref = fldmean(opt,hybrid_field(ref_fields,hseasons[hseason],all_masks))
#
for period_index in period_indices :
#print period+" ",
# Some (model, scenario) may not reach the warming level corresponding to period_index
if index_to_period(scenario,model,period_index) is None:
continue
#
proj_fields = projs[scenario][model][variable][time_stat][period_index]
pro = fldmean(opt,hybrid_field(proj_fields,hseasons[hseason],all_masks))
#
if variable == "tas" :
change = pro - ref
else :
change=100* ( pro - ref ) / ref
feed_dic(changes,change,variable,time_stat,hseason,period_index,"%s_%s"%(model,scenario))
print
print "Saving cache index for ",scenario," ...",
csync()
print "done"
# Compute ensemble stats for each case
stats=dict()
print "\nComputing ensemble statistics\n"
for variable in changes :
for time_stat in changes[variable] :
for hseason in changes[variable][time_stat] :
for period_index in changes[variable][time_stat][hseason] :
# Access dict representing the models ensemble
ens = changes[variable][time_stat][hseason][period_index]
# Record the ensemble size for the period
#print "feeding model_nubers for %s %s %s %g %s with %s"%(variable,time_stat,hseason,period_index,period,len(ens))
feed_dic(stats,len(ens),variable,"models_number",period_index) ####################################################################################
# Compute the relevant statistics on the models ensemble
for ens_stat in stats_list :
res = weighted_ensemble_stat(ens, None,ens_stat)
if option=="parametric" :
aperiod=index_to_period("any","any",period_index)
else :
aperiod=period_index
feed_dic(stats,res,variable,time_stat,hseason,ens_stat,aperiod) ##########################################################################""
for ens_stat in stats_list :
if ens_stat=="ens" : continue
for period_index in changes[variable][time_stat][hseason] :
#print_ensemble_values(variable,stat,warming_index,season,changes[variable][warming_index][season]...,c[stat][period],models)
pass
print
print 40*"@"
#
# Record metadata for datasets used
#
for scenario in models :
for variable in models[scenario] :
# Record the metadata of models used in each case
metadata=dict()
table="Amon"
if variable=="mrro" : table="Lmon"
for model,variant in models[scenario][variable]:
metadata[model]=TSU_metadata([scenario,ref_experiment],[(model,variant)],variable,table,data_versions)
feed_dic(stats,metadata,variable,"metadata",scenario) ############################################################################################
#
return stats,projs,refs,changes
project="CMIP6"
#
init_trend()
#
data_versions=read_versions_dictionnary(data_versions_tag,data_versions_dir)
#
# Gather a list of "real" seasons used in requested hybrid seasons
seasons=[]
for hseason in hybrid_seasons :
for region,season in hybrid_seasons[hseason]:
if season not in seasons :
seasons.append(season)
# For handling metadata
models=dict() # models[scenario][variable] == list of (model,variant) used for a scenario and a variable
select_models()
if option!="parametric":
compute_warming_periods()
#
if "ens" not in stats_list :
stats_list.append("ens")
if do_profile :
import cProfile,io,pstats
pr = cProfile.Profile()
pr.enable()
#
stats,projs,refs,changes=compute_stats(scenarios,ref_experiment,ref_period,variables,seasons,period_indices,stats_list)
#
if do_profile :
pr.disable()
python_version="python2"
if python_version == "python2":
s = io.BytesIO()
else:
s = io.StringIO()
ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
ps.print_stats()
# Just un-comment next line to get the profile on stdout
print s.getvalue()
if not os.path.exists(outdir): os.makedirs(outdir)
stats["ref_period"] = ref_period
stats["option"] = option
#
with open ("%s/stats_%s_%s%s.json"%(outdir,project,data_versions_tag,version),"w") as f :
json.dump(stats,f,separators=(',', ': '),indent=3)
csync()
def print_ensemble_values(variable,stat,period,season,ens,val,models):
if season=="globe_year" : variable="GTAS_"+variable
print "%-9s %-4s %-13s %-9s %5.1f "%(variable[0:9],stat[0:4],season,period,val),
for model in models :
print " %5.1f "%(ens[model]),
print
def print_ensemble_models(models):
print "%-9s %-4s %-13s %-9s %-4s"%("","","","",""),
for model in models :
print "%- 7s "%(model[:7]),
print