CAMMAC https://cammac.readthedocs.io

S.Sénési for Météo-France - sept 2019 to march 2021

Compute model ensemble statistics for changes in mean and variability of various geophysical variables (and 'tas')

Mean or variability is computed over a sampling/slicing of the scenarios duration

Variables satistics are subsequently spatially integrated over user-defined hybrid regions (as e.g. land extra-tropical winter, which means DJF for northern hemisphere and JJA for southern hemisphere). In the case of variability, the space average is a quadratic one

Multiple scenarios can be processed in sequence.

The 'one model - one vote' rule is implicit in the processing

Result is a dictionnary of relative changes (absolute change for 'tas'), as a json file, and with structure :

changes[scenario][variable][time_stat][hseason][ens_stat][period] = value of change

changes[scenario][variable]["models"] = dict with keys=model_names and values= "metadata for the data version used"

It can be used for feeding a figure of rate of change with global warming, as e.g. in Pendergrass et al (2017)

Pendergrass, A. G., Knutti, R., Lehner, F., Deser, C., & Sanderson, B. M. (2017). Precipitation variability increases in a warmer climate. Scientific Reports, 7(1), 17966. https://doi.org/10.1038/s41598-017-17966-y

Here applied by default to precipitation, moisture and runoff, for e.g. AR6-WGI-Chapter8-FigSOD_8.16

Parameters stand in first cell, are either commented here or in the doc (see above)

In [ ]:
 
In [ ]:
import os
# Version number will be a suffix for data filename. Use e.g. "_V1" for legibility
version               = ""

# All models listed through next parameters as providing data for a scenario should be included :
# Also : data_versions_tag is a component of output filename
data_versions_tag    = "20210201"
excluded_models      = { "mrro":["CAMS-CSM1-0"]} # a dict with keys==variable and value=list of models
included_models      = {}  # a dict with keys==variable and value=list of models or None
data_versions_dir    = os.getenv("CAMMAC")+"/data"

# Managed 'variables' actually are a combination of a geophysical variabe and a time statistics
# among 'mean' and 'std' (for inter-annual standard deviation)
variables=[ ["pr","mean"], ["prw","mean"], ["pr","std"], ["mrro","mean"], ["mrro","std"] ]

# Hybrid seasons definitions. Can use any CDO's selseason compliant season label, 'ANN' and 'anm'
# and (for now) region labels 'NH', 'SH', 'tropics' (which all stands for land and the latitude  
# limit set by 'latitude_limit') and 'land'. 
# You may change function create_masks() below for extending known regions
latitude_limit     = 30.
# SH_latitude_limit allows to exclude Antarctica when set to 60; use e.g. 91 to include Antarctica
SH_latitude_limit  = 60
hybrid_seasons = { 
    "tropics_DJF"  : [ ("tropics", "DJF") ], 
    "tropics_JJA"  : [ ("tropics", "JJA") ], 
    "extra_winter" : [ ("NH", "DJF"), ("SH", "JJA") ],
    "extra_summer" : [ ("NH", "JJA"), ("SH", "DJF") ],
    }

combinations = None
# An alternate way is to define a list of seasons and a list of combinations which are triplets 
# of (variable, time statistics, geographical domain) as e.g. in 
# combinations       : [ [pr, mean, land ], [prw, mean, globe], [mrro, mean, wland] ]
# seasons            : [ DJF, JJA ]
# Then, generated hybrid seasons name will be the commbination of all regions names and seasons names e.g: 
# "land DJF", "land JJA", "globe DJF", "globe JJA", "wland DJF", "wland JJA"


# List of model-ensemble statistics to compute on each variable
# 'nq5' and 'nq95' == 5 and 95% percentiles estimated from a normal law
# 'ens' == just provide a dict of all values (with key=model name)
# 'wmean' and 'iwmean' == mean weighted by some weights (or their inverse), as e.g. temperature 
#                         change; this option is not actually implemented in this version of the code
stats_list         = [ "mean","second","butlast","min","max", "wmean","iwmean","ens","nq5","nq95"]
stats_list         = [ "mean", "ens","nq5","nq95"]

# Computing changes implies defining a reference period
ref_experiment     = "historical"
ref_period         = "1850-1900" 
#ref_period         = "1995-2014"

# List of projection experiments to process
scenarios          = [ "ssp585", "ssp245","ssp126" ]

default_scaling   = 24.*3600
scaling           = {"tas" : 1., "prw" : 1. }

# Type of computation for the curve....TBDoc
#option             = "parametric"
option             = "direct"
# When option is 'parametric', do we actually need computing tas statistics ? 
do_compute_tas     = True

# If using an even time slicing (parametric curve), define it. 
# It may include years belonging to ref_experiment duration, but scenario's begin must match ref_experiment's end. 
periods_length     = 20
start_year         = 2021
last_year          = 2081  # i.e. last period's begin
step               = 10  # Not necessarily equal to periods_length !

# Othrwise, define warming levels 
max_warming        = 6. # °C
min_warming        = 2.
warming_step       = 0.25
proj_period        = "2015-2099" # period investigated for the warming
window_half_size   = 10  # (years) half-size of temperature running mean 


# The directory holding output (json) files
outdir             = "./changes"

do_compute         = True

create_table       = True
input_dir          = None  # where is the json file from compute stage (used when do_compute is False)
table_cases        = None  # cases=[ (hybrid_season,variable,time_stat) ]
table_periods      = None
table_scenarios    = None
table_ranges       = "empirical"

# To be documented
clean_cache=False
ch=dict(compute=True,house_keeping=True)

# For debug and perf assessment :
do_profile           = False
own_cache            = False 
one_cache_per_process= False
cache_root           = None

do_test               = True
do_test_combinations  = False
do_closing            = False
In [ ]:
if do_test :
    #data_versions_tag  = "20200714-test"
    version            = "_test"
    scenarios          = [ "ssp585", "ssp245" ]
    included_models    = { "mrro": [ "CNRM-CM6-1-HR","CNRM-CM6-1", "IPSL-CM6A-LR", "FGOALS-f3-L" ],}
    excluded_models    = {"mrro" : [ "FGOALS-f3-L" ]}
    variables          = [ ["mrro","mean"] ]
    #hybrid_seasons     = { "extra_winter" : [ ("NH", "DJF"), ("SH", "JJA") ],}
    hybrid_seasons  = { "tropics_annual" : [  [ "tropics" , "ANN" ] ]  }

    #hybrid_seasons     = { "nh_winter" : [ ("NH", "DJF") ],}
    max_warming        = 6
    min_warming        = 2
    warming_step       = 2
    #periods_length     = 1
    #step               = 70
    stats_list         = [ "mean", "ens","nq5","nq95"]
In [ ]:
if do_test_combinations :
    #data_versions_tag  = "20200714-test"
    version            = "_monsoon"
    scenarios          = [ "ssp585" ]
    included_models    = { "mrro": [ "CNRM-CM6-1-HR","CNRM-CM6-1", ],
                           "prw" : [ "CNRM-CM6-1-HR","CNRM-CM6-1", "IPSL-CM6A-LR", "FGOALS-f3-L" ]}
    excluded_models    = {"mrro" : [ "FGOALS-f3-L" ]}
    combinations       = [ ["mrro","mean",'land'] , ["prw","mean",'globe']]
    combinations       = [ ["mrro","std",'AusMCM'] ]
    combinations   = None
    hybrid_seasons  = { "AusMCM" : [  [ "AusMCM" , "DJF" ] ]  }
    #seasons            = [ "DJF" ]#, "JJA" ]
    periods_length     = 2
    start_year         = 2021
    last_year          = 2030
    step               = 10  
    stats_list         = [ "mean", "ens","nq5","nq95", "q5", "q95"]
    option             = "parametric"
    ref_period         = "1850-1851" 
    do_compute_tas     = False
In [ ]:
#do_closing=True
if do_closing :
    #data_versions_tag  = "20200714-test"
    version            = "_closing"
    scenarios          = [ "ssp585" ]
    excluded_models    = { "mrro": [  "CAMS-CSM1-0", "IITM-ESM", ],
                           "P-E" : [ "IITM-ESM" , "EC-Earth3", "FIO-ESM-2-0", "MPI-ESM1-2-HR", "IPSL-CM6A-LR" ]}
    included_models    = {}
    combinations       = [ ["mrro","mean",'land'] , ["P-E","mean",'land']]
    seasons            = [ "ANN" ]
    periods_length     = 20
    start_year         = 2041
    last_year          = 2061
    step               = 40  
    stats_list         = [ "ens","mean","nq95"]
    option             = "parametric"
    ref_period         = "1850-1900" 
    do_compute_tas     = False
In [ ]:
import os,sys, josn, numpy as np

# Climaf setup (version >= 1.2.13 - see https://climaf.readthedocs.io)
if own_cache or one_cache_per_process:
    if cache_root is None :
        cache_root=os.getcwd()
    cache_dir=cache_root+"/tmp_climaf_cache"
    if one_cache_per_process :
        cache_dir += "_%d"%os.getpid()
    os.environ["CLIMAF_CACHE"]=cache_dir

from climaf.api import *
climaf.cache.stamping=False
climaf.driver.dig_hard_into_cache=False
from climaf.cache import sync_cvalues
#
from CAMMAClib.ancillary   import prettier_label, feed_dic
from CAMMAClib.mips_et_al  import read_versions_dictionnary, TSU_metadata, institute_for_model, \
                                  models_for_experiments, models_for_experiments_multi_var, \
                                    table_for_var_and_experiment

from CAMMAClib.changes     import global_change
from CAMMAClib.variability import init_trend

# Load some user settings, if available
settings_file=os.getenv("CAMMAC_USER_PYTHON_CODE_DIR",".")+'/cammac_user_settings.py'
if os.path.exists(settings_file) :
    exec(compile(open(settings_file).read(),settings_file,'exec'))
In [ ]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

Add a location for fixed fields (above standard location for CMIP6 data)

In [ ]:
# NB : This code can handle  :
#  1 - a regular time slicing of projection duration, with computation of warming level for these slices 
#  2 - defining a time slicing for each model and scenario based on the warming level that must be reached 
# for each slice index
# Option 1 allows to build a parametric curve for the dependency of a change with warming, while 
# option 2 allows tocompute the direct relationship
# So the code shows some un-necessary complexity for either option alone

def make_period_indices() :
    global period_indices

    if option== "parametric" :
        # Define a list of indices for the time slicing used 
        period_indices = range(0,1+(last_year-start_year)/step)
    else :
        # Define a list of warming levels 
        warming_levels = np.arange(min_warming, max_warming + warming_step, warming_step)
        period_indices = warming_levels  
In [ ]:
def index_to_period(scenario,model,period_index) :
    """
    Returns the period during which MODEL reaches the warming level defined for PERIOD_INDEX 
    in experiment SCENARIO
    
    With option==parametric, just implement an even slicing of periods of interest, whatever 
    the model and scenario. 
    
    index -1 refer to reference period
    
    """
    global periods_length, start_year, last_year, step
    global option

    if period_index == -1 :
        return ref_period
    #print "\t",option
    if option == "parametric" :
        slices= [ "%s-%s"%(begin,begin+periods_length-1)  for begin in range(start_year,last_year+1,step) ]
        return slices[period_index]
    else :
        return warming_periods[scenario][model][period_index]

A basic function for computing mean or variability for a variable and a period of an scenario

In [ ]:
def mean_or_std(scenario, model, variant, season, variable, table, time_stat, period, compute=True, house_keeping=False):
    """
    Handles seasons  : 'ANN' , and all seasons valid for CDO selseason operator
    Handles time statistics 'mean' and 'std'
    For 'std', the season means are detrended before computing standard deviation
    
    Does not yet handle the combination ('anm','std') (nor ('ANN,'std'))
    """
    global data_versions, project
    #
    # Get details of dataset used
    #print scenario,variable,table,model,variant
    #print "/",
    try :
        grid,version,_ = data_versions[scenario][variable][table][model][variant]
    except :
        raise ValueError("Error accessing %s %s %s %s %s"%(scenario,variable,table,model,variant) )
    #
    # Define relevant dataset
    dic=dict(project=project, experiment=scenario,
             model=model, institute=institute_for_model(model),
             variable=variable,period=period, grid=grid,
             table=table, version=version,realization=variant)
    if project=="CMIP5" :
        dic.pop("mip")
    elif project=="CMIP6" : 
        if scenario=="historical" : dic.update(mip="CMIP")
        else : dic.update(mip="ScenarioMIP")
    else : 
        raise ValueError("Cannot process project %s"%project)
    #
    base_ds=ds(**dic)
    #
    if season != "anm" and season != "ANN" :
        #
        # Compute relevant stat
        if time_stat=="mean" : 
            # Average over years after selecting season
            res=ccdo(base_ds,operator="timmean -selseason,%s"%season)
        elif time_stat=="std" : 
            # Compute seasonal mean for each year
            cached=ccdo(base_ds,operator="selseason,%s -seasmean"%season)
            # Must detrend 
            a=ctrend(cached)
            detrended=csubtrend(cached,a,a.b)
            # Compute standard deviation of seasonal values series
            res=ccdo_fast(detrended,operator="timstd1")
        else: 
            raise ValueError("stat %s cannot yet be processed by mean_or_std for %s"%(time_stat,variable))
    elif time_stat == "mean" :
        res=ccdo(base_ds,operator="timmean")
    elif time_stat=="std" : 
        # Compute year means
        cached=ccdo(base_ds,operator="yearmean")
        # Must detrend 
        a=ctrend(cached)
        detrended=csubtrend(cached,a,a.b)
        # Compute standard deviation of detrended yearly values series
        res=ccdo_fast(detrended,operator="timstd1")
    else :
        raise ValueError("stat %s cannot yet be processed by mean_or_std for season %s"+\
                    "and variable %s"%(time_stat,season,variable))
    #
    if compute : 
        cfile(res)
    if house_keeping :
        cdrop(base_ds)
        if time_stat=="std" :
            cdrop(a)
            cdrop(detrended)
    #
    return res

Ancilliaries, dealing with land and region masks, and masking fields, and combining regions

In [ ]:
def create_masks(model,variable,realization,table) :
    
    global latitude_limit,data_versions

    """
    Return masks for : land, land_tropics, land-NH, land-SH
    where region of interest has value 0 and other grid cells are set to missing value
    """
    try :
        grid,_,_= data_versions["piControl"][variable][table][model][realization]
    except :         
        grid,_,_=data_versions["historical"][variable][table][model].values()[0]
    dic=dict(project=project, experiment="piControl", variable="sftlf",
                    model=model, institute=institute_for_model(model),
                    table="fx", period="fx", version="latest",mip="CMIP",
                    realization=realization, grid=grid)
    #if model=="GFDL-CM4" : dic.update(grid="gr1")
    #
    land = ds(**dic)
    land = ccdo_fast(land,operator="divc,100.")
    # TBD : take into account fractional cell coverage by land
    #land = ccdo_fast(land,operator="setctomiss,0.")
    land = ccdo_fast(land,operator="setrtomiss,-1,0.5")
    #
    sftlf='sftlf'
    tropics = ccdo_fast(land,operator="expr,'_lat=(abs(clat(%s))>%f)?missval(%s):1;land_tropics=_lat*%s'"%\
                        (sftlf,latitude_limit,sftlf,sftlf))
    north = ccdo_fast(land,operator="expr,'_lat=(clat(%s)>%f)?1:missval(%s);land_north=_lat*%s'"%\
                      (sftlf,latitude_limit,sftlf,sftlf))
    south = ccdo_fast(land,operator=\
                      "expr,'_lat=(clat(%s)<-%f && clat(%s)>-%f)?1:missval(%s);land_south=_lat*%s'"%\
                      (sftlf,latitude_limit,sftlf,SH_latitude_limit,sftlf,sftlf))
    masks = {"land":land,"tropics":tropics,"NH":north,"SH":south }
    
    # Handle monsoon regions masks
    for monsoon_region in [ "AusMCM", "WAfriM", "SAsiaM", "SAmerM", "SAfri", "NAmerM", "EqAmer", "EAsiaM"] :
        region_mask = fds(CAMMAC+"/data/basins/%s_global.nc"%monsoon_region)
        region_mask = ccdo_fast(region_mask , operator="setrtomiss,-1000,0.99")
        region_mask = regrid(region_mask,land,option="remapnn")
        masks.update({monsoon_region : region_mask})
    
    return masks

# For tests
#l,t,n,s=masks("CNRM-CM5")
#ncview(n)

def apply_mask(field,mask_field):
    """ 
    Assumes that mask field has non-zero non-missing values on interesting places, and 
    zero or missing value elsewhere
    Result has missing value except on interesting places, where it has input field value
    """
    return ccdo2_flip(field,mask_field,operator="ifthen")


def union(field1,field2) :
    """ 
    Returns a field which, on each cell, has field1 value, except if missing, 
    in which case field2 value is used
    """
    # Make a mask with f1, changing its zeroes to non-zero, and its missing to 0
    v1=field1.variable
    masq=ccdo_fast(field1,operator="setmisstoc,0 -expr,'mask=(%s!=missval(%s))?1:0;'"%(v1,v1))
    # Apply mask
    rep=ccdo3_flip(field1,field2,masq,operator="ifthenelse")
    return(rep)


def hybrid_field(fields,hybrid_season,all_masks):
    """
    Computes an hybrid field by combining seasonal fields and geographical masks
    
    FIELDS is a dict of fields indexed by season labels
    
    MASKS is a dict of mask fields which have value 1 over the region of interest, and 
    0 elsewhere. They are supposed to be on the same grid as fields in FIELDS
    
    HYBRID_SEASON is a list of pairs (region, season), where 'region' is among keys of 
    dict MASKS, and 'season' is among the season labels of dict FIELDS
    
    However, the case of HYBRID_SEASON = [ ("globe", whatever_season) ] is also handled, 
    without an entry "globe" in MASKS. The returned value is then FIELDS[whatever_season]
    
    """
    region,season=hybrid_season[0]

    if region=="globe" :
        return fields[season]
    #
    rep=apply_mask(fields[season],all_masks[region])
    if len(hybrid_season)>1 :
        for region,season in hybrid_season[1:] :
            rep=union(rep,apply_mask(fields[season],all_masks[region]))
    return rep


def fldmean(option,field):
    if option=="linear" :
        f=ccdo_fast(field,operator="fldmean")
    else : # assume quadratic
        f=ccdo_fast(field,operator="sqrt -fldmean -sqr")
    #print "computing",f
    cfile(f)
    #print "cfile done"
    rep=np.float64(cvalue(f))
    #print "cvalue done"
    # Avoid creating numerous small files
    #if clean_cache : 
    #cdrop(f)
    return(rep)
In [ ]:
def weighted_ensemble_stat(ens,weights,option) :
    """ Compute mean or quantile on an ensemble using scaling weights """
    #
    if option in [ "mean", "ens_mean" , "ensmean"]:
        return np.mean(ens.values())
    #
    elif option in [ "wmean","iwmean" ]:
        if weights is None :
            weights=dict()
            for k in ens : weights[k]=1.
        sum=0 
        wsum=0
        for e in ens : 
            if option in [ "mean" ,"ensmean" ] : weight=1.
            elif option == "wmean" :             weight=weights[e]
            elif option == "iwmean" :            weight=1./weights[e]
            sum+=weight*ens[e]
            wsum+=weight
        return sum/wsum
    #
    elif option in ["min","max","second","butlast" ] :
        l=ens.values()
        l.sort()
        if option=="min"     : return np.float64(l[0])
        if option=="second"  : return np.float64(l[1])
        if option=="butlast" : return np.float64(l[-2])
        if option=="max"     : return np.float64(l[-1])
    #
    elif option in ["nq5","nq95" ] : #percentiles with gaussian hypothesis
        mean=np.mean(ens.values())
        std1=np.std(ens.values(),ddof=1)
        if option == "nq5"   : return mean - 1.645*std1
        if option == "nq95"  : return mean + 1.645*std1
    #
    elif option in ["q5","q95" ] : # empirical percentiles 
        if option == "q5"   : return np.percentile(ens.values(),5.)
        if option == "q95"  : return np.percentile(ens.values(),95.)
    #
    if option == "ens"  :
        rep=ens.copy()
        for model in rep : 
            rep[model] = np.float64(ens[model])
            return rep
        return rep
    #
    elif option in ens : # return a single model, which name is passed with 'option'
        return np.float64(ens[option])
    else:
        raise ValueError("Time to use some library for stats (%s) !"%option)
In [ ]:
def compute_warming_periods() :
    
    for scenario in scenarios :
        mods=models[scenario]["tas"]
        # compute ensemble of warming series along projection period of choosen experiment
        GSAT=global_change("tas","Amon",scenario,proj_period,ref_experiment,ref_period,
                        mods, data_versions,filter_length=2*window_half_size+1)
        for model,realization in mods :
            # Create a list of periods parallel to the list of warming levels, based on 
            # the relation : the warming for the model and the period exceeds the list 
            # level, and doesn't exceed it for earlier periods
            fyear=int(proj_period.split("-")[0]) + window_half_size
            lyear=int(proj_period.split("-")[1]) - window_half_size
            center_years=range(fyear,lyear+1)
            warmings=cMA(GSAT[model]).flatten().data
            if len(center_years) != len(warmings) :
                raise  ValueError("Logic error : len(center_years) != len(warmings) : %s != %s, %s"%\
                                  (len(center_years), len(warmings),center_years))
            periods=dict()
            for wl in warming_levels :
                aperiod=None
                for warming,cyear in zip(warmings,center_years) :
                    if aperiod is None :
                        if warming >= wl and warming < wl + warming_step :
                            aperiod="%d-%d"%(cyear-window_half_size, cyear+window_half_size)
                periods[wl]=aperiod
            feed_dic(warming_periods,periods,scenario,model)
            #print scenario, model 
            #print "\twarming_levels :",warming_levels
            #print "\twarmings :",warmings
            #print "\tcenter_years",center_years
            #print "\tperiods",periods
In [ ]:
def compute_stats(scenarios,ref_experiment,ref_period,variables,seasons,period_indices,stats_list): 
    
    global excluded_models,included_models,cases
    
    # Compute requested time statistics of fields on ref_period and samples, 
    # for all variables on seasons, and for tas mean on a per-annual basis
    #
    projs=dict() # projs[scenario][model][variable][time_stat][period_index][season] : projection values (2d fields)
    refs=dict()  # refs [scenario][model][variable][time_stat][season]                : reference values (2d fields)
    
    if combinations is None :
        cases=[ (seasons,variable,time_stat) for variable,time_stat in variables ] 
    else :
        cases= [ (seasons, variable, time_stat) for variable,time_stat,region in combinations ]
        
    if option == "parametric" and do_compute_tas:
          cases += [ (["ANN"],"tas","mean") ]

    #
    print "\nComputing time-statistics on basic fields, option=%s\n"%option
    for seasonl,variable,time_stat in cases  :
        print variable,time_stat,seasonl,
        #
        for scenario in scenarios :
            print scenario
            table=table_for_var_and_experiment(variable,scenario)
            mods=models[scenario][variable]
            for model,variant in mods :
                print model,
                for season in seasonl :
                    print season,
                    # Values for ref_period
                    print ref_period,
                    val=mean_or_std(ref_experiment,model,variant,season,variable,table,time_stat,ref_period)
                    feed_dic(refs,val,scenario,model,variable,time_stat,season)
                    #
                    for period_index in period_indices :
                        period=index_to_period(scenario,model,period_index)
                        print period,
                        if period is not None :
                            val=mean_or_std(scenario,model,variant,season,variable,table,time_stat,period)
                            feed_dic(projs,val,scenario,model,variable,time_stat,period_index,season)
                print
            print
    csync()
    #
    # Compute aggregates on regions and seasons (for base variables, and global+annual average for tas), 
    # then derive relative (or absolute) change
    #
    changes=dict()
    print "\nComputing individual changes by integrating over regions and hybrid seasons\n"
    for scenario in scenarios :
        print scenario
        if combinations is None :
            cases=[ (hybrid_seasons,variable,time_stat) for variable,time_stat in variables ] 
        else :
            cases=[ ({ region+" "+season : [(region,season)]} ,variable,time_stat) \
                   for variable,time_stat,region in combinations  \
                   for season                    in seasons ] 
        if option=="parametric" and do_compute_tas :
            cases += [ ({"globe_year" : [ ("globe","ANN") ] }, "tas","mean") ]
        print cases
        for hseasons,variable,time_stat in cases  :
            print "\t",variable, time_stat,
            if time_stat=="std"  : 
                opt = "quadratic"
            else : 
                opt = "linear"
            table=table_for_var_and_experiment(variable,scenario)
            #
            if variable not in refs[scenario][model] :
                # It can be the case for "tas", if option !=  parametric 
                continue
            for model,variant in models[scenario][variable] :
                print model,variable,variant,table,
                #print model,
                all_masks = create_masks(model,variable,variant,table)
                #
                ref_fields  = refs [scenario][model][variable][time_stat]               
                #
                for hseason in hseasons  : 
                    #print hseason,
                    #
                    ref = fldmean(opt,hybrid_field(ref_fields,hseasons[hseason],all_masks))
                    ref = ref * scaling.get(variable,default_scaling)
                    feed_dic(changes,ref,variable,time_stat,hseason,-1,ref_experiment,model)
                    #
                    for period_index in period_indices :
                        # Some (model, scenario) may not reach the warming level corresponding to period_index
                        if index_to_period(scenario,model,period_index) is None:
                            continue
                        #
                        proj_fields = projs[scenario][model][variable][time_stat][period_index]
                        pro = fldmean(opt,hybrid_field(proj_fields,hseasons[hseason],all_masks))
                        pro = pro * scaling.get(variable,default_scaling)
                        feed_dic(changes,pro,variable,time_stat,hseason,period_index,scenario,model)
            print
        print "Saving cache index for ",scenario," ...",
        csync()
        #print "done"
        

    # Compute ensemble stats for each case
    stats=dict()
    print "\nComputing ensemble statistics\n"
    for variable in changes :
        for time_stat in changes[variable] :
            for hseason in changes[variable][time_stat] :
                for period_index in changes[variable][time_stat][hseason] :
                    refs = changes[variable][time_stat][hseason][-1][ref_experiment]
                    for ens_stat in stats_list :
                        # Stat on ref value 
                        ref_stat = weighted_ensemble_stat(refs, None,ens_stat)
                    #
                    if option=="parametric" : aperiod=index_to_period("any","any",period_index)
                    else :                    aperiod=period_index
                    #
                    for scenario in changes[variable][time_stat][hseason][period_index] :
                        # Access dict representing the models ensemble
                        ens     = changes   [variable][time_stat][hseason][period_index][scenario]
                        # Compute the relevant statistics on the models ensemble
                        for ens_stat in stats_list :
                            # stat on absolute value
                            res = weighted_ensemble_stat(ens, None,ens_stat)
                            feed_dic(stats,    res,variable,time_stat,hseason,ens_stat,aperiod,scenario,"value") ##########################################################################
                            if period_index == -1 :
                                continue
                            # ensemble stat on change value
                            change_stat = weighted_ensemble_stat( { k: ens[k] - refs[k] for k in ens }, None,ens_stat)
                            feed_dic(stats,change_stat,variable,time_stat,hseason,ens_stat,aperiod,scenario,"change") 
                            
                            # ensemble stat on relative change value
                            rchange_stat= weighted_ensemble_stat({ k: 100 * (ens[k] - refs[k])/refs[k] for k in ens }, None,ens_stat)
                            feed_dic(stats,rchange_stat,variable,time_stat,hseason,ens_stat,aperiod,scenario,"stat_on_rchange") 
                            
                            if ens_stat == "ens" :
                                continue

                            # relative change on ensemble stat value
                            rel_change = 100 * change_stat / ref_stat
                            feed_dic(stats,rel_change,variable,time_stat,hseason,ens_stat,aperiod,scenario,"rchange_on_stat") 
                            #
                        # Record the ensemble size for the period
                        feed_dic(stats,len(ens),variable,"models_number",aperiod,scenario)  
                            
                    for ens_stat in stats_list :
                        if ens_stat=="ens" : continue
                        for period_index in changes[variable][time_stat][hseason] :
                            #print_ensemble_values(variable,stat,warming_index,season,changes[variable][warming_index][season]...,c[stat][period],models)
                            pass
            print
        sync_cvalues()
        print 40*"@"
    #
    # Record metadata for datasets used
    #
    for scenario in models :
        for variable in models[scenario] :
            # Record the metadata of models used in each case
            metadata=dict()
            table=table_for_var_and_experiment(variable,scenario)
            for model,variant in models[scenario][variable]:
                metadata[model]=TSU_metadata([scenario,ref_experiment],[(model,variant)],variable,table,data_versions)
            feed_dic(stats,metadata,variable,"metadata",scenario) ############################################################################################
    #
    return stats,projs,refs,changes

excluded_models = {"pr": ["IITM-ESM"], "P-E": ["IITM-ESM"], "prw": ["IITM-ESM", "CMCC-CM2-HR4", "CMCC-CM2-SR5", "FIO-ESM-2-0"], "mrro": ["CAMS-CSM1-0", "IITM-ESM"]} included_models={} combinations=None variables=[("pr","mean")] models=dict() scenarios=['ssp585'] select_models() models

In [ ]:
def select_models():
    """
    Feed global dict 'models[scenario][variable]' with list of (model,variant) pairs that we will use,
    possibly by enforcing selecting same realization for a variable and "tas"
    """
    #
    global models,scenarios,variables,combinations,excluded_models,included_models
    #
    if combinations is None : 
        all_variables = [ variable for variable,_ in variables ]
    else :
        all_variables = [ variable for variable,_,_ in combinations ]
    #    
    for variable in all_variables :
        excl=excluded_models.get(variable,[])
        incl=included_models.get(variable,None)
        #
        for scenario in scenarios :
            table=table_for_var_and_experiment(variable,scenario)
            if option == "parametric" and do_compute_tas :
                vars_list=[(variable,table),("tas","Amon")]
            else: 
                vars_list=[(variable,table)]
            mods=models_for_experiments_multi_var(data_versions,vars_list,[ref_experiment,scenario],excl,incl)
            #
            feed_dic(models,mods,scenario,variable)
            # dict models is also used for generating metadata about used data -> must append on 'tas' entry
            if do_compute_tas :
                feed_dic(models,mods,scenario,"tas",extend_list=True)
In [ ]:
project="CMIP6"
#
init_trend()
#
data_versions=read_versions_dictionnary(data_versions_tag,data_versions_dir)
#    
# Gather a list of "real" seasons used in requested hybrid seasons

if combinations is None :
    seasons=[]
    for hseason in hybrid_seasons :
        for region,season in hybrid_seasons[hseason]:
            if season not in seasons :
                seasons.append(season)
            
# For handling metadata            
models=dict() # models[scenario][variable] == list of (model,variant) used for a scenario and a variable
select_models()
print models
if option!="parametric":
    compute_warming_periods()
# 
if "ens" not in stats_list :
    stats_list.append("ens")
    
warming_periods    = dict()  # warming_periods[scenario][model] is a list of periods where a model reaches 
                             # the warming levels derived from above parameters
make_period_indices()
In [ ]:
if do_compute :    
    if do_profile :
        import cProfile,io,pstats
        pr = cProfile.Profile()
        pr.enable()
    #    
    stats,projs,refs,changes=compute_stats(scenarios,ref_experiment,ref_period,variables,seasons,period_indices,stats_list)
    #
    if do_profile :
        pr.disable()
        python_version="python2"
        if python_version == "python2":
            s = io.BytesIO()
        else:
            s = io.StringIO()
        ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
        ps.print_stats()
        # Just un-comment next line to get the profile on stdout
        print s.getvalue()
In [ ]:
#variable,time_stat,hseason,ens_stat,aperiod,scenario,"rchange_on_stat"
def write_csv() :
    max_count=2
    count=0
    if True :
        with open(json_file.replace(".json",".csv"),"w") as f :
            if table_ranges   != "empirical" :
                line="variable;time_stat;season;experiment;period;value;Uvalue;change;Uchange;rchange;Urchange;Rchange\n"
                print line, 
                f.write(line)
                line="variable;time statistics;region and season;experiment;period;ensemble mean;uncertainty on ensemble mean;"+\
                    "ensemble mean change;uncertainty on ensemble mean change;ensemble mean of relative changes;"+\
                    "uncertainty on ensemble mean of relative changes;relative change of ensemble means\n"
                print line
                f.write(line)
            else :
                line="variable;time_stat;season;experiment;period;value;p5;p95;change;cp5;cp95;rchange;rp9;rp95;Rchange\n"
                print line, 
                f.write(line)
                line="variable;time statistics;region and season;experiment;period;ensemble mean;percentile 5 on ensemble mean;percentile 95 on ensemble mean;"+\
                    "ensemble mean change;percentile 5 on ensemble mean change; percentile 95 on ensemble mean change; ensemble mean of relative changes;"+\
                    "percentile 5 on ensemble mean of relative changes;percentile 95 on ensemble mean of relative changes; relative change of ensemble means\n"
                f.write(line)
                print line
                
            for variable in stats :
                if variable in ["option", "ref_period" ]: continue
                for time_stat in stats[variable] :
                    if time_stat in ['metadata','models_number' ] : continue
                    for season in stats[variable][time_stat]:            
                        d=stats[variable][time_stat][season] 
                        #print variable,time_stat,season
                        #print d.keys()
                        for period in d["mean"] :
                            for scenario in d["mean"][period] :
                                if (season,variable,time_stat) not in table_cases :
                                    continue
                                if scenario not in table_scenarios : continue
                                if period   not in table_periods : continue
                                dd=d["mean"][period][scenario]
                                try :
                                    value=dd["value"]
                                except :
                                    print variable,season,period,scenario,dd
                                    raise valueError("")
                                change=dd.get("change",0.)
                                rchange=dd.get("stat_on_rchange",0.)
                                Rchange=dd.get("rchange_on_stat",0.)
                                if table_ranges != "empirical" :
                                    try  : 
                                        uu=stats[variable][time_stat][season]["nq5"][period][scenario]
                                        Uvalue  = value   - uu.get("value",0)
                                        Uchange = change  - uu.get("change", 0.)
                                        Urchange= rchange - uu.get("rchange",0.)
                                    except :
                                        Uvalue = 0. 
                                        Uchange = 0.
                                        Urchange = 0.
                                    line="%s;%s;%s;%s;%s;%g;%g;%g;%g;%g;%g;%g\n"\
                                        %(variable,time_stat,season,scenario,period,value,Uvalue,change,Uchange,rchange,Urchange,Rchange)
                                else : 
                                    try  : 
                                        u5 = stats[variable][time_stat][season]["q5" ][period][scenario]
                                        u95= stats[variable][time_stat][season]["q95"][period][scenario]
                                        p5  = u5.get("value",0)
                                        p95 = u95.get("value",0)
                                        cp5  = u5.get("change",0)
                                        cp95 = u95.get("change",0)
                                        rp5  = u5.get("rchange_on_stat",0)
                                        rp95 = u95.get("rchange_on_stat",0)
                                    except :
                                        print stats,variable,time_stat,season,"q5"    
                                        print stats[variable][time_stat][season]["q5" ].keys()
                                        p5 = 0; p95=0.; cp5 = 0; cp95=0.; rp5 = 0; rp95=0.; 
                                    line="%s;%s;%s;%s;%s;%g;%g;%g;%g;%g;%g;%g;%g;%g;%g\n"\
                                        %(variable,time_stat,season,scenario,period,value,p5,p95,change,cp5,cp95,rchange,rp5,rp95,Rchange)
                                f.write(line)
                                count +=1
                                if count < max_count : print line
                
In [ ]:
def write_table(max_count=None) :
    
    count=0
 
    with open(json_file.replace(".json",".txt"),"w") as txt :
        txt.write("Changes in various variables, for various seasons and periods, as averaged over either globe or land\n")
        txt.write("Key :\n")
        txt.write("    - value  : ensemble mean of raw space averaged value\n")
        txt.write("    - change : ensemble mean of change in space averaged value for the period (w.r.t. the period in titles second column )\n")
        txt.write("    - rchng  : ensemble mean of the relative changes\n")
        txt.write("    - Rchng  : relative change of the ensemble means of values\n")
        if table_ranges   == "empirical" :
            txt.write("First three values also show an uncertainty range (min/max), based on empirical percentiles 5-95%\n")
            float_format="%-29s "
            ratio_format="%-14s "
            period_format="| %-88s"
        else:
            txt.write("First three values also show an uncertainty figure beween parenthesis, based on percentiles 5-95%\n")
            float_format="%-26s "
            ratio_format="%-13s "
            period_format="| %-67s"
        
        
        # txt.write mean of values and changes (for three change types)
        for variable,hseason,time_stat in table_cases :
            count +=1 
            if max_count is not None and count > max_count : continue
            txt.write(" \n")
            #print variable, time_stat
            #print stats[variable][time_stat][hseason].keys()
            d=stats[variable][time_stat][hseason]["mean"]
            ref=d[ref_period][ref_experiment]
            #
            txt.write("%s %s time_stat=%s\n"%(variable,hseason,time_stat))
            periods=d.keys()
            periods.sort()
            if ref_period in periods : periods.remove(ref_period)
            
            # Column titles
            txt.write(17*" ")
            
            for p in periods : txt.write(period_format%p)
            txt.write("\n")
            txt.write("%-6s"%"SSP ")
            txt.write(" %-9s "%ref_period)
            for period in d :
                if period==ref_period : continue
                txt.write("|  ")
                txt.write(float_format%"value")
                txt.write(float_format%"change")
                txt.write(ratio_format%" rchng (%)")
                txt.write("% 11s "%"Rchng")
            txt.write("\n")
            
             # Values
            for scenario in scenarios :
                txt.write("%6s "%scenario)
                d=stats[variable][time_stat][hseason]
                txt.write("{0:> 8.2e} ".format(ref["value"]))
                for period in d["mean"] :
                    if period==ref_period : continue
                    if scenario not in table_scenarios : continue
                    if period   not in table_periods : continue
                    #
                    #print d["mean"]
                    dd =d["mean"][period][scenario]
                    txt.write("| ")
                    if table_ranges   == "empirical" :
                        u5 =d["q5"]  [period][scenario]
                        u95=d["q95"] [period][scenario]
                        txt.write( "{0:> 8.2e} ".format(dd["value"]))
                        txt.write( "{0:> 8.2e}/{1:> 8.2e} ".format(u5["value"],u95["value"]))
                        txt.write( "{0:> 8.2e} ".format(dd["change"]))
                        txt.write( "{0:> 8.2e}/{1:> 8.2e} ".format(u5["change"],u95["change"]))
                        txt.write( "{0:> 6.1f} ".format(dd["rchange_on_stat"]))
                        txt.write( "{0:> 6.1f}/{1:> 6.1f} ".format(u5["rchange_on_stat"],u95["rchange_on_stat"]))

                    else:
                        rng=d["nq95"][period][scenario]
                        txt.write("{0:> 8.2e} ".format(dd["value"]))
                        txt.write("(+/-{0:> 7.1e}) ".format(rng["value"]-dd["value"]))
                        txt.write("{0:> 8.2e} ".format(dd["change"]))
                        txt.write("(+/-{0:> 7.1e}) ".format(rng["change"]-dd["change"]))
                        txt.write("{0:> 6.1f} ".format(dd["rchange_on_stat"]))
                        txt.write("(+/-{0:> 6.1f}) ".format(rng["rchange_on_stat"]-dd["rchange_on_stat"]))
                    txt.write("{0:> 6.1f} ".format(dd["stat_on_rchange"]))
                txt.write("\n")
            
    
In [ ]:
def write_metadata(max_count=None) :    
    count=0
    with open(filebase.replace(".json","_md.txt"),"w") as meta :
        for variable in stats :
            if variable in ["option", "ref_period" ]: continue
            mdata=stats[variable]["metadata"]
            for scenario in mdata :
                for model in mdata[scenario]:
                    lines=mdata[scenario][model]
                    meta.write(lines)
                    if max_count is None or count < max_count :
                        print lines,
                        count +=1
 
In [ ]:
filebase = outdir+"/stats_%s_%s%s.json"%(ref_period,data_versions_tag,version)
#
if do_compute :
    if not os.path.exists(outdir):  os.makedirs(outdir)

    stats["ref_period"] = ref_period
    stats["option"]     = option
    #
    with open (filebase,"w") as f :
            json.dump(stats,f,separators=(',', ': '),indent=3)
    csync()
In [ ]:
if create_table : 
    
    if input_dir is not None :
        indir = input_dir
    else: 
        indir = outdir
    json_file = indir+"/stats_%s_%s%s.json"%(ref_period,data_versions_tag,version)

    with open(json_file,"r") as f :
        stats=json.load(f)
    
    cases=set()
    scenarios=set()
    periods=set()
    for variable in stats :
        if variable in ["option", "ref_period" ]: continue
        for time_stat in stats[variable]:
            if time_stat in ['metadata','models_number','ens' ] : continue
            for season in stats[variable][time_stat] :
                cases.add((variable,season,time_stat))
                for ens_stat in stats[variable][time_stat][season]:
                    for period in stats[variable][time_stat][season][ens_stat]:
                        periods.add(period)
                        for scenario in stats[variable][time_stat][season][ens_stat][period] :
                            if scenario != ref_experiment :
                                scenarios.add(scenario)
    scenarios=list(scenarios)
    scenarios.sort()
    print scenarios
    cases=list(cases)
    cases.sort()
    print cases
    periods.remove(ref_period)
    periods=list(periods)
    periods.sort()
    

    if table_cases == None      : table_cases    = cases
    if table_periods == None    : table_periods  = periods
    if table_scenarios == None  : table_scenarios= scenarios
    

    write_table()
    write_csv()
    write_metadata()