CAMMAC https://cammac.readthedocs.io

S.Sénési for Météo-France - sept 2019 to march 2021

Compute and/or plot ensemble statistics of relative changes

  • for mean and/or inter-annual standard deviation of basin-averaged variables,
  • over a list of basins and
  • for a time_slicing of a projection period, and
  • for a series of scenarios

Results are json files (in a directory indicated below by variable 'outdir') hosting dictionnaries organized as

  • changes[variable][time_stat][basin][ensemble_stat][slice] = stat_value

 and named

  • changes_scenario_dataVersion_version.json

and a figure, which can either show 3 basins 2 variables or up to 9 basins 1 variable, and named e.g.

  • rate_of_change_per_basin_...dataVersion_version.png

Parameters stand in first cell, are either commented here or in the doc (see above)

Default settings (some may be overriden by Papermill - this would show in next cell in the execution output notebook)

In [ ]:
import os
do_test               = True

# This script has two phases, which can be activated separately
do_compute            = True
do_plot               = True

# Version number will be a suffix for data and figure filenames. Use e.g. "_V1" for legibility
version               = ""
plot_version          = ""
figure_name           = "Fig8-27"

# Model data versions stuff
#############################

data_versions_dir    = os.getenv("CAMMAC")+"/data"
# All models listed through next parameters as providing data for a scenario should be included :
data_versions_tag    = "20200918"
excluded_models      = [] 
included_models      = None       # Can be a list that limits models used

# Compute parameters
##########################

# Basins for changes computation. Value 'land' is also recognized
compute_basins      = [ "Amazon" , "Lena", "Yangtze" ,"Mississippi", "Danube", "Niger" ]
#compute_basins        = [ "Amazon" , "Lena", "Yangtze"  ]

# triplet lists of  [variable, table, time statistics] of interest 
variables            = [ [ "mrro","Lmon","mean" ], [ "mrro","Lmon","std" ] ]

# List of ensemble statistics computed for each variable 
stats_list          = [ "median", "mean","nq5","nq95","nq25","nq75","ens"]

# Computing changes implies defining a refereece period
ref_experiment      = "historical"
ref_period          = "1850-1900" 

# Define time slices for projection, for a list of projection epxeriments. 
# May include years belonging to ref_experiment duration, but scenario's begin 
# must match ref_experiment's end. 
scenarios          = [ "ssp585", "ssp245","ssp126" ]
periods_length     = 20
start_year         = 1901
last_year          = 2081  # i.e. last period's begin
step               = 10  # Not necessarily equal to periods_length !
#
# The directory holding output (json) files and figures
outdir             = "./figures"

# Additional parameters, for plot
###################################

# The plot script is tuned for one or two variables only
plot_variables      = [ ["mrro","Lmon","mean"], ["mrro","Lmon","std"] ]

plot_variable_label = "runoff"
plot_name1          = "mean"
plot_name2          = "variability"
# Basins to plot. Must be a sublist of compute_basins. 
# If basins number > 3, plot_variables should have only one entry, and we can have up to 9 basins
# Otherwise, it should have two entries, for a 2 columns/vars * 3 rows/basins plot
plot_basins         = ["Amazon","Yangtze", "Lena"]
#plot_basins=["Mississippi", "Danube", "Niger"]

# Curves to plot, in right order. Must be a sublist of 'stats_list' 
plot_stats          = ["nq5","mean","nq95"]
plot_stats_label    = "ensemble mean, 5 and 95 percentiles"

# Parameters for periods to plot. Must be consistent with periods above. Can be a subset
plot_start_year     = 1901
plot_last_year      = 2081
plot_step           = 10

image_format        = "png"

# Dict of y-axis range for various basins
minmax= { "Amazon" : (-45,30),"Lena" : (-10,70),"Yangtze" : (-35,40),"Mississippi" : (-30,45),
          "Danube" : (-35,40),"Niger" : (-70,160),"Euphrates" : (-120,80),"Indus" : (-45,110),
          "Nile" : (-100,180), "Parana":(-40,70,), "Mackenzie":(-10,50), "Amu-Darya":(-50,80), 
          "Murray" : (-70,120),}

ch=dict(compute=True,house_keeping=False)

# Stable parameters
#######################

# We use CTRIP-V2 data for basins. 
# The only constraint on basins data is to be provided as a NetCDF file with as single 
# field having a distinct integer value for each basin; One must also provide here below 
# some mapping of integers to basin names for interesting basins 
basins_file        = os.getenv("CAMMAC")+"/data/basins/num_bas_ctrip.nc"
# See colocated file rivnum05_new2.txt for a full table of basin numbers
# Entry "land" is necessary if wishing to compute integration over land 
basins_key         = {"land": -999, "Yangtze":11 , "Lena":8, "Amazon":1, "Mississippi":3 , 
                      "Danube":29, "Niger":9, "Euphrates":31, "Indus":20, "Nile":4, "Parana":5 , 
                      "Amu-Darya": 45, "Mackenzie":12, "Lake Eyre":15, "Murray":17}
In [ ]:
do_other_test = False
# for tests :
if do_other_test :
    do_plot=False
    periods_length=3
    start_year=2016
    last_year=2020
    version="_Vtest"
    compute_basins=[ "Amazon" ]
    variables=[ ["mrro","Lmon","mean"], ["mrro","Lmon","std"] ]
    scenarios=[ "ssp585" ]
    stats_list=[ "median" ]
    ch=dict(compute=True,house_keeping=False)
    included_models     = ["CNRM-CM6-1","IPSL-CM6A-LR"]
In [ ]:
# for tests :
if do_test :
    do_compute     = False
    do_plot        = True
    plot_variables = [["mrro","Lmon","mean"]]
    plot_basins    = ["Amazon","Yangtze", "Lena", "Mississippi", "Danube" , "Niger"]
In [ ]:
# These two commands have no effects when run outside a notebook
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
In [ ]:
import json
import sys
import os, os.path
import subprocess
In [ ]:
from climaf.api import *
climaf.cache.stamping=False
climaf.driver.dig_hard_into_cache=False
In [ ]:
from CAMMAClib.ancillary   import prettier_label, feed_dic
from CAMMAClib.mips_et_al  import read_versions_dictionnary, TSU_metadata
from CAMMAClib.changes     import stats_of_basins_changes

# Load some user settings, if available
settings_file=os.getenv("CAMMAC_USER_PYTHON_CODE_DIR",".")+'/cammac_user_settings.py'
if os.path.exists(settings_file) :
    exec(compile(open(settings_file).read(),settings_file,'exec'))
In [ ]:
def init_slices(start_year,last_year,periods_length,step):
    slices=[] ; current=start_year
    while current <= last_year : 
        slices.append("%s-%s"%(current,current+periods_length-1))
        current+=step
    return slices
In [ ]:
if do_compute :

    # Read dictionnary of data versions 
    data_versions=read_versions_dictionnary(data_versions_tag,data_versions_dir)
    metadata=""

    # Init time periods
    slices=init_slices(start_year,last_year,periods_length,step)

    basins_data={"basins":compute_basins, "basins_file":basins_file,"basins_key":basins_key}
    basins_data_globe={"basins":["globe"], "basins_file":"","basins_key":{}}
    #
    import os.path
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    #
    model_changes=dict()
    metadata=""
    for scenario in scenarios :
        stats_all_vars=dict()
        tas_models=set()
        for variable,table,time_stat in variables :
            #print scenario, variable, table, time_stat
            changes,models=stats_of_basins_changes(model_changes, ref_experiment, scenario, ref_period,variable, table, time_stat,
                                               data_versions,slices,stats_list,basins_data, included_models=included_models,
                                               excluded_models=excluded_models, must_have_vars=[("tas","Amon")],**ch)
            feed_dic(stats_all_vars,changes,variable,time_stat)
            metadata+=TSU_metadata([scenario,ref_experiment],models,variable,table,data_versions)
            tas_models=tas_models.union(models)
            print "tas_models=",tas_models
        # Next for tas. We use the union of per-variable models list 
        tas_models = [ model for model,real in tas_models ]
        tas_changes,models=stats_of_basins_changes(model_changes, ref_experiment, scenario, ref_period,"tas","Amon", "mean",
                                    data_versions,slices,["mean","ens"],basins_data_globe, included_models=tas_models,
                                    relative=False, **ch)
        feed_dic(stats_all_vars,tas_changes,"tas","mean")
        metadata+=TSU_metadata([scenario,ref_experiment],models,"tas","Amon",data_versions)
        # Write all variables results
        with open (outdir+"/changes_allvars_%s_%s%s.json"%(scenario,data_versions_tag,version),"w") as f :
            json.dump(stats_all_vars,f,separators=(',', ': '),indent=3)
    #
    with open("%s/%s%s_md.txt"%(outdir,figure_name,version),"w") as f: f.write(metadata)
            

Create a NetCDF file for Ncl plot script

In [ ]:
if do_plot :
    
    import Nio

    # Init plot time periods
    plot_slices=init_slices(plot_start_year,plot_last_year,periods_length,plot_step)

    stats=dict()
    nmodels=dict()
    #
    for scenario in scenarios :
        try :
            filename="%s/changes_allvars_%s_%s%s.json"%(outdir,scenario,data_versions_tag,version)
            with open (filename,"r") as f :
                stats[scenario]=json.load(f)
        except:
            raise ValueError("No cached data for scenario %s. Try setting 'do_compute=True'\n%s"%(scenario,filename))
        var_one,table_one,time_stat_one=plot_variables[0]
        nmodels[scenario]=len(stats[scenario][var_one][time_stat_one][plot_basins[0]]["ens"][plot_slices[0]])
    #
    fn="change_rate_basins_data.nc"
    !rm -f {fn}
    f=Nio.open_file(fn,"c")
    f.create_dimension('ssp'   ,len(scenarios))
    f.create_dimension('stat'  ,len(plot_stats))
    f.create_dimension('basin' ,len(plot_basins))
    f.create_dimension('period',len(plot_slices))
    #
    #f.create_variable('nmodels','i',('ssp'))
    #f.variables['nmodels'][:] = [ nmodels[s] for s in scenarios ]
    #
    f.create_variable('tas','d',('ssp','period'))
    tas_mean=[[stats[scenario]["tas"]["mean"]["globe"]["mean"][p] 
               for p in plot_slices ] 
              for scenario in scenarios ] 
    #print tas_mean
    f.variables['tas'][:] = tas_mean
    #
    for variable,table,time_stat in plot_variables :
        var_stat=variable+"_"+time_stat
        f.create_variable(var_stat,'d',('ssp','basin','stat','period'))
        f.variables[var_stat][:] = [[[[ stats[scenario][variable][time_stat][basin][stat][p] 
                                       for p in plot_slices]
                                      for stat in plot_stats]
                                     for basin in plot_basins ] 
                                    for scenario in scenarios ] 
        # Store number of models for each var+stat and scenario
        #var_stat_nb=var_stat+"_nb"
        #f.create_variable(var_stat_nb,'d',('ssp','stat'))
        #f.variables[var_stat_nb][:] = [[ len(stats[scenario][variable][time_stat][plot_basins[0]["ens"][plot_slices[0]]])
        #                                  for stat in plot_stats]
        #                                for scenario in scenarios ] 
    #
    f.close()
    

Launch Ncl script for plot

All parameters for the plot are provided through Ncl command-line arguments, that can be fine-tuned here.

In [ ]:
if do_plot :
    # Command below is needed on Ciclad due to (uncomplete ?) Nio install in Jerome's conda
    #  env which has an adverse impact on launched command environment for Ncl execution
    if "NCARG_NCARG" in os.environ :
        os.environ.pop("NCARG_NCARG")

    def ncl_strings_tab(it) :
        # Create a tab of strings from it, in Ncl syntax ; e.g. : (/"aa","bb"/)
        tab="(/"
        for val in it[0:-1] : tab+='"%s", '%val
        tab+='"%s"/)'%it[-1]
        return tab
    #
    figfile="rate_of_change_per_basin_vs_%s_%s%s"%(ref_period,data_versions_tag,plot_version)
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    #
    ncl_basins=ncl_strings_tab(plot_basins)
    ncl_vars=ncl_strings_tab([ "%s_%s"%(var,stat) for var,table,stat in variables ])
    v,t,s=variables[0]
    ncl_var="%s_%s"%(v,s)
    pl=[ "%s - n=%d"%(prettier_label.get(e,e),nmodels[e]) for e in scenarios ]
    ncl_experiment_labels=ncl_strings_tab(pl)
    #
    nb_models=29
    end=last_year+periods_length-1
    #
    if len(plot_basins) <= 3 :
        plot_script = "change_rate_basins_2vars.ncl"
        connector   = " and"
    elif len(plot_basins) <= 9  :
        plot_script    = "change_rate_basins_1var.ncl"
        connector      = ""
        plot_name2     = ""
    else : 
        raise ValueError("Cannot plot more than 9 basins ")
    ncl_script=CAMMAC+"/notebooks/"+plot_script
    #
    # Plot figure
    # Build a  command whcih provides arguments for the union of arguments of the two plot script (e.g. 'names' and 'name' )
    command="ncl -Q %s"%ncl_script +\
             " ' input_file = \"%s\"'"       %fn +\
             " ' figfile    = \"%s/%s\"'"       %(outdir,figfile) +\
             " ' names = (/\"%s\",\"%s\"/)'" %(plot_name1,plot_name2) +\
             " ' name  = \"%s\"'" %(plot_name1) +\
             " ' title = \"Rate of change in basin-scale %s %s%s %s\"'"%(plot_variable_label,plot_name1,connector,plot_name2) +\
             " ' xtitle = \"Warming above %s, from %s to %s\"'"%(ref_period,start_year,end) +\
             " ' ytitle = \"Change vs %s (%%)"%(ref_period)+" %s\"'"%plot_stats_label +\
             " ' vars = %s'"%ncl_vars+\
             " ' var  = \"%s\"'"%ncl_var+\
             " ' basins = %s'"%ncl_basins+\
             " ' experiments_labels = %s'"%(ncl_experiment_labels)+\
             " ' xmin = 0.0'"+\
             " ' xmax = 5.05'"+\
             " ' type=\"%s\"'"%image_format
    #
    #" ' ytitle = \"Change in basin-averaged %s%s %s of %s, vs %s "%(plot_name1,connector,plot_name2,plot_variable_label,ref_period)+\
    #"(%%) ~Z75~~C~(%s models %s)\"'"%(nb_models,plot_stats_label) +\
    #
    if len(plot_basins) > 3 :
        yminmax="(/"
        for basin in plot_basins :
            yminmax+="(/%g,%g/),"%(minmax[basin][0],minmax[basin][1])
        yminmax=yminmax[:len(yminmax)-1]
        yminmax+="/)"
        #
        command +=" ' yminmax = %s'"%yminmax
    #
    print "command=",command
    out=subprocess.check_output(command,shell=True)
    if "OK" not in out[-3:] :
        print out
        raise ValueError("Issue executing Ncl plot script : \n"+out)
    else :
        os.system("cd %s ; ln -sf %s.png %s%s.png"%(outdir,figfile,figure_name,version))


# For Zhang figure scale
#'yminmax=(/(/(/-10,10/),(/-30.,30./)/),(/(/-10,5/),(/-20.,30./)/),(/(/0,35/),(/-10.,60./)/)  /)'
#'xmax=3.05'\
In [ ]: