# -*- coding: utf-8 -*-
# S.Sénési for Météo-France - Sept. 2019 to March 2021
# Tag of the data versions dictionary to analyze; it is passed to
# read_versions_dictionnary further down and is also embedded in the name of
# the JSON file written at the end of this script
data_versions_tag = "20210201_derived"
# Directory passed to read_versions_dictionnary together with the tag above
data_versions_dir = "/home/ssenesi/CAMMAC/select_data_versions"
from IPython.core.display import display, HTML, Image
# Notebook cosmetics : inject a CSS rule forcing elements of class "container" to 100% width
display(HTML("<style>.container { width:100% !important; }</style>"))
import requests # use pip or conda to install it if needed
import json
import sys
from CAMMAClib.ancillary import feed_dic
from CAMMAClib.mips_et_al import institute_for_model, mip_for_experiment,\
models_for_experiments,read_versions_dictionnary, prefered_variant
def errata(dataset_drs, base_url="https://errata.es-doc.org/1/"):
    """
    Query the errata service for erratas on a dataset DRS, such as
    >>> dataset_drs="CMIP6.DAMIP.NASA-GISS.GISS-E2-1-G.hist-sol.r1i1p1f1.AERmon.bldep.gn.v20180912"
    and return a list of pairs (severity, description) for relevant erratas

    Arg base_url is the root URL of the errata service (with a trailing slash)

    Returns None (rather than a list) when the service answer cannot be
    decoded as JSON, or when that answer includes an 'errorCode' entry
    """
    # Local import : only needed for decoding errata ids delivered as a string
    import ast

    resolve_url = base_url + "resolve/simple-pid?datasets=" + dataset_drs
    response = requests.get(resolve_url)
    try:
        resolved = response.json()
    except ValueError:
        # Single parenthesized argument : prints the same with Python 2 and 3
        print("\nNo Json object for " + dataset_drs)
        return None
    if 'errorCode' in resolved:
        return None

    erratas = []
    for handle in resolved:
        uids = resolved[handle]['errataIds']
        if not isinstance(uids, list):
            # When not a list, the ids are assumed to come as the string
            # representation of a list. This text is received from a remote
            # service, so it is decoded with literal_eval (which only accepts
            # Python literals) and never with eval (which would execute it)
            uids = ast.literal_eval(uids)
        for uid in uids:
            issue = requests.get(base_url + "issue/retrieve?uid=" + uid).json()['issue']
            erratas.append((issue['severity'], issue['description']))
    return erratas
# Sample queries of the errata service, one per dataset DRS
for sample_drs in (
    'CMIP6.DAMIP.NASA-GISS.GISS-E2-1-G.hist-sol.r1i1p1f1.AERmon.bldep.gn.v20180912',
    'CMIP6.CMIP.THU.CIESM.historical.r1i1p1f1.Amon.pr.gr.v20200417',
    'CMIP6.ScenarioMIP.THU.CIESM.ssp585.r1i1p1f1.Amon.tas.gr.v20200417',
):
    errata(sample_drs)
def _scan_erratas(dic, experiments, variables, max_count, errata_base_url):
    """
    Query the errata service for the datasets selected in dic and return a pair
    (errata2models, already_done) where errata2models[variable][severity][description]
    is a set of 'model.experiment' strings and already_done is the set of processed cases
    """
    errata2models = dict()
    already_done = set()
    for experiment in experiments:
        sys.stdout.write("%s " % experiment)
        # The variables list is worked out for each experiment, and arg 'variables'
        # is left untouched : otherwise the variables of the first experiment
        # would silently apply to all the following ones
        if variables is None:
            experiment_variables = dic[experiment].keys()
        else:
            experiment_variables = variables
        for variable in experiment_variables:
            sys.stdout.write("%s " % variable)
            for table in dic[experiment][variable]:
                per_model = dic[experiment][variable][table]
                for model in per_model:
                    # Returning (rather than breaking) leaves all loops at once
                    if max_count is not None and len(already_done) >= max_count:
                        return errata2models, already_done
                    variants = set(per_model[model].keys())
                    variant = prefered_variant(variants, "", model)
                    if variant is None:
                        raise ValueError("Issue with prefered variant for %s %s %s %s"%(experiment,variable,model,variants) )
                    grid, version, _ = per_model[model][variant]
                    # e.g. CMIP6.CMIP.MPI-M.MPI-ESM1-2-HR.piControl
                    expid = "CMIP6.%s.%s.%s.%s" % (
                        mip_for_experiment(experiment), institute_for_model(model), model, experiment)
                    nuple = (expid, variant, table, variable, grid, version)
                    if nuple in already_done:
                        continue
                    already_done.add(nuple)
                    # The elements of nuple are, in that order, those of the dataset DRS
                    drs = "%s.%s.%s.%s.%s.%s" % nuple
                    err_list = errata(drs, errata_base_url)
                    if err_list is None:
                        continue
                    # The variable gets an entry as soon as the service answered,
                    # even when the answer includes no errata
                    per_severity = errata2models.setdefault(variable, dict())
                    expid_fields = expid.split(".")
                    expid_short = expid_fields[3] + "." + expid_fields[4]
                    for severity, description in err_list:
                        per_description = per_severity.setdefault(severity, dict())
                        per_description.setdefault(description, set()).add(expid_short)
    return errata2models, already_done


def analyze_erratas(dic, experiments=None, variables=None, max_count=None, do_print=True, print_expids=False):
    """
    Use a data versions dictionary such as produced by notebook select_data_versions and
    query the ESGF errata service for all corresponding datasets

    Returns a dictionary of 'model.experiment' strings with an errata, grouped that way :
    >>> d[variable][severity][errata_description] = [ ... sorted list of model.experiment ...]
    plus two entries, "Errata service query date" and "Errata service query url",
    which record when and where the errata service was queried

    Arg dic is a data versions dictionary organized that way :
    data_versions[expid][variable][table][model][variant]=(grid,version,data_period)

    Arg experiments allows to restrict the analysis to a list of experiments;
    default is to process all experiments of dic

    Arg variables allows to restrict the analysis to a list of variables;
    default is to process, for each experiment, all its variables

    Arg max_count allows to restrict the number of processed cases : the
    analysis stops as soon as that number of distinct cases has been queried

    Arg do_print triggers a call to print_errata2models on the result, which
    receives arg print_expids

    Raises ValueError when no prefered variant can be identified for a model
    """
    from datetime import datetime

    errata_base_url = "https://errata.es-doc.org/1/"
    if experiments is None:
        experiments = dic.keys()
    berrata2models, already_done = _scan_erratas(
        dic, experiments, variables, max_count, errata_base_url)
    # Ends the progress line written during the scan
    sys.stdout.write("\n\n")

    # Sets cannot be written as JSON : change them to lists, which are sorted
    # in order that the result is reproducible
    for per_severity in berrata2models.values():
        for per_description in per_severity.values():
            for description in per_description:
                per_description[description] = sorted(per_description[description])

    print("%d distinct cases scrutinized" % len(already_done))
    if do_print:
        print_errata2models(berrata2models, print_expids)

    # Metadata entries are added after printing; print_errata2models skips
    # them anyway, because their key includes 'Errata'
    berrata2models["Errata service query date"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    berrata2models["Errata service query url"] = errata_base_url
    return berrata2models
def print_errata2models(berrata2models, print_expids=True, severities=("high", "medium")):
    """
    Print the content of a dictionary organized as
    >>> berrata2models[variable][severity][errata_description] = [ ... list of expids ...]
    such as returned by analyze_erratas

    Top-level entries which key includes 'Errata' are skipped (analyze_erratas
    uses such keys for recording the query date and url)

    Arg print_expids : if True, print the list of expids for each errata
    description; otherwise, print only the length of that list

    Arg severities : only erratas with one of these severities are printed
    """
    for variable in berrata2models:
        if "Errata" in variable:
            continue
        print("\nvariable %s" % variable)
        for severity in berrata2models[variable]:
            if severity not in severities:
                continue
            print("\n\tseverity %s" % severity)
            for description, expids in berrata2models[variable][severity].items():
                shown = expids if print_expids else len(expids)
                # No space after the tabs : this matches the output of the
                # Python 2 print statement, which inserts none after a tab
                print("\n\t\t%s %s" % (description, shown))
# Load the data versions dictionary designated by the tag and directory set at top of file
data_versions = read_versions_dictionnary(data_versions_tag, data_versions_dir)

# First pass : variable 'pr' for experiment 'historical' only
# (adding max_count=1 to this call is handy for a quick trial)
a = analyze_erratas(
    data_versions,
    experiments=["historical"],
    variables=["pr"],
    do_print=False,
)
print_errata2models(a, print_expids=True, severities=["medium", "high"])

# Second pass : one analysis per variable, without restriction on experiments
allvars = {}
for variable in [u'pr', u'tas', u'mrro', u'evspsbl', u'mrso', u'P-E', u'prw', u'mrsos', u'sos']:
    allvars[variable] = analyze_erratas(data_versions, do_print=False, variables=[variable])
    print_errata2models(allvars[variable], print_expids=True, severities=["medium", "high"])

# NOTE(review): the file is named 'all_erratas_...' but it is 'a' (first pass,
# historical / pr only) which is written; 'allvars' is never saved - confirm
# that this is intended
jsfile = "all_erratas_%s.json" % data_versions_tag
a["doc"] = "list_of_model.experiment[variable][severity][description]"
with open(jsfile, "w") as f:
    json.dump(a, f, separators=(',', ': '), indent=3, ensure_ascii=True)