Source code for esgf_query

"""
Two functions for querying the ESGF errata service and summarizing its results
"""

from __future__  import division, print_function , unicode_literals, absolute_import

import requests  # use pip or conda to install it if needed
import json
from datetime import datetime

[docs]def query_errata_service(dataset_drs,base_url="https://errata.es-doc.org/1/"):

    """
    Query the errata service for erratas on a dataset DRS, such as
    >>> dataset_drs="CMIP6.DAMIP.NASA-GISS.GISS-E2-1-G.hist-sol.r1i1p1f1.AERmon.bldep.gn.v20180912"
    and returns a list of pairs (severity, description) for relevant erratas
    """

    erratas=[]    
    resolve_url=base_url+"resolve/simple-pid?datasets="+dataset_drs
    r=requests.get(resolve_url)
    try :
        r=r.json()
    except ValueError :
        print("\nNo Json object for "+dataset_drs)
        return None
    if 'errorCode' not in r :
        for handle in r :
            l=r[handle]['errataIds']
            if type(l) != type([]) :
                l=eval(l)
            for uid in l :
                e=requests.get(base_url+"issue/retrieve?uid="+uid).json()['issue']
                erratas.append((e['severity'],e['description']))
    else :
        return None
    return erratas

[docs]def analyze_erratas(fn,max_count=None, do_print=True, panel=None, variable=None) :

    """
    Reads the kind of AR6 metadata file which describes data used for a figure (or figure panel) and
    query the ESGF errata service for all corresponding datasets
    
    Returns a dictionnary of experiment DRS with an errata, grouped that way :
    
    >>>  d[variable][severity][errata_description] = [ ... list of experiment DRS ...]
    
    Arg count allows to limit the number of requests to the errata service
    Arg variable allows to restrict the analysis to those metadata lines which are for a given variable
    Arg panel allows to restrict the analysis to those metadata lines which have a given panel label
    
    Example of a metadata file line: 
       CMIP6.CMIP.MPI-M.MPI-ESM1-2-HR.piControl   none   r1i1p1f1 Amon         pr  gn v20190710 ssp126 a 

    Note : sub-experiment ids are not (yet) handled
      
    """
    
    errata_base_url="https://errata.es-doc.org/1/"
    with open(fn) as fic : 
        lines=fic.readlines()
    count=0
    berrata2models=dict()
    variables=[]
    for line in lines :
        count+=1
        fields=line.split()
        if len(fields) >= 7 and fields[1]=='none' and (max_count==None or count <= max_count) :
            print(".",end='')
            expid=fields[0]
            variant=fields[2]
            table=fields[3]
            rvariable=fields[4]
            grid=fields[5]
            version=fields[6]
            if len(fields)>7:
                rpanel=fields[7]
            else: 
                rpanel=False
            if (panel is None or panel==rpanel) and (variable is None or variable==rvariable):
                drs="%s.%s.%s.%s.%s.%s"%(expid,variant,table,rvariable,grid,version)
                err_list=query_errata_service(drs,errata_base_url)
                if err_list is not None :
                    if rvariable not in berrata2models :
                        berrata2models[rvariable]=dict()
                    for severity,description in err_list :
                        if severity not in berrata2models[rvariable]:
                            berrata2models[rvariable][severity]=dict()
                        if description not in berrata2models[rvariable][severity] :
                            berrata2models[rvariable][severity][description]=set()
                        berrata2models[rvariable][severity][description].add(expid)
    print
    if do_print :
        for variable in berrata2models :
            print("variable",variable)
            for severity in berrata2models[variable] :
                print("\tseverity",severity)
                for description in berrata2models[variable][severity] :
                    print("\t\t",description,berrata2models[variable][severity][description])
    #
    berrata2models["Errata service query date"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    berrata2models["Errata service query url"]  = errata_base_url
    #
    return berrata2models