diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst index 23b791149c..1a877d2155 100644 --- a/doc/sphinx/source/input.rst +++ b/doc/sphinx/source/input.rst @@ -283,7 +283,7 @@ A list of the datasets for which a CMORizers is available is provided in the fol | CERES-SYN1deg | rlds, rldscs, rlus, rluscs, rlut, rlutcs, rsds, rsdscs, rsus, rsuscs, rsut, rsutcs (3hr) | 3 | NCL | | | rlds, rldscs, rlus, rlut, rlutcs, rsds, rsdt, rsus, rsut, rsutcs (Amon) | | | +----------------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ -| CLARA-AVHRR | clt, clivi, clwvi, lwp (Amon) | 3 | NCL | +| CLARA-AVHRR | clt, clivi, clwvi, lwp (Amon, CFday) | 3 | Python | +----------------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | CLOUDSAT-L2 | clw, clivi, clwvi, lwp (Amon) | 3 | NCL | +----------------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ diff --git a/esmvaltool/cmorizers/data/cmor_config/CLARA-AVHRR.yml b/esmvaltool/cmorizers/data/cmor_config/CLARA-AVHRR.yml new file mode 100644 index 0000000000..a5df06faf2 --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/CLARA-AVHRR.yml @@ -0,0 +1,56 @@ +# Common global attributes for Cmorizer output +attributes: + dataset_id: CLARA-AVHRR + version: A3 + tier: 3 + type: sat + project_id: OBS6 + source: "https://cds.climate.copernicus.eu/datasets/satellite-cloud-properties" + reference: "clara-a3" + comment: "" + +variables: + # monthly means + clivi_month: + mip: Amon + short_name: clivi + raw: iwp_allsky + filename: IWPmm{year}*.nc + clt_month: + mip: Amon + short_name: clt + raw: cfc + filename: CFCmm{year}*.nc + lwp_month: + mip: Amon + short_name: lwp + raw: lwp_allsky + filename: LWPmm{year}*.nc + 
clwvi_month: + mip: Amon + short_name: clwvi + raw: ['lwp_allsky', 'iwp_allsky'] + filename: ['LWPmm{year}*.nc', 'IWPmm{year}*.nc'] + operator: sum + # daily means + clivi_day: + mip: CFday + short_name: clivi + raw: iwp_allsky + filename: IWPdm{year}{month}*.nc + clt_day: + mip: CFday + short_name: clt + raw: cfc + filename: CFCdm{year}{month}*.nc + lwp_day: + mip: CFday + short_name: lwp + raw: lwp_allsky + filename: LWPdm{year}{month}*.nc + clwvi_day: + mip: CFday + short_name: clwvi + raw: ['lwp_allsky', 'iwp_allsky'] + filename: ['LWPdm{year}{month}*.nc', 'IWPdm{year}{month}*.nc'] + operator: sum diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index 488a3ac452..1791f18785 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -233,28 +233,27 @@ datasets: CLARA-AVHRR: tier: 3 - source: https://wui.cmsaf.eu/ - last_access: 2021-03-22 + source: https://cds.climate.copernicus.eu/datasets/satellite-cloud-properties + last_access: 2026-05-12 info: | Download and processing instructions - 1) Create ("register") an user account at - https://wui.cmsaf.eu/safira/action/viewLogin?menuName=NUTZER_HOME - 2) login (same URL as above) - 3) Search data using search form at - https://wui.cmsaf.eu/safira/action/viewProduktHome?menuName=PRODUKT_HOME - - Product group: Climate Data Records - - Product family: CLARA-A ed. 2.1 - - Product name: CFC - Factional cloud cover - IWP - Ice water path - LWP - Liquid water path - - Area: Global - - Temporal resolution: Monthly - 4) Select "CLARA-A ed. 2.1 AVHRR on polar orbiting satellites" from - list of results. - 5) Click on "Add to order cart" - 6) Follow download instructions in automatic email received when data - are ready for download. - 7) Untar all .tar files into a single directory. 
"""Script to download CM SAF CLARA-AVHRR data from the CDS."""

import datetime
import gzip
import logging
import shutil
import zipfile
from pathlib import Path

import cdsapi

logger = logging.getLogger(__name__)

# Short variable names (used in the request keys and output file names)
# mapped to the "variable" entry expected by the CDS request.
_CDS_VARIABLES = {
    "clivi": "cloud_physical_properties_of_the_ice_phase",
    "clt": "cloud_fraction",
    "lwp": "cloud_physical_properties_of_the_liquid_phase",
}


def _build_request(variable, year, month=None):
    """Return one CDS request: a full year of monthly means, or one month of daily means."""
    request = {
        "product_family": "clara_a3",
        "origin": "eumetsat",
        "variable": variable,
        "climate_data_record_type": "thematic_climate_data_record",
        "time_aggregation": "monthly_mean" if month is None else "daily_mean",
        "year": str(year),
    }
    if month is None:
        request["month"] = [f"{m:02d}" for m in range(1, 13)]
    else:
        request["month"] = f"{month:02d}"
        # Days 01..31 are always requested, whatever the month length.
        request["day"] = [f"{d:02d}" for d in range(1, 32)]
    return request


def _build_requests(monthly_years, daily_years):
    """Return all CDS requests keyed by '<var>_monthly_<yyyy>' / '<var>_daily_<yyyymm>'."""
    requests = {}

    for year in monthly_years:
        for short_name, variable in _CDS_VARIABLES.items():
            requests[f"{short_name}_monthly_{year}"] = _build_request(
                variable,
                year,
            )

    # The CDS requests for daily values are done for each month separately
    # to avoid the error "cost limits exceeded".
    for year in daily_years:
        for month in range(1, 13):
            for short_name, variable in _CDS_VARIABLES.items():
                requests[f"{short_name}_daily_{year}{month:02d}"] = (
                    _build_request(variable, year, month)
                )

    return requests


def _unpack(file_path, outdir):
    """Extract the downloaded ZIP or GZIP archive *file_path* into *outdir*."""
    with Path(file_path).open("rb") as file:
        magic = file.read(2)

    if magic == b"PK":  # ZIP file signature
        logger.info("Detected ZIP file: %s", file_path)
        with zipfile.ZipFile(file_path, "r") as zip_ref:
            zip_ref.extractall(outdir)
    else:
        logger.info("Detected GZIP file: %s", file_path)
        target = Path(outdir) / file_path.stem
        # The target has to be opened for *writing*; opening it with "rb"
        # fails with FileNotFoundError because it does not exist yet.
        with gzip.open(file_path, "rb") as f_in, target.open("wb") as f_out:
            shutil.copyfileobj(f_in, f_out)


def download_dataset(
    original_data_dir,
    dataset,
    dataset_info,
    start_date,
    end_date,
    overwrite,
):
    """Download dataset.

    One archive is requested per variable and year (monthly means) and per
    variable and month (daily means). Each archive is stored as
    ``<original_data_dir>/Tier<tier>/<dataset>/{monthly,daily}/<date>/<key>.gz``
    and unpacked into the same directory. A failing request or extraction is
    logged and skipped so that the remaining requests are still processed.

    Parameters
    ----------
    original_data_dir : Path
        Directory where original data will be stored.
    dataset : str
        Name of the dataset
    dataset_info : dict
        Dataset information from the datasets.yml file
    start_date : datetime
        Start of the interval to download. If None, monthly data start in
        1979 and daily data in 2020. Only the year is used.
    end_date : datetime
        End of the interval to download. If None, 2020 is used. Only the
        year is used.
    overwrite : bool
        Overwrite already downloaded files
    """
    cds_url = "https://cds.climate.copernicus.eu/api"

    output_folder = original_data_dir / f"Tier{dataset_info['tier']}" / dataset
    output_folder.mkdir(parents=True, exist_ok=True)

    # Note: As of May 2026, CLARA-AVHRR (version A3) data on the CDS are only
    #       available until 2020. Alternatively, CLARA-AVHRR data could also
    #       be obtained from the EUMETSAT data store
    #       (https://data.eumetsat.int/).

    if start_date is None:
        start_date_mm = datetime.datetime(1979, 1, 1, tzinfo=datetime.UTC)
        start_date_dd = datetime.datetime(2020, 1, 1, tzinfo=datetime.UTC)
    else:
        start_date_mm = start_date
        start_date_dd = start_date

    if end_date is None:
        end_date_mm = datetime.datetime(2020, 12, 31, tzinfo=datetime.UTC)
        end_date_dd = datetime.datetime(2020, 12, 31, tzinfo=datetime.UTC)
    else:
        end_date_mm = end_date
        end_date_dd = end_date

    requests = _build_requests(
        range(start_date_mm.year, end_date_mm.year + 1),
        range(start_date_dd.year, end_date_dd.year + 1),
    )

    cds_client = cdsapi.Client(cds_url)

    for var_name, request in requests.items():
        # Keys look like "<var>_<monthly|daily>_<date>".
        datestr = var_name.split("_")[2]
        subdir = "daily" if "daily" in var_name else "monthly"
        outdir = output_folder / subdir / datestr
        outdir.mkdir(parents=True, exist_ok=True)

        logger.info("Downloading %s data to %s", var_name, outdir)

        file_path = outdir / f"{var_name}.gz"

        if file_path.exists() and not overwrite:
            logger.info(
                "File %s already exists. Skipping download.",
                file_path,
            )
            continue

        # Deliberately best-effort: a missing month/year on the CDS must not
        # abort the remaining downloads, so any error is reported and skipped.
        try:
            cds_client.retrieve(
                "satellite-cloud-properties",
                request,
                file_path.as_posix(),
            )
            # Handle both .gz and .zip files
            _unpack(file_path, outdir)
        except Exception as ex:
            logger.warning(
                "%s: no data downloaded for %s",
                type(ex),
                var_name,
            )
dataset. -; -; Source -; https://wui.cmsaf.eu/ -; -; Last access -; 2021-03-22 -; -; Download and processing instructions -; 1) Create ("register") an user account at -; https://wui.cmsaf.eu/safira/action/viewLogin?menuName=NUTZER_HOME -; 2) login (same URL as above) -; 3) Search data using search form at -; https://wui.cmsaf.eu/safira/action/viewProduktHome?menuName=PRODUKT_HOME -; -; - Product group: Climate Data Records -; - Product family: CLARA-A ed. 2.1 -; - Product name: CFC - Factional cloud cover -; IWP - Ice water path -; LWP - Liquid water path -; - Area: Global -; - Temporal resolution: Monthly -; -; 4) Select "CLARA-A ed. 2.1 AVHRR on polar orbiting satellites" from -; list of results. -; 5) Click on "Add to order cart" -; 6) Follow download instructions in automatic email received when data -; are ready for download. -; 7) Untar all .tar files into a single directory. -; -; Modification history -; 20230818-lauer_axel: added output of clwvi (in addition to iwp, lwp) -; 20210506-lauer_axel: output of lwp instead of clwvi -; 20210323-lauer_axel: written. 
-; -; ############################################################################# -loadscript(getenv("esmvaltool_root") + \ - "/data/formatters/interface.ncl") - -begin - - ; Script name (for logger) - DIAG_SCRIPT = "cmorize_obs_clara_avhrr.ncl" - - ; Source name - OBSNAME = "CLARA-AVHRR" - - ; Tier - TIER = 3 - - ; Period - YEAR1 = 1982 - YEAR2 = 2018 - - ; Selected variable (standard name) - VAR = (/"clt", "clivi", "lwp", "clwvi"/) - - ; Name in the raw data - NAME = (/"cfc", "iwp_allsky", "lwp_allsky", "iwp_allsky"/) - - ; Filename base - FNBASE = (/"CFCmm", "IWPmm", "LWPmm", "IWPmm"/) - - ; Conversion factor - ; Remark: total cloud cover (CFC) is reported as "1" but is actually "%" - ; IWP and LWP use scale_factor to convert to kg/m2 - ; CONV = (/1., 1., 1., 1./) - - ; MIP - MIP = (/"Amon", "Amon", "Amon", "Amon"/) - - ; Frequency - FREQ = (/"mon", "mon", "mon", "mon"/) - - ; CMOR table - CMOR_TABLE = getenv("cmor_tables") + \ - (/"/cmip5/Tables/CMIP5_Amon", \ - "/cmip5/Tables/CMIP5_Amon", \ - "/custom/CMOR_lwp.dat", \ - "/cmip5/Tables/CMIP5_Amon"/) - - ; Type - TYPE = "sat" - - ; Version - VERSION = "V002-01" - - ; Global attributes - SOURCE = "https://wui.cmsaf.eu/" - REF = "https://doi.org/10.5676/EUM_SAF_CM/CLARA_AVHRR/V002_01" - COMMENT = "The CM SAF data are owned by EUMETSAT and are available to " \ - + "all users free of charge and with no conditions to use. If you wish " \ - + "to use these products, EUMETSAT's copyright credit must be shown by " \ - + "displaying the words 'Copyright (c) (2020) EUMETSAT' under/in each " \ - + "of these SAF Products used in a project or shown in a publication " \ - + "or website. Please follow the citation guidelines given at " \ - + "https://doi.org/10.5676/EUM_SAF_CM/CLARA_AVHRR/V002_01 and also " \ - + "register as a user at http://cm-saf.eumetsat.int/ to receive latest " \ - + "information on CM SAF services and to get access to the CM SAF User " \ - + "Help Desk." 
- -end - -begin - - do vv = 0, dimsizes(VAR) - 1 - - log_info("Processing " + VAR(vv) + " (" + MIP(vv) + ")") - - time = create_timec(YEAR1, YEAR2) - date = cd_calendar(time, 1) - - ; Create timeseries - do yy = YEAR1, YEAR2 - - syear = sprinti("%i", yy) - do mm = 1, 12 - - smonth = sprinti("%0.2i", mm) - - ; Read file - fname = systemfunc("ls " + input_dir_path + FNBASE(vv) + \ - syear + smonth + "01*.nc") - - ; No files found - if (ismissing(fname)) then - log_info("Warning: no input data found for variable " + VAR(vv) + \ - " (" + syear + smonth + ")") - continue - end if - - ; Extract data - f = addfile(fname, "r") - val = f->$NAME(vv)$ - if (isatt(val, "scale_factor")) then - scalefac = tofloat(val@scale_factor) - else - scalefac = 1.0 - end if - if (isatt(val, "add_offset")) then - offset = tofloat(val@add_offset) - else - offset = 0.0 - end if - xx = tofloat(val) * scalefac + offset - delete(val) - - ; Assign to global array - if (.not.isdefined("output")) then - dims = dimsizes(xx) - dims(0) = dimsizes(time) - output = new(dims, float) - output!0 = "time" - output&time = time - output!1 = "lat" - output&lat = f->lat - output!2 = "lon" - output&lon = f->lon - fillval = xx@_FillValue - end if - output(ind(toint(yy * 100 + mm).eq.date), :, :) = (/xx/) - - delete(fname) - delete(f) - delete(xx) - - ; *** calculate clwvi (lwp + iwp) *** - - if (VAR(vv) .eq. 
"clwvi") then - fname = systemfunc("ls " + input_dir_path + "LWPmm" + \ - syear + smonth + "01*.nc") - - ; No files found - if (ismissing(fname)) then - log_info("Warning: input data incomplete for variable " + \ - VAR(vv) + " (" + syear + smonth + ")") - continue - end if - - ; Extract data - f = addfile(fname, "r") - val = f->lwp_allsky - if (isatt(val, "scale_factor")) then - scalefac = tofloat(val@scale_factor) - else - scalefac = 1.0 - end if - if (isatt(val, "add_offset")) then - offset = tofloat(val@add_offset) - else - offset = 0.0 - end if - xx = tofloat(val) * scalefac + offset - delete(val) - - idx = ind(toint(yy * 100 + mm).eq.date) - output(idx, :, :) = output(idx, :, :) + (/xx(0, :, :)/) - - delete(idx) - delete(xx) - delete(fname) - delete(f) - end if ; if VAR(vv) .eq. "clwvi" - end do - end do - - ; Set fill value - output = where(output.eq.fillval, output@_FillValue, output) - - ; Format coordinates - output!0 = "time" - output!1 = "lat" - output!2 = "lon" - format_coords(output, YEAR1 + "0101", YEAR2 + "1231", FREQ(vv)) - - ; Set variable attributes - tmp = format_variable(output, VAR(vv), CMOR_TABLE(vv)) - delete(output) - output = tmp - delete(tmp) - - ; Calculate coordinate bounds - bounds = guess_coord_bounds(output, FREQ(vv)) - - ; Set global attributes - gAtt = set_global_atts(OBSNAME, TIER, SOURCE, REF, COMMENT) - - ; Output file - DATESTR = YEAR1 + "01-" + YEAR2 + "12" - fout = output_dir_path + \ - str_join((/"OBS", OBSNAME, TYPE, VERSION, \ - MIP(vv), VAR(vv), DATESTR/), "_") + ".nc" - - ; Write variable - write_nc(fout, VAR(vv), output, bounds, gAtt) - delete(gAtt) - delete(output) - delete(bounds) - - end do - -end diff --git a/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py b/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py new file mode 100644 index 0000000000..bc13d99ea6 --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py @@ -0,0 +1,405 @@ +"""ESMValTool CMORizer for CLARA-AVHRR 
"""ESMValTool CMORizer for CLARA-AVHRR data.

Tier
    Tier 3: restricted dataset (registration required).

Source
    Copernicus Climate Data Store (CDS):
    https://cds.climate.copernicus.eu/datasets/satellite-cloud-properties?tab=download

Last access
    20251126

Download and processing instructions
    Select the following from the CDS:
      Product family: CLARA-A3
      Origin: EUMETSAT
      Variable: Cloud fraction, Cloud physical properties of the ice/liquid phase
      Climate data record type: TCDR
      Time aggregation: Daily mean / Monthly mean
      Year: select all
      Month: select all
      Day: select all
      Geographical area: Whole available region
    Put all daily files for one month (mm) of one year (yyyy) under a single
    directory "daily/", monthly files for one year (yyyy) under "monthly/".

    Note: you must accept the terms of use for CC-BY, EUMETSAT CM SAF, ESA CCI to be able
          to download the data from the CDS

    As of May 2026, CLARA-AVHRR (version A3) data on the CDS are only available until 2020.
    Alternatively, CLARA-AVHRR data could also be obtained from the EUMETSAT data store
    (https://data.eumetsat.int/).

    Alternatively, use the automatic downloader (recommended):
      esmvaltool data download CLARA-AVHRR --original-data-dir


Modification history
    20251126-lauer_axel: written.
"""

import datetime
import logging
from copy import deepcopy
from pathlib import Path

import cf_units
import iris
import numpy as np
from dask import array as da
from dateutil import relativedelta
from esmvalcore.cmor.table import CMOR_TABLES

from esmvaltool.cmorizers.data import utilities as utils

logger = logging.getLogger(__name__)


def _create_masked_cube(cube, year, month, day):
    """Return a copy of ``cube`` with masked data, time-stamped year-month-day.

    The copy keeps the coordinates and metadata of ``cube``; its single time
    point is replaced by the given date (00:00 UTC), expressed in the time
    unit and calendar of ``cube``.
    """
    masked_cube = cube.copy()
    # Mask every value greater than -1e20, i.e. effectively all valid data.
    masked_cube.data = da.ma.masked_greater(cube.core_data(), -1e20)

    # Read dataset time unit and calendar from file
    time_coord = masked_cube.coord("time")
    dataset_time_unit = str(time_coord.units)
    dataset_time_calendar = time_coord.units.calendar

    # Convert the requested date to the numeric time of the dataset
    newtime = datetime.datetime(
        year=year,
        month=month,
        day=day,
        tzinfo=datetime.UTC,
    )
    newtime = cf_units.date2num(
        newtime,
        dataset_time_unit,
        dataset_time_calendar,
    )
    time_coord.points = float(newtime)

    return masked_cube


def _fix_coordinates(cube, definition):
    """Rename and retype the T/X/Y coordinates according to the CMOR table.

    ``definition`` is the CMOR variable definition; only the coordinates it
    lists are touched. The cube is modified in place and returned.
    """
    axis2def = {"T": "time", "X": "longitude", "Y": "latitude"}

    for axis, coord_name in axis2def.items():
        coord_def = definition.coordinates.get(coord_name)
        if not coord_def:
            continue
        coord = cube.coord(axis=axis)
        if axis == "T":
            coord.convert_units("days since 1850-1-1 00:00:00.0")
        coord.standard_name = coord_def.standard_name
        coord.var_name = coord_def.out_name
        coord.long_name = coord_def.long_name
        coord.points = coord.core_points().astype("float64")
        if len(coord.points) > 1:
            # guess_bounds() refuses to overwrite existing bounds.
            if coord.bounds is not None:
                coord.bounds = None
            coord.guess_bounds()

    return cube


def _fill_time_gaps(cube_list, is_daily):
    """Return one cube per day (or month) of the year of the first cube.

    Dates without input data are filled with a fully masked copy of the
    first cube. If several cubes share a date, the first one is used.
    """
    first_time = cube_list[0].coord("time")
    year = first_time.units.num2date(first_time.points[0]).year

    if is_daily:
        step = relativedelta.relativedelta(days=1)
        date_format = "%Y-%m-%d"
    else:
        step = relativedelta.relativedelta(months=1)
        date_format = "%Y-%m"

    def _key(date):
        """Return the part of ``date`` that identifies a time slot."""
        if is_daily:
            return (date.year, date.month, date.day)
        return (date.year, date.month)

    # Index the available cubes by the date of their first time point.
    available = {}
    for cube in cube_list:
        timecoord = cube.coord("time")
        cubetime = timecoord.units.num2date(timecoord.points)
        available.setdefault(_key(cubetime[0]), cube)

    full_list = iris.cube.CubeList()
    loop_date = datetime.datetime(year, 1, 1, tzinfo=datetime.UTC)
    last_date = datetime.datetime(year, 12, 31, tzinfo=datetime.UTC)
    while loop_date <= last_date:
        cube = available.get(_key(loop_date))
        if cube is None:
            logger.debug(
                "No data available for %s",
                loop_date.strftime(date_format),
            )
            cube = _create_masked_cube(
                cube_list[0],
                loop_date.year,
                loop_date.month,
                loop_date.day,
            )
        full_list.append(cube)
        loop_date += step

    return full_list


def _extract_variable(cube_list, var, cfg, out_dir, is_daily):
    """CMORize one variable for one year and save it to ``out_dir``.

    Parameters
    ----------
    cube_list : list of iris.cube.Cube
        Input cubes of one year, one cube per available day/month. Must not
        be empty.
    var : dict
        Variable entry of the CMOR config (``short_name``, ``mip``, ``raw``).
    cfg : dict
        CMOR config; ``cfg["attributes"]`` provides the global attributes.
    out_dir : str or Path
        Output directory passed on to ``utils.save_variable``.
    is_daily : bool
        True for daily input data, False for monthly input data.
    """
    timefreq = "daily" if is_daily else "monthly"
    logger.info("CMORizing variable '%s' (%s)", var["short_name"], timefreq)

    attributes = deepcopy(cfg["attributes"])
    attributes["mip"] = var["mip"]
    attributes["raw"] = var["raw"]
    cmor_table = CMOR_TABLES[attributes["project_id"]]
    definition = cmor_table.get_variable(var["mip"], var["short_name"])

    # round latitude and longitude points to 3 digits to avoid rounding issues
    for cube in cube_list:
        loncoord = cube.coord("longitude")
        latcoord = cube.coord("latitude")
        loncoord.points = np.round(loncoord.core_points(), 3)
        latcoord.points = np.round(latcoord.core_points(), 3)

    # make sure there is one cube for every day (daily data) or every month
    # (monthly data) of the year; gaps are filled with masked cubes
    full_list = _fill_time_gaps(cube_list, is_daily)

    iris.util.unify_time_units(full_list)
    cube = full_list.concatenate_cube()
    cube.coord("time").points = (
        cube.coord("time").core_points().astype("float64")
    )

    # Set correct names
    cube.var_name = definition.short_name
    cube.standard_name = definition.standard_name
    cube.long_name = definition.long_name

    # Fix units
    cube.units = definition.units

    # Roll longitude: shift the points by 180 degrees and roll the data by
    # half the number of longitudes along the last axis
    cube.coord("longitude").points = cube.coord("longitude").points + 180.0
    nlon = len(cube.coord("longitude").points)
    cube.data = da.roll(cube.core_data(), nlon // 2, axis=-1)
    cube.attributes.update(
        {"geospatial_lon_min": "0", "geospatial_lon_max": "360"},
    )

    # Fix coordinates
    cube = _fix_coordinates(cube, definition)
    cube.coord("latitude").attributes = None
    cube.coord("longitude").attributes = None

    # Save results
    logger.debug("Saving cube\n%s", cube)
    logger.debug("Setting time dimension to UNLIMITED while saving!")
    version = attributes["version"]
    if is_daily:
        attributes["version"] = f"{version}-DAILY"
    else:
        attributes["version"] = f"{version}-MONTHLY"

    utils.save_variable(
        cube,
        cube.var_name,
        out_dir,
        attributes,
        unlimited_dimensions=["time"],
    )

    logger.info(
        "Finished CMORizing variable '%s' (%s) for current year",
        var["short_name"],
        timefreq,
    )


def _load_files(var, in_dir, year, daily):
    """Load all input files of one variable for one year.

    If the variable entry defines ``operator: sum``, the cubes of the raw
    variables are added up for matching time coordinates.

    Parameters
    ----------
    var : dict
        Variable entry of the CMOR config. ``raw`` and ``filename`` may be a
        single string or a list of strings.
    in_dir : str or Path
        Directory containing the ``daily/<yyyymm>`` and ``monthly/<yyyy>``
        sub-directories.
    year : int
        Year to load.
    daily : bool
        True to load daily files, False to load monthly files.

    Returns
    -------
    list of iris.cube.Cube
        Loaded (and, if requested, summed) cubes; empty if no file matches.

    Raises
    ------
    ValueError
        If an operator other than "sum" is configured.
    """
    varlist = (
        var["raw"] if isinstance(var["raw"], list) else var["raw"].split()
    )
    filelist = (
        var["filename"]
        if isinstance(var["filename"], list)
        else var["filename"].split()
    )

    # So far, only the "sum" of 2 or more variables is implemented
    operator = var.get("operator", "")
    if operator and operator != "sum":
        errstr = (
            f"Unsupported operator '{operator}' configured for raw "
            f"variable(s) {', '.join(varlist)}: only 'sum' is implemented"
        )
        raise ValueError(errstr)

    # create a list of filenames to be read
    in_files = []
    for filemask in filelist:
        if daily:
            for month in range(1, 13):
                srcdir = Path(in_dir) / f"daily/{year}{month:02d}"
                filepattern = filemask.format(year=year, month=f"{month:02d}")
                in_files.extend(
                    [str(p) for p in srcdir.glob(pattern=filepattern)],
                )
        else:
            srcdir = Path(in_dir) / f"monthly/{year}"
            filepattern = filemask.format(year=year)
            in_files.extend([str(p) for p in srcdir.glob(pattern=filepattern)])

    if not in_files:
        # Nothing to load; the caller treats an empty list as "no data".
        return iris.cube.CubeList()

    if len(varlist) == 1:
        cube_list = iris.load(in_files, varlist[0])
    else:
        cube_list = []
        for raw_name in varlist:
            cube_list.extend(iris.load(in_files, raw_name))

    # (global) attributes to remove
    drop_attrs = [
        "date_created",
        "time_coverage_start",
        "time_coverage_end",
        "CMSAF_included_Daily_Means",
        "CMSAF_platform_and_orbits",
        "platform",
    ]

    # remove global attributes that might prevent concatenation
    for cube in cube_list:
        for attr in drop_attrs:
            cube.attributes.pop(attr, None)

    if operator != "sum":
        return cube_list

    # Add the cubes of all further raw variables to the cubes of the first
    # raw variable that has data, matching them by time coordinate.
    cube_list_sum = []
    for raw_name in varlist:
        sublist = [c for c in cube_list if c.var_name == raw_name]
        if not cube_list_sum:
            cube_list_sum = sublist
            continue
        logger.debug("Adding cubes (%s)...\n", raw_name)
        for cube in sublist:
            # get time of cube to be added to the sum
            timecoord = cube.coord("time")
            # find sum cube with matching time
            for sumcube in cube_list_sum:
                if timecoord == sumcube.coord("time"):
                    # in-place addition updates the cube held in the list
                    sumcube += cube
                    logger.debug(
                        "cube added for time %s",
                        timecoord.units.num2date(timecoord.points),
                    )
                    break
            else:
                logger.warning(
                    "No matching time found to add %s for time %s; "
                    "the sum is incomplete for this time step",
                    raw_name,
                    timecoord.units.num2date(timecoord.points),
                )

    return cube_list_sum


def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date):
    """Cmorize CLARA-AVHRR dataset.

    Parameters
    ----------
    in_dir : str or Path
        Directory with the raw data (``daily/`` and ``monthly/`` folders).
    out_dir : str or Path
        Directory the CMORized files are written to.
    cfg : dict
        CMOR config with the entries ``attributes`` and ``variables``.
    cfg_user : object
        User configuration (not used by this CMORizer).
    start_date : datetime or None
        Start of the period to process. If None, monthly data start in 1979
        and daily data in 2020. Only the year is used.
    end_date : datetime or None
        End of the period to process. If None, 2020 is used. Only the year
        is used.
    """
    glob_attrs = cfg["attributes"]
    glob_version = glob_attrs.get("version", "")

    logger.info(
        "Starting cmorization for tier%s OBS files: %s",
        glob_attrs["tier"],
        glob_attrs["dataset_id"],
    )
    logger.info("Input data from: %s", in_dir)
    logger.info("Output will be written to: %s", out_dir)
    logger.info("CMORizing CLARA-AVHRR version %s", glob_version)

    if start_date is None:
        start_date_mm = datetime.datetime(1979, 1, 1, tzinfo=datetime.UTC)
        start_date_dd = datetime.datetime(2020, 1, 1, tzinfo=datetime.UTC)
    else:
        start_date_mm = start_date
        start_date_dd = start_date

    if end_date is None:
        end_date_mm = datetime.datetime(2020, 12, 31, tzinfo=datetime.UTC)
        end_date_dd = datetime.datetime(2020, 12, 31, tzinfo=datetime.UTC)
    else:
        end_date_mm = end_date
        end_date_dd = end_date

    for var_name, var in cfg["variables"].items():
        var["var_name"] = var_name

        # _extract_variable copies cfg["attributes"], so per-variable
        # settings have to be written into the shared dict here.
        glob_attrs["mip"] = var["mip"]
        glob_attrs["version"] = var.get("version", glob_version)

        daily = "day" in var_name
        if daily:
            logger.info("Input data for %s is daily data", var_name)
            first_year = start_date_dd.year
            last_year = end_date_dd.year
        else:
            logger.info("Input data for %s is monthly data", var_name)
            first_year = start_date_mm.year
            last_year = end_date_mm.year

        for year in range(first_year, last_year + 1):
            logger.info("Processing year %s", year)
            cube_list = _load_files(var, in_dir, year, daily)

            if not cube_list:
                logger.info(
                    "%d: no data found for variable %s",
                    year,
                    var_name,
                )
            else:
                _extract_variable(cube_list, var, cfg, out_dir, daily)
{Karlsson, Karl-Göran and Riihelä, Aku and Trentmann, Jörg and Stengel, Martin and Solodovnik, Irina and Meirink, Jan Fokke and Devasthale, Abhay and Jääskeläinen, Emmihenna and Kallio-Myers, Viivi and Eliasson, Salomon and Benas, Nikos and Johansson, Erik and Stein, Diana and Finkensieper, Stephan and Håkansson, Nina and Akkermans, Tom and Clerbaux, Nicolas and Selbach, Nathalie and Schröder, Marc and Hollmann, Rainer}, + title = {CLARA-A3: CM SAF cLoud, Albedo and surface RAdiation dataset from AVHRR data - Edition 3}, + journal = {Satellite Application Facility on Climate Monitoring (CM SAF)} +}