From f3e07d489250bf1561ffa32c89b268261c709028 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Wed, 26 Nov 2025 15:43:25 +0100 Subject: [PATCH 01/12] first version of CLARA-AVHRR CMORized (C3S version) --- doc/sphinx/source/input.rst | 2 +- .../data/cmor_config/CLARA-AVHRR.yml | 44 ++++ .../data/downloaders/datasets/clara_avhrr.py | 151 +++++++++++ .../data/formatters/datasets/clara_avhrr.ncl | 247 ------------------ .../recipes/examples/recipe_check_obs.yml | 9 +- esmvaltool/references/clara-a3.bibtex | 9 + 6 files changed, 208 insertions(+), 254 deletions(-) create mode 100644 esmvaltool/cmorizers/data/cmor_config/CLARA-AVHRR.yml create mode 100644 esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py delete mode 100644 esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.ncl create mode 100644 esmvaltool/references/clara-a3.bibtex diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst index c3b3ca2921..bc01d31d22 100644 --- a/doc/sphinx/source/input.rst +++ b/doc/sphinx/source/input.rst @@ -282,7 +282,7 @@ A list of the datasets for which a CMORizers is available is provided in the fol | CERES-SYN1deg | rlds, rldscs, rlus, rluscs, rlut, rlutcs, rsds, rsdscs, rsus, rsuscs, rsut, rsutcs (3hr) | 3 | NCL | | | rlds, rldscs, rlus, rlut, rlutcs, rsds, rsdt, rsus, rsut, rsutcs (Amon) | | | +----------------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ -| CLARA-AVHRR | clt, clivi, clwvi, lwp (Amon) | 3 | NCL | +| CLARA-AVHRR | clt, clivi, lwp (Amon) | 3 | Python | +----------------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | CLOUDSAT-L2 | clw, clivi, clwvi, lwp (Amon) | 3 | NCL | 
+----------------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ diff --git a/esmvaltool/cmorizers/data/cmor_config/CLARA-AVHRR.yml b/esmvaltool/cmorizers/data/cmor_config/CLARA-AVHRR.yml new file mode 100644 index 0000000000..f065187577 --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/CLARA-AVHRR.yml @@ -0,0 +1,44 @@ +# Common global attributes for Cmorizer output +attributes: + dataset_id: CLARA-AVHRR + version: A3 + tier: 2 + modeling_realm: sat + project_id: OBS6 + source: "https://cds.climate.copernicus.eu/datasets/satellite-cloud-properties" + reference: "clara-a3" + comment: "" + +variables: + # monthly means + clivi_month: + mip: Amon + short_name: clivi + raw: iwp_allsky + filename: IWPmm{year}*.nc + clt_month: + mip: Amon + short_name: clt + raw: cfc + filename: CFCmm{year}*.nc + lwp_month: + mip: Amon + short_name: lwp + raw: lwp_allsky + filename: LWPmm{year}*.nc + # daily means + clivi_day: + mip: CFday + short_name: clivi + raw: iwp_allsky + filename: IWPdm{year}{month}*.nc + clt_day: + mip: CFday + short_name: clt + raw: cfc + filename: CFCdm{year}{month}*.nc + lwp_day: + mip: CFday + short_name: lwp + raw: lwp_allsky + filename: LWPdm{year}{month}*.nc diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py b/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py new file mode 100644 index 0000000000..479ffa8095 --- /dev/null +++ b/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py @@ -0,0 +1,151 @@ +"""Script to download CM SAF CLARA-AHRR data from the CDS.""" + +import gzip +import logging +import shutil +import zipfile +from datetime import datetime +from pathlib import Path + +import cdsapi + +logger = logging.getLogger(__name__) + + +def download_dataset( + config, dataset, dataset_info, start_date, end_date, overwrite +): + """Download CLARA-AVHRR dataset using CDS API. 
+ + - An ECMWF account is needed to download the datasets from + https://cds.climate.copernicus.eu/datasets/satellite-cloud-properties + - The file named .cdspirc containing the key associated to + the ECMWF account needs to be saved in user's ${HOME} directory. + - All the files will be saved in ${RAWOBS}/Tier2/CLARA-AVHRR. + """ + cds_url = "https://cds.climate.copernicus.eu/api" + + raw_obs_dir = Path(config["rootpath"]["RAWOBS"][0]) + output_folder = raw_obs_dir / f"Tier{dataset_info['tier']}" / dataset + output_folder.mkdir(parents=True, exist_ok=True) + + if start_date is None: + start_date = datetime(1979, 1, 1) + if end_date is None: + end_date = datetime(2020, 12, 31) + + requests = {} + + # The CDS requests for daily values are done for each month separately + # to avoid the error "cost limits exceeded". + + for year in range(start_date.year, end_date.year + 1): + requests.update( + { + "clivi_monthly_" + str(year): { + "product_family": "clara_a3", + "origin": "eumetsat", + "variable": "cloud_physical_properties_of_the_ice_phase", + "climate_data_record_type": "thematic_climate_data_record", + "time_aggregation": "monthly_mean", + "year": str(year), + "month": [f"{m:02d}" for m in range(1, 13)], + }, + "clt_monthly_" + str(year): { + "product_family": "clara_a3", + "origin": "eumetsat", + "variable": "cloud_fraction", + "climate_data_record_type": "thematic_climate_data_record", + "time_aggregation": "monthly_mean", + "year": str(year), + "month": [f"{m:02d}" for m in range(1, 13)], + }, + "lwp_monthly_" + str(year): { + "product_family": "clara_a3", + "origin": "eumetsat", + "variable": "cloud_physical_properties_of_the_liquid_phase", + "climate_data_record_type": "thematic_climate_data_record", + "time_aggregation": "monthly_mean", + "year": str(year), + "month": [f"{m:02d}" for m in range(1, 13)], + }, + } + ) + for month in range(1, 13): + requests.update( + { + "clivi_daily_" + str(year) + f"{month:02d}": { + "product_family": "clara_a3", + 
"origin": "eumetsat", + "variable": "cloud_physical_properties_of_the_ice_phase", + "climate_data_record_type": "thematic_climate_data_record", + "time_aggregation": "daily_mean", + "year": str(year), + "month": f"{month:02d}", + "day": [f"{m:02d}" for m in range(1, 32)], + }, + "clt_daily_" + str(year) + f"{month:02d}": { + "product_family": "clara_a3", + "origin": "eumetsat", + "variable": "cloud_fraction", + "climate_data_record_type": "thematic_climate_data_record", + "time_aggregation": "daily_mean", + "year": str(year), + "month": f"{month:02d}", + "day": [f"{m:02d}" for m in range(1, 32)], + }, + "lwp_daily_" + str(year) + f"{month:02d}": { + "product_family": "clara_a3", + "origin": "eumetsat", + "variable": "cloud_physical_properties_of_the_liquid_phase", + "climate_data_record_type": "thematic_climate_data_record", + "time_aggregation": "daily_mean", + "year": str(year), + "month": f"{month:02d}", + "day": [f"{m:02d}" for m in range(1, 32)], + }, + } + ) + + cds_client = cdsapi.Client(cds_url) + + for var_name, request in requests.items(): + datestr = var_name.split("_")[2] + if "daily" in var_name: + outdir = output_folder / f"daily/{datestr}/" + else: + outdir = output_folder / f"monthly/{datestr}/" + outdir.mkdir(parents=True, exist_ok=True) + + logger.info("Downloading %s data to %s", var_name, outdir) + + file_path = outdir / f"{var_name}.gz" + + if file_path.exists() and not overwrite: + logger.info( + "File %s already exists. 
Skipping download.", + file_path, + ) + continue + + try: + cds_client.retrieve( + "satellite-cloud-properties", + request, + file_path.as_posix(), + ) + # Handle both .gz and .zip files + with open(file_path, "rb") as file: + magic = file.read(2) + + if magic == b"PK": # ZIP file signature + logger.info("Detected ZIP file: %s", file_path) + with zipfile.ZipFile(file_path, "r") as zip_ref: + zip_ref.extractall(outdir) + else: + logger.info("Detected GZIP file: %s", file_path) + with gzip.open(file_path, "rb") as f_in: + with open(outdir / file_path.stem, "wb") as f_out: + shutil.copyfileobj(f_in, f_out) + except Exception as ex: + logger.info("%s: no data downloaded for %s", type(ex), var_name) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.ncl b/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.ncl deleted file mode 100644 index 020e2cf9e6..0000000000 --- a/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.ncl +++ /dev/null @@ -1,247 +0,0 @@ -; ############################################################################# -; ESMValTool CMORizer for CM SAF CLARA-AHRR v2 data -; ############################################################################# -; -; Tier -; Tier 3: restricted dataset. -; -; Source -; https://wui.cmsaf.eu/ -; -; Last access -; 2021-03-22 -; -; Download and processing instructions -; 1) Create ("register") an user account at -; https://wui.cmsaf.eu/safira/action/viewLogin?menuName=NUTZER_HOME -; 2) login (same URL as above) -; 3) Search data using search form at -; https://wui.cmsaf.eu/safira/action/viewProduktHome?menuName=PRODUKT_HOME -; -; - Product group: Climate Data Records -; - Product family: CLARA-A ed. 2.1 -; - Product name: CFC - Factional cloud cover -; IWP - Ice water path -; LWP - Liquid water path -; - Area: Global -; - Temporal resolution: Monthly -; -; 4) Select "CLARA-A ed. 2.1 AVHRR on polar orbiting satellites" from -; list of results. 
-; 5) Click on "Add to order cart" -; 6) Follow download instructions in automatic email received when data -; are ready for download. -; 7) Untar all .tar files into a single directory. -; -; Modification history -; 20230818-lauer_axel: added output of clwvi (in addition to iwp, lwp) -; 20210506-lauer_axel: output of lwp instead of clwvi -; 20210323-lauer_axel: written. -; -; ############################################################################# -loadscript(getenv("esmvaltool_root") + \ - "/data/formatters/interface.ncl") - -begin - - ; Script name (for logger) - DIAG_SCRIPT = "cmorize_obs_clara_avhrr.ncl" - - ; Source name - OBSNAME = "CLARA-AVHRR" - - ; Tier - TIER = 3 - - ; Period - YEAR1 = 1982 - YEAR2 = 2018 - - ; Selected variable (standard name) - VAR = (/"clt", "clivi", "lwp", "clwvi"/) - - ; Name in the raw data - NAME = (/"cfc", "iwp_allsky", "lwp_allsky", "iwp_allsky"/) - - ; Filename base - FNBASE = (/"CFCmm", "IWPmm", "LWPmm", "IWPmm"/) - - ; Conversion factor - ; Remark: total cloud cover (CFC) is reported as "1" but is actually "%" - ; IWP and LWP use scale_factor to convert to kg/m2 - ; CONV = (/1., 1., 1., 1./) - - ; MIP - MIP = (/"Amon", "Amon", "Amon", "Amon"/) - - ; Frequency - FREQ = (/"mon", "mon", "mon", "mon"/) - - ; CMOR table - CMOR_TABLE = getenv("cmor_tables") + \ - (/"/cmip5/Tables/CMIP5_Amon", \ - "/cmip5/Tables/CMIP5_Amon", \ - "/custom/CMOR_lwp.dat", \ - "/cmip5/Tables/CMIP5_Amon"/) - - ; Type - TYPE = "sat" - - ; Version - VERSION = "V002-01" - - ; Global attributes - SOURCE = "https://wui.cmsaf.eu/" - REF = "https://doi.org/10.5676/EUM_SAF_CM/CLARA_AVHRR/V002_01" - COMMENT = "The CM SAF data are owned by EUMETSAT and are available to " \ - + "all users free of charge and with no conditions to use. 
If you wish " \ - + "to use these products, EUMETSAT's copyright credit must be shown by " \ - + "displaying the words 'Copyright (c) (2020) EUMETSAT' under/in each " \ - + "of these SAF Products used in a project or shown in a publication " \ - + "or website. Please follow the citation guidelines given at " \ - + "https://doi.org/10.5676/EUM_SAF_CM/CLARA_AVHRR/V002_01 and also " \ - + "register as a user at http://cm-saf.eumetsat.int/ to receive latest " \ - + "information on CM SAF services and to get access to the CM SAF User " \ - + "Help Desk." - -end - -begin - - do vv = 0, dimsizes(VAR) - 1 - - log_info("Processing " + VAR(vv) + " (" + MIP(vv) + ")") - - time = create_timec(YEAR1, YEAR2) - date = cd_calendar(time, 1) - - ; Create timeseries - do yy = YEAR1, YEAR2 - - syear = sprinti("%i", yy) - do mm = 1, 12 - - smonth = sprinti("%0.2i", mm) - - ; Read file - fname = systemfunc("ls " + input_dir_path + FNBASE(vv) + \ - syear + smonth + "01*.nc") - - ; No files found - if (ismissing(fname)) then - log_info("Warning: no input data found for variable " + VAR(vv) + \ - " (" + syear + smonth + ")") - continue - end if - - ; Extract data - f = addfile(fname, "r") - val = f->$NAME(vv)$ - if (isatt(val, "scale_factor")) then - scalefac = tofloat(val@scale_factor) - else - scalefac = 1.0 - end if - if (isatt(val, "add_offset")) then - offset = tofloat(val@add_offset) - else - offset = 0.0 - end if - xx = tofloat(val) * scalefac + offset - delete(val) - - ; Assign to global array - if (.not.isdefined("output")) then - dims = dimsizes(xx) - dims(0) = dimsizes(time) - output = new(dims, float) - output!0 = "time" - output&time = time - output!1 = "lat" - output&lat = f->lat - output!2 = "lon" - output&lon = f->lon - fillval = xx@_FillValue - end if - output(ind(toint(yy * 100 + mm).eq.date), :, :) = (/xx/) - - delete(fname) - delete(f) - delete(xx) - - ; *** calculate clwvi (lwp + iwp) *** - - if (VAR(vv) .eq. 
"clwvi") then - fname = systemfunc("ls " + input_dir_path + "LWPmm" + \ - syear + smonth + "01*.nc") - - ; No files found - if (ismissing(fname)) then - log_info("Warning: input data incomplete for variable " + \ - VAR(vv) + " (" + syear + smonth + ")") - continue - end if - - ; Extract data - f = addfile(fname, "r") - val = f->lwp_allsky - if (isatt(val, "scale_factor")) then - scalefac = tofloat(val@scale_factor) - else - scalefac = 1.0 - end if - if (isatt(val, "add_offset")) then - offset = tofloat(val@add_offset) - else - offset = 0.0 - end if - xx = tofloat(val) * scalefac + offset - delete(val) - - idx = ind(toint(yy * 100 + mm).eq.date) - output(idx, :, :) = output(idx, :, :) + (/xx(0, :, :)/) - - delete(idx) - delete(xx) - delete(fname) - delete(f) - end if ; if VAR(vv) .eq. "clwvi" - end do - end do - - ; Set fill value - output = where(output.eq.fillval, output@_FillValue, output) - - ; Format coordinates - output!0 = "time" - output!1 = "lat" - output!2 = "lon" - format_coords(output, YEAR1 + "0101", YEAR2 + "1231", FREQ(vv)) - - ; Set variable attributes - tmp = format_variable(output, VAR(vv), CMOR_TABLE(vv)) - delete(output) - output = tmp - delete(tmp) - - ; Calculate coordinate bounds - bounds = guess_coord_bounds(output, FREQ(vv)) - - ; Set global attributes - gAtt = set_global_atts(OBSNAME, TIER, SOURCE, REF, COMMENT) - - ; Output file - DATESTR = YEAR1 + "01-" + YEAR2 + "12" - fout = output_dir_path + \ - str_join((/"OBS", OBSNAME, TYPE, VERSION, \ - MIP(vv), VAR(vv), DATESTR/), "_") + ".nc" - - ; Write variable - write_nc(fout, VAR(vv), output, bounds, gAtt) - delete(gAtt) - delete(output) - delete(bounds) - - end do - -end diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index 403f43fb92..4da6666eb4 100644 --- a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -1362,16 +1362,13 @@ diagnostics: description: CLARA-AVHRR check 
variables: clt: - mip: Amon clivi: - mip: Amon - clwvi: - mip: Amon lwp: - mip: Amon additional_datasets: - {dataset: CLARA-AVHRR, project: OBS, tier: 3, - type: sat, version: V002-01, start_year: 1982, end_year: 2018} + type: sat, version: A3-DAILY, mip: CFday, start_year: 2020, end_year: 2020} + - {dataset: CLARA-AVHRR, project: OBS, tier: 3, + type: sat, version: A3-MONTHLY, mip: Amon, start_year: 2020, end_year: 2020} scripts: null diff --git a/esmvaltool/references/clara-a3.bibtex b/esmvaltool/references/clara-a3.bibtex new file mode 100644 index 0000000000..ae896fc2b1 --- /dev/null +++ b/esmvaltool/references/clara-a3.bibtex @@ -0,0 +1,9 @@ +@article{clara-avhrr, + doi = {10.5676/EUM_SAF_CM/CLARA_AVHRR/V003}, + url = {https://doi.org/10.5676/EUM_SAF_CM/CLARA_AVHRR/V003}, + year = 2023, + publisher = {Satellite Application Facility on Climate Monitoring (CM SAF)}, + author = {Karlsson, Karl-Göran and Riihelä, Aku and Trentmann, Jörg and Stengel, Martin and Solodovnik, Irina and Meirink, Jan Fokke and Devasthale, Abhay and Jääskeläinen, Emmihenna and Kallio-Myers, Viivi and Eliasson, Salomon and Benas, Nikos and Johansson, Erik and Stein, Diana and Finkensieper, Stephan and Håkansson, Nina and Akkermans, Tom and Clerbaux, Nicolas and Selbach, Nathalie and Schröder, Marca and Hollmann, Rainer}, + title = {CLARA-A3: CM SAF cLoud, Albedo and surface RAdiation dataset from AVHRR data - Edition 3}, + journal = {Satellite Application Facility on Climate Monitoring (CM SAF)} +} From e4d66baf17703eca73311bfc6b176e77f7756074 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Wed, 13 May 2026 15:12:28 +0200 Subject: [PATCH 02/12] snapshot --- esmvaltool/cmorizers/data/datasets.yml | 36 +++++++--------- .../data/downloaders/datasets/clara_avhrr.py | 43 +++++++++++++------ 2 files changed, 46 insertions(+), 33 deletions(-) diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index 5997f13864..c2788db55a 100644 --- 
a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -233,28 +233,24 @@ datasets: CLARA-AVHRR: tier: 3 - source: https://wui.cmsaf.eu/ - last_access: 2021-03-22 + source: https://cds.climate.copernicus.eu/datasets/satellite-cloud-properties + last_access: 2026-05-12 info: | Download and processing instructions - 1) Create ("register") an user account at - https://wui.cmsaf.eu/safira/action/viewLogin?menuName=NUTZER_HOME - 2) login (same URL as above) - 3) Search data using search form at - https://wui.cmsaf.eu/safira/action/viewProduktHome?menuName=PRODUKT_HOME - - Product group: Climate Data Records - - Product family: CLARA-A ed. 2.1 - - Product name: CFC - Factional cloud cover - IWP - Ice water path - LWP - Liquid water path - - Area: Global - - Temporal resolution: Monthly - 4) Select "CLARA-A ed. 2.1 AVHRR on polar orbiting satellites" from - list of results. - 5) Click on "Add to order cart" - 6) Follow download instructions in automatic email received when data - are ready for download. - 7) Untar all .tar files into a single directory. + https://cds.climate.copernicus.eu/datasets/satellite-cloud-properties?tab=download + Put all daily files for one year (yyyy) under a single directory "daily/", + monthly files for one year (yyyy) under "monthly/". 
+ Note: you must accept the terms of use for CC-BY, EUMETSAT CM SAF, ESA CCI + Select the following from the CDS: + Product family: CLARA-A3 + Origin: EUMETSAT + Variable: Cloud fraction, Cloud physical properties of the ice/liquid phase + Climate data record type: TCDR + Time aggregation: Daily mean / Monthly mean + Year: select all + Month: select all + Day: select all + Geographical area: Whole available region CLOUDSAT-L2: tier: 3 diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py b/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py index 479ffa8095..3ac6e87170 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py @@ -5,7 +5,6 @@ import shutil import zipfile from datetime import datetime -from pathlib import Path import cdsapi @@ -13,33 +12,49 @@ def download_dataset( - config, dataset, dataset_info, start_date, end_date, overwrite + original_data_dir, + dataset, + dataset_info, + start_date, + end_date, + overwrite, ): - """Download CLARA-AVHRR dataset using CDS API. - - - An ECMWF account is needed to download the datasets from - https://cds.climate.copernicus.eu/datasets/satellite-cloud-properties - - The file named .cdspirc containing the key associated to - the ECMWF account needs to be saved in user's ${HOME} directory. - - All the files will be saved in ${RAWOBS}/Tier2/CLARA-AVHRR. + """Download dataset. + + Parameters + ---------- + original_data_dir : Path + Directory where original data will be stored. 
+ dataset : str + Name of the dataset + dataset_info : dict + Dataset information from the datasets.yml file + start_date : datetime + Start of the interval to download + end_date : datetime + End of the interval to download + overwrite : bool + Overwrite already downloaded files """ cds_url = "https://cds.climate.copernicus.eu/api" - raw_obs_dir = Path(config["rootpath"]["RAWOBS"][0]) + raw_obs_dir = original_data_dir output_folder = raw_obs_dir / f"Tier{dataset_info['tier']}" / dataset output_folder.mkdir(parents=True, exist_ok=True) if start_date is None: - start_date = datetime(1979, 1, 1) + start_date_mm = datetime(1979, 1, 1, tzinfo=datetime.UTC) + start_date_dd = datetime(2020, 1, 1, tzinfo=datetime.UTC) if end_date is None: - end_date = datetime(2020, 12, 31) + end_date_mm = datetime(2020, 12, 31, tzinfo=datetime.UTC) + end_date_dd = datetime(2020, 12, 31, tzinfo=datetime.UTC) requests = {} # The CDS requests for daily values are done for each month separately # to avoid the error "cost limits exceeded". 
- for year in range(start_date.year, end_date.year + 1): + for year in range(start_date_mm.year, end_date_mm.year + 1): requests.update( { "clivi_monthly_" + str(year): { @@ -71,6 +86,8 @@ def download_dataset( }, } ) + + for year in range(start_date_dd.year, end_date_dd.year + 1): for month in range(1, 13): requests.update( { From cec2a1c0d010f140f5a116db15f56c470a589a18 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Wed, 13 May 2026 16:22:44 +0200 Subject: [PATCH 03/12] updates for recent main --- .../data/cmor_config/CLARA-AVHRR.yml | 2 +- .../data/downloaders/datasets/clara_avhrr.py | 10 +- .../data/formatters/datasets/clara_avhrr.py | 329 ++++++++++++++++++ 3 files changed, 335 insertions(+), 6 deletions(-) create mode 100644 esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py diff --git a/esmvaltool/cmorizers/data/cmor_config/CLARA-AVHRR.yml b/esmvaltool/cmorizers/data/cmor_config/CLARA-AVHRR.yml index f065187577..de915f3995 100644 --- a/esmvaltool/cmorizers/data/cmor_config/CLARA-AVHRR.yml +++ b/esmvaltool/cmorizers/data/cmor_config/CLARA-AVHRR.yml @@ -3,7 +3,7 @@ attributes: dataset_id: CLARA-AVHRR version: A3 tier: 2 - modeling_realm: sat + type: sat project_id: OBS6 source: "https://cds.climate.copernicus.eu/datasets/satellite-cloud-properties" reference: "clara-a3" diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py b/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py index 3ac6e87170..d56b6ed23a 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py @@ -1,10 +1,10 @@ """Script to download CM SAF CLARA-AHRR data from the CDS.""" +import datetime import gzip import logging import shutil import zipfile -from datetime import datetime import cdsapi @@ -43,11 +43,11 @@ def download_dataset( output_folder.mkdir(parents=True, exist_ok=True) if start_date is None: - start_date_mm = datetime(1979, 1, 1, tzinfo=datetime.UTC) - 
start_date_dd = datetime(2020, 1, 1, tzinfo=datetime.UTC) + start_date_mm = datetime.datetime(1979, 1, 1, tzinfo=datetime.UTC) + start_date_dd = datetime.datetime(2020, 1, 1, tzinfo=datetime.UTC) if end_date is None: - end_date_mm = datetime(2020, 12, 31, tzinfo=datetime.UTC) - end_date_dd = datetime(2020, 12, 31, tzinfo=datetime.UTC) + end_date_mm = datetime.datetime(2020, 12, 31, tzinfo=datetime.UTC) + end_date_dd = datetime.datetime(2020, 12, 31, tzinfo=datetime.UTC) requests = {} diff --git a/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py b/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py new file mode 100644 index 0000000000..9fe740ab7e --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py @@ -0,0 +1,329 @@ +"""ESMValTool CMORizer for CLARA-AVHRR data. + +Tier + Tier 3: restricted dataset (registration required). + +Source + Copernicus Climate Data Store (CDS): + https://cds.climate.copernicus.eu/datasets/satellite-cloud-properties?tab=download + +Last access + 20251126 + +Download and processing instructions + Select the following from the CDS: + Product family: CLARA-A3 + Origin: EUMETSAT + Variable: Cloud fraction, Cloud physical properties of the ice/liquid phase + Climate data record type: TCDR + Time aggregation: Daily mean / Monthly mean + Year: select all + Month: select all + Day: select all + Geographical area: Whole available region + Put all daily files for one year (yyyy) under a single directory "daily/", + monthly files for one year (yyyy) under "monthly/". + Note: you must accept the terms of use for CC-BY, EUMETSAT CM SAF, ESA CCI + + Alternatively, use the automatic downloader (recommended): + esmvaltool data download CLARA-AVHRR + + +Modification history + 20251126-lauer_axel: written. 
+""" + +import datetime +import glob +import logging +import os +from copy import deepcopy + +import cf_units +import iris +import numpy as np +from dask import array as da +from dateutil import relativedelta +from esmvalcore.cmor.table import CMOR_TABLES + +from esmvaltool.cmorizers.data import utilities as utils + +logger = logging.getLogger(__name__) + + +def _create_masked_cube(cube, year, month, day): + """Create cube containing only nan from existing cube.""" + masked_cube = cube.copy() + masked_cube.data = da.ma.masked_greater(cube.core_data(), -1e20) + + # Read dataset time unit and calendar from file + dataset_time_unit = str(masked_cube.coord("time").units) + dataset_time_calender = masked_cube.coord("time").units.calendar + # Convert datetime + newtime = datetime.datetime(year=year, month=month, day=day) + newtime = cf_units.date2num( + newtime, + dataset_time_unit, + dataset_time_calender, + ) + masked_cube.coord("time").points = float(newtime) + + return masked_cube + + +def _fix_coordinates(cube, definition): + """Fix coordinates.""" + axis2def = {"T": "time", "X": "longitude", "Y": "latitude"} + axes = ["T", "X", "Y"] + + for axis in axes: + coord_def = definition.coordinates.get(axis2def[axis]) + if coord_def: + coord = cube.coord(axis=axis) + if axis == "T": + coord.convert_units("days since 1850-1-1 00:00:00.0") + coord.standard_name = coord_def.standard_name + coord.var_name = coord_def.out_name + coord.long_name = coord_def.long_name + coord.points = coord.core_points().astype("float64") + if len(coord.points) > 1: + if coord.bounds is not None: + coord.bounds = None + coord.guess_bounds() + + return cube + + +def _extract_variable(in_files, var, cfg, out_dir, is_daily): + if is_daily: + timefreq = "daily" + else: + timefreq = "monthly" + logger.info("CMORizing variable '%s' (%s)", var["short_name"], timefreq) + attributes = deepcopy(cfg["attributes"]) + attributes["mip"] = var["mip"] + attributes["raw"] = var["raw"] + cmor_table = 
CMOR_TABLES[attributes["project_id"]] + definition = cmor_table.get_variable(var["mip"], var["short_name"]) + + # load all input files (1 year) into 1 cube + # --> drop attributes that differ among input files + cube_list = iris.load(in_files, var["raw"]) + + # (global) attributes to remove + drop_attrs = [ + "date_created", + "time_coverage_start", + "time_coverage_end", + "CMSAF_included_Daily_Means", + "CMSAF_platform_and_orbits", + "platform", + ] + + for cube in cube_list: + for attr in drop_attrs: + if attr in cube.attributes: + cube.attributes.pop(attr) + + # make sure there is one cube for every day (daily data) or + # every month (monthly data) of the year + # (print debug info about missing days/months and add cube with + # nan to fill gaps + + full_list = iris.cube.CubeList() + time_list = [] + + # round latitude and longitude points to 3 digits to avoid rounding issues + for cube in cube_list: + loncoord = cube.coord("longitude") + latcoord = cube.coord("latitude") + loncoord.points = np.round(loncoord.core_points(), 3) + latcoord.points = np.round(latcoord.core_points(), 3) + + # create list of available days/months ('time_list') + year0 = 0 + for cube in cube_list: + timecoord = cube.coord("time") + if year0 == 0: + year0 = timecoord.units.num2date(timecoord.points[0]).year + cubetime = timecoord.units.num2date(timecoord.points) + time_list.append(cubetime) + + # create cube list for every day/month of the year by adding + # cubes containing only nan to fill possible gaps + + if is_daily: + loop_date = datetime.datetime(year0, 1, 1) + while loop_date <= datetime.datetime(year0, 12, 31): + date_available = False + for idx, cubetime in enumerate(time_list): + if ( + loop_date.year == cubetime[0].year + and loop_date.month == cubetime[0].month + and loop_date.day == cubetime[0].day + ): + date_available = True + full_list.append(cube_list[idx]) + break + if not date_available: + logger.debug( + "No data available for %s", + 
loop_date.strftime("%Y-%m-%d"), + ) + masked_cube = _create_masked_cube( + cube_list[0], + loop_date.year, + loop_date.month, + loop_date.day, + ) + full_list.append(masked_cube) + loop_date += relativedelta.relativedelta(days=1) + else: + loop_date = datetime.datetime(year0, 1, 1) + while loop_date <= datetime.datetime(year0, 12, 31): + date_available = False + for idx, cubetime in enumerate(time_list): + if ( + loop_date.year == cubetime[0].year + and loop_date.month == cubetime[0].month + ): + date_available = True + full_list.append(cube_list[idx]) + break + if not date_available: + logger.debug( + "No data available for %s", + loop_date.strftime("%Y-%m"), + ) + masked_cube = _create_masked_cube( + cube_list[0], + loop_date.year, + loop_date.month, + loop_date.day, + ) + full_list.append(masked_cube) + loop_date += relativedelta.relativedelta(months=1) + + iris.util.unify_time_units(full_list) + cube = full_list.concatenate_cube() + cube.coord("time").points = ( + cube.coord("time").core_points().astype("float64") + ) + + # Set correct names + cube.var_name = definition.short_name + cube.standard_name = definition.standard_name + cube.long_name = definition.long_name + + # Fix units + cube.units = definition.units + + # # Fix data type + # cube.data = cube.core_data().astype('float32') + + # Roll longitude + cube.coord("longitude").points = cube.coord("longitude").points + 180.0 + nlon = len(cube.coord("longitude").points) + cube.data = da.roll(cube.core_data(), int(nlon / 2), axis=-1) + cube.attributes.update( + {"geospatial_lon_min": "0", "geospatial_lon_max": "360"}, + ) + + # Fix coordinates + cube = _fix_coordinates(cube, definition) + cube.coord("latitude").attributes = None + cube.coord("longitude").attributes = None + + # Save results + logger.debug("Saving cube\n%s", cube) + logger.debug("Setting time dimension to UNLIMITED while saving!") + version = attributes["version"] + if is_daily: + attributes["version"] = f"{version}-DAILY" + else: + 
attributes["version"] = f"{version}-MONTHLY" + + utils.save_variable( + cube, + cube.var_name, + out_dir, + attributes, + unlimited_dimensions=["time"], + ) + + logger.info("Finished CMORizing %s", ", ".join(in_files)) + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Cmorize CLARA-AVHRR dataset.""" + glob_attrs = cfg["attributes"] + if "version" in glob_attrs: + glob_version = glob_attrs["version"] + else: + glob_version = "" + + logger.info( + "Starting cmorization for tier%s OBS files: %s", + glob_attrs["tier"], + glob_attrs["dataset_id"], + ) + logger.info("Input data from: %s", in_dir) + logger.info("Output will be written to: %s", out_dir) + logger.info("CMORizing CLARA-AVHRR version %s", glob_attrs["version"]) + + if start_date is None: + start_date_mm = datetime.datetime(1979, 1, 1, tzinfo=datetime.UTC) + start_date_dd = datetime.datetime(2020, 1, 1, tzinfo=datetime.UTC) + if end_date is None: + end_date_mm = datetime.datetime(2020, 12, 31, tzinfo=datetime.UTC) + end_date_dd = datetime.datetime(2020, 12, 31, tzinfo=datetime.UTC) + + for var_name, var in cfg["variables"].items(): + var["var_name"] = var_name + + glob_attrs["mip"] = var["mip"] + if "version" in var: + glob_attrs["version"] = var["version"] + else: + glob_attrs["version"] = glob_version + + if "day" in var_name: + logger.info("Input data for %s is daily data", var_name) + daily = True + start_date = start_date_dd + end_date = end_date_dd + else: + logger.info("Input data for %s is monthly data", var_name) + daily = False + start_date = start_date_mm + end_date = end_date_mm + + for year in range(start_date.year, end_date.year + 1): + logger.info("Processing year %s", year) + in_files = [] + if daily: + for month in range(1, 13): + filepattern = os.path.join( + in_dir, + f"daily/{year}{month:02d}", + var["filename"].format( + year=year, month=f"{month:02d}" + ), + ) + in_files.extend(glob.glob(filepattern)) + else: + filepattern = os.path.join( + in_dir, + 
f"monthly/{year}", + var["filename"].format(year=year), + ) + in_files.extend(glob.glob(filepattern)) + + if not in_files: + logger.info( + "%d: no data not found for variable %s", + year, + var_name, + ) + else: + _extract_variable(in_files, var, cfg, out_dir, daily) From 0ca5a49a0553a8530aec793f1e384958bf1fedb6 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Wed, 13 May 2026 16:33:41 +0200 Subject: [PATCH 04/12] updates recipe_check_obs.yml --- esmvaltool/recipes/examples/recipe_check_obs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index aa3357be5b..e0d9118627 100644 --- a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -1440,7 +1440,7 @@ diagnostics: - {dataset: CLARA-AVHRR, project: OBS, tier: 3, type: sat, version: A3-DAILY, mip: CFday, start_year: 2020, end_year: 2020} - {dataset: CLARA-AVHRR, project: OBS, tier: 3, - type: sat, version: A3-MONTHLY, mip: Amon, start_year: 2020, end_year: 2020} + type: sat, version: A3-MONTHLY, mip: Amon, start_year: 1979, end_year: 2020} scripts: null From 1c7f175267ce927c6b6161cc3da5837fe4e47d50 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Tue, 19 May 2026 09:22:51 +0200 Subject: [PATCH 05/12] added clwvi --- doc/sphinx/source/input.rst | 2 +- .../data/cmor_config/CLARA-AVHRR.yml | 12 ++ esmvaltool/cmorizers/data/datasets.yml | 4 +- .../data/formatters/datasets/clara_avhrr.py | 143 ++++++++++++------ 4 files changed, 114 insertions(+), 47 deletions(-) diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst index 76e745baac..1a877d2155 100644 --- a/doc/sphinx/source/input.rst +++ b/doc/sphinx/source/input.rst @@ -283,7 +283,7 @@ A list of the datasets for which a CMORizers is available is provided in the fol | CERES-SYN1deg | rlds, rldscs, rlus, rluscs, rlut, rlutcs, rsds, rsdscs, rsus, rsuscs, rsut, rsutcs (3hr) | 3 | NCL | | 
| rlds, rldscs, rlus, rlut, rlutcs, rsds, rsdt, rsus, rsut, rsutcs (Amon) | | | +----------------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ -| CLARA-AVHRR | clt, clivi, lwp (Amon) | 3 | Python | +| CLARA-AVHRR | clt, clivi, clwvi, lwp (Amon, CFday) | 3 | Python | +----------------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | CLOUDSAT-L2 | clw, clivi, clwvi, lwp (Amon) | 3 | NCL | +----------------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ diff --git a/esmvaltool/cmorizers/data/cmor_config/CLARA-AVHRR.yml b/esmvaltool/cmorizers/data/cmor_config/CLARA-AVHRR.yml index de915f3995..a5df06faf2 100644 --- a/esmvaltool/cmorizers/data/cmor_config/CLARA-AVHRR.yml +++ b/esmvaltool/cmorizers/data/cmor_config/CLARA-AVHRR.yml @@ -26,6 +26,12 @@ variables: short_name: lwp raw: lwp_allsky filename: LWPmm{year}*.nc + clwvi_month: + mip: Amon + short_name: clwvi + raw: ['lwp_allsky', 'iwp_allsky'] + filename: ['LWPmm{year}*.nc', 'IWPmm{year}*.nc'] + operator: sum # daily means clivi_day: mip: CFday @@ -42,3 +48,9 @@ variables: short_name: lwp raw: lwp_allsky filename: LWPdm{year}{month}*.nc + clwvi_day: + mip: CFday + short_name: clwvi + raw: ['lwp_allsky', 'iwp_allsky'] + filename: ['LWPdm{year}{month}*.nc', 'IWPdm{year}{month}*.nc'] + operator: sum diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index cd70e05eae..fce32ed5a8 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -238,8 +238,8 @@ datasets: info: | Download and processing instructions https://cds.climate.copernicus.eu/datasets/satellite-cloud-properties?tab=download - Put all daily files for one year 
(yyyy) under a single directory "daily/", - monthly files for one year (yyyy) under "monthly/". + Put all daily files for one month (mm) of one year (yyyy) under a single + directory "daily/", monthly files for one year (yyyy) under "monthly/". Note: you must accept the terms of use for CC-BY, EUMETSAT CM SAF, ESA CCI Select the following from the CDS: Product family: CLARA-A3 diff --git a/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py b/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py index 9fe740ab7e..f5de05f3b6 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py @@ -21,9 +21,10 @@ Month: select all Day: select all Geographical area: Whole available region - Put all daily files for one year (yyyy) under a single directory "daily/", - monthly files for one year (yyyy) under "monthly/". - Note: you must accept the terms of use for CC-BY, EUMETSAT CM SAF, ESA CCI + Put all daily files for one month (mm) of one year (yyyy) under a single + directory "daily/", monthly files for one year (yyyy) under "monthly/". 
+ Note: you must accept the terms of use for CC-BY, EUMETSAT CM SAF, ESA CCI to be able + to download the data from the CDS Alternatively, use the automatic downloader (recommended): esmvaltool data download CLARA-AVHRR @@ -94,37 +95,19 @@ def _fix_coordinates(cube, definition): return cube -def _extract_variable(in_files, var, cfg, out_dir, is_daily): +def _extract_variable(cube_list, var, cfg, out_dir, is_daily): if is_daily: timefreq = "daily" else: timefreq = "monthly" logger.info("CMORizing variable '%s' (%s)", var["short_name"], timefreq) + attributes = deepcopy(cfg["attributes"]) attributes["mip"] = var["mip"] attributes["raw"] = var["raw"] cmor_table = CMOR_TABLES[attributes["project_id"]] definition = cmor_table.get_variable(var["mip"], var["short_name"]) - # load all input files (1 year) into 1 cube - # --> drop attributes that differ among input files - cube_list = iris.load(in_files, var["raw"]) - - # (global) attributes to remove - drop_attrs = [ - "date_created", - "time_coverage_start", - "time_coverage_end", - "CMSAF_included_Daily_Means", - "CMSAF_platform_and_orbits", - "platform", - ] - - for cube in cube_list: - for attr in drop_attrs: - if attr in cube.attributes: - cube.attributes.pop(attr) - # make sure there is one cube for every day (daily data) or # every month (monthly data) of the year # (print debug info about missing days/months and add cube with @@ -251,7 +234,96 @@ def _extract_variable(in_files, var, cfg, out_dir, is_daily): unlimited_dimensions=["time"], ) - logger.info("Finished CMORizing %s", ", ".join(in_files)) + logger.info( + "Finished CMORizing variable '%s' (%s) for current year", + var["short_name"], + timefreq, + ) + + +def _load_files(var, cfg, in_dir, year, daily): + if type(var["raw"]) is list: + varlist = var["raw"] + else: + varlist = var["raw"].split() + + if type(var["filename"]) is list: + filelist = var["filename"] + else: + filelist = var["filename"].split() + + in_files = [] + + for filemask in filelist: + if 
daily: + for month in range(1, 13): + filepattern = os.path.join( + in_dir, + f"daily/{year}{month:02d}", + filemask.format(year=year, month=f"{month:02d}"), + ) + in_files.extend(glob.glob(filepattern)) + else: + filepattern = os.path.join( + in_dir, + f"monthly/{year}", + filemask.format(year=year), + ) + in_files.extend(glob.glob(filepattern)) + + if len(varlist) == 1: + cube_list = iris.load(in_files, varlist[0]) + else: + cube_list = [] + for raw_name in varlist: + cube_list.extend(iris.load(in_files, raw_name)) + + # (global) attributes to remove + drop_attrs = [ + "date_created", + "time_coverage_start", + "time_coverage_end", + "CMSAF_included_Daily_Means", + "CMSAF_platform_and_orbits", + "platform", + ] + + for cube in cube_list: + for attr in drop_attrs: + if attr in cube.attributes: + cube.attributes.pop(attr) + + cube_list_sum = [] + if var.get("operator", "") == "sum": + for raw_name in varlist: + sublist = [c for c in cube_list if c.var_name == raw_name] + if not cube_list_sum: + cube_list_sum = sublist + else: + logger.debug("Adding cubes (%s)...\n", raw_name) + for cube in sublist: + # get time of cube to be added to the sum + timecoord = cube.coord("time") + # find sum cube with matching time + for sumcube in cube_list_sum: + sumtimecoord = sumcube.coord("time") + if timecoord == sumtimecoord: + sumcube += cube + logger.debug( + "cube added for time %s", + timecoord.units.num2date(timecoord.points), + ) + break + cube_list = cube_list_sum + else: + raise ValueError( + "Multiple input files found, with operator '{}' configured: {}".format( + var.get("operator"), + ", ".join(in_files), + ), + ) + + return cube_list def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): @@ -300,30 +372,13 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): for year in range(start_date.year, end_date.year + 1): logger.info("Processing year %s", year) - in_files = [] - if daily: - for month in range(1, 13): - filepattern = 
os.path.join( - in_dir, - f"daily/{year}{month:02d}", - var["filename"].format( - year=year, month=f"{month:02d}" - ), - ) - in_files.extend(glob.glob(filepattern)) - else: - filepattern = os.path.join( - in_dir, - f"monthly/{year}", - var["filename"].format(year=year), - ) - in_files.extend(glob.glob(filepattern)) + cube_list = _load_files(var, cfg, in_dir, year, daily) - if not in_files: + if not cube_list: logger.info( "%d: no data not found for variable %s", year, var_name, ) else: - _extract_variable(in_files, var, cfg, out_dir, daily) + _extract_variable(cube_list, var, cfg, out_dir, daily) From f1c7ad2e2e60d06ef0b06f83caac4591ac84eada Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Tue, 19 May 2026 10:13:42 +0200 Subject: [PATCH 06/12] update formatter --- .../cmorizers/data/formatters/datasets/clara_avhrr.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py b/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py index f5de05f3b6..b44da07ce0 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py @@ -252,6 +252,8 @@ def _load_files(var, cfg, in_dir, year, daily): else: filelist = var["filename"].split() + # create a list of filenames to be read + in_files = [] for filemask in filelist: @@ -288,11 +290,17 @@ def _load_files(var, cfg, in_dir, year, daily): "platform", ] + # remove global attributes that might prevent concatenation + for cube in cube_list: for attr in drop_attrs: if attr in cube.attributes: cube.attributes.pop(attr) + # If "operator" is defined in the CMOR config file, then + # do calculations now. 
So far, only the "sum" of 2 or more + # variables is implemented + cube_list_sum = [] if var.get("operator", "") == "sum": for raw_name in varlist: @@ -315,7 +323,7 @@ def _load_files(var, cfg, in_dir, year, daily): ) break cube_list = cube_list_sum - else: + elif var.get("operator"): raise ValueError( "Multiple input files found, with operator '{}' configured: {}".format( var.get("operator"), From bf0e8fa0fd0196e67b798f1958d9e4ce43383248 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Tue, 19 May 2026 10:28:57 +0200 Subject: [PATCH 07/12] updated recipe_check_obs.yml --- esmvaltool/recipes/examples/recipe_check_obs.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index e0d9118627..ba34e3d964 100644 --- a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -1435,11 +1435,12 @@ diagnostics: variables: clt: clivi: + clwvi: lwp: additional_datasets: - - {dataset: CLARA-AVHRR, project: OBS, tier: 3, + - {dataset: CLARA-AVHRR, project: OBS6, tier: 3, type: sat, version: A3-DAILY, mip: CFday, start_year: 2020, end_year: 2020} - - {dataset: CLARA-AVHRR, project: OBS, tier: 3, + - {dataset: CLARA-AVHRR, project: OBS6, tier: 3, type: sat, version: A3-MONTHLY, mip: Amon, start_year: 1979, end_year: 2020} scripts: null From e75ab5cfe7995a70c79b581daae0d1682693fba1 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Tue, 19 May 2026 11:13:18 +0200 Subject: [PATCH 08/12] codacy --- .../data/downloaders/datasets/clara_avhrr.py | 7 +++ .../data/formatters/datasets/clara_avhrr.py | 61 ++++++++++--------- 2 files changed, 40 insertions(+), 28 deletions(-) diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py b/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py index d56b6ed23a..5f26f55485 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py +++ 
b/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py @@ -45,9 +45,16 @@ def download_dataset( if start_date is None: start_date_mm = datetime.datetime(1979, 1, 1, tzinfo=datetime.UTC) start_date_dd = datetime.datetime(2020, 1, 1, tzinfo=datetime.UTC) + else: + start_date_mm = start_date + start_date_dd = start_date + if end_date is None: end_date_mm = datetime.datetime(2020, 12, 31, tzinfo=datetime.UTC) end_date_dd = datetime.datetime(2020, 12, 31, tzinfo=datetime.UTC) + else: + end_date_mm = end_date + end_date_dd = end_date requests = {} diff --git a/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py b/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py index b44da07ce0..d2bca0b11f 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py @@ -61,7 +61,9 @@ def _create_masked_cube(cube, year, month, day): dataset_time_unit = str(masked_cube.coord("time").units) dataset_time_calender = masked_cube.coord("time").units.calendar # Convert datetime - newtime = datetime.datetime(year=year, month=month, day=day) + newtime = datetime.datetime( + year=year, month=month, day=day, tzinfo=datetime.UTC + ) newtime = cf_units.date2num( newtime, dataset_time_unit, @@ -96,10 +98,7 @@ def _fix_coordinates(cube, definition): def _extract_variable(cube_list, var, cfg, out_dir, is_daily): - if is_daily: - timefreq = "daily" - else: - timefreq = "monthly" + timefreq = "daily" if is_daily else "monthly" logger.info("CMORizing variable '%s' (%s)", var["short_name"], timefreq) attributes = deepcopy(cfg["attributes"]) @@ -136,8 +135,10 @@ def _extract_variable(cube_list, var, cfg, out_dir, is_daily): # cubes containing only nan to fill possible gaps if is_daily: - loop_date = datetime.datetime(year0, 1, 1) - while loop_date <= datetime.datetime(year0, 12, 31): + loop_date = datetime.datetime(year0, 1, 1, tzinfo=datetime.UTC) + while loop_date <= datetime.datetime( + year0, 12, 
31, tzinfo=datetime.UTC + ): date_available = False for idx, cubetime in enumerate(time_list): if ( @@ -162,8 +163,10 @@ def _extract_variable(cube_list, var, cfg, out_dir, is_daily): full_list.append(masked_cube) loop_date += relativedelta.relativedelta(days=1) else: - loop_date = datetime.datetime(year0, 1, 1) - while loop_date <= datetime.datetime(year0, 12, 31): + loop_date = datetime.datetime(year0, 1, 1, tzinfo=datetime.UTC) + while loop_date <= datetime.datetime( + year0, 12, 31, tzinfo=datetime.UTC + ): date_available = False for idx, cubetime in enumerate(time_list): if ( @@ -201,9 +204,6 @@ def _extract_variable(cube_list, var, cfg, out_dir, is_daily): # Fix units cube.units = definition.units - # # Fix data type - # cube.data = cube.core_data().astype('float32') - # Roll longitude cube.coord("longitude").points = cube.coord("longitude").points + 180.0 nlon = len(cube.coord("longitude").points) @@ -241,16 +241,16 @@ def _extract_variable(cube_list, var, cfg, out_dir, is_daily): ) -def _load_files(var, cfg, in_dir, year, daily): - if type(var["raw"]) is list: - varlist = var["raw"] - else: - varlist = var["raw"].split() - - if type(var["filename"]) is list: - filelist = var["filename"] - else: - filelist = var["filename"].split() +def _load_files(var, in_dir, year, daily): + """Load all input files for one year. 
If requested, add different variables.""" + varlist = ( + var["raw"] if isinstance(var["raw"], list) else var["raw"].split() + ) + filelist = ( + var["filename"] + if isinstance(var["filename"], list) + else var["filename"].split() + ) # create a list of filenames to be read @@ -316,7 +316,8 @@ def _load_files(var, cfg, in_dir, year, daily): for sumcube in cube_list_sum: sumtimecoord = sumcube.coord("time") if timecoord == sumtimecoord: - sumcube += cube + result = sumcube + result += cube logger.debug( "cube added for time %s", timecoord.units.num2date(timecoord.points), @@ -337,10 +338,7 @@ def _load_files(var, cfg, in_dir, year, daily): def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorize CLARA-AVHRR dataset.""" glob_attrs = cfg["attributes"] - if "version" in glob_attrs: - glob_version = glob_attrs["version"] - else: - glob_version = "" + glob_version = glob_attrs["version"] if "version" in glob_attrs else "" logger.info( "Starting cmorization for tier%s OBS files: %s", @@ -354,9 +352,16 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): if start_date is None: start_date_mm = datetime.datetime(1979, 1, 1, tzinfo=datetime.UTC) start_date_dd = datetime.datetime(2020, 1, 1, tzinfo=datetime.UTC) + else: + start_date_mm = start_date + start_date_dd = start_date + if end_date is None: end_date_mm = datetime.datetime(2020, 12, 31, tzinfo=datetime.UTC) end_date_dd = datetime.datetime(2020, 12, 31, tzinfo=datetime.UTC) + else: + end_date_mm = end_date + end_date_dd = end_date for var_name, var in cfg["variables"].items(): var["var_name"] = var_name @@ -380,7 +385,7 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): for year in range(start_date.year, end_date.year + 1): logger.info("Processing year %s", year) - cube_list = _load_files(var, cfg, in_dir, year, daily) + cube_list = _load_files(var, in_dir, year, daily) if not cube_list: logger.info( From 90b05f5887685f84ef66957ce1bda603e8ba1059 Mon 
Sep 17 00:00:00 2001 From: Axel Lauer Date: Tue, 19 May 2026 11:26:30 +0200 Subject: [PATCH 09/12] codacy --- .../data/downloaders/datasets/clara_avhrr.py | 4 ++-- .../data/formatters/datasets/clara_avhrr.py | 15 ++++++++------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py b/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py index 5f26f55485..61b0f87654 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py @@ -91,7 +91,7 @@ def download_dataset( "year": str(year), "month": [f"{m:02d}" for m in range(1, 13)], }, - } + }, ) for year in range(start_date_dd.year, end_date_dd.year + 1): @@ -128,7 +128,7 @@ def download_dataset( "month": f"{month:02d}", "day": [f"{m:02d}" for m in range(1, 32)], }, - } + }, ) cds_client = cdsapi.Client(cds_url) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py b/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py index d2bca0b11f..14d13d3259 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py @@ -137,7 +137,10 @@ def _extract_variable(cube_list, var, cfg, out_dir, is_daily): if is_daily: loop_date = datetime.datetime(year0, 1, 1, tzinfo=datetime.UTC) while loop_date <= datetime.datetime( - year0, 12, 31, tzinfo=datetime.UTC + year0, + 12, + 31, + tzinfo=datetime.UTC, ): date_available = False for idx, cubetime in enumerate(time_list): @@ -325,12 +328,10 @@ def _load_files(var, in_dir, year, daily): break cube_list = cube_list_sum elif var.get("operator"): - raise ValueError( - "Multiple input files found, with operator '{}' configured: {}".format( - var.get("operator"), - ", ".join(in_files), - ), + errstr = "Multiple input files found, with operator '{}' configured: {}".format( + var.get("operator"), ", ".join(in_files) ) + raise 
ValueError(errstr) return cube_list @@ -338,7 +339,7 @@ def _load_files(var, in_dir, year, daily): def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorize CLARA-AVHRR dataset.""" glob_attrs = cfg["attributes"] - glob_version = glob_attrs["version"] if "version" in glob_attrs else "" + glob_version = glob_attrs.get("version", "") logger.info( "Starting cmorization for tier%s OBS files: %s", From 392729a077c44efca6f2e1a772fe7145d0602a21 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Tue, 19 May 2026 11:38:15 +0200 Subject: [PATCH 10/12] codacy --- .../data/formatters/datasets/clara_avhrr.py | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py b/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py index 14d13d3259..ddb331fe5f 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py @@ -35,10 +35,9 @@ """ import datetime -import glob import logging -import os from copy import deepcopy +from pathlib import Path import cf_units import iris @@ -62,7 +61,10 @@ def _create_masked_cube(cube, year, month, day): dataset_time_calender = masked_cube.coord("time").units.calendar # Convert datetime newtime = datetime.datetime( - year=year, month=month, day=day, tzinfo=datetime.UTC + year=year, + month=month, + day=day, + tzinfo=datetime.UTC, ) newtime = cf_units.date2num( newtime, @@ -168,7 +170,10 @@ def _extract_variable(cube_list, var, cfg, out_dir, is_daily): else: loop_date = datetime.datetime(year0, 1, 1, tzinfo=datetime.UTC) while loop_date <= datetime.datetime( - year0, 12, 31, tzinfo=datetime.UTC + year0, + 12, + 31, + tzinfo=datetime.UTC, ): date_available = False for idx, cubetime in enumerate(time_list): @@ -262,19 +267,15 @@ def _load_files(var, in_dir, year, daily): for filemask in filelist: if daily: for month in range(1, 13): - filepattern = 
os.path.join( - in_dir, - f"daily/{year}{month:02d}", - filemask.format(year=year, month=f"{month:02d}"), + srcdir = Path(in_dir) / f"daily/{year}{month:02d}" + filepattern = filemask.format(year=year, month=f"{month:02d}") + in_files.extend( + [str(p) for p in srcdir.glob(pattern=filepattern)] ) - in_files.extend(glob.glob(filepattern)) else: - filepattern = os.path.join( - in_dir, - f"monthly/{year}", - filemask.format(year=year), - ) - in_files.extend(glob.glob(filepattern)) + srcdir = Path(in_dir) / f"monthly/{year}" + filepattern = filemask.format(year=year) + in_files.extend([str(p) for p in srcdir.glob(pattern=filepattern)]) if len(varlist) == 1: cube_list = iris.load(in_files, varlist[0]) From 39e9f4bfb2850145ba65b3910b30b77e43e046a1 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Tue, 19 May 2026 11:48:52 +0200 Subject: [PATCH 11/12] codacy --- .../cmorizers/data/downloaders/datasets/clara_avhrr.py | 5 +++-- esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py b/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py index 61b0f87654..46e0b79da7 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py @@ -5,6 +5,7 @@ import logging import shutil import zipfile +from pathlib import Path import cdsapi @@ -159,7 +160,7 @@ def download_dataset( file_path.as_posix(), ) # Handle both .gz and .zip files - with open(file_path, "rb") as file: + with Path(file_path).open("rb") as file: magic = file.read(2) if magic == b"PK": # ZIP file signature @@ -169,7 +170,7 @@ def download_dataset( else: logger.info("Detected GZIP file: %s", file_path) with gzip.open(file_path, "rb") as f_in: - with open(outdir / file_path.stem, "wb") as f_out: + with Path(outdir / file_path.stem).open("wb") as f_out: shutil.copyfileobj(f_in, f_out) except
Exception as ex: logger.info("%s: no data downloaded for %s", type(ex), var_name) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py b/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py index ddb331fe5f..3487d0577d 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py @@ -270,7 +270,7 @@ def _load_files(var, in_dir, year, daily): srcdir = Path(in_dir) / f"daily/{year}{month:02d}" filepattern = filemask.format(year=year, month=f"{month:02d}") in_files.extend( - [str(p) for p in srcdir.glob(pattern=filepattern)] + [str(p) for p in srcdir.glob(pattern=filepattern)], ) else: srcdir = Path(in_dir) / f"monthly/{year}" @@ -330,7 +330,8 @@ def _load_files(var, in_dir, year, daily): cube_list = cube_list_sum elif var.get("operator"): errstr = "Multiple input files found, with operator '{}' configured: {}".format( - var.get("operator"), ", ".join(in_files) + var.get("operator"), + ", ".join(in_files), ) raise ValueError(errstr) From aa1b6379d999baa51b84ae5a056f483daf03ef13 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Thu, 21 May 2026 14:22:37 +0200 Subject: [PATCH 12/12] added info on EUMETSAT data store --- esmvaltool/cmorizers/data/datasets.yml | 3 +++ .../cmorizers/data/downloaders/datasets/clara_avhrr.py | 5 +++++ .../cmorizers/data/formatters/datasets/clara_avhrr.py | 7 ++++++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index fce32ed5a8..1791f18785 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -251,6 +251,9 @@ datasets: Month: select all Day: select all Geographical area: Whole available region + Note: As of May 2026, CLARA-AVHRR (version A3) data on the CDS are only available until 2020. 
+ Alternatively, CLARA-AVHRR data could also be obtained from the EUMETSAT data store + (https://data.eumetsat.int/). CLOUDSAT-L2: tier: 3 diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py b/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py index 46e0b79da7..afb995637e 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/clara_avhrr.py @@ -43,6 +43,11 @@ def download_dataset( output_folder = raw_obs_dir / f"Tier{dataset_info['tier']}" / dataset output_folder.mkdir(parents=True, exist_ok=True) + # Note: As of May 2026, CLARA-AVHRR (version A3) data on the CDS are only + # available until 2020. Alternatively, CLARA-AVHRR data could also + # be obtained from the EUMETSAT data store + # (https://data.eumetsat.int/). + if start_date is None: start_date_mm = datetime.datetime(1979, 1, 1, tzinfo=datetime.UTC) start_date_dd = datetime.datetime(2020, 1, 1, tzinfo=datetime.UTC) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py b/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py index 3487d0577d..bc13d99ea6 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/clara_avhrr.py @@ -23,11 +23,16 @@ Geographical area: Whole available region Put all daily files for one month (mm) of one year (yyyy) under a single directory "daily/", monthly files for one year (yyyy) under "monthly/". + Note: you must accept the terms of use for CC-BY, EUMETSAT CM SAF, ESA CCI to be able to download the data from the CDS + As of May 2026, CLARA-AVHRR (version A3) data on the CDS are only available until 2020. + Alternatively, CLARA-AVHRR data could also be obtained from the EUMETSAT data store + (https://data.eumetsat.int/). 
+ Alternatively, use the automatic downloader (recommended): - esmvaltool data download CLARA-AVHRR + esmvaltool data download CLARA-AVHRR --original-data-dir Modification history