ESMValGroup · axel-lauer · Nov 26, 2025 · May 12, 2026 · May 13, 2026 · May 13, 2026
diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst
@@ -283,7 +283,7 @@ A list of the datasets for which a CMORizers is available is provided in the fol
 | CERES-SYN1deg                          | rlds, rldscs, rlus, rluscs, rlut, rlutcs, rsds, rsdscs, rsus, rsuscs, rsut, rsutcs (3hr)             |   3  | NCL             |
 |                                        | rlds, rldscs, rlus, rlut, rlutcs, rsds, rsdt, rsus, rsut, rsutcs (Amon)                              |      |                 |
 +----------------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
-| CLARA-AVHRR                            | clt, clivi, clwvi, lwp (Amon)                                                                        |   3  | NCL             |
+| CLARA-AVHRR                            | clt, clivi, clwvi, lwp (Amon, CFday)                                                                 |   3  | Python          |
 +----------------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
 | CLOUDSAT-L2                            | clw, clivi, clwvi, lwp (Amon)                                                                        |   3  | NCL             |
 +----------------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+

@@ -0,0 +1,56 @@
+# Common global attributes for Cmorizer output
+attributes:
+  dataset_id: CLARA-AVHRR
+  version: A3
+  # Same tier as listed for CLARA-AVHRR in datasets.yml and in the
+  # documentation table of supported datasets.
+  tier: 3
+  type: sat
+  project_id: OBS6
+  source: "https://cds.climate.copernicus.eu/datasets/satellite-cloud-properties"
+  reference: "clara-a3"
+  comment: ""
+
+# One entry per output variable:
+#   mip        - CMOR table of the output variable
+#   short_name - CMOR short name of the output variable
+#   raw        - variable name(s) in the input files
+#   filename   - input file pattern(s); {year} and {month} are placeholders,
+#                presumably filled in by the CMORizer script (verify there)
+#   operator   - how several raw variables are combined (only used when
+#                "raw" and "filename" are lists)
+variables:
+  # monthly means
+  clivi_month:
+    mip: Amon
+    short_name: clivi
+    raw: iwp_allsky
+    filename: IWPmm{year}*.nc
+  clt_month:
+    mip: Amon
+    short_name: clt
+    raw: cfc
+    filename: CFCmm{year}*.nc
+  lwp_month:
+    mip: Amon
+    short_name: lwp
+    raw: lwp_allsky
+    filename: LWPmm{year}*.nc
+  # clwvi is built from two raw variables (liquid + ice water path)
+  clwvi_month:
+    mip: Amon
+    short_name: clwvi
+    raw: ['lwp_allsky', 'iwp_allsky']
+    filename: ['LWPmm{year}*.nc', 'IWPmm{year}*.nc']
+    operator: sum
+  # daily means
+  clivi_day:
+    mip: CFday
+    short_name: clivi
+    raw: iwp_allsky
+    filename: IWPdm{year}{month}*.nc
+  clt_day:
+    mip: CFday
+    short_name: clt
+    raw: cfc
+    filename: CFCdm{year}{month}*.nc
+  lwp_day:
+    mip: CFday
+    short_name: lwp
+    raw: lwp_allsky
+    filename: LWPdm{year}{month}*.nc
+  # clwvi is built from two raw variables (liquid + ice water path)
+  clwvi_day:
+    mip: CFday
+    short_name: clwvi
+    raw: ['lwp_allsky', 'iwp_allsky']
+    filename: ['LWPdm{year}{month}*.nc', 'IWPdm{year}{month}*.nc']
+    operator: sum
@@ -233,28 +233,27 @@ datasets:
 
   CLARA-AVHRR:
     tier: 3
-    source: https://wui.cmsaf.eu/
-    last_access: 2021-03-22
+    source: https://cds.climate.copernicus.eu/datasets/satellite-cloud-properties
+    last_access: 2026-05-12
     info: |
       Download and processing instructions
-        1) Create ("register") an user account at
-          https://wui.cmsaf.eu/safira/action/viewLogin?menuName=NUTZER_HOME
-        2) login (same URL as above)
-        3) Search data using search form at
-        https://wui.cmsaf.eu/safira/action/viewProduktHome?menuName=PRODUKT_HOME
-          - Product group: Climate Data Records
-          - Product family: CLARA-A ed. 2.1
-          - Product name: CFC - Factional cloud cover
-                          IWP - Ice water path
-                          LWP - Liquid water path
-          - Area: Global
-          - Temporal resolution: Monthly
-        4) Select "CLARA-A ed. 2.1 AVHRR on polar orbiting satellites" from
-          list of results.
-        5) Click on "Add to order cart"
-        6) Follow download instructions in automatic email received when data
-          are ready for download.
-        7) Untar all .tar files into a single directory.
+            https://cds.climate.copernicus.eu/datasets/satellite-cloud-properties?tab=download
+            Put all daily files for one month (mm) of one year (yyyy) under a single
+            directory "daily/<yyyymm>", monthly files for one year (yyyy) under "monthly/<yyyy>".
+            Note: you must accept the terms of use for CC-BY, EUMETSAT CM SAF, ESA CCI
+            Select the following from the CDS:
+                Product family: CLARA-A3
+                Origin: EUMETSAT
+                Variable: Cloud fraction, Cloud physical properties of the ice/liquid phase
+                Climate data record type: TCDR
+                Time aggregation: Daily mean / Monthly mean
+                Year: select all
+                Month: select all
+                Day: select all
+                Geographical area: Whole available region
+      Note: As of May 2026, CLARA-AVHRR (version A3) data on the CDS are only available until 2020.
+            Alternatively, CLARA-AVHRR data could also be obtained from the EUMETSAT data store
+            (https://data.eumetsat.int/).
 
   CLOUDSAT-L2:
     tier: 3

@@ -0,0 +1,181 @@
+"""Script to download CM SAF CLARA-AVHRR data from the CDS."""
+
+import datetime
+import gzip
+import logging
+import shutil
+import zipfile
+from pathlib import Path
+
+import cdsapi
+
+logger = logging.getLogger(__name__)
+
+
+def download_dataset(
+    original_data_dir,
+    dataset,
+    dataset_info,
+    start_date,
+    end_date,
+    overwrite,
+):
+    """Download dataset.
+
+    Parameters
+    ----------
+    original_data_dir : Path
+        Directory where original data will be stored.
+    dataset : str
+        Name of the dataset
+    dataset_info : dict
+         Dataset information from the datasets.yml file
+    start_date : datetime
+        Start of the interval to download
+    end_date : datetime
+        End of the interval to download
+    overwrite : bool
+        Overwrite already downloaded files
+    """
+    cds_url = "https://cds.climate.copernicus.eu/api"
+
+    raw_obs_dir = original_data_dir
+    output_folder = raw_obs_dir / f"Tier{dataset_info['tier']}" / dataset
+    output_folder.mkdir(parents=True, exist_ok=True)
+
+    #  Note: As of May 2026, CLARA-AVHRR (version A3) data on the CDS are only
+    #        available until 2020. Alternatively, CLARA-AVHRR data could also
+    #        be obtained from the EUMETSAT data store
+    #        (https://data.eumetsat.int/).
+
+    if start_date is None:
+        start_date_mm = datetime.datetime(1979, 1, 1, tzinfo=datetime.UTC)
+        start_date_dd = datetime.datetime(2020, 1, 1, tzinfo=datetime.UTC)
+    else:
+        start_date_mm = start_date
+        start_date_dd = start_date
+
+    if end_date is None:
+        end_date_mm = datetime.datetime(2020, 12, 31, tzinfo=datetime.UTC)
+        end_date_dd = datetime.datetime(2020, 12, 31, tzinfo=datetime.UTC)
+    else:
+        end_date_mm = end_date
+        end_date_dd = end_date
+
+    requests = {}
+
+    # The CDS requests for daily values are done for each month separately
+    # to avoid the error "cost limits exceeded".
+
+    for year in range(start_date_mm.year, end_date_mm.year + 1):
+        requests.update(
+            {
+                "clivi_monthly_" + str(year): {
+                    "product_family": "clara_a3",
+                    "origin": "eumetsat",
+                    "variable": "cloud_physical_properties_of_the_ice_phase",
+                    "climate_data_record_type": "thematic_climate_data_record",
+                    "time_aggregation": "monthly_mean",
+                    "year": str(year),
+                    "month": [f"{m:02d}" for m in range(1, 13)],
+                },
+                "clt_monthly_" + str(year): {
+                    "product_family": "clara_a3",
+                    "origin": "eumetsat",
+                    "variable": "cloud_fraction",
+                    "climate_data_record_type": "thematic_climate_data_record",
+                    "time_aggregation": "monthly_mean",
+                    "year": str(year),
+                    "month": [f"{m:02d}" for m in range(1, 13)],
+                },
+                "lwp_monthly_" + str(year): {
+                    "product_family": "clara_a3",
+                    "origin": "eumetsat",
+                    "variable": "cloud_physical_properties_of_the_liquid_phase",
+                    "climate_data_record_type": "thematic_climate_data_record",
+                    "time_aggregation": "monthly_mean",
+                    "year": str(year),
+                    "month": [f"{m:02d}" for m in range(1, 13)],
+                },
+            },
+        )
+
+    for year in range(start_date_dd.year, end_date_dd.year + 1):
+        for month in range(1, 13):
+            requests.update(
+                {
+                    "clivi_daily_" + str(year) + f"{month:02d}": {
+                        "product_family": "clara_a3",
+                        "origin": "eumetsat",
+                        "variable": "cloud_physical_properties_of_the_ice_phase",
+                        "climate_data_record_type": "thematic_climate_data_record",
+                        "time_aggregation": "daily_mean",
+                        "year": str(year),
+                        "month": f"{month:02d}",
+                        "day": [f"{m:02d}" for m in range(1, 32)],
+                    },
+                    "clt_daily_" + str(year) + f"{month:02d}": {
+                        "product_family": "clara_a3",
+                        "origin": "eumetsat",
+                        "variable": "cloud_fraction",
+                        "climate_data_record_type": "thematic_climate_data_record",
+                        "time_aggregation": "daily_mean",
+                        "year": str(year),
+                        "month": f"{month:02d}",
+                        "day": [f"{m:02d}" for m in range(1, 32)],
+                    },
+                    "lwp_daily_" + str(year) + f"{month:02d}": {
+                        "product_family": "clara_a3",
+                        "origin": "eumetsat",
+                        "variable": "cloud_physical_properties_of_the_liquid_phase",
+                        "climate_data_record_type": "thematic_climate_data_record",
+                        "time_aggregation": "daily_mean",
+                        "year": str(year),
+                        "month": f"{month:02d}",
+                        "day": [f"{m:02d}" for m in range(1, 32)],
+                    },
+                },
+            )
+
+    cds_client = cdsapi.Client(cds_url)
+
+    for var_name, request in requests.items():
+        datestr = var_name.split("_")[2]
+        if "daily" in var_name:
+            outdir = output_folder / f"daily/{datestr}/"
+        else:
+            outdir = output_folder / f"monthly/{datestr}/"
+        outdir.mkdir(parents=True, exist_ok=True)
+
+        logger.info("Downloading %s data to %s", var_name, outdir)
+
+        file_path = outdir / f"{var_name}.gz"
+
+        if file_path.exists() and not overwrite:
+            logger.info(
+                "File %s already exists. Skipping download.",
+                file_path,
+            )
+            continue
+
+        try:
+            cds_client.retrieve(
+                "satellite-cloud-properties",
+                request,
+                file_path.as_posix(),
+            )
+            # Handle both .gz and .zip files
+            with Path(file_path).open("rb") as file:
+                magic = file.read(2)
+
+            if magic == b"PK":  # ZIP file signature
+                logger.info("Detected ZIP file: %s", file_path)
+                with zipfile.ZipFile(file_path, "r") as zip_ref:
+                    zip_ref.extractall(outdir)
+            else:
+                logger.info("Detected GZIP file: %s", file_path)
+                with gzip.open(file_path, "rb") as f_in:
+                    with Path(outdir / file_path.stem).open("rb") as f_out:
-                with gzip.open(file_path, "rb") as f_in:
-                    with Path(outdir / file_path.stem).open("rb") as f_out:
+                with (
+                    gzip.open(file_path, "rb") as f_in,
+                    Path(outdir / file_path.stem).open("rb") as f_out,
+                ):
-                with gzip.open(file_path, "rb") as f_in:
-                    with Path(outdir / file_path.stem).open("rb") as f_out:
+                with (
+                    gzip.open(file_path, "rb") as f_in,
+                    Path(outdir / file_path.stem).open("rb") as f_out,
+                ):
+                        shutil.copyfileobj(f_in, f_out)
+        except Exception as ex:
+            logger.info("%s: no data downloaded for %s", type(ex), var_name)