From 44470cc66deeeb9c13aeb0dd856bca7af30e02cf Mon Sep 17 00:00:00 2001 From: Ioan Ferencik Date: Wed, 27 May 2026 02:25:06 +0200 Subject: [PATCH 1/9] refine nasa search and create download structure --- rapida/cli/aclick.py | 2 +- rapida/cli/ntl.py | 107 +++++++++++++++++++++++++++++++------- rapida/ntl/cache.py | 49 +++++++++++++++++ rapida/ntl/nasa/const.py | 92 ++++++++++++++++++-------------- rapida/ntl/nasa/io.py | 23 ++++++++ rapida/ntl/nasa/search.py | 101 ++++++++++++++++++++--------------- rapida/ntl/nasa/util.py | 37 +++++++++++++ rapida/ntl/noaa/cmask.py | 13 ++--- rapida/ntl/noaa/io.py | 10 ++-- 9 files changed, 322 insertions(+), 112 deletions(-) create mode 100644 rapida/ntl/cache.py create mode 100644 rapida/ntl/nasa/io.py create mode 100644 rapida/ntl/nasa/util.py diff --git a/rapida/cli/aclick.py b/rapida/cli/aclick.py index fbdb1d6..a16a0cf 100644 --- a/rapida/cli/aclick.py +++ b/rapida/cli/aclick.py @@ -43,7 +43,7 @@ def list_commands(self, ctx): def command(self, *args, **kwargs): # Automatically wrap all @group.command() calls in AsyncCommand kwargs.setdefault('cls', AsyncCommand) - return super().command(*args, **kwargs) + return super().command(*args, **{'no_args_is_help': True, **kwargs}) def group(self, *args, **kwargs): # Ensure nested groups inherit this behavior diff --git a/rapida/cli/ntl.py b/rapida/cli/ntl.py index d19a815..3b596f3 100644 --- a/rapida/cli/ntl.py +++ b/rapida/cli/ntl.py @@ -3,10 +3,12 @@ from datetime import date import click from rapida.cli import RapidaCommandGroup -from rapida.ntl.nasa.const import ARCHIVE, OPERATIONAL, PROCESSING_LEVEL_NAMES +from rapida.ntl.nasa.const import ARCHIVE, OPERATIONAL, PROCESSING_LEVEL_NAMES, PRODUCT_NAMES from rapida.ntl.nasa.search import search as nasa_search from rapida.ntl.noaa.search import async_search_granules, VIIRSNavigator from rapida.util.bbox_param_type import BboxParamType +from rapida.ntl.nasa.io import download as download_from_nasa + from rich.table import Table logger = 
logging.getLogger(__name__) @@ -37,12 +39,13 @@ def handle_parse_result(self, ctx, opts, args): def ntl(): """Nighttime Lights VIIRS data and impact detection""" pass -@ntl.group(short_help=f'Search for available NTL data products across tiers and streams') + +@ntl.group(short_help=f'Search for available NTL data') def search(): """Search for available NTL data products across distinct data streams.""" pass -@search.command(name='noaa', short_help=f'Search for available NTL data from operational NOAA stream') +@search.command(name='noaa', short_help=f'Search for available NTL operational data from NOAA source') @click.option('-b', '--bbox', required=True, @@ -114,17 +117,17 @@ async def search_noaa(ctx, bbox:tuple[numbers.Number]=None, target_date:date=Non progress.console.print(f"\n[dim]Note: Each granule represents {1025 / 12:.2f}s of instrument data.[/dim]") -@search.command(name='nasa', short_help=f'Search for available NTL data from NASA science archive stream') +@search.command(name='nasa', short_help=f'Search for available NTL science data from NASA source') @click.option('-b', '--bbox', required=True, type=BboxParamType(), help='Bounding box xmin/west, ymin/south, xmax/east, ymax/north' ) -@click.option("--date", "target_date", +@click.option("--date", "nominal_date", type=click.DateTime(formats=["%Y-%m-%d"]), required=True, - help='' + help='The human experience of a specific night, local time zone matched to the center of bbox' ) @click.option( '-s', '--stream', @@ -144,32 +147,96 @@ async def search_noaa(ctx, bbox:tuple[numbers.Number]=None, target_date:date=Non ) @click.pass_context -def search_nasa(ctx, bbox:tuple[numbers.Number]=None, target_date:date=None, stream:str = None, processing_level:str=None): +def search_nasa(ctx, bbox:tuple[numbers.Number]=None, nominal_date:date=None, stream:str = None, processing_level:str=None): progress = ctx.obj.get('progress') - urls = nasa_search(processing_level=processing_level, target_date=target_date, + urls = 
nasa_search(processing_level=processing_level, nominal_date=nominal_date, bbox=bbox, stream=stream, progress=progress) if urls: - table = Table(title=f" {processing_level} VIIRS satellites tiles for the night of {target_date.date()} covering {bbox}", + table = Table(title=f" {processing_level} VIIRS satellites tiles for the night of {nominal_date.date()}-{nominal_date.strftime('%Y%j')} covering {bbox}", title_style="bold yellow") table.add_column("Product", style="red", justify='center') + table.add_column("Timestamp", style="red", justify='center') + table.add_column("Tile", style="red", justify='center') table.add_column("URI", style="green", justify='center') - for url in urls: - table.add_row(*url) + for e in urls: + table.add_row(*e) progress.console.print(table) -@ntl.command(short_help=f'Download selected NTL data') -async def download(): - logger.info('Downloading NTL') -@ntl.command(short_help=f'Execute crisis impact detection (48h Alerts / 72h Assessments)') -async def detect(): - logger.info('Detecting impact on the ground') + +@ntl.group(short_help=f'Download NTL data ') +def download(): + pass + + + +@download.command(name='nasa', short_help=f'Download NTL products from NASA') +# +# @click.option('-b', '--bbox', +# required=True, +# type=BboxParamType(), +# help='Bounding box xmin/west, ymin/south, xmax/east, ymax/north' +# ) +@click.option( "-t", "--timestamp", "timestamp", + type=str, + required=True, + help='Granule timestamp string as date and time. Ex: 202604152232 ' + ) +@click.option( + "-p", + "product", + type=click.Choice(PRODUCT_NAMES, case_sensitive=True), + required=True, + help=f'The product to download.' 
+ + ) + +@click.option( + "--dest-dir", + "-d", # Short option + "dest_dir", # Function argument name + type=click.Path( + exists=False, # Set to True if you want Click to fail if the dir doesn't exist yet + file_okay=False, # Strictly enforce that this is a directory, not a file + dir_okay=True, + resolve_path=True # Resolves relative paths (like '.') to absolute paths automatically + ), + default="/tmp", # Defaults to /tmp + show_default=True, # Tells the user what the default is in the --help menu + help="Destination directory to save the downloaded images." +) + + +@click.pass_context +async def download_nasa(ctx, bbox:tuple[numbers.Number]=None, timestamp:str = None, product:str=None, dest_dir:str=None): + progress = ctx.obj.get('progress') + + downloaded_files = await download_from_nasa(timestamp=timestamp, product=product, dst_dir=dest_dir,progress=progress) + + + + +@download.command(name='noaa', short_help=f'Download operational NTL data from NOAA') + + +@click.pass_context +async def download_noaa(ctx, ): + logger.info('Downloading from NOAA') + + + -@ntl.command(short_help=f'Track long-term resilience and recovery curves (2-3 Week horizon)') -async def monitor(): - logger.info('Monitoring recovery') +# @ntl.command(short_help=f'Execute crisis impact detection (48h Alerts / 72h Assessments)') +# @click.pass_context +# async def detect(ctx): +# logger.info('Detecting impact on the ground') +# +# +# @ntl.command(short_help=f'Track long-term resilience and recovery curves (2-3 Week horizon)') +# async def monitor(): +# logger.info('Monitoring recovery') diff --git a/rapida/ntl/cache.py b/rapida/ntl/cache.py new file mode 100644 index 0000000..a45f44b --- /dev/null +++ b/rapida/ntl/cache.py @@ -0,0 +1,49 @@ +import shelve +import time +import os +import tempfile +import hashlib +import json + + + +MAX_AGE_SECONDS = 6 * 3600 # 6 hours +CACHE_PATH = os.path.join(tempfile.gettempdir(), "ntl_search_cache") + + + +def 
search_id(search_params: dict) -> str: + """Generates a deterministic unique ID based on the STAC search parameters.""" + # Sort the dictionary keys to ensure the same parameters always produce the exact same hash + param_string = json.dumps(search_params, sort_keys=True) + return hashlib.md5(param_string.encode('utf-8')).hexdigest() + + + +def store(key:str=None, url:str=None, tile:str=None, cache_path=CACHE_PATH): + with shelve.open(cache_path) as cache: + record = cache.get(key, None) + if record is None: + record = {tile:url}, time.time() + else: + record[0].update({tile:url}) + cache[key] = record + + + + + +def get_urls(key:str=None, tile:str=None, cache_path=CACHE_PATH): + with shelve.open(cache_path) as cache: + record = cache.get(key, None) + if record is None: + return + tiles, creation_time = record + # Invalidate purely on read + if time.time() - creation_time > MAX_AGE_SECONDS: + del cache[key] + return # Expired + if tile and tile in tiles: + return tiles[tile] + return tiles + diff --git a/rapida/ntl/nasa/const.py b/rapida/ntl/nasa/const.py index 9c7262e..63a71d9 100644 --- a/rapida/ntl/nasa/const.py +++ b/rapida/ntl/nasa/const.py @@ -1,46 +1,47 @@ import json from pystac_client import Client - +import re PRODUCT = 46 COLLECTIONS_STRING = \ ''' { - "LANCEMODIS": { - "A1": [ - "VJ146A1_NRT_2", - "VNP46A1_NRT_1", - "VNP46A1_NRT_2" - ], - "A1G": [ - "VJ146A1G_NRT_2", - "VNP46A1G_NRT_2", - "VNP46A1G_NRT_1" - ], - "A2": [ - "VNP46A2_NRT_2" - ] - }, - "LAADS": { - "A1": [ - "VJ146A1_2", - "VNP46A1_2" - ], - "A2": [ - "VJ146A2_2", - "VNP46A2_2" - ], - "A3": [ - "VJ146A3_2", - "VNP46A3_2", - "VNP46A3_1" - ], - "A4": [ - "VJ146A4_2", - "VNP46A4_2", - "VNP46A4_1" - ] - } + "LANCEMODIS": { + "A1": [ + "VJ146A1_NRT_2", + "VNP46A1_NRT_1", + "VNP46A1_NRT_2" + ], + "A2": [ + "VJ146A2_NRT_2", + "VNP46A2_NRT_2" + ], + "A1G": [ + "VJ146A1G_NRT_2", + "VNP46A1G_NRT_2", + "VNP46A1G_NRT_1" + ] + }, + "LAADS": { + "A1": [ + "VJ146A1_2", + "VNP46A1_2" + ], + "A2": [ + 
"VJ146A2_2", + "VNP46A2_2" + ], + "A3": [ + "VJ146A3_2", + "VNP46A3_2", + "VNP46A3_1" + ], + "A4": [ + "VJ146A4_2", + "VNP46A4_2", + "VNP46A4_1" + ] + } } ''' COLLECTIONS = json.loads(COLLECTIONS_STRING) @@ -63,6 +64,19 @@ PROCESSING_LEVELS = {stream_name: list(stream_data.keys()) for stream_name, stream_data in COLLECTIONS.items()} PROCESSING_LEVEL_NAMES = {CATALOG2STREAM[stream_name]: list(stream_data.keys()) for stream_name, stream_data in COLLECTIONS.items()} +NTL_FILENAME_PATTERN = re.compile( + r"^(?PV[A-Z0-9_]+)\." + r"A(?P\d{4})(?P\d{3})\." + r"(?:(?P