Merge pull request #1700 from bhilbert4/wisp-finder-del-figs

mfixstsci · web-flow · commit 9ca5373f2dce · 2025-09-30T14:36:56.000-04:00
wisp finder - nans in png, close figures
diff --git a/jwql/instrument_monitors/nircam_monitors/prepare_wisp_pngs.py b/jwql/instrument_monitors/nircam_monitors/prepare_wisp_pngs.py
@@ -7,6 +7,7 @@
 
 import argparse
 import numpy as np
+from astropy.convolution import Gaussian2DKernel, interpolate_replace_nans
 from astropy.io import fits
 from astropy.stats import sigma_clipped_stats
 import os
@@ -28,6 +29,28 @@ def create_figure(image, outfile):
     plt.imshow(image, origin='lower')
     plt.axis('off')
     plt.savefig(outfile, bbox_inches='tight')
+    plt.close('all')
+
+
+def fill_nan_with_nearest_neighbor(arr):
+    """
+    Replace NaN values in a 2D NumPy array by interpolating over neighboring
+    pixels with a 2D Gaussian kernel (standard deviation of 1 in x and y).
+
+    Parameters
+    ----------
+    arr : numpy.ndarray
+        A 2D NumPy array potentially containing NaN values.
+
+    Returns
+    -------
+    filled_arr : numpy.ndarray
+        Result of ``interpolate_replace_nans``: NaNs filled by Gaussian-weighted interpolation.
+    """
+    kernel = Gaussian2DKernel(x_stddev=1, y_stddev=1)
+    filled_arr = interpolate_replace_nans(arr, kernel)
+
+    return filled_arr
 
 
 def rescale_array(arr):
@@ -126,6 +149,9 @@ def run(filename, out_dir=None):
     """
     data = fits.getdata(filename)
 
+    # Replace NaN values with interpolated values from nearest neighbors
+    data = fill_nan_with_nearest_neighbor(data)
+
     # Get the basename of the input file. This will be used to create
     # the output png file name
     outfile_base = os.path.basename(filename).split('.')[0]
diff --git a/jwql/instrument_monitors/nircam_monitors/wisp_finder.py b/jwql/instrument_monitors/nircam_monitors/wisp_finder.py
@@ -164,7 +164,7 @@ def define_model_architecture():
 def define_options(parser=None, usage=None, conflict_handler='resolve'):
     """Add command line options
 
-    Parrameters
+    Parameters
     -----------
     parser : argparse.parser
         Parser object
@@ -265,7 +265,7 @@ def predict_wisp(model, image_path, transform):
     probability = torch.sigmoid(output).item()
     threshold = 0.5
     prediction_label = "wisp" if probability >= threshold else "no wisp"
-    return prediction_label
+    return prediction_label, probability, threshold
 
 
 def preprocess_image(image_path, transform):
@@ -308,6 +308,7 @@ def query_mast(starttime, endtime):
     rate_files : list
         List of filenames
     """
+    logging.info("Running sci_obs_id query")
     sci_obs_id_table = Observations.query_criteria(instrument_name=["NIRCAM/IMAGE"],
                                                    provenance_name=["CALJWST"],  # Executed observations
                                                    t_min=[starttime, endtime]
@@ -317,16 +318,18 @@ def query_mast(starttime, endtime):
 
     # Loop over visits identifying uncalibrated files that are associated
     # with them
-    for exposure in (sci_obs_id_table):
+    for i, exposure in enumerate(sci_obs_id_table):
         products = Observations.get_product_list(exposure)
         filtered_products = Observations.filter_products(products,
                                                          productType='SCIENCE',
                                                          productSubGroupDescription='RATE',
                                                          calib_level=[2])
+        logging.info(f"\tExposure {i+1} of {len(sci_obs_id_table)}: {len(products)} products filtered to {len(filtered_products)} rate files")
         sci_files_to_download.extend(filtered_products['dataURI'])
 
     # The current ML wisp finder model is only trained for the wisps on the B4 detector,
     # so keep only those files. Also, keep only the filenames themselves.
+    logging.info(f"Sorting {len(sci_files_to_download)} rate files")
     rate_files = sorted([fname.replace('mast:JWST/product/', '') for fname in sci_files_to_download if 'nrcb4' in fname])
     return rate_files
 
@@ -477,10 +480,12 @@ def run_predictor(ratefiles, model_file, start_date, end_date):
 
         # Remove any duplicates coming from files that are present in both the
         # public and proprietary filesystems
+        n_filepaths_before = len(filepaths)
         filepaths = remove_duplicate_files(filepaths)
+        n_filepaths_after = len(filepaths)
 
         # Copy files to working directory
-        logging.info("Copying files from the filesystem to the working directory.")
+        logging.info(f"Copying {n_filepaths_after} files from the filesystem to the working directory (removed {n_filepaths_before - n_filepaths_after} duplicates).")
         working_filepaths = copy_files_to_working_dir(filepaths)
 
         # Load the trained ML model
@@ -497,20 +502,20 @@ def run_predictor(ratefiles, model_file, start_date, end_date):
             png_filename = prepare_wisp_pngs.run(working_filepath, out_dir=working_dir)
 
             # Predict
-            prediction = predict_wisp(model, png_filename, transform)
+            prediction, probability, threshold = predict_wisp(model, png_filename, transform)
 
             # If a wisp is predicted, set the wisp flag in the anomalies database
             if prediction == "wisp":
                 # Create the rootname. Strip off the path info, and remove '.fits' and the suffix
                 # (i.e. 'rate'')
                 rootfile = '_'.join(os.path.basename(working_filepath).split('.')[0].split('_')[0:-1])
-                logging.info(f"\tFound wisp in {rootfile}\n")
+                logging.info(f"\tFound wisp in {rootfile} (probability {probability} >= threshold {threshold})\n\n")
 
                 # Add the wisp flag to the RootFileInfo object for the rootfile
                 add_wisp_flag(rootfile)
             else:
                 rootfile = '_'.join(os.path.basename(working_filepath).split('.')[0].split('_')[0:-1])
-                logging.info(f'\tNo wisp in {rootfile}\n')
+                logging.info(f'\tNo wisp in {rootfile} (probability {probability} < threshold {threshold})\n')
 
             # Delete the png and fits files
             os.remove(png_filename)
diff --git a/jwql/utils/constants.py b/jwql/utils/constants.py
@@ -720,7 +720,8 @@
     "niriss_readnoise_query_history", "niriss_readnoise_stats",
     "nirspec_readnoise_query_history", "nirspec_readnoise_stats",
     "miri_ta_query_history", "miri_ta_stats",
-    "nirspec_ta_query_history", "nirspec_ta_stats", "nirspec_wata_stats", "nirspec_msata_stats"
+    "nirspec_ta_query_history", "nirspec_ta_stats", "nirspec_wata_stats", "nirspec_msata_stats",
+    "wisp_finder_b4_query_history"
 ]
 
 # Suffix for msa files
diff --git a/jwql/website/apps/jwql/monitor_models/common.py b/jwql/website/apps/jwql/monitor_models/common.py
@@ -149,8 +149,8 @@ class Meta:
     * Django doesn't have a built-in array data type, so you need to import it from the
       database-compatibility layers. The ArrayField takes, as a required argument, the type
       of data that makes up the array.
-    * In the Meta sub-class of the monitor class, the `db_table_comment = 'monitors'` statement is
-      required so that django knows that the model should be stored in the monitors table.
+    * In order to store a table in the Monitors database (JWQLDB), you must add that table's name
+      (Meta.db_table) to the MONITOR_TABLE_NAMES constant in jwql/utils/constants.py.
     * The `float()` casts are required because the database interface doesn't understand
       numpy data types.
     * The `list()` cast is required because the database interface doesn't understand the

Original file line number	Diff line number	Diff line change
`@@ -720,7 +720,8 @@`
`720`	`720`	`"niriss_readnoise_query_history", "niriss_readnoise_stats",`
`721`	`721`	`"nirspec_readnoise_query_history", "nirspec_readnoise_stats",`
`722`	`722`	`"miri_ta_query_history", "miri_ta_stats",`
`723`		`- "nirspec_ta_query_history", "nirspec_ta_stats", "nirspec_wata_stats", "nirspec_msata_stats"`
	`723`	`+ "nirspec_ta_query_history", "nirspec_ta_stats", "nirspec_wata_stats", "nirspec_msata_stats",`
	`724`	`+ "wisp_finder_b4_query_history"`
`724`	`725`	`]`
`725`	`726`
`726`	`727`	`# Suffix for msa files`