Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
58c0648
add more small testing data
scottstanie Oct 28, 2022
eb8b2f3
make a failing test for anx crossing
scottstanie Oct 28, 2022
556e44d
create methods for burst/track, fill tests
scottstanie Oct 28, 2022
42df314
fix erroneous burst id in existing test
scottstanie Oct 28, 2022
63859d8
add a sample burst db to compare
scottstanie Oct 29, 2022
2c904d5
add script for remaking the burst sample
scottstanie Oct 29, 2022
af6d4ca
add a geometry check for the esa database
scottstanie Oct 29, 2022
53dc5ea
perform test without pandas
scottstanie Oct 29, 2022
0f86d66
codacy items
scottstanie Oct 29, 2022
2ed1623
add the geometry comparison to the other test cases
scottstanie Oct 29, 2022
cbabae1
add two more test cases
scottstanie Nov 1, 2022
98eb819
refactor tests for new cases
scottstanie Nov 1, 2022
dba751c
redo logic for strange track175 case
scottstanie Nov 1, 2022
2daef08
update burst csv
scottstanie Nov 1, 2022
a1e13c4
fix first test problem, cut bursts
scottstanie Nov 1, 2022
2236cf7
get tests working again for track175 case
scottstanie Nov 1, 2022
7e95f3c
fix esa db csv
scottstanie Nov 1, 2022
2382df4
use nsmap instead of long manual urls
scottstanie Nov 1, 2022
9e1b544
remove testing script
scottstanie Nov 1, 2022
1c1dc57
working version on full orbit cycle
scottstanie Nov 1, 2022
5d19f7d
fix tests to check all subswaths
scottstanie Nov 1, 2022
fcf833d
try recursive include for circleci fail, codacy
scottstanie Nov 1, 2022
110b06c
retry manifest
scottstanie Nov 1, 2022
eae3490
Merge branch 'main' into burst-id-fix
scottstanie Nov 16, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
use nsmap instead of long manual urls
  • Loading branch information
scottstanie committed Nov 1, 2022
commit 2382df431c8709ca408e1a1a11c686070ba3a488
67 changes: 63 additions & 4 deletions src/s1reader/get_xml_boundary.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,11 +170,58 @@ def run_asfsmd(safe_list, pol="vv", out_name="sentinel1_boundaries_asfsmd.csv"):
)
out_dict["safe_path"].append(safe_path)
df = pd.DataFrame(out_dict)
df = df.sort_values("burst_id").reset_index(drop=True)
if out_name:
df.to_csv(out_name, index=False)
df["boundary"] = df["boundary_wkt"].apply(shapely.wkt.loads)
return df, processed_safes
return df


def run_asfsmd_parallel(
    safe_list, pol="vv", out_name="sentinel1_boundaries_asfsmd.csv", max_workers=10
):
    """Extract burst boundaries from many SAFE files using a process pool.

    Parameters
    ----------
    safe_list : list of str
        Paths to the Sentinel-1 SAFE files/zips to load bursts from.
    pol : str, optional
        Polarization to load (default: "vv").
    out_name : str, optional
        If truthy, path of the CSV file to write the results to.
    max_workers : int, optional
        Number of worker processes to use (default: 10).

    Returns
    -------
    pd.DataFrame
        One row per burst with columns burst_id, boundary_wkt, safe_path,
        plus a `boundary` column holding the parsed shapely geometry.
    """
    from concurrent.futures import ProcessPoolExecutor, as_completed

    all_results = []
    with ProcessPoolExecutor(max_workers=max_workers) as exc:
        futures = [
            exc.submit(_run_safe, safe_name, pol) for safe_name in safe_list
        ]
        # Gather in completion order; the frame is sorted below, so the
        # nondeterministic ordering from as_completed does not matter.
        for fut in as_completed(futures):
            all_results.extend(fut.result())

    df = pd.DataFrame(all_results, columns=["burst_id", "boundary_wkt", "safe_path"])
    df = df.sort_values("burst_id").reset_index(drop=True)
    if out_name:
        # Write the CSV before attaching the non-CSV-serializable geometry column.
        df.to_csv(out_name, index=False)
    df["boundary"] = df["boundary_wkt"].apply(shapely.wkt.loads)
    return df


def _run_safe(safe_path, pol="vv"):
    """Load every burst (subswaths IW1-IW3) from a single SAFE file.

    Returns a list of (burst_id, boundary WKT, absolute SAFE path) tuples.
    Any failure to load the SAFE is reported and yields an empty list, so a
    batch run over many files keeps going (best-effort helper).
    """
    safe_path = os.path.abspath(safe_path)
    try:
        swath_burst_lists = [
            load_bursts(
                safe_path,
                orbit_path=None,
                swath_num=swath,
                pol=pol,
                flag_apply_eap=False,
            )
            for swath in (1, 2, 3)
        ]
    except Exception as e:
        print(f"Failed to load {safe_path}: {e}")
        return []

    return [
        (burst.burst_id, shapely.wkt.dumps(MultiPolygon(burst.border)), safe_path)
        for swath_bursts in swath_burst_lists
        for burst in swath_bursts
    ]

def as_datetime(t_str):
"""Parse given time string to datetime.datetime object.
Expand Down Expand Up @@ -334,15 +381,25 @@ def transform(geom, src_epsg, dst_epsg):

def compare_boundaries(df, db_path=None):
results = []
failed_idxs = []
if db_path is None:
db_path = "/home/staniewi/dev/burst_map_margin4000.sqlite3"
query = "SELECT epsg, asbinary(geometry) FROM burst_id_map WHERE burst_id_jpl = ?"
with sqlite3.connect(db_path) as con:
con.enable_load_extension(True)
con.load_extension("mod_spatialite")
for bid in df.burst_id:
for idx, bid in enumerate(df.burst_id):
cur = con.execute(query, (bid,))
results.append(cur.fetchone())
r = cur.fetchone()
if r is None:
failed_idxs.append(idx)
else:
results.append(r)

bad_df = df.index.isin(failed_idxs)
df_failed = df.loc[bad_df].copy()
print(f"Failed to find {len(df_failed)} bursts")
df = df.loc[~bad_df]

epsgs, db_geoms_wkb = zip(*results)
df["db_boundary"] = [shapely.wkb.loads(g) for g in db_geoms_wkb]
Expand All @@ -368,7 +425,9 @@ def compare_boundaries(df, db_path=None):
df["ymaxs"] = bounds_db[:, 3] - bounds_actual[:, 3]
df = df.drop(["boundary_wkt"], axis=1, errors="ignore")
df["iou"] = df.apply(lambda row: iou(row.boundary_utm, row.db_boundary_utm), axis=1)
return df
mismatches = df["iou"] < 0.60
print(f"Found {mismatches.sum()} mismatches")
return df, df_failed.reset_index()


def iou(geom1, geom2):
Expand Down
37 changes: 24 additions & 13 deletions src/s1reader/s1_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,30 +268,31 @@ def get_ipf_version(tree: ET):
'''Extract the IPF version from the ET of manifest.safe
'''
# get version from software element
search_term = _get_manifest_pattern(tree, ['processing', 'facility', 'software'])
software_elem = tree.find(search_term)
search_term, nsmap = _get_manifest_pattern(tree, ['processing', 'facility', 'software'])
software_elem = tree.find(search_term, nsmap)
ipf_version = version.parse(software_elem.attrib['version'])

return ipf_version

def get_start_end_track(tree: ET):
    '''Extract the start/end relative orbit numbers from a manifest.safe tree.

    Parameters
    ----------
    tree : ET
        Parsed ElementTree (or lxml tree) of the manifest.safe file.

    Returns
    -------
    tuple of (int, int)
        The (start, end) relative orbit (track) numbers. They differ when
        the scene crosses an ascending node.
    '''
    search_term, nsmap = _get_manifest_pattern(tree, ['orbitReference', 'relativeOrbitNumber'])
    # The manifest carries exactly two relativeOrbitNumber elements: start, stop
    elem_start, elem_end = tree.findall(search_term, nsmap)
    return int(elem_start.text), int(elem_end.text)


def _get_manifest_pattern(tree: ET, keys: list):
'''Extract data from the ET of manifest.safe'''
'''Get the search path to extract data from the ET of manifest.safe'''
# Get the namespace from the root element to avoid full urls
try:
nsmap = tree.nsmap
except AttributeError:
nsmap = tree.getroot().nsmap
# path to xmlData in manifest
xml_meta_path = 'metadataSection/metadataObject/metadataWrap/xmlData'
safe_terms = "/".join([f'safe:{key}' for key in keys])
return f'{xml_meta_path}/{safe_terms}', nsmap

# piecemeal build path to nested data
esa_http = '{http://www.esa.int/safe/sentinel-1.0}'
search_term = xml_meta_path
for k in keys:
search_term += f'/{esa_http}{k}'
return search_term

def get_path_aux_cal(directory_aux_cal: str, str_annotation: str):
'''
Expand Down Expand Up @@ -890,7 +891,16 @@ def get_burst_id(sensing_time: datetime.datetime, ascending_node_dt: datetime.da
has_anx_crossing = end_track == (start_track % 175) + 1
time_since_anx = (sensing_time - ascending_node_dt).total_seconds()

if (time_since_anx - T_orb) > T_beam:
# Note on adjacent burst times:
# IW1 -> IW2 takes ~0.83220 seconds
# IW2 -> IW3 takes ~1.07803 seconds
# IW3 -> IW1 takes ~0.84803 seconds
# cumulative_burst_time = np.cumsum([0, 0.832, 1.078, 0.848])[:3]
swath_num = int(subswath_name[-1])
cumulative_burst_time = [0.0, 0.832, 1.91]
max_time_past_anx = cumulative_burst_time[swath_num - 1]

if (time_since_anx - T_orb) > max_time_past_anx:
if not has_anx_crossing:
# Additional exception for scenes which have an ascending node
# provided that's more than 1 orbit in the past
Expand All @@ -904,3 +914,4 @@ def get_burst_id(sensing_time: datetime.datetime, ascending_node_dt: datetime.da
esa_burst_id = 1 + int(np.floor((dt_b - T_pre) / T_beam))
# Form the unique JPL ID by combining track/burst/swath
return f't{track_number:03d}_{esa_burst_id:06d}_{subswath_name.lower()}'

5 changes: 1 addition & 4 deletions tests/data/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,7 @@ Example datasets:
- S1A_IW_SLC__1SDV_20221024T184148_20221024T184218_045587_05735F_D6E2.zip
- Equator ascending data which contains a node crossing. The final bursts are a different track than initial bursts.
- orbit file was downloaded and OSV list truncated for space
- Added by Scott Staniewicz

This one was created using `asfsmd`:
```bash
asfsmd S1A_IW_SLC__1SDV_20221024T184148_20221024T184218_045587_05735F_D6E2 --do-noise --do-cal -iw 2
```
Expand Down
3 changes: 2 additions & 1 deletion tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from s1reader.s1_reader import get_start_end_track

BURST_ID_PAT = r"t(?P<track>\d{3})_(?P<burst_id>\d{6})_iw(?P<subswath_num>[1-3])"
# Test cases where the normal relative orbit calculation fails
ORBIT_EXCEPTIONS = [
"S1A_IW_SLC__1SDV_20141004T031312_20141004T031339_002674_002FB6_07B5.zip",
]
Expand All @@ -28,7 +29,7 @@ def test_all_files(test_paths, esa_burst_db):

def test_orbit_exception(test_paths):
"""Check the 2014 test case where the track number is different.

See https://forum.step.esa.int/t/sentinel-1-relative-orbit-from-filename/7042/11
"""
zip_path = test_paths.data_dir / ORBIT_EXCEPTIONS[0]
Expand Down