diff --git a/modules/SimpleSLAM/LICENSE b/modules/SimpleSLAM/LICENSE new file mode 100644 index 0000000000..261eeb9e9f --- /dev/null +++ b/modules/SimpleSLAM/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/modules/SimpleSLAM/README.md b/modules/SimpleSLAM/README.md
new file mode 100644
index 0000000000..e046d1cea4
--- /dev/null
+++ b/modules/SimpleSLAM/README.md
@@ -0,0 +1,180 @@
+# Structure From Motion (SfM) - README
+
+
+https://github.com/user-attachments/assets/f489a554-299e-41ad-a4b4-436e32d8cbd5
+
+
+## IMPORTANT - LightGlue Installation
+- 1st, install LightGlue from source:
+```
+git clone https://github.com/cvg/LightGlue.git && cd LightGlue
+python -m pip install -e .
+```
+- 2nd, download the data
+  - https://rpg.ifi.uzh.ch/docs/teaching/2024/kitti05.zip
+  - put it in a folder named 'Dataset'
+- 3rd, install the dependencies and run the demo
+  - install ```requirements.txt```
+  - ```bash scripts/run_tracker_visualization.sh```
+
+## INSTALLING COLMAP
+![Image showing commands](media/colmap_instruc.png)
+Use this command to build: ```ninja -j4 -l 8 > build.log 2>&1```
+
+For pycolmap, see https://colmap.github.io/pycolmap/index.html#pycolmap-index
+
+Refer to the image below if the build command freezes or crashes:
+![pycolmap instructions - Image showing commands](media/pycolmap_instruc.png)
+
+
+## Overview
+
+This repository contains two Python scripts that demonstrate basic Structure-from-Motion (SfM) pipelines:
+
+1. **sfm.py**
+   - A classical approach that uses standard OpenCV feature detectors (e.g., SIFT, ORB, AKAZE) and BFMatcher or FLANN to match keypoints between images.
+   - Performs pose estimation (essential matrix or PnP) and triangulation to build a sparse map of 3D points.
+   - Optionally applies non-linear refinement via scipy's least squares to improve the estimated camera pose.
+
+2. **sfm_lightglue_aliked.py**
+   - An enhanced pipeline that integrates neural network-based feature extraction (**ALIKED**) and feature matching (**LightGlue**).
+   - Demonstrates how modern, learned feature detectors and matchers can improve keypoint reliability and reduce drift.
+   - Includes the same core SfM steps (pose estimation, triangulation, optional non-linear refinement).
+   - Tracks a simple **Absolute Trajectory Error (ATE)** and accumulates a **cumulative translation error** for quick performance checks.
+
+Both scripts are **prototypes** designed primarily for **concept validation and experimentation**. For real-time, production-grade use, it is recommended to offload optimization and other heavy lifting to a C++ back end (e.g., [Ceres Solver](https://github.com/ceres-solver/ceres-solver)).
+
+---
+
+## Features
+
+### Common SfM Steps
+- **Dataset Loading** (KITTI, Malaga, or custom folder with images).
+- **Camera Calibration** for loading intrinsic/extrinsic parameters.
+- **Feature Extraction**
+  - sfm.py: classical (SIFT, ORB, AKAZE)
+  - sfm_lightglue_aliked.py: ALIKED (learned keypoints + descriptors)
+- **Feature Matching**
+  - sfm.py: BFMatcher or FLANN
+  - sfm_lightglue_aliked.py: LightGlue (neural network-based matching)
+- **Motion Estimation**
+  - 2D-2D with essential matrix.
+  - 2D-3D with PnP (once 3D map points are available).
+- **Triangulation**
+  - Convert 2D matches into 3D points (see the sketch below).
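+  A minimal, self-contained sketch of this step (not the repository's exact code; the intrinsics, relative pose, and points below are synthetic placeholders):
+
+  ```python
+  import numpy as np
+  import cv2
+
+  # Synthetic two-view setup: project known 3D points into both cameras,
+  # then recover them with cv2.triangulatePoints.
+  K = np.array([[700.0, 0.0, 320.0],
+                [0.0, 700.0, 240.0],
+                [0.0, 0.0, 1.0]])
+  R = cv2.Rodrigues(np.array([[0.0], [0.05], [0.0]]))[0]   # small rotation between views
+  t = np.array([[0.1], [0.0], [0.0]])                      # baseline along x
+  P1 = K @ np.hstack([np.eye(3), np.zeros((3, 1))])        # first camera at the origin
+  P2 = K @ np.hstack([R, t])                               # second camera
+
+  X = np.array([[0.0, 0.0, 5.0], [0.5, -0.2, 6.0], [-0.3, 0.4, 4.0]])  # Nx3 points
+
+  def project(P, pts):
+      h = np.hstack([pts, np.ones((len(pts), 1))]) @ P.T
+      return h[:, :2] / h[:, 2:3]                          # Nx2 pixel coordinates
+
+  pts1, pts2 = project(P1, X), project(P2, X)
+  X4 = cv2.triangulatePoints(P1, P2, pts1.T, pts2.T)       # 4xN homogeneous
+  X_rec = (X4[:3] / X4[3]).T                               # Nx3, should match X
+  print(np.allclose(X_rec, X, atol=1e-4))
+  ```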
+- **Non-linear Refinement** + - Uses scipy’s Levenberg-Marquardt (`least_squares`) to minimize reprojection error. +- **Basic Stereo Handling** (KITTI, Malaga) + - Combine left and right images for better scale recovery if stereo calibration is present. +- **Trajectory Evaluation** + - **ATE** (Absolute Trajectory Error) if ground truth is available. + - A simple “cumulative translation error” measure. + +--- + +## Requirements + +- **Python 3.7+** +- **OpenCV** (>= 4.x recommended) +- **NumPy** +- **Matplotlib** (for visualization) +- **scipy** (for non-linear refinement) +- **tqdm** (for progress bars) +- **PyTorch** (only required for sfm_lightglue_aliked.py, if using LightGlue + ALIKED) +- **lightglue** (the Python package for the LightGlue matching framework) + +--- + +## Usage + +### 1. Cloning & Installation +1. Clone this repository: + ```bash + git clone https://github.com/your-organization/your-repo.git + ``` +2. Install dependencies: + ```bash + pip install -r requirements.txt + ``` + Or individually: + ```bash + pip install opencv-python numpy matplotlib scipy tqdm torch + # plus LightGlue if not already installed + ``` + +### 2. Running **sfm.py** +```bash +python sfm.py --dataset kitti --data_path ./Dataset/kitti +``` +- **Arguments**: + - `--dataset`: name of the dataset (kitti, malaga, or custom). + - `--data_path`: path to the dataset folder. +- **Behavior**: + - Loads images, performs feature detection + matching (SIFT, ORB, AKAZE), estimates camera motion, triangulates points. + - Optionally runs non-linear refinement on the pose. + - Plots or logs the results (trajectory, errors). + +*(Adjust arguments to match your own script’s CLI if needed.)* + +### 3. Running **sfm_lightglue_aliked.py** +```bash +python sfm_lightglue_aliked.py --dataset kitti --data_path ./Dataset/kitti --use_lightglue True +``` +- **Arguments**: + - `--dataset`: name of the dataset (kitti, malaga, or custom). + - `--data_path`: path to the dataset folder. + - `--use_lightglue`: enable or disable ALIKED + LightGlue pipeline. +- **Behavior**: + - Loads images, runs ALIKED for feature extraction, and LightGlue for matching (GPU if available). + - Estimates camera motion, triangulates points, performs non-linear refinement if configured. + - Computes: + - **ATE** (Absolute Trajectory Error) + - A “cumulative translation error” measure + - Optionally displays debug visualizations (keypoints, matches, trajectory). + +### 4. Visualizations +- **Matplotlib** windows may pop up showing: + - Keypoints and matches for consecutive frames. + - The evolution of the 3D point cloud (if any). + - The camera’s estimated trajectory vs. ground truth (if available). + +### 5. Customization +- Modify `THUMPUP_POS_THRESHOLD` and `THUMPUP_ROT_THRESHOLD` for keyframe selection. +- Tweak the **maximum keypoints** or **confidence** in `ALIKED` or **LightGlue** for performance vs. accuracy trade-offs. +- Adjust RANSAC thresholds or non-linear refinement parameters (in `refine()` method) for more robust or faster solutions. + +--- + +## Implementation Details + +- **sfm.py** + - Uses OpenCV for feature detection (SIFT, ORB, or AKAZE). + - BFMatcher or FLANN for matching. + - Essential matrix / PnP for pose. + - Minimal keyframe selection logic. + +- **sfm_lightglue_aliked.py** + - ALIKED for learned keypoints + descriptors, LightGlue for matching. + - Similar pose estimation logic (PnP, essential matrix). + - Triangulation + refinement steps are nearly the same. + - Typically yields more reliable matches and lower drift. 
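+  For reference, a minimal extraction/matching sketch, adapted from the upstream LightGlue README (the image paths and keypoint budget are placeholders, not this repository's exact settings):
+
+  ```python
+  import torch
+  from lightglue import ALIKED, LightGlue
+  from lightglue.utils import load_image, rbd
+
+  device = "cuda" if torch.cuda.is_available() else "cpu"
+  extractor = ALIKED(max_num_keypoints=2048).eval().to(device)  # keypoint budget: placeholder
+  matcher = LightGlue(features="aliked").eval().to(device)
+
+  img0 = load_image("frame_000000.png").to(device)  # placeholder paths
+  img1 = load_image("frame_000001.png").to(device)
+
+  feats0 = extractor.extract(img0)
+  feats1 = extractor.extract(img1)
+  out = matcher({"image0": feats0, "image1": feats1})
+  feats0, feats1, out = [rbd(x) for x in (feats0, feats1, out)]  # drop batch dimension
+
+  matches = out["matches"]                   # (M, 2) index pairs into the two keypoint sets
+  pts0 = feats0["keypoints"][matches[:, 0]]  # matched keypoints in image 0
+  pts1 = feats1["keypoints"][matches[:, 1]]  # matched keypoints in image 1
+  ```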
+ +- **Stereo** logic (KITTI, Malaga) uses left/right cameras for absolute scale. +- **Monocular** is scale-invariant and can produce an arbitrary scale. +- **Error Metrics**: + - **ATE**: Norm of translation difference from ground truth. + - **Cumulative translation error**: Summation of frame-by-frame translation offsets. + +--- + +## Performance & Future Directions + +- **Python Prototyping**: Great for algorithmic experimentation but can be slower for large-scale or real-time tasks. +- **Production-Grade**: Offload heavy steps (bundle adjustment, large-scale optimization) to C++. +- **Loop Closure & Full SLAM**: These scripts focus on **Visual Odometry**. Future expansions may include place recognition, pose graph optimization, etc. + +--- + diff --git a/modules/SimpleSLAM/config/calibrate_camera/calibration.pkl b/modules/SimpleSLAM/config/calibrate_camera/calibration.pkl new file mode 100644 index 0000000000..9d15f007d0 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/calibration.pkl differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/calibration.py b/modules/SimpleSLAM/config/calibrate_camera/calibration.py new file mode 100644 index 0000000000..fe42ae17e9 --- /dev/null +++ b/modules/SimpleSLAM/config/calibrate_camera/calibration.py @@ -0,0 +1,137 @@ +import numpy as np +import cv2 as cv +import glob +import pickle + + + +################ FIND CHESSBOARD CORNERS - OBJECT POINTS AND IMAGE POINTS ############################# + +chessboardSize = (9,6) +# frameSize = (640,480) +image_size = None + +# termination criteria +criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 30, 0.001) + + +# prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0) +objp = np.zeros((chessboardSize[0] * chessboardSize[1], 3), np.float32) +objp[:,:2] = np.mgrid[0:chessboardSize[0],0:chessboardSize[1]].T.reshape(-1,2) + +size_of_chessboard_squares_mm = 20 +objp = objp * size_of_chessboard_squares_mm + + +# Arrays to store object points and image points from all the images. +objpoints = [] # 3d point in real world space +imgpoints = [] # 2d points in image plane. 
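+
+# The loop below walks every calibration image, detects the inner chessboard
+# corners, and stores the matching 3-D object point / 2-D image point pairs for
+# cv.calibrateCamera (cornerSubPix refines the corners drawn for inspection).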
+ + +images = glob.glob('./images/*.png') + +for image in images: + + img = cv.imread(image) + gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY) + + # Set image_size based on the first image + if image_size is None: + image_size = gray.shape[::-1] + + + # Find the chess board corners + ret, corners = cv.findChessboardCorners(gray, chessboardSize, None) + + # If found, add object points, image points (after refining them) + if ret == True: + + objpoints.append(objp) + corners2 = cv.cornerSubPix(gray, corners, (11,11), (-1,-1), criteria) + imgpoints.append(corners) + + # Draw and display the corners + cv.drawChessboardCorners(img, chessboardSize, corners2, ret) + # cv.imshow('img', img) + # cv.waitKey(1000) + + +cv.destroyAllWindows() + + + + +############## CALIBRATION ####################################################### + +# Ensure that image_size has been set +if image_size is not None: + ret, cameraMatrix, dist, rvecs, tvecs = cv.calibrateCamera(objpoints, imgpoints, image_size, None, None) +else: + raise Exception("Error: No images were processed for calibration.") + +# Save the camera calibration result for later use (we won't worry about rvecs / tvecs) +pickle.dump((cameraMatrix, dist), open( "calibration.pkl", "wb" )) +pickle.dump(cameraMatrix, open( "cameraMatrix.pkl", "wb" )) +pickle.dump(dist, open( "dist.pkl", "wb" )) + +# ###########PRINT PARAMETERS############################# +# Print the camera matrix +print("Camera Matrix:") +print(cameraMatrix) + +# Print the distortion coefficients +print("\nDistortion Coefficients:") +print(dist) + +# # Print the rotation vectors +# print("\nRotation Vectors:") +# for i, rvec in enumerate(rvecs): +# print(f"Image {i+1}:") +# print(rvec) + +# # Print the translation vectors +# print("\nTranslation Vectors:") +# for i, tvec in enumerate(tvecs): +# print(f"Image {i+1}:") +# print(tvec) + + +# ############## UNDISTORTION ##################################################### + +# img = cv.imread('cali5.png') +# h, w = img.shape[:2] +# newCameraMatrix, roi = cv.getOptimalNewCameraMatrix(cameraMatrix, dist, (w,h), 1, (w,h)) + + + +# # Undistort +# dst = cv.undistort(img, cameraMatrix, dist, None, newCameraMatrix) + +# # crop the image +# x, y, w, h = roi +# dst = dst[y:y+h, x:x+w] +# cv.imwrite('caliResult1.png', dst) + + + +# # Undistort with Remapping +# mapx, mapy = cv.initUndistortRectifyMap(cameraMatrix, dist, None, newCameraMatrix, (w,h), 5) +# dst = cv.remap(img, mapx, mapy, cv.INTER_LINEAR) + +# # crop the image +# x, y, w, h = roi +# dst = dst[y:y+h, x:x+w] +# cv.imwrite('caliResult2.png', dst) + + + + +# Reprojection Error +mean_error = 0 + +for i in range(len(objpoints)): + imgpoints2, _ = cv.projectPoints(objpoints[i], rvecs[i], tvecs[i], cameraMatrix, dist) + error = cv.norm(imgpoints[i], imgpoints2, cv.NORM_L2)/len(imgpoints2) + mean_error += error + +print( "total error: {}".format(mean_error/len(objpoints)) ) diff --git a/modules/SimpleSLAM/config/calibrate_camera/cameraMatrix.pkl b/modules/SimpleSLAM/config/calibrate_camera/cameraMatrix.pkl new file mode 100644 index 0000000000..e4c1a2f062 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/cameraMatrix.pkl differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/dist.pkl b/modules/SimpleSLAM/config/calibrate_camera/dist.pkl new file mode 100644 index 0000000000..e68f6ef218 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/dist.pkl differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/getImages.py 
b/modules/SimpleSLAM/config/calibrate_camera/getImages.py new file mode 100644 index 0000000000..6022cf49d9 --- /dev/null +++ b/modules/SimpleSLAM/config/calibrate_camera/getImages.py @@ -0,0 +1,25 @@ +import cv2 + +cap = cv2.VideoCapture(2) + +num = 0 + +while cap.isOpened(): + + succes, img = cap.read() + + k = cv2.waitKey(5) + + if k == 27: + break + elif k == ord('s'): # wait for 's' key to save and exit + cv2.imwrite('images/img' + str(num) + '.png', img) + print("image saved!") + num += 1 + + cv2.imshow('Img',img) + +# Release and destroy all windows before termination +cap.release() + +cv2.destroyAllWindows() \ No newline at end of file diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0001.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0001.png new file mode 100644 index 0000000000..ca03e4cf07 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0001.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0002.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0002.png new file mode 100644 index 0000000000..e10588ecea Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0002.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0006.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0006.png new file mode 100644 index 0000000000..44444f4af5 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0006.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0009.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0009.png new file mode 100644 index 0000000000..8ed06231c3 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0009.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0012.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0012.png new file mode 100644 index 0000000000..9cea8e72e7 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0012.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0013.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0013.png new file mode 100644 index 0000000000..3151ee953f Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0013.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0014.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0014.png new file mode 100644 index 0000000000..69b7b43b41 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0014.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0016.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0016.png new file mode 100644 index 0000000000..61243fceef Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0016.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0018.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0018.png new file mode 100644 index 0000000000..4638656186 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0018.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0021.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0021.png new file mode 100644 index 0000000000..fa320c5979 Binary files /dev/null and 
b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0021.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0025.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0025.png new file mode 100644 index 0000000000..64602b5255 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0025.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0029.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0029.png new file mode 100644 index 0000000000..174b1d0489 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0029.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0030.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0030.png new file mode 100644 index 0000000000..9a423ecc34 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0030.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0032.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0032.png new file mode 100644 index 0000000000..a4610a9b13 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0032.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0033.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0033.png new file mode 100644 index 0000000000..626fafc42f Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0033.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0034.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0034.png new file mode 100644 index 0000000000..97476fece8 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0034.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0036.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0036.png new file mode 100644 index 0000000000..bee1acd749 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0036.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0042.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0042.png new file mode 100644 index 0000000000..bfedab34c0 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0042.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0044.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0044.png new file mode 100644 index 0000000000..5b0f48e641 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0044.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0045.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0045.png new file mode 100644 index 0000000000..5a96e94590 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0045.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0048.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0048.png new file mode 100644 index 0000000000..8371db0c95 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0048.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0049.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0049.png new file mode 100644 index 0000000000..408b53c56a Binary files /dev/null and 
b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0049.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0050.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0050.png new file mode 100644 index 0000000000..70a4ae65e0 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0050.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0051.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0051.png new file mode 100644 index 0000000000..9bf7de7128 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0051.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0052.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0052.png new file mode 100644 index 0000000000..cfb31c568f Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0052.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0054.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0054.png new file mode 100644 index 0000000000..7f4a4c3ecb Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0054.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0055.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0055.png new file mode 100644 index 0000000000..70952068c0 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0055.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0058.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0058.png new file mode 100644 index 0000000000..1c81661de8 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0058.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0062.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0062.png new file mode 100644 index 0000000000..b109d78d9b Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0062.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0065.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0065.png new file mode 100644 index 0000000000..d78ab5f5cb Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0065.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0069.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0069.png new file mode 100644 index 0000000000..89b0b7869c Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0069.png differ diff --git a/modules/SimpleSLAM/config/monocular.yaml b/modules/SimpleSLAM/config/monocular.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/SimpleSLAM/config/stereo.yaml b/modules/SimpleSLAM/config/stereo.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/SimpleSLAM/docs/system_design.md b/modules/SimpleSLAM/docs/system_design.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/SimpleSLAM/media/Screenshot from 2025-06-25 20-23-36.png b/modules/SimpleSLAM/media/Screenshot from 2025-06-25 20-23-36.png new file mode 100644 index 0000000000..d117e7380a Binary files /dev/null and b/modules/SimpleSLAM/media/Screenshot from 2025-06-25 20-23-36.png differ diff --git a/modules/SimpleSLAM/media/colmap_instruc.png 
b/modules/SimpleSLAM/media/colmap_instruc.png new file mode 100644 index 0000000000..bb29c8acf4 Binary files /dev/null and b/modules/SimpleSLAM/media/colmap_instruc.png differ diff --git a/modules/SimpleSLAM/media/feature-visualization/Feature Tracking_screenshot_04.06.2025.png b/modules/SimpleSLAM/media/feature-visualization/Feature Tracking_screenshot_04.06.2025.png new file mode 100644 index 0000000000..24a7ec50b2 Binary files /dev/null and b/modules/SimpleSLAM/media/feature-visualization/Feature Tracking_screenshot_04.06.2025.png differ diff --git a/modules/SimpleSLAM/media/feature-visualization/Feature Tracking_screenshot_04.06.20251.png b/modules/SimpleSLAM/media/feature-visualization/Feature Tracking_screenshot_04.06.20251.png new file mode 100644 index 0000000000..ff002efebf Binary files /dev/null and b/modules/SimpleSLAM/media/feature-visualization/Feature Tracking_screenshot_04.06.20251.png differ diff --git a/modules/SimpleSLAM/media/feature-visualization/kitt Feature Tracking_screenshot_04.06.2025.png b/modules/SimpleSLAM/media/feature-visualization/kitt Feature Tracking_screenshot_04.06.2025.png new file mode 100644 index 0000000000..0514bafc4d Binary files /dev/null and b/modules/SimpleSLAM/media/feature-visualization/kitt Feature Tracking_screenshot_04.06.2025.png differ diff --git a/modules/SimpleSLAM/media/feature-visualization/lightglue -Feature Tracking_screenshot_04.06.2025.png b/modules/SimpleSLAM/media/feature-visualization/lightglue -Feature Tracking_screenshot_04.06.2025.png new file mode 100644 index 0000000000..a5db82cdc0 Binary files /dev/null and b/modules/SimpleSLAM/media/feature-visualization/lightglue -Feature Tracking_screenshot_04.06.2025.png differ diff --git a/modules/SimpleSLAM/media/feature-visualization/lightglue Feature Tracking_screenshot_04.06.2025.png b/modules/SimpleSLAM/media/feature-visualization/lightglue Feature Tracking_screenshot_04.06.2025.png new file mode 100644 index 0000000000..1915546c89 Binary files /dev/null and b/modules/SimpleSLAM/media/feature-visualization/lightglue Feature Tracking_screenshot_04.06.2025.png differ diff --git a/modules/SimpleSLAM/media/feature-visualization/lightglue malaga-Feature Tracking_screenshot_04.06.2025.png b/modules/SimpleSLAM/media/feature-visualization/lightglue malaga-Feature Tracking_screenshot_04.06.2025.png new file mode 100644 index 0000000000..8f7eb95adf Binary files /dev/null and b/modules/SimpleSLAM/media/feature-visualization/lightglue malaga-Feature Tracking_screenshot_04.06.2025.png differ diff --git a/modules/SimpleSLAM/media/output.mp4 b/modules/SimpleSLAM/media/output.mp4 new file mode 100644 index 0000000000..4f8d4e3a19 Binary files /dev/null and b/modules/SimpleSLAM/media/output.mp4 differ diff --git a/modules/SimpleSLAM/media/pyceres_pycolmap_works.png b/modules/SimpleSLAM/media/pyceres_pycolmap_works.png new file mode 100644 index 0000000000..3bfd4bfed5 Binary files /dev/null and b/modules/SimpleSLAM/media/pyceres_pycolmap_works.png differ diff --git a/modules/SimpleSLAM/media/pycolmap_instruc.png b/modules/SimpleSLAM/media/pycolmap_instruc.png new file mode 100644 index 0000000000..c4072de4f1 Binary files /dev/null and b/modules/SimpleSLAM/media/pycolmap_instruc.png differ diff --git a/modules/SimpleSLAM/opencv_simple_slam.egg-info/PKG-INFO b/modules/SimpleSLAM/opencv_simple_slam.egg-info/PKG-INFO new file mode 100644 index 0000000000..388be457dd --- /dev/null +++ b/modules/SimpleSLAM/opencv_simple_slam.egg-info/PKG-INFO @@ -0,0 +1,11 @@ +Metadata-Version: 2.1 +Name: 
opencv-simple-slam +Version: 0.1.0 +Summary: A simple feature-based SLAM using OpenCV and LightGlue +Home-page: UNKNOWN +License: UNKNOWN +Platform: UNKNOWN +License-File: LICENSE + +UNKNOWN + diff --git a/modules/SimpleSLAM/opencv_simple_slam.egg-info/SOURCES.txt b/modules/SimpleSLAM/opencv_simple_slam.egg-info/SOURCES.txt new file mode 100644 index 0000000000..a1fbaf0920 --- /dev/null +++ b/modules/SimpleSLAM/opencv_simple_slam.egg-info/SOURCES.txt @@ -0,0 +1,26 @@ +LICENSE +README.md +setup.py +opencv_simple_slam.egg-info/PKG-INFO +opencv_simple_slam.egg-info/SOURCES.txt +opencv_simple_slam.egg-info/dependency_links.txt +opencv_simple_slam.egg-info/requires.txt +opencv_simple_slam.egg-info/top_level.txt +slam/__init__.py +slam/core/__init__.py +slam/core/dataloader.py +slam/core/features_utils.py +slam/core/keyframe_utils.py +slam/core/landmark_utils.py +slam/core/multi_view_utils.py +slam/core/pnp_utils.py +slam/core/trajectory_utils.py +slam/core/visualization_utils.py +slam/monocular/__init__.py +slam/monocular/main.py +slam/monocular/main2.py +slam/stereo/ROUGHstereo_tracker.py +slam/stereo/__init__.py +tests/__init__.py +tests/test_lightglue_vs_manual.py +tests/test_multi_view_triangulation.py \ No newline at end of file diff --git a/modules/SimpleSLAM/opencv_simple_slam.egg-info/dependency_links.txt b/modules/SimpleSLAM/opencv_simple_slam.egg-info/dependency_links.txt new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/modules/SimpleSLAM/opencv_simple_slam.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/modules/SimpleSLAM/opencv_simple_slam.egg-info/requires.txt b/modules/SimpleSLAM/opencv_simple_slam.egg-info/requires.txt new file mode 100644 index 0000000000..1710f46421 --- /dev/null +++ b/modules/SimpleSLAM/opencv_simple_slam.egg-info/requires.txt @@ -0,0 +1,6 @@ +lightglue@ git+https://github.com/cvg/LightGlue.git +lz4 +numpy +opencv-python +torch +tqdm diff --git a/modules/SimpleSLAM/opencv_simple_slam.egg-info/top_level.txt b/modules/SimpleSLAM/opencv_simple_slam.egg-info/top_level.txt new file mode 100644 index 0000000000..91fd4a6e69 --- /dev/null +++ b/modules/SimpleSLAM/opencv_simple_slam.egg-info/top_level.txt @@ -0,0 +1,2 @@ +slam +tests diff --git a/modules/SimpleSLAM/refrences/ba.py b/modules/SimpleSLAM/refrences/ba.py new file mode 100644 index 0000000000..6ffd073f16 --- /dev/null +++ b/modules/SimpleSLAM/refrences/ba.py @@ -0,0 +1,79 @@ +import numpy as np +from scipy.sparse import lil_matrix +from scipy.optimize import least_squares + +def adjust_bundle(camera_array, points_3d, points_2d, camera_indices, point_indices, num_cameras, num_points): + """Intializes all the class attributes and instance variables. + Write the specifications for each variable: + + cameraArray with shape (n_cameras, 9) contains initial estimates of parameters for all cameras. + First 3 components in each row form a rotation vector, + next 3 components form a translation vector, + then a focal distance and two distortion parameters. + + points_3d with shape (n_points, 3) + contains initial estimates of point coordinates in the world frame. + + camera_ind with shape (n_observations,) + contains indices of cameras (from 0 to n_cameras - 1) involved in each observation. + + point_ind with shape (n_observations,) + contatins indices of points (from 0 to n_points - 1) involved in each observation. + + points_2d with shape (n_observations, 2) + contains measured 2-D coordinates of points projected on images in each observations. 
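+
+    Returns camera_params with shape (n_cameras, 9) and points_3d with shape
+    (n_points, 3): the refined camera parameters and point coordinates after
+    the least-squares optimization.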
+ """ + def rotate(points, rot_vecs): + theta = np.linalg.norm(rot_vecs, axis=1)[:, np.newaxis] + with np.errstate(invalid='ignore'): + v = rot_vecs / theta + v = np.nan_to_num(v) + dot = np.sum(points * v, axis=1)[:, np.newaxis] + cos_theta = np.cos(theta) + sin_theta = np.sin(theta) + + return cos_theta * points + sin_theta * np.cross(v, points) + dot * (1 - cos_theta) * v + + def project(points, cameraArray): + """Convert 3-D points to 2-D by projecting onto images.""" + points_proj = rotate(points, cameraArray[:, :3]) + points_proj += cameraArray[:, 3:6] + points_proj = -points_proj[:, :2] / points_proj[:, 2, np.newaxis] + # f = cameraArray[:, 6] + # k1 = cameraArray[:, 7] + # k2 = cameraArray[:, 8] + # n = np.sum(points_proj ** 2, axis=1) + # r = 1 + k1 * n + k2 * n ** 2 + # points_proj *= (r * f)[:, np.newaxis] + return points_proj + + def fun(params, n_cameras, num_points, camera_indices, point_indices, points_2d): + camera_params = params[:n_cameras * 9].reshape((n_cameras, 9)) + points_3d = params[n_cameras * 9:].reshape((num_points, 3)) + points_proj = project(points_3d[point_indices], camera_params[camera_indices]) + return (points_proj - points_2d).ravel() + + def bundle_adjustment_sparsity(numCameras, numPoints, cameraIndices, pointIndices): + m = cameraIndices.size * 2 + n = numCameras * 9 + numPoints * 3 + A = lil_matrix((m, n), dtype=int) + i = np.arange(cameraIndices.size) + for s in range(9): + A[2 * i, cameraIndices * 9 + s] = 1 + A[2 * i + 1, cameraIndices * 9 + s] = 1 + for s in range(3): + A[2 * i, numCameras * 9 + pointIndices * 3 + s] = 1 + A[2 * i + 1, numCameras * 9 + pointIndices * 3 + s] = 1 + return A + + def optimizedParams(params, num_cameras, num_points): + camera_params = params[:num_cameras * 9].reshape((num_cameras, 9)) + points_3d = params[num_cameras * 9:].reshape((num_points, 3)) + return camera_params, points_3d + + x0 = np.hstack((camera_array.ravel(), points_3d.ravel())) + A = bundle_adjustment_sparsity(num_cameras, num_points, camera_indices, point_indices) + res = least_squares(fun, x0, ftol=1e-2, method='lm', + args=(num_cameras, num_points, camera_indices, point_indices, points_2d)) + camera_params, points_3d = optimizedParams(res.x, num_cameras, num_points) + return camera_params, points_3d \ No newline at end of file diff --git a/modules/SimpleSLAM/refrences/sfm.py b/modules/SimpleSLAM/refrences/sfm.py new file mode 100644 index 0000000000..c39fc13582 --- /dev/null +++ b/modules/SimpleSLAM/refrences/sfm.py @@ -0,0 +1,769 @@ +import cv2 +import numpy as np +import os +import glob +import matplotlib +matplotlib.use('TkAgg') # or 'Qt5Agg', 'WXAgg', etc. +from matplotlib import pyplot as plt +import matplotlib.gridspec as gridspec +from tqdm import tqdm +from scipy.optimize import least_squares +import pandas as pd +import pickle + +class StructureFromMotion: + def __init__(self, prefix, thumpup_pos_thresh, thumpup_rot_thresh): + # You can set this to True if you definitely want to run stereo mode. + # If the dataset doesn't have right-camera images, you can set it to False. 
+ self.isStereo = False + + self.THUMPUP_POS_THRESHOLD = thumpup_pos_thresh + self.THUMPUP_ROT_THRESHOLD = thumpup_rot_thresh + + # ================== DETERMINE DATASET ================== + if 'kitti' in prefix: + self.dataset = 'kitti' + self.image_list = sorted(glob.glob(os.path.join(prefix, '05/image_0', '*.png'))) + self.image_list_r = sorted(glob.glob(os.path.join(prefix, '05/image_1', '*.png'))) + self.K_3x3, self.Proj, self.K_r_3x3, self.Proj_r = self.load_calibration_kitti() + self.gt = np.loadtxt(os.path.join(prefix, 'poses/05.txt')).reshape(-1, 3, 4) + # Set stereo to True if you want to use stereo images. + # If the dataset is incomplete, set it to False. + # self.isStereo = True + + elif 'parking' in prefix: + self.dataset = 'parking' + self.image_list = sorted(glob.glob(os.path.join(prefix, 'images', '*.png'))) + self.gt = np.loadtxt(os.path.join(prefix, 'poses.txt')).reshape(-1, 3, 4) + self.K_3x3, self.Proj = self.load_calibration_parking() + # For parking dataset, typically no stereo images are provided, + # so you can set self.isStereo = False if needed. + + elif 'malaga' in prefix: + self.dataset = 'malaga' + self.image_list = sorted(glob.glob( + os.path.join(prefix, 'malaga-urban-dataset-extract-07_rectified_1024x768_Images', + '*_left.jpg'))) + self.image_list_r = sorted(glob.glob( + os.path.join(prefix, 'malaga-urban-dataset-extract-07_rectified_1024x768_Images', + '*_right.jpg'))) + self.K_3x3, self.Proj, self.K_r_3x3, self.Proj_r = self.load_calibration_malaga() + self.gt = self.malaga_get_gt( + os.path.join(prefix, 'malaga-urban-dataset-extract-07_all-sensors_GPS.txt'), + os.path.join(prefix, 'poses') + ) + # self.isStereo = True + + elif 'custom' in prefix: + """ + For a custom dataset: + - we expect: + prefix/ + custom_video.mp4 or custom_compress.mp4 (video frames) + calibration.pkl (something that provides camera intrinsics) + groundtruth.txt (optional) + """ + self.dataset = 'custom' + self.image_list = [] + + # 1) Load calibration + calib_path = os.path.join(prefix, 'calibration.pkl') + if not os.path.exists(calib_path): + raise FileNotFoundError(f"Calibration file not found at: {calib_path}") + self.K_3x3, self.Proj = self.load_calibration_custom(calib_path) + + # 2) Load frames from a video + video_path = os.path.join(prefix, 'custom_compress.mp4') + if not os.path.exists(video_path): + raise FileNotFoundError(f"Video file not found at: {video_path}") + cap = cv2.VideoCapture(video_path) + self.frames = [] + success, frame = cap.read() + while success: + self.frames.append(frame) + success, frame = cap.read() + cap.release() + + self.image_list = list(range(len(self.frames))) # e.g. [0..N-1] + + # 3) Optionally load ground truth + possible_gt_path = os.path.join(prefix, 'groundtruth.txt') + if os.path.exists(possible_gt_path): + try: + self.gt = np.loadtxt(possible_gt_path).reshape(-1, 3, 4) + except: + print("Ground truth found but could not be loaded. Skipping.") + self.gt = None + else: + print("No ground truth for custom video. 
Ground truth set to None.") + self.gt = None + + else: + raise NotImplementedError('Dataset not implemented:', prefix) + + # ================== FEATURE DETECTORS ================== + self.MAX_FEATURES = 6000 + matcher_type = "akaze" # or "orb", "sift" + + if matcher_type == "orb": + self.fd = cv2.ORB_create(self.MAX_FEATURES) + self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True) + elif matcher_type == "sift": + self.fd = cv2.SIFT_create() + self.bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True) + elif matcher_type == "akaze": + self.fd = cv2.AKAZE_create() + self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True) + else: + raise ValueError(f"Unsupported matcher, {matcher_type}") + + self.RANSAC_THRESHOLD = 1.0 + + # =============== STEREO SGBM SETUP if self.isStereo = True ========= + if self.isStereo: + self.MIN_DISP = 0 + self.NUM_DISP = 32 + self.BLOCK_SIZE = 11 + P1 = self.BLOCK_SIZE * self.BLOCK_SIZE * 8 + P2 = self.BLOCK_SIZE * self.BLOCK_SIZE * 32 + self.stereo = cv2.StereoSGBM_create( + minDisparity=self.MIN_DISP, + numDisparities=self.NUM_DISP, + blockSize=self.BLOCK_SIZE, + P1=P1, + P2=P2 + ) + + # --------------------------------------------------------------------- + # CALIBRATION LOADERS + # --------------------------------------------------------------------- + def load_calibration_kitti(self): + # left + params_left = np.fromstring( + "7.070912e+02 0.000000e+00 6.018873e+02 0.000000e+00 " + "0.000000e+00 7.070912e+02 1.831104e+02 0.000000e+00 " + "0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00", + dtype=np.float64, sep=' ' + ).reshape(3, 4) + K_left_3x3 = params_left[:3, :3] + + # right + params_right = np.fromstring( + "7.070912e+02 0.000000e+00 6.018873e+02 -3.798145e+02 " + "0.000000e+00 7.070912e+02 1.831104e+02 0.000000e+00 " + "0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00", + dtype=np.float64, sep=' ' + ).reshape(3, 4) + K_right_3x3 = params_right[:3, :3] + return K_left_3x3, params_left, K_right_3x3, params_right + + def load_calibration_parking(self): + P = np.array([ + 331.37, 0.0, 320.0, 0.0, + 0.0, 369.568, 240.0, 0.0, + 0.0, 0.0, 1.0, 0.0 + ]).reshape(3, 4) + K_3x3 = P[:3, :3] + return K_3x3, P + + def load_calibration_malaga(self): + K_left_3x3 = np.array([ + [795.11588, 0.0, 517.12973], + [0.0, 795.11588, 395.59665], + [0.0, 0.0, 1.0 ] + ]) + Proj_l = np.array([ + [795.11588, 0.0, 517.12973, 0.0], + [0.0, 795.11588, 395.59665, 0.0], + [0.0, 0.0, 1.0, 0.0] + ]) + + # Right camera extrinsics + q = np.array([1.0, 0.0, 0.0, 0.0]) + R_right = self.quaternion_to_rotation_matrix(q) + t_right = np.array([0.119471, 0.0, 0.0]).reshape(3, 1) + + inv_homo_proj = np.linalg.inv(self.get_4by4_homo_proj(R_right, t_right)) + Proj_r = Proj_l @ inv_homo_proj + + # For the right camera + K_right_3x3 = K_left_3x3.copy() + return K_left_3x3, Proj_l, K_right_3x3, Proj_r + + def load_calibration_custom(self, calib_path): + with open(calib_path, 'rb') as f: + calibration = pickle.load(f) + camera_matrix_3x3 = calibration[0] # shape (3,3) + # distCoeffs = calibration[1] # shape (5,) or similar, if needed + + # Build a 3×4 projection for the left camera + Proj = np.hstack([camera_matrix_3x3, np.zeros((3, 1))]) + return camera_matrix_3x3, Proj + + # --------------------------------------------------------------------- + # MALAGA GT UTILS + # --------------------------------------------------------------------- + def malaga_get_gt(self, filepath, gt_filepath): + col_names = [ + "Time","Lat","Lon","Alt","fix","sats","speed","dir", + 
"LocalX","LocalY","LocalZ","rawlogID","GeocenX","GeocenY","GeocenZ", + "GPSX","GPSY","GPSZ","GPSVX","GPSVY","GPSVZ","LocalVX","LocalVY","LocalVZ","SATTime" + ] + df = pd.read_csv(filepath, sep='\s+', comment='%', header=None, names=col_names) + df = df[["Time", "LocalX", "LocalY", "LocalZ"]].sort_values(by="Time").reset_index(drop=True) + + times = df["Time"].values + first_time, last_time = times[0], times[-1] + + # Remove frames that have no GT + rows_to_del = [] + for i in range(len(self.image_list)): + f = self.image_list[i] + timestamp = self.extract_file_timestamp(f) + if timestamp < first_time or timestamp > last_time: + rows_to_del.append(i) + self.image_list = np.delete(self.image_list, rows_to_del) + self.image_list_r = np.delete(self.image_list_r, rows_to_del) + + gt = [] + for f in self.image_list: + timestamp = self.extract_file_timestamp(f) + position = self.get_position_at_time(timestamp, df) + pose = np.eye(4) + pose[:3, 3] = position + gt.append(pose) + + gt = np.array(gt) + # Truncate to Nx3 if that is what your pipeline expects + gt = gt[:, :3] + + np.save(gt_filepath + ".npy", gt) + np.savetxt(gt_filepath + ".txt", gt.reshape(gt.shape[0], -1), fmt="%.5f") + return gt + + def extract_file_timestamp(self, filepath): + filename = os.path.basename(filepath) + parts = filename.split("_") + time_part = parts[2] + return float(time_part) + + def get_position_at_time(self, timestamp, df): + times = df["Time"].values + idx = np.searchsorted(times, timestamp) + t0 = times[idx-1] + t1 = times[idx] + row0 = df.iloc[idx-1] + row1 = df.iloc[idx] + x0, y0, z0 = row0["LocalX"], row0["LocalY"], row0["LocalZ"] + x1, y1, z1 = row1["LocalX"], row1["LocalY"], row1["LocalZ"] + + alpha = (timestamp - t0)/(t1 - t0) if t1 != t0 else 0 + x = x0 + alpha*(x1 - x0) + y = y0 + alpha*(y1 - y0) + z = z0 + alpha*(z1 - z0) + return [-y, z, x] + + # --------------------------------------------------------------------- + # GENERAL UTILS + # --------------------------------------------------------------------- + def quaternion_to_rotation_matrix(self, q): + a, b, c, d = q + a2, b2, c2, d2 = a*a, b*b, c*c, d*d + R = np.array([ + [a2 + b2 - c2 - d2, 2*b*c - 2*a*d, 2*b*d + 2*a*c], + [2*b*c + 2*a*d, a2 - b2 + c2 - d2, 2*c*d - 2*a*b], + [2*b*d - 2*a*c, 2*c*d + 2*a*b, a2 - b2 - c2 + d2] + ]) + return R + + def get_4by4_homo_proj(self, R, t): + P = np.eye(4) + P[:3, :3] = R + P[:3, 3] = t.ravel() + return P + + # --------------------------------------------------------------------- + # READING IMAGES + # --------------------------------------------------------------------- + def read_image(self, idx): + if self.dataset == 'custom': + return self.frames[idx] + else: + return cv2.imread(self.image_list[idx]) + + # --------------------------------------------------------------------- + # FEATURE DETECTION & MATCHING + # --------------------------------------------------------------------- + def feature_detection(self, image): + kp, des = self.fd.detectAndCompute(image, mask=None) + return kp, des + + def feature_matching(self, des1, des2, kp1, kp2): + matches = self.bf.match(des1, des2) + matches = sorted(matches, key=lambda x: x.distance) + m_kp1 = np.array([kp1[m.queryIdx] for m in matches]) + m_kp2 = np.array([kp2[m.trainIdx] for m in matches]) + return m_kp1, m_kp2, matches + + # --------------------------------------------------------------------- + # POSE ESTIMATION (2D-2D) + # --------------------------------------------------------------------- + def pose_estimation(self, pos_m_kp1, pos_m_kp2): + E, _ = 
cv2.findEssentialMat( + pos_m_kp1, pos_m_kp2, + cameraMatrix=self.K_3x3, + method=cv2.RANSAC, + threshold=self.RANSAC_THRESHOLD + ) + _, R, t, _ = cv2.recoverPose(E, pos_m_kp1, pos_m_kp2, cameraMatrix=self.K_3x3) + return R, t + + # --------------------------------------------------------------------- + # TRIANGULATION + # --------------------------------------------------------------------- + def triangulate(self, R, t, pos_m_kp1, pos_m_kp2): + """ + We use self.Proj as the 3×4 matrix for the left camera. + For the second camera, we do self.Proj @ [R|t]. + """ + P1 = self.Proj + P2 = self.Proj @ self.get_4by4_homo_proj(R, t) + points_4d = cv2.triangulatePoints(P1, P2, pos_m_kp1.T, pos_m_kp2.T) + points_3d = (points_4d[:3] / points_4d[3]).T # Nx3 + return points_3d + + def project_3d_to_2d(self, X_3D, R, t): + P = self.Proj @ self.get_4by4_homo_proj(R, t) + X_3D_hom = np.hstack([X_3D, np.ones((X_3D.shape[0], 1))]) + X_pix = X_3D_hom @ P.T + u = X_pix[:, 0] / X_pix[:, 2] + v = X_pix[:, 1] / X_pix[:, 2] + return np.vstack((u, v)).T + + # --------------------------------------------------------------------- + # STEREO UTILITIES + # --------------------------------------------------------------------- + def compute_stereo_disparity(self, img_l, img_r): + gray_left = cv2.cvtColor(img_l, cv2.COLOR_BGR2GRAY) + gray_right = cv2.cvtColor(img_r, cv2.COLOR_BGR2GRAY) + disparity = self.stereo.compute(gray_left, gray_right).astype(np.float32) / 16.0 + return disparity + + def apply_disparity_check(self, q, disp, min_disp, max_disp): + q_idx = q.astype(int) + disp_vals = disp.T[q_idx[:,0], q_idx[:,1]] + mask = (disp_vals > min_disp) & (disp_vals < max_disp) + return disp_vals, mask + + def calculate_right_features(self, q1, q2, disparity1, disparity2, min_disp=0.0, max_disp=100.0): + disparity1_vals, mask1 = self.apply_disparity_check(q1, disparity1, min_disp, max_disp) + disparity2_vals, mask2 = self.apply_disparity_check(q2, disparity2, min_disp, max_disp) + in_bounds = mask1 & mask2 + + q1_l = q1[in_bounds] + q2_l = q2[in_bounds] + disp1 = disparity1_vals[in_bounds] + disp2 = disparity2_vals[in_bounds] + + # Right coords + q1_r = np.copy(q1_l) + q2_r = np.copy(q2_l) + q1_r[:, 0] -= disp1 + q2_r[:, 0] -= disp2 + return q1_l, q1_r, q2_l, q2_r, in_bounds + + def get_stereo_3d_pts(self, q1_l, q1_r, q2_l, q2_r): + """ + Triangulate from left and right images for two frames (q1_l, q1_r) and (q2_l, q2_r). + Then we can do solvePnPRansac on Q1, etc. 
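+        Returns Q1_3d and Q2_3d: (N, 3) arrays of points triangulated from the
+        left/right image pairs of the first and second frame, respectively.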
+ """ + Q1_4d = cv2.triangulatePoints(self.Proj, self.Proj_r, q1_l.T, q1_r.T) + Q1_3d = (Q1_4d[:3] / Q1_4d[3]).T + + Q2_4d = cv2.triangulatePoints(self.Proj, self.Proj_r, q2_l.T, q2_r.T) + Q2_3d = (Q2_4d[:3] / Q2_4d[3]).T + return Q1_3d, Q2_3d + + # --------------------------------------------------------------------- + # NONLINEAR REFINEMENT + # --------------------------------------------------------------------- + def refine(self, pos_m_kp1, pos_m_kp2, points_3d, R, t, verbose=False): + def reprojection_resid(params, pos_m_kp1, pos_m_kp2, points_3d): + R_vec = params[:3] + t_vec = params[3:].reshape(3,1) + R_opt, _ = cv2.Rodrigues(R_vec) + + # First camera is (I, 0) + proj1 = self.project_3d_to_2d(points_3d, np.eye(3), np.zeros((3,1))) + # Second camera is (R, t) + proj2 = self.project_3d_to_2d(points_3d, R_opt, t_vec) + + err1 = (pos_m_kp1 - proj1).ravel() + err2 = (pos_m_kp2 - proj2).ravel() + return np.concatenate((err1, err2)) + + r_init = cv2.Rodrigues(R)[0].ravel() + t_init = t.ravel() + params_init = np.hstack([r_init, t_init]) + + result = least_squares( + reprojection_resid, + params_init, + args=(pos_m_kp1, pos_m_kp2, points_3d), + method='lm', + ftol=1e-6, xtol=1e-6, gtol=1e-6 + ) + r_refined = result.x[:3] + t_refined = result.x[3:].reshape(3,1) + R_refined, _ = cv2.Rodrigues(r_refined) + + if verbose: + print("[Refine] r_init:", r_init, "-> r_refined:", r_refined) + print("[Refine] t_init:", t_init, "-> t_refined:", t_refined.ravel()) + + return R_refined, t_refined + + # --------------------------------------------------------------------- + # VISUALIZATION + # --------------------------------------------------------------------- + def vis_init(self): + plt.ion() # interactive mode on + fig = plt.figure(figsize=(20, 10)) + gs = gridspec.GridSpec(2, 3, figure=fig) + ax1 = fig.add_subplot(gs[0, 0]) + ax2 = fig.add_subplot(gs[0, 1]) + ax3 = fig.add_subplot(gs[0, 2]) + ax4 = fig.add_subplot(gs[1, 0], projection='3d') + ax5 = fig.add_subplot(gs[1, 1]) + ax6 = fig.add_subplot(gs[1, 2]) + + if self.dataset == 'kitti': + ax4.view_init(elev=15, azim=-90) + elif self.dataset == 'parking': + ax4.view_init(elev=-80, azim=-90) + + # Basic range + x_min, x_max = -400, 400 + y_min, y_max = -400, 400 + z_min, z_max = -400, 400 + + return ax1, ax2, ax3, ax4, ax5, ax6, x_min, x_max, y_min, y_max, z_min, z_max + + def vis(self, vis_param, image1, image2, kp1, kp2, all_points_3d, all_poses, matches, i, show): + ax1, ax2, ax3, ax4, ax5, ax6, x_min, x_max, y_min, y_max, z_min, z_max = vis_param + + c_pose = all_poses[-1] + points_3d = all_points_3d[-1] + # transform local points to world + points_3d_world = (c_pose[:, :3] @ points_3d.T + c_pose[:, 3:]).T + + ax1.clear() + ax1.imshow(image1, cmap='gray') + ax1.axis('off') + + outimg1 = cv2.drawKeypoints(image1, kp1, None) + ax2.clear() + ax2.imshow(outimg1) + ax2.axis('off') + + matched_img = cv2.drawMatches(image1, kp1, image2, kp2, matches, None, + flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS) + ax3.clear() + ax3.imshow(matched_img) + ax3.axis('off') + + ax4.clear() + ax4.set_xlim([x_min + c_pose[0, -1], x_max + c_pose[0, -1]]) + ax4.set_ylim([y_min + c_pose[1, -1], y_max + c_pose[1, -1]]) + ax4.set_zlim([z_min + c_pose[2, -1], z_max + c_pose[2, -1]]) + ax4.scatter(points_3d_world[:,0], points_3d_world[:,1], points_3d_world[:,2], s=10, c='green') + ax4.scatter(c_pose[0, -1], c_pose[1, -1], c_pose[2, -1], c='red', label='Camera') + ax4.legend() + + ax5.clear() + ax5.grid() + ax5.set_xlim([x_min + c_pose[0, -1], x_max + c_pose[0, -1]]) + 
ax5.set_ylim([z_min + c_pose[2, -1], z_max + c_pose[2, -1]]) + arr_poses = np.array(all_poses) + ax5.scatter(arr_poses[:, 0, -1], arr_poses[:, 2, -1], s=10, c='red', label='Estimated camera') + + # if GT exists + if self.gt is not None and i < self.gt.shape[0]: + ax5.scatter(self.gt[:i, 0, -1], self.gt[:i, 2, -1], s=10, c='black', label='GT camera') + + ax5.scatter(points_3d_world[:,0], points_3d_world[:,2], s=10, c='green', alpha=0.5) + ax5.legend() + + ax6.clear() + ax6.plot(range(len(all_points_3d)), [p.shape[0] for p in all_points_3d], label='num matched keypoints') + ax6.grid() + ax6.legend() + + plt.tight_layout() + if show: + plt.pause(0.01) + plt.show() + + # --------------------------------------------------------------------- + # LOSSES + # --------------------------------------------------------------------- + def compute_ate(self, est_pose, gt_pose): + t_est = est_pose[:, 3] + t_gt = gt_pose[:, 3] + return np.linalg.norm(t_est - t_gt) + + def compute_rte(self, est_pose_old, est_pose_new, gt_pose_old, gt_pose_new): + old_est_t = est_pose_old[:, 3] + new_est_t = est_pose_new[:, 3] + old_gt_t = gt_pose_old[:, 3] + new_gt_t = gt_pose_new[:, 3] + rel_est = new_est_t - old_est_t + rel_gt = new_gt_t - old_gt_t + return np.linalg.norm(rel_est - rel_gt) + + # --------------------------------------------------------------------- + # INITIALIZE (KEYFRAMES) + # --------------------------------------------------------------------- + def initialize(self, keyframe_filename): + if os.path.exists(keyframe_filename): + return + + c_pose = np.hstack([np.eye(3), np.zeros((3,1))]) # 3x4 + all_poses = [c_pose] + keyframes = [0] + + print("Finding keyframes") + initial_frame = True + for id_n_keyframe in tqdm(range(1, len(self.image_list))): + id_c_keyframe = keyframes[-1] + pose_c_keyframe = all_poses[id_c_keyframe] + + img1 = self.read_image(id_c_keyframe) + img2 = self.read_image(id_n_keyframe) + + if self.isStereo: + # For stereo, we load the right images as well: + img1_r = cv2.imread(self.image_list_r[id_c_keyframe]) + img2_r = cv2.imread(self.image_list_r[id_n_keyframe]) + disparity1 = self.compute_stereo_disparity(img1, img1_r) + disparity2 = self.compute_stereo_disparity(img2, img2_r) + + kp1, des1 = self.feature_detection(img1) + kp2, des2 = self.feature_detection(img2) + m_kp1, m_kp2, matches = self.feature_matching(des1, des2, kp1, kp2) + pos_m_kp1 = np.array([kp.pt for kp in m_kp1]) + pos_m_kp2 = np.array([kp.pt for kp in m_kp2]) + + # Pose estimation + if initial_frame or not self.isStereo: + R, t = self.pose_estimation(pos_m_kp1, pos_m_kp2) + initial_frame = False + else: + # stereo approach + q1_l, q1_r, q2_l, q2_r, mask_stereo = self.calculate_right_features( + pos_m_kp1, pos_m_kp2, + disparity1, disparity2 + ) + # Triangulate in the left-right images for both frames + Q1, Q2 = self.get_stereo_3d_pts(q1_l, q1_r, q2_l, q2_r) + + success, rvec, tvec, inliers = cv2.solvePnPRansac( + objectPoints=Q1.reshape(-1,1,3).astype(np.float32), + imagePoints=q2_l.reshape(-1,1,2).astype(np.float32), + cameraMatrix=self.K_3x3, + distCoeffs=None, + iterationsCount=500, + reprojectionError=5.0, + confidence=0.99, + flags=cv2.SOLVEPNP_ITERATIVE + ) + if success: + R, _ = cv2.Rodrigues(rvec) + t = tvec.reshape(3,1) + else: + print("PnP RANSAC failed... 
(Stereo approach)") + + # Triangulate & refine + pts_3d = self.triangulate(R, t, pos_m_kp1, pos_m_kp2) + R, t = self.refine(pos_m_kp1, pos_m_kp2, pts_3d, R, t, verbose=False) + pts_3d = self.triangulate(R, t, pos_m_kp1, pos_m_kp2) + + inv_pose = np.linalg.inv(self.get_4by4_homo_proj(R, t)) + c_pose = c_pose @ inv_pose + all_poses.append(c_pose) + + # check thresholds + dist_from_keyframe = np.linalg.norm(c_pose[:3, -1] - pose_c_keyframe[:3, -1]) + good_depth_pts = pts_3d[pts_3d[:,2] > 0, 2] + avg_depth = np.mean(good_depth_pts) if len(good_depth_pts) else 1e-6 + + rel_R = np.linalg.inv(pose_c_keyframe[:3, :3]) @ c_pose[:3, :3] + rod = cv2.Rodrigues(rel_R)[0] + deg = np.abs(rod * 180.0 / np.pi) + ratio = dist_from_keyframe / avg_depth + + if ratio > self.THUMPUP_POS_THRESHOLD or np.any(deg > self.THUMPUP_ROT_THRESHOLD): + print(f" [Keyframe] Dist: {dist_from_keyframe:.3f}, AvgDepth: {avg_depth:.3f}, ratio={ratio:.3f}, deg={deg.squeeze()}, frame={id_n_keyframe}") + keyframes.append(id_n_keyframe) + + np.save(keyframe_filename, np.array(keyframes)) + + # --------------------------------------------------------------------- + # MAIN RUN + # --------------------------------------------------------------------- + def run(self, keyframe_filename, checkpoints=20, dynamic_vis=True): + """ + Runs the main pipeline using either the keyframes from file + or all frames if no file is found. The 'dynamic_vis' parameter + can be set to False if you do not want interactive visualization. + """ + import matplotlib.pyplot as plt + + # 1) Load or generate keyframes + if os.path.exists(keyframe_filename): + keyframes = np.load(keyframe_filename) + print(f"loaded keyframes from file: {keyframes}") + else: + keyframes = np.arange(len(self.image_list)) + print("keyframe file not found, using all frames") + + # 2) Conditionally set up visualization + vis_param = None + if dynamic_vis: + vis_param = self.vis_init() + + # 3) Initialize + c_pose = np.hstack((np.eye(3), np.zeros((3, 1)))) + all_poses = [c_pose] + all_points_3d = [] + initial_frame = True + cumulative_loss_loc = 0 + + # 4) Main loop + iter_count = len(keyframes) + for i in tqdm(range(1, iter_count)): + idx1 = keyframes[i - 1] + idx2 = keyframes[i] + + # --- Read images + img1 = self.read_image(idx1) + img2 = self.read_image(idx2) + + # If stereo is active, compute disparities or do pnp + if self.isStereo: + img1_r = cv2.imread(self.image_list_r[idx1]) + img2_r = cv2.imread(self.image_list_r[idx2]) + disparity1 = self.compute_stereo_disparity(img1, img1_r) + disparity2 = self.compute_stereo_disparity(img2, img2_r) + + # --- Feature detection + matching + kp1, des1 = self.feature_detection(img1) + kp2, des2 = self.feature_detection(img2) + m_kp1, m_kp2, matches = self.feature_matching(des1, des2, kp1, kp2) + pos_m_kp1 = np.array([kp.pt for kp in m_kp1]) + pos_m_kp2 = np.array([kp.pt for kp in m_kp2]) + + # --- Pose estimation + if initial_frame or not self.isStereo: + R, t = self.pose_estimation(pos_m_kp1, pos_m_kp2) + initial_frame = False + else: + # stereo approach + q1_l, q1_r, q2_l, q2_r, in_bounds = self.calculate_right_features( + pos_m_kp1, pos_m_kp2, + disparity1, disparity2 + ) + Q1, Q2 = self.get_stereo_3d_pts(q1_l, q1_r, q2_l, q2_r) + success, rvec, tvec, inliers = cv2.solvePnPRansac( + Q1.reshape(-1,1,3).astype(np.float32), + q2_l.reshape(-1,1,2).astype(np.float32), + self.K_3x3, None, + iterationsCount=2000, + reprojectionError=5.0, + confidence=0.99, + flags=cv2.SOLVEPNP_ITERATIVE + ) + if success: + R, _ = cv2.Rodrigues(rvec) + t = 
tvec.reshape(3,1) + else: + print("[Warning] Stereo PnP RANSAC failed to find a valid pose.") + + # --- Triangulate & refine + points_3d = self.triangulate(R, t, pos_m_kp1, pos_m_kp2) + R, t = self.refine(pos_m_kp1, pos_m_kp2, points_3d, R, t) + points_3d = self.triangulate(R, t, pos_m_kp1, pos_m_kp2) + all_points_3d.append(points_3d) + + # --- Update global camera pose + inv_pose = np.linalg.inv(self.get_4by4_homo_proj(R, t)) + c_pose = c_pose @ inv_pose + all_poses.append(c_pose) + + # --- If ground truth is available, compute any desired loss + ate_loss = -1 + rte_loss = -1 + if (self.gt is not None) and (idx2 < self.gt.shape[0]): + gt_pose_current = self.gt[idx2] + diff_mean = np.mean(np.abs(gt_pose_current[:, 3] - c_pose[:, 3])) + cumulative_loss_loc += diff_mean + ate_loss = self.compute_ate(c_pose, gt_pose_current) + # For RTE: + # if i >= 2 and (idx1 < self.gt.shape[0]): + # rte_loss = self.compute_rte(all_poses[-2], all_poses[-1], self.gt[idx1], self.gt[idx2]) + + # --- Visualization + if dynamic_vis: + self.vis(vis_param, img1, img2, kp1, kp2, all_points_3d, all_poses, matches, idx1, show=True) + + # --- Save screenshots every 'checkpoints' frames + if (i % checkpoints == 0) or (i == iter_count - 1): + if dynamic_vis: + self.vis(vis_param, img1, img2, kp1, kp2, all_points_3d, all_poses, matches, idx1, show=False) + + out_dir = f"./output/{self.dataset}" + if not os.path.exists(out_dir): + os.mkdir(out_dir) + + if ate_loss == -1: + fname = f"{idx1}_noGT.png" + else: + fname = f"{idx1}_{(cumulative_loss_loc / i):.2f}_{ate_loss:.2f}.png" + + plt.savefig(os.path.join(out_dir, fname), dpi=100) + + +# ================================ +# Example main script usage +# ================================ +if __name__ == "__main__": + thumpup_pos_thresh = 0.015 + thumpup_rot_thresh = 1.5 + dataset = "kitti" # can be "kitti", "parking", "malaga", or "custom" + + # Create output directory if it doesn't exist + output_dir = "output" + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + + if dataset == "malaga": + keyframe_filename = f"./output/malaga_keyframes_{thumpup_pos_thresh}_{thumpup_rot_thresh}.npy" + sfm = StructureFromMotion("./Dataset/malaga-urban-dataset-extract-07", thumpup_pos_thresh, thumpup_rot_thresh) + elif dataset == "parking": + keyframe_filename = f"./output/parking_keyframes_{thumpup_pos_thresh}_{thumpup_rot_thresh}.npy" + sfm = StructureFromMotion("./Dataset/parking", thumpup_pos_thresh, thumpup_rot_thresh) + elif dataset == "kitti": + keyframe_filename = f"./output/kitti_keyframes_{thumpup_pos_thresh}_{thumpup_rot_thresh}.npy" + sfm = StructureFromMotion("./Dataset/kitti", thumpup_pos_thresh, thumpup_rot_thresh) + elif dataset == "custom": + keyframe_filename = f"./output/custom_keyframes_{thumpup_pos_thresh}_{thumpup_rot_thresh}.npy" + sfm = StructureFromMotion("./Dataset/custom", thumpup_pos_thresh, thumpup_rot_thresh) + else: + raise ValueError("Unknown dataset") + + sfm.initialize(keyframe_filename) + sfm.run(keyframe_filename, checkpoints=20, dynamic_vis=True) + + # If you want to block at the end (so the figure remains): + # plt.show(block=True) diff --git a/modules/SimpleSLAM/refrences/sfm_lightglue_aliked.py b/modules/SimpleSLAM/refrences/sfm_lightglue_aliked.py new file mode 100644 index 0000000000..2f626acdca --- /dev/null +++ b/modules/SimpleSLAM/refrences/sfm_lightglue_aliked.py @@ -0,0 +1,853 @@ +import cv2 +import numpy as np +import os +import glob +import matplotlib +matplotlib.use('TkAgg') # or 'Qt5Agg', 'WXAgg', etc. 
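+# NOTE: an interactive backend is assumed here; on a headless machine you may
+# need the non-interactive 'Agg' backend and to run with dynamic_vis=False.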
+from matplotlib import pyplot as plt +import matplotlib.gridspec as gridspec +from tqdm import tqdm +from scipy.optimize import least_squares +import pandas as pd +import pickle + + +# New imports for LightGlue integration +import torch +from lightglue import LightGlue, ALIKED # using ALIKED features +from lightglue.utils import load_image, rbd + +class StructureFromMotion: + def __init__(self, prefix, thumpup_pos_thresh, thumpup_rot_thresh, use_lightglue=True): + # Set this flag to True to use ALIKED+LightGlue instead of the OpenCV pipeline. + self.use_lightglue = use_lightglue + + # If using LightGlue, initialize its extractor and matcher. + if self.use_lightglue: + self.extractor = ALIKED(max_num_keypoints=2048).eval().cuda() + # Here we choose 'disk' as the matching features for LightGlue. + self.matcher = LightGlue(features='disk').eval().cuda() + + # (Rest of your __init__ code remains unchanged...) + self.isStereo = False + self.THUMPUP_POS_THRESHOLD = thumpup_pos_thresh + self.THUMPUP_ROT_THRESHOLD = thumpup_rot_thresh + + # ================== DETERMINE DATASET ================== + if 'kitti' in prefix: + self.dataset = 'kitti' + self.image_list = sorted(glob.glob(os.path.join(prefix, '05/image_0', '*.png'))) + self.image_list_r = sorted(glob.glob(os.path.join(prefix, '05/image_1', '*.png'))) + self.K_3x3, self.Proj, self.K_r_3x3, self.Proj_r = self.load_calibration_kitti() + self.gt = np.loadtxt(os.path.join(prefix, 'poses/05.txt')).reshape(-1, 3, 4) + elif 'parking' in prefix: + self.dataset = 'parking' + self.image_list = sorted(glob.glob(os.path.join(prefix, 'images', '*.png'))) + self.gt = np.loadtxt(os.path.join(prefix, 'poses.txt')).reshape(-1, 3, 4) + self.K_3x3, self.Proj = self.load_calibration_parking() + elif 'malaga' in prefix: + self.dataset = 'malaga' + self.image_list = sorted(glob.glob( + os.path.join(prefix, 'malaga-urban-dataset-extract-07_rectified_1024x768_Images', + '*_left.jpg'))) + self.image_list_r = sorted(glob.glob( + os.path.join(prefix, 'malaga-urban-dataset-extract-07_rectified_1024x768_Images', + '*_right.jpg'))) + self.K_3x3, self.Proj, self.K_r_3x3, self.Proj_r = self.load_calibration_malaga() + self.gt = self.malaga_get_gt( + os.path.join(prefix, 'malaga-urban-dataset-extract-07_all-sensors_GPS.txt'), + os.path.join(prefix, 'poses') + ) + elif 'custom' in prefix: + self.dataset = 'custom' + self.image_list = [] + calib_path = os.path.join(prefix, 'calibration.pkl') + if not os.path.exists(calib_path): + raise FileNotFoundError(f"Calibration file not found at: {calib_path}") + self.K_3x3, self.Proj = self.load_calibration_custom(calib_path) + video_path = os.path.join(prefix, 'custom_compress.mp4') + if not os.path.exists(video_path): + raise FileNotFoundError(f"Video file not found at: {video_path}") + cap = cv2.VideoCapture(video_path) + self.frames = [] + success, frame = cap.read() + while success: + self.frames.append(frame) + success, frame = cap.read() + cap.release() + self.image_list = list(range(len(self.frames))) + possible_gt_path = os.path.join(prefix, 'groundtruth.txt') + if os.path.exists(possible_gt_path): + try: + self.gt = np.loadtxt(possible_gt_path).reshape(-1, 3, 4) + except: + print("Ground truth found but could not be loaded. Skipping.") + self.gt = None + else: + print("No ground truth for custom video. 
Ground truth set to None.") + self.gt = None + else: + raise NotImplementedError('Dataset not implemented:', prefix) + + # # ================== FEATURE DETECTORS (OpenCV fallback) ================== + self.MAX_FEATURES = 6000 + matcher_type = "akaze" # or "orb", "sift" + if matcher_type == "orb": + self.fd = cv2.ORB_create(self.MAX_FEATURES) + self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True) + elif matcher_type == "sift": + self.fd = cv2.SIFT_create() + self.bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True) + elif matcher_type == "akaze": + self.fd = cv2.AKAZE_create() + self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True) + else: + raise ValueError(f"Unsupported matcher, {matcher_type}") + + self.RANSAC_THRESHOLD = 1.0 + + if self.isStereo: + self.MIN_DISP = 0 + self.NUM_DISP = 32 + self.BLOCK_SIZE = 11 + P1 = self.BLOCK_SIZE * self.BLOCK_SIZE * 8 + P2 = self.BLOCK_SIZE * self.BLOCK_SIZE * 32 + self.stereo = cv2.StereoSGBM_create( + minDisparity=self.MIN_DISP, + numDisparities=self.NUM_DISP, + blockSize=self.BLOCK_SIZE, + P1=P1, + P2=P2 + ) + + # --------------------------------------------------------------------- + # CALIBRATION LOADERS + # --------------------------------------------------------------------- + def load_calibration_kitti(self): + # left + params_left = np.fromstring( + "7.070912e+02 0.000000e+00 6.018873e+02 0.000000e+00 " + "0.000000e+00 7.070912e+02 1.831104e+02 0.000000e+00 " + "0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00", + dtype=np.float64, sep=' ' + ).reshape(3, 4) + K_left_3x3 = params_left[:3, :3] + + # right + params_right = np.fromstring( + "7.070912e+02 0.000000e+00 6.018873e+02 -3.798145e+02 " + "0.000000e+00 7.070912e+02 1.831104e+02 0.000000e+00 " + "0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00", + dtype=np.float64, sep=' ' + ).reshape(3, 4) + K_right_3x3 = params_right[:3, :3] + return K_left_3x3, params_left, K_right_3x3, params_right + + def load_calibration_parking(self): + P = np.array([ + 331.37, 0.0, 320.0, 0.0, + 0.0, 369.568, 240.0, 0.0, + 0.0, 0.0, 1.0, 0.0 + ]).reshape(3, 4) + K_3x3 = P[:3, :3] + return K_3x3, P + + def load_calibration_malaga(self): + K_left_3x3 = np.array([ + [795.11588, 0.0, 517.12973], + [0.0, 795.11588, 395.59665], + [0.0, 0.0, 1.0 ] + ]) + Proj_l = np.array([ + [795.11588, 0.0, 517.12973, 0.0], + [0.0, 795.11588, 395.59665, 0.0], + [0.0, 0.0, 1.0, 0.0] + ]) + + # Right camera extrinsics + q = np.array([1.0, 0.0, 0.0, 0.0]) + R_right = self.quaternion_to_rotation_matrix(q) + t_right = np.array([0.119471, 0.0, 0.0]).reshape(3, 1) + + inv_homo_proj = np.linalg.inv(self.get_4by4_homo_proj(R_right, t_right)) + Proj_r = Proj_l @ inv_homo_proj + + # For the right camera + K_right_3x3 = K_left_3x3.copy() + return K_left_3x3, Proj_l, K_right_3x3, Proj_r + + def load_calibration_custom(self, calib_path): + with open(calib_path, 'rb') as f: + calibration = pickle.load(f) + camera_matrix_3x3 = calibration[0] # shape (3,3) + # distCoeffs = calibration[1] # shape (5,) or similar, if needed + + # Build a 3×4 projection for the left camera + Proj = np.hstack([camera_matrix_3x3, np.zeros((3, 1))]) + return camera_matrix_3x3, Proj + + # --------------------------------------------------------------------- + # MALAGA GT UTILS + # --------------------------------------------------------------------- + def malaga_get_gt(self, filepath, gt_filepath): + col_names = [ + "Time","Lat","Lon","Alt","fix","sats","speed","dir", + "LocalX","LocalY","LocalZ","rawlogID","GeocenX","GeocenY","GeocenZ", + 
"GPSX","GPSY","GPSZ","GPSVX","GPSVY","GPSVZ","LocalVX","LocalVY","LocalVZ","SATTime" + ] + df = pd.read_csv(filepath, sep='\s+', comment='%', header=None, names=col_names) + df = df[["Time", "LocalX", "LocalY", "LocalZ"]].sort_values(by="Time").reset_index(drop=True) + + times = df["Time"].values + first_time, last_time = times[0], times[-1] + + # Remove frames that have no GT + rows_to_del = [] + for i in range(len(self.image_list)): + f = self.image_list[i] + timestamp = self.extract_file_timestamp(f) + if timestamp < first_time or timestamp > last_time: + rows_to_del.append(i) + self.image_list = np.delete(self.image_list, rows_to_del) + self.image_list_r = np.delete(self.image_list_r, rows_to_del) + + gt = [] + for f in self.image_list: + timestamp = self.extract_file_timestamp(f) + position = self.get_position_at_time(timestamp, df) + pose = np.eye(4) + pose[:3, 3] = position + gt.append(pose) + + gt = np.array(gt) + # Truncate to Nx3 if that is what your pipeline expects + gt = gt[:, :3] + + np.save(os.path.join(gt_filepath + ".npy"), gt) + np.savetxt(os.path.join(gt_filepath + ".txt"), gt.reshape(gt.shape[0], -1), fmt="%.5f") + return gt + + def extract_file_timestamp(self, filepath): + filename = os.path.basename(filepath) + parts = filename.split("_") + time_part = parts[2] + return float(time_part) + + def get_position_at_time(self, timestamp, df): + times = df["Time"].values + idx = np.searchsorted(times, timestamp) + t0 = times[idx-1] + t1 = times[idx] + row0 = df.iloc[idx-1] + row1 = df.iloc[idx] + x0, y0, z0 = row0["LocalX"], row0["LocalY"], row0["LocalZ"] + x1, y1, z1 = row1["LocalX"], row1["LocalY"], row1["LocalZ"] + + alpha = (timestamp - t0)/(t1 - t0) if t1 != t0 else 0 + x = x0 + alpha*(x1 - x0) + y = y0 + alpha*(y1 - y0) + z = z0 + alpha*(z1 - z0) + return [-y, z, x] + + # --------------------------------------------------------------------- + # GENERAL UTILS + # --------------------------------------------------------------------- + def quaternion_to_rotation_matrix(self, q): + a, b, c, d = q + a2, b2, c2, d2 = a*a, b*b, c*c, d*d + R = np.array([ + [a2 + b2 - c2 - d2, 2*b*c - 2*a*d, 2*b*d + 2*a*c], + [2*b*c + 2*a*d, a2 - b2 + c2 - d2, 2*c*d - 2*a*b], + [2*b*d - 2*a*c, 2*c*d + 2*a*b, a2 - b2 - c2 + d2] + ]) + return R + + def get_4by4_homo_proj(self, R, t): + P = np.eye(4) + P[:3, :3] = R + P[:3, 3] = t.ravel() + return P + + # --------------------------------------------------------------------- + # READING IMAGES + # --------------------------------------------------------------------- + def read_image(self, idx): + if self.dataset == 'custom': + return self.frames[idx] + else: + return cv2.imread(self.image_list[idx]) + + # --------------------------------------------------------------------- + # FEATURE DETECTION & MATCHING + # --------------------------------------------------------------------- + def feature_detection(self, image): + kp, des = self.fd.detectAndCompute(image, mask=None) + return kp, des + + def feature_matching(self, des1, des2, kp1, kp2): + matches = self.bf.match(des1, des2) + matches = sorted(matches, key=lambda x: x.distance) + m_kp1 = np.array([kp1[m.queryIdx] for m in matches]) + m_kp2 = np.array([kp2[m.trainIdx] for m in matches]) + return m_kp1, m_kp2, matches + + # --------------------------------------------------------------------- + # POSE ESTIMATION (2D-2D) + # --------------------------------------------------------------------- + def pose_estimation(self, pos_m_kp1, pos_m_kp2): + E, _ = cv2.findEssentialMat( + pos_m_kp1, pos_m_kp2, + 
cameraMatrix=self.K_3x3, + method=cv2.RANSAC, + threshold=self.RANSAC_THRESHOLD + ) + _, R, t, _ = cv2.recoverPose(E, pos_m_kp1, pos_m_kp2, cameraMatrix=self.K_3x3) + return R, t + + def pose_estimation(self, pos_m_kp1, pos_m_kp2): + # If the keypoints are torch tensors on GPU, move them to CPU and convert to numpy + if isinstance(pos_m_kp1, torch.Tensor): + pts1 = pos_m_kp1.cpu().numpy().astype(np.float32) + else: + pts1 = np.asarray(pos_m_kp1, dtype=np.float32) + + if isinstance(pos_m_kp2, torch.Tensor): + pts2 = pos_m_kp2.cpu().numpy().astype(np.float32) + else: + pts2 = np.asarray(pos_m_kp2, dtype=np.float32) + + + # Pass the camera matrix and additional parameters as positional arguments + E, _ = cv2.findEssentialMat( + pts1, pts2, + cameraMatrix=self.K_3x3, + method=cv2.RANSAC, + threshold=self.RANSAC_THRESHOLD + ) + _, R, t, _ = cv2.recoverPose(E, pts1, pts2, cameraMatrix=self.K_3x3) + return R, t + + + # --------------------------------------------------------------------- + # TRIANGULATION + # --------------------------------------------------------------------- + def triangulate(self, R, t, pos_m_kp1, pos_m_kp2): + P1 = self.Proj + P2 = self.Proj @ self.get_4by4_homo_proj(R, t) + + # Ensure the keypoints are numpy arrays of type float32 + if isinstance(pos_m_kp1, torch.Tensor): + pts1 = pos_m_kp1.cpu().numpy().astype(np.float32) + else: + pts1 = np.asarray(pos_m_kp1, dtype=np.float32) + + if isinstance(pos_m_kp2, torch.Tensor): + pts2 = pos_m_kp2.cpu().numpy().astype(np.float32) + else: + pts2 = np.asarray(pos_m_kp2, dtype=np.float32) + + # cv2.triangulatePoints expects points in shape (2, N) + points_4d = cv2.triangulatePoints(P1, P2, pts1.T, pts2.T) + points_3d = (points_4d[:3] / points_4d[3]).T # Convert homogeneous coordinates to 3D + return points_3d + + # --------------------------------------------------------------------- + # STEREO UTILITIES + # --------------------------------------------------------------------- + def compute_stereo_disparity(self, img_l, img_r): + gray_left = cv2.cvtColor(img_l, cv2.COLOR_BGR2GRAY) + gray_right = cv2.cvtColor(img_r, cv2.COLOR_BGR2GRAY) + disparity = self.stereo.compute(gray_left, gray_right).astype(np.float32) / 16.0 + return disparity + + def apply_disparity_check(self, q, disp, min_disp, max_disp): + q_idx = q.astype(int) + disp_vals = disp.T[q_idx[:,0], q_idx[:,1]] + mask = (disp_vals > min_disp) & (disp_vals < max_disp) + return disp_vals, mask + + def calculate_right_features(self, q1, q2, disparity1, disparity2, min_disp=0.0, max_disp=100.0): + disparity1_vals, mask1 = self.apply_disparity_check(q1, disparity1, min_disp, max_disp) + disparity2_vals, mask2 = self.apply_disparity_check(q2, disparity2, min_disp, max_disp) + in_bounds = mask1 & mask2 + + q1_l = q1[in_bounds] + q2_l = q2[in_bounds] + disp1 = disparity1_vals[in_bounds] + disp2 = disparity2_vals[in_bounds] + + # Right coords + q1_r = np.copy(q1_l) + q2_r = np.copy(q2_l) + q1_r[:, 0] -= disp1 + q2_r[:, 0] -= disp2 + return q1_l, q1_r, q2_l, q2_r, in_bounds + + def get_stereo_3d_pts(self, q1_l, q1_r, q2_l, q2_r): + """ + Triangulate from left and right images for two frames (q1_l, q1_r) and (q2_l, q2_r). + Then we can do solvePnPRansac on Q1, etc. 
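+
+        The q1_l/q1_r/q2_l/q2_r inputs are (N, 2) pixel arrays, while
+        cv2.triangulatePoints expects (2, N) inputs, hence the transposes below.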
+ """ + Q1_4d = cv2.triangulatePoints(self.Proj, self.Proj_r, q1_l.T, q1_r.T) + Q1_3d = (Q1_4d[:3] / Q1_4d[3]).T + + Q2_4d = cv2.triangulatePoints(self.Proj, self.Proj_r, q2_l.T, q2_r.T) + Q2_3d = (Q2_4d[:3] / Q2_4d[3]).T + return Q1_3d, Q2_3d + + def project_3d_to_2d(self, X_3D, R, t): + # Compute the projection matrix for the given rotation and translation. + P = self.Proj @ self.get_4by4_homo_proj(R, t) + # Convert 3D points to homogeneous coordinates. + X_3D_hom = np.hstack([X_3D, np.ones((X_3D.shape[0], 1))]) + # Project the 3D points. + X_pix = X_3D_hom @ P.T + # Normalize to get pixel coordinates. + u = X_pix[:, 0] / X_pix[:, 2] + v = X_pix[:, 1] / X_pix[:, 2] + return np.vstack((u, v)).T + + # --------------------------------------------------------------------- + # NONLINEAR REFINEMENT + # --------------------------------------------------------------------- + def refine(self, pos_m_kp1, pos_m_kp2, points_3d, R, t, verbose=False): + def reprojection_resid(params, pos_m_kp1, pos_m_kp2, points_3d): + R_vec = params[:3] + t_vec = params[3:].reshape(3,1) + R_opt, _ = cv2.Rodrigues(R_vec) + + # Project points for camera 1 and camera 2. + proj1 = self.project_3d_to_2d(points_3d, np.eye(3), np.zeros((3,1))) + proj2 = self.project_3d_to_2d(points_3d, R_opt, t_vec) + + # Ensure the keypoint arrays are numpy arrays on the CPU. + if isinstance(pos_m_kp1, torch.Tensor): + pos_m_kp1 = pos_m_kp1.cpu().numpy().astype(np.float32) + else: + pos_m_kp1 = np.asarray(pos_m_kp1, dtype=np.float32) + if isinstance(pos_m_kp2, torch.Tensor): + pos_m_kp2 = pos_m_kp2.cpu().numpy().astype(np.float32) + else: + pos_m_kp2 = np.asarray(pos_m_kp2, dtype=np.float32) + + err1 = (pos_m_kp1 - proj1).ravel() + err2 = (pos_m_kp2 - proj2).ravel() + return np.concatenate((err1, err2)) + + # Get initial parameters from the current pose. + r_init = cv2.Rodrigues(R)[0].ravel() + t_init = t.ravel() + params_init = np.hstack([r_init, t_init]) + + result = least_squares( + reprojection_resid, + params_init, + args=(pos_m_kp1, pos_m_kp2, points_3d), + method='lm', + ftol=1e-6, xtol=1e-6, gtol=1e-6 + ) + + # Check if the optimization was successful; if not, return the original R, t. + if not result.success: + print("Least squares refinement did not converge. 
Returning original pose.") + return R, t + + r_refined = result.x[:3] + t_refined = result.x[3:].reshape(3, 1) + R_refined, _ = cv2.Rodrigues(r_refined) + + if verbose: + print("[Refine] r_init:", r_init, "-> r_refined:", r_refined) + print("[Refine] t_init:", t_init, "-> t_refined:", t_refined.ravel()) + + return R_refined, t_refined + + + # --------------------------------------------------------------------- + # VISUALIZATION + # --------------------------------------------------------------------- + def vis_init(self): + plt.ion() # interactive mode on + fig = plt.figure(figsize=(20, 10)) + gs = gridspec.GridSpec(2, 3, figure=fig) + ax1 = fig.add_subplot(gs[0, 0]) + ax2 = fig.add_subplot(gs[0, 1]) + ax3 = fig.add_subplot(gs[0, 2]) + ax4 = fig.add_subplot(gs[1, 0], projection='3d') + ax5 = fig.add_subplot(gs[1, 1]) + ax6 = fig.add_subplot(gs[1, 2]) + + if self.dataset == 'kitti': + ax4.view_init(elev=15, azim=-90) + elif self.dataset == 'parking': + ax4.view_init(elev=-80, azim=-90) + + # Basic range + x_min, x_max = -400, 400 + y_min, y_max = -400, 400 + z_min, z_max = -400, 400 + + return ax1, ax2, ax3, ax4, ax5, ax6, x_min, x_max, y_min, y_max, z_min, z_max + + def vis(self, vis_param, image1, image2, kp1, kp2, all_points_3d, all_poses, matches, i, show): + ax1, ax2, ax3, ax4, ax5, ax6, x_min, x_max, y_min, y_max, z_min, z_max = vis_param + + c_pose = all_poses[-1] + points_3d = all_points_3d[-1] + # transform local points to world + points_3d_world = (c_pose[:, :3] @ points_3d.T + c_pose[:, 3:]).T + + ax1.clear() + ax1.imshow(image1, cmap='gray') + ax1.axis('off') + + outimg1 = cv2.drawKeypoints(image1, kp1, None) + ax2.clear() + ax2.imshow(outimg1) + ax2.axis('off') + + matched_img = cv2.drawMatches(image1, kp1, image2, kp2, matches, None, + flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS) + ax3.clear() + ax3.imshow(matched_img) + ax3.axis('off') + + ax4.clear() + ax4.set_xlim([x_min + c_pose[0, -1], x_max + c_pose[0, -1]]) + ax4.set_ylim([y_min + c_pose[1, -1], y_max + c_pose[1, -1]]) + ax4.set_zlim([z_min + c_pose[2, -1], z_max + c_pose[2, -1]]) + ax4.scatter(points_3d_world[:,0], points_3d_world[:,1], points_3d_world[:,2], s=10, c='green') + ax4.scatter(c_pose[0, -1], c_pose[1, -1], c_pose[2, -1], c='red', label='Camera') + ax4.legend() + + ax5.clear() + ax5.grid() + ax5.set_xlim([x_min + c_pose[0, -1], x_max + c_pose[0, -1]]) + ax5.set_ylim([z_min + c_pose[2, -1], z_max + c_pose[2, -1]]) + arr_poses = np.array(all_poses) + ax5.scatter(arr_poses[:, 0, -1], arr_poses[:, 2, -1], s=10, c='red', label='Estimated camera') + + # if GT exists + if self.gt is not None and i < self.gt.shape[0]: + ax5.scatter(self.gt[:i, 0, -1], self.gt[:i, 2, -1], s=10, c='black', label='GT camera') + + ax5.scatter(points_3d_world[:,0], points_3d_world[:,2], s=10, c='green', alpha=0.5) + ax5.legend() + + ax6.clear() + ax6.plot(range(len(all_points_3d)), [p.shape[0] for p in all_points_3d], label='num matched keypoints') + ax6.grid() + ax6.legend() + + plt.tight_layout() + if show: + # pass + plt.pause(0.01) + plt.show() + + # --------------------------------------------------------------------- + # LOSSES + # --------------------------------------------------------------------- + def compute_ate(self, est_pose, gt_pose): + t_est = est_pose[:, 3] + t_gt = gt_pose[:, 3] + return np.linalg.norm(t_est - t_gt) + + def compute_rte(self, est_pose_old, est_pose_new, gt_pose_old, gt_pose_new): + old_est_t = est_pose_old[:, 3] + new_est_t = est_pose_new[:, 3] + old_gt_t = gt_pose_old[:, 3] + new_gt_t = 
gt_pose_new[:, 3] + rel_est = new_est_t - old_est_t + rel_gt = new_gt_t - old_gt_t + return np.linalg.norm(rel_est - rel_gt) + + # --------------------------------------------------------------------- + # INITIALIZE (KEYFRAMES) + # --------------------------------------------------------------------- + def initialize(self, keyframe_filename): + if os.path.exists(keyframe_filename): + return + + c_pose = np.hstack([np.eye(3), np.zeros((3,1))]) # 3x4 + all_poses = [c_pose] + keyframes = [0] + + print("Finding keyframes") + initial_frame = True + for id_n_keyframe in tqdm(range(1, len(self.image_list))): + id_c_keyframe = keyframes[-1] + pose_c_keyframe = all_poses[id_c_keyframe] + + img1 = self.read_image(id_c_keyframe) + img2 = self.read_image(id_n_keyframe) + + # Use LightGlue if enabled, otherwise use OpenCV pipeline. + if self.use_lightglue: + pos_m_kp1, pos_m_kp2, matches, kp1, kp2 = self.lightglue_match(img1, img2) + else: + kp1, des1 = self.feature_detection(img1) + kp2, des2 = self.feature_detection(img2) + m_kp1, m_kp2, matches = self.feature_matching(des1, des2, kp1, kp2) + pos_m_kp1 = np.array([kp.pt for kp in m_kp1]) + pos_m_kp2 = np.array([kp.pt for kp in m_kp2]) + + # Pose estimation, triangulation, refinement, etc. (unchanged) + if initial_frame or not self.isStereo: + R, t = self.pose_estimation(pos_m_kp1, pos_m_kp2) + initial_frame = False + else: + # stereo approach (unchanged) + q1_l, q1_r, q2_l, q2_r, mask_stereo = self.calculate_right_features( + pos_m_kp1, pos_m_kp2, + disparity1, disparity2 + ) + Q1, Q2 = self.get_stereo_3d_pts(q1_l, q1_r, q2_l, q2_r) + success, rvec, tvec, inliers = cv2.solvePnPRansac( + objectPoints=Q1.reshape(-1,1,3).astype(np.float32), + imagePoints=q2_l.reshape(-1,1,2).astype(np.float32), + cameraMatrix=self.K_3x3, + distCoeffs=None, + iterationsCount=500, + reprojectionError=5.0, + confidence=0.99, + flags=cv2.SOLVEPNP_ITERATIVE + ) + if success: + R, _ = cv2.Rodrigues(rvec) + t = tvec.reshape(3,1) + else: + print("PnP RANSAC failed... 
(Stereo approach)") + + pts_3d = self.triangulate(R, t, pos_m_kp1, pos_m_kp2) + R, t = self.refine(pos_m_kp1, pos_m_kp2, pts_3d, R, t, verbose=False) + pts_3d = self.triangulate(R, t, pos_m_kp1, pos_m_kp2) + inv_pose = np.linalg.inv(self.get_4by4_homo_proj(R, t)) + c_pose = c_pose @ inv_pose + all_poses.append(c_pose) + + dist_from_keyframe = np.linalg.norm(c_pose[:3, -1] - pose_c_keyframe[:3, -1]) + good_depth_pts = pts_3d[pts_3d[:,2] > 0, 2] + avg_depth = np.mean(good_depth_pts) if len(good_depth_pts) else 1e-6 + + rel_R = np.linalg.inv(pose_c_keyframe[:3, :3]) @ c_pose[:3, :3] + rod = cv2.Rodrigues(rel_R)[0] + deg = np.abs(rod * 180.0 / np.pi) + ratio = dist_from_keyframe / avg_depth + + if ratio > self.THUMPUP_POS_THRESHOLD or np.any(deg > self.THUMPUP_ROT_THRESHOLD): + print(f" [Keyframe] Dist: {dist_from_keyframe:.3f}, AvgDepth: {avg_depth:.3f}, ratio={ratio:.3f}, deg={deg.squeeze()}, frame={id_n_keyframe}") + keyframes.append(id_n_keyframe) + + np.save(keyframe_filename, np.array(keyframes)) + + + + # --------------------------------------------------------------------- + # MAIN RUN + # --------------------------------------------------------------------- + def run(self, keyframe_filename, checkpoints=20, dynamic_vis=True): + import matplotlib.pyplot as plt + + if os.path.exists(keyframe_filename): + keyframes = np.load(keyframe_filename) + print(f"loaded keyframes from file: {keyframes}") + else: + keyframes = np.arange(len(self.image_list)) + print("keyframe file not found, using all frames") + + vis_param = self.vis_init() if dynamic_vis else None + + c_pose = np.hstack((np.eye(3), np.zeros((3, 1)))) + all_poses = [c_pose] + all_points_3d = [] + initial_frame = True + cumulative_loss_loc = 0 + + iter_count = len(keyframes) + for i in tqdm(range(1, iter_count)): + idx1 = keyframes[i - 1] + idx2 = keyframes[i] + img1 = self.read_image(idx1) + img2 = self.read_image(idx2) + + if self.isStereo: + img1_r = cv2.imread(self.image_list_r[idx1]) + img2_r = cv2.imread(self.image_list_r[idx2]) + disparity1 = self.compute_stereo_disparity(img1, img1_r) + disparity2 = self.compute_stereo_disparity(img2, img2_r) + + if self.use_lightglue: + pos_m_kp1, pos_m_kp2, matches, kp1, kp2 = self.lightglue_match(img1, img2) + else: + kp1, des1 = self.feature_detection(img1) + kp2, des2 = self.feature_detection(img2) + m_kp1, m_kp2, matches = self.feature_matching(des1, des2, kp1, kp2) + pos_m_kp1 = np.array([kp.pt for kp in m_kp1]) + pos_m_kp2 = np.array([kp.pt for kp in m_kp2]) + + if initial_frame or not self.isStereo: + R, t = self.pose_estimation(pos_m_kp1, pos_m_kp2) + initial_frame = False + else: + q1_l, q1_r, q2_l, q2_r, in_bounds = self.calculate_right_features( + pos_m_kp1, pos_m_kp2, + disparity1, disparity2 + ) + Q1, Q2 = self.get_stereo_3d_pts(q1_l, q1_r, q2_l, q2_r) + success, rvec, tvec, inliers = cv2.solvePnPRansac( + Q1.reshape(-1,1,3).astype(np.float32), + q2_l.reshape(-1,1,2).astype(np.float32), + self.K_3x3, None, + iterationsCount=2000, + reprojectionError=5.0, + confidence=0.99, + flags=cv2.SOLVEPNP_ITERATIVE + ) + if success: + R, _ = cv2.Rodrigues(rvec) + t = tvec.reshape(3,1) + else: + print("[Warning] Stereo PnP RANSAC failed to find a valid pose.") + + points_3d = self.triangulate(R, t, pos_m_kp1, pos_m_kp2) + R, t = self.refine(pos_m_kp1, pos_m_kp2, points_3d, R, t) + points_3d = self.triangulate(R, t, pos_m_kp1, pos_m_kp2) + all_points_3d.append(points_3d) + + inv_pose = np.linalg.inv(self.get_4by4_homo_proj(R, t)) + c_pose = c_pose @ inv_pose + all_poses.append(c_pose) + + 
ate_loss = -1 + rte_loss = -1 + if (self.gt is not None) and (idx2 < self.gt.shape[0]): + gt_pose_current = self.gt[idx2] + diff_mean = np.mean(np.abs(gt_pose_current[:, 3] - c_pose[:, 3])) + cumulative_loss_loc += diff_mean + ate_loss = self.compute_ate(c_pose, gt_pose_current) + + # --- Visualization + if dynamic_vis: + self.vis(vis_param, img1, img2, kp1, kp2, all_points_3d, all_poses, matches, idx1, show=True) + + # --- Save screenshots every 'checkpoints' frames + if (i % checkpoints == 0) or (i == iter_count - 1): + if dynamic_vis: + self.vis(vis_param, img1, img2, kp1, kp2, all_points_3d, all_poses, matches, idx1, show=False) + + out_dir = os.path.join(output_dir, self.dataset) + if not os.path.exists(out_dir): + os.mkdir(out_dir) + + if ate_loss == -1: + fname = f"{idx1}_noGT.png" + else: + fname = f"{idx1}_{(cumulative_loss_loc / i):.2f}_{ate_loss:.2f}.png" + + plt.savefig(os.path.join(out_dir, fname), dpi=100) + + # ----------------------- NEW HELPER FUNCTIONS ----------------------- + + def numpy_to_tensor(self, image): + """Converts a BGR numpy image (as read by cv2) into a torch tensor of shape (1, 3, H, W) + normalized in [0,1] and moved to GPU if available.""" + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image_rgb = image_rgb.astype(np.float32) / 255.0 + tensor = torch.from_numpy(image_rgb).permute(2, 0, 1).unsqueeze(0) + return tensor.cuda() if torch.cuda.is_available() else tensor + + def convert_lightglue_to_opencv(self, keypoints0, keypoints1, matches): + """ + Convert filtered LightGlue keypoints and matches into OpenCV-compatible KeyPoint and DMatch objects. + Here, keypoints0 and keypoints1 are assumed to already be filtered (only matched keypoints). + + Returns: + opencv_kp0: List of cv2.KeyPoint objects for image0. + opencv_kp1: List of cv2.KeyPoint objects for image1. + opencv_matches: List of cv2.DMatch objects where each match is simply (i,i). + """ + n_matches = keypoints0.shape[0] # number of matched keypoints + opencv_kp0 = [cv2.KeyPoint(float(pt[0]), float(pt[1]), 1) for pt in keypoints0] + opencv_kp1 = [cv2.KeyPoint(float(pt[0]), float(pt[1]), 1) for pt in keypoints1] + opencv_matches = [] + for i in range(n_matches): + # Here, we assume that the i-th keypoint in keypoints0 matches the i-th in keypoints1. + dmatch = cv2.DMatch(i, i, 0, 0.0) + opencv_matches.append(dmatch) + return opencv_kp0, opencv_kp1, opencv_matches + + + + def lightglue_match(self, img1, img2): + """ + Uses ALIKED and LightGlue to extract features and match them, + then converts the outputs to OpenCV-compatible formats. + Returns: + pos_kp0: numpy array of keypoint coordinates for image0 (shape (K,2)). + pos_kp1: numpy array of keypoint coordinates for image1 (shape (K,2)). + opencv_matches: list of cv2.DMatch objects. + opencv_kp0: list of cv2.KeyPoint objects for image0. + opencv_kp1: list of cv2.KeyPoint objects for image1. + """ + # Convert image to torch tensor + img1_tensor = self.numpy_to_tensor(img1) + img2_tensor = self.numpy_to_tensor(img2) + feats0 = self.extractor.extract(img1_tensor) + feats1 = self.extractor.extract(img2_tensor) + matches_out = self.matcher({'image0': feats0, 'image1': feats1}) + # Remove batch dimension using rbd from lightglue.utils + feats0 = rbd(feats0) + feats1 = rbd(feats1) + matches_out = rbd(matches_out) + + # Get matches and keypoints as numpy arrays. 
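+        # 'matches' below is a (K, 2) set of index pairs: column 0 indexes
+        # feats0['keypoints'] and column 1 indexes feats1['keypoints'], so the
+        # fancy indexing yields two aligned (K, 2) sets of matched pixel coords.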
+ matches = matches_out['matches'] # shape (K,2) + # keypoints returned by LightGlue are numpy arrays of (x,y) + keypoints0 = feats0['keypoints'][matches[..., 0]] + keypoints1 = feats1['keypoints'][matches[..., 1]] + + # Convert the keypoints and matches to OpenCV formats. + opencv_kp0, opencv_kp1, opencv_matches = self.convert_lightglue_to_opencv(keypoints0, keypoints1, matches) + + return keypoints0, keypoints1, opencv_matches, opencv_kp0, opencv_kp1 + + + # ----------------------- OVERRIDDEN METHODS IN initialize() and run() ----------------------- + + + +# ================================ +# Example main script usage +# ================================ +if __name__ == "__main__": + thumpup_pos_thresh = 0.015 + thumpup_rot_thresh = 1.5 + dataset = "kitti" # can be "kitti", "parking", "malaga", or "custom" + + # Create output directory if it doesn't exist + output_dir = "output" + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + + if dataset == "malaga": + keyframe_filename = f"./output/malaga_keyframes_{thumpup_pos_thresh}_{thumpup_rot_thresh}.npy" + sfm = StructureFromMotion("./Dataset/malaga-urban-dataset-extract-07", thumpup_pos_thresh, thumpup_rot_thresh) + elif dataset == "parking": + keyframe_filename = f"./output/parking_keyframes_{thumpup_pos_thresh}_{thumpup_rot_thresh}.npy" + sfm = StructureFromMotion("./Dataset/parking", thumpup_pos_thresh, thumpup_rot_thresh) + elif dataset == "kitti": + keyframe_filename = f"./output/kitti_keyframes_{thumpup_pos_thresh}_{thumpup_rot_thresh}.npy" + sfm = StructureFromMotion("./Dataset/kitti", thumpup_pos_thresh, thumpup_rot_thresh) + elif dataset == "custom": + keyframe_filename = f"./output/custom_keyframes_{thumpup_pos_thresh}_{thumpup_rot_thresh}.npy" + sfm = StructureFromMotion("./Dataset/custom", thumpup_pos_thresh, thumpup_rot_thresh) + else: + raise ValueError("Unknown dataset") + + sfm.initialize(keyframe_filename) + sfm.run(keyframe_filename, checkpoints=20, dynamic_vis=True) + + # If you want to block at the end (so the figure remains): + # plt.show(block=True) diff --git a/modules/SimpleSLAM/requirements.txt b/modules/SimpleSLAM/requirements.txt new file mode 100644 index 0000000000..4dcb596437 --- /dev/null +++ b/modules/SimpleSLAM/requirements.txt @@ -0,0 +1,12 @@ +lightglue==0.0 +matplotlib==3.10.3 +numpy==2.2.6 +opencv_python==4.11.0.86 +pandas==2.2.3 +scipy==1.15.3 +torch==2.7.0 +tqdm==4.67.1 +open3d==0.18.0 +pyceres==2.3 +pycolmap==3.10.0 +lz4==4.4.4 \ No newline at end of file diff --git a/modules/SimpleSLAM/scripts/run_tracker_visualization.sh b/modules/SimpleSLAM/scripts/run_tracker_visualization.sh new file mode 100644 index 0000000000..c9182109b2 --- /dev/null +++ b/modules/SimpleSLAM/scripts/run_tracker_visualization.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +set -e + +# ============================================== +# 1) Run with OpenCV SIFT detector + BF matcher: +# ============================================== +# python3 -m slam.monocular.main \ +# --dataset tum-rgbd \ +# --base_dir ./Dataset \ +# --detector akaze \ +# --matcher bf \ +# --fps 10 \ +# --ransac_thresh 1.0 \ +# --no_viz3d + +# ============================================== +# 2) (Alternative) Run with ALIKED + LightGlue: +# ============================================== +# python3 -m slam.monocular.main4 \ +# --dataset kitti \ +# --base_dir ./Dataset \ +# --use_lightglue \ +# --ransac_thresh 1.0 +# # --fps 10 \ +# # --no_viz3d + + +# # TEST +# python3 -m slam.monocular.main_revamped \ +# --dataset kitti \ +# --base_dir ./Dataset \ +# 
--detector orb \ +# --matcher bf \ +# --fps 10 \ +# --ransac_thresh 1.0 \ +# --no_viz3d + + +python3 -m slam.monocular.main_revamped \ + --dataset kitti \ + --base_dir ./Dataset \ + --use_lightglue \ + # --fps 10 \ + # --no_viz3d \ No newline at end of file diff --git a/modules/SimpleSLAM/setup.py b/modules/SimpleSLAM/setup.py new file mode 100644 index 0000000000..143cd325a2 --- /dev/null +++ b/modules/SimpleSLAM/setup.py @@ -0,0 +1,18 @@ +# setup.py +from setuptools import setup, find_packages + +setup( + name = "opencv_simple_slam", + version = "0.1.0", + description = "A simple feature-based SLAM using OpenCV and LightGlue", + packages = find_packages(), # this will pick up your `slam/` package + install_requires=[ + "opencv-python", + "numpy", + "lz4", + "tqdm", + "torch", # if you intend to support LightGlue + "lightglue @ git+https://github.com/cvg/LightGlue.git", + ], +) + diff --git a/modules/SimpleSLAM/slam/__init__.py b/modules/SimpleSLAM/slam/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/SimpleSLAM/slam/core/__init__.py b/modules/SimpleSLAM/slam/core/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/SimpleSLAM/slam/core/archieve/visualization_keyframes.py b/modules/SimpleSLAM/slam/core/archieve/visualization_keyframes.py new file mode 100644 index 0000000000..c3b94f42d1 --- /dev/null +++ b/modules/SimpleSLAM/slam/core/archieve/visualization_keyframes.py @@ -0,0 +1,492 @@ +import os +import glob +import argparse +import time +from tqdm import tqdm +from dataclasses import dataclass +import lz4.frame +import cv2 +import numpy as np + + +# Optional LightGlue imports +try: + import torch + from lightglue import LightGlue, ALIKED + from lightglue.utils import rbd , load_image +except ImportError: + raise ImportError('LightGlue unavailable, please install it using instructions on https://github.com/cvg/LightGlue') + + +def get_detector(detector_type, max_features=6000): + if detector_type == 'orb': + return cv2.ORB_create(max_features) + elif detector_type == 'sift': + return cv2.SIFT_create() + elif detector_type == 'akaze': + return cv2.AKAZE_create() + raise ValueError(f"Unsupported detector: {detector_type}") + +def get_matcher(matcher_type, detector_type=None): + if matcher_type == 'bf': + norm = cv2.NORM_HAMMING if detector_type in ['orb','akaze'] else cv2.NORM_L2 + return cv2.BFMatcher(norm, crossCheck=True) + raise ValueError(f"Unsupported matcher: {matcher_type}") + +def opencv_detector_and_matcher(img1,img2,detector,matcher): + kp1,des1=detector.detectAndCompute(img1,None) + kp2,des2=detector.detectAndCompute(img2,None) + if des1 is None or des2 is None: + return [],[],[] + matches=matcher.match(des1,des2) + return kp1, kp2, des1, des2, sorted(matches,key=lambda m:m.distance) + +def bgr_to_tensor(image): + """Convert a OpenCV‐style BGR uint8 into (3,H,W) torch tensor in [0,1] .""" + img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)/255.0 + tensor = torch.from_numpy(img_rgb).permute(2,0,1).unsqueeze(0) + return tensor.cuda() if torch.cuda.is_available() else tensor + +def tensor_to_bgr(img_tensor): + """Convert a (1,3,H,W) or (3,H,W) torch tensor in [0,1] to RGB → OpenCV‐style BGR uint8 image""" + if img_tensor.dim() == 4: + img_tensor = img_tensor[0] + img_np = img_tensor.permute(1,2,0).cpu().numpy() + img_np = (img_np * 255).clip(0,255).astype(np.uint8) + return cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR) + +def convert_lightglue_to_opencv(keypoints0, keypoints1, matches): + """ + Convert 
filtered LightGlue keypoints and matches into OpenCV-compatible KeyPoint and DMatch objects. + Here, keypoints0 and keypoints1 are assumed to already be filtered (only matched keypoints). + + Returns: + opencv_kp0: List of cv2.KeyPoint objects for image0. + opencv_kp1: List of cv2.KeyPoint objects for image1. + opencv_matches: List of cv2.DMatch objects where each match is simply (i,i). + """ + # TODO: convert descriptors as well + n_matches = keypoints0.shape[0] # number of matched keypoints + opencv_kp0 = [cv2.KeyPoint(float(pt[0]), float(pt[1]), 1) for pt in keypoints0] + opencv_kp1 = [cv2.KeyPoint(float(pt[0]), float(pt[1]), 1) for pt in keypoints1] + + opencv_matches = [ + cv2.DMatch(int(q), int(t), 0, 0.0) for q, t in matches + ] + + return opencv_kp0, opencv_kp1, opencv_matches + +def lightglue_match(img1, img2, extractor, matcher, min_conf=0.0): + """ + Uses ALIKED and LightGlue to extract features and match them, + then converts the outputs to OpenCV-compatible formats. + Returns: + pos_kp0: numpy array of keypoint coordinates for image0 (shape (K,2)). + pos_kp1: numpy array of keypoint coordinates for image1 (shape (K,2)). + opencv_matches: list of cv2.DMatch objects. + opencv_kp0: list of cv2.KeyPoint objects for image0. + opencv_kp1: list of cv2.KeyPoint objects for image1. + """ + # TODO: add min_conf filtering + # TODO: return descriptors as well + # Convert image to torch tensor + img1_tensor = bgr_to_tensor(img1) + img2_tensor = bgr_to_tensor(img2) + feats0 = extractor.extract(img1_tensor) + feats1 = extractor.extract(img2_tensor) + matches_out = matcher({'image0': feats0, 'image1': feats1}) + # Remove batch dimension using rbd from lightglue.utils + feats0 = rbd(feats0) + feats1 = rbd(feats1) + matches_out = rbd(matches_out) + + # Get matches and keypoints as numpy arrays. + matches = matches_out['matches'] + # keypoints returned by LightGlue are numpy arrays of (x,y) + keypoints0 = feats0['keypoints'] + keypoints1 = feats1['keypoints'] + descriptors0 = feats0['descriptors'] + descriptors1 = feats1['descriptors'] + # print("discriptor types:", type(descriptors0), type(descriptors1)) + + + # Convert the keypoints and matches to OpenCV formats. + opencv_kp0, opencv_kp1, opencv_matches = convert_lightglue_to_opencv(keypoints0, keypoints1, matches) + + return opencv_kp0, opencv_kp1, descriptors0, descriptors1, opencv_matches + +def update_and_prune_tracks(matches, prev_map, tracks, kp_curr, frame_idx, next_track_id, prune_age=30): + """ + Update feature tracks given a list of matches, then prune stale ones. + + Args: + matches : List[cv2.DMatch] between previous and current keypoints. + prev_map : Dict[int, int] mapping prev-frame kp index -> track_id. + tracks : Dict[int, List[Tuple[int, int, int]]], + each track_id → list of (frame_idx, x, y). + kp_curr : List[cv2.KeyPoint] for current frame. + frame_idx : int, index of the current frame (1-based or 0-based). + next_track_id : int, the next unused track ID. + prune_age : int, max age (in frames) before a track is discarded. + + Returns: + curr_map : Dict[int, int] mapping curr-frame kp index → track_id. + tracks : Updated tracks dict. + next_track_id : Updated next unused track ID. 
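+
+    Example (illustrative, using the docstring's own names):
+        curr_map, tracks, next_track_id = update_and_prune_tracks(
+            matches, prev_map, tracks, kp_curr, frame_idx, next_track_id)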
+ """ + curr_map = {} + + # 1) Assign each match to an existing or new track_id + for m in matches: + q = m.queryIdx # keypoint idx in previous frame + t = m.trainIdx # keypoint idx in current frame + + # extract integer pixel coords + x, y = int(kp_curr[t].pt[0]), int(kp_curr[t].pt[1]) + + if q in prev_map: + # continue an existing track + tid = prev_map[q] + else: + # start a brand-new track + tid = next_track_id + tracks[tid] = [] + next_track_id += 1 + + # map this current keypoint to the track + curr_map[t] = tid + # append the new observation + tracks[tid].append((frame_idx, x, y)) + + # 2) Prune any track not seen in the last `prune_age` frames + for tid, pts in list(tracks.items()): + last_seen_frame = pts[-1][0] + if (frame_idx - last_seen_frame) > prune_age: + del tracks[tid] + + return curr_map, tracks, next_track_id + +def filter_matches_ransac(kp1, kp2, matches, thresh): + """ + Filter a list of cv2.DMatch objects using RANSAC on the fundamental matrix. + + Args: + kp1 (List[cv2.KeyPoint]): KeyPoints from the first image. + kp2 (List[cv2.KeyPoint]): KeyPoints from the second image. + matches (List[cv2.DMatch]) : Initial matches between kp1 and kp2. + thresh (float) : RANSAC inlier threshold (pixels). + + Returns: + List[cv2.DMatch]: Only those matches deemed inliers by RANSAC. + """ + # Need at least 8 points for a fundamental matrix + if len(matches) < 8: + return matches + + # Build corresponding point arrays + pts1 = np.float32([kp1[m.queryIdx].pt for m in matches]) + pts2 = np.float32([kp2[m.trainIdx].pt for m in matches]) + + # Run RANSAC to find inlier mask + F, mask = cv2.findFundamentalMat( + pts1, pts2, + method=cv2.FM_RANSAC, + ransacReprojThreshold=thresh, + confidence=0.99 + ) + mask = mask.ravel().astype(bool) + + # Keep only the inlier matches + inliers = [m for m, ok in zip(matches, mask) if ok] + return inliers + +def draw_tracks(vis, tracks, current_frame, max_age=10, sample_rate=5, max_tracks=1000): + """ + Draw each track's path with color decaying from green (new) to red (old). + Only draw up to max_tracks most recent tracks, and sample every sample_rate-th track. + """ + recent=[(tid,pts) for tid,pts in tracks.items() if pts and current_frame-pts[-1][0]<=max_age] + recent.sort(key=lambda x:x[1][-1][0],reverse=True) + drawn=0 + for tid,pts in recent: + if drawn>=max_tracks: break + if tid%sample_rate!=0: continue + pts=[p for p in pts if current_frame-p[0]<=max_age] + for j in range(1,len(pts)): + frame_idx,x0,y0=pts[j-1] + _,x1,y1=pts[j] + age=current_frame-frame_idx + ratio=age/max_age + b=0 + g=int(255*(1-ratio)) + r=int(255*ratio) + cv2.line(vis,(int(x0),int(y0)),(int(x1),int(y1)),(b,g,r),2) + drawn+=1 + return vis + +def dataloader(args): + """ + Load the dataset and return the detector, matcher, and sequence of images. 
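+
+    Returns:
+        detector, matcher, seq : seq is a sorted list of image paths for the
+        on-disk datasets, or the decoded video frames for the 'custom' dataset.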
+ """ + # init modules once + if args.use_lightglue: + detector= ALIKED(max_num_keypoints=2048).eval().cuda() + matcher= LightGlue(features='aliked').eval().cuda() + else: + detector=get_detector(args.detector) + matcher=get_matcher(args.matcher,args.detector) + + # load sequence + prefix=os.path.join(args.base_dir, args.dataset) + is_custom=False + if args.dataset=='kitti': img_dir,pat=os.path.join(prefix,'05','image_0'),'*.png' + elif args.dataset=='parking': img_dir,pat=os.path.join(prefix,'images'),'*.png' + elif args.dataset=='malaga': img_dir,pat =os.path.join(prefix,'malaga-urban-dataset-extract-07_rectified_800x600_Images'),'*_left.jpg' + elif args.dataset=='tum-rgbd': img_dir,pat =os.path.join(prefix,'rgbd_dataset_freiburg1_room', 'rgb'),'*.png' + else: + vid=os.path.join(prefix,'custom_compress.mp4') + cap=cv2.VideoCapture(vid) + seq=[] + while True: + ok,fr=cap.read() + if not ok: break; seq.append(fr) + cap.release(); is_custom=True + + # load images + if not is_custom: seq=sorted(glob.glob(os.path.join(img_dir,pat))) + + return detector, matcher, seq + +def load_frame_pair(args,seq, i): + """ + Load a pair of consecutive frames from the sequence. + Args: + seq (list): List of image paths or frames. + i (int): Index of the current frame. + Returns: + img1 (numpy.ndarray): First image. + img2 (numpy.ndarray): Second image. + """ + # load images + if args.dataset=='custom': + img1, img2 = seq[i], seq[i+1] + else: + img1 = cv2.imread(seq[i]) + img2=cv2.imread(seq[i+1]) + + return img1, img2 + +def detect_and_match(args, img1, img2, detector, matcher): + """ + Load two consecutive images from the sequence and match features using the specified detector and matcher. + Args: + args (argparse.Namespace): Command line arguments. + img1 (numpy.ndarray): First image. + img2 (numpy.ndarray): Second image. + detector: Feature detector object. + matcher: Feature matcher object. + Returns: + kp_map1 (list): Keypoints from the first image. + kp_map2 (list): Keypoints from the second image. + matches (list): List of matched features. + """ + # match features + if args.use_lightglue: + kp_map1, kp_map2, des1, des2, matches = lightglue_match(img1,img2,detector,matcher) + else: + kp_map1, kp_map2, des1, des2, matches = opencv_detector_and_matcher(img1, img2, detector, matcher) + + return kp_map1, kp_map2, des1, des2, matches + +def is_new_keyframe(frame_idx: int, + matches_to_kf: list[cv2.DMatch], + n_kf_features: int, + kp_curr: list[cv2.KeyPoint], + kp_kf: list[cv2.KeyPoint], + kf_max_disp: float = 30.0, + kf_min_inliers: int = 125, + kf_cooldown: int = 5, + last_kf_idx: int = -999) -> bool: + """ + Decide whether the *current* frame should become a new key-frame. + + Parameters + ---------- + frame_idx : index of the current frame in the sequence + matches_to_kf : list of inlier cv2.DMatch between LAST KF and current frame + n_kf_features : total number of keypoints that existed in the last KF + kp_curr : keypoints detected in the current frame + kp_kf : keypoints of the last key-frame + kf_max_disp : pixel-space parallax threshold (avg. 
L2) to trigger a new KF + kf_min_inliers : minimum *surviving-match* ratio; below → new KF + kf_cooldown : minimum #frames to wait after last KF before creating another + last_kf_idx : frame index of the most recent key-frame + + Returns + ------- + bool : True ⇒ promote current frame to key-frame + """ + + # 0) Cool-down guard # + if frame_idx - last_kf_idx < kf_cooldown: + return False # too soon to spawn another KF + + # 1) Overlap / track-survival test # + # If the key-frame had zero features (should never happen) → force new KF + if not matches_to_kf or not n_kf_features: # no matches or features at all ⇒ we must create a new KF + return True + + if len(matches_to_kf) < kf_min_inliers: + return True # too few matches survived, so we must create a new KF + + # OVERLAP RATIO TEST (not used anymore, see below) # This Logic is not working well, so we use a different metric + # overlap_ratio = len(matches_to_kf) / float(n_kf_features) + # print(f"[KF] Overlap ratio: {overlap_ratio:.2f} (matches={len(matches_to_kf)})") + # if overlap_ratio < kf_min_ratio: + # return True + + # 2) Parallax test (average pixel displacement) #TODO replace with a more robust metric + # Compute mean Euclidean displacement between matched pairs + disp = [ + np.hypot( + kp_curr[m.trainIdx].pt[0] - kp_kf[m.queryIdx].pt[0], + kp_curr[m.trainIdx].pt[1] - kp_kf[m.queryIdx].pt[1] + ) + for m in matches_to_kf + ] + + if np.mean(disp) > kf_max_disp: + return True + return False + +def make_thumb(bgr, hw=(854,480)): + """Return lz4-compressed JPEG thumbnail (bytes).""" + th = cv2.resize(bgr, hw) + ok, enc = cv2.imencode('.jpg', th, [int(cv2.IMWRITE_JPEG_QUALITY), 70]) + return lz4.frame.compress(enc.tobytes()) if ok else b'' + + +@dataclass +class Keyframe: + idx: int # global frame index + path: str # on-disk image file OR "" if custom video + kps: list[cv2.KeyPoint] # keypoints (needed for geometric checks) + desc: np.ndarray # descriptors (uint8/float32) + thumb: bytes # lz4-compressed thumbnail for UI + + +def main(): + parser=argparse.ArgumentParser("Feature tracking with RANSAC filtering") + parser.add_argument('--dataset',choices=['kitti','malaga','tum-rgbd','custom'],required=True) + parser.add_argument('--base_dir',default='.././Dataset') + parser.add_argument('--detector',choices=['orb','sift','akaze'],default='orb') + parser.add_argument('--matcher',choices=['bf'],default='bf') + parser.add_argument('--use_lightglue',action='store_true') + parser.add_argument('--fps',type=float,default=10) + # --- RANSAC related CLI flags ------------------------------------------- + parser.add_argument('--ransac_thresh',type=float,default=1.0, help='RANSAC threshold for fundamental matrix') + # --- Key-frame related CLI flags ----------------------------------------- + parser.add_argument('--kf_max_disp', type=float, default=45, + help='Min avg. 
pixel displacement wrt last keyframe') + parser.add_argument('--kf_min_inliers', type=float, default=150, + help='Min surviving-match features wrt last keyframe') + parser.add_argument('--kf_cooldown', type=int, default=3, + help='Frames to wait before next keyframe check') + parser.add_argument('--kf_thumb_hw', type=int, nargs=2, default=[640,360], + help='Width Height of key-frame thumbnail') + args=parser.parse_args() + + # initialize detector and matcher + detector, matcher, seq = dataloader(args) + + # tracking data + track_id=0; prev_map={}; tracks={} + cv2.namedWindow('Feature Tracking',cv2.WINDOW_NORMAL) + cv2.resizeWindow('Feature Tracking',1200,600) + + total=len(seq)-1; prev_time=time.time(); achieved_fps=0.0 + for i in tqdm(range(total),desc='Tracking'): + # load frame pair + img1, img2 = load_frame_pair(args, seq, i) + + # Detect and match features + kp_map1, kp_map2, des1, des2, matches = detect_and_match(args, img1, img2, detector, matcher) + + # filter with RANSAC + matches = filter_matches_ransac(kp_map1, kp_map2, matches, args.ransac_thresh) + + # update & prune tracks + frame_no = i + 1 + prev_map, tracks, track_id = update_and_prune_tracks(matches, prev_map, tracks, kp_map2, frame_no, track_id, prune_age=30) + + # ---------------------------------------------------------------------- + # Key-frame subsystem (light-weight, zero full-image retention) + # ---------------------------------------------------------------------- + if i == 0: # bootstrap KF0 + thumb = make_thumb(img1, tuple(args.kf_thumb_hw)) + kfs = [Keyframe(0, seq[0] if isinstance(seq[0],str) else "", + kp_map1, des1, thumb)] + last_kf_idx = 0 + else: + # 1) match *stored* KF descriptors ↔ current frame descriptors + kf = kfs[-1] + kf_matches = matcher.match(kf.desc, des2) if len(des2) and len(kf.desc) else [] + kf_matches = filter_matches_ransac(kf.kps, kp_map2, kf_matches, args.ransac_thresh) + + # 2) decide + if is_new_keyframe(frame_no, + kf_matches, + n_kf_features=len(kf.kps), + kp_curr=kp_map2, + kp_kf=kf.kps, + kf_max_disp=args.kf_max_disp, + kf_min_inliers=args.kf_min_inliers, + kf_cooldown=args.kf_cooldown, + last_kf_idx=last_kf_idx): + thumb = make_thumb(img2, tuple(args.kf_thumb_hw)) + kfs.append(Keyframe(frame_no, + seq[i+1] if isinstance(seq[i+1],str) else "", + kp_map2, des2, thumb)) + last_kf_idx = frame_no + # print(f"[KF] {frame_no:4d} total={len(kfs)}") + + + # draw + vis= img2.copy() + vis=draw_tracks(vis,tracks,i+1) + for t,tid in prev_map.items(): + cv2.circle(vis,tuple(map(int, kp_map2[t].pt)),3,(0,255,0),-1) + + + # -------------------------------------------------------- + # overlay active KF id + cv2.putText(vis, f"KF index:{last_kf_idx} , len: {len(kfs)}", (10,60), + cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2) + + # assemble strip + thumbs = [cv2.imdecode(np.frombuffer(lz4.frame.decompress(kf.thumb),np.uint8), + cv2.IMREAD_COLOR) for kf in kfs[-4:]] + bar = np.hstack(thumbs) if thumbs else np.zeros((args.kf_thumb_hw[1],args.kf_thumb_hw[0],3), np.uint8) + cv2.imshow('Keyframes', bar) + + MAX_MEM_KF = 500 + if len(kfs) > MAX_MEM_KF: + dump = kfs.pop(0) # oldest KF + # -------------------------------------------------------- + + text=f"Frame {i+1}/{total} | Tracks: {len(tracks)} | FPS: {achieved_fps:.1f}" + cv2.putText(vis,text,(10,30),cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255),2) + + # show + interval_ms=int(1000/args.fps) if args.fps>0 else 0 + key=cv2.waitKey(interval_ms) + cv2.imshow('Feature Tracking',vis) + # measure fps + now=time.time();dt=now-prev_time + if dt>0: 
achieved_fps=1.0/dt + prev_time=now + if key&0xFF==27: break + cv2.destroyAllWindows() + +if __name__=='__main__': + main() diff --git a/modules/SimpleSLAM/slam/core/archieve/visualize_tracking.py b/modules/SimpleSLAM/slam/core/archieve/visualize_tracking.py new file mode 100644 index 0000000000..e1d89c72b3 --- /dev/null +++ b/modules/SimpleSLAM/slam/core/archieve/visualize_tracking.py @@ -0,0 +1,375 @@ +import os +import glob +import argparse +import time +from tqdm import tqdm +from dataclasses import dataclass + +import cv2 +import numpy as np + + +# Optional LightGlue imports +try: + import torch + from lightglue import LightGlue, ALIKED + from lightglue.utils import rbd , load_image +except ImportError: + raise ImportError('LightGlue unavailable, please install it using instructions on https://github.com/cvg/LightGlue') + + +def get_detector(detector_type, max_features=6000): + if detector_type == 'orb': + return cv2.ORB_create(max_features) + elif detector_type == 'sift': + return cv2.SIFT_create() + elif detector_type == 'akaze': + return cv2.AKAZE_create() + raise ValueError(f"Unsupported detector: {detector_type}") + +def get_matcher(matcher_type, detector_type=None): + if matcher_type == 'bf': + norm = cv2.NORM_HAMMING if detector_type in ['orb','akaze'] else cv2.NORM_L2 + return cv2.BFMatcher(norm, crossCheck=True) + raise ValueError(f"Unsupported matcher: {matcher_type}") + +def opencv_detector_and_matcher(img1,img2,detector,matcher): + kp1,des1=detector.detectAndCompute(img1,None) + kp2,des2=detector.detectAndCompute(img2,None) + if des1 is None or des2 is None: + return [],[],[] + matches=matcher.match(des1,des2) + return kp1, kp2, des1, des2, sorted(matches,key=lambda m:m.distance) + +def bgr_to_tensor(image): + """Convert a OpenCV‐style BGR uint8 into (3,H,W) torch tensor in [0,1] .""" + img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)/255.0 + tensor = torch.from_numpy(img_rgb).permute(2,0,1).unsqueeze(0) + return tensor.cuda() if torch.cuda.is_available() else tensor + +def tensor_to_bgr(img_tensor): + """Convert a (1,3,H,W) or (3,H,W) torch tensor in [0,1] to RGB → OpenCV‐style BGR uint8 image""" + if img_tensor.dim() == 4: + img_tensor = img_tensor[0] + img_np = img_tensor.permute(1,2,0).cpu().numpy() + img_np = (img_np * 255).clip(0,255).astype(np.uint8) + return cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR) + +def convert_lightglue_to_opencv(keypoints0, keypoints1, matches): + """ + Convert filtered LightGlue keypoints and matches into OpenCV-compatible KeyPoint and DMatch objects. + Here, keypoints0 and keypoints1 are assumed to already be filtered (only matched keypoints). + + Returns: + opencv_kp0: List of cv2.KeyPoint objects for image0. + opencv_kp1: List of cv2.KeyPoint objects for image1. + opencv_matches: List of cv2.DMatch objects where each match is simply (i,i). + """ + # TODO: convert descriptors as well + n_matches = keypoints0.shape[0] # number of matched keypoints + opencv_kp0 = [cv2.KeyPoint(float(pt[0]), float(pt[1]), 1) for pt in keypoints0] + opencv_kp1 = [cv2.KeyPoint(float(pt[0]), float(pt[1]), 1) for pt in keypoints1] + + opencv_matches = [ + cv2.DMatch(int(q), int(t), 0, 0.0) for q, t in matches + ] + + return opencv_kp0, opencv_kp1, opencv_matches + +def lightglue_match(img1, img2, extractor, matcher, min_conf=0.0): + """ + Uses ALIKED and LightGlue to extract features and match them, + then converts the outputs to OpenCV-compatible formats. + Returns: + pos_kp0: numpy array of keypoint coordinates for image0 (shape (K,2)). 
+ pos_kp1: numpy array of keypoint coordinates for image1 (shape (K,2)). + opencv_matches: list of cv2.DMatch objects. + opencv_kp0: list of cv2.KeyPoint objects for image0. + opencv_kp1: list of cv2.KeyPoint objects for image1. + """ + # TODO: add min_conf filtering + # TODO: return descriptors as well + # Convert image to torch tensor + img1_tensor = bgr_to_tensor(img1) + img2_tensor = bgr_to_tensor(img2) + feats0 = extractor.extract(img1_tensor) + feats1 = extractor.extract(img2_tensor) + matches_out = matcher({'image0': feats0, 'image1': feats1}) + # Remove batch dimension using rbd from lightglue.utils + feats0 = rbd(feats0) + feats1 = rbd(feats1) + matches_out = rbd(matches_out) + + # Get matches and keypoints as numpy arrays. + matches = matches_out['matches'] # shape (K,2) + # keypoints returned by LightGlue are numpy arrays of (x,y) + keypoints0 = feats0['keypoints'] + keypoints1 = feats1['keypoints'] + descriptors0 = feats0['descriptors'] # shape (K,D) + descriptors1 = feats1['descriptors'] # shape (K,D) + print("discriptor types:", type(descriptors0), type(descriptors1)) + + + # Convert the keypoints and matches to OpenCV formats. + opencv_kp0, opencv_kp1, opencv_matches = convert_lightglue_to_opencv(keypoints0, keypoints1, matches) + + return opencv_kp0, opencv_kp1, descriptors0, descriptors1, opencv_matches + +def update_and_prune_tracks(matches, prev_map, tracks, kp_curr, frame_idx, next_track_id, prune_age=30): + """ + Update feature tracks given a list of matches, then prune stale ones. + + Args: + matches : List[cv2.DMatch] between previous and current keypoints. + prev_map : Dict[int, int] mapping prev-frame kp index -> track_id. + tracks : Dict[int, List[Tuple[int, int, int]]], + each track_id → list of (frame_idx, x, y). + kp_curr : List[cv2.KeyPoint] for current frame. + frame_idx : int, index of the current frame (1-based or 0-based). + next_track_id : int, the next unused track ID. + prune_age : int, max age (in frames) before a track is discarded. + + Returns: + curr_map : Dict[int, int] mapping curr-frame kp index → track_id. + tracks : Updated tracks dict. + next_track_id : Updated next unused track ID. + """ + curr_map = {} + + # 1) Assign each match to an existing or new track_id + for m in matches: + q = m.queryIdx # keypoint idx in previous frame + t = m.trainIdx # keypoint idx in current frame + + # extract integer pixel coords + x, y = int(kp_curr[t].pt[0]), int(kp_curr[t].pt[1]) + + if q in prev_map: + # continue an existing track + tid = prev_map[q] + else: + # start a brand-new track + tid = next_track_id + tracks[tid] = [] + next_track_id += 1 + + # map this current keypoint to the track + curr_map[t] = tid + # append the new observation + tracks[tid].append((frame_idx, x, y)) + + # 2) Prune any track not seen in the last `prune_age` frames + for tid, pts in list(tracks.items()): + last_seen_frame = pts[-1][0] + if (frame_idx - last_seen_frame) > prune_age: + del tracks[tid] + + return curr_map, tracks, next_track_id + +def filter_matches_ransac(kp1, kp2, matches, thresh): + """ + Filter a list of cv2.DMatch objects using RANSAC on the fundamental matrix. + + Args: + kp1 (List[cv2.KeyPoint]): KeyPoints from the first image. + kp2 (List[cv2.KeyPoint]): KeyPoints from the second image. + matches (List[cv2.DMatch]) : Initial matches between kp1 and kp2. + thresh (float) : RANSAC inlier threshold (pixels). + + Returns: + List[cv2.DMatch]: Only those matches deemed inliers by RANSAC. 
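+
+    Note: with fewer than 8 matches the fundamental matrix is under-determined,
+    so the input matches are returned unchanged.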
+ """ + # Need at least 8 points for a fundamental matrix + if len(matches) < 8: + return matches + + # Build corresponding point arrays + pts1 = np.float32([kp1[m.queryIdx].pt for m in matches]) + pts2 = np.float32([kp2[m.trainIdx].pt for m in matches]) + + # Run RANSAC to find inlier mask + F, mask = cv2.findFundamentalMat( + pts1, pts2, + method=cv2.FM_RANSAC, + ransacReprojThreshold=thresh, + confidence=0.99 + ) + mask = mask.ravel().astype(bool) + + # Keep only the inlier matches + inliers = [m for m, ok in zip(matches, mask) if ok] + return inliers + +def draw_tracks(vis, tracks, current_frame, max_age=10, sample_rate=5, max_tracks=1000): + """ + Draw each track's path with color decaying from green (new) to red (old). + Only draw up to max_tracks most recent tracks, and sample every sample_rate-th track. + """ + recent=[(tid,pts) for tid,pts in tracks.items() if pts and current_frame-pts[-1][0]<=max_age] + recent.sort(key=lambda x:x[1][-1][0],reverse=True) + drawn=0 + for tid,pts in recent: + if drawn>=max_tracks: break + if tid%sample_rate!=0: continue + pts=[p for p in pts if current_frame-p[0]<=max_age] + for j in range(1,len(pts)): + frame_idx,x0,y0=pts[j-1] + _,x1,y1=pts[j] + age=current_frame-frame_idx + ratio=age/max_age + b=0 + g=int(255*(1-ratio)) + r=int(255*ratio) + cv2.line(vis,(int(x0),int(y0)),(int(x1),int(y1)),(b,g,r),2) + drawn+=1 + return vis + +def dataloader(args): + """ + Load the dataset and return the detector, matcher, and sequence of images. + """ + # init modules once + if args.use_lightglue: + detector=ALIKED(max_num_keypoints=2048).eval().cuda() + matcher=LightGlue(features='aliked').eval().cuda() + else: + detector=get_detector(args.detector) + matcher=get_matcher(args.matcher,args.detector) + + # load sequence + prefix=os.path.join(args.base_dir, args.dataset) + is_custom=False + if args.dataset=='kitti': img_dir,pat=os.path.join(prefix,'05','image_0'),'*.png' + elif args.dataset=='parking': img_dir,pat=os.path.join(prefix,'images'),'*.png' + elif args.dataset=='malaga': img_dir,pat =os.path.join(prefix,'malaga-urban-dataset-extract-07_rectified_800x600_Images'),'*_left.jpg' + elif args.dataset=='tum-rgbd': img_dir,pat =os.path.join(prefix,'rgbd_dataset_freiburg1_room', 'rgb'),'*.png' + else: + vid=os.path.join(prefix,'custom_compress.mp4') + cap=cv2.VideoCapture(vid) + seq=[] + while True: + ok,fr=cap.read() + if not ok: break; seq.append(fr) + cap.release(); is_custom=True + + # load images + if not is_custom: seq=sorted(glob.glob(os.path.join(img_dir,pat))) + + return detector, matcher, seq + +def load_frame_pair(args,seq, i): + """ + Load a pair of consecutive frames from the sequence. + Args: + seq (list): List of image paths or frames. + i (int): Index of the current frame. + Returns: + img1 (numpy.ndarray): First image. + img2 (numpy.ndarray): Second image. + """ + # load images + if args.dataset=='custom': + img1, img2 = seq[i], seq[i+1] + else: + img1 = cv2.imread(seq[i]) + img2=cv2.imread(seq[i+1]) + + return img1, img2 + +def detect_and_match(args, img1, img2, detector, matcher): + """ + Load two consecutive images from the sequence and match features using the specified detector and matcher. + Args: + args (argparse.Namespace): Command line arguments. + img1 (numpy.ndarray): First image. + img2 (numpy.ndarray): Second image. + detector: Feature detector object. + matcher: Feature matcher object. + Returns: + kp_map1 (list): Keypoints from the first image. + kp_map2 (list): Keypoints from the second image. 
+ matches (list): List of matched features. + """ + # match features + if args.use_lightglue: + kp_map1, kp_map2, des1, des2, matches = lightglue_match(img1,img2,detector,matcher) + else: + kp_map1, kp_map2, des1, des2, matches = opencv_detector_and_matcher(img1, img2, detector, matcher) + + return kp_map1, kp_map2, des1, des2, matches + + +@dataclass +class Keyframe: + idx: int # global frame index + path: str # on-disk image file OR "" if custom video + kps: list[cv2.KeyPoint] # keypoints (needed for geometric checks) + desc: np.ndarray # descriptors (uint8/float32) + thumb: bytes # lz4-compressed thumbnail for UI + + +def main(): + parser=argparse.ArgumentParser("Feature tracking with RANSAC filtering") + parser.add_argument('--dataset',choices=['kitti','malaga','tum-rgb','custom'],required=True) + parser.add_argument('--base_dir',default='.././Dataset') + parser.add_argument('--detector',choices=['orb','sift','akaze'],default='orb') + parser.add_argument('--matcher',choices=['bf'],default='bf') + parser.add_argument('--use_lightglue',action='store_true') + parser.add_argument('--fps',type=float,default=10) + # --- RANSAC related CLI flags ------------------------------------------- + parser.add_argument('--ransac_thresh',type=float,default=1.0, help='RANSAC threshold for fundamental matrix') + # --- Key-frame related CLI flags ----------------------------------------- + parser.add_argument('--kf_max_disp', type=float, default=30, + help='Min avg. pixel displacement wrt last keyframe') + parser.add_argument('--kf_min_ratio', type=float, default=0.5, + help='Min surviving-match ratio wrt last keyframe') + parser.add_argument('--kf_cooldown', type=int, default=5, + help='Frames to wait before next keyframe check') + + args=parser.parse_args() + + # initialize detector and matcher + detector, matcher, seq = dataloader(args) + + # tracking data + track_id=0; prev_map={}; tracks={} + cv2.namedWindow('Feature Tracking',cv2.WINDOW_NORMAL) + cv2.resizeWindow('Feature Tracking',1200,600) + + total=len(seq)-1; prev_time=time.time(); achieved_fps=0.0 + for i in tqdm(range(total),desc='Tracking'): + # load frame pair + img1, img2 = load_frame_pair(args, seq, i) + + # Detect and match features + kp_map1, kp_map2, des1, des2, matches = detect_and_match(args, img1, img2, detector, matcher) + + # filter with RANSAC + matches = filter_matches_ransac(kp_map1, kp_map2, matches, args.ransac_thresh) + + # update & prune tracks + frame_no = i + 1 + prev_map, tracks, track_id = update_and_prune_tracks(matches, prev_map, tracks, kp_map2, frame_no, track_id, prune_age=30) + + # draw + vis= img2.copy() + vis=draw_tracks(vis,tracks,i+1) + for t,tid in prev_map.items(): + cv2.circle(vis,tuple(map(int, kp_map2[t].pt)),3,(0,255,0),-1) + + text=f"Frame {i+1}/{total} | Tracks: {len(tracks)} | FPS: {achieved_fps:.1f}" + cv2.putText(vis,text,(10,30),cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255),2) + + # show + interval_ms=int(1000/args.fps) if args.fps>0 else 0 + key=cv2.waitKey(interval_ms) + cv2.imshow('Feature Tracking',vis) + # measure fps + now=time.time();dt=now-prev_time + if dt>0: achieved_fps=1.0/dt + prev_time=now + if key&0xFF==27: break + cv2.destroyAllWindows() + +if __name__=='__main__': + main() diff --git a/modules/SimpleSLAM/slam/core/ba_utils.py b/modules/SimpleSLAM/slam/core/ba_utils.py new file mode 100644 index 0000000000..1cac125ec7 --- /dev/null +++ b/modules/SimpleSLAM/slam/core/ba_utils.py @@ -0,0 +1,266 @@ +# -*- coding: utf-8 -*- +""" +ba_utils.py +=========== + +ALWAYS PASS POSE as T_wc 
(world→camera) (camera-from-world) pose convention to pyceres. + +Light-weight Bundle-Adjustment helpers built on **pyceres**. +Implements + • two_view_ba(...) + • pose_only_ba(...) + • local_bundle_adjustment(...) +and keeps the older run_bundle_adjustment (full BA) for +back-compatibility. + +A *blue-print* for global_bundle_adjustment is included but not wired. +""" +from __future__ import annotations +import cv2 +import numpy as np +import pyceres +from pycolmap import cost_functions, CameraModelId +import math +from scipy.spatial.transform import Rotation as R + +# TODO: USES T_cw (camera-from-world) convention for storing poses in the map, because of PyCERES, REMEMBER TO CONVERT, use below functions +from slam.core.pose_utils import _pose_inverse, _pose_to_quat_trans, _quat_trans_to_pose + +# TODO : UNCOMMENT below block IF YOU USE USES World-from-camera (camera-to-world) pose convention T_wc (camera→world) for storing poses in the map. +# from slam.core.pose_utils import ( +# _pose_inverse, # T ↔ T⁻¹ +# _pose_to_quat_trans as _cw_to_quat_trans, +# _quat_trans_to_pose as _quat_trans_to_cw, +# ) +# def _pose_to_quat_trans(T_wc): +# """ +# Convert **camera→world** pose T_wc to the (q_cw, t_cw) parameterisation +# expected by COLMAP/pyceres residuals. +# """ +# return _cw_to_quat_trans(_pose_inverse(T_wc)) + + +# def _quat_trans_to_pose(q_cw, t_cw): +# """ +# Convert optimised (q_cw, t_cw) back to a **camera→world** SE3 matrix. +# """ +# return _pose_inverse(_quat_trans_to_cw(q_cw, t_cw)) + +# --------------------------------------------------------------------- # +# Shared helper to add one reprojection residual +# --------------------------------------------------------------------- # +def _add_reproj_edge(problem, loss_fn, + kp_uv: tuple[float, float], + quat_param, trans_param, + point_param, intr_param): + """Create a pyceres residual block for one observation.""" + cost = cost_functions.ReprojErrorCost( + CameraModelId.PINHOLE, + np.asarray(kp_uv, np.float64) + ) + problem.add_residual_block( + cost, loss_fn, + [quat_param, trans_param, point_param, intr_param] + ) + +# --------------------------------------------------------------------- # +# 1) Two-view BA (bootstrap refinement) +# --------------------------------------------------------------------- # +# --- ba_utils.py: entry points now use kfs instead of a separate keypoints list --- +def two_view_ba(world_map, K, kfs, max_iters: int = 20): + """ + Refine the two initial camera poses + all bootstrap landmarks. + Expects KF indices 0 and 1 to exist. + """ + assert len(world_map.poses) >= 2, "two_view_ba expects at least 2 poses" + _core_ba(world_map, K, kfs, + opt_kf_idx=[0, 1], + fix_kf_idx=[], + max_iters=max_iters, + info_tag="[2-view BA]") + +# --------------------------------------------------------------------- # +# 2) Pose-only BA (current frame refinement) +# --------------------------------------------------------------------- # +def pose_only_ba(world_map, K, kfs, + kf_idx: int, max_iters: int = 8, + huber_thr: float = 2.0): + """ + Optimise only one **keyframe** pose while keeping all 3-D points fixed. 
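+
+    The pose is read from ``kfs[kf_idx].pose`` and, after optimisation, written
+    back to both the keyframe and ``world_map.poses[kf_idx]`` so the two stay
+    consistent. Typical call (illustrative):
+        pose_only_ba(world_map, K, kfs, kf_idx=len(kfs) - 1)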
+ """ + import pyceres + fx, fy, cx, cy = float(K[0,0]), float(K[1,1]), float(K[0,2]), float(K[1,2]) + intr = np.array([fx, fy, cx, cy], np.float64) + + # read pose from keyframe (authoritative), fall back to map if needed + Tcw = kfs[kf_idx].pose + quat, trans = _pose_to_quat_trans(Tcw) + + problem = pyceres.Problem() + problem.add_parameter_block(quat, 4) + problem.set_manifold(quat, pyceres.EigenQuaternionManifold()) + problem.add_parameter_block(trans, 3) + + problem.add_parameter_block(intr, 4) + problem.set_parameter_block_constant(intr) + + loss_fn = pyceres.HuberLoss(huber_thr) + + # add residuals for observations in this KF + added = 0 + for mp in world_map.points.values(): + # point is constant in pose-only BA + problem.add_parameter_block(mp.position, 3) + problem.set_parameter_block_constant(mp.position) + for f_idx, kp_idx, _desc in mp.observations: + if f_idx != kf_idx: + continue + u, v = kfs[f_idx].kps[kp_idx].pt + _add_reproj_edge(problem, loss_fn, (u, v), quat, trans, mp.position, intr) + added += 1 + + if added < 10: + print("POSE-ONLY BA skipped – not enough residuals") + return + + opts = pyceres.SolverOptions() + opts.max_num_iterations = max_iters + summary = pyceres.SolverSummary() + pyceres.solve(opts, problem, summary) + + # write back to both KF and Map (keep consistent) + new_Tcw = _quat_trans_to_pose(quat, trans) + kfs[kf_idx].pose = new_Tcw + if len(world_map.poses) > kf_idx: + world_map.poses[kf_idx][:] = new_Tcw + print(f"[Pose-only BA] iters={getattr(summary,'num_successful_steps',0)} residuals={added}") + +# --------------------------------------------------------------------- # +# 3) Local BA (sliding window) +# --------------------------------------------------------------------- # +def local_bundle_adjustment(world_map, K, kfs, + center_kf_idx: int, + window_size: int = 6, + max_points : int = 10000, + max_iters : int = 15): + """ + Optimise a sliding window of keyframes around center_kf_idx plus + all landmarks they observe. Older KFs are fixed (gauge). + """ + first_opt = max(1, center_kf_idx - window_size + 1) + opt_kf = list(range(first_opt, center_kf_idx + 1)) + fix_kf = list(range(0, first_opt)) + print(f"opt_kf={opt_kf}, fix_kf={fix_kf}, center_kf_idx={center_kf_idx}") + + _core_ba(world_map, K, kfs, + opt_kf_idx = opt_kf, + fix_kf_idx = fix_kf, + max_points = max_points, + max_iters = max_iters, + info_tag = f"[Local BA @ KF {center_kf_idx}]") + +# --------------------------------------------------------------------- # +# 4) Global BA (blue-print only) +# --------------------------------------------------------------------- # +def global_bundle_adjustment_blueprint(world_map, K, keypoints): + """ + *** NOT WIRED YET *** + + Outline: + • opt_kf_idx = all key-frames + • fix_kf_idx = [] (maybe fix the very first to anchor gauge) + • run _core_ba(...) with a robust kernel + • run asynchronously (thread) and allow early termination + if tracking thread needs the map + """ + raise NotImplementedError + + +# --------------------------------------------------------------------- # +# Shared low-level BA engine +# --------------------------------------------------------------------- # +def _core_ba(world_map, K, kfs, + *, + opt_kf_idx: list[int], + fix_kf_idx: list[int], + max_points: int | None = None, + max_iters : int = 20, + info_tag : str = ""): + """ + Sparse BA over a subset of poses (keyframes) + points. 
+ Poses come from kfs[idx].pose; residual UVs from kfs[idx].kps[kp_idx].pt + """ + import pyceres + fx, fy, cx, cy = float(K[0,0]), float(K[1,1]), float(K[0,2]), float(K[1,2]) + intr = np.array([fx, fy, cx, cy], np.float64) + + problem = pyceres.Problem() + loss_fn = pyceres.HuberLoss(2.0) + + problem.add_parameter_block(intr, 4) + problem.set_parameter_block_constant(intr) + + # pose blocks (opt + fixed) from Keyframe poses + quat_params, trans_params = {}, {} + for k in opt_kf_idx: + quat, tr = _pose_to_quat_trans(kfs[k].pose) + quat_params[k] = quat; trans_params[k] = tr + problem.add_parameter_block(quat, 4) + problem.set_manifold(quat, pyceres.EigenQuaternionManifold()) + problem.add_parameter_block(tr, 3) + + for k in fix_kf_idx: + quat, tr = _pose_to_quat_trans(kfs[k].pose) + quat_params[k] = quat; trans_params[k] = tr + problem.add_parameter_block(quat, 4) + problem.set_manifold(quat, pyceres.EigenQuaternionManifold()) + problem.add_parameter_block(tr, 3) + problem.set_parameter_block_constant(quat) + problem.set_parameter_block_constant(tr) + + # point blocks + residuals + added_pts = 0 + added_res = 0 + for mp in world_map.points.values(): + # keep only points seen by at least one optimisable KF + if not any(f in opt_kf_idx for f, _, _ in mp.observations): + continue + if max_points and added_pts >= max_points: + continue + + problem.add_parameter_block(mp.position, 3) + added_pts += 1 + + for f_idx, kp_idx, _desc in mp.observations: + if (f_idx not in opt_kf_idx) and (f_idx not in fix_kf_idx): + continue + u, v = kfs[f_idx].kps[kp_idx].pt + _add_reproj_edge(problem, loss_fn, + (u, v), + quat_params[f_idx], + trans_params[f_idx], + mp.position, + intr) + added_res += 1 + + if added_res < 10: + print(f"{info_tag} skipped – not enough residuals") + return + + opts = pyceres.SolverOptions() + opts.max_num_iterations = max_iters + opts.minimizer_progress_to_stdout = True + summary = pyceres.SolverSummary() + pyceres.solve(opts, problem, summary) + + # write poses back → Keyframes + Map + for k in opt_kf_idx: + new_Tcw = _quat_trans_to_pose(quat_params[k], trans_params[k]) + kfs[k].pose = new_Tcw + if len(world_map.poses) > k: + world_map.poses[k][:] = new_Tcw + + iters = getattr(summary, "iterations_used", + getattr(summary, "num_successful_steps", + getattr(summary, "num_iterations", None))) + print(f"{info_tag} iters={iters} χ²={summary.final_cost:.2f} residuals={added_res}") \ No newline at end of file diff --git a/modules/SimpleSLAM/slam/core/dataloader.py b/modules/SimpleSLAM/slam/core/dataloader.py new file mode 100644 index 0000000000..5991f02063 --- /dev/null +++ b/modules/SimpleSLAM/slam/core/dataloader.py @@ -0,0 +1,337 @@ +# dataloader.py + +import os +import glob +import cv2 +import numpy as np +import pickle +from typing import List, Optional, Dict, Tuple +import pandas as pd + +def _parse_tum_rgb_list(txt_path: str, seq_dir: str) -> Tuple[List[str], List[float]]: + """Parse TUM rgb.txt returning (paths, timestamps).""" + paths, stamps = [], [] + with open(txt_path, "r") as f: + for line in f: + if line.startswith("#") or not line.strip(): + continue + ts_str, rel = line.strip().split() + stamps.append(float(ts_str)) + paths.append(os.path.join(seq_dir, rel)) + return paths, stamps + +def load_sequence(args) -> List[str]: + """ + Return a list `seq` of either on-disk image paths or in-memory BGR frames. + (Exactly as before—only left / monocular images.) 
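+
+    For 'custom' the frames are decoded from ``custom_compress.mp4`` into memory;
+    every other dataset yields a sorted list of image paths, e.g. (illustrative):
+        seq = load_sequence(args)      # args.dataset == 'kitti'
+        img = cv2.imread(seq[0])       # path-based datasets are read on demand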
+ """ + # TODO: also have to add the ability to load a video file + # TODO: add the ability to load an entire dataset various sequences + prefix = os.path.join(args.base_dir, args.dataset) + + if args.dataset == 'kitti': + img_dir, pat = os.path.join(prefix, '05', 'image_0'), '*.png' + seq = sorted(glob.glob(os.path.join(img_dir, pat))) + + elif args.dataset == 'parking': + img_dir, pat = os.path.join(prefix, 'images'), '*.png' + seq = sorted(glob.glob(os.path.join(img_dir, pat))) + + elif args.dataset == 'malaga': + img_dir = os.path.join(prefix, + 'malaga-urban-dataset-extract-07_rectified_800x600_Images') + seq = sorted(glob.glob(os.path.join(img_dir, '*_left.jpg'))) + + elif args.dataset == 'tum-rgbd': + img_dir = os.path.join(prefix, 'rgbd_dataset_freiburg1_room', 'rgb') + seq = sorted(glob.glob(os.path.join(img_dir, '*.png'))) + + elif args.dataset == 'custom': + vid = os.path.join(prefix, 'custom_compress.mp4') + cap = cv2.VideoCapture(vid) + seq = [] + while True: + ok, fr = cap.read() + if not ok: + break + seq.append(fr) + cap.release() + + else: + raise ValueError(f"Unknown dataset: {args.dataset}") + + if len(seq) < 2: + raise RuntimeError("Dataset must contain at least two frames.") + return seq + + +def load_frame_pair(args, seq, i): + """ + Convenience wrapper: returns BGR frame i and i+1. + Works for both path-based and in-memory sequences. + """ + if args.dataset == 'custom': # frames already in‐memory + return seq[i], seq[i + 1] + return cv2.imread(seq[i]), cv2.imread(seq[i + 1]) + + +# --------------------------------------------------------------------------- # +# New: stereo image paths +# --------------------------------------------------------------------------- # +# USAGE: right_seq = load_stereo_paths(args) # unused for now +def load_stereo_paths(args) -> List[str]: + """ + Return sorted list of right-camera image paths + if the dataset provides them; else an empty list. 
+ """ + prefix = os.path.join(args.base_dir, args.dataset) + if args.dataset == 'kitti': + right_dir, pat = os.path.join(prefix, '05', 'image_1'), '*.png' + return sorted(glob.glob(os.path.join(right_dir, pat))) + if args.dataset == 'malaga': + right_dir = os.path.join(prefix, + 'malaga-urban-dataset-extract-07_rectified_800x600_Images') + return sorted(glob.glob(os.path.join(right_dir, '*_right.jpg'))) + # parking, tum‐rgbd, custom have no right camera + return [] + + +# --------------------------------------------------------------------------- # +# New: calibration loaders +# --------------------------------------------------------------------------- # +def load_calibration(args) -> Dict[str, np.ndarray]: + """ + Returns a dict with keys: + 'K_l', 'P_l', ← left intrinsics 3×3 and 3×4 + 'K_r', 'P_r' ← right intrinsics & proj (if available) + """ + # TODO: add TUM RGB-D calibration + prefix = os.path.join(args.base_dir, args.dataset) + + if args.dataset == 'kitti': + return _calib_kitti() + if args.dataset == 'malaga': + return _calib_malaga() + if args.dataset == "tum-rgbd": + return _calib_tum() + if args.dataset == 'custom': + calib_path = os.path.join(prefix, 'calibration.pkl') + return _calib_custom(calib_path) + + raise ValueError(f"No calibration loader for {args.dataset}") + + +def _calib_kitti() -> Dict[str, np.ndarray]: + params_l = np.fromstring( + "7.070912e+02 0.000000e+00 6.018873e+02 0.000000e+00 " + "0.000000e+00 7.070912e+02 1.831104e+02 0.000000e+00 " + "0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00", + sep=' ', dtype=np.float64).reshape(3, 4) + K_l = params_l[:3, :3] + + params_r = np.fromstring( + "7.070912e+02 0.000000e+00 6.018873e+02 -3.798145e+02 " + "0.000000e+00 7.070912e+02 1.831104e+02 0.000000e+00 " + "0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00", + sep=' ', dtype=np.float64).reshape(3, 4) + K_r = params_r[:3, :3] + + return {'K_l': K_l, 'P_l': params_l, + 'K_r': K_r, 'P_r': params_r} + +def _calib_malaga() -> Dict[str, np.ndarray]: + # left intrinsics & proj (from your rough code) + K_l = np.array([ + [795.11588, 0.0, 517.12973], + [0.0, 795.11588, 395.59665], + [0.0, 0.0, 1.0 ] + ]) + P_l = np.hstack([K_l, np.zeros((3,1))]) + # Right camera: assume identity extrinsics for now + return {'K_l': K_l, 'P_l': P_l, 'K_r': K_l.copy(), 'P_r': P_l.copy()} + +def _calib_tum(): + K = np.array([[517.3, 0., 318.6], + [0., 516.5, 255.3], + [0., 0., 1. 
]]) + D = np.array([ 0.2624, -0.9531, 0.0054, 0.0026, -1.1633 ]) # d0-d4 + P = np.hstack([K, np.zeros((3,1))]) + return {"K_l": K, "P_l": P, "D_l": D, "K_r":None, "P_r":None, "D_r":None} + + +def _calib_custom(calib_path: str) -> Dict[str, np.ndarray]: + with open(calib_path, 'rb') as f: + camera_matrix, *_ = pickle.load(f) + P = np.hstack([camera_matrix, np.zeros((3,1))]) + return {'K_l': camera_matrix, 'P_l': P, 'K_r': None, 'P_r': None} + + +# --------------------------------------------------------------------------- # +# New: ground‐truth loaders +# --------------------------------------------------------------------------- # + +def _quat_to_rot(qx: float, qy: float, qz: float, qw: float) -> np.ndarray: + """Convert unit quaternion to 3×3 rotation matrix (TUM order).""" + xx, yy, zz = qx * qx, qy * qy, qz * qz + xy, xz, yz = qx * qy, qx * qz, qy * qz + wx, wy, wz = qw * qx, qw * qy, qw * qz + + return np.array( + [ + [1.0 - 2.0 * (yy + zz), 2.0 * (xy - wz), 2.0 * (xz + wy)], + [2.0 * (xy + wz), 1.0 - 2.0 * (xx + zz), 2.0 * (yz - wx)], + [2.0 * (xz - wy), 2.0 * (yz + wx), 1.0 - 2.0 * (xx + yy)], + ], + dtype=np.float64, + ) + +def _align_tum_gt(rgb_ts: List[float], gt_ts: List[float], poses: List[np.ndarray]) -> List[np.ndarray]: + """Align ground‑truth poses to rgb timestamps via nearest‑neighbour search.""" + aligned = [] + j = 0 + for t in rgb_ts: + # advance j until gt_ts[j] >= t + while j + 1 < len(gt_ts) and gt_ts[j] < t: + j += 1 + # choose closer of j and j‑1 + if j == 0: + aligned.append(poses[0]) + else: + if abs(gt_ts[j] - t) < abs(gt_ts[j - 1] - t): + aligned.append(poses[j]) + else: + aligned.append(poses[j - 1]) + return aligned + + +def load_groundtruth(args) -> Optional[np.ndarray]: + """ + Returns an array of shape [N×3×4] with ground‐truth camera poses, + or None if no GT is available for this dataset. 
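+
+    KITTI poses are read directly as [N×3×4]; TUM poses are built from the
+    quaternion trajectory and aligned to the rgb timestamps by nearest neighbour;
+    Malaga poses are interpolated from the GPS log. Example use (illustrative):
+        gt = load_groundtruth(args)          # args.dataset == 'kitti'
+        R, t = gt[0][:, :3], gt[0][:, 3]     # rotation / translation of frame 0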
+ """ + # TODO: add the ability to load the GT generally from a file - ie just give a path of dataset and the function will parse it + prefix = os.path.join(args.base_dir, args.dataset) + + if args.dataset == 'kitti': + poses = np.loadtxt(os.path.join(prefix, 'poses/05.txt')) + return poses.reshape(-1, 3, 4) + + if args.dataset == 'malaga': + seq = load_sequence(args) + filepath = os.path.join( + prefix, + 'malaga-urban-dataset-extract-07_all-sensors_GPS.txt' + ) + return _malaga_get_gt(filepath, seq) + + if args.dataset == "tum-rgbd": + seq_dir = os.path.join(prefix, "rgbd_dataset_freiburg1_room") + rgb_list = os.path.join(seq_dir, "rgb.txt") + gt_file = os.path.join(seq_dir, "groundtruth.txt") + + # --- read rgb timestamps --- # + _, rgb_ts = _parse_tum_rgb_list(rgb_list, seq_dir) + + # --- read ground‑truth trajectory --- # + gt_ts, poses = [], [] + with open(gt_file, "r") as f: + for line in f: + if line.startswith("#") or not line.strip(): + continue + items = line.strip().split() + ts = float(items[0]) + tx, ty, tz = map(float, items[1:4]) + qx, qy, qz, qw = map(float, items[4:8]) + R = _quat_to_rot(qx, qy, qz, qw) + P = np.hstack([R, np.array([[tx], [ty], [tz]], dtype=np.float64)]) + gt_ts.append(ts) + poses.append(P) + + aligned = _align_tum_gt(rgb_ts, gt_ts, poses) + return np.stack(aligned, axis=0) # [N×3×4] + + else: + print(f"No ground truth available for dataset: {args.dataset}") + pass + + return None + +# --------------------------------------------------------------------------- # +# Malaga-urban ground truth helper functions +# --------------------------------------------------------------------------- # + +def _malaga_get_gt( + filepath: str, + seq: List[str] +) -> np.ndarray: + """ + Load Malaga-urban GPS/INS ground truth and align it to the left-image sequence. + Returns [N×3×4] pose array (no file writes). + """ + # 1) read and trim the GPS log + col_names = [ + "Time","Lat","Lon","Alt","fix","sats","speed","dir", + "LocalX","LocalY","LocalZ","rawlogID","GeocenX","GeocenY","GeocenZ", + "GPSX","GPSY","GPSZ","GPSVX","GPSVY","GPSVZ","LocalVX","LocalVY","LocalVZ","SATTime" + ] + df = pd.read_csv( + filepath, + sep=r'\s+', + comment='%', + header=None, + names=col_names + ) + df = df[["Time","LocalX","LocalY","LocalZ"]].sort_values(by="Time").reset_index(drop=True) + times = df["Time"].values + t0, t1 = times[0], times[-1] + + # 2) keep only images whose timestamp falls within the GT interval + valid_seq = [] + for img in seq: + ts = extract_file_timestamp(img) + if t0 <= ts <= t1: + valid_seq.append(img) + seq[:] = valid_seq # trim the sequence in place + + # 3) build the pose list + poses = [] + for img in seq: + ts = extract_file_timestamp(img) + position = get_position_at_time(ts, df) # [-y, z, x] + P4 = np.eye(4, dtype=np.float64) + P4[:3, 3] = position + poses.append(P4[:3,:4]) + + return np.stack(poses, axis=0) # [N×3×4] + + +def extract_file_timestamp(filepath: str) -> float: + """ + Extract the floating‐point timestamp from a Malaga filename: + e.g. '..._1234567890.123_left.jpg' → 1234567890.123 + """ + name = os.path.basename(filepath) + parts = name.split("_") + return float(parts[2]) + + +def get_position_at_time(timestamp: float, df: pd.DataFrame) -> np.ndarray: + """ + Linearly interpolate the (LocalX,LocalY,LocalZ) at the given timestamp. + Returns a length-3 vector [-y, z, x] to match camera axes. 
+ """ + times = df["Time"].values + idx = np.searchsorted(times, timestamp) + idx = np.clip(idx, 1, len(times)-1) + t_prev, t_next = times[idx-1], times[idx] + row_prev, row_next = df.iloc[idx-1], df.iloc[idx] + x0, y0, z0 = row_prev[["LocalX","LocalY","LocalZ"]] + x1, y1, z1 = row_next[["LocalX","LocalY","LocalZ"]] + + # use ASCII variable name 'alpha' instead of a Greek symbol + alpha = (timestamp - t_prev) / (t_next - t_prev) if (t_next != t_prev) else 0.0 + + x = x0 + alpha * (x1 - x0) + y = y0 + alpha * (y1 - y0) + z = z0 + alpha * (z1 - z0) + return np.array([-y, z, x], dtype=np.float64) \ No newline at end of file diff --git a/modules/SimpleSLAM/slam/core/features_utils.py b/modules/SimpleSLAM/slam/core/features_utils.py new file mode 100644 index 0000000000..efae8990f1 --- /dev/null +++ b/modules/SimpleSLAM/slam/core/features_utils.py @@ -0,0 +1,240 @@ +# features_utils.py +import cv2 +import numpy as np +from typing import List, Tuple +# optional LightGlue imports guarded by try/except +try: + import torch + from lightglue import LightGlue, ALIKED + from lightglue.utils import rbd , load_image +except ImportError: + raise ImportError('LightGlue unavailable, please install it using instructions on https://github.com/cvg/LightGlue') + + + +# --------------------------------------------------------------------------- # +# Initialisation helpers +# --------------------------------------------------------------------------- # +def init_feature_pipeline(args): + """ + Instantiate detector & matcher according to CLI arguments. + Returns (detector, matcher) + """ + if args.use_lightglue: + device = "cuda" if torch.cuda.is_available() else "cpu" + detector = ALIKED(max_num_keypoints=2048).eval().to(device) + matcher = LightGlue(features='aliked').eval().to(device) + else: + detector = _get_opencv_detector(args.detector) + matcher = _get_opencv_matcher(args.matcher, args.detector) + return detector, matcher + + +def _get_opencv_detector(detector_type, max_features=6000): + if detector_type == 'orb': + return cv2.ORB_create(max_features) + if detector_type == 'sift': + return cv2.SIFT_create() + if detector_type == 'akaze': + return cv2.AKAZE_create() + raise ValueError(f"Unsupported detector: {detector_type}") + + +def _get_opencv_matcher(matcher_type, detector_type): + if matcher_type != 'bf': + raise ValueError(f"Unsupported matcher: {matcher_type}") + norm = cv2.NORM_HAMMING if detector_type in ['orb', 'akaze'] else cv2.NORM_L2 + return cv2.BFMatcher(norm, crossCheck=True) + +# --------------------------------------------------------------------------- # +# Individual feature extraction and matching functions +# --------------------------------------------------------------------------- # + +def _convert_lg_kps_to_opencv(kp0: torch.Tensor) -> List[cv2.KeyPoint]: + cv_kp0 = [cv2.KeyPoint(float(x), float(y), 1) for x, y in kp0] + return cv_kp0 + +def _convert_opencv_to_lg_kps(cv_kp: List[cv2.KeyPoint]) -> torch.Tensor: + """ + Convert a list of cv2.KeyPoint into a tensor of shape [M×2] + containing (x, y) coordinates for LightGlue. 
+ Args: + cv_kp: list of cv2.KeyPoint + Returns: + Tensor of shape [M, 2], dtype=torch.float32 + """ + coords = [(kp.pt[0], kp.pt[1]) for kp in cv_kp] + if not coords: + return torch.empty((0, 2), dtype=torch.float32) + return torch.tensor(coords, dtype=torch.float32) + + +def _convert_lg_matches_to_opencv(matches_raw: torch.Tensor) -> List[cv2.DMatch]: + pairs = matches_raw.cpu().numpy().tolist() + cv_matches = [cv2.DMatch(int(i), int(j), 0, 0.0) for i, j in pairs] + return cv_matches + +def feature_extractor(args, img: np.ndarray, detector): + """ + Extract features from a single image. + Returns a FrameFeatures with OpenCV-compatible keypoints and descriptors, + and optional LightGlue tensors if using LightGlue. + """ + + if args.use_lightglue: + t0 = _bgr_to_tensor(img) + feats = detector.extract(t0) + feats = rbd(feats) + lg_kps = feats['keypoints'] + kp0 = _convert_lg_kps_to_opencv(lg_kps) + des0_t = feats['descriptors'] + des0 = des0_t.detach().to("cpu").float().numpy() + des0 /= (np.linalg.norm(des0, axis=1, keepdims=True) + 1e-8).astype(np.float32) # L2 normalized + return kp0, des0 + + else: + kp0, des0 = detector.detectAndCompute(img, None) + if des0 is None: + return [], [] + return kp0, des0 + +def feature_matcher(args, kp0, kp1, des0, des1, matcher): + """ + Match features between two frames and return OpenCV-compatible matches. + Works with LightGlue (torch) and OpenCV BF (ORB). + - kp0, kp1: List[cv2.KeyPoint] + - des0, des1: + * LightGlue path: NumPy float32 (from ALIKE) or torch float32 + * BF path: uint8 (ORB) + """ + if args.use_lightglue: + import torch + + # --- 1) Use the model's own device as source of truth --- + try: + dev = next(matcher.parameters()).device + except StopIteration: + dev = torch.device("cuda" if torch.cuda.is_available() else "cpu") + matcher = matcher.to(dev).eval() + + # --- 2) Helpers to put inputs on the same device & dtype --- + def _to_torch_float32(x): + # accepts numpy or torch, returns torch.float32 on dev + if isinstance(x, np.ndarray): + x = torch.from_numpy(x) + return x.to(dev, dtype=torch.float32) + + # keypoints: convert OpenCV -> (N,2) tensor, put on dev, add batch dim + k0 = _convert_opencv_to_lg_kps(kp0) + k1 = _convert_opencv_to_lg_kps(kp1) + if not isinstance(k0, torch.Tensor): + k0 = torch.as_tensor(k0, dtype=torch.float32) + if not isinstance(k1, torch.Tensor): + k1 = torch.as_tensor(k1, dtype=torch.float32) + lg_kp0 = k0.to(dev, dtype=torch.float32).unsqueeze(0) # [1, M, 2] + lg_kp1 = k1.to(dev, dtype=torch.float32).unsqueeze(0) # [1, N, 2] + + # descriptors: accept numpy or torch, move to dev, add batch dim + if des0 is None or des1 is None: + return [] # nothing to match + lg_desc0 = _to_torch_float32(des0).unsqueeze(0) # [1, M, D] + lg_desc1 = _to_torch_float32(des1).unsqueeze(0) # [1, N, D] + + # --- 3) Run LightGlue on a single device in inference mode --- + with torch.inference_mode(): + raw = matcher({ + 'image0': {'keypoints': lg_kp0, 'descriptors': lg_desc0}, + 'image1': {'keypoints': lg_kp1, 'descriptors': lg_desc1}, + }) + raw = rbd(raw) + + matches_raw = raw['matches'] + # Confidence may be 'scores' or 'confidence' + conf = raw['scores'] if 'scores' in raw else raw.get('confidence', None) + if conf is not None: + thr = float(getattr(args, 'min_conf', 0.7)) + matches_raw = matches_raw[conf > thr] + + return _convert_lg_matches_to_opencv(matches_raw) + + # -------- ORB / OpenCV BF path -------- + # BFMatcher expects uint8 binary descriptors for Hamming. 
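+    # Note: the BF matcher is created with crossCheck=True, which already enforces
+    # mutual nearest neighbours, so no additional ratio test is applied here.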
+ else: + # OpenCV matching + matches = matcher.match(des0, des1) + return sorted(matches, key=lambda m: m.distance) + + +# --------------------------------------------------------------------------- # +# Public API +# --------------------------------------------------------------------------- # + +def filter_matches_ransac(kp1, kp2, matches, thresh=1.0): + """ + Drop outliers using the fundamental matrix + RANSAC. + """ + if len(matches) < 8: + return matches + + pts1 = np.float32([kp1[m.queryIdx].pt for m in matches]) + pts2 = np.float32([kp2[m.trainIdx].pt for m in matches]) + + _, mask = cv2.findFundamentalMat(pts1, pts2, + cv2.FM_RANSAC, thresh, 0.99) + mask = mask.ravel().astype(bool) + return [m for m, ok in zip(matches, mask) if ok] + + +# --------------------------------------------------------------------------- # +# Convenience method for LightGlue/OpenCV pipeline to detect and match features in two images +# NO LONGER USED IN THE MAIN SLAM PIPELINE, BUT LEFT FOR REFERENCE +# (e.g. for testing purposes). +# --------------------------------------------------------------------------- # +def _opencv_detect_and_match(img1, img2, detector, matcher): + kp1, des1 = detector.detectAndCompute(img1, None) + kp2, des2 = detector.detectAndCompute(img2, None) + + if des1 is None or des2 is None: + return [], [], [], [], [] # gracefully handle empty images + + matches = sorted(matcher.match(des1, des2), key=lambda m: m.distance) + + return kp1, kp2, des1, des2, matches + +def _bgr_to_tensor(image): + img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0 + tensor = torch.from_numpy(img_rgb).permute(2, 0, 1).unsqueeze(0) + return tensor.cuda() if torch.cuda.is_available() else tensor + + +def _convert_lightglue_to_opencv(kp0, kp1, matches): + n = kp0.shape[0] + cv_kp0 = [cv2.KeyPoint(float(x), float(y), 1) for x, y in kp0] + cv_kp1 = [cv2.KeyPoint(float(x), float(y), 1) for x, y in kp1] + cv_matches = [cv2.DMatch(int(i), int(j), 0, 0.0) for i, j in matches] + return cv_kp0, cv_kp1, cv_matches + + +def _lightglue_detect_and_match(img1, img2, extractor, matcher): + t0, t1 = _bgr_to_tensor(img1), _bgr_to_tensor(img2) + + f0, f1 = extractor.extract(t0), extractor.extract(t1) + matches = matcher({'image0': f0, 'image1': f1}) + + # remove batch dimension + f0, f1 = rbd(f0), rbd(f1) + matches = rbd(matches) + + kp0, kp1 = f0['keypoints'], f1['keypoints'] + des0, des1 = f0['descriptors'], f1['descriptors'] + cv_kp0, cv_kp1, cv_matches = _convert_lightglue_to_opencv(kp0, kp1, + matches['matches']) + return cv_kp0, cv_kp1, des0, des1, cv_matches + +def detect_and_match(img1, img2, detector, matcher, args): + """ + Front-end entry. Chooses OpenCV or LightGlue depending on CLI flag. 
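+    Both back-ends return (kp1, kp2, des1, des2, matches); keypoints and matches
+    are OpenCV types in either case, descriptors stay in their native format.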
+ """ + if args.use_lightglue: + return _lightglue_detect_and_match(img1, img2, detector, matcher) + return _opencv_detect_and_match(img1, img2, detector, matcher) diff --git a/modules/SimpleSLAM/slam/core/keyframe_utils.py b/modules/SimpleSLAM/slam/core/keyframe_utils.py new file mode 100644 index 0000000000..8d2979304f --- /dev/null +++ b/modules/SimpleSLAM/slam/core/keyframe_utils.py @@ -0,0 +1,159 @@ +# keyframe.py +from dataclasses import dataclass +import cv2 +import numpy as np +import lz4.frame +from typing import List, Tuple +from slam.core.features_utils import feature_matcher, filter_matches_ransac + +# --------------------------------------------------------------------------- # +# Dataclass +# --------------------------------------------------------------------------- # +@dataclass +class Keyframe: + idx: int # global frame index + frame_idx: int # actual frame number (0-based), where this KF was created + path: str # "" for in-memory frames + kps: list[cv2.KeyPoint] + desc: np.ndarray + pose: np.ndarray # 4×4 camera-from-world (T_cw) pose + thumb: bytes # lz4-compressed JPEG + + +# --------------------------------------------------------------------------- # +# Helpers +# --------------------------------------------------------------------------- # +def make_thumb(bgr, hw=(640, 360)): + th = cv2.resize(bgr, hw) + ok, enc = cv2.imencode('.jpg', th, + [int(cv2.IMWRITE_JPEG_QUALITY), 70]) + return lz4.frame.compress(enc.tobytes()) if ok else b'' + +# keyframe_utils.py + +def _rot_deg_from_Tcw(Tcw_prev: np.ndarray, Tcw_curr: np.ndarray) -> float: + """Angular change between two camera-from-world poses (degrees).""" + R1, R2 = Tcw_prev[:3, :3], Tcw_curr[:3, :3] + R = R2 @ R1.T + # clamp for numerical safety + cos = max(min((np.trace(R) - 1.0) * 0.5, 1.0), -1.0) + return float(np.degrees(np.arccos(cos))) + +def is_new_keyframe( + frame_no: int, + matches_to_kf: list, + kp_curr: list[cv2.KeyPoint], + kp_kf: list[cv2.KeyPoint], + Tcw_prev_kf: np.ndarray | None, + Tcw_curr: np.ndarray | None, + *, + kf_cooldown: int = 5, + kf_min_inliers: int = 125, + kf_min_ratio: float = 0.35, + kf_max_disp: float = 30.0, + kf_min_rot_deg: float = 8.0, + last_kf_frame_no: int = -999 +) -> bool: + """Decide whether current frame should be promoted to a keyframe. + + Gate by age, then trigger on any of: weak track, large image motion, or rotation. + All thresholds are intentionally conservative—tune per dataset. 
+ """ + # ---- 1) age gate ---- + age = frame_no - last_kf_frame_no + if age < kf_cooldown: + return False + + # ---- 2) compute signals ---- + n_inl = len(matches_to_kf) + n_ref = max(1, len(kp_kf)) + inlier_ratio = n_inl / n_ref + + if n_inl > 0: + disp = np.hypot( + np.array([kp_curr[m.trainIdx].pt[0] - kp_kf[m.queryIdx].pt[0] for m in matches_to_kf]), + np.array([kp_curr[m.trainIdx].pt[1] - kp_kf[m.queryIdx].pt[1] for m in matches_to_kf]) + ) + mean_disp = float(np.mean(disp)) + med_disp = float(np.median(disp)) + else: + mean_disp = med_disp = 0.0 + + rot_deg = 0.0 + if Tcw_prev_kf is not None and Tcw_curr is not None: + rot_deg = _rot_deg_from_Tcw(Tcw_prev_kf, Tcw_curr) + + # ---- 3) triggers ---- + weak_track = (n_inl < kf_min_inliers) or (inlier_ratio < kf_min_ratio) + large_flow = (med_disp > kf_max_disp) # median more robust than mean + view_change = (rot_deg > kf_min_rot_deg) + + return weak_track or large_flow or view_change + +def select_keyframe( + args, + seq: List[str], + frame_idx: int, + img2, kp2, des2, + Tcw_curr, + matcher, + kfs: List[Keyframe], + last_kf_frame_no: int +) -> Tuple[List[Keyframe], int]: + """ + Decide whether to add a new Keyframe at this iteration. + + Parameters + ---------- + args + CLI namespace (provides use_lightglue, ransac_thresh, kf_* params). + seq + Original sequence list (so we can grab file paths if needed). + frame_idx + zero-based index of the *first* of the pair. Keyframes use i+1 as frame number. + img2 + BGR image for frame i+1 (for thumbnail if we promote). + kp2, des2 + KPs/descriptors of frame i+1. + pose2 + Current camera-to-world pose estimate for frame i+1 (4×4). May be None. + matcher + Either the OpenCV BF/FLANN matcher or the LightGlue matcher. + kfs + Current list of Keyframe objects. + last_kf_frame_no + Frame number (1-based) of the last keyframe added; or -inf if none. + + Returns + ------- + kfs + Possibly-extended list of Keyframe objects. + last_kf_frame_no + Updated last keyframe frame number (still the same if we didn’t add one). + """ + frame_no = frame_idx + 1 + if not kfs: + return kfs, last_kf_frame_no + + prev_kf = kfs[-1] + + raw_matches = feature_matcher(args, prev_kf.kps, kp2, prev_kf.desc, des2, matcher) + matches = filter_matches_ransac(prev_kf.kps, kp2, raw_matches, args.ransac_thresh) + + if is_new_keyframe( + frame_no, matches, kp2, prev_kf.kps, + Tcw_prev_kf=prev_kf.pose, Tcw_curr=Tcw_curr, + kf_cooldown=args.kf_cooldown, + kf_min_inliers=args.kf_min_inliers, + kf_min_ratio=getattr(args, "kf_min_ratio", 0.35), + kf_max_disp=args.kf_max_disp, + kf_min_rot_deg=getattr(args, "kf_min_rot_deg", 8.0), + last_kf_frame_no=last_kf_frame_no, + ): + thumb = make_thumb(img2, tuple(args.kf_thumb_hw)) + path = seq[frame_idx + 1] if isinstance(seq[frame_idx + 1], str) else "" + seq_id = len(kfs) + kfs.append(Keyframe(seq_id, frame_no, path, kp2, des2, Tcw_curr, thumb)) + last_kf_frame_no = frame_no + + return kfs, last_kf_frame_no diff --git a/modules/SimpleSLAM/slam/core/landmark_utils.py b/modules/SimpleSLAM/slam/core/landmark_utils.py new file mode 100644 index 0000000000..a74bc76794 --- /dev/null +++ b/modules/SimpleSLAM/slam/core/landmark_utils.py @@ -0,0 +1,161 @@ +from __future__ import annotations + +""" +landmark_utils.py +~~~~~~~~~~~~~~~~~ +Classes and helper functions for managing 3‑D landmarks and camera poses +in an incremental VO / SLAM pipeline. + +* MapPoint ─ encapsulates a single 3‑D landmark. +* Map ─ container for all landmarks + camera trajectory. 
+* triangulate_points ─ convenience wrapper around OpenCV triangulation. + +The module is intentionally lightweight and free of external dependencies +beyond NumPy + OpenCV; this makes it easy to unit‑test without a heavy +visualisation stack. +""" + +from dataclasses import dataclass, field +from typing import Dict, List, Tuple, Optional + +import cv2 +import numpy as np +from scipy.spatial import cKDTree + +# datatype test for feature descriptors +def _canon_desc(desc): + try: + import torch + if isinstance(desc, torch.Tensor): + desc = desc.detach().to("cpu") + if desc.dtype != torch.uint8: + desc = desc.float() + desc = desc.contiguous().numpy() + except Exception: + pass + d = np.asarray(desc) + if d.dtype == np.uint8: # ORB/AKAZE (binary) + return d.reshape(-1) + d = d.astype(np.float32, copy=False) # ALIKE/LightGlue (float) + n = np.linalg.norm(d) + 1e-8 + return (d / n).reshape(-1) + +# --------------------------------------------------------------------------- # +# MapPoint +# --------------------------------------------------------------------------- # +@dataclass +class MapPoint: + """A single triangulated 3‑D landmark. + + Parameters + ---------- + id + Unique landmark identifier. + position + 3‑D position in world coordinates (shape ``(3,)``). + keyframe_idx + Index of the keyframe that first observed / created this landmark. + colour + RGB colour (linear, 0‑1) associated with the point (shape ``(3,)``). + observations + List of observations in the form ``(frame_idx, kp_idx, descriptor)`` where + * ``keyframe_idx`` – Keyframe - idx where the keypoint was detected. + * ``kp_idx``    – index of the keypoint inside that frame. + * ``descriptor`` – feature descriptor as a 1‑D ``np.ndarray``. + """ + id: int + position: np.ndarray # shape (3,) + keyframe_idx: int = -1 + colour: np.ndarray = field(default_factory=lambda: np.ones(3, dtype=np.float32)) # (3,) in **linear** RGB 0-1 + observations: List[Tuple[int, int, np.ndarray]] = field(default_factory=list) # (keyframe_idx, kp_idx, descriptor) + + def add_observation(self, keyframe_idx: int, kp_idx: int, descriptor: np.ndarray) -> None: + """Register that *kp_idx* in *keyframe_idx* observes this landmark.""" + self.observations.append((keyframe_idx, kp_idx, _canon_desc(descriptor))) + + +# --------------------------------------------------------------------------- # +# Map container +# --------------------------------------------------------------------------- # +class Map: + """A minimalistic map: 3‑D points + camera trajectory.""" + + def __init__(self) -> None: + self.points: Dict[int, MapPoint] = {} + self.keyframe_indices: List[int] = [] + self.poses: List[np.ndarray] = [] # List of 4×4 camera‑to‑world matrices (World-from-Camera) + self._next_pid: int = 0 + + # ---------------- Camera trajectory ---------------- # + def add_pose(self, pose_c_w: np.ndarray, is_keyframe: bool) -> None: + """Append a 4×4 *pose_c_w* (camera‑to‑world) to the trajectory.""" ## CHANGED TO CAMERA-FROM-WORLD + assert pose_c_w.shape == (4, 4), "Pose must be 4×4 homogeneous matrix" + self.poses.append(pose_c_w.copy()) + if is_keyframe: + self.keyframe_indices.append(len(self.poses) - 1) + + # ---------------- Landmarks ------------------------ # + def add_points(self, pts3d: np.ndarray, colours: Optional[np.ndarray] = None, + keyframe_idx: int = -1) -> List[int]: + """Add a set of 3‑D points and return the list of newly assigned ids.""" + if pts3d.ndim != 2 or pts3d.shape[1] != 3: + raise ValueError("pts3d must be (N,3)") + new_ids: List[int] = [] + + 
colours = colours if colours is not None else np.ones_like(pts3d) + for p, c in zip(pts3d, colours): + pid = self._next_pid + self.points[pid] = MapPoint( + pid, + p.astype(np.float64), + keyframe_idx, + c.astype(np.float32), + ) + new_ids.append(pid) + self._next_pid += 1 + return new_ids + + + # ---------------- Convenience accessors ------------ # + def get_point_array(self) -> np.ndarray: + """Return all landmark positions as an (N,3) array (N may be 0).""" + if not self.points: + return np.empty((0, 3)) + return np.stack([mp.position for mp in self.points.values()], axis=0) + + def get_color_array(self) -> np.ndarray: + if not self.points: + return np.empty((0, 3), np.float32) + return np.stack([mp.colour for mp in self.points.values()]) + + def point_ids(self) -> List[int]: + return list(self.points.keys()) + + def __len__(self) -> int: + return len(self.points) + + # ---------------- Merging landmarks ---------------- # + def fuse_closeby_duplicate_landmarks(self, radius: float = 0.05) -> None: + """Average-merge landmarks whose centres are closer than ``radius``.""" + + if len(self.points) < 2: + return + + ids = list(self.points.keys()) + pts = np.stack([self.points[i].position for i in ids]) + tree = cKDTree(pts) + pairs = sorted(tree.query_pairs(radius)) + + removed: set[int] = set() + for i, j in pairs: + ida, idb = ids[i], ids[j] + if idb in removed or ida in removed: + continue + pa = self.points[ida].position + pb = self.points[idb].position + self.points[ida].position = (pa + pb) * 0.5 + removed.add(idb) + + for idx in removed: + self.points.pop(idx, None) + diff --git a/modules/SimpleSLAM/slam/core/pnp_utils.py b/modules/SimpleSLAM/slam/core/pnp_utils.py new file mode 100644 index 0000000000..a8f42900d6 --- /dev/null +++ b/modules/SimpleSLAM/slam/core/pnp_utils.py @@ -0,0 +1,254 @@ +# slam/core/pnp_utils.py +import logging +from dataclasses import dataclass +from typing import List, Tuple, Dict, Optional +import numpy as np +import cv2 +from scipy.spatial import cKDTree + +log = logging.getLogger("pnp") + +# ---------- small utilities ---------- + +def _pose_inverse(T: np.ndarray) -> np.ndarray: + R, t = T[:3, :3], T[:3, 3] + Ti = np.eye(4, dtype=T.dtype) + Ti[:3, :3] = R.T + Ti[:3, 3] = -R.T @ t + return Ti + +def _pose_from_Rt(R: np.ndarray, t: np.ndarray) -> np.ndarray: + T = np.eye(4, dtype=np.float64) + T[:3, :3] = R + T[:3, 3] = t.reshape(3) + return T + +def predict_pose_const_vel(Tcw_prevprev: np.ndarray, Tcw_prev: np.ndarray) -> np.ndarray: + """Constant-velocity on SE(3) for T_cw (camera-from-world). 
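+    i.e. the relative motion between the two previous camera poses is applied
+    once more to the most recent pose: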
+ T_pred = T_prev * inv(T_prevprev) * T_prev + """ + return Tcw_prev @ _pose_inverse(Tcw_prevprev) @ Tcw_prev + +def project_point(K: np.ndarray, T_cw: np.ndarray, Xw: np.ndarray) -> Tuple[np.ndarray, float]: + """Return (u,v) and depth z in the current camera.""" + Xc = T_cw[:3, :3] @ Xw + T_cw[:3, 3] + z = float(Xc[2]) + if z <= 1e-8: + return np.array([-1.0, -1.0], dtype=np.float32), z + uv = (K @ (Xc / z))[:2] + return uv.astype(np.float32), z + +def _hamming(a: np.ndarray, b: np.ndarray) -> int: + # ORB/BRIEF descriptors: uint8 row vectors + return int(cv2.norm(a, b, cv2.NORM_HAMMING)) + +def _choose_mp_descriptor(observations: List[Tuple[int, int, np.ndarray]]) -> Optional[np.ndarray]: + if not observations: + return None + # use most-recent observation's descriptor + return observations[-1][2] + +@dataclass +class Matches2D3D: + pts3d: np.ndarray # (N,3) world + pts2d: np.ndarray # (N,2) image + kp_indices: List[int] # indices into current frame keypoints + mp_ids: List[int] # matched map point ids + + +def _as_np1d(x): + if x is None: + return None + a = np.asarray(x) + return a.reshape(-1) + +def _kp_coords(kps): + """Accepts List[cv2.KeyPoint] or ndarray (N,2). Returns float32 (N,2).""" + if isinstance(kps, (list, tuple)) and len(kps) > 0 and hasattr(kps[0], "pt"): + return np.float32([kp.pt for kp in kps]) + kps = np.asarray(kps) + assert kps.ndim == 2 and kps.shape[1] >= 2, "kps must be (N,2)" + return kps[:, :2].astype(np.float32, copy=False) + +def _desc_distance(a, b, metric="auto"): + """ + Robust descriptor distance that supports: + - binary (uint8) -> Hamming + - float (float32/64) -> L2 (default) or cosine if metric='cosine' + """ + a = _as_np1d(a); b = _as_np1d(b) + if a is None or b is None: + return np.inf + if a.shape[0] != b.shape[0]: + return np.inf + + if metric == "auto": + metric = "hamming" if a.dtype == np.uint8 and b.dtype == np.uint8 else "l2" + + if metric == "hamming": + # ensure uint8 contiguous + a = a.astype(np.uint8, copy=False) + b = b.astype(np.uint8, copy=False) + return float(cv2.norm(a, b, cv2.NORM_HAMMING)) + + if metric == "cosine": + a = a.astype(np.float32, copy=False) + b = b.astype(np.float32, copy=False) + na = float(np.linalg.norm(a)); nb = float(np.linalg.norm(b)) + if na == 0.0 or nb == 0.0: + return np.inf + # convert similarity -> distance in [0,2] + cos_sim = float(np.dot(a, b) / (na * nb)) + return 1.0 - cos_sim + + # default L2 + a = a.astype(np.float32, copy=False) + b = b.astype(np.float32, copy=False) + return float(np.linalg.norm(a - b)) + +# TODO Optimal way would be to add a parameter to class map_point that keeps a running average of descriptor(Also know as reference descriptor) +def _best_mp_distance_to_cur_desc(mp, d_cur, max_obs_check: int = 6) -> float: + """Return the smallest distance between the current descriptor and any + of the map point's stored observation descriptors (check last N).""" + if not mp.observations: + return float("inf") + # check only the most recent few to keep it fast + obs_iter = mp.observations[-max_obs_check:] + best = float("inf") + for _, _, d in obs_iter: + if d is None: + continue + best = min(best, _desc_distance(d, d_cur, metric="auto")) + return best + + +def reproject_and_match_2d3d(world_map, + K: np.ndarray, + Tcw_pred: np.ndarray, + kps_cur, # list[cv2.KeyPoint] OR ndarray (N,2) + des_cur: np.ndarray, # ORB: uint8; ALIKE: float32 + img_w: int, img_h: int, + radius_px: float = 12.0, + max_hamm: int = 64, + max_l2: float = 0.8, # TODO Magic Variable + use_cosine: bool = False): + """ + 
Reproject landmarks and match within a window around projections. + Works with ORB/BRIEF (uint8) and ALIKE/LightGlue-style float descriptors. + """ + # bail early if no features/descriptors + if des_cur is None or (hasattr(des_cur, "size") and des_cur.size == 0): + return Matches2D3D(np.zeros((0,3),np.float32), np.zeros((0,2),np.float32), [], []) + if not world_map.points: + return Matches2D3D(np.zeros((0,3),np.float32), np.zeros((0,2),np.float32), [], []) + + pts2d_cur = _kp_coords(kps_cur) + tree = cKDTree(pts2d_cur) + + used_kps = set() + pts3d, pts2d, kpids, mpids = [], [], [], [] + + # pick metric once based on current descriptor dtype + cur_is_binary = (des_cur.dtype == np.uint8) + metric = "hamming" if cur_is_binary else ("cosine" if use_cosine else "l2") + thr = max_hamm if metric == "hamming" else max_l2 + + for mp_id, mp in world_map.points.items(): + # project + uv, z = project_point(K, Tcw_pred, mp.position) + if z <= 0: + continue + u, v = float(uv[0]), float(uv[1]) + if u < 0 or v < 0 or u >= img_w or v >= img_h: + continue + + # small-window search + cand_idx = tree.query_ball_point([u, v], r=radius_px) + if not cand_idx: + continue + + # descriptor of this map point (use most recent obs) + mp_desc = _choose_mp_descriptor(mp.observations) + if mp_desc is None: + continue + + best_i, best_d = -1, 1e9 + for i in cand_idx: + if i in used_kps: + continue + d = _best_mp_distance_to_cur_desc(mp, des_cur[i], max_obs_check=6) + if d < best_d: + best_d, best_i = d, i + + if best_i < 0 or best_d > thr: + continue + + used_kps.add(best_i) + pts3d.append(mp.position.astype(np.float32)) + pts2d.append(pts2d_cur[best_i]) + kpids.append(best_i) + mpids.append(mp_id) + + if not pts3d: + return Matches2D3D(np.zeros((0,3),np.float32), np.zeros((0,2),np.float32), [], []) + + return Matches2D3D( + np.asarray(pts3d, np.float32), + np.asarray(pts2d, np.float32), + kpids, mpids + ) + + +def solve_pnp_ransac(pts3d: np.ndarray, + pts2d: np.ndarray, + K: np.ndarray, + ransac_px: float, + Tcw_init: Optional[np.ndarray] = None, + iters: int = 200, + conf: float = 0.999) -> Tuple[Optional[np.ndarray], np.ndarray]: + """Return (T_cw, inlier_mask) or (None, empty)""" + if len(pts3d) < 4: + return None, np.zeros((0,), dtype=bool) + + use_guess = Tcw_init is not None + rvec0, tvec0 = None, None + if use_guess: + R0, t0 = Tcw_init[:3, :3], Tcw_init[:3, 3] + rvec0, _ = cv2.Rodrigues(R0) + tvec0 = t0.reshape(3, 1) + + ok, rvec, tvec, inliers = cv2.solvePnPRansac( + pts3d, pts2d, K, None, + useExtrinsicGuess=bool(use_guess), + iterationsCount=int(iters), + reprojectionError=float(ransac_px), + confidence=float(conf), + flags=cv2.SOLVEPNP_ITERATIVE, + rvec=rvec0, tvec=tvec0 + ) + if not ok or inliers is None or len(inliers) < 4: + return None, np.zeros((len(pts3d),), dtype=bool) + + R, _ = cv2.Rodrigues(rvec) + Tcw = _pose_from_Rt(R, tvec.reshape(3)) + mask = np.zeros((len(pts3d),), dtype=bool) + mask[inliers.ravel().astype(int)] = True + return Tcw, mask + +# ---------- optional: tiny debug overlay ---------- + +def draw_reprojection_debug(img_bgr: np.ndarray, + K: np.ndarray, + Tcw: np.ndarray, + matches: Matches2D3D, + inlier_mask: np.ndarray) -> np.ndarray: + vis = img_bgr.copy() + for (Xw, uv, ok) in zip(matches.pts3d, matches.pts2d, inlier_mask): + # draw observed point + cv2.circle(vis, tuple(np.int32(uv)), 3, (0, 255, 0) if ok else (0, 0, 255), -1) + # draw reprojected point from estimated pose + uvp, z = project_point(K, Tcw, Xw.astype(np.float64)) + if z > 0: + cv2.circle(vis, tuple(np.int32(uvp)), 2, 
(255, 255, 0), 1) + cv2.line(vis, tuple(np.int32(uv)), tuple(np.int32(uvp)), (255, 255, 0), 1) + return vis diff --git a/modules/SimpleSLAM/slam/core/pose_utils.py b/modules/SimpleSLAM/slam/core/pose_utils.py new file mode 100644 index 0000000000..5d15fffbd1 --- /dev/null +++ b/modules/SimpleSLAM/slam/core/pose_utils.py @@ -0,0 +1,124 @@ +import numpy as np +from scipy.spatial.transform import Rotation + + +def project_to_SO3(M): + """ + Helper function - Projects a near-rotation 3x3 matrix M onto SO(3) via SVD. + Returns the closest rotation (Frobenius norm). + """ + U, S, Vt = np.linalg.svd(M) + R = U @ Vt + if np.linalg.det(R) < 0: + U[:, -1] *= -1 + R = U @ Vt + return R + +def _pose_inverse(T, validate=True): + """ + Invert a 4x4 homogeneous rigid transform using SciPy. Return 4×4 inverse of a rigid‑body transform specified by R|t + + Parameters + ---------- + T : array-like (4,4) + Homogeneous transform [[R, t],[0,0,0,1]]. + validate : bool + If True, re-project rotation onto SO(3) via SciPy (robust to mild drift). + + Returns + ------- + T_inv : (4,4) ndarray + Inverse transform. + """ + T = np.asarray(T, dtype=float) + if T.shape != (4,4): + raise ValueError("T must be 4x4.") + Rmat = T[:3,:3] + t = T[:3, 3] + + if validate: + # Produces closest rotation in Frobenius norm + # Re-projects matrix to ensure perfect orthonormality + Rmat = project_to_SO3(Rmat) + + R_inv = Rmat.T + t_inv = -R_inv @ t + + T_inv = np.eye(4) + T_inv[:3,:3] = R_inv + T_inv[:3, 3] = t_inv + return T_inv + +def _pose_rt_to_homogenous(R: np.ndarray, t: np.ndarray) -> np.ndarray: + """Convert relative pose (R, t) to Homogeneous matrix **T_c1→c2**.""" + T = np.eye(4) + T[:3, :3] = R + T[:3, 3] = t.ravel() + return T + +# ******************************************************************************************** +# BA HELPER FUNCTIONS : Small pose ⇄ parameter converters +# ******************************************************************************************** + +def _pose_to_quat_trans(T, ordering="xyzw"): # pyceres uses maybe (xyzw), optionally you could use "wxyz" + """ + Convert a camera-from-world pose matrix T_cw (4x4) into (quat_cw, t_cw) + + Parameters + ---------- + T : (4,4) array-like + Homogeneous transform. + ordering : str + "wxyz" (default) or "xyzw". + Returns + ------- + q : (4,) ndarray + Quaternion in requested ordering, unit norm (w>=0 if wxyz). + t : (3,) ndarray + Translation vector. 
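+
+    A minimal round-trip sketch (ordering passed explicitly; relies only on
+    the helpers defined in this module):
+
+    >>> q, t = _pose_to_quat_trans(np.eye(4), ordering="xyzw")
+    >>> bool(np.allclose(_quat_trans_to_pose(q, t, ordering="xyzw"), np.eye(4)))
+    True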
+ """ + T = np.asarray(T, dtype=float) + assert T.shape == (4,4) + Rmat = T[:3,:3] + t = T[:3,3].copy() + + # Re-orthonormalize (optional but good hygiene) + U, _, Vt = np.linalg.svd(Rmat) + Rmat = U @ Vt + if np.linalg.det(Rmat) < 0: # enforce right-handed + U[:, -1] *= -1 + Rmat = U @ Vt + + rot = Rotation.from_matrix(Rmat) + # SciPy gives quaternions in (x, y, z, w) + q_xyzw = rot.as_quat() + + # Normalize (usually already unit) + q_xyzw = q_xyzw / np.linalg.norm(q_xyzw) + + if ordering.lower() == "xyzw": + q = q_xyzw + # optional consistent sign: enforce w>=0 + if q[-1] < 0: q = -q + else: # wxyz + w = q_xyzw[3] + q = np.array([w, *q_xyzw[:3]]) + if q[0] < 0: q = -q + return q, t + +def _quat_trans_to_pose(q, t, ordering="xyzw"): + q = np.asarray(q, dtype=float) + t = np.asarray(t, dtype=float).reshape(3) + if ordering.lower() == "wxyz": + w, x, y, z = q + q_xyzw = np.array([x, y, z, w]) + else: + x, y, z, w = q + q_xyzw = q + # Normalize in case + q_xyzw = q_xyzw / np.linalg.norm(q_xyzw) + Rmat = Rotation.from_quat(q_xyzw).as_matrix() + T = np.eye(4) + T[:3,:3] = Rmat + T[:3,3] = t + return T diff --git a/modules/SimpleSLAM/slam/core/trajectory_utils.py b/modules/SimpleSLAM/slam/core/trajectory_utils.py new file mode 100644 index 0000000000..f4852d276a --- /dev/null +++ b/modules/SimpleSLAM/slam/core/trajectory_utils.py @@ -0,0 +1,52 @@ +""" +trajectory_utils.py +~~~~~~~~~~~~~~~~~~~ +Small utilities for aligning, transforming and plotting camera +trajectories. + +This file contains *no* project-specific imports, so it can be reused +from notebooks or quick scripts without pulling the whole SLAM stack. +""" +from __future__ import annotations +from typing import Tuple, Optional + +import numpy as np +import matplotlib.pyplot as plt + + +# ------------------------------------------------------------------ # +# Alignment helpers +# ------------------------------------------------------------------ # +def compute_gt_alignment(gt_T: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + """ + Return (R_align, t_align) such that + + p_slam = R_align @ p_gt + t_align + + maps *ground-truth* positions into the SLAM world used in this + project (camera-0 frame at t=0). + + Parameters + ---------- + gt_T : (N,4,4) homogeneous ground-truth poses, first one defines the + reference frame. + + Notes + ----- + *KITTI* supplies (3×4) poses whose *first* matrix is *already* + camera-0 wrt world, therefore the alignment is an identity – but we + keep it generic so other datasets work too. + """ + if gt_T.ndim != 3 or gt_T.shape[1:] != (4, 4): + raise ValueError("gt_T must be (N,4,4)") + R0, t0 = gt_T[0, :3, :3], gt_T[0, :3, 3] + R_align = np.eye(3) @ R0.T + t_align = -R_align @ t0 + return R_align, t_align + + +def apply_alignment(p_gt: np.ndarray, + R_align: np.ndarray, + t_align: np.ndarray) -> np.ndarray: + """Transform *one* 3-D point from GT to SLAM coordinates.""" + return R_align @ p_gt + t_align diff --git a/modules/SimpleSLAM/slam/core/triangulation_utils.py b/modules/SimpleSLAM/slam/core/triangulation_utils.py new file mode 100644 index 0000000000..e908792b5c --- /dev/null +++ b/modules/SimpleSLAM/slam/core/triangulation_utils.py @@ -0,0 +1,228 @@ +# slam/core/triangulation_utils.py +""" +2-View Triangulation (compatible with Map API) +- Matches prev_kf <-> cur_kf +- cv2.triangulatePoints with P = K @ Tcw[:3,:] +- Gates: parallax (optional), cheirality/depth, reprojection +- Inserts points via Map.add_points(...) + MapPoint.add_observation(...) 
+- Rich debug logging & reason counters +""" +import numpy as np +import cv2 +import logging +from collections import Counter + +from slam.core.features_utils import ( + feature_matcher, + filter_matches_ransac +) +from slam.core.two_view_bootstrap import pts_from_matches + +log_tri = logging.getLogger("triangulation") + +# ---------- Helpers ---------- + +def _RCt_from_Tcw(Tcw: np.ndarray): + """Decompose 4x4 Tcw into (R, C, t), where x_c = R (X - C) = R X + t.""" + R = Tcw[:3, :3] + t = Tcw[:3, 3] + C = -R.T @ t + return R, C, t + +def _P_from_K_Tcw(K: np.ndarray, Tcw: np.ndarray) -> np.ndarray: + """3x4 projection: P = K @ Tcw[:3,:] (world -> camera homogeneous).""" + return K @ Tcw[:3, :] + +def _project_px(K, R, t, X): + """Project world point X with camera (R,t). Returns 2D pixel or None if behind camera.""" + Xc = R @ X + t + if Xc[2] <= 1e-6: + return None + u = K @ (Xc / Xc[2]) + return u[:2] + +def _angle_parallax_deg(K, uv1, uv2): + """Angle between unit rays (for parallax gating).""" + x1 = np.linalg.inv(K) @ np.array([uv1[0], uv1[1], 1.0], dtype=float) + x2 = np.linalg.inv(K) @ np.array([uv2[0], uv2[1], 1.0], dtype=float) + v1 = x1 / (np.linalg.norm(x1) + 1e-12) + v2 = x2 / (np.linalg.norm(x2) + 1e-12) + c = np.clip(np.dot(v1, v2), -1.0, 1.0) + return float(np.degrees(np.arccos(c))) + + +def _map_add_point(world_map, Xw, desc, obs_list): + """ + Insert one 3-D point into Map and add its observations. + + Uses Map.add_points(...) (your Map stores points in a dict, not a list), + then calls MapPoint.add_observation(...) for each view. + """ + # 1) create the point (returns [pid]) + ids = world_map.add_points(np.asarray([Xw], dtype=np.float64)) + if not ids: + raise RuntimeError("Map.add_points returned no ids.") + pid = ids[0] + + # 2) add observations + try: + for ob in obs_list: + kf_idx = int(ob["kf_idx"]) + kp_idx = int(ob["kp_idx"]) + if desc is None and "desc" in ob and ob["desc"] is not None: + d = ob["desc"] + else: + d = desc + world_map.points[pid].add_observation(kf_idx, kp_idx, d if d is not None else np.zeros((1,), np.uint8)) + except Exception as e: + # If anything goes wrong, remove the half-created landmark to keep map clean + world_map.points.pop(pid, None) + raise e + + return pid + + +# ---------- Public: triangulate between two keyframes ---------- + +def triangulate_between_kfs_2view( + args, K, world_map, prev_kf, cur_kf, matcher, log, + use_parallax_gate: bool = True, parallax_min_deg: float = 2.0, + reproj_px_max: float | None = None, + debug_max_examples: int = 10 +): + """ + Triangulate new points from matches between two keyframes. 
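+    New landmarks are inserted into *world_map* and the list of their ids is
+    returned (an empty list if nothing passes the gates).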
+ + Args: + args: CLI args (uses min_depth, max_depth, ransac_thresh) + K: (3,3) intrinsics + world_map: Map (points: dict[int, MapPoint], add_points(...)) + prev_kf, cur_kf: Keyframe objects with .idx, .kps, .desc, .pose (Tcw) + matcher: feature matcher from your pipeline + log: logger (we also use 'triangulation' logger for extra debug) + """ + # 0) Match + geometric filtering + raw = feature_matcher(args, prev_kf.kps, cur_kf.kps, prev_kf.desc, cur_kf.desc, matcher) + matches = filter_matches_ransac(prev_kf.kps, cur_kf.kps, raw, args.ransac_thresh) + + log.info("[TRI] KF %d→%d raw=%d after_RANSAC=%d (th=%.2f px)", + prev_kf.idx, cur_kf.idx, len(raw), len(matches), float(args.ransac_thresh)) + log_tri.debug("prev_kf: idx=%d, kps=%d | cur_kf: idx=%d, kps=%d", + prev_kf.idx, len(prev_kf.kps), cur_kf.idx, len(cur_kf.kps)) + + if len(matches) == 0: + log.info("[TRI] No matches to triangulate for KFs %d→%d.", prev_kf.idx, cur_kf.idx) + return [] + + # 1) Build point arrays (Nx2) in pixel coords + pts1, pts2 = pts_from_matches(prev_kf.kps, cur_kf.kps, matches) + + # 2) Projection matrices + P1 = _P_from_K_Tcw(K, prev_kf.pose) + P2 = _P_from_K_Tcw(K, cur_kf.pose) + + # 3) Triangulate (homogeneous 4xN -> Nx3) + X4 = cv2.triangulatePoints(P1, P2, pts1.T.astype(np.float64), pts2.T.astype(np.float64)) + w = X4[3, :] + valid_w = np.isfinite(w) & (np.abs(w) > 1e-12) + if valid_w.sum() == 0: + log.warning("[TRI] cv2.triangulatePoints produced no finite depths (w).") + return [] + + X = (X4[:3, valid_w] / w[valid_w]).T # Nx3 valid + idx_valid = np.flatnonzero(valid_w) + + # 4) Decompose poses for gating + R1, C1, t1 = _RCt_from_Tcw(prev_kf.pose) + R2, C2, t2 = _RCt_from_Tcw(cur_kf.pose) + + # Choose reprojection threshold + if reproj_px_max is None: + reproj_px_max = float(args.ransac_thresh) + + # Debug counters + reasons = Counter() + kept_ids = [] + examples_shown = 0 + + # Precompute optional parallax stats for a quick sense of geometry + if use_parallax_gate: + sample_parallaxes = [] + for m in matches[:200]: # sample for speed + uv1 = prev_kf.kps[m.queryIdx].pt + uv2 = cur_kf.kps[m.trainIdx].pt + sample_parallaxes.append(_angle_parallax_deg(K, uv1, uv2)) + if sample_parallaxes: + log.info("[TRI] Parallax(sample of %d): med=%.2f°, p25=%.2f°, p75=%.2f°", + len(sample_parallaxes), + float(np.median(sample_parallaxes)), + float(np.percentile(sample_parallaxes, 25)), + float(np.percentile(sample_parallaxes, 75))) + + # 5) Iterate matches and corresponding 3D points + min_d = float(getattr(args, "min_depth", 0.0)) + max_d = float(getattr(args, "max_depth", 1e6)) + + for out_idx, m_idx in enumerate(idx_valid): + Xw = X[out_idx] + m = matches[m_idx] + i1, i2 = m.queryIdx, m.trainIdx + uv1 = prev_kf.kps[i1].pt + uv2 = cur_kf.kps[i2].pt + + # (a) Optional parallax gate + if use_parallax_gate: + par = _angle_parallax_deg(K, uv1, uv2) + if par < parallax_min_deg: + reasons["low_parallax"] += 1 + if examples_shown < debug_max_examples: + log_tri.debug("reject: low_parallax (%.2f°) match=(%d,%d)", par, i1, i2) + continue + + # (b) Cheirality + depth window + z1 = (R1 @ Xw + t1)[2] + z2 = (R2 @ Xw + t2)[2] + if not (min_d <= z1 <= max_d and min_d <= z2 <= max_d): + reasons["bad_depth"] += 1 + if examples_shown < debug_max_examples: + log_tri.debug("reject: bad_depth z1=%.3f z2=%.3f in [%.3f, %.3f]", z1, z2, min_d, max_d) + continue + + # (c) Reprojection gate + u1h = _project_px(K, R1, t1, Xw) + u2h = _project_px(K, R2, t2, Xw) + if (u1h is None) or (u2h is None): + reasons["behind_cam"] += 1 + if examples_shown 
< debug_max_examples: + log_tri.debug("reject: behind_cam u1h=%s u2h=%s", str(u1h), str(u2h)) + continue + + e1 = float(np.linalg.norm(u1h - uv1)) + e2 = float(np.linalg.norm(u2h - uv2)) + if max(e1, e2) > reproj_px_max: + reasons["high_reproj"] += 1 + if examples_shown < debug_max_examples: + log_tri.debug("reject: high_reproj e1=%.2f e2=%.2f > %.2f", e1, e2, reproj_px_max) + continue + + # (d) Insert MapPoint (descriptor: take from current KF) + desc_cur = cur_kf.desc[i2] if cur_kf.desc is not None else None + obs_list = [ + {"kf_idx": prev_kf.idx, "kp_idx": i1, "uv": uv1, "desc": prev_kf.desc[i1] if prev_kf.desc is not None else None}, + {"kf_idx": cur_kf.idx, "kp_idx": i2, "uv": uv2, "desc": desc_cur}, + ] + pid = _map_add_point(world_map, Xw, desc_cur, obs_list) + kept_ids.append(pid) + reasons["kept"] += 1 + + if examples_shown < debug_max_examples: + log_tri.debug("kept pid=%d par=%.2f° z1=%.2f z2=%.2f e1=%.2f e2=%.2f", pid, + _angle_parallax_deg(K, uv1, uv2) if use_parallax_gate else -1.0, z1, z2, e1, e2) + examples_shown += 1 + + # 6) Summary + log.info("[TRI] KF %d<->%d: kept=%d of %d valid w | reasons: %s (reproj<=%.1fpx, depth∈[%.2f,%.2f])", + prev_kf.idx, cur_kf.idx, len(kept_ids), int(valid_w.sum()), + dict(reasons), reproj_px_max, min_d, max_d) + + return kept_ids diff --git a/modules/SimpleSLAM/slam/core/two_view_bootstrap.py b/modules/SimpleSLAM/slam/core/two_view_bootstrap.py new file mode 100644 index 0000000000..a4e61680ff --- /dev/null +++ b/modules/SimpleSLAM/slam/core/two_view_bootstrap.py @@ -0,0 +1,418 @@ +""" +two_view_bootstrap.py (verbose, self-contained) +------------------------------------------------ +ORB-style two-view bootstrap for monocular SLAM with helpful logging. + +What it does: +- Competes Homography (H) vs Fundamental (F) on comparable residuals. +- Recovers a valid (R, t) with cheirality + parallax checks. +- Optionally returns a robust inlier mask aligned with your matches. +- Builds the initial map (KF0 = I, KF1 = [R|t]), triangulates once, + depth-filters, and adds landmarks + observations. + +How to use in main: +1) Use `evaluate_two_view_bootstrap_with_masks` to decide on a pair. +2) Call `bootstrap_two_view_map(...)` to create KF0+KF1 and landmarks. 
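+
+A minimal usage sketch (``K``, ``kp0``/``kp1``, ``desc0``/``desc1``,
+``matches``, ``args`` and ``world_map`` are assumed to come from the caller's
+pipeline)::
+
+    pts_ref, pts_cur = pts_from_matches(kp0, kp1, matches)
+    decision = evaluate_two_view_bootstrap_with_masks(K, pts_ref, pts_cur)
+    if decision is not None:
+        ok, T0_cw, T1_cw = bootstrap_two_view_map(
+            K, kp0, desc0, kp1, desc1, matches, args, world_map,
+            decision=decision)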
+ +Author: ChatGPT assistant +""" +from dataclasses import dataclass +from enum import Enum, auto +from typing import Optional, Tuple +import numpy as np +import cv2 +import logging + +from slam.core.pose_utils import _pose_rt_to_homogenous + +# ---------------- logging ---------------- +logger = logging.getLogger("two_view_bootstrap") +# if not logger.handlers: +# _h = logging.StreamHandler() +# _fmt = logging.Formatter("[%(levelname)s] %(name)s:%(funcName)s: %(message)s") +# _h.setFormatter(_fmt) +# logger.addHandler(_h) +# logger.setLevel(logging.INFO) # override from your main if you want DEBUG + +# ---------------- data types ---------------- + +class TwoViewModel(Enum): + HOMOGRAPHY = auto() + FUNDAMENTAL = auto() + +@dataclass +class InitParams: + ransac_px: float = 1.5 + chi2_H: float = 5.99 # chi^2 95% for 2 DoF (px^2) + chi2_F: float = 3.84 # chi^2 95% for 1 DoF (px^2) + min_pts_for_tests: int = 60 + min_posdepth: float = 0.90 + min_parallax_deg: float = 1.5 + score_ratio_H: float = 0.45 # prefer H if S_H/(S_H+S_F) > 0.45 + +@dataclass +class TwoViewScores: + S_H: float + S_F: float + ratio_H: float + +@dataclass +class TwoViewPose: + model: TwoViewModel + R: np.ndarray # 3x3 + t: np.ndarray # 3x1 + posdepth: float # fraction in front of both cams + parallax_deg: float # median triangulation angle + +@dataclass +class TwoViewDecision: + pose: TwoViewPose + inlier_mask: np.ndarray # boolean mask aligned with pts_ref/pts_cur (N,) + +# ---------------- residuals & scoring ---------------- + +def symmetric_transfer_errors_H(H: np.ndarray, + pts_ref: np.ndarray, + pts_cur: np.ndarray) -> np.ndarray: + """Squared symmetric transfer error for a homography H.""" + x1 = cv2.convertPointsToHomogeneous(pts_ref)[:, 0, :].T # 3xN + x2 = cv2.convertPointsToHomogeneous(pts_cur)[:, 0, :].T + Hx1 = H @ x1 + Hinv = np.linalg.inv(H) + Hinvx2 = Hinv @ x2 + p2 = (Hx1[:2] / (Hx1[2] + 1e-12)).T + p1 = (Hinvx2[:2] / (Hinvx2[2] + 1e-12)).T + e12 = np.sum((pts_cur - p2) ** 2, axis=1) + e21 = np.sum((pts_ref - p1) ** 2, axis=1) + d2 = e12 + e21 + logger.debug("H symmetric errors: med=%.3f px^2, 75p=%.3f px^2", + float(np.median(d2)), float(np.percentile(d2, 75))) + return d2 + +def sampson_distances_F(F: np.ndarray, + pts_ref: np.ndarray, + pts_cur: np.ndarray) -> np.ndarray: + """Sampson distance for a fundamental matrix F.""" + x1 = cv2.convertPointsToHomogeneous(pts_ref)[:, 0, :] + x2 = cv2.convertPointsToHomogeneous(pts_cur)[:, 0, :] + Fx1 = (F @ x1.T).T + Ftx2 = (F.T @ x2.T).T + num = (np.sum(x2 * (F @ x1.T).T, axis=1)) ** 2 + den = Fx1[:, 0] ** 2 + Fx1[:, 1] ** 2 + Ftx2[:, 0] ** 2 + Ftx2[:, 1] ** 2 + 1e-12 + d2 = num / den + logger.debug("F Sampson distances: med=%.3f, 75p=%.3f", + float(np.median(d2)), float(np.percentile(d2, 75))) + return d2 + +def truncated_inlier_score(residuals_sq: np.ndarray, chi2_cutoff: float) -> float: + """ORB-style truncated linear score: sum(max(0, chi2 - d^2)).""" + S = float(np.maximum(0.0, chi2_cutoff - residuals_sq).sum()) + logger.debug("Truncated score @chi2=%.2f → S=%.1f (inlier-like=%d/%d)", + chi2_cutoff, S, int((residuals_sq < chi2_cutoff).sum()), residuals_sq.size) + return S + +def compute_model_scores(H: Optional[np.ndarray], + F: Optional[np.ndarray], + pts_ref: np.ndarray, + pts_cur: np.ndarray, + params: InitParams) -> TwoViewScores: + S_H = truncated_inlier_score(symmetric_transfer_errors_H(H, pts_ref, pts_cur), params.chi2_H) if H is not None else 0.0 + S_F = truncated_inlier_score(sampson_distances_F(F, pts_ref, pts_cur), params.chi2_F) if F is not 
None else 0.0 + ratio_H = S_H / (S_H + S_F + 1e-12) + logger.info("Scores S_H=%.1f S_F=%.1f → ratio_H=%.3f", S_H, S_F, ratio_H) + return TwoViewScores(S_H=S_H, S_F=S_F, ratio_H=ratio_H) + +# ---------------- triangulation validation ---------------- + +def triangulation_metrics(K: np.ndarray, + R: np.ndarray, + t: np.ndarray, + pts_ref: np.ndarray, + pts_cur: np.ndarray) -> Tuple[float, float, int]: + """Return (posdepth_fraction, median_parallax_deg, N_points_used).""" + if len(pts_ref) < 2: + return 0.0, 0.0, 0 + p1n = cv2.undistortPoints(pts_ref.reshape(-1, 1, 2), K, None).reshape(-1, 2) + p2n = cv2.undistortPoints(pts_cur.reshape(-1, 1, 2), K, None).reshape(-1, 2) + P1 = np.hstack([np.eye(3), np.zeros((3, 1))]) + P2 = np.hstack([R, t.reshape(3, 1)]) + Xh = cv2.triangulatePoints(P1, P2, p1n.T, p2n.T) + X = (Xh[:3] / (Xh[3] + 1e-12)).T # Nx3 + + z1 = X[:, 2] + X2 = (R @ X.T + t.reshape(3, 1)).T + z2 = X2[:, 2] + posdepth = float(np.mean((z1 > 0) & (z2 > 0))) + + C1 = np.zeros((1, 3)) + C2 = (-R.T @ t).reshape(1, 3) + v1 = X - C1 + v2 = X - C2 + cosang = np.sum(v1 * v2, axis=1) / (np.linalg.norm(v1, axis=1) * np.linalg.norm(v2, axis=1) + 1e-12) + parallax_deg = float(np.degrees(np.median(np.arccos(np.clip(cosang, -1.0, 1.0))))) + + logger.debug("Triangulation metrics: posdepth=%.3f parallax_med=%.2f° N=%d", + posdepth, parallax_deg, len(X)) + return posdepth, parallax_deg, len(X) + +def validate_two_view_pose(K: np.ndarray, + R: np.ndarray, + t: np.ndarray, + pts_ref: np.ndarray, + pts_cur: np.ndarray, + params: InitParams) -> Tuple[bool, float, float]: + posdepth, parallax_deg, N = triangulation_metrics(K, R, t, pts_ref, pts_cur) + ok = (N >= params.min_pts_for_tests and + posdepth >= params.min_posdepth and + parallax_deg >= params.min_parallax_deg) + logger.info("Validate pose: ok=%s N=%d posdepth=%.3f parallax=%.2f° (req: N≥%d, pos≥%.2f, par≥%.2f°)", + ok, N, posdepth, parallax_deg, params.min_pts_for_tests, params.min_posdepth, params.min_parallax_deg) + return ok, posdepth, parallax_deg + +# ---------------- pose recovery for each model ---------------- + +def recover_pose_from_homography(K: np.ndarray, + H: np.ndarray, + pts_ref: np.ndarray, + pts_cur: np.ndarray, + params: InitParams) -> Optional[TwoViewPose]: + ret = cv2.decomposeHomographyMat(H, K) + if ret is None or len(ret) < 4: + logger.warning("Homography decomposition failed.") + return None + _, Rs, ts, _ = ret + logger.info("Homography decomposition → %d candidates", len(Rs)) + best: Optional[TwoViewPose] = None + best_key = (-1.0, -1.0) # maximize (posdepth, parallax) + + for idx, (R, t) in enumerate(zip(Rs, ts)): + t = t.reshape(3, 1) + t = t / (np.linalg.norm(t) + 1e-12) # scale-free + ok, pd, ang = validate_two_view_pose(K, R, t, pts_ref, pts_cur, params) + logger.info(" H-cand #%d: ok=%s posdepth=%.3f parallax=%.2f°", idx, ok, pd, ang) + if ok and (pd, ang) > best_key: + best = TwoViewPose(TwoViewModel.HOMOGRAPHY, R, t, pd, ang) + best_key = (pd, ang) + if best is None: + logger.info("No homography candidate passed validation.") + else: + logger.info("Chosen H-candidate: posdepth=%.3f parallax=%.2f°", best.posdepth, best.parallax_deg) + return best + +def recover_pose_from_fundamental(K: np.ndarray, + F: np.ndarray, + pts_ref: np.ndarray, + pts_cur: np.ndarray, + params: InitParams) -> Optional[TwoViewPose]: + E = K.T @ F @ K + ok, R, t, mask = cv2.recoverPose(E, pts_ref, pts_cur, K) + ninl = int(np.count_nonzero(mask)) if mask is not None else 0 + logger.info("recoverPose(E): ok=%s inliers=%d", ok, ninl) + if not 
ok or mask is None or ninl < params.min_pts_for_tests: + logger.info("F/E rejected: not enough inliers for validation.") + return None + inl = mask.ravel().astype(bool) + ok2, pd, ang = validate_two_view_pose(K, R, t, pts_ref[inl], pts_cur[inl], params) + if ok2: + logger.info("F/E accepted: posdepth=%.3f parallax=%.2f°", pd, ang) + return TwoViewPose(TwoViewModel.FUNDAMENTAL, R, t, pd, ang) + logger.info("F/E rejected after validation.") + return None + +# ---------------- top-level model selection ---------------- + +def evaluate_two_view_bootstrap(K: np.ndarray, + pts_ref: np.ndarray, + pts_cur: np.ndarray, + params: InitParams = InitParams() + ) -> Optional[TwoViewPose]: + """Pick H vs F with comparable residuals, then recover a valid (R,t).""" + H, maskH = cv2.findHomography(pts_ref, pts_cur, cv2.RANSAC, params.ransac_px) + F, maskF = cv2.findFundamentalMat(pts_ref, pts_cur, cv2.FM_RANSAC, params.ransac_px) + + nH = int(maskH.sum()) if (maskH is not None and maskH.size) else 0 + nF = int(maskF.sum()) if (maskF is not None and maskF.size) else 0 + logger.info("RANSAC: H-inliers=%d (th=%.2f px), F-inliers=%d (th=%.2f px)", + nH, params.ransac_px, nF, params.ransac_px) + + if H is None and F is None: + logger.info("Both H and F estimation failed → reject pair.") + return None + + scores = compute_model_scores(H, F, pts_ref, pts_cur, params) + + if scores.ratio_H > params.score_ratio_H and H is not None: + logger.info("Model selection: prefer HOMOGRAPHY (ratio_H=%.3f > %.2f)", + scores.ratio_H, params.score_ratio_H) + pose = recover_pose_from_homography(K, H, pts_ref, pts_cur, params) + if pose is not None: + return pose + logger.info("H path failed validation → trying F/E fallback.") + else: + logger.info("Model selection: prefer FUNDAMENTAL/E (ratio_H=%.3f ≤ %.2f)", + scores.ratio_H, params.score_ratio_H) + + if F is not None: + pose = recover_pose_from_fundamental(K, F, pts_ref, pts_cur, params) + if pose is not None: + return pose + + logger.info("Pair rejected: ambiguous or too weak for initialization.") + return None + +# ---------------- masks for the chosen model ---------------- + +def _final_inlier_mask_for_model(model: TwoViewModel, + pts_ref: np.ndarray, + pts_cur: np.ndarray, + K: np.ndarray, + R: np.ndarray, + t: np.ndarray, + ransac_px: float) -> np.ndarray: + """ + Make a robust inlier mask aligned with pts_ref/pts_cur for the chosen model. + F/E: intersect F-RANSAC inliers with recoverPose mask. + H: use H-RANSAC inliers. 
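+    If model re-estimation fails here, an all-False mask of length
+    ``len(pts_ref)`` is returned.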
+ """ + def _as_bool(m): + if m is None: + return None + v = np.asarray(m).ravel() + # treat any nonzero as True (works for 1 and 255) + return (v.astype(np.uint8) > 0) + + if model is TwoViewModel.FUNDAMENTAL: + F, maskF = cv2.findFundamentalMat(pts_ref, pts_cur, cv2.FM_RANSAC, ransac_px) + if F is None or maskF is None: + return np.zeros(len(pts_ref), dtype=bool) + E = K.T @ F @ K + _, _, _, maskRP = cv2.recoverPose(E, pts_ref, pts_cur, K) + mF = _as_bool(maskF) + mRP = _as_bool(maskRP) + return mF if mRP is None else (mF & mRP) + else: + H, maskH = cv2.findHomography(pts_ref, pts_cur, cv2.RANSAC, ransac_px) + if H is None or maskH is None: + return np.zeros(len(pts_ref), dtype=bool) + return _as_bool(maskH) + + +def evaluate_two_view_bootstrap_with_masks(K: np.ndarray, + pts_ref: np.ndarray, + pts_cur: np.ndarray, + params: InitParams = InitParams() + ) -> Optional[TwoViewDecision]: + """Same as evaluate_two_view_bootstrap, but also returns a robust inlier mask.""" + pose = evaluate_two_view_bootstrap(K, pts_ref, pts_cur, params) + if pose is None: + return None + mask = _final_inlier_mask_for_model(pose.model, pts_ref, pts_cur, K, pose.R, pose.t, params.ransac_px) + return TwoViewDecision(pose=pose, inlier_mask=mask.astype(bool)) + +# ---------------- map building (non-redundant) ---------------- + +def _triangulate_points_cv(K: np.ndarray, + R: np.ndarray, + t: np.ndarray, + pts_ref: np.ndarray, + pts_cur: np.ndarray) -> np.ndarray: + """Triangulate in the reference camera frame (world := cam0).""" + p1n = cv2.undistortPoints(pts_ref.reshape(-1, 1, 2), K, None).reshape(-1, 2) + p2n = cv2.undistortPoints(pts_cur.reshape(-1, 1, 2), K, None).reshape(-1, 2) + P1 = np.hstack([np.eye(3), np.zeros((3, 1))]) + P2 = np.hstack([R, t.reshape(3, 1)]) + Xh = cv2.triangulatePoints(P1, P2, p1n.T, p2n.T) + X = (Xh[:3] / (Xh[3] + 1e-12)).T # Nx3 in cam0/world + return X + +def bootstrap_two_view_map(K: np.ndarray, + kp_ref, desc_ref, + kp_cur, desc_cur, + matches, # list[cv2.DMatch] between (ref,cur) + args, + world_map, + params: InitParams = InitParams(), + decision: Optional[TwoViewDecision] = None): + """ + Build the initial map from one accepted two-view pair. + + If you already ran the gate, pass its 'decision' to avoid recomputation. + Otherwise this function will run the gate internally. + + Side effects: + - Inserts KF0 (Tcw = I) and KF1 (Tcw = [R|t]) into world_map. + - Triangulates, depth-filters, and adds points + observations. 
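+      (As written, the ``world_map.add_pose`` calls for KF0/KF1 below are
+      commented out, so the caller is expected to insert the returned poses.)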
+ + Returns: (success: bool, T0_cw: 4x4, T1_cw: 4x4) + """ + if len(matches) < 50: + logger.info("[BOOTSTRAP] Not enough matches for init (%d < 50).", len(matches)) + return False, None, None + + # Build float arrays aligned with 'matches' + pts_ref = np.float32([kp_ref[m.queryIdx].pt for m in matches]) + pts_cur = np.float32([kp_cur[m.trainIdx].pt for m in matches]) + + # 1) Model selection + (R,t) + inlier mask + if decision is None: + decision = evaluate_two_view_bootstrap_with_masks(K, pts_ref, pts_cur, params) + if decision is None: + logger.info("[BOOTSTRAP] Pair rejected by gate; aborting.") + return False, None, None + pose = decision.pose + mask = decision.inlier_mask.astype(bool) + ninl = int(mask.sum()) + logger.info("[BOOTSTRAP] Using model=%s with %d inliers.", pose.model.name, ninl) + + if ninl < params.min_pts_for_tests: + logger.info("[BOOTSTRAP] Too few inliers after gating (%d < %d).", ninl, params.min_pts_for_tests) + return False, None, None + + # 2) Triangulate once on the final inliers + p0 = pts_ref[mask] + p1 = pts_cur[mask] + Xw = _triangulate_points_cv(K, pose.R, pose.t, p0, p1) # world := cam0 + + # 3) Cheirality + depth range gating (both views) + z0 = Xw[:, 2] + X1 = (pose.R @ Xw.T + pose.t.reshape(3, 1)).T + z1 = X1[:, 2] + + min_d = float(getattr(args, "min_depth", 0.0)) + max_d = float(getattr(args, "max_depth", 1e6)) + ok = (z0 > min_d) & (z0 < max_d) & (z1 > min_d) & (z1 < max_d) + Xw = Xw[ok] + logger.info("[BOOTSTRAP] Triangulated=%d kept=%d after depth filter [%.3g, %.3g].", + len(p0), len(Xw), min_d, max_d) + if len(Xw) < 80: + logger.info("[BOOTSTRAP] Not enough 3D points to seed the map (%d < 80).", len(Xw)) + return False, None, None + + # 4) Insert KF0 (I) and KF1 ([R|t]) exactly once, in this order + T0_cw = np.eye(4, dtype=np.float64) + T1_cw = _pose_rt_to_homogenous(pose.R, pose.t) + # world_map.add_pose(T0_cw, is_keyframe=True) # KF0 + # world_map.add_pose(T1_cw, is_keyframe=True) # KF1 + + # 5) Add 3D points and observations + cols = np.full((len(Xw), 3), 0.7, dtype=np.float32) # grey + ids = world_map.add_points(Xw, cols, keyframe_idx=0) + + # map inlier indices back to kp indices + qidx = np.int32([m.queryIdx for m in matches]) + tidx = np.int32([m.trainIdx for m in matches]) + sel = np.where(mask)[0][ok] # indices into 'matches' of inliers that passed depth + + for pid, i0, i1 in zip(ids, qidx[sel], tidx[sel]): + world_map.points[pid].add_observation(0, i0, desc_ref[i0]) # KF0 + world_map.points[pid].add_observation(1, i1, desc_cur[i1]) # KF1 + + logger.info("[BOOTSTRAP] Map initialised: %d landmarks, 2 keyframes (KF0=I, KF1=[R|t]).", len(ids)) + return True, T0_cw, T1_cw + +# ---------------- convenience ---------------- + +def pts_from_matches(kps_ref, kps_cur, matches): + pts_ref = np.float32([kps_ref[m.queryIdx].pt for m in matches]) + pts_cur = np.float32([kps_cur[m.trainIdx].pt for m in matches]) + return pts_ref, pts_cur diff --git a/modules/SimpleSLAM/slam/core/visualization_utils.py b/modules/SimpleSLAM/slam/core/visualization_utils.py new file mode 100644 index 0000000000..fb42fd2585 --- /dev/null +++ b/modules/SimpleSLAM/slam/core/visualization_utils.py @@ -0,0 +1,497 @@ +# visualization_utils.py +from __future__ import annotations +""" +visualization_utils.py +~~~~~~~~~~~~~~~~~~~~~~ +Clean, modular utilities for +* drawing 2‑D track overlays with OpenCV, and +* rendering a coloured 3‑D map in an **Open3D** window. + +Main entry‑points +----------------- +``draw_tracks(img, tracks, frame_no)`` + Overlay recent feature tracks. 
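+    Track segments older than ``max_age`` frames are dropped and the remaining
+    ones fade from green (fresh) to red (old).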
+ +``Visualizer3D`` + Live window that shows + • the SLAM point cloud, colour‑coded along an axis or PCA‑auto; + • the camera trajectory (blue line); + • new landmarks highlighted (default bright‑green). + Supports WASDQE first‑person navigation when the Open3D build exposes + key‑callback APIs. +""" + +from typing import Dict, List, Tuple, Optional, Literal +import warnings, threading, cv2, numpy as np + +import cv2 +import numpy as np + +try: + import open3d as o3d # type: ignore +except Exception as exc: # pragma: no cover + o3d = None # type: ignore + _OPEN3D_ERR = exc +else: + _OPEN3D_ERR = None + +from slam.core.landmark_utils import Map +import numpy as np + +ColourAxis = Literal["x", "y", "z", "auto"] + +# --------------------------------------------------------------------------- # +# 3‑D visualiser (Open3D only) # +# --------------------------------------------------------------------------- # + +class Visualizer3D: + """Open3D window that shows the coloured point-cloud and camera path.""" + + def __init__( + self, + color_axis: ColourAxis = "z", + *, + new_colour: Tuple[float, float, float] = (0.0, 1.0, 0.0), + window_size: Tuple[int, int] = (1280, 720), + nav_step: float = 0.25, + ) -> None: + self.backend = "none" + self._closed = False + self._lock = threading.Lock() + self.paused = False + + self.color_axis = color_axis + self.new_colour = np.asarray(new_colour, dtype=np.float32) + self.nav_step = nav_step + + # scalar-to-colour normalisation + self._v_min: Optional[float] = None + self._v_max: Optional[float] = None + self._pc_vec: Optional[np.ndarray] = None # PCA axis for "auto" + + # ------------------------------------------------------------------ # + # Open3D init + # ------------------------------------------------------------------ # + if o3d is None: + warnings.warn(f"[Visualizer3D] Open3D missing → window disabled ({_OPEN3D_ERR})") + return + + vis_cls = ( + o3d.visualization.VisualizerWithKeyCallback + if hasattr(o3d.visualization, "VisualizerWithKeyCallback") + else o3d.visualization.Visualizer + ) + self.vis = vis_cls() + self.vis.create_window("SLAM Map", width=window_size[0], height=window_size[1]) + self.backend = "open3d" + + self.pcd = o3d.geometry.PointCloud() + self.lines = o3d.geometry.LineSet() + self._first = True + + if isinstance(self.vis, o3d.visualization.VisualizerWithKeyCallback): + self._bind_nav_keys() + + print(f"[Visualizer3D] ready | colour_axis={self.color_axis}") + + # ------------------------------------------------------------------ # + # WASDQE first-person navigation + # ------------------------------------------------------------------ # + def _bind_nav_keys(self): + vc = self.vis.get_view_control() + + def translate(delta: np.ndarray): + cam = vc.convert_to_pinhole_camera_parameters() + T = np.eye(4); T[:3, 3] = delta * self.nav_step + cam.extrinsic = T @ cam.extrinsic + vc.convert_from_pinhole_camera_parameters(cam) + return False + + key_map = { + ord("W"): np.array([0, 0, -1]), + ord("S"): np.array([0, 0, 1]), + ord("A"): np.array([-1, 0, 0]), + ord("D"): np.array([ 1, 0, 0]), + ord("Q"): np.array([0, 1, 0]), + ord("E"): np.array([0, -1, 0]), + } + for k, v in key_map.items(): + self.vis.register_key_callback(k, lambda _v, vec=v: translate(vec)) + + # ------------------------------------------------------------------ # + # Helpers for colour mapping + # ------------------------------------------------------------------ # + def _compute_scalar(self, pts: np.ndarray) -> np.ndarray: + if self.color_axis in ("x", "y", "z"): + return 
pts[:, {"x": 0, "y": 1, "z": 2}[self.color_axis]] + if self._pc_vec is None: # first call → PCA axis + centred = pts - pts.mean(0) + _, _, vh = np.linalg.svd(centred, full_matrices=False) + self._pc_vec = vh[0] + return pts @ self._pc_vec + + def _normalise(self, scalars: np.ndarray) -> np.ndarray: + if self._v_min is None: # initialise 5th–95th perc. + self._v_min, self._v_max = np.percentile(scalars, [5, 95]) + else: # expand running min / max + self._v_min = min(self._v_min, scalars.min()) + self._v_max = max(self._v_max, scalars.max()) + return np.clip((scalars - self._v_min) / (self._v_max - self._v_min + 1e-6), 0, 1) + + def _colormap(self, norm: np.ndarray) -> np.ndarray: + try: + import matplotlib.cm as cm + return cm.get_cmap("turbo")(norm)[:, :3] + except Exception: + # fall-back: simple HSV → RGB + h = (1 - norm) * 240 + c = np.ones_like(h); m = np.zeros_like(h) + x = c * (1 - np.abs((h / 60) % 2 - 1)) + return np.select( + [h < 60, h < 120, h < 180, h < 240, h < 300, h >= 300], + [ + np.stack([c, x, m], 1), np.stack([x, c, m], 1), + np.stack([m, c, x], 1), np.stack([m, x, c], 1), + np.stack([x, m, c], 1), np.stack([c, m, x], 1), + ]) + + # ------------------------------------------------------------------ # + # Public interface + # ------------------------------------------------------------------ # + def update(self, slam_map: Map, new_ids: Optional[List[int]] = None): + if self.backend != "open3d" or len(slam_map.points) == 0: + return + if self.paused: + with self._lock: + self.vis.poll_events(); self.vis.update_renderer() + return + + # -------------------------- build numpy arrays ------------------------- + pts = slam_map.get_point_array() + col = slam_map.get_color_array() if hasattr(slam_map, "get_color_array") else None + if col is None or len(col) == 0: # legacy maps without colour + scal = self._compute_scalar(pts) + col = self._colormap(self._normalise(scal)) + else: + col = col.astype(np.float32) + + # pts = slam_map.get_point_array() + # col = slam_map.get_color_array() if hasattr(slam_map, "get_color_array") else None + + # use_fallback = ( + # col is None + # or len(col) == 0 + # or (isinstance(col, np.ndarray) and col.ndim == 2 and col.shape[1] == 3 + # and np.allclose(col.std(axis=0), 0, atol=1e-6)) # colours are (nearly) uniform + # ) + + # if use_fallback: + # # vertical gradient (axis set by color_axis, you pass "y" at construction) + # scal = self._compute_scalar(pts) + # col = self._colormap(self._normalise(scal)).astype(np.float32) + # else: + # col = col.astype(np.float32) + + + # keep arrays in sync (pad / trim) + if len(col) < len(pts): + diff = len(pts) - len(col) + col = np.vstack([col, np.full((diff, 3), 0.8, np.float32)]) + elif len(col) > len(pts): + col = col[: len(pts)] + + # highlight newly-added landmarks + if new_ids: + id_to_i = {pid: i for i, pid in enumerate(slam_map.point_ids())} + for nid in new_ids: + if nid in id_to_i: + col[id_to_i[nid]] = self.new_colour + + # ----------------------- Open3D geometry update ------------------------ + with self._lock: + self.pcd.points = o3d.utility.Vector3dVector(pts) + self.pcd.colors = o3d.utility.Vector3dVector(col) + + self._update_lineset(slam_map) + + if self._first: + self.vis.add_geometry(self.pcd); self.vis.add_geometry(self.lines) + self._first = False + else: + self.vis.update_geometry(self.pcd); self.vis.update_geometry(self.lines) + + self.vis.poll_events(); self.vis.update_renderer() + + # ------------------------------------------------------------------ # + # Blue camera trajectory 
poly-line + # ------------------------------------------------------------------ # + def _update_lineset(self, slam_map: Map): + if len(slam_map.poses) < 2: + return + path = np.asarray([p[:3, 3] for p in slam_map.poses], np.float32) + self.lines.points = o3d.utility.Vector3dVector(path) + self.lines.lines = o3d.utility.Vector2iVector([[i, i + 1] for i in range(len(path) - 1)]) + self.lines.colors = o3d.utility.Vector3dVector(np.tile([[0, 0, 1]], (len(path) - 1, 1))) + + # ------------------------------------------------------------------ # + # Clean shutdown + # ------------------------------------------------------------------ # + def close(self): + if self.backend == "open3d" and not self._closed: + self.vis.destroy_window(); self._closed = True +# --------------------------------------------------------------------------- # +# 2‑D overlay helpers +# --------------------------------------------------------------------------- # + +def draw_tracks( + vis: np.ndarray, + tracks: Dict[int, List[Tuple[int, int, int]]], + current_frame: int, + max_age: int = 10, + sample_rate: int = 5, + max_tracks: int = 100, +) -> np.ndarray: + """Draw ageing feature tracks as fading polylines. + + Parameters + ---------- + vis : BGR uint8 image (modified *in‑place*) + tracks : {track_id: [(frame_idx,x,y), ...]} + current_frame: index of the frame being drawn + max_age : only show segments younger than this (#frames) + sample_rate : skip tracks where `track_id % sample_rate != 0` to avoid clutter + max_tracks : cap total rendered tracks for speed + """ + recent = [ + (tid, pts) + for tid, pts in tracks.items() + if current_frame - pts[-1][0] <= max_age + ] + recent.sort(key=lambda x: x[1][-1][0], reverse=True) + + drawn = 0 + for tid, pts in recent: + if drawn >= max_tracks: + break + if tid % sample_rate: + continue + pts = [p for p in pts if current_frame - p[0] <= max_age] + for j in range(1, len(pts)): + _, x0, y0 = pts[j - 1] + _, x1, y1 = pts[j] + ratio = (current_frame - pts[j - 1][0]) / max_age + colour = (0, int(255 * (1 - ratio)), int(255 * ratio)) + cv2.line(vis, (x0, y0), (x1, y1), colour, 2) + drawn += 1 + return vis + + +# --------------------------------------------------------------------------- # +# Lightweight 2-D trajectory plotter (Matplotlib) +# --------------------------------------------------------------------------- # + +# ---- Trajectory 2D (x–z) + simple pause UI ---- +class Trajectory2D: + def __init__(self, gt_T_list=None, win="Trajectory 2D (x–z)"): + self.win = win + self.gt_T = gt_T_list # list of 4x4 Twc (or None) + self.est_xyz = [] # estimated camera centers (world) + self.gt_xyz = [] # paired GT centers + self.align_ok = False + self.s = 1.0 + self.R = np.eye(3) + self.t = np.zeros(3) + + # ensure a real window exists (Qt backend is happier with this) + cv2.namedWindow(self.win, cv2.WINDOW_NORMAL) + cv2.resizeWindow(self.win, 500, 500) + + @staticmethod + def _cam_center_from_Tcw(Tcw: np.ndarray) -> np.ndarray: + R, t = Tcw[:3, :3], Tcw[:3, 3] + return (-R.T @ t).astype(np.float64) + + def _maybe_update_alignment(self, Kpairs=60): + if len(self.est_xyz) < 6 or len(self.gt_xyz) < 6: + return + X = np.asarray(self.gt_xyz[-Kpairs:], np.float64) # GT + Y = np.asarray(self.est_xyz[-Kpairs:], np.float64) # EST + muX, muY = X.mean(0), Y.mean(0) + X0, Y0 = X - muX, Y - muY + cov = (Y0.T @ X0) / X.shape[0] + U, S, Vt = np.linalg.svd(cov) + D = np.diag([1, 1, np.sign(np.linalg.det(U @ Vt))]) + R = U @ D @ Vt + varY = (Y0**2).sum() / X.shape[0] + s = (S * np.diag(D)).sum() / (varY + 
1e-12) + t = muX - s * (R @ muY) + self.s, self.R, self.t = float(s), R, t + self.align_ok = True + + def push(self, frame_idx: int, Tcw: np.ndarray): + self.est_xyz.append(self._cam_center_from_Tcw(Tcw)) + if self.gt_T is not None and 0 <= frame_idx < len(self.gt_T): + self.gt_xyz.append(self.gt_T[frame_idx][:3, 3].astype(np.float64)) + self._maybe_update_alignment(Kpairs=min(100, len(self.est_xyz))) + + def draw(self, paused=False, size=(720, 720), margin=60): + W, H = size + canvas = np.full((H, W, 3), 255, np.uint8) + + # nothing to draw yet + if not self.est_xyz: + cv2.imshow(self.win, canvas) + return + + E = np.asarray(self.est_xyz, np.float64) + if self.align_ok: + E = (self.s * (self.R @ E.T)).T + self.t + + curves = [("estimate", E, (255, 0, 0))] # BGR: blue + have_gt = len(self.gt_xyz) > 0 + if have_gt: + G = np.asarray(self.gt_xyz, np.float64) + curves.append(("ground-truth", G, (0, 0, 255))) # red + allpts = np.vstack([E[:, [0, 2]], G[:, [0, 2]]]) + else: + allpts = E[:, [0, 2]] + + # ----- nice bounds with padding ----- + minx, minz = allpts.min(axis=0); maxx, maxz = allpts.max(axis=0) + pad_x = 0.05 * max(1e-6, maxx - minx) + pad_z = 0.05 * max(1e-6, maxz - minz) + minx -= pad_x; maxx += pad_x + minz -= pad_z; maxz += pad_z + + spanx = max(maxx - minx, 1e-6) + spanz = max(maxz - minz, 1e-6) + sx = (W - 2 * margin) / spanx + sz = (H - 2 * margin) / spanz + s = min(sx, sz) + + def to_px(x, z): + u = int((x - minx) * s + margin) + v = int((maxz - z) * s + margin) # flip z for image y + return u, v + + # ----- grid + ticks (matplotlib-ish) ----- + def linspace_ticks(a, b, m=6): + # simple ticks; good enough for live viz + return np.linspace(a, b, m) + + ticks_x = linspace_ticks(minx, maxx, 6) + ticks_z = linspace_ticks(minz, maxz, 6) + + # draw background grid + for x in ticks_x: + u0, v0 = to_px(x, minz) + u1, v1 = to_px(x, maxz) + cv2.line(canvas, (u0, v0), (u1, v1), (235, 235, 235), 1, cv2.LINE_AA) + for z in ticks_z: + u0, v0 = to_px(minx, z) + u1, v1 = to_px(maxx, z) + cv2.line(canvas, (u0, v0), (u1, v1), (235, 235, 235), 1, cv2.LINE_AA) + + # axis box + cv2.rectangle(canvas, (margin-1, margin-1), (W-margin, H-margin), (200, 200, 200), 1) + + # tick labels + font = cv2.FONT_HERSHEY_SIMPLEX + for x in ticks_x: + u, v = to_px(x, minz) + cv2.putText(canvas, f"{x:.1f}", (u-14, H - margin + 20), font, 0.4, (0,0,0), 1, cv2.LINE_AA) + for z in ticks_z: + u, v = to_px(minx, z) + cv2.putText(canvas, f"{z:.1f}", (margin - 50, v+4), font, 0.4, (0,0,0), 1, cv2.LINE_AA) + + # axis labels + cv2.putText(canvas, "x", (W//2, H - 10), font, 0.6, (0,0,0), 1, cv2.LINE_AA) + cv2.putText(canvas, "z", (10, H//2), font, 0.6, (0,0,0), 1, cv2.LINE_AA) + + # ----- draw curves ----- + for name, C, color in curves: + if C.shape[0] < 2: + # single point + u, v = to_px(C[-1, 0], C[-1, 2]) + cv2.circle(canvas, (u, v), 3, color, -1, cv2.LINE_AA) + else: + pts = [to_px(x, z) for (x, _, z) in C] + for p, q in zip(pts[:-1], pts[1:]): + cv2.line(canvas, p, q, color, 2, cv2.LINE_AA) + cv2.circle(canvas, pts[-1], 3, color, -1, cv2.LINE_AA) + + # legend + legend_x, legend_y = margin + 10, margin + 20 + for idx, (name, _, color) in enumerate(curves): + y = legend_y + idx * 20 + cv2.line(canvas, (legend_x, y), (legend_x + 30, y), color, 3, cv2.LINE_AA) + cv2.putText(canvas, name, (legend_x + 40, y + 4), font, 0.5, (60,60,60), 1, cv2.LINE_AA) + + # title and paused hint + cv2.putText(canvas, "Trajectory 2D (x–z)", (margin, 30), font, 0.7, (0,0,0), 2, cv2.LINE_AA) + if paused: + cv2.putText(canvas, "PAUSED [p: resume 
| n: step | q/Esc: quit]", + (margin, H - 15), font, 0.5, (20,20,20), 2, cv2.LINE_AA) + + cv2.imshow(self.win, canvas) + + +# --------------------------------------------------------------------------- # +# UI for Pausing and stepping through the pipeline +# --------------------------------------------------------------------------- # +class VizUI: + """Tiny UI state for pausing/stepping the pipeline.""" + def __init__(self, pause_key='p', step_key='n', quit_keys=('q', 27)): + self.pause_key = self._to_code(pause_key) + self.step_key = self._to_code(step_key) + self.quit_keys = {self._to_code(k) for k in (quit_keys if isinstance(quit_keys, (tuple, list, set)) else (quit_keys,))} + self.paused = False + self._request_quit = False + self._do_step = False + + @staticmethod + def _to_code(k): + return k if isinstance(k, int) else ord(k) + + def poll(self, delay_ms=1): + k = cv2.waitKey(delay_ms) & 0xFF + if k == 255: # no key + return + if k in self.quit_keys: + self._request_quit = True + return + if k == self.pause_key: + self.paused = not self.paused + self._do_step = False + return + if k == self.step_key: + self._do_step = True + return + + def should_quit(self): + return self._request_quit + + def wait_if_paused(self): + """Block while paused, but allow 'n' to step one iteration.""" + if not self.paused: + return False # not blocking + while True: + k = cv2.waitKey(30) & 0xFF + if k == self.pause_key: # resume + self.paused = False + return False + if k in self.quit_keys: + self._request_quit = True + return False + if k == self.step_key: + # allow one iteration to run, remain paused afterward + self._do_step = True + return True # consume one step + + def consume_step(self): + """Return True once if a single-step was requested.""" + if self._do_step: + self._do_step = False + return True + return False + diff --git a/modules/SimpleSLAM/slam/core/visualize_ba.py b/modules/SimpleSLAM/slam/core/visualize_ba.py new file mode 100644 index 0000000000..de6d6bc22b --- /dev/null +++ b/modules/SimpleSLAM/slam/core/visualize_ba.py @@ -0,0 +1,111 @@ +# --------------------------------------------------------------------------- # +# BA reprojection debug helpers +# --------------------------------------------------------------------------- # +import cv2 +import numpy as np +from copy import deepcopy +from typing import Tuple, List + +def _gather_3d_2d(world_map, keypoints, frame_idx: int + ) -> Tuple[np.ndarray, np.ndarray]: + """ + Collect (pts3d_w , measured_px) pairs for *one* frame. + """ + pts3d, uv = [], [] + for mp in world_map.points.values(): + for f_idx, kp_idx, _ in mp.observations: + if f_idx == frame_idx: + pts3d.append(mp.position) + uv.append(keypoints[f_idx][kp_idx].pt) + break + if not pts3d: # nothing observed by this camera + return np.empty((0, 3)), np.empty((0, 2)) + return np.asarray(pts3d, np.float32), np.asarray(uv, np.float32) + + +def _project(K, T_wc, pts3d_w): + """ + Vectorised pin-hole projection (world ➜ camera ➜ image). + """ + T_cw = np.linalg.inv(T_wc) + R_cw, t_cw = T_cw[:3, :3], T_cw[:3, 3] + rvec, _ = cv2.Rodrigues(R_cw) + proj, _ = cv2.projectPoints(pts3d_w, rvec, t_cw.reshape(3, 1), K, None) + return proj.reshape(-1, 2) + + +def draw_ba_reprojection(img_bgr, K, + T_wc_before, T_wc_after, + pts3d_before, pts3d_after, pts2d_meas, + win_name="BA reprojection", rad=4) -> np.ndarray: + """ + Overlay *measured* KPs (white crosses), projections *before* BA (red), + projections *after* BA (green). 
An arrow connects before → after so + you can see the correction visually. + """ + img = img_bgr.copy() + proj_before = _project(K, T_wc_before, pts3d_before) + proj_after = _project(K, T_wc_after, pts3d_after) + + for (u_meas, v_meas), (u_b, v_b), (u_a, v_a) in zip(pts2d_meas, + proj_before, + proj_after): + p_meas = (int(round(u_meas)), int(round(v_meas))) + p_b = (int(round(u_b)), int(round(v_b))) + p_a = (int(round(u_a)), int(round(v_a))) + + cv2.drawMarker(img, p_meas, (255,255,255), cv2.MARKER_CROSS, rad*2, 1) + cv2.circle(img, p_b, rad, (0,0,255), -1) # red = before BA + cv2.circle(img, p_a, rad, (0,255,0), 2) # green = after BA + cv2.arrowedLine(img, p_b, p_a, (255,255,0), 1, tipLength=0.25) + + cv2.imshow(win_name, img) + cv2.waitKey(1) + return img + +# --------------------------------------------------------------------------- # +# BA-window visualiser (all key-frames in the sliding window) +# --------------------------------------------------------------------------- # +def visualize_ba_window(seq, args, + K, + world_map_before, world_map_after, + keypoints, + opt_kf_idx: list[int], + rad_px: int = 4) -> None: + """ + For every key-frame in *opt_kf_idx* show an OpenCV window with: + • measured kp (white cross), + • projection **before** BA (red dot), + • projection **after** BA (green dot), + • arrow before → after (cyan). + + Each window is titled "BA KF " – hit any key to close all. + """ + from slam.core.dataloader import load_frame_pair # avoids cyclic imports + + for k in opt_kf_idx: + # --- 1) pull rgb ------------------------------------------------------- + # `load_frame_pair(args, seq, i)` returns (img_i , img_{i+1}); + # the KF itself is at i+1 → ask for i = k-1 + img_bgr, _ = load_frame_pair(args, seq, k-1) + + # --- 2) collect correspondences --------------------------------------- + pts3d_b, uv = _gather_3d_2d(world_map_before, keypoints, k) + if len(pts3d_b) == 0: # nothing observed by this KF + continue + pts3d_a, _ = _gather_3d_2d(world_map_after, keypoints, k) + + # Quantitative Error check + err_b = np.linalg.norm(_project(K, world_map_before.poses[k], pts3d_b) - uv, axis=1) + err_a = np.linalg.norm(_project(K, world_map_after.poses[k], pts3d_a) - uv, axis=1) + print(f'KF {k}: mean reproj error {err_b.mean():5.2f} px → {err_a.mean():5.2f} px') + + + # --- 3) overlay ------------------------------------------------------- + draw_ba_reprojection(img_bgr, K, + world_map_before.poses[k], + world_map_after.poses[k], + pts3d_b, pts3d_a, uv, + win_name=f"BA KF {k}", rad=rad_px) + cv2.waitKey(0) # press any key once all windows are up + cv2.destroyAllWindows() diff --git a/modules/SimpleSLAM/slam/monocular/__init__.py b/modules/SimpleSLAM/slam/monocular/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/SimpleSLAM/slam/monocular/main.py b/modules/SimpleSLAM/slam/monocular/main.py new file mode 100644 index 0000000000..038da8e8d0 --- /dev/null +++ b/modules/SimpleSLAM/slam/monocular/main.py @@ -0,0 +1,630 @@ +# main.py +""" +WITH MULTI-VIEW TRIANGULATION - New PnP + +Entry-point: high-level processing loop +-------------------------------------- +$ python main.py --dataset kitti --base_dir ../Dataset + + +The core loop now performs: + 1) Feature detection + matching (OpenCV or LightGlue) + 2) Essential‑matrix estimation + pose recovery + 3) Landmarks triangulation (with Z‑filtering) + 4) Pose integration (camera trajectory in world frame) + 5) Optional 3‑D visualisation via Open3D + +The script shares most command‑line arguments with the 
previous version +but adds `--no_viz3d` to disable the 3‑D window. + +""" +import argparse +from copy import deepcopy +import cv2 +import lz4.frame +import numpy as np +from tqdm import tqdm +from typing import List + +from slam.core.pose_utils import _pose_inverse, _pose_rt_to_homogenous + +from slam.core.dataloader import ( + load_sequence, + load_frame_pair, + load_calibration, + load_groundtruth) + +from slam.core.features_utils import ( + init_feature_pipeline, + feature_extractor, + feature_matcher, + filter_matches_ransac) + +from slam.core.keyframe_utils import ( + Keyframe, + select_keyframe, + make_thumb) + +from slam.core.visualization_utils import draw_tracks, Visualizer3D, TrajectoryPlotter +from slam.core.trajectory_utils import compute_gt_alignment, apply_alignment +from slam.core.landmark_utils import Map +from slam.core.triangulation_utils import update_and_prune_tracks, MultiViewTriangulator, triangulate_points +from slam.core.pnp_utils import associate_landmarks, refine_pose_pnp +from slam.core.ba_utils import ( + two_view_ba, + pose_only_ba, + local_bundle_adjustment +) + +# --------------------------------------------------------------------------- # +# CLI +# --------------------------------------------------------------------------- # +def _build_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser("Feature tracking with key-frames") + p.add_argument('--dataset', + choices=['kitti', 'malaga', 'tum-rgbd', 'custom'], + required=True) + p.add_argument('--base_dir', default='../Dataset') + # feature/detector settings + p.add_argument('--detector', choices=['orb', 'sift', 'akaze'], + default='orb') + p.add_argument('--matcher', choices=['bf'], default='bf') + p.add_argument('--use_lightglue', action='store_true') + p.add_argument('--min_conf', type=float, default=0.7, + help='Minimum LightGlue confidence for a match') + # runtime + p.add_argument('--fps', type=float, default=10) + # RANSAC + p.add_argument('--ransac_thresh', type=float, default=3.0) + # key-frame params + p.add_argument('--kf_max_disp', type=float, default=45) + p.add_argument('--kf_min_inliers', type=float, default=150) + p.add_argument('--kf_cooldown', type=int, default=5) + p.add_argument('--kf_thumb_hw', type=int, nargs=2, + default=[640, 360]) + + # 3‑D visualisation toggle + p.add_argument("--no_viz3d", action="store_true", help="Disable 3‑D visualization window") + # triangulation depth filtering + p.add_argument("--min_depth", type=float, default=0.60) + p.add_argument("--max_depth", type=float, default=50.0) + p.add_argument('--mvt_rep_err', type=float, default=30.0, + help='Max mean reprojection error (px) for multi-view triangulation') + + # PnP / map-maintenance + p.add_argument('--pnp_min_inliers', type=int, default=20) + p.add_argument('--proj_radius', type=float, default=12.0) + p.add_argument('--merge_radius', type=float, default=0.10) + + # Bundle Adjustment + p.add_argument('--local_ba_window', type=int, default=6, help='Window size (number of keyframes) for local BA') + + return p + + +# --------------------------------------------------------------------------- # +# Bootstrap initialisation +# --------------------------------------------------------------------------- # +def try_bootstrap(K, kp0, descs0, kp1, descs1, matches, args, world_map): + """Return (success, T_cam0_w, T_cam1_w) and add initial landmarks.""" + if len(matches) < 50: + return False, None, None + + # 1. 
pick a model: Essential (general) *or* Homography (planar) + pts0 = np.float32([kp0[m.queryIdx].pt for m in matches]) + pts1 = np.float32([kp1[m.trainIdx].pt for m in matches]) + + E, inl_E = cv2.findEssentialMat( + pts0, pts1, K, cv2.RANSAC, 0.999, args.ransac_thresh) + + H, inl_H = cv2.findHomography( + pts0, pts1, cv2.RANSAC, args.ransac_thresh) + + # score = #inliers (ORB-SLAM uses a more elaborate model-selection score) + use_E = (inl_E.sum() > inl_H.sum()) + + if use_E and E is None: + return False, None, None + if not use_E and H is None: + return False, None, None + + if use_E: + _, R, t, inl_pose = cv2.recoverPose(E, pts0, pts1, K) + mask = (inl_E.ravel() & inl_pose.ravel()).astype(bool) + else: + print("[BOOTSTRAP] Using Homography for initialisation") + R, t = cv2.decomposeHomographyMat(H, K)[1:3] # take the best hypothesis + mask = inl_H.ravel().astype(bool) + + # 2. triangulate those inliers – exactly once + p0 = pts0[mask] + p1 = pts1[mask] + pts3d = triangulate_points(K, R, t, p0, p1) + + z0 = pts3d[:, 2] + pts3d_cam1 = (R @ pts3d.T + t.reshape(3, 1)).T + z1 = pts3d_cam1[:, 2] + + # both cameras see the point in front of them + ok = ( (z0 > args.min_depth) & (z0 < args.max_depth) & # in front of cam‑0 + (z1 > args.min_depth) & (z1 < args.max_depth) ) # in front of cam‑1) + pts3d = pts3d[ok] + print(f"[BOOTSTRAP] Triangulated {len(pts3d)} points. Status: {ok.sum()} inliers") + + if len(pts3d) < 80: + print("[BOOTSTRAP] Not enough points to bootstrap the map.") + return False, None, None + + T1_cw = _pose_rt_to_homogenous(R, t) # camera-from-world + T1_wc = _pose_inverse(T1_cw) # world-from-camera + # 3. fill the map + world_map.add_pose(T1_wc, is_keyframe=True) # Keyframe because we only bootstrap on keyframes + + cols = np.full((len(pts3d), 3), 0.7) # grey – colour is optional here + ids = world_map.add_points(pts3d, cols, keyframe_idx=0) #TODO 0 or 1 + + # ----------------------------------------------- + # add (frame_idx , kp_idx) pairs for each new MP + # ----------------------------------------------- + inlier_kp_idx = np.where(mask)[0][ok] # kp indices that survived depth + # build index arrays once + qidx = np.int32([m.queryIdx for m in matches]) + tidx = np.int32([m.trainIdx for m in matches]) + sel = np.where(mask)[0][ok] # selected matches + + for pid, i0, i1 in zip(ids, qidx[sel], tidx[sel]): + world_map.points[pid].add_observation(0, i0, descs0[i0]) + world_map.points[pid].add_observation(1, i1, descs1[i1]) + + + + print(f"[BOOTSTRAP] Map initialised with {len(ids)} landmarks.") + return True, T1_wc + + +# --------------------------------------------------------------------------- # +# Continuous pose tracking (PnP) +# --------------------------------------------------------------------------- # +def visualize_pnp_reprojection(img_bgr, K, T_wc, pts3d_w, pts2d_px, inlier_mask=None, + win_name="PnP debug", thickness=2): + """ + Draw projected 3‑D landmarks (from world) on top of the current image and connect + them to the actual detected keypoints. + + img_bgr : current image (BGR) + K : 3x3 intrinsics + T_wc : 4x4 pose (cam->world) + pts3d_w : (N,3) 3D points in *world* coords + pts2d_px : (N,2) measured pixel locations that took part in PnP + inlier_mask : optional boolean array (len=N). If given, only inliers get lines. 
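+
+    A minimal usage sketch (variable names and shapes here are illustrative
+    assumptions, not part of the pipeline):
+
+        overlay = visualize_pnp_reprojection(
+            frame_bgr, K, Twc_cur,
+            pts3d_w=np.asarray(landmarks_w, np.float32),   # (N,3) world points
+            pts2d_px=np.asarray(measured_px, np.float32),  # (N,2) pixel measurements
+            inlier_mask=inliers)                           # optional (N,) bool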
+ """ + import cv2 + import numpy as np + + img = img_bgr.copy() + # world -> cam + T_cw = np.linalg.inv(T_wc) + R_cw, t_cw = T_cw[:3, :3], T_cw[:3, 3] + rvec, _ = cv2.Rodrigues(R_cw) + tvec = t_cw.reshape(3, 1) + + proj, _ = cv2.projectPoints(pts3d_w.astype(np.float32), rvec, tvec, K, None) + proj = proj.reshape(-1, 2) + + if inlier_mask is None: + inlier_mask = np.ones(len(proj), dtype=bool) + + for (u_meas, v_meas), (u_proj, v_proj), ok in zip(pts2d_px, proj, inlier_mask): + color = (0, 255, 0) if ok else (0, 0, 255) # green for inlier, red for outlier + cv2.circle(img, (int(round(u_proj)), int(round(v_proj))), 4, (255, 0, 0), -1) # projected (blue) + cv2.circle(img, (int(round(u_meas)), int(round(v_meas))), 4, color, -1) # measured + if ok: + cv2.line(img, (int(round(u_meas)), int(round(v_meas))), + (int(round(u_proj)), int(round(v_proj))), color, thickness) + + cv2.imshow(win_name, img) + cv2.waitKey(1) + return img + +def track_with_pnp(K, + kp_prev, kp_cur, desc_prev, desc_cur, matches, img2, + frame_no, + Twc_prev, + world_map, args): + """ + Continuous PnP tracking: project map landmarks with Twc_prev, associate to + current keypoints, estimate Twc for the current frame, and register inliers. + + Returns + ------- + ok : bool + Twc_cur : (4,4) np.ndarray – pose camera-to-world for frame `frame_no` + used_cur_idx : set[int] – kp indices on the current image that were + part of the inlier PnP solution. These are + excluded from later triangulation. + """ + + # -------------------- 0) Quick checks -------------------- + if len(world_map.points) < 4 or len(kp_cur) == 0: + print(f"[PNP] Not enough data at frame {frame_no} " + f"(points={len(world_map.points)}, kps={len(kp_cur)})") + return False, None, set() + + # Gather map points and their ids (fixed order pairing) + mp_ids = world_map.point_ids() # list[int] + pts_w = world_map.get_point_array().astype(np.float32) # (N,3) + if len(mp_ids) != len(pts_w): + # Extremely defensive: inconsistent map containers + print("[PNP] Map containers out of sync; skipping frame.") + return False, None, set() + + # -------------------- 1) Assoc 3D↔2D by projection -------------------- + # Project landmarks into the current image using previous pose + # (world → current cam prediction) + proj = associate_landmarks.project_points(K, Twc_prev, pts_w) if False else None # to satisfy linters + from slam.core.pnp_utils import project_points # local import to avoid cycles + proj_px = project_points(K, Twc_prev, pts_w) # (N,2) + kp_xy = np.float32([kp.pt for kp in kp_cur]) # (K,2) + + # -------------------- 1a) Hard gates before NN association -------------------- + # World → previous camera coordinates + T_cw = np.linalg.inv(Twc_prev) + R_cw, t_cw = T_cw[:3, :3], T_cw[:3, 3] + Xc = (R_cw @ pts_w.T + t_cw[:, None]).T # (N,3) in camera frame + z = Xc[:, 2] + + H, W = img2.shape[:2] + margin = float(getattr(args, "proj_margin", 2.0)) # px + min_depth = float(getattr(args, "min_depth", 0.1)) # m + max_depth = float(getattr(args, "max_depth", np.inf)) # m + + u, v = proj_px[:, 0], proj_px[:, 1] + valid_depth = (z > min_depth) & (z < max_depth) + valid_pix = np.isfinite(u) & np.isfinite(v) & \ + (u >= margin) & (u < W - margin) & \ + (v >= margin) & (v < H - margin) + + keep = np.where(valid_depth & valid_pix)[0] + if keep.size < 4: + print(f"[PNP] Too few gated points (keep={keep.size}) at frame {frame_no}") + return False, None, set() + + # shrink arrays to only gated landmarks + pts_w = pts_w[keep] + proj_px = proj_px[keep] + mp_ids = [mp_ids[i] for i in 
keep] + + def _associate(search_rad_px: float): + used_kp = set() + obj_pts, img_pts = [], [] + obj_pids, kp_ids = [], [] + r2 = search_rad_px * search_rad_px + for i, (u, v) in enumerate(proj_px): + # greedy nearest neighbour in image space + d2 = np.sum((kp_xy - (u, v))**2, axis=1) + j = int(np.argmin(d2)) + if d2[j] < r2 and j not in used_kp: + obj_pts.append(pts_w[i]) + img_pts.append(kp_xy[j]) + obj_pids.append(mp_ids[i]) # keep landmark id + kp_ids.append(j) # keep kp index + used_kp.add(j) + if len(obj_pts) == 0: + return (np.empty((0, 3), np.float32), np.empty((0, 2), np.float32), + [], []) + return (np.asarray(obj_pts, np.float32), + np.asarray(img_pts, np.float32), + obj_pids, kp_ids) + + # Try with configured radius; if too few, expand once + pts3d, pts2d, obj_pids, kp_ids = _associate(args.proj_radius) + if len(pts3d) < max(4, args.pnp_min_inliers // 2): + pts3d, pts2d, obj_pids, kp_ids = _associate(args.proj_radius * 1.5) + + if len(pts3d) < 4: + print(f"[PNP] Not enough 2D–3D correspondences (found {len(pts3d)}) at frame {frame_no}") + return False, None, set() + + # -------------------- 2) Robust PnP (+ LM refine) -------------------- + # R,t returned map **world → camera** (T_cw); convert to **camera → world** + R_cw, tvec = refine_pose_pnp(K, pts3d, pts2d) # AP3P RANSAC + LM + + if R_cw is None: + print(f"[PNP] RANSAC/LM failed at frame {frame_no}") + return False, None, set() + print(f'tvec_flat = {tvec}') + T_cw = _pose_rt_to_homogenous(R_cw, tvec) + Twc_cur = _pose_inverse(T_cw) + + # -------------------- 3) Compute inliers w.r.t. refined pose -------------------- + # Build an inlier mask via reprojection error under the refined pose + rvec, _ = cv2.Rodrigues(R_cw) + proj_refined, _ = cv2.projectPoints(pts3d.astype(np.float32), rvec, tvec, K, None) + proj_refined = proj_refined.reshape(-1, 2) + + reproj_err = np.linalg.norm(proj_refined - pts2d, axis=1) + inlier_mask = reproj_err < float(3) #TODO magic number, should be in args + num_inl = int(np.count_nonzero(inlier_mask)) + if num_inl < args.pnp_min_inliers: + print(f"[PNP] Too few inliers after refine ({num_inl}<{args.pnp_min_inliers}) at frame {frame_no}") + return False, None, set() + + # # -------------------- 4) Bookkeeping: add observations -------------------- + # used_cur_idx = set() + # for ok, pid, kp_idx in zip(inlier_mask, obj_pids, kp_ids): + # if not ok: + # continue + # world_map.points[pid].add_observation(frame_no, kp_idx, desc_cur[kp_idx]) # TODO change frame_no + # used_cur_idx.add(kp_idx) + + print(f"[PNP] Pose @ frame {frame_no} refined with {num_inl} inliers") + + # -------------------- 5) Debug visualization (unchanged) -------------------- + visualize_pnp_reprojection( + img2, K, Twc_cur, + pts3d_w=pts3d[inlier_mask], + pts2d_px=pts2d[inlier_mask], + inlier_mask=np.ones(num_inl, dtype=bool), + win_name="PnP reprojection" + ) + # print(f"[PNP] Pose @ frame {frame_no} refined with {len(inlier_mask)} inliers") + return True, Twc_cur, (obj_pids, kp_ids, inlier_mask) + +# --------------------------------------------------------------------------- # +# Main processing loop +# --------------------------------------------------------------------------- # +def main(): + PAUSED = False + args = _build_parser().parse_args() + + # --- Data loading --- + seq = load_sequence(args) + calib = load_calibration(args) # dict with K_l, P_l, ... 
+ groundtruth = load_groundtruth(args) # None or Nx3x4 array + K = calib["K_l"] # intrinsic matrix for left camera + P = calib["P_l"] # projection matrix for left camera + + # ------ build 4×4 GT poses + alignment matrix (once) ---------------- + gt_T = None + R_align = t_align = None + if groundtruth is not None: + gt_T = np.pad(groundtruth, ((0, 0), (0, 1), (0, 0)), constant_values=0.0) + gt_T[:, 3, 3] = 1.0 # homogeneous 1s + R_align, t_align = compute_gt_alignment(gt_T) + + # --- feature pipeline (OpenCV / LightGlue) --- + detector, matcher = init_feature_pipeline(args) + + mvt = MultiViewTriangulator( + K, + min_views=3, # ← “every 3 key-frames” + merge_radius=args.merge_radius, + max_rep_err=args.mvt_rep_err, + min_depth=args.min_depth, + max_depth=args.max_depth) + + # --- tracking state --- + prev_map, tracks = {}, {} + next_track_id = 0 + initialised = False + tracking_lost = False + + + world_map = Map() + Twc_cur_pose = np.eye(4) # camera‑to‑world (identity at t=0) + world_map.add_pose(Twc_cur_pose, is_keyframe=True) # initial pose + viz3d = None if args.no_viz3d else Visualizer3D(color_axis="y") + plot2d = TrajectoryPlotter() + + kfs: list[Keyframe] = [] + last_kf_frame_no = -999 + + # TODO FOR BUNDLE ADJUSTMENT + frame_keypoints: List[List[cv2.KeyPoint]] = [] #CHANGE + frame_keypoints.append([]) # placeholder to keep indices aligned + + # --- visualisation --- + achieved_fps = 0.0 + last_time = cv2.getTickCount() / cv2.getTickFrequency() + + # cv2.namedWindow('Feature Tracking', cv2.WINDOW_NORMAL) + # cv2.resizeWindow('Feature Tracking', 1200, 600) + + total = len(seq) - 1 + + + + new_ids: list[int] = [] # CHANGE + + for i in tqdm(range(total), desc='Tracking'): + # --- load image pair --- + img1, img2 = load_frame_pair(args, seq, i) + + # --- feature extraction / matching --- + kp1, des1 = feature_extractor(args, img1, detector) + kp2, des2 = feature_extractor(args, img2, detector) + matches = feature_matcher(args, kp1, kp2, des1, des2, matcher) + # --- filter matches with RANSAC --- + matches = filter_matches_ransac(kp1, kp2, matches, args.ransac_thresh) + + if len(matches) < 12: + print(f"[WARN] Not enough matches at frame {i}. 
Skipping.") + continue + + # ---------------- 2D - Feature Tracking -------------------------- # TODO should be done every keyframe maybe thats why we are having issues with BA + frame_no = i + 1 + curr_map, tracks, next_track_id = update_and_prune_tracks( + matches, prev_map, tracks, kp2, frame_no, next_track_id) + prev_map = curr_map # for the next iteration + + # ---------------- Key-frame decision -------------------------- # TODO make every frame a keyframe + + if i == 0: + kfs.append(Keyframe(idx=0, frame_idx=1, path=seq[0] if isinstance(seq[0], str) else "", + kps=kp1, desc=des1, pose=Twc_cur_pose, thumb=make_thumb(img1, tuple(args.kf_thumb_hw)))) + last_kf_frame_no = kfs[-1].frame_idx + prev_len = len(kfs) + is_kf = False + continue + else: + prev_len = len(kfs) + kfs, last_kf_frame_no = select_keyframe( + args, seq, i, img2, kp2, des2, Twc_cur_pose, matcher, kfs, last_kf_frame_no) + is_kf = len(kfs) > prev_len + + if is_kf: + frame_keypoints.append(kp2.copy()) + # print("len(kfs) = ", len(kfs), "last_kf_frame_no = ", last_kf_frame_no) + # ------------------------------------------------ bootstrap ------------------------------------------------ # TODO FIND A BETTER WAY TO manage index + if not initialised: + if len(kfs) < 2: + continue + bootstrap_matches = feature_matcher(args, kfs[0].kps, kfs[-1].kps, kfs[0].desc, kfs[-1].desc, matcher) + bootstrap_matches = filter_matches_ransac(kfs[0].kps, kfs[-1].kps, bootstrap_matches, args.ransac_thresh) + ok, Twc_temp_pose = try_bootstrap(K, kfs[0].kps, kfs[0].desc, kfs[-1].kps, kfs[-1].desc, bootstrap_matches, args, world_map) + if ok: + frame_keypoints[0] = kfs[0].kps.copy() # BA (img1 is frame-0) + frame_keypoints[-1] = kfs[-1].kps.copy() # BA (img2 is frame-1) + # print("POSES: " ,world_map.poses) + # two_view_ba(world_map, K, frame_keypoints, max_iters=25) # BA + + initialised = True + Twc_cur_pose = world_map.poses[-1].copy() # we are at frame i+1 + continue + else: + print("******************BOOTSTRAP FAILED**************") + continue # keep trying with next frame + + # ------------------------------------------------ tracking ------------------------------------------------- + Twc_pose_prev = Twc_cur_pose.copy() + # print(f'pose before PnP: {Twc_cur_pose}') + # ok_pnp, Twc_cur_pose, used_idx = solve_pnp_step( + # K, Twc_pose_pred, world_map, kp2, args) # kp2 is the current frame keypoints + ok_pnp, Twc_cur_pose, assoc = track_with_pnp(K, kp1, kp2, des1, des2, matches, + frame_no=i + 1, img2=img2, + Twc_prev=Twc_pose_prev, # pose from the *previous* iteration + world_map=world_map, + args=args) + + + if True: # fallback to 2-D-2-D if PnP failed + print(f"[WARN] PnP failed at frame {i}. Using 2D-2D tracking.") + # raise Exception(f"[WARN] PnP failed at frame {i}. 
Using 2D-2D tracking.") + if not is_kf: + last_kf = kfs[-1] + else: + last_kf = kfs[-2] if len(kfs) > 1 else kfs[0] + + tracking_matches = feature_matcher(args, last_kf.kps, kp2, last_kf.desc, des2, matcher) + tracking_matches = filter_matches_ransac(last_kf.kps, kp2, tracking_matches, args.ransac_thresh) + + pts0 = np.float32([last_kf.kps[m.queryIdx].pt for m in tracking_matches]) + pts1 = np.float32([kp2[m.trainIdx].pt for m in tracking_matches]) + E, mask = cv2.findEssentialMat(pts0, pts1, K, cv2.RANSAC, + 0.999, args.ransac_thresh) + if E is None: + tracking_lost = True + continue + _, R, t, mpose = cv2.recoverPose(E, pts0, pts1, K) + T_rel = _pose_rt_to_homogenous(R, t) # c₁ → c₂ + Twc_cur_pose = last_kf.pose @ np.linalg.inv(T_rel) # c₂ → world + tracking_lost = False + + if is_kf: + world_map.add_pose(Twc_cur_pose, is_keyframe=is_kf) + if ok_pnp and assoc is not None: + obj_pids, kp_ids, inlier_mask = assoc + kf_idx = len(world_map.poses) - 1 # index of the newly added KF + for ok, pid, kp_idx in zip(inlier_mask, obj_pids, kp_ids): + if ok: + world_map.points[pid].add_observation(kf_idx, kp_idx, des2[kp_idx]) + + + # pose_only_ba(world_map, K, frame_keypoints, # FOR BA + # frame_idx=len(world_map.poses)-1) + + # ------------------------------------------------ map growth ------------------------------------------------ + if is_kf: + # 1) hand the new KF to the multi-view triangulator + kf_pose_idx = len(world_map.poses) - 1 # this is the new pose’s index + mvt.add_keyframe( + frame_idx=kf_pose_idx, # global frame number of this KF + Twc_pose=Twc_cur_pose, + kps=kp2, + track_map=curr_map, + img_bgr=img2, + descriptors=des2) # new key-frame + + # 2) try triangulating all tracks that now have ≥3 distinct KFs + new_mvt_ids = mvt.triangulate_ready_tracks(world_map) + + # 3) visualisation hook + new_ids = new_mvt_ids # keeps 3-D viewer in sync + + # ------------------------------------------------ Local Bundle Adjustment ------------------------------------------------ + if is_kf and (len(kfs) % args.local_ba_window == 0): # or len(world_map.keyframe_indices) > args.local_ba_window + pose_prev = Twc_cur_pose.copy() + center_kf_idx = kfs[-1].idx + print(f"[BA] Running local BA around key-frame {center_kf_idx} (window size = {args.local_ba_window}) , current = {len(world_map.poses) - 1}") + # print(f'len keyframes = {len(kfs)}, len frame_keypoints = {len(frame_keypoints)}, len poses = {len(world_map.poses)}') + # print(f"world_map.poses = {len(world_map.poses)}, \n raw: {world_map.poses} \n keyframe_indices= {len(world_map.keyframe_indices)},\n raw: {world_map.keyframe_indices}") + # --- before BA ----------------------------------------------------------- + from slam.core.visualize_ba import visualize_ba_window + world_map_before = deepcopy(world_map) # <—— new + local_bundle_adjustment( + world_map, K, frame_keypoints, + center_kf_idx=len(world_map.poses) - 1, + window_size=args.local_ba_window) + + first_opt = max(1, center_kf_idx - args.local_ba_window + 1) + opt_kf = list(range(first_opt, center_kf_idx + 1)) + # visualize_ba_window(seq, args, + # K, + # world_map_before, world_map, # before / after maps + # frame_keypoints, + # opt_kf) # list of key-frame indices + + # p = Twc_cur_pose[:3, 3] + # p_gt = gt_T[i + 1, :3, 3] + # print(f"Cam position z = {p}, GT = {p_gt} (should decrease on KITTI)") + + # --- 2-D path plot (cheap) ---------------------------------------------- + def _scale_translation(pos3: np.ndarray, scale: float) -> np.ndarray: + """ + Scale a 3D *position* (translation) 
by 'scale' for plotting. + NOTE: Do NOT multiply the whole 4x4 pose by a scalar—only scale the translation component. + """ + return np.asarray(pos3, dtype=float) * float(scale) + + est_pos = Twc_cur_pose[:3, 3] + gt_pos = None + if gt_T is not None and i + 1 < len(gt_T): + p_gt = gt_T[i + 1, :3, 3] # raw GT + # gt_pos = apply_alignment(p_gt, R_align, t_align) + gt_pos = _scale_translation(p_gt, 0.3) + plot2d.append(est_pos, gt_pos, mirror_x=False) + + + # # --- 2-D track maintenance (for GUI only) --- + # frame_no = i + 1 + # prev_map, tracks, next_track_id = update_and_prune_tracks( + # matches, prev_map, tracks, kp2, frame_no, next_track_id) + + + # --- 3-D visualisation --- + if viz3d is not None: + viz3d.update(world_map, new_ids) + + + key = cv2.waitKey(1) & 0xFF + if key == 27: + break + if key == ord('p') and viz3d is not None: + viz3d.paused = not viz3d.paused + + + cv2.destroyAllWindows() + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/modules/SimpleSLAM/slam/monocular/main2.py b/modules/SimpleSLAM/slam/monocular/main2.py new file mode 100644 index 0000000000..f51fa35f91 --- /dev/null +++ b/modules/SimpleSLAM/slam/monocular/main2.py @@ -0,0 +1,751 @@ +# main.py +""" +WITH MULTI-VIEW TRIANGULATION - New PnP + +Entry-point: high-level processing loop +-------------------------------------- +$ python main.py --dataset kitti --base_dir ../Dataset + + +The core loop now performs: + 1) Feature detection + matching (OpenCV or LightGlue) + 2) Essential‑matrix estimation + pose recovery + 3) Landmarks triangulation (with Z‑filtering) + 4) Pose integration (camera trajectory in world frame) + 5) Optional 3‑D visualisation via Open3D + +The script shares most command‑line arguments with the previous version +but adds `--no_viz3d` to disable the 3‑D window. 
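+
+Example invocation (a sketch; the dataset path and flag values are placeholders,
+adapt them to your setup):
+
+$ python main2.py --dataset kitti --base_dir ../Dataset \
+      --local_ba_window 6 --proj_radius 12.0 --no_viz3d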
+ +""" +import argparse +from copy import deepcopy +import cv2 +import lz4.frame +import numpy as np +from tqdm import tqdm +from typing import List + +from slam.core.pose_utils import _pose_inverse, _pose_rt_to_homogenous + +from slam.core.dataloader import ( + load_sequence, + load_frame_pair, + load_calibration, + load_groundtruth) + +from slam.core.features_utils import ( + init_feature_pipeline, + feature_extractor, + feature_matcher, + filter_matches_ransac) + +from slam.core.keyframe_utils import ( + Keyframe, + select_keyframe, + make_thumb) + +from slam.core.visualization_utils import draw_tracks, Visualizer3D, TrajectoryPlotter +from slam.core.trajectory_utils import compute_gt_alignment, apply_alignment +from slam.core.landmark_utils import Map +from slam.core.triangulation_utils import update_and_prune_tracks, MultiViewTriangulator, triangulate_points +from slam.core.pnp_utils import associate_landmarks, refine_pose_pnp +from slam.core.ba_utils import ( + two_view_ba, + pose_only_ba, + local_bundle_adjustment +) + +# --------------------------------------------------------------------------- # +# CLI +# --------------------------------------------------------------------------- # +def _build_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser("Feature tracking with key-frames") + p.add_argument('--dataset', + choices=['kitti', 'malaga', 'tum-rgbd', 'custom'], + required=True) + p.add_argument('--base_dir', default='../Dataset') + # feature/detector settings + p.add_argument('--detector', choices=['orb', 'sift', 'akaze'], + default='orb') + p.add_argument('--matcher', choices=['bf'], default='bf') + p.add_argument('--use_lightglue', action='store_true') + p.add_argument('--min_conf', type=float, default=0.7, + help='Minimum LightGlue confidence for a match') + # runtime + p.add_argument('--fps', type=float, default=10) + # RANSAC + p.add_argument('--ransac_thresh', type=float, default=3.0) + # key-frame params + p.add_argument('--kf_max_disp', type=float, default=45) + p.add_argument('--kf_min_inliers', type=float, default=150) + p.add_argument('--kf_cooldown', type=int, default=5) + p.add_argument('--kf_thumb_hw', type=int, nargs=2, + default=[640, 360]) + + # 3‑D visualisation toggle + p.add_argument("--no_viz3d", action="store_true", help="Disable 3‑D visualization window") + # triangulation depth filtering + p.add_argument("--min_depth", type=float, default=0.60) + p.add_argument("--max_depth", type=float, default=50.0) + p.add_argument('--mvt_rep_err', type=float, default=30.0, + help='Max mean reprojection error (px) for multi-view triangulation') + + # PnP / map-maintenance + p.add_argument('--pnp_min_inliers', type=int, default=5) + p.add_argument('--proj_radius', type=float, default=12.0) + p.add_argument('--merge_radius', type=float, default=0.10) + + # Bundle Adjustment + p.add_argument('--local_ba_window', type=int, default=6, help='Window size (number of keyframes) for local BA') + + return p + + +# --------------------------------------------------------------------------- # +# Bootstrap initialisation +# --------------------------------------------------------------------------- # +def try_bootstrap(K, kp0, descs0, kp1, descs1, matches, args, world_map): + """Return (success, T_cam0_w, T_cam1_w) and add initial landmarks.""" + if len(matches) < 50: + return False, None, None + + # 1. 
pick a model: Essential (general) *or* Homography (planar) + pts0 = np.float32([kp0[m.queryIdx].pt for m in matches]) + pts1 = np.float32([kp1[m.trainIdx].pt for m in matches]) + + E, inl_E = cv2.findEssentialMat( + pts0, pts1, K, cv2.RANSAC, 0.999, args.ransac_thresh) + + H, inl_H = cv2.findHomography( + pts0, pts1, cv2.RANSAC, args.ransac_thresh) + + # score = #inliers (ORB-SLAM uses a more elaborate model-selection score) + use_E = (inl_E.sum() > inl_H.sum()) + + if use_E and E is None: + return False, None, None + if not use_E and H is None: + return False, None, None + + if use_E: + _, R, t, inl_pose = cv2.recoverPose(E, pts0, pts1, K) + mask = (inl_E.ravel() & inl_pose.ravel()).astype(bool) + else: + print("[BOOTSTRAP] Using Homography for initialisation") + R, t = cv2.decomposeHomographyMat(H, K)[1:3] # take the best hypothesis + mask = inl_H.ravel().astype(bool) + + # 2. triangulate those inliers – exactly once + p0 = pts0[mask] + p1 = pts1[mask] + pts3d = triangulate_points(K, R, t, p0, p1) + + z0 = pts3d[:, 2] + pts3d_cam1 = (R @ pts3d.T + t.reshape(3, 1)).T + z1 = pts3d_cam1[:, 2] + + # both cameras see the point in front of them + ok = ( (z0 > args.min_depth) & (z0 < args.max_depth) & # in front of cam‑0 + (z1 > args.min_depth) & (z1 < args.max_depth) ) # in front of cam‑1) + pts3d = pts3d[ok] + print(f"[BOOTSTRAP] Triangulated {len(pts3d)} points. Status: {ok.sum()} inliers") + + if len(pts3d) < 80: + print("[BOOTSTRAP] Not enough points to bootstrap the map.") + return False, None, None + + T1_cw = _pose_rt_to_homogenous(R, t) # camera-from-world + T1_wc = _pose_inverse(T1_cw) # world-from-camera + # 3. fill the map + world_map.add_pose(T1_wc, is_keyframe=True) # Keyframe because we only bootstrap on keyframes + + cols = np.full((len(pts3d), 3), 0.7) # grey – colour is optional here + ids = world_map.add_points(pts3d, cols, keyframe_idx=0) #TODO 0 or 1 + + # ----------------------------------------------- + # add (frame_idx , kp_idx) pairs for each new MP + # ----------------------------------------------- + inlier_kp_idx = np.where(mask)[0][ok] # kp indices that survived depth + # build index arrays once + qidx = np.int32([m.queryIdx for m in matches]) + tidx = np.int32([m.trainIdx for m in matches]) + sel = np.where(mask)[0][ok] # selected matches + + for pid, i0, i1 in zip(ids, qidx[sel], tidx[sel]): + world_map.points[pid].add_observation(0, i0, descs0[i0]) + world_map.points[pid].add_observation(1, i1, descs1[i1]) + + + + print(f"[BOOTSTRAP] Map initialised with {len(ids)} landmarks.") + return True, T1_wc + + +# --------------------------------------------------------------------------- # +# Continuous pose tracking (PnP) +# --------------------------------------------------------------------------- # +def visualize_pnp_reprojection(img_bgr, K, T_wc, pts3d_w, pts2d_px, inlier_mask=None, + win_name="PnP debug", thickness=2): + """ + Draw projected 3‑D landmarks (from world) on top of the current image and connect + them to the actual detected keypoints. + + img_bgr : current image (BGR) + K : 3x3 intrinsics + T_wc : 4x4 pose (cam->world) + pts3d_w : (N,3) 3D points in *world* coords + pts2d_px : (N,2) measured pixel locations that took part in PnP + inlier_mask : optional boolean array (len=N). If given, only inliers get lines. 
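+
+    Colour convention in the overlay below: each projected landmark is drawn as
+    a filled blue dot, the measured keypoint in green (inlier) or red (outlier),
+    and a connecting line is drawn for inliers only.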
+ """ + import cv2 + import numpy as np + + img = img_bgr.copy() + # world -> cam + T_cw = np.linalg.inv(T_wc) + R_cw, t_cw = T_cw[:3, :3], T_cw[:3, 3] + rvec, _ = cv2.Rodrigues(R_cw) + tvec = t_cw.reshape(3, 1) + + proj, _ = cv2.projectPoints(pts3d_w.astype(np.float32), rvec, tvec, K, None) + proj = proj.reshape(-1, 2) + + if inlier_mask is None: + inlier_mask = np.ones(len(proj), dtype=bool) + + for (u_meas, v_meas), (u_proj, v_proj), ok in zip(pts2d_px, proj, inlier_mask): + color = (0, 255, 0) if ok else (0, 0, 255) # green for inlier, red for outlier + cv2.circle(img, (int(round(u_proj)), int(round(v_proj))), 4, (255, 0, 0), -1) # projected (blue) + cv2.circle(img, (int(round(u_meas)), int(round(v_meas))), 4, color, -1) # measured + if ok: + cv2.line(img, (int(round(u_meas)), int(round(v_meas))), + (int(round(u_proj)), int(round(v_proj))), color, thickness) + + cv2.imshow(win_name, img) + cv2.waitKey(1) + return img + +def track_with_pnp( + K, + kp_prev, kp_cur, desc_prev, desc_cur, matches, img2, + frame_no, Twc_prev, Twc_pred, world_map, args +): + """ + Continuous PnP tracking: project map landmarks with Twc_pred, associate to + current keypoints, estimate Twc for the current frame, and register inliers. + + Returns + ------- + ok : bool + Twc_cur : (4,4) np.ndarray – pose camera-to-world for frame `frame_no` + assoc_tuple : (obj_pids, kp_ids, inlier_mask) for the final pose + """ + from slam.core.pnp_utils import project_points + + # -------------------- 0) Quick checks -------------------- + if len(world_map.points) < 4 or len(kp_cur) == 0: + print(f"[PNP] Not enough data at frame {frame_no} " + f"(points={len(world_map.points)}, kps={len(kp_cur)})") + return False, None, set() + + # Gather map points and their ids (fixed order pairing) + mp_ids = world_map.point_ids() # list[int] + pts_w = world_map.get_point_array().astype(np.float32) # (N,3) + if len(mp_ids) != len(pts_w): + print("[PNP] Map containers out of sync; skipping frame.") + return False, None, set() + + # ---------- Args / defaults ---------- + margin = float(getattr(args, "proj_margin", 20.0)) # px + min_depth = float(getattr(args, "min_depth", 0.1)) # m + max_depth = float(getattr(args, "max_depth", np.inf)) # m + proj_radius = float(getattr(args, "proj_radius", 6.0)) # px (at current scale) + pnp_min_inl = int(getattr(args, "pnp_min_inliers", 15)) + ransac_thr = float(getattr(args, "ransac_thresh", 2.0)) # px + # descriptor thresholds (optional; auto-selected by dtype) + desc_cos_min = float(getattr(args, "desc_cos_min", 0.70)) # for float descriptors + hamm_max = int(getattr(args, "hamm_max", 45)) # for binary descriptors + use_desc_chk = bool(getattr(args, "use_desc_check", True)) # gate by descriptor if possible + + H, W = img2.shape[:2] + kp_xy = np.float32([kp.pt for kp in kp_cur]) # (K,2) + + # -------------------- 1) Visibility gates (predicted pose) -------------------- + # Use Twc_pred to judge visibility (cheirality + frustum + light range band) + proj_px_pred = project_points(K, Twc_pred, pts_w) # (N,2) + T_cw_pred = np.linalg.inv(Twc_pred) + R_cw_pred, t_cw_pred = T_cw_pred[:3, :3], T_cw_pred[:3, 3] + Xc_pred = (R_cw_pred @ pts_w.T + t_cw_pred[:, None]).T # (N,3) + z_pred = Xc_pred[:, 2] + + u, v = proj_px_pred[:, 0], proj_px_pred[:, 1] + valid_pix = np.isfinite(u) & np.isfinite(v) & \ + (u >= margin) & (u < W - margin) & \ + (v >= margin) & (v < H - margin) + valid_depth = (z_pred > min_depth) & (z_pred < max_depth) + + # Range band relative to predicted camera center + C_pred = Twc_pred[:3, 3] + d = 
np.linalg.norm(pts_w - C_pred[None, :], axis=1) + # derive a robust band from the distribution (avoid 0/inf pathologies) + d_med = np.median(d[np.isfinite(d) & (d > 0)]) + if not np.isfinite(d_med) or d_med <= 0: + d_med = np.median(d) if np.median(d) > 0 else 1.0 + dmin = getattr(args, "range_band_min_ratio", 0.5) * d_med + dmax = getattr(args, "range_band_max_ratio", 3.0) * d_med + valid_range = (d >= dmin) & (d <= dmax) + + keep_idx = np.where(valid_pix & valid_depth & valid_range)[0] + if keep_idx.size < 4: + print(f"[PNP] Too few gated points (keep={keep_idx.size}) at frame {frame_no}") + return False, None, set() + + pts_w = pts_w[keep_idx] + proj_px = proj_px_pred[keep_idx] + mp_ids = [mp_ids[i] for i in keep_idx] + + # -------------------- Descriptor helpers (optional) -------------------- + def _lm_descriptor(pid): + p = world_map.points[pid] + # Try common fields/methods; fall back to None + if hasattr(p, "last_descriptor"): + try: + return p.last_descriptor() + except Exception: + pass + if hasattr(p, "descriptor"): + return p.descriptor + if hasattr(p, "descs"): + try: + if len(p.descs) > 0: + return p.descs[-1] + except Exception: + pass + return None + + def _desc_match_ok(d_map, d_cur): + if d_map is None or d_cur is None: + return True # no descriptor to check; allow + d_map = np.asarray(d_map).reshape(-1) + d_cur = np.asarray(d_cur).reshape(-1) + if d_map.dtype == np.uint8 or d_cur.dtype == np.uint8: + # binary (e.g., ORB) + dist = cv2.norm(d_map, d_cur, cv2.NORM_HAMMING) + return dist <= hamm_max + # float: use cosine similarity + num = float(np.dot(d_map, d_cur)) + den = float(np.linalg.norm(d_map) * np.linalg.norm(d_cur) + 1e-8) + cos = num / den + return cos >= desc_cos_min + + # -------------------- Association: windowed NN (+ optional descriptor) -------------------- + def _associate(search_rad_px: float): + used_kp = set() + obj_pts, img_pts = [], [] + obj_pids, kp_ids = [], [] + r2 = float(search_rad_px * search_rad_px) + + for i, (u_i, v_i) in enumerate(proj_px): + # Greedy NN in image space + d2 = np.sum((kp_xy - (u_i, v_i))**2, axis=1) + j = int(np.argmin(d2)) + if d2[j] >= r2 or j in used_kp: + continue + # Optional descriptor verification (if available) + if use_desc_chk: + lm_desc = _lm_descriptor(mp_ids[i]) + cur_desc = None + try: + cur_desc = desc_cur[j] + except Exception: + cur_desc = None + if not _desc_match_ok(lm_desc, cur_desc): + continue + + obj_pts.append(pts_w[i]) + img_pts.append(kp_xy[j]) + obj_pids.append(mp_ids[i]) # landmark id + kp_ids.append(j) # keypoint index + used_kp.add(j) + + if len(obj_pts) == 0: + return (np.empty((0, 3), np.float32), np.empty((0, 2), np.float32), + [], []) + return (np.asarray(obj_pts, np.float32), + np.asarray(img_pts, np.float32), + obj_pids, kp_ids) + + # Try with configured radius; if too few, expand once + pts3d, pts2d, obj_pids, kp_ids = _associate(proj_radius) + if len(pts3d) < max(4, pnp_min_inl // 2): + pts3d, pts2d, obj_pids, kp_ids = _associate(proj_radius * 1.5) + + if len(pts3d) < 4: + print(f"[PNP] Not enough 2D–3D correspondences (found {len(pts3d)}) at frame {frame_no}") + return False, None, set() + + # -------------------- 2) Robust PnP (+ LM refine) -------------------- + # R,t returned map **world → camera** (T_cw); convert to **camera → world** + R_cw, tvec = refine_pose_pnp(K, pts3d, pts2d) # AP3P RANSAC + LM + if R_cw is None: + print(f"[PNP] RANSAC/LM failed at frame {frame_no}") + return False, None, set() + + T_cw = _pose_rt_to_homogenous(R_cw, tvec) + Twc_cur = _pose_inverse(T_cw) + + # 
-------------------- 3) Inliers under refined pose (add cheirality) -------------------- + rvec, _ = cv2.Rodrigues(R_cw) + proj_refined, _ = cv2.projectPoints(pts3d.astype(np.float32), rvec, tvec, K, None) + proj_refined = proj_refined.reshape(-1, 2) + reproj_err = np.linalg.norm(proj_refined - pts2d, axis=1) + + # cheirality under refined pose + T_cw_cur = np.linalg.inv(Twc_cur) + R_cw_cur, t_cw_cur = T_cw_cur[:3, :3], T_cw_cur[:3, 3] + Xc_cur = (R_cw_cur @ pts3d.T + t_cw_cur[:, None]).T + z_cur = Xc_cur[:, 2] + + inlier_mask = (reproj_err < ransac_thr) & (z_cur > min_depth) + num_inl = int(np.count_nonzero(inlier_mask)) + if num_inl < pnp_min_inl: + print(f"[PNP] Too few inliers after refine ({num_inl}<{pnp_min_inl}) at frame {frame_no}") + return False, None, set() + + # -------------------- 4) One re-association pass with refined pose -------------------- + # Recompute gating with Twc_cur; then associate in a smaller window. + proj_px_cur = project_points(K, Twc_cur, pts_w) # note: pts_w is the gated subset + T_cw_cur_full = np.linalg.inv(Twc_cur) + Xc_cur_full = (T_cw_cur_full[:3, :3] @ pts_w.T + T_cw_cur_full[:3, 3][:, None]).T + z_cur_full = Xc_cur_full[:, 2] + u2, v2 = proj_px_cur[:, 0], proj_px_cur[:, 1] + valid_pix2 = np.isfinite(u2) & np.isfinite(v2) & \ + (u2 >= margin) & (u2 < W - margin) & \ + (v2 >= margin) & (v2 < H - margin) + valid_depth2 = (z_cur_full > min_depth) & (z_cur_full < max_depth) + keep2 = np.where(valid_pix2 & valid_depth2)[0] + if keep2.size >= 4: + pts_w2 = pts_w[keep2] + proj_px2 = proj_px_cur[keep2] + mp_ids2 = [mp_ids[i] for i in keep2] + + # Temporarily swap the working sets for a tighter association + pts_w_save, proj_px_save, mp_ids_save = pts_w, proj_px, mp_ids + pts_w, proj_px, mp_ids = pts_w2, proj_px2, mp_ids2 + + pts3d_b, pts2d_b, obj_pids_b, kp_ids_b = _associate(max(2.0, proj_radius * 0.5)) + if len(pts3d_b) >= 4: + R_cw_b, tvec_b = refine_pose_pnp(K, pts3d_b, pts2d_b) + if R_cw_b is not None: + T_cw_b = _pose_rt_to_homogenous(R_cw_b, tvec_b) + Twc_cur_b = _pose_inverse(T_cw_b) + + # Inliers for the second pass (with cheirality) + rvec_b, _ = cv2.Rodrigues(R_cw_b) + proj_b, _ = cv2.projectPoints(pts3d_b.astype(np.float32), rvec_b, tvec_b, K, None) + proj_b = proj_b.reshape(-1, 2) + err_b = np.linalg.norm(proj_b - pts2d_b, axis=1) + + T_cw_b_full = np.linalg.inv(Twc_cur_b) + Xc_b = (T_cw_b_full[:3, :3] @ pts3d_b.T + T_cw_b_full[:3, 3][:, None]).T + z_b = Xc_b[:, 2] + mask_b = (err_b < ransac_thr) & (z_b > min_depth) + if int(np.count_nonzero(mask_b)) >= max(num_inl, pnp_min_inl): + # Accept tighter solution + Twc_cur = Twc_cur_b + pts3d, pts2d, obj_pids, kp_ids = pts3d_b, pts2d_b, obj_pids_b, kp_ids_b + inlier_mask = mask_b + num_inl = int(np.count_nonzero(inlier_mask)) + + # restore working sets + pts_w, proj_px, mp_ids = pts_w_save, proj_px_save, mp_ids_save + + print(f"[PNP] Pose @ frame {frame_no} refined with {num_inl} inliers " + f"(thr={ransac_thr:.2f}, margin={margin:.1f}, radius={proj_radius:.1f})") + + # -------------------- 5) Debug visualization -------------------- + try: + visualize_pnp_reprojection( + img2, K, Twc_cur, + pts3d_w=pts3d[inlier_mask], + pts2d_px=pts2d[inlier_mask], + inlier_mask=np.ones(num_inl, dtype=bool), + win_name="PnP reprojection" + ) + except Exception as e: + print(f"[PNP] Viz error (ignored): {e}") + + # Return the triple (to match your current caller usage) + return True, Twc_cur, (obj_pids, kp_ids, inlier_mask) + +# --------------------------------------------------------------------------- # +# Main 
processing loop +# --------------------------------------------------------------------------- # +def main(): + PAUSED = False + args = _build_parser().parse_args() + + # --- Data loading --- + seq = load_sequence(args) + calib = load_calibration(args) # dict with K_l, P_l, ... + groundtruth = load_groundtruth(args) # None or Nx3x4 array + K = calib["K_l"] # intrinsic matrix for left camera + P = calib["P_l"] # projection matrix for left camera + + # ------ build 4×4 GT poses + alignment matrix (once) ---------------- + gt_T = None + R_align = t_align = None + if groundtruth is not None: + gt_T = np.pad(groundtruth, ((0, 0), (0, 1), (0, 0)), constant_values=0.0) + gt_T[:, 3, 3] = 1.0 # homogeneous 1s + R_align, t_align = compute_gt_alignment(gt_T) + + # --- feature pipeline (OpenCV / LightGlue) --- + detector, matcher = init_feature_pipeline(args) + + mvt = MultiViewTriangulator( + K, + min_views=3, # ← “every 3 key-frames” + merge_radius=args.merge_radius, + max_rep_err=args.mvt_rep_err, + min_depth=args.min_depth, + max_depth=args.max_depth) + + # --- tracking state --- + prev_map, tracks = {}, {} + next_track_id = 0 + initialised = False + tracking_lost = False + + + world_map = Map() + Twc_cur_pose = np.eye(4) # camera‑to‑world (identity at t=0) + world_map.add_pose(Twc_cur_pose, is_keyframe=True) # initial pose + viz3d = None if args.no_viz3d else Visualizer3D(color_axis="y") + plot2d = TrajectoryPlotter() + + kfs: list[Keyframe] = [] + last_kf_frame_no = -999 + + # TODO FOR BUNDLE ADJUSTMENT + frame_keypoints: List[List[cv2.KeyPoint]] = [] #CHANGE + frame_keypoints.append([]) # placeholder to keep indices aligned + + # --- visualisation --- + achieved_fps = 0.0 + last_time = cv2.getTickCount() / cv2.getTickFrequency() + + # cv2.namedWindow('Feature Tracking', cv2.WINDOW_NORMAL) + # cv2.resizeWindow('Feature Tracking', 1200, 600) + + total = len(seq) - 1 + + + + new_ids: list[int] = [] # CHANGE + + + Twc_prev_frame = None + Twc_prevprev_frame = None + + for i in tqdm(range(total), desc='Tracking'): + # --- load image pair --- + img1, img2 = load_frame_pair(args, seq, i) + + # --- feature extraction / matching --- + kp1, des1 = feature_extractor(args, img1, detector) + kp2, des2 = feature_extractor(args, img2, detector) + matches = feature_matcher(args, kp1, kp2, des1, des2, matcher) + # --- filter matches with RANSAC --- + matches = filter_matches_ransac(kp1, kp2, matches, args.ransac_thresh) + + if len(matches) < 12: + print(f"[WARN] Not enough matches at frame {i}. 
Skipping.") + continue + + # ---------------- 2D - Feature Tracking -------------------------- # TODO should be done every keyframe maybe thats why we are having issues with BA + frame_no = i + 1 + curr_map, tracks, next_track_id = update_and_prune_tracks( + matches, prev_map, tracks, kp2, frame_no, next_track_id) + prev_map = curr_map # for the next iteration + + # ---------------- Key-frame decision -------------------------- # TODO make every frame a keyframe + + if i == 0: + kfs.append(Keyframe(idx=0, frame_idx=1, path=seq[0] if isinstance(seq[0], str) else "", + kps=kp1, desc=des1, pose=Twc_cur_pose, thumb=make_thumb(img1, tuple(args.kf_thumb_hw)))) + last_kf_frame_no = kfs[-1].frame_idx + prev_len = len(kfs) + is_kf = False + continue + else: + prev_len = len(kfs) + kfs, last_kf_frame_no = select_keyframe( + args, seq, i, img2, kp2, des2, Twc_cur_pose, matcher, kfs, last_kf_frame_no) + is_kf = len(kfs) > prev_len + + if is_kf: + frame_keypoints.append(kp2.copy()) + # print("len(kfs) = ", len(kfs), "last_kf_frame_no = ", last_kf_frame_no) + # ------------------------------------------------ bootstrap ------------------------------------------------ # TODO FIND A BETTER WAY TO manage index + if not initialised: + if len(kfs) < 2: + continue + bootstrap_matches = feature_matcher(args, kfs[0].kps, kfs[-1].kps, kfs[0].desc, kfs[-1].desc, matcher) + bootstrap_matches = filter_matches_ransac(kfs[0].kps, kfs[-1].kps, bootstrap_matches, args.ransac_thresh) + ok, Twc_temp_pose = try_bootstrap(K, kfs[0].kps, kfs[0].desc, kfs[-1].kps, kfs[-1].desc, bootstrap_matches, args, world_map) + if ok: + frame_keypoints[0] = kfs[0].kps.copy() # BA (img1 is frame-0) + frame_keypoints[-1] = kfs[-1].kps.copy() # BA (img2 is frame-1) + # print("POSES: " ,world_map.poses) + # two_view_ba(world_map, K, frame_keypoints, max_iters=25) # BA + + initialised = True + Twc_cur_pose = world_map.poses[-1].copy() # we are at frame i+1 + continue + else: + print("******************BOOTSTRAP FAILED**************") + continue # keep trying with next frame + + # ------------------------------------------------ tracking ------------------------------------------------- + + # TODO constant-velocity prediction + # Twc_pred = Twc_cur_pose # fallback: last pose + # if i >= 2: + # Twc_prevprev = world_map.poses[-2] if len(world_map.poses) >= 2 else None + # if Twc_prevprev is not None: + # T_rel = np.linalg.inv(Twc_prevprev) @ Twc_cur_pose # c_{t-2} -> c_{t-1} + # Twc_pred = Twc_cur_pose @ T_rel # predict c_t + Twc_pred = Twc_cur_pose + if Twc_prev_frame is not None and Twc_prevprev_frame is not None: + T_rel = np.linalg.inv(Twc_prevprev_frame) @ Twc_prev_frame # c_{t-2} -> c_{t-1} + Twc_pred = Twc_prev_frame @ T_rel + + ok_pnp, Twc_cur_pose, assoc = track_with_pnp( + K, kp1, kp2, des1, des2, matches, frame_no=i+1, img2=img2, + Twc_prev=Twc_cur_pose, Twc_pred=Twc_pred, world_map=world_map, args=args) + + + + if not ok_pnp: # fallback to 2-D-2-D if PnP failed + print(f"[WARN] PnP failed at frame {i}. Using 2D-2D tracking.") + # raise Exception(f"[WARN] PnP failed at frame {i}. 
Using 2D-2D tracking.") + if not is_kf: + last_kf = kfs[-1] + else: + last_kf = kfs[-2] if len(kfs) > 1 else kfs[0] + + tracking_matches = feature_matcher(args, last_kf.kps, kp2, last_kf.desc, des2, matcher) + tracking_matches = filter_matches_ransac(last_kf.kps, kp2, tracking_matches, args.ransac_thresh) + + pts0 = np.float32([last_kf.kps[m.queryIdx].pt for m in tracking_matches]) + pts1 = np.float32([kp2[m.trainIdx].pt for m in tracking_matches]) + E, mask = cv2.findEssentialMat(pts0, pts1, K, cv2.RANSAC, + 0.999, args.ransac_thresh) + if E is None: + tracking_lost = True + continue + _, R, t, mpose = cv2.recoverPose(E, pts0, pts1, K) + T_rel = _pose_rt_to_homogenous(R, t) # c₁ → c₂ + Twc_cur_pose = last_kf.pose @ np.linalg.inv(T_rel) # c₂ → world + tracking_lost = False + + if is_kf: + world_map.add_pose(Twc_cur_pose, is_keyframe=is_kf) + if ok_pnp and assoc is not None: + obj_pids, kp_ids, inlier_mask = assoc + kf_idx = len(world_map.poses) - 1 # index of the newly added KF + for ok, pid, kp_idx in zip(inlier_mask, obj_pids, kp_ids): + if ok: + world_map.points[pid].add_observation(kf_idx, kp_idx, des2[kp_idx]) + + + # pose_only_ba(world_map, K, frame_keypoints, # FOR BA + # frame_idx=len(world_map.poses)-1) + + # ------------------------------------------------ map growth ------------------------------------------------ + if is_kf: + # 1) hand the new KF to the multi-view triangulator + kf_pose_idx = len(world_map.poses) - 1 # this is the new pose’s index + mvt.add_keyframe( + frame_idx=kf_pose_idx, # global frame number of this KF + Twc_pose=Twc_cur_pose, + kps=kp2, + track_map=curr_map, + img_bgr=img2, + descriptors=des2) # new key-frame + + # 2) try triangulating all tracks that now have ≥3 distinct KFs + new_mvt_ids = mvt.triangulate_ready_tracks(world_map) + + # 3) visualisation hook + new_ids = new_mvt_ids # keeps 3-D viewer in sync + + # ------------------------------------------------ Local Bundle Adjustment ------------------------------------------------ + if is_kf and (len(kfs) % args.local_ba_window == 0): # or len(world_map.keyframe_indices) > args.local_ba_window + pose_prev = Twc_cur_pose.copy() + center_kf_idx = kfs[-1].idx + print(f"[BA] Running local BA around key-frame {center_kf_idx} (window size = {args.local_ba_window}) , current = {len(world_map.poses) - 1}") + # print(f'len keyframes = {len(kfs)}, len frame_keypoints = {len(frame_keypoints)}, len poses = {len(world_map.poses)}') + # print(f"world_map.poses = {len(world_map.poses)}, \n raw: {world_map.poses} \n keyframe_indices= {len(world_map.keyframe_indices)},\n raw: {world_map.keyframe_indices}") + # --- before BA ----------------------------------------------------------- + from slam.core.visualize_ba import visualize_ba_window + world_map_before = deepcopy(world_map) # <—— new + local_bundle_adjustment( + world_map, K, frame_keypoints, + center_kf_idx=len(world_map.poses) - 1, + window_size=args.local_ba_window) + + first_opt = max(1, center_kf_idx - args.local_ba_window + 1) + opt_kf = list(range(first_opt, center_kf_idx + 1)) + # visualize_ba_window(seq, args, + # K, + # world_map_before, world_map, # before / after maps + # frame_keypoints, + # opt_kf) # list of key-frame indices + + # p = Twc_cur_pose[:3, 3] + # p_gt = gt_T[i + 1, :3, 3] + # print(f"Cam position z = {p}, GT = {p_gt} (should decrease on KITTI)") + + # --- 2-D path plot (cheap) ---------------------------------------------- + def _scale_translation(pos3: np.ndarray, scale: float) -> np.ndarray: + """ + Scale a 3D *position* (translation) 
by 'scale' for plotting. + NOTE: Do NOT multiply the whole 4x4 pose by a scalar—only scale the translation component. + """ + return np.asarray(pos3, dtype=float) * float(scale) + + est_pos = Twc_cur_pose[:3, 3] + gt_pos = None + if gt_T is not None and i + 1 < len(gt_T): + p_gt = gt_T[i + 1, :3, 3] # raw GT + # gt_pos = apply_alignment(p_gt, R_align, t_align) + gt_pos = _scale_translation(p_gt, 0.3) + plot2d.append(est_pos, gt_pos, mirror_x=False) + + + # # --- 2-D track maintenance (for GUI only) --- + # frame_no = i + 1 + # prev_map, tracks, next_track_id = update_and_prune_tracks( + # matches, prev_map, tracks, kp2, frame_no, next_track_id) + + + # --- 3-D visualisation --- + if viz3d is not None: + viz3d.update(world_map, new_ids) + + + key = cv2.waitKey(1) & 0xFF + if key == 27: + break + if key == ord('p') and viz3d is not None: + viz3d.paused = not viz3d.paused + + + cv2.destroyAllWindows() + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/modules/SimpleSLAM/slam/monocular/main4.py b/modules/SimpleSLAM/slam/monocular/main4.py new file mode 100644 index 0000000000..3df1c9cfc6 --- /dev/null +++ b/modules/SimpleSLAM/slam/monocular/main4.py @@ -0,0 +1,689 @@ +# main.py +""" +Monocular VO/SLAM main with: + • PnP tracking + Multi-View Triangulation + • Frame-by-frame track overlay + • NEW: Last-3 Keyframes triptych (OpenCV window, reliable) + - New landmarks (just triangulated) = GREEN dots + - Cross-KF polyline turns RED once a landmark is seen in ≥ --track_maturity KFs + - Per-panel overlay: "KF #i | features: N" + • Optional 2-D Matplotlib trajectory plotter (stable refresh) + +Notes: + - Keyframe thumbnails in your Keyframe dataclass are lz4-compressed JPEG bytes. + We now decode those safely for visualization. 
(see _decode_thumb) +""" + +import argparse +from copy import deepcopy +import cv2 +import numpy as np +from tqdm import tqdm +from typing import List, Tuple, Dict, Optional, Iterable, Set + +import lz4.frame # for decoding KF thumbnails + +from slam.core.pose_utils import _pose_inverse, _pose_rt_to_homogenous + +from slam.core.dataloader import ( + load_sequence, + load_frame_pair, + load_calibration, + load_groundtruth, +) + +from slam.core.features_utils import ( + init_feature_pipeline, + feature_extractor, + feature_matcher, + filter_matches_ransac, +) + +from slam.core.keyframe_utils import ( + Keyframe, + select_keyframe, + make_thumb, # retained, but we keep our own BGR thumbs for GUI +) + +from slam.core.visualization_utils import ( + draw_tracks, + Visualizer3D, + TrajectoryPlotter, +) + +from slam.core.trajectory_utils import compute_gt_alignment +from slam.core.landmark_utils import Map +from slam.core.triangulation_utils import ( + update_and_prune_tracks, + MultiViewTriangulator, + triangulate_points, +) +from slam.core.pnp_utils import refine_pose_pnp +from slam.core.ba_utils import ( + two_view_ba, + pose_only_ba, + local_bundle_adjustment, +) + +# --------------------------------------------------------------------------- # +# CLI +# --------------------------------------------------------------------------- # +def _build_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser("Feature tracking with key-frames") + p.add_argument('--dataset', + choices=['kitti', 'malaga', 'tum-rgbd', 'custom'], + required=True) + p.add_argument('--base_dir', default='../Dataset') + # feature/detector settings + p.add_argument('--detector', choices=['orb', 'sift', 'akaze'], + default='orb') + p.add_argument('--matcher', choices=['bf'], default='bf') + p.add_argument('--use_lightglue', action='store_true') + p.add_argument('--min_conf', type=float, default=0.7, + help='Minimum LightGlue confidence for a match') + # runtime + p.add_argument('--fps', type=float, default=10) + # RANSAC + p.add_argument('--ransac_thresh', type=float, default=3.0) + # key-frame params + p.add_argument('--kf_max_disp', type=float, default=45) + p.add_argument('--kf_min_inliers', type=float, default=150) + p.add_argument('--kf_cooldown', type=int, default=5) + p.add_argument('--kf_thumb_hw', type=int, nargs=2, default=[640, 360]) + + # visualization toggles + p.add_argument("--no_viz3d", action="store_true", help="Disable 3-D visualization window") + p.add_argument('--no_plot2d', action='store_true', help='Disable the Matplotlib 2-D trajectory plotter') + + # triangulation depth filtering + p.add_argument("--min_depth", type=float, default=0.60) + p.add_argument("--max_depth", type=float, default=50.0) + p.add_argument('--mvt_rep_err', type=float, default=30.0, + help='Max mean reprojection error (px) for multi-view triangulation') + + # PnP / map-maintenance + p.add_argument('--pnp_min_inliers', type=int, default=20) + p.add_argument('--proj_radius', type=float, default=12.0) + p.add_argument('--merge_radius', type=float, default=0.10) + + # Bundle Adjustment + p.add_argument('--local_ba_window', type=int, default=6, help='Window size (number of keyframes) for local BA') + + # NEW: track maturity + triptych toggle + p.add_argument('--track_maturity', type=int, default=6, + help='#KFs after which a track is rendered in RED') + p.add_argument('--no_triptych', action='store_true', + help='Disable the last-3-keyframes triptych overlay') + + return p + + +# 
--------------------------------------------------------------------------- # +# Bootstrap initialisation +# --------------------------------------------------------------------------- # +def try_bootstrap(K, kp0, descs0, kp1, descs1, matches, args, world_map): + """Return (success, T1_wc) and add initial landmarks.""" + if len(matches) < 50: + return False, None + + pts0 = np.float32([kp0[m.queryIdx].pt for m in matches]) + pts1 = np.float32([kp1[m.trainIdx].pt for m in matches]) + + E, inl_E = cv2.findEssentialMat( + pts0, pts1, K, cv2.RANSAC, 0.999, args.ransac_thresh) + H, inl_H = cv2.findHomography(pts0, pts1, cv2.RANSAC, args.ransac_thresh) + + use_E = (inl_E is not None and inl_H is not None and inl_E.sum() > inl_H.sum()) or (inl_E is not None and H is None) + if use_E and E is None: + return False, None + if not use_E and H is None: + return False, None + + if use_E: + _, R, t, inl_pose = cv2.recoverPose(E, pts0, pts1, K) + mask = (inl_E.ravel() & inl_pose.ravel()).astype(bool) + else: + print("[BOOTSTRAP] Using Homography for initialisation") + R, t = cv2.decomposeHomographyMat(H, K)[1:3] + mask = inl_H.ravel().astype(bool) + + p0 = pts0[mask] + p1 = pts1[mask] + pts3d = triangulate_points(K, R, t, p0, p1) + + z0 = pts3d[:, 2] + pts3d_cam1 = (R @ pts3d.T + t.reshape(3, 1)).T + z1 = pts3d_cam1[:, 2] + + ok = ((z0 > args.min_depth) & (z0 < args.max_depth) & + (z1 > args.min_depth) & (z1 < args.max_depth)) + pts3d = pts3d[ok] + print(f"[BOOTSTRAP] Triangulated {len(pts3d)} points. Inliers after depth: {ok.sum()}") + + if len(pts3d) < 80: + print("[BOOTSTRAP] Not enough points to bootstrap the map.") + return False, None + + T1_cw = _pose_rt_to_homogenous(R, t) # camera-from-world + T1_wc = _pose_inverse(T1_cw) # world-from-camera + + world_map.add_pose(T1_wc, is_keyframe=True) + + cols = np.full((len(pts3d), 3), 0.7) + ids = world_map.add_points(pts3d, cols, keyframe_idx=0) + + inlier_kp_idx = np.where(mask)[0][ok] + qidx = np.int32([m.queryIdx for m in matches]) + tidx = np.int32([m.trainIdx for m in matches]) + sel = np.where(mask)[0][ok] + + for pid, i0, i1 in zip(ids, qidx[sel], tidx[sel]): + world_map.points[pid].add_observation(0, i0, descs0[i0]) + world_map.points[pid].add_observation(1, i1, descs1[i1]) + + print(f"[BOOTSTRAP] Map initialised with {len(ids)} landmarks.") + return True, T1_wc + + +# --------------------------------------------------------------------------- # +# PnP + reprojection debug +# --------------------------------------------------------------------------- # +def visualize_pnp_reprojection(img_bgr, K, T_wc, pts3d_w, pts2d_px, inlier_mask=None, + win_name="PnP debug", thickness=2): + img = img_bgr.copy() + T_cw = np.linalg.inv(T_wc) + R_cw, t_cw = T_cw[:3, :3], T_cw[:3, 3] + rvec, _ = cv2.Rodrigues(R_cw) + tvec = t_cw.reshape(3, 1) + + proj, _ = cv2.projectPoints(pts3d_w.astype(np.float32), rvec, tvec, K, None) + proj = proj.reshape(-1, 2) + + if inlier_mask is None: + inlier_mask = np.ones(len(proj), dtype=bool) + + for (u_meas, v_meas), (u_proj, v_proj), ok in zip(pts2d_px, proj, inlier_mask): + color = (0, 255, 0) if ok else (0, 0, 255) # green inlier, red outlier + cv2.circle(img, (int(round(u_proj)), int(round(v_proj))), 4, (255, 0, 0), -1) + cv2.circle(img, (int(round(u_meas)), int(round(v_meas))), 4, color, -1) + if ok: + cv2.line(img, + (int(round(u_meas)), int(round(v_meas))), + (int(round(u_proj)), int(round(v_proj))), + color, thickness) + + cv2.imshow(win_name, img) + cv2.waitKey(1) + return img + + +def track_with_pnp(K, + kp_prev, kp_cur, 
desc_prev, desc_cur, matches, img2, + frame_no, + Twc_prev, + world_map, args): + if len(world_map.points) < 4 or len(kp_cur) == 0: + print(f"[PNP] Not enough data at frame {frame_no} " + f"(points={len(world_map.points)}, kps={len(kp_cur)})") + return False, None, set() + + mp_ids = world_map.point_ids() + pts_w = world_map.get_point_array().astype(np.float32) + if len(mp_ids) != len(pts_w): + print("[PNP] Map containers out of sync; skipping frame.") + return False, None, set() + + from slam.core.pnp_utils import project_points + proj_px = project_points(K, Twc_prev, pts_w) + kp_xy = np.float32([kp.pt for kp in kp_cur]) + + def _associate(search_rad_px: float): + used_kp = set() + obj_pts, img_pts = [], [] + obj_pids, kp_ids = [], [] + r2 = search_rad_px * search_rad_px + for i, (u, v) in enumerate(proj_px): + d2 = np.sum((kp_xy - (u, v))**2, axis=1) + j = int(np.argmin(d2)) + if d2[j] < r2 and j not in used_kp: + obj_pts.append(pts_w[i]) + img_pts.append(kp_xy[j]) + obj_pids.append(mp_ids[i]) + kp_ids.append(j) + used_kp.add(j) + if len(obj_pts) == 0: + return (np.empty((0, 3), np.float32), np.empty((0, 2), np.float32), + [], []) + return (np.asarray(obj_pts, np.float32), + np.asarray(img_pts, np.float32), + obj_pids, kp_ids) + + pts3d, pts2d, obj_pids, kp_ids = _associate(args.proj_radius) + if len(pts3d) < max(4, args.pnp_min_inliers // 2): + pts3d, pts2d, obj_pids, kp_ids = _associate(args.proj_radius * 1.5) + if len(pts3d) < 4: + print(f"[PNP] Not enough 2D–3D correspondences (found {len(pts3d)}) at frame {frame_no}") + return False, None, set() + + R_cw, tvec = refine_pose_pnp(K, pts3d, pts2d) + if R_cw is None: + print(f"[PNP] RANSAC/LM failed at frame {frame_no}") + return False, None, set() + T_cw = _pose_rt_to_homogenous(R_cw, tvec) + Twc_cur = _pose_inverse(T_cw) + + rvec, _ = cv2.Rodrigues(R_cw) + proj_refined, _ = cv2.projectPoints(pts3d.astype(np.float32), rvec, tvec, K, None) + proj_refined = proj_refined.reshape(-1, 2) + reproj_err = np.linalg.norm(proj_refined - pts2d, axis=1) + inlier_mask = reproj_err < float(3) + num_inl = int(np.count_nonzero(inlier_mask)) + if num_inl < args.pnp_min_inliers: + print(f"[PNP] Too few inliers after refine ({num_inl}<{args.pnp_min_inliers}) at frame {frame_no}") + return False, None, set() + + print(f"[PNP] Pose @ frame {frame_no} refined with {num_inl} inliers") + + visualize_pnp_reprojection( + img2, K, Twc_cur, + pts3d_w=pts3d[inlier_mask], + pts2d_px=pts2d[inlier_mask], + inlier_mask=np.ones(num_inl, dtype=bool), + win_name="PnP reprojection" + ) + return True, Twc_cur, (obj_pids, kp_ids, inlier_mask) + + +# --------------------------------------------------------------------------- # +# NEW — Keyframe track visualizations +# --------------------------------------------------------------------------- # +def _ensure_bgr(img: np.ndarray) -> np.ndarray: + if img is None: + return None + if img.ndim == 2: + return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + if img.shape[2] == 4: + return cv2.cvtColor(img, cv2.COLOR_BGRA2BGR) + return img + +def _put_text_with_outline(img: np.ndarray, text: str, org: Tuple[int, int], + scale: float = 0.7, color=(255, 255, 255), + thickness: int = 2) -> None: + cv2.putText(img, text, org, cv2.FONT_HERSHEY_SIMPLEX, scale, (0, 0, 0), thickness + 2, cv2.LINE_AA) + cv2.putText(img, text, org, cv2.FONT_HERSHEY_SIMPLEX, scale, color, thickness, cv2.LINE_AA) + +def _decode_thumb(thumb_bytes: bytes) -> Optional[np.ndarray]: + """ + Decode lz4-compressed JPEG bytes (Keyframe.thumb) into a BGR image. 
+ """ + if not isinstance(thumb_bytes, (bytes, bytearray, memoryview)): + return None + try: + raw = lz4.frame.decompress(thumb_bytes) # JPEG bytes + arr = np.frombuffer(raw, np.uint8) + img = cv2.imdecode(arr, cv2.IMREAD_COLOR) + return img + except Exception: + return None + +def _collect_obs_for_kf(world_map: Map, kf_idx: int) -> Dict[int, int]: + """Return {pid -> kp_idx} for all points that have an observation in keyframe `kf_idx`.""" + out: Dict[int, int] = {} + for pid, mp in world_map.points.items(): + for fidx, kpi, _ in mp.observations: + if fidx == kf_idx: + out[pid] = kpi + break + return out # observations are stored as (frame_idx, kp_idx, descriptor). :contentReference[oaicite:2]{index=2} + +def _track_length_in_keyframes(world_map: Map, pid: int) -> int: + if pid not in world_map.points: + return 0 + kf_set = {f for (f, _, _) in world_map.points[pid].observations} + return len(kf_set) + +def render_kf_triptych( + kf_triplet: List[Tuple[int, np.ndarray, List[cv2.KeyPoint]]], + world_map: Map, + *, + maturity_thresh: int = 6, + new_ids: Optional[Iterable[int]] = None, + title: Optional[str] = None, +) -> Optional[np.ndarray]: + """ + Render last-3 keyframes side-by-side, overlaying features and cross-KF tracks. + + kf_triplet: [(kf_pose_idx, bgr_image, keypoints), ...] len = 1..3 + """ + if not kf_triplet: + return None + + new_ids_set: Set[int] = set(new_ids) if new_ids is not None else set() + + # Normalise sizes (use first as reference) + H0, W0 = kf_triplet[0][1].shape[:2] + panels = [] + for kf_idx, img, _ in kf_triplet: + img = _ensure_bgr(img) + if img is None: + continue + if img.shape[:2] != (H0, W0): + img = cv2.resize(img, (W0, H0), interpolation=cv2.INTER_AREA) + panels.append((kf_idx, img.copy())) + if not panels: + return None + + # Precompute observations for each KF + obs_by_kf: Dict[int, Dict[int, int]] = {kf: _collect_obs_for_kf(world_map, kf) for kf, _ in panels} + + # Draw points on each panel + for (kf_idx, img), (_, _, kps) in zip(panels, kf_triplet): + obs = obs_by_kf.get(kf_idx, {}) + feat_count = len(obs) + _put_text_with_outline(img, f"KF #{kf_idx} | features: {feat_count}", (10, 24), 0.8, (255, 255, 255), 2) + for pid, kp_idx in obs.items(): + if kp_idx < 0 or kp_idx >= len(kps): # defensive + continue + x, y = map(int, np.round(kps[kp_idx].pt)) + color = (0, 255, 0) if pid in new_ids_set else (220, 220, 220) + r = 4 if pid in new_ids_set else 3 + cv2.circle(img, (x, y), r, color, -1) + + # Build canvas and paste panels + widths = [p[1].shape[1] for p in panels] + height = H0 + canvas = np.zeros((height, sum(widths), 3), dtype=np.uint8) + x0 = 0 + x_offsets = [] + for (_, img) in panels: + w = img.shape[1] + canvas[:, x0:x0+w] = img + x_offsets.append(x0) + x0 += w + + # Draw cross-KF polylines for landmarks that appear in ≥2 of the panels + track_map: Dict[int, List[Tuple[int, Tuple[int, int]]]] = {} + for pidx, (kf_idx, _img) in enumerate(panels): + obs = obs_by_kf.get(kf_idx, {}) + kps = kf_triplet[pidx][2] + for pid, kp_idx in obs.items(): + if kp_idx < 0 or kp_idx >= len(kps): + continue + pt = tuple(map(int, np.round(kps[kp_idx].pt))) + track_map.setdefault(pid, []).append((pidx, pt)) + + for pid, samples in track_map.items(): + if len(samples) < 2: + continue + samples.sort(key=lambda t: t[0]) + pts = [] + for pidx, (x, y) in samples: + pts.append([x + x_offsets[pidx], y]) + pts = np.array(pts, dtype=np.int32).reshape((-1, 1, 2)) + L = _track_length_in_keyframes(world_map, pid) + colour = (0, 0, 255) if L >= maturity_thresh else (0, 255, 0) + 
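+        # Colour convention (same as the window title composed by the caller): a track seen in at
+        # least `maturity_thresh` keyframes is drawn red (BGR 0, 0, 255); younger tracks are green.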
cv2.polylines(canvas, [pts], isClosed=False, color=colour, thickness=2, lineType=cv2.LINE_AA) + + if title: + _put_text_with_outline(canvas, title, (10, height - 10), 0.8, (255, 255, 255), 2) + + return canvas + + +# --------------------------------------------------------------------------- # +# Main processing loop +# --------------------------------------------------------------------------- # +def main(): + args = _build_parser().parse_args() + + # --- Data loading --- + seq = load_sequence(args) + calib = load_calibration(args) + groundtruth = load_groundtruth(args) + K = calib["K_l"] + P = calib["P_l"] + + # ------ build 4×4 GT poses + alignment matrix (once) ---------------- + gt_T = None + if groundtruth is not None: + gt_T = np.pad(groundtruth, ((0, 0), (0, 1), (0, 0)), constant_values=0.0) + gt_T[:, 3, 3] = 1.0 + _ = compute_gt_alignment(gt_T) # retained if you want aligned GT later + + # --- feature pipeline --- + detector, matcher = init_feature_pipeline(args) + + mvt = MultiViewTriangulator( + K, + min_views=3, + merge_radius=args.merge_radius, + max_rep_err=args.mvt_rep_err, + min_depth=args.min_depth, + max_depth=args.max_depth + ) + + # --- tracking state --- + prev_map, tracks = {}, {} + next_track_id = 0 + initialised = False + + world_map = Map() + Twc_cur_pose = np.eye(4) + world_map.add_pose(Twc_cur_pose, is_keyframe=True) + viz3d = None if args.no_viz3d else Visualizer3D(color_axis="y") + + plot2d = None if args.no_plot2d else TrajectoryPlotter() # Matplotlib plotter window. :contentReference[oaicite:3]{index=3} + + kfs: list[Keyframe] = [] + last_kf_frame_no = -999 + frame_keypoints: List[List[cv2.KeyPoint]] = [] + frame_keypoints.append([]) + + total = len(seq) - 1 + + # For triptych: map[kf_pose_idx] -> (thumb_bgr, kps) + kf_vis: Dict[int, Tuple[np.ndarray, List[cv2.KeyPoint]]] = {} + + new_ids: List[int] = [] # new landmarks added at the most recent KF + + # Prepare a named, resizable window for the triptych (stable) + TRIP_WIN = "Keyframes (last 3) — feature tracks" + if not args.no_triptych: + cv2.namedWindow(TRIP_WIN, cv2.WINDOW_NORMAL) + + for i in tqdm(range(total), desc='Tracking'): + img1, img2 = load_frame_pair(args, seq, i) + + # --- feature extraction / matching --- + kp1, des1 = feature_extractor(args, img1, detector) + kp2, des2 = feature_extractor(args, img2, detector) + matches = feature_matcher(args, kp1, kp2, des1, des2, matcher) + matches = filter_matches_ransac(kp1, kp2, matches, args.ransac_thresh) + + if len(matches) < 12: + print(f"[WARN] Not enough matches at frame {i}. 
Skipping.") + continue + + # ---------------- Frame-by-frame track overlay ---------------- + frame_no = i + 1 + curr_map, tracks, next_track_id = update_and_prune_tracks( + matches, prev_map, tracks, kp2, frame_no, next_track_id) + prev_map = curr_map + + vis_img = img2.copy() + draw_tracks(vis_img, tracks, frame_no, max_age=15, sample_rate=3, max_tracks=300) + cv2.imshow("Frame tracks (age-coloured)", vis_img) + + # ---------------- Key-frame decision ---------------- + if i == 0: + kfs.append(Keyframe(idx=0, frame_idx=1, + path=seq[0] if isinstance(seq[0], str) else "", + kps=kp1, desc=des1, + pose=Twc_cur_pose, + thumb=make_thumb(img1, tuple(args.kf_thumb_hw)))) + last_kf_frame_no = kfs[-1].frame_idx + is_kf = False + continue + else: + prev_len = len(kfs) + kfs, last_kf_frame_no = select_keyframe( + args, seq, i, img2, kp2, des2, Twc_cur_pose, matcher, kfs, last_kf_frame_no) + is_kf = len(kfs) > prev_len + + if is_kf: + frame_keypoints.append(kp2.copy()) + + # ---------------- Bootstrap ---------------- + if not initialised: + if len(kfs) < 2: + continue + bootstrap_matches = feature_matcher(args, kfs[0].kps, kfs[-1].kps, + kfs[0].desc, kfs[-1].desc, matcher) + bootstrap_matches = filter_matches_ransac(kfs[0].kps, kfs[-1].kps, + bootstrap_matches, args.ransac_thresh) + ok, Twc_temp_pose = try_bootstrap( + K, kfs[0].kps, kfs[0].desc, kfs[-1].kps, kfs[-1].desc, bootstrap_matches, args, world_map) + if ok: + frame_keypoints[0] = kfs[0].kps.copy() + frame_keypoints[-1] = kfs[-1].kps.copy() + initialised = True + Twc_cur_pose = world_map.poses[-1].copy() + continue + else: + print("****************** BOOTSTRAP FAILED **************") + continue + + # ---------------- Tracking (PnP) ---------------- + Twc_pose_prev = Twc_cur_pose.copy() + ok_pnp, Twc_cur_pose, assoc = track_with_pnp( + K, kp1, kp2, des1, des2, matches, + frame_no=i + 1, img2=img2, + Twc_prev=Twc_pose_prev, + world_map=world_map, + args=args + ) + + # Fallback to 2-D-2-D only if PnP failed + if not ok_pnp: + print(f"[WARN] PnP failed at frame {i}. Using 2D-2D tracking.") + last_kf = kfs[-1] if not is_kf else (kfs[-2] if len(kfs) > 1 else kfs[0]) + + tracking_matches = feature_matcher(args, last_kf.kps, kp2, last_kf.desc, des2, matcher) + tracking_matches = filter_matches_ransac(last_kf.kps, kp2, tracking_matches, args.ransac_thresh) + + pts0 = np.float32([last_kf.kps[m.queryIdx].pt for m in tracking_matches]) + pts1 = np.float32([kp2[m.trainIdx].pt for m in tracking_matches]) + E, mask = cv2.findEssentialMat(pts0, pts1, K, cv2.RANSAC, 0.999, args.ransac_thresh) + if E is None: + continue + _, R, t, _ = cv2.recoverPose(E, pts0, pts1, K) + T_rel = _pose_rt_to_homogenous(R, t) + Twc_cur_pose = last_kf.pose @ np.linalg.inv(T_rel) + + if is_kf: + world_map.add_pose(Twc_cur_pose, is_keyframe=True) + + # Book-keep PnP inliers as observations on the new KF + if ok_pnp and assoc is not None: + obj_pids, kp_ids, inlier_mask = assoc + kf_idx = len(world_map.poses) - 1 # index of newly added KF + for ok_m, pid, kp_idx in zip(inlier_mask, obj_pids, kp_ids): + if ok_m and pid in world_map.points: + world_map.points[pid].add_observation(kf_idx, kp_idx, des2[kp_idx]) + + # ---------------- Map Growth via Multi-View Triangulation ------------- + if is_kf: + kf_pose_idx = len(world_map.poses) - 1 + + # Build a BGR thumbnail for GUI (avoid bytes-in-GUI issues) + w, h = tuple(args.kf_thumb_hw) # [W, H] by convention in your code. 
+            thumb_bgr = cv2.resize(img2, (w, h), interpolation=cv2.INTER_AREA)
+            kf_vis[kf_pose_idx] = (thumb_bgr, kp2)
+
+            # (Still store compressed thumb in Keyframe via select_keyframe)
+            mvt.add_keyframe(
+                frame_idx=kf_pose_idx,
+                Twc_pose=Twc_cur_pose,
+                kps=kp2,
+                track_map=curr_map,
+                img_bgr=img2,
+                descriptors=des2
+            )
+            new_mvt_ids = mvt.triangulate_ready_tracks(world_map)
+            new_ids = list(new_mvt_ids)
+
+        # ---------------- Local Bundle Adjustment (optional) -------------------
+        if is_kf and (len(kfs) % args.local_ba_window == 0):
+            center_kf_idx = len(world_map.poses) - 1
+            print(f"[BA] Running local BA around key-frame {center_kf_idx} (window size = {args.local_ba_window})")
+            _ = deepcopy(world_map)
+            local_bundle_adjustment(
+                world_map, K, frame_keypoints,
+                center_kf_idx=center_kf_idx,
+                window_size=args.local_ba_window)
+
+        # ---------------- 2D path plot (Matplotlib) -------------------
+        def _scale_translation(pos3: np.ndarray, scale: float) -> np.ndarray:
+            """
+            Scale a 3D *position* (translation) by 'scale' for plotting.
+            NOTE: Do NOT multiply the whole 4x4 pose by a scalar—only scale the translation component.
+            """
+            return np.asarray(pos3, dtype=float) * float(scale)
+
+        if plot2d is not None:
+            est_pos = Twc_cur_pose[:3, 3]
+            gt_pos = None
+            if gt_T is not None and i + 1 < len(gt_T):
+                p_gt = gt_T[i + 1, :3, 3]          # raw GT
+                # gt_pos = apply_alignment(p_gt, R_align, t_align)
+                gt_pos = _scale_translation(p_gt, 0.3)
+            plot2d.append(est_pos, gt_pos, mirror_x=False)
+            # tiny event loop tick for reliable redraw across backends
+            import matplotlib.pyplot as _plt
+            _plt.pause(0.001)   # <-- prevents the “small black rectangle” issue in many setups.
+
+        # ---------------- 3D viz (Open3D) ----------------------
+        if viz3d is not None:
+            viz3d.update(world_map, new_ids)
+
+        # ---------------- NEW: "last 3 KFs" triptych -----------
+        if is_kf and not args.no_triptych:
+            last_kf_indices = sorted(kf_vis.keys())[-3:]
+            triplet = []
+            for idx in last_kf_indices:
+                thumb, kps = kf_vis[idx]
+                # defensive: if thumb somehow missing, try decoding stored Keyframe.thumb
+                if thumb is None or thumb.size == 0:
+                    # find KF order to map idx→Keyframe
+                    # (poses are appended in order, so i-th KF has pose index world_map.keyframe_indices[i])
+                    # but we cached thumbs; this is a fallback for robustness
+                    for kf in kfs:
+                        if kf.pose is not None and idx < len(world_map.poses):
+                            dec = _decode_thumb(kf.thumb)  # lz4-compressed JPEG → BGR.
+                            if dec is not None:
+                                w, h = tuple(args.kf_thumb_hw)
+                                thumb = cv2.resize(dec, (w, h), interpolation=cv2.INTER_AREA)
+                                break
+                triplet.append((idx, thumb, kps))
+            trip = render_kf_triptych(
+                triplet, world_map,
+                maturity_thresh=args.track_maturity,
+                new_ids=new_ids,
+                title=f"New points: green || Mature tracks (>= {args.track_maturity}) : red"
+            )
+            if trip is not None:
+                # ensure the window is large enough to see everything
+                H, W = trip.shape[:2]
+                cv2.resizeWindow(TRIP_WIN, max(960, int(W)), max(360, int(H)))
+                cv2.imshow(TRIP_WIN, trip)
+
+        # ---------------- UI keys ------------------------------
+        key = cv2.waitKey(0) & 0xFF
+        if key == 27:  # ESC
+            break
+
+    cv2.destroyAllWindows()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/modules/SimpleSLAM/slam/monocular/main_revamped.py b/modules/SimpleSLAM/slam/monocular/main_revamped.py
new file mode 100644
index 0000000000..99a06f2b81
--- /dev/null
+++ b/modules/SimpleSLAM/slam/monocular/main_revamped.py
@@ -0,0 +1,474 @@
+# main.py
+"""
+WITH MULTI-VIEW TRIANGULATION - New PnP
+
+Entry-point: high-level processing loop
+--------------------------------------
+$ python main.py --dataset kitti --base_dir ../Dataset
+
+
+The core loop now performs:
+  1) Feature detection + matching (OpenCV or LightGlue)
+  2) Essential‑matrix estimation + pose recovery
+  3) Landmark triangulation (with Z‑filtering)
+  4) Pose integration (camera trajectory in world frame)
+  5) Optional 3‑D visualisation via Open3D
+
+The script shares most command‑line arguments with the previous version
+but adds `--no_viz3d` to disable the 3‑D window.
+
+"""
+import argparse
+from copy import deepcopy
+import cv2
+import lz4.frame
+import numpy as np
+from tqdm import tqdm
+from typing import List
+
+from slam.core.pose_utils import _pose_inverse, _pose_rt_to_homogenous
+
+from slam.core.dataloader import (
+    load_sequence,
+    load_frame_pair,
+    load_calibration,
+    load_groundtruth)
+
+from slam.core.features_utils import (
+    init_feature_pipeline,
+    feature_extractor,
+    feature_matcher,
+    filter_matches_ransac)
+
+from slam.core.keyframe_utils import (
+    Keyframe,
+    select_keyframe,
+    make_thumb)
+
+from slam.core.visualization_utils import draw_tracks, Visualizer3D, Trajectory2D, VizUI
+from slam.core.trajectory_utils import compute_gt_alignment, apply_alignment
+from slam.core.landmark_utils import Map
+
+import logging
+logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(name)s:%(funcName)s: %(message)s")  # INFO for clean summary, DEBUG for deep dive
+# logging.getLogger("two_view_bootstrap").setLevel(logging.DEBUG)
+# logging.getLogger("pnp").setLevel(logging.DEBUG)
+# logging.getLogger("multi_view").setLevel(logging.DEBUG)
+# logging.getLogger("triangulation").setLevel(logging.DEBUG)
+log = logging.getLogger("main")
+
+
+from slam.core.two_view_bootstrap import (
+    InitParams,
+    pts_from_matches,                        # used to build point arrays
+    evaluate_two_view_bootstrap_with_masks,  # NEW (returns inlier mask)
+    bootstrap_two_view_map                   # NEW (builds KF0+KF1+points)
+)
+
+from slam.core.pnp_utils import (
+    predict_pose_const_vel,
+    reproject_and_match_2d3d,
+    solve_pnp_ransac,
+    draw_reprojection_debug
+)
+
+from slam.core.triangulation_utils import (
+    triangulate_between_kfs_2view
+)
+
+from slam.core.ba_utils import pose_only_ba, local_bundle_adjustment
+
+
+class BootstrapState:
+    def __init__(self):
+        self.has_ref = False
+        self.kps_ref = None
+        self.des_ref = None
+        self.img_ref = None
+        self.frame_id_ref = -1
+    def 
seed(self, kps, des, img, frame_id): + self.has_ref = True + self.kps_ref, self.des_ref = kps, des + self.img_ref, self.frame_id_ref = img, frame_id + log.info(f"[Init] Seeded reference @frame={frame_id} (kps={len(kps)})") + def clear(self): + log.info("[Init] Clearing reference (bootstrap succeeded).") + self.__init__() + +def _refresh_ref_needed(matches, min_matches=80, max_age=30, cur_id=0, ref_id=0): + too_few = len(matches) < min_matches + too_old = (cur_id - ref_id) > max_age + if too_few: log.info(f"[Init] Refresh ref: few matches ({len(matches)}<{min_matches})") + if too_old: log.info(f"[Init] Refresh ref: age={cur_id-ref_id}>{max_age}") + return too_few or too_old + +# --------------------------------------------------------------------------- # +# CLI +# --------------------------------------------------------------------------- # +def _build_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser("Feature tracking with key-frames") + p.add_argument('--dataset', + choices=['kitti', 'malaga', 'tum-rgbd', 'custom'], + required=True) + p.add_argument('--base_dir', default='../Dataset') + + # feature/detector settings + p.add_argument('--detector', choices=['orb', 'sift', 'akaze'], + default='orb') + p.add_argument('--matcher', choices=['bf'], default='bf') + p.add_argument('--use_lightglue', action='store_true') + p.add_argument('--min_conf', type=float, default=0.7, + help='Minimum LightGlue confidence for a match') + # runtime + p.add_argument('--fps', type=float, default=10) + + # RANSAC + p.add_argument('--ransac_thresh', type=float, default=2.5) + + # key-frame params + p.add_argument('--kf_max_disp', type=float, default=45) + p.add_argument('--kf_min_inliers', type=float, default=150) + p.add_argument('--kf_min_ratio', type=float, default=0.35, + help='Min inlier ratio (to prev KF kps) before promoting KF') + p.add_argument('--kf_min_rot_deg', type=float, default=8.0, + help='Min rotation (deg) wrt prev KF to trigger KF') + p.add_argument('--kf_cooldown', type=int, default=5) + p.add_argument('--kf_thumb_hw', type=int, nargs=2, + default=[640, 360]) + + # 3‑D visualisation toggle + p.add_argument("--no_viz3d", action="store_true", help="Disable 3‑D visualization window") + + # triangulation depth filtering + p.add_argument("--min_depth", type=float, default=0.40) + p.add_argument("--max_depth", type=float, default=float('inf')) + p.add_argument('--mvt_rep_err', type=float, default=2.0, + help='Max mean reprojection error (px) for multi-view triangulation') + + # PnP / map-maintenance + p.add_argument('--pnp_min_inliers', type=int, default=20) + p.add_argument('--proj_radius', type=float, default=12.0) + p.add_argument('--merge_radius', type=float, default=0.10) + + # Bundle Adjustment + p.add_argument('--local_ba_window', type=int, default=6, help='Window size (number of keyframes) for local BA') + + return p + + +# --------------------------------------------------------------------------- # +# Main processing loop +# --------------------------------------------------------------------------- # +def main(): + PAUSED = False + args = _build_parser().parse_args() + + # --- Data loading --- + seq = load_sequence(args) + calib = load_calibration(args) # dict with K_l, P_l, ... 
+ groundtruth = load_groundtruth(args) # None or Nx3x4 array + K = calib["K_l"] # intrinsic matrix for left camera + P = calib["P_l"] # projection matrix for left camera + + # ------ build 4×4 GT poses + alignment matrix (once) ---------------- + gt_T = None + R_align = t_align = None + if groundtruth is not None: + gt_T = np.pad(groundtruth, ((0, 0), (0, 1), (0, 0)), constant_values=0.0) + gt_T[:, 3, 3] = 1.0 # homogeneous 1s + R_align, t_align = compute_gt_alignment(gt_T) + + # --- feature pipeline (OpenCV / LightGlue) --- + detector, matcher = init_feature_pipeline(args) + + + + bs = BootstrapState() + + # --- tracking state --- + prev_map, tracks = {}, {} + next_track_id = 0 + initialised = False + tracking_lost = False + + # --- World Map Initialization --- + world_map = Map() + Tcw_cur_pose = np.eye(4) # camera-from-world (identity at t=0) + + # --- Visualization --- + viz3d = None if args.no_viz3d else Visualizer3D(color_axis="y") + traj2d = Trajectory2D(gt_T_list=gt_T if groundtruth is not None else None) + ui = VizUI() # p: pause/resume, n: step, q/Esc: quit + + # --- Keyframe Initialization --- + kfs: list[Keyframe] = [] + last_kf_frame_no = -999 + + # --- visualisation --- + achieved_fps = 0.0 + last_time = cv2.getTickCount() / cv2.getTickFrequency() + + new_ids: list[int] = [] # list of new IDs for showing them in 3D viz + total = len(seq) - 1 + + for i in tqdm(range(total), desc='Tracking'): + + # --- load image pair --- + img1, img2 = load_frame_pair(args, seq, i) + + # --- feature extraction / matching --- + kp1, des1 = feature_extractor(args, img1, detector) + kp2, des2 = feature_extractor(args, img2, detector) + + # --------------------------------------------------------------------------- # + # ------------------------ Bootstrap (delayed two-view) ------------------------------------------------ + # --------------------------------------------------------------------------- # + if not initialised: + # 1) Seed a reference frame the first time we get here + if not bs.has_ref: + # use the *earlier* image/features as reference; your loop already has img1/img2, kp1/des1 & kp2/des2 + bs.seed(kp1, des1, img1, frame_id=i) # frame_id = i for img1 + continue + + # 2) Match reference ↔ current using your existing pipeline + matches_bs = feature_matcher(args, bs.kps_ref, kp2, bs.des_ref, des2, matcher) + matches_bs = filter_matches_ransac(bs.kps_ref, kp2, matches_bs, args.ransac_thresh) + log.info(f"[Init] Matches ref→cur: raw={len(matches_bs)} ransac_th={args.ransac_thresh:.2f}px") + + # Optional: refresh reference if matches are weak or ref is stale + if _refresh_ref_needed(matches_bs, min_matches=80, max_age=30, cur_id=i+1, ref_id=bs.frame_id_ref): + bs.seed(kp2, des2, img2, frame_id=i+1) + continue + + # 3) Build point arrays and evaluate the pair + pts_ref, pts_cur = pts_from_matches(bs.kps_ref, kp2, matches_bs) + init_params = InitParams( + ransac_px=float(args.ransac_thresh), + min_posdepth=0.90, + min_parallax_deg=1.5, + score_ratio_H=0.45 + ) + # Decide with masks (no redundancy later) + decision = evaluate_two_view_bootstrap_with_masks(K, pts_ref, pts_cur, init_params) + if decision is None: + log.info("[Init] Pair rejected → waiting for a better one.") + continue + + log.info(f"[Init] Accepted pair via {'H' if decision.pose.model.name=='HOMOGRAPHY' else 'F/E'}: " + f"posdepth={decision.pose.posdepth:.3f}, parallax={decision.pose.parallax_deg:.2f}°") + + # Build the initial map (KF0 = I, KF1 = [R|t]) once + ok, T0_cw, T1_cw = bootstrap_two_view_map( + K, + bs.kps_ref, 
bs.des_ref, # reference frame kps/desc + kp2, des2, # current frame kps/desc + matches_bs, + args, + world_map, + params=init_params, + decision=decision # pass decision to avoid recomputing/gating + ) + if not ok: + log.warning("[Init] bootstrap_two_view_map() failed — keep searching.") + continue + + # -------------BOOKKEEPING for Map and Keyframes--------------------- + world_map.add_pose(T0_cw, is_keyframe=True) # KF0 + world_map.add_pose(T1_cw, is_keyframe=True) # KF1 + + # --- 2D Trajectory visualization bookkeeping --- + traj2d.push(bs.frame_id_ref, T0_cw) + traj2d.push(i + 1, T1_cw) + + # --- Create the two initial Keyframes to mirror world_map KFs --- + try: + # Be explicit about indices: world_map added KF0 first, then KF1. + # Keep kfs indices aligned (0, 1) to avoid confusion later. + if len(kfs) != 0: + log.warning("[Init] Expected empty kfs at bootstrap, found len=%d. " + "Proceeding but indices may misalign.", len(kfs)) + + kf0_idx = 0 if len(kfs) == 0 else len(kfs) + kf1_idx = kf0_idx + 1 + + # Resolve frame indices and image paths + ref_fidx = bs.frame_id_ref # you seeded with this earlier + cur_fidx = i + 1 # kp2/img2 correspond to i+1 + ref_path = seq[ref_fidx] if isinstance(seq[ref_fidx], str) else "" + cur_path = seq[cur_fidx] if isinstance(seq[cur_fidx], str) else "" + + # Build thumbnails (guard if you disabled UI) + thumb0 = make_thumb(bs.img_ref, tuple(args.kf_thumb_hw)) if 'make_thumb' in globals() else None + thumb1 = make_thumb(img2, tuple(args.kf_thumb_hw)) if 'make_thumb' in globals() else None + + # Create KF0 = Identity pose + kf0 = Keyframe(idx=kf0_idx, frame_idx=ref_fidx, path=ref_path, kps=bs.kps_ref, desc=bs.des_ref, pose=T0_cw, thumb=thumb0) + + # Create KF1 = bootstrap pose + kf1 = Keyframe(idx=kf1_idx, frame_idx=cur_fidx, path=cur_path, kps=kp2, desc=des2, pose=T1_cw, thumb=thumb1) + + kfs.extend([kf0, kf1]) + last_kf_frame_no = cur_fidx # the most recently added keyframe + log.info("[Init] Inserted initial keyframes: KF0(frame=%d, idx=%d) & KF1(frame=%d, idx=%d).", + ref_fidx, kf0_idx, cur_fidx, kf1_idx) + + + # and also seed their mutual matches (helps get first 3-view tracks quickly) + raw01 = feature_matcher(args, kfs[0].kps, kfs[1].kps, kfs[0].desc, kfs[1].desc, matcher) + # --- END --- + + except Exception as e: + log.exception("[Init] Failed to create initial keyframes: %s", e) + + # SET INITIALISED FLAG + initialised = True + print("-----BOOTSTRAPPED SUCCESSFULLY-----") + + if viz3d: + # highlight everything once (all points are new at bootstrap) + viz3d.update(world_map, new_ids=world_map.point_ids()) + + Tcw_cur_pose = T1_cw.copy() # current camera pose (optional) + bs.clear() + continue + # --x------x----------x----------x----------x----x----x-- END -x----------x-----------x----------x---- + + # ------------------------------------------------------------------- # + # --------------------- Frame-to-Map Tracking ----------------------- # + # ------------------------------------------------------------------- # + # When initialised, use constant-velocity prediction, reproject, search, PnP. 
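+        # Constant-velocity motion model, sketched here for reference (the exact form lives in
+        # predict_pose_const_vel and is assumed to be the usual one):
+        #   T_rel  = Tcw_prev @ np.linalg.inv(Tcw_prevprev)   # last inter-frame motion
+        #   T_pred = T_rel @ Tcw_prev                         # replay that motion once more
+        # The prediction only seeds PnP and bounds the projection search radius; it is replaced by
+        # the refined pose whenever PnP succeeds.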
+ if initialised: + # 1) Predict pose with constant velocity using last two poses + if len(world_map.poses) >= 2: + Tcw_prevprev = world_map.poses[-2] + Tcw_prev = world_map.poses[-1] + Tcw_pred = predict_pose_const_vel(Tcw_prevprev, Tcw_prev) + else: + Tcw_pred = Tcw_cur_pose + + # 2) Reproject active points and do small-window matching + H, W = img2.shape[:2] + m23d = reproject_and_match_2d3d( + world_map, K, Tcw_pred, kp2, des2, W, H, + radius_px=float(args.proj_radius), # CLI param + max_hamm=64 + ) + + log.debug(f"[Track] candidates for PnP: {len(m23d.pts3d)}") + + # 3) PnP+RANSAC + if len(m23d.pts3d) >= int(args.pnp_min_inliers): + Tcw_est, inl_mask = solve_pnp_ransac( + m23d.pts3d, m23d.pts2d, K, + ransac_px=float(args.ransac_thresh), + Tcw_init=Tcw_pred, + iters=300, conf=0.999 + ) + ninl = int(inl_mask.sum()) if inl_mask.size else 0 + if Tcw_est is not None and ninl >= int(args.pnp_min_inliers): + Tcw_cur_pose = Tcw_est + world_map.add_pose(Tcw_cur_pose, is_keyframe=False) + + # --- 2D Trajectory visualization bookkeeping --- + traj2d.push(i + 1, Tcw_cur_pose) + + log.info(f"[Track] PnP inliers={ninl} (th={args.ransac_thresh:.1f}px)") + + # Optional: quick visual sanity overlay (comment out if headless) + try: + dbg = draw_reprojection_debug(img2, K, Tcw_cur_pose, m23d, inl_mask) + cv2.imshow("Track debug", dbg) + # if cv2.waitKey(int(1000.0/max(args.fps, 1e-6))) == 27: + # break # TODO REMOVE + except Exception: + pass + + else: + log.warning(f"[Track] PnP failed or too few inliers (got {ninl}). Tracking lost?") + tracking_lost = True + else: + log.warning(f"[Track] Too few 2D–3D matches for PnP ({len(m23d.pts3d)} < {args.pnp_min_inliers}).") + tracking_lost = True + + # (Optional) if tracking_lost: trigger relocalization here in the future. 
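+        # A possible relocalization hook (illustrative sketch only, not implemented in this loop):
+        # when tracking_lost is set, one could match des2 against the descriptors of the last few
+        # keyframes in `kfs`, rebuild 2D-3D pairs from their map-point observations and retry
+        # solve_pnp_ransac with a larger search radius before giving up on the frame.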
+ # --x------x----------x----------x----------x----x----x-- END -x----------x-----------x----------x---- + + + # ------------------------------------------------------------------- # + # --------------------- Keyframe Selection --------------------- # + # ------------------------------------------------------------------- # + prev_len = len(kfs) + kfs, last_kf_frame_no = select_keyframe( + args, seq, i, img2, kp2, des2, Tcw_cur_pose, matcher, kfs, last_kf_frame_no + ) + is_kf = (len(kfs) > prev_len) + # --x------x----------x----------x----------x----x----x-- END -x----------x-----------x----------x---- + + # ------------------------------------------------------------------- # + # --------------------- Map Growth (Triangulation) --------------------- # + # ------------------------------------------------------------------- # + if is_kf and len(kfs) >= 2: + prev_kf = kfs[-2] + curr_kf = kfs[-1] + new_ids = triangulate_between_kfs_2view( + args, K, world_map, prev_kf, curr_kf, matcher, log, + use_parallax_gate=True, parallax_min_deg=2.0, + reproj_px_max=float(args.ransac_thresh) + ) + if new_ids: + log.info("[Map] Triangulated %d new points after KF %d.", len(new_ids), curr_kf.idx) + + # TODO pose only BA should happen on every tracking frame and not here, but well, if it works we don't touch it + # --- BA pass 1: fast pose-only BA on current KF (uses its own obs) --- + try: + pose_only_ba(world_map, K, kfs, kf_idx=curr_kf.idx, max_iters=8, huber_thr=2.0) + except Exception as e: + log.warning(f"[BA] Pose-only BA failed: {e}") + + # --- BA pass 2: local BA on a small KF window --- + try: + local_bundle_adjustment( + world_map, K, kfs, + center_kf_idx=curr_kf.idx, + window_size=int(getattr(args, "local_ba_window", 6)), + max_points=10000, + max_iters=15 + ) + except Exception as e: + log.warning(f"[BA] Local BA failed: {e}") + + if viz3d: + viz3d.update(world_map, new_ids=new_ids) + # --x------x----------x----------x----------x----x----x-- END -x----------x-----------x----------x---- + + + # --------------------------------------MISCELLANEOUS INFO --------------------------------------------------- + # print(f"[traj2d] est={len(traj2d.est_xyz)} gt={len(traj2d.gt_xyz)}") + # print(f"Frame {i+1}/{total} | FPS: {achieved_fps:.1f} | KFs: {len(kfs)} | Map points: {len(world_map.points)}") + # --x------x----------x----------x----------x----x----x-- END -x----------x-----------x----------x---- + + + # ------------------------------------------------------------------- # + # --------------------- Visualization ----------------------- # + # ------------------------------------------------------------------- # + if viz3d: + viz3d.update(world_map, new_ids=new_ids) + # --- draw & UI control (end of iteration) --- + traj2d.draw(paused=ui.paused) + + ui.poll(1) # non-blocking poll + if ui.should_quit(): + break + + # If currently paused: block here until resume or do single-step + if ui.paused: + did_step = ui.wait_if_paused() + # if user pressed 'n', did_step=True -> allow this iteration to exit; + # next iteration will immediately pause again (nice for stepping). 
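+        # Note: achieved_fps is initialised before the loop but never refreshed; if the commented-out
+        # status print above is re-enabled, a minimal update using the tick counters created earlier
+        # could look like (sketch, not part of the original loop):
+        #   now = cv2.getTickCount() / cv2.getTickFrequency()
+        #   achieved_fps, last_time = 1.0 / max(now - last_time, 1e-6), now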
+ # --x------x----------x----------x----------x----x----x-- END -x----------x-----------x----------x---- + + if viz3d: + viz3d.close() + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/modules/SimpleSLAM/slam/stereo/ROUGHstereo_tracker.py b/modules/SimpleSLAM/slam/stereo/ROUGHstereo_tracker.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/SimpleSLAM/slam/stereo/__init__.py b/modules/SimpleSLAM/slam/stereo/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/SimpleSLAM/tests/__init__.py b/modules/SimpleSLAM/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/SimpleSLAM/tests/test_ba_utils.py b/modules/SimpleSLAM/tests/test_ba_utils.py new file mode 100644 index 0000000000..a7c97b347c --- /dev/null +++ b/modules/SimpleSLAM/tests/test_ba_utils.py @@ -0,0 +1,258 @@ +""" +Synthetic‐data unit‑tests for *ba_utils.py* +=========================================== + +These tests generate a small 3‑D scene, propagate a pin‑hole camera +through a known motion, add optional noise to the **initial** geometry +and verify that each bundle‑adjustment helper implemented in +*ba_utils.py* + + • two_view_ba + • pose_only_ba + • local_bundle_adjustment + +reduces the mean reprojection RMSE. + +**Pose convention – T_wc (camera→world)** +---------------------------------------- +Your SLAM pipeline stores a pose as the rigid-body transform **from the +camera frame to the world frame**. To project a world point X_w into a +camera at T_wc we therefore use + +``` +X_c = R_wcᵀ · (X_w − t_wc) +``` + +Both the synthetic generator and the RMSE metric below follow that +convention so that the tests are consistent with run-time code. + +Requires +-------- +* OpenCV ≥ 4 (for `cv2.KeyPoint`, `cv2.Rodrigues`) +* `pyceres` + `pycolmap` (same as *ba_utils*) + +Run with *pytest* or plain *unittest*: + +```bash +python -m pytest test_ba_utils_fixed.py # preferred +# – or – +python test_ba_utils_fixed.py # falls back to unittest.main() +``` +""" + +from __future__ import annotations +import math +import unittest +from typing import List, Tuple, Dict + +import cv2 +import numpy as np + +# import the module under test +import slam.core.ba_utils as bau + + +# ------------------------------------------------------------ +# Minimal SLAM‑like data containers understood by ba_utils +# ------------------------------------------------------------ +class MapPoint: + """Light‑weight replacement for *MapPoint*.""" + + def __init__(self, position: np.ndarray): + self.position: np.ndarray = position.astype(np.float64) + # list[(frame_idx, kp_idx)] + self.observations: List[Tuple[int, int]] = [] + + +class WorldMap: + """Holds camera poses (T_cw) and 3‑D points.""" + + def __init__(self): + self.poses: List[np.ndarray] = [] # each 4×4 SE(3) + self.points: Dict[int, MapPoint] = {} # pid → MapPoint + + +# ------------------------------------------------------------ +# Synthetic scene generator +# ------------------------------------------------------------ +WIDTH, HEIGHT = 1280, 960 +FX = FY = 800.0 +CX, CY = WIDTH / 2.0, HEIGHT / 2.0 +K = np.array([[FX, 0, CX], [0, FY, CY], [0, 0, 1]], np.float64) + + +def _yaw_to_R(yaw_deg: float) -> np.ndarray: + """Rotation around *y* axis (right‑handed, degrees → 3×3).""" + theta = math.radians(yaw_deg) + c, s = math.cos(theta), math.sin(theta) + return np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], np.float64) + + +def generate_scene( + n_frames: int, + n_points: int = 50, + dx_per_frame: 
float = 0.10, + yaw_per_frame_deg: float = 2.0, + pix_noise: float = 1.0, + pose_trans_noise: float = 0.05, + pose_rot_noise_deg: float = 2.0, + point_noise: float = 0.05, + *, + add_noise: bool = True, +): + """Return *(world_map, keypoints)* with optional noisy initial estimates. + + Set *add_noise=False* (or individual *_noise parameters to 0) to + create a perfect initialisation that should converge in a single + BA iteration. Ground‑truth is used only to generate measurements – + the initial geometry fed to BA is perturbed only when noise is + requested. + """ + + # When add_noise=False force all noise parameters to zero + if not add_noise: + pix_noise = pose_trans_noise = pose_rot_noise_deg = point_noise = 0.0 + + rng = np.random.default_rng(42) + + # --- ground‑truth 3‑D points ------------------------------------- + pts_gt = np.column_stack( + ( + rng.uniform(-1.0, 1.0, n_points), # X + rng.uniform(-0.7, 0.7, n_points), # Y + rng.uniform(4.0, 8.0, n_points), # Z + ) + ) + + # --- ground‑truth camera poses (camera-from-world) --------------- + poses_gt: List[np.ndarray] = [] + for i in range(n_frames): + R = _yaw_to_R(i * yaw_per_frame_deg) + t = np.array([i * dx_per_frame, 0.0, 0.0], np.float64) + T = np.eye(4, dtype=np.float64) + T[:3, :3] = R + T[:3, 3] = t + poses_gt.append(T) + + # --- create (possibly noisy) *initial* map ----------------------- + wmap = WorldMap() + keypoints: List[List[cv2.KeyPoint]] = [[] for _ in range(n_frames)] + + # a) camera poses -------------------------------------------------- + for T_gt in poses_gt: + # translation noise + t_noise = rng.normal(0.0, pose_trans_noise, 3) + # small random rotation about a random axis + axis = rng.normal(0.0, 1.0, 3) + axis /= np.linalg.norm(axis) + angle = math.radians(pose_rot_noise_deg) * rng.normal() + R_noise, _ = cv2.Rodrigues(axis * angle) + + T_noisy = np.eye(4, dtype=np.float64) + T_noisy[:3, :3] = R_noise @ T_gt[:3, :3] + T_noisy[:3, 3] = T_gt[:3, 3] + t_noise + wmap.poses.append(T_noisy) + + # b) points + observations --------------------------------------- + for pid, X_w in enumerate(pts_gt): + X_init = X_w + rng.normal(0.0, point_noise, 3) + mp = MapPoint(X_init) + wmap.points[pid] = mp + + for f_idx, T_wc in enumerate(poses_gt): + # Project through **ground‑truth** pose to create measurement + R_wc, t_wc = T_wc[:3, :3], T_wc[:3, 3] + X_c = R_wc.T @ (X_w - t_wc) # world → camera frame + + Z = X_c[2] + if Z <= 0: # behind camera + continue + + # Homogeneous pixel coordinates via intrinsics + uv_h = K @ X_c + u = uv_h[0] / Z + v = uv_h[1] / Z + + if not (0.0 <= u < WIDTH and 0.0 <= v < HEIGHT): + continue + + # add pixel noise (measurement noise, not to the *initial* estimate) + u_meas = u + rng.normal(0.0, pix_noise) + v_meas = v + rng.normal(0.0, pix_noise) + + kp = cv2.KeyPoint(float(u_meas), float(v_meas), 1) + kp_idx = len(keypoints[f_idx]) + keypoints[f_idx].append(kp) + mp.observations.append((f_idx, kp_idx)) + + return wmap, keypoints + + +# ------------------------------------------------------------ +# Utility: reprojection RMSE +# ------------------------------------------------------------ + +def reproj_rmse(wmap: WorldMap, keypoints, frames: List[int] | None = None) -> float: + sq_err = 0.0 + count = 0 + frames = set(range(len(keypoints))) if frames is None else set(frames) + + for mp in wmap.points.values(): + for f_idx, kp_idx in mp.observations: + if f_idx not in frames: + continue + kp = keypoints[f_idx][kp_idx] + u_m, v_m = kp.pt + + T = wmap.poses[f_idx] + X = mp.position + R_wc, t_wc = 
T[:3, :3], T[:3, 3] + X_c = R_wc.T @ (X - t_wc) + Z = X_c[2] + if Z <= 0: + continue + + uv_h = K @ X_c + u_p = uv_h[0] / Z + v_p = uv_h[1] / Z + + sq_err += (u_p - u_m) ** 2 + (v_p - v_m) ** 2 + count += 2 + + return math.sqrt(sq_err / count) if count else 0.0 + + +# ------------------------------------------------------------ +# Unit‑tests +# ------------------------------------------------------------ +class TestBundleAdjustment(unittest.TestCase): + def test_two_view_ba(self): + wmap, kps = generate_scene(n_frames=2, add_noise=False) + e0 = reproj_rmse(wmap, kps) + self.assertAlmostEqual(e0, 0.0, places=6, msg="Initial reprojection error should be zero when add_noise=False") + bau.two_view_ba(wmap, K, kps, max_iters=30) + e1 = reproj_rmse(wmap, kps) + self.assertLessEqual(e1, e0 + 1e-9, msg=f"two_view_ba failed: {e0:.6f} → {e1:.6f}") + + def test_pose_only_ba(self): + wmap, kps = generate_scene(n_frames=3, add_noise=False) + e0 = reproj_rmse(wmap, kps, frames=[2]) + self.assertAlmostEqual(e0, 0.0, places=6) + bau.pose_only_ba(wmap, K, kps, frame_idx=2, max_iters=15) + e1 = reproj_rmse(wmap, kps, frames=[2]) + self.assertLessEqual(e1, e0 + 1e-9, msg=f"pose_only_ba failed: {e0:.6f} → {e1:.6f}") + + def test_local_bundle_adjustment(self): + wmap, kps = generate_scene(n_frames=10, add_noise=False) # use noise to actually test BA + e0 = reproj_rmse(wmap, kps) + bau.local_bundle_adjustment(wmap, K, kps, center_kf_idx=9, window_size=8, max_iters=25) + e1 = reproj_rmse(wmap, kps) + self.assertLess(e1, e0, msg=f"local_bundle_adjustment failed: {e0:.2f} → {e1:.2f}") + + +if __name__ == "__main__": + # Fallback to unittest runner when executed directly + unittest.main(verbosity=2) + +# todo make noise function, to add noise to the points and poses, look at using scipy to convert quaternions to add matrix +# have 0 noise and test if BA converges to 0 diff --git a/modules/SimpleSLAM/tests/test_ba_utils_T_c_w.py b/modules/SimpleSLAM/tests/test_ba_utils_T_c_w.py new file mode 100644 index 0000000000..d4f98de383 --- /dev/null +++ b/modules/SimpleSLAM/tests/test_ba_utils_T_c_w.py @@ -0,0 +1,319 @@ +# """ +# Synthetic‐data unit‑tests for *ba_utils.py* +# =========================================== + +# These tests generate a small 3‑D scene, propagate a pin‑hole camera +# through a known motion, add optional noise to the **initial** geometry +# and verify that each bundle‑adjustment helper implemented in +# *ba_utils.py* + +# • two_view_ba +# • pose_only_ba +# • local_bundle_adjustment + +# reduces the mean reprojection RMSE. + +# **Pose convention – T_cw (camera-from-world)(world→camera)** +# ---------------------------------------- +# Your SLAM pipeline stores a pose as the rigid-body transform **from the +# camera frame to the world frame**. To project a world point X_w into a +# camera at T_wc we therefore use + +# ``` +# X_c = R_wcᵀ · (X_w − t_wc) +# ``` + +# Both the synthetic generator and the RMSE metric below follow that +# convention so that the tests are consistent with run-time code. 
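+# (Equivalently, for the T_cw = world-to-camera convention stored by this file,
+#  X_c = R_cw · X_w + t_cw, which is the form evaluated by reproj_rmse below.)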
+ +# Requires +# -------- +# * OpenCV ≥ 4 (for `cv2.KeyPoint`, `cv2.Rodrigues`) +# * `pyceres` + `pycolmap` (same as *ba_utils*) + +# Run with *pytest* or plain *unittest*: + +# ```bash +# python -m pytest test_ba_utils_fixed.py # preferred +# # – or – +# python test_ba_utils_fixed.py # falls back to unittest.main() +# ``` +# """ + +from __future__ import annotations +import math +import unittest +from typing import List, Tuple, Dict + +import cv2 +import numpy as np + +# import the module under test +import slam.core.ba_utils as bau + +# TODO: Create Visualization for points and camera poses + +# ------------------------------------------------------------ +# Minimal SLAM‑like data containers understood by ba_utils +# ------------------------------------------------------------ +class MapPoint: + """Light‑weight replacement for *MapPoint*.""" + + def __init__(self, position: np.ndarray): + self.position: np.ndarray = position.astype(np.float64) + # list[(frame_idx, kp_idx)] + self.observations: List[Tuple[int, int, np.ndarray]] = [] + + +class WorldMap: + """Holds camera poses (T_cw) and 3‑D points.""" + + def __init__(self): + self.poses: List[np.ndarray] = [] # each 4×4 SE(3) + self.points: Dict[int, MapPoint] = {} # pid → MapPoint + + +# ------------------------------------------------------------ +# Pose conversion utilities +# ------------------------------------------------------------ +# TODO: Code duplication with slam/core/pose_utils.py, refactor +def T_wc_to_T_cw(T_wc: np.ndarray) -> np.ndarray: + """Convert camera-to-world pose to world-to-camera pose.""" + T_cw = np.eye(4, dtype=np.float64) + R_wc = T_wc[:3, :3] + t_wc = T_wc[:3, 3] + T_cw[:3, :3] = R_wc.T + T_cw[:3, 3] = -R_wc.T @ t_wc + return T_cw + + +def T_cw_to_T_wc(T_cw: np.ndarray) -> np.ndarray: + """Convert world-to-camera pose to camera-to-world pose.""" + T_wc = np.eye(4, dtype=np.float64) + R_cw = T_cw[:3, :3] + t_cw = T_cw[:3, 3] + T_wc[:3, :3] = R_cw.T + T_wc[:3, 3] = -R_cw.T @ t_cw + return T_wc + + +# ------------------------------------------------------------ +# Synthetic scene generator +# ------------------------------------------------------------ +WIDTH, HEIGHT = 1280, 960 +FX = FY = 800.0 +CX, CY = WIDTH / 2.0, HEIGHT / 2.0 +K = np.array([[FX, 0, CX], [0, FY, CY], [0, 0, 1]], np.float64) + + +def _yaw_to_R(yaw_deg: float) -> np.ndarray: + """Rotation around *y* axis (right‑handed, degrees → 3×3).""" + theta = math.radians(yaw_deg) + c, s = math.cos(theta), math.sin(theta) + return np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], np.float64) + + +def generate_scene( + n_frames: int, + n_points: int = 50, + dx_per_frame: float = 0.10, + yaw_per_frame_deg: float = 2.0, + pix_noise: float = 5.0, + pose_trans_noise: float = 0.5, + pose_rot_noise_deg: float = 15.0, + point_noise: float = 0.05, + *, + add_noise: bool = True, +): + """Return *(world_map, keypoints)* with optional noisy initial estimates. + + Set *add_noise=False* (or individual *_noise parameters to 0) to + create a perfect initialisation that should converge in a single + BA iteration. Ground‑truth is used only to generate measurements – + the initial geometry fed to BA is perturbed only when noise is + requested. + + NOTE: This function generates T_wc poses but converts them to T_cw + for storage in world_map to match ba_utils expectations. 
+ """ + + # When add_noise=False force all noise parameters to zero + if not add_noise: + pix_noise = pose_trans_noise = pose_rot_noise_deg = point_noise = 0.0 + + rng = np.random.default_rng(42) + + # --- ground‑truth 3‑D points ------------------------------------- + pts_gt = np.column_stack( + ( + rng.uniform(-1.0, 1.0, n_points), # X + rng.uniform(-0.7, 0.7, n_points), # Y + rng.uniform(4.0, 8.0, n_points), # Z + ) + ) + + # --- ground‑truth camera poses (camera-to-world T_wc for projection) --- + poses_gt_wc: List[np.ndarray] = [] + for i in range(n_frames): + R = _yaw_to_R(i * yaw_per_frame_deg) + t = np.array([i * dx_per_frame, 0.0, 0.0], np.float64) + T_wc = np.eye(4, dtype=np.float64) + T_wc[:3, :3] = R + T_wc[:3, 3] = t + poses_gt_wc.append(T_wc) + + # --- create (possibly noisy) *initial* map ----------------------- + wmap = WorldMap() + keypoints: List[List[cv2.KeyPoint]] = [[] for _ in range(n_frames)] + + # a) camera poses -------------------------------------------------- + for T_wc_gt in poses_gt_wc: + # Apply noise to T_wc + t_noise = rng.normal(0.0, pose_trans_noise, 3) + axis = rng.normal(0.0, 1.0, 3) + axis /= np.linalg.norm(axis) + angle = math.radians(pose_rot_noise_deg) * rng.normal() + R_noise, _ = cv2.Rodrigues(axis * angle) + + T_wc_noisy = np.eye(4, dtype=np.float64) + T_wc_noisy[:3, :3] = R_noise @ T_wc_gt[:3, :3] + T_wc_noisy[:3, 3] = T_wc_gt[:3, 3] + t_noise + + # Convert T_wc to T_cw for storage (ba_utils expects T_cw) + T_cw_noisy = T_wc_to_T_cw(T_wc_noisy) + wmap.poses.append(T_cw_noisy) + + # b) points + observations --------------------------------------- + for pid, X_w in enumerate(pts_gt): + X_init = X_w + rng.normal(0.0, point_noise, 3) + mp = MapPoint(X_init) + wmap.points[pid] = mp + + for f_idx, T_wc in enumerate(poses_gt_wc): + # Project through **ground‑truth** T_wc pose to create measurement + R_wc, t_wc = T_wc[:3, :3], T_wc[:3, 3] + X_c = R_wc.T @ (X_w - t_wc) # world → camera frame + + Z = X_c[2] + if Z <= 0: # behind camera + continue + + # Homogeneous pixel coordinates via intrinsics + uv_h = K @ X_c + u = uv_h[0] / Z + v = uv_h[1] / Z + + if not (0.0 <= u < WIDTH and 0.0 <= v < HEIGHT): + continue + + # add pixel noise (measurement noise, not to the *initial* estimate) + u_meas = u + rng.normal(0.0, pix_noise) + v_meas = v + rng.normal(0.0, pix_noise) + + kp = cv2.KeyPoint(float(u_meas), float(v_meas), 1) + kp_idx = len(keypoints[f_idx]) + keypoints[f_idx].append(kp) + mp.observations.append((f_idx, kp_idx, np.random.rand(128))) # dummy descriptor + + return wmap, keypoints + + +# ------------------------------------------------------------ +# Utility: reprojection RMSE +# ------------------------------------------------------------ + +def reproj_rmse(wmap: WorldMap, keypoints, frames: List[int] | None = None) -> float: + """ + Compute reprojection RMSE. + wmap.poses are assumed to be T_cw (world-to-camera) as expected by ba_utils. 
+ """ + sq_err = 0.0 + count = 0 + frames = set(range(len(keypoints))) if frames is None else set(frames) + + for mp in wmap.points.values(): + for f_idx, kp_idx, descriptor in mp.observations: + if f_idx not in frames: + continue + kp = keypoints[f_idx][kp_idx] + u_m, v_m = kp.pt + + # wmap.poses[f_idx] is T_cw (world-to-camera) + T_cw = wmap.poses[f_idx] + X_w = mp.position + R_cw, t_cw = T_cw[:3, :3], T_cw[:3, 3] + X_c = R_cw @ X_w + t_cw # world → camera using T_cw + + Z = X_c[2] + if Z <= 0: + continue + + uv_h = K @ X_c + u_p = uv_h[0] / Z + v_p = uv_h[1] / Z + + sq_err += (u_p - u_m) ** 2 + (v_p - v_m) ** 2 + count += 2 + + return math.sqrt(sq_err / count) if count else 0.0 + + +# ------------------------------------------------------------ +# Unit‑tests +# ------------------------------------------------------------ +class TestBundleAdjustment(unittest.TestCase): + def test_two_view_ba(self): + wmap, kps = generate_scene(n_frames=2, add_noise=False) + e0 = reproj_rmse(wmap, kps) + self.assertAlmostEqual(e0, 0.0, places=3, msg="Initial reprojection error should be zero when add_noise=False") + bau.two_view_ba(wmap, K, kps, max_iters=30) + e1 = reproj_rmse(wmap, kps) + print(f"two_view_ba: e0={e0:.6f}, e1={e1:.6f}") + self.assertLessEqual(e1, e0 + 1e-9, msg=f"two_view_ba failed: {e0:.6f} → {e1:.6f}") + + def test_two_view_ba_with_noise(self): + wmap, kps = generate_scene(n_frames=2, add_noise=True) + e0 = reproj_rmse(wmap, kps) + bau.two_view_ba(wmap, K, kps, max_iters=30) + e1 = reproj_rmse(wmap, kps) + print(f"two_view_ba with noise: e0={e0:.6f}, e1={e1:.6f}") + self.assertLess(e1, e0, msg=f"two_view_ba failed to reduce error: {e0:.6f} → {e1:.6f}") + + def test_pose_only_ba(self): + wmap, kps = generate_scene(n_frames=3, add_noise=False) + e0 = reproj_rmse(wmap, kps, frames=[2]) + self.assertAlmostEqual(e0, 0.0, places=3) + bau.pose_only_ba(wmap, K, kps, frame_idx=2, max_iters=15) + e1 = reproj_rmse(wmap, kps, frames=[2]) + print(f"pose_only_ba: e0={e0:.6f}, e1={e1:.6f}") + self.assertLessEqual(e1, e0 + 1e-9, msg=f"pose_only_ba failed: {e0:.6f} → {e1:.6f}") + + def test_pose_only_ba_with_noise(self): + wmap, kps = generate_scene(n_frames=3, add_noise=True) + e0 = reproj_rmse(wmap, kps, frames=[2]) + bau.pose_only_ba(wmap, K, kps, frame_idx=2, max_iters=15) + e1 = reproj_rmse(wmap, kps, frames=[2]) + print(f"pose_only_ba with noise: e0={e0:.6f}, e1={e1:.6f}") + self.assertLess(e1, e0, msg=f"pose_only_ba failed to reduce error: {e0:.6f} → {e1:.6f}") + + def test_local_bundle_adjustment(self): + wmap, kps = generate_scene(n_frames=10, add_noise=False) + e0 = reproj_rmse(wmap, kps) + self.assertAlmostEqual(e0, 0.0, places=3, msg="Initial reprojection error should be zero when add_noise=False") + bau.local_bundle_adjustment(wmap, K, kps, center_kf_idx=9, window_size=8, max_iters=25) + e1 = reproj_rmse(wmap, kps) + print(f"local_bundle_adjustment: e0={e0:.6f}, e1={e1:.6f}") + self.assertLessEqual(e1, e0 + 1e-9, msg=f"local_bundle_adjustment failed: {e0:.6f} → {e1:.6f}") + + def test_local_bundle_adjustment_with_noise(self): + wmap, kps = generate_scene(n_frames=10, add_noise=True) + e0 = reproj_rmse(wmap, kps) + bau.local_bundle_adjustment(wmap, K, kps, center_kf_idx=9, window_size=8, max_iters=25) + e1 = reproj_rmse(wmap, kps) + print(f"local_bundle_adjustment with noise: e0={e0:.6f}, e1={e1:.6f}") + self.assertLess(e1, e0, msg=f"local_bundle_adjustment failed to reduce error: {e0:.2f} → {e1:.2f}") + + +if __name__ == "__main__": + # Fallback to unittest runner when executed directly + 
unittest.main(verbosity=2) \ No newline at end of file diff --git a/modules/SimpleSLAM/tests/test_ba_utils_T_w_c.py b/modules/SimpleSLAM/tests/test_ba_utils_T_w_c.py new file mode 100644 index 0000000000..8096e5d196 --- /dev/null +++ b/modules/SimpleSLAM/tests/test_ba_utils_T_w_c.py @@ -0,0 +1,319 @@ +# """ +# Synthetic‐data unit‑tests for *ba_utils.py* +# =========================================== + +# These tests generate a small 3‑D scene, propagate a pin‑hole camera +# through a known motion, add optional noise to the **initial** geometry +# and verify that each bundle‑adjustment helper implemented in +# *ba_utils.py* + +# • two_view_ba +# • pose_only_ba +# • local_bundle_adjustment + +# reduces the mean reprojection RMSE. + +# **Pose convention – T_wc (world-from-camera)(camera→world)** +# ---------------------------------------- +# Your SLAM pipeline stores a pose as the rigid-body transform **from the +# camera frame to the world frame**. To project a world point X_w into a +# camera at T_wc we therefore use + +# ``` +# X_c = R_wcᵀ · (X_w − t_wc) +# ``` + +# Both the synthetic generator and the RMSE metric below follow that +# convention so that the tests are consistent with run-time code. + +# Requires +# -------- +# * OpenCV ≥ 4 (for `cv2.KeyPoint`, `cv2.Rodrigues`) +# * `pyceres` + `pycolmap` (same as *ba_utils*) + +# Run with *pytest* or plain *unittest*: + +# ```bash +# python -m pytest test_ba_utils_fixed.py # preferred +# # – or – +# python test_ba_utils_fixed.py # falls back to unittest.main() +# ``` +# """ + +from __future__ import annotations +import math +import unittest +from typing import List, Tuple, Dict + +import cv2 +import numpy as np + +# import the module under test +import slam.core.ba_utils as bau + +# TODO: Create Visualization for points and camera poses + +# ------------------------------------------------------------ +# Minimal SLAM‑like data containers understood by ba_utils +# ------------------------------------------------------------ +class MapPoint: + """Light‑weight replacement for *MapPoint*.""" + + def __init__(self, position: np.ndarray): + self.position: np.ndarray = position.astype(np.float64) + # list[(frame_idx, kp_idx)] + self.observations: List[Tuple[int, int]] = [] + + +class WorldMap: + """Holds camera poses (T_wc) and 3-D points.""" + + def __init__(self): + self.poses: List[np.ndarray] = [] # each 4×4 SE(3) + self.points: Dict[int, MapPoint] = {} # pid → MapPoint + + +# ------------------------------------------------------------ +# Pose conversion utilities +# ------------------------------------------------------------ +def T_wc_to_T_cw(T_wc: np.ndarray) -> np.ndarray: + """Convert camera-to-world pose to world-to-camera pose.""" + T_cw = np.eye(4, dtype=np.float64) + R_wc = T_wc[:3, :3] + t_wc = T_wc[:3, 3] + T_cw[:3, :3] = R_wc.T + T_cw[:3, 3] = -R_wc.T @ t_wc + return T_cw + + +def T_cw_to_T_wc(T_cw: np.ndarray) -> np.ndarray: + """Convert world-to-camera pose to camera-to-world pose.""" + T_wc = np.eye(4, dtype=np.float64) + R_cw = T_cw[:3, :3] + t_cw = T_cw[:3, 3] + T_wc[:3, :3] = R_cw.T + T_wc[:3, 3] = -R_cw.T @ t_cw + return T_wc + + +# ------------------------------------------------------------ +# Synthetic scene generator +# ------------------------------------------------------------ +WIDTH, HEIGHT = 1280, 960 +FX = FY = 800.0 +CX, CY = WIDTH / 2.0, HEIGHT / 2.0 +K = np.array([[FX, 0, CX], [0, FY, CY], [0, 0, 1]], np.float64) + + +def _yaw_to_R(yaw_deg: float) -> np.ndarray: + """Rotation around *y* axis (right‑handed, 
degrees → 3×3).""" + theta = math.radians(yaw_deg) + c, s = math.cos(theta), math.sin(theta) + return np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], np.float64) + + +def generate_scene( + n_frames: int, + n_points: int = 50, + dx_per_frame: float = 0.10, + yaw_per_frame_deg: float = 2.0, + pix_noise: float = 5.0, + pose_trans_noise: float = 0.5, + pose_rot_noise_deg: float = 15.0, + point_noise: float = 0.05, + *, + add_noise: bool = True, +): + """Return *(world_map, keypoints)* with optional noisy initial estimates. + + Set *add_noise=False* (or individual *_noise parameters to 0) to + create a perfect initialisation that should converge in a single + BA iteration. Ground‑truth is used only to generate measurements – + the initial geometry fed to BA is perturbed only when noise is + requested. + + NOTE: This function generates T_wc poses but converts them to T_cw + for storage in world_map to match ba_utils expectations. + """ + + # When add_noise=False force all noise parameters to zero + if not add_noise: + pix_noise = pose_trans_noise = pose_rot_noise_deg = point_noise = 0.0 + + rng = np.random.default_rng(42) + + # --- ground‑truth 3‑D points ------------------------------------- + pts_gt = np.column_stack( + ( + rng.uniform(-1.0, 1.0, n_points), # X + rng.uniform(-0.7, 0.7, n_points), # Y + rng.uniform(4.0, 8.0, n_points), # Z + ) + ) + + # --- ground‑truth camera poses (camera-to-world T_wc for projection) --- + poses_gt_wc: List[np.ndarray] = [] + for i in range(n_frames): + R = _yaw_to_R(i * yaw_per_frame_deg) + t = np.array([i * dx_per_frame, 0.0, 0.0], np.float64) + T_wc = np.eye(4, dtype=np.float64) + T_wc[:3, :3] = R + T_wc[:3, 3] = t + poses_gt_wc.append(T_wc) + + # --- create (possibly noisy) *initial* map ----------------------- + wmap = WorldMap() + keypoints: List[List[cv2.KeyPoint]] = [[] for _ in range(n_frames)] + + # a) camera poses -------------------------------------------------- + for T_wc_gt in poses_gt_wc: + # Apply noise to T_wc + t_noise = rng.normal(0.0, pose_trans_noise, 3) + axis = rng.normal(0.0, 1.0, 3) + axis /= np.linalg.norm(axis) + angle = math.radians(pose_rot_noise_deg) * rng.normal() + R_noise, _ = cv2.Rodrigues(axis * angle) + + T_wc_noisy = np.eye(4, dtype=np.float64) + T_wc_noisy[:3, :3] = R_noise @ T_wc_gt[:3, :3] + T_wc_noisy[:3, 3] = T_wc_gt[:3, 3] + t_noise + + wmap.poses.append(T_wc_noisy) # store T_wc + # Convert T_wc to T_cw for storage (ba_utils expects T_cw) + # T_cw_noisy = T_wc_to_T_cw(T_wc_noisy) + # wmap.poses.append(T_cw_noisy) + + # b) points + observations --------------------------------------- + for pid, X_w in enumerate(pts_gt): + X_init = X_w + rng.normal(0.0, point_noise, 3) + mp = MapPoint(X_init) + wmap.points[pid] = mp + + for f_idx, T_wc in enumerate(poses_gt_wc): + # Project through **ground‑truth** T_wc pose to create measurement + R_wc, t_wc = T_wc[:3, :3], T_wc[:3, 3] + X_c = R_wc.T @ (X_w - t_wc) # world → camera frame + + Z = X_c[2] + if Z <= 0: # behind camera + continue + + # Homogeneous pixel coordinates via intrinsics + uv_h = K @ X_c + u = uv_h[0] / Z + v = uv_h[1] / Z + + if not (0.0 <= u < WIDTH and 0.0 <= v < HEIGHT): + continue + + # add pixel noise (measurement noise, not to the *initial* estimate) + u_meas = u + rng.normal(0.0, pix_noise) + v_meas = v + rng.normal(0.0, pix_noise) + + kp = cv2.KeyPoint(float(u_meas), float(v_meas), 1) + kp_idx = len(keypoints[f_idx]) + keypoints[f_idx].append(kp) + mp.observations.append((f_idx, kp_idx, np.random.rand(128))) + + return wmap, keypoints + + +# 
------------------------------------------------------------ +# Utility: reprojection RMSE +# ------------------------------------------------------------ + +def reproj_rmse(wmap: WorldMap, keypoints, frames: List[int] | None = None) -> float: + """ + Compute reprojection RMSE. + wmap.poses are assumed to be T_wc (camera-to-world) as expected by ba_utils. + """ + sq_err = 0.0 + count = 0 + frames = set(range(len(keypoints))) if frames is None else set(frames) + + for mp in wmap.points.values(): + for f_idx, kp_idx, descriptors in mp.observations: + if f_idx not in frames: + continue + kp = keypoints[f_idx][kp_idx] + u_m, v_m = kp.pt + + # wmap.poses[f_idx] is T_wc (camera-to-world) + T_wc = wmap.poses[f_idx] + X_w = mp.position + R_wc, t_wc = T_wc[:3, :3], T_wc[:3, 3] + X_c = R_wc.T @ (X_w - t_wc) # using T_wc + + Z = X_c[2] + if Z <= 0: + continue + + uv_h = K @ X_c + u_p = uv_h[0] / Z + v_p = uv_h[1] / Z + + sq_err += (u_p - u_m) ** 2 + (v_p - v_m) ** 2 + count += 2 + + return math.sqrt(sq_err / count) if count else 0.0 + + +# ------------------------------------------------------------ +# Unit‑tests +# ------------------------------------------------------------ +class TestBundleAdjustment(unittest.TestCase): + def test_two_view_ba(self): + wmap, kps = generate_scene(n_frames=2, add_noise=False) + e0 = reproj_rmse(wmap, kps) + self.assertAlmostEqual(e0, 0.0, places=3, msg="Initial reprojection error should be zero when add_noise=False") + bau.two_view_ba(wmap, K, kps, max_iters=30) + e1 = reproj_rmse(wmap, kps) + print(f"two_view_ba: e0={e0:.6f}, e1={e1:.6f}") + self.assertLessEqual(e1, e0 + 1e-9, msg=f"two_view_ba failed: {e0:.6f} → {e1:.6f}") + + def test_two_view_ba_with_noise(self): + wmap, kps = generate_scene(n_frames=2, add_noise=True) + e0 = reproj_rmse(wmap, kps) + bau.two_view_ba(wmap, K, kps, max_iters=30) + e1 = reproj_rmse(wmap, kps) + print(f"two_view_ba with noise: e0={e0:.6f}, e1={e1:.6f}") + self.assertLess(e1, e0, msg=f"two_view_ba failed to reduce error: {e0:.6f} → {e1:.6f}") + + def test_pose_only_ba(self): + wmap, kps = generate_scene(n_frames=3, add_noise=False) + e0 = reproj_rmse(wmap, kps, frames=[2]) + self.assertAlmostEqual(e0, 0.0, places=3) + bau.pose_only_ba(wmap, K, kps, frame_idx=2, max_iters=15) + e1 = reproj_rmse(wmap, kps, frames=[2]) + print(f"pose_only_ba: e0={e0:.6f}, e1={e1:.6f}") + self.assertLessEqual(e1, e0 + 1e-9, msg=f"pose_only_ba failed: {e0:.6f} → {e1:.6f}") + + def test_pose_only_ba_with_noise(self): + wmap, kps = generate_scene(n_frames=3, add_noise=True) + e0 = reproj_rmse(wmap, kps, frames=[2]) + bau.pose_only_ba(wmap, K, kps, frame_idx=2, max_iters=15) + e1 = reproj_rmse(wmap, kps, frames=[2]) + print(f"pose_only_ba with noise: e0={e0:.6f}, e1={e1:.6f}") + self.assertLess(e1, e0, msg=f"pose_only_ba failed to reduce error: {e0:.6f} → {e1:.6f}") + + def test_local_bundle_adjustment(self): + wmap, kps = generate_scene(n_frames=10, add_noise=False) + e0 = reproj_rmse(wmap, kps) + self.assertAlmostEqual(e0, 0.0, places=3, msg="Initial reprojection error should be zero when add_noise=False") + bau.local_bundle_adjustment(wmap, K, kps, center_kf_idx=9, window_size=8, max_iters=25) + e1 = reproj_rmse(wmap, kps) + print(f"local_bundle_adjustment: e0={e0:.6f}, e1={e1:.6f}") + self.assertLessEqual(e1, e0 + 1e-9, msg=f"local_bundle_adjustment failed: {e0:.6f} → {e1:.6f}") + + def test_local_bundle_adjustment_with_noise(self): + wmap, kps = generate_scene(n_frames=10, add_noise=True) + e0 = reproj_rmse(wmap, kps) + bau.local_bundle_adjustment(wmap, 
K, kps, center_kf_idx=9, window_size=8, max_iters=25) + e1 = reproj_rmse(wmap, kps) + print(f"local_bundle_adjustment with noise: e0={e0:.6f}, e1={e1:.6f}") + self.assertLess(e1, e0, msg=f"local_bundle_adjustment failed to reduce error: {e0:.2f} → {e1:.2f}") + + +if __name__ == "__main__": + # Fallback to unittest runner when executed directly + unittest.main(verbosity=2) \ No newline at end of file diff --git a/modules/SimpleSLAM/tests/test_landmark_utils.py b/modules/SimpleSLAM/tests/test_landmark_utils.py new file mode 100644 index 0000000000..611b051b61 --- /dev/null +++ b/modules/SimpleSLAM/tests/test_landmark_utils.py @@ -0,0 +1,142 @@ +# tests/test_landmark_utils.py +""" +Unit-tests for landmark_utils.py. + +We assume: + * pytest, numpy and scipy are available. + * landmark_utils.py sits on the Python path. +""" +from __future__ import annotations + +import sys +import types +import numpy as np +import pytest + +import slam.core.landmark_utils as lu + +# --------------------------------------------------------------------------- # +# Helpers / fixtures +# --------------------------------------------------------------------------- # +@pytest.fixture +def sample_points() -> np.ndarray: + """A simple, well-conditioned 3-D point cloud (float64).""" + return np.array( + [ + [0.0, 0.0, 0.0], + [1.0, 0.0, 0.0], + [0.0, 1.0, 0.0], + [0.0, 0.0, 1.0], + ], + dtype=np.float64, + ) + + +@pytest.fixture +def map_with_points(sample_points) -> lu.Map: + """A map pre-populated with 4 landmarks.""" + m = lu.Map() + m.add_points(sample_points) + return m + + +# --------------------------------------------------------------------------- # +# MapPoint +# --------------------------------------------------------------------------- # +def test_mappoint_add_observation(): + mp = lu.MapPoint(id=0, position=np.zeros(3)) + + # --- new: fabricate two dummy 32-byte descriptors -------------------- # + desc1 = np.arange(32, dtype=np.uint8) + desc2 = np.arange(32, dtype=np.uint8)[::-1] + + mp.add_observation(frame_idx=5, kp_idx=17, descriptor=desc1) + mp.add_observation(frame_idx=6, kp_idx=3, descriptor=desc2) + + # (frame_idx, kp_idx) bookkeeping should still be correct + assert [(f, k) for f, k, _ in mp.observations] == [(5, 17), (6, 3)] + + # descriptors must be stored verbatim (and by value, not reference) + np.testing.assert_array_equal(mp.observations[0][2], desc1) + np.testing.assert_array_equal(mp.observations[1][2], desc2) + +# --------------------------------------------------------------------------- # +# Map-level functionality +# --------------------------------------------------------------------------- # +def test_add_points_and_accessors(sample_points): + m = lu.Map() + + ids = m.add_points(sample_points) # default colours + assert ids == list(range(4)), "Returned ids should be consecutive from 0" + assert len(m) == 4, "__len__ should reflect number of landmarks" + + # geometry + np.testing.assert_array_equal(m.get_point_array(), sample_points) + + # default colours should be all-ones, float32 + cols = m.get_color_array() + assert cols.dtype == np.float32 + np.testing.assert_array_equal(cols, np.ones_like(sample_points, dtype=np.float32)) + + # explicit colours + custom_cols = np.random.rand(4, 3).astype(np.float32) + m2 = lu.Map() + m2.add_points(sample_points, colours=custom_cols) + np.testing.assert_array_equal(m2.get_color_array(), custom_cols) + + +def test_add_points_invalid_shape(): + m = lu.Map() + bad = np.zeros((5, 2)) + with pytest.raises(ValueError): + m.add_points(bad) + + +def 
test_add_pose_and_bad_shape(): + m = lu.Map() + good_pose = np.eye(4) + m.add_pose(good_pose) + assert len(m.poses) == 1 + np.testing.assert_array_equal(m.poses[0], good_pose) + + bad_pose = np.eye(3) + with pytest.raises(AssertionError): + m.add_pose(bad_pose) + + +def test_point_ids_and_length(sample_points): + m = lu.Map() + m.add_points(sample_points) + assert m.point_ids() == [0, 1, 2, 3] + assert len(m) == 4 + + +# --------------------------------------------------------------------------- # +# Duplicate-landmark fusion +# --------------------------------------------------------------------------- # +def test_fuse_closeby_duplicate_landmarks_merging(): + m = lu.Map() + pts = np.array( + [ + [0.00, 0.00, 0.00], + [0.02, 0.00, 0.00], # within 5 cm of the first ⇒ should merge + [1.00, 0.00, 0.00], + ] + ) + m.add_points(pts) + + m.fuse_closeby_duplicate_landmarks(radius=0.05) + + assert len(m) == 2, "Two nearby points should have merged into one" + + # the surviving landmark at id=0 should now sit at their mean position + np.testing.assert_allclose( + m.points[0].position, np.array([0.01, 0.0, 0.0]), atol=1e-6 + ) + + +def test_fuse_closeby_duplicate_landmarks_no_merging(sample_points): + m = lu.Map() + m.add_points(sample_points) + m.fuse_closeby_duplicate_landmarks(radius=0.01) # radius too small + assert len(m) == 4, "No points should merge when radius is tiny" diff --git a/modules/SimpleSLAM/tests/test_lightglue_vs_manual.py b/modules/SimpleSLAM/tests/test_lightglue_vs_manual.py new file mode 100644 index 0000000000..2e27b1a3d1 --- /dev/null +++ b/modules/SimpleSLAM/tests/test_lightglue_vs_manual.py @@ -0,0 +1,74 @@ +import pytest +import cv2 +import numpy as np +import torch +from types import SimpleNamespace + +# adjust this import to match your project layout +from slam.core.features_utils import ( + init_feature_pipeline, + _lightglue_detect_and_match, + feature_extractor, + feature_matcher, +) + +@pytest.fixture(scope="module") +def synthetic_pair(): + """ + Create two simple 200×200 BGR images, each with four white dots. + The second is a small translation of the first. 
+ """ + img1 = np.zeros((200,200,3), dtype=np.uint8) + img2 = np.zeros((200,200,3), dtype=np.uint8) + pts = [(50,50), (150,50), (50,150), (150,150)] + for x,y in pts: + cv2.circle(img1, (x,y), 5, (255,255,255), -1) + cv2.circle(img2, (x+5,y+3), 5, (255,255,255), -1) + return img1, img2 + +def test_lightglue_pipeline_matches_manual(synthetic_pair): + img1, img2 = synthetic_pair + + # build a fake args object + args = SimpleNamespace(use_lightglue=True, + detector=None, # not used for LG + matcher=None, + min_conf=0.0) + + # 1) initialize LightGlue extractor & matcher + extractor, matcher = init_feature_pipeline(args) + + # 2) run the “direct” LG detect+match + kp1_dir, kp2_dir, desc1_dir, desc2_dir, matches_dir = ( + _lightglue_detect_and_match(img1, img2, extractor, matcher) + ) + + # 3) run our new split API + kp1_man, desc1_man = feature_extractor(args, img1, extractor) + kp2_man, desc2_man = feature_extractor(args, img2, extractor) + matches_man = feature_matcher(args, + kp1_man, kp2_man, + desc1_man, desc2_man, + matcher) + + # --- Assertions --- + + # a) same number of keypoints & identical positions + assert len(kp1_dir) == len(kp1_man) + for kd, km in zip(kp1_dir, kp1_man): + assert kd.pt == pytest.approx(km.pt) + + assert len(kp2_dir) == len(kp2_man) + for kd, km in zip(kp2_dir, kp2_man): + assert kd.pt == pytest.approx(km.pt) + + # b) descriptors are identical tensors + # (direct returns torch.Tensor, manual too) + assert torch.allclose(desc1_dir, desc1_man) + assert torch.allclose(desc2_dir, desc2_man) + + # c) same matches (queryIdx/trainIdx) + assert len(matches_dir) == len(matches_man) + for mdir, mman in zip(matches_dir, matches_man): + assert mdir.queryIdx == mman.queryIdx + assert mdir.trainIdx == mman.trainIdx diff --git a/modules/SimpleSLAM/tests/test_multi_view_triangulation-minimal.py b/modules/SimpleSLAM/tests/test_multi_view_triangulation-minimal.py new file mode 100644 index 0000000000..dce74396b9 --- /dev/null +++ b/modules/SimpleSLAM/tests/test_multi_view_triangulation-minimal.py @@ -0,0 +1,73 @@ +import numpy as np +from slam.core.triangulation_utils import multi_view_triangulation + +def build_camera(pose_w_c, K): + """Return 3×4 projection matrix from camera→world pose.""" + return K @ np.linalg.inv(pose_w_c)[:3, :4] + +def random_pose(tx=0.0, ty=0.0, tz=0.0): + """Simple axis-aligned translation pose_w_c.""" + T = np.eye(4) + T[:3, 3] = [tx, ty, tz] + return T + +def test_triangulation_noise_free(): + # ---------- synthetic scene -------------------------------------- + fx = fy = 500.0 + cx = cy = 320.0 + K = np.array([[fx, 0, cx], + [0, fy, cy], + [0, 0, 1]]) + + # camera 0 at origin, camera 1 translated 1 m on +X, camera 2 on +Y + poses = [random_pose(0, 0, 0), + random_pose(1, 0, 0), + random_pose(0, 1, 0)] + + # ground-truth world point (in front of all cameras) + X_w_gt = np.array([2.0, 1.5, 8.0]) + + # synthetic pixel observations + pts2d = [] + for T in poses: + pc = (np.linalg.inv(T) @ np.append(X_w_gt, 1))[:3] + uv = (K @ pc)[:2] / pc[2] + pts2d.append(uv) + + # ---------- call the function ------------------------------------ + X_w_est = multi_view_triangulation( + K, poses, np.float32(pts2d), + min_depth=0.5, max_depth=50.0, max_rep_err=0.5) + + assert X_w_est is not None, "Triangulation unexpectedly failed" + # sub-millimetre accuracy in noise-free synthetic setting + assert np.allclose(X_w_est, X_w_gt, atol=1e-3) + +def test_triangulation_with_pixel_noise(): + np.random.seed(42) + fx = fy = 600.0 + cx = cy = 320.0 + K = np.array([[fx, 0, cx], + [0, 
fy, cy], + [0, 0, 1]]) + + poses = [random_pose(0, 0, 0), + random_pose(1, 0.2, 0), + random_pose(-0.3, 1, 0), + random_pose(0.5, -0.1, 0.3)] + + X_w_gt = np.array([-1.5, 0.8, 6.5]) + pts2d = [] + for T in poses: + pc = (np.linalg.inv(T) @ np.append(X_w_gt, 1))[:3] + uv = (K @ pc)[:2] / pc[2] + uv += np.random.normal(scale=0.4, size=2) # add 0.4-px Gaussian noise + pts2d.append(uv) + + X_w_est = multi_view_triangulation( + K, poses, np.float32(pts2d), + min_depth=0.5, max_depth=50.0, max_rep_err=2.0) + + assert X_w_est is not None, "Triangulation failed with moderate noise" + # centimetre-level accuracy is fine here + assert np.linalg.norm(X_w_est - X_w_gt) < 0.05 diff --git a/modules/SimpleSLAM/tests/test_multi_view_utils.py b/modules/SimpleSLAM/tests/test_multi_view_utils.py new file mode 100644 index 0000000000..c35d87e085 --- /dev/null +++ b/modules/SimpleSLAM/tests/test_multi_view_utils.py @@ -0,0 +1,190 @@ +"""pytest test‑suite for `multi_view_utils.py` +================================================ + +Goals +----- +1. **Function integration** – + * `multi_view_triangulation()` + * `MultiViewTriangulator` + +2. **Real‑world wiring** – whenever the project’s own + `landmark_utils.Map` implementation is importable we use it, giving an + *integration* rather than pure‑unit test. When that class is missing (for + example in a stripped‑down CI job) we transparently fall back to a *very* + small stub that exposes only the methods/attributes the triangulator needs. + +3. **Numerical accuracy** – the synthetic scene is designed so that with 0.4 px + image noise and five views the RMS localisation error should be + ≲ 5 cm. If the implementation regresses we’ll catch it. + +Run with:: + + pytest tests/test_multi_view_utils.py +""" + +from __future__ import annotations + +import importlib +import sys +import types +from pathlib import Path + +import numpy as np +import pytest + +# --------------------------------------------------------------------------- # +# Locate & load the module under test – `multi_view_utils` +# --------------------------------------------------------------------------- # +# We try these locations in order: +# 1. Installed package → `slam.core.multi_view_utils` +# 2. Source tree root ("editable" dev) → `multi_view_utils` +# 3. Direct path fallback so the test also works when launched from a +# separate build/CI directory. + +mvu: types.ModuleType + +for _modname in ("slam.core.multi_view_utils", "multi_view_utils"): + try: + mvu = importlib.import_module(_modname) # type: ignore + break + except ModuleNotFoundError: # pragma: no cover – probe next option + pass + +# Public call‑ables +multi_view_triangulation = mvu.multi_view_triangulation # type: ignore[attr‑defined] +MultiViewTriangulator = mvu.MultiViewTriangulator # type: ignore[attr‑defined] + +# --------------------------------------------------------------------------- # +# Map implementation (real vs stub) +# --------------------------------------------------------------------------- # +# 1. Prefer the full implementation shipped with the repo. +# 2. Otherwise synthesize the minimal surface‑area stub so that the Triangulator +# can still be unit‑tested. 
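# ---------------------------------------------------------------------------
# Editor's note (not part of the patch): the comment above mentions falling
# back to a minimal stub Map, but the import below pulls in the real
# `slam.core.landmark_utils.Map` unconditionally.  The sketch that follows is
# one *guess* at what such a stub could look like, assuming the triangulator
# only needs `add_points()`, `point_ids()` and a `points` dict whose values
# expose `.position` (the same surface exercised by test_landmark_utils.py).
# Verify against multi_view_utils.py before using it; names prefixed with
# `_Stub` are hypothetical and do not exist in the repository.
import numpy as np


class _StubMapPoint:
    """Bare-bones landmark: an id, a 3-D position and an observation list."""

    def __init__(self, pid: int, xyz) -> None:
        self.id = pid
        self.position = np.asarray(xyz, dtype=np.float64)
        self.observations: list = []


class _StubMap:
    """Minimal Map stand-in: a dict of points plus add_points()/point_ids()."""

    def __init__(self) -> None:
        self.points: dict[int, _StubMapPoint] = {}
        self._next_id = 0

    def add_points(self, xyz, colours=None) -> list[int]:
        ids: list[int] = []
        for p in np.atleast_2d(np.asarray(xyz, dtype=np.float64)):
            self.points[self._next_id] = _StubMapPoint(self._next_id, p)
            ids.append(self._next_id)
            self._next_id += 1
        return ids

    def point_ids(self) -> list[int]:
        return list(self.points.keys())

    def __len__(self) -> int:
        return len(self.points)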
+from slam.core.landmark_utils import Map as SLAMMap # type: ignore[attr‑defined] + + +# --------------------------------------------------------------------------- # +# Synthetic scene generation helpers +# --------------------------------------------------------------------------- # + +def _make_camera_pose(tx: float) -> np.ndarray: + """Camera looks down +Z, translated along +X by *tx* (c→w).""" + T = np.eye(4) + T[0, 3] = tx + return T + + +def _generate_scene( + n_views: int = 5, + n_pts: int = 40, + noise_px: float = 0.4, + seed: int | None = None, +): + """Build a toy scene and return (K, poses_w_c, pts_w, 2‑D projections).""" + + rng = np.random.default_rng(seed) + + # -- basic pin‑hole intrinsics (640×480) -- + K = np.array([[800.0, 0.0, 320.0], [0.0, 800.0, 240.0], [0.0, 0.0, 1.0]]) + + # -- camera trajectory (translate along X) -- + poses = [_make_camera_pose(t) for t in np.linspace(0.0, 1.0, n_views)] + + # -- random 3‑D points in front of cameras (z ∈ [4,6]) -- + pts_w = np.vstack( + ( + rng.uniform(-1.0, 1.0, n_pts), # x + rng.uniform(-1.0, 1.0, n_pts), # y + rng.uniform(4.0, 6.0, n_pts), # z – ensure positive depth + ) + ).T + + # -- project each point into every view and add Gaussian pixel noise -- + pts2d_all: list[np.ndarray] = [] + for T_w_c in poses: + P_c_w = np.linalg.inv(T_w_c) # w→c + uv_view = [] + for X_w in pts_w: + Xc_h = P_c_w @ np.append(X_w, 1.0) + uv = (K @ Xc_h[:3])[:2] / Xc_h[2] + uv += rng.normal(0.0, noise_px, 2) + uv_view.append(uv) + pts2d_all.append(np.asarray(uv_view, dtype=np.float32)) + + return K, poses, pts_w, pts2d_all + +# --------------------------------------------------------------------------- # +# Light cv2.KeyPoint substitute (Triangulator only needs `.pt`) +# --------------------------------------------------------------------------- # + +class _KeyPoint: # pylint: disable=too‑few‑public‑methods + def __init__(self, x: float, y: float): + self.pt = (float(x), float(y)) + +# --------------------------------------------------------------------------- # +# Tests +# --------------------------------------------------------------------------- # + + +def test_multi_view_triangulation_accuracy(): + """Direct N‑view triangulation should achieve < 5 cm RMS error.""" + K, poses, pts_w, pts2d = _generate_scene() + + errs: list[float] = [] + for j in range(len(pts_w)): + uv_track = [view[j] for view in pts2d] + X_hat = multi_view_triangulation( + K, + poses, + np.float32(uv_track), + min_depth=0.1, + max_depth=10.0, + max_rep_err=2.0, + ) + assert X_hat is not None, "Triangulation unexpectedly returned None" + errs.append(np.linalg.norm(X_hat - pts_w[j])) + + rms = float(np.sqrt(np.mean(np.square(errs)))) + assert rms < 5e-2, f"RMS error too high: {rms:.4f} m" + + +@pytest.mark.parametrize("min_views", [2, 3]) +def test_multiview_triangulator_pipeline(min_views: int): + """Full pipeline: incremental key‑frames → map landmarks.""" + K, poses, pts_w, pts2d = _generate_scene() + + tri = MultiViewTriangulator( + K, + min_views=min_views, + merge_radius=0.1, + max_rep_err=2.0, + min_depth=0.1, + max_depth=10.0, + ) + + world_map = SLAMMap() + + # ---- Feed key‑frames ---- + for frame_idx, (pose_w_c, uv_view) in enumerate(zip(poses, pts2d)): + kps: list[_KeyPoint] = [] + track_map: dict[int, int] = {} + for pid, (u, v) in enumerate(uv_view): + kps.append(_KeyPoint(u, v)) + track_map[pid] = pid # 1‑to‑1 track id + dummy_img = np.zeros((480, 640, 3), dtype=np.uint8) + descs = [np.zeros(32, dtype=np.uint8) for _ in kps] + tri.add_keyframe(frame_idx, pose_w_c, 
kps, track_map, dummy_img, descs) + + new_ids = tri.triangulate_ready_tracks(world_map) + assert len(new_ids) == len(pts_w), "Not all points were triangulated" + + # ---- Numerical accuracy ---- + errs: list[float] = [] + for pid in new_ids: + p_obj = world_map.points[pid] + # Real MapPoint uses `.position`; stub stores the same attribute name. + X_hat = p_obj.position if hasattr(p_obj, "position") else p_obj.xyz # type: ignore[attr‑defined] + errs.append(np.linalg.norm(X_hat - pts_w[pid])) + + rms = float(np.sqrt(np.mean(np.square(errs)))) + assert rms < 5e-2, f"RMS error too high: {rms:.4f} m" diff --git a/modules/SimpleSLAM/tests/test_pnp_utils.py b/modules/SimpleSLAM/tests/test_pnp_utils.py new file mode 100644 index 0000000000..f4e46939a0 --- /dev/null +++ b/modules/SimpleSLAM/tests/test_pnp_utils.py @@ -0,0 +1,244 @@ +"""Unit tests for pnp_utils.py + +Test coverage goals +------------------- +Functions under test: + * project_points + * associate_landmarks + * refine_pose_pnp + +We generate synthetic, perfectly controlled data (and some noisy variants) so the +expected geometric relationships are analytically known. + +Conventions verified: + * refine_pose_pnp returns (R, t) mapping world -> camera (OpenCV convention). + * project_points expects pose_w_c (camera->world) and internally inverts it. + +Edge cases handled: + * Empty inputs in associate_landmarks + * Insufficient points for refine_pose_pnp + +Precision thresholds: + * Rotation matrices compared with max abs diff <= 1e-6 (ideal / noise-free) + * Translations compared with <= 1e-6 (ideal) or small tolerance with noise + +These tests use only NumPy + OpenCV (cv2). If OpenCV is not available the file +will skip tests that require it. +""" +from __future__ import annotations +import math +import numpy as np +import pytest + +try: + import cv2 # type: ignore + OPENCV_AVAILABLE = True +except Exception: # pragma: no cover + raise ImportError("OpenCV (cv2) is required for some tests") + +# Import functions under test +import importlib +from slam.core import pnp_utils +project_points = pnp_utils.project_points +associate_landmarks = pnp_utils.associate_landmarks +refine_pose_pnp = pnp_utils.refine_pose_pnp + + +# --------------------------------------------------------------------------- +# Helper creators +# --------------------------------------------------------------------------- + +def random_rotation(rng: np.random.Generator) -> np.ndarray: + """Return a random 3x3 rotation using axis-angle.""" + axis = rng.normal(size=3) + axis /= np.linalg.norm(axis) + angle = rng.uniform(-math.pi, math.pi) + K = np.array([[0, -axis[2], axis[1]], + [axis[2], 0, -axis[0]], + [-axis[1], axis[0], 0]]) + R = np.eye(3) + math.sin(angle) * K + (1 - math.cos(angle)) * (K @ K) + return R + +def make_camera_pose(rng: np.random.Generator): + """Return (pose_w_c, R_wc, t_wc) camera->world homogeneous pose.""" + R_wc = random_rotation(rng) + t_wc = rng.uniform(-2, 2, size=3) + pose_w_c = np.eye(4) + pose_w_c[:3, :3] = R_wc + pose_w_c[:3, 3] = t_wc + return pose_w_c, R_wc, t_wc + + +def invert_pose(T_w_c: np.ndarray) -> np.ndarray: + R = T_w_c[:3, :3] + t = T_w_c[:3, 3] + T_c_w = np.eye(4) + T_c_w[:3, :3] = R.T + T_c_w[:3, 3] = -R.T @ t + return T_c_w + +# --------------------------------------------------------------------------- +# project_points +# --------------------------------------------------------------------------- + +def test_project_points_identity_camera_center(): + K = np.array([[500, 0, 320], [0, 500, 240], [0, 0, 1]], dtype=float) + 
pose_w_c = np.eye(4) # camera at origin, world == camera + pts_w = np.array([[0, 0, 2], [1, 1, 2], [-1, -1, 2]], dtype=float) + uv = project_points(K, pose_w_c, pts_w) + # manual projection: u = fx*x/z + cx, v = fy*y/z + cy + expected = np.array([ + [500*0/2 + 320, 500*0/2 + 240], + [500*1/2 + 320, 500*1/2 + 240], + [500*-1/2 + 320, 500*-1/2 + 240] + ], dtype=np.float32) + assert np.allclose(uv, expected) + + +def test_project_points_random_pose_roundtrip(): + rng = np.random.default_rng(42) + K = np.array([[450, 0, 300], [0, 460, 200], [0, 0, 1]], dtype=float) + pose_w_c, R_wc, t_wc = make_camera_pose(rng) + # create random world points in front of camera: sample in camera frame then map to world + pts_c = rng.uniform(0.5, 5.0, size=(50, 3)) + pts_c[:, :2] -= 0.5 # some lateral spread + # camera->world pose: X_w = R_wc X_c + t_wc + pts_w = (R_wc @ pts_c.T).T + t_wc + # project + uv = project_points(K, pose_w_c, pts_w) + # reconstruct normalized rays and verify direction consistency + T_c_w = np.linalg.inv(pose_w_c) + R_cw = T_c_w[:3, :3] + t_cw = T_c_w[:3, 3] + # backproject one point: s * x_norm = R_cw (X_w - t_wc) ; we just check forward consistency + x = pts_w[0] + X_c = R_cw @ (x - pose_w_c[:3, 3]) + u_pred = K @ X_c + u_pred = u_pred[:2] / u_pred[2] + assert np.allclose(uv[0], u_pred, atol=1e-6) + +# --------------------------------------------------------------------------- +# associate_landmarks +# --------------------------------------------------------------------------- + +def make_keypoints(xy: np.ndarray): + kps = [] + for (u, v) in xy: + kp = cv2.KeyPoint(float(u), float(v), 1) + kps.append(kp) + return kps + +@pytest.mark.skipif(not OPENCV_AVAILABLE, reason="OpenCV required for keypoints") +def test_associate_landmarks_basic(): + K = np.array([[400, 0, 320], [0, 400, 240], [0, 0, 1]], dtype=float) + pose_w_c = np.eye(4) + pts_w = np.array([[0, 0, 2], [0.5, 0, 2], [0, 0.5, 2]], dtype=float) + proj = project_points(K, pose_w_c, pts_w) + # Add slight offsets so nearest still correct + keypoints_xy = proj + np.array([[0.2, -0.1],[0.1,0.2],[-0.15,0.05]]) + kps = make_keypoints(keypoints_xy) + pts3d, pts2d, kp_ids = associate_landmarks(K, pose_w_c, pts_w, kps, search_rad=5) + assert pts3d.shape == (3,3) + assert pts2d.shape == (3,2) + assert len(kp_ids) == 3 + # Each associated 2D close to projection + assert np.all(np.linalg.norm(pts2d - proj, axis=1) < 1.0) + +@pytest.mark.skipif(not OPENCV_AVAILABLE, reason="OpenCV required for keypoints") +def test_associate_landmarks_empty_inputs(): + K = np.eye(3) + pose_w_c = np.eye(4) + pts3d, pts2d, idxs = associate_landmarks(K, pose_w_c, np.empty((0,3)), [], search_rad=5) + assert pts3d.size == 0 and pts2d.size == 0 and idxs == [] + +# --------------------------------------------------------------------------- +# refine_pose_pnp +# --------------------------------------------------------------------------- +@pytest.mark.skipif(not OPENCV_AVAILABLE, reason="OpenCV solvePnP required") +def test_refine_pose_pnp_world_to_camera_convention(): + rng = np.random.default_rng(0) + # Intrinsics + fx, fy = 600, 610 + cx, cy = 320, 240 + K = np.array([[fx, 0, cx],[0, fy, cy],[0,0,1]], dtype=float) + + # Ground truth camera pose (world->camera) chosen randomly + R_wc = random_rotation(rng) # this is camera->world; invert for world->camera + t_wc = rng.uniform(-1,1,size=3) + T_w_c = np.eye(4) + T_w_c[:3,:3] = R_wc + T_w_c[:3,3] = t_wc + T_c_w = invert_pose(T_w_c) + R_cw = T_c_w[:3,:3] + t_cw = T_c_w[:3,3] + + # Synthesize world points visible in 
camera + pts_c = rng.uniform(1.0,4.0,size=(100,3)) + pts_c[:,0:2] -= 0.5 + pts_w = (R_wc @ pts_c.T).T + t_wc + + # Project to image using ground truth world->camera extrinsics (R_cw, t_cw) + P = K @ np.hstack([R_cw, t_cw.reshape(3,1)]) + pts_h = np.hstack([pts_w, np.ones((len(pts_w),1))]) + proj = (P @ pts_h.T).T + uv = (proj[:,:2] / proj[:,2:]) + + # Run PnP with a subset & small noise + sel = rng.choice(len(pts_w), size=60, replace=False) + pts3d = pts_w[sel] + pts2d = uv[sel] + rng.normal(scale=0.5, size=(60,2)) # pixel noise + + R_est, t_est = refine_pose_pnp(K, pts3d.astype(np.float32), pts2d.astype(np.float32)) + assert R_est is not None and t_est is not None + # Check they map world->camera: compare to ground truth R_cw, t_cw + # Allow some tolerance due to noise & RANSAC + assert np.allclose(R_est @ R_est.T, np.eye(3), atol=1e-6) + assert np.allclose(np.linalg.det(R_est), 1.0, atol=1e-6) + # Evaluate alignment of rotation + rot_err = np.rad2deg(np.arccos(max(-1.0, min(1.0, ((np.trace(R_est.T @ R_cw)-1)/2))))) + assert rot_err < 2.0 # degrees + assert np.linalg.norm(t_est - t_cw) < 0.1 + +@pytest.mark.skipif(not OPENCV_AVAILABLE, reason="OpenCV solvePnP required") +def test_refine_pose_pnp_insufficient_points(): + K = np.eye(3) + pts3d = np.random.randn(3,3).astype(np.float32) + pts2d = np.random.randn(3,2).astype(np.float32) + R, t = refine_pose_pnp(K, pts3d, pts2d) + assert R is None and t is None + + +# --------------------------------------------------------------------------- +# Integration test: using associate_landmarks + refine_pose_pnp pipeline +# --------------------------------------------------------------------------- +@pytest.mark.skipif(not OPENCV_AVAILABLE, reason="OpenCV required") +def test_pipeline_association_then_pnp(): + rng = np.random.default_rng(5) + K = np.array([[500,0,320],[0,500,240],[0,0,1]], dtype=float) + pose_w_c, R_wc, t_wc = make_camera_pose(rng) + # Generate random 3D points in camera frame then map to world + pts_c = rng.uniform(1.0,6.0,size=(120,3)) + pts_c[:, :2] -= 0.5 + pts_w = (R_wc @ pts_c.T).T + t_wc + # Project to image and add noise + uv = project_points(K, pose_w_c, pts_w) + rng.normal(scale=0.3,size=(len(pts_w),2)) + # Create keypoints at noisy positions + kps = [cv2.KeyPoint(float(u), float(v), 1) for (u,v) in uv] + # Subsample landmark set to mimic existing map + map_sel = rng.choice(len(pts_w), size=80, replace=False) + pts_w_map = pts_w[map_sel] + pts3d, pts2d, _ = associate_landmarks(K, pose_w_c, pts_w_map, kps, search_rad=4.0) + assert len(pts3d) >= 20 # should match many + R_est, t_est = refine_pose_pnp(K, pts3d, pts2d) + assert R_est is not None + # Compare to ground truth world->camera (R_cw,t_cw) + T_c_w = np.linalg.inv(pose_w_c) + R_cw = T_c_w[:3,:3] + t_cw = T_c_w[:3,3] + rot_err = np.rad2deg(np.arccos(max(-1.0, min(1.0, (np.trace(R_est.T @ R_cw)-1)/2)))) + assert rot_err < 2.5 + assert np.linalg.norm(t_est - t_cw) < 0.15 + +if __name__ == '__main__': # pragma: no cover + import sys, pytest + sys.exit(pytest.main([__file__, '-vv'])) diff --git a/modules/SimpleSLAM/tests/test_pose_utils.py b/modules/SimpleSLAM/tests/test_pose_utils.py new file mode 100644 index 0000000000..6874405f03 --- /dev/null +++ b/modules/SimpleSLAM/tests/test_pose_utils.py @@ -0,0 +1,10 @@ +import numpy as np +from slam.core.pose_utils import _pose_inverse, project_to_SO3 + +def test_pose_inverse(): + R = project_to_SO3(np.random.randn(3,3)) # produce a valid rotation + t = np.random.randn(3) + T = np.eye(4); T[:3,:3]=R; T[:3,3]=t + T_inv = 
_pose_inverse(T) + I = T_inv @ T + assert np.allclose(I, np.eye(4), atol=1e-10) diff --git a/modules/SimpleSLAM/tests/test_track_with_pnp.py b/modules/SimpleSLAM/tests/test_track_with_pnp.py new file mode 100644 index 0000000000..bae6a6dae1 --- /dev/null +++ b/modules/SimpleSLAM/tests/test_track_with_pnp.py @@ -0,0 +1,172 @@ +# tests/test_track_with_pnp.py +import os +import sys +import pathlib +import importlib +import numpy as np +import cv2 + +# --- Headless / GUI-off for tests --- +os.environ.setdefault("OPENCV_LOG_LEVEL", "SILENT") +os.environ.setdefault("QT_QPA_PLATFORM", "offscreen") +try: + import matplotlib + matplotlib.use("Agg") +except Exception: + pass + +# --- Put repo root (which contains 'slam/') on sys.path --- +ROOT = pathlib.Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +# --- Import the module under test (supports both layouts) --- +try: + main = importlib.import_module("slam.monocular.main") +except ImportError: + # Fallback if you run tests inside slam/monocular/ + main = importlib.import_module("main") + +track_with_pnp = getattr(main, "track_with_pnp") + +# Disable visualization during tests +def _no_viz(*args, **kwargs): + return +if hasattr(main, "visualize_pnp_reprojection"): + setattr(main, "visualize_pnp_reprojection", _no_viz) + +# ---------- Minimal stubs for world map / map points ---------- +class _DummyPoint: + def __init__(self, pid, xyz, desc=None): + self.id = pid + self.xyz = np.asarray(xyz, dtype=np.float32) + self.observations = [] + self.rep_desc = np.zeros(32, np.uint8) if desc is None else np.asarray(desc, np.uint8) + + def add_observation(self, frame_no, kp_idx, desc): + self.observations.append((int(frame_no), int(kp_idx))) + self.rep_desc = np.asarray(desc, np.uint8) + +class _DummyMap: + def __init__(self, pts_w): + self.points = [_DummyPoint(i, p) for i, p in enumerate(pts_w)] + + def point_ids(self): + return list(range(len(self.points))) + + def get_point_array(self): + return np.asarray([p.xyz for p in self.points], dtype=np.float32) + +# --------------------- Helpers for the test --------------------- +def _K(fx=500.0, fy=500.0, cx=320.0, cy=240.0): + return np.array([[fx, 0, cx], + [0, fy, cy], + [0, 0, 1]], dtype=np.float64) + +def _Twc(R=np.eye(3), t=np.zeros(3)): + T = np.eye(4, dtype=np.float64) + T[:3, :3] = R + T[:3, 3] = np.asarray(t, dtype=np.float64) + return T + +def _project_points(K, Twc, pts_w): + Tcw = np.linalg.inv(Twc) + P = (K @ Tcw[:3, :4]) + pts_h = np.hstack([pts_w, np.ones((len(pts_w), 1))]).T + uvw = P @ pts_h + uv = (uvw[:2] / uvw[2]).T + return uv.astype(np.float32) + +def _make_keypoints(uv): + return [cv2.KeyPoint(float(u), float(v), 1) for (u, v) in uv] + +class _Args: + def __init__(self, + proj_radius=12.0, + pnp_min_inliers=30, + ransac_thresh=3.0, + pnp_inlier_px=3.0): + self.proj_radius = float(proj_radius) + self.pnp_min_inliers = int(pnp_min_inliers) + self.ransac_thresh = float(ransac_thresh) + # some versions of main look for this explicitly + self.pnp_inlier_px = float(pnp_inlier_px) + +# --------------------------- Tests ----------------------------- +def test_track_with_pnp_happy_path(): + rng = np.random.default_rng(42) + K = _K() + + # Landmarks in world (3–8 m ahead) + N = 150 + X = rng.uniform(-2.0, 2.0, size=N) + Y = rng.uniform(-0.5, 0.5, size=N) + Z = rng.uniform(3.0, 8.0, size=N) + pts_w = np.stack([X, Y, Z], axis=1).astype(np.float32) + + # Prev pose at origin; GT pose: small forward motion + 1° yaw + Twc_prev = _Twc() + yaw = 
np.deg2rad(1.0) + Rz = np.array([[np.cos(yaw), -np.sin(yaw), 0], + [np.sin(yaw), np.cos(yaw), 0], + [0, 0, 1]], dtype=np.float64) + t_gt = np.array([0.02, -0.01, 0.12], dtype=np.float64) + Twc_gt = _Twc(Rz, t_gt) + + # Synthetic current keypoints (GT projection + pixel noise) + uv = _project_points(K, Twc_gt, pts_w) + uv += rng.normal(0.0, 0.5, size=uv.shape).astype(np.float32) + kp_cur = _make_keypoints(uv) + + # Prev-frame placeholders + kp_prev = [] + desc_dim = 32 + desc_cur = rng.integers(0, 256, size=(len(kp_cur), desc_dim), dtype=np.uint8) + desc_prev = np.empty((0, desc_dim), dtype=np.uint8) + matches = [] + + world_map = _DummyMap(pts_w) + img2 = np.zeros((480, 640, 3), dtype=np.uint8) + args = _Args() + + ok, Twc_est, used_idx = track_with_pnp( + K, + kp_prev, kp_cur, desc_prev, desc_cur, matches, img2, + frame_no=1, + Twc_prev=Twc_prev, + world_map=world_map, args=args + ) + + assert ok, "PnP tracking should succeed" + assert isinstance(Twc_est, np.ndarray) and Twc_est.shape == (4, 4) + + # Translation error (allow a few cm due to pixel noise) + t_err = np.linalg.norm(Twc_est[:3, 3] - Twc_gt[:3, 3]) + assert t_err < 0.05, f"translation error too high: {t_err:.3f} m" + + # Rotation error + dR = Twc_gt[:3, :3].T @ Twc_est[:3, :3] + ang = np.degrees(np.arccos(np.clip((np.trace(dR) - 1.0) / 2.0, -1.0, 1.0))) + assert ang < 2.0, f"rotation error too high: {ang:.2f} deg" + + # Inliers / bookkeeping + assert isinstance(used_idx, set) + assert len(used_idx) >= args.pnp_min_inliers + obs_total = sum(len(p.observations) for p in world_map.points) + assert obs_total >= len(used_idx) + +def test_track_with_pnp_not_enough_data(): + K = _K() + world_map = _DummyMap(pts_w=np.empty((0, 3), dtype=np.float32)) + args = _Args() + + ok, Twc_est, used_idx = track_with_pnp( + K, + kp_prev=[], kp_cur=[], desc_prev=np.empty((0, 32), np.uint8), + desc_cur=np.empty((0, 32), np.uint8), matches=[], img2=np.zeros((10, 10), np.uint8), + frame_no=0, Twc_prev=_Twc(), world_map=world_map, args=args + ) + + assert not ok + assert Twc_est is None + assert isinstance(used_idx, set) and len(used_idx) == 0 diff --git a/modules/SimpleSLAM/tools/trajectory_eval.py b/modules/SimpleSLAM/tools/trajectory_eval.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/SimpleSLAM/tools/visualization.py b/modules/SimpleSLAM/tools/visualization.py new file mode 100644 index 0000000000..e69de29bb2
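# ---------------------------------------------------------------------------
# Editor's note (not part of the patch): the test files above repeatedly rely
# on the distinction between T_wc (camera->world) and T_cw (world->camera)
# poses and on the pin-hole projection X_c = R_wc^T (X_w - t_wc), u = K X_c / Z.
# The self-contained sketch below only illustrates that the two pose forms
# give the same projection; `invert_se3` and `project` are local helper names,
# not functions from the patched modules.  (The final two files in the diff,
# tools/trajectory_eval.py and tools/visualization.py, appear to be added as
# empty placeholders.)
import numpy as np


def invert_se3(T: np.ndarray) -> np.ndarray:
    """Invert a 4x4 rigid transform: camera->world in, world->camera out (or vice versa)."""
    R, t = T[:3, :3], T[:3, 3]
    T_inv = np.eye(4)
    T_inv[:3, :3] = R.T
    T_inv[:3, 3] = -R.T @ t
    return T_inv


def project(K: np.ndarray, T_wc: np.ndarray, X_w: np.ndarray):
    """Project a world point given a camera->world pose."""
    R_wc, t_wc = T_wc[:3, :3], T_wc[:3, 3]
    X_c = R_wc.T @ (X_w - t_wc)          # world -> camera frame
    uv = (K @ X_c)[:2] / X_c[2]          # perspective division
    return uv, X_c[2]


if __name__ == "__main__":
    K = np.array([[800.0, 0.0, 320.0], [0.0, 800.0, 240.0], [0.0, 0.0, 1.0]])
    T_wc = np.eye(4)
    T_wc[:3, 3] = [0.5, 0.0, 0.0]        # camera shifted 0.5 m along +X
    X_w = np.array([0.2, -0.1, 5.0])

    uv, depth = project(K, T_wc, X_w)

    # Same projection via the world->camera form: X_c = R_cw X_w + t_cw
    T_cw = invert_se3(T_wc)
    X_c = T_cw[:3, :3] @ X_w + T_cw[:3, 3]
    assert depth > 0 and np.allclose(uv, (K @ X_c)[:2] / X_c[2])
    print("pose-convention round-trip OK:", uv)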