Commit 9c57fdc8 authored by Clément Pinard

Deal with indoor videos

Use indoor flight data to rescale the COLMAP model
If that rescaling is not enough, registration will apply another rescale, so be robust to the change of scale for the different thresholds used
in occlusion generation
parent 4368bfa3
......@@ -30,7 +30,7 @@ def preprocess_metadata(metadata, proj):
speed = metadata[["speed_east", "speed_north", "speed_down"]].values * np.array([1, 1, -1])
timestamps = metadata["time"].values * 1e-6
positions = metadata[["x", "y", "z"]].values
if metadata["location_valid"].unique().tolist() == [0]:
if metadata["location_valid"].unique().tolist() == [False]:
metadata["indoor"] = True
positions = extrapolate_position(speed, timestamps, None, None)
else:
......@@ -41,10 +41,10 @@ def preprocess_metadata(metadata, proj):
invalidity_start = location_validity.index[location_validity == -1].tolist()
validity_start = location_validity.index[location_validity == 1].tolist()
if metadata["location_valid"].iloc[0] == 0:
if metadata["location_valid"].iloc[0]:
end = validity_start.pop(0)
positions[:end] = extrapolate_position(speed[:end], timestamps[:end], None, positions[end])
if metadata["location_valid"].iloc[-1] == 0:
if metadata["location_valid"].iloc[-1]:
start = invalidity_start.pop(-1) - 1
positions[start:] = extrapolate_position(speed[start:], timestamps[start:], positions[start], None)
......@@ -74,6 +74,7 @@ def extract_metadata(folder_path, file_path, native_wrapper, proj, w, h, f, save
metadata["framerate"] = f
metadata["video"] = file_path
metadata['frame'] = metadata.index + 1
metadata["location_valid"] = metadata["location_valid"] == 1
if save_path is not None:
metadata.to_csv(save_path)
return metadata
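A note on the extrapolation used in preprocess_metadata: when GPS is invalid at the start, the end, or for the whole flight, positions are filled in from the navdata speeds. A minimal sketch of the idea, assuming a simple speed-integration scheme (the repo's actual extrapolate_position may differ):

import numpy as np

def extrapolate_position(speeds, timestamps, initial_position, final_position):
    # Sketch: integrate speed (m/s) over time (s) into relative
    # displacements, then anchor the trajectory at whichever boundary
    # position is known. Both anchors None = purely relative (indoor case).
    dt = np.diff(timestamps, prepend=timestamps[0])[:, None]
    displacement = np.cumsum(speeds * dt, axis=0)
    if initial_position is not None:
        return initial_position + displacement
    if final_position is not None:
        return final_position + displacement - displacement[-1]
    return displacement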
......@@ -35,7 +35,7 @@ def save_intrinsics(cameras, images, output_dir, downscale=1):
for _, img in images.items():
cam = cameras[img.camera_id]
intrinsics = construct_intrinsics(cam)
- intrinsics_name = output_dir / Path(img.name).namebase + "_intrinsics.txt"
+ intrinsics_name = output_dir / Path(img.name).stem + "_intrinsics.txt"
np.savetxt(intrinsics_name, intrinsics)
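The namebase → stem renames throughout this commit follow the path.py library, which deprecated namebase in favour of stem to match pathlib. A quick illustration with standard pathlib (the behaviour is assumed identical in path.py):

from pathlib import Path

p = Path("images/frame_0042.jpg")
print(p.stem)  # frame_0042, the file name without its final suffix
print(p.name)  # frame_0042.jpg
# Building a sibling file name, as save_intrinsics does:
print(Path("output") / (p.stem + "_intrinsics.txt"))  # output/frame_0042_intrinsics.txt

With path.py, the expression output_dir / Path(img.name).stem + "_intrinsics.txt" works without extra parentheses because its paths subclass str, so + performs string concatenation.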
......@@ -140,7 +140,7 @@ def process_one_frame(img_path, depth_path, occ_path,
if interpolated:
output_img[:5] = output_img[-5:] = output_img[:, :5] = output_img[:, -5:] = [255, 128, 0]
- imwrite(video_output_dir/img_path.namebase + '.png', output_img)
+ imwrite(video_output_dir/img_path.stem + '.png', output_img)
parser = ArgumentParser(description='create a visualization from the created ground truth',
......@@ -184,7 +184,7 @@ def convert_dataset(final_model, depth_dir, images_root_folder, occ_dir,
cameras = []
for i in metadata["image_path"]:
- img_path = images_root_folder / Path(i).relpath("Videos")
+ img_path = images_root_folder / i
imgs.append(img_path)
fname = img_path.basename()
......@@ -221,7 +221,7 @@ def convert_dataset(final_model, depth_dir, images_root_folder, occ_dir,
raise e
if video:
- video_path = str(video_output_dir/'{}_groundtruth_viz.mp4'.format(video_output_dir.namebase))
+ video_path = str(video_output_dir/'{}_groundtruth_viz.mp4'.format(video_output_dir.stem))
glob_pattern = str(video_output_dir/'*.png')
ffmpeg.create_video(video_path, glob_pattern, framerate)
......
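For reference, the ffmpeg.create_video wrapper presumably shells out to a glob-based ffmpeg invocation along these lines (the wrapper's exact flags are an assumption; the ffmpeg options themselves are standard):

import subprocess

def create_video(video_path, glob_pattern, framerate):
    # Hypothetical sketch: encode every PNG matched by the glob into an
    # H.264 mp4 at the source framerate.
    subprocess.run(["ffmpeg", "-y",
                    "-framerate", str(framerate),
                    "-pattern_type", "glob", "-i", glob_pattern,
                    "-c:v", "libx264", "-pix_fmt", "yuv420p",
                    video_path], check=True)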
#!/bin/bash
# This script helps you install the necessary tools to construct a depth enabled dataset with Anafi videos
- # Note that CUDA and Anaconda need to be already installed
+ # Note that CUDA and Anaconda need to be already installed.
# For CUDA, try to install the latest package from https://developer.nvidia.com/cuda-downloads instead of the one installed by APT
# Also note that for repo to work, git needs to be configured with an email and name.
# It has been tested with Ubuntu 18.04 and Ubuntu 20.04
# This command makes sure that the .so files pointed to by the cmake commands are the right ones
......
......@@ -215,21 +215,31 @@ def main():
mxw.apply_transform_to_project(env["lidar_mlp"], env["aligned_mlp"], env["global_registration_matrix"])
else:
env["global_registration_matrix"] = get_matrix(env["matrix_path"])
mxw.apply_transform_to_project(env["lidar_mlp"], env["aligned_mlp"], env["global_registration_matrix"])
i += 1
if i not in args.skip_step:
print_step(i, "Occlusion Mesh computing")
'''Combine the MLP project files into a single ply file. We need the normals for the splats'''
if args.normals_method == "radius":
eth3d.compute_normals(env["with_normals_path"], env["aligned_mlp"], neighbor_radius=args.normals_radius)
else:
eth3d.compute_normals(env["with_normals_path"], env["aligned_mlp"], neighbor_count=args.normals_neighbours)
'''Create the vis file that tells which images see each point. We transfer this knowledge from the georeferenced
reconstruction (georefrecon) to the Lidar model'''
+ scale = np.linalg.norm(env["global_registration_matrix"], ord=2)
+ with_normals_subsampled = env["with_normals_path"].stripext() + "_subsampled.ply"
pcl_util.create_vis_file(env["georefrecon_ply"], env["with_normals_path"],
- resolution=args.mesh_resolution, output=env["with_normals_path"].stripext() + "_subsampled.ply")
- colmap.delaunay_mesh(env["occlusion_ply"], input_ply=env["with_normals_path"].stripext() + "_subsampled.ply")
+ resolution=args.mesh_resolution / scale,
+ output=with_normals_subsampled)
+ '''Compute the occlusion mesh by fooling COLMAP into thinking the lidar point cloud was made with colmap'''
+ colmap.delaunay_mesh(env["occlusion_ply"], input_ply=with_normals_subsampled)
if args.splats:
- eth3d.create_splats(env["splats_ply"], env["with_normals_path"], env["occlusion_ply"], threshold=args.splat_threshold)
+ eth3d.create_splats(env["splats_ply"], with_normals_subsampled, env["occlusion_ply"], threshold=args.splat_threshold / scale)
if args.inspect_dataset:
# First inspection : Check registration of the Lidar point cloud w.r.t. the COLMAP model, without the occlusion mesh
# Second inspection : Check the occlusion mesh and the splats
eth3d.inspect_dataset(scan_meshlab=env["aligned_mlp"],
colmap_model=env["georef_recon"],
image_path=env["image_path"])
......
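A note on the new scale factor: global_registration_matrix is a 4x4 homogeneous similarity transform, and np.linalg.norm(..., ord=2) is its largest singular value, dominated by the uniform scale of the rotation-plus-scale block. Dividing mesh_resolution and splat_threshold by it keeps those thresholds consistent when the registered model lives at a different scale, which is the robustness the commit message asks for. One caveat, shown in this small numpy check: the spectral norm of the full 4x4 also sees the translation column, so it only approximates the pure scale, while the top-left 3x3 block isolates it exactly.

import numpy as np

s, theta = 3.7, 0.4  # made-up uniform scale and rotation angle
R = np.array([[np.cos(theta), -np.sin(theta), 0],
              [np.sin(theta),  np.cos(theta), 0],
              [0, 0, 1]])
M = np.eye(4)
M[:3, :3] = s * R             # rotation-plus-scale block
M[:3, 3] = [10.0, -2.0, 0.5]  # translation

print(np.linalg.norm(M, ord=2))          # >= s, inflated by the translation
print(np.linalg.norm(M[:3, :3], ord=2))  # exactly s for a similarity transform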
......@@ -38,6 +38,8 @@ int main (int argc, char** argv)
pcl::console::parse_argument(argc, argv, "--output_cloud", output_cloud_path);
float resolution = 0.2; //20cm resolution
pcl::console::parse_argument(argc, argv, "--resolution", resolution);
+ float max_distance = 10;
+ pcl::console::parse_argument(argc, argv, "--max_distance", max_distance);
if (output_cloud_path.empty()){
LOG(ERROR) << "No output path was given";
......@@ -97,8 +99,12 @@ int main (int argc, char** argv)
for(auto it = lidar->begin(); it != lidar->end(); it++){
tree->nearestKSearch(*it, 1, nn_indices, nn_dists);
+ if(nn_dists[0] <= max_distance){
std::vector<int> image_idx = input_vis_points.at(nn_indices[0]);
output_vis_points.push_back(image_idx);
+ }else{
+ output_vis_points.push_back(std::vector<int>());
+ }
}
......
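In Python terms, the new max_distance guard in the vis-file transfer does roughly the following (a sketch with scipy's cKDTree; the function name and signature are illustrative, not the tool's interface):

import numpy as np
from scipy.spatial import cKDTree

def transfer_visibility(recon_points, recon_vis, lidar_points, max_distance=10.0):
    # For each lidar point, copy the image list of its nearest COLMAP
    # reconstruction point, unless that neighbour is farther than
    # max_distance: then the point is marked as seen by no image.
    tree = cKDTree(recon_points)
    dists, indices = tree.query(lidar_points, k=1)
    return [recon_vis[i] if d <= max_distance else []
            for d, i in zip(dists, indices)]

recon = np.array([[0., 0., 0.], [5., 0., 0.]])
vis = [[0, 2], [1]]
lidar = np.array([[0.1, 0., 0.], [5.2, 0., 0.], [100., 0., 0.]])
print(transfer_visibility(recon, vis, lidar))  # [[0, 2], [1], []]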
......@@ -25,7 +25,7 @@ def prepare_workspace(path, env, with_lidar=True):
env["lidar_mlp"] = env["workspace"] / "lidar.mlp"
env["with_normals_path"] = env["lidar_path"] / "with_normals.ply"
env["occlusion_ply"] = env["lidar_path"] / "occlusion_model.ply"
env["splats_ply"] = env["lidar_path"] / "splats_model.ply"
env["splats_ply"] = env["lidar_path"] / "splats_model.ply" if env["splats"] else None
env["occlusion_mlp"] = env["lidar_path"] / "occlusions.mlp"
env["splats_mlp"] = env["lidar_path"] / "splats.mlp"
env["matrix_path"] = env["workspace"] / "matrix_thorough.txt"
......@@ -72,7 +72,7 @@ def prepare_video_workspace(video_name, video_frames_folder,
video_env["final_model"] = colmap_root / "final"
output = {}
output["images_root_folder"] = raw_output_folder / "images"
output["video_frames_folder"] = output["images_root_folder"] / relative_path_folder
output["video_frames_folder"] = output["images_root_folder"] / "Video" / relative_path_folder
output["model_folder"] = raw_output_folder / "models" / relative_path_folder
output["interpolated_frames_list"] = output["model_folder"] / "interpolated_frames.txt"
output["final_model"] = output["model_folder"] / "final"
......
......@@ -210,10 +210,6 @@ def generate_GT(video_name, raw_output_folder, images_root_folder, video_frames_
if model_length < 2:
return
- final_mlp = final_model / "aligned.mlp"
- final_occlusions = final_model / "occlusions.mlp"
- final_splats = final_model / "splats.mlp"
'''
In case the reconstructed model is only locally good, there's the possibility of having a specific
transformation matrix per video in the final model folder, which might work better than the global registration_matrix
......@@ -222,6 +218,9 @@ def generate_GT(video_name, raw_output_folder, images_root_folder, video_frames_
if specific_matrix_path.isfile():
registration_matrix = np.linalg.inv(np.fromfile(specific_matrix_path, sep=" ").reshape(4, 4))
adjustment_matrix = registration_matrix @ np.linalg.inv(global_registration_matrix)
+ final_mlp = final_model / "aligned.mlp"
+ final_occlusions = final_model / "occlusions.mlp"
+ final_splats = final_model / "splats.mlp"
mxw.apply_transform_to_project(aligned_mlp, final_mlp, adjustment_matrix)
mxw.create_project(final_occlusions, [occlusion_ply], transforms=[adjustment_matrix])
mxw.create_project(final_splats, [splats_ply], transforms=[adjustment_matrix])
......
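The adjustment_matrix above composes transforms: if M_specific maps the raw reconstruction to the per-video frame and M_global maps it to the globally registered frame, then M_specific · M_global⁻¹ re-maps geometry that is already globally aligned. A minimal numpy check of that identity (random rigid transforms stand in for the real matrices):

import numpy as np

rng = np.random.default_rng(0)

def random_transform():
    q, _ = np.linalg.qr(rng.normal(size=(3, 3)))  # orthonormal 3x3
    m = np.eye(4)
    m[:3, :3] = q
    m[:3, 3] = rng.normal(size=3)
    return m

global_reg = random_transform()    # raw model -> global lidar frame
specific_reg = random_transform()  # raw model -> per-video refined frame
adjustment = specific_reg @ np.linalg.inv(global_reg)

x = np.array([1.0, 2.0, 3.0, 1.0])  # homogeneous point in the raw model
aligned = global_reg @ x            # what aligned.mlp already encodes
assert np.allclose(adjustment @ aligned, specific_reg @ x)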
......@@ -70,9 +70,9 @@ def get_georef(metadata):
relevant_data = metadata[["location_valid", "image_path", "x", "y", "z"]]
path_list = []
georef_list = []
- for _, (gps, path, x, y, alt) in relevant_data.iterrows():
+ for _, (loc_valid, path, x, y, alt) in relevant_data.iterrows():
path_list.append(path)
- if gps == 1:
+ if loc_valid:
georef_list.append("{} {} {} {}\n".format(path, x, y, alt))
return georef_list, path_list
......@@ -159,6 +159,8 @@ def process_video_folder(videos_list, existing_pictures, output_video_folder, im
print("extracting metadata for {} videos...".format(len(videos_list)))
videos_summary = {"anafi": {"indoor": 0, "outdoor": 0}, "generic": 0}
+ indoor_video_diameters = {}
for v in tqdm(videos_list):
width, height, framerate, num_frames = env["ffmpeg"].get_size_and_framerate(v)
video_output_folder = output_video_folder / "{}x{}".format(width, height) / v.stem
......@@ -170,15 +172,18 @@ def process_video_folder(videos_list, existing_pictures, output_video_folder, im
width, height, framerate)
metadata["model"] = "anafi"
metadata["camera_model"] = "PINHOLE"
+ raw_positions = metadata[["x", "y", "z"]]
+ video_displacement_diameter = np.linalg.norm(raw_positions.values.max(axis=0) - raw_positions.values.min(axis=0))
if metadata["indoor"].iloc[0]:
videos_summary["anafi"]["indoor"] += 1
+ indoor_video_diameters[video_displacement_diameter] = v
else:
videos_summary["anafi"]["outdoor"] += 1
- raw_positions = metadata[["x", "y", "z"]]
if centroid is None:
'''No centroid (possibly because there was no georeferenced lidar model in the first place);
set it to the first valid GPS position of the first outdoor video'''
- centroid = raw_positions[metadata["location_valid"] == 1].iloc[0].values
+ centroid = raw_positions[metadata["location_valid"]].iloc[0].values
zero_centered_positions = raw_positions.values - centroid
radius = np.max(np.abs(zero_centered_positions))
if radius > 1000:
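video_displacement_diameter, computed earlier in this hunk, is the diagonal of the axis-aligned bounding box of the navdata positions, so ranking indoor videos by it picks the flight that covers the most space. For example:

import numpy as np

positions = np.array([[0.0, 0.0, 0.0],
                      [3.0, 1.0, 0.5],
                      [1.0, 4.0, 0.2]])
# Diagonal of the axis-aligned bounding box of the flight path
print(np.linalg.norm(positions.max(axis=0) - positions.min(axis=0)))
# sqrt(3**2 + 4**2 + 0.5**2) ~ 5.02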
......@@ -196,7 +201,7 @@ def process_video_folder(videos_list, existing_pictures, output_video_folder, im
# timestamp is in microseconds
metadata['time'] = 1e6 * metadata.index / framerate
metadata['indoor'] = True
- metadata['location_valid'] = 0
+ metadata['location_valid'] = False
metadata["model"] = "generic"
metadata["camera_model"] = "PINHOLE"
metadata["picture_hfov"] = 0
......@@ -209,6 +214,7 @@ def process_video_folder(videos_list, existing_pictures, output_video_folder, im
metadata["y"] = np.NaN
metadata["z"] = np.NaN
videos_summary["generic"] += 1
metadata["num_frames"] = num_frames
if include_lowfps_thorough:
by_time = metadata.set_index(pd.to_datetime(metadata["time"], unit="us"))
by_time_lowfps = by_time.resample("{:.3f}S".format(1/fps)).first()
......@@ -221,6 +227,24 @@ def process_video_folder(videos_list, existing_pictures, output_video_folder, im
print("{} indoor anafi videos".format(videos_summary["anafi"]["indoor"]))
print("{} generic videos".format(videos_summary["generic"]))
+ if(videos_summary["anafi"]["outdoor"] == 0 and videos_summary["anafi"]["indoor"] > 0):
+ # We have no GPS data, but we have navdata, which will help rescale the COLMAP model
+ # Take the longest video and treat its positions as if the GPS were valid
+ longest_video = indoor_video_diameters[max(indoor_video_diameters)]
+ print("Only indoor videos used, will use {} for COLMAP rescaling".format(longest_video))
+ video_index = final_metadata["video"] == longest_video
+ if include_lowfps_thorough:
+ # We already added frames to be sampled, so we just copy that boolean to the "location_valid" column
+ final_metadata.loc[video_index, "location_valid"] = final_metadata.loc[video_index, "sampled"]
+ else:
+ # Take frames at low fps, add them to the thorough photogrammetry and mark their locations as valid
+ video_md = final_metadata[video_index]
+ by_time = video_md.set_index(pd.to_datetime(video_md["time"], unit="us"))
+ by_time_lowfps = by_time.resample("{:.3f}S".format(1/fps)).first()
+ to_georef = by_time["time"].isin(by_time_lowfps["time"]).values
+ final_metadata.loc[video_index, "sampled"] = to_georef
+ final_metadata.loc[video_index, "location_valid"] = to_georef
print("{} frames in total".format(len(final_metadata)))
cam_fields = ["width", "height", "framerate", "picture_hfov", "picture_vfov", "camera_model"]
......
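The resample trick used twice in this file deserves an illustration: indexing by timestamp and taking .first() in each 1/fps bucket keeps one frame per period, and isin then marks those rows back in frame order. A standalone sketch with made-up 30 fps timestamps:

import numpy as np
import pandas as pd

fps = 2  # keep two frames per second for the thorough reconstruction
metadata = pd.DataFrame({"time": 1e6 * np.arange(30) / 30})  # 1 s of 30 fps video

by_time = metadata.set_index(pd.to_datetime(metadata["time"], unit="us"))
by_time_lowfps = by_time.resample("{:.3f}S".format(1 / fps)).first()
sampled = by_time["time"].isin(by_time_lowfps["time"]).values

print(sampled.sum())                         # 2 frames kept out of 30
print(metadata.loc[sampled, "time"].values)  # [0., 500000.], one per 0.5 s bucket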
......@@ -43,7 +43,7 @@ class Colmap(Wrapper):
options += ["--VocabTreeMatching.vocab_tree_path", vocab_tree]
self.__call__(options)
- def map(self, output, input=None, multiple_models=False, start_frame_id=None):
+ def map(self, output, input=None, multiple_models=True, start_frame_id=None):
options = ["mapper", "--database_path", self.db,
"--image_path", self.image_path,
"--output_path", output]
......
......@@ -44,13 +44,17 @@ class ETH3D(Wrapper):
point_cloud=True, depth_maps=True, occlusion_maps=True):
options = ["GroundTruthCreator", "--scan_alignment_path", scan_meshlab,
"--image_base_path", self.image_path, "--state_path", colmap_model,
"--output_folder_path", output_folder, "--occlusion_mesh_path", occlusions,
"--occlusion_splats_path", splats,
"--output_folder_path", output_folder,
"--max_occlusion_depth", str(self.max_occlusion_depth),
"--write_point_cloud", "1" if point_cloud else "0",
"--write_depth_maps", "1" if depth_maps else "0",
"--write_occlusion_depth", "1" if occlusion_maps else "0",
"--compress_depth_maps", "1"]
+ if occlusions is not None:
+ options += ["--occlusion_mesh_path", occlusions]
+ if splats is not None:
+ options += ["--occlusion_splats_path", splats]
self.__call__(options)
def inspect_dataset(self, scan_meshlab, colmap_model, occlusions=None, splats=None, image_path=None):
......