Commit 2029bc81 authored by Clément Pinard's avatar Clément Pinard
Browse files

Add EuRoC dataset compatibility

parent af25e9b1
......@@ -186,6 +186,9 @@ All the parameters for `main_pipeline.py` are defined in the file `cli_utils.ply
* `--splat_threshold` : Distance from occlusion mesh at which a splat will be created for a particular point (default, 10cm)
* `--max_splate_size` : Splat size is defined by mean distance from its neighbours. You can define a max splat size for isolated points which otherwise would make a very large useless splat. If not set, will be `2.5*splat_threshold`.
7. Ground truth creation
* `--eth3d_splat_radius` : Splat radius for occlusion mesh boundaries, radius of area (in meters) which will be defined as invalid because of occlusion uncertainty, see `splat_radius` option for ETH3D. Thumb rule here is that it should be around your point cloud precision. (default 0.01, i.e. 1cm)
### Manual step by step
This will essentially do the same thing as the script, in order to let you change some steps at will.
......
......@@ -31,7 +31,7 @@ def add_to_db(db_path, metadata_path, frame_list_path, **env):
for _, row in tqdm(metadata.iterrows(), total=len(metadata)):
image_path = row["image_path"]
camera_id = row["camera_id"]
if row["location_valid"]:
if "location_valid" in row.keys() and row["location_valid"]:
frame_gps = row[["location_longitude", "location_latitude", "location_altitude"]]
else:
frame_gps = np.full(3, np.NaN)
......
......@@ -75,6 +75,12 @@ def extract_metadata(folder_path, file_path, native_wrapper, proj, w, h, f, save
metadata["video"] = file_path
metadata['frame'] = metadata.index + 1
metadata["location_valid"] = metadata["location_valid"] == 1
fx = metadata["width"] / (2 * np.tan(metadata["picture_hfov"] * np.pi/360))
fy = metadata["v_focal"] = metadata["height"] / (2 * np.tan(metadata["picture_vfov"] * np.pi/360))
params = np.stack([fx.values, fy.values], axis=-1)
print(params.shape)
metadata["camera_params"] = [tuple(p) for p in params]
if save_path is not None:
metadata.to_csv(save_path)
return metadata
......@@ -115,6 +115,8 @@ def set_argparser():
gt_parser = parser.add_argument_group("Ground Truth Creator")
gt_parser.add_argument('--max_occlusion_depth', default=250, type=float,
help='max depth for occlusion. Everything further will not be considered at infinity')
gt_parser.add_argument('--eth3d_splat_radius', default=0.01, type=float,
help='see splat radius for ETH3D')
return parser
......
......@@ -189,7 +189,6 @@ def convert_dataset(final_model, depth_dir, images_root_folder, occ_dir,
cameras, images, _ = rm.read_model(final_model, '.txt')
metadata = pd.read_csv(metadata_path).set_index("db_id", drop=False).sort_values("time")
framerate = metadata["framerate"].values[0]
if downscale is None:
assert(width is not None)
......@@ -258,7 +257,7 @@ def convert_dataset(final_model, depth_dir, images_root_folder, occ_dir,
if video:
video_path = str(video_output_dir.parent/'{}_groundtruth_viz.mp4'.format(video_output_dir.stem))
glob_pattern = str(video_output_dir/'*.png')
ffmpeg.create_video(video_path, glob_pattern, framerate)
ffmpeg.create_video(video_path, glob_pattern, True, framerate)
video_output_dir.rmtree_p()
......
import pandas as pd
import numpy as np
from path import Path
import yaml
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from colmap_util.read_model import Image, Camera, Point3D, write_model, rotmat2qvec
import meshlab_xml_writer as mxw
from tqdm import tqdm
from pyntcloud import PyntCloud
from scipy.spatial.transform import Rotation, Slerp
from scipy.interpolate import interp1d
from wrappers import FFMpeg
# Command-line interface for the EuRoC -> COLMAP conversion script.
# (FIX: help-string typo "eof EuRoC" -> "of EuRoC")
parser = ArgumentParser(description='Convert EuroC dataset to COLMAP',
                        formatter_class=ArgumentDefaultsHelpFormatter)
parser.add_argument('--root', metavar='DIR', type=Path, help='path to root folder of EuRoC, where V[N]_[M]_[difficulty] folders should be')
parser.add_argument('--output_dir', metavar='DIR', default=None, type=Path)
# Also convert the ground-truth Lidar cloud to COLMAP Point3D entries
parser.add_argument('--pointcloud_to_colmap', action='store_true')
parser.add_argument('--colmap_format', choices=['.txt', '.bin'], default='.txt')
parser.add_argument("--ffmpeg", default="ffmpeg", type=Path)
parser.add_argument('--log', default=None, type=Path)
parser.add_argument('-v', '--verbose', action="count", default=0)
def get_cam(yaml_path, cam_id):
    """Build a COLMAP Camera from a EuRoC camera sensor.yaml file.

    Returns the Camera (OPENCV model) and the 4x4 body-to-sensor
    calibration matrix T_BS read from the yaml file.
    """
    with open(yaml_path) as f:
        sensor = yaml.load(f, Loader=yaml.SafeLoader)
    t_bs = sensor["T_BS"]
    calib_matrix = np.array(t_bs["data"]).reshape((t_bs["rows"], t_bs["cols"]))
    # EuRoC cameras are calibrated with the radial-tangential model,
    # which maps directly onto COLMAP's OPENCV camera model
    assert sensor["distortion_model"] == "radial-tangential"
    width, height = sensor["resolution"]
    intrinsics = sensor["intrinsics"] + sensor["distortion_coefficients"]
    camera = Camera(id=cam_id,
                    model="OPENCV",
                    width=width,
                    height=height,
                    params=np.array(intrinsics))
    return camera, calib_matrix
def get_vicon_calib(yaml_path):
    """Read the 4x4 body-to-vicon calibration matrix T_BS from sensor.yaml."""
    with open(yaml_path) as f:
        sensor = yaml.load(f, Loader=yaml.SafeLoader)
    t_bs = sensor["T_BS"]
    return np.array(t_bs["data"]).reshape((t_bs["rows"], t_bs["cols"]))
def create_image(img_id, cam_id, file_path, drone_tvec, drone_matrix, image_calib, vicon_calib):
    """Build a COLMAP Image from an interpolated drone pose.

    The drone (body) pose is chained with the inverse vicon calibration
    and the camera calibration to get the camera-to-world matrix, which
    is then inverted into COLMAP's world-to-camera convention.
    Returns the Image and the camera position in world coordinates.
    """
    drone_full_matrix = np.eye(4)
    drone_full_matrix[:3, :3] = drone_matrix
    drone_full_matrix[:3, 3] = drone_tvec
    image_matrix = drone_full_matrix @ np.linalg.inv(vicon_calib) @ image_calib
    world2cam = np.linalg.inv(image_matrix)
    image = Image(id=img_id, qvec=rotmat2qvec(world2cam[:3, :3]),
                  tvec=world2cam[:3, -1],
                  camera_id=cam_id, name=file_path,
                  xys=[], point3D_ids=[])
    return image, image_matrix[:3, -1]
def convert_cloud(input_dir, output_dir):
    """Convert a EuRoC Lidar ground-truth cloud into a PLY + meshlab project.

    Returns the loaded PyntCloud, or None when the sequence ships no
    ground-truth cloud (``data.ply`` missing).
    """
    ply_in = input_dir / "data.ply"
    if not ply_in.isfile():
        return None
    cloud = PyntCloud.from_file(ply_in)
    # Keep only position and intensity, drop any extra scalar fields
    cloud.points = cloud.points[['x', 'y', 'z', 'intensity']]
    with open(input_dir / "sensor.yaml") as f:
        sensor = yaml.load(f, Loader=yaml.SafeLoader)
    calib = sensor["T_WR"]
    transform = np.array(calib["data"]).reshape((calib["rows"], calib["cols"]))
    ply_out = output_dir / "data.ply"
    # Register the cloud with its world transform in a meshlab project
    mxw.create_project(output_dir / 'data.mlp', [ply_out], labels=None, transforms=[transform])
    cloud.to_file(ply_out)
    return cloud
def main():
    """Convert the EuRoC dataset to the pipeline's video/COLMAP layout.

    For each scene (V1, V2), every matching V[N]_* sequence under --root
    is converted:
     - the Lidar ground-truth cloud becomes a registered PLY + mlp project
     - each camera stream becomes an mp4 video plus a metadata csv, with
       per-frame poses interpolated from the vicon ground truth
     - all frames are gathered into one COLMAP model per scene, optionally
       together with the ground-truth point cloud
    """
    args = parser.parse_args()
    scenes = ["V1", "V2"]
    ffmpeg = FFMpeg(args.ffmpeg, verbose=args.verbose, logfile=args.log)
    for s in scenes:
        # FIX: the original guarded the cloud conversion on an unused
        # `pointcloud` variable that was never assigned, so the cloud was
        # re-converted for every sequence and `cloud` could be referenced
        # unbound when a scene had no sequences. Guard on `cloud` itself.
        cloud = None
        lidar_output = args.output_dir / s / "Lidar"
        video_output = args.output_dir / s / "Videos"
        lidar_output.makedirs_p()
        video_output.makedirs_p()
        (args.output_dir / s / "Pictures").makedirs_p()
        colmap_model = {"cams": {},
                        "imgs": {},
                        "points": {}}
        video_sequences = sorted(args.root.dirs("{}*".format(s)))
        cam_id = 0
        for v in video_sequences:
            mav = v / "mav0"
            cam_dirs = [mav/"cam0", mav/"cam1"]
            vicon_dir = mav/"state_groundtruth_estimate0"
            if cloud is None:
                # Convert the Lidar cloud only once per scene
                cloud = convert_cloud(mav/"pointcloud0", lidar_output)
            vicon_poses = pd.read_csv(vicon_dir/"data.csv")
            vicon_poses = vicon_poses.set_index("#timestamp")
            min_ts, max_ts = min(vicon_poses.index), max(vicon_poses.index)
            # Column names in the EuRoC ground truth csv (note leading space)
            t_prefix = " p_RS_R_{} [m]"
            q_prefix = " q_RS_{} []"
            drone_tvec = vicon_poses[[t_prefix.format(dim) for dim in 'xyz']].values
            drone_qvec = Rotation.from_quat(vicon_poses[[q_prefix.format(dim) for dim in 'xyzw']].values)
            # Pose interpolators: Slerp for rotations, linear for positions
            drone_qvec_slerp = Slerp(vicon_poses.index, drone_qvec)
            drone_tvec_interp = interp1d(vicon_poses.index, drone_tvec.T)
            vicon_calib = get_vicon_calib(vicon_dir/"sensor.yaml")
            for cam in cam_dirs:
                output_video_file = video_output/"{}_{}.mp4".format(v.stem, cam.stem)
                image_georef = []
                image_rel_paths = []
                image_ids = []
                qvecs = []
                print("Converting camera {} from video {}...".format(cam.relpath(v), v))
                if len(colmap_model["imgs"].keys()) == 0:
                    last_image_id = 0
                else:
                    last_image_id = max(colmap_model["imgs"].keys()) + 1
                colmap_cam, cam_calib = get_cam(cam/"sensor.yaml", cam_id)
                colmap_model["cams"][cam_id] = colmap_cam
                metadata = pd.read_csv(cam/"data.csv").sort_values(by=['#timestamp [ns]'])
                metadata["camera_model"] = "OPENCV"
                metadata["width"] = colmap_cam.width
                metadata["height"] = colmap_cam.height
                metadata["camera_params"] = [tuple(colmap_cam.params)] * len(metadata)
                metadata["time"] = metadata['#timestamp [ns]']
                # Only keep frames inside the vicon time span: pose
                # interpolation is undefined outside of it
                metadata = metadata[(metadata['time'] > min_ts) & (metadata['time'] < max_ts)]
                tvec_interpolated = drone_tvec_interp(metadata['time']).T
                qvec_interpolated = drone_qvec_slerp(metadata['time'])
                # Convert time from nanoseconds to microseconds for compatibility
                metadata['time'] = metadata['time'] * 1e-3
                for img_id, (filename, current_tvec, current_qvec) in tqdm(enumerate(zip(metadata["filename"].values,
                                                                                         tvec_interpolated,
                                                                                         qvec_interpolated)),
                                                                           total=len(metadata)):
                    final_path = args.root.relpathto(cam / "data") / filename
                    image_rel_paths.append(final_path)
                    colmap_model["imgs"][img_id + last_image_id], georef = create_image(img_id + last_image_id, cam_id,
                                                                                        final_path, current_tvec,
                                                                                        current_qvec.as_matrix(),
                                                                                        cam_calib, vicon_calib)
                    image_georef.append(georef)
                    image_ids.append(img_id + last_image_id)
                    qvecs.append(current_qvec.as_quat())
                metadata['x'], metadata['y'], metadata['z'] = np.array(image_georef).transpose()
                qvecs_array = np.array(qvecs).transpose()
                for coord, title in zip(qvecs_array, 'xyzw'):
                    metadata['frame_quat_{}'.format(title)] = coord
                metadata['image_path'] = image_rel_paths
                metadata['location_valid'] = True
                metadata['indoor'] = True
                metadata['video'] = cam
                # time is in microseconds here, framerate in frames/second
                framerate = len(metadata) / np.ptp(metadata['time'].values * 1e-6)
                metadata['framerate'] = framerate
                # Copy images to a numbered sequence so ffmpeg can read them
                for i, f in enumerate(metadata["filename"]):
                    (cam / "data" / f).copy(video_output / "tmp_{:05d}.png".format(i))
                glob_pattern = str(video_output / "tmp_%05d.png")
                ffmpeg.create_video(output_video_file, glob_pattern, fps=framerate, glob=False)
                frames_to_delete = video_output.files("tmp*")
                for f in frames_to_delete:
                    f.remove()
                # Save metadata in csv file
                metadata_file_path = output_video_file.parent / "{}_metadata.csv".format(output_video_file.stem)
                metadata.to_csv(metadata_file_path)
                cam_id += 1
        points = {}
        if args.pointcloud_to_colmap and cloud is not None:
            subsample = 1
            print("Converting ...")
            npy_points = cloud.points[['x', 'y', 'z', 'intensity']].values[::subsample]
            for id_point, row in tqdm(enumerate(npy_points), total=len(npy_points)):
                xyz = row[:3]
                # intensity in [0, 1] becomes a gray RGB color
                gray_level = int(row[-1]*255)
                rgb = np.array([gray_level] * 3)
                points[id_point] = Point3D(id=id_point, xyz=xyz, rgb=rgb,
                                           error=0, image_ids=np.array([]),
                                           point2D_idxs=np.array([]))
        # FIX: the converted points were built but never stored in the model,
        # so write_model always received an empty point cloud
        colmap_model["points"] = points
        # NOTE(review): image_rel_paths/image_georef only hold the frames of
        # the LAST camera of the LAST sequence at this point, and the files
        # are rewritten for every scene — confirm whether all frames should
        # be listed instead
        with open(args.output_dir/"images.txt", "w") as f1, open(args.root/"georef.txt", "w") as f2:
            for path, pos in zip(image_rel_paths, image_georef):
                f1.write(path + "\n")
                f2.write("{} {} {} {}\n".format(path, *pos))
        colmap_output = args.output_dir / s / "colmap_from_GT"
        colmap_output.makedirs_p()
        write_model(colmap_model["cams"],
                    colmap_model["imgs"],
                    colmap_model["points"],
                    colmap_output,
                    args.colmap_format)
# Standard script entry-point guard: only run the conversion when executed
# directly, not when imported as a module.
if __name__ == '__main__':
    main()
......@@ -3,7 +3,7 @@ import numpy as np
from path import Path
import yaml
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from colmap.read_model import Image, Camera, Point3D, write_model, qvec2rotmat, rotmat2qvec
from colmap_util.read_model import Image, Camera, Point3D, write_model, qvec2rotmat, rotmat2qvec
from tqdm import tqdm
from pyntcloud import PyntCloud
from itertools import islice
......
......@@ -35,6 +35,8 @@ def erosion(width, mask):
@torch.no_grad()
def extract_sky_mask(network, image_paths, mask_folder):
images = np.stack([imageio.imread(i) for i in image_paths])
if len(images.shape) == 3:
images = np.stack(3 * [images], axis=-1)
b, h, w, _ = images.shape
image_tensor = torch.from_numpy(images).float()/255
image_tensor = image_tensor.permute(0, 3, 1, 2) # shape [B, C, H, W]
......
......@@ -62,7 +62,7 @@ def main():
pdraw = PDraw(args.nw, verbose=args.verbose, logfile=args.log)
env["pdraw"] = pdraw
eth3d = ETH3D(args.eth3d, args.raw_output_folder / "Images", args.max_occlusion_depth,
verbose=args.verbose, logfile=args.log)
verbose=args.verbose, logfile=args.log, splat_radius=args.eth3d_splat_radius)
env["eth3d"] = eth3d
pcl_util = PCLUtil(args.pcl_util, verbose=args.verbose, logfile=args.log)
env["pcl_util"] = pcl_util
......
......@@ -33,7 +33,7 @@ def main():
pdraw = PDraw(args.nw, verbose=args.verbose, logfile=args.log)
env["pdraw"] = pdraw
eth3d = ETH3D(args.eth3d, args.raw_output_folder / "Images", args.max_occlusion_depth,
verbose=args.verbose, logfile=args.log)
verbose=args.verbose, logfile=args.log, splat_radius=args.eth3d_splat_radius)
env["eth3d"] = eth3d
pcl_util = PCLUtil(args.pcl_util, verbose=args.verbose, logfile=args.log)
env["pcl_util"] = pcl_util
......
......@@ -103,7 +103,7 @@ int main (int argc, char** argv)
std::vector<int> image_idx = input_vis_points.at(nn_indices[0]);
output_vis_points.push_back(image_idx);
}else{
output_vis_points.push_back(std::vector<int>)
output_vis_points.push_back(std::vector<int>());
}
}
......
......@@ -278,7 +278,8 @@ def generate_GT(video_name, raw_output_folder, images_root_folder, video_frames_
kitti_format_folder, viz_folder,
metadata, interpolated_frames,
visualization=True, video=True, downscale=4, threads=8, **env)
interpolated_frames_list.copy(kitti_format_folder)
if filter_models:
interpolated_frames_list.copy(kitti_format_folder)
if save_space:
(raw_output_folder / "occlusion_depth" / video_name.stem).rmtree_p()
......
This diff is collapsed.
......@@ -4,11 +4,12 @@ from .default_wrapper import Wrapper
class ETH3D(Wrapper):
"""docstring for Colmap"""
def __init__(self, build_folder, image_path, max_occlusion_depth, *args, **kwargs):
def __init__(self, build_folder, image_path, max_occlusion_depth, splat_radius, *args, **kwargs):
super().__init__(None, *args, **kwargs)
self.build_folder = build_folder
self.image_path = image_path
self.max_occlusion_depth = max_occlusion_depth
self.splat_radius = splat_radius
def __call__(self, options):
self.binary = self.build_folder / options[0]
......@@ -49,6 +50,7 @@ class ETH3D(Wrapper):
"--image_base_path", self.image_path, "--state_path", colmap_model,
"--output_folder_path", output_folder,
"--max_occlusion_depth", str(self.max_occlusion_depth),
"--splat_radius", str(self.splat_radius),
"--write_point_cloud", "1" if point_cloud else "0",
"--write_depth_maps", "1" if depth_maps else "0",
"--write_occlusion_depth", "1" if occlusion_maps else "0",
......@@ -65,7 +67,8 @@ class ETH3D(Wrapper):
image_path = self.image_path
options = ["DatasetInspector", "--scan_alignment_path", scan_meshlab,
"--image_base_path", image_path, "--state_path", colmap_model,
"--max_occlusion_depth", str(self.max_occlusion_depth)]
"--max_occlusion_depth", str(self.max_occlusion_depth),
"--splat_radius", str(self.splat_radius)]
if occlusions is not None:
options += ["--occlusion_mesh_path", occlusions]
if splats is not None:
......
......@@ -53,10 +53,10 @@ class FFMpeg(Wrapper):
frac_to_float(json_cam[0]["r_frame_rate"]),
int(json_cam[0]["nb_frames"]))
def create_video(self, video_path, glob_pattern, fps=30):
ffmpeg_options = ["-y", "-r", str(fps),
"-pattern_type", "glob", "-i",
glob_pattern, video_path]
def create_video(self, video_path, input_string, glob=True, fps=30):
ffmpeg_options = ["-y", "-r", "{:.2f}".format(fps)] + \
(["-pattern_type", "glob"] if glob else []) + \
["-i", input_string, video_path]
self.__call__(ffmpeg_options)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment