convert_dataset.py 15.8 KB
Newer Older
1
2
3
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from path import Path
from imageio import imread, imwrite
Clément Pinard's avatar
Clément Pinard committed
4
from skimage.transform import rescale, resize
5
6
7
8
9
10
11
12
13
from skimage.measure import block_reduce
from colmap_util import read_model as rm
import numpy as np
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from tqdm import tqdm
from wrappers import FFMpeg
import gzip
from pebble import ProcessPool
14
import yaml
15
from itertools import product
16
import pandas as pd
17
18


19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
def rescale_and_save_cameras(cameras, images, output_dir, output_width=None, downscale=None):
    """Rescale camera records and write their intrinsics to ``output_dir``.

    Every saved camera produces two files: a 3x3 intrinsics matrix written
    with ``np.savetxt`` and a YAML file holding the model name, parameters,
    width and height. With exactly one camera the files are named
    ``intrinsics.txt`` / ``camera.yaml``; otherwise one
    ``<image stem>_intrinsics.txt`` / ``<image stem>_camera.yaml`` pair is
    written per image.

    Args:
        cameras: mapping from camera id to a camera record exposing ``model``,
            ``params``, ``width``, ``height`` and ``_replace`` (presumably
            COLMAP ``Camera`` namedtuples -- confirm against ``read_model``).
        images: mapping whose values expose ``camera_id`` and ``name``.
        output_dir: destination folder; must support ``/`` and ``+`` the way
            ``path.Path`` does.
        output_width: target width in pixels, only used when ``downscale``
            is None.
        downscale: factor by which image size and intrinsics are divided.

    Returns:
        dict mapping camera id to the rescaled camera record. The input
        records and their ``params`` arrays are left untouched.

    Raises:
        ValueError: if a camera must be rescaled while both ``output_width``
            and ``downscale`` are None.
    """
    def has_single_focal(cam):
        # These model families start with (f, cx, cy); every other model
        # handled here starts with (fx, fy, cx, cy).
        return 'SIMPLE' in cam.model or 'RADIAL' in cam.model

    def rescale_camera(cam):
        if downscale is not None:
            factor = downscale
            new_width = int(cam.width // factor)
            new_height = int(cam.height // factor)
        elif output_width is not None:
            # The downscale factor is original / target. The previous code
            # used the inverse ratio, which enlarged the intrinsics instead
            # of shrinking them.
            factor = cam.width / output_width
            new_width = int(output_width)
            new_height = int(cam.height * output_width // cam.width)
        else:
            raise ValueError("either output_width or downscale must be given")

        # Work on a copy so the caller's camera parameters are not modified.
        params = np.array(cam.params, dtype=float)
        n_scaled = 3 if has_single_focal(cam) else 4
        params[:n_scaled] /= factor
        return cam._replace(params=params, width=new_width, height=new_height)

    def construct_intrinsics(cam):
        if has_single_focal(cam):
            # Distortion terms may follow (e.g. SIMPLE_RADIAL), hence *_.
            fx, cx, cy, *_ = cam.params
            fy = fx
        else:
            fx, fy, cx, cy, *_ = cam.params

        return np.array([[fx, 0, cx],
                         [0, fy, cy],
                         [0, 0, 1]])

    def save_cam(cam, intrinsics_path, yaml_path):
        np.savetxt(intrinsics_path, construct_intrinsics(cam))
        camera_dict = {"model": cam.model,
                       "params": cam.params.tolist(),
                       "width": cam.width,
                       "height": cam.height}
        with open(yaml_path, 'w') as f:
            yaml.dump(camera_dict, f, default_flow_style=False)
        return cam

    rescaled_cameras = {}
    if len(cameras) == 1:
        (key, cam), = cameras.items()
        rescaled = rescale_camera(cam)
        rescaled_cameras[key] = rescaled
        # Save the rescaled record so width/height match the scaled params.
        save_cam(rescaled, output_dir / "intrinsics.txt", output_dir / "camera.yaml")
    else:
        for img in images.values():
            cam_id = img.camera_id
            if cam_id not in rescaled_cameras:
                rescaled_cameras[cam_id] = rescale_camera(cameras[cam_id])
            stem = Path(img.name).stem
            save_cam(rescaled_cameras[cam_id],
                     output_dir / stem + "_intrinsics.txt",
                     output_dir / stem + "_camera.yaml")
    return rescaled_cameras
74
75
76
77
78
79
80
81
82


def to_transform_matrix(q, t):
    """Build a 4x4 homogeneous matrix from a quaternion ``q`` and a translation ``t``.

    The rotation block is the transpose of ``rm.qvec2rotmat(q)`` and the
    translation column is the negated transposed rotation applied to ``t``,
    i.e. the inverse of the rigid transform ``(R, t)``.
    """
    rotation = rm.qvec2rotmat(q).T
    translation = np.reshape(-rotation @ t, (3, 1))
    upper_rows = np.concatenate((rotation, translation), axis=1)
    last_row = np.array([[0, 0, 0, 1]])
    return np.concatenate((upper_rows, last_row), axis=0)


83
def save_poses(images, images_list, output_dir):
    """Write the pose of each listed frame, relative to the first registered one.

    Args:
        images: mapping from image name to a record exposing ``qvec`` and
            ``tvec``. Names missing from the mapping are treated as
            unregistered frames.
        images_list: ordered image names; one output row per name.
        output_dir: folder receiving ``poses.txt``; must support ``/``.

    Returns:
        Array of shape ``(len(images_list), 3, 4)``. Each entry is the top
        three rows of ``inv(first_pose) @ pose``; unregistered frames are
        filled with NaN. The same data, flattened to 12 columns per row, is
        written to ``poses.txt``.
    """
    inv_starting_pos = None
    poses = []
    for name in images_list:
        # Only the lookup is guarded, so a KeyError raised while computing
        # the pose is not silently turned into an "unregistered" frame.
        try:
            img = images[name]
        except KeyError:
            # Frame is not registered so we put NaN coordinates instead
            poses.append(np.full((3, 4), np.nan))
            continue
        current_pos = to_transform_matrix(img.qvec, img.tvec)
        if inv_starting_pos is None:
            # The reference pose never changes: invert it only once.
            inv_starting_pos = np.linalg.inv(current_pos)
        poses.append((inv_starting_pos @ current_pos)[:3])

    # np.stack refuses an empty list, so build the empty result explicitly.
    poses = np.stack(poses) if poses else np.empty((0, 3, 4))
    np.savetxt(output_dir/'poses.txt', poses.reshape(len(poses), 12))
    return poses
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146


def high_res_colormap(low_res_cmap, resolution=1000, max_value=1):
    """Return a ``ListedColormap`` with ``resolution`` linearly interpolated entries.

    ``low_res_cmap`` is sampled at its own ``N`` evenly spaced positions in
    [0, 1]; every channel is then interpolated at ``resolution`` positions
    spread over [0, ``max_value``].

    For a linear segmented colormap this is unnecessary: the ``lutsize``
    parameter of ``cm.get_cmap(name, lutsize)`` already sets the number of
    points.
    """
    coarse_positions = np.linspace(0, 1, low_res_cmap.N)
    coarse_colors = low_res_cmap(coarse_positions)
    fine_positions = np.linspace(0, max_value, resolution)
    # Interpolate one channel at a time, then put channels back on axis 1.
    fine_channels = [np.interp(fine_positions, coarse_positions, channel)
                     for channel in coarse_colors.T]
    return ListedColormap(np.stack(fine_channels, axis=1))


def opencv_rainbow(resolution=1000):
    """Return the OpenCV-equivalent rainbow colormap with ``resolution`` entries."""
    # Anchor positions in [0, 1] and the RGB color reached at each of them.
    positions = (0.000, 0.400, 0.600, 0.800, 1.000)
    colors = (
        (1.00, 0.00, 0.00),
        (1.00, 1.00, 0.00),
        (0.00, 1.00, 0.00),
        (0.00, 0.00, 1.00),
        (0.60, 0.00, 1.00),
    )
    anchors = tuple(zip(positions, colors))
    return LinearSegmentedColormap.from_list('opencv_rainbow', anchors, N=resolution)


def _get_registered_cmap(name, lutsize=None):
    """Return matplotlib's colormap ``name``, optionally with ``lutsize`` entries.

    ``cm.get_cmap`` is used whenever the installed matplotlib still provides
    it. It was deprecated in matplotlib 3.7 and removed in 3.9; in that case
    the ``matplotlib.colormaps`` registry and ``Colormap.resampled`` are used
    instead.
    """
    legacy_get_cmap = getattr(cm, 'get_cmap', None)
    if legacy_get_cmap is not None:
        return legacy_get_cmap(name, lutsize)
    # Local import: only needed on matplotlib versions without cm.get_cmap.
    from matplotlib import colormaps
    cmap = colormaps[name]
    return cmap if lutsize is None else cmap.resampled(lutsize)


# Colormaps available to apply_cmap_and_resize, keyed by name.
COLORMAPS = {'rainbow': opencv_rainbow(),
             'magma': high_res_colormap(_get_registered_cmap('magma')),
             'bone': _get_registered_cmap('bone', 10000)}


def apply_cmap_and_resize(depth, colormap, downscale):
    """Downscale a depth map by block-wise minimum and colorize the result.

    Args:
        depth: 2D array of depth values; ``np.inf`` marks pixels without a
            measurement.
        colormap: key into ``COLORMAPS``.
        downscale: integer block size used along both axes.

    Returns:
        Tuple ``(downscale_depth, depth_viz)``. ``downscale_depth`` is the
        per-block minimum of ``depth``. ``depth_viz`` is the RGB rendering of
        ``downscale_depth`` normalized by the largest finite value of
        ``depth``, scaled to [0, 255], with infinite pixels set to 0.
    """
    # block_reduce pads incomplete border blocks with `cval` (0 by default),
    # which under a min-reduction would turn those blocks into zero depth.
    # Padding with +inf keeps the minimum of the real values instead. Integer
    # inputs cannot hold inf, so they keep the default padding.
    pad_value = np.inf if np.issubdtype(depth.dtype, np.floating) else 0
    downscale_depth = block_reduce(depth, (downscale, downscale), np.min, cval=pad_value)
    no_measure = downscale_depth == np.inf

    finite_depth = depth[depth < np.inf]
    if finite_depth.size != 0:
        depth_norm = downscale_depth/finite_depth.max()
        # Keep the normalized map finite before handing it to the colormap.
        depth_norm[no_measure] = 1
    else:
        depth_norm = np.ones_like(downscale_depth)

    # Drop the alpha channel returned by the colormap.
    depth_viz = COLORMAPS[colormap](depth_norm)[:, :, :3]
    depth_viz[no_measure] = 0
    return downscale_depth, depth_viz*255


def _rescale_image(img, scale):
    """Rescale an image whose last axis holds the channels."""
    try:
        return rescale(img, scale, channel_axis=-1)
    except TypeError:
        # Older scikit-image releases only know the legacy `multichannel`
        # flag; recent ones only accept `channel_axis`.
        return rescale(img, scale, multichannel=True)


def _read_float32_map(path, height, width, compressed):
    """Read a raw float32 map of shape ``(height, width)``, gzip-compressed or not."""
    opener = gzip.open if compressed else open
    with opener(path, "rb") as f:
        return np.frombuffer(f.read(), np.float32).reshape(height, width)


def process_one_frame(img_path, depth_path, occ_path,
                      dataset_output_dir, video_output_dir, downscale, interpolated,
                      visualization=False, viz_width=1920, compressed=True):
    """Convert one frame: downscaled image, depth map and optional visualization.

    The image is rescaled by ``1/downscale`` and written to
    ``dataset_output_dir`` under its original file name. When ``depth_path``
    is given, the depth map is downscaled with ``apply_cmap_and_resize`` and,
    unless the frame is interpolated, saved as ``<image stem>.npy``.

    With ``visualization`` on, a ``<image stem>.png`` mosaic is written to
    ``video_output_dir``: image (upper left), colorized depth (upper right),
    their blend (lower left) and colorized occlusion depth (lower right).
    Interpolated frames get a 5-pixel orange border.

    Args:
        img_path: image path; must provide ``basename()`` and ``stem``.
        depth_path: raw float32 depth map with the image's height and width,
            or None when the frame has no depth.
        occ_path: raw float32 occlusion depth map, or None. Only read when
            ``visualization`` is on.
        dataset_output_dir: folder receiving the image and depth outputs.
        video_output_dir: folder receiving the visualization image.
        downscale: integer downscale factor.
        interpolated: whether the frame pose was interpolated.
        visualization: whether to write the visualization mosaic.
        viz_width: mosaic width in pixels; must be even.
        compressed: whether depth and occlusion files are gzip-compressed.

    Raises:
        ValueError: if the image is neither 2D nor 3D.
    """
    img = imread(img_path)
    if img.ndim == 3:
        h, w, _ = img.shape
    elif img.ndim == 2:
        h, w = img.shape
        img = img.reshape(h, w, 1)
    else:
        raise ValueError("Unsupported image shape {} for {}".format(img.shape, img_path))
    assert viz_width % 2 == 0, "viz_width must be even"

    rescaled_img = _rescale_image(img, 1/downscale)*255
    imwrite(dataset_output_dir / img_path.basename(), rescaled_img.astype(np.uint8))

    output_img = None
    if visualization:
        # Each quadrant is (half_h, half_w); the mosaic height is therefore
        # even and keeps the aspect ratio of the image.
        half_h = int(viz_width * h / (2*w))
        half_w = viz_width//2
        output_img = np.zeros((2*half_h, viz_width, 3), dtype=np.uint8)
        # Img goes to upper left corner of visualization
        output_img[:half_h, :half_w] = resize(img, (half_h, half_w))*255

    if depth_path is not None:
        depth = _read_float32_map(depth_path, h, w, compressed)
        downscaled_depth, viz = apply_cmap_and_resize(depth, 'rainbow', downscale)
        if not interpolated:
            np.save(dataset_output_dir / img_path.stem + '.npy', downscaled_depth)

        if visualization:
            # Depth colormap goes to upper right corner
            output_img[:half_h, half_w:] = resize(viz, (half_h, half_w))
            # Mix of depth and image goes to lower left corner
            output_img[half_h:, :half_w] = \
                output_img[:half_h, :half_w]//2 + \
                output_img[:half_h, half_w:]//2

    if visualization:
        if occ_path is not None:
            occ = _read_float32_map(occ_path, h, w, compressed)
            _, occ_viz = apply_cmap_and_resize(occ, 'bone', downscale)
            # Occlusion depthmap visualization goes to lower right corner
            output_img[half_h:, half_w:] = resize(occ_viz, (half_h, half_w))

        if interpolated:
            # Orange frame around the mosaic flags interpolated poses.
            output_img[:5] = output_img[-5:] = output_img[:, :5] = output_img[:, -5:] = [255, 128, 0]

        imwrite(video_output_dir/img_path.stem + '.png', output_img)
193
194


Clément Pinard's avatar
Clément Pinard committed
195
# Command line interface of the script; parsed values are forwarded as keyword
# arguments to convert_dataset.
parser = ArgumentParser(
    description='Convert dataset to KITTI format, optionnally create a visualization video',
    formatter_class=ArgumentDefaultsHelpFormatter,
)

# Input locations
parser.add_argument(
    '--depth_dir', type=Path, metavar='DIR', required=True,
    help=('folder where depth maps generated by ETH3D are stored '
          'Usually ends with  "ground_truth_depth/<video name>"'),
)
parser.add_argument(
    '--images_root_folder', type=Path, metavar='DIR', required=True,
    help='folder where video frames are stored',
)
parser.add_argument(
    '--occ_dir', type=Path, metavar='DIR',
    help=('folder where occlusion depth maps generated by ETH3D are stored. '
          'Usually ends with "occlusion_depth/<video name>"'),
)
parser.add_argument(
    '--metadata_path', type=Path, required=True,
    help='path to metadata CSV file generated during video_to_colmap.py',
)

# Output locations and auxiliary inputs
parser.add_argument('--dataset_output_dir', type=Path, metavar='DIR', default=None, required=True)
parser.add_argument('--video_output_dir', type=Path, metavar='DIR', default=None)
parser.add_argument('--interpolated_frames_path', type=Path, metavar='TXT')
parser.add_argument('--final_model', type=Path, metavar='DIR')

# Processing options
parser.add_argument(
    '--visualize', action='store_true',
    help='If selected, will generate images with depth colorized for visualization purpose',
)
parser.add_argument(
    '--video', action='store_true',
    help='If selected, will generate a video from visualization images',
)
parser.add_argument(
    '--downscale', type=int, default=1,
    help='How much ground truth depth is downscaled in order to save space',
)
parser.add_argument('--threads', '-j', type=int, default=8, help='')
parser.add_argument(
    '--compressed', action='store_true',
    help='Indicates if GroundTruthCreator was used with option `--compress_depth_maps`',
)
parser.add_argument('--verbose', '-v', action='count', default=0)
219
220


Clément Pinard's avatar
Clément Pinard committed
221
def convert_dataset(final_model, depth_dir, images_root_folder, occ_dir,
                    dataset_output_dir, video_output_dir, ffmpeg,
                    interpolated_frames=None, metadata=None, images_list=None,
                    threads=8, downscale=None, compressed=True,
                    width=None, visualization=False, video=False, verbose=0, **env):
    """Convert a reconstructed sequence into the output dataset layout.

    Reads the text model in ``final_model``, keeps the images named in the
    frame list, then writes to ``dataset_output_dir``:

    * camera intrinsics (``rescale_and_save_cameras``),
    * ``poses.txt`` (``save_poses``),
    * the rescaled image and depth map of every frame (``process_one_frame``),
    * ``metadata.csv`` when ``metadata`` is given,
    * ``not_registered.txt`` listing frames missing from the model.

    When ``video`` is set (and ``metadata`` is given) the visualization
    images are assembled into a video with ``ffmpeg`` and
    ``video_output_dir`` is then deleted.

    Args:
        final_model: folder of the model read by ``rm.read_model(..., '.txt')``.
        depth_dir: folder holding one raw depth file per registered frame.
        images_root_folder: root that frame names are relative to.
        occ_dir: folder of occlusion depth maps, or None when unavailable.
        dataset_output_dir: output folder (created if needed).
        video_output_dir: folder for visualization images; required when
            ``visualization`` or ``video`` is set.
        ffmpeg: object exposing ``create_video``.
        interpolated_frames: names of frames whose pose was interpolated.
        metadata: DataFrame with at least the columns ``db_id``, ``time``,
            ``framerate``, ``image_path``, ``height``, ``width``,
            ``camera_model`` and ``camera_id``. It defines the frame list
            when given.
        images_list: frame names, required when ``metadata`` is None.
        threads: number of worker processes; 1 runs in the current process.
        downscale: integer downscale factor.
        compressed: whether depth/occlusion files are gzip-compressed (their
            names then end with ``.gz``).
        width: target width passed to ``rescale_and_save_cameras`` when
            ``downscale`` is None.
            NOTE(review): ``process_one_frame`` still receives
            ``downscale=None`` in that case, which it does not appear to
            support -- confirm whether the width mode is used.
        visualization: whether to write visualization images.
        video: whether to build a video; implies ``visualization``.
        verbose: verbosity; values above 2 print per-frame information.
        **env: extra keyword arguments, ignored.

    Raises:
        ValueError: if visualization is requested without ``video_output_dir``.
    """
    if video:
        visualization = True
    if visualization and video_output_dir is None:
        raise ValueError("video_output_dir is required when visualization or video is enabled")

    dataset_output_dir.makedirs_p()
    if video_output_dir is not None:
        video_output_dir.makedirs_p()

    cameras_colmap, images_colmap, _ = rm.read_model(final_model, '.txt')

    if metadata is not None:
        metadata = metadata.set_index("db_id", drop=False).sort_values("time")
        framerate = metadata["framerate"].values[0]
        images_list = metadata["image_path"].values
    else:
        assert images_list is not None
        framerate = None
        # No framerate available without metadata, so no video can be made.
        video = False

    # Sets give constant-time membership tests in the loops below.
    wanted_names = set(images_list)
    interpolated_names = set(interpolated_frames) if interpolated_frames is not None else set()

    # Discard images and cameras that are not represented by the image list
    images_colmap = {img.name: img for img in images_colmap.values() if img.name in wanted_names}
    cameras_ids = {img.camera_id for img in images_colmap.values()}
    cameras_colmap = {k: cameras_colmap[k] for k in cameras_ids}

    if downscale is None:
        assert width is not None
    rescaled_cameras = rescale_and_save_cameras(cameras_colmap,
                                                images_colmap,
                                                dataset_output_dir,
                                                width, downscale)
    poses = save_poses(images_colmap, images_list, dataset_output_dir)

    # Parallel per-frame lists, all in the order of images_list.
    imgs = []
    depth_maps = []
    occ_maps = []
    interpolated = []
    registered = []

    for name in images_list:
        img_path = images_root_folder / name
        imgs.append(img_path)

        fname = img_path.basename()
        depth_path = depth_dir / fname
        # Occlusion maps are optional: without a folder there is none.
        occ_path = occ_dir / fname if occ_dir is not None else None
        if compressed:
            depth_path += ".gz"
            if occ_path is not None:
                occ_path += ".gz"

        if name in images_colmap:
            assert depth_path.isfile(), "Missing depth map {}".format(depth_path)
            registered.append(True)
            has_occ = occ_path is not None and occ_path.isfile()
            occ_maps.append(occ_path if has_occ else None)
            depth_maps.append(depth_path)
            is_interpolated = name in interpolated_names
            if is_interpolated and verbose > 2:
                print("Image {} was interpolated".format(fname))
            interpolated.append(is_interpolated)
        else:
            if verbose > 2:
                print("Image {} was not registered".format(fname))
            registered.append(False)
            depth_maps.append(None)
            occ_maps.append(None)
            interpolated.append(False)

    print('{}/{} Frames not registered ({:.2f}%)'.format(len(images_list) - sum(registered),
                                                         len(images_list),
                                                         100*(1 - sum(registered)/len(images_list))))
    print('{}/{} Frames interpolated ({:.2f}%)'.format(sum(interpolated),
                                                       len(images_list),
                                                       100*sum(interpolated)/len(images_list)))

    # `compressed` must reach process_one_frame: it decides whether the maps
    # are opened with gzip, and has to agree with the ".gz" suffix above.
    n_frames = len(imgs)
    if threads == 1:
        for img_path, depth_path, occ_path, is_interpolated in tqdm(
                zip(imgs, depth_maps, occ_maps, interpolated), total=n_frames):
            process_one_frame(img_path, depth_path, occ_path,
                              dataset_output_dir, video_output_dir, downscale, is_interpolated,
                              visualization, viz_width=1920, compressed=compressed)
    else:
        with ProcessPool(max_workers=threads) as pool:
            tasks = pool.map(process_one_frame, imgs, depth_maps, occ_maps,
                             [dataset_output_dir]*n_frames, [video_output_dir]*n_frames,
                             [downscale]*n_frames, interpolated,
                             [visualization]*n_frames, [1920]*n_frames,
                             [compressed]*n_frames)
            try:
                # Consume the results only to drive the progress bar.
                for _ in tqdm(tasks.result(), total=n_frames):
                    pass
            except KeyboardInterrupt:
                tasks.cancel()
                raise

    if metadata is not None:
        wanted_keys = ['image_path', 'time', 'height', 'width', 'camera_model', 'camera_id']
        filtered_metadata = metadata[wanted_keys].copy()
        filtered_metadata['interpolated'] = interpolated
        filtered_metadata['registered'] = registered
        for i, j in product(range(3), range(4)):
            filtered_metadata['pose{}{}'.format(i, j)] = poses[:, i, j]

        # Intrinsics stay NaN for frames whose camera was not rescaled.
        intrinsics_columns = ("fx", "fy", "cx", "cy")
        for column in intrinsics_columns:
            filtered_metadata[column] = np.nan
        for cam_id in filtered_metadata["camera_id"].unique():
            if cam_id not in rescaled_cameras:
                continue
            cam = rescaled_cameras[cam_id]
            rows = filtered_metadata["camera_id"] == cam_id

            if "SIMPLE" in cam.model or "RADIAL" in cam.model:
                # Single focal length models: params start with (f, cx, cy).
                fx, cx, cy = cam.params[:3]
                fy = fx
            else:
                fx, fy, cx, cy = cam.params[:4]
            for column, value in zip(intrinsics_columns, (fx, fy, cx, cy)):
                filtered_metadata.loc[rows, column] = value
        filtered_metadata.to_csv(dataset_output_dir / 'metadata.csv')

    not_registered = [name + '\n' for name, is_registered in zip(images_list, registered)
                      if not is_registered]
    with open(dataset_output_dir / 'not_registered.txt', 'w') as f:
        f.writelines(not_registered)

    if video:
        video_path = str(video_output_dir.parent/'{}_groundtruth_viz.mp4'.format(video_output_dir.stem))
        glob_pattern = str(video_output_dir/'*.png')
        ffmpeg.create_video(video_path, glob_pattern, True, framerate)
        # The individual visualization frames are not kept once the video exists.
        video_output_dir.rmtree_p()
353
354
355
356
357


def read_interpolated_frames(path):
    """Return the frame names listed one per line in ``path``.

    Returns an empty list when ``path`` is None. Only the line terminator is
    stripped, so the last name is kept intact even when the file does not end
    with a newline.
    """
    if path is None:
        return []
    with open(path, "r") as f:
        return [line.rstrip('\n') for line in f]


if __name__ == '__main__':
    args = parser.parse_args()
    # Parsed options are forwarded as keyword arguments to convert_dataset.
    env = vars(args)
    env["metadata"] = pd.read_csv(env["metadata_path"])
    env["interpolated_frames"] = read_interpolated_frames(args.interpolated_frames_path)
    env["ffmpeg"] = FFMpeg()
    convert_dataset(**env)