Delete convert_to_nuscenes.py

This commit is contained in:
2026-03-29 19:24:23 +08:00
parent d6668beff5
commit 1750a1fb4f

View File

@@ -1,730 +0,0 @@
"""
Convert private dataset to NuScenes-style .pkl and .coco.json files
compatible with MMDetection3D.
Coordinate system note
----------------------
Your dataset: X-right, Y-forward, Z-up (same as NuScenes LiDAR frame)
-> No rotation is needed for the point cloud.
-> lidar == ego == global (identity transforms, no vehicle odometry available)
Projection matrix note
----------------------
Your camera_raw stores a 3×4 matrix P = K @ [R | t]
where K is the 3×3 intrinsic and [R|t] maps points from the LiDAR frame
into the camera frame.
We decompose P via RQ-decomposition to recover K (intrinsic) and [R|t]
(lidar-to-camera extrinsic).
Usage
-----
python convert_to_nuscenes.py \
--root /path/to/your/dataset \
--out /path/to/output/dir \
--split 0.8 # fraction used for training (rest = val)
--tag mydata # prefix for output file names
"""
import os
import sys
import json
import uuid
import pickle
import argparse
import numpy as np
from pathlib import Path
from PIL import Image
from typing import Dict, List, Tuple, Optional
# ---------------------------------------------------------------------------
# Inline minimal copy of the dataset loader so the script is self-contained.
# If you already have my_dataset.py on PYTHONPATH you can remove this block
# and simply do: from my_dataset import MyDataset, SampleData, FrameData
# ---------------------------------------------------------------------------
import glob
import open3d as o3d
from dataclasses import dataclass, field
from tqdm import tqdm
@dataclass
class FrameData:
    """All raw data belonging to one synchronized frame of a sample."""
    frame_index: int                  # position of the frame within its sample
    lidar_points: np.ndarray          # (N,3) float32 point cloud (X-right, Y-forward, Z-up)
    image_paths: Dict[str, str]       # cam_name -> absolute path of the image file on disk
    camera_raw: Dict[str, dict]       # cam_name -> {projection_matrix, image_size}
    labels: List[dict]                # processed 3D box labels for this frame
    contain_labels: bool = False      # True when `labels` is non-empty
@dataclass
class SampleData:
    """One recorded sample: a set of frames keyed by frame index."""
    sample_id: str                  # digits extracted from the sample folder name
    frames: Dict[int, FrameData]    # frame_index -> FrameData
class MyDataset:
    """
    In-memory loader for the private dataset layout.

    Each ``sample*`` directory under the root is expected to contain:
      - ``pcd_sequence<ID>/``    per-frame ``.pcd`` point clouds
      - ``<frame_index>/``       per-frame camera image folders ("0", "1", ...)
      - ``test<ID>.json``        camera calibration ("page" json)
      - ``test<ID>-mark.json``   3D box annotations ("mark" json)

    Samples whose pieces are inconsistent (frame counts or frame indices that
    do not line up) are reported and skipped rather than aborting the scan.
    """

    # Camera key -> on-disk image file name inside each per-frame folder.
    cam2filename = {
        "front_120": "scanofilm_surround_front_120_8M.jpg",
        "front_left_100": "scanofilm_surround_front_left_100_2M.jpg",
        "front_right_100": "scanofilm_surround_front_right_100_2M.jpg",
        "rear_100": "scanofilm_surround_rear_100_2M.jpg",
        "rear_left_100": "scanofilm_surround_rear_left_100_2M.jpg",
        "rear_right_100": "scanofilm_surround_rear_right_100_2M.jpg",
    }

    # Labeling-tool color code -> class name.
    color2class = {
        "#5414ED": "car", "#F6EE64": "pick-up-truck", "#F6A087": "small-truck",
        "#BC4EF1": "truck", "#4E9AF1": "bus", "#F1A94E": "special-vehicle",
        "#E1DFDD": "ignore", "#F91906": "tricyclist-withrider",
        "#FA5F51": "tricyclist-withoutrider", "#B8CB30": "bicycle-withrider",
        "#E6FD4E": "bicycle-withoutrider", "#876363": "people",
        "#2CBDF5": "crowd-people", "#C9F52C": "crowd-bicycle",
        "#DC6788": "crowd-car", "#6EC913": "traffic-cone",
        "#0DDE69": "plastic-barrier", "#8260D2": "crash-barrels",
        "#F1D1D1": "warning-triangle", "#FE6DF4": "crowd-traffic-cone",
        "#D1AA35": "crowd-plastic-barrier", "#3BE8D0": "crowd-crash-barrels",
        "#2B7567": "crowd-warning-triangle",
    }

    def __init__(self, root_folder: str):
        """Scan *root_folder* and eagerly load every sample into memory."""
        self.root = root_folder
        # Reverse lookup: class name -> color code.
        self.class2color = {v: k for k, v in self.color2class.items()}
        self._samples: Dict[str, SampleData] = {}
        self._load_all_samples()

    @staticmethod
    def _extract_id(folder_name: str) -> str:
        """Return the digits of the folder's basename (e.g. 'sample012' -> '012')."""
        return ''.join(c for c in os.path.basename(folder_name) if c.isdigit())

    def _find_sample_folders(self) -> List[str]:
        """All directories directly under root whose name matches 'sample*'."""
        return [f for f in glob.glob(os.path.join(self.root, "sample*"))
                if os.path.isdir(f)]

    def _find_paths_in_sample(self, sample_folder: str, sample_id: str) -> dict:
        """Resolve the expected file/folder layout inside one sample folder."""
        pcd_folder = os.path.join(sample_folder, f"pcd_sequence{sample_id}")
        pcd_files = glob.glob(os.path.join(pcd_folder, "*.pcd")) if os.path.isdir(pcd_folder) else []
        # One image folder per point-cloud frame, named "0", "1", ...
        img_folders = [os.path.join(sample_folder, str(i)) for i in range(len(pcd_files))]
        return {
            "pcd_folder": pcd_folder,
            "img_folders": img_folders,
            "page_json": os.path.join(sample_folder, f"test{sample_id}.json"),
            "mark_json": os.path.join(sample_folder, f"test{sample_id}-mark.json"),
        }

    def _read_lidar_points(self, pcd_path: str) -> Optional[np.ndarray]:
        """Load one .pcd file as an (N,3) float32 array; None on failure."""
        try:
            pcd = o3d.io.read_point_cloud(pcd_path, remove_nan_points=True,
                                          remove_infinite_points=True, format="auto")
            return np.asarray(pcd.points, dtype=np.float32)
        except Exception as e:
            # Best-effort: a corrupt cloud must not abort the whole scan.
            print(f"Error loading {pcd_path}: {e}")
            return None

    def _load_all_lidar_frames(self, pcd_folder: str) -> Dict[int, np.ndarray]:
        """Load every .pcd in *pcd_folder*, keyed by the frame index parsed
        from the trailing digits of the file stem (after the last 'n')."""
        out = {}
        for f in glob.glob(os.path.join(pcd_folder, "*.pcd")):
            idx = int(os.path.basename(f).split(".")[0].split("n")[-1])
            pts = self._read_lidar_points(f)
            if pts is not None:
                out[idx] = pts
        return out

    def _load_image_paths(self, img_folders: List[str]) -> Dict[int, Dict[str, str]]:
        """Map frame index -> {cam_name: image path} for images that exist."""
        out = {}
        for folder in img_folders:
            if not os.path.isdir(folder):
                continue
            idx = int(os.path.basename(folder))
            paths = {cam: os.path.join(folder, fname)
                     for cam, fname in self.cam2filename.items()
                     if os.path.isfile(os.path.join(folder, fname))}
            if paths:
                out[idx] = paths
        return out

    def _load_camera_raw(self, page_json_path: str) -> Dict[int, Dict[str, dict]]:
        """Parse per-frame camera calibration from the "page" json.

        Returns frame index -> {cam_name: {projection_matrix (3x4 float64),
        image_size}}; entries with malformed matrices are skipped.
        """
        if not os.path.isfile(page_json_path):
            return {}
        with open(page_json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        try:
            extend_source = data['data']['files'][0]['extendSources']
        except (KeyError, IndexError):
            return {}
        out = {}
        for d in extend_source:
            pe = d.get('pageElement', {})
            if "sensor" not in pe:
                continue
            try:
                idx = int(d['fileName'].split('.')[0])
            except Exception:
                continue
            # Normalize the sensor name to our camera keys:
            # drop the "ofilm_surround_" prefix and a resolution suffix.
            cam_name = pe['sensor'].replace("ofilm_surround_", "")
            for suf in ["_2M", "_8M"]:
                if cam_name.endswith(suf):
                    cam_name = cam_name[:-len(suf)]
                    break
            if cam_name not in self.cam2filename:
                continue
            matrix = pe.get('mtx', [])
            image_size = pe.get('imageSize', [])
            if not matrix or not image_size:
                continue
            matrix = np.array(matrix, dtype=np.float64)
            if matrix.shape != (3, 4):
                continue
            out.setdefault(idx, {})[cam_name] = {
                "projection_matrix": matrix,
                "image_size": image_size,
            }
        return out

    def _extract_text_label(self, json_path: str) -> list:
        """Return the label list of the annotator with 100% accuracy,
        or [] when no annotator reached full accuracy."""
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)['data']['list'][0]
        idx2rate = {k: float(v['accuracy'].replace('%', '')) / 100.0
                    for k, v in data['rate'].items()}
        # NOTE(review): assumes data['rate'] is non-empty — confirm upstream.
        max_idx = max(idx2rate, key=idx2rate.get)
        if idx2rate[max_idx] == 1.0:
            return data['result'][max_idx]
        return []

    def _process_text_label(self, json_path: str) -> Dict[int, list]:
        """Map frame index -> processed label dicts from the "mark" json.

        Returns {} when the file is missing (previously this raised
        FileNotFoundError and aborted the whole dataset scan).
        """
        if not os.path.isfile(json_path):
            return {}
        label_list = self._extract_text_label(json_path)
        if not label_list:
            return {}
        out = {}
        for entry in label_list:
            out[entry['index']] = self._process_frame_labels(entry['value'])
        return out

    def _process_frame_labels(self, label_list: List[dict]) -> List[dict]:
        """Normalize raw annotation entries for one frame into plain dicts
        (class name, motion/isolation flags, 8 corners, center, size, yaw)."""
        labels = []
        for lb in label_list:
            nl = {
                'num': lb['num'],
                'class': lb['label']['class-name'],
                # Tool stores "static"; we expose the inverse as is_moving.
                'is_moving': not bool(lb['label']['static']),
                'isolation': bool(lb['label']['isolation']),
                'color': self.class2color.get(lb['label']['class-name'], ''),
                'points': [[float(lb['newPoints'][i]['x']),
                            float(lb['newPoints'][i]['y']),
                            float(lb['newPoints'][i]['z'])] for i in range(8)],
                'center': [float(lb['x']), float(lb['y']), float(lb['z'])],
                'rotateZ': float(lb['rotateZ']),
                'dx': float(lb['width']),
                'dy': float(lb['height']),
                'dz': float(lb['depth']),
            }
            labels.append(nl)
        return labels

    def _load_one_sample(self, sample_folder: str) -> Optional["SampleData"]:
        """Load one sample folder; returns None when its pieces are inconsistent."""
        sid = self._extract_id(sample_folder)
        paths = self._find_paths_in_sample(sample_folder, sid)
        lidar_dict = self._load_all_lidar_frames(paths["pcd_folder"])
        img_dict = self._load_image_paths(paths["img_folders"])
        cam_dict = self._load_camera_raw(paths["page_json"])
        label_dict = self._process_text_label(paths["mark_json"])
        if not (len(label_dict) == len(lidar_dict) == len(img_dict) == len(cam_dict)):
            print(f"Sample {sid}: frame count mismatch, skipping.")
            return None
        # Build frames from indices present in every source dict.  The old
        # assumption that indices were exactly 0..N-1 raised KeyError for
        # samples with index gaps; an explicit key check skips them instead.
        common = set(lidar_dict) & set(img_dict) & set(cam_dict) & set(label_dict)
        if len(common) != len(lidar_dict):
            print(f"Sample {sid}: frame index mismatch, skipping.")
            return None
        frames = {idx: FrameData(
            frame_index=idx,
            lidar_points=lidar_dict[idx],
            image_paths=img_dict[idx],
            camera_raw=cam_dict[idx],
            labels=label_dict[idx],
            contain_labels=len(label_dict[idx]) > 0,
        ) for idx in sorted(common)}
        return SampleData(sample_id=sid, frames=frames) if frames else None

    def _load_all_samples(self) -> None:
        """Load every sample folder under the root, skipping bad ones."""
        for sf in tqdm(self._find_sample_folders(), desc="Loading samples"):
            s = self._load_one_sample(sf)
            if s is not None:
                self._samples[s.sample_id] = s
        print(f"Loaded {len(self._samples)} samples.")

    @property
    def sample_ids(self) -> List[str]:
        """IDs of all successfully loaded samples."""
        return list(self._samples.keys())

    def get_sample(self, sid: str) -> Optional["SampleData"]:
        """Return the SampleData for *sid*, or None if unknown."""
        return self._samples.get(sid)

    def __len__(self) -> int:
        return len(self._samples)
# ===========================================================================
# Helper math utilities
# ===========================================================================
def rq_decompose(P: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Decompose a 3x4 projection matrix P = K @ [R | t].

    Parameters
    ----------
    P : (3, 4) projection matrix (may carry an arbitrary overall scale).

    Returns
    -------
    K : (3, 3) upper-triangular intrinsic with positive diagonal, normalised
        so that K[2, 2] == 1.
    R : (3, 3) rotation mapping lidar coordinates into the camera frame.
    t : (3,) translation in camera coordinates.

    Note
    ----
    The previous implementation ran QR on ``inv(M).T``, which is an LQ
    factorisation of ``inv(M)``: it produced a lower-triangular "intrinsic"
    and an inverted rotation.  The correct trick is QR on ``inv(M)``:
        inv(M) = inv(K @ R) = R.T @ inv(K) = Q @ Rq
    so K = inv(Rq) (upper-triangular) and R = Q.T.
    """
    M = P[:, :3]  # 3x3 left block, M = K @ R
    Q, Rq = np.linalg.qr(np.linalg.inv(M))
    K = np.linalg.inv(Rq)
    R = Q.T
    # Resolve the sign ambiguity: K @ R == (K @ D) @ (D @ R) for any diagonal
    # D with +-1 entries; pick D so that diag(K) is positive.
    signs = np.sign(np.diag(K))
    signs[signs == 0] = 1.0
    D = np.diag(signs)
    K = K @ D
    R = D @ R
    # Recover t from the last column: P[:, 3] = K @ t (uses the unnormalised
    # K so any projective scale in P cancels out).
    t = np.linalg.inv(K) @ P[:, 3]
    # Remove the overall projective scale so K[2, 2] == 1.
    K = K / K[2, 2]
    return K, R, t
def rotation_matrix_to_quaternion(R: np.ndarray) -> List[float]:
    """Convert a 3x3 rotation matrix into a quaternion [w, x, y, z].

    Uses the classic branch-per-dominant-element scheme: pick the largest of
    the trace / diagonal entries so the square root stays well-conditioned.
    """
    diag_sum = R[0, 0] + R[1, 1] + R[2, 2]
    if diag_sum > 0:
        inv_norm = 0.5 / np.sqrt(diag_sum + 1.0)
        qw = 0.25 / inv_norm
        qx = (R[2, 1] - R[1, 2]) * inv_norm
        qy = (R[0, 2] - R[2, 0]) * inv_norm
        qz = (R[1, 0] - R[0, 1]) * inv_norm
    elif R[0, 0] > R[1, 1] and R[0, 0] > R[2, 2]:
        # R[0,0] dominates -> x is the largest component.
        scale = 2.0 * np.sqrt(1.0 + R[0, 0] - R[1, 1] - R[2, 2])
        qw = (R[2, 1] - R[1, 2]) / scale
        qx = 0.25 * scale
        qy = (R[0, 1] + R[1, 0]) / scale
        qz = (R[0, 2] + R[2, 0]) / scale
    elif R[1, 1] > R[2, 2]:
        # R[1,1] dominates -> y is the largest component.
        scale = 2.0 * np.sqrt(1.0 + R[1, 1] - R[0, 0] - R[2, 2])
        qw = (R[0, 2] - R[2, 0]) / scale
        qx = (R[0, 1] + R[1, 0]) / scale
        qy = 0.25 * scale
        qz = (R[1, 2] + R[2, 1]) / scale
    else:
        # R[2,2] dominates -> z is the largest component.
        scale = 2.0 * np.sqrt(1.0 + R[2, 2] - R[0, 0] - R[1, 1])
        qw = (R[1, 0] - R[0, 1]) / scale
        qx = (R[0, 2] + R[2, 0]) / scale
        qy = (R[1, 2] + R[2, 1]) / scale
        qz = 0.25 * scale
    return [qw, qx, qy, qz]
def identity_quaternion() -> List[float]:
    """Quaternion [w, x, y, z] representing no rotation."""
    return [1.0] + [0.0] * 3
def project_3d_to_2d(points_3d: np.ndarray, P: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    Project Nx3 LiDAR points onto the image plane with a 3x4 projection matrix.

    Parameters
    ----------
    points_3d : (N, 3) array of LiDAR-frame points.
    P : (3, 4) projection matrix.

    Returns
    -------
    uv : (M, 2) pixel coordinates of the points in front of the camera (M <= N).
    valid : (N,) boolean mask, True where the projected depth is positive.
        (The old signature annotated a single ndarray return, but the function
        has always returned this (uv, valid) tuple.)
    """
    N = points_3d.shape[0]
    pts_h = np.hstack([points_3d, np.ones((N, 1))])  # homogeneous, Nx4
    proj = (P @ pts_h.T).T  # Nx3: (u*z, v*z, z)
    valid = proj[:, 2] > 0  # keep only points in front of the camera
    uv = proj[valid, :2] / proj[valid, 2:3]
    return uv, valid
def count_points_in_box(points: np.ndarray, cx, cy, cz, dx, dy, dz, yaw) -> int:
    """Count how many of the given LiDAR points fall inside a yaw-rotated box.

    The points are shifted to the box center and rotated by -yaw so the box
    becomes axis-aligned; a per-axis half-extent test then selects the inliers.
    """
    c, s = np.cos(yaw), np.sin(yaw)
    # Rz(yaw).T == Rz(-yaw): maps world-frame offsets into the box frame.
    world_to_box = np.array([[c, s, 0],
                             [-s, c, 0],
                             [0, 0, 1]], dtype=np.float32)
    offsets = points - np.array([cx, cy, cz], dtype=np.float32)
    local = (world_to_box @ offsets.T).T
    inside = ((np.abs(local[:, 0]) <= dx / 2) &
              (np.abs(local[:, 1]) <= dy / 2) &
              (np.abs(local[:, 2]) <= dz / 2))
    return int(inside.sum())
# ---------------------------------------------------------------------------
# NuScenes camera name mapping
# ---------------------------------------------------------------------------
# Private camera keys (see MyDataset.cam2filename) -> NuScenes sensor names.
CAM_NAME_MAP = {
    "front_120": "CAM_FRONT",
    "front_left_100": "CAM_FRONT_LEFT",
    "front_right_100": "CAM_FRONT_RIGHT",
    "rear_100": "CAM_BACK",
    "rear_left_100": "CAM_BACK_LEFT",
    "rear_right_100": "CAM_BACK_RIGHT",
}
# ---------------------------------------------------------------------------
# Class mapping: private dataset labels -> NuScenes labels
# ---------------------------------------------------------------------------
# NOTE(review): "ignore" boxes are remapped to "car", and the various
# "crowd-*" group labels to single-instance classes — confirm this is the
# intended training semantics rather than excluding those boxes.
PRIVATE2NUSCENES = {
    "car": "car",
    "pick-up-truck": "car",
    "small-truck": "truck",
    "truck": "truck",
    "bus": "bus",
    "ignore": "car",
    "special-vehicle": "construction_vehicle",
    "tricyclist-withrider": "bicycle",
    "tricyclist-withoutrider":"bicycle",
    "bicycle-withrider": "bicycle",
    "bicycle-withoutrider": "bicycle",
    "people": "pedestrian",
    "crowd-people": "pedestrian",
    "crowd-bicycle": "bicycle",
    "crowd-car": "car",
    "traffic-cone": "traffic_cone",
    "plastic-barrier": "barrier",
    "crash-barrels": "barrier",
    "warning-triangle": "barrier",
    "crowd-traffic-cone": "barrier",
    "crowd-plastic-barrier": "barrier",
    "crowd-crash-barrels": "barrier",
    "crowd-warning-triangle": "barrier",
}
# Official NuScenes detection classes (in canonical order)
DETECTION_CLASSES = [
    "car", "truck", "bus", "construction_vehicle", "motorcycle",
    "bicycle", "pedestrian", "traffic_cone", "barrier", "trailer",
]
# Class name -> integer label id; the list order above is significant.
CLASS2ID = {c: i for i, c in enumerate(DETECTION_CLASSES)}
# ===========================================================================
# Core conversion
# ===========================================================================
def frame_to_info(frame: FrameData, sample_id: str, out_lidar_dir: str,
                  out_img_dir: str, rel_lidar_root: str, rel_img_root: str) -> dict:
    """
    Convert one FrameData into a NuScenes-style info dict.
    Since we have no vehicle poses, we treat lidar = ego = global (identity).

    Side effects: writes the frame's point cloud as a .bin file under
    *out_lidar_dir* and a resized copy of every camera image under
    *out_img_dir*/<CAM_NAME>/.  The returned dict stores only paths relative
    to *rel_lidar_root* / *rel_img_root*.
    """
    token = str(uuid.uuid4())
    # ---- Save point cloud as .bin (float32 x,y,z) -------------------------
    os.makedirs(out_lidar_dir, exist_ok=True)
    bin_name = f"{sample_id}_frame{frame.frame_index:04d}.bin"
    bin_path = os.path.join(out_lidar_dir, bin_name)
    frame.lidar_points.astype(np.float32).tofile(bin_path)
    lidar_path_rel = os.path.join(rel_lidar_root, bin_name)
    # ---- Identity ego/global poses ----------------------------------------
    zero3 = [0.0, 0.0, 0.0]
    id_quat = identity_quaternion()
    # ---- Camera info -------------------------------------------------------
    cams_info = {}
    # All cameras are resized to one common resolution; intrinsics are
    # rescaled accordingly below.
    target_W, target_H = 1920, 1280
    for cam_key, nus_cam in CAM_NAME_MAP.items():
        if cam_key not in frame.camera_raw:
            continue
        cam_data = frame.camera_raw[cam_key]
        P = cam_data["projection_matrix"]  # 3×4
        ori_W, ori_H = cam_data["image_size"]
        # Decompose P -> K (3×3 intrinsic), R_cam_from_lidar, t_cam_from_lidar
        K, R_c2l_inv, t_in_cam = rq_decompose(P)
        # R_c2l_inv is R such that x_cam = R @ x_lidar + t
        R_lidar2cam = R_c2l_inv  # camera_from_lidar rotation
        t_lidar2cam = t_in_cam  # translation in camera coords
        # sensor2lidar = inverse of lidar2camera
        R_cam2lidar = R_lidar2cam.T
        t_cam2lidar = -R_cam2lidar @ t_lidar2cam
        # Rescale the intrinsics to the target resolution (focal lengths and
        # principal point scale linearly with the image axes).
        sx = target_W / ori_W
        sy = target_H / ori_H
        K_adjusted = K.copy()
        K_adjusted[0, 0] *= sx
        K_adjusted[1, 1] *= sy
        K_adjusted[0, 2] *= sx
        K_adjusted[1, 2] *= sy
        # Resize and copy the image into the per-camera output directory.
        src_img = frame.image_paths[cam_key]
        cam_img_dir = os.path.join(out_img_dir, nus_cam)
        os.makedirs(cam_img_dir, exist_ok=True)
        img_name = f"{sample_id}_frame{frame.frame_index:04d}.jpg"
        dst_img = os.path.join(cam_img_dir, img_name)
        img = Image.open(src_img)
        img_resized = img.resize((target_W, target_H), Image.LANCZOS)
        img_resized.save(dst_img)
        img_path_rel = os.path.join(rel_img_root, nus_cam, img_name)
        cam_token = str(uuid.uuid4())
        cams_info[nus_cam] = {
            "data_path": img_path_rel,
            "type": nus_cam,
            "sample_data_token": cam_token,
            # camera -> ego (treat camera as fixed to lidar frame via R_cam2lidar)
            "sensor2ego_translation": t_cam2lidar.tolist(),
            "sensor2ego_rotation": rotation_matrix_to_quaternion(R_cam2lidar),
            # ego -> global (identity)
            "ego2global_translation": zero3,
            "ego2global_rotation": id_quat,
            # lidar -> camera extrinsic (for projection)
            "sensor2lidar_translation": t_cam2lidar.tolist(),
            "sensor2lidar_rotation": rotation_matrix_to_quaternion(R_cam2lidar),
            # intrinsic
            "cam_intrinsic": K_adjusted.tolist(),
            "width": target_W,
            "height": target_H,
            "timestamp": frame.frame_index * 100000,  # synthetic
        }
    # ---- Annotations -------------------------------------------------------
    instances = []
    for lb in frame.labels:
        raw_cls = lb.get("class", "")
        cls = PRIVATE2NUSCENES.get(raw_cls)  # remap to NuScenes name
        if cls is None or cls not in CLASS2ID:
            continue
        cx, cy, cz = lb["center"]
        dx, dy, dz = lb["dx"], lb["dy"], lb["dz"]
        yaw = lb["rotateZ"]
        # num_lidar_pts is used downstream to mark empty boxes as invalid.
        n_pts = count_points_in_box(frame.lidar_points, cx, cy, cz, dx, dy, dz, yaw)
        instances.append({
            "bbox_3d": [cx, cy, cz, dx, dy, dz, yaw],
            "bbox_label_3d": CLASS2ID[cls],
            "bbox": [0.0, 0.0, 1.0, 1.0],  # no 2D GT available
            "bbox_label": CLASS2ID[cls],
            "velocity": [float("nan"), float("nan")],
            "num_lidar_pts": n_pts,
            "bbox_3d_isvalid": n_pts > 0,
        })
    info = {
        "lidar_points": {
            "lidar_path": lidar_path_rel,
            "num_pts_feats": 3,  # X, Y, Z only
        },
        "token": token,
        "sweeps": [],  # no sweep data available
        "cams": cams_info,
        # lidar == ego (identity)
        "lidar2ego_translation": zero3,
        "lidar2ego_rotation": id_quat,
        # ego == global (identity)
        "ego2global_translation": zero3,
        "ego2global_rotation": id_quat,
        "timestamp": frame.frame_index * 100000,
        # annotations
        "instances": instances,
    }
    return info
def build_coco_json(infos: List[dict]) -> dict:
    """Build a COCO-style mono3d JSON from NuScenes info dicts.

    For every camera of every frame this emits one image entry, and for every
    3D instance that projects in front of that camera one annotation entry
    with the camera-frame 3D box, the projected 2D box, and the projected
    3D-center (center2d with depth).
    """
    categories = [{"id": i, "name": c} for i, c in enumerate(DETECTION_CLASSES)]
    images = []
    annotations = []
    ann_id = 0
    for info in infos:
        frame_token = info["token"]
        for nus_cam, cam_info in info["cams"].items():
            # NOTE(review): image "id" is a UUID string, not an int as in
            # classic COCO — confirm the consumer accepts string ids.
            img_token = cam_info["sample_data_token"]
            W = cam_info["width"]
            H = cam_info["height"]
            K = np.array(cam_info["cam_intrinsic"])  # 3×3
            img_entry = {
                "file_name": cam_info["data_path"],
                "id": img_token,
                "token": frame_token,
                "cam2ego_rotation": cam_info["sensor2ego_rotation"],
                "cam2ego_translation": cam_info["sensor2ego_translation"],
                "ego2global_rotation": info["ego2global_rotation"],
                "ego2global_translation": info["ego2global_translation"],
                "cam_intrinsic": cam_info["cam_intrinsic"],
                "width": W,
                "height": H,
            }
            images.append(img_entry)
            # Build sensor2lidar matrix for projecting boxes
            t_c2l = np.array(cam_info["sensor2lidar_translation"])
            quat = cam_info["sensor2lidar_rotation"]  # [w,x,y,z]
            w, x, y, z = quat
            # Standard quaternion -> rotation matrix expansion.
            R_c2l = np.array([
                [1-2*(y*y+z*z), 2*(x*y-z*w), 2*(x*z+y*w)],
                [2*(x*y+z*w), 1-2*(x*x+z*z), 2*(y*z-x*w)],
                [2*(x*z-y*w), 2*(y*z+x*w), 1-2*(x*x+y*y)],
            ])
            # Invert cam->lidar to get the lidar->cam transform used below.
            R_l2c = R_c2l.T
            t_l2c = -R_l2c @ t_c2l
            # Iterate over the valid "instances" key generated by frame_to_info
            for inst in info.get("instances", []):
                cls_id = inst["bbox_label_3d"]
                if cls_id < 0 or cls_id >= len(DETECTION_CLASSES):
                    continue
                cls = DETECTION_CLASSES[cls_id]
                cx_l, cy_l, cz_l, dx, dy, dz, yaw_l = inst["bbox_3d"]
                # Transform center to camera frame
                center_l = np.array([cx_l, cy_l, cz_l])
                center_c = R_l2c @ center_l + t_l2c
                depth = center_c[2]
                if depth <= 0:
                    continue  # box behind camera
                # Project 3D center to 2D
                center_2d = K @ center_c
                u = center_2d[0] / center_2d[2]
                v = center_2d[1] / center_2d[2]
                # Yaw in camera frame (approx: only rotate around Z in lidar)
                # NOTE(review): the lidar yaw is copied unchanged; a proper
                # conversion would re-express it about the camera axes.
                yaw_cam = yaw_l
                # 3D box annotation: [cx_cam, cy_cam, cz_cam, dx, dy, dz, yaw_cam]
                bbox_cam3d = [float(center_c[0]), float(center_c[1]), float(center_c[2]),
                              float(dx), float(dy), float(dz), float(yaw_cam)]
                # Project 8 corners to get 2D bbox
                corners_l = _box_corners(cx_l, cy_l, cz_l, dx, dy, dz, yaw_l)
                corners_c = (R_l2c @ corners_l.T).T + t_l2c
                # Drop corners behind the image plane before projecting.
                corners_c = corners_c[corners_c[:, 2] > 0]
                if len(corners_c) == 0:
                    continue
                proj_h = (K @ corners_c.T).T
                uvs = proj_h[:, :2] / proj_h[:, 2:3]
                x1, y1 = uvs[:, 0].min(), uvs[:, 1].min()
                x2, y2 = uvs[:, 0].max(), uvs[:, 1].max()
                # Clamp the 2D box to the image bounds and discard degenerates.
                x1 = max(0, min(x1, W)); x2 = max(0, min(x2, W))
                y1 = max(0, min(y1, H)); y2 = max(0, min(y2, H))
                bw, bh = x2 - x1, y2 - y1
                if bw <= 0 or bh <= 0:
                    continue
                # Extract velocity from the instance dict
                vx, vy = inst.get("velocity", [float("nan"), float("nan")])
                ann = {
                    "file_name": cam_info["data_path"],
                    "image_id": img_token,
                    "area": float(bw * bh),
                    "category_name": cls,
                    "category_id": cls_id,
                    "bbox": [float(x1), float(y1), float(bw), float(bh)],
                    "iscrowd": 0,
                    "bbox_cam3d": bbox_cam3d,
                    "velo_cam3d": [vx, vy],
                    "center2d": [float(u), float(v), float(depth)],
                    "attribute_name": "",
                    "attribute_id": -1,
                    "id": ann_id,
                }
                annotations.append(ann)
                ann_id += 1
    return {"categories": categories, "images": images, "annotations": annotations}
def _box_corners(cx, cy, cz, dx, dy, dz, yaw) -> np.ndarray:
"""Return 8 corners of a 3D box as (8,3) array."""
hx, hy, hz = dx / 2, dy / 2, dz / 2
corners = np.array([
[ hx, hy, hz], [ hx, hy, -hz], [ hx, -hy, hz], [ hx, -hy, -hz],
[-hx, hy, hz], [-hx, hy, -hz], [-hx, -hy, hz], [-hx, -hy, -hz],
], dtype=np.float32)
c, s = np.cos(yaw), np.sin(yaw)
Rz = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]], dtype=np.float32)
corners = (Rz @ corners.T).T + np.array([cx, cy, cz], dtype=np.float32)
return corners
# ===========================================================================
# Main
# ===========================================================================
def main():
    """CLI entry point: load the dataset, split it, and write the outputs.

    Produces, under --out: NuScenes-style samples/ folders (point clouds and
    resized images), one .pkl + one mono3d .coco.json per split, and a class
    list text file.
    """
    parser = argparse.ArgumentParser(description="Convert private dataset to NuScenes-style PKL/JSON.")
    parser.add_argument("--root", default="sample", help="Root folder of your dataset")
    parser.add_argument("--out", default="my_dataset", help="Output directory")
    parser.add_argument("--split", type=float, default=0.8, help="Train fraction (0-1)")
    parser.add_argument("--tag", default="mydata", help="Output file prefix")
    args = parser.parse_args()
    out_root = Path(args.out)
    # Output layout mirrors NuScenes: samples/LIDAR_TOP plus samples/CAM_*.
    lidar_dir = out_root / "samples" / "LIDAR_TOP"
    img_dir = out_root / "samples"
    rel_lidar = "samples/LIDAR_TOP"
    rel_img = "samples"
    print("Loading dataset ...")
    ds = MyDataset(args.root)
    # Collect all (sample_id, frame_index) pairs
    all_frames = []
    for sid in sorted(ds.sample_ids):
        sample = ds.get_sample(sid)
        for fidx in sorted(sample.frames.keys()):
            all_frames.append((sid, fidx, sample.frames[fidx]))
    print(f"Total frames: {len(all_frames)}")
    # Sequential (unshuffled) split: first fraction -> train, rest -> val.
    n_train = int(len(all_frames) * args.split)
    train_frames = all_frames[:n_train]
    val_frames = all_frames[n_train:]
    def convert_split(frames, split_name):
        # Convert one split and write its .pkl and mono3d .coco.json files.
        infos = []
        for sid, fidx, frame in tqdm(frames, desc=f"Converting {split_name}"):
            info = frame_to_info(
                frame, sid,
                out_lidar_dir=str(lidar_dir),
                out_img_dir=str(img_dir),
                rel_lidar_root=rel_lidar,
                rel_img_root=rel_img,
            )
            infos.append(info)
        pkl_path = out_root / f"{args.tag}_infos_{split_name}.pkl"
        json_path = out_root / f"{args.tag}_infos_{split_name}_mono3d.coco.json"
        with open(pkl_path, "wb") as f:
            pickle.dump({
                "metainfo": {
                    "version": "custom-v1.0",
                    "classes": DETECTION_CLASSES,
                },
                "data_list": infos,
            }, f)
        print(f"Saved {pkl_path} ({len(infos)} frames)")
        coco = build_coco_json(infos)
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(coco, f, ensure_ascii=False, indent=2)
        print(f"Saved {json_path} ({len(coco['images'])} images, {len(coco['annotations'])} anns)")
    convert_split(train_frames, "train")
    convert_split(val_frames, "val")
    # Write class list for reference
    classes_path = out_root / f"{args.tag}_classes.txt"
    classes_path.write_text("\n".join(DETECTION_CLASSES))
    print(f"\nDone. Output structure:\n {out_root}/")
    print(f" ├── samples/LIDAR_TOP/*.bin")
    print(f" ├── samples/CAM_FRONT/*.jpg (and other cameras)")
    print(f" ├── {args.tag}_infos_train.pkl")
    print(f" ├── {args.tag}_infos_val.pkl")
    print(f" ├── {args.tag}_infos_train_mono3d.coco.json")
    print(f" └── {args.tag}_infos_val_mono3d.coco.json")


if __name__ == "__main__":
    main()