""" Convert private dataset to NuScenes-style .pkl and .coco.json files compatible with MMDetection3D. Coordinate system note ---------------------- Your dataset: X-right, Y-forward, Z-up (same as NuScenes LiDAR frame) -> No rotation is needed for the point cloud. -> lidar == ego == global (identity transforms, no vehicle odometry available) Projection matrix note ---------------------- Your camera_raw stores a 3×4 matrix P = K @ [R | t] where K is the 3×3 intrinsic and [R|t] maps points from the LiDAR frame into the camera frame. We decompose P via RQ-decomposition to recover K (intrinsic) and [R|t] (lidar-to-camera extrinsic). Usage ----- python convert_to_nuscenes.py \ --root /path/to/your/dataset \ --out /path/to/output/dir \ --split 0.8 # fraction used for training (rest = val) --tag mydata # prefix for output file names """ import os import sys import json import uuid import pickle import argparse import numpy as np from pathlib import Path from PIL import Image from typing import Dict, List, Tuple, Optional # --------------------------------------------------------------------------- # Inline minimal copy of the dataset loader so the script is self-contained. 
# If you already have my_dataset.py on PYTHONPATH you can remove this block
# and simply do: from my_dataset import MyDataset, SampleData, FrameData
# ---------------------------------------------------------------------------
import glob
import open3d as o3d
from dataclasses import dataclass, field
from tqdm import tqdm


@dataclass
class FrameData:
    """All data belonging to a single frame of a sample sequence."""
    frame_index: int
    lidar_points: np.ndarray        # (N,3) float32
    image_paths: Dict[str, str]     # cam_name -> image file path
    camera_raw: Dict[str, dict]     # cam_name -> {projection_matrix, image_size}
    labels: List[dict]              # parsed 3D box annotations (may be empty)
    contain_labels: bool = False


@dataclass
class SampleData:
    """One sample (a short sequence) holding its frames keyed by frame index."""
    sample_id: str
    frames: Dict[int, FrameData]


class MyDataset:
    """Loader for the private dataset layout.

    Each ``sample*`` folder contains a ``pcd_sequence<ID>`` directory with one
    .pcd per frame, numbered image folders ("0", "1", ...) with one image per
    camera, a ``test<ID>.json`` with camera calibration and a
    ``test<ID>-mark.json`` with 3D box labels.
    """

    # camera name -> fixed image file name inside each per-frame folder
    cam2filename = {
        "front_120": "scanofilm_surround_front_120_8M.jpg",
        "front_left_100": "scanofilm_surround_front_left_100_2M.jpg",
        "front_right_100": "scanofilm_surround_front_right_100_2M.jpg",
        "rear_100": "scanofilm_surround_rear_100_2M.jpg",
        "rear_left_100": "scanofilm_surround_rear_left_100_2M.jpg",
        "rear_right_100": "scanofilm_surround_rear_right_100_2M.jpg",
    }

    # annotation color (hex) -> human-readable class name
    color2class = {
        "#5414ED": "car",
        "#F6EE64": "pick-up-truck",
        "#F6A087": "small-truck",
        "#BC4EF1": "truck",
        "#4E9AF1": "bus",
        "#F1A94E": "special-vehicle",
        "#E1DFDD": "ignore",
        "#F91906": "tricyclist-withrider",
        "#FA5F51": "tricyclist-withoutrider",
        "#B8CB30": "bicycle-withrider",
        "#E6FD4E": "bicycle-withoutrider",
        "#876363": "people",
        "#2CBDF5": "crowd-people",
        "#C9F52C": "crowd-bicycle",
        "#DC6788": "crowd-car",
        "#6EC913": "traffic-cone",
        "#0DDE69": "plastic-barrier",
        "#8260D2": "crash-barrels",
        "#F1D1D1": "warning-triangle",
        "#FE6DF4": "crowd-traffic-cone",
        "#D1AA35": "crowd-plastic-barrier",
        "#3BE8D0": "crowd-crash-barrels",
        "#2B7567": "crowd-warning-triangle",
    }

    def __init__(self, root_folder: str):
        self.root = root_folder
        # reverse mapping so labels can be colored by class name
        self.class2color = {v: k for k, v in self.color2class.items()}
        self._samples: Dict[str, SampleData] = {}
        self._load_all_samples()

    @staticmethod
    def _extract_id(folder_name: str) -> str:
        """Return the digits of a folder's basename (the numeric sample id)."""
        return ''.join(c for c in os.path.basename(folder_name) if c.isdigit())

    def _find_sample_folders(self):
        """List all ``sample*`` directories directly under the dataset root."""
        return [f for f in glob.glob(os.path.join(self.root, "sample*"))
                if os.path.isdir(f)]

    def _find_paths_in_sample(self, sample_folder, sample_id):
        """Resolve the expected file/folder paths inside one sample folder."""
        pcd_folder = os.path.join(sample_folder, f"pcd_sequence{sample_id}")
        pcd_files = (glob.glob(os.path.join(pcd_folder, "*.pcd"))
                     if os.path.isdir(pcd_folder) else [])
        # one numbered image folder per point-cloud frame
        img_folders = [os.path.join(sample_folder, str(i))
                       for i in range(len(pcd_files))]
        return {
            "pcd_folder": pcd_folder,
            "img_folders": img_folders,
            "page_json": os.path.join(sample_folder, f"test{sample_id}.json"),
            "mark_json": os.path.join(sample_folder, f"test{sample_id}-mark.json"),
        }

    def _read_lidar_points(self, pcd_path):
        """Read one .pcd file; return (N,3) float32 points or None on failure."""
        try:
            pcd = o3d.io.read_point_cloud(pcd_path, remove_nan_points=True,
                                          remove_infinite_points=True,
                                          format="auto")
            return np.asarray(pcd.points, dtype=np.float32)
        except Exception as e:
            # best-effort: one unreadable cloud should not abort the whole run
            print(f"Error loading {pcd_path}: {e}")
            return None

    def _load_all_lidar_frames(self, pcd_folder):
        """Load every .pcd in the folder, keyed by the frame index parsed
        from the file name (digits after the last 'n' of the stem)."""
        out = {}
        for f in glob.glob(os.path.join(pcd_folder, "*.pcd")):
            idx = int(os.path.basename(f).split(".")[0].split("n")[-1])
            pts = self._read_lidar_points(f)
            if pts is not None:
                out[idx] = pts
        return out

    def _load_image_paths(self, img_folders):
        """Map frame index -> {cam_name: image path} for images that exist."""
        out = {}
        for folder in img_folders:
            if not os.path.isdir(folder):
                continue
            idx = int(os.path.basename(folder))
            paths = {cam: os.path.join(folder, fname)
                     for cam, fname in self.cam2filename.items()
                     if os.path.isfile(os.path.join(folder, fname))}
            if paths:
                out[idx] = paths
        return out

    def _load_camera_raw(self, page_json_path):
        """Parse per-frame camera calibration from the page JSON.

        Returns {frame_idx: {cam_name: {"projection_matrix": 3x4 ndarray,
        "image_size": [W, H]}}}; returns {} when the file or the expected
        keys are missing.
        """
        if not os.path.isfile(page_json_path):
            return {}
        with open(page_json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        try:
            extend_source = data['data']['files'][0]['extendSources']
        except (KeyError, IndexError):
            return {}
        out = {}
        for d in extend_source:
            pe = d.get('pageElement', {})
            if "sensor" not in pe:
                continue
            try:
                idx = int(d['fileName'].split('.')[0])
            except Exception:
                continue
            # normalize sensor name to the short cam2filename keys
            cam_name = pe['sensor'].replace("ofilm_surround_", "")
            for suf in ["_2M", "_8M"]:
                if cam_name.endswith(suf):
                    cam_name = cam_name[:-len(suf)]
                    break
            if cam_name not in self.cam2filename:
                continue
            matrix = pe.get('mtx', [])
            image_size = pe.get('imageSize', [])
            if not matrix or not image_size:
                continue
            matrix = np.array(matrix, dtype=np.float64)
            if matrix.shape != (3, 4):
                continue
            out.setdefault(idx, {})[cam_name] = {
                "projection_matrix": matrix,
                "image_size": image_size,
            }
        return out

    def _extract_text_label(self, json_path):
        """Return the annotator result list with 100% accuracy, else []."""
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)['data']['list'][0]
        idx2rate = {k: float(v['accuracy'].replace('%', '')) / 100.0
                    for k, v in data['rate'].items()}
        max_idx = max(idx2rate, key=idx2rate.get)
        if idx2rate[max_idx] == 1.0:
            return data['result'][max_idx]
        return []

    def _process_text_label(self, json_path):
        """Map frame index -> normalized label list for one mark JSON."""
        label_list = self._extract_text_label(json_path)
        if not label_list:
            return {}
        out = {}
        for entry in label_list:
            out[entry['index']] = self._process_frame_labels(entry['value'])
        return out

    def _process_frame_labels(self, label_list):
        """Normalize raw annotator boxes into plain dicts.

        NOTE: dx/dy/dz are taken from the raw 'width'/'height'/'depth' keys;
        the mapping to box axes follows the annotator tool's convention.
        """
        labels = []
        for lb in label_list:
            nl = {
                'num': lb['num'],
                'class': lb['label']['class-name'],
                'is_moving': not bool(lb['label']['static']),
                'isolation': bool(lb['label']['isolation']),
                'color': self.class2color.get(lb['label']['class-name'], ''),
                'points': [[float(lb['newPoints'][i]['x']),
                            float(lb['newPoints'][i]['y']),
                            float(lb['newPoints'][i]['z'])] for i in range(8)],
                'center': [float(lb['x']), float(lb['y']), float(lb['z'])],
                'rotateZ': float(lb['rotateZ']),
                'dx': float(lb['width']),
                'dy': float(lb['height']),
                'dz': float(lb['depth']),
            }
            labels.append(nl)
        return labels

    def _load_one_sample(self, sample_folder):
        """Assemble one SampleData; returns None if modalities disagree."""
        sid = self._extract_id(sample_folder)
        paths = self._find_paths_in_sample(sample_folder, sid)
        lidar_dict = self._load_all_lidar_frames(paths["pcd_folder"])
        img_dict = self._load_image_paths(paths["img_folders"])
        cam_dict = self._load_camera_raw(paths["page_json"])
        label_dict = self._process_text_label(paths["mark_json"])
        # All four modalities must cover the same number of frames.
        if not (len(label_dict) == len(lidar_dict) == len(img_dict) == len(cam_dict)):
            print(f"Sample {sid}: frame count mismatch, skipping.")
            return None
        # NOTE(review): assumes frame indices are contiguous 0..n-1 in every
        # dict; a gap would raise KeyError here — confirm against the data.
        frames = {idx: FrameData(
            frame_index=idx,
            lidar_points=lidar_dict[idx],
            image_paths=img_dict[idx],
            camera_raw=cam_dict[idx],
            labels=label_dict[idx],
            contain_labels=len(label_dict[idx]) > 0,
        ) for idx in range(len(lidar_dict))}
        return SampleData(sample_id=sid, frames=frames) if frames else None

    def _load_all_samples(self):
        """Eagerly load every sample folder under the dataset root."""
        for sf in tqdm(self._find_sample_folders(), desc="Loading samples"):
            s = self._load_one_sample(sf)
            if s is not None:
                self._samples[s.sample_id] = s
        print(f"Loaded {len(self._samples)} samples.")

    @property
    def sample_ids(self):
        return list(self._samples.keys())

    def get_sample(self, sid):
        return self._samples.get(sid)

    def __len__(self):
        return len(self._samples)


# ===========================================================================
# Helper math utilities
# ===========================================================================

def rq_decompose(P: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Decompose a 3x4 projection matrix P = K @ [R | t].

    Returns K (3x3 upper-triangular intrinsic, K[2,2] == 1), R (3x3
    rotation-like orthogonal matrix mapping lidar -> camera), t (3,).

    Implementation: RQ via QR of inv(M). With inv(M) = Q @ U (U upper
    triangular) we get M = inv(U) @ Q.T, where inv(U) is upper triangular
    (the intrinsic) and Q.T is orthogonal.

    BUGFIX vs. previous revision: the old code ran QR on inv(M).T, which
    yields M = Q @ lower-triangular — the returned "K" was lower-triangular
    and K @ R did not reproduce M. The sign fix also multiplied the sign
    matrix D on the left of both factors, which does not preserve the
    product; (K @ D) @ (D @ R) == K @ R is the correct insertion.
    """
    M = P[:, :3]  # 3x3 left block
    Q, U = np.linalg.qr(np.linalg.inv(M))
    K = np.linalg.inv(U)   # upper triangular -> intrinsic
    R = Q.T                # orthogonal -> rotation

    # Enforce a positive diagonal on K without changing the product:
    # K @ R == (K @ D) @ (D @ R) for any D = diag(+/-1).
    signs = np.sign(np.diag(K))
    signs[signs == 0] = 1.0
    D = np.diag(signs)
    K = K @ D
    R = D @ R
    # NOTE(review): det(R) could be -1 for a degenerate P; not handled here,
    # same as the original — confirm calibration matrices are proper.

    # t is scale-invariant: inv(K) @ P[:,3] gives the same t whether K is
    # normalized or not, so compute it first, then normalize K[2,2] to 1.
    t = np.linalg.inv(K) @ P[:, 3]
    if not np.isclose(K[2, 2], 0.0):
        K = K / K[2, 2]
    return K, R, t


def rotation_matrix_to_quaternion(R: np.ndarray) -> List[float]:
    """Convert a 3x3 rotation matrix to a quaternion [w, x, y, z]
    (Shepperd's method: branch on the largest diagonal term for stability)."""
    trace = R[0, 0] + R[1, 1] + R[2, 2]
    if trace > 0:
        s = 0.5 / np.sqrt(trace + 1.0)
        w = 0.25 / s
        x = (R[2, 1] - R[1, 2]) * s
        y = (R[0, 2] - R[2, 0]) * s
        z = (R[1, 0] - R[0, 1]) * s
    elif R[0, 0] > R[1, 1] and R[0, 0] > R[2, 2]:
        s = 2.0 * np.sqrt(1.0 + R[0, 0] - R[1, 1] - R[2, 2])
        w = (R[2, 1] - R[1, 2]) / s
        x = 0.25 * s
        y = (R[0, 1] + R[1, 0]) / s
        z = (R[0, 2] + R[2, 0]) / s
    elif R[1, 1] > R[2, 2]:
        s = 2.0 * np.sqrt(1.0 + R[1, 1] - R[0, 0] - R[2, 2])
        w = (R[0, 2] - R[2, 0]) / s
        x = (R[0, 1] + R[1, 0]) / s
        y = 0.25 * s
        z = (R[1, 2] + R[2, 1]) / s
    else:
        s = 2.0 * np.sqrt(1.0 + R[2, 2] - R[0, 0] - R[1, 1])
        w = (R[1, 0] - R[0, 1]) / s
        x = (R[0, 2] + R[2, 0]) / s
        y = (R[1, 2] + R[2, 1]) / s
        z = 0.25 * s
    return [w, x, y, z]


def identity_quaternion() -> List[float]:
    """Identity rotation as [w, x, y, z]."""
    return [1.0, 0.0, 0.0, 0.0]


def project_3d_to_2d(points_3d: np.ndarray,
                     P: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    Project Nx3 LiDAR points to the image plane using a 3x4 matrix P.

    Returns (uv, valid):
      uv    -- (M,2) pixel coordinates of the points in FRONT of the camera
               (M <= N; points behind the camera are dropped)
      valid -- (N,) boolean mask marking which input points were kept
    """
    N = points_3d.shape[0]
    pts_h = np.hstack([points_3d, np.ones((N, 1))])  # Nx4 homogeneous
    proj = (P @ pts_h.T).T                            # Nx3
    valid = proj[:, 2] > 0                            # positive depth only
    uv = proj[valid, :2] / proj[valid, 2:3]
    return uv, valid


def count_points_in_box(points: np.ndarray, cx, cy, cz,
                        dx, dy, dz, yaw) -> int:
    """Count LiDAR points inside a yaw-rotated 3D box.

    Points are translated to the box center and rotated by -yaw about Z,
    after which the box is axis-aligned and a simple extent test applies.
    """
    cos_y, sin_y = np.cos(-yaw), np.sin(-yaw)
    R_inv = np.array([[cos_y, -sin_y, 0],
                      [sin_y, cos_y, 0],
                      [0, 0, 1]], dtype=np.float32)
    pts_centered = points - np.array([cx, cy, cz], dtype=np.float32)
    pts_rot = (R_inv @ pts_centered.T).T
    mask = ((np.abs(pts_rot[:, 0]) <= dx / 2) &
            (np.abs(pts_rot[:, 1]) <= dy / 2) &
            (np.abs(pts_rot[:, 2]) <= dz / 2))
    return int(mask.sum())


# ---------------------------------------------------------------------------
# NuScenes camera name mapping
# ---------------------------------------------------------------------------
CAM_NAME_MAP = {
    "front_120": "CAM_FRONT",
    "front_left_100": "CAM_FRONT_LEFT",
    "front_right_100": "CAM_FRONT_RIGHT",
    "rear_100": "CAM_BACK",
    "rear_left_100": "CAM_BACK_LEFT",
    "rear_right_100": "CAM_BACK_RIGHT",
}

# ---------------------------------------------------------------------------
# Class mapping: private dataset labels -> NuScenes labels
# ---------------------------------------------------------------------------
PRIVATE2NUSCENES = {
    "car": "car",
    "pick-up-truck": "car",
    "small-truck": "truck",
    "truck": "truck",
    "bus": "bus",
    "ignore": "car",
    "special-vehicle": "construction_vehicle",
    "tricyclist-withrider": "bicycle",
    "tricyclist-withoutrider": "bicycle",
    "bicycle-withrider": "bicycle",
    "bicycle-withoutrider": "bicycle",
    "people": "pedestrian",
    "crowd-people": "pedestrian",
    "crowd-bicycle": "bicycle",
    "crowd-car": "car",
    "traffic-cone": "traffic_cone",
    "plastic-barrier": "barrier",
    "crash-barrels": "barrier",
    "warning-triangle": "barrier",
    "crowd-traffic-cone": "barrier",
    "crowd-plastic-barrier": "barrier",
    "crowd-crash-barrels": "barrier",
    "crowd-warning-triangle": "barrier",
}

# Official NuScenes detection classes (in canonical order)
DETECTION_CLASSES = [
    "car", "truck", "bus", "construction_vehicle", "motorcycle",
    "bicycle", "pedestrian", "traffic_cone", "barrier", "trailer",
]
CLASS2ID = {c: i for i, c in enumerate(DETECTION_CLASSES)}


# ===========================================================================
# Core conversion
# ===========================================================================

def frame_to_info(frame: FrameData, sample_id: str,
                  out_lidar_dir: str, out_img_dir: str,
                  rel_lidar_root: str, rel_img_root: str) -> dict:
    """
    Convert one FrameData into a NuScenes-style info dict.

    Side effects: writes the point cloud as a .bin under out_lidar_dir and
    a resized copy of every camera image under out_img_dir.

    Since we have no vehicle poses, we treat lidar = ego = global (identity).
    """
    token = str(uuid.uuid4())

    # ---- Save point cloud as .bin (float32 x,y,z) -------------------------
    os.makedirs(out_lidar_dir, exist_ok=True)
    bin_name = f"{sample_id}_frame{frame.frame_index:04d}.bin"
    bin_path = os.path.join(out_lidar_dir, bin_name)
    frame.lidar_points.astype(np.float32).tofile(bin_path)
    lidar_path_rel = os.path.join(rel_lidar_root, bin_name)

    # ---- Identity ego/global poses ----------------------------------------
    zero3 = [0.0, 0.0, 0.0]
    id_quat = identity_quaternion()

    # ---- Camera info -------------------------------------------------------
    cams_info = {}
    target_W, target_H = 1920, 1280  # all cameras resized to one resolution
    for cam_key, nus_cam in CAM_NAME_MAP.items():
        if cam_key not in frame.camera_raw:
            continue
        cam_data = frame.camera_raw[cam_key]
        P = cam_data["projection_matrix"]   # 3x4
        ori_W, ori_H = cam_data["image_size"]

        # Decompose P into intrinsic K and the lidar->camera extrinsic,
        # i.e. x_cam = R_lidar2cam @ x_lidar + t_lidar2cam.
        K, R_lidar2cam, t_lidar2cam = rq_decompose(P)

        # sensor2lidar = inverse of lidar2camera
        R_cam2lidar = R_lidar2cam.T
        t_cam2lidar = -R_cam2lidar @ t_lidar2cam

        # Rescale intrinsics for the resized image (skew K[0,1] is assumed
        # negligible and left untouched, as before).
        sx = target_W / ori_W
        sy = target_H / ori_H
        K_adjusted = K.copy()
        K_adjusted[0, 0] *= sx
        K_adjusted[1, 1] *= sy
        K_adjusted[0, 2] *= sx
        K_adjusted[1, 2] *= sy

        # Copy (resized) image to the output directory.
        # NOTE(review): assumes an image exists for every calibrated camera;
        # a missing frame.image_paths entry would raise KeyError here.
        src_img = frame.image_paths[cam_key]
        cam_img_dir = os.path.join(out_img_dir, nus_cam)
        os.makedirs(cam_img_dir, exist_ok=True)
        img_name = f"{sample_id}_frame{frame.frame_index:04d}.jpg"
        dst_img = os.path.join(cam_img_dir, img_name)
        img = Image.open(src_img)
        img_resized = img.resize((target_W, target_H), Image.LANCZOS)
        img_resized.save(dst_img)
        img_path_rel = os.path.join(rel_img_root, nus_cam, img_name)

        cam_token = str(uuid.uuid4())
        cams_info[nus_cam] = {
            "data_path": img_path_rel,
            "type": nus_cam,
            "sample_data_token": cam_token,
            # camera -> ego (ego == lidar frame, so reuse cam->lidar)
            "sensor2ego_translation": t_cam2lidar.tolist(),
            "sensor2ego_rotation": rotation_matrix_to_quaternion(R_cam2lidar),
            # ego -> global (identity)
            "ego2global_translation": zero3,
            "ego2global_rotation": id_quat,
            # lidar -> camera extrinsic (for projection)
            "sensor2lidar_translation": t_cam2lidar.tolist(),
            "sensor2lidar_rotation": rotation_matrix_to_quaternion(R_cam2lidar),
            # intrinsic
            "cam_intrinsic": K_adjusted.tolist(),
            "width": target_W,
            "height": target_H,
            "timestamp": frame.frame_index * 100000,  # synthetic
        }

    # ---- Annotations -------------------------------------------------------
    instances = []
    for lb in frame.labels:
        raw_cls = lb.get("class", "")
        cls = PRIVATE2NUSCENES.get(raw_cls)  # remap to NuScenes name
        if cls is None or cls not in CLASS2ID:
            continue
        cx, cy, cz = lb["center"]
        dx, dy, dz = lb["dx"], lb["dy"], lb["dz"]
        yaw = lb["rotateZ"]
        n_pts = count_points_in_box(frame.lidar_points,
                                    cx, cy, cz, dx, dy, dz, yaw)
        instances.append({
            "bbox_3d": [cx, cy, cz, dx, dy, dz, yaw],
            "bbox_label_3d": CLASS2ID[cls],
            "bbox": [0.0, 0.0, 1.0, 1.0],  # no 2D GT available
            "bbox_label": CLASS2ID[cls],
            "velocity": [float("nan"), float("nan")],
            "num_lidar_pts": n_pts,
            "bbox_3d_isvalid": n_pts > 0,
        })

    info = {
        "lidar_points": {
            "lidar_path": lidar_path_rel,
            "num_pts_feats": 3,  # X, Y, Z only
        },
        "token": token,
        "sweeps": [],  # no sweep data available
        "cams": cams_info,
        # lidar == ego (identity)
        "lidar2ego_translation": zero3,
        "lidar2ego_rotation": id_quat,
        # ego == global (identity)
        "ego2global_translation": zero3,
        "ego2global_rotation": id_quat,
        "timestamp": frame.frame_index * 100000,
        # annotations
        "instances": instances,
    }
    return info


def build_coco_json(infos: List[dict]) -> dict:
    """Build a COCO-style mono3d JSON from NuScenes info dicts.

    One "image" entry per (frame, camera); one annotation per 3D instance
    whose center projects in front of that camera with a non-degenerate
    2D box after clipping.
    """
    categories = [{"id": i, "name": c} for i, c in enumerate(DETECTION_CLASSES)]
    images = []
    annotations = []
    ann_id = 0

    for info in infos:
        frame_token = info["token"]
        for nus_cam, cam_info in info["cams"].items():
            img_token = cam_info["sample_data_token"]
            W = cam_info["width"]
            H = cam_info["height"]
            K = np.array(cam_info["cam_intrinsic"])  # 3x3
            img_entry = {
                "file_name": cam_info["data_path"],
                "id": img_token,
                "token": frame_token,
                "cam2ego_rotation": cam_info["sensor2ego_rotation"],
                "cam2ego_translation": cam_info["sensor2ego_translation"],
                "ego2global_rotation": info["ego2global_rotation"],
                "ego2global_translation": info["ego2global_translation"],
                "cam_intrinsic": cam_info["cam_intrinsic"],
                "width": W,
                "height": H,
            }
            images.append(img_entry)

            # Rebuild the lidar->camera transform from the stored
            # camera->lidar quaternion [w,x,y,z] and translation.
            t_c2l = np.array(cam_info["sensor2lidar_translation"])
            quat = cam_info["sensor2lidar_rotation"]
            w, x, y, z = quat
            R_c2l = np.array([
                [1-2*(y*y+z*z), 2*(x*y-z*w),   2*(x*z+y*w)],
                [2*(x*y+z*w),   1-2*(x*x+z*z), 2*(y*z-x*w)],
                [2*(x*z-y*w),   2*(y*z+x*w),   1-2*(x*x+y*y)],
            ])
            R_l2c = R_c2l.T
            t_l2c = -R_l2c @ t_c2l

            # Iterate over the valid "instances" key generated by frame_to_info
            for inst in info.get("instances", []):
                cls_id = inst["bbox_label_3d"]
                if cls_id < 0 or cls_id >= len(DETECTION_CLASSES):
                    continue
                cls = DETECTION_CLASSES[cls_id]
                cx_l, cy_l, cz_l, dx, dy, dz, yaw_l = inst["bbox_3d"]

                # Transform center to camera frame
                center_l = np.array([cx_l, cy_l, cz_l])
                center_c = R_l2c @ center_l + t_l2c
                depth = center_c[2]
                if depth <= 0:
                    continue  # box behind camera

                # Project 3D center to 2D
                center_2d = K @ center_c
                u = center_2d[0] / center_2d[2]
                v = center_2d[1] / center_2d[2]

                # Yaw in camera frame (approx: only rotate around Z in lidar)
                yaw_cam = yaw_l

                # 3D box annotation: [cx_cam, cy_cam, cz_cam, dx, dy, dz, yaw_cam]
                bbox_cam3d = [float(center_c[0]), float(center_c[1]),
                              float(center_c[2]), float(dx), float(dy),
                              float(dz), float(yaw_cam)]

                # Project the 8 corners to get a 2D bbox
                corners_l = _box_corners(cx_l, cy_l, cz_l, dx, dy, dz, yaw_l)
                corners_c = (R_l2c @ corners_l.T).T + t_l2c
                corners_c = corners_c[corners_c[:, 2] > 0]
                if len(corners_c) == 0:
                    continue
                proj_h = (K @ corners_c.T).T
                uvs = proj_h[:, :2] / proj_h[:, 2:3]
                x1, y1 = uvs[:, 0].min(), uvs[:, 1].min()
                x2, y2 = uvs[:, 0].max(), uvs[:, 1].max()
                # clip to the image, drop empty boxes
                x1 = max(0, min(x1, W)); x2 = max(0, min(x2, W))
                y1 = max(0, min(y1, H)); y2 = max(0, min(y2, H))
                bw, bh = x2 - x1, y2 - y1
                if bw <= 0 or bh <= 0:
                    continue

                # Extract velocity from the instance dict
                # NOTE(review): NaN velocities are serialized as-is; the
                # resulting JSON uses Python's non-strict NaN literal.
                vx, vy = inst.get("velocity", [float("nan"), float("nan")])
                ann = {
                    "file_name": cam_info["data_path"],
                    "image_id": img_token,
                    "area": float(bw * bh),
                    "category_name": cls,
                    "category_id": cls_id,
                    "bbox": [float(x1), float(y1), float(bw), float(bh)],
                    "iscrowd": 0,
                    "bbox_cam3d": bbox_cam3d,
                    "velo_cam3d": [vx, vy],
                    "center2d": [float(u), float(v), float(depth)],
                    "attribute_name": "",
                    "attribute_id": -1,
                    "id": ann_id,
                }
                annotations.append(ann)
                ann_id += 1

    return {"categories": categories, "images": images,
            "annotations": annotations}


def _box_corners(cx, cy, cz, dx, dy, dz, yaw) -> np.ndarray:
    """Return the 8 corners of a yaw-rotated 3D box as an (8,3) array."""
    hx, hy, hz = dx / 2, dy / 2, dz / 2
    corners = np.array([
        [ hx,  hy,  hz], [ hx,  hy, -hz],
        [ hx, -hy,  hz], [ hx, -hy, -hz],
        [-hx,  hy,  hz], [-hx,  hy, -hz],
        [-hx, -hy,  hz], [-hx, -hy, -hz],
    ], dtype=np.float32)
    c, s = np.cos(yaw), np.sin(yaw)
    Rz = np.array([[c, -s, 0],
                   [s, c, 0],
                   [0, 0, 1]], dtype=np.float32)
    corners = (Rz @ corners.T).T + np.array([cx, cy, cz], dtype=np.float32)
    return corners


# ===========================================================================
# Main
# ===========================================================================

def main():
    """CLI entry point: load the dataset, convert, and write pkl/json files."""
    parser = argparse.ArgumentParser(
        description="Convert private dataset to NuScenes-style PKL/JSON.")
    parser.add_argument("--root", default="sample",
                        help="Root folder of your dataset")
    parser.add_argument("--out", default="my_dataset",
                        help="Output directory")
    parser.add_argument("--split", type=float, default=0.8,
                        help="Train fraction (0-1)")
    parser.add_argument("--tag", default="mydata",
                        help="Output file prefix")
    args = parser.parse_args()

    out_root = Path(args.out)
    lidar_dir = out_root / "samples" / "LIDAR_TOP"
    img_dir = out_root / "samples"
    rel_lidar = "samples/LIDAR_TOP"
    rel_img = "samples"

    print("Loading dataset ...")
    ds = MyDataset(args.root)

    # Collect all (sample_id, frame_index) pairs in deterministic order.
    # NOTE(review): the split is frame-level, so frames of one sample can
    # land in both train and val — confirm this is intended.
    all_frames = []
    for sid in sorted(ds.sample_ids):
        sample = ds.get_sample(sid)
        for fidx in sorted(sample.frames.keys()):
            all_frames.append((sid, fidx, sample.frames[fidx]))
    print(f"Total frames: {len(all_frames)}")

    n_train = int(len(all_frames) * args.split)
    train_frames = all_frames[:n_train]
    val_frames = all_frames[n_train:]

    def convert_split(frames, split_name):
        """Convert one split and write its .pkl and .coco.json files."""
        infos = []
        for sid, fidx, frame in tqdm(frames, desc=f"Converting {split_name}"):
            info = frame_to_info(
                frame, sid,
                out_lidar_dir=str(lidar_dir),
                out_img_dir=str(img_dir),
                rel_lidar_root=rel_lidar,
                rel_img_root=rel_img,
            )
            infos.append(info)
        pkl_path = out_root / f"{args.tag}_infos_{split_name}.pkl"
        json_path = out_root / f"{args.tag}_infos_{split_name}_mono3d.coco.json"
        with open(pkl_path, "wb") as f:
            pickle.dump({
                "metainfo": {
                    "version": "custom-v1.0",
                    "classes": DETECTION_CLASSES,
                },
                "data_list": infos,
            }, f)
        print(f"Saved {pkl_path} ({len(infos)} frames)")
        coco = build_coco_json(infos)
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(coco, f, ensure_ascii=False, indent=2)
        print(f"Saved {json_path} ({len(coco['images'])} images, {len(coco['annotations'])} anns)")

    convert_split(train_frames, "train")
    convert_split(val_frames, "val")

    # Write class list for reference
    classes_path = out_root / f"{args.tag}_classes.txt"
    classes_path.write_text("\n".join(DETECTION_CLASSES))

    print(f"\nDone. Output structure:\n  {out_root}/")
    print(f"  ├── samples/LIDAR_TOP/*.bin")
    print(f"  ├── samples/CAM_FRONT/*.jpg (and other cameras)")
    print(f"  ├── {args.tag}_infos_train.pkl")
    print(f"  ├── {args.tag}_infos_val.pkl")
    print(f"  ├── {args.tag}_infos_train_mono3d.coco.json")
    print(f"  └── {args.tag}_infos_val_mono3d.coco.json")


if __name__ == "__main__":
    main()