Delete convert_to_nuscenes.py
This commit is contained in:
@@ -1,730 +0,0 @@
|
||||
"""
|
||||
Convert private dataset to NuScenes-style .pkl and .coco.json files
|
||||
compatible with MMDetection3D.
|
||||
|
||||
Coordinate system note
|
||||
----------------------
|
||||
Your dataset: X-right, Y-forward, Z-up (same as NuScenes LiDAR frame)
|
||||
-> No rotation is needed for the point cloud.
|
||||
-> lidar == ego == global (identity transforms, no vehicle odometry available)
|
||||
|
||||
Projection matrix note
|
||||
----------------------
|
||||
Your camera_raw stores a 3×4 matrix P = K @ [R | t]
|
||||
where K is the 3×3 intrinsic and [R|t] maps points from the LiDAR frame
|
||||
into the camera frame.
|
||||
We decompose P via RQ-decomposition to recover K (intrinsic) and [R|t]
|
||||
(lidar-to-camera extrinsic).
|
||||
|
||||
Usage
|
||||
-----
|
||||
python convert_to_nuscenes.py \
|
||||
--root /path/to/your/dataset \
|
||||
--out /path/to/output/dir \
|
||||
--split 0.8 # fraction used for training (rest = val)
|
||||
--tag mydata # prefix for output file names
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import uuid
|
||||
import pickle
|
||||
import argparse
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from PIL import Image
|
||||
from typing import Dict, List, Tuple, Optional
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Inline minimal copy of the dataset loader so the script is self-contained.
|
||||
# If you already have my_dataset.py on PYTHONPATH you can remove this block
|
||||
# and simply do: from my_dataset import MyDataset, SampleData, FrameData
|
||||
# ---------------------------------------------------------------------------
|
||||
import glob
|
||||
import open3d as o3d
|
||||
from dataclasses import dataclass, field
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
@dataclass
class FrameData:
    """One synchronized frame: a LiDAR sweep plus per-camera data and labels."""
    frame_index: int              # position of this frame within its sample
    lidar_points: np.ndarray      # (N,3) float32
    image_paths: Dict[str, str]   # cam_name -> path of the image file on disk
    camera_raw: Dict[str, dict]   # cam_name -> {projection_matrix, image_size}
    labels: List[dict]            # normalized 3D box labels (may be empty)
    contain_labels: bool = False  # True when `labels` is non-empty
|
||||
|
||||
|
||||
@dataclass
class SampleData:
    """A recorded sequence: all frames of one sample folder, keyed by index."""
    sample_id: str                # digits extracted from the sample folder name
    frames: Dict[int, FrameData]  # frame_index -> FrameData
|
||||
|
||||
|
||||
class MyDataset:
    """Eager loader for the private dataset layout.

    Each ``sample*`` folder under the root is expected to contain:
      - ``pcd_sequence<ID>/``: one ``.pcd`` per frame,
      - ``0/``, ``1/``, ...: one image folder per frame,
      - ``test<ID>.json``: page JSON with camera calibration,
      - ``test<ID>-mark.json``: mark JSON with 3D box annotations.
    """

    # Camera name -> on-disk image file name inside each frame folder.
    cam2filename = {
        "front_120": "scanofilm_surround_front_120_8M.jpg",
        "front_left_100": "scanofilm_surround_front_left_100_2M.jpg",
        "front_right_100": "scanofilm_surround_front_right_100_2M.jpg",
        "rear_100": "scanofilm_surround_rear_100_2M.jpg",
        "rear_left_100": "scanofilm_surround_rear_left_100_2M.jpg",
        "rear_right_100": "scanofilm_surround_rear_right_100_2M.jpg",
    }
    # Annotation hex color -> class name (as emitted by the labeling tool).
    color2class = {
        "#5414ED": "car", "#F6EE64": "pick-up-truck", "#F6A087": "small-truck",
        "#BC4EF1": "truck", "#4E9AF1": "bus", "#F1A94E": "special-vehicle",
        "#E1DFDD": "ignore", "#F91906": "tricyclist-withrider",
        "#FA5F51": "tricyclist-withoutrider", "#B8CB30": "bicycle-withrider",
        "#E6FD4E": "bicycle-withoutrider", "#876363": "people",
        "#2CBDF5": "crowd-people", "#C9F52C": "crowd-bicycle",
        "#DC6788": "crowd-car", "#6EC913": "traffic-cone",
        "#0DDE69": "plastic-barrier", "#8260D2": "crash-barrels",
        "#F1D1D1": "warning-triangle", "#FE6DF4": "crowd-traffic-cone",
        "#D1AA35": "crowd-plastic-barrier", "#3BE8D0": "crowd-crash-barrels",
        "#2B7567": "crowd-warning-triangle",
    }

    def __init__(self, root_folder: str):
        """Scan *root_folder* for sample folders and load them all eagerly."""
        self.root = root_folder
        # Reverse mapping: class name -> hex color.
        self.class2color = {v: k for k, v in self.color2class.items()}
        self._samples: Dict[str, SampleData] = {}
        self._load_all_samples()

    @staticmethod
    def _extract_id(folder_name: str) -> str:
        """Return the digits of the folder's basename (used as sample id)."""
        return ''.join(c for c in os.path.basename(folder_name) if c.isdigit())

    def _find_sample_folders(self):
        """List directories directly under the root named 'sample*'."""
        return [f for f in glob.glob(os.path.join(self.root, "sample*"))
                if os.path.isdir(f)]

    def _find_paths_in_sample(self, sample_folder, sample_id):
        """Build the expected file/folder paths inside one sample folder."""
        pcd_folder = os.path.join(sample_folder, f"pcd_sequence{sample_id}")
        pcd_files = glob.glob(os.path.join(pcd_folder, "*.pcd")) if os.path.isdir(pcd_folder) else []
        # One image folder per LiDAR frame, named "0", "1", ...
        img_folders = [os.path.join(sample_folder, str(i)) for i in range(len(pcd_files))]
        return {
            "pcd_folder": pcd_folder,
            "img_folders": img_folders,
            "page_json": os.path.join(sample_folder, f"test{sample_id}.json"),
            "mark_json": os.path.join(sample_folder, f"test{sample_id}-mark.json"),
        }

    def _read_lidar_points(self, pcd_path):
        """Read one .pcd file; return (N,3) float32 points, or None on error."""
        try:
            pcd = o3d.io.read_point_cloud(pcd_path, remove_nan_points=True,
                                          remove_infinite_points=True, format="auto")
            return np.asarray(pcd.points, dtype=np.float32)
        except Exception as e:
            # Best-effort: report and skip the unreadable file.
            print(f"Error loading {pcd_path}: {e}")
            return None

    def _load_all_lidar_frames(self, pcd_folder):
        """Load every .pcd in the folder; return {frame_index: points}."""
        out = {}
        for f in glob.glob(os.path.join(pcd_folder, "*.pcd")):
            # Frame index = digits after the last 'n' of the file stem
            # (e.g. "...n12.pcd" -> 12).
            idx = int(os.path.basename(f).split(".")[0].split("n")[-1])
            pts = self._read_lidar_points(f)
            if pts is not None:
                out[idx] = pts
        return out

    def _load_image_paths(self, img_folders):
        """Collect existing camera images; return {frame_index: {cam: path}}."""
        out = {}
        for folder in img_folders:
            if not os.path.isdir(folder):
                continue
            idx = int(os.path.basename(folder))
            # Keep only cameras whose image file actually exists.
            paths = {cam: os.path.join(folder, fname)
                     for cam, fname in self.cam2filename.items()
                     if os.path.isfile(os.path.join(folder, fname))}
            if paths:
                out[idx] = paths
        return out

    def _load_camera_raw(self, page_json_path):
        """Parse per-frame camera calibration from the page JSON.

        Returns {frame_index: {cam_name: {"projection_matrix": (3,4) ndarray,
        "image_size": [W, H]}}}; an empty dict when the file or the expected
        keys are missing.
        """
        if not os.path.isfile(page_json_path):
            return {}
        with open(page_json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        try:
            extend_source = data['data']['files'][0]['extendSources']
        except (KeyError, IndexError):
            return {}
        out = {}
        for d in extend_source:
            pe = d.get('pageElement', {})
            if "sensor" not in pe:
                continue
            try:
                idx = int(d['fileName'].split('.')[0])
            except Exception:
                continue
            # Normalize the sensor name: strip vendor prefix and the
            # resolution suffix so it matches cam2filename keys.
            cam_name = pe['sensor'].replace("ofilm_surround_", "")
            for suf in ["_2M", "_8M"]:
                if cam_name.endswith(suf):
                    cam_name = cam_name[:-len(suf)]
                    break
            if cam_name not in self.cam2filename:
                continue
            matrix = pe.get('mtx', [])
            image_size = pe.get('imageSize', [])
            if not matrix or not image_size:
                continue
            matrix = np.array(matrix, dtype=np.float64)
            if matrix.shape != (3, 4):
                continue
            out.setdefault(idx, {})[cam_name] = {
                "projection_matrix": matrix,
                "image_size": image_size,
            }
        return out

    def _extract_text_label(self, json_path):
        """Return the annotation list with 100% reviewer agreement, else []."""
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)['data']['list'][0]
        # Agreement rate per result entry, e.g. "100%" -> 1.0.
        idx2rate = {k: float(v['accuracy'].replace('%', '')) / 100.0
                    for k, v in data['rate'].items()}
        max_idx = max(idx2rate, key=idx2rate.get)
        if idx2rate[max_idx] == 1.0:
            return data['result'][max_idx]
        return []

    def _process_text_label(self, json_path):
        """Return {frame_index: [label dict, ...]} parsed from the mark JSON."""
        label_list = self._extract_text_label(json_path)
        if not label_list:
            return {}
        out = {}
        for entry in label_list:
            out[entry['index']] = self._process_frame_labels(entry['value'])
        return out

    def _process_frame_labels(self, label_list):
        """Normalize the raw label dicts of a single frame."""
        labels = []
        for lb in label_list:
            nl = {
                'num': lb['num'],
                'class': lb['label']['class-name'],
                'is_moving': not bool(lb['label']['static']),
                'isolation': bool(lb['label']['isolation']),
                'color': self.class2color.get(lb['label']['class-name'], ''),
                # Eight box corners in LiDAR coordinates.
                'points': [[float(lb['newPoints'][i]['x']),
                            float(lb['newPoints'][i]['y']),
                            float(lb['newPoints'][i]['z'])] for i in range(8)],
                'center': [float(lb['x']), float(lb['y']), float(lb['z'])],
                'rotateZ': float(lb['rotateZ']),
                # NOTE(review): width/height/depth are mapped to dx/dy/dz —
                # confirm this matches the labeling tool's axis convention.
                'dx': float(lb['width']),
                'dy': float(lb['height']),
                'dz': float(lb['depth']),
            }
            labels.append(nl)
        return labels

    def _load_one_sample(self, sample_folder):
        """Load one sample folder into a SampleData, or None on mismatch."""
        sid = self._extract_id(sample_folder)
        paths = self._find_paths_in_sample(sample_folder, sid)
        lidar_dict = self._load_all_lidar_frames(paths["pcd_folder"])
        img_dict = self._load_image_paths(paths["img_folders"])
        cam_dict = self._load_camera_raw(paths["page_json"])
        label_dict = self._process_text_label(paths["mark_json"])
        # All four per-frame dicts must describe the same number of frames.
        if not (len(label_dict) == len(lidar_dict) == len(img_dict) == len(cam_dict)):
            print(f"Sample {sid}: frame count mismatch, skipping.")
            return None
        frames = {idx: FrameData(
            frame_index=idx,
            lidar_points=lidar_dict[idx],
            image_paths=img_dict[idx],
            camera_raw=cam_dict[idx],
            labels=label_dict[idx],
            contain_labels=len(label_dict[idx]) > 0,
        ) for idx in range(len(lidar_dict))}
        return SampleData(sample_id=sid, frames=frames) if frames else None

    def _load_all_samples(self):
        """Load every sample folder under the root, skipping failed ones."""
        for sf in tqdm(self._find_sample_folders(), desc="Loading samples"):
            s = self._load_one_sample(sf)
            if s is not None:
                self._samples[s.sample_id] = s
        print(f"Loaded {len(self._samples)} samples.")

    @property
    def sample_ids(self):
        """Ids of all successfully loaded samples."""
        return list(self._samples.keys())

    def get_sample(self, sid):
        """Return the SampleData for *sid*, or None if unknown."""
        return self._samples.get(sid)

    def __len__(self):
        return len(self._samples)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Helper math utilities
|
||||
# ===========================================================================
|
||||
|
||||
def rq_decompose(P: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Decompose a 3x4 projection matrix P = K @ [R | t].

    Parameters
    ----------
    P : (3, 4) projection matrix.

    Returns
    -------
    K : (3, 3) upper-triangular intrinsic matrix with positive diagonal.
    R : (3, 3) orthogonal rotation (lidar -> camera).
    t : (3,) translation expressed in camera coordinates.
    """
    M = P[:, :3]  # 3x3 left block: K @ R

    # RQ decomposition via QR of the inverse:
    #   inv(M) = Q @ U   (QR, U upper triangular)
    #   =>  M = inv(U) @ Q.T
    # with inv(U) upper triangular (the intrinsic) and Q.T orthogonal.
    # BUGFIX: the previous version took the QR of inv(M).T and inverted
    # R.T, which produces a LOWER-triangular factor and does not satisfy
    # M = K @ R.
    Q, U = np.linalg.qr(np.linalg.inv(M))
    K = np.linalg.inv(U)  # upper triangular -> intrinsic
    R = Q.T               # rotation

    # Enforce a positive diagonal on K while preserving the product:
    # K @ R == (K @ D) @ (D @ R) since D = D^-1 (diagonal of +-1).
    # (Right-multiplying K keeps it upper triangular and flips the sign of
    # each diagonal entry as needed; the old D @ K ordering broke K @ R.)
    signs = np.sign(np.diag(K))
    signs[signs == 0] = 1.0
    D = np.diag(signs)
    K = K @ D
    R = D @ R

    # Recover t from the last column: P[:, 3] = K @ t.
    t = np.linalg.inv(K) @ P[:, 3]

    return K, R, t
|
||||
|
||||
|
||||
def rotation_matrix_to_quaternion(R: np.ndarray) -> List[float]:
    """Convert a 3x3 rotation matrix to a quaternion [w, x, y, z].

    Branch selection follows the standard numerically stable scheme:
    use the trace when it is positive, otherwise the largest diagonal
    element.
    """
    m00, m11, m22 = R[0, 0], R[1, 1], R[2, 2]
    trace = m00 + m11 + m22

    if trace > 0:
        s = 0.5 / np.sqrt(trace + 1.0)
        return [0.25 / s,
                (R[2, 1] - R[1, 2]) * s,
                (R[0, 2] - R[2, 0]) * s,
                (R[1, 0] - R[0, 1]) * s]

    if m00 > m11 and m00 > m22:
        s = 2.0 * np.sqrt(1.0 + m00 - m11 - m22)
        return [(R[2, 1] - R[1, 2]) / s,
                0.25 * s,
                (R[0, 1] + R[1, 0]) / s,
                (R[0, 2] + R[2, 0]) / s]

    if m11 > m22:
        s = 2.0 * np.sqrt(1.0 + m11 - m00 - m22)
        return [(R[0, 2] - R[2, 0]) / s,
                (R[0, 1] + R[1, 0]) / s,
                0.25 * s,
                (R[1, 2] + R[2, 1]) / s]

    s = 2.0 * np.sqrt(1.0 + m22 - m00 - m11)
    return [(R[1, 0] - R[0, 1]) / s,
            (R[0, 2] + R[2, 0]) / s,
            (R[1, 2] + R[2, 1]) / s,
            0.25 * s]
|
||||
|
||||
|
||||
def identity_quaternion() -> List[float]:
    """Quaternion [w, x, y, z] of the identity rotation."""
    w, xyz = 1.0, [0.0] * 3
    return [w, *xyz]
|
||||
|
||||
|
||||
def project_3d_to_2d(points_3d: np.ndarray, P: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    Project Nx3 LiDAR points to the image plane using a 3x4 projection P.

    Returns
    -------
    uv : (M, 2) pixel coordinates of the points in front of the camera,
        i.e. the projection of ``points_3d[valid]``.
    valid : (N,) boolean mask over the INPUT points (True where depth > 0).

    Note: the return annotation previously claimed a single ndarray while
    the function has always returned the ``(uv, valid)`` pair; the
    signature and docstring now state the actual contract.
    """
    N = points_3d.shape[0]
    pts_h = np.hstack([points_3d, np.ones((N, 1))])  # Nx4 homogeneous
    proj = (P @ pts_h.T).T  # Nx3 rows of [u*z, v*z, z]
    # Keep only points strictly in front of the camera (positive depth).
    valid = proj[:, 2] > 0
    uv = proj[valid, :2] / proj[valid, 2:3]
    return uv, valid
|
||||
|
||||
|
||||
def count_points_in_box(points: np.ndarray, cx, cy, cz, dx, dy, dz, yaw) -> int:
    """Return how many of *points* fall inside the yaw-rotated 3D box."""
    # Move the points into the box frame: shift to the box center, then
    # undo the box's yaw so the box becomes axis-aligned.
    center = np.array([cx, cy, cz], dtype=np.float32)
    c, s = np.cos(-yaw), np.sin(-yaw)
    undo_yaw = np.array([[c, -s, 0],
                         [s, c, 0],
                         [0, 0, 1]], dtype=np.float32)
    local = (points - center) @ undo_yaw.T

    # A point is inside when each coordinate lies within half the extent.
    inside = ((np.abs(local[:, 0]) <= dx / 2) &
              (np.abs(local[:, 1]) <= dy / 2) &
              (np.abs(local[:, 2]) <= dz / 2))
    return int(np.count_nonzero(inside))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# NuScenes camera name mapping
# ---------------------------------------------------------------------------
# Private camera keys -> canonical NuScenes sensor channel names.
CAM_NAME_MAP = {
    "front_120": "CAM_FRONT",
    "front_left_100": "CAM_FRONT_LEFT",
    "front_right_100": "CAM_FRONT_RIGHT",
    "rear_100": "CAM_BACK",
    "rear_left_100": "CAM_BACK_LEFT",
    "rear_right_100": "CAM_BACK_RIGHT",
}

# ---------------------------------------------------------------------------
# Class mapping: private dataset labels -> NuScenes labels
# ---------------------------------------------------------------------------
# NOTE(review): "ignore" is remapped to "car" — confirm this is intended
# rather than dropping those boxes from training.
PRIVATE2NUSCENES = {
    "car": "car",
    "pick-up-truck": "car",
    "small-truck": "truck",
    "truck": "truck",
    "bus": "bus",
    "ignore": "car",
    "special-vehicle": "construction_vehicle",
    "tricyclist-withrider": "bicycle",
    "tricyclist-withoutrider": "bicycle",
    "bicycle-withrider": "bicycle",
    "bicycle-withoutrider": "bicycle",
    "people": "pedestrian",
    "crowd-people": "pedestrian",
    "crowd-bicycle": "bicycle",
    "crowd-car": "car",
    "traffic-cone": "traffic_cone",
    "plastic-barrier": "barrier",
    "crash-barrels": "barrier",
    "warning-triangle": "barrier",
    "crowd-traffic-cone": "barrier",
    "crowd-plastic-barrier": "barrier",
    "crowd-crash-barrels": "barrier",
    "crowd-warning-triangle": "barrier",
}

# Official NuScenes detection classes (in canonical order)
DETECTION_CLASSES = [
    "car", "truck", "bus", "construction_vehicle", "motorcycle",
    "bicycle", "pedestrian", "traffic_cone", "barrier", "trailer",
]
# Class name -> integer label id (index into DETECTION_CLASSES).
CLASS2ID = {c: i for i, c in enumerate(DETECTION_CLASSES)}
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Core conversion
|
||||
# ===========================================================================
|
||||
|
||||
def frame_to_info(frame: FrameData, sample_id: str, out_lidar_dir: str,
                  out_img_dir: str, rel_lidar_root: str, rel_img_root: str) -> dict:
    """
    Convert one FrameData into a NuScenes-style info dict.

    Side effects: writes the LiDAR sweep as a float32 .bin under
    *out_lidar_dir* and a resized copy of each camera image under
    *out_img_dir*/<CAM_NAME>/; the returned dict references them via the
    relative roots *rel_lidar_root* / *rel_img_root*.

    Since we have no vehicle poses, we treat lidar = ego = global (identity).
    """
    token = str(uuid.uuid4())

    # ---- Save point cloud as .bin (float32 x,y,z) -------------------------
    os.makedirs(out_lidar_dir, exist_ok=True)
    bin_name = f"{sample_id}_frame{frame.frame_index:04d}.bin"
    bin_path = os.path.join(out_lidar_dir, bin_name)
    frame.lidar_points.astype(np.float32).tofile(bin_path)
    lidar_path_rel = os.path.join(rel_lidar_root, bin_name)

    # ---- Identity ego/global poses ----------------------------------------
    zero3 = [0.0, 0.0, 0.0]
    id_quat = identity_quaternion()

    # ---- Camera info -------------------------------------------------------
    cams_info = {}
    # All output images are resized to this common resolution.
    target_W, target_H = 1920, 1280

    for cam_key, nus_cam in CAM_NAME_MAP.items():
        # FIX: require both calibration AND an image file for this camera.
        # Previously only camera_raw was checked, so a calibrated camera
        # with a missing image raised KeyError below.
        if cam_key not in frame.camera_raw or cam_key not in frame.image_paths:
            continue
        cam_data = frame.camera_raw[cam_key]
        P = cam_data["projection_matrix"]  # 3x4
        # assumes image_size is [width, height] — TODO confirm against the
        # page JSON producer.
        ori_W, ori_H = cam_data["image_size"]

        # Decompose P -> K (3x3 intrinsic), R_cam_from_lidar, t_cam_from_lidar
        K, R_c2l_inv, t_in_cam = rq_decompose(P)
        # R_c2l_inv is R such that x_cam = R @ x_lidar + t
        R_lidar2cam = R_c2l_inv  # camera_from_lidar rotation
        t_lidar2cam = t_in_cam   # translation in camera coords

        # sensor2lidar = inverse of lidar2camera
        R_cam2lidar = R_lidar2cam.T
        t_cam2lidar = -R_cam2lidar @ t_lidar2cam

        # Scale the intrinsics to match the resized output resolution.
        sx = target_W / ori_W
        sy = target_H / ori_H
        K_adjusted = K.copy()
        K_adjusted[0, 0] *= sx
        K_adjusted[1, 1] *= sy
        K_adjusted[0, 2] *= sx
        K_adjusted[1, 2] *= sy

        # Copy (resized) image to the output directory.
        src_img = frame.image_paths[cam_key]
        cam_img_dir = os.path.join(out_img_dir, nus_cam)
        os.makedirs(cam_img_dir, exist_ok=True)
        img_name = f"{sample_id}_frame{frame.frame_index:04d}.jpg"
        dst_img = os.path.join(cam_img_dir, img_name)

        # FIX: close the source image deterministically via a context
        # manager instead of leaking the open file handle.
        with Image.open(src_img) as img:
            img_resized = img.resize((target_W, target_H), Image.LANCZOS)
            img_resized.save(dst_img)
        img_path_rel = os.path.join(rel_img_root, nus_cam, img_name)

        cam_token = str(uuid.uuid4())
        cams_info[nus_cam] = {
            "data_path": img_path_rel,
            "type": nus_cam,
            "sample_data_token": cam_token,
            # camera -> ego (treat camera as fixed to lidar frame via R_cam2lidar)
            "sensor2ego_translation": t_cam2lidar.tolist(),
            "sensor2ego_rotation": rotation_matrix_to_quaternion(R_cam2lidar),
            # ego -> global (identity)
            "ego2global_translation": zero3,
            "ego2global_rotation": id_quat,
            # lidar -> camera extrinsic (for projection)
            "sensor2lidar_translation": t_cam2lidar.tolist(),
            "sensor2lidar_rotation": rotation_matrix_to_quaternion(R_cam2lidar),
            # intrinsic
            "cam_intrinsic": K_adjusted.tolist(),
            "width": target_W,
            "height": target_H,
            "timestamp": frame.frame_index * 100000,  # synthetic
        }

    # ---- Annotations -------------------------------------------------------
    instances = []
    for lb in frame.labels:
        raw_cls = lb.get("class", "")
        cls = PRIVATE2NUSCENES.get(raw_cls)  # remap to NuScenes name
        if cls is None or cls not in CLASS2ID:
            continue
        cx, cy, cz = lb["center"]
        dx, dy, dz = lb["dx"], lb["dy"], lb["dz"]
        yaw = lb["rotateZ"]
        # Number of LiDAR points inside the box; used as a validity flag.
        n_pts = count_points_in_box(frame.lidar_points, cx, cy, cz, dx, dy, dz, yaw)

        instances.append({
            "bbox_3d": [cx, cy, cz, dx, dy, dz, yaw],
            "bbox_label_3d": CLASS2ID[cls],
            "bbox": [0.0, 0.0, 1.0, 1.0],  # no 2D GT available
            "bbox_label": CLASS2ID[cls],
            "velocity": [float("nan"), float("nan")],
            "num_lidar_pts": n_pts,
            "bbox_3d_isvalid": n_pts > 0,
        })

    info = {
        "lidar_points": {
            "lidar_path": lidar_path_rel,
            "num_pts_feats": 3,  # X, Y, Z only
        },
        "token": token,
        "sweeps": [],  # no sweep data available
        "cams": cams_info,
        # lidar == ego (identity)
        "lidar2ego_translation": zero3,
        "lidar2ego_rotation": id_quat,
        # ego == global (identity)
        "ego2global_translation": zero3,
        "ego2global_rotation": id_quat,
        "timestamp": frame.frame_index * 100000,
        # annotations
        "instances": instances,
    }
    return info
|
||||
|
||||
|
||||
def build_coco_json(infos: List[dict]) -> dict:
    """Build a COCO-style mono3d JSON from NuScenes info dicts.

    For every camera of every info dict, one "images" entry is emitted,
    plus one "annotations" entry per 3D instance whose center lies in
    front of that camera and whose projected 2D box is non-degenerate.
    """
    categories = [{"id": i, "name": c} for i, c in enumerate(DETECTION_CLASSES)]
    images = []
    annotations = []
    ann_id = 0  # running COCO annotation id, unique across all images

    for info in infos:
        frame_token = info["token"]
        for nus_cam, cam_info in info["cams"].items():
            img_token = cam_info["sample_data_token"]
            W = cam_info["width"]
            H = cam_info["height"]
            K = np.array(cam_info["cam_intrinsic"])  # 3x3

            img_entry = {
                "file_name": cam_info["data_path"],
                "id": img_token,
                "token": frame_token,
                "cam2ego_rotation": cam_info["sensor2ego_rotation"],
                "cam2ego_translation": cam_info["sensor2ego_translation"],
                "ego2global_rotation": info["ego2global_rotation"],
                "ego2global_translation": info["ego2global_translation"],
                "cam_intrinsic": cam_info["cam_intrinsic"],
                "width": W,
                "height": H,
            }
            images.append(img_entry)

            # Build sensor2lidar matrix for projecting boxes
            t_c2l = np.array(cam_info["sensor2lidar_translation"])
            quat = cam_info["sensor2lidar_rotation"]  # [w,x,y,z]
            w, x, y, z = quat
            # Quaternion -> rotation matrix (camera -> lidar).
            R_c2l = np.array([
                [1-2*(y*y+z*z), 2*(x*y-z*w), 2*(x*z+y*w)],
                [2*(x*y+z*w), 1-2*(x*x+z*z), 2*(y*z-x*w)],
                [2*(x*z-y*w), 2*(y*z+x*w), 1-2*(x*x+y*y)],
            ])
            # Invert the rigid transform to get lidar -> camera.
            R_l2c = R_c2l.T
            t_l2c = -R_l2c @ t_c2l

            # Iterate over the valid "instances" key generated by frame_to_info
            for inst in info.get("instances", []):
                cls_id = inst["bbox_label_3d"]
                if cls_id < 0 or cls_id >= len(DETECTION_CLASSES):
                    continue

                cls = DETECTION_CLASSES[cls_id]
                cx_l, cy_l, cz_l, dx, dy, dz, yaw_l = inst["bbox_3d"]

                # Transform center to camera frame
                center_l = np.array([cx_l, cy_l, cz_l])
                center_c = R_l2c @ center_l + t_l2c
                depth = center_c[2]
                if depth <= 0:
                    continue  # box behind camera

                # Project 3D center to 2D
                center_2d = K @ center_c
                u = center_2d[0] / center_2d[2]
                v = center_2d[1] / center_2d[2]

                # Yaw in camera frame (approx: only rotate around Z in lidar)
                yaw_cam = yaw_l

                # 3D box annotation: [cx_cam, cy_cam, cz_cam, dx, dy, dz, yaw_cam]
                bbox_cam3d = [float(center_c[0]), float(center_c[1]), float(center_c[2]),
                              float(dx), float(dy), float(dz), float(yaw_cam)]

                # Project 8 corners to get 2D bbox
                corners_l = _box_corners(cx_l, cy_l, cz_l, dx, dy, dz, yaw_l)
                corners_c = (R_l2c @ corners_l.T).T + t_l2c
                # Drop individual corners that fall behind the camera.
                corners_c = corners_c[corners_c[:, 2] > 0]
                if len(corners_c) == 0:
                    continue
                proj_h = (K @ corners_c.T).T
                uvs = proj_h[:, :2] / proj_h[:, 2:3]
                x1, y1 = uvs[:, 0].min(), uvs[:, 1].min()
                x2, y2 = uvs[:, 0].max(), uvs[:, 1].max()
                # Clamp the 2D box to the image bounds.
                x1 = max(0, min(x1, W)); x2 = max(0, min(x2, W))
                y1 = max(0, min(y1, H)); y2 = max(0, min(y2, H))
                bw, bh = x2 - x1, y2 - y1
                if bw <= 0 or bh <= 0:
                    continue

                # Extract velocity from the instance dict
                vx, vy = inst.get("velocity", [float("nan"), float("nan")])

                ann = {
                    "file_name": cam_info["data_path"],
                    "image_id": img_token,
                    "area": float(bw * bh),
                    "category_name": cls,
                    "category_id": cls_id,
                    "bbox": [float(x1), float(y1), float(bw), float(bh)],
                    "iscrowd": 0,
                    "bbox_cam3d": bbox_cam3d,
                    "velo_cam3d": [vx, vy],
                    "center2d": [float(u), float(v), float(depth)],
                    "attribute_name": "",
                    "attribute_id": -1,
                    "id": ann_id,
                }
                annotations.append(ann)
                ann_id += 1

    return {"categories": categories, "images": images, "annotations": annotations}
|
||||
|
||||
|
||||
def _box_corners(cx, cy, cz, dx, dy, dz, yaw) -> np.ndarray:
|
||||
"""Return 8 corners of a 3D box as (8,3) array."""
|
||||
hx, hy, hz = dx / 2, dy / 2, dz / 2
|
||||
corners = np.array([
|
||||
[ hx, hy, hz], [ hx, hy, -hz], [ hx, -hy, hz], [ hx, -hy, -hz],
|
||||
[-hx, hy, hz], [-hx, hy, -hz], [-hx, -hy, hz], [-hx, -hy, -hz],
|
||||
], dtype=np.float32)
|
||||
c, s = np.cos(yaw), np.sin(yaw)
|
||||
Rz = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]], dtype=np.float32)
|
||||
corners = (Rz @ corners.T).T + np.array([cx, cy, cz], dtype=np.float32)
|
||||
return corners
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Main
|
||||
# ===========================================================================
|
||||
|
||||
def main():
    """CLI entry point: load the dataset, split it, and write the
    NuScenes-style .pkl and mono3d .coco.json files plus a class list."""
    parser = argparse.ArgumentParser(description="Convert private dataset to NuScenes-style PKL/JSON.")
    parser.add_argument("--root", default="sample", help="Root folder of your dataset")
    parser.add_argument("--out", default="my_dataset", help="Output directory")
    parser.add_argument("--split", type=float, default=0.8, help="Train fraction (0-1)")
    parser.add_argument("--tag", default="mydata", help="Output file prefix")
    args = parser.parse_args()

    # Output layout mirrors the NuScenes "samples/<SENSOR>" convention.
    out_root = Path(args.out)
    lidar_dir = out_root / "samples" / "LIDAR_TOP"
    img_dir = out_root / "samples"
    rel_lidar = "samples/LIDAR_TOP"
    rel_img = "samples"

    print("Loading dataset ...")
    ds = MyDataset(args.root)

    # Collect all (sample_id, frame_index) pairs
    all_frames = []
    for sid in sorted(ds.sample_ids):
        sample = ds.get_sample(sid)
        for fidx in sorted(sample.frames.keys()):
            all_frames.append((sid, fidx, sample.frames[fidx]))

    print(f"Total frames: {len(all_frames)}")
    # Sequential (non-shuffled) split: the first `split` fraction -> train.
    n_train = int(len(all_frames) * args.split)
    train_frames = all_frames[:n_train]
    val_frames = all_frames[n_train:]

    def convert_split(frames, split_name):
        # Convert one split and write its .pkl and .coco.json files.
        infos = []
        for sid, fidx, frame in tqdm(frames, desc=f"Converting {split_name}"):
            info = frame_to_info(
                frame, sid,
                out_lidar_dir=str(lidar_dir),
                out_img_dir=str(img_dir),
                rel_lidar_root=rel_lidar,
                rel_img_root=rel_img,
            )
            infos.append(info)

        pkl_path = out_root / f"{args.tag}_infos_{split_name}.pkl"
        json_path = out_root / f"{args.tag}_infos_{split_name}_mono3d.coco.json"

        # MMDetection3D-style pickle: metainfo + flat data_list.
        with open(pkl_path, "wb") as f:
            pickle.dump({
                "metainfo": {
                    "version": "custom-v1.0",
                    "classes": DETECTION_CLASSES,
                },
                "data_list": infos,
            }, f)
        print(f"Saved {pkl_path} ({len(infos)} frames)")

        coco = build_coco_json(infos)
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(coco, f, ensure_ascii=False, indent=2)
        print(f"Saved {json_path} ({len(coco['images'])} images, {len(coco['annotations'])} anns)")

    convert_split(train_frames, "train")
    convert_split(val_frames, "val")

    # Write class list for reference
    classes_path = out_root / f"{args.tag}_classes.txt"
    classes_path.write_text("\n".join(DETECTION_CLASSES))
    print(f"\nDone. Output structure:\n {out_root}/")
    print(f" ├── samples/LIDAR_TOP/*.bin")
    print(f" ├── samples/CAM_FRONT/*.jpg (and other cameras)")
    print(f" ├── {args.tag}_infos_train.pkl")
    print(f" ├── {args.tag}_infos_val.pkl")
    print(f" ├── {args.tag}_infos_train_mono3d.coco.json")
    print(f" └── {args.tag}_infos_val_mono3d.coco.json")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user