Spaces:

lerobot
/

visualize_dataset

Running

App Files Files Community

pepijn223 HF Staff committed 19 days ago

Commit

e457eba

unverified ·

2 Parent(s): 9f9a669 bde0e45

Merge branch 'main' into feat/speedup_visulization

Browse files

Files changed (31) hide show

.dockerignore +10 -0
.github/workflows/deploy-release.yml +22 -0
.github/workflows/type-check.yml +29 -0
.gitignore +3 -0
Dockerfile +25 -0
README.md +73 -8
bun.lock +0 -0
eslint.config.mjs +6 -0
next.config.ts +1 -1
package.json +5 -1
src/app/[org]/[dataset]/[episode]/episode-viewer.tsx +6 -1
src/app/[org]/[dataset]/[episode]/fetch-data.ts +166 -265
src/app/[org]/[dataset]/page.tsx +4 -4
src/app/explore/explore-grid.tsx +0 -2
src/app/explore/page.tsx +12 -12
src/app/page.tsx +13 -13
src/components/data-recharts.tsx +30 -12
src/components/simple-videos-player.tsx +46 -20
src/components/videos-player.tsx +9 -7
src/types/chart.types.ts +24 -0
src/types/dataset.types.ts +70 -0
src/types/episode.types.ts +68 -0
src/types/index.ts +36 -0
src/types/video.types.ts +19 -0
src/utils/constants.ts +44 -0
src/utils/dataProcessing.ts +222 -0
src/utils/languageInstructions.ts +105 -0
src/utils/parquetUtils.ts +7 -7
src/utils/stringFormatting.ts +98 -0
src/utils/typeGuards.ts +114 -0
src/utils/versionUtils.ts +16 -9

.dockerignore ADDED Viewed

	@@ -0,0 +1,10 @@

+node_modules
+.next
+.git
+.gitignore
+README.md
+.env*.local
+*.log
+.DS_Store
+.vscode
+.idea

.github/workflows/deploy-release.yml ADDED Viewed

	@@ -0,0 +1,22 @@

+name: Deploy to Hf Hub
+on:
+  push:
+    branches:
+      - main
+  # to run this workflow manually from the Actions tab
+  workflow_dispatch:
+jobs:
+  sync-to-hub:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+          lfs: true
+      - name: Push to hub
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: git push https://mishig:$HF_TOKEN@huggingface.co/spaces/lerobot/visualize_dataset main -f

.github/workflows/type-check.yml ADDED Viewed

	@@ -0,0 +1,29 @@

+name: Type Check & Lint
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+jobs:
+  type-check:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: oven-sh/setup-bun@v1
+        with:
+          bun-version: latest
+      - name: Install dependencies
+        run: bun install
+      - name: Type check
+        run: bun run type-check
+      - name: Lint
+        run: bun run lint
+      - name: Format check
+        run: bun run format:check

.gitignore CHANGED Viewed

@@ -40,3 +40,6 @@ yarn-error.log*
 # typescript
 *.tsbuildinfo
 next-env.d.ts

 # typescript
 *.tsbuildinfo
 next-env.d.ts
+# claude code local settings
+.claude/

Dockerfile ADDED Viewed

	@@ -0,0 +1,25 @@

+FROM oven/bun:1 AS base
+# Set working directory
+WORKDIR /app
+# Copy package files
+COPY package.json bun.lock* ./
+# Install dependencies
+RUN bun install --frozen-lockfile
+# Copy the rest of the application
+COPY . .
+# Build the application
+RUN bun run build
+# Expose port 7860
+EXPOSE 7860
+# Set environment variable for port
+ENV PORT=7860
+# Start the application
+CMD ["bun", "start"]

README.md CHANGED Viewed

@@ -1,3 +1,14 @@
 # LeRobot Dataset Visualizer
 LeRobot Dataset Tool and Visualizer is a web application for interactive exploration and visualization of robotics datasets, particularly those in the LeRobot format. It enables users to browse, view, and analyze episodes from large-scale robotics datasets, combining synchronized video playback with rich, interactive data graphs.
@@ -28,15 +39,28 @@ This tool is designed to help robotics researchers and practitioners quickly ins
 ## Getting Started
-First, run the development server:
 ```bash
-npm run dev
-# or
-yarn dev
-# or
-pnpm dev
-# or
 bun dev
 ```
@@ -44,13 +68,54 @@ Open [http://localhost:3000](http://localhost:3000) with your browser to see the
 You can start editing the page by modifying `src/app/page.tsx` or other files in the `src/` directory. The app supports hot-reloading for rapid development.
 ### Environment Variables
 - `DATASET_URL`: (optional) Base URL for dataset hosting (defaults to HuggingFace Datasets).
 ## Contributing
 Contributions, bug reports, and feature requests are welcome! Please open an issue or submit a pull request.
-### Acknowledgement
 The app was originally created by [@Mishig25](https://github.com/mishig25) and taken from this PR [#1055](https://github.com/huggingface/lerobot/pull/1055)

+---
+title: Visualize Dataset (v2.0+ latest dataset format)
+emoji: 💻
+colorFrom: blue
+colorTo: green
+sdk: docker
+app_port: 7860
+pinned: false
+license: apache-2.0
+---
 # LeRobot Dataset Visualizer
 LeRobot Dataset Tool and Visualizer is a web application for interactive exploration and visualization of robotics datasets, particularly those in the LeRobot format. It enables users to browse, view, and analyze episodes from large-scale robotics datasets, combining synchronized video playback with rich, interactive data graphs.
 ## Getting Started
+### Prerequisites
+This project uses [Bun](https://bun.sh) as its package manager. If you don't have it installed:
+```bash
+# Install Bun
+curl -fsSL https://bun.sh/install | bash
+```
+### Installation
+Install dependencies:
+```bash
+bun install
+```
+### Development
+Run the development server:
 ```bash
 bun dev
 ```
 You can start editing the page by modifying `src/app/page.tsx` or other files in the `src/` directory. The app supports hot-reloading for rapid development.
+### Other Commands
+```bash
+# Build for production
+bun run build
+# Start production server
+bun start
+# Run linter
+bun run lint
+# Format code
+bun run format
+```
 ### Environment Variables
 - `DATASET_URL`: (optional) Base URL for dataset hosting (defaults to HuggingFace Datasets).
+## Docker Deployment
+This application can be deployed using Docker with bun for optimal performance and self-contained builds.
+### Build the Docker image
+```bash
+docker build -t lerobot-visualizer .
+```
+### Run the container
+```bash
+docker run -p 7860:7860 lerobot-visualizer
+```
+The application will be available at [http://localhost:7860](http://localhost:7860).
+### Run with custom environment variables
+```bash
+docker run -p 7860:7860 -e DATASET_URL=your-url lerobot-visualizer
+```
 ## Contributing
 Contributions, bug reports, and feature requests are welcome! Please open an issue or submit a pull request.
+### Acknowledgement
 The app was originally created by [@Mishig25](https://github.com/mishig25) and taken from this PR [#1055](https://github.com/huggingface/lerobot/pull/1055)

bun.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

eslint.config.mjs CHANGED Viewed

@@ -11,6 +11,12 @@ const compat = new FlatCompat({
 const eslintConfig = [
   ...compat.extends("next/core-web-vitals", "next/typescript"),
 ];
 export default eslintConfig;

 const eslintConfig = [
   ...compat.extends("next/core-web-vitals", "next/typescript"),
+  {
+    rules: {
+      // Allow `any` type as warning - core types are implemented, peripheral areas still need typing
+      "@typescript-eslint/no-explicit-any": "warn",
+    },
+  },
 ];
 export default eslintConfig;

next.config.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import type { NextConfig } from "next";
-import packageJson from './package.json';
 const nextConfig: NextConfig = {
   typescript: {

 import type { NextConfig } from "next";
+import packageJson from "./package.json";
 const nextConfig: NextConfig = {
   typescript: {

package.json CHANGED Viewed

@@ -7,7 +7,11 @@
     "build": "next build",
     "start": "next start",
     "lint": "next lint",
-    "format": "prettier --write ."
   },
   "dependencies": {
     "@react-three/drei": "^10.7.7",

     "build": "next build",
     "start": "next start",
     "lint": "next lint",
+    "format": "prettier --write .",
+    "format:check": "prettier --check .",
+    "type-check": "tsc --noEmit",
+    "type-check:watch": "tsc --noEmit --watch",
+    "validate": "bun run type-check && bun run lint && bun run format:check"
   },
   "dependencies": {
     "@react-three/drei": "^10.7.7",

src/app/[org]/[dataset]/[episode]/episode-viewer.tsx CHANGED Viewed

@@ -51,6 +51,11 @@ export default function EpisodeViewer({
       </div>
     );
   }
   return (
     <TimeProvider duration={data!.duration}>
       <FlaggedEpisodesProvider>
@@ -197,7 +202,7 @@ function EpisodeViewerInner({ data, org, dataset }: { data: EpisodeData; org?: s
             link.href = v.url;
             document.head.appendChild(link);
             links.push(link);
-      }
         }
       })
       .catch(() => {});

       </div>
     );
   }
+  if (!data) {
+    return null;
+  }
   return (
     <TimeProvider duration={data!.duration}>
       <FlaggedEpisodesProvider>
             link.href = v.url;
             document.head.appendChild(link);
             links.push(link);
+          }
         }
       })
       .catch(() => {});

src/app/[org]/[dataset]/[episode]/fetch-data.ts CHANGED Viewed

@@ -6,8 +6,19 @@ import {
 } from "@/utils/parquetUtils";
 import { pick } from "@/utils/pick";
 import { getDatasetVersionAndInfo, buildVersionedUrl } from "@/utils/versionUtils";
-const SERIES_NAME_DELIMITER = " | ";
 export type VideoInfo = {
   filename: string;
@@ -99,34 +110,10 @@ type ColumnDef = {
   value: string[];
 };
-function groupRowBySuffix(row: Record<string, number>): ChartRow {
-  const result: ChartRow = {};
-  const suffixGroups: Record<string, Record<string, number>> = {};
-  for (const [key, value] of Object.entries(row)) {
-    if (key === "timestamp") {
-      result["timestamp"] = value;
-      continue;
-    }
-    const parts = key.split(SERIES_NAME_DELIMITER);
-    if (parts.length === 2) {
-      const [prefix, suffix] = parts;
-      if (!suffixGroups[suffix]) suffixGroups[suffix] = {};
-      suffixGroups[suffix][prefix] = value;
-    } else {
-      result[key] = value;
-    }
-  }
-  for (const [suffix, group] of Object.entries(suffixGroups)) {
-    const keys = Object.keys(group);
-    if (keys.length === 1) {
-      const fullName = `${keys[0]}${SERIES_NAME_DELIMITER}${suffix}`;
-      result[fullName] = group[keys[0]];
-    } else {
-      result[suffix] = group;
-    }
-  }
-  return result;
-}
 export async function getEpisodeData(
   org: string,
@@ -141,7 +128,9 @@ export async function getEpisodeData(
     const info = rawInfo as unknown as DatasetMetadata;
     if (info.video_path === null) {
-      throw new Error("Only videos datasets are supported in this visualizer.\nPlease use Rerun visualizer for images datasets.");
     }
     console.time(`[perf] getEpisodeData (${version})`);
@@ -176,14 +165,14 @@ export async function getAdjacentEpisodesVideoInfo(
   dataset: string,
   currentEpisodeId: number,
   radius: number = 2,
-) {
   const repoId = `${org}/${dataset}`;
   try {
     const { version, info: rawInfo } = await getDatasetVersionAndInfo(repoId);
     const info = rawInfo as unknown as DatasetMetadata;
     const totalEpisodes = info.total_episodes;
-    const adjacentVideos: Array<{episodeId: number; videosInfo: VideoInfo[]}> = [];
     // Calculate adjacent episode IDs
     for (let offset = -radius; offset <= radius; offset++) {
@@ -195,24 +184,39 @@ export async function getAdjacentEpisodesVideoInfo(
           let videosInfo: VideoInfo[] = [];
           if (version === "v3.0") {
-            const episodeMetadata = await loadEpisodeMetadataV3Simple(repoId, version, episodeId);
-            videosInfo = extractVideoInfoV3WithSegmentation(repoId, version, info, episodeMetadata);
           } else {
             // For v2.x, use simpler video info extraction
             const episode_chunk = Math.floor(0 / 1000);
             videosInfo = Object.entries(info.features)
               .filter(([, value]) => value.dtype === "video")
               .map(([key]) => {
-                const videoPath = formatStringWithVars(info.video_path, {
                   video_key: key,
-                  episode_chunk: episode_chunk.toString().padStart(3, "0"),
-                  episode_index: episodeId.toString().padStart(6, "0"),
                 });
                 return {
                   filename: key,
                   url: buildVersionedUrl(repoId, version, videoPath),
                 };
               });
           }
           adjacentVideos.push({ episodeId, videosInfo });
@@ -258,43 +262,42 @@ async function getEpisodeDataV2(
           // episode id starts from 0
           (_, i) => i,
         )
-      : process.env.EPISODES
-          .split(/\s+/)
           .map((x) => parseInt(x.trim(), 10))
           .filter((x) => !isNaN(x));
       // Videos information
-    const videosInfo = Object.entries(info.features)
       .filter(([, value]) => value.dtype === "video")
       .map(([key]) => {
-      const videoPath = formatStringWithVars(info.video_path, {
         video_key: key,
-        episode_chunk: episode_chunk.toString().padStart(3, "0"),
-        episode_index: episodeId.toString().padStart(6, "0"),
       });
       return {
         filename: key,
         url: buildVersionedUrl(repoId, version, videoPath),
       };
-    });
   // Column data
   const columnNames = Object.entries(info.features)
     .filter(
       ([, value]) =>
-        ["float32", "int32"].includes(value.dtype) &&
-        value.shape.length === 1,
     )
     .map(([key, { shape }]) => ({ key, length: shape[0] }));
   // Exclude specific columns
-  const excludedColumns = [
-    "timestamp",
-    "frame_index",
-    "episode_index",
-    "index",
-    "task_index",
-  ];
   const filteredColumns = columnNames.filter(
     (column) => !excludedColumns.includes(column.key),
   );
@@ -315,7 +318,7 @@ async function getEpisodeDataV2(
         ? column_names.map((name: string) => `${key}${SERIES_NAME_DELIMITER}${name}`)
         : Array.from(
             { length: columnNames.find((c) => c.key === key)?.length ?? 1 },
-            (_, i) => `${key}${SERIES_NAME_DELIMITER}${i}`,
           ),
     };
   });
@@ -324,9 +327,11 @@ async function getEpisodeDataV2(
     repoId,
     version,
     formatStringWithVars(info.data_path, {
-      episode_chunk: episode_chunk.toString().padStart(3, "0"),
-      episode_index: episodeId.toString().padStart(6, "0"),
-    })
   );
   const arrayBuffer = await fetchParquetFile(parquetUrl);
@@ -366,20 +371,20 @@ async function getEpisodeDataV2(
       if (tasksResponse.ok) {
         const tasksText = await tasksResponse.text();
         const tasksData = tasksText
-          .split('\n')
-          .filter(line => line.trim())
-          .map(line => JSON.parse(line));
         if (tasksData && tasksData.length > 0) {
           const taskIndex = allData[0].task_index;
           const taskIndexNum = typeof taskIndex === 'bigint' ? Number(taskIndex) : taskIndex;
-          const taskData = tasksData.find(t => t.task_index === taskIndexNum);
           if (taskData) {
             task = taskData.task;
           }
         }
       }
-    } catch (error) {
       // No tasks metadata file for this v2.x dataset
     }
   }
@@ -414,86 +419,21 @@ async function getEpisodeDataV2(
     )
     .map(([key]) => key);
-  // 1. Group all numeric keys by suffix (excluding 'timestamp')
-  const numericKeys = seriesNames.filter((k) => k !== "timestamp");
-  const suffixGroupsMap: Record<string, string[]> = {};
-  for (const key of numericKeys) {
-    const parts = key.split(SERIES_NAME_DELIMITER);
-    const suffix = parts[1] || parts[0]; // fallback to key if no delimiter
-    if (!suffixGroupsMap[suffix]) suffixGroupsMap[suffix] = [];
-    suffixGroupsMap[suffix].push(key);
-  }
-  const suffixGroups = Object.values(suffixGroupsMap);
-  // 2. Compute min/max for each suffix group as a whole
-  const groupStats: Record<string, { min: number; max: number }> = {};
-  suffixGroups.forEach((group) => {
-    let min = Infinity,
-      max = -Infinity;
-    for (const row of chartData) {
-      for (const key of group) {
-        const v = row[key];
-        if (typeof v === "number" && !isNaN(v)) {
-          if (v < min) min = v;
-          if (v > max) max = v;
-        }
-      }
-    }
-    // Use the first key in the group as the group id
-    groupStats[group[0]] = { min, max };
-  });
-  // 3. Group suffix groups by similar scale (treat each suffix group as a unit)
-  const scaleGroups: Record<string, string[][]> = {};
-  const used = new Set<string>();
-  const SCALE_THRESHOLD = 2;
-  for (const group of suffixGroups) {
-    const groupId = group[0];
-    if (used.has(groupId)) continue;
-    const { min, max } = groupStats[groupId];
-    if (!isFinite(min) || !isFinite(max)) continue;
-    const logMin = Math.log10(Math.abs(min) + 1e-9);
-    const logMax = Math.log10(Math.abs(max) + 1e-9);
-    const unit: string[][] = [group];
-    used.add(groupId);
-    for (const other of suffixGroups) {
-      const otherId = other[0];
-      if (used.has(otherId) || otherId === groupId) continue;
-      const { min: omin, max: omax } = groupStats[otherId];
-      if (!isFinite(omin) || !isFinite(omax) || omin === omax) continue;
-      const ologMin = Math.log10(Math.abs(omin) + 1e-9);
-      const ologMax = Math.log10(Math.abs(omax) + 1e-9);
-      if (
-        Math.abs(logMin - ologMin) <= SCALE_THRESHOLD &&
-        Math.abs(logMax - ologMax) <= SCALE_THRESHOLD
-      ) {
-        unit.push(other);
-        used.add(otherId);
-      }
-    }
-    scaleGroups[groupId] = unit;
-  }
-  // 4. Flatten scaleGroups into chartGroups (array of arrays of keys)
-  const chartGroups: string[][] = Object.values(scaleGroups)
-    .sort((a, b) => b.length - a.length)
-    .flatMap((suffixGroupArr) => {
-      // suffixGroupArr is array of suffix groups (each is array of keys)
-      const merged = suffixGroupArr.flat();
-      if (merged.length > 6) {
-        const subgroups: string[][] = [];
-        for (let i = 0; i < merged.length; i += 6) {
-          subgroups.push(merged.slice(i, i + 6));
-        }
-        return subgroups;
-      }
-      return [merged];
-    });
   const duration = chartData[chartData.length - 1].timestamp;
   const chartDataGroups = chartGroups.map((group) =>
-    chartData.map((row) => groupRowBySuffix(pick(row, [...group, "timestamp"])))
   );
   return {
@@ -531,10 +471,19 @@ async function getEpisodeDataV3(
   const episodes = Array.from({ length: info.total_episodes }, (_, i) => i);
   // Load episode metadata to get timestamps for the requested episode
-  const episodeMetadata = await loadEpisodeMetadataV3Simple(repoId, version, episodeId);
   // Create video info with segmentation using the metadata
-  const videosInfo = extractVideoInfoV3WithSegmentation(repoId, version, info, episodeMetadata);
   // Load episode data for charts
   const { chartDataGroups, flatChartData, ignoredColumns, task } = await loadEpisodeDataV3(repoId, version, info, episodeMetadata);
@@ -563,9 +512,9 @@ async function loadEpisodeDataV3(
   episodeMetadata: EpisodeMetadataV3,
 ): Promise<{ chartDataGroups: ChartRow[][]; flatChartData: Record<string, number>[]; ignoredColumns: string[]; task?: string }> {
   // Build data file path using chunk and file indices
-  const dataChunkIndex = episodeMetadata.data_chunk_index || 0;
-  const dataFileIndex = episodeMetadata.data_file_index || 0;
-  const dataPath = `data/chunk-${dataChunkIndex.toString().padStart(3, "0")}/file-${dataFileIndex.toString().padStart(3, "0")}.parquet`;
   try {
     const dataUrl = buildVersionedUrl(repoId, version, dataPath);
@@ -684,20 +633,20 @@ function processEpisodeDataForCharts(
   // Common feature order for v3.0 datasets (but only include if they exist)
   const expectedFeatureOrder = [
-    'observation.state',
-    'action',
-    'timestamp',
-    'episode_index',
-    'frame_index',
-    'next.reward',
-    'next.done',
-    'index',
-    'task_index'
   ];
   // Map indices to features that actually exist
   let currentIndex = 0;
-  expectedFeatureOrder.forEach(feature => {
     if (featureKeys.includes(feature)) {
       v3IndexToFeatureMap[currentIndex.toString()] = feature;
       currentIndex++;
@@ -705,7 +654,7 @@ function processEpisodeDataForCharts(
   });
   // Columns to exclude from charts (note: 'task' is intentionally not excluded as we want to access it)
-  const excludedColumns = ['index', 'task_index', 'episode_index', 'frame_index', 'next.done'];
   // Create columns structure similar to V2.1 for proper hierarchical naming
   const columns: ColumnDef[] = Object.entries(info.features)
@@ -726,7 +675,7 @@ function processEpisodeDataForCharts(
           ? column_names.map((name: string) => `${key}${SERIES_NAME_DELIMITER}${name}`)
           : Array.from(
               { length: feature.shape[0] || 1 },
-              (_, i) => `${key}${SERIES_NAME_DELIMITER}${i}`,
             ),
       };
     });
@@ -737,7 +686,7 @@ function processEpisodeDataForCharts(
     const allKeys: string[] = [];
     Object.entries(firstRow || {}).forEach(([key, value]) => {
-      if (key === 'timestamp') return; // Skip timestamp, we'll add it separately
       // Map numeric key to feature name if available
       const featureName = v3IndexToFeatureMap[key] || key;
@@ -749,7 +698,7 @@ function processEpisodeDataForCharts(
       if (excludedColumns.includes(featureName)) return;
       // Find the matching column definition to get proper names
-      const columnDef = columns.find(col => col.key === featureName);
       if (columnDef && Array.isArray(value) && value.length > 0) {
         // Use the proper hierarchical naming from column definition
         columnDef.value.forEach((seriesName, idx) => {
@@ -757,10 +706,10 @@ function processEpisodeDataForCharts(
             allKeys.push(seriesName);
           }
         });
-      } else if (typeof value === 'number' && !isNaN(value)) {
         // For scalar numeric values
         allKeys.push(featureName);
-      } else if (typeof value === 'bigint') {
         // For BigInt values
         allKeys.push(featureName);
       }
@@ -769,10 +718,7 @@ function processEpisodeDataForCharts(
     seriesNames = ["timestamp", ...allKeys];
   } else {
     // Fallback to column-based approach like V2.1
-    seriesNames = [
-      "timestamp",
-      ...columns.map(({ value }) => value).flat(),
-    ];
   }
   const chartData = episodeData.map((row, index) => {
@@ -783,14 +729,17 @@ function processEpisodeDataForCharts(
     let videoDuration = episodeData.length; // Fallback to data length
     if (episodeMetadata) {
       // Use actual video segment duration if available
-      videoDuration = (episodeMetadata.video_to_timestamp || 30) - (episodeMetadata.video_from_timestamp || 0);
     }
-    obj["timestamp"] = (index / Math.max(episodeData.length - 1, 1)) * videoDuration;
     // Add all data columns using hierarchical naming
-    if (row && typeof row === 'object') {
       Object.entries(row).forEach(([key, value]) => {
-        if (key === 'timestamp') {
           // Timestamp is already handled above
           return;
         }
@@ -805,21 +754,21 @@ function processEpisodeDataForCharts(
         if (excludedColumns.includes(featureName)) return;
         // Find the matching column definition to get proper series names
-        const columnDef = columns.find(col => col.key === featureName);
         if (Array.isArray(value) && columnDef) {
           // For array values like observation.state and action, use proper hierarchical naming
           value.forEach((val, idx) => {
             if (idx < columnDef.value.length) {
               const seriesName = columnDef.value[idx];
-              obj[seriesName] = typeof val === 'number' ? val : Number(val);
             }
           });
-        } else if (typeof value === 'number' && !isNaN(value)) {
           obj[featureName] = value;
-        } else if (typeof value === 'bigint') {
           obj[featureName] = Number(value);
-        } else if (typeof value === 'boolean') {
           // Convert boolean to number for charts
           obj[featureName] = value ? 1 : 0;
         }
@@ -837,92 +786,27 @@ function processEpisodeDataForCharts(
           ["float32", "int32"].includes(value.dtype) && value.shape.length > 2, // Only ignore 3D+ data
       )
       .map(([key]) => key),
-    ...excludedColumns // Also include the manually excluded columns
   ];
-  // Group processing logic (using SERIES_NAME_DELIMITER like v2.1)
-  const numericKeys = seriesNames.filter((k) => k !== "timestamp");
-  const suffixGroupsMap: Record<string, string[]> = {};
-  for (const key of numericKeys) {
-    const parts = key.split(SERIES_NAME_DELIMITER);
-    const suffix = parts[1] || parts[0]; // fallback to key if no delimiter
-    if (!suffixGroupsMap[suffix]) suffixGroupsMap[suffix] = [];
-    suffixGroupsMap[suffix].push(key);
-  }
-  const suffixGroups = Object.values(suffixGroupsMap);
-  // Compute min/max for each suffix group
-  const groupStats: Record<string, { min: number; max: number }> = {};
-  suffixGroups.forEach((group) => {
-    let min = Infinity, max = -Infinity;
-    for (const row of chartData) {
-      for (const key of group) {
-        const v = row[key];
-        if (typeof v === "number" && !isNaN(v)) {
-          if (v < min) min = v;
-          if (v > max) max = v;
-        }
-      }
-    }
-    groupStats[group[0]] = { min, max };
-  });
-  // Group by similar scale
-  const scaleGroups: Record<string, string[][]> = {};
-  const used = new Set<string>();
-  const SCALE_THRESHOLD = 2;
-  for (const group of suffixGroups) {
-    const groupId = group[0];
-    if (used.has(groupId)) continue;
-    const { min, max } = groupStats[groupId];
-    if (!isFinite(min) || !isFinite(max)) continue;
-    const logMin = Math.log10(Math.abs(min) + 1e-9);
-    const logMax = Math.log10(Math.abs(max) + 1e-9);
-    const unit: string[][] = [group];
-    used.add(groupId);
-    for (const other of suffixGroups) {
-      const otherId = other[0];
-      if (used.has(otherId) || otherId === groupId) continue;
-      const { min: omin, max: omax } = groupStats[otherId];
-      if (!isFinite(omin) || !isFinite(omax) || omin === omax) continue;
-      const ologMin = Math.log10(Math.abs(omin) + 1e-9);
-      const ologMax = Math.log10(Math.abs(omax) + 1e-9);
-      if (
-        Math.abs(logMin - ologMin) <= SCALE_THRESHOLD &&
-        Math.abs(logMax - ologMax) <= SCALE_THRESHOLD
-      ) {
-        unit.push(other);
-        used.add(otherId);
-      }
-    }
-    scaleGroups[groupId] = unit;
-  }
-  // Flatten into chartGroups
-  const chartGroups: string[][] = Object.values(scaleGroups)
-    .sort((a, b) => b.length - a.length)
-    .flatMap((suffixGroupArr) => {
-      const merged = suffixGroupArr.flat();
-      if (merged.length > 6) {
-        const subgroups = [];
-        for (let i = 0; i < merged.length; i += 6) {
-          subgroups.push(merged.slice(i, i + 6));
-        }
-        return subgroups;
-      }
-      return [merged];
-    });
   const chartDataGroups = chartGroups.map((group) =>
-    chartData.map((row) => groupRowBySuffix(pick(row, [...group, "timestamp"])))
   );
   return { chartDataGroups, flatChartData: chartData, ignoredColumns };
 }
 // Video info extraction with segmentation for v3.0
 function extractVideoInfoV3WithSegmentation(
   repoId: string,
@@ -931,13 +815,14 @@ function extractVideoInfoV3WithSegmentation(
   episodeMetadata: EpisodeMetadataV3,
 ): VideoInfo[] {
   // Get video features from dataset info
-  const videoFeatures = Object.entries(info.features)
-    .filter(([, value]) => value.dtype === "video");
   const videosInfo = videoFeatures.map(([videoKey]) => {
     // Check if we have per-camera metadata in the episode row
-    const cameraSpecificKeys = Object.keys(episodeMetadata).filter(key =>
-      key.startsWith(`videos/${videoKey}/`)
     );
     let chunkIndex: number, fileIndex: number, segmentStart: number, segmentEnd: number;
@@ -956,7 +841,15 @@ function extractVideoInfoV3WithSegmentation(
       segmentEnd = episodeMetadata.video_to_timestamp || 30;
     }
-    const videoPath = `videos/${videoKey}/chunk-${chunkIndex.toString().padStart(3, "0")}/file-${fileIndex.toString().padStart(3, "0")}.mp4`;
     const fullUrl = buildVersionedUrl(repoId, version, videoPath);
     return {
@@ -964,9 +857,9 @@ function extractVideoInfoV3WithSegmentation(
       url: fullUrl,
       // Enable segmentation with timestamps from metadata
       isSegmented: true,
-      segmentStart: segmentStart,
-      segmentEnd: segmentEnd,
-      segmentDuration: segmentEnd - segmentStart,
     };
   });
@@ -988,8 +881,15 @@ async function loadEpisodeMetadataV3Simple(
   // Try loading episode metadata files until we find the episode
   while (!episodeRow) {
-    const episodesMetadataPath = `meta/episodes/chunk-${chunkIndex.toString().padStart(3, "0")}/file-${fileIndex.toString().padStart(3, "0")}.parquet`;
-    const episodesMetadataUrl = buildVersionedUrl(repoId, version, episodesMetadataPath);
     try {
       const arrayBuffer = await fetchParquetFile(episodesMetadataUrl);
@@ -1015,9 +915,11 @@ async function loadEpisodeMetadataV3Simple(
         // Not in this file, try the next one
         fileIndex++;
       }
-    } catch (error) {
       // File doesn't exist - episode not found
-      throw new Error(`Episode ${episodeId} not found in metadata (searched up to file-${fileIndex.toString().padStart(3, "0")}.parquet)`);
     }
   }
@@ -1028,9 +930,9 @@ async function loadEpisodeMetadataV3Simple(
 // Simple parser for episode row - focuses on key fields for episodes
 function parseEpisodeRowSimple(row: Record<string, unknown>): EpisodeMetadataV3 {
   // v3.0 uses named keys in the episode metadata
-  if (row && typeof row === 'object') {
     // Check if this is v3.0 format with named keys
-    if ('episode_index' in row) {
       // v3.0 format - use named keys
       // Convert BigInt values to numbers
       const toBigIntSafe = (value: unknown): number => {
@@ -1079,7 +981,7 @@ function parseEpisodeRowSimple(row: Record<string, unknown>): EpisodeMetadataV3
         }
       });
-      return episodeData;
     } else {
       // Fallback to numeric keys for compatibility
       const toNum = (v: unknown, fallback = 0): number =>
@@ -1118,7 +1020,6 @@ function parseEpisodeRowSimple(row: Record<string, unknown>): EpisodeMetadataV3
 // ─── Stats computation ───────────────────────────────────────────
 /**

 } from "@/utils/parquetUtils";
 import { pick } from "@/utils/pick";
 import { getDatasetVersionAndInfo, buildVersionedUrl } from "@/utils/versionUtils";
+import { PADDING, CHART_CONFIG, EXCLUDED_COLUMNS } from "@/utils/constants";
+import {
+  processChartDataGroups,
+  groupRowBySuffix,
+} from "@/utils/dataProcessing";
+import {
+  buildV3VideoPath,
+  buildV3DataPath,
+  buildV3EpisodesMetadataPath,
+} from "@/utils/stringFormatting";
+import { bigIntToNumber } from "@/utils/typeGuards";
+const SERIES_NAME_DELIMITER = CHART_CONFIG.SERIES_NAME_DELIMITER; // Local alias of the shared chart config value; placed between a feature key and a sub-series name when building series names in this file.
 export type VideoInfo = {
   filename: string;
   value: string[];
 };
+type AdjacentEpisodeVideos = { // One entry per neighbouring episode; the adjacent-episodes loader in this file resolves to an array of these.
+  episodeId: number; // Id of the neighbouring episode this entry describes (episode ids start from 0).
+  videosInfo: VideoInfo[]; // Video descriptors collected for that episode; stays empty when none were extracted.
+};
 export async function getEpisodeData(
   org: string,
     const info = rawInfo as unknown as DatasetMetadata;
     if (info.video_path === null) {
+      throw new Error(
+        "Only videos datasets are supported in this visualizer.\nPlease use Rerun visualizer for images datasets.",
+      );
     }
     console.time(`[perf] getEpisodeData (${version})`);
   dataset: string,
   currentEpisodeId: number,
   radius: number = 2,
+): Promise<AdjacentEpisodeVideos[]> {
   const repoId = `${org}/${dataset}`;
   try {
     const { version, info: rawInfo } = await getDatasetVersionAndInfo(repoId);
     const info = rawInfo as unknown as DatasetMetadata;
     const totalEpisodes = info.total_episodes;
+    const adjacentVideos: AdjacentEpisodeVideos[] = [];
     // Calculate adjacent episode IDs
     for (let offset = -radius; offset <= radius; offset++) {
           let videosInfo: VideoInfo[] = [];
           if (version === "v3.0") {
+            const episodeMetadata = await loadEpisodeMetadataV3Simple(
+              repoId,
+              version,
+              episodeId,
+            );
+            videosInfo = extractVideoInfoV3WithSegmentation(
+              repoId,
+              version,
+              info,
+              episodeMetadata,
+            );
           } else {
             // For v2.x, use simpler video info extraction
+            if (info.video_path) {
             const episode_chunk = Math.floor(0 / 1000);
             videosInfo = Object.entries(info.features)
               .filter(([, value]) => value.dtype === "video")
               .map(([key]) => {
+                  const videoPath = formatStringWithVars(info.video_path!, {
                   video_key: key,
+                    episode_chunk: episode_chunk
+                      .toString()
+                      .padStart(PADDING.CHUNK_INDEX, "0"),
+                    episode_index: episodeId
+                      .toString()
+                      .padStart(PADDING.EPISODE_INDEX, "0"),
                 });
                 return {
                   filename: key,
                   url: buildVersionedUrl(repoId, version, videoPath),
                 };
               });
+            }
           }
           adjacentVideos.push({ episodeId, videosInfo });
           // episode id starts from 0
           (_, i) => i,
         )
+      : process.env.EPISODES.split(/\s+/)
           .map((x) => parseInt(x.trim(), 10))
           .filter((x) => !isNaN(x));
       // Videos information
+  const videosInfo =
+    info.video_path !== null
+      ? Object.entries(info.features)
       .filter(([, value]) => value.dtype === "video")
       .map(([key]) => {
+            const videoPath = formatStringWithVars(info.video_path!, {
         video_key: key,
+              episode_chunk: episode_chunk
+                .toString()
+                .padStart(PADDING.CHUNK_INDEX, "0"),
+              episode_index: episodeId
+                .toString()
+                .padStart(PADDING.EPISODE_INDEX, "0"),
       });
       return {
         filename: key,
         url: buildVersionedUrl(repoId, version, videoPath),
       };
+          })
+      : [];
   // Column data
   const columnNames = Object.entries(info.features)
     .filter(
       ([, value]) =>
+        ["float32", "int32"].includes(value.dtype) && value.shape.length === 1,
     )
     .map(([key, { shape }]) => ({ key, length: shape[0] }));
   // Exclude specific columns
+  const excludedColumns = EXCLUDED_COLUMNS.V2 as readonly string[];
   const filteredColumns = columnNames.filter(
     (column) => !excludedColumns.includes(column.key),
   );
         ? column_names.map((name: string) => `${key}${SERIES_NAME_DELIMITER}${name}`)
         : Array.from(
             { length: columnNames.find((c) => c.key === key)?.length ?? 1 },
+            (_, i) => `${key}${CHART_CONFIG.SERIES_NAME_DELIMITER}${i}`,
           ),
     };
   });
     repoId,
     version,
     formatStringWithVars(info.data_path, {
+      episode_chunk: episode_chunk
+        .toString()
+        .padStart(PADDING.CHUNK_INDEX, "0"),
+      episode_index: episodeId.toString().padStart(PADDING.EPISODE_INDEX, "0"),
+    }),
   );
   const arrayBuffer = await fetchParquetFile(parquetUrl);
       if (tasksResponse.ok) {
         const tasksText = await tasksResponse.text();
         const tasksData = tasksText
+          .split("\n")
+          .filter((line) => line.trim())
+          .map((line) => JSON.parse(line));
         if (tasksData && tasksData.length > 0) {
           const taskIndex = allData[0].task_index;
           const taskIndexNum = typeof taskIndex === 'bigint' ? Number(taskIndex) : taskIndex;
+          const taskData = tasksData.find((t: Record<string, unknown>) => t.task_index === taskIndexNum);
           if (taskData) {
             task = taskData.task;
           }
         }
       }
+    } catch {
       // No tasks metadata file for this v2.x dataset
     }
   }
     )
     .map(([key]) => key);
+  // Process chart data into organized groups using utility function
+  const chartGroups = processChartDataGroups(seriesNames, chartData);
   const duration = chartData[chartData.length - 1].timestamp;
   const chartDataGroups = chartGroups.map((group) =>
+    chartData.map((row) => {
+      const grouped = groupRowBySuffix(pick(row, [...group, "timestamp"]));
+      // Ensure timestamp is always a number at the top level
+      return {
+        ...grouped,
+        timestamp:
+          typeof grouped.timestamp === "number" ? grouped.timestamp : 0,
+      };
+    }),
   );
   return {
   const episodes = Array.from({ length: info.total_episodes }, (_, i) => i);
   // Load episode metadata to get timestamps for the requested episode
+  const episodeMetadata = await loadEpisodeMetadataV3Simple(
+    repoId,
+    version,
+    episodeId,
+  );
   // Create video info with segmentation using the metadata
+  const videosInfo = extractVideoInfoV3WithSegmentation(
+    repoId,
+    version,
+    info,
+    episodeMetadata,
+  );
   // Load episode data for charts
   const { chartDataGroups, flatChartData, ignoredColumns, task } = await loadEpisodeDataV3(repoId, version, info, episodeMetadata);
   episodeMetadata: EpisodeMetadataV3,
 ): Promise<{ chartDataGroups: ChartRow[][]; flatChartData: Record<string, number>[]; ignoredColumns: string[]; task?: string }> {
   // Build data file path using chunk and file indices
+  const dataChunkIndex = bigIntToNumber(episodeMetadata.data_chunk_index, 0);
+  const dataFileIndex = bigIntToNumber(episodeMetadata.data_file_index, 0);
+  const dataPath = buildV3DataPath(dataChunkIndex, dataFileIndex);
   try {
     const dataUrl = buildVersionedUrl(repoId, version, dataPath);
   // Common feature order for v3.0 datasets (but only include if they exist)
   const expectedFeatureOrder = [
+    "observation.state",
+    "action",
+    "timestamp",
+    "episode_index",
+    "frame_index",
+    "next.reward",
+    "next.done",
+    "index",
+    "task_index",
   ];
   // Map indices to features that actually exist
   let currentIndex = 0;
+  expectedFeatureOrder.forEach((feature) => {
     if (featureKeys.includes(feature)) {
       v3IndexToFeatureMap[currentIndex.toString()] = feature;
       currentIndex++;
   });
   // Columns to exclude from charts (note: 'task' is intentionally not excluded as we want to access it)
+  const excludedColumns = EXCLUDED_COLUMNS.V3 as readonly string[];
   // Create columns structure similar to V2.1 for proper hierarchical naming
   const columns: ColumnDef[] = Object.entries(info.features)
           ? column_names.map((name: string) => `${key}${SERIES_NAME_DELIMITER}${name}`)
           : Array.from(
               { length: feature.shape[0] || 1 },
+              (_, i) => `${key}${CHART_CONFIG.SERIES_NAME_DELIMITER}${i}`,
             ),
       };
     });
     const allKeys: string[] = [];
     Object.entries(firstRow || {}).forEach(([key, value]) => {
+      if (key === "timestamp") return; // Skip timestamp, we'll add it separately
       // Map numeric key to feature name if available
       const featureName = v3IndexToFeatureMap[key] || key;
       if (excludedColumns.includes(featureName)) return;
       // Find the matching column definition to get proper names
+      const columnDef = columns.find((col) => col.key === featureName);
       if (columnDef && Array.isArray(value) && value.length > 0) {
         // Use the proper hierarchical naming from column definition
         columnDef.value.forEach((seriesName, idx) => {
             allKeys.push(seriesName);
           }
         });
+      } else if (typeof value === "number" && !isNaN(value)) {
         // For scalar numeric values
         allKeys.push(featureName);
+      } else if (typeof value === "bigint") {
         // For BigInt values
         allKeys.push(featureName);
       }
     seriesNames = ["timestamp", ...allKeys];
   } else {
     // Fallback to column-based approach like V2.1
+    seriesNames = ["timestamp", ...columns.map(({ value }) => value).flat()];
   }
   const chartData = episodeData.map((row, index) => {
     let videoDuration = episodeData.length; // Fallback to data length
     if (episodeMetadata) {
       // Use actual video segment duration if available
+      videoDuration =
+        (episodeMetadata.video_to_timestamp || 30) -
+        (episodeMetadata.video_from_timestamp || 0);
     }
+    obj["timestamp"] =
+      (index / Math.max(episodeData.length - 1, 1)) * videoDuration;
     // Add all data columns using hierarchical naming
+    if (row && typeof row === "object") {
       Object.entries(row).forEach(([key, value]) => {
+        if (key === "timestamp") {
           // Timestamp is already handled above
           return;
         }
         if (excludedColumns.includes(featureName)) return;
         // Find the matching column definition to get proper series names
+        const columnDef = columns.find((col) => col.key === featureName);
         if (Array.isArray(value) && columnDef) {
           // For array values like observation.state and action, use proper hierarchical naming
           value.forEach((val, idx) => {
             if (idx < columnDef.value.length) {
               const seriesName = columnDef.value[idx];
+              obj[seriesName] = typeof val === "number" ? val : Number(val);
             }
           });
+        } else if (typeof value === "number" && !isNaN(value)) {
           obj[featureName] = value;
+        } else if (typeof value === "bigint") {
           obj[featureName] = Number(value);
+        } else if (typeof value === "boolean") {
           // Convert boolean to number for charts
           obj[featureName] = value ? 1 : 0;
         }
           ["float32", "int32"].includes(value.dtype) && value.shape.length > 2, // Only ignore 3D+ data
       )
       .map(([key]) => key),
+    ...excludedColumns, // Also include the manually excluded columns
   ];
+  // Process chart data into organized groups using utility function
+  const chartGroups = processChartDataGroups(seriesNames, chartData);
   const chartDataGroups = chartGroups.map((group) =>
+    chartData.map((row) => {
+      const grouped = groupRowBySuffix(pick(row, [...group, "timestamp"]));
+      // Ensure timestamp is always a number at the top level
+      return {
+        ...grouped,
+        timestamp:
+          typeof grouped.timestamp === "number" ? grouped.timestamp : 0,
+      };
+    }),
   );
   return { chartDataGroups, flatChartData: chartData, ignoredColumns };
 }
 // Video info extraction with segmentation for v3.0
 function extractVideoInfoV3WithSegmentation(
   repoId: string,
   episodeMetadata: EpisodeMetadataV3,
 ): VideoInfo[] {
   // Get video features from dataset info
+  const videoFeatures = Object.entries(info.features).filter(
+    ([, value]) => value.dtype === "video",
+  );
   const videosInfo = videoFeatures.map(([videoKey]) => {
     // Check if we have per-camera metadata in the episode row
+    const cameraSpecificKeys = Object.keys(episodeMetadata).filter((key) =>
+      key.startsWith(`videos/${videoKey}/`),
     );
     let chunkIndex: number, fileIndex: number, segmentStart: number, segmentEnd: number;
       segmentEnd = episodeMetadata.video_to_timestamp || 30;
     }
+    // Convert BigInt to number for timestamps
+    const startNum = bigIntToNumber(segmentStart);
+    const endNum = bigIntToNumber(segmentEnd);
+    const videoPath = buildV3VideoPath(
+      videoKey,
+      bigIntToNumber(chunkIndex, 0),
+      bigIntToNumber(fileIndex, 0),
+    );
     const fullUrl = buildVersionedUrl(repoId, version, videoPath);
     return {
       url: fullUrl,
       // Enable segmentation with timestamps from metadata
       isSegmented: true,
+      segmentStart: startNum,
+      segmentEnd: endNum,
+      segmentDuration: endNum - startNum,
     };
   });
   // Try loading episode metadata files until we find the episode
   while (!episodeRow) {
+    const episodesMetadataPath = buildV3EpisodesMetadataPath(
+      chunkIndex,
+      fileIndex,
+    );
+    const episodesMetadataUrl = buildVersionedUrl(
+      repoId,
+      version,
+      episodesMetadataPath,
+    );
     try {
       const arrayBuffer = await fetchParquetFile(episodesMetadataUrl);
         // Not in this file, try the next one
         fileIndex++;
       }
+    } catch {
       // File doesn't exist - episode not found
+      throw new Error(
+        `Episode ${episodeId} not found in metadata (searched up to file-${fileIndex.toString().padStart(PADDING.CHUNK_INDEX, "0")}.parquet)`,
+      );
     }
   }
 // Simple parser for episode row - focuses on key fields for episodes
 function parseEpisodeRowSimple(row: Record<string, unknown>): EpisodeMetadataV3 {
   // v3.0 uses named keys in the episode metadata
+  if (row && typeof row === "object") {
     // Check if this is v3.0 format with named keys
+    if ("episode_index" in row) {
       // v3.0 format - use named keys
       // Convert BigInt values to numbers
       const toBigIntSafe = (value: unknown): number => {
         }
       });
+      return episodeData as EpisodeMetadataV3;
     } else {
       // Fallback to numeric keys for compatibility
       const toNum = (v: unknown, fallback = 0): number =>
 // ─── Stats computation ───────────────────────────────────────────
 /**

src/app/[org]/[dataset]/page.tsx CHANGED Viewed

@@ -6,10 +6,10 @@ export default async function DatasetRootPage({
   params: Promise<{ org: string; dataset: string }>;
 }) {
   const { org, dataset } = await params;
-  const episodeN = process.env.EPISODES
-    ?.split(/\s+/)
-    .map((x) => parseInt(x.trim(), 10))
-    .filter((x) => !isNaN(x))[0] ?? 0;
   redirect(`/${org}/${dataset}/episode_${episodeN}`);
 }

   params: Promise<{ org: string; dataset: string }>;
 }) {
   const { org, dataset } = await params;
+  const episodeN =
+    process.env.EPISODES?.split(/\s+/)
+      .map((x) => parseInt(x.trim(), 10))
+      .filter((x) => !isNaN(x))[0] ?? 0;
   redirect(`/${org}/${dataset}/episode_${episodeN}`);
 }

src/app/explore/explore-grid.tsx CHANGED Viewed

@@ -2,8 +2,6 @@
 import React, { useEffect, useRef } from "react";
 import Link from "next/link";
-import { useRouter, useSearchParams } from "next/navigation";
 import { postParentMessageWithParams } from "@/utils/postParentMessage";
 type ExploreGridProps = {

 import React, { useEffect, useRef } from "react";
 import Link from "next/link";
 import { postParentMessageWithParams } from "@/utils/postParentMessage";
 type ExploreGridProps = {

src/app/explore/page.tsx CHANGED Viewed

@@ -1,17 +1,15 @@
 import React from "react";
 import ExploreGrid from "./explore-grid";
-import {
-  DatasetMetadata,
-  fetchJson,
-  formatStringWithVars,
-} from "@/utils/parquetUtils";
 import { getDatasetVersion, buildVersionedUrl } from "@/utils/versionUtils";
 export default async function ExplorePage({
   searchParams,
 }: {
-  searchParams: { p?: string };
 }) {
   let datasets: { id: string }[] = [];
   let currentPage = 1;
   let totalPages = 1;
@@ -25,8 +23,8 @@ export default async function ExplorePage({
     if (!res.ok) throw new Error("Failed to fetch datasets");
     const data = await res.json();
     const allDatasets = data.datasets || data;
-    // Use searchParams from props
-    const page = parseInt(searchParams?.p || "1", 10);
     const perPage = 30;
     currentPage = page;
@@ -46,24 +44,26 @@ export default async function ExplorePage({
         try {
           const [org, dataset] = ds.id.split("/");
           const repoId = `${org}/${dataset}`;
           // Try to get compatible version, but don't fail the entire page if incompatible
           let version: string;
           try {
             version = await getDatasetVersion(repoId);
           } catch (err) {
             // Dataset is not compatible, skip it silently
-            console.warn(`Skipping incompatible dataset ${repoId}: ${err instanceof Error ? err.message : err}`);
             return null;
           }
           const jsonUrl = buildVersionedUrl(repoId, version, "meta/info.json");
           const info = await fetchJson<DatasetMetadata>(jsonUrl);
           const videoEntry = Object.entries(info.features).find(
             ([, value]) => value.dtype === "video",
           );
           let videoUrl: string | null = null;
-          if (videoEntry) {
             const [key] = videoEntry;
             const videoPath = formatStringWithVars(info.video_path, {
               video_key: key,

 import React from "react";
 import ExploreGrid from "./explore-grid";
+import { fetchJson, formatStringWithVars } from "@/utils/parquetUtils";
 import { getDatasetVersion, buildVersionedUrl } from "@/utils/versionUtils";
+import type { DatasetMetadata } from "@/utils/parquetUtils";
 export default async function ExplorePage({
   searchParams,
 }: {
+  searchParams: Promise<{ p?: string }>;
 }) {
+  const params = await searchParams;
   let datasets: { id: string }[] = [];
   let currentPage = 1;
   let totalPages = 1;
     if (!res.ok) throw new Error("Failed to fetch datasets");
     const data = await res.json();
     const allDatasets = data.datasets || data;
+    // Use params from props
+    const page = parseInt(params?.p || "1", 10);
     const perPage = 30;
     currentPage = page;
         try {
           const [org, dataset] = ds.id.split("/");
           const repoId = `${org}/${dataset}`;
           // Try to get compatible version, but don't fail the entire page if incompatible
           let version: string;
           try {
             version = await getDatasetVersion(repoId);
           } catch (err) {
             // Dataset is not compatible, skip it silently
+            console.warn(
+              `Skipping incompatible dataset ${repoId}: ${err instanceof Error ? err.message : err}`,
+            );
             return null;
           }
           const jsonUrl = buildVersionedUrl(repoId, version, "meta/info.json");
           const info = await fetchJson<DatasetMetadata>(jsonUrl);
           const videoEntry = Object.entries(info.features).find(
             ([, value]) => value.dtype === "video",
           );
           let videoUrl: string | null = null;
+          if (videoEntry && info.video_path) {
             const [key] = videoEntry;
             const videoPath = formatStringWithVars(info.video_path, {
               video_key: key,

src/app/page.tsx CHANGED Viewed

@@ -27,31 +27,31 @@ function HomeInner() {
   useEffect(() => {
     // Redirect to the first episode of the dataset if REPO_ID is defined
     if (process.env.REPO_ID) {
-      const episodeN = process.env.EPISODES
-        ?.split(/\s+/)
-        .map((x) => parseInt(x.trim(), 10))
-        .filter((x) => !isNaN(x))[0] ?? 0;
       router.push(`/${process.env.REPO_ID}/episode_${episodeN}`);
       return;
     }
     // sync with hf.co/spaces URL params
-    if (searchParams.get('path')) {
-      router.push(searchParams.get('path')!);
       return;
     }
     // legacy sync with hf.co/spaces URL params
     let redirectUrl: string | null = null;
-    if (searchParams.get('dataset') && searchParams.get('episode')) {
-      redirectUrl = `/${searchParams.get('dataset')}/episode_${searchParams.get('episode')}`;
-    } else if (searchParams.get('dataset')) {
-      redirectUrl = `/${searchParams.get('dataset')}`;
     }
-    if (redirectUrl && searchParams.get('t')) {
-      redirectUrl += `?t=${searchParams.get('t')}`;
     }
     if (redirectUrl) {

   useEffect(() => {
     // Redirect to the first episode of the dataset if REPO_ID is defined
     if (process.env.REPO_ID) {
+      const episodeN =
+        process.env.EPISODES?.split(/\s+/)
+          .map((x) => parseInt(x.trim(), 10))
+          .filter((x) => !isNaN(x))[0] ?? 0;
       router.push(`/${process.env.REPO_ID}/episode_${episodeN}`);
       return;
     }
     // sync with hf.co/spaces URL params
+    if (searchParams.get("path")) {
+      router.push(searchParams.get("path")!);
       return;
     }
     // legacy sync with hf.co/spaces URL params
     let redirectUrl: string | null = null;
+    if (searchParams.get("dataset") && searchParams.get("episode")) {
+      redirectUrl = `/${searchParams.get("dataset")}/episode_${searchParams.get("episode")}`;
+    } else if (searchParams.get("dataset")) {
+      redirectUrl = `/${searchParams.get("dataset")}`;
     }
+    if (redirectUrl && searchParams.get("t")) {
+      redirectUrl += `?t=${searchParams.get("t")}`;
     }
     if (redirectUrl) {

src/components/data-recharts.tsx CHANGED Viewed

@@ -54,14 +54,14 @@ export const DataRecharts = React.memo(
     const [hoveredTime, setHoveredTime] = useState<number | null>(null);
     const [expanded, setExpanded] = useState(false);
-    if (!Array.isArray(data) || data.length === 0) return null;
     useEffect(() => {
       if (typeof onChartsReady === "function") onChartsReady();
     }, [onChartsReady]);
     const combinedData = useMemo(() => expanded ? mergeGroups(data) : [], [data, expanded]);
     return (
       <div>
         {data.length > 1 && (
@@ -101,7 +101,6 @@ export const DataRecharts = React.memo(
   },
 );
 const SingleDataGraph = React.memo(
   ({
     data,
@@ -125,9 +124,19 @@ const SingleDataGraph = React.memo(
           } else {
             result[key] = value;
           }
-        } else if (value !== null && typeof value === "object" && !Array.isArray(value)) {
           // If it's an object, recurse
-          Object.assign(result, flattenRow(value, prefix ? `${prefix}${SERIES_NAME_DELIMITER}${key}` : key));
         }
       }
       if ("timestamp" in row && typeof row["timestamp"] === "number") {
@@ -137,7 +146,7 @@ const SingleDataGraph = React.memo(
     }
     // Flatten all rows for recharts
-    const chartData = useMemo(() => data.map(row => flattenRow(row)), [data]);
     const [dataKeys, setDataKeys] = useState<string[]>([]);
     const [visibleKeys, setVisibleKeys] = useState<string[]>([]);
@@ -216,22 +225,29 @@ const SingleDataGraph = React.memo(
         groupColorMap[group] = CHART_COLORS[idx % CHART_COLORS.length];
       });
-      const isGroupChecked = (group: string) => groups[group].every(k => visibleKeys.includes(k));
-      const isGroupIndeterminate = (group: string) => groups[group].some(k => visibleKeys.includes(k)) && !isGroupChecked(group);
       const handleGroupCheckboxChange = (group: string) => {
         if (isGroupChecked(group)) {
           // Uncheck all children
-          setVisibleKeys((prev) => prev.filter(k => !groups[group].includes(k)));
         } else {
           // Check all children
-          setVisibleKeys((prev) => Array.from(new Set([...prev, ...groups[group]])));
         }
       };
       const handleCheckboxChange = (key: string) => {
         setVisibleKeys((prev) =>
-          prev.includes(key) ? prev.filter((k) => k !== key) : [...prev, key]
         );
       };
@@ -245,7 +261,9 @@ const SingleDataGraph = React.memo(
                   <input
                     type="checkbox"
                     checked={isGroupChecked(group)}
-                    ref={el => { if (el) el.indeterminate = isGroupIndeterminate(group); }}
                     onChange={() => handleGroupCheckboxChange(group)}
                     className="size-3"
                     style={{ accentColor: color }}

     const [hoveredTime, setHoveredTime] = useState<number | null>(null);
     const [expanded, setExpanded] = useState(false);
     useEffect(() => {
       if (typeof onChartsReady === "function") onChartsReady();
     }, [onChartsReady]);
     const combinedData = useMemo(() => expanded ? mergeGroups(data) : [], [data, expanded]);
+    if (!Array.isArray(data) || data.length === 0) return null;
     return (
       <div>
         {data.length > 1 && (
   },
 );
 const SingleDataGraph = React.memo(
   ({
     data,
           } else {
             result[key] = value;
           }
+        } else if (
+          value !== null &&
+          typeof value === "object" &&
+          !Array.isArray(value)
+        ) {
           // If it's an object, recurse
+          Object.assign(
+            result,
+            flattenRow(
+              value,
+              prefix ? `${prefix}${SERIES_NAME_DELIMITER}${key}` : key,
+            ),
+          );
         }
       }
       if ("timestamp" in row && typeof row["timestamp"] === "number") {
     }
     // Flatten all rows for recharts
+    const chartData = useMemo(() => data.map((row) => flattenRow(row)), [data]);
     const [dataKeys, setDataKeys] = useState<string[]>([]);
     const [visibleKeys, setVisibleKeys] = useState<string[]>([]);
         groupColorMap[group] = CHART_COLORS[idx % CHART_COLORS.length];
       });
+      const isGroupChecked = (group: string) =>
+        groups[group].every((k) => visibleKeys.includes(k));
+      const isGroupIndeterminate = (group: string) =>
+        groups[group].some((k) => visibleKeys.includes(k)) &&
+        !isGroupChecked(group);
       const handleGroupCheckboxChange = (group: string) => {
         if (isGroupChecked(group)) {
           // Uncheck all children
+          setVisibleKeys((prev) =>
+            prev.filter((k) => !groups[group].includes(k)),
+          );
         } else {
           // Check all children
+          setVisibleKeys((prev) =>
+            Array.from(new Set([...prev, ...groups[group]])),
+          );
         }
       };
       const handleCheckboxChange = (key: string) => {
         setVisibleKeys((prev) =>
+          prev.includes(key) ? prev.filter((k) => k !== key) : [...prev, key],
         );
       };
                   <input
                     type="checkbox"
                     checked={isGroupChecked(group)}
+                    ref={(el) => {
+                      if (el) el.indeterminate = isGroupIndeterminate(group);
+                    }}
                     onChange={() => handleGroupCheckboxChange(group)}
                     className="size-3"
                     style={{ accentColor: color }}

src/components/simple-videos-player.tsx CHANGED Viewed

@@ -5,6 +5,11 @@ import { useTime } from "../context/time-context";
 import { FaExpand, FaCompress, FaTimes, FaEye } from "react-icons/fa";
 import type { VideoInfo } from "@/app/[org]/[dataset]/[episode]/fetch-data";
 type VideoPlayerProps = {
   videosInfo: VideoInfo[];
   onVideosReady?: () => void;
@@ -22,9 +27,9 @@ export const SimpleVideosPlayer = ({
   const [enlargedVideo, setEnlargedVideo] = React.useState<string | null>(null);
   const [showHiddenMenu, setShowHiddenMenu] = React.useState(false);
   const [videosReady, setVideosReady] = React.useState(false);
   const firstVisibleIdx = videosInfo.findIndex(
-    (video) => !hiddenVideos.includes(video.filename)
   );
   // Tracks the last time value set by the primary video's onTimeUpdate.
@@ -39,7 +44,7 @@ export const SimpleVideosPlayer = ({
   // Handle videos ready
   useEffect(() => {
     let readyCount = 0;
     const checkReady = () => {
       readyCount++;
       if (readyCount === videosInfo.length && onVideosReady) {
@@ -52,14 +57,17 @@ export const SimpleVideosPlayer = ({
     videoRefs.current.forEach((video, index) => {
       if (video) {
         const info = videosInfo[index];
         // Setup segment boundaries
         if (info.isSegmented) {
           const handleTimeUpdate = () => {
             const segmentEnd = info.segmentEnd || video.duration;
             const segmentStart = info.segmentStart || 0;
-            if (video.currentTime >= segmentEnd - 0.05) {
               video.currentTime = segmentStart;
               // Also update the global time to reset to start
               if (index === firstVisibleIdx) {
@@ -67,7 +75,7 @@ export const SimpleVideosPlayer = ({
               }
             }
           };
           const handleLoadedData = () => {
             video.currentTime = info.segmentStart || 0;
             checkReady();
@@ -109,17 +117,23 @@ export const SimpleVideosPlayer = ({
         }
       });
     };
-  }, [videosInfo, onVideosReady, setIsPlaying, firstVisibleIdx, setCurrentTime]);
   // Handle play/pause
   useEffect(() => {
     if (!videosReady) return;
     videoRefs.current.forEach((video, idx) => {
       if (video && !hiddenVideos.includes(videosInfo[idx].filename)) {
         if (isPlaying) {
-          video.play().catch(e => {
-            if (e.name !== 'AbortError') {
               console.error("Error playing video");
             }
           });
@@ -160,9 +174,9 @@ export const SimpleVideosPlayer = ({
   // Handle time update from first visible video
   const handleTimeUpdate = (e: React.SyntheticEvent<HTMLVideoElement>) => {
     const video = e.target as HTMLVideoElement;
-    const videoIndex = videoRefs.current.findIndex(ref => ref === video);
     const info = videosInfo[videoIndex];
     if (info) {
       let globalTime = video.currentTime;
       if (info.isSegmented) {
@@ -178,7 +192,7 @@ export const SimpleVideosPlayer = ({
     if (info.isSegmented) {
       const segmentStart = info.segmentStart || 0;
       const segmentEnd = info.segmentEnd || video.duration;
       if (video.currentTime < segmentStart || video.currentTime >= segmentEnd) {
         video.currentTime = segmentStart;
       }
@@ -206,7 +220,11 @@ export const SimpleVideosPlayer = ({
                 <button
                   key={filename}
                   className="block w-full text-left px-2 py-1 rounded hover:bg-slate-700 text-slate-100"
-                  onClick={() => setHiddenVideos(prev => prev.filter(v => v !== filename))}
                 >
                   {filename}
                 </button>
@@ -220,10 +238,10 @@ export const SimpleVideosPlayer = ({
       <div className="flex flex-wrap gap-x-2 gap-y-6">
         {videosInfo.map((info, idx) => {
           if (hiddenVideos.includes(info.filename)) return null;
           const isEnlarged = enlargedVideo === info.filename;
           const isFirstVisible = idx === firstVisibleIdx;
           return (
             <div
               key={info.filename}
@@ -239,15 +257,23 @@ export const SimpleVideosPlayer = ({
                   <button
                     title={isEnlarged ? "Minimize" : "Enlarge"}
                     className="ml-2 p-1 hover:bg-slate-700 rounded"
-                    onClick={() => setEnlargedVideo(isEnlarged ? null : info.filename)}
                   >
                     {isEnlarged ? <FaCompress /> : <FaExpand />}
                   </button>
                   <button
                     title="Hide Video"
                     className="ml-1 p-1 hover:bg-slate-700 rounded"
-                    onClick={() => setHiddenVideos(prev => [...prev, info.filename])}
-                    disabled={videosInfo.filter(v => !hiddenVideos.includes(v.filename)).length === 1}
                   >
                     <FaTimes />
                   </button>

 import { FaExpand, FaCompress, FaTimes, FaEye } from "react-icons/fa";
 import type { VideoInfo } from "@/app/[org]/[dataset]/[episode]/fetch-data";
+const THRESHOLDS = {
+  VIDEO_SYNC_TOLERANCE: 0.2,
+  VIDEO_SEGMENT_BOUNDARY: 0.05,
+};
 type VideoPlayerProps = {
   videosInfo: VideoInfo[];
   onVideosReady?: () => void;
   const [enlargedVideo, setEnlargedVideo] = React.useState<string | null>(null);
   const [showHiddenMenu, setShowHiddenMenu] = React.useState(false);
   const [videosReady, setVideosReady] = React.useState(false);
   const firstVisibleIdx = videosInfo.findIndex(
+    (video) => !hiddenVideos.includes(video.filename),
   );
   // Tracks the last time value set by the primary video's onTimeUpdate.
   // Handle videos ready
   useEffect(() => {
     let readyCount = 0;
     const checkReady = () => {
       readyCount++;
       if (readyCount === videosInfo.length && onVideosReady) {
     videoRefs.current.forEach((video, index) => {
       if (video) {
         const info = videosInfo[index];
         // Setup segment boundaries
         if (info.isSegmented) {
           const handleTimeUpdate = () => {
             const segmentEnd = info.segmentEnd || video.duration;
             const segmentStart = info.segmentStart || 0;
+            if (
+              video.currentTime >=
+              segmentEnd - THRESHOLDS.VIDEO_SEGMENT_BOUNDARY
+            ) {
               video.currentTime = segmentStart;
               // Also update the global time to reset to start
               if (index === firstVisibleIdx) {
               }
             }
           };
           const handleLoadedData = () => {
             video.currentTime = info.segmentStart || 0;
             checkReady();
         }
       });
     };
+  }, [
+    videosInfo,
+    onVideosReady,
+    setIsPlaying,
+    firstVisibleIdx,
+    setCurrentTime,
+  ]);
   // Handle play/pause
   useEffect(() => {
     if (!videosReady) return;
     videoRefs.current.forEach((video, idx) => {
       if (video && !hiddenVideos.includes(videosInfo[idx].filename)) {
         if (isPlaying) {
+          video.play().catch((e) => {
+            if (e.name !== "AbortError") {
               console.error("Error playing video");
             }
           });
   // Handle time update from first visible video
   const handleTimeUpdate = (e: React.SyntheticEvent<HTMLVideoElement>) => {
     const video = e.target as HTMLVideoElement;
+    const videoIndex = videoRefs.current.findIndex((ref) => ref === video);
     const info = videosInfo[videoIndex];
     if (info) {
       let globalTime = video.currentTime;
       if (info.isSegmented) {
     if (info.isSegmented) {
       const segmentStart = info.segmentStart || 0;
       const segmentEnd = info.segmentEnd || video.duration;
       if (video.currentTime < segmentStart || video.currentTime >= segmentEnd) {
         video.currentTime = segmentStart;
       }
                 <button
                   key={filename}
                   className="block w-full text-left px-2 py-1 rounded hover:bg-slate-700 text-slate-100"
+                  onClick={() =>
+                    setHiddenVideos((prev) =>
+                      prev.filter((v) => v !== filename),
+                    )
+                  }
                 >
                   {filename}
                 </button>
       <div className="flex flex-wrap gap-x-2 gap-y-6">
         {videosInfo.map((info, idx) => {
           if (hiddenVideos.includes(info.filename)) return null;
           const isEnlarged = enlargedVideo === info.filename;
           const isFirstVisible = idx === firstVisibleIdx;
           return (
             <div
               key={info.filename}
                   <button
                     title={isEnlarged ? "Minimize" : "Enlarge"}
                     className="ml-2 p-1 hover:bg-slate-700 rounded"
+                    onClick={() =>
+                      setEnlargedVideo(isEnlarged ? null : info.filename)
+                    }
                   >
                     {isEnlarged ? <FaCompress /> : <FaExpand />}
                   </button>
                   <button
                     title="Hide Video"
                     className="ml-1 p-1 hover:bg-slate-700 rounded"
+                    onClick={() =>
+                      setHiddenVideos((prev) => [...prev, info.filename])
+                    }
+                    disabled={
+                      videosInfo.filter(
+                        (v) => !hiddenVideos.includes(v.filename),
+                      ).length === 1
+                    }
                   >
                     <FaTimes />
                   </button>

src/components/videos-player.tsx CHANGED Viewed

@@ -178,7 +178,7 @@ export const VideosPlayer = ({
     if (video && video.duration) {
       const videoIndex = videoRefs.current.findIndex(ref => ref === video);
       const videoInfo = videosInfo[videoIndex];
       if (videoInfo?.isSegmented) {
         const segmentStart = videoInfo.segmentStart || 0;
         const globalTime = Math.max(0, video.currentTime - segmentStart);
@@ -197,18 +197,20 @@ export const VideosPlayer = ({
     const onCanPlayThrough = (videoIndex: number) => {
       const video = videoRefs.current[videoIndex];
       const videoInfo = videosInfo[videoIndex];
       // Setup video segmentation for v3.0 chunked videos
       if (video && videoInfo?.isSegmented) {
         const segmentStart = videoInfo.segmentStart || 0;
         const segmentEnd = videoInfo.segmentEnd || video.duration || 0;
         // Set initial time to segment start if not already set
-        if (video.currentTime < segmentStart || video.currentTime > segmentEnd) {
           video.currentTime = segmentStart;
         }
         // Add event listener to handle segment boundaries
         const handleTimeUpdate = () => {
           if (video.currentTime > segmentEnd) {
@@ -225,7 +227,7 @@ export const VideosPlayer = ({
           video.removeEventListener('timeupdate', handleTimeUpdate);
         });
       }
       videosReadyCount += 1;
       if (videosReadyCount === videosInfo.length) {
         if (typeof onVideosReady === "function") {

     if (video && video.duration) {
       const videoIndex = videoRefs.current.findIndex(ref => ref === video);
       const videoInfo = videosInfo[videoIndex];
       if (videoInfo?.isSegmented) {
         const segmentStart = videoInfo.segmentStart || 0;
         const globalTime = Math.max(0, video.currentTime - segmentStart);
     const onCanPlayThrough = (videoIndex: number) => {
       const video = videoRefs.current[videoIndex];
       const videoInfo = videosInfo[videoIndex];
       // Setup video segmentation for v3.0 chunked videos
       if (video && videoInfo?.isSegmented) {
         const segmentStart = videoInfo.segmentStart || 0;
         const segmentEnd = videoInfo.segmentEnd || video.duration || 0;
         // Set initial time to segment start if not already set
+        if (
+          video.currentTime < segmentStart ||
+          video.currentTime > segmentEnd
+        ) {
           video.currentTime = segmentStart;
         }
         // Add event listener to handle segment boundaries
         const handleTimeUpdate = () => {
           if (video.currentTime > segmentEnd) {
           video.removeEventListener('timeupdate', handleTimeUpdate);
         });
       }
       videosReadyCount += 1;
       if (videosReadyCount === videosInfo.length) {
         if (typeof onVideosReady === "function") {

src/types/chart.types.ts ADDED Viewed

	@@ -0,0 +1,24 @@

+/**
+ * Chart and data visualization type definitions
+ */
+// One chart row: a required numeric `timestamp` plus arbitrary series keys.
+export interface ChartDataPoint {
+  timestamp: number;
+  [key: string]: number | Record<string, number>; // flat value, or one level of nested name -> value
+}
+// A chart data group is a plain list of chart rows.
+export type ChartDataGroup = ChartDataPoint[];
+// Series column definition: a column key together with its series names.
+export interface SeriesColumn {
+  key: string;
+  value: string[]; // Series names
+}
+// Minimum and maximum of a group of series, used for scale calculation.
+export interface GroupStats {
+  min: number;
+  max: number;
+}

src/types/dataset.types.ts ADDED Viewed

	@@ -0,0 +1,70 @@

+/**
+ * Dataset type definitions for LeRobot datasets
+ * Based on the LeRobot dataset format (v2.0, v2.1, v3.0)
+ */
+// Dataset format versions accepted for `codebase_version`
+export type DatasetVersion = "v2.0" | "v2.1" | "v3.0";
+// Every `dtype` tag used by the feature interfaces below
+export type FeatureDType = "video" | "float32" | "int32" | "int64" | "bool";
+// Video-specific feature (selected by dtype "video")
+export interface VideoFeature {
+  dtype: "video";
+  shape: [number, number, number]; // [height, width, channels]
+  names: ["height", "width", "channel"]; // fixed labels for the three shape axes
+  video_info?: {
+    "video.fps": number;
+    "video.codec": string;
+    "video.pix_fmt": string;
+    "video.is_depth_map": boolean;
+    has_audio: boolean;
+  };
+}
+// Numeric feature (state, action, etc.)
+export interface NumericFeature {
+  dtype: "float32" | "int32" | "int64";
+  shape: number[];
+  names: string[] | { motors: string[] } | { [key: string]: string[] } | null; // flat list, keyed lists, or null
+  fps?: number;
+}
+// Boolean feature (never carries names)
+export interface BooleanFeature {
+  dtype: "bool";
+  shape: number[];
+  names: null;
+  fps?: number;
+}
+// Discriminated union for all feature types; `dtype` is the discriminant
+export type Feature = VideoFeature | NumericFeature | BooleanFeature;
+// Complete dataset metadata
+export interface DatasetMetadata {
+  codebase_version: DatasetVersion;
+  robot_type: string;
+  total_episodes: number;
+  total_frames: number;
+  total_tasks: number;
+  total_videos?: number;
+  total_chunks?: number;
+  chunks_size: number;
+  fps: number;
+  splits: Record<string, string>;
+  data_path: string;
+  video_path: string | null; // nullable; presumably null for datasets without videos (confirm against producers)
+  features: Record<string, Feature>; // keyed by feature name
+  data_files_size_in_mb?: number;
+  video_files_size_in_mb?: number;
+}
+// Dataset info used in components (a small subset of the metadata plus the repo id)
+export interface DatasetInfo {
+  repoId: string;
+  total_frames: number;
+  total_episodes: number;
+  fps: number;
+}

src/types/episode.types.ts ADDED Viewed

	@@ -0,0 +1,68 @@

+/**
+ * Episode type definitions for LeRobot datasets
+ */
+import type { DatasetInfo } from "./dataset.types";
+import type { VideoInfo } from "./video.types";
+import type { ChartDataGroup } from "./chart.types";
+// Episode metadata for v3.0; index-like fields accept number or bigint
+export interface EpisodeMetadataV3 {
+  episode_index: number | bigint;
+  data_chunk_index: number | bigint;
+  data_file_index: number | bigint;
+  dataset_from_index: number | bigint;
+  dataset_to_index: number | bigint;
+  video_chunk_index?: number | bigint;
+  video_file_index?: number | bigint;
+  video_from_timestamp?: number;
+  video_to_timestamp?: number;
+  length: number | bigint;
+  // Per-camera metadata (optional). This index signature also requires every
+  // named field above to be assignable to number | bigint | undefined.
+  [key: string]: number | bigint | undefined;
+}
+// Episode metadata for v2.x (simpler structure)
+export interface EpisodeMetadataV2 {
+  episode_chunk: number;
+  episode_index: number;
+}
+// Task metadata: a task index paired with its task text
+export interface TaskMetadata {
+  task_index: number | bigint;
+  task: string;
+}
+// Language instruction data: an optional primary field plus numbered variants
+export interface LanguageInstruction {
+  language_instruction?: string;
+  [key: `language_instruction_${number}`]: string | undefined; // e.g. language_instruction_2
+}
+// Episode data returned to components
+export interface EpisodeData {
+  datasetInfo: DatasetInfo;
+  episodeId: number;
+  videosInfo: VideoInfo[];
+  chartDataGroups: ChartDataGroup[];
+  episodes: number[];
+  ignoredColumns: string[];
+  duration: number;
+  task?: string;
+}
+// Raw parquet row structure; every known column is optional
+export interface ParquetDataRow {
+  timestamp?: number;
+  episode_index?: number | bigint;
+  frame_index?: number | bigint;
+  index?: number | bigint;
+  task_index?: number | bigint;
+  "observation.state"?: number[];
+  action?: number[];
+  "next.reward"?: number;
+  "next.done"?: boolean;
+  language_instruction?: string;
+  [key: string]: unknown; // For additional fields; callers must narrow before use
+}

src/types/index.ts ADDED Viewed

	@@ -0,0 +1,36 @@

+/**
+ * Central export for all type definitions
+ */
+// Dataset types
+export type {
+  DatasetVersion,
+  FeatureDType,
+  VideoFeature,
+  NumericFeature,
+  BooleanFeature,
+  Feature,
+  DatasetMetadata,
+  DatasetInfo,
+} from "./dataset.types";
+// Episode types
+export type {
+  EpisodeMetadataV3,
+  EpisodeMetadataV2,
+  TaskMetadata,
+  LanguageInstruction,
+  EpisodeData,
+  ParquetDataRow,
+} from "./episode.types";
+// Video types
+export type { VideoInfo, AdjacentEpisodeVideos } from "./video.types";
+// Chart types
+export type {
+  ChartDataPoint,
+  ChartDataGroup,
+  SeriesColumn,
+  GroupStats,
+} from "./chart.types";

src/types/video.types.ts ADDED Viewed

	@@ -0,0 +1,19 @@

+/**
+ * Video type definitions
+ */
+// Video information structure; the segment* fields are optional and accompany `isSegmented`
+export interface VideoInfo {
+  filename: string;
+  url: string;
+  isSegmented?: boolean;
+  segmentStart?: number; // players in this codebase fall back to 0 when this is missing
+  segmentEnd?: number; // players in this codebase fall back to the video duration when this is missing
+  segmentDuration?: number;
+}
+// Adjacent episode video info for preloading: an episode id with its videos
+export interface AdjacentEpisodeVideos {
+  episodeId: number;
+  videosInfo: VideoInfo[];
+}

src/utils/constants.ts ADDED Viewed

	@@ -0,0 +1,44 @@

+/**
+ * Centralized constants for the lerobot-dataset-visualizer
+ * Eliminates magic numbers and provides single source of truth for configuration
+ */
+// Formatting constants for episode and file indexing (zero-padded widths, in digits)
+export const PADDING = {
+  EPISODE_CHUNK: 3,
+  EPISODE_INDEX: 6,
+  FILE_INDEX: 3,
+  CHUNK_INDEX: 3,
+} as const;
+// Numeric thresholds for data processing
+export const THRESHOLDS = {
+  SCALE_GROUPING: 2, // max allowed difference in log10 magnitude when grouping series by scale
+  EPSILON: 1e-9, // added to |value| before log10 so that zero stays finite
+  VIDEO_SYNC_TOLERANCE: 0.2, // presumably seconds of allowed drift between videos (confirm at use site)
+  VIDEO_SEGMENT_BOUNDARY: 0.05, // presumably seconds before a segment end at which playback wraps (confirm at use site)
+} as const;
+// Chart configuration
+export const CHART_CONFIG = {
+  MAX_SERIES_PER_GROUP: 6, // larger chart groups are split into chunks of this size
+  SERIES_NAME_DELIMITER: " | ", // separates prefix and suffix inside a series name
+} as const;
+// Video player configuration
+export const VIDEO_PLAYER = {
+  JUMP_SECONDS: 5,
+  STEP_SIZE: 0.01,
+  DEBOUNCE_MS: 200,
+} as const;
+// HTTP configuration
+export const HTTP = {
+  TIMEOUT_MS: 10000,
+} as const;
+// Excluded columns by dataset version (V2 also excludes "timestamp"; V3 also excludes "next.done")
+export const EXCLUDED_COLUMNS = {
+  V2: ["timestamp", "frame_index", "episode_index", "index", "task_index"],
+  V3: ["index", "task_index", "episode_index", "frame_index", "next.done"],
+} as const;

src/utils/dataProcessing.ts ADDED Viewed

	@@ -0,0 +1,222 @@

+/**
+ * Data processing utilities for chart data grouping and transformation
+ * Consolidates duplicated logic from fetch-data.ts
+ */
+import { CHART_CONFIG, THRESHOLDS } from "./constants";
+import type { GroupStats } from "@/types";
+/**
+ * Groups the series of one chart row by their name suffix.
+ *
+ * Keys of the form `prefix<delimiter>suffix` (exactly one
+ * `CHART_CONFIG.SERIES_NAME_DELIMITER`) are bucketed by suffix. A bucket with
+ * several prefixes becomes a nested `{ prefix: value }` object stored under
+ * the suffix; a bucket with a single prefix is written back under its full
+ * original key. `timestamp` and keys that do not split into exactly two
+ * parts are copied through unchanged.
+ *
+ * Buckets are kept in `Map`s and the result is assembled with
+ * `Object.fromEntries`, so a series named after an `Object.prototype` member
+ * (e.g. "constructor" or "__proto__") is handled like any other name instead
+ * of colliding with an inherited property.
+ *
+ * @param row - Row data with numeric values
+ * @returns Grouped row data with nested structure for multi-key groups
+ */
+export function groupRowBySuffix(
+  row: Record<string, number>,
+): Record<string, number | Record<string, number>> {
+  const delimiter = CHART_CONFIG.SERIES_NAME_DELIMITER;
+  const entries: [string, number | Record<string, number>][] = [];
+  const suffixGroups = new Map<string, Map<string, number>>();
+  for (const [key, value] of Object.entries(row)) {
+    const parts = key === "timestamp" ? [] : key.split(delimiter);
+    if (parts.length !== 2) {
+      // `timestamp`, undelimited keys and keys with several delimiters stay flat.
+      entries.push([key, value]);
+      continue;
+    }
+    const [prefix, suffix] = parts;
+    let group = suffixGroups.get(suffix);
+    if (!group) {
+      group = new Map<string, number>();
+      suffixGroups.set(suffix, group);
+    }
+    group.set(prefix, value);
+  }
+  for (const [suffix, group] of suffixGroups) {
+    if (group.size === 1) {
+      // A lone series keeps its full original name as the key.
+      const [[prefix, value]] = group;
+      entries.push([`${prefix}${delimiter}${suffix}`, value]);
+    } else {
+      entries.push([suffix, Object.fromEntries(group)]);
+    }
+  }
+  // Later entries overwrite earlier ones with the same key, as plain assignment did.
+  return Object.fromEntries(entries);
+}
+/**
+ * Build a map from series suffix to the keys carrying that suffix.
+ *
+ * The suffix is the second part of the key after splitting on
+ * `CHART_CONFIG.SERIES_NAME_DELIMITER`; when there is no second part (or it is
+ * empty) the first part is used. Note that for keys with more than one
+ * delimiter this picks the middle part.
+ *
+ * Buckets are collected in a `Map` and converted with `Object.fromEntries`,
+ * so suffixes that match `Object.prototype` members (e.g. "toString") no
+ * longer hit an inherited property and crash on `.push`.
+ *
+ * @param numericKeys - Array of numeric column keys (excluding timestamp)
+ * @returns Map of suffix to array of keys with that suffix
+ */
+export function buildSuffixGroupsMap(
+  numericKeys: string[],
+): Record<string, string[]> {
+  const keysBySuffix = new Map<string, string[]>();
+  for (const key of numericKeys) {
+    const parts = key.split(CHART_CONFIG.SERIES_NAME_DELIMITER);
+    const suffix = parts[1] || parts[0]; // fallback to key if no delimiter
+    const bucket = keysBySuffix.get(suffix);
+    if (bucket) {
+      bucket.push(key);
+    } else {
+      keysBySuffix.set(suffix, [key]);
+    }
+  }
+  return Object.fromEntries(keysBySuffix);
+}
+/**
+ * Compute the value range of every suffix group.
+ *
+ * For each group, all rows are scanned and every numeric, non-NaN value found
+ * under any of the group's keys contributes to the group's min and max. A
+ * group with no such value keeps `min = Infinity` and `max = -Infinity`.
+ *
+ * @param chartData - Array of chart data rows
+ * @param suffixGroups - Array of suffix groups (each group is an array of keys)
+ * @returns Map from the first key of each group to its min/max statistics
+ */
+export function computeGroupStats(
+  chartData: Record<string, number>[],
+  suffixGroups: string[][],
+): Record<string, GroupStats> {
+  const statsById: Record<string, GroupStats> = {};
+  for (const seriesKeys of suffixGroups) {
+    let lowest = Infinity;
+    let highest = -Infinity;
+    for (const row of chartData) {
+      for (const seriesKey of seriesKeys) {
+        const sample = row[seriesKey];
+        // Skip missing cells, non-numbers and NaN.
+        if (typeof sample !== "number" || isNaN(sample)) continue;
+        if (sample < lowest) lowest = sample;
+        if (sample > highest) highest = sample;
+      }
+    }
+    // The first key of the group doubles as the group id.
+    statsById[seriesKeys[0]] = { min: lowest, max: highest };
+  }
+  return statsById;
+}
+/**
+ * Cluster suffix groups whose value ranges have a similar order of magnitude.
+ *
+ * Groups are visited in input order. Each group that is not yet claimed and
+ * has finite min/max becomes the anchor of a new cluster. Every other
+ * unclaimed group joins that cluster when both log10(|min| + EPSILON) and
+ * log10(|max| + EPSILON) are within `THRESHOLDS.SCALE_GROUPING` of the
+ * anchor's values.
+ *
+ * Behaviour worth knowing:
+ * - groups whose stats are not finite are left out of the result entirely;
+ * - a candidate with `min === max` never joins another anchor's cluster,
+ *   although such a group can still become an anchor itself.
+ *
+ * @param suffixGroups - Array of suffix groups to analyze
+ * @param groupStats - Statistics for each group, keyed by the group's first key
+ * @returns Map from anchor group id to the suffix groups clustered with it
+ */
+export function groupByScale(
+  suffixGroups: string[][],
+  groupStats: Record<string, GroupStats>,
+): Record<string, string[][]> {
+  // log10 of the magnitude; EPSILON keeps the result finite for zero.
+  const magnitude = (value: number): number =>
+    Math.log10(Math.abs(value) + THRESHOLDS.EPSILON);
+  const clusters: Record<string, string[][]> = {};
+  const claimed = new Set<string>();
+  for (const anchor of suffixGroups) {
+    const anchorId = anchor[0];
+    if (claimed.has(anchorId)) continue;
+    const { min: anchorMin, max: anchorMax } = groupStats[anchorId];
+    if (!isFinite(anchorMin) || !isFinite(anchorMax)) continue;
+    const anchorLow = magnitude(anchorMin);
+    const anchorHigh = magnitude(anchorMax);
+    const cluster: string[][] = [anchor];
+    claimed.add(anchorId);
+    for (const candidate of suffixGroups) {
+      const candidateId = candidate[0];
+      // The anchor is already claimed, so this check also skips it.
+      if (claimed.has(candidateId)) continue;
+      const { min: low, max: high } = groupStats[candidateId];
+      const comparable = isFinite(low) && isFinite(high) && low !== high;
+      if (!comparable) continue;
+      const lowGap = Math.abs(anchorLow - magnitude(low));
+      const highGap = Math.abs(anchorHigh - magnitude(high));
+      if (
+        lowGap <= THRESHOLDS.SCALE_GROUPING &&
+        highGap <= THRESHOLDS.SCALE_GROUPING
+      ) {
+        cluster.push(candidate);
+        claimed.add(candidateId);
+      }
+    }
+    clusters[anchorId] = cluster;
+  }
+  return clusters;
+}
+/**
+ * Turn scale clusters into the final list of chart groups.
+ *
+ * Clusters are ordered by how many suffix groups they contain (largest
+ * first). Each cluster is flattened into a single list of series keys; a list
+ * longer than `CHART_CONFIG.MAX_SERIES_PER_GROUP` is cut into consecutive
+ * chunks of at most that size so that no chart is overcrowded.
+ *
+ * @param scaleGroups - Map of scale groups
+ * @returns Array of chart groups (each group is an array of series keys)
+ */
+export function flattenScaleGroups(
+  scaleGroups: Record<string, string[][]>,
+): string[][] {
+  const limit = CHART_CONFIG.MAX_SERIES_PER_GROUP;
+  const largestFirst = Object.values(scaleGroups).sort(
+    (left, right) => right.length - left.length,
+  );
+  const chartGroups: string[][] = [];
+  for (const cluster of largestFirst) {
+    const series = cluster.flat();
+    if (series.length <= limit) {
+      // Small enough (or empty): keep as a single chart group.
+      chartGroups.push(series);
+      continue;
+    }
+    for (let start = 0; start < series.length; start += limit) {
+      chartGroups.push(series.slice(start, start + limit));
+    }
+  }
+  return chartGroups;
+}
+/**
+ * End-to-end pipeline that organizes series names into chart groups.
+ *
+ * Steps: drop `timestamp`, bucket the remaining names by suffix, compute each
+ * bucket's min/max over the chart data, cluster buckets of similar scale, and
+ * flatten the clusters into size-limited chart groups.
+ *
+ * @param seriesNames - All series names including timestamp
+ * @param chartData - Array of chart data rows
+ * @returns Array of chart groups ready for visualization
+ */
+export function processChartDataGroups(
+  seriesNames: string[],
+  chartData: Record<string, number>[],
+): string[][] {
+  const plottedNames = seriesNames.filter((name) => name !== "timestamp");
+  const suffixGroups = Object.values(buildSuffixGroupsMap(plottedNames));
+  const stats = computeGroupStats(chartData, suffixGroups);
+  return flattenScaleGroups(groupByScale(suffixGroups, stats));
+}

src/utils/languageInstructions.ts ADDED Viewed

	@@ -0,0 +1,105 @@

+/**
+ * Language instruction extraction utilities
+ * Consolidates duplicated logic from fetch-data.ts
+ */
+/**
+ * Extract language instructions from episode data rows.
+ *
+ * The rows named by `sampleIndices` are inspected in order. The first row
+ * with a non-empty string `language_instruction` wins: its primary
+ * instruction is followed by `language_instruction_2`, `_3`, ... for as long
+ * as consecutive numbered fields hold strings, and the search stops.
+ *
+ * Indices that do not address an existing row (negative, fractional, past the
+ * end, or pointing at a null/undefined entry) are skipped instead of
+ * throwing.
+ *
+ * @param episodeData - Array of episode data rows
+ * @param sampleIndices - Indices of rows to check (default: [0] for first row only)
+ * @returns Instructions joined with "\n", or undefined if none found
+ */
+export function extractLanguageInstructions(
+  episodeData: Record<string, unknown>[],
+  sampleIndices: number[] = [0],
+): string | undefined {
+  for (const idx of sampleIndices) {
+    const row: Record<string, unknown> | undefined | null = episodeData[idx];
+    if (row === undefined || row === null) continue;
+    const primary = row.language_instruction;
+    if (typeof primary !== "string" || primary === "") continue;
+    const instructions = [primary];
+    // Numbered fields must be consecutive; the first gap ends the list.
+    for (let instructionNum = 2; ; instructionNum++) {
+      const extra = row[`language_instruction_${instructionNum}`];
+      if (typeof extra !== "string") break;
+      instructions.push(extra);
+    }
+    return instructions.join("\n");
+  }
+  return undefined;
+}
+/**
+ * Look up the task text for a task index in the tasks metadata.
+ *
+ * Only `number` and `bigint` indices are accepted (a bigint is converted with
+ * `Number`). The index must address an existing entry of `tasksData`. The
+ * task text is read from `__index_level_0__` when that field holds a string,
+ * otherwise from `task` when that holds a string.
+ *
+ * @param taskIndex - Task index (can be BigInt or number)
+ * @param tasksData - Array of task metadata objects
+ * @returns Task string or undefined if not found
+ */
+export function extractTaskFromMetadata(
+  taskIndex: unknown,
+  tasksData: Record<string, unknown>[],
+): string | undefined {
+  if (typeof taskIndex !== "bigint" && typeof taskIndex !== "number") {
+    return undefined;
+  }
+  // Number() leaves a number untouched and converts a bigint.
+  const position = Number(taskIndex);
+  if (position < 0 || position >= tasksData.length) {
+    return undefined;
+  }
+  const entry = tasksData[position];
+  if (!entry) {
+    return undefined;
+  }
+  // Candidate fields in priority order.
+  for (const field of ["__index_level_0__", "task"]) {
+    if (field in entry) {
+      const candidate = entry[field];
+      if (typeof candidate === "string") return candidate;
+    }
+  }
+  return undefined;
+}

src/utils/parquetUtils.ts CHANGED Viewed

@@ -36,19 +36,19 @@ export async function fetchJson<T>(url: string): Promise<T> {
 export function formatStringWithVars(
   format: string,
-  vars: Record<string, string>,
 ): string {
-  return format.replace(/{(\w+)(?::\d+d)?}/g, (_, key) => vars[key]);
 }
 // Fetch and parse the Parquet file
 export async function fetchParquetFile(url: string): Promise<ArrayBuffer> {
   const res = await fetch(url);
   if (!res.ok) {
     throw new Error(`Failed to fetch ${url}: ${res.status} ${res.statusText}`);
   }
   return res.arrayBuffer();
 }
@@ -64,7 +64,7 @@ export async function readParquetColumn(
         columns: columns.length > 0 ? columns : undefined,
         onComplete: (data: unknown[][]) => {
           resolve(data);
-        }
       });
     } catch (error) {
       reject(error);
@@ -94,12 +94,12 @@ export function getRows(currentFrameData: unknown[], columns: ColumnInfo[]) {
     return [];
   }
-  const rows = [];
   const nRows = Math.max(...columns.map((column) => column.value.length));
   let rowIndex = 0;
   while (rowIndex < nRows) {
-    const row = [];
     // number of states may NOT match number of actions. In this case, we null-pad the 2D array
     const nullCell = { isNull: true };
     // row consists of [state value, action value]

 export function formatStringWithVars(
   format: string,
+  vars: Record<string, string | number>,
 ): string {
+  return format.replace(/{(\w+)(?::\d+d)?}/g, (_, key) => String(vars[key]));
 }
 // Fetch a Parquet file and return its raw bytes; no parsing happens here.
 // Throws an Error naming the URL, status and status text when the response is not OK.
 export async function fetchParquetFile(url: string): Promise<ArrayBuffer> {
   const res = await fetch(url);
   if (!res.ok) {
     throw new Error(`Failed to fetch ${url}: ${res.status} ${res.statusText}`);
   }
   return res.arrayBuffer();
 }
         columns: columns.length > 0 ? columns : undefined,
         onComplete: (data: unknown[][]) => {
           resolve(data);
+        },
       });
     } catch (error) {
       reject(error);
     return [];
   }
+  const rows: Array<Array<{ isNull: true } | unknown>> = [];
   const nRows = Math.max(...columns.map((column) => column.value.length));
   let rowIndex = 0;
   while (rowIndex < nRows) {
+    const row: Array<{ isNull: true } | unknown> = [];
     // number of states may NOT match number of actions. In this case, we null-pad the 2D array
     const nullCell = { isNull: true };
     // row consists of [state value, action value]

src/utils/stringFormatting.ts ADDED Viewed

	@@ -0,0 +1,98 @@

+/**
+ * String formatting utilities for path construction
+ * Consolidates repeated padding and path building logic
+ */
+import { PADDING } from "./constants";
+/**
+ * Pad number to specified length with leading zeros.
+ *
+ * Numbers that are already at least `length` characters long are returned
+ * unchanged. For negative numbers the zeros go between the sign and the
+ * digits (`padNumber(-5, 3)` is "-05"), not in front of the sign.
+ *
+ * @param num - Number to pad
+ * @param length - Desired minimum string length, sign included
+ * @returns Zero-padded string
+ */
+export function padNumber(num: number, length: number): string {
+  if (num < 0) {
+    // Pad the magnitude and re-attach the sign so it stays in front.
+    return `-${Math.abs(num).toString().padStart(length - 1, "0")}`;
+  }
+  return num.toString().padStart(length, "0");
+}
+/**
+ * Render an episode chunk index at the standard episode-chunk width
+ * (`PADDING.EPISODE_CHUNK`), zero-padded.
+ *
+ * @param chunkIndex - Chunk index number
+ * @returns Padded chunk index string (e.g., "001")
+ */
+export function formatEpisodeChunk(chunkIndex: number): string {
+  const width = PADDING.EPISODE_CHUNK;
+  return padNumber(chunkIndex, width);
+}
+/**
+ * Render an episode index at the standard episode-index width
+ * (`PADDING.EPISODE_INDEX`), zero-padded.
+ *
+ * @param episodeIndex - Episode index number
+ * @returns Padded episode index string (e.g., "000042")
+ */
+export function formatEpisodeIndex(episodeIndex: number): string {
+  const width = PADDING.EPISODE_INDEX;
+  return padNumber(episodeIndex, width);
+}
+/**
+ * Render a file index at the standard file-index width
+ * (`PADDING.FILE_INDEX`), zero-padded.
+ *
+ * @param fileIndex - File index number
+ * @returns Padded file index string (e.g., "001")
+ */
+export function formatFileIndex(fileIndex: number): string {
+  const width = PADDING.FILE_INDEX;
+  return padNumber(fileIndex, width);
+}
+/**
+ * Render a chunk index at the standard chunk-index width
+ * (`PADDING.CHUNK_INDEX`), zero-padded.
+ *
+ * @param chunkIndex - Chunk index number
+ * @returns Padded chunk index string (e.g., "001")
+ */
+export function formatChunkIndex(chunkIndex: number): string {
+  const width = PADDING.CHUNK_INDEX;
+  return padNumber(chunkIndex, width);
+}
+/**
+ * Build the repository-relative path of a v3 video file.
+ *
+ * @param videoKey - Video key/name (e.g., "observation.image")
+ * @param chunkIndex - Chunk index, rendered with formatChunkIndex
+ * @param fileIndex - File index within chunk, rendered with formatFileIndex
+ * @returns Formatted video path (e.g., "videos/observation.image/chunk-001/file-000.mp4")
+ */
+export function buildV3VideoPath(
+  videoKey: string,
+  chunkIndex: number,
+  fileIndex: number,
+): string {
+  const chunkDir = `chunk-${formatChunkIndex(chunkIndex)}`;
+  const fileName = `file-${formatFileIndex(fileIndex)}.mp4`;
+  return `videos/${videoKey}/${chunkDir}/${fileName}`;
+}
+/**
+ * Build the repository-relative path of a v3 data parquet file.
+ *
+ * @param chunkIndex - Data chunk index, rendered with formatChunkIndex
+ * @param fileIndex - File index within chunk, rendered with formatFileIndex
+ * @returns Formatted data path (e.g., "data/chunk-001/file-000.parquet")
+ */
+export function buildV3DataPath(chunkIndex: number, fileIndex: number): string {
+  const chunkDir = `chunk-${formatChunkIndex(chunkIndex)}`;
+  const fileName = `file-${formatFileIndex(fileIndex)}.parquet`;
+  return `data/${chunkDir}/${fileName}`;
+}
+/**
+ * Build the repository-relative path of a v3 episodes-metadata parquet file.
+ *
+ * @param chunkIndex - Episode chunk index, rendered with formatChunkIndex
+ * @param fileIndex - File index within chunk, rendered with formatFileIndex
+ * @returns Formatted episodes metadata path (e.g., "meta/episodes/chunk-001/file-000.parquet")
+ */
+export function buildV3EpisodesMetadataPath(
+  chunkIndex: number,
+  fileIndex: number,
+): string {
+  const chunkDir = `chunk-${formatChunkIndex(chunkIndex)}`;
+  const fileName = `file-${formatFileIndex(fileIndex)}.parquet`;
+  return `meta/episodes/${chunkDir}/${fileName}`;
+}

src/utils/typeGuards.ts ADDED Viewed

	@@ -0,0 +1,114 @@

+/**
+ * Type guard utilities for safe type narrowing
+ * Replaces unsafe type assertions throughout the codebase
+ */
+/**
+ * Narrow an unknown value to `bigint`.
+ *
+ * @param value - Value to check
+ * @returns True if `typeof value` is "bigint"
+ */
+export function isBigInt(value: unknown): value is bigint {
+  const kind = typeof value;
+  return kind === "bigint";
+}
+/**
+ * Convert a bigint or number to a number, with a fallback for anything else.
+ *
+ * A number is returned as-is (including NaN and infinities); a bigint goes
+ * through `Number()`, which may lose precision for very large values. Every
+ * other input yields `fallback`.
+ *
+ * @param value - Value to convert (can be BigInt, number, or other)
+ * @param fallback - Value returned for non-numeric input (default: 0)
+ * @returns Number value or fallback
+ */
+export function bigIntToNumber(value: unknown, fallback: number = 0): number {
+  const convertible = typeof value === "bigint" || typeof value === "number";
+  // Number() is the identity on numbers and converts bigints.
+  return convertible ? Number(value) : fallback;
+}
+/**
+ * Narrow an unknown value to `number | bigint`.
+ *
+ * NaN and infinities are numbers and therefore pass.
+ *
+ * @param value - Value to check
+ * @returns True if value is a number or BigInt
+ */
+export function isNumeric(value: unknown): value is number | bigint {
+  const kind = typeof value;
+  return kind === "bigint" || kind === "number";
+}
+/**
+ * Check that a value is usable as a task index: a non-negative integer.
+ *
+ * The value is first converted with bigIntToNumber (non-numeric input maps to
+ * -1 and is rejected). NOTE(review): a bigint such as `3n` also passes, even
+ * though the predicate narrows to `number`; convert before using the value
+ * as a number.
+ *
+ * @param value - Value to check
+ * @returns True if value is a valid task index (non-negative integer)
+ */
+export function isValidTaskIndex(value: unknown): value is number {
+  const index = bigIntToNumber(value, -1);
+  if (index < 0) return false;
+  return Number.isInteger(index);
+}
+/**
+ * Type guard for HTMLVideoElement.
+ *
+ * Safe to call where no DOM exists (e.g. server-side rendering or Node):
+ * when the `HTMLVideoElement` global is missing the guard returns false
+ * instead of throwing a ReferenceError.
+ *
+ * @param element - Element to check
+ * @returns True if element is an HTMLVideoElement
+ */
+export function isVideoElement(element: unknown): element is HTMLVideoElement {
+  return (
+    typeof HTMLVideoElement !== "undefined" &&
+    element instanceof HTMLVideoElement
+  );
+}
+/**
+ * Convert any value to a string.
+ *
+ * `null` and `undefined` become the empty string; everything else goes
+ * through `String()`, which returns strings unchanged.
+ *
+ * @param value - Value to convert
+ * @returns String representation of the value
+ */
+export function toString(value: unknown): string {
+  return value === null || value === undefined ? "" : String(value);
+}
+/**
+ * Type guard for non-empty strings.
+ *
+ * Whitespace-only strings count as non-empty.
+ *
+ * @param value - Value to check
+ * @returns True if value is a string with at least one character
+ */
+export function isNonEmptyString(value: unknown): value is string {
+  if (typeof value !== "string") return false;
+  return value !== "";
+}
+/**
+ * Type guard for plain object-like values.
+ *
+ * Accepts anything whose `typeof` is "object" except `null` and arrays, so
+ * class instances such as `Date` pass while functions do not.
+ *
+ * @param value - Value to check
+ * @returns True if value is a non-null, non-array object
+ */
+export function isObject(value: unknown): value is Record<string, unknown> {
+  if (value === null || Array.isArray(value)) return false;
+  return typeof value === "object";
+}
+/**
+ * Check that a value is an object with a given property whose value passes a
+ * type guard.
+ *
+ * The property may be own or inherited (`in` is used). NOTE(review): `key` is
+ * typed as `string`, so the resulting predicate types every string property
+ * of the object as `T`, not just `key`; only rely on the narrowing for the
+ * property that was actually checked.
+ *
+ * @param obj - Object to check
+ * @param key - Property key to check
+ * @param typeGuard - Type guard function for the property value
+ * @returns True if property exists and passes type guard
+ */
+export function hasPropertyOfType<T>(
+  obj: unknown,
+  key: string,
+  typeGuard: (value: unknown) => value is T,
+): obj is Record<string, unknown> & { [K in typeof key]: T } {
+  if (!isObject(obj)) return false;
+  if (!(key in obj)) return false;
+  return typeGuard(obj[key]);
+}

src/utils/versionUtils.ts CHANGED Viewed

@@ -2,7 +2,8 @@
  * Utility functions for checking dataset version compatibility
  */
-const DATASET_URL = process.env.DATASET_URL || "https://huggingface.co/datasets";
 /**
  * Dataset information structure from info.json
@@ -44,17 +45,18 @@ export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
   try {
     const testUrl = `${DATASET_URL}/${repoId}/resolve/main/meta/info.json`;
     const controller = new AbortController();
     const timeoutId = setTimeout(() => controller.abort(), 10000);
     const response = await fetch(testUrl, {
       method: "GET",
-      signal: controller.signal
     });
     clearTimeout(timeoutId);
     if (!response.ok) {
       throw new Error(`Failed to fetch dataset info: ${response.status}`);
     }
@@ -62,7 +64,9 @@ export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
     const data = await response.json();
     if (!data.features) {
-      throw new Error("Dataset info.json does not have the expected features structure");
     }
     datasetInfoCache.set(repoId, { data: data as DatasetInfo, expiry: Date.now() + CACHE_TTL_MS });
@@ -73,7 +77,7 @@ export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
     }
     throw new Error(
       `Dataset ${repoId} is not compatible with this visualizer. ` +
-      "Failed to read dataset information from the main revision."
     );
   }
 }
@@ -105,7 +109,10 @@ export async function getDatasetVersion(repoId: string): Promise<string> {
   return version;
 }
-export function buildVersionedUrl(repoId: string, version: string, path: string): string {
   return `${DATASET_URL}/${repoId}/resolve/main/${path}`;
 }

  * Utility functions for checking dataset version compatibility
  */
+const DATASET_URL =
+  process.env.DATASET_URL || "https://huggingface.co/datasets"; // env override; `||` also falls back when the variable is set to ""
 /**
  * Dataset information structure from info.json
   try {
     const testUrl = `${DATASET_URL}/${repoId}/resolve/main/meta/info.json`;
     const controller = new AbortController();
     const timeoutId = setTimeout(() => controller.abort(), 10000);
     const response = await fetch(testUrl, {
       method: "GET",
+      cache: "no-store",
+      signal: controller.signal,
     });
     clearTimeout(timeoutId);
     if (!response.ok) {
       throw new Error(`Failed to fetch dataset info: ${response.status}`);
     }
     const data = await response.json();
     if (!data.features) {
+      throw new Error(
+        "Dataset info.json does not have the expected features structure",
+      );
     }
     datasetInfoCache.set(repoId, { data: data as DatasetInfo, expiry: Date.now() + CACHE_TTL_MS });
     }
     throw new Error(
       `Dataset ${repoId} is not compatible with this visualizer. ` +
+        "Failed to read dataset information from the main revision.",
     );
   }
 }
   return version;
 }
+export function buildVersionedUrl(
+  repoId: string, // dataset repository id, placed right after DATASET_URL
+  version: string, // NOTE(review): never interpolated below — the URL always resolves against `main`; confirm this is intended
+  path: string, // file path appended after `resolve/main/`
+): string {
   return `${DATASET_URL}/${repoId}/resolve/main/${path}`;
 }