pepijn223 HF Staff committed on
Commit
e457eba
·
unverified ·
2 Parent(s): 9f9a669 bde0e45

Merge branch 'main' into feat/speedup_visulization

Browse files
.dockerignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ node_modules
2
+ .next
3
+ .git
4
+ .gitignore
5
+ README.md
6
+ .env*.local
7
+ *.log
8
+ .DS_Store
9
+ .vscode
10
+ .idea
.github/workflows/deploy-release.yml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Deploy to Hf Hub
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+
8
+ # to run this workflow manually from the Actions tab
9
+ workflow_dispatch:
10
+
11
+ jobs:
12
+ sync-to-hub:
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v3
16
+ with:
17
+ fetch-depth: 0
18
+ lfs: true
19
+ - name: Push to hub
20
+ env:
21
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
22
+ run: git push https://mishig:$HF_TOKEN@huggingface.co/spaces/lerobot/visualize_dataset main -f
.github/workflows/type-check.yml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Type Check & Lint
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ type-check:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+
15
+ - uses: oven-sh/setup-bun@v1
16
+ with:
17
+ bun-version: latest
18
+
19
+ - name: Install dependencies
20
+ run: bun install
21
+
22
+ - name: Type check
23
+ run: bun run type-check
24
+
25
+ - name: Lint
26
+ run: bun run lint
27
+
28
+ - name: Format check
29
+ run: bun run format:check
.gitignore CHANGED
@@ -40,3 +40,6 @@ yarn-error.log*
40
  # typescript
41
  *.tsbuildinfo
42
  next-env.d.ts
 
 
 
 
40
  # typescript
41
  *.tsbuildinfo
42
  next-env.d.ts
43
+
44
+ # claude code local settings
45
+ .claude/
Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM oven/bun:1 AS base
2
+
3
+ # Set working directory
4
+ WORKDIR /app
5
+
6
+ # Copy package files
7
+ COPY package.json bun.lock* ./
8
+
9
+ # Install dependencies
10
+ RUN bun install --frozen-lockfile
11
+
12
+ # Copy the rest of the application
13
+ COPY . .
14
+
15
+ # Build the application
16
+ RUN bun run build
17
+
18
+ # Expose port 7860
19
+ EXPOSE 7860
20
+
21
+ # Set environment variable for port
22
+ ENV PORT=7860
23
+
24
+ # Start the application
25
+ CMD ["bun", "start"]
README.md CHANGED
@@ -1,3 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
1
  # LeRobot Dataset Visualizer
2
 
3
  LeRobot Dataset Tool and Visualizer is a web application for interactive exploration and visualization of robotics datasets, particularly those in the LeRobot format. It enables users to browse, view, and analyze episodes from large-scale robotics datasets, combining synchronized video playback with rich, interactive data graphs.
@@ -28,15 +39,28 @@ This tool is designed to help robotics researchers and practitioners quickly ins
28
 
29
  ## Getting Started
30
 
31
- First, run the development server:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  ```bash
34
- npm run dev
35
- # or
36
- yarn dev
37
- # or
38
- pnpm dev
39
- # or
40
  bun dev
41
  ```
42
 
@@ -44,13 +68,54 @@ Open [http://localhost:3000](http://localhost:3000) with your browser to see the
44
 
45
  You can start editing the page by modifying `src/app/page.tsx` or other files in the `src/` directory. The app supports hot-reloading for rapid development.
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  ### Environment Variables
48
 
49
  - `DATASET_URL`: (optional) Base URL for dataset hosting (defaults to HuggingFace Datasets).
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  ## Contributing
52
 
53
  Contributions, bug reports, and feature requests are welcome! Please open an issue or submit a pull request.
54
 
55
- ### Acknowledgement
 
56
  The app was originally created by [@Mishig25](https://github.com/mishig25) and taken from this PR [#1055](https://github.com/huggingface/lerobot/pull/1055)
 
1
+ ---
2
+ title: Visualize Dataset (v2.0+ latest dataset format)
3
+ emoji: 💻
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: false
9
+ license: apache-2.0
10
+ ---
11
+
12
  # LeRobot Dataset Visualizer
13
 
14
  LeRobot Dataset Tool and Visualizer is a web application for interactive exploration and visualization of robotics datasets, particularly those in the LeRobot format. It enables users to browse, view, and analyze episodes from large-scale robotics datasets, combining synchronized video playback with rich, interactive data graphs.
 
39
 
40
  ## Getting Started
41
 
42
+ ### Prerequisites
43
+
44
+ This project uses [Bun](https://bun.sh) as its package manager. If you don't have it installed:
45
+
46
+ ```bash
47
+ # Install Bun
48
+ curl -fsSL https://bun.sh/install | bash
49
+ ```
50
+
51
+ ### Installation
52
+
53
+ Install dependencies:
54
+
55
+ ```bash
56
+ bun install
57
+ ```
58
+
59
+ ### Development
60
+
61
+ Run the development server:
62
 
63
  ```bash
 
 
 
 
 
 
64
  bun dev
65
  ```
66
 
 
68
 
69
  You can start editing the page by modifying `src/app/page.tsx` or other files in the `src/` directory. The app supports hot-reloading for rapid development.
70
 
71
+ ### Other Commands
72
+
73
+ ```bash
74
+ # Build for production
75
+ bun run build
76
+
77
+ # Start production server
78
+ bun start
79
+
80
+ # Run linter
81
+ bun run lint
82
+
83
+ # Format code
84
+ bun run format
85
+ ```
86
+
87
  ### Environment Variables
88
 
89
  - `DATASET_URL`: (optional) Base URL for dataset hosting (defaults to HuggingFace Datasets).
90
 
91
+ ## Docker Deployment
92
+
93
+ This application can be deployed using Docker with Bun for optimal performance and self-contained builds.
94
+
95
+ ### Build the Docker image
96
+
97
+ ```bash
98
+ docker build -t lerobot-visualizer .
99
+ ```
100
+
101
+ ### Run the container
102
+
103
+ ```bash
104
+ docker run -p 7860:7860 lerobot-visualizer
105
+ ```
106
+
107
+ The application will be available at [http://localhost:7860](http://localhost:7860).
108
+
109
+ ### Run with custom environment variables
110
+
111
+ ```bash
112
+ docker run -p 7860:7860 -e DATASET_URL=your-url lerobot-visualizer
113
+ ```
114
+
115
  ## Contributing
116
 
117
  Contributions, bug reports, and feature requests are welcome! Please open an issue or submit a pull request.
118
 
119
+ ### Acknowledgement
120
+
121
  The app was originally created by [@Mishig25](https://github.com/mishig25) and taken from this PR [#1055](https://github.com/huggingface/lerobot/pull/1055)
bun.lock ADDED
The diff for this file is too large to render. See raw diff
 
eslint.config.mjs CHANGED
@@ -11,6 +11,12 @@ const compat = new FlatCompat({
11
 
12
  const eslintConfig = [
13
  ...compat.extends("next/core-web-vitals", "next/typescript"),
 
 
 
 
 
 
14
  ];
15
 
16
  export default eslintConfig;
 
11
 
12
  const eslintConfig = [
13
  ...compat.extends("next/core-web-vitals", "next/typescript"),
14
+ {
15
+ rules: {
16
+ // Allow `any` type as warning - core types are implemented, peripheral areas still need typing
17
+ "@typescript-eslint/no-explicit-any": "warn",
18
+ },
19
+ },
20
  ];
21
 
22
  export default eslintConfig;
next.config.ts CHANGED
@@ -1,5 +1,5 @@
1
  import type { NextConfig } from "next";
2
- import packageJson from './package.json';
3
 
4
  const nextConfig: NextConfig = {
5
  typescript: {
 
1
  import type { NextConfig } from "next";
2
+ import packageJson from "./package.json";
3
 
4
  const nextConfig: NextConfig = {
5
  typescript: {
package.json CHANGED
@@ -7,7 +7,11 @@
7
  "build": "next build",
8
  "start": "next start",
9
  "lint": "next lint",
10
- "format": "prettier --write ."
 
 
 
 
11
  },
12
  "dependencies": {
13
  "@react-three/drei": "^10.7.7",
 
7
  "build": "next build",
8
  "start": "next start",
9
  "lint": "next lint",
10
+ "format": "prettier --write .",
11
+ "format:check": "prettier --check .",
12
+ "type-check": "tsc --noEmit",
13
+ "type-check:watch": "tsc --noEmit --watch",
14
+ "validate": "bun run type-check && bun run lint && bun run format:check"
15
  },
16
  "dependencies": {
17
  "@react-three/drei": "^10.7.7",
src/app/[org]/[dataset]/[episode]/episode-viewer.tsx CHANGED
@@ -51,6 +51,11 @@ export default function EpisodeViewer({
51
  </div>
52
  );
53
  }
 
 
 
 
 
54
  return (
55
  <TimeProvider duration={data!.duration}>
56
  <FlaggedEpisodesProvider>
@@ -197,7 +202,7 @@ function EpisodeViewerInner({ data, org, dataset }: { data: EpisodeData; org?: s
197
  link.href = v.url;
198
  document.head.appendChild(link);
199
  links.push(link);
200
- }
201
  }
202
  })
203
  .catch(() => {});
 
51
  </div>
52
  );
53
  }
54
+
55
+ if (!data) {
56
+ return null;
57
+ }
58
+
59
  return (
60
  <TimeProvider duration={data!.duration}>
61
  <FlaggedEpisodesProvider>
 
202
  link.href = v.url;
203
  document.head.appendChild(link);
204
  links.push(link);
205
+ }
206
  }
207
  })
208
  .catch(() => {});
src/app/[org]/[dataset]/[episode]/fetch-data.ts CHANGED
@@ -6,8 +6,19 @@ import {
6
  } from "@/utils/parquetUtils";
7
  import { pick } from "@/utils/pick";
8
  import { getDatasetVersionAndInfo, buildVersionedUrl } from "@/utils/versionUtils";
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- const SERIES_NAME_DELIMITER = " | ";
11
 
12
  export type VideoInfo = {
13
  filename: string;
@@ -99,34 +110,10 @@ type ColumnDef = {
99
  value: string[];
100
  };
101
 
102
- function groupRowBySuffix(row: Record<string, number>): ChartRow {
103
- const result: ChartRow = {};
104
- const suffixGroups: Record<string, Record<string, number>> = {};
105
- for (const [key, value] of Object.entries(row)) {
106
- if (key === "timestamp") {
107
- result["timestamp"] = value;
108
- continue;
109
- }
110
- const parts = key.split(SERIES_NAME_DELIMITER);
111
- if (parts.length === 2) {
112
- const [prefix, suffix] = parts;
113
- if (!suffixGroups[suffix]) suffixGroups[suffix] = {};
114
- suffixGroups[suffix][prefix] = value;
115
- } else {
116
- result[key] = value;
117
- }
118
- }
119
- for (const [suffix, group] of Object.entries(suffixGroups)) {
120
- const keys = Object.keys(group);
121
- if (keys.length === 1) {
122
- const fullName = `${keys[0]}${SERIES_NAME_DELIMITER}${suffix}`;
123
- result[fullName] = group[keys[0]];
124
- } else {
125
- result[suffix] = group;
126
- }
127
- }
128
- return result;
129
- }
130
 
131
  export async function getEpisodeData(
132
  org: string,
@@ -141,7 +128,9 @@ export async function getEpisodeData(
141
  const info = rawInfo as unknown as DatasetMetadata;
142
 
143
  if (info.video_path === null) {
144
- throw new Error("Only videos datasets are supported in this visualizer.\nPlease use Rerun visualizer for images datasets.");
 
 
145
  }
146
 
147
  console.time(`[perf] getEpisodeData (${version})`);
@@ -176,14 +165,14 @@ export async function getAdjacentEpisodesVideoInfo(
176
  dataset: string,
177
  currentEpisodeId: number,
178
  radius: number = 2,
179
- ) {
180
  const repoId = `${org}/${dataset}`;
181
  try {
182
  const { version, info: rawInfo } = await getDatasetVersionAndInfo(repoId);
183
  const info = rawInfo as unknown as DatasetMetadata;
184
 
185
  const totalEpisodes = info.total_episodes;
186
- const adjacentVideos: Array<{episodeId: number; videosInfo: VideoInfo[]}> = [];
187
 
188
  // Calculate adjacent episode IDs
189
  for (let offset = -radius; offset <= radius; offset++) {
@@ -195,24 +184,39 @@ export async function getAdjacentEpisodesVideoInfo(
195
  let videosInfo: VideoInfo[] = [];
196
 
197
  if (version === "v3.0") {
198
- const episodeMetadata = await loadEpisodeMetadataV3Simple(repoId, version, episodeId);
199
- videosInfo = extractVideoInfoV3WithSegmentation(repoId, version, info, episodeMetadata);
 
 
 
 
 
 
 
 
 
200
  } else {
201
  // For v2.x, use simpler video info extraction
 
202
  const episode_chunk = Math.floor(0 / 1000);
203
  videosInfo = Object.entries(info.features)
204
  .filter(([, value]) => value.dtype === "video")
205
  .map(([key]) => {
206
- const videoPath = formatStringWithVars(info.video_path, {
207
  video_key: key,
208
- episode_chunk: episode_chunk.toString().padStart(3, "0"),
209
- episode_index: episodeId.toString().padStart(6, "0"),
 
 
 
 
210
  });
211
  return {
212
  filename: key,
213
  url: buildVersionedUrl(repoId, version, videoPath),
214
  };
215
  });
 
216
  }
217
 
218
  adjacentVideos.push({ episodeId, videosInfo });
@@ -258,43 +262,42 @@ async function getEpisodeDataV2(
258
  // episode id starts from 0
259
  (_, i) => i,
260
  )
261
- : process.env.EPISODES
262
- .split(/\s+/)
263
  .map((x) => parseInt(x.trim(), 10))
264
  .filter((x) => !isNaN(x));
265
 
266
  // Videos information
267
- const videosInfo = Object.entries(info.features)
 
 
268
  .filter(([, value]) => value.dtype === "video")
269
  .map(([key]) => {
270
- const videoPath = formatStringWithVars(info.video_path, {
271
  video_key: key,
272
- episode_chunk: episode_chunk.toString().padStart(3, "0"),
273
- episode_index: episodeId.toString().padStart(6, "0"),
 
 
 
 
274
  });
275
  return {
276
  filename: key,
277
  url: buildVersionedUrl(repoId, version, videoPath),
278
  };
279
- });
 
280
 
281
  // Column data
282
  const columnNames = Object.entries(info.features)
283
  .filter(
284
  ([, value]) =>
285
- ["float32", "int32"].includes(value.dtype) &&
286
- value.shape.length === 1,
287
  )
288
  .map(([key, { shape }]) => ({ key, length: shape[0] }));
289
 
290
  // Exclude specific columns
291
- const excludedColumns = [
292
- "timestamp",
293
- "frame_index",
294
- "episode_index",
295
- "index",
296
- "task_index",
297
- ];
298
  const filteredColumns = columnNames.filter(
299
  (column) => !excludedColumns.includes(column.key),
300
  );
@@ -315,7 +318,7 @@ async function getEpisodeDataV2(
315
  ? column_names.map((name: string) => `${key}${SERIES_NAME_DELIMITER}${name}`)
316
  : Array.from(
317
  { length: columnNames.find((c) => c.key === key)?.length ?? 1 },
318
- (_, i) => `${key}${SERIES_NAME_DELIMITER}${i}`,
319
  ),
320
  };
321
  });
@@ -324,9 +327,11 @@ async function getEpisodeDataV2(
324
  repoId,
325
  version,
326
  formatStringWithVars(info.data_path, {
327
- episode_chunk: episode_chunk.toString().padStart(3, "0"),
328
- episode_index: episodeId.toString().padStart(6, "0"),
329
- })
 
 
330
  );
331
 
332
  const arrayBuffer = await fetchParquetFile(parquetUrl);
@@ -366,20 +371,20 @@ async function getEpisodeDataV2(
366
  if (tasksResponse.ok) {
367
  const tasksText = await tasksResponse.text();
368
  const tasksData = tasksText
369
- .split('\n')
370
- .filter(line => line.trim())
371
- .map(line => JSON.parse(line));
372
 
373
  if (tasksData && tasksData.length > 0) {
374
  const taskIndex = allData[0].task_index;
375
  const taskIndexNum = typeof taskIndex === 'bigint' ? Number(taskIndex) : taskIndex;
376
- const taskData = tasksData.find(t => t.task_index === taskIndexNum);
377
  if (taskData) {
378
  task = taskData.task;
379
  }
380
  }
381
  }
382
- } catch (error) {
383
  // No tasks metadata file for this v2.x dataset
384
  }
385
  }
@@ -414,86 +419,21 @@ async function getEpisodeDataV2(
414
  )
415
  .map(([key]) => key);
416
 
417
- // 1. Group all numeric keys by suffix (excluding 'timestamp')
418
- const numericKeys = seriesNames.filter((k) => k !== "timestamp");
419
- const suffixGroupsMap: Record<string, string[]> = {};
420
- for (const key of numericKeys) {
421
- const parts = key.split(SERIES_NAME_DELIMITER);
422
- const suffix = parts[1] || parts[0]; // fallback to key if no delimiter
423
- if (!suffixGroupsMap[suffix]) suffixGroupsMap[suffix] = [];
424
- suffixGroupsMap[suffix].push(key);
425
- }
426
- const suffixGroups = Object.values(suffixGroupsMap);
427
-
428
- // 2. Compute min/max for each suffix group as a whole
429
- const groupStats: Record<string, { min: number; max: number }> = {};
430
- suffixGroups.forEach((group) => {
431
- let min = Infinity,
432
- max = -Infinity;
433
- for (const row of chartData) {
434
- for (const key of group) {
435
- const v = row[key];
436
- if (typeof v === "number" && !isNaN(v)) {
437
- if (v < min) min = v;
438
- if (v > max) max = v;
439
- }
440
- }
441
- }
442
- // Use the first key in the group as the group id
443
- groupStats[group[0]] = { min, max };
444
- });
445
-
446
- // 3. Group suffix groups by similar scale (treat each suffix group as a unit)
447
- const scaleGroups: Record<string, string[][]> = {};
448
- const used = new Set<string>();
449
- const SCALE_THRESHOLD = 2;
450
- for (const group of suffixGroups) {
451
- const groupId = group[0];
452
- if (used.has(groupId)) continue;
453
- const { min, max } = groupStats[groupId];
454
- if (!isFinite(min) || !isFinite(max)) continue;
455
- const logMin = Math.log10(Math.abs(min) + 1e-9);
456
- const logMax = Math.log10(Math.abs(max) + 1e-9);
457
- const unit: string[][] = [group];
458
- used.add(groupId);
459
- for (const other of suffixGroups) {
460
- const otherId = other[0];
461
- if (used.has(otherId) || otherId === groupId) continue;
462
- const { min: omin, max: omax } = groupStats[otherId];
463
- if (!isFinite(omin) || !isFinite(omax) || omin === omax) continue;
464
- const ologMin = Math.log10(Math.abs(omin) + 1e-9);
465
- const ologMax = Math.log10(Math.abs(omax) + 1e-9);
466
- if (
467
- Math.abs(logMin - ologMin) <= SCALE_THRESHOLD &&
468
- Math.abs(logMax - ologMax) <= SCALE_THRESHOLD
469
- ) {
470
- unit.push(other);
471
- used.add(otherId);
472
- }
473
- }
474
- scaleGroups[groupId] = unit;
475
- }
476
-
477
- // 4. Flatten scaleGroups into chartGroups (array of arrays of keys)
478
- const chartGroups: string[][] = Object.values(scaleGroups)
479
- .sort((a, b) => b.length - a.length)
480
- .flatMap((suffixGroupArr) => {
481
- // suffixGroupArr is array of suffix groups (each is array of keys)
482
- const merged = suffixGroupArr.flat();
483
- if (merged.length > 6) {
484
- const subgroups: string[][] = [];
485
- for (let i = 0; i < merged.length; i += 6) {
486
- subgroups.push(merged.slice(i, i + 6));
487
- }
488
- return subgroups;
489
- }
490
- return [merged];
491
- });
492
 
493
  const duration = chartData[chartData.length - 1].timestamp;
494
 
495
  const chartDataGroups = chartGroups.map((group) =>
496
- chartData.map((row) => groupRowBySuffix(pick(row, [...group, "timestamp"])))
 
 
 
 
 
 
 
 
497
  );
498
 
499
  return {
@@ -531,10 +471,19 @@ async function getEpisodeDataV3(
531
  const episodes = Array.from({ length: info.total_episodes }, (_, i) => i);
532
 
533
  // Load episode metadata to get timestamps for episode 0
534
- const episodeMetadata = await loadEpisodeMetadataV3Simple(repoId, version, episodeId);
 
 
 
 
535
 
536
  // Create video info with segmentation using the metadata
537
- const videosInfo = extractVideoInfoV3WithSegmentation(repoId, version, info, episodeMetadata);
 
 
 
 
 
538
 
539
  // Load episode data for charts
540
  const { chartDataGroups, flatChartData, ignoredColumns, task } = await loadEpisodeDataV3(repoId, version, info, episodeMetadata);
@@ -563,9 +512,9 @@ async function loadEpisodeDataV3(
563
  episodeMetadata: EpisodeMetadataV3,
564
  ): Promise<{ chartDataGroups: ChartRow[][]; flatChartData: Record<string, number>[]; ignoredColumns: string[]; task?: string }> {
565
  // Build data file path using chunk and file indices
566
- const dataChunkIndex = episodeMetadata.data_chunk_index || 0;
567
- const dataFileIndex = episodeMetadata.data_file_index || 0;
568
- const dataPath = `data/chunk-${dataChunkIndex.toString().padStart(3, "0")}/file-${dataFileIndex.toString().padStart(3, "0")}.parquet`;
569
 
570
  try {
571
  const dataUrl = buildVersionedUrl(repoId, version, dataPath);
@@ -684,20 +633,20 @@ function processEpisodeDataForCharts(
684
 
685
  // Common feature order for v3.0 datasets (but only include if they exist)
686
  const expectedFeatureOrder = [
687
- 'observation.state',
688
- 'action',
689
- 'timestamp',
690
- 'episode_index',
691
- 'frame_index',
692
- 'next.reward',
693
- 'next.done',
694
- 'index',
695
- 'task_index'
696
  ];
697
 
698
  // Map indices to features that actually exist
699
  let currentIndex = 0;
700
- expectedFeatureOrder.forEach(feature => {
701
  if (featureKeys.includes(feature)) {
702
  v3IndexToFeatureMap[currentIndex.toString()] = feature;
703
  currentIndex++;
@@ -705,7 +654,7 @@ function processEpisodeDataForCharts(
705
  });
706
 
707
  // Columns to exclude from charts (note: 'task' is intentionally not excluded as we want to access it)
708
- const excludedColumns = ['index', 'task_index', 'episode_index', 'frame_index', 'next.done'];
709
 
710
  // Create columns structure similar to V2.1 for proper hierarchical naming
711
  const columns: ColumnDef[] = Object.entries(info.features)
@@ -726,7 +675,7 @@ function processEpisodeDataForCharts(
726
  ? column_names.map((name: string) => `${key}${SERIES_NAME_DELIMITER}${name}`)
727
  : Array.from(
728
  { length: feature.shape[0] || 1 },
729
- (_, i) => `${key}${SERIES_NAME_DELIMITER}${i}`,
730
  ),
731
  };
732
  });
@@ -737,7 +686,7 @@ function processEpisodeDataForCharts(
737
  const allKeys: string[] = [];
738
 
739
  Object.entries(firstRow || {}).forEach(([key, value]) => {
740
- if (key === 'timestamp') return; // Skip timestamp, we'll add it separately
741
 
742
  // Map numeric key to feature name if available
743
  const featureName = v3IndexToFeatureMap[key] || key;
@@ -749,7 +698,7 @@ function processEpisodeDataForCharts(
749
  if (excludedColumns.includes(featureName)) return;
750
 
751
  // Find the matching column definition to get proper names
752
- const columnDef = columns.find(col => col.key === featureName);
753
  if (columnDef && Array.isArray(value) && value.length > 0) {
754
  // Use the proper hierarchical naming from column definition
755
  columnDef.value.forEach((seriesName, idx) => {
@@ -757,10 +706,10 @@ function processEpisodeDataForCharts(
757
  allKeys.push(seriesName);
758
  }
759
  });
760
- } else if (typeof value === 'number' && !isNaN(value)) {
761
  // For scalar numeric values
762
  allKeys.push(featureName);
763
- } else if (typeof value === 'bigint') {
764
  // For BigInt values
765
  allKeys.push(featureName);
766
  }
@@ -769,10 +718,7 @@ function processEpisodeDataForCharts(
769
  seriesNames = ["timestamp", ...allKeys];
770
  } else {
771
  // Fallback to column-based approach like V2.1
772
- seriesNames = [
773
- "timestamp",
774
- ...columns.map(({ value }) => value).flat(),
775
- ];
776
  }
777
 
778
  const chartData = episodeData.map((row, index) => {
@@ -783,14 +729,17 @@ function processEpisodeDataForCharts(
783
  let videoDuration = episodeData.length; // Fallback to data length
784
  if (episodeMetadata) {
785
  // Use actual video segment duration if available
786
- videoDuration = (episodeMetadata.video_to_timestamp || 30) - (episodeMetadata.video_from_timestamp || 0);
 
 
787
  }
788
- obj["timestamp"] = (index / Math.max(episodeData.length - 1, 1)) * videoDuration;
 
789
 
790
  // Add all data columns using hierarchical naming
791
- if (row && typeof row === 'object') {
792
  Object.entries(row).forEach(([key, value]) => {
793
- if (key === 'timestamp') {
794
  // Timestamp is already handled above
795
  return;
796
  }
@@ -805,21 +754,21 @@ function processEpisodeDataForCharts(
805
  if (excludedColumns.includes(featureName)) return;
806
 
807
  // Find the matching column definition to get proper series names
808
- const columnDef = columns.find(col => col.key === featureName);
809
 
810
  if (Array.isArray(value) && columnDef) {
811
  // For array values like observation.state and action, use proper hierarchical naming
812
  value.forEach((val, idx) => {
813
  if (idx < columnDef.value.length) {
814
  const seriesName = columnDef.value[idx];
815
- obj[seriesName] = typeof val === 'number' ? val : Number(val);
816
  }
817
  });
818
- } else if (typeof value === 'number' && !isNaN(value)) {
819
  obj[featureName] = value;
820
- } else if (typeof value === 'bigint') {
821
  obj[featureName] = Number(value);
822
- } else if (typeof value === 'boolean') {
823
  // Convert boolean to number for charts
824
  obj[featureName] = value ? 1 : 0;
825
  }
@@ -837,92 +786,27 @@ function processEpisodeDataForCharts(
837
  ["float32", "int32"].includes(value.dtype) && value.shape.length > 2, // Only ignore 3D+ data
838
  )
839
  .map(([key]) => key),
840
- ...excludedColumns // Also include the manually excluded columns
841
  ];
842
 
843
- // Group processing logic (using SERIES_NAME_DELIMITER like v2.1)
844
- const numericKeys = seriesNames.filter((k) => k !== "timestamp");
845
- const suffixGroupsMap: Record<string, string[]> = {};
846
-
847
- for (const key of numericKeys) {
848
- const parts = key.split(SERIES_NAME_DELIMITER);
849
- const suffix = parts[1] || parts[0]; // fallback to key if no delimiter
850
- if (!suffixGroupsMap[suffix]) suffixGroupsMap[suffix] = [];
851
- suffixGroupsMap[suffix].push(key);
852
- }
853
- const suffixGroups = Object.values(suffixGroupsMap);
854
-
855
-
856
- // Compute min/max for each suffix group
857
- const groupStats: Record<string, { min: number; max: number }> = {};
858
- suffixGroups.forEach((group) => {
859
- let min = Infinity, max = -Infinity;
860
- for (const row of chartData) {
861
- for (const key of group) {
862
- const v = row[key];
863
- if (typeof v === "number" && !isNaN(v)) {
864
- if (v < min) min = v;
865
- if (v > max) max = v;
866
- }
867
- }
868
- }
869
- groupStats[group[0]] = { min, max };
870
- });
871
-
872
- // Group by similar scale
873
- const scaleGroups: Record<string, string[][]> = {};
874
- const used = new Set<string>();
875
- const SCALE_THRESHOLD = 2;
876
- for (const group of suffixGroups) {
877
- const groupId = group[0];
878
- if (used.has(groupId)) continue;
879
- const { min, max } = groupStats[groupId];
880
- if (!isFinite(min) || !isFinite(max)) continue;
881
- const logMin = Math.log10(Math.abs(min) + 1e-9);
882
- const logMax = Math.log10(Math.abs(max) + 1e-9);
883
- const unit: string[][] = [group];
884
- used.add(groupId);
885
- for (const other of suffixGroups) {
886
- const otherId = other[0];
887
- if (used.has(otherId) || otherId === groupId) continue;
888
- const { min: omin, max: omax } = groupStats[otherId];
889
- if (!isFinite(omin) || !isFinite(omax) || omin === omax) continue;
890
- const ologMin = Math.log10(Math.abs(omin) + 1e-9);
891
- const ologMax = Math.log10(Math.abs(omax) + 1e-9);
892
- if (
893
- Math.abs(logMin - ologMin) <= SCALE_THRESHOLD &&
894
- Math.abs(logMax - ologMax) <= SCALE_THRESHOLD
895
- ) {
896
- unit.push(other);
897
- used.add(otherId);
898
- }
899
- }
900
- scaleGroups[groupId] = unit;
901
- }
902
-
903
- // Flatten into chartGroups
904
- const chartGroups: string[][] = Object.values(scaleGroups)
905
- .sort((a, b) => b.length - a.length)
906
- .flatMap((suffixGroupArr) => {
907
- const merged = suffixGroupArr.flat();
908
- if (merged.length > 6) {
909
- const subgroups = [];
910
- for (let i = 0; i < merged.length; i += 6) {
911
- subgroups.push(merged.slice(i, i + 6));
912
- }
913
- return subgroups;
914
- }
915
- return [merged];
916
- });
917
 
918
  const chartDataGroups = chartGroups.map((group) =>
919
- chartData.map((row) => groupRowBySuffix(pick(row, [...group, "timestamp"])))
 
 
 
 
 
 
 
 
920
  );
921
 
922
  return { chartDataGroups, flatChartData: chartData, ignoredColumns };
923
  }
924
 
925
-
926
  // Video info extraction with segmentation for v3.0
927
  function extractVideoInfoV3WithSegmentation(
928
  repoId: string,
@@ -931,13 +815,14 @@ function extractVideoInfoV3WithSegmentation(
931
  episodeMetadata: EpisodeMetadataV3,
932
  ): VideoInfo[] {
933
  // Get video features from dataset info
934
- const videoFeatures = Object.entries(info.features)
935
- .filter(([, value]) => value.dtype === "video");
 
936
 
937
  const videosInfo = videoFeatures.map(([videoKey]) => {
938
  // Check if we have per-camera metadata in the episode row
939
- const cameraSpecificKeys = Object.keys(episodeMetadata).filter(key =>
940
- key.startsWith(`videos/${videoKey}/`)
941
  );
942
 
943
  let chunkIndex: number, fileIndex: number, segmentStart: number, segmentEnd: number;
@@ -956,7 +841,15 @@ function extractVideoInfoV3WithSegmentation(
956
  segmentEnd = episodeMetadata.video_to_timestamp || 30;
957
  }
958
 
959
- const videoPath = `videos/${videoKey}/chunk-${chunkIndex.toString().padStart(3, "0")}/file-${fileIndex.toString().padStart(3, "0")}.mp4`;
 
 
 
 
 
 
 
 
960
  const fullUrl = buildVersionedUrl(repoId, version, videoPath);
961
 
962
  return {
@@ -964,9 +857,9 @@ function extractVideoInfoV3WithSegmentation(
964
  url: fullUrl,
965
  // Enable segmentation with timestamps from metadata
966
  isSegmented: true,
967
- segmentStart: segmentStart,
968
- segmentEnd: segmentEnd,
969
- segmentDuration: segmentEnd - segmentStart,
970
  };
971
  });
972
 
@@ -988,8 +881,15 @@ async function loadEpisodeMetadataV3Simple(
988
 
989
  // Try loading episode metadata files until we find the episode
990
  while (!episodeRow) {
991
- const episodesMetadataPath = `meta/episodes/chunk-${chunkIndex.toString().padStart(3, "0")}/file-${fileIndex.toString().padStart(3, "0")}.parquet`;
992
- const episodesMetadataUrl = buildVersionedUrl(repoId, version, episodesMetadataPath);
 
 
 
 
 
 
 
993
 
994
  try {
995
  const arrayBuffer = await fetchParquetFile(episodesMetadataUrl);
@@ -1015,9 +915,11 @@ async function loadEpisodeMetadataV3Simple(
1015
  // Not in this file, try the next one
1016
  fileIndex++;
1017
  }
1018
- } catch (error) {
1019
  // File doesn't exist - episode not found
1020
- throw new Error(`Episode ${episodeId} not found in metadata (searched up to file-${fileIndex.toString().padStart(3, "0")}.parquet)`);
 
 
1021
  }
1022
  }
1023
 
@@ -1028,9 +930,9 @@ async function loadEpisodeMetadataV3Simple(
1028
  // Simple parser for episode row - focuses on key fields for episodes
1029
  function parseEpisodeRowSimple(row: Record<string, unknown>): EpisodeMetadataV3 {
1030
  // v3.0 uses named keys in the episode metadata
1031
- if (row && typeof row === 'object') {
1032
  // Check if this is v3.0 format with named keys
1033
- if ('episode_index' in row) {
1034
  // v3.0 format - use named keys
1035
  // Convert BigInt values to numbers
1036
  const toBigIntSafe = (value: unknown): number => {
@@ -1079,7 +981,7 @@ function parseEpisodeRowSimple(row: Record<string, unknown>): EpisodeMetadataV3
1079
  }
1080
  });
1081
 
1082
- return episodeData;
1083
  } else {
1084
  // Fallback to numeric keys for compatibility
1085
  const toNum = (v: unknown, fallback = 0): number =>
@@ -1118,7 +1020,6 @@ function parseEpisodeRowSimple(row: Record<string, unknown>): EpisodeMetadataV3
1118
 
1119
 
1120
 
1121
-
1122
  // ─── Stats computation ───────────────────────────────────────────
1123
 
1124
  /**
 
6
  } from "@/utils/parquetUtils";
7
  import { pick } from "@/utils/pick";
8
  import { getDatasetVersionAndInfo, buildVersionedUrl } from "@/utils/versionUtils";
9
+ import { PADDING, CHART_CONFIG, EXCLUDED_COLUMNS } from "@/utils/constants";
10
+ import {
11
+ processChartDataGroups,
12
+ groupRowBySuffix,
13
+ } from "@/utils/dataProcessing";
14
+ import {
15
+ buildV3VideoPath,
16
+ buildV3DataPath,
17
+ buildV3EpisodesMetadataPath,
18
+ } from "@/utils/stringFormatting";
19
+ import { bigIntToNumber } from "@/utils/typeGuards";
20
 
21
+ const SERIES_NAME_DELIMITER = CHART_CONFIG.SERIES_NAME_DELIMITER;
22
 
23
  export type VideoInfo = {
24
  filename: string;
 
110
  value: string[];
111
  };
112
 
113
+ type AdjacentEpisodeVideos = {
114
+ episodeId: number;
115
+ videosInfo: VideoInfo[];
116
+ };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
  export async function getEpisodeData(
119
  org: string,
 
128
  const info = rawInfo as unknown as DatasetMetadata;
129
 
130
  if (info.video_path === null) {
131
+ throw new Error(
132
+ "Only videos datasets are supported in this visualizer.\nPlease use Rerun visualizer for images datasets.",
133
+ );
134
  }
135
 
136
  console.time(`[perf] getEpisodeData (${version})`);
 
165
  dataset: string,
166
  currentEpisodeId: number,
167
  radius: number = 2,
168
+ ): Promise<AdjacentEpisodeVideos[]> {
169
  const repoId = `${org}/${dataset}`;
170
  try {
171
  const { version, info: rawInfo } = await getDatasetVersionAndInfo(repoId);
172
  const info = rawInfo as unknown as DatasetMetadata;
173
 
174
  const totalEpisodes = info.total_episodes;
175
+ const adjacentVideos: AdjacentEpisodeVideos[] = [];
176
 
177
  // Calculate adjacent episode IDs
178
  for (let offset = -radius; offset <= radius; offset++) {
 
184
  let videosInfo: VideoInfo[] = [];
185
 
186
  if (version === "v3.0") {
187
+ const episodeMetadata = await loadEpisodeMetadataV3Simple(
188
+ repoId,
189
+ version,
190
+ episodeId,
191
+ );
192
+ videosInfo = extractVideoInfoV3WithSegmentation(
193
+ repoId,
194
+ version,
195
+ info,
196
+ episodeMetadata,
197
+ );
198
  } else {
199
  // For v2.x, use simpler video info extraction
200
+ if (info.video_path) {
201
  const episode_chunk = Math.floor(0 / 1000);
202
  videosInfo = Object.entries(info.features)
203
  .filter(([, value]) => value.dtype === "video")
204
  .map(([key]) => {
205
+ const videoPath = formatStringWithVars(info.video_path!, {
206
  video_key: key,
207
+ episode_chunk: episode_chunk
208
+ .toString()
209
+ .padStart(PADDING.CHUNK_INDEX, "0"),
210
+ episode_index: episodeId
211
+ .toString()
212
+ .padStart(PADDING.EPISODE_INDEX, "0"),
213
  });
214
  return {
215
  filename: key,
216
  url: buildVersionedUrl(repoId, version, videoPath),
217
  };
218
  });
219
+ }
220
  }
221
 
222
  adjacentVideos.push({ episodeId, videosInfo });
 
262
  // episode id starts from 0
263
  (_, i) => i,
264
  )
265
+ : process.env.EPISODES.split(/\s+/)
 
266
  .map((x) => parseInt(x.trim(), 10))
267
  .filter((x) => !isNaN(x));
268
 
269
  // Videos information
270
+ const videosInfo =
271
+ info.video_path !== null
272
+ ? Object.entries(info.features)
273
  .filter(([, value]) => value.dtype === "video")
274
  .map(([key]) => {
275
+ const videoPath = formatStringWithVars(info.video_path!, {
276
  video_key: key,
277
+ episode_chunk: episode_chunk
278
+ .toString()
279
+ .padStart(PADDING.CHUNK_INDEX, "0"),
280
+ episode_index: episodeId
281
+ .toString()
282
+ .padStart(PADDING.EPISODE_INDEX, "0"),
283
  });
284
  return {
285
  filename: key,
286
  url: buildVersionedUrl(repoId, version, videoPath),
287
  };
288
+ })
289
+ : [];
290
 
291
  // Column data
292
  const columnNames = Object.entries(info.features)
293
  .filter(
294
  ([, value]) =>
295
+ ["float32", "int32"].includes(value.dtype) && value.shape.length === 1,
 
296
  )
297
  .map(([key, { shape }]) => ({ key, length: shape[0] }));
298
 
299
  // Exclude specific columns
300
+ const excludedColumns = EXCLUDED_COLUMNS.V2 as readonly string[];
 
 
 
 
 
 
301
  const filteredColumns = columnNames.filter(
302
  (column) => !excludedColumns.includes(column.key),
303
  );
 
318
  ? column_names.map((name: string) => `${key}${SERIES_NAME_DELIMITER}${name}`)
319
  : Array.from(
320
  { length: columnNames.find((c) => c.key === key)?.length ?? 1 },
321
+ (_, i) => `${key}${CHART_CONFIG.SERIES_NAME_DELIMITER}${i}`,
322
  ),
323
  };
324
  });
 
327
  repoId,
328
  version,
329
  formatStringWithVars(info.data_path, {
330
+ episode_chunk: episode_chunk
331
+ .toString()
332
+ .padStart(PADDING.CHUNK_INDEX, "0"),
333
+ episode_index: episodeId.toString().padStart(PADDING.EPISODE_INDEX, "0"),
334
+ }),
335
  );
336
 
337
  const arrayBuffer = await fetchParquetFile(parquetUrl);
 
371
  if (tasksResponse.ok) {
372
  const tasksText = await tasksResponse.text();
373
  const tasksData = tasksText
374
+ .split("\n")
375
+ .filter((line) => line.trim())
376
+ .map((line) => JSON.parse(line));
377
 
378
  if (tasksData && tasksData.length > 0) {
379
  const taskIndex = allData[0].task_index;
380
  const taskIndexNum = typeof taskIndex === 'bigint' ? Number(taskIndex) : taskIndex;
381
+ const taskData = tasksData.find((t: Record<string, unknown>) => t.task_index === taskIndexNum);
382
  if (taskData) {
383
  task = taskData.task;
384
  }
385
  }
386
  }
387
+ } catch {
388
  // No tasks metadata file for this v2.x dataset
389
  }
390
  }
 
419
  )
420
  .map(([key]) => key);
421
 
422
+ // Process chart data into organized groups using utility function
423
+ const chartGroups = processChartDataGroups(seriesNames, chartData);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
424
 
425
  const duration = chartData[chartData.length - 1].timestamp;
426
 
427
  const chartDataGroups = chartGroups.map((group) =>
428
+ chartData.map((row) => {
429
+ const grouped = groupRowBySuffix(pick(row, [...group, "timestamp"]));
430
+ // Ensure timestamp is always a number at the top level
431
+ return {
432
+ ...grouped,
433
+ timestamp:
434
+ typeof grouped.timestamp === "number" ? grouped.timestamp : 0,
435
+ };
436
+ }),
437
  );
438
 
439
  return {
 
471
  const episodes = Array.from({ length: info.total_episodes }, (_, i) => i);
472
 
473
  // Load episode metadata to get timestamps for episode 0
474
+ const episodeMetadata = await loadEpisodeMetadataV3Simple(
475
+ repoId,
476
+ version,
477
+ episodeId,
478
+ );
479
 
480
  // Create video info with segmentation using the metadata
481
+ const videosInfo = extractVideoInfoV3WithSegmentation(
482
+ repoId,
483
+ version,
484
+ info,
485
+ episodeMetadata,
486
+ );
487
 
488
  // Load episode data for charts
489
  const { chartDataGroups, flatChartData, ignoredColumns, task } = await loadEpisodeDataV3(repoId, version, info, episodeMetadata);
 
512
  episodeMetadata: EpisodeMetadataV3,
513
  ): Promise<{ chartDataGroups: ChartRow[][]; flatChartData: Record<string, number>[]; ignoredColumns: string[]; task?: string }> {
514
  // Build data file path using chunk and file indices
515
+ const dataChunkIndex = bigIntToNumber(episodeMetadata.data_chunk_index, 0);
516
+ const dataFileIndex = bigIntToNumber(episodeMetadata.data_file_index, 0);
517
+ const dataPath = buildV3DataPath(dataChunkIndex, dataFileIndex);
518
 
519
  try {
520
  const dataUrl = buildVersionedUrl(repoId, version, dataPath);
 
633
 
634
  // Common feature order for v3.0 datasets (but only include if they exist)
635
  const expectedFeatureOrder = [
636
+ "observation.state",
637
+ "action",
638
+ "timestamp",
639
+ "episode_index",
640
+ "frame_index",
641
+ "next.reward",
642
+ "next.done",
643
+ "index",
644
+ "task_index",
645
  ];
646
 
647
  // Map indices to features that actually exist
648
  let currentIndex = 0;
649
+ expectedFeatureOrder.forEach((feature) => {
650
  if (featureKeys.includes(feature)) {
651
  v3IndexToFeatureMap[currentIndex.toString()] = feature;
652
  currentIndex++;
 
654
  });
655
 
656
  // Columns to exclude from charts (note: 'task' is intentionally not excluded as we want to access it)
657
+ const excludedColumns = EXCLUDED_COLUMNS.V3 as readonly string[];
658
 
659
  // Create columns structure similar to V2.1 for proper hierarchical naming
660
  const columns: ColumnDef[] = Object.entries(info.features)
 
675
  ? column_names.map((name: string) => `${key}${SERIES_NAME_DELIMITER}${name}`)
676
  : Array.from(
677
  { length: feature.shape[0] || 1 },
678
+ (_, i) => `${key}${CHART_CONFIG.SERIES_NAME_DELIMITER}${i}`,
679
  ),
680
  };
681
  });
 
686
  const allKeys: string[] = [];
687
 
688
  Object.entries(firstRow || {}).forEach(([key, value]) => {
689
+ if (key === "timestamp") return; // Skip timestamp, we'll add it separately
690
 
691
  // Map numeric key to feature name if available
692
  const featureName = v3IndexToFeatureMap[key] || key;
 
698
  if (excludedColumns.includes(featureName)) return;
699
 
700
  // Find the matching column definition to get proper names
701
+ const columnDef = columns.find((col) => col.key === featureName);
702
  if (columnDef && Array.isArray(value) && value.length > 0) {
703
  // Use the proper hierarchical naming from column definition
704
  columnDef.value.forEach((seriesName, idx) => {
 
706
  allKeys.push(seriesName);
707
  }
708
  });
709
+ } else if (typeof value === "number" && !isNaN(value)) {
710
  // For scalar numeric values
711
  allKeys.push(featureName);
712
+ } else if (typeof value === "bigint") {
713
  // For BigInt values
714
  allKeys.push(featureName);
715
  }
 
718
  seriesNames = ["timestamp", ...allKeys];
719
  } else {
720
  // Fallback to column-based approach like V2.1
721
+ seriesNames = ["timestamp", ...columns.map(({ value }) => value).flat()];
 
 
 
722
  }
723
 
724
  const chartData = episodeData.map((row, index) => {
 
729
  let videoDuration = episodeData.length; // Fallback to data length
730
  if (episodeMetadata) {
731
  // Use actual video segment duration if available
732
+ videoDuration =
733
+ (episodeMetadata.video_to_timestamp || 30) -
734
+ (episodeMetadata.video_from_timestamp || 0);
735
  }
736
+ obj["timestamp"] =
737
+ (index / Math.max(episodeData.length - 1, 1)) * videoDuration;
738
 
739
  // Add all data columns using hierarchical naming
740
+ if (row && typeof row === "object") {
741
  Object.entries(row).forEach(([key, value]) => {
742
+ if (key === "timestamp") {
743
  // Timestamp is already handled above
744
  return;
745
  }
 
754
  if (excludedColumns.includes(featureName)) return;
755
 
756
  // Find the matching column definition to get proper series names
757
+ const columnDef = columns.find((col) => col.key === featureName);
758
 
759
  if (Array.isArray(value) && columnDef) {
760
  // For array values like observation.state and action, use proper hierarchical naming
761
  value.forEach((val, idx) => {
762
  if (idx < columnDef.value.length) {
763
  const seriesName = columnDef.value[idx];
764
+ obj[seriesName] = typeof val === "number" ? val : Number(val);
765
  }
766
  });
767
+ } else if (typeof value === "number" && !isNaN(value)) {
768
  obj[featureName] = value;
769
+ } else if (typeof value === "bigint") {
770
  obj[featureName] = Number(value);
771
+ } else if (typeof value === "boolean") {
772
  // Convert boolean to number for charts
773
  obj[featureName] = value ? 1 : 0;
774
  }
 
786
  ["float32", "int32"].includes(value.dtype) && value.shape.length > 2, // Only ignore 3D+ data
787
  )
788
  .map(([key]) => key),
789
+ ...excludedColumns, // Also include the manually excluded columns
790
  ];
791
 
792
+ // Process chart data into organized groups using utility function
793
+ const chartGroups = processChartDataGroups(seriesNames, chartData);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
794
 
795
  const chartDataGroups = chartGroups.map((group) =>
796
+ chartData.map((row) => {
797
+ const grouped = groupRowBySuffix(pick(row, [...group, "timestamp"]));
798
+ // Ensure timestamp is always a number at the top level
799
+ return {
800
+ ...grouped,
801
+ timestamp:
802
+ typeof grouped.timestamp === "number" ? grouped.timestamp : 0,
803
+ };
804
+ }),
805
  );
806
 
807
  return { chartDataGroups, flatChartData: chartData, ignoredColumns };
808
  }
809
 
 
810
  // Video info extraction with segmentation for v3.0
811
  function extractVideoInfoV3WithSegmentation(
812
  repoId: string,
 
815
  episodeMetadata: EpisodeMetadataV3,
816
  ): VideoInfo[] {
817
  // Get video features from dataset info
818
+ const videoFeatures = Object.entries(info.features).filter(
819
+ ([, value]) => value.dtype === "video",
820
+ );
821
 
822
  const videosInfo = videoFeatures.map(([videoKey]) => {
823
  // Check if we have per-camera metadata in the episode row
824
+ const cameraSpecificKeys = Object.keys(episodeMetadata).filter((key) =>
825
+ key.startsWith(`videos/${videoKey}/`),
826
  );
827
 
828
  let chunkIndex: number, fileIndex: number, segmentStart: number, segmentEnd: number;
 
841
  segmentEnd = episodeMetadata.video_to_timestamp || 30;
842
  }
843
 
844
+ // Convert BigInt to number for timestamps
845
+ const startNum = bigIntToNumber(segmentStart);
846
+ const endNum = bigIntToNumber(segmentEnd);
847
+
848
+ const videoPath = buildV3VideoPath(
849
+ videoKey,
850
+ bigIntToNumber(chunkIndex, 0),
851
+ bigIntToNumber(fileIndex, 0),
852
+ );
853
  const fullUrl = buildVersionedUrl(repoId, version, videoPath);
854
 
855
  return {
 
857
  url: fullUrl,
858
  // Enable segmentation with timestamps from metadata
859
  isSegmented: true,
860
+ segmentStart: startNum,
861
+ segmentEnd: endNum,
862
+ segmentDuration: endNum - startNum,
863
  };
864
  });
865
 
 
881
 
882
  // Try loading episode metadata files until we find the episode
883
  while (!episodeRow) {
884
+ const episodesMetadataPath = buildV3EpisodesMetadataPath(
885
+ chunkIndex,
886
+ fileIndex,
887
+ );
888
+ const episodesMetadataUrl = buildVersionedUrl(
889
+ repoId,
890
+ version,
891
+ episodesMetadataPath,
892
+ );
893
 
894
  try {
895
  const arrayBuffer = await fetchParquetFile(episodesMetadataUrl);
 
915
  // Not in this file, try the next one
916
  fileIndex++;
917
  }
918
+ } catch {
919
  // File doesn't exist - episode not found
920
+ throw new Error(
921
+ `Episode ${episodeId} not found in metadata (searched up to file-${fileIndex.toString().padStart(PADDING.CHUNK_INDEX, "0")}.parquet)`,
922
+ );
923
  }
924
  }
925
 
 
930
  // Simple parser for episode row - focuses on key fields for episodes
931
  function parseEpisodeRowSimple(row: Record<string, unknown>): EpisodeMetadataV3 {
932
  // v3.0 uses named keys in the episode metadata
933
+ if (row && typeof row === "object") {
934
  // Check if this is v3.0 format with named keys
935
+ if ("episode_index" in row) {
936
  // v3.0 format - use named keys
937
  // Convert BigInt values to numbers
938
  const toBigIntSafe = (value: unknown): number => {
 
981
  }
982
  });
983
 
984
+ return episodeData as EpisodeMetadataV3;
985
  } else {
986
  // Fallback to numeric keys for compatibility
987
  const toNum = (v: unknown, fallback = 0): number =>
 
1020
 
1021
 
1022
 
 
1023
  // ─── Stats computation ───────────────────────────────────────────
1024
 
1025
  /**
src/app/[org]/[dataset]/page.tsx CHANGED
@@ -6,10 +6,10 @@ export default async function DatasetRootPage({
6
  params: Promise<{ org: string; dataset: string }>;
7
  }) {
8
  const { org, dataset } = await params;
9
- const episodeN = process.env.EPISODES
10
- ?.split(/\s+/)
11
- .map((x) => parseInt(x.trim(), 10))
12
- .filter((x) => !isNaN(x))[0] ?? 0;
13
 
14
  redirect(`/${org}/${dataset}/episode_${episodeN}`);
15
  }
 
6
  params: Promise<{ org: string; dataset: string }>;
7
  }) {
8
  const { org, dataset } = await params;
9
+ const episodeN =
10
+ process.env.EPISODES?.split(/\s+/)
11
+ .map((x) => parseInt(x.trim(), 10))
12
+ .filter((x) => !isNaN(x))[0] ?? 0;
13
 
14
  redirect(`/${org}/${dataset}/episode_${episodeN}`);
15
  }
src/app/explore/explore-grid.tsx CHANGED
@@ -2,8 +2,6 @@
2
 
3
  import React, { useEffect, useRef } from "react";
4
  import Link from "next/link";
5
-
6
- import { useRouter, useSearchParams } from "next/navigation";
7
  import { postParentMessageWithParams } from "@/utils/postParentMessage";
8
 
9
  type ExploreGridProps = {
 
2
 
3
  import React, { useEffect, useRef } from "react";
4
  import Link from "next/link";
 
 
5
  import { postParentMessageWithParams } from "@/utils/postParentMessage";
6
 
7
  type ExploreGridProps = {
src/app/explore/page.tsx CHANGED
@@ -1,17 +1,15 @@
1
  import React from "react";
2
  import ExploreGrid from "./explore-grid";
3
- import {
4
- DatasetMetadata,
5
- fetchJson,
6
- formatStringWithVars,
7
- } from "@/utils/parquetUtils";
8
  import { getDatasetVersion, buildVersionedUrl } from "@/utils/versionUtils";
 
9
 
10
  export default async function ExplorePage({
11
  searchParams,
12
  }: {
13
- searchParams: { p?: string };
14
  }) {
 
15
  let datasets: { id: string }[] = [];
16
  let currentPage = 1;
17
  let totalPages = 1;
@@ -25,8 +23,8 @@ export default async function ExplorePage({
25
  if (!res.ok) throw new Error("Failed to fetch datasets");
26
  const data = await res.json();
27
  const allDatasets = data.datasets || data;
28
- // Use searchParams from props
29
- const page = parseInt(searchParams?.p || "1", 10);
30
  const perPage = 30;
31
 
32
  currentPage = page;
@@ -46,24 +44,26 @@ export default async function ExplorePage({
46
  try {
47
  const [org, dataset] = ds.id.split("/");
48
  const repoId = `${org}/${dataset}`;
49
-
50
  // Try to get compatible version, but don't fail the entire page if incompatible
51
  let version: string;
52
  try {
53
  version = await getDatasetVersion(repoId);
54
  } catch (err) {
55
  // Dataset is not compatible, skip it silently
56
- console.warn(`Skipping incompatible dataset ${repoId}: ${err instanceof Error ? err.message : err}`);
 
 
57
  return null;
58
  }
59
-
60
  const jsonUrl = buildVersionedUrl(repoId, version, "meta/info.json");
61
  const info = await fetchJson<DatasetMetadata>(jsonUrl);
62
  const videoEntry = Object.entries(info.features).find(
63
  ([, value]) => value.dtype === "video",
64
  );
65
  let videoUrl: string | null = null;
66
- if (videoEntry) {
67
  const [key] = videoEntry;
68
  const videoPath = formatStringWithVars(info.video_path, {
69
  video_key: key,
 
1
  import React from "react";
2
  import ExploreGrid from "./explore-grid";
3
+ import { fetchJson, formatStringWithVars } from "@/utils/parquetUtils";
 
 
 
 
4
  import { getDatasetVersion, buildVersionedUrl } from "@/utils/versionUtils";
5
+ import type { DatasetMetadata } from "@/utils/parquetUtils";
6
 
7
  export default async function ExplorePage({
8
  searchParams,
9
  }: {
10
+ searchParams: Promise<{ p?: string }>;
11
  }) {
12
+ const params = await searchParams;
13
  let datasets: { id: string }[] = [];
14
  let currentPage = 1;
15
  let totalPages = 1;
 
23
  if (!res.ok) throw new Error("Failed to fetch datasets");
24
  const data = await res.json();
25
  const allDatasets = data.datasets || data;
26
+ // Use params from props
27
+ const page = parseInt(params?.p || "1", 10);
28
  const perPage = 30;
29
 
30
  currentPage = page;
 
44
  try {
45
  const [org, dataset] = ds.id.split("/");
46
  const repoId = `${org}/${dataset}`;
47
+
48
  // Try to get compatible version, but don't fail the entire page if incompatible
49
  let version: string;
50
  try {
51
  version = await getDatasetVersion(repoId);
52
  } catch (err) {
53
  // Dataset is not compatible, skip it silently
54
+ console.warn(
55
+ `Skipping incompatible dataset ${repoId}: ${err instanceof Error ? err.message : err}`,
56
+ );
57
  return null;
58
  }
59
+
60
  const jsonUrl = buildVersionedUrl(repoId, version, "meta/info.json");
61
  const info = await fetchJson<DatasetMetadata>(jsonUrl);
62
  const videoEntry = Object.entries(info.features).find(
63
  ([, value]) => value.dtype === "video",
64
  );
65
  let videoUrl: string | null = null;
66
+ if (videoEntry && info.video_path) {
67
  const [key] = videoEntry;
68
  const videoPath = formatStringWithVars(info.video_path, {
69
  video_key: key,
src/app/page.tsx CHANGED
@@ -27,31 +27,31 @@ function HomeInner() {
27
  useEffect(() => {
28
  // Redirect to the first episode of the dataset if REPO_ID is defined
29
  if (process.env.REPO_ID) {
30
- const episodeN = process.env.EPISODES
31
- ?.split(/\s+/)
32
- .map((x) => parseInt(x.trim(), 10))
33
- .filter((x) => !isNaN(x))[0] ?? 0;
34
 
35
  router.push(`/${process.env.REPO_ID}/episode_${episodeN}`);
36
  return;
37
  }
38
-
39
  // sync with hf.co/spaces URL params
40
- if (searchParams.get('path')) {
41
- router.push(searchParams.get('path')!);
42
  return;
43
  }
44
 
45
  // legacy sync with hf.co/spaces URL params
46
  let redirectUrl: string | null = null;
47
- if (searchParams.get('dataset') && searchParams.get('episode')) {
48
- redirectUrl = `/${searchParams.get('dataset')}/episode_${searchParams.get('episode')}`;
49
- } else if (searchParams.get('dataset')) {
50
- redirectUrl = `/${searchParams.get('dataset')}`;
51
  }
52
 
53
- if (redirectUrl && searchParams.get('t')) {
54
- redirectUrl += `?t=${searchParams.get('t')}`;
55
  }
56
 
57
  if (redirectUrl) {
 
27
  useEffect(() => {
28
  // Redirect to the first episode of the dataset if REPO_ID is defined
29
  if (process.env.REPO_ID) {
30
+ const episodeN =
31
+ process.env.EPISODES?.split(/\s+/)
32
+ .map((x) => parseInt(x.trim(), 10))
33
+ .filter((x) => !isNaN(x))[0] ?? 0;
34
 
35
  router.push(`/${process.env.REPO_ID}/episode_${episodeN}`);
36
  return;
37
  }
38
+
39
  // sync with hf.co/spaces URL params
40
+ if (searchParams.get("path")) {
41
+ router.push(searchParams.get("path")!);
42
  return;
43
  }
44
 
45
  // legacy sync with hf.co/spaces URL params
46
  let redirectUrl: string | null = null;
47
+ if (searchParams.get("dataset") && searchParams.get("episode")) {
48
+ redirectUrl = `/${searchParams.get("dataset")}/episode_${searchParams.get("episode")}`;
49
+ } else if (searchParams.get("dataset")) {
50
+ redirectUrl = `/${searchParams.get("dataset")}`;
51
  }
52
 
53
+ if (redirectUrl && searchParams.get("t")) {
54
+ redirectUrl += `?t=${searchParams.get("t")}`;
55
  }
56
 
57
  if (redirectUrl) {
src/components/data-recharts.tsx CHANGED
@@ -54,14 +54,14 @@ export const DataRecharts = React.memo(
54
  const [hoveredTime, setHoveredTime] = useState<number | null>(null);
55
  const [expanded, setExpanded] = useState(false);
56
 
57
- if (!Array.isArray(data) || data.length === 0) return null;
58
-
59
  useEffect(() => {
60
  if (typeof onChartsReady === "function") onChartsReady();
61
  }, [onChartsReady]);
62
 
63
  const combinedData = useMemo(() => expanded ? mergeGroups(data) : [], [data, expanded]);
64
 
 
 
65
  return (
66
  <div>
67
  {data.length > 1 && (
@@ -101,7 +101,6 @@ export const DataRecharts = React.memo(
101
  },
102
  );
103
 
104
-
105
  const SingleDataGraph = React.memo(
106
  ({
107
  data,
@@ -125,9 +124,19 @@ const SingleDataGraph = React.memo(
125
  } else {
126
  result[key] = value;
127
  }
128
- } else if (value !== null && typeof value === "object" && !Array.isArray(value)) {
 
 
 
 
129
  // If it's an object, recurse
130
- Object.assign(result, flattenRow(value, prefix ? `${prefix}${SERIES_NAME_DELIMITER}${key}` : key));
 
 
 
 
 
 
131
  }
132
  }
133
  if ("timestamp" in row && typeof row["timestamp"] === "number") {
@@ -137,7 +146,7 @@ const SingleDataGraph = React.memo(
137
  }
138
 
139
  // Flatten all rows for recharts
140
- const chartData = useMemo(() => data.map(row => flattenRow(row)), [data]);
141
  const [dataKeys, setDataKeys] = useState<string[]>([]);
142
  const [visibleKeys, setVisibleKeys] = useState<string[]>([]);
143
 
@@ -216,22 +225,29 @@ const SingleDataGraph = React.memo(
216
  groupColorMap[group] = CHART_COLORS[idx % CHART_COLORS.length];
217
  });
218
 
219
- const isGroupChecked = (group: string) => groups[group].every(k => visibleKeys.includes(k));
220
- const isGroupIndeterminate = (group: string) => groups[group].some(k => visibleKeys.includes(k)) && !isGroupChecked(group);
 
 
 
221
 
222
  const handleGroupCheckboxChange = (group: string) => {
223
  if (isGroupChecked(group)) {
224
  // Uncheck all children
225
- setVisibleKeys((prev) => prev.filter(k => !groups[group].includes(k)));
 
 
226
  } else {
227
  // Check all children
228
- setVisibleKeys((prev) => Array.from(new Set([...prev, ...groups[group]])));
 
 
229
  }
230
  };
231
 
232
  const handleCheckboxChange = (key: string) => {
233
  setVisibleKeys((prev) =>
234
- prev.includes(key) ? prev.filter((k) => k !== key) : [...prev, key]
235
  );
236
  };
237
 
@@ -245,7 +261,9 @@ const SingleDataGraph = React.memo(
245
  <input
246
  type="checkbox"
247
  checked={isGroupChecked(group)}
248
- ref={el => { if (el) el.indeterminate = isGroupIndeterminate(group); }}
 
 
249
  onChange={() => handleGroupCheckboxChange(group)}
250
  className="size-3"
251
  style={{ accentColor: color }}
 
54
  const [hoveredTime, setHoveredTime] = useState<number | null>(null);
55
  const [expanded, setExpanded] = useState(false);
56
 
 
 
57
  useEffect(() => {
58
  if (typeof onChartsReady === "function") onChartsReady();
59
  }, [onChartsReady]);
60
 
61
  const combinedData = useMemo(() => expanded ? mergeGroups(data) : [], [data, expanded]);
62
 
63
+ if (!Array.isArray(data) || data.length === 0) return null;
64
+
65
  return (
66
  <div>
67
  {data.length > 1 && (
 
101
  },
102
  );
103
 
 
104
  const SingleDataGraph = React.memo(
105
  ({
106
  data,
 
124
  } else {
125
  result[key] = value;
126
  }
127
+ } else if (
128
+ value !== null &&
129
+ typeof value === "object" &&
130
+ !Array.isArray(value)
131
+ ) {
132
  // If it's an object, recurse
133
+ Object.assign(
134
+ result,
135
+ flattenRow(
136
+ value,
137
+ prefix ? `${prefix}${SERIES_NAME_DELIMITER}${key}` : key,
138
+ ),
139
+ );
140
  }
141
  }
142
  if ("timestamp" in row && typeof row["timestamp"] === "number") {
 
146
  }
147
 
148
  // Flatten all rows for recharts
149
+ const chartData = useMemo(() => data.map((row) => flattenRow(row)), [data]);
150
  const [dataKeys, setDataKeys] = useState<string[]>([]);
151
  const [visibleKeys, setVisibleKeys] = useState<string[]>([]);
152
 
 
225
  groupColorMap[group] = CHART_COLORS[idx % CHART_COLORS.length];
226
  });
227
 
228
+ const isGroupChecked = (group: string) =>
229
+ groups[group].every((k) => visibleKeys.includes(k));
230
+ const isGroupIndeterminate = (group: string) =>
231
+ groups[group].some((k) => visibleKeys.includes(k)) &&
232
+ !isGroupChecked(group);
233
 
234
  const handleGroupCheckboxChange = (group: string) => {
235
  if (isGroupChecked(group)) {
236
  // Uncheck all children
237
+ setVisibleKeys((prev) =>
238
+ prev.filter((k) => !groups[group].includes(k)),
239
+ );
240
  } else {
241
  // Check all children
242
+ setVisibleKeys((prev) =>
243
+ Array.from(new Set([...prev, ...groups[group]])),
244
+ );
245
  }
246
  };
247
 
248
  const handleCheckboxChange = (key: string) => {
249
  setVisibleKeys((prev) =>
250
+ prev.includes(key) ? prev.filter((k) => k !== key) : [...prev, key],
251
  );
252
  };
253
 
 
261
  <input
262
  type="checkbox"
263
  checked={isGroupChecked(group)}
264
+ ref={(el) => {
265
+ if (el) el.indeterminate = isGroupIndeterminate(group);
266
+ }}
267
  onChange={() => handleGroupCheckboxChange(group)}
268
  className="size-3"
269
  style={{ accentColor: color }}
src/components/simple-videos-player.tsx CHANGED
@@ -5,6 +5,11 @@ import { useTime } from "../context/time-context";
5
  import { FaExpand, FaCompress, FaTimes, FaEye } from "react-icons/fa";
6
  import type { VideoInfo } from "@/app/[org]/[dataset]/[episode]/fetch-data";
7
 
 
 
 
 
 
8
  type VideoPlayerProps = {
9
  videosInfo: VideoInfo[];
10
  onVideosReady?: () => void;
@@ -22,9 +27,9 @@ export const SimpleVideosPlayer = ({
22
  const [enlargedVideo, setEnlargedVideo] = React.useState<string | null>(null);
23
  const [showHiddenMenu, setShowHiddenMenu] = React.useState(false);
24
  const [videosReady, setVideosReady] = React.useState(false);
25
-
26
  const firstVisibleIdx = videosInfo.findIndex(
27
- (video) => !hiddenVideos.includes(video.filename)
28
  );
29
 
30
  // Tracks the last time value set by the primary video's onTimeUpdate.
@@ -39,7 +44,7 @@ export const SimpleVideosPlayer = ({
39
  // Handle videos ready
40
  useEffect(() => {
41
  let readyCount = 0;
42
-
43
  const checkReady = () => {
44
  readyCount++;
45
  if (readyCount === videosInfo.length && onVideosReady) {
@@ -52,14 +57,17 @@ export const SimpleVideosPlayer = ({
52
  videoRefs.current.forEach((video, index) => {
53
  if (video) {
54
  const info = videosInfo[index];
55
-
56
  // Setup segment boundaries
57
  if (info.isSegmented) {
58
  const handleTimeUpdate = () => {
59
  const segmentEnd = info.segmentEnd || video.duration;
60
  const segmentStart = info.segmentStart || 0;
61
-
62
- if (video.currentTime >= segmentEnd - 0.05) {
 
 
 
63
  video.currentTime = segmentStart;
64
  // Also update the global time to reset to start
65
  if (index === firstVisibleIdx) {
@@ -67,7 +75,7 @@ export const SimpleVideosPlayer = ({
67
  }
68
  }
69
  };
70
-
71
  const handleLoadedData = () => {
72
  video.currentTime = info.segmentStart || 0;
73
  checkReady();
@@ -109,17 +117,23 @@ export const SimpleVideosPlayer = ({
109
  }
110
  });
111
  };
112
- }, [videosInfo, onVideosReady, setIsPlaying, firstVisibleIdx, setCurrentTime]);
 
 
 
 
 
 
113
 
114
  // Handle play/pause
115
  useEffect(() => {
116
  if (!videosReady) return;
117
-
118
  videoRefs.current.forEach((video, idx) => {
119
  if (video && !hiddenVideos.includes(videosInfo[idx].filename)) {
120
  if (isPlaying) {
121
- video.play().catch(e => {
122
- if (e.name !== 'AbortError') {
123
  console.error("Error playing video");
124
  }
125
  });
@@ -160,9 +174,9 @@ export const SimpleVideosPlayer = ({
160
  // Handle time update from first visible video
161
  const handleTimeUpdate = (e: React.SyntheticEvent<HTMLVideoElement>) => {
162
  const video = e.target as HTMLVideoElement;
163
- const videoIndex = videoRefs.current.findIndex(ref => ref === video);
164
  const info = videosInfo[videoIndex];
165
-
166
  if (info) {
167
  let globalTime = video.currentTime;
168
  if (info.isSegmented) {
@@ -178,7 +192,7 @@ export const SimpleVideosPlayer = ({
178
  if (info.isSegmented) {
179
  const segmentStart = info.segmentStart || 0;
180
  const segmentEnd = info.segmentEnd || video.duration;
181
-
182
  if (video.currentTime < segmentStart || video.currentTime >= segmentEnd) {
183
  video.currentTime = segmentStart;
184
  }
@@ -206,7 +220,11 @@ export const SimpleVideosPlayer = ({
206
  <button
207
  key={filename}
208
  className="block w-full text-left px-2 py-1 rounded hover:bg-slate-700 text-slate-100"
209
- onClick={() => setHiddenVideos(prev => prev.filter(v => v !== filename))}
 
 
 
 
210
  >
211
  {filename}
212
  </button>
@@ -220,10 +238,10 @@ export const SimpleVideosPlayer = ({
220
  <div className="flex flex-wrap gap-x-2 gap-y-6">
221
  {videosInfo.map((info, idx) => {
222
  if (hiddenVideos.includes(info.filename)) return null;
223
-
224
  const isEnlarged = enlargedVideo === info.filename;
225
  const isFirstVisible = idx === firstVisibleIdx;
226
-
227
  return (
228
  <div
229
  key={info.filename}
@@ -239,15 +257,23 @@ export const SimpleVideosPlayer = ({
239
  <button
240
  title={isEnlarged ? "Minimize" : "Enlarge"}
241
  className="ml-2 p-1 hover:bg-slate-700 rounded"
242
- onClick={() => setEnlargedVideo(isEnlarged ? null : info.filename)}
 
 
243
  >
244
  {isEnlarged ? <FaCompress /> : <FaExpand />}
245
  </button>
246
  <button
247
  title="Hide Video"
248
  className="ml-1 p-1 hover:bg-slate-700 rounded"
249
- onClick={() => setHiddenVideos(prev => [...prev, info.filename])}
250
- disabled={videosInfo.filter(v => !hiddenVideos.includes(v.filename)).length === 1}
 
 
 
 
 
 
251
  >
252
  <FaTimes />
253
  </button>
 
5
  import { FaExpand, FaCompress, FaTimes, FaEye } from "react-icons/fa";
6
  import type { VideoInfo } from "@/app/[org]/[dataset]/[episode]/fetch-data";
7
 
8
+ const THRESHOLDS = {
9
+ VIDEO_SYNC_TOLERANCE: 0.2,
10
+ VIDEO_SEGMENT_BOUNDARY: 0.05,
11
+ };
12
+
13
  type VideoPlayerProps = {
14
  videosInfo: VideoInfo[];
15
  onVideosReady?: () => void;
 
27
  const [enlargedVideo, setEnlargedVideo] = React.useState<string | null>(null);
28
  const [showHiddenMenu, setShowHiddenMenu] = React.useState(false);
29
  const [videosReady, setVideosReady] = React.useState(false);
30
+
31
  const firstVisibleIdx = videosInfo.findIndex(
32
+ (video) => !hiddenVideos.includes(video.filename),
33
  );
34
 
35
  // Tracks the last time value set by the primary video's onTimeUpdate.
 
44
  // Handle videos ready
45
  useEffect(() => {
46
  let readyCount = 0;
47
+
48
  const checkReady = () => {
49
  readyCount++;
50
  if (readyCount === videosInfo.length && onVideosReady) {
 
57
  videoRefs.current.forEach((video, index) => {
58
  if (video) {
59
  const info = videosInfo[index];
60
+
61
  // Setup segment boundaries
62
  if (info.isSegmented) {
63
  const handleTimeUpdate = () => {
64
  const segmentEnd = info.segmentEnd || video.duration;
65
  const segmentStart = info.segmentStart || 0;
66
+
67
+ if (
68
+ video.currentTime >=
69
+ segmentEnd - THRESHOLDS.VIDEO_SEGMENT_BOUNDARY
70
+ ) {
71
  video.currentTime = segmentStart;
72
  // Also update the global time to reset to start
73
  if (index === firstVisibleIdx) {
 
75
  }
76
  }
77
  };
78
+
79
  const handleLoadedData = () => {
80
  video.currentTime = info.segmentStart || 0;
81
  checkReady();
 
117
  }
118
  });
119
  };
120
+ }, [
121
+ videosInfo,
122
+ onVideosReady,
123
+ setIsPlaying,
124
+ firstVisibleIdx,
125
+ setCurrentTime,
126
+ ]);
127
 
128
  // Handle play/pause
129
  useEffect(() => {
130
  if (!videosReady) return;
131
+
132
  videoRefs.current.forEach((video, idx) => {
133
  if (video && !hiddenVideos.includes(videosInfo[idx].filename)) {
134
  if (isPlaying) {
135
+ video.play().catch((e) => {
136
+ if (e.name !== "AbortError") {
137
  console.error("Error playing video");
138
  }
139
  });
 
174
  // Handle time update from first visible video
175
  const handleTimeUpdate = (e: React.SyntheticEvent<HTMLVideoElement>) => {
176
  const video = e.target as HTMLVideoElement;
177
+ const videoIndex = videoRefs.current.findIndex((ref) => ref === video);
178
  const info = videosInfo[videoIndex];
179
+
180
  if (info) {
181
  let globalTime = video.currentTime;
182
  if (info.isSegmented) {
 
192
  if (info.isSegmented) {
193
  const segmentStart = info.segmentStart || 0;
194
  const segmentEnd = info.segmentEnd || video.duration;
195
+
196
  if (video.currentTime < segmentStart || video.currentTime >= segmentEnd) {
197
  video.currentTime = segmentStart;
198
  }
 
220
  <button
221
  key={filename}
222
  className="block w-full text-left px-2 py-1 rounded hover:bg-slate-700 text-slate-100"
223
+ onClick={() =>
224
+ setHiddenVideos((prev) =>
225
+ prev.filter((v) => v !== filename),
226
+ )
227
+ }
228
  >
229
  {filename}
230
  </button>
 
238
  <div className="flex flex-wrap gap-x-2 gap-y-6">
239
  {videosInfo.map((info, idx) => {
240
  if (hiddenVideos.includes(info.filename)) return null;
241
+
242
  const isEnlarged = enlargedVideo === info.filename;
243
  const isFirstVisible = idx === firstVisibleIdx;
244
+
245
  return (
246
  <div
247
  key={info.filename}
 
257
  <button
258
  title={isEnlarged ? "Minimize" : "Enlarge"}
259
  className="ml-2 p-1 hover:bg-slate-700 rounded"
260
+ onClick={() =>
261
+ setEnlargedVideo(isEnlarged ? null : info.filename)
262
+ }
263
  >
264
  {isEnlarged ? <FaCompress /> : <FaExpand />}
265
  </button>
266
  <button
267
  title="Hide Video"
268
  className="ml-1 p-1 hover:bg-slate-700 rounded"
269
+ onClick={() =>
270
+ setHiddenVideos((prev) => [...prev, info.filename])
271
+ }
272
+ disabled={
273
+ videosInfo.filter(
274
+ (v) => !hiddenVideos.includes(v.filename),
275
+ ).length === 1
276
+ }
277
  >
278
  <FaTimes />
279
  </button>
src/components/videos-player.tsx CHANGED
@@ -178,7 +178,7 @@ export const VideosPlayer = ({
178
  if (video && video.duration) {
179
  const videoIndex = videoRefs.current.findIndex(ref => ref === video);
180
  const videoInfo = videosInfo[videoIndex];
181
-
182
  if (videoInfo?.isSegmented) {
183
  const segmentStart = videoInfo.segmentStart || 0;
184
  const globalTime = Math.max(0, video.currentTime - segmentStart);
@@ -197,18 +197,20 @@ export const VideosPlayer = ({
197
  const onCanPlayThrough = (videoIndex: number) => {
198
  const video = videoRefs.current[videoIndex];
199
  const videoInfo = videosInfo[videoIndex];
200
-
201
  // Setup video segmentation for v3.0 chunked videos
202
  if (video && videoInfo?.isSegmented) {
203
  const segmentStart = videoInfo.segmentStart || 0;
204
  const segmentEnd = videoInfo.segmentEnd || video.duration || 0;
205
-
206
-
207
  // Set initial time to segment start if not already set
208
- if (video.currentTime < segmentStart || video.currentTime > segmentEnd) {
 
 
 
209
  video.currentTime = segmentStart;
210
  }
211
-
212
  // Add event listener to handle segment boundaries
213
  const handleTimeUpdate = () => {
214
  if (video.currentTime > segmentEnd) {
@@ -225,7 +227,7 @@ export const VideosPlayer = ({
225
  video.removeEventListener('timeupdate', handleTimeUpdate);
226
  });
227
  }
228
-
229
  videosReadyCount += 1;
230
  if (videosReadyCount === videosInfo.length) {
231
  if (typeof onVideosReady === "function") {
 
178
  if (video && video.duration) {
179
  const videoIndex = videoRefs.current.findIndex(ref => ref === video);
180
  const videoInfo = videosInfo[videoIndex];
181
+
182
  if (videoInfo?.isSegmented) {
183
  const segmentStart = videoInfo.segmentStart || 0;
184
  const globalTime = Math.max(0, video.currentTime - segmentStart);
 
197
  const onCanPlayThrough = (videoIndex: number) => {
198
  const video = videoRefs.current[videoIndex];
199
  const videoInfo = videosInfo[videoIndex];
200
+
201
  // Setup video segmentation for v3.0 chunked videos
202
  if (video && videoInfo?.isSegmented) {
203
  const segmentStart = videoInfo.segmentStart || 0;
204
  const segmentEnd = videoInfo.segmentEnd || video.duration || 0;
205
+
 
206
  // Set initial time to segment start if not already set
207
+ if (
208
+ video.currentTime < segmentStart ||
209
+ video.currentTime > segmentEnd
210
+ ) {
211
  video.currentTime = segmentStart;
212
  }
213
+
214
  // Add event listener to handle segment boundaries
215
  const handleTimeUpdate = () => {
216
  if (video.currentTime > segmentEnd) {
 
227
  video.removeEventListener('timeupdate', handleTimeUpdate);
228
  });
229
  }
230
+
231
  videosReadyCount += 1;
232
  if (videosReadyCount === videosInfo.length) {
233
  if (typeof onVideosReady === "function") {
src/types/chart.types.ts ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Chart and data visualization type definitions
3
+ */
4
+
5
/**
 * A single chart sample.
 *
 * `timestamp` is always present. Every other key maps either to one numeric
 * value or to a nested record of named numeric values (hierarchical data).
 */
export interface ChartDataPoint {
  timestamp: number;
  [key: string]: number | Record<string, number>;
}

/** A chart data group: an array of chart data points. */
export type ChartDataGroup = ChartDataPoint[];

/** A series column definition: a key together with its series names. */
export interface SeriesColumn {
  key: string;
  /** Series names. */
  value: string[];
}

/** Minimum and maximum of a group, used for scale calculation. */
export interface GroupStats {
  min: number;
  max: number;
}
src/types/dataset.types.ts ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Dataset type definitions for LeRobot datasets
3
+ * Based on the LeRobot dataset format (v2.0, v2.1, v3.0)
4
+ */
5
+
6
/** Dataset format versions covered by these type definitions. */
export type DatasetVersion = "v2.0" | "v2.1" | "v3.0";

/** Data types a feature can declare. */
export type FeatureDType = "video" | "float32" | "int32" | "int64" | "bool";

/** A video feature. */
export interface VideoFeature {
  dtype: "video";
  /** `[height, width, channels]` */
  shape: [number, number, number];
  names: ["height", "width", "channel"];
  video_info?: {
    "video.fps": number;
    "video.codec": string;
    "video.pix_fmt": string;
    "video.is_depth_map": boolean;
    has_audio: boolean;
  };
}

/** A numeric feature (state, action, etc.). */
export interface NumericFeature {
  dtype: "float32" | "int32" | "int64";
  shape: number[];
  names: string[] | { motors: string[] } | { [key: string]: string[] } | null;
  fps?: number;
}

/** A boolean feature. */
export interface BooleanFeature {
  dtype: "bool";
  shape: number[];
  names: null;
  fps?: number;
}

/** Union of all feature types, discriminated by `dtype`. */
export type Feature = VideoFeature | NumericFeature | BooleanFeature;

/** Complete dataset metadata. */
export interface DatasetMetadata {
  codebase_version: DatasetVersion;
  robot_type: string;
  total_episodes: number;
  total_frames: number;
  total_tasks: number;
  total_videos?: number;
  total_chunks?: number;
  chunks_size: number;
  fps: number;
  splits: Record<string, string>;
  data_path: string;
  video_path: string | null;
  features: Record<string, Feature>;
  data_files_size_in_mb?: number;
  video_files_size_in_mb?: number;
}

/** The subset of dataset information used in components. */
export interface DatasetInfo {
  repoId: string;
  total_frames: number;
  total_episodes: number;
  fps: number;
}
src/types/episode.types.ts ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Episode type definitions for LeRobot datasets
3
+ */
4
+
5
+ import type { DatasetInfo } from "./dataset.types";
6
+ import type { VideoInfo } from "./video.types";
7
+ import type { ChartDataGroup } from "./chart.types";
8
+
9
/**
 * Episode metadata for v3.0 datasets.
 *
 * Index-like fields may be either `number` or `bigint`. The index signature
 * additionally allows optional per-camera metadata keys.
 */
export interface EpisodeMetadataV3 {
  episode_index: number | bigint;
  data_chunk_index: number | bigint;
  data_file_index: number | bigint;
  dataset_from_index: number | bigint;
  dataset_to_index: number | bigint;
  video_chunk_index?: number | bigint;
  video_file_index?: number | bigint;
  video_from_timestamp?: number;
  video_to_timestamp?: number;
  length: number | bigint;
  /** Per-camera metadata (optional). */
  [key: string]: number | bigint | undefined;
}

/** Episode metadata for v2.x datasets (simpler structure). */
export interface EpisodeMetadataV2 {
  episode_chunk: number;
  episode_index: number;
}

/** Task metadata: a task index paired with its task string. */
export interface TaskMetadata {
  task_index: number | bigint;
  task: string;
}

/**
 * Language instruction data: an optional primary `language_instruction` plus
 * optional numbered `language_instruction_<n>` fields.
 */
export interface LanguageInstruction {
  language_instruction?: string;
  [key: `language_instruction_${number}`]: string | undefined;
}

/** Episode data returned to components. */
export interface EpisodeData {
  datasetInfo: DatasetInfo;
  episodeId: number;
  videosInfo: VideoInfo[];
  chartDataGroups: ChartDataGroup[];
  episodes: number[];
  ignoredColumns: string[];
  duration: number;
  task?: string;
}

/** Raw parquet row structure; unknown additional fields are permitted. */
export interface ParquetDataRow {
  timestamp?: number;
  episode_index?: number | bigint;
  frame_index?: number | bigint;
  index?: number | bigint;
  task_index?: number | bigint;
  "observation.state"?: number[];
  action?: number[];
  "next.reward"?: number;
  "next.done"?: boolean;
  language_instruction?: string;
  /** Additional fields. */
  [key: string]: unknown;
}
src/types/index.ts ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Central export for all type definitions
3
+ */
4
+
5
+ // Dataset types
6
+ export type {
7
+ DatasetVersion,
8
+ FeatureDType,
9
+ VideoFeature,
10
+ NumericFeature,
11
+ BooleanFeature,
12
+ Feature,
13
+ DatasetMetadata,
14
+ DatasetInfo,
15
+ } from "./dataset.types";
16
+
17
+ // Episode types
18
+ export type {
19
+ EpisodeMetadataV3,
20
+ EpisodeMetadataV2,
21
+ TaskMetadata,
22
+ LanguageInstruction,
23
+ EpisodeData,
24
+ ParquetDataRow,
25
+ } from "./episode.types";
26
+
27
+ // Video types
28
+ export type { VideoInfo, AdjacentEpisodeVideos } from "./video.types";
29
+
30
+ // Chart types
31
+ export type {
32
+ ChartDataPoint,
33
+ ChartDataGroup,
34
+ SeriesColumn,
35
+ GroupStats,
36
+ } from "./chart.types";
src/types/video.types.ts ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Video type definitions
3
+ */
4
+
5
/**
 * Video information structure.
 *
 * The `segment*` fields are optional and accompany `isSegmented`.
 */
export interface VideoInfo {
  filename: string;
  url: string;
  isSegmented?: boolean;
  segmentStart?: number;
  segmentEnd?: number;
  segmentDuration?: number;
}

/** Video info of an adjacent episode, used for preloading. */
export interface AdjacentEpisodeVideos {
  episodeId: number;
  videosInfo: VideoInfo[];
}
src/utils/constants.ts ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Centralized constants for the lerobot-dataset-visualizer
3
+ * Eliminates magic numbers and provides single source of truth for configuration
4
+ */
5
+
6
/** Formatting constants for episode and file indexing. */
export const PADDING = {
  EPISODE_CHUNK: 3,
  EPISODE_INDEX: 6,
  FILE_INDEX: 3,
  CHUNK_INDEX: 3,
} as const;

/** Numeric thresholds for data processing. */
export const THRESHOLDS = {
  SCALE_GROUPING: 2,
  EPSILON: 1e-9,
  VIDEO_SYNC_TOLERANCE: 0.2,
  VIDEO_SEGMENT_BOUNDARY: 0.05,
} as const;

/** Chart configuration. */
export const CHART_CONFIG = {
  MAX_SERIES_PER_GROUP: 6,
  SERIES_NAME_DELIMITER: " | ",
} as const;

/** Video player configuration. */
export const VIDEO_PLAYER = {
  JUMP_SECONDS: 5,
  STEP_SIZE: 0.01,
  DEBOUNCE_MS: 200,
} as const;

/** HTTP configuration. */
export const HTTP = {
  TIMEOUT_MS: 10000,
} as const;

/** Excluded columns, keyed by dataset version. */
export const EXCLUDED_COLUMNS = {
  V2: ["timestamp", "frame_index", "episode_index", "index", "task_index"],
  V3: ["index", "task_index", "episode_index", "frame_index", "next.done"],
} as const;
src/utils/dataProcessing.ts ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Data processing utilities for chart data grouping and transformation
3
+ * Consolidates duplicated logic from fetch-data.ts
4
+ */
5
+
6
+ import { CHART_CONFIG, THRESHOLDS } from "./constants";
7
+ import type { GroupStats } from "@/types";
8
+
9
/**
 * Regroups a flat row so that series sharing a suffix are nested together.
 *
 * A key is split on `CHART_CONFIG.SERIES_NAME_DELIMITER`. Only keys that split
 * into exactly two parts (`prefix`, `suffix`) take part in grouping; the
 * `timestamp` key and every other key are copied through unchanged.
 *
 * @param row - Row data with numeric values
 * @returns The regrouped row: a suffix shared by several prefixes becomes a
 *   nested `{ prefix: value }` record under the suffix, while a suffix with a
 *   single prefix keeps its full original key
 */
export function groupRowBySuffix(
  row: Record<string, number>,
): Record<string, number | Record<string, number>> {
  const delimiter = CHART_CONFIG.SERIES_NAME_DELIMITER;
  const grouped: Record<string, number | Record<string, number>> = {};
  const bySuffix: Record<string, Record<string, number>> = {};

  Object.entries(row).forEach(([name, value]) => {
    if (name === "timestamp") {
      grouped.timestamp = value;
      return;
    }

    const pieces = name.split(delimiter);
    if (pieces.length !== 2) {
      // No delimiter, or more than one: keep the key exactly as it is.
      grouped[name] = value;
      return;
    }

    const [prefix, suffix] = pieces;
    if (bySuffix[suffix] === undefined) bySuffix[suffix] = {};
    bySuffix[suffix][prefix] = value;
  });

  Object.entries(bySuffix).forEach(([suffix, members]) => {
    const prefixes = Object.keys(members);
    if (prefixes.length === 1) {
      // A lone member is re-emitted under its full original name.
      const only = prefixes[0];
      grouped[`${only}${delimiter}${suffix}`] = members[only];
    } else {
      grouped[suffix] = members;
    }
  });

  return grouped;
}
51
+
52
/**
 * Buckets series keys by their suffix.
 *
 * The suffix is the second part of the key after splitting on
 * `CHART_CONFIG.SERIES_NAME_DELIMITER`; when that part is missing or empty the
 * first part is used instead.
 *
 * @param numericKeys - Array of numeric column keys (excluding timestamp)
 * @returns Map from suffix to the keys that carry it, in input order
 */
export function buildSuffixGroupsMap(
  numericKeys: string[],
): Record<string, string[]> {
  const delimiter = CHART_CONFIG.SERIES_NAME_DELIMITER;

  return numericKeys.reduce<Record<string, string[]>>((buckets, key) => {
    const [head, tail] = key.split(delimiter);
    const suffix = tail || head;
    if (buckets[suffix] === undefined) buckets[suffix] = [];
    buckets[suffix].push(key);
    return buckets;
  }, {});
}
73
+
74
/**
 * Computes the minimum and maximum value seen for each suffix group.
 *
 * Values that are not numbers, or that are `NaN`, are ignored. A group with no
 * usable value keeps the initial `{ min: Infinity, max: -Infinity }`.
 *
 * @param chartData - Array of chart data rows
 * @param suffixGroups - Array of suffix groups (each group is an array of keys)
 * @returns Map from group id (the first key of the group) to its min/max
 */
export function computeGroupStats(
  chartData: Record<string, number>[],
  suffixGroups: string[][],
): Record<string, GroupStats> {
  const statsById: Record<string, GroupStats> = {};

  for (const keys of suffixGroups) {
    let lowest = Infinity;
    let highest = -Infinity;

    chartData.forEach((row) => {
      keys.forEach((key) => {
        const sample = row[key];
        if (typeof sample !== "number" || isNaN(sample)) return;
        lowest = sample < lowest ? sample : lowest;
        highest = sample > highest ? sample : highest;
      });
    });

    // The first key of the group serves as the group id.
    statsById[keys[0]] = { min: lowest, max: highest };
  }

  return statsById;
}
108
+
109
/**
 * Clusters suffix groups whose value ranges have a similar order of magnitude.
 *
 * Each not-yet-claimed group with finite min/max seeds a cluster. Every other
 * unclaimed group joins that cluster when its stats are finite, its min and
 * max differ, and both `log10(|min| + EPSILON)` and `log10(|max| + EPSILON)`
 * lie within `THRESHOLDS.SCALE_GROUPING` of the seed's values. Groups whose
 * stats are not finite never seed a cluster and never join one.
 *
 * @param suffixGroups - Array of suffix groups to analyze
 * @param groupStats - Statistics for each group, keyed by the group's first key
 * @returns Map from seed group id to the suffix groups clustered with it
 */
export function groupByScale(
  suffixGroups: string[][],
  groupStats: Record<string, GroupStats>,
): Record<string, string[][]> {
  // EPSILON keeps log10 finite when the value is exactly 0.
  const magnitude = (value: number): number =>
    Math.log10(Math.abs(value) + THRESHOLDS.EPSILON);
  const isClose = (a: number, b: number): boolean =>
    Math.abs(a - b) <= THRESHOLDS.SCALE_GROUPING;

  const clusters: Record<string, string[][]> = {};
  const claimed = new Set<string>();

  suffixGroups.forEach((seed) => {
    const seedId = seed[0];
    if (claimed.has(seedId)) return;

    const seedStats = groupStats[seedId];
    if (!(isFinite(seedStats.min) && isFinite(seedStats.max))) return;

    const seedLow = magnitude(seedStats.min);
    const seedHigh = magnitude(seedStats.max);
    const members: string[][] = [seed];
    claimed.add(seedId);

    suffixGroups.forEach((candidate) => {
      const candidateId = candidate[0];
      // The seed itself is already claimed, so it is skipped here as well.
      if (claimed.has(candidateId)) return;

      const stats = groupStats[candidateId];
      const usable =
        isFinite(stats.min) && isFinite(stats.max) && stats.min !== stats.max;
      if (!usable) return;

      if (
        isClose(seedLow, magnitude(stats.min)) &&
        isClose(seedHigh, magnitude(stats.max))
      ) {
        members.push(candidate);
        claimed.add(candidateId);
      }
    });

    clusters[seedId] = members;
  });

  return clusters;
}
163
+
164
/**
 * Turns scale clusters into the final list of chart groups.
 *
 * Clusters are ordered by how many suffix groups they contain (largest first),
 * each cluster is flattened into one list of series keys, and any list longer
 * than `CHART_CONFIG.MAX_SERIES_PER_GROUP` is cut into consecutive chunks of at
 * most that size.
 *
 * @param scaleGroups - Map of scale groups
 * @returns Array of chart groups (each group is an array of series keys)
 */
export function flattenScaleGroups(
  scaleGroups: Record<string, string[][]>,
): string[][] {
  const limit = CHART_CONFIG.MAX_SERIES_PER_GROUP;
  const ordered = Object.values(scaleGroups).sort(
    (left, right) => right.length - left.length,
  );

  const chartGroups: string[][] = [];
  for (const cluster of ordered) {
    const series = cluster.flat();
    if (series.length <= limit) {
      chartGroups.push(series);
      continue;
    }
    // Too many series for one chart: emit fixed-size slices instead.
    for (let start = 0; start < series.length; start += limit) {
      chartGroups.push(series.slice(start, start + limit));
    }
  }

  return chartGroups;
}
196
+
197
/**
 * Runs the full grouping pipeline: suffix grouping, per-group statistics,
 * scale clustering, and flattening into size-limited chart groups.
 *
 * @param seriesNames - All series names including timestamp
 * @param chartData - Array of chart data rows
 * @returns Array of chart groups ready for visualization
 */
export function processChartDataGroups(
  seriesNames: string[],
  chartData: Record<string, number>[],
): string[][] {
  // The timestamp column is the axis, not a series, so it is left out.
  const seriesKeys = seriesNames.filter((name) => name !== "timestamp");
  const suffixGroups = Object.values(buildSuffixGroupsMap(seriesKeys));
  const stats = computeGroupStats(chartData, suffixGroups);

  return flattenScaleGroups(groupByScale(suffixGroups, stats));
}
src/utils/languageInstructions.ts ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Language instruction extraction utilities
3
+ * Consolidates duplicated logic from fetch-data.ts
4
+ */
5
+
6
/**
 * Collects the language instructions stored on an episode data row.
 *
 * The rows named by `sampleIndices` are inspected in order. The first row with
 * a non-empty string `language_instruction` wins: its primary instruction is
 * followed by `language_instruction_2`, `language_instruction_3`, ... for as
 * long as consecutive numbered fields hold strings. No later row is consulted.
 *
 * @param episodeData - Array of episode data rows
 * @param sampleIndices - Indices of rows to check (default: [0] for first row only);
 *   indices at or beyond the end of `episodeData` are skipped
 * @returns The instructions joined with newlines, or undefined if none found
 */
export function extractLanguageInstructions(
  episodeData: Record<string, unknown>[],
  sampleIndices: number[] = [0],
): string | undefined {
  if (episodeData.length === 0) return undefined;

  for (const rowIndex of sampleIndices) {
    if (rowIndex >= episodeData.length) continue;

    const row = episodeData[rowIndex];
    if (!("language_instruction" in row)) continue;

    const primary = row.language_instruction;
    if (typeof primary !== "string" || !primary) continue;

    const collected: string[] = [primary];

    // Numbered fields start at 2 and must be contiguous.
    for (let suffix = 2; ; suffix++) {
      const field = `language_instruction_${suffix}`;
      if (!(field in row)) break;
      const extra = row[field];
      if (typeof extra !== "string") break;
      collected.push(extra);
    }

    return collected.join("\n");
  }

  return undefined;
}
58
+
59
/**
 * Looks up a task string by using the task index as a position in `tasksData`.
 *
 * The entry's `__index_level_0__` field is preferred when it holds a string;
 * otherwise its `task` field is used when that holds a string.
 *
 * @param taskIndex - Task index (can be BigInt or number); any other type yields undefined
 * @param tasksData - Array of task metadata objects
 * @returns Task string, or undefined when the index is negative, out of range,
 *   or the entry carries neither field as a string
 */
export function extractTaskFromMetadata(
  taskIndex: unknown,
  tasksData: Record<string, unknown>[],
): string | undefined {
  // BigInt indices are converted so they can be used as an array position.
  let position: number | undefined;
  if (typeof taskIndex === "bigint") {
    position = Number(taskIndex);
  } else if (typeof taskIndex === "number") {
    position = taskIndex;
  }

  if (position === undefined || position < 0) return undefined;
  if (position >= tasksData.length) return undefined;

  const entry = tasksData[position];
  if (!entry) return undefined;

  if (
    "__index_level_0__" in entry &&
    typeof entry.__index_level_0__ === "string"
  ) {
    return entry.__index_level_0__;
  }

  if ("task" in entry && typeof entry.task === "string") {
    return entry.task;
  }

  return undefined;
}
src/utils/parquetUtils.ts CHANGED
@@ -36,19 +36,19 @@ export async function fetchJson<T>(url: string): Promise<T> {
36
 
37
  export function formatStringWithVars(
38
  format: string,
39
- vars: Record<string, string>,
40
  ): string {
41
- return format.replace(/{(\w+)(?::\d+d)?}/g, (_, key) => vars[key]);
42
  }
43
 
44
  // Fetch and parse the Parquet file
45
  export async function fetchParquetFile(url: string): Promise<ArrayBuffer> {
46
  const res = await fetch(url);
47
-
48
  if (!res.ok) {
49
  throw new Error(`Failed to fetch ${url}: ${res.status} ${res.statusText}`);
50
  }
51
-
52
  return res.arrayBuffer();
53
  }
54
 
@@ -64,7 +64,7 @@ export async function readParquetColumn(
64
  columns: columns.length > 0 ? columns : undefined,
65
  onComplete: (data: unknown[][]) => {
66
  resolve(data);
67
- }
68
  });
69
  } catch (error) {
70
  reject(error);
@@ -94,12 +94,12 @@ export function getRows(currentFrameData: unknown[], columns: ColumnInfo[]) {
94
  return [];
95
  }
96
 
97
- const rows = [];
98
  const nRows = Math.max(...columns.map((column) => column.value.length));
99
  let rowIndex = 0;
100
 
101
  while (rowIndex < nRows) {
102
- const row = [];
103
  // number of states may NOT match number of actions. In this case, we null-pad the 2D array
104
  const nullCell = { isNull: true };
105
  // row consists of [state value, action value]
 
36
 
37
/**
 * Substitutes `{name}` and `{name:<width>d}` placeholders in a template.
 *
 * A placeholder with a width (for example `{episode_chunk:03d}`) is padded on
 * the left to that width: with zeros when the width is written with a leading
 * `0`, otherwise with spaces. Values that are already at least that long, such
 * as a pre-padded string like `"001"`, are inserted unchanged.
 *
 * @param format - Template containing `{name}` / `{name:<width>d}` placeholders
 * @param vars - Values to substitute, keyed by placeholder name
 * @returns The template with every placeholder replaced. A name missing from
 *   `vars` is rendered as the text `undefined`.
 */
export function formatStringWithVars(
  format: string,
  vars: Record<string, string | number>,
): string {
  return format.replace(
    /{(\w+)(?::(\d+)d)?}/g,
    (_match: string, key: string, widthSpec: string | undefined) => {
      const text = String(vars[key]);
      if (widthSpec === undefined) return text;

      const width = Number.parseInt(widthSpec, 10);
      if (!widthSpec.startsWith("0")) return text.padStart(width, " ");

      // Zero fill goes between the sign and the digits, e.g. -1 -> "-01".
      return text.startsWith("-")
        ? `-${text.slice(1).padStart(width - 1, "0")}`
        : text.padStart(width, "0");
    },
  );
}
43
 
44
/**
 * Downloads a Parquet file and returns its raw bytes.
 *
 * @param url - Location of the Parquet file
 * @returns The response body as an ArrayBuffer
 * @throws Error when the response status is not OK; the message includes the
 *   URL, status code and status text
 */
export async function fetchParquetFile(url: string): Promise<ArrayBuffer> {
  const response = await fetch(url);

  if (response.ok) {
    return response.arrayBuffer();
  }

  throw new Error(
    `Failed to fetch ${url}: ${response.status} ${response.statusText}`,
  );
}
54
 
 
64
  columns: columns.length > 0 ? columns : undefined,
65
  onComplete: (data: unknown[][]) => {
66
  resolve(data);
67
+ },
68
  });
69
  } catch (error) {
70
  reject(error);
 
94
  return [];
95
  }
96
 
97
+ const rows: Array<Array<{ isNull: true } | unknown>> = [];
98
  const nRows = Math.max(...columns.map((column) => column.value.length));
99
  let rowIndex = 0;
100
 
101
  while (rowIndex < nRows) {
102
+ const row: Array<{ isNull: true } | unknown> = [];
103
  // number of states may NOT match number of actions. In this case, we null-pad the 2D array
104
  const nullCell = { isNull: true };
105
  // row consists of [state value, action value]
src/utils/stringFormatting.ts ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * String formatting utilities for path construction
3
+ * Consolidates repeated padding and path building logic
4
+ */
5
+
6
+ import { PADDING } from "./constants";
7
+
8
/**
 * Left-pads the decimal text of a number with zeros.
 *
 * @param num - Number to pad
 * @param length - Minimum length of the result
 * @returns The number as text, zero-padded on the left up to `length`
 *   characters; longer text is returned as is
 */
export function padNumber(num: number, length: number): string {
  const digits = String(num);
  return digits.padStart(length, "0");
}
18
+
19
/**
 * Zero-pads an episode chunk index to `PADDING.EPISODE_CHUNK` characters.
 *
 * @param chunkIndex - Chunk index number
 * @returns Padded chunk index string (e.g., "001")
 */
export function formatEpisodeChunk(chunkIndex: number): string {
  const width = PADDING.EPISODE_CHUNK;
  return padNumber(chunkIndex, width);
}
28
+
29
/**
 * Zero-pads an episode index to `PADDING.EPISODE_INDEX` characters.
 *
 * @param episodeIndex - Episode index number
 * @returns Padded episode index string (e.g., "000042")
 */
export function formatEpisodeIndex(episodeIndex: number): string {
  const width = PADDING.EPISODE_INDEX;
  return padNumber(episodeIndex, width);
}
38
+
39
/**
 * Zero-pads a file index to `PADDING.FILE_INDEX` characters.
 *
 * @param fileIndex - File index number
 * @returns Padded file index string (e.g., "001")
 */
export function formatFileIndex(fileIndex: number): string {
  const width = PADDING.FILE_INDEX;
  return padNumber(fileIndex, width);
}
48
+
49
/**
 * Zero-pads a chunk index to `PADDING.CHUNK_INDEX` characters.
 *
 * @param chunkIndex - Chunk index number
 * @returns Padded chunk index string (e.g., "001")
 */
export function formatChunkIndex(chunkIndex: number): string {
  const width = PADDING.CHUNK_INDEX;
  return padNumber(chunkIndex, width);
}
58
+
59
/**
 * Builds the relative path of a video file in a v3 dataset.
 *
 * @param videoKey - Video key/name (e.g., "observation.image")
 * @param chunkIndex - Chunk index, formatted with `formatChunkIndex`
 * @param fileIndex - File index within the chunk, formatted with `formatFileIndex`
 * @returns Formatted video path (e.g., "videos/observation.image/chunk-001/file-000.mp4")
 */
export function buildV3VideoPath(
  videoKey: string,
  chunkIndex: number,
  fileIndex: number,
): string {
  const chunkDir = `chunk-${formatChunkIndex(chunkIndex)}`;
  const fileName = `file-${formatFileIndex(fileIndex)}.mp4`;
  return ["videos", videoKey, chunkDir, fileName].join("/");
}
74
+
75
/**
 * Builds the relative path of a data parquet file in a v3 dataset.
 *
 * @param chunkIndex - Data chunk index, formatted with `formatChunkIndex`
 * @param fileIndex - File index within the chunk, formatted with `formatFileIndex`
 * @returns Formatted data path (e.g., "data/chunk-001/file-000.parquet")
 */
export function buildV3DataPath(chunkIndex: number, fileIndex: number): string {
  const chunkDir = `chunk-${formatChunkIndex(chunkIndex)}`;
  const fileName = `file-${formatFileIndex(fileIndex)}.parquet`;
  return ["data", chunkDir, fileName].join("/");
}
85
+
86
/**
 * Builds the relative path of an episodes-metadata parquet file in a v3 dataset.
 *
 * @param chunkIndex - Episode chunk index, formatted with `formatChunkIndex`
 * @param fileIndex - File index within the chunk, formatted with `formatFileIndex`
 * @returns Formatted episodes metadata path (e.g., "meta/episodes/chunk-001/file-000.parquet")
 */
export function buildV3EpisodesMetadataPath(
  chunkIndex: number,
  fileIndex: number,
): string {
  const chunkDir = `chunk-${formatChunkIndex(chunkIndex)}`;
  const fileName = `file-${formatFileIndex(fileIndex)}.parquet`;
  return ["meta", "episodes", chunkDir, fileName].join("/");
}
src/utils/typeGuards.ts ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Type guard utilities for safe type narrowing
3
+ * Replaces unsafe type assertions throughout the codebase
4
+ */
5
+
6
/**
 * Type guard for BigInt primitives.
 *
 * @param value - Value to check
 * @returns True if `typeof value` is "bigint"
 */
export function isBigInt(value: unknown): value is bigint {
  const kind = typeof value;
  return kind === "bigint";
}
15
+
16
/**
 * Convert a BigInt-or-number value to a plain number.
 *
 * Numbers are returned unchanged (including NaN and Infinity). BigInts are
 * converted with `Number()`, which loses precision for magnitudes beyond
 * `Number.MAX_SAFE_INTEGER`. Every other input type yields `fallback`.
 *
 * @param value - Value to convert (can be BigInt, number, or other)
 * @param fallback - Value returned when `value` is neither a number nor a BigInt (default: 0)
 * @returns Number value or fallback
 */
export function bigIntToNumber(value: unknown, fallback: number = 0): number {
  if (typeof value === "number") {
    return value;
  }
  return typeof value === "bigint" ? Number(value) : fallback;
}
33
+
34
/**
 * Type guard for numeric primitives (number or BigInt).
 *
 * This is a pure `typeof` check, so NaN and Infinity count as numeric.
 *
 * @param value - Value to check
 * @returns True if value is a number or BigInt
 */
export function isNumeric(value: unknown): value is number | bigint {
  const kind = typeof value;
  return kind === "number" || kind === "bigint";
}
43
+
44
/**
 * Type guard for valid task index
 * Accepts a number or BigInt whose numeric value is a non-negative integer.
 *
 * NOTE(review): the predicate narrows to `number`, yet a BigInt input such as
 * `3n` also returns true. Callers that may receive BigInts should convert with
 * `bigIntToNumber` before using the narrowed value as a number.
 *
 * @param value - Value to check
 * @returns True if value is a valid task index (non-negative integer)
 */
export function isValidTaskIndex(value: unknown): value is number {
  // -1 is the sentinel for "not numeric"; it fails the range check below.
  const candidate = bigIntToNumber(value, -1);
  if (!Number.isInteger(candidate)) {
    return false;
  }
  return candidate >= 0;
}
55
+
56
/**
 * Type guard for HTMLVideoElement
 *
 * Safe to call where the DOM is unavailable (e.g. server-side rendering or
 * plain Node): if the `HTMLVideoElement` global does not exist, nothing can be
 * an instance of it, so the guard returns false instead of throwing a
 * ReferenceError.
 *
 * @param element - Element to check
 * @returns True if element is an HTMLVideoElement
 */
export function isVideoElement(element: unknown): element is HTMLVideoElement {
  // `typeof` on an undeclared global is the one reference that never throws.
  if (typeof HTMLVideoElement === "undefined") {
    return false;
  }
  return element instanceof HTMLVideoElement;
}
65
+
66
/**
 * Safe string conversion
 *
 * `null` and `undefined` become the empty string, strings pass through
 * untouched, and everything else goes through `String()`. Note that plain
 * objects therefore render as "[object Object]".
 *
 * @param value - Value to convert
 * @returns String representation of the value
 */
export function toString(value: unknown): string {
  if (value == null) {
    return "";
  }
  return typeof value === "string" ? value : String(value);
}
78
+
79
/**
 * Type guard for non-empty string values
 *
 * Whitespace-only strings count as non-empty; only `""` is rejected.
 *
 * @param value - Value to check
 * @returns True if value is a string with at least one character
 */
export function isNonEmptyString(value: unknown): value is string {
  if (typeof value !== "string") {
    return false;
  }
  return value !== "";
}
88
+
89
/**
 * Type guard for objects
 *
 * Accepts any value whose `typeof` is "object", except `null` and arrays.
 * Functions are rejected (their `typeof` is "function"); class instances such
 * as `Date` are accepted.
 *
 * @param value - Value to check
 * @returns True if value is a non-null, non-array object
 */
export function isObject(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
98
+
99
/**
 * Safe property access with type guard
 * Checks that `obj` is a non-null, non-array object, that `key` is present on
 * it (own or inherited, per the `in` operator), and that the property value
 * passes `typeGuard`.
 *
 * The key is captured as a type parameter so that only the checked property is
 * narrowed to `T`. Typing the result as `{ [K in typeof key]: T }` with
 * `key: string` would produce a string index signature and wrongly claim that
 * every property of `obj` is a `T`. When `key` is a non-literal `string`, the
 * result type is the same as before.
 *
 * @typeParam T - Type the property value is narrowed to
 * @typeParam K - Literal type of the property key (inferred from `key`)
 * @param obj - Object to check
 * @param key - Property key to check
 * @param typeGuard - Type guard function for the property value
 * @returns True if property exists and passes type guard
 */
export function hasPropertyOfType<T, K extends string = string>(
  obj: unknown,
  key: K,
  typeGuard: (value: unknown) => value is T,
): obj is Record<string, unknown> & { [P in K]: T } {
  return isObject(obj) && key in obj && typeGuard(obj[key]);
}
src/utils/versionUtils.ts CHANGED
@@ -2,7 +2,8 @@
2
  * Utility functions for checking dataset version compatibility
3
  */
4
 
5
- const DATASET_URL = process.env.DATASET_URL || "https://huggingface.co/datasets";
 
6
 
7
  /**
8
  * Dataset information structure from info.json
@@ -44,17 +45,18 @@ export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
44
 
45
  try {
46
  const testUrl = `${DATASET_URL}/${repoId}/resolve/main/meta/info.json`;
47
-
48
  const controller = new AbortController();
49
  const timeoutId = setTimeout(() => controller.abort(), 10000);
50
 
51
  const response = await fetch(testUrl, {
52
  method: "GET",
53
- signal: controller.signal
 
54
  });
55
-
56
  clearTimeout(timeoutId);
57
-
58
  if (!response.ok) {
59
  throw new Error(`Failed to fetch dataset info: ${response.status}`);
60
  }
@@ -62,7 +64,9 @@ export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
62
  const data = await response.json();
63
 
64
  if (!data.features) {
65
- throw new Error("Dataset info.json does not have the expected features structure");
 
 
66
  }
67
 
68
  datasetInfoCache.set(repoId, { data: data as DatasetInfo, expiry: Date.now() + CACHE_TTL_MS });
@@ -73,7 +77,7 @@ export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
73
  }
74
  throw new Error(
75
  `Dataset ${repoId} is not compatible with this visualizer. ` +
76
- "Failed to read dataset information from the main revision."
77
  );
78
  }
79
  }
@@ -105,7 +109,10 @@ export async function getDatasetVersion(repoId: string): Promise<string> {
105
  return version;
106
  }
107
 
108
- export function buildVersionedUrl(repoId: string, version: string, path: string): string {
 
 
 
 
109
  return `${DATASET_URL}/${repoId}/resolve/main/${path}`;
110
  }
111
-
 
2
  * Utility functions for checking dataset version compatibility
3
  */
4
 
5
+ const DATASET_URL =
6
+ process.env.DATASET_URL || "https://huggingface.co/datasets";
7
 
8
  /**
9
  * Dataset information structure from info.json
 
45
 
46
  try {
47
  const testUrl = `${DATASET_URL}/${repoId}/resolve/main/meta/info.json`;
48
+
49
  const controller = new AbortController();
50
  const timeoutId = setTimeout(() => controller.abort(), 10000);
51
 
52
  const response = await fetch(testUrl, {
53
  method: "GET",
54
+ cache: "no-store",
55
+ signal: controller.signal,
56
  });
57
+
58
  clearTimeout(timeoutId);
59
+
60
  if (!response.ok) {
61
  throw new Error(`Failed to fetch dataset info: ${response.status}`);
62
  }
 
64
  const data = await response.json();
65
 
66
  if (!data.features) {
67
+ throw new Error(
68
+ "Dataset info.json does not have the expected features structure",
69
+ );
70
  }
71
 
72
  datasetInfoCache.set(repoId, { data: data as DatasetInfo, expiry: Date.now() + CACHE_TTL_MS });
 
77
  }
78
  throw new Error(
79
  `Dataset ${repoId} is not compatible with this visualizer. ` +
80
+ "Failed to read dataset information from the main revision.",
81
  );
82
  }
83
  }
 
109
  return version;
110
  }
111
 
112
/**
 * Build the download URL of a file inside a dataset repository.
 *
 * NOTE(review): `version` is accepted but never used; the URL always resolves
 * against the `main` revision. Confirm this is intentional before relying on
 * the parameter.
 *
 * @param repoId - Dataset repository id, inserted verbatim after the base URL
 * @param version - Dataset version (currently ignored)
 * @param path - File path relative to the repository root
 * @returns `<DATASET_URL>/<repoId>/resolve/main/<path>`
 */
export function buildVersionedUrl(
  repoId: string,
  version: string,
  path: string,
): string {
  const repoRoot = `${DATASET_URL}/${repoId}`;
  return `${repoRoot}/resolve/main/${path}`;
}