Spaces:
Running
Running
Merge branch 'main' into feat/speedup_visulization
Browse files- .dockerignore +10 -0
- .github/workflows/deploy-release.yml +22 -0
- .github/workflows/type-check.yml +29 -0
- .gitignore +3 -0
- Dockerfile +25 -0
- README.md +73 -8
- bun.lock +0 -0
- eslint.config.mjs +6 -0
- next.config.ts +1 -1
- package.json +5 -1
- src/app/[org]/[dataset]/[episode]/episode-viewer.tsx +6 -1
- src/app/[org]/[dataset]/[episode]/fetch-data.ts +166 -265
- src/app/[org]/[dataset]/page.tsx +4 -4
- src/app/explore/explore-grid.tsx +0 -2
- src/app/explore/page.tsx +12 -12
- src/app/page.tsx +13 -13
- src/components/data-recharts.tsx +30 -12
- src/components/simple-videos-player.tsx +46 -20
- src/components/videos-player.tsx +9 -7
- src/types/chart.types.ts +24 -0
- src/types/dataset.types.ts +70 -0
- src/types/episode.types.ts +68 -0
- src/types/index.ts +36 -0
- src/types/video.types.ts +19 -0
- src/utils/constants.ts +44 -0
- src/utils/dataProcessing.ts +222 -0
- src/utils/languageInstructions.ts +105 -0
- src/utils/parquetUtils.ts +7 -7
- src/utils/stringFormatting.ts +98 -0
- src/utils/typeGuards.ts +114 -0
- src/utils/versionUtils.ts +16 -9
.dockerignore
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
node_modules
|
| 2 |
+
.next
|
| 3 |
+
.git
|
| 4 |
+
.gitignore
|
| 5 |
+
README.md
|
| 6 |
+
.env*.local
|
| 7 |
+
*.log
|
| 8 |
+
.DS_Store
|
| 9 |
+
.vscode
|
| 10 |
+
.idea
|
.github/workflows/deploy-release.yml
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Deploy to Hf Hub
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- main
|
| 7 |
+
|
| 8 |
+
# to run this workflow manually from the Actions tab
|
| 9 |
+
workflow_dispatch:
|
| 10 |
+
|
| 11 |
+
jobs:
|
| 12 |
+
sync-to-hub:
|
| 13 |
+
runs-on: ubuntu-latest
|
| 14 |
+
steps:
|
| 15 |
+
- uses: actions/checkout@v3
|
| 16 |
+
with:
|
| 17 |
+
fetch-depth: 0
|
| 18 |
+
lfs: true
|
| 19 |
+
- name: Push to hub
|
| 20 |
+
env:
|
| 21 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 22 |
+
run: git push https://mishig:$HF_TOKEN@huggingface.co/spaces/lerobot/visualize_dataset main -f
|
.github/workflows/type-check.yml
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Type Check & Lint
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches: [main]
|
| 6 |
+
pull_request:
|
| 7 |
+
branches: [main]
|
| 8 |
+
|
| 9 |
+
jobs:
|
| 10 |
+
type-check:
|
| 11 |
+
runs-on: ubuntu-latest
|
| 12 |
+
steps:
|
| 13 |
+
- uses: actions/checkout@v4
|
| 14 |
+
|
| 15 |
+
- uses: oven-sh/setup-bun@v1
|
| 16 |
+
with:
|
| 17 |
+
bun-version: latest
|
| 18 |
+
|
| 19 |
+
- name: Install dependencies
|
| 20 |
+
run: bun install
|
| 21 |
+
|
| 22 |
+
- name: Type check
|
| 23 |
+
run: bun run type-check
|
| 24 |
+
|
| 25 |
+
- name: Lint
|
| 26 |
+
run: bun run lint
|
| 27 |
+
|
| 28 |
+
- name: Format check
|
| 29 |
+
run: bun run format:check
|
.gitignore
CHANGED
|
@@ -40,3 +40,6 @@ yarn-error.log*
|
|
| 40 |
# typescript
|
| 41 |
*.tsbuildinfo
|
| 42 |
next-env.d.ts
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
# typescript
|
| 41 |
*.tsbuildinfo
|
| 42 |
next-env.d.ts
|
| 43 |
+
|
| 44 |
+
# claude code local settings
|
| 45 |
+
.claude/
|
Dockerfile
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM oven/bun:1 AS base
|
| 2 |
+
|
| 3 |
+
# Set working directory
|
| 4 |
+
WORKDIR /app
|
| 5 |
+
|
| 6 |
+
# Copy package files
|
| 7 |
+
COPY package.json bun.lock* ./
|
| 8 |
+
|
| 9 |
+
# Install dependencies
|
| 10 |
+
RUN bun install --frozen-lockfile
|
| 11 |
+
|
| 12 |
+
# Copy the rest of the application
|
| 13 |
+
COPY . .
|
| 14 |
+
|
| 15 |
+
# Build the application
|
| 16 |
+
RUN bun run build
|
| 17 |
+
|
| 18 |
+
# Expose port 7860
|
| 19 |
+
EXPOSE 7860
|
| 20 |
+
|
| 21 |
+
# Set environment variable for port
|
| 22 |
+
ENV PORT=7860
|
| 23 |
+
|
| 24 |
+
# Start the application
|
| 25 |
+
CMD ["bun", "start"]
|
README.md
CHANGED
|
@@ -1,3 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# LeRobot Dataset Visualizer
|
| 2 |
|
| 3 |
LeRobot Dataset Tool and Visualizer is a web application for interactive exploration and visualization of robotics datasets, particularly those in the LeRobot format. It enables users to browse, view, and analyze episodes from large-scale robotics datasets, combining synchronized video playback with rich, interactive data graphs.
|
|
@@ -28,15 +39,28 @@ This tool is designed to help robotics researchers and practitioners quickly ins
|
|
| 28 |
|
| 29 |
## Getting Started
|
| 30 |
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
```bash
|
| 34 |
-
npm run dev
|
| 35 |
-
# or
|
| 36 |
-
yarn dev
|
| 37 |
-
# or
|
| 38 |
-
pnpm dev
|
| 39 |
-
# or
|
| 40 |
bun dev
|
| 41 |
```
|
| 42 |
|
|
@@ -44,13 +68,54 @@ Open [http://localhost:3000](http://localhost:3000) with your browser to see the
|
|
| 44 |
|
| 45 |
You can start editing the page by modifying `src/app/page.tsx` or other files in the `src/` directory. The app supports hot-reloading for rapid development.
|
| 46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
### Environment Variables
|
| 48 |
|
| 49 |
- `DATASET_URL`: (optional) Base URL for dataset hosting (defaults to HuggingFace Datasets).
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
## Contributing
|
| 52 |
|
| 53 |
Contributions, bug reports, and feature requests are welcome! Please open an issue or submit a pull request.
|
| 54 |
|
| 55 |
-
### Acknowledgement
|
|
|
|
| 56 |
The app was originally created by [@Mishig25](https://github.com/mishig25) and taken from this PR [#1055](https://github.com/huggingface/lerobot/pull/1055)
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Visualize Dataset (v2.0+ latest dataset format)
|
| 3 |
+
emoji: 💻
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
pinned: false
|
| 9 |
+
license: apache-2.0
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
# LeRobot Dataset Visualizer
|
| 13 |
|
| 14 |
LeRobot Dataset Tool and Visualizer is a web application for interactive exploration and visualization of robotics datasets, particularly those in the LeRobot format. It enables users to browse, view, and analyze episodes from large-scale robotics datasets, combining synchronized video playback with rich, interactive data graphs.
|
|
|
|
| 39 |
|
| 40 |
## Getting Started
|
| 41 |
|
| 42 |
+
### Prerequisites
|
| 43 |
+
|
| 44 |
+
This project uses [Bun](https://bun.sh) as its package manager. If you don't have it installed:
|
| 45 |
+
|
| 46 |
+
```bash
|
| 47 |
+
# Install Bun
|
| 48 |
+
curl -fsSL https://bun.sh/install | bash
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
### Installation
|
| 52 |
+
|
| 53 |
+
Install dependencies:
|
| 54 |
+
|
| 55 |
+
```bash
|
| 56 |
+
bun install
|
| 57 |
+
```
|
| 58 |
+
|
| 59 |
+
### Development
|
| 60 |
+
|
| 61 |
+
Run the development server:
|
| 62 |
|
| 63 |
```bash
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
bun dev
|
| 65 |
```
|
| 66 |
|
|
|
|
| 68 |
|
| 69 |
You can start editing the page by modifying `src/app/page.tsx` or other files in the `src/` directory. The app supports hot-reloading for rapid development.
|
| 70 |
|
| 71 |
+
### Other Commands
|
| 72 |
+
|
| 73 |
+
```bash
|
| 74 |
+
# Build for production
|
| 75 |
+
bun run build
|
| 76 |
+
|
| 77 |
+
# Start production server
|
| 78 |
+
bun start
|
| 79 |
+
|
| 80 |
+
# Run linter
|
| 81 |
+
bun run lint
|
| 82 |
+
|
| 83 |
+
# Format code
|
| 84 |
+
bun run format
|
| 85 |
+
```
|
| 86 |
+
|
| 87 |
### Environment Variables
|
| 88 |
|
| 89 |
- `DATASET_URL`: (optional) Base URL for dataset hosting (defaults to HuggingFace Datasets).
|
| 90 |
|
| 91 |
+
## Docker Deployment
|
| 92 |
+
|
| 93 |
+
This application can be deployed using Docker with bun for optimal performance and self-contained builds.
|
| 94 |
+
|
| 95 |
+
### Build the Docker image
|
| 96 |
+
|
| 97 |
+
```bash
|
| 98 |
+
docker build -t lerobot-visualizer .
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
### Run the container
|
| 102 |
+
|
| 103 |
+
```bash
|
| 104 |
+
docker run -p 7860:7860 lerobot-visualizer
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
The application will be available at [http://localhost:7860](http://localhost:7860).
|
| 108 |
+
|
| 109 |
+
### Run with custom environment variables
|
| 110 |
+
|
| 111 |
+
```bash
|
| 112 |
+
docker run -p 7860:7860 -e DATASET_URL=your-url lerobot-visualizer
|
| 113 |
+
```
|
| 114 |
+
|
| 115 |
## Contributing
|
| 116 |
|
| 117 |
Contributions, bug reports, and feature requests are welcome! Please open an issue or submit a pull request.
|
| 118 |
|
| 119 |
+
### Acknowledgement
|
| 120 |
+
|
| 121 |
The app was originally created by [@Mishig25](https://github.com/mishig25) and taken from this PR [#1055](https://github.com/huggingface/lerobot/pull/1055)
|
bun.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
eslint.config.mjs
CHANGED
|
@@ -11,6 +11,12 @@ const compat = new FlatCompat({
|
|
| 11 |
|
| 12 |
const eslintConfig = [
|
| 13 |
...compat.extends("next/core-web-vitals", "next/typescript"),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
];
|
| 15 |
|
| 16 |
export default eslintConfig;
|
|
|
|
| 11 |
|
| 12 |
const eslintConfig = [
|
| 13 |
...compat.extends("next/core-web-vitals", "next/typescript"),
|
| 14 |
+
{
|
| 15 |
+
rules: {
|
| 16 |
+
// Allow `any` type as warning - core types are implemented, peripheral areas still need typing
|
| 17 |
+
"@typescript-eslint/no-explicit-any": "warn",
|
| 18 |
+
},
|
| 19 |
+
},
|
| 20 |
];
|
| 21 |
|
| 22 |
export default eslintConfig;
|
next.config.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import type { NextConfig } from "next";
|
| 2 |
-
import packageJson from
|
| 3 |
|
| 4 |
const nextConfig: NextConfig = {
|
| 5 |
typescript: {
|
|
|
|
| 1 |
import type { NextConfig } from "next";
|
| 2 |
+
import packageJson from "./package.json";
|
| 3 |
|
| 4 |
const nextConfig: NextConfig = {
|
| 5 |
typescript: {
|
package.json
CHANGED
|
@@ -7,7 +7,11 @@
|
|
| 7 |
"build": "next build",
|
| 8 |
"start": "next start",
|
| 9 |
"lint": "next lint",
|
| 10 |
-
"format": "prettier --write ."
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
},
|
| 12 |
"dependencies": {
|
| 13 |
"@react-three/drei": "^10.7.7",
|
|
|
|
| 7 |
"build": "next build",
|
| 8 |
"start": "next start",
|
| 9 |
"lint": "next lint",
|
| 10 |
+
"format": "prettier --write .",
|
| 11 |
+
"format:check": "prettier --check .",
|
| 12 |
+
"type-check": "tsc --noEmit",
|
| 13 |
+
"type-check:watch": "tsc --noEmit --watch",
|
| 14 |
+
"validate": "bun run type-check && bun run lint && bun run format:check"
|
| 15 |
},
|
| 16 |
"dependencies": {
|
| 17 |
"@react-three/drei": "^10.7.7",
|
src/app/[org]/[dataset]/[episode]/episode-viewer.tsx
CHANGED
|
@@ -51,6 +51,11 @@ export default function EpisodeViewer({
|
|
| 51 |
</div>
|
| 52 |
);
|
| 53 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
return (
|
| 55 |
<TimeProvider duration={data!.duration}>
|
| 56 |
<FlaggedEpisodesProvider>
|
|
@@ -197,7 +202,7 @@ function EpisodeViewerInner({ data, org, dataset }: { data: EpisodeData; org?: s
|
|
| 197 |
link.href = v.url;
|
| 198 |
document.head.appendChild(link);
|
| 199 |
links.push(link);
|
| 200 |
-
|
| 201 |
}
|
| 202 |
})
|
| 203 |
.catch(() => {});
|
|
|
|
| 51 |
</div>
|
| 52 |
);
|
| 53 |
}
|
| 54 |
+
|
| 55 |
+
if (!data) {
|
| 56 |
+
return null;
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
return (
|
| 60 |
<TimeProvider duration={data!.duration}>
|
| 61 |
<FlaggedEpisodesProvider>
|
|
|
|
| 202 |
link.href = v.url;
|
| 203 |
document.head.appendChild(link);
|
| 204 |
links.push(link);
|
| 205 |
+
}
|
| 206 |
}
|
| 207 |
})
|
| 208 |
.catch(() => {});
|
src/app/[org]/[dataset]/[episode]/fetch-data.ts
CHANGED
|
@@ -6,8 +6,19 @@ import {
|
|
| 6 |
} from "@/utils/parquetUtils";
|
| 7 |
import { pick } from "@/utils/pick";
|
| 8 |
import { getDatasetVersionAndInfo, buildVersionedUrl } from "@/utils/versionUtils";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
-
const SERIES_NAME_DELIMITER =
|
| 11 |
|
| 12 |
export type VideoInfo = {
|
| 13 |
filename: string;
|
|
@@ -99,34 +110,10 @@ type ColumnDef = {
|
|
| 99 |
value: string[];
|
| 100 |
};
|
| 101 |
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
if (key === "timestamp") {
|
| 107 |
-
result["timestamp"] = value;
|
| 108 |
-
continue;
|
| 109 |
-
}
|
| 110 |
-
const parts = key.split(SERIES_NAME_DELIMITER);
|
| 111 |
-
if (parts.length === 2) {
|
| 112 |
-
const [prefix, suffix] = parts;
|
| 113 |
-
if (!suffixGroups[suffix]) suffixGroups[suffix] = {};
|
| 114 |
-
suffixGroups[suffix][prefix] = value;
|
| 115 |
-
} else {
|
| 116 |
-
result[key] = value;
|
| 117 |
-
}
|
| 118 |
-
}
|
| 119 |
-
for (const [suffix, group] of Object.entries(suffixGroups)) {
|
| 120 |
-
const keys = Object.keys(group);
|
| 121 |
-
if (keys.length === 1) {
|
| 122 |
-
const fullName = `${keys[0]}${SERIES_NAME_DELIMITER}${suffix}`;
|
| 123 |
-
result[fullName] = group[keys[0]];
|
| 124 |
-
} else {
|
| 125 |
-
result[suffix] = group;
|
| 126 |
-
}
|
| 127 |
-
}
|
| 128 |
-
return result;
|
| 129 |
-
}
|
| 130 |
|
| 131 |
export async function getEpisodeData(
|
| 132 |
org: string,
|
|
@@ -141,7 +128,9 @@ export async function getEpisodeData(
|
|
| 141 |
const info = rawInfo as unknown as DatasetMetadata;
|
| 142 |
|
| 143 |
if (info.video_path === null) {
|
| 144 |
-
throw new Error(
|
|
|
|
|
|
|
| 145 |
}
|
| 146 |
|
| 147 |
console.time(`[perf] getEpisodeData (${version})`);
|
|
@@ -176,14 +165,14 @@ export async function getAdjacentEpisodesVideoInfo(
|
|
| 176 |
dataset: string,
|
| 177 |
currentEpisodeId: number,
|
| 178 |
radius: number = 2,
|
| 179 |
-
) {
|
| 180 |
const repoId = `${org}/${dataset}`;
|
| 181 |
try {
|
| 182 |
const { version, info: rawInfo } = await getDatasetVersionAndInfo(repoId);
|
| 183 |
const info = rawInfo as unknown as DatasetMetadata;
|
| 184 |
|
| 185 |
const totalEpisodes = info.total_episodes;
|
| 186 |
-
const adjacentVideos:
|
| 187 |
|
| 188 |
// Calculate adjacent episode IDs
|
| 189 |
for (let offset = -radius; offset <= radius; offset++) {
|
|
@@ -195,24 +184,39 @@ export async function getAdjacentEpisodesVideoInfo(
|
|
| 195 |
let videosInfo: VideoInfo[] = [];
|
| 196 |
|
| 197 |
if (version === "v3.0") {
|
| 198 |
-
const episodeMetadata = await loadEpisodeMetadataV3Simple(
|
| 199 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
} else {
|
| 201 |
// For v2.x, use simpler video info extraction
|
|
|
|
| 202 |
const episode_chunk = Math.floor(0 / 1000);
|
| 203 |
videosInfo = Object.entries(info.features)
|
| 204 |
.filter(([, value]) => value.dtype === "video")
|
| 205 |
.map(([key]) => {
|
| 206 |
-
|
| 207 |
video_key: key,
|
| 208 |
-
|
| 209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
});
|
| 211 |
return {
|
| 212 |
filename: key,
|
| 213 |
url: buildVersionedUrl(repoId, version, videoPath),
|
| 214 |
};
|
| 215 |
});
|
|
|
|
| 216 |
}
|
| 217 |
|
| 218 |
adjacentVideos.push({ episodeId, videosInfo });
|
|
@@ -258,43 +262,42 @@ async function getEpisodeDataV2(
|
|
| 258 |
// episode id starts from 0
|
| 259 |
(_, i) => i,
|
| 260 |
)
|
| 261 |
-
: process.env.EPISODES
|
| 262 |
-
.split(/\s+/)
|
| 263 |
.map((x) => parseInt(x.trim(), 10))
|
| 264 |
.filter((x) => !isNaN(x));
|
| 265 |
|
| 266 |
// Videos information
|
| 267 |
-
|
|
|
|
|
|
|
| 268 |
.filter(([, value]) => value.dtype === "video")
|
| 269 |
.map(([key]) => {
|
| 270 |
-
|
| 271 |
video_key: key,
|
| 272 |
-
|
| 273 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
});
|
| 275 |
return {
|
| 276 |
filename: key,
|
| 277 |
url: buildVersionedUrl(repoId, version, videoPath),
|
| 278 |
};
|
| 279 |
-
|
|
|
|
| 280 |
|
| 281 |
// Column data
|
| 282 |
const columnNames = Object.entries(info.features)
|
| 283 |
.filter(
|
| 284 |
([, value]) =>
|
| 285 |
-
["float32", "int32"].includes(value.dtype) &&
|
| 286 |
-
value.shape.length === 1,
|
| 287 |
)
|
| 288 |
.map(([key, { shape }]) => ({ key, length: shape[0] }));
|
| 289 |
|
| 290 |
// Exclude specific columns
|
| 291 |
-
const excludedColumns = [
|
| 292 |
-
"timestamp",
|
| 293 |
-
"frame_index",
|
| 294 |
-
"episode_index",
|
| 295 |
-
"index",
|
| 296 |
-
"task_index",
|
| 297 |
-
];
|
| 298 |
const filteredColumns = columnNames.filter(
|
| 299 |
(column) => !excludedColumns.includes(column.key),
|
| 300 |
);
|
|
@@ -315,7 +318,7 @@ async function getEpisodeDataV2(
|
|
| 315 |
? column_names.map((name: string) => `${key}${SERIES_NAME_DELIMITER}${name}`)
|
| 316 |
: Array.from(
|
| 317 |
{ length: columnNames.find((c) => c.key === key)?.length ?? 1 },
|
| 318 |
-
(_, i) => `${key}${SERIES_NAME_DELIMITER}${i}`,
|
| 319 |
),
|
| 320 |
};
|
| 321 |
});
|
|
@@ -324,9 +327,11 @@ async function getEpisodeDataV2(
|
|
| 324 |
repoId,
|
| 325 |
version,
|
| 326 |
formatStringWithVars(info.data_path, {
|
| 327 |
-
episode_chunk: episode_chunk
|
| 328 |
-
|
| 329 |
-
|
|
|
|
|
|
|
| 330 |
);
|
| 331 |
|
| 332 |
const arrayBuffer = await fetchParquetFile(parquetUrl);
|
|
@@ -366,20 +371,20 @@ async function getEpisodeDataV2(
|
|
| 366 |
if (tasksResponse.ok) {
|
| 367 |
const tasksText = await tasksResponse.text();
|
| 368 |
const tasksData = tasksText
|
| 369 |
-
.split(
|
| 370 |
-
.filter(line => line.trim())
|
| 371 |
-
.map(line => JSON.parse(line));
|
| 372 |
|
| 373 |
if (tasksData && tasksData.length > 0) {
|
| 374 |
const taskIndex = allData[0].task_index;
|
| 375 |
const taskIndexNum = typeof taskIndex === 'bigint' ? Number(taskIndex) : taskIndex;
|
| 376 |
-
const taskData = tasksData.find(t => t.task_index === taskIndexNum);
|
| 377 |
if (taskData) {
|
| 378 |
task = taskData.task;
|
| 379 |
}
|
| 380 |
}
|
| 381 |
}
|
| 382 |
-
} catch
|
| 383 |
// No tasks metadata file for this v2.x dataset
|
| 384 |
}
|
| 385 |
}
|
|
@@ -414,86 +419,21 @@ async function getEpisodeDataV2(
|
|
| 414 |
)
|
| 415 |
.map(([key]) => key);
|
| 416 |
|
| 417 |
-
//
|
| 418 |
-
const
|
| 419 |
-
const suffixGroupsMap: Record<string, string[]> = {};
|
| 420 |
-
for (const key of numericKeys) {
|
| 421 |
-
const parts = key.split(SERIES_NAME_DELIMITER);
|
| 422 |
-
const suffix = parts[1] || parts[0]; // fallback to key if no delimiter
|
| 423 |
-
if (!suffixGroupsMap[suffix]) suffixGroupsMap[suffix] = [];
|
| 424 |
-
suffixGroupsMap[suffix].push(key);
|
| 425 |
-
}
|
| 426 |
-
const suffixGroups = Object.values(suffixGroupsMap);
|
| 427 |
-
|
| 428 |
-
// 2. Compute min/max for each suffix group as a whole
|
| 429 |
-
const groupStats: Record<string, { min: number; max: number }> = {};
|
| 430 |
-
suffixGroups.forEach((group) => {
|
| 431 |
-
let min = Infinity,
|
| 432 |
-
max = -Infinity;
|
| 433 |
-
for (const row of chartData) {
|
| 434 |
-
for (const key of group) {
|
| 435 |
-
const v = row[key];
|
| 436 |
-
if (typeof v === "number" && !isNaN(v)) {
|
| 437 |
-
if (v < min) min = v;
|
| 438 |
-
if (v > max) max = v;
|
| 439 |
-
}
|
| 440 |
-
}
|
| 441 |
-
}
|
| 442 |
-
// Use the first key in the group as the group id
|
| 443 |
-
groupStats[group[0]] = { min, max };
|
| 444 |
-
});
|
| 445 |
-
|
| 446 |
-
// 3. Group suffix groups by similar scale (treat each suffix group as a unit)
|
| 447 |
-
const scaleGroups: Record<string, string[][]> = {};
|
| 448 |
-
const used = new Set<string>();
|
| 449 |
-
const SCALE_THRESHOLD = 2;
|
| 450 |
-
for (const group of suffixGroups) {
|
| 451 |
-
const groupId = group[0];
|
| 452 |
-
if (used.has(groupId)) continue;
|
| 453 |
-
const { min, max } = groupStats[groupId];
|
| 454 |
-
if (!isFinite(min) || !isFinite(max)) continue;
|
| 455 |
-
const logMin = Math.log10(Math.abs(min) + 1e-9);
|
| 456 |
-
const logMax = Math.log10(Math.abs(max) + 1e-9);
|
| 457 |
-
const unit: string[][] = [group];
|
| 458 |
-
used.add(groupId);
|
| 459 |
-
for (const other of suffixGroups) {
|
| 460 |
-
const otherId = other[0];
|
| 461 |
-
if (used.has(otherId) || otherId === groupId) continue;
|
| 462 |
-
const { min: omin, max: omax } = groupStats[otherId];
|
| 463 |
-
if (!isFinite(omin) || !isFinite(omax) || omin === omax) continue;
|
| 464 |
-
const ologMin = Math.log10(Math.abs(omin) + 1e-9);
|
| 465 |
-
const ologMax = Math.log10(Math.abs(omax) + 1e-9);
|
| 466 |
-
if (
|
| 467 |
-
Math.abs(logMin - ologMin) <= SCALE_THRESHOLD &&
|
| 468 |
-
Math.abs(logMax - ologMax) <= SCALE_THRESHOLD
|
| 469 |
-
) {
|
| 470 |
-
unit.push(other);
|
| 471 |
-
used.add(otherId);
|
| 472 |
-
}
|
| 473 |
-
}
|
| 474 |
-
scaleGroups[groupId] = unit;
|
| 475 |
-
}
|
| 476 |
-
|
| 477 |
-
// 4. Flatten scaleGroups into chartGroups (array of arrays of keys)
|
| 478 |
-
const chartGroups: string[][] = Object.values(scaleGroups)
|
| 479 |
-
.sort((a, b) => b.length - a.length)
|
| 480 |
-
.flatMap((suffixGroupArr) => {
|
| 481 |
-
// suffixGroupArr is array of suffix groups (each is array of keys)
|
| 482 |
-
const merged = suffixGroupArr.flat();
|
| 483 |
-
if (merged.length > 6) {
|
| 484 |
-
const subgroups: string[][] = [];
|
| 485 |
-
for (let i = 0; i < merged.length; i += 6) {
|
| 486 |
-
subgroups.push(merged.slice(i, i + 6));
|
| 487 |
-
}
|
| 488 |
-
return subgroups;
|
| 489 |
-
}
|
| 490 |
-
return [merged];
|
| 491 |
-
});
|
| 492 |
|
| 493 |
const duration = chartData[chartData.length - 1].timestamp;
|
| 494 |
|
| 495 |
const chartDataGroups = chartGroups.map((group) =>
|
| 496 |
-
chartData.map((row) =>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 497 |
);
|
| 498 |
|
| 499 |
return {
|
|
@@ -531,10 +471,19 @@ async function getEpisodeDataV3(
|
|
| 531 |
const episodes = Array.from({ length: info.total_episodes }, (_, i) => i);
|
| 532 |
|
| 533 |
// Load episode metadata to get timestamps for episode 0
|
| 534 |
-
const episodeMetadata = await loadEpisodeMetadataV3Simple(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 535 |
|
| 536 |
// Create video info with segmentation using the metadata
|
| 537 |
-
const videosInfo = extractVideoInfoV3WithSegmentation(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 538 |
|
| 539 |
// Load episode data for charts
|
| 540 |
const { chartDataGroups, flatChartData, ignoredColumns, task } = await loadEpisodeDataV3(repoId, version, info, episodeMetadata);
|
|
@@ -563,9 +512,9 @@ async function loadEpisodeDataV3(
|
|
| 563 |
episodeMetadata: EpisodeMetadataV3,
|
| 564 |
): Promise<{ chartDataGroups: ChartRow[][]; flatChartData: Record<string, number>[]; ignoredColumns: string[]; task?: string }> {
|
| 565 |
// Build data file path using chunk and file indices
|
| 566 |
-
const dataChunkIndex = episodeMetadata.data_chunk_index
|
| 567 |
-
const dataFileIndex = episodeMetadata.data_file_index
|
| 568 |
-
const dataPath =
|
| 569 |
|
| 570 |
try {
|
| 571 |
const dataUrl = buildVersionedUrl(repoId, version, dataPath);
|
|
@@ -684,20 +633,20 @@ function processEpisodeDataForCharts(
|
|
| 684 |
|
| 685 |
// Common feature order for v3.0 datasets (but only include if they exist)
|
| 686 |
const expectedFeatureOrder = [
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
];
|
| 697 |
|
| 698 |
// Map indices to features that actually exist
|
| 699 |
let currentIndex = 0;
|
| 700 |
-
expectedFeatureOrder.forEach(feature => {
|
| 701 |
if (featureKeys.includes(feature)) {
|
| 702 |
v3IndexToFeatureMap[currentIndex.toString()] = feature;
|
| 703 |
currentIndex++;
|
|
@@ -705,7 +654,7 @@ function processEpisodeDataForCharts(
|
|
| 705 |
});
|
| 706 |
|
| 707 |
// Columns to exclude from charts (note: 'task' is intentionally not excluded as we want to access it)
|
| 708 |
-
const excludedColumns =
|
| 709 |
|
| 710 |
// Create columns structure similar to V2.1 for proper hierarchical naming
|
| 711 |
const columns: ColumnDef[] = Object.entries(info.features)
|
|
@@ -726,7 +675,7 @@ function processEpisodeDataForCharts(
|
|
| 726 |
? column_names.map((name: string) => `${key}${SERIES_NAME_DELIMITER}${name}`)
|
| 727 |
: Array.from(
|
| 728 |
{ length: feature.shape[0] || 1 },
|
| 729 |
-
(_, i) => `${key}${SERIES_NAME_DELIMITER}${i}`,
|
| 730 |
),
|
| 731 |
};
|
| 732 |
});
|
|
@@ -737,7 +686,7 @@ function processEpisodeDataForCharts(
|
|
| 737 |
const allKeys: string[] = [];
|
| 738 |
|
| 739 |
Object.entries(firstRow || {}).forEach(([key, value]) => {
|
| 740 |
-
if (key ===
|
| 741 |
|
| 742 |
// Map numeric key to feature name if available
|
| 743 |
const featureName = v3IndexToFeatureMap[key] || key;
|
|
@@ -749,7 +698,7 @@ function processEpisodeDataForCharts(
|
|
| 749 |
if (excludedColumns.includes(featureName)) return;
|
| 750 |
|
| 751 |
// Find the matching column definition to get proper names
|
| 752 |
-
const columnDef = columns.find(col => col.key === featureName);
|
| 753 |
if (columnDef && Array.isArray(value) && value.length > 0) {
|
| 754 |
// Use the proper hierarchical naming from column definition
|
| 755 |
columnDef.value.forEach((seriesName, idx) => {
|
|
@@ -757,10 +706,10 @@ function processEpisodeDataForCharts(
|
|
| 757 |
allKeys.push(seriesName);
|
| 758 |
}
|
| 759 |
});
|
| 760 |
-
} else if (typeof value ===
|
| 761 |
// For scalar numeric values
|
| 762 |
allKeys.push(featureName);
|
| 763 |
-
} else if (typeof value ===
|
| 764 |
// For BigInt values
|
| 765 |
allKeys.push(featureName);
|
| 766 |
}
|
|
@@ -769,10 +718,7 @@ function processEpisodeDataForCharts(
|
|
| 769 |
seriesNames = ["timestamp", ...allKeys];
|
| 770 |
} else {
|
| 771 |
// Fallback to column-based approach like V2.1
|
| 772 |
-
seriesNames = [
|
| 773 |
-
"timestamp",
|
| 774 |
-
...columns.map(({ value }) => value).flat(),
|
| 775 |
-
];
|
| 776 |
}
|
| 777 |
|
| 778 |
const chartData = episodeData.map((row, index) => {
|
|
@@ -783,14 +729,17 @@ function processEpisodeDataForCharts(
|
|
| 783 |
let videoDuration = episodeData.length; // Fallback to data length
|
| 784 |
if (episodeMetadata) {
|
| 785 |
// Use actual video segment duration if available
|
| 786 |
-
videoDuration =
|
|
|
|
|
|
|
| 787 |
}
|
| 788 |
-
obj["timestamp"] =
|
|
|
|
| 789 |
|
| 790 |
// Add all data columns using hierarchical naming
|
| 791 |
-
if (row && typeof row ===
|
| 792 |
Object.entries(row).forEach(([key, value]) => {
|
| 793 |
-
if (key ===
|
| 794 |
// Timestamp is already handled above
|
| 795 |
return;
|
| 796 |
}
|
|
@@ -805,21 +754,21 @@ function processEpisodeDataForCharts(
|
|
| 805 |
if (excludedColumns.includes(featureName)) return;
|
| 806 |
|
| 807 |
// Find the matching column definition to get proper series names
|
| 808 |
-
const columnDef = columns.find(col => col.key === featureName);
|
| 809 |
|
| 810 |
if (Array.isArray(value) && columnDef) {
|
| 811 |
// For array values like observation.state and action, use proper hierarchical naming
|
| 812 |
value.forEach((val, idx) => {
|
| 813 |
if (idx < columnDef.value.length) {
|
| 814 |
const seriesName = columnDef.value[idx];
|
| 815 |
-
obj[seriesName] = typeof val ===
|
| 816 |
}
|
| 817 |
});
|
| 818 |
-
} else if (typeof value ===
|
| 819 |
obj[featureName] = value;
|
| 820 |
-
} else if (typeof value ===
|
| 821 |
obj[featureName] = Number(value);
|
| 822 |
-
} else if (typeof value ===
|
| 823 |
// Convert boolean to number for charts
|
| 824 |
obj[featureName] = value ? 1 : 0;
|
| 825 |
}
|
|
@@ -837,92 +786,27 @@ function processEpisodeDataForCharts(
|
|
| 837 |
["float32", "int32"].includes(value.dtype) && value.shape.length > 2, // Only ignore 3D+ data
|
| 838 |
)
|
| 839 |
.map(([key]) => key),
|
| 840 |
-
...excludedColumns // Also include the manually excluded columns
|
| 841 |
];
|
| 842 |
|
| 843 |
-
//
|
| 844 |
-
const
|
| 845 |
-
const suffixGroupsMap: Record<string, string[]> = {};
|
| 846 |
-
|
| 847 |
-
for (const key of numericKeys) {
|
| 848 |
-
const parts = key.split(SERIES_NAME_DELIMITER);
|
| 849 |
-
const suffix = parts[1] || parts[0]; // fallback to key if no delimiter
|
| 850 |
-
if (!suffixGroupsMap[suffix]) suffixGroupsMap[suffix] = [];
|
| 851 |
-
suffixGroupsMap[suffix].push(key);
|
| 852 |
-
}
|
| 853 |
-
const suffixGroups = Object.values(suffixGroupsMap);
|
| 854 |
-
|
| 855 |
-
|
| 856 |
-
// Compute min/max for each suffix group
|
| 857 |
-
const groupStats: Record<string, { min: number; max: number }> = {};
|
| 858 |
-
suffixGroups.forEach((group) => {
|
| 859 |
-
let min = Infinity, max = -Infinity;
|
| 860 |
-
for (const row of chartData) {
|
| 861 |
-
for (const key of group) {
|
| 862 |
-
const v = row[key];
|
| 863 |
-
if (typeof v === "number" && !isNaN(v)) {
|
| 864 |
-
if (v < min) min = v;
|
| 865 |
-
if (v > max) max = v;
|
| 866 |
-
}
|
| 867 |
-
}
|
| 868 |
-
}
|
| 869 |
-
groupStats[group[0]] = { min, max };
|
| 870 |
-
});
|
| 871 |
-
|
| 872 |
-
// Group by similar scale
|
| 873 |
-
const scaleGroups: Record<string, string[][]> = {};
|
| 874 |
-
const used = new Set<string>();
|
| 875 |
-
const SCALE_THRESHOLD = 2;
|
| 876 |
-
for (const group of suffixGroups) {
|
| 877 |
-
const groupId = group[0];
|
| 878 |
-
if (used.has(groupId)) continue;
|
| 879 |
-
const { min, max } = groupStats[groupId];
|
| 880 |
-
if (!isFinite(min) || !isFinite(max)) continue;
|
| 881 |
-
const logMin = Math.log10(Math.abs(min) + 1e-9);
|
| 882 |
-
const logMax = Math.log10(Math.abs(max) + 1e-9);
|
| 883 |
-
const unit: string[][] = [group];
|
| 884 |
-
used.add(groupId);
|
| 885 |
-
for (const other of suffixGroups) {
|
| 886 |
-
const otherId = other[0];
|
| 887 |
-
if (used.has(otherId) || otherId === groupId) continue;
|
| 888 |
-
const { min: omin, max: omax } = groupStats[otherId];
|
| 889 |
-
if (!isFinite(omin) || !isFinite(omax) || omin === omax) continue;
|
| 890 |
-
const ologMin = Math.log10(Math.abs(omin) + 1e-9);
|
| 891 |
-
const ologMax = Math.log10(Math.abs(omax) + 1e-9);
|
| 892 |
-
if (
|
| 893 |
-
Math.abs(logMin - ologMin) <= SCALE_THRESHOLD &&
|
| 894 |
-
Math.abs(logMax - ologMax) <= SCALE_THRESHOLD
|
| 895 |
-
) {
|
| 896 |
-
unit.push(other);
|
| 897 |
-
used.add(otherId);
|
| 898 |
-
}
|
| 899 |
-
}
|
| 900 |
-
scaleGroups[groupId] = unit;
|
| 901 |
-
}
|
| 902 |
-
|
| 903 |
-
// Flatten into chartGroups
|
| 904 |
-
const chartGroups: string[][] = Object.values(scaleGroups)
|
| 905 |
-
.sort((a, b) => b.length - a.length)
|
| 906 |
-
.flatMap((suffixGroupArr) => {
|
| 907 |
-
const merged = suffixGroupArr.flat();
|
| 908 |
-
if (merged.length > 6) {
|
| 909 |
-
const subgroups = [];
|
| 910 |
-
for (let i = 0; i < merged.length; i += 6) {
|
| 911 |
-
subgroups.push(merged.slice(i, i + 6));
|
| 912 |
-
}
|
| 913 |
-
return subgroups;
|
| 914 |
-
}
|
| 915 |
-
return [merged];
|
| 916 |
-
});
|
| 917 |
|
| 918 |
const chartDataGroups = chartGroups.map((group) =>
|
| 919 |
-
chartData.map((row) =>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 920 |
);
|
| 921 |
|
| 922 |
return { chartDataGroups, flatChartData: chartData, ignoredColumns };
|
| 923 |
}
|
| 924 |
|
| 925 |
-
|
| 926 |
// Video info extraction with segmentation for v3.0
|
| 927 |
function extractVideoInfoV3WithSegmentation(
|
| 928 |
repoId: string,
|
|
@@ -931,13 +815,14 @@ function extractVideoInfoV3WithSegmentation(
|
|
| 931 |
episodeMetadata: EpisodeMetadataV3,
|
| 932 |
): VideoInfo[] {
|
| 933 |
// Get video features from dataset info
|
| 934 |
-
const videoFeatures = Object.entries(info.features)
|
| 935 |
-
|
|
|
|
| 936 |
|
| 937 |
const videosInfo = videoFeatures.map(([videoKey]) => {
|
| 938 |
// Check if we have per-camera metadata in the episode row
|
| 939 |
-
const cameraSpecificKeys = Object.keys(episodeMetadata).filter(key =>
|
| 940 |
-
key.startsWith(`videos/${videoKey}/`)
|
| 941 |
);
|
| 942 |
|
| 943 |
let chunkIndex: number, fileIndex: number, segmentStart: number, segmentEnd: number;
|
|
@@ -956,7 +841,15 @@ function extractVideoInfoV3WithSegmentation(
|
|
| 956 |
segmentEnd = episodeMetadata.video_to_timestamp || 30;
|
| 957 |
}
|
| 958 |
|
| 959 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 960 |
const fullUrl = buildVersionedUrl(repoId, version, videoPath);
|
| 961 |
|
| 962 |
return {
|
|
@@ -964,9 +857,9 @@ function extractVideoInfoV3WithSegmentation(
|
|
| 964 |
url: fullUrl,
|
| 965 |
// Enable segmentation with timestamps from metadata
|
| 966 |
isSegmented: true,
|
| 967 |
-
segmentStart:
|
| 968 |
-
segmentEnd:
|
| 969 |
-
segmentDuration:
|
| 970 |
};
|
| 971 |
});
|
| 972 |
|
|
@@ -988,8 +881,15 @@ async function loadEpisodeMetadataV3Simple(
|
|
| 988 |
|
| 989 |
// Try loading episode metadata files until we find the episode
|
| 990 |
while (!episodeRow) {
|
| 991 |
-
const episodesMetadataPath =
|
| 992 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 993 |
|
| 994 |
try {
|
| 995 |
const arrayBuffer = await fetchParquetFile(episodesMetadataUrl);
|
|
@@ -1015,9 +915,11 @@ async function loadEpisodeMetadataV3Simple(
|
|
| 1015 |
// Not in this file, try the next one
|
| 1016 |
fileIndex++;
|
| 1017 |
}
|
| 1018 |
-
} catch
|
| 1019 |
// File doesn't exist - episode not found
|
| 1020 |
-
throw new Error(
|
|
|
|
|
|
|
| 1021 |
}
|
| 1022 |
}
|
| 1023 |
|
|
@@ -1028,9 +930,9 @@ async function loadEpisodeMetadataV3Simple(
|
|
| 1028 |
// Simple parser for episode row - focuses on key fields for episodes
|
| 1029 |
function parseEpisodeRowSimple(row: Record<string, unknown>): EpisodeMetadataV3 {
|
| 1030 |
// v3.0 uses named keys in the episode metadata
|
| 1031 |
-
if (row && typeof row ===
|
| 1032 |
// Check if this is v3.0 format with named keys
|
| 1033 |
-
if (
|
| 1034 |
// v3.0 format - use named keys
|
| 1035 |
// Convert BigInt values to numbers
|
| 1036 |
const toBigIntSafe = (value: unknown): number => {
|
|
@@ -1079,7 +981,7 @@ function parseEpisodeRowSimple(row: Record<string, unknown>): EpisodeMetadataV3
|
|
| 1079 |
}
|
| 1080 |
});
|
| 1081 |
|
| 1082 |
-
return episodeData;
|
| 1083 |
} else {
|
| 1084 |
// Fallback to numeric keys for compatibility
|
| 1085 |
const toNum = (v: unknown, fallback = 0): number =>
|
|
@@ -1118,7 +1020,6 @@ function parseEpisodeRowSimple(row: Record<string, unknown>): EpisodeMetadataV3
|
|
| 1118 |
|
| 1119 |
|
| 1120 |
|
| 1121 |
-
|
| 1122 |
// ─── Stats computation ───────────────────────────────────────────
|
| 1123 |
|
| 1124 |
/**
|
|
|
|
| 6 |
} from "@/utils/parquetUtils";
|
| 7 |
import { pick } from "@/utils/pick";
|
| 8 |
import { getDatasetVersionAndInfo, buildVersionedUrl } from "@/utils/versionUtils";
|
| 9 |
+
import { PADDING, CHART_CONFIG, EXCLUDED_COLUMNS } from "@/utils/constants";
|
| 10 |
+
import {
|
| 11 |
+
processChartDataGroups,
|
| 12 |
+
groupRowBySuffix,
|
| 13 |
+
} from "@/utils/dataProcessing";
|
| 14 |
+
import {
|
| 15 |
+
buildV3VideoPath,
|
| 16 |
+
buildV3DataPath,
|
| 17 |
+
buildV3EpisodesMetadataPath,
|
| 18 |
+
} from "@/utils/stringFormatting";
|
| 19 |
+
import { bigIntToNumber } from "@/utils/typeGuards";
|
| 20 |
|
| 21 |
+
const SERIES_NAME_DELIMITER = CHART_CONFIG.SERIES_NAME_DELIMITER;
|
| 22 |
|
| 23 |
export type VideoInfo = {
|
| 24 |
filename: string;
|
|
|
|
| 110 |
value: string[];
|
| 111 |
};
|
| 112 |
|
| 113 |
+
type AdjacentEpisodeVideos = {
|
| 114 |
+
episodeId: number;
|
| 115 |
+
videosInfo: VideoInfo[];
|
| 116 |
+
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
export async function getEpisodeData(
|
| 119 |
org: string,
|
|
|
|
| 128 |
const info = rawInfo as unknown as DatasetMetadata;
|
| 129 |
|
| 130 |
if (info.video_path === null) {
|
| 131 |
+
throw new Error(
|
| 132 |
+
"Only videos datasets are supported in this visualizer.\nPlease use Rerun visualizer for images datasets.",
|
| 133 |
+
);
|
| 134 |
}
|
| 135 |
|
| 136 |
console.time(`[perf] getEpisodeData (${version})`);
|
|
|
|
| 165 |
dataset: string,
|
| 166 |
currentEpisodeId: number,
|
| 167 |
radius: number = 2,
|
| 168 |
+
): Promise<AdjacentEpisodeVideos[]> {
|
| 169 |
const repoId = `${org}/${dataset}`;
|
| 170 |
try {
|
| 171 |
const { version, info: rawInfo } = await getDatasetVersionAndInfo(repoId);
|
| 172 |
const info = rawInfo as unknown as DatasetMetadata;
|
| 173 |
|
| 174 |
const totalEpisodes = info.total_episodes;
|
| 175 |
+
const adjacentVideos: AdjacentEpisodeVideos[] = [];
|
| 176 |
|
| 177 |
// Calculate adjacent episode IDs
|
| 178 |
for (let offset = -radius; offset <= radius; offset++) {
|
|
|
|
| 184 |
let videosInfo: VideoInfo[] = [];
|
| 185 |
|
| 186 |
if (version === "v3.0") {
|
| 187 |
+
const episodeMetadata = await loadEpisodeMetadataV3Simple(
|
| 188 |
+
repoId,
|
| 189 |
+
version,
|
| 190 |
+
episodeId,
|
| 191 |
+
);
|
| 192 |
+
videosInfo = extractVideoInfoV3WithSegmentation(
|
| 193 |
+
repoId,
|
| 194 |
+
version,
|
| 195 |
+
info,
|
| 196 |
+
episodeMetadata,
|
| 197 |
+
);
|
| 198 |
} else {
|
| 199 |
// For v2.x, use simpler video info extraction
|
| 200 |
+
if (info.video_path) {
|
| 201 |
const episode_chunk = Math.floor(0 / 1000);
|
| 202 |
videosInfo = Object.entries(info.features)
|
| 203 |
.filter(([, value]) => value.dtype === "video")
|
| 204 |
.map(([key]) => {
|
| 205 |
+
const videoPath = formatStringWithVars(info.video_path!, {
|
| 206 |
video_key: key,
|
| 207 |
+
episode_chunk: episode_chunk
|
| 208 |
+
.toString()
|
| 209 |
+
.padStart(PADDING.CHUNK_INDEX, "0"),
|
| 210 |
+
episode_index: episodeId
|
| 211 |
+
.toString()
|
| 212 |
+
.padStart(PADDING.EPISODE_INDEX, "0"),
|
| 213 |
});
|
| 214 |
return {
|
| 215 |
filename: key,
|
| 216 |
url: buildVersionedUrl(repoId, version, videoPath),
|
| 217 |
};
|
| 218 |
});
|
| 219 |
+
}
|
| 220 |
}
|
| 221 |
|
| 222 |
adjacentVideos.push({ episodeId, videosInfo });
|
|
|
|
| 262 |
// episode id starts from 0
|
| 263 |
(_, i) => i,
|
| 264 |
)
|
| 265 |
+
: process.env.EPISODES.split(/\s+/)
|
|
|
|
| 266 |
.map((x) => parseInt(x.trim(), 10))
|
| 267 |
.filter((x) => !isNaN(x));
|
| 268 |
|
| 269 |
// Videos information
|
| 270 |
+
const videosInfo =
|
| 271 |
+
info.video_path !== null
|
| 272 |
+
? Object.entries(info.features)
|
| 273 |
.filter(([, value]) => value.dtype === "video")
|
| 274 |
.map(([key]) => {
|
| 275 |
+
const videoPath = formatStringWithVars(info.video_path!, {
|
| 276 |
video_key: key,
|
| 277 |
+
episode_chunk: episode_chunk
|
| 278 |
+
.toString()
|
| 279 |
+
.padStart(PADDING.CHUNK_INDEX, "0"),
|
| 280 |
+
episode_index: episodeId
|
| 281 |
+
.toString()
|
| 282 |
+
.padStart(PADDING.EPISODE_INDEX, "0"),
|
| 283 |
});
|
| 284 |
return {
|
| 285 |
filename: key,
|
| 286 |
url: buildVersionedUrl(repoId, version, videoPath),
|
| 287 |
};
|
| 288 |
+
})
|
| 289 |
+
: [];
|
| 290 |
|
| 291 |
// Column data
|
| 292 |
const columnNames = Object.entries(info.features)
|
| 293 |
.filter(
|
| 294 |
([, value]) =>
|
| 295 |
+
["float32", "int32"].includes(value.dtype) && value.shape.length === 1,
|
|
|
|
| 296 |
)
|
| 297 |
.map(([key, { shape }]) => ({ key, length: shape[0] }));
|
| 298 |
|
| 299 |
// Exclude specific columns
|
| 300 |
+
const excludedColumns = EXCLUDED_COLUMNS.V2 as readonly string[];
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
const filteredColumns = columnNames.filter(
|
| 302 |
(column) => !excludedColumns.includes(column.key),
|
| 303 |
);
|
|
|
|
| 318 |
? column_names.map((name: string) => `${key}${SERIES_NAME_DELIMITER}${name}`)
|
| 319 |
: Array.from(
|
| 320 |
{ length: columnNames.find((c) => c.key === key)?.length ?? 1 },
|
| 321 |
+
(_, i) => `${key}${CHART_CONFIG.SERIES_NAME_DELIMITER}${i}`,
|
| 322 |
),
|
| 323 |
};
|
| 324 |
});
|
|
|
|
| 327 |
repoId,
|
| 328 |
version,
|
| 329 |
formatStringWithVars(info.data_path, {
|
| 330 |
+
episode_chunk: episode_chunk
|
| 331 |
+
.toString()
|
| 332 |
+
.padStart(PADDING.CHUNK_INDEX, "0"),
|
| 333 |
+
episode_index: episodeId.toString().padStart(PADDING.EPISODE_INDEX, "0"),
|
| 334 |
+
}),
|
| 335 |
);
|
| 336 |
|
| 337 |
const arrayBuffer = await fetchParquetFile(parquetUrl);
|
|
|
|
| 371 |
if (tasksResponse.ok) {
|
| 372 |
const tasksText = await tasksResponse.text();
|
| 373 |
const tasksData = tasksText
|
| 374 |
+
.split("\n")
|
| 375 |
+
.filter((line) => line.trim())
|
| 376 |
+
.map((line) => JSON.parse(line));
|
| 377 |
|
| 378 |
if (tasksData && tasksData.length > 0) {
|
| 379 |
const taskIndex = allData[0].task_index;
|
| 380 |
const taskIndexNum = typeof taskIndex === 'bigint' ? Number(taskIndex) : taskIndex;
|
| 381 |
+
const taskData = tasksData.find((t: Record<string, unknown>) => t.task_index === taskIndexNum);
|
| 382 |
if (taskData) {
|
| 383 |
task = taskData.task;
|
| 384 |
}
|
| 385 |
}
|
| 386 |
}
|
| 387 |
+
} catch {
|
| 388 |
// No tasks metadata file for this v2.x dataset
|
| 389 |
}
|
| 390 |
}
|
|
|
|
| 419 |
)
|
| 420 |
.map(([key]) => key);
|
| 421 |
|
| 422 |
+
// Process chart data into organized groups using utility function
|
| 423 |
+
const chartGroups = processChartDataGroups(seriesNames, chartData);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 424 |
|
| 425 |
const duration = chartData[chartData.length - 1].timestamp;
|
| 426 |
|
| 427 |
const chartDataGroups = chartGroups.map((group) =>
|
| 428 |
+
chartData.map((row) => {
|
| 429 |
+
const grouped = groupRowBySuffix(pick(row, [...group, "timestamp"]));
|
| 430 |
+
// Ensure timestamp is always a number at the top level
|
| 431 |
+
return {
|
| 432 |
+
...grouped,
|
| 433 |
+
timestamp:
|
| 434 |
+
typeof grouped.timestamp === "number" ? grouped.timestamp : 0,
|
| 435 |
+
};
|
| 436 |
+
}),
|
| 437 |
);
|
| 438 |
|
| 439 |
return {
|
|
|
|
| 471 |
const episodes = Array.from({ length: info.total_episodes }, (_, i) => i);
|
| 472 |
|
| 473 |
// Load episode metadata to get timestamps for episode 0
|
| 474 |
+
const episodeMetadata = await loadEpisodeMetadataV3Simple(
|
| 475 |
+
repoId,
|
| 476 |
+
version,
|
| 477 |
+
episodeId,
|
| 478 |
+
);
|
| 479 |
|
| 480 |
// Create video info with segmentation using the metadata
|
| 481 |
+
const videosInfo = extractVideoInfoV3WithSegmentation(
|
| 482 |
+
repoId,
|
| 483 |
+
version,
|
| 484 |
+
info,
|
| 485 |
+
episodeMetadata,
|
| 486 |
+
);
|
| 487 |
|
| 488 |
// Load episode data for charts
|
| 489 |
const { chartDataGroups, flatChartData, ignoredColumns, task } = await loadEpisodeDataV3(repoId, version, info, episodeMetadata);
|
|
|
|
| 512 |
episodeMetadata: EpisodeMetadataV3,
|
| 513 |
): Promise<{ chartDataGroups: ChartRow[][]; flatChartData: Record<string, number>[]; ignoredColumns: string[]; task?: string }> {
|
| 514 |
// Build data file path using chunk and file indices
|
| 515 |
+
const dataChunkIndex = bigIntToNumber(episodeMetadata.data_chunk_index, 0);
|
| 516 |
+
const dataFileIndex = bigIntToNumber(episodeMetadata.data_file_index, 0);
|
| 517 |
+
const dataPath = buildV3DataPath(dataChunkIndex, dataFileIndex);
|
| 518 |
|
| 519 |
try {
|
| 520 |
const dataUrl = buildVersionedUrl(repoId, version, dataPath);
|
|
|
|
| 633 |
|
| 634 |
// Common feature order for v3.0 datasets (but only include if they exist)
|
| 635 |
const expectedFeatureOrder = [
|
| 636 |
+
"observation.state",
|
| 637 |
+
"action",
|
| 638 |
+
"timestamp",
|
| 639 |
+
"episode_index",
|
| 640 |
+
"frame_index",
|
| 641 |
+
"next.reward",
|
| 642 |
+
"next.done",
|
| 643 |
+
"index",
|
| 644 |
+
"task_index",
|
| 645 |
];
|
| 646 |
|
| 647 |
// Map indices to features that actually exist
|
| 648 |
let currentIndex = 0;
|
| 649 |
+
expectedFeatureOrder.forEach((feature) => {
|
| 650 |
if (featureKeys.includes(feature)) {
|
| 651 |
v3IndexToFeatureMap[currentIndex.toString()] = feature;
|
| 652 |
currentIndex++;
|
|
|
|
| 654 |
});
|
| 655 |
|
| 656 |
// Columns to exclude from charts (note: 'task' is intentionally not excluded as we want to access it)
|
| 657 |
+
const excludedColumns = EXCLUDED_COLUMNS.V3 as readonly string[];
|
| 658 |
|
| 659 |
// Create columns structure similar to V2.1 for proper hierarchical naming
|
| 660 |
const columns: ColumnDef[] = Object.entries(info.features)
|
|
|
|
| 675 |
? column_names.map((name: string) => `${key}${SERIES_NAME_DELIMITER}${name}`)
|
| 676 |
: Array.from(
|
| 677 |
{ length: feature.shape[0] || 1 },
|
| 678 |
+
(_, i) => `${key}${CHART_CONFIG.SERIES_NAME_DELIMITER}${i}`,
|
| 679 |
),
|
| 680 |
};
|
| 681 |
});
|
|
|
|
| 686 |
const allKeys: string[] = [];
|
| 687 |
|
| 688 |
Object.entries(firstRow || {}).forEach(([key, value]) => {
|
| 689 |
+
if (key === "timestamp") return; // Skip timestamp, we'll add it separately
|
| 690 |
|
| 691 |
// Map numeric key to feature name if available
|
| 692 |
const featureName = v3IndexToFeatureMap[key] || key;
|
|
|
|
| 698 |
if (excludedColumns.includes(featureName)) return;
|
| 699 |
|
| 700 |
// Find the matching column definition to get proper names
|
| 701 |
+
const columnDef = columns.find((col) => col.key === featureName);
|
| 702 |
if (columnDef && Array.isArray(value) && value.length > 0) {
|
| 703 |
// Use the proper hierarchical naming from column definition
|
| 704 |
columnDef.value.forEach((seriesName, idx) => {
|
|
|
|
| 706 |
allKeys.push(seriesName);
|
| 707 |
}
|
| 708 |
});
|
| 709 |
+
} else if (typeof value === "number" && !isNaN(value)) {
|
| 710 |
// For scalar numeric values
|
| 711 |
allKeys.push(featureName);
|
| 712 |
+
} else if (typeof value === "bigint") {
|
| 713 |
// For BigInt values
|
| 714 |
allKeys.push(featureName);
|
| 715 |
}
|
|
|
|
| 718 |
seriesNames = ["timestamp", ...allKeys];
|
| 719 |
} else {
|
| 720 |
// Fallback to column-based approach like V2.1
|
| 721 |
+
seriesNames = ["timestamp", ...columns.map(({ value }) => value).flat()];
|
|
|
|
|
|
|
|
|
|
| 722 |
}
|
| 723 |
|
| 724 |
const chartData = episodeData.map((row, index) => {
|
|
|
|
| 729 |
let videoDuration = episodeData.length; // Fallback to data length
|
| 730 |
if (episodeMetadata) {
|
| 731 |
// Use actual video segment duration if available
|
| 732 |
+
videoDuration =
|
| 733 |
+
(episodeMetadata.video_to_timestamp || 30) -
|
| 734 |
+
(episodeMetadata.video_from_timestamp || 0);
|
| 735 |
}
|
| 736 |
+
obj["timestamp"] =
|
| 737 |
+
(index / Math.max(episodeData.length - 1, 1)) * videoDuration;
|
| 738 |
|
| 739 |
// Add all data columns using hierarchical naming
|
| 740 |
+
if (row && typeof row === "object") {
|
| 741 |
Object.entries(row).forEach(([key, value]) => {
|
| 742 |
+
if (key === "timestamp") {
|
| 743 |
// Timestamp is already handled above
|
| 744 |
return;
|
| 745 |
}
|
|
|
|
| 754 |
if (excludedColumns.includes(featureName)) return;
|
| 755 |
|
| 756 |
// Find the matching column definition to get proper series names
|
| 757 |
+
const columnDef = columns.find((col) => col.key === featureName);
|
| 758 |
|
| 759 |
if (Array.isArray(value) && columnDef) {
|
| 760 |
// For array values like observation.state and action, use proper hierarchical naming
|
| 761 |
value.forEach((val, idx) => {
|
| 762 |
if (idx < columnDef.value.length) {
|
| 763 |
const seriesName = columnDef.value[idx];
|
| 764 |
+
obj[seriesName] = typeof val === "number" ? val : Number(val);
|
| 765 |
}
|
| 766 |
});
|
| 767 |
+
} else if (typeof value === "number" && !isNaN(value)) {
|
| 768 |
obj[featureName] = value;
|
| 769 |
+
} else if (typeof value === "bigint") {
|
| 770 |
obj[featureName] = Number(value);
|
| 771 |
+
} else if (typeof value === "boolean") {
|
| 772 |
// Convert boolean to number for charts
|
| 773 |
obj[featureName] = value ? 1 : 0;
|
| 774 |
}
|
|
|
|
| 786 |
["float32", "int32"].includes(value.dtype) && value.shape.length > 2, // Only ignore 3D+ data
|
| 787 |
)
|
| 788 |
.map(([key]) => key),
|
| 789 |
+
...excludedColumns, // Also include the manually excluded columns
|
| 790 |
];
|
| 791 |
|
| 792 |
+
// Process chart data into organized groups using utility function
|
| 793 |
+
const chartGroups = processChartDataGroups(seriesNames, chartData);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 794 |
|
| 795 |
const chartDataGroups = chartGroups.map((group) =>
|
| 796 |
+
chartData.map((row) => {
|
| 797 |
+
const grouped = groupRowBySuffix(pick(row, [...group, "timestamp"]));
|
| 798 |
+
// Ensure timestamp is always a number at the top level
|
| 799 |
+
return {
|
| 800 |
+
...grouped,
|
| 801 |
+
timestamp:
|
| 802 |
+
typeof grouped.timestamp === "number" ? grouped.timestamp : 0,
|
| 803 |
+
};
|
| 804 |
+
}),
|
| 805 |
);
|
| 806 |
|
| 807 |
return { chartDataGroups, flatChartData: chartData, ignoredColumns };
|
| 808 |
}
|
| 809 |
|
|
|
|
| 810 |
// Video info extraction with segmentation for v3.0
|
| 811 |
function extractVideoInfoV3WithSegmentation(
|
| 812 |
repoId: string,
|
|
|
|
| 815 |
episodeMetadata: EpisodeMetadataV3,
|
| 816 |
): VideoInfo[] {
|
| 817 |
// Get video features from dataset info
|
| 818 |
+
const videoFeatures = Object.entries(info.features).filter(
|
| 819 |
+
([, value]) => value.dtype === "video",
|
| 820 |
+
);
|
| 821 |
|
| 822 |
const videosInfo = videoFeatures.map(([videoKey]) => {
|
| 823 |
// Check if we have per-camera metadata in the episode row
|
| 824 |
+
const cameraSpecificKeys = Object.keys(episodeMetadata).filter((key) =>
|
| 825 |
+
key.startsWith(`videos/${videoKey}/`),
|
| 826 |
);
|
| 827 |
|
| 828 |
let chunkIndex: number, fileIndex: number, segmentStart: number, segmentEnd: number;
|
|
|
|
| 841 |
segmentEnd = episodeMetadata.video_to_timestamp || 30;
|
| 842 |
}
|
| 843 |
|
| 844 |
+
// Convert BigInt to number for timestamps
|
| 845 |
+
const startNum = bigIntToNumber(segmentStart);
|
| 846 |
+
const endNum = bigIntToNumber(segmentEnd);
|
| 847 |
+
|
| 848 |
+
const videoPath = buildV3VideoPath(
|
| 849 |
+
videoKey,
|
| 850 |
+
bigIntToNumber(chunkIndex, 0),
|
| 851 |
+
bigIntToNumber(fileIndex, 0),
|
| 852 |
+
);
|
| 853 |
const fullUrl = buildVersionedUrl(repoId, version, videoPath);
|
| 854 |
|
| 855 |
return {
|
|
|
|
| 857 |
url: fullUrl,
|
| 858 |
// Enable segmentation with timestamps from metadata
|
| 859 |
isSegmented: true,
|
| 860 |
+
segmentStart: startNum,
|
| 861 |
+
segmentEnd: endNum,
|
| 862 |
+
segmentDuration: endNum - startNum,
|
| 863 |
};
|
| 864 |
});
|
| 865 |
|
|
|
|
| 881 |
|
| 882 |
// Try loading episode metadata files until we find the episode
|
| 883 |
while (!episodeRow) {
|
| 884 |
+
const episodesMetadataPath = buildV3EpisodesMetadataPath(
|
| 885 |
+
chunkIndex,
|
| 886 |
+
fileIndex,
|
| 887 |
+
);
|
| 888 |
+
const episodesMetadataUrl = buildVersionedUrl(
|
| 889 |
+
repoId,
|
| 890 |
+
version,
|
| 891 |
+
episodesMetadataPath,
|
| 892 |
+
);
|
| 893 |
|
| 894 |
try {
|
| 895 |
const arrayBuffer = await fetchParquetFile(episodesMetadataUrl);
|
|
|
|
| 915 |
// Not in this file, try the next one
|
| 916 |
fileIndex++;
|
| 917 |
}
|
| 918 |
+
} catch {
|
| 919 |
// File doesn't exist - episode not found
|
| 920 |
+
throw new Error(
|
| 921 |
+
`Episode ${episodeId} not found in metadata (searched up to file-${fileIndex.toString().padStart(PADDING.CHUNK_INDEX, "0")}.parquet)`,
|
| 922 |
+
);
|
| 923 |
}
|
| 924 |
}
|
| 925 |
|
|
|
|
| 930 |
// Simple parser for episode row - focuses on key fields for episodes
|
| 931 |
function parseEpisodeRowSimple(row: Record<string, unknown>): EpisodeMetadataV3 {
|
| 932 |
// v3.0 uses named keys in the episode metadata
|
| 933 |
+
if (row && typeof row === "object") {
|
| 934 |
// Check if this is v3.0 format with named keys
|
| 935 |
+
if ("episode_index" in row) {
|
| 936 |
// v3.0 format - use named keys
|
| 937 |
// Convert BigInt values to numbers
|
| 938 |
const toBigIntSafe = (value: unknown): number => {
|
|
|
|
| 981 |
}
|
| 982 |
});
|
| 983 |
|
| 984 |
+
return episodeData as EpisodeMetadataV3;
|
| 985 |
} else {
|
| 986 |
// Fallback to numeric keys for compatibility
|
| 987 |
const toNum = (v: unknown, fallback = 0): number =>
|
|
|
|
| 1020 |
|
| 1021 |
|
| 1022 |
|
|
|
|
| 1023 |
// ─── Stats computation ───────────────────────────────────────────
|
| 1024 |
|
| 1025 |
/**
|
src/app/[org]/[dataset]/page.tsx
CHANGED
|
@@ -6,10 +6,10 @@ export default async function DatasetRootPage({
|
|
| 6 |
params: Promise<{ org: string; dataset: string }>;
|
| 7 |
}) {
|
| 8 |
const { org, dataset } = await params;
|
| 9 |
-
const episodeN =
|
| 10 |
-
?.split(/\s+/)
|
| 11 |
-
|
| 12 |
-
|
| 13 |
|
| 14 |
redirect(`/${org}/${dataset}/episode_${episodeN}`);
|
| 15 |
}
|
|
|
|
| 6 |
params: Promise<{ org: string; dataset: string }>;
|
| 7 |
}) {
|
| 8 |
const { org, dataset } = await params;
|
| 9 |
+
const episodeN =
|
| 10 |
+
process.env.EPISODES?.split(/\s+/)
|
| 11 |
+
.map((x) => parseInt(x.trim(), 10))
|
| 12 |
+
.filter((x) => !isNaN(x))[0] ?? 0;
|
| 13 |
|
| 14 |
redirect(`/${org}/${dataset}/episode_${episodeN}`);
|
| 15 |
}
|
src/app/explore/explore-grid.tsx
CHANGED
|
@@ -2,8 +2,6 @@
|
|
| 2 |
|
| 3 |
import React, { useEffect, useRef } from "react";
|
| 4 |
import Link from "next/link";
|
| 5 |
-
|
| 6 |
-
import { useRouter, useSearchParams } from "next/navigation";
|
| 7 |
import { postParentMessageWithParams } from "@/utils/postParentMessage";
|
| 8 |
|
| 9 |
type ExploreGridProps = {
|
|
|
|
| 2 |
|
| 3 |
import React, { useEffect, useRef } from "react";
|
| 4 |
import Link from "next/link";
|
|
|
|
|
|
|
| 5 |
import { postParentMessageWithParams } from "@/utils/postParentMessage";
|
| 6 |
|
| 7 |
type ExploreGridProps = {
|
src/app/explore/page.tsx
CHANGED
|
@@ -1,17 +1,15 @@
|
|
| 1 |
import React from "react";
|
| 2 |
import ExploreGrid from "./explore-grid";
|
| 3 |
-
import {
|
| 4 |
-
DatasetMetadata,
|
| 5 |
-
fetchJson,
|
| 6 |
-
formatStringWithVars,
|
| 7 |
-
} from "@/utils/parquetUtils";
|
| 8 |
import { getDatasetVersion, buildVersionedUrl } from "@/utils/versionUtils";
|
|
|
|
| 9 |
|
| 10 |
export default async function ExplorePage({
|
| 11 |
searchParams,
|
| 12 |
}: {
|
| 13 |
-
searchParams: { p?: string };
|
| 14 |
}) {
|
|
|
|
| 15 |
let datasets: { id: string }[] = [];
|
| 16 |
let currentPage = 1;
|
| 17 |
let totalPages = 1;
|
|
@@ -25,8 +23,8 @@ export default async function ExplorePage({
|
|
| 25 |
if (!res.ok) throw new Error("Failed to fetch datasets");
|
| 26 |
const data = await res.json();
|
| 27 |
const allDatasets = data.datasets || data;
|
| 28 |
-
// Use
|
| 29 |
-
const page = parseInt(
|
| 30 |
const perPage = 30;
|
| 31 |
|
| 32 |
currentPage = page;
|
|
@@ -46,24 +44,26 @@ export default async function ExplorePage({
|
|
| 46 |
try {
|
| 47 |
const [org, dataset] = ds.id.split("/");
|
| 48 |
const repoId = `${org}/${dataset}`;
|
| 49 |
-
|
| 50 |
// Try to get compatible version, but don't fail the entire page if incompatible
|
| 51 |
let version: string;
|
| 52 |
try {
|
| 53 |
version = await getDatasetVersion(repoId);
|
| 54 |
} catch (err) {
|
| 55 |
// Dataset is not compatible, skip it silently
|
| 56 |
-
console.warn(
|
|
|
|
|
|
|
| 57 |
return null;
|
| 58 |
}
|
| 59 |
-
|
| 60 |
const jsonUrl = buildVersionedUrl(repoId, version, "meta/info.json");
|
| 61 |
const info = await fetchJson<DatasetMetadata>(jsonUrl);
|
| 62 |
const videoEntry = Object.entries(info.features).find(
|
| 63 |
([, value]) => value.dtype === "video",
|
| 64 |
);
|
| 65 |
let videoUrl: string | null = null;
|
| 66 |
-
if (videoEntry) {
|
| 67 |
const [key] = videoEntry;
|
| 68 |
const videoPath = formatStringWithVars(info.video_path, {
|
| 69 |
video_key: key,
|
|
|
|
| 1 |
import React from "react";
|
| 2 |
import ExploreGrid from "./explore-grid";
|
| 3 |
+
import { fetchJson, formatStringWithVars } from "@/utils/parquetUtils";
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
import { getDatasetVersion, buildVersionedUrl } from "@/utils/versionUtils";
|
| 5 |
+
import type { DatasetMetadata } from "@/utils/parquetUtils";
|
| 6 |
|
| 7 |
export default async function ExplorePage({
|
| 8 |
searchParams,
|
| 9 |
}: {
|
| 10 |
+
searchParams: Promise<{ p?: string }>;
|
| 11 |
}) {
|
| 12 |
+
const params = await searchParams;
|
| 13 |
let datasets: { id: string }[] = [];
|
| 14 |
let currentPage = 1;
|
| 15 |
let totalPages = 1;
|
|
|
|
| 23 |
if (!res.ok) throw new Error("Failed to fetch datasets");
|
| 24 |
const data = await res.json();
|
| 25 |
const allDatasets = data.datasets || data;
|
| 26 |
+
// Use params from props
|
| 27 |
+
const page = parseInt(params?.p || "1", 10);
|
| 28 |
const perPage = 30;
|
| 29 |
|
| 30 |
currentPage = page;
|
|
|
|
| 44 |
try {
|
| 45 |
const [org, dataset] = ds.id.split("/");
|
| 46 |
const repoId = `${org}/${dataset}`;
|
| 47 |
+
|
| 48 |
// Try to get compatible version, but don't fail the entire page if incompatible
|
| 49 |
let version: string;
|
| 50 |
try {
|
| 51 |
version = await getDatasetVersion(repoId);
|
| 52 |
} catch (err) {
|
| 53 |
// Dataset is not compatible, skip it silently
|
| 54 |
+
console.warn(
|
| 55 |
+
`Skipping incompatible dataset ${repoId}: ${err instanceof Error ? err.message : err}`,
|
| 56 |
+
);
|
| 57 |
return null;
|
| 58 |
}
|
| 59 |
+
|
| 60 |
const jsonUrl = buildVersionedUrl(repoId, version, "meta/info.json");
|
| 61 |
const info = await fetchJson<DatasetMetadata>(jsonUrl);
|
| 62 |
const videoEntry = Object.entries(info.features).find(
|
| 63 |
([, value]) => value.dtype === "video",
|
| 64 |
);
|
| 65 |
let videoUrl: string | null = null;
|
| 66 |
+
if (videoEntry && info.video_path) {
|
| 67 |
const [key] = videoEntry;
|
| 68 |
const videoPath = formatStringWithVars(info.video_path, {
|
| 69 |
video_key: key,
|
src/app/page.tsx
CHANGED
|
@@ -27,31 +27,31 @@ function HomeInner() {
|
|
| 27 |
useEffect(() => {
|
| 28 |
// Redirect to the first episode of the dataset if REPO_ID is defined
|
| 29 |
if (process.env.REPO_ID) {
|
| 30 |
-
const episodeN =
|
| 31 |
-
?.split(/\s+/)
|
| 32 |
-
|
| 33 |
-
|
| 34 |
|
| 35 |
router.push(`/${process.env.REPO_ID}/episode_${episodeN}`);
|
| 36 |
return;
|
| 37 |
}
|
| 38 |
-
|
| 39 |
// sync with hf.co/spaces URL params
|
| 40 |
-
if (searchParams.get(
|
| 41 |
-
router.push(searchParams.get(
|
| 42 |
return;
|
| 43 |
}
|
| 44 |
|
| 45 |
// legacy sync with hf.co/spaces URL params
|
| 46 |
let redirectUrl: string | null = null;
|
| 47 |
-
if (searchParams.get(
|
| 48 |
-
redirectUrl = `/${searchParams.get(
|
| 49 |
-
} else if (searchParams.get(
|
| 50 |
-
redirectUrl = `/${searchParams.get(
|
| 51 |
}
|
| 52 |
|
| 53 |
-
if (redirectUrl && searchParams.get(
|
| 54 |
-
redirectUrl += `?t=${searchParams.get(
|
| 55 |
}
|
| 56 |
|
| 57 |
if (redirectUrl) {
|
|
|
|
| 27 |
useEffect(() => {
|
| 28 |
// Redirect to the first episode of the dataset if REPO_ID is defined
|
| 29 |
if (process.env.REPO_ID) {
|
| 30 |
+
const episodeN =
|
| 31 |
+
process.env.EPISODES?.split(/\s+/)
|
| 32 |
+
.map((x) => parseInt(x.trim(), 10))
|
| 33 |
+
.filter((x) => !isNaN(x))[0] ?? 0;
|
| 34 |
|
| 35 |
router.push(`/${process.env.REPO_ID}/episode_${episodeN}`);
|
| 36 |
return;
|
| 37 |
}
|
| 38 |
+
|
| 39 |
// sync with hf.co/spaces URL params
|
| 40 |
+
if (searchParams.get("path")) {
|
| 41 |
+
router.push(searchParams.get("path")!);
|
| 42 |
return;
|
| 43 |
}
|
| 44 |
|
| 45 |
// legacy sync with hf.co/spaces URL params
|
| 46 |
let redirectUrl: string | null = null;
|
| 47 |
+
if (searchParams.get("dataset") && searchParams.get("episode")) {
|
| 48 |
+
redirectUrl = `/${searchParams.get("dataset")}/episode_${searchParams.get("episode")}`;
|
| 49 |
+
} else if (searchParams.get("dataset")) {
|
| 50 |
+
redirectUrl = `/${searchParams.get("dataset")}`;
|
| 51 |
}
|
| 52 |
|
| 53 |
+
if (redirectUrl && searchParams.get("t")) {
|
| 54 |
+
redirectUrl += `?t=${searchParams.get("t")}`;
|
| 55 |
}
|
| 56 |
|
| 57 |
if (redirectUrl) {
|
src/components/data-recharts.tsx
CHANGED
|
@@ -54,14 +54,14 @@ export const DataRecharts = React.memo(
|
|
| 54 |
const [hoveredTime, setHoveredTime] = useState<number | null>(null);
|
| 55 |
const [expanded, setExpanded] = useState(false);
|
| 56 |
|
| 57 |
-
if (!Array.isArray(data) || data.length === 0) return null;
|
| 58 |
-
|
| 59 |
useEffect(() => {
|
| 60 |
if (typeof onChartsReady === "function") onChartsReady();
|
| 61 |
}, [onChartsReady]);
|
| 62 |
|
| 63 |
const combinedData = useMemo(() => expanded ? mergeGroups(data) : [], [data, expanded]);
|
| 64 |
|
|
|
|
|
|
|
| 65 |
return (
|
| 66 |
<div>
|
| 67 |
{data.length > 1 && (
|
|
@@ -101,7 +101,6 @@ export const DataRecharts = React.memo(
|
|
| 101 |
},
|
| 102 |
);
|
| 103 |
|
| 104 |
-
|
| 105 |
const SingleDataGraph = React.memo(
|
| 106 |
({
|
| 107 |
data,
|
|
@@ -125,9 +124,19 @@ const SingleDataGraph = React.memo(
|
|
| 125 |
} else {
|
| 126 |
result[key] = value;
|
| 127 |
}
|
| 128 |
-
} else if (
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
// If it's an object, recurse
|
| 130 |
-
Object.assign(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
}
|
| 132 |
}
|
| 133 |
if ("timestamp" in row && typeof row["timestamp"] === "number") {
|
|
@@ -137,7 +146,7 @@ const SingleDataGraph = React.memo(
|
|
| 137 |
}
|
| 138 |
|
| 139 |
// Flatten all rows for recharts
|
| 140 |
-
const chartData = useMemo(() => data.map(row => flattenRow(row)), [data]);
|
| 141 |
const [dataKeys, setDataKeys] = useState<string[]>([]);
|
| 142 |
const [visibleKeys, setVisibleKeys] = useState<string[]>([]);
|
| 143 |
|
|
@@ -216,22 +225,29 @@ const SingleDataGraph = React.memo(
|
|
| 216 |
groupColorMap[group] = CHART_COLORS[idx % CHART_COLORS.length];
|
| 217 |
});
|
| 218 |
|
| 219 |
-
const isGroupChecked = (group: string) =>
|
| 220 |
-
|
|
|
|
|
|
|
|
|
|
| 221 |
|
| 222 |
const handleGroupCheckboxChange = (group: string) => {
|
| 223 |
if (isGroupChecked(group)) {
|
| 224 |
// Uncheck all children
|
| 225 |
-
setVisibleKeys((prev) =>
|
|
|
|
|
|
|
| 226 |
} else {
|
| 227 |
// Check all children
|
| 228 |
-
setVisibleKeys((prev) =>
|
|
|
|
|
|
|
| 229 |
}
|
| 230 |
};
|
| 231 |
|
| 232 |
const handleCheckboxChange = (key: string) => {
|
| 233 |
setVisibleKeys((prev) =>
|
| 234 |
-
prev.includes(key) ? prev.filter((k) => k !== key) : [...prev, key]
|
| 235 |
);
|
| 236 |
};
|
| 237 |
|
|
@@ -245,7 +261,9 @@ const SingleDataGraph = React.memo(
|
|
| 245 |
<input
|
| 246 |
type="checkbox"
|
| 247 |
checked={isGroupChecked(group)}
|
| 248 |
-
ref={
|
|
|
|
|
|
|
| 249 |
onChange={() => handleGroupCheckboxChange(group)}
|
| 250 |
className="size-3"
|
| 251 |
style={{ accentColor: color }}
|
|
|
|
| 54 |
const [hoveredTime, setHoveredTime] = useState<number | null>(null);
|
| 55 |
const [expanded, setExpanded] = useState(false);
|
| 56 |
|
|
|
|
|
|
|
| 57 |
useEffect(() => {
|
| 58 |
if (typeof onChartsReady === "function") onChartsReady();
|
| 59 |
}, [onChartsReady]);
|
| 60 |
|
| 61 |
const combinedData = useMemo(() => expanded ? mergeGroups(data) : [], [data, expanded]);
|
| 62 |
|
| 63 |
+
if (!Array.isArray(data) || data.length === 0) return null;
|
| 64 |
+
|
| 65 |
return (
|
| 66 |
<div>
|
| 67 |
{data.length > 1 && (
|
|
|
|
| 101 |
},
|
| 102 |
);
|
| 103 |
|
|
|
|
| 104 |
const SingleDataGraph = React.memo(
|
| 105 |
({
|
| 106 |
data,
|
|
|
|
| 124 |
} else {
|
| 125 |
result[key] = value;
|
| 126 |
}
|
| 127 |
+
} else if (
|
| 128 |
+
value !== null &&
|
| 129 |
+
typeof value === "object" &&
|
| 130 |
+
!Array.isArray(value)
|
| 131 |
+
) {
|
| 132 |
// If it's an object, recurse
|
| 133 |
+
Object.assign(
|
| 134 |
+
result,
|
| 135 |
+
flattenRow(
|
| 136 |
+
value,
|
| 137 |
+
prefix ? `${prefix}${SERIES_NAME_DELIMITER}${key}` : key,
|
| 138 |
+
),
|
| 139 |
+
);
|
| 140 |
}
|
| 141 |
}
|
| 142 |
if ("timestamp" in row && typeof row["timestamp"] === "number") {
|
|
|
|
| 146 |
}
|
| 147 |
|
| 148 |
// Flatten all rows for recharts
|
| 149 |
+
const chartData = useMemo(() => data.map((row) => flattenRow(row)), [data]);
|
| 150 |
const [dataKeys, setDataKeys] = useState<string[]>([]);
|
| 151 |
const [visibleKeys, setVisibleKeys] = useState<string[]>([]);
|
| 152 |
|
|
|
|
| 225 |
groupColorMap[group] = CHART_COLORS[idx % CHART_COLORS.length];
|
| 226 |
});
|
| 227 |
|
| 228 |
+
const isGroupChecked = (group: string) =>
|
| 229 |
+
groups[group].every((k) => visibleKeys.includes(k));
|
| 230 |
+
const isGroupIndeterminate = (group: string) =>
|
| 231 |
+
groups[group].some((k) => visibleKeys.includes(k)) &&
|
| 232 |
+
!isGroupChecked(group);
|
| 233 |
|
| 234 |
const handleGroupCheckboxChange = (group: string) => {
|
| 235 |
if (isGroupChecked(group)) {
|
| 236 |
// Uncheck all children
|
| 237 |
+
setVisibleKeys((prev) =>
|
| 238 |
+
prev.filter((k) => !groups[group].includes(k)),
|
| 239 |
+
);
|
| 240 |
} else {
|
| 241 |
// Check all children
|
| 242 |
+
setVisibleKeys((prev) =>
|
| 243 |
+
Array.from(new Set([...prev, ...groups[group]])),
|
| 244 |
+
);
|
| 245 |
}
|
| 246 |
};
|
| 247 |
|
| 248 |
const handleCheckboxChange = (key: string) => {
|
| 249 |
setVisibleKeys((prev) =>
|
| 250 |
+
prev.includes(key) ? prev.filter((k) => k !== key) : [...prev, key],
|
| 251 |
);
|
| 252 |
};
|
| 253 |
|
|
|
|
| 261 |
<input
|
| 262 |
type="checkbox"
|
| 263 |
checked={isGroupChecked(group)}
|
| 264 |
+
ref={(el) => {
|
| 265 |
+
if (el) el.indeterminate = isGroupIndeterminate(group);
|
| 266 |
+
}}
|
| 267 |
onChange={() => handleGroupCheckboxChange(group)}
|
| 268 |
className="size-3"
|
| 269 |
style={{ accentColor: color }}
|
src/components/simple-videos-player.tsx
CHANGED
|
@@ -5,6 +5,11 @@ import { useTime } from "../context/time-context";
|
|
| 5 |
import { FaExpand, FaCompress, FaTimes, FaEye } from "react-icons/fa";
|
| 6 |
import type { VideoInfo } from "@/app/[org]/[dataset]/[episode]/fetch-data";
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
type VideoPlayerProps = {
|
| 9 |
videosInfo: VideoInfo[];
|
| 10 |
onVideosReady?: () => void;
|
|
@@ -22,9 +27,9 @@ export const SimpleVideosPlayer = ({
|
|
| 22 |
const [enlargedVideo, setEnlargedVideo] = React.useState<string | null>(null);
|
| 23 |
const [showHiddenMenu, setShowHiddenMenu] = React.useState(false);
|
| 24 |
const [videosReady, setVideosReady] = React.useState(false);
|
| 25 |
-
|
| 26 |
const firstVisibleIdx = videosInfo.findIndex(
|
| 27 |
-
(video) => !hiddenVideos.includes(video.filename)
|
| 28 |
);
|
| 29 |
|
| 30 |
// Tracks the last time value set by the primary video's onTimeUpdate.
|
|
@@ -39,7 +44,7 @@ export const SimpleVideosPlayer = ({
|
|
| 39 |
// Handle videos ready
|
| 40 |
useEffect(() => {
|
| 41 |
let readyCount = 0;
|
| 42 |
-
|
| 43 |
const checkReady = () => {
|
| 44 |
readyCount++;
|
| 45 |
if (readyCount === videosInfo.length && onVideosReady) {
|
|
@@ -52,14 +57,17 @@ export const SimpleVideosPlayer = ({
|
|
| 52 |
videoRefs.current.forEach((video, index) => {
|
| 53 |
if (video) {
|
| 54 |
const info = videosInfo[index];
|
| 55 |
-
|
| 56 |
// Setup segment boundaries
|
| 57 |
if (info.isSegmented) {
|
| 58 |
const handleTimeUpdate = () => {
|
| 59 |
const segmentEnd = info.segmentEnd || video.duration;
|
| 60 |
const segmentStart = info.segmentStart || 0;
|
| 61 |
-
|
| 62 |
-
if (
|
|
|
|
|
|
|
|
|
|
| 63 |
video.currentTime = segmentStart;
|
| 64 |
// Also update the global time to reset to start
|
| 65 |
if (index === firstVisibleIdx) {
|
|
@@ -67,7 +75,7 @@ export const SimpleVideosPlayer = ({
|
|
| 67 |
}
|
| 68 |
}
|
| 69 |
};
|
| 70 |
-
|
| 71 |
const handleLoadedData = () => {
|
| 72 |
video.currentTime = info.segmentStart || 0;
|
| 73 |
checkReady();
|
|
@@ -109,17 +117,23 @@ export const SimpleVideosPlayer = ({
|
|
| 109 |
}
|
| 110 |
});
|
| 111 |
};
|
| 112 |
-
}, [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
// Handle play/pause
|
| 115 |
useEffect(() => {
|
| 116 |
if (!videosReady) return;
|
| 117 |
-
|
| 118 |
videoRefs.current.forEach((video, idx) => {
|
| 119 |
if (video && !hiddenVideos.includes(videosInfo[idx].filename)) {
|
| 120 |
if (isPlaying) {
|
| 121 |
-
video.play().catch(e => {
|
| 122 |
-
if (e.name !==
|
| 123 |
console.error("Error playing video");
|
| 124 |
}
|
| 125 |
});
|
|
@@ -160,9 +174,9 @@ export const SimpleVideosPlayer = ({
|
|
| 160 |
// Handle time update from first visible video
|
| 161 |
const handleTimeUpdate = (e: React.SyntheticEvent<HTMLVideoElement>) => {
|
| 162 |
const video = e.target as HTMLVideoElement;
|
| 163 |
-
const videoIndex = videoRefs.current.findIndex(ref => ref === video);
|
| 164 |
const info = videosInfo[videoIndex];
|
| 165 |
-
|
| 166 |
if (info) {
|
| 167 |
let globalTime = video.currentTime;
|
| 168 |
if (info.isSegmented) {
|
|
@@ -178,7 +192,7 @@ export const SimpleVideosPlayer = ({
|
|
| 178 |
if (info.isSegmented) {
|
| 179 |
const segmentStart = info.segmentStart || 0;
|
| 180 |
const segmentEnd = info.segmentEnd || video.duration;
|
| 181 |
-
|
| 182 |
if (video.currentTime < segmentStart || video.currentTime >= segmentEnd) {
|
| 183 |
video.currentTime = segmentStart;
|
| 184 |
}
|
|
@@ -206,7 +220,11 @@ export const SimpleVideosPlayer = ({
|
|
| 206 |
<button
|
| 207 |
key={filename}
|
| 208 |
className="block w-full text-left px-2 py-1 rounded hover:bg-slate-700 text-slate-100"
|
| 209 |
-
onClick={() =>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
>
|
| 211 |
{filename}
|
| 212 |
</button>
|
|
@@ -220,10 +238,10 @@ export const SimpleVideosPlayer = ({
|
|
| 220 |
<div className="flex flex-wrap gap-x-2 gap-y-6">
|
| 221 |
{videosInfo.map((info, idx) => {
|
| 222 |
if (hiddenVideos.includes(info.filename)) return null;
|
| 223 |
-
|
| 224 |
const isEnlarged = enlargedVideo === info.filename;
|
| 225 |
const isFirstVisible = idx === firstVisibleIdx;
|
| 226 |
-
|
| 227 |
return (
|
| 228 |
<div
|
| 229 |
key={info.filename}
|
|
@@ -239,15 +257,23 @@ export const SimpleVideosPlayer = ({
|
|
| 239 |
<button
|
| 240 |
title={isEnlarged ? "Minimize" : "Enlarge"}
|
| 241 |
className="ml-2 p-1 hover:bg-slate-700 rounded"
|
| 242 |
-
onClick={() =>
|
|
|
|
|
|
|
| 243 |
>
|
| 244 |
{isEnlarged ? <FaCompress /> : <FaExpand />}
|
| 245 |
</button>
|
| 246 |
<button
|
| 247 |
title="Hide Video"
|
| 248 |
className="ml-1 p-1 hover:bg-slate-700 rounded"
|
| 249 |
-
onClick={() =>
|
| 250 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
>
|
| 252 |
<FaTimes />
|
| 253 |
</button>
|
|
|
|
| 5 |
import { FaExpand, FaCompress, FaTimes, FaEye } from "react-icons/fa";
|
| 6 |
import type { VideoInfo } from "@/app/[org]/[dataset]/[episode]/fetch-data";
|
| 7 |
|
| 8 |
+
const THRESHOLDS = {
|
| 9 |
+
VIDEO_SYNC_TOLERANCE: 0.2,
|
| 10 |
+
VIDEO_SEGMENT_BOUNDARY: 0.05,
|
| 11 |
+
};
|
| 12 |
+
|
| 13 |
type VideoPlayerProps = {
|
| 14 |
videosInfo: VideoInfo[];
|
| 15 |
onVideosReady?: () => void;
|
|
|
|
| 27 |
const [enlargedVideo, setEnlargedVideo] = React.useState<string | null>(null);
|
| 28 |
const [showHiddenMenu, setShowHiddenMenu] = React.useState(false);
|
| 29 |
const [videosReady, setVideosReady] = React.useState(false);
|
| 30 |
+
|
| 31 |
const firstVisibleIdx = videosInfo.findIndex(
|
| 32 |
+
(video) => !hiddenVideos.includes(video.filename),
|
| 33 |
);
|
| 34 |
|
| 35 |
// Tracks the last time value set by the primary video's onTimeUpdate.
|
|
|
|
| 44 |
// Handle videos ready
|
| 45 |
useEffect(() => {
|
| 46 |
let readyCount = 0;
|
| 47 |
+
|
| 48 |
const checkReady = () => {
|
| 49 |
readyCount++;
|
| 50 |
if (readyCount === videosInfo.length && onVideosReady) {
|
|
|
|
| 57 |
videoRefs.current.forEach((video, index) => {
|
| 58 |
if (video) {
|
| 59 |
const info = videosInfo[index];
|
| 60 |
+
|
| 61 |
// Setup segment boundaries
|
| 62 |
if (info.isSegmented) {
|
| 63 |
const handleTimeUpdate = () => {
|
| 64 |
const segmentEnd = info.segmentEnd || video.duration;
|
| 65 |
const segmentStart = info.segmentStart || 0;
|
| 66 |
+
|
| 67 |
+
if (
|
| 68 |
+
video.currentTime >=
|
| 69 |
+
segmentEnd - THRESHOLDS.VIDEO_SEGMENT_BOUNDARY
|
| 70 |
+
) {
|
| 71 |
video.currentTime = segmentStart;
|
| 72 |
// Also update the global time to reset to start
|
| 73 |
if (index === firstVisibleIdx) {
|
|
|
|
| 75 |
}
|
| 76 |
}
|
| 77 |
};
|
| 78 |
+
|
| 79 |
const handleLoadedData = () => {
|
| 80 |
video.currentTime = info.segmentStart || 0;
|
| 81 |
checkReady();
|
|
|
|
| 117 |
}
|
| 118 |
});
|
| 119 |
};
|
| 120 |
+
}, [
|
| 121 |
+
videosInfo,
|
| 122 |
+
onVideosReady,
|
| 123 |
+
setIsPlaying,
|
| 124 |
+
firstVisibleIdx,
|
| 125 |
+
setCurrentTime,
|
| 126 |
+
]);
|
| 127 |
|
| 128 |
// Handle play/pause
|
| 129 |
useEffect(() => {
|
| 130 |
if (!videosReady) return;
|
| 131 |
+
|
| 132 |
videoRefs.current.forEach((video, idx) => {
|
| 133 |
if (video && !hiddenVideos.includes(videosInfo[idx].filename)) {
|
| 134 |
if (isPlaying) {
|
| 135 |
+
video.play().catch((e) => {
|
| 136 |
+
if (e.name !== "AbortError") {
|
| 137 |
console.error("Error playing video");
|
| 138 |
}
|
| 139 |
});
|
|
|
|
| 174 |
// Handle time update from first visible video
|
| 175 |
const handleTimeUpdate = (e: React.SyntheticEvent<HTMLVideoElement>) => {
|
| 176 |
const video = e.target as HTMLVideoElement;
|
| 177 |
+
const videoIndex = videoRefs.current.findIndex((ref) => ref === video);
|
| 178 |
const info = videosInfo[videoIndex];
|
| 179 |
+
|
| 180 |
if (info) {
|
| 181 |
let globalTime = video.currentTime;
|
| 182 |
if (info.isSegmented) {
|
|
|
|
| 192 |
if (info.isSegmented) {
|
| 193 |
const segmentStart = info.segmentStart || 0;
|
| 194 |
const segmentEnd = info.segmentEnd || video.duration;
|
| 195 |
+
|
| 196 |
if (video.currentTime < segmentStart || video.currentTime >= segmentEnd) {
|
| 197 |
video.currentTime = segmentStart;
|
| 198 |
}
|
|
|
|
| 220 |
<button
|
| 221 |
key={filename}
|
| 222 |
className="block w-full text-left px-2 py-1 rounded hover:bg-slate-700 text-slate-100"
|
| 223 |
+
onClick={() =>
|
| 224 |
+
setHiddenVideos((prev) =>
|
| 225 |
+
prev.filter((v) => v !== filename),
|
| 226 |
+
)
|
| 227 |
+
}
|
| 228 |
>
|
| 229 |
{filename}
|
| 230 |
</button>
|
|
|
|
| 238 |
<div className="flex flex-wrap gap-x-2 gap-y-6">
|
| 239 |
{videosInfo.map((info, idx) => {
|
| 240 |
if (hiddenVideos.includes(info.filename)) return null;
|
| 241 |
+
|
| 242 |
const isEnlarged = enlargedVideo === info.filename;
|
| 243 |
const isFirstVisible = idx === firstVisibleIdx;
|
| 244 |
+
|
| 245 |
return (
|
| 246 |
<div
|
| 247 |
key={info.filename}
|
|
|
|
| 257 |
<button
|
| 258 |
title={isEnlarged ? "Minimize" : "Enlarge"}
|
| 259 |
className="ml-2 p-1 hover:bg-slate-700 rounded"
|
| 260 |
+
onClick={() =>
|
| 261 |
+
setEnlargedVideo(isEnlarged ? null : info.filename)
|
| 262 |
+
}
|
| 263 |
>
|
| 264 |
{isEnlarged ? <FaCompress /> : <FaExpand />}
|
| 265 |
</button>
|
| 266 |
<button
|
| 267 |
title="Hide Video"
|
| 268 |
className="ml-1 p-1 hover:bg-slate-700 rounded"
|
| 269 |
+
onClick={() =>
|
| 270 |
+
setHiddenVideos((prev) => [...prev, info.filename])
|
| 271 |
+
}
|
| 272 |
+
disabled={
|
| 273 |
+
videosInfo.filter(
|
| 274 |
+
(v) => !hiddenVideos.includes(v.filename),
|
| 275 |
+
).length === 1
|
| 276 |
+
}
|
| 277 |
>
|
| 278 |
<FaTimes />
|
| 279 |
</button>
|
src/components/videos-player.tsx
CHANGED
|
@@ -178,7 +178,7 @@ export const VideosPlayer = ({
|
|
| 178 |
if (video && video.duration) {
|
| 179 |
const videoIndex = videoRefs.current.findIndex(ref => ref === video);
|
| 180 |
const videoInfo = videosInfo[videoIndex];
|
| 181 |
-
|
| 182 |
if (videoInfo?.isSegmented) {
|
| 183 |
const segmentStart = videoInfo.segmentStart || 0;
|
| 184 |
const globalTime = Math.max(0, video.currentTime - segmentStart);
|
|
@@ -197,18 +197,20 @@ export const VideosPlayer = ({
|
|
| 197 |
const onCanPlayThrough = (videoIndex: number) => {
|
| 198 |
const video = videoRefs.current[videoIndex];
|
| 199 |
const videoInfo = videosInfo[videoIndex];
|
| 200 |
-
|
| 201 |
// Setup video segmentation for v3.0 chunked videos
|
| 202 |
if (video && videoInfo?.isSegmented) {
|
| 203 |
const segmentStart = videoInfo.segmentStart || 0;
|
| 204 |
const segmentEnd = videoInfo.segmentEnd || video.duration || 0;
|
| 205 |
-
|
| 206 |
-
|
| 207 |
// Set initial time to segment start if not already set
|
| 208 |
-
if (
|
|
|
|
|
|
|
|
|
|
| 209 |
video.currentTime = segmentStart;
|
| 210 |
}
|
| 211 |
-
|
| 212 |
// Add event listener to handle segment boundaries
|
| 213 |
const handleTimeUpdate = () => {
|
| 214 |
if (video.currentTime > segmentEnd) {
|
|
@@ -225,7 +227,7 @@ export const VideosPlayer = ({
|
|
| 225 |
video.removeEventListener('timeupdate', handleTimeUpdate);
|
| 226 |
});
|
| 227 |
}
|
| 228 |
-
|
| 229 |
videosReadyCount += 1;
|
| 230 |
if (videosReadyCount === videosInfo.length) {
|
| 231 |
if (typeof onVideosReady === "function") {
|
|
|
|
| 178 |
if (video && video.duration) {
|
| 179 |
const videoIndex = videoRefs.current.findIndex(ref => ref === video);
|
| 180 |
const videoInfo = videosInfo[videoIndex];
|
| 181 |
+
|
| 182 |
if (videoInfo?.isSegmented) {
|
| 183 |
const segmentStart = videoInfo.segmentStart || 0;
|
| 184 |
const globalTime = Math.max(0, video.currentTime - segmentStart);
|
|
|
|
| 197 |
const onCanPlayThrough = (videoIndex: number) => {
|
| 198 |
const video = videoRefs.current[videoIndex];
|
| 199 |
const videoInfo = videosInfo[videoIndex];
|
| 200 |
+
|
| 201 |
// Setup video segmentation for v3.0 chunked videos
|
| 202 |
if (video && videoInfo?.isSegmented) {
|
| 203 |
const segmentStart = videoInfo.segmentStart || 0;
|
| 204 |
const segmentEnd = videoInfo.segmentEnd || video.duration || 0;
|
| 205 |
+
|
|
|
|
| 206 |
// Set initial time to segment start if not already set
|
| 207 |
+
if (
|
| 208 |
+
video.currentTime < segmentStart ||
|
| 209 |
+
video.currentTime > segmentEnd
|
| 210 |
+
) {
|
| 211 |
video.currentTime = segmentStart;
|
| 212 |
}
|
| 213 |
+
|
| 214 |
// Add event listener to handle segment boundaries
|
| 215 |
const handleTimeUpdate = () => {
|
| 216 |
if (video.currentTime > segmentEnd) {
|
|
|
|
| 227 |
video.removeEventListener('timeupdate', handleTimeUpdate);
|
| 228 |
});
|
| 229 |
}
|
| 230 |
+
|
| 231 |
videosReadyCount += 1;
|
| 232 |
if (videosReadyCount === videosInfo.length) {
|
| 233 |
if (typeof onVideosReady === "function") {
|
src/types/chart.types.ts
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Chart and data visualization type definitions
|
| 3 |
+
*/
|
| 4 |
+
|
| 5 |
+
// Chart data point structure
|
| 6 |
+
export interface ChartDataPoint {
|
| 7 |
+
timestamp: number;
|
| 8 |
+
[key: string]: number | Record<string, number>; // Hierarchical data
|
| 9 |
+
}
|
| 10 |
+
|
| 11 |
+
// Chart data group
|
| 12 |
+
export type ChartDataGroup = ChartDataPoint[];
|
| 13 |
+
|
| 14 |
+
// Series column definition
|
| 15 |
+
export interface SeriesColumn {
|
| 16 |
+
key: string;
|
| 17 |
+
value: string[]; // Series names
|
| 18 |
+
}
|
| 19 |
+
|
| 20 |
+
// Group statistics for scale calculation
|
| 21 |
+
export interface GroupStats {
|
| 22 |
+
min: number;
|
| 23 |
+
max: number;
|
| 24 |
+
}
|
src/types/dataset.types.ts
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Dataset type definitions for LeRobot datasets
|
| 3 |
+
* Based on the LeRobot dataset format (v2.0, v2.1, v3.0)
|
| 4 |
+
*/
|
| 5 |
+
|
| 6 |
+
// Version management
|
| 7 |
+
export type DatasetVersion = "v2.0" | "v2.1" | "v3.0";
|
| 8 |
+
|
| 9 |
+
// Feature data types
|
| 10 |
+
export type FeatureDType = "video" | "float32" | "int32" | "int64" | "bool";
|
| 11 |
+
|
| 12 |
+
// Video-specific feature
|
| 13 |
+
export interface VideoFeature {
|
| 14 |
+
dtype: "video";
|
| 15 |
+
shape: [number, number, number]; // [height, width, channels]
|
| 16 |
+
names: ["height", "width", "channel"];
|
| 17 |
+
video_info?: {
|
| 18 |
+
"video.fps": number;
|
| 19 |
+
"video.codec": string;
|
| 20 |
+
"video.pix_fmt": string;
|
| 21 |
+
"video.is_depth_map": boolean;
|
| 22 |
+
has_audio: boolean;
|
| 23 |
+
};
|
| 24 |
+
}
|
| 25 |
+
|
| 26 |
+
// Numeric feature (state, action, etc.)
|
| 27 |
+
export interface NumericFeature {
|
| 28 |
+
dtype: "float32" | "int32" | "int64";
|
| 29 |
+
shape: number[];
|
| 30 |
+
names: string[] | { motors: string[] } | { [key: string]: string[] } | null;
|
| 31 |
+
fps?: number;
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
// Boolean feature
|
| 35 |
+
export interface BooleanFeature {
|
| 36 |
+
dtype: "bool";
|
| 37 |
+
shape: number[];
|
| 38 |
+
names: null;
|
| 39 |
+
fps?: number;
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
// Discriminated union for all feature types
|
| 43 |
+
export type Feature = VideoFeature | NumericFeature | BooleanFeature;
|
| 44 |
+
|
| 45 |
+
// Complete dataset metadata
|
| 46 |
+
export interface DatasetMetadata {
|
| 47 |
+
codebase_version: DatasetVersion;
|
| 48 |
+
robot_type: string;
|
| 49 |
+
total_episodes: number;
|
| 50 |
+
total_frames: number;
|
| 51 |
+
total_tasks: number;
|
| 52 |
+
total_videos?: number;
|
| 53 |
+
total_chunks?: number;
|
| 54 |
+
chunks_size: number;
|
| 55 |
+
fps: number;
|
| 56 |
+
splits: Record<string, string>;
|
| 57 |
+
data_path: string;
|
| 58 |
+
video_path: string | null;
|
| 59 |
+
features: Record<string, Feature>;
|
| 60 |
+
data_files_size_in_mb?: number;
|
| 61 |
+
video_files_size_in_mb?: number;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
// Dataset info used in components
|
| 65 |
+
export interface DatasetInfo {
|
| 66 |
+
repoId: string;
|
| 67 |
+
total_frames: number;
|
| 68 |
+
total_episodes: number;
|
| 69 |
+
fps: number;
|
| 70 |
+
}
|
src/types/episode.types.ts
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Episode type definitions for LeRobot datasets
|
| 3 |
+
*/
|
| 4 |
+
|
| 5 |
+
import type { DatasetInfo } from "./dataset.types";
|
| 6 |
+
import type { VideoInfo } from "./video.types";
|
| 7 |
+
import type { ChartDataGroup } from "./chart.types";
|
| 8 |
+
|
| 9 |
+
// Episode metadata for v3.0
|
| 10 |
+
export interface EpisodeMetadataV3 {
|
| 11 |
+
episode_index: number | bigint;
|
| 12 |
+
data_chunk_index: number | bigint;
|
| 13 |
+
data_file_index: number | bigint;
|
| 14 |
+
dataset_from_index: number | bigint;
|
| 15 |
+
dataset_to_index: number | bigint;
|
| 16 |
+
video_chunk_index?: number | bigint;
|
| 17 |
+
video_file_index?: number | bigint;
|
| 18 |
+
video_from_timestamp?: number;
|
| 19 |
+
video_to_timestamp?: number;
|
| 20 |
+
length: number | bigint;
|
| 21 |
+
// Per-camera metadata (optional)
|
| 22 |
+
[key: string]: number | bigint | undefined;
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
// Episode metadata for v2.x (simpler structure)
|
| 26 |
+
export interface EpisodeMetadataV2 {
|
| 27 |
+
episode_chunk: number;
|
| 28 |
+
episode_index: number;
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
// Task metadata
|
| 32 |
+
export interface TaskMetadata {
|
| 33 |
+
task_index: number | bigint;
|
| 34 |
+
task: string;
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
// Language instruction data
|
| 38 |
+
export interface LanguageInstruction {
|
| 39 |
+
language_instruction?: string;
|
| 40 |
+
[key: `language_instruction_${number}`]: string | undefined;
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
// Episode data returned to components
|
| 44 |
+
export interface EpisodeData {
|
| 45 |
+
datasetInfo: DatasetInfo;
|
| 46 |
+
episodeId: number;
|
| 47 |
+
videosInfo: VideoInfo[];
|
| 48 |
+
chartDataGroups: ChartDataGroup[];
|
| 49 |
+
episodes: number[];
|
| 50 |
+
ignoredColumns: string[];
|
| 51 |
+
duration: number;
|
| 52 |
+
task?: string;
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
// Raw parquet row structure
|
| 56 |
+
export interface ParquetDataRow {
|
| 57 |
+
timestamp?: number;
|
| 58 |
+
episode_index?: number | bigint;
|
| 59 |
+
frame_index?: number | bigint;
|
| 60 |
+
index?: number | bigint;
|
| 61 |
+
task_index?: number | bigint;
|
| 62 |
+
"observation.state"?: number[];
|
| 63 |
+
action?: number[];
|
| 64 |
+
"next.reward"?: number;
|
| 65 |
+
"next.done"?: boolean;
|
| 66 |
+
language_instruction?: string;
|
| 67 |
+
[key: string]: unknown; // For additional fields
|
| 68 |
+
}
|
src/types/index.ts
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Central export for all type definitions
|
| 3 |
+
*/
|
| 4 |
+
|
| 5 |
+
// Dataset types
|
| 6 |
+
export type {
|
| 7 |
+
DatasetVersion,
|
| 8 |
+
FeatureDType,
|
| 9 |
+
VideoFeature,
|
| 10 |
+
NumericFeature,
|
| 11 |
+
BooleanFeature,
|
| 12 |
+
Feature,
|
| 13 |
+
DatasetMetadata,
|
| 14 |
+
DatasetInfo,
|
| 15 |
+
} from "./dataset.types";
|
| 16 |
+
|
| 17 |
+
// Episode types
|
| 18 |
+
export type {
|
| 19 |
+
EpisodeMetadataV3,
|
| 20 |
+
EpisodeMetadataV2,
|
| 21 |
+
TaskMetadata,
|
| 22 |
+
LanguageInstruction,
|
| 23 |
+
EpisodeData,
|
| 24 |
+
ParquetDataRow,
|
| 25 |
+
} from "./episode.types";
|
| 26 |
+
|
| 27 |
+
// Video types
|
| 28 |
+
export type { VideoInfo, AdjacentEpisodeVideos } from "./video.types";
|
| 29 |
+
|
| 30 |
+
// Chart types
|
| 31 |
+
export type {
|
| 32 |
+
ChartDataPoint,
|
| 33 |
+
ChartDataGroup,
|
| 34 |
+
SeriesColumn,
|
| 35 |
+
GroupStats,
|
| 36 |
+
} from "./chart.types";
|
src/types/video.types.ts
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Video type definitions
|
| 3 |
+
*/
|
| 4 |
+
|
| 5 |
+
// Video information structure
|
| 6 |
+
export interface VideoInfo {
|
| 7 |
+
filename: string;
|
| 8 |
+
url: string;
|
| 9 |
+
isSegmented?: boolean;
|
| 10 |
+
segmentStart?: number;
|
| 11 |
+
segmentEnd?: number;
|
| 12 |
+
segmentDuration?: number;
|
| 13 |
+
}
|
| 14 |
+
|
| 15 |
+
// Adjacent episode video info for preloading
|
| 16 |
+
export interface AdjacentEpisodeVideos {
|
| 17 |
+
episodeId: number;
|
| 18 |
+
videosInfo: VideoInfo[];
|
| 19 |
+
}
|
src/utils/constants.ts
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Centralized constants for the lerobot-dataset-visualizer
|
| 3 |
+
* Eliminates magic numbers and provides single source of truth for configuration
|
| 4 |
+
*/
|
| 5 |
+
|
| 6 |
+
// Formatting constants for episode and file indexing
|
| 7 |
+
export const PADDING = {
|
| 8 |
+
EPISODE_CHUNK: 3,
|
| 9 |
+
EPISODE_INDEX: 6,
|
| 10 |
+
FILE_INDEX: 3,
|
| 11 |
+
CHUNK_INDEX: 3,
|
| 12 |
+
} as const;
|
| 13 |
+
|
| 14 |
+
// Numeric thresholds for data processing
|
| 15 |
+
export const THRESHOLDS = {
|
| 16 |
+
SCALE_GROUPING: 2,
|
| 17 |
+
EPSILON: 1e-9,
|
| 18 |
+
VIDEO_SYNC_TOLERANCE: 0.2,
|
| 19 |
+
VIDEO_SEGMENT_BOUNDARY: 0.05,
|
| 20 |
+
} as const;
|
| 21 |
+
|
| 22 |
+
// Chart configuration
|
| 23 |
+
export const CHART_CONFIG = {
|
| 24 |
+
MAX_SERIES_PER_GROUP: 6,
|
| 25 |
+
SERIES_NAME_DELIMITER: " | ",
|
| 26 |
+
} as const;
|
| 27 |
+
|
| 28 |
+
// Video player configuration
|
| 29 |
+
export const VIDEO_PLAYER = {
|
| 30 |
+
JUMP_SECONDS: 5,
|
| 31 |
+
STEP_SIZE: 0.01,
|
| 32 |
+
DEBOUNCE_MS: 200,
|
| 33 |
+
} as const;
|
| 34 |
+
|
| 35 |
+
// HTTP configuration
|
| 36 |
+
export const HTTP = {
|
| 37 |
+
TIMEOUT_MS: 10000,
|
| 38 |
+
} as const;
|
| 39 |
+
|
| 40 |
+
// Excluded columns by dataset version
|
| 41 |
+
export const EXCLUDED_COLUMNS = {
|
| 42 |
+
V2: ["timestamp", "frame_index", "episode_index", "index", "task_index"],
|
| 43 |
+
V3: ["index", "task_index", "episode_index", "frame_index", "next.done"],
|
| 44 |
+
} as const;
|
src/utils/dataProcessing.ts
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Data processing utilities for chart data grouping and transformation
|
| 3 |
+
* Consolidates duplicated logic from fetch-data.ts
|
| 4 |
+
*/
|
| 5 |
+
|
| 6 |
+
import { CHART_CONFIG, THRESHOLDS } from "./constants";
|
| 7 |
+
import type { GroupStats } from "@/types";
|
| 8 |
+
|
| 9 |
+
/**
|
| 10 |
+
* Groups row keys by suffix using delimiter
|
| 11 |
+
* Consolidates logic from lines 407-438 and 962-993 in fetch-data.ts
|
| 12 |
+
*
|
| 13 |
+
* @param row - Row data with numeric values
|
| 14 |
+
* @returns Grouped row data with nested structure for multi-key groups
|
| 15 |
+
*/
|
| 16 |
+
export function groupRowBySuffix(
|
| 17 |
+
row: Record<string, number>,
|
| 18 |
+
): Record<string, number | Record<string, number>> {
|
| 19 |
+
const result: Record<string, number | Record<string, number>> = {};
|
| 20 |
+
const suffixGroups: Record<string, Record<string, number>> = {};
|
| 21 |
+
|
| 22 |
+
for (const [key, value] of Object.entries(row)) {
|
| 23 |
+
if (key === "timestamp") {
|
| 24 |
+
result["timestamp"] = value;
|
| 25 |
+
continue;
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
const parts = key.split(CHART_CONFIG.SERIES_NAME_DELIMITER);
|
| 29 |
+
if (parts.length === 2) {
|
| 30 |
+
const [prefix, suffix] = parts;
|
| 31 |
+
if (!suffixGroups[suffix]) suffixGroups[suffix] = {};
|
| 32 |
+
suffixGroups[suffix][prefix] = value;
|
| 33 |
+
} else {
|
| 34 |
+
result[key] = value;
|
| 35 |
+
}
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
for (const [suffix, group] of Object.entries(suffixGroups)) {
|
| 39 |
+
const keys = Object.keys(group);
|
| 40 |
+
if (keys.length === 1) {
|
| 41 |
+
// Use the full original name as the key
|
| 42 |
+
const fullName = `${keys[0]}${CHART_CONFIG.SERIES_NAME_DELIMITER}${suffix}`;
|
| 43 |
+
result[fullName] = group[keys[0]];
|
| 44 |
+
} else {
|
| 45 |
+
result[suffix] = group;
|
| 46 |
+
}
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
return result;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
/**
|
| 53 |
+
* Build suffix groups map from numeric keys
|
| 54 |
+
* Consolidates logic from lines 328-335 and 880-887 in fetch-data.ts
|
| 55 |
+
*
|
| 56 |
+
* @param numericKeys - Array of numeric column keys (excluding timestamp)
|
| 57 |
+
* @returns Map of suffix to array of keys with that suffix
|
| 58 |
+
*/
|
| 59 |
+
export function buildSuffixGroupsMap(
|
| 60 |
+
numericKeys: string[],
|
| 61 |
+
): Record<string, string[]> {
|
| 62 |
+
const suffixGroupsMap: Record<string, string[]> = {};
|
| 63 |
+
|
| 64 |
+
for (const key of numericKeys) {
|
| 65 |
+
const parts = key.split(CHART_CONFIG.SERIES_NAME_DELIMITER);
|
| 66 |
+
const suffix = parts[1] || parts[0]; // fallback to key if no delimiter
|
| 67 |
+
if (!suffixGroupsMap[suffix]) suffixGroupsMap[suffix] = [];
|
| 68 |
+
suffixGroupsMap[suffix].push(key);
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
return suffixGroupsMap;
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
/**
|
| 75 |
+
* Compute min/max statistics for suffix groups
|
| 76 |
+
* Consolidates logic from lines 338-353 and 890-905 in fetch-data.ts
|
| 77 |
+
*
|
| 78 |
+
* @param chartData - Array of chart data rows
|
| 79 |
+
* @param suffixGroups - Array of suffix groups (each group is an array of keys)
|
| 80 |
+
* @returns Map of group ID to min/max statistics
|
| 81 |
+
*/
|
| 82 |
+
export function computeGroupStats(
|
| 83 |
+
chartData: Record<string, number>[],
|
| 84 |
+
suffixGroups: string[][],
|
| 85 |
+
): Record<string, GroupStats> {
|
| 86 |
+
const groupStats: Record<string, GroupStats> = {};
|
| 87 |
+
|
| 88 |
+
suffixGroups.forEach((group) => {
|
| 89 |
+
let min = Infinity;
|
| 90 |
+
let max = -Infinity;
|
| 91 |
+
|
| 92 |
+
for (const row of chartData) {
|
| 93 |
+
for (const key of group) {
|
| 94 |
+
const v = row[key];
|
| 95 |
+
if (typeof v === "number" && !isNaN(v)) {
|
| 96 |
+
if (v < min) min = v;
|
| 97 |
+
if (v > max) max = v;
|
| 98 |
+
}
|
| 99 |
+
}
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
// Use the first key in the group as the group id
|
| 103 |
+
groupStats[group[0]] = { min, max };
|
| 104 |
+
});
|
| 105 |
+
|
| 106 |
+
return groupStats;
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
/**
 * Cluster suffix groups whose value ranges have a similar order of magnitude.
 * Consolidates logic that was previously duplicated in fetch-data.ts.
 *
 * Groups are visited in input order. Each not-yet-claimed group with finite
 * min/max becomes the seed of a new cluster and pulls in every later
 * unclaimed group whose log10(|min|) and log10(|max|) both lie within
 * THRESHOLDS.SCALE_GROUPING of the seed's values.
 *
 * Notes on edge cases, as implemented:
 * - A group whose stats are not finite (no numeric samples) is left out of
 *   the result entirely.
 * - A group with a flat range (min === max) never joins another seed, but it
 *   can still seed a cluster of its own.
 *
 * @param suffixGroups - Array of suffix groups to analyze
 * @param groupStats - Min/max statistics keyed by each group's first key
 * @returns Map of seed group id to the suffix groups clustered with it
 */
export function groupByScale(
  suffixGroups: string[][],
  groupStats: Record<string, GroupStats>,
): Record<string, string[][]> {
  // log10 magnitude of a bound; EPSILON is presumably a small positive
  // offset that keeps log10(0) finite (confirm in constants.ts).
  const magnitude = (bound: number): number =>
    Math.log10(Math.abs(bound) + THRESHOLDS.EPSILON);

  const clusters: Record<string, string[][]> = {};
  const claimed = new Set<string>();

  for (const seed of suffixGroups) {
    const seedId = seed[0];
    if (claimed.has(seedId)) continue;

    const seedStats = groupStats[seedId];
    if (!(isFinite(seedStats.min) && isFinite(seedStats.max))) continue;

    const seedLogMin = magnitude(seedStats.min);
    const seedLogMax = magnitude(seedStats.max);
    const members: string[][] = [seed];
    claimed.add(seedId);

    for (const candidate of suffixGroups) {
      const candidateId = candidate[0];
      // The seed itself is already claimed, so this also skips it.
      if (claimed.has(candidateId)) continue;

      const candidateStats = groupStats[candidateId];
      const usable =
        isFinite(candidateStats.min) &&
        isFinite(candidateStats.max) &&
        candidateStats.min !== candidateStats.max;
      if (!usable) continue;

      const minGap = Math.abs(seedLogMin - magnitude(candidateStats.min));
      const maxGap = Math.abs(seedLogMax - magnitude(candidateStats.max));
      if (
        minGap <= THRESHOLDS.SCALE_GROUPING &&
        maxGap <= THRESHOLDS.SCALE_GROUPING
      ) {
        members.push(candidate);
        claimed.add(candidateId);
      }
    }

    clusters[seedId] = members;
  }

  return clusters;
}
|
| 163 |
+
|
| 164 |
+
/**
 * Turn scale clusters into the final list of chart groups.
 * Consolidates logic that was previously duplicated in fetch-data.ts.
 *
 * Clusters are ordered by how many suffix groups they contain (largest
 * first). Each cluster's keys are concatenated; if the result holds more than
 * CHART_CONFIG.MAX_SERIES_PER_GROUP series it is cut into consecutive slices
 * of at most that size so no single chart becomes overcrowded.
 *
 * @param scaleGroups - Map of scale groups
 * @returns Array of chart groups (each group is an array of series keys)
 */
export function flattenScaleGroups(
  scaleGroups: Record<string, string[][]>,
): string[][] {
  const bySizeDesc = Object.values(scaleGroups).sort(
    (left, right) => right.length - left.length,
  );

  const charts: string[][] = [];
  for (const cluster of bySizeDesc) {
    const series = cluster.flat();

    // Small enough to fit in one chart: keep it whole.
    if (!(series.length > CHART_CONFIG.MAX_SERIES_PER_GROUP)) {
      charts.push(series);
      continue;
    }

    // Otherwise emit fixed-size consecutive slices.
    let start = 0;
    while (start < series.length) {
      charts.push(
        series.slice(start, start + CHART_CONFIG.MAX_SERIES_PER_GROUP),
      );
      start += CHART_CONFIG.MAX_SERIES_PER_GROUP;
    }
  }

  return charts;
}
|
| 196 |
+
|
| 197 |
+
/**
 * End-to-end pipeline that turns raw series names and rows into chart groups.
 *
 * Steps: drop the "timestamp" series, build suffix groups, compute min/max
 * per group, cluster groups of similar scale, then flatten the clusters into
 * size-limited chart groups.
 *
 * @param seriesNames - All series names including timestamp
 * @param chartData - Array of chart data rows
 * @returns Array of chart groups ready for visualization
 */
export function processChartDataGroups(
  seriesNames: string[],
  chartData: Record<string, number>[],
): string[][] {
  // Everything except the time axis is treated as a plottable series.
  const plottable = seriesNames.filter((name) => name !== "timestamp");
  const suffixGroups = Object.values(buildSuffixGroupsMap(plottable));

  // Stats feed the scale clustering, whose result is flattened for display.
  const stats = computeGroupStats(chartData, suffixGroups);
  return flattenScaleGroups(groupByScale(suffixGroups, stats));
}
|
src/utils/languageInstructions.ts
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Language instruction extraction utilities
|
| 3 |
+
* Consolidates duplicated logic from fetch-data.ts
|
| 4 |
+
*/
|
| 5 |
+
|
| 6 |
+
/**
 * Extract language instructions from episode data rows.
 * Consolidates logic that was previously duplicated in fetch-data.ts.
 *
 * The sampled rows are inspected in the order given. The first row that has a
 * non-empty string `language_instruction` wins: its primary instruction is
 * collected, followed by consecutive numbered string fields
 * (`language_instruction_2`, `language_instruction_3`, ...) until the first
 * gap. Rows without a primary instruction are skipped, even if they carry
 * numbered fields.
 *
 * Sample indices that do not address a row (out of range, negative,
 * fractional, NaN) are ignored instead of raising a TypeError.
 *
 * @param episodeData - Array of episode data rows
 * @param sampleIndices - Indices of rows to check (default: [0] for first row only)
 * @returns Newline-joined language instructions, or undefined if none found
 */
export function extractLanguageInstructions(
  episodeData: Record<string, unknown>[],
  sampleIndices: number[] = [0],
): string | undefined {
  for (const idx of sampleIndices) {
    const row = episodeData[idx];
    // Covers empty data and every index that does not resolve to a row.
    if (row == null) continue;

    const primary = row.language_instruction;
    if (typeof primary !== "string" || primary === "") continue;

    const instructions = [primary];
    // Numbered fields start at 2 and must be contiguous.
    for (let n = 2; ; n++) {
      const extra = row[`language_instruction_${n}`];
      if (typeof extra !== "string") break;
      instructions.push(extra);
    }

    // First row with instructions wins; later sample indices are not read.
    return instructions.join("\n");
  }

  return undefined;
}
|
| 58 |
+
|
| 59 |
+
/**
 * Resolve a task description from the tasks metadata table.
 *
 * The index is accepted as a number or a BigInt (converted with Number());
 * any other type, a negative index, or an index past the end of `tasksData`
 * yields undefined. For the selected entry the string field
 * `__index_level_0__` takes precedence over the string field `task`.
 *
 * @param taskIndex - Task index (can be BigInt or number)
 * @param tasksData - Array of task metadata objects
 * @returns Task string or undefined if not found
 */
export function extractTaskFromMetadata(
  taskIndex: unknown,
  tasksData: Record<string, unknown>[],
): string | undefined {
  let position: number;
  if (typeof taskIndex === "bigint") {
    position = Number(taskIndex);
  } else if (typeof taskIndex === "number") {
    position = taskIndex;
  } else {
    return undefined;
  }

  if (position < 0 || position >= tasksData.length) return undefined;

  // Fractional or NaN positions land on no entry and fall through here.
  const entry = tasksData[position];
  if (!entry) return undefined;

  // Checked in priority order; the first string-valued field is returned.
  for (const field of ["__index_level_0__", "task"]) {
    const candidate = entry[field];
    if (typeof candidate === "string") return candidate;
  }

  return undefined;
}
|
src/utils/parquetUtils.ts
CHANGED
|
@@ -36,19 +36,19 @@ export async function fetchJson<T>(url: string): Promise<T> {
|
|
| 36 |
|
| 37 |
export function formatStringWithVars(
|
| 38 |
format: string,
|
| 39 |
-
vars: Record<string, string>,
|
| 40 |
): string {
|
| 41 |
-
return format.replace(/{(\w+)(?::\d+d)?}/g, (_, key) => vars[key]);
|
| 42 |
}
|
| 43 |
|
| 44 |
// Fetch and parse the Parquet file
|
| 45 |
export async function fetchParquetFile(url: string): Promise<ArrayBuffer> {
|
| 46 |
const res = await fetch(url);
|
| 47 |
-
|
| 48 |
if (!res.ok) {
|
| 49 |
throw new Error(`Failed to fetch ${url}: ${res.status} ${res.statusText}`);
|
| 50 |
}
|
| 51 |
-
|
| 52 |
return res.arrayBuffer();
|
| 53 |
}
|
| 54 |
|
|
@@ -64,7 +64,7 @@ export async function readParquetColumn(
|
|
| 64 |
columns: columns.length > 0 ? columns : undefined,
|
| 65 |
onComplete: (data: unknown[][]) => {
|
| 66 |
resolve(data);
|
| 67 |
-
}
|
| 68 |
});
|
| 69 |
} catch (error) {
|
| 70 |
reject(error);
|
|
@@ -94,12 +94,12 @@ export function getRows(currentFrameData: unknown[], columns: ColumnInfo[]) {
|
|
| 94 |
return [];
|
| 95 |
}
|
| 96 |
|
| 97 |
-
const rows = [];
|
| 98 |
const nRows = Math.max(...columns.map((column) => column.value.length));
|
| 99 |
let rowIndex = 0;
|
| 100 |
|
| 101 |
while (rowIndex < nRows) {
|
| 102 |
-
const row = [];
|
| 103 |
// number of states may NOT match number of actions. In this case, we null-pad the 2D array
|
| 104 |
const nullCell = { isNull: true };
|
| 105 |
// row consists of [state value, action value]
|
|
|
|
| 36 |
|
| 37 |
/**
 * Substitute `{name}` and `{name:0Nd}` placeholders in a path template.
 *
 * A width spec such as `:03d` is honoured by left-padding the value with
 * zeros to that many characters, so numeric values no longer need to be
 * pre-padded by the caller; values that are already long enough are left
 * unchanged. Intended for non-negative indices.
 *
 * A placeholder whose key is absent from `vars` is left in the output as-is
 * rather than being replaced by the text "undefined".
 *
 * @param format - Template string containing `{key}` / `{key:0Nd}` placeholders
 * @param vars - Values to substitute, keyed by placeholder name
 * @returns The template with all resolvable placeholders substituted
 */
export function formatStringWithVars(
  format: string,
  vars: Record<string, string | number>,
): string {
  return format.replace(
    /{(\w+)(?::(\d+)d)?}/g,
    (placeholder: string, key: string, width?: string) => {
      // Own-property check so names like "constructor" never resolve.
      if (!Object.prototype.hasOwnProperty.call(vars, key)) {
        return placeholder;
      }
      const text = String(vars[key]);
      return width === undefined ? text : text.padStart(Number(width), "0");
    },
  );
}
|
| 43 |
|
| 44 |
/**
 * Download a Parquet file and return its raw bytes.
 * No parsing happens here; the response body is handed back as-is.
 *
 * @param url - Absolute URL of the Parquet file
 * @returns The response body as an ArrayBuffer
 * @throws Error when the HTTP response status is not OK
 */
export async function fetchParquetFile(url: string): Promise<ArrayBuffer> {
  const response = await fetch(url);

  if (response.ok) {
    return response.arrayBuffer();
  }

  throw new Error(
    `Failed to fetch ${url}: ${response.status} ${response.statusText}`,
  );
}
|
| 54 |
|
|
|
|
| 64 |
columns: columns.length > 0 ? columns : undefined,
|
| 65 |
onComplete: (data: unknown[][]) => {
|
| 66 |
resolve(data);
|
| 67 |
+
},
|
| 68 |
});
|
| 69 |
} catch (error) {
|
| 70 |
reject(error);
|
|
|
|
| 94 |
return [];
|
| 95 |
}
|
| 96 |
|
| 97 |
+
const rows: Array<Array<{ isNull: true } | unknown>> = [];
|
| 98 |
const nRows = Math.max(...columns.map((column) => column.value.length));
|
| 99 |
let rowIndex = 0;
|
| 100 |
|
| 101 |
while (rowIndex < nRows) {
|
| 102 |
+
const row: Array<{ isNull: true } | unknown> = [];
|
| 103 |
// number of states may NOT match number of actions. In this case, we null-pad the 2D array
|
| 104 |
const nullCell = { isNull: true };
|
| 105 |
// row consists of [state value, action value]
|
src/utils/stringFormatting.ts
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* String formatting utilities for path construction
|
| 3 |
+
* Consolidates repeated padding and path building logic
|
| 4 |
+
*/
|
| 5 |
+
|
| 6 |
+
import { PADDING } from "./constants";
|
| 7 |
+
|
| 8 |
+
/**
 * Pad a number with leading zeros up to a total string length.
 *
 * Numbers that already need `length` or more characters are returned
 * unchanged. For negative numbers the zeros are inserted after the minus
 * sign (e.g. -5 with length 3 gives "-05"), not in front of it.
 *
 * @param num - Number to pad
 * @param length - Desired total string length (sign included)
 * @returns Zero-padded string
 */
export function padNumber(num: number, length: number): string {
  const text = num.toString();
  if (!text.startsWith("-")) {
    return text.padStart(length, "0");
  }
  // Keep the sign in front and pad only the digits.
  return `-${text.slice(1).padStart(length - 1, "0")}`;
}
|
| 18 |
+
|
| 19 |
+
/**
 * Render an episode chunk index as a zero-padded string.
 * The width comes from PADDING.EPISODE_CHUNK.
 *
 * @param chunkIndex - Chunk index number
 * @returns Chunk index padded with leading zeros
 */
export function formatEpisodeChunk(chunkIndex: number): string {
  const width = PADDING.EPISODE_CHUNK;
  return padNumber(chunkIndex, width);
}
|
| 28 |
+
|
| 29 |
+
/**
 * Render an episode index as a zero-padded string.
 * The width comes from PADDING.EPISODE_INDEX.
 *
 * @param episodeIndex - Episode index number
 * @returns Episode index padded with leading zeros
 */
export function formatEpisodeIndex(episodeIndex: number): string {
  const width = PADDING.EPISODE_INDEX;
  return padNumber(episodeIndex, width);
}
|
| 38 |
+
|
| 39 |
+
/**
 * Render a file index as a zero-padded string.
 * The width comes from PADDING.FILE_INDEX.
 *
 * @param fileIndex - File index number
 * @returns File index padded with leading zeros
 */
export function formatFileIndex(fileIndex: number): string {
  const width = PADDING.FILE_INDEX;
  return padNumber(fileIndex, width);
}
|
| 48 |
+
|
| 49 |
+
/**
 * Render a data chunk index as a zero-padded string.
 * The width comes from PADDING.CHUNK_INDEX.
 *
 * @param chunkIndex - Chunk index number
 * @returns Chunk index padded with leading zeros
 */
export function formatChunkIndex(chunkIndex: number): string {
  const width = PADDING.CHUNK_INDEX;
  return padNumber(chunkIndex, width);
}
|
| 58 |
+
|
| 59 |
+
/**
 * Build the repository-relative path of a video file in a v3 dataset.
 *
 * Layout: `videos/<videoKey>/chunk-<chunk>/file-<file>.mp4`, where the chunk
 * and file indices are zero-padded by formatChunkIndex / formatFileIndex.
 *
 * @param videoKey - Video key/name (e.g., "observation.image")
 * @param chunkIndex - Data chunk index
 * @param fileIndex - File index within chunk
 * @returns Formatted video path
 */
export function buildV3VideoPath(
  videoKey: string,
  chunkIndex: number,
  fileIndex: number,
): string {
  const chunk = formatChunkIndex(chunkIndex);
  const file = formatFileIndex(fileIndex);
  return `videos/${videoKey}/chunk-${chunk}/file-${file}.mp4`;
}
|
| 74 |
+
|
| 75 |
+
/**
 * Build the repository-relative path of a data Parquet file in a v3 dataset.
 *
 * Layout: `data/chunk-<chunk>/file-<file>.parquet`, where the chunk and file
 * indices are zero-padded by formatChunkIndex / formatFileIndex.
 *
 * @param chunkIndex - Data chunk index
 * @param fileIndex - File index within chunk
 * @returns Formatted data path
 */
export function buildV3DataPath(chunkIndex: number, fileIndex: number): string {
  const chunk = formatChunkIndex(chunkIndex);
  const file = formatFileIndex(fileIndex);
  return `data/chunk-${chunk}/file-${file}.parquet`;
}
|
| 85 |
+
|
| 86 |
+
/**
 * Build the repository-relative path of an episodes-metadata Parquet file in
 * a v3 dataset.
 *
 * Layout: `meta/episodes/chunk-<chunk>/file-<file>.parquet`, where the chunk
 * and file indices are zero-padded by formatChunkIndex / formatFileIndex.
 *
 * @param chunkIndex - Episode chunk index
 * @param fileIndex - File index within chunk
 * @returns Formatted episodes metadata path
 */
export function buildV3EpisodesMetadataPath(
  chunkIndex: number,
  fileIndex: number,
): string {
  const chunk = formatChunkIndex(chunkIndex);
  const file = formatFileIndex(fileIndex);
  return `meta/episodes/chunk-${chunk}/file-${file}.parquet`;
}
|
src/utils/typeGuards.ts
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Type guard utilities for safe type narrowing
|
| 3 |
+
* Replaces unsafe type assertions throughout the codebase
|
| 4 |
+
*/
|
| 5 |
+
|
| 6 |
+
/**
 * Narrow an unknown value to `bigint`.
 *
 * @param value - Value to check
 * @returns True only when `typeof value` is "bigint"
 */
export function isBigInt(value: unknown): value is bigint {
  switch (typeof value) {
    case "bigint":
      return true;
    default:
      return false;
  }
}
|
| 15 |
+
|
| 16 |
+
/**
 * Convert a BigInt or number to a plain number.
 *
 * BigInt inputs go through Number(); number inputs (NaN included) are
 * returned untouched; every other type yields the fallback.
 *
 * @param value - Value to convert (can be BigInt, number, or other)
 * @param fallback - Value returned for non-numeric input (default: 0)
 * @returns Number value or fallback
 */
export function bigIntToNumber(value: unknown, fallback: number = 0): number {
  switch (typeof value) {
    case "bigint":
      return Number(value);
    case "number":
      return value;
    default:
      return fallback;
  }
}
|
| 33 |
+
|
| 34 |
+
/**
 * Narrow an unknown value to `number | bigint`.
 * NaN and the infinities count as numbers here, since only `typeof` is checked.
 *
 * @param value - Value to check
 * @returns True if value is a number or BigInt
 */
export function isNumeric(value: unknown): value is number | bigint {
  const kind = typeof value;
  return kind === "number" || kind === "bigint";
}
|
| 43 |
+
|
| 44 |
+
/**
 * Check that a value is usable as a task index: a non-negative integer.
 *
 * The value is first converted with bigIntToNumber, so non-numeric input is
 * treated as -1 and rejected.
 *
 * NOTE(review): BigInt inputs also pass this check although the predicate
 * narrows to `number`; callers should convert with bigIntToNumber before
 * using the value as an array index.
 *
 * @param value - Value to check
 * @returns True if value represents a non-negative integer
 */
export function isValidTaskIndex(value: unknown): value is number {
  const index = bigIntToNumber(value, -1);
  if (index < 0) return false;
  return Number.isInteger(index);
}
|
| 55 |
+
|
| 56 |
+
/**
 * Type guard for HTMLVideoElement.
 *
 * Safe to call where the DOM is unavailable (Node, server-side rendering):
 * when the `HTMLVideoElement` global does not exist the function returns
 * false instead of throwing a ReferenceError.
 *
 * @param element - Element to check
 * @returns True if element is an HTMLVideoElement
 */
export function isVideoElement(element: unknown): element is HTMLVideoElement {
  // `typeof` on an undeclared global is safe; `instanceof` would throw.
  return (
    typeof HTMLVideoElement !== "undefined" &&
    element instanceof HTMLVideoElement
  );
}
|
| 65 |
+
|
| 66 |
+
/**
 * Convert any value to a string.
 *
 * null and undefined become the empty string, strings pass through
 * unchanged, and everything else goes through String() (so plain objects
 * render as "[object Object]").
 *
 * @param value - Value to convert
 * @returns String representation of the value
 */
export function toString(value: unknown): string {
  if (value == null) return "";
  return typeof value === "string" ? value : String(value);
}
|
| 78 |
+
|
| 79 |
+
/**
 * Narrow an unknown value to `string`, rejecting the empty string.
 * Whitespace-only strings are accepted.
 *
 * @param value - Value to check
 * @returns True if value is a non-empty string
 */
export function isNonEmptyString(value: unknown): value is string {
  if (typeof value !== "string") return false;
  return value !== "";
}
|
| 88 |
+
|
| 89 |
+
/**
 * Narrow an unknown value to a string-keyed record.
 * null and arrays are rejected; any other `typeof "object"` value (class
 * instances, Map, Date, ...) is accepted. Functions are not objects here.
 *
 * @param value - Value to check
 * @returns True if value is a non-null, non-array object
 */
export function isObject(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
|
| 98 |
+
|
| 99 |
+
/**
 * Safe property access with type guard.
 * Checks that `obj` is a non-array object, that it has `key` (own or
 * inherited, via `in`), and that the property value passes `typeGuard`.
 *
 * The key type is generic so that a literal key narrows only that one
 * property to `T`. When the key is a plain `string` (or `K` is left at its
 * default) the narrowing is the same as before.
 *
 * @param obj - Object to check
 * @param key - Property key to check
 * @param typeGuard - Type guard function for the property value
 * @returns True if property exists and passes type guard
 */
export function hasPropertyOfType<T, K extends string = string>(
  obj: unknown,
  key: K,
  typeGuard: (value: unknown) => value is T,
): obj is Record<string, unknown> & Record<K, T> {
  return isObject(obj) && key in obj && typeGuard(obj[key]);
}
|
src/utils/versionUtils.ts
CHANGED
|
@@ -2,7 +2,8 @@
|
|
| 2 |
* Utility functions for checking dataset version compatibility
|
| 3 |
*/
|
| 4 |
|
| 5 |
-
const DATASET_URL =
|
|
|
|
| 6 |
|
| 7 |
/**
|
| 8 |
* Dataset information structure from info.json
|
|
@@ -44,17 +45,18 @@ export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
|
|
| 44 |
|
| 45 |
try {
|
| 46 |
const testUrl = `${DATASET_URL}/${repoId}/resolve/main/meta/info.json`;
|
| 47 |
-
|
| 48 |
const controller = new AbortController();
|
| 49 |
const timeoutId = setTimeout(() => controller.abort(), 10000);
|
| 50 |
|
| 51 |
const response = await fetch(testUrl, {
|
| 52 |
method: "GET",
|
| 53 |
-
|
|
|
|
| 54 |
});
|
| 55 |
-
|
| 56 |
clearTimeout(timeoutId);
|
| 57 |
-
|
| 58 |
if (!response.ok) {
|
| 59 |
throw new Error(`Failed to fetch dataset info: ${response.status}`);
|
| 60 |
}
|
|
@@ -62,7 +64,9 @@ export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
|
|
| 62 |
const data = await response.json();
|
| 63 |
|
| 64 |
if (!data.features) {
|
| 65 |
-
throw new Error(
|
|
|
|
|
|
|
| 66 |
}
|
| 67 |
|
| 68 |
datasetInfoCache.set(repoId, { data: data as DatasetInfo, expiry: Date.now() + CACHE_TTL_MS });
|
|
@@ -73,7 +77,7 @@ export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
|
|
| 73 |
}
|
| 74 |
throw new Error(
|
| 75 |
`Dataset ${repoId} is not compatible with this visualizer. ` +
|
| 76 |
-
|
| 77 |
);
|
| 78 |
}
|
| 79 |
}
|
|
@@ -105,7 +109,10 @@ export async function getDatasetVersion(repoId: string): Promise<string> {
|
|
| 105 |
return version;
|
| 106 |
}
|
| 107 |
|
| 108 |
-
export function buildVersionedUrl(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
return `${DATASET_URL}/${repoId}/resolve/main/${path}`;
|
| 110 |
}
|
| 111 |
-
|
|
|
|
| 2 |
* Utility functions for checking dataset version compatibility
|
| 3 |
*/
|
| 4 |
|
| 5 |
+
// Base URL that dataset repositories are resolved against. The DATASET_URL
// environment variable overrides it; an unset or empty variable falls back
// to the public Hugging Face datasets endpoint.
const DATASET_URL = process.env.DATASET_URL
  ? process.env.DATASET_URL
  : "https://huggingface.co/datasets";
|
| 7 |
|
| 8 |
/**
|
| 9 |
* Dataset information structure from info.json
|
|
|
|
| 45 |
|
| 46 |
try {
|
| 47 |
const testUrl = `${DATASET_URL}/${repoId}/resolve/main/meta/info.json`;
|
| 48 |
+
|
| 49 |
const controller = new AbortController();
|
| 50 |
const timeoutId = setTimeout(() => controller.abort(), 10000);
|
| 51 |
|
| 52 |
const response = await fetch(testUrl, {
|
| 53 |
method: "GET",
|
| 54 |
+
cache: "no-store",
|
| 55 |
+
signal: controller.signal,
|
| 56 |
});
|
| 57 |
+
|
| 58 |
clearTimeout(timeoutId);
|
| 59 |
+
|
| 60 |
if (!response.ok) {
|
| 61 |
throw new Error(`Failed to fetch dataset info: ${response.status}`);
|
| 62 |
}
|
|
|
|
| 64 |
const data = await response.json();
|
| 65 |
|
| 66 |
if (!data.features) {
|
| 67 |
+
throw new Error(
|
| 68 |
+
"Dataset info.json does not have the expected features structure",
|
| 69 |
+
);
|
| 70 |
}
|
| 71 |
|
| 72 |
datasetInfoCache.set(repoId, { data: data as DatasetInfo, expiry: Date.now() + CACHE_TTL_MS });
|
|
|
|
| 77 |
}
|
| 78 |
throw new Error(
|
| 79 |
`Dataset ${repoId} is not compatible with this visualizer. ` +
|
| 80 |
+
"Failed to read dataset information from the main revision.",
|
| 81 |
);
|
| 82 |
}
|
| 83 |
}
|
|
|
|
| 109 |
return version;
|
| 110 |
}
|
| 111 |
|
| 112 |
+
/**
 * Build the download URL of a file inside a dataset repository.
 *
 * NOTE(review): `version` is accepted for interface compatibility but is not
 * used; the URL always points at the `main` revision.
 *
 * @param repoId - Dataset repository id (e.g. "org/dataset")
 * @param version - Dataset version (currently unused)
 * @param path - File path relative to the repository root
 * @returns `<DATASET_URL>/<repoId>/resolve/main/<path>`
 */
export function buildVersionedUrl(
  repoId: string,
  version: string,
  path: string,
): string {
  const segments = [DATASET_URL, repoId, "resolve", "main", path];
  return segments.join("/");
}
|
|
|