ibm-granite-speech-4.1-2b-onnx / granite_export_metadata.json
smcleod's picture
Add files using upload-large-folder tool
146f0ba verified
{
"variant": "base",
"upstream": {
"repo": "ibm-granite/granite-speech-4.1-2b",
"url": "https://huggingface.co/ibm-granite/granite-speech-4.1-2b",
"license": "Apache-2.0"
},
"topology": "encoder + prompt_encode + decode_step + embed_tokens (autoregressive)",
"graphs": [
{
"name": "fp32/encoder.onnx",
"sidecar": "fp32/encoder.onnx_data",
"precision": "fp32",
"size_bytes": 912937,
"sidecar_size_bytes": 1903334768,
"opset": 20,
"ir_version": 10,
"ai_onnx_only": true,
"inputs": [
{
"name": "input_features",
"shape": [
"B",
"T",
160
],
"dtype": "float32"
}
],
"outputs": [
{
"name": "audio_embeds",
"shape": [
"B",
"T_audio",
2048
],
"dtype": "float32"
},
{
"name": "audio_embed_sizes",
"shape": [
"B"
],
"dtype": "int64"
}
]
},
{
"name": "int8/encoder.onnx",
"sidecar": "int8/encoder.onnx_data",
"precision": "int8-weights-only",
"size_bytes": 2681169,
"sidecar_size_bytes": 502619504,
"opset": 20,
"ir_version": 10,
"ai_onnx_only": true,
"inputs": [
{
"name": "input_features",
"shape": [
"B",
"T",
160
],
"dtype": "float32"
}
],
"outputs": [
{
"name": "audio_embeds",
"shape": [
"B",
"T_audio",
2048
],
"dtype": "float32"
},
{
"name": "audio_embed_sizes",
"shape": [
"B"
],
"dtype": "int64"
}
]
},
{
"name": "fp16w/encoder.onnx",
"sidecar": "fp16w/encoder.onnx_data",
"precision": "fp16-weights-fp32-compute",
"size_bytes": 1224882,
"sidecar_size_bytes": 951668080,
"opset": 20,
"ir_version": 10,
"ai_onnx_only": true,
"inputs": [
{
"name": "input_features",
"shape": [
"B",
"T",
160
],
"dtype": "float32"
}
],
"outputs": [
{
"name": "audio_embeds",
"shape": [
"B",
"T_audio",
2048
],
"dtype": "float32"
},
{
"name": "audio_embed_sizes",
"shape": [
"B"
],
"dtype": "int64"
}
]
},
{
"name": "fp32/prompt_encode.onnx",
"sidecar": "fp32/prompt_encode.onnx_data",
"precision": "fp32",
"size_bytes": 1843351,
"sidecar_size_bytes": 6527008768,
"opset": 20,
"ir_version": 10,
"ai_onnx_only": true,
"inputs": [
{
"name": "inputs_embeds",
"shape": [
"B",
"N",
2048
],
"dtype": "float32"
},
{
"name": "position_ids",
"shape": [
"B",
"N"
],
"dtype": "int64"
},
{
"name": "attention_mask",
"shape": [
"B",
1,
"N",
"N"
],
"dtype": "float32"
}
],
"outputs": [
{
"name": "logits",
"shape": [
"B",
"N",
100353
],
"dtype": "float32"
},
{
"name": "present.{i}.{key,value}",
"shape": [
"B",
4,
"N",
128
],
"dtype": "float32",
"note": "40 layers x 2 (key, value) = 80 KV-cache outputs"
}
]
},
{
"name": "int8/prompt_encode.onnx",
"sidecar": "int8/prompt_encode.onnx_data",
"precision": "int8-weights-only",
"size_bytes": 6417450,
"sidecar_size_bytes": 1632249856,
"opset": 20,
"ir_version": 10,
"ai_onnx_only": true,
"inputs": [
{
"name": "inputs_embeds",
"shape": [
"B",
"N",
2048
],
"dtype": "float32"
},
{
"name": "position_ids",
"shape": [
"B",
"N"
],
"dtype": "int64"
},
{
"name": "attention_mask",
"shape": [
"B",
1,
"N",
"N"
],
"dtype": "float32"
}
],
"outputs": [
{
"name": "logits",
"shape": [
"B",
"N",
100353
],
"dtype": "float32"
},
{
"name": "present.{i}.{key,value}",
"shape": [
"B",
4,
"N",
128
],
"dtype": "float32",
"note": "40 layers x 2 (key, value) = 80 KV-cache outputs"
}
]
},
{
"name": "fp16w/prompt_encode.onnx",
"sidecar": "fp16w/prompt_encode.onnx_data",
"precision": "fp16-weights-fp32-compute",
"size_bytes": 2573298,
"sidecar_size_bytes": 3263504384,
"opset": 20,
"ir_version": 10,
"ai_onnx_only": true,
"inputs": [
{
"name": "inputs_embeds",
"shape": [
"B",
"N",
2048
],
"dtype": "float32"
},
{
"name": "position_ids",
"shape": [
"B",
"N"
],
"dtype": "int64"
},
{
"name": "attention_mask",
"shape": [
"B",
1,
"N",
"N"
],
"dtype": "float32"
}
],
"outputs": [
{
"name": "logits",
"shape": [
"B",
"N",
100353
],
"dtype": "float32"
},
{
"name": "present.{i}.{key,value}",
"shape": [
"B",
4,
"N",
128
],
"dtype": "float32",
"note": "40 layers x 2 (key, value) = 80 KV-cache outputs"
}
]
},
{
"name": "fp32/decode_step.onnx",
"sidecar": "fp32/decode_step.onnx_data",
"precision": "fp32",
"size_bytes": 1849771,
"sidecar_size_bytes": 6527008768,
"opset": 20,
"ir_version": 10,
"ai_onnx_only": true,
"inputs": [
{
"name": "inputs_embeds",
"shape": [
"B",
1,
2048
],
"dtype": "float32"
},
{
"name": "position_ids",
"shape": [
"B",
1
],
"dtype": "int64"
},
{
"name": "attention_mask",
"shape": [
"B",
1,
1,
"T_total"
],
"dtype": "float32"
},
{
"name": "past_key_values.{i}.{key,value}",
"shape": [
"B",
4,
"T_past",
128
],
"dtype": "float32",
"note": "40 layers x 2 = 80 KV-cache inputs"
}
],
"outputs": [
{
"name": "logits",
"shape": [
"B",
1,
100353
],
"dtype": "float32"
},
{
"name": "present.{i}.{key,value}",
"shape": [
"B",
4,
"T_total",
128
],
"dtype": "float32",
"note": "40 layers x 2 = 80 KV-cache outputs"
}
]
},
{
"name": "int8/decode_step.onnx",
"sidecar": "int8/decode_step.onnx_data",
"precision": "int8-weights-only",
"size_bytes": 6424164,
"sidecar_size_bytes": 1632249856,
"opset": 20,
"ir_version": 10,
"ai_onnx_only": true,
"inputs": [
{
"name": "inputs_embeds",
"shape": [
"B",
1,
2048
],
"dtype": "float32"
},
{
"name": "position_ids",
"shape": [
"B",
1
],
"dtype": "int64"
},
{
"name": "attention_mask",
"shape": [
"B",
1,
1,
"T_total"
],
"dtype": "float32"
},
{
"name": "past_key_values.{i}.{key,value}",
"shape": [
"B",
4,
"T_past",
128
],
"dtype": "float32",
"note": "40 layers x 2 = 80 KV-cache inputs"
}
],
"outputs": [
{
"name": "logits",
"shape": [
"B",
1,
100353
],
"dtype": "float32"
},
{
"name": "present.{i}.{key,value}",
"shape": [
"B",
4,
"T_total",
128
],
"dtype": "float32",
"note": "40 layers x 2 = 80 KV-cache outputs"
}
]
},
{
"name": "fp16w/decode_step.onnx",
"sidecar": "fp16w/decode_step.onnx_data",
"precision": "fp16-weights-fp32-compute",
"size_bytes": 2580008,
"sidecar_size_bytes": 3263504384,
"opset": 20,
"ir_version": 10,
"ai_onnx_only": true,
"inputs": [
{
"name": "inputs_embeds",
"shape": [
"B",
1,
2048
],
"dtype": "float32"
},
{
"name": "position_ids",
"shape": [
"B",
1
],
"dtype": "int64"
},
{
"name": "attention_mask",
"shape": [
"B",
1,
1,
"T_total"
],
"dtype": "float32"
},
{
"name": "past_key_values.{i}.{key,value}",
"shape": [
"B",
4,
"T_past",
128
],
"dtype": "float32",
"note": "40 layers x 2 = 80 KV-cache inputs"
}
],
"outputs": [
{
"name": "logits",
"shape": [
"B",
1,
100353
],
"dtype": "float32"
},
{
"name": "present.{i}.{key,value}",
"shape": [
"B",
4,
"T_total",
128
],
"dtype": "float32",
"note": "40 layers x 2 = 80 KV-cache outputs"
}
]
},
{
"name": "fp32/embed_tokens.onnx",
"sidecar": "fp32/embed_tokens.onnx_data",
"precision": "fp32",
"size_bytes": 336,
"sidecar_size_bytes": 822091776,
"opset": 20,
"ir_version": 10,
"ai_onnx_only": true,
"inputs": [
{
"name": "input_ids",
"shape": [
"B",
"N"
],
"dtype": "int64"
}
],
"outputs": [
{
"name": "inputs_embeds",
"shape": [
"B",
"N",
2048
],
"dtype": "float32"
}
]
},
{
"name": "int8/embed_tokens.onnx",
"sidecar": "int8/embed_tokens.onnx_data",
"precision": "int8-weights-only",
"size_bytes": 991,
"sidecar_size_bytes": 205924356,
"opset": 20,
"ir_version": 10,
"ai_onnx_only": true,
"inputs": [
{
"name": "input_ids",
"shape": [
"B",
"N"
],
"dtype": "int64"
}
],
"outputs": [
{
"name": "inputs_embeds",
"shape": [
"B",
"N",
2048
],
"dtype": "float32"
}
]
},
{
"name": "fp16w/embed_tokens.onnx",
"sidecar": "fp16w/embed_tokens.onnx_data",
"precision": "fp16-weights-fp32-compute",
"size_bytes": 432,
"sidecar_size_bytes": 411045888,
"opset": 20,
"ir_version": 10,
"ai_onnx_only": true,
"inputs": [
{
"name": "input_ids",
"shape": [
"B",
"N"
],
"dtype": "int64"
}
],
"outputs": [
{
"name": "inputs_embeds",
"shape": [
"B",
"N",
2048
],
"dtype": "float32"
}
]
}
],
"parity": {
"fp32": {
"encoder": {
"argmax_only": false,
"max_abs_err": 4.481524229049683e-06,
"mean_abs_err": 1.243776637238625e-07,
"p99_abs_err": 6.463378667831421e-07,
"audio_embed_sizes_match": true,
"input_features_shape": [
1,
844,
160
],
"audio_embeds_shape": [
1,
171,
2048
]
},
"llm_e2e": {
"argmax_only": false,
"prompt_argmax_mismatches": 0,
"prompt_argmax_total": 190,
"prompt_logits_max_abs_err": 0.000364,
"decode_steps": 51,
"decode_argmax_mismatches": 0,
"decode_max_abs_err_step": null,
"tokens_match": true,
"transcript_match": true,
"source_note": "from task-11 record (parity.json was overwritten by a later encoder-only re-run; see dev-plan.md)"
}
},
"int8": {
"encoder": {
"argmax_only": true,
"max_abs_err": 0.16911625862121582,
"mean_abs_err": 0.010853650979697704,
"p99_abs_err": 0.044730618596076965
},
"llm_e2e": {
"argmax_only": true,
"prompt_argmax_mismatches": 58,
"prompt_argmax_total": 190,
"prompt_logits_max_abs_err": 10.136197090148926,
"prompt_logits_mean_abs_err": 0.778969943523407,
"decode_steps": 51,
"decode_argmax_mismatches": 0,
"decode_max_abs_err_step": 5.762608528137207,
"tokens_match": true,
"transcript_match": true
}
}
},
"multi_clip_parity": {
"rows": [
{
"name": "is-it-more-wood",
"duration_s": 46.9,
"fp32_byte_exact_vs_pt": true,
"int8_byte_exact_vs_pt": false,
"int8_wer_vs_pt": 0.3741,
"int8_wer_norm_vs_pt": 0.0,
"int8_vs_fp32_lev": 58,
"fp16w_byte_exact_vs_pt": true,
"fp16w_wer_vs_pt": 0.0,
"fp16w_wer_norm_vs_pt": 0.0,
"fp32_transcript": "Well, hello, Sam. Guess who? Yeah, it's Robert Clotworthy, the narrator of your favorite television show, \"The Curse of Oak Island.\" Yes, I'm the. Is it possible? Could it be? And what else do we say in Oak Island? A couple of words. They're not coming to me. Oh, yeah. More wood. But let's not forget. It is an island named after a tree. Well, here's the question. Why am I reaching out to you? Is it possible that I'm reaching out to you because it's your birthday? Could it be that Emma let the cat out of the bag? Well, the answer to those questions is yes. And she said, well, she contacted me. She said, Robert, you know, Sam is an amazing boyfriend. In fact, she used the word great. She said he is a great boyfriend.",
"int8_transcript": "Well hello Sam guess who yeah it's Robert Clotworthy the narrator of your favorite television show The Curse of Oak Island yes I'm the is it possible could it be and what else do we say in Oak Island a couple of words they're not coming to me oh yeah more wood but let's not forget it is an island named after a tree. well here's the question why am I reaching out to you is it possible that I'm reaching out to you because it's your birthday could it be that Emma let the cat out of the bag well the answer to those questions is yes and she said well she contacted me she said Robert you know Sam is an amazing boyfriend in fact she used the word great she said he is a great boyfriend.",
"fp16w_transcript": "Well, hello, Sam. Guess who? Yeah, it's Robert Clotworthy, the narrator of your favorite television show, \"The Curse of Oak Island.\" Yes, I'm the. Is it possible? Could it be? And what else do we say in Oak Island? A couple of words. They're not coming to me. Oh, yeah. More wood. But let's not forget. It is an island named after a tree. Well, here's the question. Why am I reaching out to you? Is it possible that I'm reaching out to you because it's your birthday? Could it be that Emma let the cat out of the bag? Well, the answer to those questions is yes. And she said, well, she contacted me. She said, Robert, you know, Sam is an amazing boyfriend. In fact, she used the word great. She said he is a great boyfriend.",
"pt_transcript": "Well, hello, Sam. Guess who? Yeah, it's Robert Clotworthy, the narrator of your favorite television show, \"The Curse of Oak Island.\" Yes, I'm the. Is it possible? Could it be? And what else do we say in Oak Island? A couple of words. They're not coming to me. Oh, yeah. More wood. But let's not forget. It is an island named after a tree. Well, here's the question. Why am I reaching out to you? Is it possible that I'm reaching out to you because it's your birthday? Could it be that Emma let the cat out of the bag? Well, the answer to those questions is yes. And she said, well, she contacted me. She said, Robert, you know, Sam is an amazing boyfriend. In fact, she used the word great. She said he is a great boyfriend."
},
{
"name": "two-speakers-1",
"duration_s": 93.8,
"fp32_byte_exact_vs_pt": true,
"int8_byte_exact_vs_pt": false,
"int8_wer_vs_pt": 0.0312,
"int8_wer_norm_vs_pt": 0.0136,
"int8_vs_fp32_lev": 20,
"fp16w_byte_exact_vs_pt": true,
"fp16w_wer_vs_pt": 0.0,
"fp16w_wer_norm_vs_pt": 0.0,
"fp32_transcript": "Today it is a true honor to speak with Demis Asavis, who is the CEO of DeepMind. Demis, welcome to the podcast. Thanks for having me. First question, given your neuroscience background, how do you think about intelligence? Specifically, do you think it's like one higher level general reasoning circuit, or do you think it's thousands of independent subskills and heuristics? Well, it's interesting because intelligence is so broad and, you know, what we use it for is so sort of generally applicable. I think that suggests that, you know, there must be some sort of high-level common things in, you know, common kind of algorithmic themes, I think, around how the brain processes the world around us. So, of course, then there are specialized parts of the brain that do specific things, but I think there are probably some underlying principles that underpin all of that. Yeah. How do you make sense of the fact that in these LLMs, though, when you give them a lot of data in any specific domain, they tend to get asymmetrically better in that domain? Wouldn't we expect a sort of like general improvement across all the different areas? Well, I think you, first of all, I think you do actually sometimes get surprising improvement in other domains when you improve in a specific domain. So, for example, when these large models sort of improve at coding, that can actually improve their general reasoning. So there is some evidence of some transfer, although I think we would like a lot more evidence of that. But also, you know, that's how the human brain learns, too, is if we experience and practice a lot of things like chess or, you know, writing.",
"int8_transcript": "Today it is a true honor to speak with Demis Savas, who is the CEO of DeepMind. Demis, welcome to the podcast. Thanks for having me. First question, given your neuroscience background, how do you think about intelligence? Specifically, do you think it's like one higher level general reasoning circuit, or do you think it's thousands of independent subskills and heuristics? Well, it's interesting because intelligence is so broad and, you know, what we use it for is so sort of generally applicable. I think that suggests that, you know, there must be some sort of high-level common things in, you know, common kind of algorithmic themes. I think around how the brain processes the world around us. So, of course, then there are specialized parts of the brain that do specific things, but I think there are probably some underlying principles that underpin all of that. Yeah. How do you make sense of the fact that in these LLMs, though, when you give them a lot of data in any specific domain, they tend to get asymmetrically better in that domain? Wouldn't we expect a sort of like general improvement across all the, all the different areas? Well, I think you, first of all, I think you do actually sometimes get surprising improvement in other domains when you improve in a specific domain. So, for example, when these large models sort of improve at coding, that can actually improve their general reasoning. So there is some evidence of some transfer, although I think we would like a lot more evidence of that. But also, you know, that's how the human brain learns too. If we experience and practice a lot of things like chess or, you know, writing.",
"fp16w_transcript": "Today it is a true honor to speak with Demis Asavis, who is the CEO of DeepMind. Demis, welcome to the podcast. Thanks for having me. First question, given your neuroscience background, how do you think about intelligence? Specifically, do you think it's like one higher level general reasoning circuit, or do you think it's thousands of independent subskills and heuristics? Well, it's interesting because intelligence is so broad and, you know, what we use it for is so sort of generally applicable. I think that suggests that, you know, there must be some sort of high-level common things in, you know, common kind of algorithmic themes, I think, around how the brain processes the world around us. So, of course, then there are specialized parts of the brain that do specific things, but I think there are probably some underlying principles that underpin all of that. Yeah. How do you make sense of the fact that in these LLMs, though, when you give them a lot of data in any specific domain, they tend to get asymmetrically better in that domain? Wouldn't we expect a sort of like general improvement across all the different areas? Well, I think you, first of all, I think you do actually sometimes get surprising improvement in other domains when you improve in a specific domain. So, for example, when these large models sort of improve at coding, that can actually improve their general reasoning. So there is some evidence of some transfer, although I think we would like a lot more evidence of that. But also, you know, that's how the human brain learns, too, is if we experience and practice a lot of things like chess or, you know, writing.",
"pt_transcript": "Today it is a true honor to speak with Demis Asavis, who is the CEO of DeepMind. Demis, welcome to the podcast. Thanks for having me. First question, given your neuroscience background, how do you think about intelligence? Specifically, do you think it's like one higher level general reasoning circuit, or do you think it's thousands of independent subskills and heuristics? Well, it's interesting because intelligence is so broad and, you know, what we use it for is so sort of generally applicable. I think that suggests that, you know, there must be some sort of high-level common things in, you know, common kind of algorithmic themes, I think, around how the brain processes the world around us. So, of course, then there are specialized parts of the brain that do specific things, but I think there are probably some underlying principles that underpin all of that. Yeah. How do you make sense of the fact that in these LLMs, though, when you give them a lot of data in any specific domain, they tend to get asymmetrically better in that domain? Wouldn't we expect a sort of like general improvement across all the different areas? Well, I think you, first of all, I think you do actually sometimes get surprising improvement in other domains when you improve in a specific domain. So, for example, when these large models sort of improve at coding, that can actually improve their general reasoning. So there is some evidence of some transfer, although I think we would like a lot more evidence of that. But also, you know, that's how the human brain learns, too, is if we experience and practice a lot of things like chess or, you know, writing."
},
{
"name": "two-speakers-2",
"duration_s": 38.8,
"fp32_byte_exact_vs_pt": true,
"int8_byte_exact_vs_pt": false,
"int8_wer_vs_pt": 0.2347,
"int8_wer_norm_vs_pt": 0.0,
"int8_vs_fp32_lev": 25,
"fp16w_byte_exact_vs_pt": true,
"fp16w_wer_vs_pt": 0.0,
"fp16w_wer_norm_vs_pt": 0.0,
"fp32_transcript": "For the first time ever, we may have things more intelligent than us. You believe they can understand. Yes. You believe they are intelligent. Yes. You believe these systems have experiences of their own and can make decisions based on those experiences. In the same sense as people do, yes. Are they conscious? I think they probably don't have much self-awareness at present. So in that sense, I don't think they're conscious. Will they have self-awareness? Oh, yes. I think they will in time. And so human beings will be the second most intelligent beings on the planet.",
"int8_transcript": "for the first time ever we may have things more intelligent than us. You believe they can understand yes you believe they are intelligent yes you believe these systems have experiences of their own and can make decisions based on those experiences in the same sense as people do yes are they conscious I think they probably don't have much self-awareness at present so in that sense I don't think they're conscious. will they have self-awareness. Oh yes i think they will in time. and so human beings will be the second most intelligent beings on the planet.",
"fp16w_transcript": "For the first time ever, we may have things more intelligent than us. You believe they can understand. Yes. You believe they are intelligent. Yes. You believe these systems have experiences of their own and can make decisions based on those experiences. In the same sense as people do, yes. Are they conscious? I think they probably don't have much self-awareness at present. So in that sense, I don't think they're conscious. Will they have self-awareness? Oh, yes. I think they will in time. And so human beings will be the second most intelligent beings on the planet.",
"pt_transcript": "For the first time ever, we may have things more intelligent than us. You believe they can understand. Yes. You believe they are intelligent. Yes. You believe these systems have experiences of their own and can make decisions based on those experiences. In the same sense as people do, yes. Are they conscious? I think they probably don't have much self-awareness at present. So in that sense, I don't think they're conscious. Will they have self-awareness? Oh, yes. I think they will in time. And so human beings will be the second most intelligent beings on the planet."
}
]
},
"toolchain": {
"transformers": "5.8.0",
"torch": "2.11.0",
"onnx": "1.21.0",
"onnxruntime": "1.25.1",
"exporter": "torch.onnx.export TorchScript path (dynamo=False)"
},
"ort_compatibility": "ort 2.0-rc.x (Rust crate); validated against onnxruntime 1.17 - 1.25",
"audio_token_id": 100352
}