{ "model": { "bos_token_id": 1, "context_length": 131072, "decoder": { "session_options": { "log_id": "onnxruntime-genai", "provider_options": [] }, "head_size": 96, "hidden_size": 3072, "inputs": { "input_ids": "input_ids", "attention_mask": "attention_mask", "past_key_names": "past_key_values.%d.key", "past_value_names": "past_key_values.%d.value", "past_sequence_length": "past_seq_len", "total_sequence_length": "total_seq_len" }, "outputs": { "logits": "logits", "present_key_names": "present.%d.key", "present_value_names": "present.%d.value" }, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "sliding_window": { "window_size": 64, "pad_value": 0, "alignment": "left", "slide_key_value_cache": false }, "pipeline": [ { "embeddings": { "filename": "embeddings.onnx", "inputs": [ "input_ids" ], "outputs": [ "/model/embed_tokens/Gather/output_0_QuantizeLinear_Output" ] }, "context_ctx": { "filename": "context_ctx.onnx", "inputs": [ "/model/embed_tokens/Gather/output_0_QuantizeLinear_Output", "past_key_values.0.key", "past_key_values.0.value", "past_seq_len", "total_seq_len", "past_key_values.1.key", "past_key_values.1.value", "past_key_values.2.key", "past_key_values.2.value", "past_key_values.3.key", "past_key_values.3.value", "past_key_values.4.key", "past_key_values.4.value", "past_key_values.5.key", "past_key_values.5.value", "past_key_values.6.key", "past_key_values.6.value", "past_key_values.7.key", "past_key_values.7.value", "past_key_values.8.key", "past_key_values.8.value", "past_key_values.9.key", "past_key_values.9.value", "past_key_values.10.key", "past_key_values.10.value", "past_key_values.11.key", "past_key_values.11.value", "past_key_values.12.key", "past_key_values.12.value", "past_key_values.13.key", "past_key_values.13.value", "past_key_values.14.key", "past_key_values.14.value", "past_key_values.15.key", "past_key_values.15.value", "past_key_values.16.key", "past_key_values.16.value", "past_key_values.17.key", "past_key_values.17.value", "past_key_values.18.key", "past_key_values.18.value", "past_key_values.19.key", "past_key_values.19.value", "past_key_values.20.key", "past_key_values.20.value", "past_key_values.21.key", "past_key_values.21.value", "past_key_values.22.key", "past_key_values.22.value", "past_key_values.23.key", "past_key_values.23.value", "past_key_values.24.key", "past_key_values.24.value", "past_key_values.25.key", "past_key_values.25.value", "past_key_values.26.key", "past_key_values.26.value", "past_key_values.27.key", "past_key_values.27.value", "past_key_values.28.key", "past_key_values.28.value", "past_key_values.29.key", "past_key_values.29.value", "past_key_values.30.key", "past_key_values.30.value", "past_key_values.31.key", "past_key_values.31.value" ], "outputs": [ "present.0.key", "present.0.value", "present.1.key", "present.1.value", "present.2.key", "present.2.value", "present.3.key", "present.3.value", "present.4.key", "present.4.value", "present.5.key", "present.5.value", "present.6.key", "present.6.value", "present.7.key", "present.7.value", "present.8.key", "present.8.value", "present.9.key", "present.9.value", "present.10.key", "present.10.value", "present.11.key", "present.11.value", "present.12.key", "present.12.value", "present.13.key", "present.13.value", "present.14.key", "present.14.value", "present.15.key", "present.15.value", "present.16.key", "present.16.value", "present.17.key", "present.17.value", "present.18.key", "present.18.value", "present.19.key", "present.19.value", "present.20.key", "present.20.value", "present.21.key", "present.21.value", "present.22.key", "present.22.value", "present.23.key", "present.23.value", "present.24.key", "present.24.value", "present.25.key", "present.25.value", "present.26.key", "present.26.value", "present.27.key", "present.27.value", "present.28.key", "present.28.value", "present.29.key", "present.29.value", "present.30.key", "present.30.value", "present.31.key", "present.31.value", "/model/layers.32/final_norm_layernorm/SkipLayerNorm_Mul_output_0_QuantizeLinear_Output" ], "session_options": { "provider_options": [ { "qnn": { "htp_performance_mode": "burst", "htp_graph_finalization_optimization_mode": "3", "soc_model": "60" } } ] }, "run_on_token_gen": false }, "iterator_ctx": { "filename": "iterator_ctx.onnx", "inputs": [ "/model/embed_tokens/Gather/output_0_QuantizeLinear_Output", "past_key_values.0.key", "past_key_values.0.value", "past_seq_len", "total_seq_len", "past_key_values.1.key", "past_key_values.1.value", "past_key_values.2.key", "past_key_values.2.value", "past_key_values.3.key", "past_key_values.3.value", "past_key_values.4.key", "past_key_values.4.value", "past_key_values.5.key", "past_key_values.5.value", "past_key_values.6.key", "past_key_values.6.value", "past_key_values.7.key", "past_key_values.7.value", "past_key_values.8.key", "past_key_values.8.value", "past_key_values.9.key", "past_key_values.9.value", "past_key_values.10.key", "past_key_values.10.value", "past_key_values.11.key", "past_key_values.11.value", "past_key_values.12.key", "past_key_values.12.value", "past_key_values.13.key", "past_key_values.13.value", "past_key_values.14.key", "past_key_values.14.value", "past_key_values.15.key", "past_key_values.15.value", "past_key_values.16.key", "past_key_values.16.value", "past_key_values.17.key", "past_key_values.17.value", "past_key_values.18.key", "past_key_values.18.value", "past_key_values.19.key", "past_key_values.19.value", "past_key_values.20.key", "past_key_values.20.value", "past_key_values.21.key", "past_key_values.21.value", "past_key_values.22.key", "past_key_values.22.value", "past_key_values.23.key", "past_key_values.23.value", "past_key_values.24.key", "past_key_values.24.value", "past_key_values.25.key", "past_key_values.25.value", "past_key_values.26.key", "past_key_values.26.value", "past_key_values.27.key", "past_key_values.27.value", "past_key_values.28.key", "past_key_values.28.value", "past_key_values.29.key", "past_key_values.29.value", "past_key_values.30.key", "past_key_values.30.value", "past_key_values.31.key", "past_key_values.31.value" ], "outputs": [ "present.0.key", "present.0.value", "present.1.key", "present.1.value", "present.2.key", "present.2.value", "present.3.key", "present.3.value", "present.4.key", "present.4.value", "present.5.key", "present.5.value", "present.6.key", "present.6.value", "present.7.key", "present.7.value", "present.8.key", "present.8.value", "present.9.key", "present.9.value", "present.10.key", "present.10.value", "present.11.key", "present.11.value", "present.12.key", "present.12.value", "present.13.key", "present.13.value", "present.14.key", "present.14.value", "present.15.key", "present.15.value", "present.16.key", "present.16.value", "present.17.key", "present.17.value", "present.18.key", "present.18.value", "present.19.key", "present.19.value", "present.20.key", "present.20.value", "present.21.key", "present.21.value", "present.22.key", "present.22.value", "present.23.key", "present.23.value", "present.24.key", "present.24.value", "present.25.key", "present.25.value", "present.26.key", "present.26.value", "present.27.key", "present.27.value", "present.28.key", "present.28.value", "present.29.key", "present.29.value", "present.30.key", "present.30.value", "present.31.key", "present.31.value", "/model/layers.32/final_norm_layernorm/SkipLayerNorm_Mul_output_0_QuantizeLinear_Output" ], "session_options": { "provider_options": [ { "qnn": { "htp_performance_mode": "burst", "htp_graph_finalization_optimization_mode": "3", "soc_model": "60" } } ] }, "run_on_prompt": false }, "lm_head": { "filename": "lm_head.onnx", "inputs": [ "/model/layers.32/final_norm_layernorm/SkipLayerNorm_Mul_output_0_QuantizeLinear_Output" ], "outputs": [ "logits" ] } } ] }, "eos_token_id": [ 32007, 32001, 32000 ], "pad_token_id": 32000, "type": "decoder-pipeline", "vocab_size": 32064 }, "search": { "diversity_penalty": 0.0, "do_sample": false, "early_stopping": true, "length_penalty": 1.0, "max_length": 131072, "min_length": 0, "no_repeat_ngram_size": 0, "num_beams": 1, "num_return_sequences": 1, "past_present_share_buffer": true, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 1, "top_p": 1.0 } }