diff --git "a/sparse_int8_ir/conf.yaml" "b/sparse_int8_ir/conf.yaml" new file mode 100644--- /dev/null +++ "b/sparse_int8_ir/conf.yaml" @@ -0,0 +1,2463 @@ +model: + name: model + operator: + input_data: + type: Input + output: + input_ids:0: + dtype: int32 + shape: [-1, -1] + attention_mask:0: + dtype: int32 + shape: [-1, -1] + distilbert.embeddings.position_embeddings.weight:0: + dtype: fp32 + shape: [512, 768] + location: [0, 1572864] + distilbert.embeddings.word_embeddings.weight:0: + dtype: fp32 + shape: [30522, 768] + location: [1572864, 93763584] + distilbert.embeddings.LayerNorm.weight:0: + dtype: fp32 + shape: [768] + location: [95336448, 3072] + distilbert.embeddings.LayerNorm.bias:0: + dtype: fp32 + shape: [768] + location: [95339520, 3072] + /distilbert/embeddings/LayerNorm/Add_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [97139752, 4] + /distilbert/embeddings/LayerNorm/Add_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [97139756, 4] + /distilbert/transformer/layer.0/attention/k_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [95342600, 589824] + distilbert.transformer.layer.0.attention.k_lin.bias:0: + dtype: s32 + shape: [768] + location: [95932424, 3072] + /distilbert/transformer/layer.0/attention/k_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [95935496, 3072] + /distilbert/transformer/layer.0/attention/k_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [95938568, 3072] + /distilbert/transformer/layer.0/attention/k_lin/Add_output_0:0_min: + dtype: fp32 + shape: [1] + location: [95941648, 4] + /distilbert/transformer/layer.0/attention/k_lin/Add_output_0:0_max: + dtype: fp32 + shape: [1] + location: [95941652, 4] + /distilbert/transformer/layer.0/attention/q_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [95941656, 589824] + distilbert.transformer.layer.0.attention.q_lin.bias:0: + dtype: s32 + shape: [768] + location: [96531480, 3072] + /distilbert/transformer/layer.0/attention/q_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [96534552, 3072] + /distilbert/transformer/layer.0/attention/q_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [96537624, 3072] + /distilbert/transformer/layer.0/attention/q_lin/Add_output_0:0_min: + dtype: fp32 + shape: [1] + location: [96540704, 4] + /distilbert/transformer/layer.0/attention/q_lin/Add_output_0:0_max: + dtype: fp32 + shape: [1] + location: [96540708, 4] + /distilbert/transformer/layer.0/attention/v_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [96540712, 589824] + distilbert.transformer.layer.0.attention.v_lin.bias:0: + dtype: s32 + shape: [768] + location: [97130536, 3072] + /distilbert/transformer/layer.0/attention/v_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [97133608, 3072] + /distilbert/transformer/layer.0/attention/v_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [97136680, 3072] + /distilbert/transformer/layer.0/attention/Reshape_2_output_0:0_min: + dtype: fp32 + shape: [1] + location: [97139784, 4] + /distilbert/transformer/layer.0/attention/Reshape_2_output_0:0_max: + dtype: fp32 + shape: [1] + location: [97139788, 4] + /distilbert/transformer/layer.0/attention/Softmax_output_0:0_min: + dtype: fp32 + shape: [1] + location: [97139776, 4] + /distilbert/transformer/layer.0/attention/Softmax_output_0:0_max: + dtype: fp32 + shape: [1] + location: [97139780, 4] + /distilbert/transformer/layer.0/attention/Reshape_4_output_0:0_min: + dtype: fp32 + shape: [1] + location: [97738840, 4] + /distilbert/transformer/layer.0/attention/Reshape_4_output_0:0_max: + dtype: fp32 + shape: [1] + location: [97738844, 4] + /distilbert/transformer/layer.0/attention/out_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [97139800, 589824] + distilbert.transformer.layer.0.attention.out_lin.bias:0: + dtype: s32 + shape: [768] + location: [97729624, 3072] + /distilbert/transformer/layer.0/attention/out_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [97732696, 3072] + /distilbert/transformer/layer.0/attention/out_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [97735768, 3072] + /distilbert/transformer/layer.0/Add_output_0:0_min: + dtype: fp32 + shape: [1] + location: [97738848, 4] + /distilbert/transformer/layer.0/Add_output_0:0_max: + dtype: fp32 + shape: [1] + location: [97738852, 4] + distilbert.transformer.layer.0.sa_layer_norm.weight:0: + dtype: fp32 + shape: [768] + location: [97738856, 3072] + distilbert.transformer.layer.0.sa_layer_norm.bias:0: + dtype: fp32 + shape: [768] + location: [97741928, 3072] + /distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [100141168, 4] + /distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [100141172, 4] + /distilbert/transformer/layer.0/ffn/lin1/Transpose_output_0_quantized:0: + dtype: s8 + shape: [3072, 768] + location: [97745008, 2359296] + distilbert.transformer.layer.0.ffn.lin1.bias:0: + dtype: s32 + shape: [3072] + location: [100104304, 12288] + /distilbert/transformer/layer.0/ffn/lin1/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [3072] + location: [100116592, 12288] + /distilbert/transformer/layer.0/ffn/lin1/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [3072] + location: [100128880, 12288] + /distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [102509696, 4] + /distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [102509700, 4] + /distilbert/transformer/layer.0/ffn/lin2/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 3072] + location: [100141184, 2359296] + distilbert.transformer.layer.0.ffn.lin2.bias:0: + dtype: s32 + shape: [768] + location: [102500480, 3072] + /distilbert/transformer/layer.0/ffn/lin2/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [102503552, 3072] + /distilbert/transformer/layer.0/ffn/lin2/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [102506624, 3072] + /distilbert/transformer/layer.0/Add_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [102509704, 4] + /distilbert/transformer/layer.0/Add_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [102509708, 4] + distilbert.transformer.layer.0.output_layer_norm.weight:0: + dtype: fp32 + shape: [768] + location: [102509712, 3072] + distilbert.transformer.layer.0.output_layer_norm.bias:0: + dtype: fp32 + shape: [768] + location: [102512784, 3072] + /distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [104313016, 4] + /distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [104313020, 4] + /distilbert/transformer/layer.1/attention/k_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [102515864, 589824] + distilbert.transformer.layer.1.attention.k_lin.bias:0: + dtype: s32 + shape: [768] + location: [103105688, 3072] + /distilbert/transformer/layer.1/attention/k_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [103108760, 3072] + /distilbert/transformer/layer.1/attention/k_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [103111832, 3072] + /distilbert/transformer/layer.1/attention/k_lin/Add_output_0:0_min: + dtype: fp32 + shape: [1] + location: [103114912, 4] + /distilbert/transformer/layer.1/attention/k_lin/Add_output_0:0_max: + dtype: fp32 + shape: [1] + location: [103114916, 4] + /distilbert/transformer/layer.1/attention/q_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [103114920, 589824] + distilbert.transformer.layer.1.attention.q_lin.bias:0: + dtype: s32 + shape: [768] + location: [103704744, 3072] + /distilbert/transformer/layer.1/attention/q_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [103707816, 3072] + /distilbert/transformer/layer.1/attention/q_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [103710888, 3072] + /distilbert/transformer/layer.1/attention/q_lin/Add_output_0:0_min: + dtype: fp32 + shape: [1] + location: [103713968, 4] + /distilbert/transformer/layer.1/attention/q_lin/Add_output_0:0_max: + dtype: fp32 + shape: [1] + location: [103713972, 4] + /distilbert/transformer/layer.1/attention/v_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [103713976, 589824] + distilbert.transformer.layer.1.attention.v_lin.bias:0: + dtype: s32 + shape: [768] + location: [104303800, 3072] + /distilbert/transformer/layer.1/attention/v_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [104306872, 3072] + /distilbert/transformer/layer.1/attention/v_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [104309944, 3072] + /distilbert/transformer/layer.1/attention/Reshape_2_output_0:0_min: + dtype: fp32 + shape: [1] + location: [104313048, 4] + /distilbert/transformer/layer.1/attention/Reshape_2_output_0:0_max: + dtype: fp32 + shape: [1] + location: [104313052, 4] + /distilbert/transformer/layer.1/attention/Softmax_output_0:0_min: + dtype: fp32 + shape: [1] + location: [104313040, 4] + /distilbert/transformer/layer.1/attention/Softmax_output_0:0_max: + dtype: fp32 + shape: [1] + location: [104313044, 4] + /distilbert/transformer/layer.1/attention/Reshape_4_output_0:0_min: + dtype: fp32 + shape: [1] + location: [104912104, 4] + /distilbert/transformer/layer.1/attention/Reshape_4_output_0:0_max: + dtype: fp32 + shape: [1] + location: [104912108, 4] + /distilbert/transformer/layer.1/attention/out_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [104313064, 589824] + distilbert.transformer.layer.1.attention.out_lin.bias:0: + dtype: s32 + shape: [768] + location: [104902888, 3072] + /distilbert/transformer/layer.1/attention/out_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [104905960, 3072] + /distilbert/transformer/layer.1/attention/out_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [104909032, 3072] + /distilbert/transformer/layer.1/Add_output_0:0_min: + dtype: fp32 + shape: [1] + location: [104912112, 4] + /distilbert/transformer/layer.1/Add_output_0:0_max: + dtype: fp32 + shape: [1] + location: [104912116, 4] + distilbert.transformer.layer.1.sa_layer_norm.weight:0: + dtype: fp32 + shape: [768] + location: [104912120, 3072] + distilbert.transformer.layer.1.sa_layer_norm.bias:0: + dtype: fp32 + shape: [768] + location: [104915192, 3072] + /distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [107314432, 4] + /distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [107314436, 4] + /distilbert/transformer/layer.1/ffn/lin1/Transpose_output_0_quantized:0: + dtype: s8 + shape: [3072, 768] + location: [104918272, 2359296] + distilbert.transformer.layer.1.ffn.lin1.bias:0: + dtype: s32 + shape: [3072] + location: [107277568, 12288] + /distilbert/transformer/layer.1/ffn/lin1/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [3072] + location: [107289856, 12288] + /distilbert/transformer/layer.1/ffn/lin1/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [3072] + location: [107302144, 12288] + /distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [109682960, 4] + /distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [109682964, 4] + /distilbert/transformer/layer.1/ffn/lin2/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 3072] + location: [107314448, 2359296] + distilbert.transformer.layer.1.ffn.lin2.bias:0: + dtype: s32 + shape: [768] + location: [109673744, 3072] + /distilbert/transformer/layer.1/ffn/lin2/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [109676816, 3072] + /distilbert/transformer/layer.1/ffn/lin2/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [109679888, 3072] + /distilbert/transformer/layer.1/Add_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [109682968, 4] + /distilbert/transformer/layer.1/Add_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [109682972, 4] + distilbert.transformer.layer.1.output_layer_norm.weight:0: + dtype: fp32 + shape: [768] + location: [109682976, 3072] + distilbert.transformer.layer.1.output_layer_norm.bias:0: + dtype: fp32 + shape: [768] + location: [109686048, 3072] + /distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [111486280, 4] + /distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [111486284, 4] + /distilbert/transformer/layer.2/attention/k_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [109689128, 589824] + distilbert.transformer.layer.2.attention.k_lin.bias:0: + dtype: s32 + shape: [768] + location: [110278952, 3072] + /distilbert/transformer/layer.2/attention/k_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [110282024, 3072] + /distilbert/transformer/layer.2/attention/k_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [110285096, 3072] + /distilbert/transformer/layer.2/attention/k_lin/Add_output_0:0_min: + dtype: fp32 + shape: [1] + location: [110288176, 4] + /distilbert/transformer/layer.2/attention/k_lin/Add_output_0:0_max: + dtype: fp32 + shape: [1] + location: [110288180, 4] + /distilbert/transformer/layer.2/attention/q_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [110288184, 589824] + distilbert.transformer.layer.2.attention.q_lin.bias:0: + dtype: s32 + shape: [768] + location: [110878008, 3072] + /distilbert/transformer/layer.2/attention/q_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [110881080, 3072] + /distilbert/transformer/layer.2/attention/q_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [110884152, 3072] + /distilbert/transformer/layer.2/attention/q_lin/Add_output_0:0_min: + dtype: fp32 + shape: [1] + location: [110887232, 4] + /distilbert/transformer/layer.2/attention/q_lin/Add_output_0:0_max: + dtype: fp32 + shape: [1] + location: [110887236, 4] + /distilbert/transformer/layer.2/attention/v_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [110887240, 589824] + distilbert.transformer.layer.2.attention.v_lin.bias:0: + dtype: s32 + shape: [768] + location: [111477064, 3072] + /distilbert/transformer/layer.2/attention/v_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [111480136, 3072] + /distilbert/transformer/layer.2/attention/v_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [111483208, 3072] + /distilbert/transformer/layer.2/attention/Reshape_2_output_0:0_min: + dtype: fp32 + shape: [1] + location: [111486312, 4] + /distilbert/transformer/layer.2/attention/Reshape_2_output_0:0_max: + dtype: fp32 + shape: [1] + location: [111486316, 4] + /distilbert/transformer/layer.2/attention/Softmax_output_0:0_min: + dtype: fp32 + shape: [1] + location: [111486304, 4] + /distilbert/transformer/layer.2/attention/Softmax_output_0:0_max: + dtype: fp32 + shape: [1] + location: [111486308, 4] + /distilbert/transformer/layer.2/attention/Reshape_4_output_0:0_min: + dtype: fp32 + shape: [1] + location: [112085368, 4] + /distilbert/transformer/layer.2/attention/Reshape_4_output_0:0_max: + dtype: fp32 + shape: [1] + location: [112085372, 4] + /distilbert/transformer/layer.2/attention/out_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [111486328, 589824] + distilbert.transformer.layer.2.attention.out_lin.bias:0: + dtype: s32 + shape: [768] + location: [112076152, 3072] + /distilbert/transformer/layer.2/attention/out_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [112079224, 3072] + /distilbert/transformer/layer.2/attention/out_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [112082296, 3072] + /distilbert/transformer/layer.2/Add_output_0:0_min: + dtype: fp32 + shape: [1] + location: [112085376, 4] + /distilbert/transformer/layer.2/Add_output_0:0_max: + dtype: fp32 + shape: [1] + location: [112085380, 4] + distilbert.transformer.layer.2.sa_layer_norm.weight:0: + dtype: fp32 + shape: [768] + location: [112085384, 3072] + distilbert.transformer.layer.2.sa_layer_norm.bias:0: + dtype: fp32 + shape: [768] + location: [112088456, 3072] + /distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [114487696, 4] + /distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [114487700, 4] + /distilbert/transformer/layer.2/ffn/lin1/Transpose_output_0_quantized:0: + dtype: s8 + shape: [3072, 768] + location: [112091536, 2359296] + distilbert.transformer.layer.2.ffn.lin1.bias:0: + dtype: s32 + shape: [3072] + location: [114450832, 12288] + /distilbert/transformer/layer.2/ffn/lin1/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [3072] + location: [114463120, 12288] + /distilbert/transformer/layer.2/ffn/lin1/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [3072] + location: [114475408, 12288] + /distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [116856224, 4] + /distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [116856228, 4] + /distilbert/transformer/layer.2/ffn/lin2/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 3072] + location: [114487712, 2359296] + distilbert.transformer.layer.2.ffn.lin2.bias:0: + dtype: s32 + shape: [768] + location: [116847008, 3072] + /distilbert/transformer/layer.2/ffn/lin2/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [116850080, 3072] + /distilbert/transformer/layer.2/ffn/lin2/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [116853152, 3072] + /distilbert/transformer/layer.2/Add_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [116856232, 4] + /distilbert/transformer/layer.2/Add_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [116856236, 4] + distilbert.transformer.layer.2.output_layer_norm.weight:0: + dtype: fp32 + shape: [768] + location: [116856240, 3072] + distilbert.transformer.layer.2.output_layer_norm.bias:0: + dtype: fp32 + shape: [768] + location: [116859312, 3072] + /distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [118659544, 4] + /distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [118659548, 4] + /distilbert/transformer/layer.3/attention/k_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [116862392, 589824] + distilbert.transformer.layer.3.attention.k_lin.bias:0: + dtype: s32 + shape: [768] + location: [117452216, 3072] + /distilbert/transformer/layer.3/attention/k_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [117455288, 3072] + /distilbert/transformer/layer.3/attention/k_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [117458360, 3072] + /distilbert/transformer/layer.3/attention/k_lin/Add_output_0:0_min: + dtype: fp32 + shape: [1] + location: [117461440, 4] + /distilbert/transformer/layer.3/attention/k_lin/Add_output_0:0_max: + dtype: fp32 + shape: [1] + location: [117461444, 4] + /distilbert/transformer/layer.3/attention/q_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [117461448, 589824] + distilbert.transformer.layer.3.attention.q_lin.bias:0: + dtype: s32 + shape: [768] + location: [118051272, 3072] + /distilbert/transformer/layer.3/attention/q_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [118054344, 3072] + /distilbert/transformer/layer.3/attention/q_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [118057416, 3072] + /distilbert/transformer/layer.3/attention/q_lin/Add_output_0:0_min: + dtype: fp32 + shape: [1] + location: [118060496, 4] + /distilbert/transformer/layer.3/attention/q_lin/Add_output_0:0_max: + dtype: fp32 + shape: [1] + location: [118060500, 4] + /distilbert/transformer/layer.3/attention/v_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [118060504, 589824] + distilbert.transformer.layer.3.attention.v_lin.bias:0: + dtype: s32 + shape: [768] + location: [118650328, 3072] + /distilbert/transformer/layer.3/attention/v_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [118653400, 3072] + /distilbert/transformer/layer.3/attention/v_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [118656472, 3072] + /distilbert/transformer/layer.3/attention/Reshape_2_output_0:0_min: + dtype: fp32 + shape: [1] + location: [118659576, 4] + /distilbert/transformer/layer.3/attention/Reshape_2_output_0:0_max: + dtype: fp32 + shape: [1] + location: [118659580, 4] + /distilbert/transformer/layer.3/attention/Softmax_output_0:0_min: + dtype: fp32 + shape: [1] + location: [118659568, 4] + /distilbert/transformer/layer.3/attention/Softmax_output_0:0_max: + dtype: fp32 + shape: [1] + location: [118659572, 4] + /distilbert/transformer/layer.3/attention/Reshape_4_output_0:0_min: + dtype: fp32 + shape: [1] + location: [119258632, 4] + /distilbert/transformer/layer.3/attention/Reshape_4_output_0:0_max: + dtype: fp32 + shape: [1] + location: [119258636, 4] + /distilbert/transformer/layer.3/attention/out_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [118659592, 589824] + distilbert.transformer.layer.3.attention.out_lin.bias:0: + dtype: s32 + shape: [768] + location: [119249416, 3072] + /distilbert/transformer/layer.3/attention/out_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [119252488, 3072] + /distilbert/transformer/layer.3/attention/out_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [119255560, 3072] + /distilbert/transformer/layer.3/Add_output_0:0_min: + dtype: fp32 + shape: [1] + location: [119258640, 4] + /distilbert/transformer/layer.3/Add_output_0:0_max: + dtype: fp32 + shape: [1] + location: [119258644, 4] + distilbert.transformer.layer.3.sa_layer_norm.weight:0: + dtype: fp32 + shape: [768] + location: [119258648, 3072] + distilbert.transformer.layer.3.sa_layer_norm.bias:0: + dtype: fp32 + shape: [768] + location: [119261720, 3072] + /distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [121660960, 4] + /distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [121660964, 4] + /distilbert/transformer/layer.3/ffn/lin1/Transpose_output_0_quantized:0: + dtype: s8 + shape: [3072, 768] + location: [119264800, 2359296] + distilbert.transformer.layer.3.ffn.lin1.bias:0: + dtype: s32 + shape: [3072] + location: [121624096, 12288] + /distilbert/transformer/layer.3/ffn/lin1/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [3072] + location: [121636384, 12288] + /distilbert/transformer/layer.3/ffn/lin1/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [3072] + location: [121648672, 12288] + /distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [124029488, 4] + /distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [124029492, 4] + /distilbert/transformer/layer.3/ffn/lin2/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 3072] + location: [121660976, 2359296] + distilbert.transformer.layer.3.ffn.lin2.bias:0: + dtype: s32 + shape: [768] + location: [124020272, 3072] + /distilbert/transformer/layer.3/ffn/lin2/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [124023344, 3072] + /distilbert/transformer/layer.3/ffn/lin2/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [124026416, 3072] + /distilbert/transformer/layer.3/Add_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [124029496, 4] + /distilbert/transformer/layer.3/Add_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [124029500, 4] + distilbert.transformer.layer.3.output_layer_norm.weight:0: + dtype: fp32 + shape: [768] + location: [124029504, 3072] + distilbert.transformer.layer.3.output_layer_norm.bias:0: + dtype: fp32 + shape: [768] + location: [124032576, 3072] + /distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [125832808, 4] + /distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [125832812, 4] + /distilbert/transformer/layer.4/attention/k_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [124035656, 589824] + distilbert.transformer.layer.4.attention.k_lin.bias:0: + dtype: s32 + shape: [768] + location: [124625480, 3072] + /distilbert/transformer/layer.4/attention/k_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [124628552, 3072] + /distilbert/transformer/layer.4/attention/k_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [124631624, 3072] + /distilbert/transformer/layer.4/attention/k_lin/Add_output_0:0_min: + dtype: fp32 + shape: [1] + location: [124634704, 4] + /distilbert/transformer/layer.4/attention/k_lin/Add_output_0:0_max: + dtype: fp32 + shape: [1] + location: [124634708, 4] + /distilbert/transformer/layer.4/attention/q_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [124634712, 589824] + distilbert.transformer.layer.4.attention.q_lin.bias:0: + dtype: s32 + shape: [768] + location: [125224536, 3072] + /distilbert/transformer/layer.4/attention/q_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [125227608, 3072] + /distilbert/transformer/layer.4/attention/q_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [125230680, 3072] + /distilbert/transformer/layer.4/attention/q_lin/Add_output_0:0_min: + dtype: fp32 + shape: [1] + location: [125233760, 4] + /distilbert/transformer/layer.4/attention/q_lin/Add_output_0:0_max: + dtype: fp32 + shape: [1] + location: [125233764, 4] + /distilbert/transformer/layer.4/attention/v_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [125233768, 589824] + distilbert.transformer.layer.4.attention.v_lin.bias:0: + dtype: s32 + shape: [768] + location: [125823592, 3072] + /distilbert/transformer/layer.4/attention/v_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [125826664, 3072] + /distilbert/transformer/layer.4/attention/v_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [125829736, 3072] + /distilbert/transformer/layer.4/attention/Reshape_2_output_0:0_min: + dtype: fp32 + shape: [1] + location: [125832840, 4] + /distilbert/transformer/layer.4/attention/Reshape_2_output_0:0_max: + dtype: fp32 + shape: [1] + location: [125832844, 4] + /distilbert/transformer/layer.4/attention/Softmax_output_0:0_min: + dtype: fp32 + shape: [1] + location: [125832832, 4] + /distilbert/transformer/layer.4/attention/Softmax_output_0:0_max: + dtype: fp32 + shape: [1] + location: [125832836, 4] + /distilbert/transformer/layer.4/attention/Reshape_4_output_0:0_min: + dtype: fp32 + shape: [1] + location: [126431896, 4] + /distilbert/transformer/layer.4/attention/Reshape_4_output_0:0_max: + dtype: fp32 + shape: [1] + location: [126431900, 4] + /distilbert/transformer/layer.4/attention/out_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [125832856, 589824] + distilbert.transformer.layer.4.attention.out_lin.bias:0: + dtype: s32 + shape: [768] + location: [126422680, 3072] + /distilbert/transformer/layer.4/attention/out_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [126425752, 3072] + /distilbert/transformer/layer.4/attention/out_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [126428824, 3072] + /distilbert/transformer/layer.4/Add_output_0:0_min: + dtype: fp32 + shape: [1] + location: [126431904, 4] + /distilbert/transformer/layer.4/Add_output_0:0_max: + dtype: fp32 + shape: [1] + location: [126431908, 4] + distilbert.transformer.layer.4.sa_layer_norm.weight:0: + dtype: fp32 + shape: [768] + location: [126431912, 3072] + distilbert.transformer.layer.4.sa_layer_norm.bias:0: + dtype: fp32 + shape: [768] + location: [126434984, 3072] + /distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [128834224, 4] + /distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [128834228, 4] + /distilbert/transformer/layer.4/ffn/lin1/Transpose_output_0_quantized:0: + dtype: s8 + shape: [3072, 768] + location: [126438064, 2359296] + distilbert.transformer.layer.4.ffn.lin1.bias:0: + dtype: s32 + shape: [3072] + location: [128797360, 12288] + /distilbert/transformer/layer.4/ffn/lin1/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [3072] + location: [128809648, 12288] + /distilbert/transformer/layer.4/ffn/lin1/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [3072] + location: [128821936, 12288] + /distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [131202752, 4] + /distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [131202756, 4] + /distilbert/transformer/layer.4/ffn/lin2/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 3072] + location: [128834240, 2359296] + distilbert.transformer.layer.4.ffn.lin2.bias:0: + dtype: s32 + shape: [768] + location: [131193536, 3072] + /distilbert/transformer/layer.4/ffn/lin2/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [131196608, 3072] + /distilbert/transformer/layer.4/ffn/lin2/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [131199680, 3072] + /distilbert/transformer/layer.4/Add_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [131202760, 4] + /distilbert/transformer/layer.4/Add_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [131202764, 4] + distilbert.transformer.layer.4.output_layer_norm.weight:0: + dtype: fp32 + shape: [768] + location: [131202768, 3072] + distilbert.transformer.layer.4.output_layer_norm.bias:0: + dtype: fp32 + shape: [768] + location: [131205840, 3072] + /distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [133006072, 4] + /distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [133006076, 4] + /distilbert/transformer/layer.5/attention/k_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [131208920, 589824] + distilbert.transformer.layer.5.attention.k_lin.bias:0: + dtype: s32 + shape: [768] + location: [131798744, 3072] + /distilbert/transformer/layer.5/attention/k_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [131801816, 3072] + /distilbert/transformer/layer.5/attention/k_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [131804888, 3072] + /distilbert/transformer/layer.5/attention/k_lin/Add_output_0:0_min: + dtype: fp32 + shape: [1] + location: [131807968, 4] + /distilbert/transformer/layer.5/attention/k_lin/Add_output_0:0_max: + dtype: fp32 + shape: [1] + location: [131807972, 4] + /distilbert/transformer/layer.5/attention/q_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [131807976, 589824] + distilbert.transformer.layer.5.attention.q_lin.bias:0: + dtype: s32 + shape: [768] + location: [132397800, 3072] + /distilbert/transformer/layer.5/attention/q_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [132400872, 3072] + /distilbert/transformer/layer.5/attention/q_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [132403944, 3072] + /distilbert/transformer/layer.5/attention/q_lin/Add_output_0:0_min: + dtype: fp32 + shape: [1] + location: [132407024, 4] + /distilbert/transformer/layer.5/attention/q_lin/Add_output_0:0_max: + dtype: fp32 + shape: [1] + location: [132407028, 4] + /distilbert/transformer/layer.5/attention/v_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [132407032, 589824] + distilbert.transformer.layer.5.attention.v_lin.bias:0: + dtype: s32 + shape: [768] + location: [132996856, 3072] + /distilbert/transformer/layer.5/attention/v_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [132999928, 3072] + /distilbert/transformer/layer.5/attention/v_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [133003000, 3072] + /distilbert/transformer/layer.5/attention/Reshape_2_output_0:0_min: + dtype: fp32 + shape: [1] + location: [133006104, 4] + /distilbert/transformer/layer.5/attention/Reshape_2_output_0:0_max: + dtype: fp32 + shape: [1] + location: [133006108, 4] + /distilbert/transformer/layer.5/attention/Softmax_output_0:0_min: + dtype: fp32 + shape: [1] + location: [133006096, 4] + /distilbert/transformer/layer.5/attention/Softmax_output_0:0_max: + dtype: fp32 + shape: [1] + location: [133006100, 4] + /distilbert/transformer/layer.5/attention/Reshape_4_output_0:0_min: + dtype: fp32 + shape: [1] + location: [133605160, 4] + /distilbert/transformer/layer.5/attention/Reshape_4_output_0:0_max: + dtype: fp32 + shape: [1] + location: [133605164, 4] + /distilbert/transformer/layer.5/attention/out_lin/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 768] + location: [133006120, 589824] + distilbert.transformer.layer.5.attention.out_lin.bias:0: + dtype: s32 + shape: [768] + location: [133595944, 3072] + /distilbert/transformer/layer.5/attention/out_lin/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [133599016, 3072] + /distilbert/transformer/layer.5/attention/out_lin/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [133602088, 3072] + /distilbert/transformer/layer.5/Add_output_0:0_min: + dtype: fp32 + shape: [1] + location: [133605168, 4] + /distilbert/transformer/layer.5/Add_output_0:0_max: + dtype: fp32 + shape: [1] + location: [133605172, 4] + distilbert.transformer.layer.5.sa_layer_norm.weight:0: + dtype: fp32 + shape: [768] + location: [133605176, 3072] + distilbert.transformer.layer.5.sa_layer_norm.bias:0: + dtype: fp32 + shape: [768] + location: [133608248, 3072] + /distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [136007488, 4] + /distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [136007492, 4] + /distilbert/transformer/layer.5/ffn/lin1/Transpose_output_0_quantized:0: + dtype: s8 + shape: [3072, 768] + location: [133611328, 2359296] + distilbert.transformer.layer.5.ffn.lin1.bias:0: + dtype: s32 + shape: [3072] + location: [135970624, 12288] + /distilbert/transformer/layer.5/ffn/lin1/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [3072] + location: [135982912, 12288] + /distilbert/transformer/layer.5/ffn/lin1/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [3072] + location: [135995200, 12288] + /distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [138376016, 4] + /distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [138376020, 4] + /distilbert/transformer/layer.5/ffn/lin2/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 3072] + location: [136007504, 2359296] + distilbert.transformer.layer.5.ffn.lin2.bias:0: + dtype: s32 + shape: [768] + location: [138366800, 3072] + /distilbert/transformer/layer.5/ffn/lin2/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [768] + location: [138369872, 3072] + /distilbert/transformer/layer.5/ffn/lin2/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [768] + location: [138372944, 3072] + /distilbert/transformer/layer.5/Add_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [138376024, 4] + /distilbert/transformer/layer.5/Add_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [138376028, 4] + distilbert.transformer.layer.5.output_layer_norm.weight:0: + dtype: fp32 + shape: [768] + location: [138376032, 3072] + distilbert.transformer.layer.5.output_layer_norm.bias:0: + dtype: fp32 + shape: [768] + location: [138379104, 3072] + /distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_min: + dtype: fp32 + shape: [1] + location: [138383728, 4] + /distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_max: + dtype: fp32 + shape: [1] + location: [138383732, 4] + /qa_outputs/Transpose_output_0_quantized:0: + dtype: s8 + shape: [768, 2] + location: [138382184, 1536] + qa_outputs.bias:0: + dtype: s32 + shape: [2] + location: [138383720, 8] + /qa_outputs/Transpose_output_0_quantized:0_min: + dtype: fp32 + shape: [2] + location: [138383736, 8] + /qa_outputs/Transpose_output_0_quantized:0_max: + dtype: fp32 + shape: [2] + location: [138383744, 8] + /qa_outputs/Add_output_0:0_min: + dtype: fp32 + shape: [1] + location: [138383752, 4] + /qa_outputs/Add_output_0:0_max: + dtype: fp32 + shape: [1] + location: [138383756, 4] + padding_sequence: + type: PaddingSequence + input: + attention_mask:0: {} + output: + padding_sequence:0: {} + attr: + dst_shape: -1,12,0,-1 + dims: 1 + position_embeddings/after/reshape: + type: Reshape + input: + distilbert.embeddings.position_embeddings.weight:0: {} + input_ids:0: {} + output: + position_embeddings/after/reshape:0: {} + attr: + dst_shape: 1,-1,768 + dims: 1 + /distilbert/embeddings/position_embeddings/Gather: + type: Reshape + input: + position_embeddings/after/reshape:0: {} + output: + /distilbert/embeddings/position_embeddings/Gather_output_0:0: {} + attr: + dst_shape: 1,-1 + word_embeddings/reshape: + type: Reshape + input: + input_ids:0: {} + output: + word_embeddings/reshape:0: {} + attr: + dst_shape: -1 + /distilbert/embeddings/word_embeddings/Gather: + type: Gather + input: + word_embeddings/reshape:0: {} + distilbert.embeddings.word_embeddings.weight:0: {} + /distilbert/embeddings/position_embeddings/Gather_output_0:0: {} + input_ids:0: {} + output: + embeddings_add/reshape_2d:0: {} + attr: + axis: 0 + batch_dims: 0 + append_op: binary_add + reshape: -1,-1,768 + reshape_dims: 0,1 + mul: 1,2 + /distilbert/embeddings/LayerNorm/Add_1: + type: LayerNorm + input: + embeddings_add/reshape_2d:0: {} + distilbert.embeddings.LayerNorm.weight:0: {} + distilbert.embeddings.LayerNorm.bias:0: {} + output: + /distilbert/embeddings/LayerNorm/Add_1_output_0:0: {} + attr: + epsilon: 9.999999960041972e-13 + /distilbert/transformer/layer.0/attention/k_lin/Add_quant_0_Reorder_Post_0: + type: Reorder + input: + /distilbert/embeddings/LayerNorm/Add_1_output_0:0: {} + output: + /distilbert/embeddings/LayerNorm/Add_1_output_0:0_reorder: {} + attr: + src_perm: 0,1 + dst_perm: 1,0 + /distilbert/transformer/layer.0/attention/k_lin/Add_quant_0: + type: Quantize + input: + /distilbert/embeddings/LayerNorm/Add_1_output_0:0_reorder: {} + /distilbert/embeddings/LayerNorm/Add_1_output_0:0_min: {} + /distilbert/embeddings/LayerNorm/Add_1_output_0:0_max: {} + output: + /distilbert/embeddings/LayerNorm/Add_1_output_0:0_quant: {} + attr: + output_dtype: u8 + /distilbert/transformer/layer.0/attention/k_lin/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.0/attention/k_lin/Transpose_output_0_quantized:0: {} + /distilbert/embeddings/LayerNorm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.0.attention.k_lin.bias:0: {} + /distilbert/transformer/layer.0/attention/k_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.0/attention/k_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/embeddings/LayerNorm/Add_1_output_0:0_min: {} + /distilbert/embeddings/LayerNorm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.0/attention/k_lin/Add_output_0:0_min: {} + /distilbert/transformer/layer.0/attention/k_lin/Add_output_0:0_max: {} + input_ids:0: {} + output: + /distilbert/transformer/layer.0/attention/Reshape_1_output_0:0: {} + attr: + reshape: 12,64,-1, -1 + reshape_dims: '0' + /distilbert/transformer/layer.0/attention/q_lin/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.0/attention/q_lin/Transpose_output_0_quantized:0: {} + /distilbert/embeddings/LayerNorm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.0.attention.q_lin.bias:0: {} + /distilbert/transformer/layer.0/attention/q_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.0/attention/q_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/embeddings/LayerNorm/Add_1_output_0:0_min: {} + /distilbert/embeddings/LayerNorm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.0/attention/q_lin/Add_output_0:0_min: {} + /distilbert/transformer/layer.0/attention/q_lin/Add_output_0:0_max: {} + input_ids:0: {} + output: + /distilbert/transformer/layer.0/attention/Reshape_output_0:0: {} + attr: + reshape: 12,64,-1, -1 + reshape_dims: '0' + /distilbert/transformer/layer.0/attention/v_lin/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.0/attention/v_lin/Transpose_output_0_quantized:0: {} + /distilbert/embeddings/LayerNorm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.0.attention.v_lin.bias:0: {} + /distilbert/transformer/layer.0/attention/v_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.0/attention/v_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/embeddings/LayerNorm/Add_1_output_0:0_min: {} + /distilbert/embeddings/LayerNorm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.0/attention/Reshape_2_output_0:0_min: {} + /distilbert/transformer/layer.0/attention/Reshape_2_output_0:0_max: {} + input_ids:0: {} + output: + /distilbert/transformer/layer.0/attention/Reshape_2_output_0:0: {} + attr: + output_dtype: s8 + reshape: 12,64,-1, -1 + reshape_dims: '0' + /distilbert/transformer/layer.0/attention/Where: + type: Matmul + input: + /distilbert/transformer/layer.0/attention/Reshape_output_0:0: {} + /distilbert/transformer/layer.0/attention/Reshape_1_output_0:0: {} + padding_sequence:0: {} + output: + /distilbert/transformer/layer.0/attention/Where_output_0:0: {} + attr: + src0_perm: 2,0,3,1 + src1_perm: 2,0,1,3 + output_scale: 0.125 + format_any: false + append_op: binary_add + /distilbert/transformer/layer.0/attention/Softmax: + type: Softmax + input: + /distilbert/transformer/layer.0/attention/Where_output_0:0: {} + /distilbert/transformer/layer.0/attention/Softmax_output_0:0_min: {} + /distilbert/transformer/layer.0/attention/Softmax_output_0:0_max: {} + output: + /distilbert/transformer/layer.0/attention/Softmax_output_0:0: {} + attr: + output_dtype: u8 + /distilbert/transformer/layer.0/attention/Transpose_3: + type: Matmul + input: + /distilbert/transformer/layer.0/attention/Softmax_output_0:0: {} + /distilbert/transformer/layer.0/attention/Reshape_2_output_0:0: {} + /distilbert/transformer/layer.0/attention/Softmax_output_0:0_min: {} + /distilbert/transformer/layer.0/attention/Softmax_output_0:0_max: {} + /distilbert/transformer/layer.0/attention/Reshape_2_output_0:0_min: {} + /distilbert/transformer/layer.0/attention/Reshape_2_output_0:0_max: {} + /distilbert/transformer/layer.0/attention/Reshape_4_output_0:0_min: {} + /distilbert/transformer/layer.0/attention/Reshape_4_output_0:0_max: {} + output: + /distilbert/transformer/layer.0/attention/Reshape_4_output_0:0: {} + attr: + src1_perm: 2,0,3,1 + dst_perm: 1,3,0,2 + output_dtype: u8 + reshape: 768,-1 + /distilbert/transformer/layer.0/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.0/attention/out_lin/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.0/attention/Reshape_4_output_0:0: {} + distilbert.transformer.layer.0.attention.out_lin.bias:0: {} + /distilbert/embeddings/LayerNorm/Add_1_output_0:0_reorder: {} + /distilbert/transformer/layer.0/attention/out_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.0/attention/out_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.0/attention/Reshape_4_output_0:0_min: {} + /distilbert/transformer/layer.0/attention/Reshape_4_output_0:0_max: {} + /distilbert/transformer/layer.0/Add_output_0:0_min: {} + /distilbert/transformer/layer.0/Add_output_0:0_max: {} + output: + /distilbert/transformer/layer.0/Add_output_0:0: {} + attr: + append_op: sum + /distilbert/transformer/layer.0/sa_layer_norm/Add_1: + type: LayerNorm + input: + /distilbert/transformer/layer.0/Add_output_0:0: {} + distilbert.transformer.layer.0.sa_layer_norm.weight:0: {} + distilbert.transformer.layer.0.sa_layer_norm.bias:0: {} + output: + /distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0: {} + attr: + epsilon: 9.999999960041972e-13 + transpose_mode: 1, 0 + /distilbert/transformer/layer.0/ffn/activation/Mul_1_quant_0: + type: Quantize + input: + /distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_quant: {} + attr: + output_dtype: u8 + /distilbert/transformer/layer.0/ffn/activation/Mul_1: + type: InnerProduct + input: + /distilbert/transformer/layer.0/ffn/lin1/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.0.ffn.lin1.bias:0: {} + /distilbert/transformer/layer.0/ffn/lin1/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.0/ffn/lin1/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0_min: {} + /distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0: {} + attr: + append_op: gelu_tanh + output_dtype: u8 + /distilbert/transformer/layer.0/Add_1: + type: InnerProduct + input: + /distilbert/transformer/layer.0/ffn/lin2/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0: {} + distilbert.transformer.layer.0.ffn.lin2.bias:0: {} + /distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.0/ffn/lin2/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.0/ffn/lin2/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0_min: {} + /distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0_max: {} + /distilbert/transformer/layer.0/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.0/Add_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.0/Add_1_output_0:0: {} + attr: + append_op: sum + /distilbert/transformer/layer.0/output_layer_norm/Add_1: + type: LayerNorm + input: + /distilbert/transformer/layer.0/Add_1_output_0:0: {} + distilbert.transformer.layer.0.output_layer_norm.weight:0: {} + distilbert.transformer.layer.0.output_layer_norm.bias:0: {} + output: + /distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0: {} + attr: + epsilon: 9.999999960041972e-13 + transpose_mode: 1, 0 + /distilbert/transformer/layer.1/attention/k_lin/Add_quant_0: + type: Quantize + input: + /distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_quant: {} + attr: + output_dtype: u8 + /distilbert/transformer/layer.1/attention/k_lin/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.1/attention/k_lin/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.1.attention.k_lin.bias:0: {} + /distilbert/transformer/layer.1/attention/k_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.1/attention/k_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.1/attention/k_lin/Add_output_0:0_min: {} + /distilbert/transformer/layer.1/attention/k_lin/Add_output_0:0_max: {} + input_ids:0: {} + output: + /distilbert/transformer/layer.1/attention/Reshape_1_output_0:0: {} + attr: + reshape: 12,64,-1, -1 + reshape_dims: '0' + /distilbert/transformer/layer.1/attention/q_lin/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.1/attention/q_lin/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.1.attention.q_lin.bias:0: {} + /distilbert/transformer/layer.1/attention/q_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.1/attention/q_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.1/attention/q_lin/Add_output_0:0_min: {} + /distilbert/transformer/layer.1/attention/q_lin/Add_output_0:0_max: {} + input_ids:0: {} + output: + /distilbert/transformer/layer.1/attention/Reshape_output_0:0: {} + attr: + reshape: 12,64,-1, -1 + reshape_dims: '0' + /distilbert/transformer/layer.1/attention/v_lin/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.1/attention/v_lin/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.1.attention.v_lin.bias:0: {} + /distilbert/transformer/layer.1/attention/v_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.1/attention/v_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.1/attention/Reshape_2_output_0:0_min: {} + /distilbert/transformer/layer.1/attention/Reshape_2_output_0:0_max: {} + input_ids:0: {} + output: + /distilbert/transformer/layer.1/attention/Reshape_2_output_0:0: {} + attr: + output_dtype: s8 + reshape: 12,64,-1, -1 + reshape_dims: '0' + /distilbert/transformer/layer.1/attention/Where: + type: Matmul + input: + /distilbert/transformer/layer.1/attention/Reshape_output_0:0: {} + /distilbert/transformer/layer.1/attention/Reshape_1_output_0:0: {} + padding_sequence:0: {} + output: + /distilbert/transformer/layer.1/attention/Where_output_0:0: {} + attr: + src0_perm: 2,0,3,1 + src1_perm: 2,0,1,3 + output_scale: 0.125 + format_any: false + append_op: binary_add + /distilbert/transformer/layer.1/attention/Softmax: + type: Softmax + input: + /distilbert/transformer/layer.1/attention/Where_output_0:0: {} + /distilbert/transformer/layer.1/attention/Softmax_output_0:0_min: {} + /distilbert/transformer/layer.1/attention/Softmax_output_0:0_max: {} + output: + /distilbert/transformer/layer.1/attention/Softmax_output_0:0: {} + attr: + output_dtype: u8 + /distilbert/transformer/layer.1/attention/Transpose_3: + type: Matmul + input: + /distilbert/transformer/layer.1/attention/Softmax_output_0:0: {} + /distilbert/transformer/layer.1/attention/Reshape_2_output_0:0: {} + /distilbert/transformer/layer.1/attention/Softmax_output_0:0_min: {} + /distilbert/transformer/layer.1/attention/Softmax_output_0:0_max: {} + /distilbert/transformer/layer.1/attention/Reshape_2_output_0:0_min: {} + /distilbert/transformer/layer.1/attention/Reshape_2_output_0:0_max: {} + /distilbert/transformer/layer.1/attention/Reshape_4_output_0:0_min: {} + /distilbert/transformer/layer.1/attention/Reshape_4_output_0:0_max: {} + output: + /distilbert/transformer/layer.1/attention/Reshape_4_output_0:0: {} + attr: + src1_perm: 2,0,3,1 + dst_perm: 1,3,0,2 + output_dtype: u8 + reshape: 768,-1 + /distilbert/transformer/layer.1/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.1/attention/out_lin/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.1/attention/Reshape_4_output_0:0: {} + distilbert.transformer.layer.1.attention.out_lin.bias:0: {} + /distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.1/attention/out_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.1/attention/out_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.1/attention/Reshape_4_output_0:0_min: {} + /distilbert/transformer/layer.1/attention/Reshape_4_output_0:0_max: {} + /distilbert/transformer/layer.1/Add_output_0:0_min: {} + /distilbert/transformer/layer.1/Add_output_0:0_max: {} + output: + /distilbert/transformer/layer.1/Add_output_0:0: {} + attr: + append_op: sum + /distilbert/transformer/layer.1/sa_layer_norm/Add_1: + type: LayerNorm + input: + /distilbert/transformer/layer.1/Add_output_0:0: {} + distilbert.transformer.layer.1.sa_layer_norm.weight:0: {} + distilbert.transformer.layer.1.sa_layer_norm.bias:0: {} + output: + /distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0: {} + attr: + epsilon: 9.999999960041972e-13 + transpose_mode: 1, 0 + /distilbert/transformer/layer.1/ffn/activation/Mul_1_quant_0: + type: Quantize + input: + /distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_quant: {} + attr: + output_dtype: u8 + /distilbert/transformer/layer.1/ffn/activation/Mul_1: + type: InnerProduct + input: + /distilbert/transformer/layer.1/ffn/lin1/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.1.ffn.lin1.bias:0: {} + /distilbert/transformer/layer.1/ffn/lin1/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.1/ffn/lin1/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0_min: {} + /distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0: {} + attr: + append_op: gelu_tanh + output_dtype: u8 + /distilbert/transformer/layer.1/Add_1: + type: InnerProduct + input: + /distilbert/transformer/layer.1/ffn/lin2/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0: {} + distilbert.transformer.layer.1.ffn.lin2.bias:0: {} + /distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.1/ffn/lin2/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.1/ffn/lin2/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0_min: {} + /distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0_max: {} + /distilbert/transformer/layer.1/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.1/Add_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.1/Add_1_output_0:0: {} + attr: + append_op: sum + /distilbert/transformer/layer.1/output_layer_norm/Add_1: + type: LayerNorm + input: + /distilbert/transformer/layer.1/Add_1_output_0:0: {} + distilbert.transformer.layer.1.output_layer_norm.weight:0: {} + distilbert.transformer.layer.1.output_layer_norm.bias:0: {} + output: + /distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0: {} + attr: + epsilon: 9.999999960041972e-13 + transpose_mode: 1, 0 + /distilbert/transformer/layer.2/attention/k_lin/Add_quant_0: + type: Quantize + input: + /distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_quant: {} + attr: + output_dtype: u8 + /distilbert/transformer/layer.2/attention/k_lin/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.2/attention/k_lin/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.2.attention.k_lin.bias:0: {} + /distilbert/transformer/layer.2/attention/k_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.2/attention/k_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.2/attention/k_lin/Add_output_0:0_min: {} + /distilbert/transformer/layer.2/attention/k_lin/Add_output_0:0_max: {} + input_ids:0: {} + output: + /distilbert/transformer/layer.2/attention/Reshape_1_output_0:0: {} + attr: + reshape: 12,64,-1, -1 + reshape_dims: '0' + /distilbert/transformer/layer.2/attention/q_lin/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.2/attention/q_lin/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.2.attention.q_lin.bias:0: {} + /distilbert/transformer/layer.2/attention/q_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.2/attention/q_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.2/attention/q_lin/Add_output_0:0_min: {} + /distilbert/transformer/layer.2/attention/q_lin/Add_output_0:0_max: {} + input_ids:0: {} + output: + /distilbert/transformer/layer.2/attention/Reshape_output_0:0: {} + attr: + reshape: 12,64,-1, -1 + reshape_dims: '0' + /distilbert/transformer/layer.2/attention/v_lin/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.2/attention/v_lin/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.2.attention.v_lin.bias:0: {} + /distilbert/transformer/layer.2/attention/v_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.2/attention/v_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.2/attention/Reshape_2_output_0:0_min: {} + /distilbert/transformer/layer.2/attention/Reshape_2_output_0:0_max: {} + input_ids:0: {} + output: + /distilbert/transformer/layer.2/attention/Reshape_2_output_0:0: {} + attr: + output_dtype: s8 + reshape: 12,64,-1, -1 + reshape_dims: '0' + /distilbert/transformer/layer.2/attention/Where: + type: Matmul + input: + /distilbert/transformer/layer.2/attention/Reshape_output_0:0: {} + /distilbert/transformer/layer.2/attention/Reshape_1_output_0:0: {} + padding_sequence:0: {} + output: + /distilbert/transformer/layer.2/attention/Where_output_0:0: {} + attr: + src0_perm: 2,0,3,1 + src1_perm: 2,0,1,3 + output_scale: 0.125 + format_any: false + append_op: binary_add + /distilbert/transformer/layer.2/attention/Softmax: + type: Softmax + input: + /distilbert/transformer/layer.2/attention/Where_output_0:0: {} + /distilbert/transformer/layer.2/attention/Softmax_output_0:0_min: {} + /distilbert/transformer/layer.2/attention/Softmax_output_0:0_max: {} + output: + /distilbert/transformer/layer.2/attention/Softmax_output_0:0: {} + attr: + output_dtype: u8 + /distilbert/transformer/layer.2/attention/Transpose_3: + type: Matmul + input: + /distilbert/transformer/layer.2/attention/Softmax_output_0:0: {} + /distilbert/transformer/layer.2/attention/Reshape_2_output_0:0: {} + /distilbert/transformer/layer.2/attention/Softmax_output_0:0_min: {} + /distilbert/transformer/layer.2/attention/Softmax_output_0:0_max: {} + /distilbert/transformer/layer.2/attention/Reshape_2_output_0:0_min: {} + /distilbert/transformer/layer.2/attention/Reshape_2_output_0:0_max: {} + /distilbert/transformer/layer.2/attention/Reshape_4_output_0:0_min: {} + /distilbert/transformer/layer.2/attention/Reshape_4_output_0:0_max: {} + output: + /distilbert/transformer/layer.2/attention/Reshape_4_output_0:0: {} + attr: + src1_perm: 2,0,3,1 + dst_perm: 1,3,0,2 + output_dtype: u8 + reshape: 768,-1 + /distilbert/transformer/layer.2/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.2/attention/out_lin/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.2/attention/Reshape_4_output_0:0: {} + distilbert.transformer.layer.2.attention.out_lin.bias:0: {} + /distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.2/attention/out_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.2/attention/out_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.2/attention/Reshape_4_output_0:0_min: {} + /distilbert/transformer/layer.2/attention/Reshape_4_output_0:0_max: {} + /distilbert/transformer/layer.2/Add_output_0:0_min: {} + /distilbert/transformer/layer.2/Add_output_0:0_max: {} + output: + /distilbert/transformer/layer.2/Add_output_0:0: {} + attr: + append_op: sum + /distilbert/transformer/layer.2/sa_layer_norm/Add_1: + type: LayerNorm + input: + /distilbert/transformer/layer.2/Add_output_0:0: {} + distilbert.transformer.layer.2.sa_layer_norm.weight:0: {} + distilbert.transformer.layer.2.sa_layer_norm.bias:0: {} + output: + /distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0: {} + attr: + epsilon: 9.999999960041972e-13 + transpose_mode: 1, 0 + /distilbert/transformer/layer.2/ffn/activation/Mul_1_quant_0: + type: Quantize + input: + /distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_quant: {} + attr: + output_dtype: u8 + /distilbert/transformer/layer.2/ffn/activation/Mul_1: + type: InnerProduct + input: + /distilbert/transformer/layer.2/ffn/lin1/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.2.ffn.lin1.bias:0: {} + /distilbert/transformer/layer.2/ffn/lin1/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.2/ffn/lin1/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0_min: {} + /distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0: {} + attr: + append_op: gelu_tanh + output_dtype: u8 + /distilbert/transformer/layer.2/Add_1: + type: InnerProduct + input: + /distilbert/transformer/layer.2/ffn/lin2/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0: {} + distilbert.transformer.layer.2.ffn.lin2.bias:0: {} + /distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.2/ffn/lin2/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.2/ffn/lin2/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0_min: {} + /distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0_max: {} + /distilbert/transformer/layer.2/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.2/Add_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.2/Add_1_output_0:0: {} + attr: + append_op: sum + /distilbert/transformer/layer.2/output_layer_norm/Add_1: + type: LayerNorm + input: + /distilbert/transformer/layer.2/Add_1_output_0:0: {} + distilbert.transformer.layer.2.output_layer_norm.weight:0: {} + distilbert.transformer.layer.2.output_layer_norm.bias:0: {} + output: + /distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0: {} + attr: + epsilon: 9.999999960041972e-13 + transpose_mode: 1, 0 + /distilbert/transformer/layer.3/attention/k_lin/Add_quant_0: + type: Quantize + input: + /distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_quant: {} + attr: + output_dtype: u8 + /distilbert/transformer/layer.3/attention/k_lin/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.3/attention/k_lin/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.3.attention.k_lin.bias:0: {} + /distilbert/transformer/layer.3/attention/k_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.3/attention/k_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.3/attention/k_lin/Add_output_0:0_min: {} + /distilbert/transformer/layer.3/attention/k_lin/Add_output_0:0_max: {} + input_ids:0: {} + output: + /distilbert/transformer/layer.3/attention/Reshape_1_output_0:0: {} + attr: + reshape: 12,64,-1, -1 + reshape_dims: '0' + /distilbert/transformer/layer.3/attention/q_lin/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.3/attention/q_lin/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.3.attention.q_lin.bias:0: {} + /distilbert/transformer/layer.3/attention/q_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.3/attention/q_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.3/attention/q_lin/Add_output_0:0_min: {} + /distilbert/transformer/layer.3/attention/q_lin/Add_output_0:0_max: {} + input_ids:0: {} + output: + /distilbert/transformer/layer.3/attention/Reshape_output_0:0: {} + attr: + reshape: 12,64,-1, -1 + reshape_dims: '0' + /distilbert/transformer/layer.3/attention/v_lin/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.3/attention/v_lin/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.3.attention.v_lin.bias:0: {} + /distilbert/transformer/layer.3/attention/v_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.3/attention/v_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.3/attention/Reshape_2_output_0:0_min: {} + /distilbert/transformer/layer.3/attention/Reshape_2_output_0:0_max: {} + input_ids:0: {} + output: + /distilbert/transformer/layer.3/attention/Reshape_2_output_0:0: {} + attr: + output_dtype: s8 + reshape: 12,64,-1, -1 + reshape_dims: '0' + /distilbert/transformer/layer.3/attention/Where: + type: Matmul + input: + /distilbert/transformer/layer.3/attention/Reshape_output_0:0: {} + /distilbert/transformer/layer.3/attention/Reshape_1_output_0:0: {} + padding_sequence:0: {} + output: + /distilbert/transformer/layer.3/attention/Where_output_0:0: {} + attr: + src0_perm: 2,0,3,1 + src1_perm: 2,0,1,3 + output_scale: 0.125 + format_any: false + append_op: binary_add + /distilbert/transformer/layer.3/attention/Softmax: + type: Softmax + input: + /distilbert/transformer/layer.3/attention/Where_output_0:0: {} + /distilbert/transformer/layer.3/attention/Softmax_output_0:0_min: {} + /distilbert/transformer/layer.3/attention/Softmax_output_0:0_max: {} + output: + /distilbert/transformer/layer.3/attention/Softmax_output_0:0: {} + attr: + output_dtype: u8 + /distilbert/transformer/layer.3/attention/Transpose_3: + type: Matmul + input: + /distilbert/transformer/layer.3/attention/Softmax_output_0:0: {} + /distilbert/transformer/layer.3/attention/Reshape_2_output_0:0: {} + /distilbert/transformer/layer.3/attention/Softmax_output_0:0_min: {} + /distilbert/transformer/layer.3/attention/Softmax_output_0:0_max: {} + /distilbert/transformer/layer.3/attention/Reshape_2_output_0:0_min: {} + /distilbert/transformer/layer.3/attention/Reshape_2_output_0:0_max: {} + /distilbert/transformer/layer.3/attention/Reshape_4_output_0:0_min: {} + /distilbert/transformer/layer.3/attention/Reshape_4_output_0:0_max: {} + output: + /distilbert/transformer/layer.3/attention/Reshape_4_output_0:0: {} + attr: + src1_perm: 2,0,3,1 + dst_perm: 1,3,0,2 + output_dtype: u8 + reshape: 768,-1 + /distilbert/transformer/layer.3/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.3/attention/out_lin/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.3/attention/Reshape_4_output_0:0: {} + distilbert.transformer.layer.3.attention.out_lin.bias:0: {} + /distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.3/attention/out_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.3/attention/out_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.3/attention/Reshape_4_output_0:0_min: {} + /distilbert/transformer/layer.3/attention/Reshape_4_output_0:0_max: {} + /distilbert/transformer/layer.3/Add_output_0:0_min: {} + /distilbert/transformer/layer.3/Add_output_0:0_max: {} + output: + /distilbert/transformer/layer.3/Add_output_0:0: {} + attr: + append_op: sum + /distilbert/transformer/layer.3/sa_layer_norm/Add_1: + type: LayerNorm + input: + /distilbert/transformer/layer.3/Add_output_0:0: {} + distilbert.transformer.layer.3.sa_layer_norm.weight:0: {} + distilbert.transformer.layer.3.sa_layer_norm.bias:0: {} + output: + /distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0: {} + attr: + epsilon: 9.999999960041972e-13 + transpose_mode: 1, 0 + /distilbert/transformer/layer.3/ffn/activation/Mul_1_quant_0: + type: Quantize + input: + /distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_quant: {} + attr: + output_dtype: u8 + /distilbert/transformer/layer.3/ffn/activation/Mul_1: + type: InnerProduct + input: + /distilbert/transformer/layer.3/ffn/lin1/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.3.ffn.lin1.bias:0: {} + /distilbert/transformer/layer.3/ffn/lin1/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.3/ffn/lin1/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0_min: {} + /distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0: {} + attr: + append_op: gelu_tanh + output_dtype: u8 + /distilbert/transformer/layer.3/Add_1: + type: InnerProduct + input: + /distilbert/transformer/layer.3/ffn/lin2/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0: {} + distilbert.transformer.layer.3.ffn.lin2.bias:0: {} + /distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.3/ffn/lin2/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.3/ffn/lin2/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0_min: {} + /distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0_max: {} + /distilbert/transformer/layer.3/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.3/Add_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.3/Add_1_output_0:0: {} + attr: + append_op: sum + /distilbert/transformer/layer.3/output_layer_norm/Add_1: + type: LayerNorm + input: + /distilbert/transformer/layer.3/Add_1_output_0:0: {} + distilbert.transformer.layer.3.output_layer_norm.weight:0: {} + distilbert.transformer.layer.3.output_layer_norm.bias:0: {} + output: + /distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0: {} + attr: + epsilon: 9.999999960041972e-13 + transpose_mode: 1, 0 + /distilbert/transformer/layer.4/attention/k_lin/Add_quant_0: + type: Quantize + input: + /distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_quant: {} + attr: + output_dtype: u8 + /distilbert/transformer/layer.4/attention/k_lin/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.4/attention/k_lin/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.4.attention.k_lin.bias:0: {} + /distilbert/transformer/layer.4/attention/k_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.4/attention/k_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.4/attention/k_lin/Add_output_0:0_min: {} + /distilbert/transformer/layer.4/attention/k_lin/Add_output_0:0_max: {} + input_ids:0: {} + output: + /distilbert/transformer/layer.4/attention/Reshape_1_output_0:0: {} + attr: + reshape: 12,64,-1, -1 + reshape_dims: '0' + /distilbert/transformer/layer.4/attention/q_lin/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.4/attention/q_lin/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.4.attention.q_lin.bias:0: {} + /distilbert/transformer/layer.4/attention/q_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.4/attention/q_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.4/attention/q_lin/Add_output_0:0_min: {} + /distilbert/transformer/layer.4/attention/q_lin/Add_output_0:0_max: {} + input_ids:0: {} + output: + /distilbert/transformer/layer.4/attention/Reshape_output_0:0: {} + attr: + reshape: 12,64,-1, -1 + reshape_dims: '0' + /distilbert/transformer/layer.4/attention/v_lin/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.4/attention/v_lin/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.4.attention.v_lin.bias:0: {} + /distilbert/transformer/layer.4/attention/v_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.4/attention/v_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.4/attention/Reshape_2_output_0:0_min: {} + /distilbert/transformer/layer.4/attention/Reshape_2_output_0:0_max: {} + input_ids:0: {} + output: + /distilbert/transformer/layer.4/attention/Reshape_2_output_0:0: {} + attr: + output_dtype: s8 + reshape: 12,64,-1, -1 + reshape_dims: '0' + /distilbert/transformer/layer.4/attention/Where: + type: Matmul + input: + /distilbert/transformer/layer.4/attention/Reshape_output_0:0: {} + /distilbert/transformer/layer.4/attention/Reshape_1_output_0:0: {} + padding_sequence:0: {} + output: + /distilbert/transformer/layer.4/attention/Where_output_0:0: {} + attr: + src0_perm: 2,0,3,1 + src1_perm: 2,0,1,3 + output_scale: 0.125 + format_any: false + append_op: binary_add + /distilbert/transformer/layer.4/attention/Softmax: + type: Softmax + input: + /distilbert/transformer/layer.4/attention/Where_output_0:0: {} + /distilbert/transformer/layer.4/attention/Softmax_output_0:0_min: {} + /distilbert/transformer/layer.4/attention/Softmax_output_0:0_max: {} + output: + /distilbert/transformer/layer.4/attention/Softmax_output_0:0: {} + attr: + output_dtype: u8 + /distilbert/transformer/layer.4/attention/Transpose_3: + type: Matmul + input: + /distilbert/transformer/layer.4/attention/Softmax_output_0:0: {} + /distilbert/transformer/layer.4/attention/Reshape_2_output_0:0: {} + /distilbert/transformer/layer.4/attention/Softmax_output_0:0_min: {} + /distilbert/transformer/layer.4/attention/Softmax_output_0:0_max: {} + /distilbert/transformer/layer.4/attention/Reshape_2_output_0:0_min: {} + /distilbert/transformer/layer.4/attention/Reshape_2_output_0:0_max: {} + /distilbert/transformer/layer.4/attention/Reshape_4_output_0:0_min: {} + /distilbert/transformer/layer.4/attention/Reshape_4_output_0:0_max: {} + output: + /distilbert/transformer/layer.4/attention/Reshape_4_output_0:0: {} + attr: + src1_perm: 2,0,3,1 + dst_perm: 1,3,0,2 + output_dtype: u8 + reshape: 768,-1 + /distilbert/transformer/layer.4/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.4/attention/out_lin/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.4/attention/Reshape_4_output_0:0: {} + distilbert.transformer.layer.4.attention.out_lin.bias:0: {} + /distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.4/attention/out_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.4/attention/out_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.4/attention/Reshape_4_output_0:0_min: {} + /distilbert/transformer/layer.4/attention/Reshape_4_output_0:0_max: {} + /distilbert/transformer/layer.4/Add_output_0:0_min: {} + /distilbert/transformer/layer.4/Add_output_0:0_max: {} + output: + /distilbert/transformer/layer.4/Add_output_0:0: {} + attr: + append_op: sum + /distilbert/transformer/layer.4/sa_layer_norm/Add_1: + type: LayerNorm + input: + /distilbert/transformer/layer.4/Add_output_0:0: {} + distilbert.transformer.layer.4.sa_layer_norm.weight:0: {} + distilbert.transformer.layer.4.sa_layer_norm.bias:0: {} + output: + /distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0: {} + attr: + epsilon: 9.999999960041972e-13 + transpose_mode: 1, 0 + /distilbert/transformer/layer.4/ffn/activation/Mul_1_quant_0: + type: Quantize + input: + /distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_quant: {} + attr: + output_dtype: u8 + /distilbert/transformer/layer.4/ffn/activation/Mul_1: + type: InnerProduct + input: + /distilbert/transformer/layer.4/ffn/lin1/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.4.ffn.lin1.bias:0: {} + /distilbert/transformer/layer.4/ffn/lin1/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.4/ffn/lin1/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0_min: {} + /distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0: {} + attr: + append_op: gelu_tanh + output_dtype: u8 + /distilbert/transformer/layer.4/Add_1: + type: InnerProduct + input: + /distilbert/transformer/layer.4/ffn/lin2/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0: {} + distilbert.transformer.layer.4.ffn.lin2.bias:0: {} + /distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.4/ffn/lin2/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.4/ffn/lin2/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0_min: {} + /distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0_max: {} + /distilbert/transformer/layer.4/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.4/Add_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.4/Add_1_output_0:0: {} + attr: + append_op: sum + /distilbert/transformer/layer.4/output_layer_norm/Add_1: + type: LayerNorm + input: + /distilbert/transformer/layer.4/Add_1_output_0:0: {} + distilbert.transformer.layer.4.output_layer_norm.weight:0: {} + distilbert.transformer.layer.4.output_layer_norm.bias:0: {} + output: + /distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0: {} + attr: + epsilon: 9.999999960041972e-13 + transpose_mode: 1, 0 + /distilbert/transformer/layer.5/attention/k_lin/Add_quant_0: + type: Quantize + input: + /distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_quant: {} + attr: + output_dtype: u8 + /distilbert/transformer/layer.5/attention/k_lin/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.5/attention/k_lin/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.5.attention.k_lin.bias:0: {} + /distilbert/transformer/layer.5/attention/k_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.5/attention/k_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.5/attention/k_lin/Add_output_0:0_min: {} + /distilbert/transformer/layer.5/attention/k_lin/Add_output_0:0_max: {} + input_ids:0: {} + output: + /distilbert/transformer/layer.5/attention/Reshape_1_output_0:0: {} + attr: + reshape: 12,64,-1, -1 + reshape_dims: '0' + /distilbert/transformer/layer.5/attention/q_lin/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.5/attention/q_lin/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.5.attention.q_lin.bias:0: {} + /distilbert/transformer/layer.5/attention/q_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.5/attention/q_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.5/attention/q_lin/Add_output_0:0_min: {} + /distilbert/transformer/layer.5/attention/q_lin/Add_output_0:0_max: {} + input_ids:0: {} + output: + /distilbert/transformer/layer.5/attention/Reshape_output_0:0: {} + attr: + reshape: 12,64,-1, -1 + reshape_dims: '0' + /distilbert/transformer/layer.5/attention/v_lin/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.5/attention/v_lin/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.5.attention.v_lin.bias:0: {} + /distilbert/transformer/layer.5/attention/v_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.5/attention/v_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.5/attention/Reshape_2_output_0:0_min: {} + /distilbert/transformer/layer.5/attention/Reshape_2_output_0:0_max: {} + input_ids:0: {} + output: + /distilbert/transformer/layer.5/attention/Reshape_2_output_0:0: {} + attr: + output_dtype: s8 + reshape: 12,64,-1, -1 + reshape_dims: '0' + /distilbert/transformer/layer.5/attention/Where: + type: Matmul + input: + /distilbert/transformer/layer.5/attention/Reshape_output_0:0: {} + /distilbert/transformer/layer.5/attention/Reshape_1_output_0:0: {} + padding_sequence:0: {} + output: + /distilbert/transformer/layer.5/attention/Where_output_0:0: {} + attr: + src0_perm: 2,0,3,1 + src1_perm: 2,0,1,3 + output_scale: 0.125 + format_any: false + append_op: binary_add + /distilbert/transformer/layer.5/attention/Softmax: + type: Softmax + input: + /distilbert/transformer/layer.5/attention/Where_output_0:0: {} + /distilbert/transformer/layer.5/attention/Softmax_output_0:0_min: {} + /distilbert/transformer/layer.5/attention/Softmax_output_0:0_max: {} + output: + /distilbert/transformer/layer.5/attention/Softmax_output_0:0: {} + attr: + output_dtype: u8 + /distilbert/transformer/layer.5/attention/Transpose_3: + type: Matmul + input: + /distilbert/transformer/layer.5/attention/Softmax_output_0:0: {} + /distilbert/transformer/layer.5/attention/Reshape_2_output_0:0: {} + /distilbert/transformer/layer.5/attention/Softmax_output_0:0_min: {} + /distilbert/transformer/layer.5/attention/Softmax_output_0:0_max: {} + /distilbert/transformer/layer.5/attention/Reshape_2_output_0:0_min: {} + /distilbert/transformer/layer.5/attention/Reshape_2_output_0:0_max: {} + /distilbert/transformer/layer.5/attention/Reshape_4_output_0:0_min: {} + /distilbert/transformer/layer.5/attention/Reshape_4_output_0:0_max: {} + output: + /distilbert/transformer/layer.5/attention/Reshape_4_output_0:0: {} + attr: + src1_perm: 2,0,3,1 + dst_perm: 1,3,0,2 + output_dtype: u8 + reshape: 768,-1 + /distilbert/transformer/layer.5/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.5/attention/out_lin/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.5/attention/Reshape_4_output_0:0: {} + distilbert.transformer.layer.5.attention.out_lin.bias:0: {} + /distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.5/attention/out_lin/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.5/attention/out_lin/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.5/attention/Reshape_4_output_0:0_min: {} + /distilbert/transformer/layer.5/attention/Reshape_4_output_0:0_max: {} + /distilbert/transformer/layer.5/Add_output_0:0_min: {} + /distilbert/transformer/layer.5/Add_output_0:0_max: {} + output: + /distilbert/transformer/layer.5/Add_output_0:0: {} + attr: + append_op: sum + /distilbert/transformer/layer.5/sa_layer_norm/Add_1: + type: LayerNorm + input: + /distilbert/transformer/layer.5/Add_output_0:0: {} + distilbert.transformer.layer.5.sa_layer_norm.weight:0: {} + distilbert.transformer.layer.5.sa_layer_norm.bias:0: {} + output: + /distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0: {} + attr: + epsilon: 9.999999960041972e-13 + transpose_mode: 1, 0 + /distilbert/transformer/layer.5/ffn/activation/Mul_1_quant_0: + type: Quantize + input: + /distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_quant: {} + attr: + output_dtype: u8 + /distilbert/transformer/layer.5/ffn/activation/Mul_1: + type: InnerProduct + input: + /distilbert/transformer/layer.5/ffn/lin1/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_quant: {} + distilbert.transformer.layer.5.ffn.lin1.bias:0: {} + /distilbert/transformer/layer.5/ffn/lin1/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.5/ffn/lin1/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_max: {} + /distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0_min: {} + /distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0: {} + attr: + append_op: gelu_tanh + output_dtype: u8 + /distilbert/transformer/layer.5/Add_1: + type: InnerProduct + input: + /distilbert/transformer/layer.5/ffn/lin2/Transpose_output_0_quantized:0: {} + /distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0: {} + distilbert.transformer.layer.5.ffn.lin2.bias:0: {} + /distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.5/ffn/lin2/Transpose_output_0_quantized:0_min: {} + /distilbert/transformer/layer.5/ffn/lin2/Transpose_output_0_quantized:0_max: {} + /distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0_min: {} + /distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0_max: {} + /distilbert/transformer/layer.5/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.5/Add_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.5/Add_1_output_0:0: {} + attr: + append_op: sum + /distilbert/transformer/layer.5/Add_1_Reorder_Recover: + type: Reorder + input: + /distilbert/transformer/layer.5/Add_1_output_0:0: {} + output: + /distilbert/transformer/layer.5/Add_1_output_0:0_recover: {} + attr: + src_perm: 0,1 + dst_perm: 1,0 + /distilbert/transformer/layer.5/output_layer_norm/Add_1: + type: LayerNorm + input: + /distilbert/transformer/layer.5/Add_1_output_0:0_recover: {} + distilbert.transformer.layer.5.output_layer_norm.weight:0: {} + distilbert.transformer.layer.5.output_layer_norm.bias:0: {} + output: + /distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0: {} + attr: + epsilon: 9.999999960041972e-13 + /qa_outputs/Add_quant_0: + type: Quantize + input: + /distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0: {} + /distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_max: {} + output: + /distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_quant: {} + attr: + output_dtype: u8 + /qa_outputs/Add: + type: InnerProduct + input: + /distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_quant: {} + /qa_outputs/Transpose_output_0_quantized:0: {} + qa_outputs.bias:0: {} + /distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_min: {} + /distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_max: {} + /qa_outputs/Transpose_output_0_quantized:0_min: {} + /qa_outputs/Transpose_output_0_quantized:0_max: {} + /qa_outputs/Add_output_0:0_min: {} + /qa_outputs/Add_output_0:0_max: {} + input_ids:0: {} + output: + logits: {} + attr: + src1_perm: 1,0 + reshape: -1,-1,2 + reshape_dims: 0,1 + output_data: + type: Output + input: + logits: {}