{
  "inputs": [
    "images"
  ],
  "modules": {
    "avg_pool": {
      "config": {
        "args": {
          "output_size": [
            null,
            1
          ]
        }
      },
      "type": "torch.nn.AdaptiveAvgPool2d"
    },
    "feature_extraction": {
      "config": {
        "args": {
          "input_channel": 1,
          "output_channel": 512,
          "variant": "DTRB"
        }
      },
      "type": "DeepTextRecognition.ResNetModel"
    },
    "max": {
      "config": {
        "args": {
          "dim": 2
        }
      },
      "type": "torch.max"
    },
    "permute": {
      "config": {
        "args": {
          "dims": [
            0,
            3,
            1,
            2
          ]
        }
      },
      "type": "torch.permute"
    },
    "prediction": {
      "config": {
        "args": {
          "hidden_size": 256,
          "input_size": 256,
          "num_classes": 96
        }
      },
      "type": "DeepTextRecognition.AttentionModel"
    },
    "processing": {
      "config": {
        "args": {
          "channels_size": 1,
          "image_size": [
            32,
            100
          ],
          "padding": "left"
        }
      },
      "type": "DeepTextRecognition.ImageProcessor"
    },
    "sequence_modeling": {
      "config": {
        "args": {
          "hidden_sizes": [
            256,
            256
          ],
          "input_size": 512,
          "output_size": 256
        }
      },
      "type": "DeepTextRecognition.BiLSTMModel"
    },
    "squeeze": {
      "config": {
        "args": {
          "dim": 3
        }
      },
      "type": "torch.squeeze"
    },
    "tokenizer": {
      "config": {
        "args": {
          "characters": [
            "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
            "a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
            "k", "l", "m", "n", "o", "p", "q", "r", "s", "t",
            "u", "v", "w", "x", "y", "z",
            "A", "B", "C", "D", "E", "F", "G", "H", "I", "J",
            "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T",
            "U", "V", "W", "X", "Y", "Z",
            "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*",
            "+", ",", "-", ".", "/", ":", ";", "<", "=", ">",
            "?", "@", "[", "]", "^", "_", "`", "{", "|", "}",
            "~"
          ],
          "max_length": 25
        }
      },
      "type": "DeepTextRecognition.AttentionTokenizer"
    },
    "transformation": {
      "config": {
        "args": {
          "F": 20,
          "I_channel_num": 1,
          "I_r_size": [
            32,
            100
          ],
          "I_size": [
            32,
            100
          ]
        }
      },
      "type": "DeepTextRecognition.TPSModel"
    }
  },
  "order": [
    "processing",
    "transformation",
    "feature_extraction",
    "permute",
    "avg_pool",
    "squeeze",
    "sequence_modeling",
    "prediction",
    "max",
    "tokenizer"
  ],
  "outputs": [
    "tokenizer:labels"
  ],
  "routing": {
    "avg_pool": {
      "inputs": [
        "permute:permuted_features"
      ],
      "outputs": [
        "avg_pool:pooled_features"
      ]
    },
    "feature_extraction": {
      "inputs": [
        "transformation:transformed_images"
      ],
      "outputs": [
        "feature_extraction:extracted_features"
      ]
    },
    "max": {
      "inputs": [
        "prediction:predictions"
      ],
      "outputs": [
        "max:none",
        "max:predictions"
      ]
    },
    "permute": {
      "inputs": [
        "feature_extraction:extracted_features"
      ],
      "outputs": [
        "permute:permuted_features"
      ]
    },
    "prediction": {
      "inputs": [
        "sequence_modeling:modeled_features"
      ],
      "outputs": [
        "prediction:predictions"
      ]
    },
    "processing": {
      "inputs": [
        "images"
      ],
      "outputs": [
        "processing:processed_images"
      ]
    },
    "sequence_modeling": {
      "inputs": [
        "squeeze:squeezed_features"
      ],
      "outputs": [
        "sequence_modeling:modeled_features"
      ]
    },
    "squeeze": {
      "inputs": [
        "avg_pool:pooled_features"
      ],
      "outputs": [
        "squeeze:squeezed_features"
      ]
    },
    "tokenizer": {
      "inputs": [
        "max:predictions"
      ],
      "outputs": [
        "tokenizer:labels"
      ]
    },
    "transformation": {
      "inputs": [
        "processing:processed_images"
      ],
      "outputs": [
        "transformation:transformed_images"
      ]
    }
  }
}