| #map = affine_map<(d0, d1) -> (d1)>
| // #map (above) indexes a 1-D operand by d1 only, so that operand is broadcast along d0; #map1 (below) is the 2-D identity map.
| #map1 = affine_map<(d0, d1) -> (d0, d1)>
|
| module attributes {tf_saved_model.semantics} {
| // Four i32 zeros; @main broadcasts this vector over the 16 rows of its i32 accumulator before the matmul.
| memref.global "private" constant @__constant_4xi32_0 : memref<4xi32> = dense<0>
| // One i32 factor per output column; @main multiplies each accumulator value by the matching entry (in i64) during the rescale.
| memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1362807777, 1376833372, 1373258503, 1373287303]>
| // Splat of 41 for all four columns; @main zero-extends it and uses it as the right-shift amount of the rescale.
| memref.global "private" constant @__constant_4xi8 : memref<4xi8> = dense<41>
| // 64x4 i8 matrix stored as a hex-encoded dense blob; @main uses it as the right-hand operand of the tiled matmul. NOTE(review): presumably the layer's quantized weights - confirm against the source model.
| memref.global "private" constant @__constant_64x4xi8 : memref<64x4xi8> = dense<"0x04A2815B08E58EC154A3D6FE75FB0C7FB142950272FA277E8C4FE5EFA18F6916784914FBAC469376D091D98A6D51D61FEDA70B3F37E677535F95AAD84D34584FBD033055E6899990F41BC0766A5FBD4AA662E9E9F38683379E8FF2A45BD36CDA3F54E54601DF38033B99D66F25E772D739BFDEDC8B5BD4D58D6EAA6681C1955A46A838E2899173C3CB934DF4DB7E4AA995B001D910BEF6F6BA76D1E178679325FC08FD21DD94664F6AB534CA5988CF84F68604520B1B0FFCB8CDC0F71DB57B3391A4E2DFF108E2DA15C1AE82A07F607B143409194DB001265AD26405AF43EC21FD98352A5CADBB9DB3D160F708B170D734795B4F4E3B60AFF6F0210FC654A184">
|
| // @main - exported as "serving_default".
| //   %arg0: 16x64 i8 input buffer (read only here).
| //   %arg1: 16x4 i8 output buffer supplied by the caller; the result is written into it and nothing is returned.
| // Computation:
| //   acc[r, c] = @__constant_4xi32_0[c] + sum_k (sext(%arg0[r, k]) + 128) * @__constant_64x4xi8[k, c]   (i32)
| //   out[r, c] = clamp(rescale(acc[r, c], @__constant_4xi32[c], @__constant_4xi8[c]) + 55, -128, 127)
| // NOTE(review): this looks like a quantized dense layer with 128 / 55 acting as input / output zero-point
| // corrections - confirm against the original model.
| func.func @main(%arg0: memref<16x64xi8> {ml_program.identifier = "serving_default_keras_tensor:0", tf_saved_model.index_path = ["keras_tensor"]}, %arg1: memref<16x4xi8> {ml_program.identifier = "StatefulPartitionedCall_1:0", tf_saved_model.index_path = ["output_0"]}) attributes {tf_saved_model.exported_names = ["serving_default"]} {
|   // Scalar constants used by the rescale / clamp stage.
|   %c-128_i32 = arith.constant -128 : i32
|   %c0_i32 = arith.constant 0 : i32
|   %c55_i32 = arith.constant 55 : i32
|   %c127_i32 = arith.constant 127 : i32
|   %c1_i64 = arith.constant 1 : i64
|   %c31_i32 = arith.constant 31 : i32
|   %c1073741824_i64 = arith.constant 1073741824 : i64
|   %c-1073741824_i64 = arith.constant -1073741824 : i64
|   // Subtracted from the sign-extended input, i.e. the input is shifted up by 128.
|   %c-128_i16 = arith.constant -128 : i16
|   // Loop bounds and tile steps for the tiled matmul.
|   %c0 = arith.constant 0 : index
|   %c8 = arith.constant 8 : index
|   %c64 = arith.constant 64 : index
|   %c16 = arith.constant 16 : index
|   // Handles to the module-level constant buffers.
|   %0 = memref.get_global @__constant_64x4xi8 : memref<64x4xi8>
|   %1 = memref.get_global @__constant_4xi8 : memref<4xi8>
|   %2 = memref.get_global @__constant_4xi32 : memref<4xi32>
|   %3 = memref.get_global @__constant_4xi32_0 : memref<4xi32>
|
|   // Step 1: initialise the 16x4 i32 accumulator by broadcasting the 4-element vector %3 over all rows.
|   %alloca = memref.alloca() : memref<16x4xi32>
|   linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel"]} ins(%3 : memref<4xi32>) outs(%alloca : memref<16x4xi32>) {
|   ^bb0(%in: i32, %out: i32):
|     linalg.yield %in : i32
|   }
|
|   // Step 2: widen the input to i16 and subtract -128, so every element lands in [0, 255].
|   %alloca_0 = memref.alloca() : memref<16x64xi16>
|   linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%arg0 : memref<16x64xi8>) outs(%alloca_0 : memref<16x64xi16>) {
|   ^bb0(%in: i8, %out: i16):
|     %4 = arith.extsi %in : i8 to i16
|     %5 = arith.subi %4, %c-128_i16 : i16
|     linalg.yield %5 : i16
|   }
|
|   // Step 3: tiled matmul. The outer loop walks the 16 rows in tiles of 8, the inner loop walks the
|   // 64-element reduction dimension in tiles of 16. Each inner iteration accumulates an
|   // (8x16) x (16x4) product directly into the 8x4 accumulator tile %subview.
|   scf.for %arg2 = %c0 to %c16 step %c8 {
|     %subview = memref.subview %alloca[%arg2, 0] [8, 4] [1, 1] : memref<16x4xi32> to memref<8x4xi32, strided<[4, 1], offset: ?>>
|     scf.for %arg3 = %c0 to %c64 step %c16 {
|       %subview_1 = memref.subview %alloca_0[%arg2, %arg3] [8, 16] [1, 1] : memref<16x64xi16> to memref<8x16xi16, strided<[64, 1], offset: ?>>
|       %subview_2 = memref.subview %0[%arg3, 0] [16, 4] [1, 1] : memref<64x4xi8> to memref<16x4xi8, strided<[4, 1], offset: ?>>
|       // The matmul updates %subview in place, so no copy-back of the tile is needed afterwards.
|       linalg.matmul {matmul_ref_id = 0 : i32} ins(%subview_1, %subview_2 : memref<8x16xi16, strided<[64, 1], offset: ?>>, memref<16x4xi8, strided<[4, 1], offset: ?>>) outs(%subview : memref<8x4xi32, strided<[4, 1], offset: ?>>)
|     } {id = "matmul_id_0_k_loop"}
|   } {id = "matmul_id_0_m_loop"}
|
|   // Step 4: per-element rescale of the i32 accumulator to i8, written into %arg1.
|   //   %in   = accumulator value, %in_1 = per-column factor (%2), %in_2 = per-column shift (%1).
|   linalg.generic {indexing_maps = [#map1, #map, #map, #map1], iterator_types = ["parallel", "parallel"]} ins(%alloca, %2, %1 : memref<16x4xi32>, memref<4xi32>, memref<4xi8>) outs(%arg1 : memref<16x4xi8>) {
|   ^bb0(%in: i32, %in_1: i32, %in_2: i8, %out: i8):
|     // Shift amount as i32; only used for the "> 31" test below.
|     %4 = arith.extui %in_2 : i8 to i32
|     // 64-bit product of accumulator and factor.
|     %5 = arith.extsi %in : i32 to i64
|     %6 = arith.extsi %in_1 : i32 to i64
|     %7 = arith.muli %5, %6 : i64
|     // Rounding term: (1 << shift) >> 1.
|     %8 = arith.extui %in_2 : i8 to i64
|     %9 = arith.shli %c1_i64, %8 : i64
|     %10 = arith.shrui %9, %c1_i64 : i64
|     %11 = arith.addi %7, %10 : i64
|     // Extra +/- 2^30 term chosen by the sign of the accumulator; applied only when shift > 31.
|     %12 = arith.cmpi sge, %in, %c0_i32 : i32
|     %13 = arith.select %12, %c1073741824_i64, %c-1073741824_i64 : i64
|     %14 = arith.addi %13, %11 : i64
|     %15 = arith.cmpi sgt, %4, %c31_i32 : i32
|     %16 = arith.select %15, %14, %11 : i64
|     // Arithmetic right shift back down, then narrow to i32.
|     %17 = arith.shrsi %16, %8 : i64
|     %18 = arith.trunci %17 : i64 to i32
|     // Add the output offset and saturate to the i8 range before the final truncation.
|     %19 = arith.addi %18, %c55_i32 : i32
|     %20 = arith.maxsi %19, %c-128_i32 : i32
|     %21 = arith.minsi %20, %c127_i32 : i32
|     %22 = arith.trunci %21 : i32 to i8
|     linalg.yield %22 : i8
|   }
|   return
| }
|
| }
|