diff --git a/deepsocflow/c/runtime.h b/deepsocflow/c/runtime.h
index 7f959a4..a0764df 100644
--- a/deepsocflow/c/runtime.h
+++ b/deepsocflow/c/runtime.h
@@ -17,12 +17,12 @@ typedef float f32;
 typedef double f64;
 
 typedef const struct {
-  const u16 n, l, kw, coe, h, w, ci, co, w_kw2, t, p, cm, cm_p0, on, oh, ow, oc, ch, ph, cw, pw;
+  const u16 n, l, kw, coe, h, w, ci, co, w_kw2, t, p, cm, cm_p0, on, oh, ow, oc, ch, ph, cw, pw, pkh, psh, pkw, psw;
   const i32 xp_words, b_offset, w_bpt, w_bpt_p0, x_bpt, x_bpt_p0, o_words, o_bytes;
   const i8 ib_out, in_buffer_idx, out_buffer_idx, add_out_buffer_idx, add_in_buffer_idx;
   const i8 is_bias, is_pool, is_flatten, is_softmax;
   const i8 x_pad, b_val_shift, b_bias_shift, ca_nzero, ca_shift, ca_pl_scale, aa_nzero, aa_shift, aa_pl_scale, pa_nzero, pa_shift, pa_pl_scale, softmax_frac;
-  const i8 csh, csh_shift, pkh, psh, psh_shift, csw, csw_shift, pkw, psw, psw_shift, pool;
+  const i8 csh, csh_shift, psh_shift, csw, csw_shift, psw_shift, pool;
   const f32 softmax_max_f;
   const u64 header;
   const i32 debug_nhwc_words;
@@ -30,7 +30,7 @@ typedef const struct {
 
 typedef enum {POOL_NONE, POOL_MAX, POOL_AVG} Pool_t;
 
-#include "../../run/work/config_fw.h"
+#include "config_fw.h"
 
 #define X_BITS (1 << X_BITS_L2)
 #define X_WORDS_PER_BYTE (8 / X_BITS)
@@ -486,17 +486,17 @@ extern EXT_C u8 model_run(Memory_st *restrict mp, void *p_config) {
           fclose(fp_raw);
 #endif
           set_config(p_config, A_DONE_READ + ocm_bank, 1);
-          debug_printf("-------- iw_kw2 0x%x done \n", iw_kw2);
+          debug_printf("%d-------- iw_kw2 %d done \n", ib, iw_kw2);
         } // iw_kw2
-        debug_printf("-------- il %x done\n", il);
+        debug_printf("%d-------- il %d done\n", ib, il);
       } // il
-      debug_printf("-------- in %x done\n", in);
+      debug_printf("%d-------- in %d done\n", ib, in);
     } // in
-    debug_printf("------ it %x done\n", it);
+    debug_printf("%d------ it %d done\n", ib, it);
   } // it
-  debug_printf("--- ip %x done\n", ip);
+  debug_printf("%d--- ip %d done\n", ib, ip);
 } // ip
-debug_printf("- done bundle!! ib:%x\n", ib);
+debug_printf("%d- done bundle!! ib:%d\n", ib, ib);
 
 #ifdef SIM
   char f_path_debug [1000];
diff --git a/deepsocflow/py/dataflow.py b/deepsocflow/py/dataflow.py
index 40c81ee..61c5799 100644
--- a/deepsocflow/py/dataflow.py
+++ b/deepsocflow/py/dataflow.py
@@ -309,16 +309,43 @@ def predict_bundle_performance(hw, r):
     clocks = clocks_p0 + (r.CP-1)*clocks_p
     mem_bits = mem_bits_p0 + (r.CP-1)*mem_bits_p
 
-    return clocks, mem_bits
+    operations = (r.XN * r.XH * r.XW * r.CI) * (r.KH * r.KW * r.CO)
+    utilization = operations / (hw.ROWS * hw.COLS * clocks)
+
+    return clocks, mem_bits, utilization
 
 def predict_model_performance(hw):
-    clocks_total = 0
+    d_out = {
+        'clocks_total': 0,
+        'mem_bytes_total': 0,
+        'utilization_all': [],
+        'clocks_all': [],
+        'mem_bytes_all': [],
+    }
     for b in BUNDLES:
-        clocks, mem_bits = predict_bundle_performance(hw=hw, r=b.r)
-        clocks_total += clocks
+        clocks, mem_bits, utilization = predict_bundle_performance(hw=hw, r=b.r)
+        d_out['clocks_total'] += clocks
+        d_out['mem_bytes_total'] += mem_bits/8
+
+        d_out['utilization_all'] += [utilization]
+        d_out['clocks_all'] += [clocks]
+        d_out['mem_bytes_all'] += [mem_bits/8]
+
+        print(f'---{b.ib}: util:{100*utilization:.2f} mem_mb:{mem_bits/8/1024**2:.2f} {b.r.XN=} {b.r.XH=} {b.r.XW=} {b.r.CI=} {b.r.CO=} {b.r.KH=} {b.r.KW=}')
+
+    d_out['seconds_per_batch'] = d_out['clocks_total'] / (hw.FREQ * 1e6)
+    d_out['frames_per_sec'] = hw.ROWS / d_out['seconds_per_batch']
+    d_out['ms_per_frame'] = 1000 / d_out['frames_per_sec']
+
+    with open('util.txt', 'w') as f:
+        for line in d_out['utilization_all']:
+            f.write(f"{line}\n")
+
+    with open('mem_bytes.txt', 'w') as f:
+        for line in d_out['mem_bytes_all']:
+            f.write(f"{line}\n")
 
-    time = clocks_total / (hw.FREQ * 1e6)
-    mem_bytes = mem_bits / 8
-    return time, mem_bytes
\ No newline at end of file
+    return d_out
\ No newline at end of file
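Note: the new utilization figure above is the fraction of the PE array's peak MAC issue rate that a bundle actually achieves over its predicted clock count. A minimal standalone sketch of the arithmetic, with made-up layer dimensions and clock count (illustrative only, not taken from this repo):

    ROWS, COLS, FREQ = 7, 96, 250               # PE grid and clock (MHz), as in resnet_50.py below
    XN, XH, XW, CI = 7, 56, 56, 64              # hypothetical bundle input: batch, height, width, channels
    KH, KW, CO = 3, 3, 64                       # hypothetical kernel size and output channels
    clocks = 2_000_000                          # stand-in for what predict_bundle_performance() returns

    operations = (XN * XH * XW * CI) * (KH * KW * CO)    # MACs the convolution performs
    utilization = operations / (ROWS * COLS * clocks)    # ~0.60 here; 1.0 means every PE busy every clock
    seconds = clocks / (FREQ * 1e6)                      # same clocks-to-time conversion as above
    print(f'{100*utilization:.1f}% utilized, {1e3*seconds:.2f} ms per batch')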
diff --git a/deepsocflow/py/hardware.py b/deepsocflow/py/hardware.py
index ccaf029..c8d1f58 100644
--- a/deepsocflow/py/hardware.py
+++ b/deepsocflow/py/hardware.py
@@ -227,7 +227,8 @@ def simulate(self, SIM='verilator', SIM_PATH=''):
       print(cmd)
       assert subprocess.run(cmd.split(' '), cwd='build').returncode == 0
 
-
+    exit()
+
     print("\n\nSIMULATING...\n\n")
     start = time.time()
diff --git a/deepsocflow/py/xmodel.py b/deepsocflow/py/xmodel.py
index f0fbcfc..284f04f 100644
--- a/deepsocflow/py/xmodel.py
+++ b/deepsocflow/py/xmodel.py
@@ -47,7 +47,7 @@ def export_inference(model, hw):
     BUNDLES.clear()
     user_model = model.layers[1]
-    input_shape = (1, *model.inputs[0].shape[1:])
+    input_shape = (hw.ROWS, *model.inputs[0].shape[1:])
     x_keras = tf.random.uniform(input_shape)
    x_qtensor = user_model.input_quant_layer(x_keras)
     out_keras = model(x_keras)
@@ -144,6 +144,9 @@ def export_inference(model, hw):
 
     print(f'add_buffer_map:{add_buffer_map}')
 
+    d_perf = predict_model_performance(hw=hw)
+    print(f"Predicted performance: {d_perf}")
+
     '''
     Write Runtime Headers
     '''
@@ -215,12 +218,12 @@ def export_inference(model, hw):
 
         out_type = 'float' if (ib == len(BUNDLES)-1 and b.softmax) else 'int32_t'
 
-        ch.write(f" {{.n={b.r.XN:<3}, .l={b.r.XL:<3}, .kw={b.r.KW:<3}, .coe={y_coe:<3}, .h={b.r.XH:<3}, .w={b.r.XW:<3}, .ci={b.r.CI:<4}, .co={b.r.CO:<4}, .w_kw2={b.r.XW-b.r.KW//2:<3}, .t={b.r.IT:<3}, .p={b.r.CP:<3}, .cm={b.r.CM:<3}, .cm_p0={b.r.CM_0:<3}, .on={b.r.ON:<3}, .oh={b.r.OH:<3}, .ow={b.r.OW:<3}, .oc={b.r.OC:<4}, .ch={b.r.CYH:<3}, .ph={b.r.PYH:<3}, .cw={b.r.CYW:<3}, .pw={b.r.PYW:<3}, ")
+        ch.write(f" {{.n={b.r.XN:<3}, .l={b.r.XL:<3}, .kw={b.r.KW:<3}, .coe={y_coe:<3}, .h={b.r.XH:<3}, .w={b.r.XW:<3}, .ci={b.r.CI:<4}, .co={b.r.CO:<4}, .w_kw2={b.r.XW-b.r.KW//2:<3}, .t={b.r.IT:<3}, .p={b.r.CP:<3}, .cm={b.r.CM:<3}, .cm_p0={b.r.CM_0:<3}, .on={b.r.ON:<3}, .oh={b.r.OH:<3}, .ow={b.r.OW:<3}, .oc={b.r.OC:<4}, .ch={b.r.CYH:<3}, .ph={b.r.PYH:<3}, .cw={b.r.CYW:<3}, .pw={b.r.PYW:<3}, .pkh={b.r.PKH:<3}, .psh={b.r.PSH:<3}, .pkw={b.r.PKW:<3}, .psw={b.r.PSW:<3}, ")
         ch.write( f".xp_words={xp_words:<6}, .b_offset={b_words:<5}, .w_bpt={w_bpt:<5}, .w_bpt_p0={w_bpt_p0:<5}, .x_bpt={x_bpt:<8}, .x_bpt_p0={x_bpt_p0:<8}, .o_words={o_words_b:<8}, .o_bytes={o_bytes_b:<8}, ")
         ch.write( f".ib_out={ib_out:<4}, .in_buffer_idx={in_buffer_idx:<3}, .out_buffer_idx={b.out_buffer_idx:<3}, .add_out_buffer_idx={add_out_buffer_idx:<2}, .add_in_buffer_idx={add_in_buffer_idx:<2}, ")
         ch.write( f".is_bias={1*(b.core.b is not None):<3}, .is_flatten={1*(b.flatten is not None):<3}, .is_softmax={1*(b.softmax is not None):<3}, ")
         ch.write( f".x_pad={b.r.X_PAD:<3}, .b_val_shift={b.core.bias_val_shift:<3}, .b_bias_shift={b.core.bias_b_shift:<3}, .ca_nzero={ca_nzero:<3}, .ca_shift={ca_shift:<3}, .ca_pl_scale={ca_pl_scale:<3}, .aa_nzero={aa_nzero:<3}, .aa_shift={aa_shift:<3}, .aa_pl_scale={aa_pl_scale:<3}, .pa_nzero={pa_nzero:<3}, .pa_shift={pa_shift:<3}, .pa_pl_scale={pa_pl_scale:<3}, .softmax_frac={b.softmax_frac:<3}, ")
-        ch.write( f".csh={b.r.CSH:<3}, .csh_shift={b.r.CSH_SHIFT:<3}, .pkh={b.r.PKH:<3}, .psh={b.r.PSH:<3}, .psh_shift={b.r.PSH_SHIFT:<3}, .csw={b.r.CSW:<3}, .csw_shift={b.r.CSW_SHIFT:<3}, .pkw={b.r.PKW:<3}, .psw={b.r.PSW:<3}, .psw_shift={b.r.PSW_SHIFT:<3}, .pool={pool_type:<10}, ")
+        ch.write( f".csh={b.r.CSH:<3}, .csh_shift={b.r.CSH_SHIFT:<3}, .psh_shift={b.r.PSH_SHIFT:<3}, .csw={b.r.CSW:<3}, .csw_shift={b.r.CSW_SHIFT:<3}, .psw_shift={b.r.PSW_SHIFT:<3}, .pool={pool_type:<10}, ")
         ch.write( f".softmax_max_f={b.softmax_max_f:<15}, ")
         ch.write( f".header={b.r.header:>23}u, ")
         ch.write( f".debug_nhwc_words={b.oe_exp_nhwc.size:<9} }}")
@@ -328,11 +331,6 @@ def export_inference(model, hw):
 
 
 def verify_inference(model, hw, SIM, SIM_PATH):
-
-    seconds, mem_bytes = predict_model_performance(hw=hw)
-    print(f"Predicted time on hardware: {1000*seconds:.5f} ms/frame")
-    print(f"Predicted fps: {1/seconds}")
-    print(f"Data movement (bytes): mem_bytes")
 
     '''
     RUN SIMULATION
diff --git a/deepsocflow/rtl/defines.svh b/deepsocflow/rtl/defines.svh
index 2514ace..7ad7936 100644
--- a/deepsocflow/rtl/defines.svh
+++ b/deepsocflow/rtl/defines.svh
@@ -1,4 +1,4 @@
-`include "../../run/work/config_hw.svh"
+`include "config_hw.svh"
 
 
 `define BITS_KW2 $clog2((`KW_MAX+1)/2)
diff --git a/resnet_50.py b/resnet_50.py
new file mode 100644
index 0000000..32911e7
--- /dev/null
+++ b/resnet_50.py
@@ -0,0 +1,129 @@
+import os
+import sys
+sys.path.append("../../")
+from deepsocflow import Bundle, Hardware, QModel, QInput
+
+'''
+0. Specify Hardware
+'''
+hw = Hardware (                          # Alternatively: hw = Hardware.from_json('hardware.json')
+        processing_elements = (7, 96)  , # (rows, columns) of multiply-add units
+        frequency_mhz       = 250      , # target clock frequency (MHz)
+        bits_input          = 4        , # bit width of input pixels and activations
+        bits_weights        = 4        , # bit width of weights
+        bits_sum            = 24       , # bit width of accumulator
+        bits_bias           = 16       , # bit width of bias
+        max_batch_size      = 64       , # maximum batch size (XN) supported
+        max_channels_in     = 2048     , # maximum input channel count (CI) supported
+        max_kernel_size     = 13       , # maximum convolution kernel size supported
+        max_image_size      = 512      , # maximum input height/width supported
+        ram_weights_depth   = 512      , # depth of on-chip weights RAM
+        ram_edges_depth     = 524288   , # depth of on-chip RAM for edge pixels
+        axi_width           = 128      , # width of the AXI data bus (bits)
+        target_cpu_int_bits = 32       , # integer bit width of the target CPU
+        valid_prob          = 1        , # probability with which the AXI-Stream s_valid signal is toggled in simulation
+        ready_prob          = 1        , # probability with which the AXI-Stream m_ready signal is toggled in simulation
+        data_dir            = 'vectors', # directory to store generated test vectors
+    )
+hw.export()                              # Generates: config_hw.svh, config_hw.tcl, config_tb.svh, hardware.json
+hw.export_vivado_tcl(board='zcu104')
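+# Sanity check (sketch only): peak MAC rate implied by the grid above, assuming
+# one MAC per PE per clock and that the Hardware object exposes the same
+# ROWS/COLS/FREQ attributes dataflow.py reads. ResNet-50 at 224x224 is roughly
+# 4e9 MACs per frame, so this bounds the ideal frame rate before utilization losses.
+peak_macs_per_sec = hw.ROWS * hw.COLS * hw.FREQ * 1e6   # 7*96*250e6 = 1.68e11 MACs/s
+print(f'peak {peak_macs_per_sec/1e9:.0f} GMAC/s; ideal ResNet-50 bound ~{peak_macs_per_sec/4e9:.0f} fps')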
+
+'''
+1. Build Model
+'''
+XN = 7
+input_shape = (XN,224,224,3)   # (XN, XH, XW, CI)
+
+QINT_BITS = 0
+kq = f'quantized_bits({hw.K_BITS},{QINT_BITS},False,True,1)'
+bq = f'quantized_bits({hw.B_BITS},{QINT_BITS},False,True,1)'
+qr = f'quantized_relu({hw.X_BITS},{QINT_BITS},negative_slope=0)'
+qb = f'quantized_bits({hw.X_BITS},{QINT_BITS},False,False,1)'
+
+
+x = x_in = QInput(shape=input_shape[1:], batch_size=XN, hw=hw, int_bits=QINT_BITS, name='input')
+
+x = Bundle( core= {'type':'conv' , 'filters':64 , 'kernel_size':7, 'strides':2, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr}, pool= {'type':'max', 'size':3, 'strides':2, 'padding':'same', 'act_str':qb} )(x) # conv1_conv
+x1 = Bundle( core= {'type':'conv' , 'filters':256, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb} )(x) # conv2_block1_0_conv
+x = Bundle( core= {'type':'conv' , 'filters':64 , 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv2_block1_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':64 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv2_block1_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':256, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv2_block1_3_conv
+# conv2_block1_add
+x = Bundle( core= {'type':'conv' , 'filters':64 , 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv2_block2_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':64 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv2_block2_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':256, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv2_block2_3_conv
+# conv2_block2_add
+x = Bundle( core= {'type':'conv' , 'filters':64 , 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv2_block3_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':64 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv2_block3_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':256, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv2_block3_3_conv
+# conv2_block3_add
+x1 = Bundle( core= {'type':'conv' , 'filters':512, 'kernel_size':1, 'strides':2, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb} )(x) # conv3_block1_0_conv
+x = Bundle( core= {'type':'conv' , 'filters':128, 'kernel_size':1, 'strides':2, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv3_block1_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':128, 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv3_block1_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':512, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv3_block1_3_conv
+# conv3_block1_add
+x = Bundle( core= {'type':'conv' , 'filters':128, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv3_block2_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':128, 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv3_block2_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':512, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv3_block2_3_conv
+# conv3_block2_add
+x = Bundle( core= {'type':'conv' , 'filters':128, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv3_block3_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':128, 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv3_block3_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':512, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv3_block3_3_conv
+# conv3_block3_add
+x = Bundle( core= {'type':'conv' , 'filters':128, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv3_block4_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':128, 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv3_block4_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':512, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv3_block4_3_conv
+# conv3_block4_add
+x1 = Bundle( core= {'type':'conv' , 'filters':1024, 'kernel_size':1, 'strides':2, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb} )(x) # conv4_block1_0_conv
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':1, 'strides':2, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block1_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block1_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':1024, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv4_block1_3_conv
+# conv4_block1_add
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block2_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block2_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':1024, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv4_block2_3_conv
+# conv4_block2_add
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block3_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block3_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':1024, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv4_block3_3_conv
+# conv4_block3_add
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block4_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block4_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':1024, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv4_block4_3_conv
+# conv4_block4_add
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block5_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block5_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':1024, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv4_block5_3_conv
+# conv4_block5_add
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block6_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block6_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':1024, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv4_block6_3_conv
+# conv4_block6_add
+x1 = Bundle( core= {'type':'conv' , 'filters':2048, 'kernel_size':1, 'strides':2, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb} )(x) # conv5_block1_0_conv
+x = Bundle( core= {'type':'conv' , 'filters':512 , 'kernel_size':1, 'strides':2, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv5_block1_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':512 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv5_block1_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':2048, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv5_block1_3_conv
+# conv5_block1_add
+x = Bundle( core= {'type':'conv' , 'filters':512 , 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv5_block2_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':512 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv5_block2_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':2048, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv5_block2_3_conv
+# conv5_block2_add
+x = Bundle( core= {'type':'conv' , 'filters':512 , 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv5_block3_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':512 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv5_block3_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':2048, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr}, pool= {'type':'avg', 'size':7, 'strides':7, 'padding':'same', 'act_str':qb}, flatten=True )(x, x1) # conv5_block3_3_conv
+# conv5_block3_add
+x = Bundle( core= {'type':'dense', 'units':1000, 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, softmax= True)(x)
+
+
+model = QModel(inputs=x_in.raw, outputs=x)
+
+for layer in model.layers:
+    layer.trainable = False
+
+model.compile()
+model.summary()
+
+
+model.export_inference(x=model.random_input, hw=hw)   # Runs forward pass in float & int, compares them. Generates: config_fw.h (C firmware), weights.bin, expected.bin
+(SIM, SIM_PATH) = ('xsim', "F:/Xilinx/Vivado/2022.1/bin/") if os.name=='nt' else ('verilator', '')
+model.verify_inference(SIM, SIM_PATH)                 # Runs SystemVerilog testbench with the model & weights, randomizing handshakes, testing with actual C firmware in simulation
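
A quick way to eyeball the per-bundle stats that predict_model_performance now dumps (util.txt and mem_bytes.txt, one value per line, written to the working directory). Sketch only; assumes numpy is available:

    import numpy as np
    util = np.loadtxt('util.txt')        # per-bundle PE utilization, 0..1
    mem = np.loadtxt('mem_bytes.txt')    # per-bundle DDR traffic, in bytes
    print(f'mean util {100*util.mean():.1f}%, worst {100*util.min():.1f}%, total traffic {mem.sum()/1024**2:.1f} MiB')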