diff --git a/deepsocflow/c/runtime.h b/deepsocflow/c/runtime.h
index 7f959a4..a0764df 100644
--- a/deepsocflow/c/runtime.h
+++ b/deepsocflow/c/runtime.h
@@ -17,12 +17,12 @@ typedef float f32;
 typedef double f64;
 
 typedef const struct {
-  const u16 n, l, kw, coe, h, w, ci, co, w_kw2, t, p, cm, cm_p0, on, oh, ow, oc, ch, ph, cw, pw;
+  const u16 n, l, kw, coe, h, w, ci, co, w_kw2, t, p, cm, cm_p0, on, oh, ow, oc, ch, ph, cw, pw, pkh, psh, pkw, psw;
   const i32 xp_words, b_offset, w_bpt, w_bpt_p0, x_bpt, x_bpt_p0, o_words, o_bytes;
   const i8 ib_out, in_buffer_idx, out_buffer_idx, add_out_buffer_idx, add_in_buffer_idx;
   const i8 is_bias, is_pool, is_flatten, is_softmax;
   const i8 x_pad, b_val_shift, b_bias_shift, ca_nzero, ca_shift, ca_pl_scale, aa_nzero, aa_shift, aa_pl_scale, pa_nzero, pa_shift, pa_pl_scale, softmax_frac;
-  const i8 csh, csh_shift, pkh, psh, psh_shift, csw, csw_shift, pkw, psw, psw_shift, pool;
+  const i8 csh, csh_shift, psh_shift, csw, csw_shift, psw_shift, pool;
   const f32 softmax_max_f;
   const u64 header;
   const i32 debug_nhwc_words;
@@ -30,7 +30,7 @@ typedef const struct {
 
 typedef enum {POOL_NONE, POOL_MAX, POOL_AVG} Pool_t;
 
-#include "../../run/work/config_fw.h"
+#include "config_fw.h"
 
 #define X_BITS (1 << X_BITS_L2)
 #define X_WORDS_PER_BYTE (8 / X_BITS)
@@ -486,17 +486,17 @@ extern EXT_C u8 model_run(Memory_st *restrict mp, void *p_config) {
           fclose(fp_raw);
 #endif
           set_config(p_config, A_DONE_READ + ocm_bank, 1);
-          debug_printf("-------- iw_kw2 0x%x done \n", iw_kw2);
+          debug_printf("%d-------- iw_kw2 %d done \n", ib, iw_kw2);
         } // iw_kw2
-        debug_printf("-------- il %x done\n", il);
+        debug_printf("%d-------- il %d done\n", ib, il);
       } // il
-      debug_printf("-------- in %x done\n", in);
+      debug_printf("%d-------- in %d done\n", ib, in);
     } // in
-    debug_printf("------ it %x done\n", it);
+    debug_printf("%d------ it %d done\n", ib, it);
   } // it
-  debug_printf("--- ip %x done\n", ip);
+  debug_printf("%d--- ip %d done\n", ib, ip);
 } // ip
-debug_printf("- done bundle!! ib:%x\n", ib);
+debug_printf("%d- done bundle!! ib:%d\n", ib, ib);
 
 #ifdef SIM
   char f_path_debug [1000];
diff --git a/deepsocflow/py/dataflow.py b/deepsocflow/py/dataflow.py
index 40c81ee..61c5799 100644
--- a/deepsocflow/py/dataflow.py
+++ b/deepsocflow/py/dataflow.py
@@ -309,16 +309,43 @@ def predict_bundle_performance(hw, r):
     clocks = clocks_p0 + (r.CP-1)*clocks_p
     mem_bits = mem_bits_p0 + (r.CP-1)*mem_bits_p
 
-    return clocks, mem_bits
+    operations = (r.XN * r.XH * r.XW * r.CI) * (r.KH * r.KW * r.CO)
+    utilization = operations / (hw.ROWS * hw.COLS * clocks)
+
+    return clocks, mem_bits, utilization
 
 def predict_model_performance(hw):
-    clocks_total = 0
+    d_out = {
+        'clocks_total': 0,
+        'mem_bytes_total': 0,
+        'utilization_all': [],
+        'clocks_all': [],
+        'mem_bytes_all': [],
+    }
     for b in BUNDLES:
-        clocks, mem_bits = predict_bundle_performance(hw=hw, r=b.r)
-        clocks_total += clocks
+        clocks, mem_bits, utilization = predict_bundle_performance(hw=hw, r=b.r)
+        d_out['clocks_total'] += clocks
+        d_out['mem_bytes_total'] += mem_bits/8
+
+        d_out['utilization_all'] += [utilization]
+        d_out['clocks_all'] += [clocks]
+        d_out['mem_bytes_all'] += [mem_bits/8]
+
+        print(f'---{b.ib}: util:{100*utilization:.2f} mem_mb:{mem_bits/8/1024**2:.2f} {b.r.XN=} {b.r.XH=} {b.r.XW=} {b.r.CI=} {b.r.CO=} {b.r.KH=} {b.r.KW=}')
+
+    d_out['seconds_per_batch'] = d_out['clocks_total'] / (hw.FREQ * 1e6)
+    d_out['frames_per_sec'] = hw.ROWS / d_out['seconds_per_batch']
+    d_out['ms_per_frame'] = 1000 / d_out['frames_per_sec']
+
+    with open('util.txt', 'w') as f:
+        for line in d_out['utilization_all']:
+            f.write(f"{line}\n")
+
+    with open('mem_bytes.txt', 'w') as f:
+        for line in d_out['mem_bytes_all']:
+            f.write(f"{line}\n")
 
-    time = clocks_total / (hw.FREQ * 1e6)
-    mem_bytes = mem_bits / 8
-    return time, mem_bytes
\ No newline at end of file
+    return d_out
\ No newline at end of file
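Note: the new utilization figure above is the fraction of the PE array's peak MAC issue rate that a bundle actually achieves over its predicted clock count. A minimal standalone sketch of the arithmetic, with made-up layer dimensions and clock count (illustrative only, not taken from this repo):

    ROWS, COLS, FREQ = 7, 96, 250               # PE grid and clock (MHz), as in resnet_50.py below
    XN, XH, XW, CI = 7, 56, 56, 64              # hypothetical bundle input: batch, height, width, channels
    KH, KW, CO = 3, 3, 64                       # hypothetical kernel size and output channels
    clocks = 2_000_000                          # stand-in for what predict_bundle_performance() returns

    operations = (XN * XH * XW * CI) * (KH * KW * CO)    # MACs the convolution performs
    utilization = operations / (ROWS * COLS * clocks)    # ~0.60 here; 1.0 means every PE busy every clock
    seconds = clocks / (FREQ * 1e6)                      # same clocks-to-time conversion as above
    print(f'{100*utilization:.1f}% utilized, {1e3*seconds:.2f} ms per batch')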
diff --git a/deepsocflow/py/hardware.py b/deepsocflow/py/hardware.py
index ccaf029..c8d1f58 100644
--- a/deepsocflow/py/hardware.py
+++ b/deepsocflow/py/hardware.py
@@ -227,7 +227,8 @@ def simulate(self, SIM='verilator', SIM_PATH=''):
       print(cmd)
       assert subprocess.run(cmd.split(' '), cwd='build').returncode == 0
 
-
+    exit()
+
     print("\n\nSIMULATING...\n\n")
     start = time.time()
diff --git a/deepsocflow/py/xmodel.py b/deepsocflow/py/xmodel.py
index f0fbcfc..284f04f 100644
--- a/deepsocflow/py/xmodel.py
+++ b/deepsocflow/py/xmodel.py
@@ -47,7 +47,7 @@ def export_inference(model, hw):
     BUNDLES.clear()
     user_model = model.layers[1]
-    input_shape = (1, *model.inputs[0].shape[1:])
+    input_shape = (hw.ROWS, *model.inputs[0].shape[1:])
     x_keras = tf.random.uniform(input_shape)
    x_qtensor = user_model.input_quant_layer(x_keras)
     out_keras = model(x_keras)
@@ -144,6 +144,9 @@ def export_inference(model, hw):
 
     print(f'add_buffer_map:{add_buffer_map}')
 
+    d_perf = predict_model_performance(hw=hw)
+    print(f"Predicted performance: {d_perf}")
+
     '''
     Write Runtime Headers
     '''
@@ -215,12 +218,12 @@ def export_inference(model, hw):
 
         out_type = 'float' if (ib == len(BUNDLES)-1 and b.softmax) else 'int32_t'
 
-        ch.write(f" {{.n={b.r.XN:<3}, .l={b.r.XL:<3}, .kw={b.r.KW:<3}, .coe={y_coe:<3}, .h={b.r.XH:<3}, .w={b.r.XW:<3}, .ci={b.r.CI:<4}, .co={b.r.CO:<4}, .w_kw2={b.r.XW-b.r.KW//2:<3}, .t={b.r.IT:<3}, .p={b.r.CP:<3}, .cm={b.r.CM:<3}, .cm_p0={b.r.CM_0:<3}, .on={b.r.ON:<3}, .oh={b.r.OH:<3}, .ow={b.r.OW:<3}, .oc={b.r.OC:<4}, .ch={b.r.CYH:<3}, .ph={b.r.PYH:<3}, .cw={b.r.CYW:<3}, .pw={b.r.PYW:<3}, ")
+        ch.write(f" {{.n={b.r.XN:<3}, .l={b.r.XL:<3}, .kw={b.r.KW:<3}, .coe={y_coe:<3}, .h={b.r.XH:<3}, .w={b.r.XW:<3}, .ci={b.r.CI:<4}, .co={b.r.CO:<4}, .w_kw2={b.r.XW-b.r.KW//2:<3}, .t={b.r.IT:<3}, .p={b.r.CP:<3}, .cm={b.r.CM:<3}, .cm_p0={b.r.CM_0:<3}, .on={b.r.ON:<3}, .oh={b.r.OH:<3}, .ow={b.r.OW:<3}, .oc={b.r.OC:<4}, .ch={b.r.CYH:<3}, .ph={b.r.PYH:<3}, .cw={b.r.CYW:<3}, .pw={b.r.PYW:<3}, .pkh={b.r.PKH:<3}, .psh={b.r.PSH:<3}, .pkw={b.r.PKW:<3}, .psw={b.r.PSW:<3}, ")
         ch.write( f".xp_words={xp_words:<6}, .b_offset={b_words:<5}, .w_bpt={w_bpt:<5}, .w_bpt_p0={w_bpt_p0:<5}, .x_bpt={x_bpt:<8}, .x_bpt_p0={x_bpt_p0:<8}, .o_words={o_words_b:<8}, .o_bytes={o_bytes_b:<8}, ")
         ch.write( f".ib_out={ib_out:<4}, .in_buffer_idx={in_buffer_idx:<3}, .out_buffer_idx={b.out_buffer_idx:<3}, .add_out_buffer_idx={add_out_buffer_idx:<2}, .add_in_buffer_idx={add_in_buffer_idx:<2}, ")
         ch.write( f".is_bias={1*(b.core.b is not None):<3}, .is_flatten={1*(b.flatten is not None):<3}, .is_softmax={1*(b.softmax is not None):<3}, ")
         ch.write( f".x_pad={b.r.X_PAD:<3}, .b_val_shift={b.core.bias_val_shift:<3}, .b_bias_shift={b.core.bias_b_shift:<3}, .ca_nzero={ca_nzero:<3}, .ca_shift={ca_shift:<3}, .ca_pl_scale={ca_pl_scale:<3}, .aa_nzero={aa_nzero:<3}, .aa_shift={aa_shift:<3}, .aa_pl_scale={aa_pl_scale:<3}, .pa_nzero={pa_nzero:<3}, .pa_shift={pa_shift:<3}, .pa_pl_scale={pa_pl_scale:<3}, .softmax_frac={b.softmax_frac:<3}, ")
-        ch.write( f".csh={b.r.CSH:<3}, .csh_shift={b.r.CSH_SHIFT:<3}, .pkh={b.r.PKH:<3}, .psh={b.r.PSH:<3}, .psh_shift={b.r.PSH_SHIFT:<3}, .csw={b.r.CSW:<3}, .csw_shift={b.r.CSW_SHIFT:<3}, .pkw={b.r.PKW:<3}, .psw={b.r.PSW:<3}, .psw_shift={b.r.PSW_SHIFT:<3}, .pool={pool_type:<10}, ")
+        ch.write( f".csh={b.r.CSH:<3}, .csh_shift={b.r.CSH_SHIFT:<3}, .psh_shift={b.r.PSH_SHIFT:<3}, .csw={b.r.CSW:<3}, .csw_shift={b.r.CSW_SHIFT:<3}, .psw_shift={b.r.PSW_SHIFT:<3}, .pool={pool_type:<10}, ")
         ch.write( f".softmax_max_f={b.softmax_max_f:<15}, ")
         ch.write( f".header={b.r.header:>23}u, ")
         ch.write( f".debug_nhwc_words={b.oe_exp_nhwc.size:<9} }}")
@@ -328,11 +331,6 @@ def export_inference(model, hw):
 
 
 def verify_inference(model, hw, SIM, SIM_PATH):
-
-    seconds, mem_bytes = predict_model_performance(hw=hw)
-    print(f"Predicted time on hardware: {1000*seconds:.5f} ms/frame")
-    print(f"Predicted fps: {1/seconds}")
-    print(f"Data movement (bytes): mem_bytes")
 
     '''
     RUN SIMULATION
diff --git a/deepsocflow/rtl/defines.svh b/deepsocflow/rtl/defines.svh
index 2514ace..7ad7936 100644
--- a/deepsocflow/rtl/defines.svh
+++ b/deepsocflow/rtl/defines.svh
@@ -1,4 +1,4 @@
-`include "../../run/work/config_hw.svh"
+`include "config_hw.svh"
 
 
 `define BITS_KW2 $clog2((`KW_MAX+1)/2)
diff --git a/resnet_50.py b/resnet_50.py
new file mode 100644
index 0000000..32911e7
--- /dev/null
+++ b/resnet_50.py
@@ -0,0 +1,129 @@
+import os
+import sys
+sys.path.append("../../")
+from deepsocflow import Bundle, Hardware, QModel, QInput
+
+'''
+0. Specify Hardware
+'''
+hw = Hardware (                          # Alternatively: hw = Hardware.from_json('hardware.json')
+        processing_elements = (7, 96)  , # (rows, columns) of multiply-add units
+        frequency_mhz       = 250      , # target clock frequency (MHz)
+        bits_input          = 4        , # bit width of input pixels and activations
+        bits_weights        = 4        , # bit width of weights
+        bits_sum            = 24       , # bit width of accumulator
+        bits_bias           = 16       , # bit width of bias
+        max_batch_size      = 64       , # maximum batch size (XN) supported
+        max_channels_in     = 2048     , # maximum input channel count (CI) supported
+        max_kernel_size     = 13       , # maximum convolution kernel size supported
+        max_image_size      = 512      , # maximum input height/width supported
+        ram_weights_depth   = 512      , # depth of on-chip weights RAM
+        ram_edges_depth     = 524288   , # depth of on-chip RAM for edge pixels
+        axi_width           = 128      , # width of the AXI data bus (bits)
+        target_cpu_int_bits = 32       , # integer bit width of the target CPU
+        valid_prob          = 1        , # probability with which the AXI-Stream s_valid signal is toggled in simulation
+        ready_prob          = 1        , # probability with which the AXI-Stream m_ready signal is toggled in simulation
+        data_dir            = 'vectors', # directory to store generated test vectors
+    )
+hw.export()                              # Generates: config_hw.svh, config_hw.tcl, config_tb.svh, hardware.json
+hw.export_vivado_tcl(board='zcu104')
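+# Sanity check (sketch only): peak MAC rate implied by the grid above, assuming
+# one MAC per PE per clock and that the Hardware object exposes the same
+# ROWS/COLS/FREQ attributes dataflow.py reads. ResNet-50 at 224x224 is roughly
+# 4e9 MACs per frame, so this bounds the ideal frame rate before utilization losses.
+peak_macs_per_sec = hw.ROWS * hw.COLS * hw.FREQ * 1e6   # 7*96*250e6 = 1.68e11 MACs/s
+print(f'peak {peak_macs_per_sec/1e9:.0f} GMAC/s; ideal ResNet-50 bound ~{peak_macs_per_sec/4e9:.0f} fps')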
+
+'''
+1. Build Model
+'''
+XN = 7
+input_shape = (XN,224,224,3)   # (XN, XH, XW, CI)
+
+QINT_BITS = 0
+kq = f'quantized_bits({hw.K_BITS},{QINT_BITS},False,True,1)'
+bq = f'quantized_bits({hw.B_BITS},{QINT_BITS},False,True,1)'
+qr = f'quantized_relu({hw.X_BITS},{QINT_BITS},negative_slope=0)'
+qb = f'quantized_bits({hw.X_BITS},{QINT_BITS},False,False,1)'
+
+
+x = x_in = QInput(shape=input_shape[1:], batch_size=XN, hw=hw, int_bits=QINT_BITS, name='input')
+
+x = Bundle( core= {'type':'conv' , 'filters':64 , 'kernel_size':7, 'strides':2, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr}, pool= {'type':'max', 'size':3, 'strides':2, 'padding':'same', 'act_str':qb} )(x) # conv1_conv
+x1 = Bundle( core= {'type':'conv' , 'filters':256, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb} )(x) # conv2_block1_0_conv
+x = Bundle( core= {'type':'conv' , 'filters':64 , 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv2_block1_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':64 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv2_block1_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':256, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv2_block1_3_conv
+# conv2_block1_add
+x = Bundle( core= {'type':'conv' , 'filters':64 , 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv2_block2_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':64 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv2_block2_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':256, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv2_block2_3_conv
+# conv2_block2_add
+x = Bundle( core= {'type':'conv' , 'filters':64 , 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv2_block3_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':64 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv2_block3_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':256, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv2_block3_3_conv
+# conv2_block3_add
+x1 = Bundle( core= {'type':'conv' , 'filters':512, 'kernel_size':1, 'strides':2, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb} )(x) # conv3_block1_0_conv
+x = Bundle( core= {'type':'conv' , 'filters':128, 'kernel_size':1, 'strides':2, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv3_block1_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':128, 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv3_block1_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':512, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv3_block1_3_conv
+# conv3_block1_add
+x = Bundle( core= {'type':'conv' , 'filters':128, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv3_block2_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':128, 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv3_block2_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':512, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv3_block2_3_conv
+# conv3_block2_add
+x = Bundle( core= {'type':'conv' , 'filters':128, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv3_block3_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':128, 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv3_block3_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':512, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv3_block3_3_conv
+# conv3_block3_add
+x = Bundle( core= {'type':'conv' , 'filters':128, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv3_block4_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':128, 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv3_block4_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':512, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv3_block4_3_conv
+# conv3_block4_add
+x1 = Bundle( core= {'type':'conv' , 'filters':1024, 'kernel_size':1, 'strides':2, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb} )(x) # conv4_block1_0_conv
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':1, 'strides':2, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block1_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block1_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':1024, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv4_block1_3_conv
+# conv4_block1_add
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block2_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block2_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':1024, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv4_block2_3_conv
+# conv4_block2_add
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block3_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block3_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':1024, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv4_block3_3_conv
+# conv4_block3_add
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block4_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block4_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':1024, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv4_block4_3_conv
+# conv4_block4_add
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block5_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block5_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':1024, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv4_block5_3_conv
+# conv4_block5_add
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block6_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':256 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv4_block6_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':1024, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv4_block6_3_conv
+# conv4_block6_add
+x1 = Bundle( core= {'type':'conv' , 'filters':2048, 'kernel_size':1, 'strides':2, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb} )(x) # conv5_block1_0_conv
+x = Bundle( core= {'type':'conv' , 'filters':512 , 'kernel_size':1, 'strides':2, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv5_block1_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':512 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv5_block1_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':2048, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv5_block1_3_conv
+# conv5_block1_add
+x = Bundle( core= {'type':'conv' , 'filters':512 , 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv5_block2_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':512 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv5_block2_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':2048, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr})(x, x1) # conv5_block2_3_conv
+# conv5_block2_add
+x = Bundle( core= {'type':'conv' , 'filters':512 , 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv5_block3_1_conv
+x = Bundle( core= {'type':'conv' , 'filters':512 , 'kernel_size':3, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qr} )(x) # conv5_block3_2_conv
+x = x1 = Bundle( core= {'type':'conv' , 'filters':2048, 'kernel_size':1, 'strides':1, 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, add = {'act_str':qr}, pool= {'type':'avg', 'size':7, 'strides':7, 'padding':'same', 'act_str':qb}, flatten=True )(x, x1) # conv5_block3_3_conv
+# conv5_block3_add
+x = Bundle( core= {'type':'dense', 'units':1000, 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':qb}, softmax= True)(x)
+
+
+model = QModel(inputs=x_in.raw, outputs=x)
+
+for layer in model.layers:
+    layer.trainable = False
+
+model.compile()
+model.summary()
+
+
+model.export_inference(x=model.random_input, hw=hw)   # Runs forward pass in float & int, compares them. Generates: config_fw.h (C firmware), weights.bin, expected.bin
+(SIM, SIM_PATH) = ('xsim', "F:/Xilinx/Vivado/2022.1/bin/") if os.name=='nt' else ('verilator', '')
+model.verify_inference(SIM, SIM_PATH)                 # Runs SystemVerilog testbench with the model & weights, randomizing handshakes, testing with actual C firmware in simulation
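
A quick way to eyeball the per-bundle stats that predict_model_performance now dumps (util.txt and mem_bytes.txt, one value per line, written to the working directory). Sketch only; assumes numpy is available:

    import numpy as np
    util = np.loadtxt('util.txt')        # per-bundle PE utilization, 0..1
    mem = np.loadtxt('mem_bytes.txt')    # per-bundle DDR traffic, in bytes
    print(f'mean util {100*util.mean():.1f}%, worst {100*util.min():.1f}%, total traffic {mem.sum()/1024**2:.1f} MiB')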