Skip to content

Commit

Permalink
Export performance
Browse files Browse the repository at this point in the history
  • Loading branch information
Aba committed Aug 1, 2024
1 parent 3e3bda2 commit 25c6d9b
Show file tree
Hide file tree
Showing 6 changed files with 181 additions and 26 deletions.
18 changes: 9 additions & 9 deletions deepsocflow/c/runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,20 @@ typedef float f32;
typedef double f64;

typedef const struct {
const u16 n, l, kw, coe, h, w, ci, co, w_kw2, t, p, cm, cm_p0, on, oh, ow, oc, ch, ph, cw, pw;
const u16 n, l, kw, coe, h, w, ci, co, w_kw2, t, p, cm, cm_p0, on, oh, ow, oc, ch, ph, cw, pw, pkh, psh, pkw, psw;
const i32 xp_words, b_offset, w_bpt, w_bpt_p0, x_bpt, x_bpt_p0, o_words, o_bytes;
const i8 ib_out, in_buffer_idx, out_buffer_idx, add_out_buffer_idx, add_in_buffer_idx;
const i8 is_bias, is_pool, is_flatten, is_softmax;
const i8 x_pad, b_val_shift, b_bias_shift, ca_nzero, ca_shift, ca_pl_scale, aa_nzero, aa_shift, aa_pl_scale, pa_nzero, pa_shift, pa_pl_scale, softmax_frac;
const i8 csh, csh_shift, pkh, psh, psh_shift, csw, csw_shift, pkw, psw, psw_shift, pool;
const i8 csh, csh_shift, psh_shift, csw, csw_shift, psw_shift, pool;
const f32 softmax_max_f;
const u64 header;
const i32 debug_nhwc_words;
} Bundle_t;

typedef enum {POOL_NONE, POOL_MAX, POOL_AVG} Pool_t;

#include "../../run/work/config_fw.h"
#include "config_fw.h"

#define X_BITS (1 << X_BITS_L2)
#define X_WORDS_PER_BYTE (8 / X_BITS)
Expand Down Expand Up @@ -486,17 +486,17 @@ extern EXT_C u8 model_run(Memory_st *restrict mp, void *p_config) {
fclose(fp_raw);
#endif
set_config(p_config, A_DONE_READ + ocm_bank, 1);
debug_printf("-------- iw_kw2 0x%x done \n", iw_kw2);
debug_printf("%d-------- iw_kw2 %d done \n", ib, iw_kw2);
} // iw_kw2
debug_printf("-------- il %x done\n", il);
debug_printf("%d-------- il %d done\n", ib, il);
} // il
debug_printf("-------- in %x done\n", in);
debug_printf("%d-------- in %d done\n", ib, in);
} // in
debug_printf("------ it %x done\n", it);
debug_printf("%d------ it %d done\n", ib, it);
} // it
debug_printf("--- ip %x done\n", ip);
debug_printf("%d--- ip %d done\n", ib, ip);
} // ip
debug_printf("- done bundle!! ib:%x\n", ib);
debug_printf("%d- done bundle!! ib:%d\n", ib, ib);

#ifdef SIM
char f_path_debug [1000];
Expand Down
41 changes: 34 additions & 7 deletions deepsocflow/py/dataflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,16 +309,43 @@ def predict_bundle_performance(hw, r):
clocks = clocks_p0 + (r.CP-1)*clocks_p
mem_bits = mem_bits_p0 + (r.CP-1)*mem_bits_p

return clocks, mem_bits
operations = (r.XN * r.XH * r.XW * r.CI) * (r.KH * r.KW * r.CO)
utilization = operations / (hw.ROWS * hw.COLS * clocks)


return clocks, mem_bits, utilization


def predict_model_performance(hw):

clocks_total = 0
d_out = {
'clocks_total': 0,
'mem_bytes_total': 0,
'utilization_all': [],
'clocks_all': [],
'mem_bytes_all': [],
}
for b in BUNDLES:
clocks, mem_bits = predict_bundle_performance(hw=hw, r=b.r)
clocks_total += clocks
clocks, mem_bits, utilization = predict_bundle_performance(hw=hw, r=b.r)
d_out['clocks_total'] += clocks
d_out['mem_bytes_total'] += mem_bits/8

d_out['utilization_all'] += [utilization]
d_out['clocks_all'] += [clocks]
d_out['mem_bytes_all'] += [mem_bits/8]

print(f'---{b.ib}: util:{100*utilization:.2f} mem_mb:{mem_bits/1024**2:.2f} {b.r.XN=} {b.r.XH=} {b.r.XW=} {b.r.CI=} {b.r.CO=} {b.r.KH=} {b.r.KW=}')

d_out['seconds_per_batch'] = d_out['clocks_total'] / (hw.FREQ * 1e6)
d_out['frames_per_sec'] = hw.ROWS / d_out['seconds_per_batch']
d_out['ms_per_frame'] = 1000 / d_out['frames_per_sec']

with open('util.txt', 'w') as f:
for line in d_out['utilization_all']:
f.write(f"{line}\n")

with open('mem_bytes.txt', 'w') as f:
for line in d_out['mem_bytes_all']:
f.write(f"{line}\n")

time = clocks_total / (hw.FREQ * 1e6)
mem_bytes = mem_bits / 8
return time, mem_bytes
return d_out
3 changes: 2 additions & 1 deletion deepsocflow/py/hardware.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,8 @@ def simulate(self, SIM='verilator', SIM_PATH=''):
print(cmd)
assert subprocess.run(cmd.split(' '), cwd='build').returncode == 0


exit()

print("\n\nSIMULATING...\n\n")
start = time.time()

Expand Down
14 changes: 6 additions & 8 deletions deepsocflow/py/xmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def export_inference(model, hw):
BUNDLES.clear()

user_model = model.layers[1]
input_shape = (1, *model.inputs[0].shape[1:])
input_shape = (hw.ROWS, *model.inputs[0].shape[1:])
x_keras = tf.random.uniform(input_shape)
x_qtensor = user_model.input_quant_layer(x_keras)
out_keras = model(x_keras)
Expand Down Expand Up @@ -144,6 +144,9 @@ def export_inference(model, hw):
print(f'add_buffer_map:{add_buffer_map}')


d_perf = predict_model_performance(hw=hw)
print(f"Predicted performance: {d_perf}")

'''
Write Runtime Headers
'''
Expand Down Expand Up @@ -215,12 +218,12 @@ def export_inference(model, hw):

out_type = 'float' if (ib == len(BUNDLES)-1 and b.softmax) else 'int32_t'

ch.write(f" {{.n={b.r.XN:<3}, .l={b.r.XL:<3}, .kw={b.r.KW:<3}, .coe={y_coe:<3}, .h={b.r.XH:<3}, .w={b.r.XW:<3}, .ci={b.r.CI:<4}, .co={b.r.CO:<4}, .w_kw2={b.r.XW-b.r.KW//2:<3}, .t={b.r.IT:<3}, .p={b.r.CP:<3}, .cm={b.r.CM:<3}, .cm_p0={b.r.CM_0:<3}, .on={b.r.ON:<3}, .oh={b.r.OH:<3}, .ow={b.r.OW:<3}, .oc={b.r.OC:<4}, .ch={b.r.CYH:<3}, .ph={b.r.PYH:<3}, .cw={b.r.CYW:<3}, .pw={b.r.PYW:<3}, ")
ch.write(f" {{.n={b.r.XN:<3}, .l={b.r.XL:<3}, .kw={b.r.KW:<3}, .coe={y_coe:<3}, .h={b.r.XH:<3}, .w={b.r.XW:<3}, .ci={b.r.CI:<4}, .co={b.r.CO:<4}, .w_kw2={b.r.XW-b.r.KW//2:<3}, .t={b.r.IT:<3}, .p={b.r.CP:<3}, .cm={b.r.CM:<3}, .cm_p0={b.r.CM_0:<3}, .on={b.r.ON:<3}, .oh={b.r.OH:<3}, .ow={b.r.OW:<3}, .oc={b.r.OC:<4}, .ch={b.r.CYH:<3}, .ph={b.r.PYH:<3}, .cw={b.r.CYW:<3}, .pw={b.r.PYW:<3}, .pkh={b.r.PKH:<3}, .psh={b.r.PSH:<3}, .pkw={b.r.PKW:<3}, .psw={b.r.PSW:<3}, ")
ch.write( f".xp_words={xp_words:<6}, .b_offset={b_words:<5}, .w_bpt={w_bpt:<5}, .w_bpt_p0={w_bpt_p0:<5}, .x_bpt={x_bpt:<8}, .x_bpt_p0={x_bpt_p0:<8}, .o_words={o_words_b:<8}, .o_bytes={o_bytes_b:<8}, ")
ch.write( f".ib_out={ib_out:<4}, .in_buffer_idx={in_buffer_idx:<3}, .out_buffer_idx={b.out_buffer_idx:<3}, .add_out_buffer_idx={add_out_buffer_idx:<2}, .add_in_buffer_idx={add_in_buffer_idx:<2}, ")
ch.write( f".is_bias={1*(b.core.b is not None):<3}, .is_flatten={1*(b.flatten is not None):<3}, .is_softmax={1*(b.softmax is not None):<3}, ")
ch.write( f".x_pad={b.r.X_PAD:<3}, .b_val_shift={b.core.bias_val_shift:<3}, .b_bias_shift={b.core.bias_b_shift:<3}, .ca_nzero={ca_nzero:<3}, .ca_shift={ca_shift:<3}, .ca_pl_scale={ca_pl_scale:<3}, .aa_nzero={aa_nzero:<3}, .aa_shift={aa_shift:<3}, .aa_pl_scale={aa_pl_scale:<3}, .pa_nzero={pa_nzero:<3}, .pa_shift={pa_shift:<3}, .pa_pl_scale={pa_pl_scale:<3}, .softmax_frac={b.softmax_frac:<3}, ")
ch.write( f".csh={b.r.CSH:<3}, .csh_shift={b.r.CSH_SHIFT:<3}, .pkh={b.r.PKH:<3}, .psh={b.r.PSH:<3}, .psh_shift={b.r.PSH_SHIFT:<3}, .csw={b.r.CSW:<3}, .csw_shift={b.r.CSW_SHIFT:<3}, .pkw={b.r.PKW:<3}, .psw={b.r.PSW:<3}, .psw_shift={b.r.PSW_SHIFT:<3}, .pool={pool_type:<10}, ")
ch.write( f".csh={b.r.CSH:<3}, .csh_shift={b.r.CSH_SHIFT:<3}, .psh_shift={b.r.PSH_SHIFT:<3}, .csw={b.r.CSW:<3}, .csw_shift={b.r.CSW_SHIFT:<3}, .psw_shift={b.r.PSW_SHIFT:<3}, .pool={pool_type:<10}, ")
ch.write( f".softmax_max_f={b.softmax_max_f:<15}, ")
ch.write( f".header={b.r.header:>23}u, ")
ch.write( f".debug_nhwc_words={b.oe_exp_nhwc.size:<9} }}")
Expand Down Expand Up @@ -328,11 +331,6 @@ def export_inference(model, hw):


def verify_inference(model, hw, SIM, SIM_PATH):

seconds, mem_bytes = predict_model_performance(hw=hw)
print(f"Predicted time on hardware: {1000*seconds:.5f} ms/frame")
print(f"Predicted fps: {1/seconds}")
print(f"Data movement (bytes): mem_bytes")

'''
RUN SIMULATION
Expand Down
2 changes: 1 addition & 1 deletion deepsocflow/rtl/defines.svh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
`include "../../run/work/config_hw.svh"
`include "config_hw.svh"

`define BITS_KW2 $clog2((`KW_MAX+1)/2)

Expand Down
Loading

0 comments on commit 25c6d9b

Please sign in to comment.