Skip to content

Commit

Permalink
Merge pull request #2808 from AlexandreSinger/feature-ap-testing
Browse files Browse the repository at this point in the history
[AP] Testing Infrastructure
  • Loading branch information
vaughnbetz authored Nov 18, 2024
2 parents 66f35d9 + 4650ba1 commit 9d920ec
Show file tree
Hide file tree
Showing 23 changed files with 6,583 additions and 7 deletions.
24 changes: 21 additions & 3 deletions vpr/src/analytical_place/full_legalizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "logic_types.h"
#include "pack.h"
#include "physical_types.h"
#include "place_and_route.h"
#include "place_constraints.h"
#include "place_macro.h"
#include "verify_clustering.h"
Expand Down Expand Up @@ -103,9 +104,6 @@ class APClusterPlacer {
g_vpr_ctx.mutable_placement().cube_bb = false;
g_vpr_ctx.mutable_placement().compressed_block_grids = create_compressed_block_grids();

// Initialize the macros
blk_loc_registry.mutable_place_macros().alloc_and_load_placement_macros(directs);

// TODO: The next few steps will be basically a direct copy of the initial
// placement code since it does everything we need! It would be nice
// to share the code.
Expand Down Expand Up @@ -133,6 +131,13 @@ class APClusterPlacer {
const ClusteringContext& cluster_ctx = g_vpr_ctx.clustering();
const auto& block_locs = g_vpr_ctx.placement().block_locs();
auto& blk_loc_registry = g_vpr_ctx.mutable_placement().mutable_blk_loc_registry();
// If this block has already been placed, just return true.
// TODO: This should be investigated further. What I think is happening
// is that a macro is being placed which contains another cluster.
// This must be a carry chain. May need to rewrite the algorithm
// below to use macros instead of clusters.
if (is_block_placed(clb_blk_id, block_locs))
return true;
VTR_ASSERT(!is_block_placed(clb_blk_id, block_locs) && "Block already placed. Is this intentional?");
t_pl_macro pl_macro = get_macro(clb_blk_id);
t_pl_loc to_loc;
Expand Down Expand Up @@ -170,6 +175,10 @@ class APClusterPlacer {
bool exhaustively_place_cluster(ClusterBlockId clb_blk_id) {
const auto& block_locs = g_vpr_ctx.placement().block_locs();
auto& blk_loc_registry = g_vpr_ctx.mutable_placement().mutable_blk_loc_registry();
// If this block has already been placed, just return true.
// TODO: See similar comment above.
if (is_block_placed(clb_blk_id, block_locs))
return true;
VTR_ASSERT(!is_block_placed(clb_blk_id, block_locs) && "Block already placed. Is this intentional?");
t_pl_macro pl_macro = get_macro(clb_blk_id);
const PartitionRegion& pr = is_cluster_constrained(clb_blk_id) ? g_vpr_ctx.floorplanning().cluster_constraints[clb_blk_id] : get_device_partition_region();
Expand Down Expand Up @@ -346,6 +355,10 @@ void FullLegalizer::place_clusters(const ClusteredNetlist& clb_nlist,
for (APBlockId ap_blk_id : ap_netlist_.blocks()) {
const t_pack_molecule* blk_mol = ap_netlist_.block_molecule(ap_blk_id);
for (AtomBlockId atom_blk_id : blk_mol->atom_block_ids) {
// See issue #2791, some of the atom_block_ids may be invalid. They
// can safely be ignored.
if (!atom_blk_id.is_valid())
continue;
// Ensure that this block is not in any other AP block. That would
// be weird.
VTR_ASSERT(!atom_to_ap_block[atom_blk_id].is_valid());
Expand Down Expand Up @@ -429,5 +442,10 @@ void FullLegalizer::legalize(const PartialPlacement& p_placement) {
"Aborting program.\n",
num_placement_errors);
}

// TODO: This was taken from vpr_api. Not sure why it is needed. It should
//       be made part of the placement, and verify placement should check
//       for it.
post_place_sync();
}

18 changes: 18 additions & 0 deletions vpr/src/analytical_place/partial_legalizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ static inline PrimitiveVector get_primitive_mass(APBlockId blk_id,
PrimitiveVector mass;
const t_pack_molecule* mol = netlist.block_molecule(blk_id);
for (AtomBlockId atom_blk_id : mol->atom_block_ids) {
// See issue #2791, some of the atom_block_ids may be invalid. They can
// safely be ignored.
if (!atom_blk_id.is_valid())
continue;
const t_model* model = g_vpr_ctx.atom().nlist.block_model(atom_blk_id);
VTR_ASSERT_DEBUG(model->index >= 0);
mass.add_val_to_dim(get_model_mass(model), model->index);
Expand Down Expand Up @@ -354,6 +358,8 @@ void FlowBasedLegalizer::compute_neighbors_of_bin(LegalizerBinId src_bin_id, siz
// Create visited flags for each bin. Set the source to visited.
vtr::vector_map<LegalizerBinId, bool> bin_visited(bins_.size(), false);
bin_visited[src_bin_id] = true;
// Create a distance count for each bin from the src.
vtr::vector_map<LegalizerBinId, unsigned> bin_distance(bins_.size(), 0);
// Flags to check if a specific model has been found in the given direction.
// In this case, direction is the direction of the largest component of the
// manhattan distance between the source bin and the target bin.
Expand Down Expand Up @@ -401,6 +407,11 @@ void FlowBasedLegalizer::compute_neighbors_of_bin(LegalizerBinId src_bin_id, siz
// Pop the bin from the queue.
LegalizerBinId bin_id = q.front();
q.pop();
// If the distance of this bin from the source is too large, do not
// explore.
unsigned curr_bin_dist = bin_distance[bin_id];
if (curr_bin_dist > max_bin_neighbor_dist_)
continue;
// Get the direct neighbors of the bin (neighbors that are directly
// touching).
auto direct_neighbors = get_direct_neighbors_of_bin(bin_id, bins_, tile_bin_);
Expand Down Expand Up @@ -431,6 +442,8 @@ void FlowBasedLegalizer::compute_neighbors_of_bin(LegalizerBinId src_bin_id, siz
}
// Mark this bin as visited and push it onto the queue.
bin_visited[dir_neighbor_bin_id] = true;
// Update the distance.
bin_distance[dir_neighbor_bin_id] = curr_bin_dist + 1;
// FIXME: This may be inefficient since it will do an entire BFS of
// the grid if a neighbor of a given type does not exist in
// a specific direction. Should add a check to see if it is
Expand Down Expand Up @@ -506,6 +519,7 @@ FlowBasedLegalizer::FlowBasedLegalizer(const APNetlist& netlist)
tile_bin_[x][y] = new_bin_id;
}
}

// Get the number of models in the device.
size_t num_models = get_num_models();
// Connect the bins.
Expand All @@ -524,10 +538,14 @@ FlowBasedLegalizer::FlowBasedLegalizer(const APNetlist& netlist)
compute_neighbors_of_bin(tile_bin_[x][y], num_models);
}
}

// Pre-compute the masses of the APBlocks
VTR_LOGV(log_verbosity_ >= 10, "Pre-computing the block masses...\n");
for (APBlockId blk_id : netlist.blocks()) {
block_masses_.insert(blk_id, get_primitive_mass(blk_id, netlist));
}
VTR_LOGV(log_verbosity_ >= 10, "Finished pre-computing the block masses.\n");

// Initialize the block_bins.
block_bins_.resize(netlist.blocks().size(), LegalizerBinId::INVALID());
}
Expand Down
12 changes: 12 additions & 0 deletions vpr/src/analytical_place/partial_legalizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,18 @@ class FlowBasedLegalizer : public PartialLegalizer {
/// enough space to flow blocks.
static constexpr size_t max_num_iterations_ = 100;

/// @brief The maximum number of hops away a neighbor of a bin can be, where
///        the number of hops is the minimum number of bins you need to pass
///        through to get to this neighbor (manhattan distance in bins-space).
///
/// This is used to speed up the computation of the neighbors of bins since
/// it reduces the amount of the graph that needs to be explored.
///
/// NOTE(review): the neighbor search appears to stop expanding a bin only
///               when its distance is strictly greater than this value, so
///               bins one hop beyond this limit may still be reached --
///               confirm that this is the intended bound.
///
/// TODO: This may need to be made per primitive type since some types may
///       need to explore more of the architecture than others to find
///       sufficient neighbors.
static constexpr unsigned max_bin_neighbor_dist_ = 4;

/// @brief A vector of all the bins in the legalizer.
vtr::vector_map<LegalizerBinId, LegalizerBin> bins_;

Expand Down
29 changes: 29 additions & 0 deletions vtr_flow/arch/timing/k6_frac_N10_40nm.xml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,35 @@
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</auto_layout>
<!--
This architecture is commonly used for the MCNC Benchmark Suite. Below
are a set of fixed layouts which were found to work well for these
benchmarks. They were found by finding the minimum device size for each
benchmark and categorizing the benchmarks into the different fixed
layouts. Each fixed layout was chosen to be around 1.5x larger than the
previous.
-->
<fixed_layout name="mcnc_small" width="11" height="11">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</fixed_layout>
<fixed_layout name="mcnc_medium" width="16" height="16">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</fixed_layout>
<fixed_layout name="mcnc_large" width="22" height="22">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</fixed_layout>
</layout>
<device>
<!-- VB & JL: Using Ian Kuon's transistor sizing and drive strength data for routing, at 40 nm. Ian used BPTM
Expand Down
73 changes: 73 additions & 0 deletions vtr_flow/arch/timing/k6_frac_N10_frac_chain_mem32K_40nm.xml
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,79 @@
<col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
<col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
</auto_layout>
<!--
This architecture is commonly used for the VTR Benchmark Suite. Below
are a set of fixed layouts which were found to work well for these
benchmarks. They were found by finding the minimum device size for each
benchmark and categorizing the benchmarks into the different fixed
layouts. Each fixed layout was chosen to be around 1.5x larger than the
previous.
-->
<fixed_layout name="vtr_extra_small" width="20" height="20">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
<!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
<col type="mult_36" startx="6" starty="1" repeatx="8" priority="20"/>
<col type="EMPTY" startx="6" repeatx="8" starty="1" priority="19"/>
<!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
<col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
<col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
</fixed_layout>
<fixed_layout name="vtr_small" width="30" height="30">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
<!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
<col type="mult_36" startx="6" starty="1" repeatx="8" priority="20"/>
<col type="EMPTY" startx="6" repeatx="8" starty="1" priority="19"/>
<!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
<col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
<col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
</fixed_layout>
<fixed_layout name="vtr_medium" width="42" height="42">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
<!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
<col type="mult_36" startx="6" starty="1" repeatx="8" priority="20"/>
<col type="EMPTY" startx="6" repeatx="8" starty="1" priority="19"/>
<!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
<col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
<col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
</fixed_layout>
<fixed_layout name="vtr_large" width="65" height="65">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
<!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
<col type="mult_36" startx="6" starty="1" repeatx="8" priority="20"/>
<col type="EMPTY" startx="6" repeatx="8" starty="1" priority="19"/>
<!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
<col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
<col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
</fixed_layout>
<fixed_layout name="vtr_extra_large" width="105" height="105">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
<!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
<col type="mult_36" startx="6" starty="1" repeatx="8" priority="20"/>
<col type="EMPTY" startx="6" repeatx="8" starty="1" priority="19"/>
<!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
<col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
<col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
</fixed_layout>
</layout>
<device>
<!-- VB & JL: Using Ian Kuon's transistor sizing and drive strength data for routing, at 40 nm. Ian used BPTM
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
%include "common/pass_requirements.vpr_status.txt"
%include "timing/pass_requirements.vpr_ap.txt"
%include "timing/pass_requirements.vpr_route_fixed_chan_width.txt"

%include "common/pass_requirements.vtr_benchmarks.txt"
15 changes: 15 additions & 0 deletions vtr_flow/parse/qor_config/qor_ap_fixed_chan_width.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# This collects QoR data that is interesting for the AP flow running on a fixed
# channel width.

vpr_status;output.txt;vpr_status=(.*)
total_wirelength;vpr.out;\s*Total wirelength: (\d+)
# Final critical path delay (least slack): 6.34202 ns, Fmax: 157.678 MHz
crit_path_delay;vpr.out;Critical path: (.*) ns
ap_runtime;vpr.out;Analytical Placement took (.*) seconds
pack_runtime;vpr.out;Packing took (.*) seconds
# TODO: Find a better way to match "Placement" without also matching "Analytical Placement".
place_runtime;vpr.out;^(?!.*\bAnalytical\b).*Placement took (.*) seconds
route_runtime;vpr.out;Routing took (.*) seconds
total_runtime;vpr.out;The entire flow of VPR took (.*) seconds
num_clb;vpr.out;Netlist clb blocks:\s*(\d+)

5 changes: 5 additions & 0 deletions vtr_flow/scripts/python_libs/vtr/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ def parse_circuit_constraint_list(
"arch",
"device",
"constraints",
"route_chan_width",
]
)

Expand Down Expand Up @@ -792,6 +793,10 @@ def apply_cmd_line_circuit_constraints(cmd, circuit, config):
circuit_vpr_constraints = config.circuit_constraints[circuit]["constraints"]
if circuit_vpr_constraints is not None:
cmd += ["--read_vpr_constraints", circuit_vpr_constraints]
# Check if the circuit has constrained route channel width.
constrained_route_w = config.circuit_constraints[circuit]["route_chan_width"]
if constrained_route_w is not None:
cmd += ["--route_chan_width", constrained_route_w]

def resolve_vtr_source_file(config, filename, base_dir=""):
"""
Expand Down
Loading

0 comments on commit 9d920ec

Please sign in to comment.