Merge pull request #2808 from AlexandreSinger/feature-ap-testing

[AP] Testing Infrastructure
verilog-to-routing · Nov 18, 2024 · 9d920ec · 9d920ec
2 parents 66f35d9 + 4650ba1
commit 9d920ec
Show file tree

Hide file tree

Showing 23 changed files with 6,583 additions and 7 deletions.
diff --git a/vpr/src/analytical_place/full_legalizer.cpp b/vpr/src/analytical_place/full_legalizer.cpp
@@ -26,6 +26,7 @@
 #include "logic_types.h"
 #include "pack.h"
 #include "physical_types.h"
+#include "place_and_route.h"
 #include "place_constraints.h"
 #include "place_macro.h"
 #include "verify_clustering.h"
@@ -103,9 +104,6 @@ class APClusterPlacer {
         g_vpr_ctx.mutable_placement().cube_bb = false;
         g_vpr_ctx.mutable_placement().compressed_block_grids = create_compressed_block_grids();
 
-        // Initialize the macros
-        blk_loc_registry.mutable_place_macros().alloc_and_load_placement_macros(directs);
-
         // TODO: The next few steps will be basically a direct copy of the initial
         //       placement code since it does everything we need! It would be nice
         //       to share the code.
@@ -133,6 +131,13 @@ class APClusterPlacer {
         const ClusteringContext& cluster_ctx = g_vpr_ctx.clustering();
         const auto& block_locs = g_vpr_ctx.placement().block_locs();
         auto& blk_loc_registry = g_vpr_ctx.mutable_placement().mutable_blk_loc_registry();
+        // If this block has already been placed, just return true.
+        // TODO: This should be investigated further. What I think is happening
+        //       is that a macro is being placed which contains another cluster.
+        //       This must be a carry chain. May need to rewrite the algorithm
+        //       below to use macros instead of clusters.
+        if (is_block_placed(clb_blk_id, block_locs))
+            return true;
         VTR_ASSERT(!is_block_placed(clb_blk_id, block_locs) && "Block already placed. Is this intentional?");
         t_pl_macro pl_macro = get_macro(clb_blk_id);
         t_pl_loc to_loc;
@@ -170,6 +175,10 @@ class APClusterPlacer {
     bool exhaustively_place_cluster(ClusterBlockId clb_blk_id) {
         const auto& block_locs = g_vpr_ctx.placement().block_locs();
         auto& blk_loc_registry = g_vpr_ctx.mutable_placement().mutable_blk_loc_registry();
+        // If this block has already been placed, just return true.
+        // TODO: See similar comment above.
+        if (is_block_placed(clb_blk_id, block_locs))
+            return true;
         VTR_ASSERT(!is_block_placed(clb_blk_id, block_locs) && "Block already placed. Is this intentional?");
         t_pl_macro pl_macro = get_macro(clb_blk_id);
         const PartitionRegion& pr = is_cluster_constrained(clb_blk_id) ? g_vpr_ctx.floorplanning().cluster_constraints[clb_blk_id] : get_device_partition_region();
@@ -346,6 +355,10 @@ void FullLegalizer::place_clusters(const ClusteredNetlist& clb_nlist,
     for (APBlockId ap_blk_id : ap_netlist_.blocks()) {
         const t_pack_molecule* blk_mol = ap_netlist_.block_molecule(ap_blk_id);
         for (AtomBlockId atom_blk_id : blk_mol->atom_block_ids) {
+            // See issue #2791, some of the atom_block_ids may be invalid. They
+            // can safely be ignored.
+            if (!atom_blk_id.is_valid())
+                continue;
             // Ensure that this block is not in any other AP block. That would
             // be weird.
             VTR_ASSERT(!atom_to_ap_block[atom_blk_id].is_valid());
@@ -429,5 +442,10 @@ void FullLegalizer::legalize(const PartialPlacement& p_placement) {
                   "Aborting program.\n",
                   num_placement_errors);
     }
+
+    // TODO: This was taken from vpr_api. Not sure why it is needed. Should be
+    //       made part of the placement and verify placement should check for
+    //       it.
+    post_place_sync();
 }
 
diff --git a/vpr/src/analytical_place/partial_legalizer.cpp b/vpr/src/analytical_place/partial_legalizer.cpp
@@ -76,6 +76,10 @@ static inline PrimitiveVector get_primitive_mass(APBlockId blk_id,
     PrimitiveVector mass;
     const t_pack_molecule* mol = netlist.block_molecule(blk_id);
     for (AtomBlockId atom_blk_id : mol->atom_block_ids) {
+        // See issue #2791, some of the atom_block_ids may be invalid. They can
+        // safely be ignored.
+        if (!atom_blk_id.is_valid())
+            continue;
         const t_model* model = g_vpr_ctx.atom().nlist.block_model(atom_blk_id);
         VTR_ASSERT_DEBUG(model->index >= 0);
         mass.add_val_to_dim(get_model_mass(model), model->index);
@@ -354,6 +358,8 @@ void FlowBasedLegalizer::compute_neighbors_of_bin(LegalizerBinId src_bin_id, siz
     // Create visited flags for each bin. Set the source to visited.
     vtr::vector_map<LegalizerBinId, bool> bin_visited(bins_.size(), false);
     bin_visited[src_bin_id] = true;
+    // Create a distance count for each bin from the src.
+    vtr::vector_map<LegalizerBinId, unsigned> bin_distance(bins_.size(), 0);
     // Flags to check if a specific model has been found in the given direction.
     // In this case, direction is the direction of the largest component of the
     // manhattan distance between the source bin and the target bin.
@@ -401,6 +407,11 @@ void FlowBasedLegalizer::compute_neighbors_of_bin(LegalizerBinId src_bin_id, siz
         // Pop the bin from the queue.
         LegalizerBinId bin_id = q.front();
         q.pop();
+        // If the distance of this bin from the source is too large, do not
+        // explore it.
+        unsigned curr_bin_dist = bin_distance[bin_id];
+        if (curr_bin_dist > max_bin_neighbor_dist_)
+            continue;
         // Get the direct neighbors of the bin (neighbors that are directly
         // touching).
         auto direct_neighbors = get_direct_neighbors_of_bin(bin_id, bins_, tile_bin_);
@@ -431,6 +442,8 @@ void FlowBasedLegalizer::compute_neighbors_of_bin(LegalizerBinId src_bin_id, siz
             }
             // Mark this bin as visited and push it onto the queue.
             bin_visited[dir_neighbor_bin_id] = true;
+            // Update the distance.
+            bin_distance[dir_neighbor_bin_id] = curr_bin_dist + 1;
             // FIXME: This may be inefficient since it will do an entire BFS of
             //        the grid if a neighbor of a given type does not exist in
             //        a specific direction. Should add a check to see if it is
@@ -506,6 +519,7 @@ FlowBasedLegalizer::FlowBasedLegalizer(const APNetlist& netlist)
             tile_bin_[x][y] = new_bin_id;
         }
     }
+
     // Get the number of models in the device.
     size_t num_models = get_num_models();
     // Connect the bins.
@@ -524,10 +538,14 @@ FlowBasedLegalizer::FlowBasedLegalizer(const APNetlist& netlist)
             compute_neighbors_of_bin(tile_bin_[x][y], num_models);
         }
     }
+
     // Pre-compute the masses of the APBlocks
+    VTR_LOGV(log_verbosity_ >= 10, "Pre-computing the block masses...\n");
     for (APBlockId blk_id : netlist.blocks()) {
         block_masses_.insert(blk_id, get_primitive_mass(blk_id, netlist));
     }
+    VTR_LOGV(log_verbosity_ >= 10, "Finished pre-computing the block masses.\n");
+
     // Initialize the block_bins.
     block_bins_.resize(netlist.blocks().size(), LegalizerBinId::INVALID());
 }

diff --git a/vpr/src/analytical_place/partial_legalizer.h b/vpr/src/analytical_place/partial_legalizer.h
@@ -196,6 +196,18 @@ class FlowBasedLegalizer : public PartialLegalizer {
     ///        enough space to flow blocks.
     static constexpr size_t max_num_iterations_ = 100;
 
+    /// @brief The maximum number of hops away a neighbor of a bin can be, where
+    ///        a hop count is the minimum number of bins you must pass through
+    ///        to reach this neighbor (Manhattan distance in bin-space).
+    ///
+    /// This is used to speed up the computation of the neighbors of bins since
+    /// it reduces the amount of the graph that needs to be explored.
+    ///
+    /// TODO: This may need to be made per primitive type since some types may
+    ///       need to explore more of the architecture than others to find
+    ///       sufficient neighbors.
+    static constexpr unsigned max_bin_neighbor_dist_ = 4;
+
     /// @brief A vector of all the bins in the legalizer.
     vtr::vector_map<LegalizerBinId, LegalizerBin> bins_;
 

diff --git a/vtr_flow/arch/timing/k6_frac_N10_40nm.xml b/vtr_flow/arch/timing/k6_frac_N10_40nm.xml
@@ -66,6 +66,35 @@
       <!--Fill with 'clb'-->
       <fill type="clb" priority="10"/>
     </auto_layout>
+    <!--
+        This architecture is commonly used for the MCNC Benchmark Suite. Below
+        are a set of fixed layouts which were found to work well for these
+        benchmarks. They were found by finding the minimum device size for each
+        benchmark and categorizing the benchmarks into the different fixed
+        layouts. Each fixed layout was chosen to be around 1.5x larger than the
+        previous.
+    -->
+    <fixed_layout name="mcnc_small" width="11" height="11">
+      <!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
+      <perimeter type="io" priority="100"/>
+      <corners type="EMPTY" priority="101"/>
+      <!--Fill with 'clb'-->
+      <fill type="clb" priority="10"/>
+    </fixed_layout>
+    <fixed_layout name="mcnc_medium" width="16" height="16">
+      <!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
+      <perimeter type="io" priority="100"/>
+      <corners type="EMPTY" priority="101"/>
+      <!--Fill with 'clb'-->
+      <fill type="clb" priority="10"/>
+    </fixed_layout>
+    <fixed_layout name="mcnc_large" width="22" height="22">
+      <!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
+      <perimeter type="io" priority="100"/>
+      <corners type="EMPTY" priority="101"/>
+      <!--Fill with 'clb'-->
+      <fill type="clb" priority="10"/>
+    </fixed_layout>
   </layout>
   <device>
     <!-- VB & JL: Using Ian Kuon's transistor sizing and drive strength data for routing, at 40 nm. Ian used BPTM 

diff --git a/vtr_flow/arch/timing/k6_frac_N10_frac_chain_mem32K_40nm.xml b/vtr_flow/arch/timing/k6_frac_N10_frac_chain_mem32K_40nm.xml
@@ -237,6 +237,79 @@
       <col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
       <col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
     </auto_layout>
+    <!--
+        This architecture is commonly used for the VTR Benchmark Suite. Below
+        are a set of fixed layouts which were found to work well for these
+        benchmarks. They were found by finding the minimum device size for each
+        benchmark and categorizing the benchmarks into the different fixed
+        layouts. Each fixed layout was chosen to be around 1.5x larger than the
+        previous.
+    -->
+    <fixed_layout name="vtr_extra_small" width="20" height="20">
+      <!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
+      <perimeter type="io" priority="100"/>
+      <corners type="EMPTY" priority="101"/>
+      <!--Fill with 'clb'-->
+      <fill type="clb" priority="10"/>
+      <!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
+      <col type="mult_36" startx="6" starty="1" repeatx="8" priority="20"/>
+      <col type="EMPTY" startx="6" repeatx="8" starty="1" priority="19"/>
+      <!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
+      <col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
+      <col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
+    </fixed_layout>
+    <fixed_layout name="vtr_small" width="30" height="30">
+      <!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
+      <perimeter type="io" priority="100"/>
+      <corners type="EMPTY" priority="101"/>
+      <!--Fill with 'clb'-->
+      <fill type="clb" priority="10"/>
+      <!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
+      <col type="mult_36" startx="6" starty="1" repeatx="8" priority="20"/>
+      <col type="EMPTY" startx="6" repeatx="8" starty="1" priority="19"/>
+      <!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
+      <col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
+      <col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
+    </fixed_layout>
+    <fixed_layout name="vtr_medium" width="42" height="42">
+      <!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
+      <perimeter type="io" priority="100"/>
+      <corners type="EMPTY" priority="101"/>
+      <!--Fill with 'clb'-->
+      <fill type="clb" priority="10"/>
+      <!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
+      <col type="mult_36" startx="6" starty="1" repeatx="8" priority="20"/>
+      <col type="EMPTY" startx="6" repeatx="8" starty="1" priority="19"/>
+      <!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
+      <col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
+      <col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
+    </fixed_layout>
+    <fixed_layout name="vtr_large" width="65" height="65">
+      <!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
+      <perimeter type="io" priority="100"/>
+      <corners type="EMPTY" priority="101"/>
+      <!--Fill with 'clb'-->
+      <fill type="clb" priority="10"/>
+      <!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
+      <col type="mult_36" startx="6" starty="1" repeatx="8" priority="20"/>
+      <col type="EMPTY" startx="6" repeatx="8" starty="1" priority="19"/>
+      <!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
+      <col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
+      <col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
+    </fixed_layout>
+    <fixed_layout name="vtr_extra_large" width="105" height="105">
+      <!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
+      <perimeter type="io" priority="100"/>
+      <corners type="EMPTY" priority="101"/>
+      <!--Fill with 'clb'-->
+      <fill type="clb" priority="10"/>
+      <!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
+      <col type="mult_36" startx="6" starty="1" repeatx="8" priority="20"/>
+      <col type="EMPTY" startx="6" repeatx="8" starty="1" priority="19"/>
+      <!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
+      <col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
+      <col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
+    </fixed_layout>
   </layout>
   <device>
     <!-- VB & JL: Using Ian Kuon's transistor sizing and drive strength data for routing, at 40 nm. Ian used BPTM 

diff --git a/vtr_flow/parse/pass_requirements/pass_requirements_ap_fixed_chan_width.txt b/vtr_flow/parse/pass_requirements/pass_requirements_ap_fixed_chan_width.txt
@@ -0,0 +1,5 @@
+%include "common/pass_requirements.vpr_status.txt"
+%include "timing/pass_requirements.vpr_ap.txt"
+%include "timing/pass_requirements.vpr_route_fixed_chan_width.txt"
+
+%include "common/pass_requirements.vtr_benchmarks.txt"
diff --git a/vtr_flow/parse/qor_config/qor_ap_fixed_chan_width.txt b/vtr_flow/parse/qor_config/qor_ap_fixed_chan_width.txt
@@ -0,0 +1,15 @@
+# This collects QoR data that is interesting for the AP flow running on a fixed
+# channel width.
+
+vpr_status;output.txt;vpr_status=(.*)
+total_wirelength;vpr.out;\s*Total wirelength: (\d+)
+# Final critical path delay (least slack): 6.34202 ns, Fmax: 157.678 MHz
+crit_path_delay;vpr.out;Critical path: (.*) ns
+ap_runtime;vpr.out;Analytical Placement took (.*) seconds
+pack_runtime;vpr.out;Packing took (.*) seconds
+# TODO: Figure out how to match Placement and not Analytical Placement better.
+place_runtime;vpr.out;^(?!.*\bAnalytical\b).*Placement took (.*) seconds
+route_runtime;vpr.out;Routing took (.*) seconds
+total_runtime;vpr.out;The entire flow of VPR took (.*) seconds
+num_clb;vpr.out;Netlist clb blocks:\s*(\d+)
+
diff --git a/vtr_flow/scripts/python_libs/vtr/task.py b/vtr_flow/scripts/python_libs/vtr/task.py
@@ -325,6 +325,7 @@ def parse_circuit_constraint_list(
             "arch",
             "device",
             "constraints",
+            "route_chan_width",
         ]
     )
 
@@ -792,6 +793,10 @@ def apply_cmd_line_circuit_constraints(cmd, circuit, config):
     circuit_vpr_constraints = config.circuit_constraints[circuit]["constraints"]
     if circuit_vpr_constraints is not None:
         cmd += ["--read_vpr_constraints", circuit_vpr_constraints]
+    # Check if the circuit has constrained route channel width.
+    constrained_route_w = config.circuit_constraints[circuit]["route_chan_width"]
+    if constrained_route_w is not None:
+        cmd += ["--route_chan_width", constrained_route_w]
 
 def resolve_vtr_source_file(config, filename, base_dir=""):
     """