diff --git a/build_ngtf.py b/build_ngtf.py index 5cafedc5..55bb5fae 100755 --- a/build_ngtf.py +++ b/build_ngtf.py @@ -70,6 +70,11 @@ def main(): action="store") parser.add_argument( + '--enable_variables_and_optimizers', + help="Ops like variable and optimizers are supported by nGraph in this version of the bridge\n", + action="store_true") + + parser.add_argument( '--use_grappler_optimizer', help="Use Grappler optimizer instead of the optimization passes\n", action="store_true") @@ -264,6 +269,11 @@ def main(): else: ngraph_tf_cmake_flags.extend(["-DNGRAPH_DISTRIBUTED_ENABLE=FALSE"]) + if (arguments.enable_variables_and_optimizers): + ngraph_tf_cmake_flags.extend(["-DNGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS=TRUE"]) + else: + ngraph_tf_cmake_flags.extend(["-DNGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS=FALSE"]) + if (arguments.use_grappler_optimizer): ngraph_tf_cmake_flags.extend( ["-DNGRAPH_TF_USE_GRAPPLER_OPTIMIZER=TRUE"]) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 284e27f9..b8fa51ec 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -43,6 +43,7 @@ set(SRC ngraph_freshness_tracker.cc ngraph_mark_for_clustering.cc ngraph_rewrite_for_tracking.cc + ngraph_rewrite_pass.cc ngraph_tracked_variable.cc ngraph_utils.cc tf_graphcycles.cc @@ -50,11 +51,43 @@ set(SRC version.cc ) -if(NGRAPH_TF_USE_GRAPPLER_OPTIMIZER) +message(STATUS "NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS: ${NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS}") + +if(NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS) + # common files + list(REMOVE_ITEM SRC ngraph_capture_variables.cc) + list(APPEND SRC enable_variable_ops/ngraph_capture_variables.cc) + + list(REMOVE_ITEM SRC ngraph_encapsulate_op.cc) + list(APPEND SRC enable_variable_ops/ngraph_encapsulate_op.cc) + + list(REMOVE_ITEM SRC ngraph_rewrite_for_tracking.cc) + list(APPEND SRC enable_variable_ops/ngraph_rewrite_for_tracking.cc) + + list(REMOVE_ITEM SRC ngraph_rewrite_pass.cc) + list(APPEND SRC enable_variable_ops/ngraph_rewrite_pass.cc) + 
+ list(REMOVE_ITEM SRC ngraph_tracked_variable.cc) + list(APPEND SRC enable_variable_ops/ngraph_tracked_variable.cc) + + list(REMOVE_ITEM SRC ngraph_utils.cc) + list(APPEND SRC enable_variable_ops/ngraph_utils.cc) + + # new files + list(APPEND SRC enable_variable_ops/ngraph_assign_op.cc) + list(APPEND SRC enable_variable_ops/ngraph_catalog.cc) + list(APPEND SRC enable_variable_ops/ngraph_enter_in_catalog.cc) + list(APPEND SRC enable_variable_ops/ngraph_replace_op_utilities.cc) + list(APPEND SRC enable_variable_ops/ngraph_replace_variable_modifiers.cc) + list(APPEND SRC enable_variable_ops/ngraph_variable_modifiers.cc) + +endif() + + +if(NGRAPH_TF_USE_GRAPPLER_OPTIMIZER) + list(REMOVE_ITEM SRC ngraph_rewrite_pass.cc) + list(APPEND SRC grappler/ngraph_optimizer.cc) + add_definitions(-DNGRAPH_TF_USE_GRAPPLER_OPTIMIZER) -else() - list(APPEND SRC ngraph_rewrite_pass.cc) endif() message(STATUS "NGRAPH_TF_USE_GRAPPLER_OPTIMIZER: ${NGRAPH_TF_USE_GRAPPLER_OPTIMIZER}") diff --git a/src/enable_variable_ops/ngraph_assign_op.cc b/src/enable_variable_ops/ngraph_assign_op.cc new file mode 100644 index 00000000..cec8e3cf --- /dev/null +++ b/src/enable_variable_ops/ngraph_assign_op.cc @@ -0,0 +1,185 @@ +/******************************************************************************* + * Copyright 2019 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *******************************************************************************/ + +#include "tensorflow/core/common_runtime/dma_helper.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/lib/strings/strcat.h" + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/platform/default/logging.h" + +#include "ngraph/event_tracing.hpp" +#include "ngraph/runtime/backend.hpp" +#include "ngraph_catalog.h" +#include "ngraph_freshness_tracker.h" +#include "ngraph_timer.h" +#include "ngraph_utils.h" +#include "ngraph_var.h" + +using namespace std; +namespace ng = ngraph; + +namespace tensorflow { + +namespace ngraph_bridge { + +/* ------------------------------------------------- +// +// NGraphAssignOp +// +---------------------------------------------------*/ + +// Computes *input[0] = input[1] +class NGraphAssignOp : public OpKernel { + private: + bool just_looking_; + bool copy_to_tf_; + int ng_graph_id_; + static int s_instance_count; + int my_instance_id{0}; + + // TODO(malikshr): Do we need these attributes, exist in TF Assign ops + // use_exclusive_lock_, validate_shape_, relax_constraints_; + + public: + explicit NGraphAssignOp(OpKernelConstruction* context) + : OpKernel(context), just_looking_(false), copy_to_tf_(false) { + OP_REQUIRES_OK(context, context->GetAttr("just_looking", &just_looking_)); + OP_REQUIRES_OK(context, context->GetAttr("copy_to_tf", ©_to_tf_)); + OP_REQUIRES_OK(context, context->GetAttr("ngraph_graph_id", &ng_graph_id_)); + + NGRAPH_VLOG(4) << "NGraphAssign:: Constructor called for: " << def().name() + << ",just looking " << PrintBool(just_looking_) + << ",copy-to-tf " << PrintBool(copy_to_tf_) << " ,Graph ID " + << ng_graph_id_; + + OP_REQUIRES(context, IsRefType(context->input_type(0)), + errors::InvalidArgument("lhs input needs to be a ref 
type")); + my_instance_id = s_instance_count; + s_instance_count++; + } + + void Compute(OpKernelContext* context) override { + std::ostringstream oss; + oss << "Execute: Assign_" << my_instance_id << ": " << name(); + ngraph::Event event_compute(oss.str(), name(), ""); + + NGRAPH_VLOG(4) << "NGraphAssign:: Compute called for: " << def().name() + << " ,just looking " << PrintBool(just_looking_) + << " ,copy-to-tf " << PrintBool(copy_to_tf_) << " ,Graph ID " + << ng_graph_id_; + + bool log_copies = false; + OP_REQUIRES_OK(context, IsCopyLogEnabled(ng_graph_id_, log_copies)); + std::stringstream copy_log_str; + copy_log_str << "KERNEL[" << type_string() << "]: " << name() + << " ,Copy_TF " << PrintBool(copy_to_tf_) << " ,Just_Looking " + << PrintBool(just_looking_) << "\n"; + int number_of_copies = 0; + + bool ref_exists = NGraphCatalog::ExistsInInputVariableSharedNameMap( + ng_graph_id_, def().name(), 0); + if (!ref_exists) { + OP_REQUIRES(context, ref_exists, + errors::Internal( + "Caught exception : RefInput to NGAssign not found \n")); + } + string get_ref_var_name = NGraphCatalog::GetInputVariableSharedName( + ng_graph_id_, def().name(), 0); + + NGraphVar* var; + OP_REQUIRES_OK(context, + context->resource_manager()->Lookup( + context->resource_manager()->default_container(), + get_ref_var_name, &var)); + + const Tensor& rhs = context->input(1); + + // We always return the input ref. 
+ context->forward_ref_input_to_ref_output(0, 0); + + // get the nGraphTensor + shared_ptr ng_tensor_to_assign = var->ng_tensor(); + + // DO NOT CARE ABOUT SYNCING AS WE ARE ALWAYS SETTING THE NGTENSOR + + // Get input[1] + string valkey = to_string(ng_graph_id_) + "_" + def().input(1); + bool valref_exists = NGraphCatalog::ExistsInEncapOutputTensorMap(valkey); + if (valref_exists) { + // Value is from encap + NGRAPH_VLOG(4) << "NGraphAssign::Getting from catalog: " << valkey; + auto ng_val = NGraphCatalog::GetTensorFromEncapOutputTensorMap(valkey); + ng_tensor_to_assign->copy_from(*ng_val); + } else { + number_of_copies++; + copy_log_str << " COPY_INP_VAL[0]"; + NGRAPH_VLOG(4) << "NGraphAssign::Getting from TF : " << valkey; + void* tf_src_ptr = (void*)DMAHelper::base(&rhs); + ng_tensor_to_assign->write( + tf_src_ptr, 0, ng_tensor_to_assign->get_element_count() * + ng_tensor_to_assign->get_element_type().size()); + } + + mutex_lock l(*context->input_ref_mutex(0)); + Tensor old_lhs = context->mutable_input(0, /* lock_held */ true); + + if (copy_to_tf_) { + number_of_copies++; + copy_log_str << " COPY_TF "; + ReadNGTensor(ng_tensor_to_assign, &old_lhs); + + if (!just_looking_) { + // Some tf op might update the ng-tensor value so mark it stale + copy_log_str << " SET_SYNC "; + var->sync_ng_tensor(true); + } + } + + copy_log_str << " Number of copies " << number_of_copies << "\n"; + if (log_copies) { + cout << copy_log_str.str(); + } + + // Unref Var + var->Unref(); + event_compute.Stop(); + ngraph::Event::write_trace(event_compute); + } +}; + +int NGraphAssignOp::s_instance_count = 0; + +REGISTER_OP("NGraphAssign") + .Input("ref: Ref(T)") + .Input("value: T") + .Output("output_ref: Ref(T)") + .Attr("T: type") + .Attr("validate_shape: bool = true") + .Attr("use_locking: bool = true") + .Attr("just_looking: bool = false") + .Attr("copy_to_tf: bool = false") + .Attr("ngraph_graph_id: int"); + +REGISTER_KERNEL_BUILDER(Name("NGraphAssign").Device(DEVICE_CPU), + 
NGraphAssignOp); + +} // namespace ngraph_bridge + +} // namespace tensorflow diff --git a/src/enable_variable_ops/ngraph_capture_variables.cc b/src/enable_variable_ops/ngraph_capture_variables.cc new file mode 100644 index 00000000..0bb26102 --- /dev/null +++ b/src/enable_variable_ops/ngraph_capture_variables.cc @@ -0,0 +1,97 @@ +/******************************************************************************* + * Copyright 2017-2019 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ + +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/node_builder.h" + +#include "ngraph_api.h" +#include "ngraph_capture_variables.h" +#include "ngraph_replace_op_utilities.h" +#include "ngraph_utils.h" + +using namespace std; + +namespace tensorflow { + +namespace ngraph_bridge { + +// +// Utility function to check if placement on the NGRAPH device has been +// requested. +// +// FIXME(amprocte): stubbed out for now because NGRAPH device is gone. +// +static bool NGraphPlacementRequested(const Node* node) { return true; } + +// +// Main entry point for the variable-capture. 
+// +Status CaptureVariables(Graph* graph, std::vector skip_these_nodes) { + const static std::map< + const string, + const pair>> + CAPTURE_REPLACE_OP_MAP{ + {"ApplyGradientDescent", std::make_pair("NGraphApplyGradientDescent", + ReplaceApplyGradientDescent)}, + {"Assign", std::make_pair("NGraphAssign", ReplaceAssign)}, + {"AssignAdd", std::make_pair("NGraphAssignAdd", ReplaceAssign)}, + {"AssignSub", std::make_pair("NGraphAssignSub", ReplaceAssign)}, + {"VariableV2", std::make_pair("NGraphVariable", ReplaceVariable)}}; + + std::vector replaced_nodes; + for (auto node : graph->op_nodes()) { + if (NGraphPlacementRequested(node)) { + auto itr = CAPTURE_REPLACE_OP_MAP.find(node->type_string()); + if (itr != CAPTURE_REPLACE_OP_MAP.end()) { + NGRAPH_VLOG(1) << "Capturing: " << node->name(); + Node* replacement; + + // Create the replacement node + TF_RETURN_IF_ERROR((itr->second.second)(graph, node, &replacement, + node->name(), itr->second.first, + false, false, 0, false)); + + std::vector edges; + + NGRAPH_VLOG(4) << "Replacing Node " << node->DebugString() << " with " + << replacement->DebugString(); + + TF_RETURN_IF_ERROR(ReplaceInputControlEdges(graph, node, replacement)); + TF_RETURN_IF_ERROR(ReplaceOutputEdges(graph, node, replacement)); + + replaced_nodes.push_back(node); + } + + } // end of checking NGraphPlacementRequested + } // end of looping through nodes in the graph + + for (auto node : replaced_nodes) { + NGRAPH_VLOG(4) << "Removing: " << node->name(); + graph->RemoveNode(node); + } + + return Status::OK(); +} + +} // namespace ngraph_bridge + +} // namespace tensorflow diff --git a/src/enable_variable_ops/ngraph_catalog.cc b/src/enable_variable_ops/ngraph_catalog.cc new file mode 100644 index 00000000..c7758aad --- /dev/null +++ b/src/enable_variable_ops/ngraph_catalog.cc @@ -0,0 +1,115 @@ +/******************************************************************************* + * Copyright 2019 Intel Corporation + * + * Licensed under the Apache License, 
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ +#include "tensorflow/core/lib/core/errors.h" + +#include "ngraph/ngraph.hpp" +#include "ngraph/runtime/backend_manager.hpp" +#include "ngraph_catalog.h" +#include "ngraph_log.h" + +using namespace std; +namespace ng = ngraph; + +namespace tensorflow { + +namespace ngraph_bridge { + +unordered_map NGraphCatalog::input_variable_sharedname_map_; +map> + NGraphCatalog::encap_output_tensor_map_; +unordered_map> + NGraphCatalog::encap_output_copy_indexes_map_; + +// Functions for Encapsulate Output Copy Indexes Map +void NGraphCatalog::AddToEncapOutputCopyIndexesMap(string key, + unordered_set val) { + NGraphCatalog::encap_output_copy_indexes_map_[key] = val; +} + +unordered_set NGraphCatalog::GetEncapOutputIndexesThatNeedCopy( + string key) { + return NGraphCatalog::encap_output_copy_indexes_map_[key]; +} + +bool NGraphCatalog::EncapOutputIndexNeedsCopy(string key, int index) { + auto itr = NGraphCatalog::encap_output_copy_indexes_map_.find(key); + if (itr != NGraphCatalog::encap_output_copy_indexes_map_.end()) { + auto op_copy_indexes = itr->second; + return (op_copy_indexes.find(index) != op_copy_indexes.end()); + } + // Should not reach here + return true; +} + +string NGraphCatalog::CreateNodeKey(int graph_id, string node_name, int index) { + if (index == 0) { + return to_string(graph_id) + "_" + node_name; + } + return to_string(graph_id) + "_" + node_name + ":" + 
to_string(index); +} + +// Functions for OutputTensorMap +void NGraphCatalog::AddToEncapOutputTensorMap( + string key, shared_ptr ng_val) { + NGraphCatalog::encap_output_tensor_map_[key] = ng_val; +} + +bool NGraphCatalog::ExistsInEncapOutputTensorMap(string key) { + auto itr = NGraphCatalog::encap_output_tensor_map_.find(key); + return itr != NGraphCatalog::encap_output_tensor_map_.end(); +} + +bool NGraphCatalog::ExistsInEncapOutputTensorMap(int graphid, string node_name, + int input_index) { + return NGraphCatalog::ExistsInEncapOutputTensorMap( + NGraphCatalog::CreateNodeKey(graphid, node_name, input_index)); +} + +shared_ptr +NGraphCatalog::GetTensorFromEncapOutputTensorMap(string key) { + return NGraphCatalog::encap_output_tensor_map_[key]; +} + +void NGraphCatalog::DeleteFromEncapOutputTensorMap(string key) { + NGraphCatalog::encap_output_tensor_map_.erase(key); +} + +// Functions relating Input Variable Shared Name Map +string NGraphCatalog::GetInputVariableSharedName(int graphid, string node_name, + int input_index) { + std::string node_key = + NGraphCatalog::CreateNodeKey(graphid, node_name, input_index); + return NGraphCatalog::input_variable_sharedname_map_[node_key]; +} + +void NGraphCatalog::AddToInputVariableSharedNameMap(string key, string val) { + NGraphCatalog::input_variable_sharedname_map_[key] = val; +} + +bool NGraphCatalog::ExistsInInputVariableSharedNameMap(string key) { + auto itr = NGraphCatalog::input_variable_sharedname_map_.find(key); + return itr != NGraphCatalog::input_variable_sharedname_map_.end(); +} + +bool NGraphCatalog::ExistsInInputVariableSharedNameMap(int graphid, + string node_name, + int input_index) { + return NGraphCatalog::ExistsInInputVariableSharedNameMap( + NGraphCatalog::CreateNodeKey(graphid, node_name, input_index)); +} + +} // ngraph_bridge +} // tensorflow diff --git a/src/enable_variable_ops/ngraph_catalog.h b/src/enable_variable_ops/ngraph_catalog.h new file mode 100644 index 00000000..3bbc741e --- /dev/null +++ 
b/src/enable_variable_ops/ngraph_catalog.h @@ -0,0 +1,110 @@ +/******************************************************************************* + * Copyright 2019 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ + +#ifndef NGRAPH_TF_CATALOG_H_ +#define NGRAPH_TF_CATALOG_H_ + +#include +#include +#include +#include + +#include "tensorflow/core/lib/core/errors.h" + +#include "ngraph/ngraph.hpp" +#include "ngraph/runtime/backend_manager.hpp" +#include "ngraph_log.h" + +using namespace std; +namespace ng = ngraph; + +namespace tensorflow { + +namespace ngraph_bridge { + +class NGraphCatalog { + private: + // Map keeps track of nodes whose input is a variable tensor + // Will be used by Assign/Optimizers and NGraphEncapsulate Op + // Map of + // Key + // when op index ==0 + // string : GraphId + _ + nodename + // otherwise + // string : GraphId + _ + nodename + : + input_index + // Value : variable shared_name + // LOCK? + static unordered_map input_variable_sharedname_map_; + + // Map keeps track of nodes whose input is a tensor computed by NGraph + // For e.g. 
if the value to be assigned was computed by NGraphEncapsulate Op + // Will be used by Assign/Optimizers + // Map of + // Key + // when op index ==0 + // string : GraphId + _ + nodename + // otherwise + // string : GraphId + _ + nodename + : + output_index + // Value : shared_ptr + static map> encap_output_tensor_map_; + + // Map keeps track of output indexes of NGraphEncapsulate Op + // that will be used by TF Nodes or other NGraphEncapsulate Op + // Will be used by NGraphEncapsulateOP + // Map of + // Key + // string : nodename (nGraphEncapsulateOp name) + // Value : Set of indices + static unordered_map> + encap_output_copy_indexes_map_; + + public: + // Utility Functions for the data structures + // Functions for EncapsulateOutputCopyIndexes Map + static void AddToEncapOutputCopyIndexesMap(string key, + unordered_set val); + static bool EncapOutputIndexNeedsCopy(string key, int index); + static unordered_set GetEncapOutputIndexesThatNeedCopy(string key); + + // Functions for InputVariableSharedName Map + static string GetInputVariableSharedName(int graphid, string node_name, + int input_index); + + static void AddToInputVariableSharedNameMap(string key, string val); + + static bool ExistsInInputVariableSharedNameMap(string key); + static bool ExistsInInputVariableSharedNameMap(int graphid, string node_name, + int input_index); + + // Functions for EncapOutputTensorMap + static void AddToEncapOutputTensorMap(string key, + shared_ptr ng_val); + static bool ExistsInEncapOutputTensorMap(string key); + static bool ExistsInEncapOutputTensorMap(int graphid, string node_name, + int input_index); + + static shared_ptr GetTensorFromEncapOutputTensorMap( + string key); + static void DeleteFromEncapOutputTensorMap(string key); + + // Utility to create key to query the maps + static string CreateNodeKey(int graph_id, string node_name, int index); +}; + +} // ngraph_bridge +} // tensorflow + +#endif diff --git a/src/enable_variable_ops/ngraph_encapsulate_op.cc 
b/src/enable_variable_ops/ngraph_encapsulate_op.cc new file mode 100644 index 00000000..0de0f1f2 --- /dev/null +++ b/src/enable_variable_ops/ngraph_encapsulate_op.cc @@ -0,0 +1,895 @@ +/******************************************************************************* + * Copyright 2017-2019 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ +#include +#include +#include + +#include "tensorflow/core/common_runtime/dma_helper.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/graph_constructor.h" + +#include "ngraph_backend_manager.h" +#include "ngraph_builder.h" +#include "ngraph_catalog.h" +#include "ngraph_cluster_manager.h" +#include "ngraph_freshness_tracker.h" +#include "ngraph_log.h" +#include "ngraph_mark_for_clustering.h" +#include "ngraph_timer.h" +#include "ngraph_utils.h" +#include "ngraph_var.h" + +#include "ngraph/event_tracing.hpp" +#include "ngraph/runtime/backend.hpp" + +#if defined NGRAPH_DISTRIBUTED +#include "ngraph/distributed.hpp" +#endif + +using namespace std; +namespace ng = ngraph; + +namespace tensorflow { + +// For each I/O tensor, cache TF's data ptr 
and nGraph's Tensor +using NgFunctionIOCache = std::unordered_map< + std::shared_ptr, + std::vector>>>; + +namespace ngraph_bridge { + +REGISTER_OP("NGraphEncapsulate") + .Input("args: Targuments") + .Attr("Targuments: list(type) >= 0") + .Output("results: Tresults") + .Attr("Tresults: list(type) >= 0") + .Attr("ngraph_cluster: int") + .Attr("ngraph_graph_id: int") + .SetIsStateful() + .Doc("nGraph Encapsulation Op. For use by the nGraph JIT only."); + +class NGraphEncapsulateOp : public OpKernel { + public: + //--------------------------------------------------------------------------- + // NGraphEncapsulateOp::ctor + //--------------------------------------------------------------------------- + explicit NGraphEncapsulateOp(OpKernelConstruction* ctx) + : OpKernel(ctx), + m_graph(OpRegistry::Global()), + m_freshness_tracker(nullptr) { + my_instance_id = s_instance_count; + s_instance_count++; + + std::ostringstream oss; + oss << "Encapsulate_" << my_instance_id << ": " << name(); + ngraph::Event event(oss.str(), name(), ""); + + NGRAPH_VLOG(1) << "NGraphEncapsulateOp: " << my_instance_id + << " Name: " << name(); + + GraphDef* graph_def; + + OP_REQUIRES_OK(ctx, ctx->GetAttr("ngraph_cluster", &m_ngraph_cluster)); + graph_def = NGraphClusterManager::GetClusterGraph(m_ngraph_cluster); + + GraphConstructorOptions opts; + opts.allow_internal_ops = true; + OP_REQUIRES_OK(ctx, ConvertGraphDefToGraph(opts, *graph_def, &m_graph)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("ngraph_graph_id", &m_graph_id)); + // + // Initialize the "m_input_is_static" vector as follows: + // (1) create m_input_is_static with n+1 elements, where n is the max arg + // index + // (2) for each _Arg node n, set m_input_is_static[n.index] to true if n + // is driving any static input; else set it to false. + // + + // Create the vector. 
+ int32 max_arg_index = -1; + std::vector arg_nodes; + + for (auto node : m_graph.nodes()) { + if (node->type_string() == "_Arg") { + arg_nodes.push_back(node); + + int32 index; + OP_REQUIRES_OK(ctx, GetNodeAttr(node->attrs(), "index", &index)); + if (index > max_arg_index) max_arg_index = index; + } + } + + m_input_is_static = std::vector(max_arg_index + 1, false); + + // Fill the vector. + for (auto node : arg_nodes) { + int32 index; + OP_REQUIRES_OK(ctx, GetNodeAttr(node->attrs(), "index", &index)); + + bool is_static = false; + for (auto edge : node->out_edges()) { + if (edge->IsControlEdge() || !edge->dst()->IsOp()) { + continue; + } + + NGRAPH_VLOG(5) << "For arg " << index << " checking edge " + << edge->DebugString(); + + if (InputIsStatic(edge->dst(), edge->dst_input())) { + NGRAPH_VLOG(5) << "Marking edge static: " << edge->DebugString(); + is_static = true; + break; + } + } + + NGRAPH_VLOG(5) << "Marking arg " << index << " is_static: " << is_static; + m_input_is_static[index] = is_static; + } + + // Set the backend type for the op + OP_REQUIRES_OK(ctx, + ctx->GetAttr("_ngraph_backend", &m_op_backend_name)); + BackendManager::CreateBackend(m_op_backend_name); + event.Stop(); + ngraph::Event::write_trace(event); + } + + //--------------------------------------------------------------------------- + // ~NGraphEncapsulateOp() + //--------------------------------------------------------------------------- + ~NGraphEncapsulateOp() override { + std::ostringstream oss; + oss << "Destroy Encapsulate_" << my_instance_id << ": " << name(); + ngraph::Event event(oss.str(), name(), ""); + + // If the kernel goes away, we must de-register all of its cached + // functions + // from the freshness tracker. + if (m_freshness_tracker != nullptr) { + for (auto kv : m_ng_exec_map) { + m_freshness_tracker->RemoveUser(kv.second); + } + + // TODO(amprocte): We should be able to unref the tracker here, but it + // seems to screw things up in the C++ unit tests. 
+ // m_freshness_tracker->Unref(); + } + + string node_name = "_ngraph_cluster_" + to_string(m_ngraph_cluster); + // TODO(malikshr) : Could be erroneous we don't know if this + // destructor is called at the very end, if some modifier that uses this + // tensor is still active. + for (int i = 0; i < m_number_outputs; i++) { + string key = NGraphCatalog::CreateNodeKey(m_graph_id, node_name, i); + if (NGraphCatalog::ExistsInEncapOutputTensorMap(key)) { + auto temp = NGraphCatalog::GetTensorFromEncapOutputTensorMap(key); + temp.reset(); + NGraphCatalog::DeleteFromEncapOutputTensorMap(key); + } + } + + // Release the backend + BackendManager::ReleaseBackend(m_op_backend_name); + NGRAPH_VLOG(2) << "~NGraphEncapsulateOp()"; + + event.Stop(); + ngraph::Event::write_trace(event); + } + + template + static void TensorDataToStream(std::ostream& ostream, int64 n_elements, + const char* data) { + const T* data_T = reinterpret_cast(data); + for (size_t i = 0; i < n_elements; i++) { + ostream << data_T[i] << ","; + } + } + + //--------------------------------------------------------------------------- + // TensorToStream + //--------------------------------------------------------------------------- + static Status TensorToStream(std::ostream& ostream, const Tensor& tensor) { + const char* data = tensor.tensor_data().data(); + int64 n_elements = tensor.NumElements(); + switch (tensor.dtype()) { + case DT_HALF: + TensorDataToStream(ostream, n_elements, data); + break; + case DT_FLOAT: + TensorDataToStream(ostream, n_elements, data); + break; + case DT_DOUBLE: + TensorDataToStream(ostream, n_elements, data); + break; + case DT_UINT32: + TensorDataToStream(ostream, n_elements, data); + break; + case DT_INT32: + TensorDataToStream(ostream, n_elements, data); + break; + case DT_UINT8: + case DT_QUINT8: + TensorDataToStream(ostream, n_elements, data); + break; + case DT_UINT16: + case DT_QUINT16: + TensorDataToStream(ostream, n_elements, data); + break; + case DT_INT8: + case 
DT_QINT8: + TensorDataToStream(ostream, n_elements, data); + break; + case DT_INT16: + case DT_QINT16: + TensorDataToStream(ostream, n_elements, data); + break; + case DT_UINT64: + TensorDataToStream(ostream, n_elements, data); + break; + case DT_INT64: + TensorDataToStream(ostream, n_elements, data); + break; + case DT_BOOL: + TensorDataToStream(ostream, n_elements, data); + break; + default: + return errors::Internal("TensorToStream got unsupported data type ", + DataType_Name(tensor.dtype())); + break; + } + return Status::OK(); + } + + //--------------------------------------------------------------------------- + // OpKernel::Compute + //--------------------------------------------------------------------------- + void Compute(OpKernelContext* ctx) override { + std::ostringstream oss; + oss << "Execute: Encapsulate_" << my_instance_id << ": " << name(); + ngraph::Event event(oss.str(), name(), ""); + + Timer compute_time; + std::lock_guard lock(m_compute_lock); + + NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute starting for cluster " + << m_ngraph_cluster; + + NGRAPH_VLOG(4) << "Got backend of type: " << m_op_backend_name; + ng::runtime::Backend* op_backend = + BackendManager::GetBackend(m_op_backend_name); + + ngraph::Event event_func_maybe_create("FunctionMaybeCreate", name(), ""); + Timer function_lookup_or_create; + // Get the inputs + std::vector input_shapes; + std::stringstream signature_ss; + for (int i = 0; i < ctx->num_inputs(); i++) { + const Tensor& input_tensor = ctx->input(i); + input_shapes.push_back(input_tensor.shape()); + for (const auto& x : input_tensor.shape()) { + signature_ss << x.size << ","; + } + signature_ss << ";"; + } + + signature_ss << "/"; + + std::vector static_input_map(ctx->num_inputs()); + for (int i = 0; i < ctx->num_inputs(); i++) { + const Tensor& input_tensor = ctx->input(i); + if (m_input_is_static[i]) { + static_input_map[i] = &input_tensor; + OP_REQUIRES_OK(ctx, TensorToStream(signature_ss, input_tensor)); + 
signature_ss << ";"; + } + } + + std::shared_ptr ng_function; + std::shared_ptr ng_exec; + std::shared_ptr evicted_ng_exec; + std::string signature = signature_ss.str(); + + if (NGRAPH_VLOG_IS_ON(5)) { + NGRAPH_VLOG(5) << "Computed signature: " << signature; + } + + auto it = m_ng_exec_map.find(signature); + + NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute got inputs for cluster " + << m_ngraph_cluster; + + // Translate the TensorFlow graph to nGraph. + if (it == m_ng_exec_map.end()) { + // Measure the current total memory usage + long vm, rss, vm0, rss0; + MemoryProfile(vm0, rss0); + + NGRAPH_VLOG(1) << "Compilation cache miss: " << ctx->op_kernel().name(); + OP_REQUIRES_OK( + ctx, Builder::TranslateGraph(input_shapes, static_input_map, &m_graph, + ng_function)); + + auto function_size = ng_function->get_graph_size() / 1024; // kb unit + + // Serialize to nGraph if needed + if (std::getenv("NGRAPH_ENABLE_SERIALIZE") != nullptr) { + std::string file_name = + "tf_function_" + ctx->op_kernel().name() + ".json"; + NgraphSerialize("tf_function_" + ctx->op_kernel().name() + ".json", + ng_function); +#if defined NGRAPH_DISTRIBUTED + ngraph::Distributed dist; + int Rank_ID; + Rank_ID = dist.get_rank(); + NgraphSerialize("tf_function_" + ctx->op_kernel().name() + "_" + + to_string(Rank_ID) + ".json", + ng_function); +#endif + } + // Evict the cache if the number of elements exceeds the limit + const char* cache_depth_specified = + std::getenv("NGRAPH_TF_FUNCTION_CACHE_ITEM_DEPTH"); + if (cache_depth_specified != nullptr) { + my_function_cache_depth_in_items = atoi(cache_depth_specified); + } + + if (m_ng_exec_map.size() >= my_function_cache_depth_in_items) { + int input_tensors_bytes_free = 0; + evicted_ng_exec = m_ng_exec_map[m_lru.back()]; + m_ng_exec_map.erase(m_lru.back()); + m_ng_function_map.erase(evicted_ng_exec); + + // Call delete function here of the erased func + op_backend->remove_compiled_function(evicted_ng_exec); + + // Now clean the input cache + 
std::vector>>& + input_caches = m_ng_exec_input_cache_map[evicted_ng_exec]; + for (auto& next_input : input_caches) { + input_tensors_bytes_free += next_input.second->get_size_in_bytes(); + next_input.second.reset(); + } + m_ng_exec_input_cache_map.erase(evicted_ng_exec); + + // Clean the output cache + std::vector>>& + output_caches = m_ng_exec_output_cache_map[evicted_ng_exec]; + int output_tensors_bytes_free = 0; + for (auto& next_output : output_caches) { + output_tensors_bytes_free += next_output.second->get_size_in_bytes(); + next_output.second.reset(); + } + m_ng_exec_output_cache_map.erase(evicted_ng_exec); + + m_lru.pop_back(); + NGRAPH_VLOG(1) << "NGRAPH_TF_MEM_PROFILE: OP_ID: " << my_instance_id + << " Step_ID: " << ctx->step_id() + << " Cluster: " << ctx->op_kernel().name() + << " Input Tensors freed: " + << input_tensors_bytes_free / (1024 * 1024) << " MB" + << " Output Tensors freed: " + << output_tensors_bytes_free / (1024 * 1024) << " MB"; + } // cache eviction if cache size greater than cache depth + + BackendManager::LockBackend(m_op_backend_name); + + ngraph::Event event_compile("Compile nGraph", name(), ""); + try { + ng_exec = op_backend->compile(ng_function); + } catch (const std::exception& exp) { + ng_function = m_ng_function_map[ng_exec]; + BackendManager::UnlockBackend(m_op_backend_name); + NgraphSerialize( + "tf_function_error_" + ctx->op_kernel().name() + ".json", + ng_function); + OP_REQUIRES( + ctx, false, + errors::Internal("Caught exception while compiling op_backend: ", + exp.what(), "\n")); + } catch (...) 
{ + BackendManager::UnlockBackend(m_op_backend_name); + NgraphSerialize( + "tf_function_error_" + ctx->op_kernel().name() + ".json", + ng_function); + OP_REQUIRES(ctx, false, + errors::Internal("Error in compiling op_backend\n")); + } + BackendManager::UnlockBackend(m_op_backend_name); + event_compile.Stop(); + + m_ng_exec_map[signature] = ng_exec; + // caching ng_function to serialize to ngraph if needed + m_ng_function_map[ng_exec] = ng_function; + + m_lru.push_front(signature); + // Memory after + MemoryProfile(vm, rss); + auto delta_vm_mem = vm - vm0; + auto delta_res_mem = rss - rss0; + NGRAPH_VLOG(1) << "NGRAPH_TF_CACHE_PROFILE: OP_ID: " << my_instance_id + << " Step_ID: " << ctx->step_id() + << " Cache length: " << m_ng_exec_map.size() + << " Cluster: " << ctx->op_kernel().name() + << " Delta VM: " << delta_vm_mem + << " Delta RSS: " << delta_res_mem + << " Function size: " << function_size + << " KB Total RSS: " << rss / (1024 * 1024) << " GB " + << " VM: " << vm / (1024 * 1024) << " GB" << endl; + } // end of input signature not found in m_ng_exec_map + else { + // Found the input signature in m_ng_exec_map, use the cached executable + // Update the m_lru + if (signature != m_lru.front()) { + m_lru.remove(signature); + m_lru.push_front(signature); + } + ng_exec = it->second; + } + + int time_func_create_or_lookup = function_lookup_or_create.ElapsedInMS(); + event_func_maybe_create.Stop(); + + NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute got graph for cluster " + << m_ngraph_cluster; + + Timer create_or_lookup_tensors; + + if (m_freshness_tracker == nullptr) { + auto creator = [](NGraphFreshnessTracker** tracker) { + *tracker = new NGraphFreshnessTracker(); + return Status::OK(); + }; + OP_REQUIRES_OK( + ctx, ctx->resource_manager()->LookupOrCreate( + ctx->resource_manager()->default_container(), + "ngraph_freshness_tracker", &m_freshness_tracker, creator)); + } + + NGRAPH_VLOG(4) + << "NGraphEncapsulateOp::Compute got freshness tracker for cluster " + 
<< m_ngraph_cluster; + + // Allocate tensors for input arguments. + ngraph::Event event_alloc_input("Input: maybe create", name(), ""); + + vector> ng_inputs; + int ng_input_tensor_size_in_bytes = 0; + + std::vector>>& + input_caches = m_ng_exec_input_cache_map[ng_exec]; + input_caches.resize(input_shapes.size()); + + bool log_copies = false; + OP_REQUIRES_OK(ctx, IsCopyLogEnabled(m_graph_id, log_copies)); + std::stringstream copy_log_str; + copy_log_str << "KERNEL[" << type_string() << "]: " << name() + << " ,GraphID " << m_graph_id << "\n"; + int number_of_copies = 0; + + for (int i = 0; i < input_shapes.size(); i++) { + bool ref_exists = NGraphCatalog::ExistsInInputVariableSharedNameMap( + m_graph_id, def().name(), i); + + if (ref_exists) { + NGRAPH_VLOG(4) << "NGraphEncapsulateOp:: Input from Variable Node"; + ng_inputs.push_back(nullptr); + continue; + } + + NGRAPH_VLOG(4) << "NGraphEncapsulateOp:: Input from non Variable Node"; + ng::Shape ng_shape(input_shapes[i].dims()); + for (int j = 0; j < input_shapes[i].dims(); ++j) { + ng_shape[j] = input_shapes[i].dim_size(j); + } + ng::element::Type ng_element_type; + OP_REQUIRES_OK(ctx, TFDataTypeToNGraphElementType(ctx->input(i).dtype(), + &ng_element_type)); + + // At the first call of the ng_exec, both last_src_ptr and + // last_ng_tensor shall point to null. Otherwise, they are retrived + // from cache. 
+ void* last_src_ptr = input_caches[i].first; + std::shared_ptr last_ng_tensor = + input_caches[i].second; + + void* current_src_ptr = (void*)DMAHelper::base(&ctx->input(i)); + std::shared_ptr current_ng_tensor = + get_current_ng_tensor(current_src_ptr, last_src_ptr, last_ng_tensor, + false, ng_exec, op_backend, ng_element_type, + ng_shape); + + bool is_cpu = m_op_backend_name == "CPU"; + + if (!is_cpu && current_ng_tensor->get_stale()) { + // Fresh or stale, in case of CPU this step is never needed + try { + number_of_copies++; + copy_log_str << " COPY_INP_VAL[" << i << "]"; + current_ng_tensor->write( + current_src_ptr, 0, + current_ng_tensor->get_element_count() * ng_element_type.size()); + } catch (const std::exception& exp) { + OP_REQUIRES( + ctx, false, + errors::Internal( + "Caught exception while transferring tensor data to nGraph: ", + exp.what(), "\n")); + } catch (...) { + OP_REQUIRES(ctx, false, + errors::Internal( + "Error in transferring tensor data to nGraph\n")); + } + } + + input_caches[i] = std::make_pair(current_src_ptr, current_ng_tensor); + ng_inputs.push_back(current_ng_tensor); + } // for (int i = 0; i < input_shapes.size(); i++) + + NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute allocated argument tensors " + "for cluster " + << m_ngraph_cluster; + + event_alloc_input.Stop(); + + // Allocate tensors for the output results. + ngraph::Event event_alloc_output("Output: maybe create", name(), ""); + + vector> ng_outputs; + int ng_output_tensor_size_in_bytes = 0; + + std::vector>>& + output_caches = m_ng_exec_output_cache_map[ng_exec]; + output_caches.resize(ng_exec->get_results().size()); + + // ngraph executable returns get_results, using that to get the tensor shape + // and element type. 
+ for (auto i = 0; i < ng_exec->get_results().size(); i++) { + auto ng_element = ng_exec->get_results()[i]; + auto ng_shape = ng_element->get_shape(); + auto ng_element_type = ng_element->get_element_type(); + + // Create the TF output tensor + vector dims; + for (auto dim : ng_shape) { + dims.push_back(dim); + } + + TensorShape tf_shape(dims); + Tensor* output_tensor = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(i, tf_shape, &output_tensor)); + + // Make sure the nGraph-inferred element type agrees with what TensorFlow + // expected. + ng::element::Type expected_elem_type; + OP_REQUIRES_OK( + ctx, TFDataTypeToNGraphElementType(ctx->expected_output_dtype(i), + &expected_elem_type)); + OP_REQUIRES( + ctx, ng_element_type == expected_elem_type, + errors::Internal("Element type inferred by nGraph does not match " + "the element type expected by TensorFlow")); + + void* last_dst_ptr = output_caches[i].first; + std::shared_ptr last_ng_tensor = + output_caches[i].second; + + void* current_dst_ptr = DMAHelper::base(output_tensor); + std::shared_ptr current_ng_tensor = + get_current_ng_tensor(current_dst_ptr, last_dst_ptr, last_ng_tensor, + true, ng_exec, op_backend, ng_element_type, + ng_shape); + current_ng_tensor->set_stale(true); + output_caches[i] = std::make_pair(current_dst_ptr, current_ng_tensor); + ng_outputs.push_back(current_ng_tensor); + } + + ngraph::Event event_input_check_in_catalog( + "Get Variable Inputs from Resource Manager", name(), ""); + + for (int input_index = 0; input_index < input_shapes.size(); + input_index++) { + bool ref_exists = NGraphCatalog::ExistsInInputVariableSharedNameMap( + m_graph_id, def().name(), input_index); + + if (!ref_exists) { + OP_REQUIRES(ctx, ng_inputs[input_index] != nullptr, + errors::Internal("Input ", input_index, + " is not in Catalog nor was set from TF")); + continue; + } + + string ref_var_name = NGraphCatalog::GetInputVariableSharedName( + m_graph_id, def().name(), input_index); + NGraphVar* var; + 
OP_REQUIRES_OK(ctx, ctx->resource_manager()->Lookup( + ctx->resource_manager()->default_container(), + ref_var_name, &var)); + + if (var->need_sync_ng_tensor()) { + number_of_copies++; + copy_log_str << "Var_Sync[" << input_index << "] "; + ngraph::Event event_sync_ng_tf_tensors( + "Output: ng_tensor and tf_tensor sync", name(), ""); + + NGRAPH_VLOG(4) << "In NGEncapsulate, ng tensor behind, needs to sync " + "with tf-tensor"; + WriteNGTensor(var->ng_tensor(), var->tensor()); + // TODO(malikshr): We will be able to set the sync_ng_tensor to false + // once we do topological sort to add attributes like copy_to_tf + event_sync_ng_tf_tensors.Stop(); + ngraph::Event::write_trace(event_sync_ng_tf_tensors); + } + + ng_inputs[input_index] = var->ng_tensor(); + + var->Unref(); + } + event_input_check_in_catalog.Stop(); + ngraph::Event::write_trace(event_input_check_in_catalog); + + NGRAPH_VLOG(4) + << "NGraphEncapsulateOp::Compute allocated result tensors for cluster " + << m_ngraph_cluster; + + int time_create_or_lookup_tensors = create_or_lookup_tensors.ElapsedInMS(); + event_alloc_output.Stop(); + + // Execute the nGraph function. + ngraph::Event event_execute_function("Execute nGraph", name(), ""); + Timer execute_function; + { + BackendManager::LockBackend(m_op_backend_name); + NGRAPH_VLOG(4) + << "NGraphEncapsulateOp::Compute call starting for cluster " + << m_ngraph_cluster; + try { + ng_exec->call(ng_outputs, ng_inputs); + } catch (const std::exception& exp) { + ng_function = m_ng_function_map[ng_exec]; + BackendManager::UnlockBackend(m_op_backend_name); + NgraphSerialize( + "tf_function_error_" + ctx->op_kernel().name() + ".json", + ng_function); + OP_REQUIRES(ctx, false, + errors::Internal( + "Caught exception while executing nGraph computation: ", + exp.what(), "\n")); + } catch (...) 
{ + BackendManager::UnlockBackend(m_op_backend_name); + NgraphSerialize( + "tf_function_error_" + ctx->op_kernel().name() + ".json", + ng_function); + OP_REQUIRES( + ctx, false, + errors::Internal("Error in executing the nGraph computation\n")); + } + BackendManager::UnlockBackend(m_op_backend_name); + } + int time_execute_function = execute_function.ElapsedInMS(); + event_execute_function.Stop(); + + long vm, rss; + MemoryProfile(vm, rss); + NGRAPH_VLOG(1) << "NGRAPH_TF_MEM_PROFILE: OP_ID: " << my_instance_id + << " Step_ID: " << ctx->step_id() + << " Cluster: " << ctx->op_kernel().name() + << " Input Tensors created: " + << ng_input_tensor_size_in_bytes / (1024 * 1024) << " MB" + << " Output Tensors created: " + << ng_output_tensor_size_in_bytes / (1024 * 1024) << " MB" + << " Total process memory: " << rss / (1024 * 1024) << " GB"; + + NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute call done for cluster " + << m_ngraph_cluster; + + // Copy value to host if backend is not CPU + ngraph::Event event_copy_output("Output - copy back", name(), ""); + + Timer copy_output_tensors_to_host; + + try { + // Done to delete the tensors from catalog in the destructor + if (m_number_outputs == -1) { + NGRAPH_VLOG(4) << "Settig number of outputs for " << def().name(); + m_number_outputs = output_caches.size(); + } + size_t output_tensor_count = output_caches.size(); + std::vector> events; + for (size_t i = 0; i < output_tensor_count; ++i) { + string key = NGraphCatalog::CreateNodeKey(m_graph_id, def().name(), i); + bool ref_exists = NGraphCatalog::ExistsInEncapOutputTensorMap(key); + void* dst_ptr; + std::shared_ptr dst_tv; + std::tie(dst_ptr, dst_tv) = output_caches[i]; + + if (ref_exists) { + NGRAPH_VLOG(4) << "Saving output in Catalog " << key << dst_tv; + NGraphCatalog::AddToEncapOutputTensorMap(key, dst_tv); + } + + if (m_op_backend_name != "CPU" && + NGraphCatalog::EncapOutputIndexNeedsCopy(def().name(), i)) { + number_of_copies++; + copy_log_str << " COPY_OP_VAL[" << i 
<< "]"; + + NGRAPH_VLOG(4) << "Copying Output " << def().name() + << " ,index: " << i; + auto ng_element_type = dst_tv->get_element_type(); + size_t copy_size = + dst_tv->get_element_count() * ng_element_type.size(); + string event_name = + "Output_" + to_string(i) + "_" + to_string(copy_size); + std::unique_ptr event_copy_output_next( + new ngraph::Event(event_name, name(), "")); + dst_tv->read(dst_ptr, 0, + dst_tv->get_element_count() * ng_element_type.size()); + event_copy_output_next->Stop(); + events.push_back(std::move(event_copy_output_next)); + } + } + // Now write the events back + for (auto& next : events) { + ngraph::Event::write_trace(*next.get()); + } + + } catch (const std::exception& exp) { + OP_REQUIRES( + ctx, false, + errors::Internal( + "Caught exception while transferring tensor data to host: ", + exp.what(), "\n")); + } catch (...) { + OP_REQUIRES( + ctx, false, + errors::Internal("Error in transferring tensor data to host\n")); + } + event_copy_output.Stop(); + + copy_log_str << " Number of copies " << number_of_copies << "\n"; + if (log_copies) { + cout << copy_log_str.str(); + } + + // Mark input tensors as fresh for the next time around. 
+ // Note: these ng_tensors are being marked fresh so that in the next + // iteration if this encapsulate finds the tensor fresh, then it will use it + for (int i = 0; i < input_shapes.size(); i++) { + void* src_ptr = (void*)DMAHelper::base(&ctx->input(i)); + m_freshness_tracker->MarkFresh(src_ptr, ng_exec); + } + int time_copy_output_tensors_to_host = + copy_output_tensors_to_host.ElapsedInMS(); + + NGRAPH_VLOG(4) + << "NGraphEncapsulateOp::Compute done marking fresh for cluster " + << m_ngraph_cluster; + NGRAPH_VLOG(1) << "NGRAPH_TF_TIMING_PROFILE: OP_ID: " << my_instance_id + << " Step_ID: " << ctx->step_id() + << " Cluster: " << ctx->op_kernel().name() + << " Time-Compute: " << compute_time.ElapsedInMS() + << " Function-Create-or-Lookup: " + << time_func_create_or_lookup << " Create-and-copy-tensors: " + << time_create_or_lookup_tensors + << " Execute: " << time_execute_function + << " Copy-outputs-to-host: " + << time_copy_output_tensors_to_host; + event.Stop(); + ngraph::Event::write_trace(event_func_maybe_create); + ngraph::Event::write_trace(event_alloc_output); + ngraph::Event::write_trace(event_alloc_input); + ngraph::Event::write_trace(event_execute_function); + ngraph::Event::write_trace(event_copy_output); + ngraph::Event::write_trace(event); + + } // end compute + + private: + // TF Graph for the cluster + Graph m_graph; + + std::unordered_map> + m_ng_exec_map; + std::unordered_map, + std::shared_ptr> + m_ng_function_map; + + NgFunctionIOCache m_ng_exec_input_cache_map; + NgFunctionIOCache m_ng_exec_output_cache_map; + + // Freshness tracker maintains a set of ng::functions using a particular base + // pointer(for Tensor) + // A single instance of freshness_tracker is used across all + // nGraphEncapsulateOp and nGraphVariable op + NGraphFreshnessTracker* m_freshness_tracker; + int m_ngraph_cluster; + int m_graph_id; + std::vector m_input_is_static; + std::mutex m_compute_lock; + string m_op_backend_name; + + std::shared_ptr get_current_ng_tensor( + 
void* current_tf_ptr, void* last_tf_ptr, + const std::shared_ptr& last_ng_tensor, + const bool& output_tensor, + const std::shared_ptr& ng_exec, + ng::runtime::Backend* op_backend, + const ng::element::Type& ng_element_type, const ng::Shape& ng_shape) { + // NOTE: we assume that TF's pointers WILL change if it actually changes + // values. ie, it will not reuse the same space if its rewritten it + bool tf_tensor_has_changed = current_tf_ptr != last_tf_ptr; + bool no_ng_tensor_found = last_ng_tensor == nullptr; + bool is_cpu = m_op_backend_name == "CPU"; + + // We need to check last_ng_tensor != nullptr, since there are cases where + // at the first call to the ng_exec, both current_dst_ptr (when the + // output is a 0-sized tensor) and last_dst_ptr (uninitialized at the + // first call) are nullptr + // A new tensor needs to be created for sure if no_ng_tensor_found + // Additionally for CPU, it needs to be created if tf_tensor_has_changed, + // for others, we do not create + bool need_new_tensor_creation; + if (is_cpu) { + need_new_tensor_creation = no_ng_tensor_found || tf_tensor_has_changed; + } else { + need_new_tensor_creation = no_ng_tensor_found; + } + + // It is stale if a new tensor was created OR the tf tensor has changed OR + // (tf tensor has not changed, but freshness tracker says its stale) + bool is_stale; + if (output_tensor) { + is_stale = true; // For output tensors, it is always set stale to true + } else { + is_stale = need_new_tensor_creation || tf_tensor_has_changed || + (!tf_tensor_has_changed && + !m_freshness_tracker->IsFresh(current_tf_ptr, ng_exec)); + } + + // create a new ng tensor or use the last one + std::shared_ptr current_ng_tensor; + if (need_new_tensor_creation) { + if (is_cpu) { + current_ng_tensor = op_backend->create_tensor(ng_element_type, ng_shape, + current_tf_ptr); + } else { + current_ng_tensor = + op_backend->create_tensor(ng_element_type, ng_shape); + } + } else { + current_ng_tensor = last_ng_tensor; + } + 
current_ng_tensor->set_stale(is_stale); + return current_ng_tensor; + } + std::list m_lru; + int my_function_cache_depth_in_items = 16; + static int s_instance_count; + int my_instance_id{0}; + int m_number_outputs = -1; +}; + +int NGraphEncapsulateOp::s_instance_count = 0; + +} // namespace ngraph_bridge + +REGISTER_KERNEL_BUILDER(Name("NGraphEncapsulate").Device(DEVICE_CPU), + ngraph_bridge::NGraphEncapsulateOp); + +} // namespace tensorflow diff --git a/src/enable_variable_ops/ngraph_enter_in_catalog.cc b/src/enable_variable_ops/ngraph_enter_in_catalog.cc new file mode 100644 index 00000000..b667ff09 --- /dev/null +++ b/src/enable_variable_ops/ngraph_enter_in_catalog.cc @@ -0,0 +1,145 @@ +/******************************************************************************* + * Copyright 2019 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *******************************************************************************/ + +#pragma once + +#include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/graph/graph.h" + +#include "ngraph/ngraph.hpp" +#include "ngraph/serializer.hpp" +#include "ngraph_catalog.h" +#include "ngraph_log.h" +#include "ngraph_utils.h" + +using namespace std; +namespace ng = ngraph; + +namespace tensorflow { + +namespace ngraph_bridge { + +// Used by Variable and other Modifier Ops (NGraphVariable, NGraphAssign) +// for accessing the variable object from resource manager using shared +// name +// If the op is not of type NGraphVariable, +// then recurse over its 1st input till we get reach the variable +// Assumes: the Variable that is being modified is the 1st input and the only +// modifiable input +// If the op has many such inputs, this function needs to be called for each of +// them +// It is bound to terminate as the modifier ops like Assign, AssignAdd, +// ApplyGradientDescent, etc +// always operate on a Variable +Status GetSharedName(Node* node, string* shared_name) { + if (node->type_string() == "NGraphVariable") { + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "shared_name", shared_name)); + if (shared_name->empty()) { + (*shared_name) = node->name(); + } + return Status::OK(); + } + + Node* input_0; + TF_RETURN_IF_ERROR(node->input_node(0, &input_0)); + return GetSharedName(input_0, shared_name); +} + +// 1. Populate the input_variable_map +// 2. 
Attach Graph Ids to the node +Status EnterInCatalog(Graph* graph, int graph_id) { + // Topological Sort + vector ordered; + GetReversePostOrder(*graph, &ordered); + + for (auto node : ordered) { + // Update the input variable map + if (IsNGVariableType(node->type_string())) { + string node_key = NGraphCatalog::CreateNodeKey(graph_id, node->name(), 0); + string shared_name; + TF_RETURN_IF_ERROR(GetSharedName(node, &shared_name)); + NGraphCatalog::AddToInputVariableSharedNameMap(node_key, shared_name); + + NGRAPH_VLOG(4) << "Adding in InputVariableSharedNameMap "; + NGRAPH_VLOG(4) << "Key: " << node_key; + NGRAPH_VLOG(4) << "Value: " << shared_name; + + } else if (node->type_string() == "NGraphEncapsulate") { + // input catalog + for (auto edge : node->in_edges()) { + if (edge->src()->IsOp() && !edge->IsControlEdge() && + IsNGVariableType(edge->src()->type_string())) { + auto src = edge->src(); + string node_key = NGraphCatalog::CreateNodeKey(graph_id, node->name(), + edge->dst_input()); + string shared_name; + TF_RETURN_IF_ERROR(GetSharedName(src, &shared_name)); + NGraphCatalog::AddToInputVariableSharedNameMap(node_key, shared_name); + NGRAPH_VLOG(4) << "Adding in InputVariableSharedNameMap "; + NGRAPH_VLOG(4) << "Key: " << node_key; + NGRAPH_VLOG(4) << "Value: " << shared_name; + } + } + + // output ng-copy map catalog + unordered_set op_index_to_copy; + NGRAPH_VLOG(4) << "Finding Output Copy required for " << node->name(); + for (auto edge : node->out_edges()) { + if (edge->dst()->IsOp() && !edge->IsControlEdge() && + !IsNGVariableType(edge->dst()->type_string())) { + NGRAPH_VLOG(4) << "Output Copy required for " << node->name() + << " ,index: " << edge->src_output() << " dstOpType " + << edge->dst()->type_string(); + op_index_to_copy.insert(edge->src_output()); + } + } + NGraphCatalog::AddToEncapOutputCopyIndexesMap(node->name(), + op_index_to_copy); + + } // end of node is type NGraphEncapsulate + + // Update the output tensor map + if 
(IsNGVariableType(node->type_string())) { + for (auto edge : node->in_edges()) { + if (!edge->src()->IsOp() || edge->IsControlEdge() || + IsRefType(edge->dst()->input_type(edge->dst_input())) || + edge->src()->type_string() != "NGraphEncapsulate") { + continue; + } + + NGRAPH_VLOG(4) << "Get " << node->type_string() + << "and input is from NGraphEncapsulate"; + + auto src = edge->src(); + int src_output = edge->src_output(); + string node_key = + NGraphCatalog::CreateNodeKey(graph_id, src->name(), src_output); + + // Will be updated with real tensors in Encapsulate + NGraphCatalog::AddToEncapOutputTensorMap(node_key, nullptr); + NGRAPH_VLOG(4) << "Adding in Output Tensor Map"; + NGRAPH_VLOG(4) << "Key: " << node_key; + } + } // end of if node of type NGraphAssign + } // enter in catalog + + NGRAPH_VLOG(4) << "Entered in Catalog"; + return Status::OK(); +} + +} // namespace ngraph_bridge + +} // namespace tensorflow diff --git a/src/enable_variable_ops/ngraph_enter_in_catalog.h b/src/enable_variable_ops/ngraph_enter_in_catalog.h new file mode 100644 index 00000000..ebb598da --- /dev/null +++ b/src/enable_variable_ops/ngraph_enter_in_catalog.h @@ -0,0 +1,70 @@ +/******************************************************************************* + * Copyright 2019 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *******************************************************************************/ +#ifndef NGRAPH_TF_ENTER_IN_CATALOG_H_ +#define NGRAPH_TF_ENTER_IN_CATALOG_H_ +#pragma once + +#include "tensorflow/core/graph/graph.h" + +#include "ngraph/ngraph.hpp" +#include "ngraph_catalog.h" + +using namespace std; +namespace ng = ngraph; + +namespace tensorflow { + +namespace ngraph_bridge { + +// 1. Populate the NGraphCatalog +// 2. Attach Graph Ids to the node + +// Some terms: +// NGraphSupported Ops : NGraphVariable, NGraphAssign, NGraphEncapsulate +// NGraphVariableType Ops : NGraphVariable, NGraphAssign +// NG-Tensor : ngraph backend tensor + +// TF's Variable Op is a wrapper on a persistent TF-tensor which is stored +// in the TF Container and can be accessed/retrieved by TF Resource Manager +// The NGraphVariable Op is a wrapper on a pair of TF-Tensor and NG-Tensor that +// are synced lazily (when required) + +// We collect the below information for the catalog +// 1. If the NGraphSupportedOp gets input from a NGraphVariableType Op, +// it can directly access the ng-tensor via the TF Resource Manager using the +// shared Name +// We add mapping of {graphId_nodename_InputIndex : Shared_Name} to the +// InputVariableSharedNameMap +// +// 2. If the output of NGraphEncapsulate Op is an input to NGraphVariableType +// Op, we store this NG-Tensor +// so that it can be directly accessed in compute call of NGraphVariableType. +// We add mapping of {graphId_encapnodename_OutputIndex : NG-Tensor} to the +// EncapOutputTensorMap +// +// 3. 
If the output of NGraphEncapsulate Op is not required by a TF Op or +// NGraphEncapsulate Op, +// then we can avoid copying it to HOST +// We add mapping of {encapnodename : set of OutputIndexes that need a copy} to +// the EncapsulateOutputCopyIndexesMap +// + +Status EnterInCatalog(Graph* graph, int graph_id); + +} // ngraph_bridge +} // tensorflow + +#endif diff --git a/src/enable_variable_ops/ngraph_replace_op_utilities.cc b/src/enable_variable_ops/ngraph_replace_op_utilities.cc new file mode 100644 index 00000000..f15bc905 --- /dev/null +++ b/src/enable_variable_ops/ngraph_replace_op_utilities.cc @@ -0,0 +1,221 @@ +/******************************************************************************* + * Copyright 2019 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *******************************************************************************/ + +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/graph/types.h" + +#include "ngraph_mark_for_clustering.h" +#include "ngraph_utils.h" + +using namespace std; + +namespace tensorflow { + +namespace ngraph_bridge { + +Status ReplaceApplyGradientDescent(Graph* graph, Node* node, Node** replacement, + const string replacement_node_name, + const string replacement_node_type, + const bool just_looking, + const bool outputs_ng_supported, + const int graph_id, + const bool is_backend_set) { + NGRAPH_VLOG(1) << "Start replacing NGraphApplyGradientDescent " + << node->name(); + + DataType dtype; + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "T", &dtype)); + bool use_locking; + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "use_locking", &use_locking)); + + NodeBuilder::NodeOut input_var; + NodeBuilder::NodeOut input_alpha; + NodeBuilder::NodeOut input_delta; + + std::vector input_edges; + TF_RETURN_IF_ERROR(node->input_edges(&input_edges)); + + NGRAPH_VLOG(1) << "No of input edges to ApplyGradientDescent " + << input_edges.size(); + + input_var = + NodeBuilder::NodeOut(input_edges[0]->src(), input_edges[0]->src_output()); + input_alpha = + NodeBuilder::NodeOut(input_edges[1]->src(), input_edges[1]->src_output()); + input_delta = + NodeBuilder::NodeOut(input_edges[2]->src(), input_edges[2]->src_output()); + + TF_RETURN_IF_ERROR(NodeBuilder(replacement_node_name, replacement_node_type) + .Attr("T", dtype) + .Attr("use_locking", use_locking) + .Attr("just_looking", just_looking) + .Attr("copy_to_tf", !outputs_ng_supported) + .Attr("ngraph_graph_id", graph_id) + .Input(input_var) + .Input(input_alpha) + .Input(input_delta) + .Device(node->assigned_device_name()) + .Finalize(graph, &(*replacement))); + + (*replacement)->set_assigned_device_name(node->assigned_device_name()); + + if (is_backend_set) { + std::string 
backend_name; + TF_RETURN_IF_ERROR( + GetNodeAttr(node->attrs(), "_ngraph_backend", &backend_name)); + SetNodeBackend(*replacement, backend_name); + } + + return Status::OK(); +} // end of ReplaceApplyGradientDescent + +Status ReplaceAssign(Graph* graph, Node* node, Node** replacement, + const string replacement_node_name, + const string replacement_node_type, + const bool just_looking, const bool outputs_ng_supported, + const int graph_id, const bool is_backend_set) { + NGRAPH_VLOG(1) << "Replacing " << node->name(); + + DataType dtype; + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "T", &dtype)); + + NodeBuilder::NodeOut input_ref; + NodeBuilder::NodeOut input_val; + + for (auto edge : node->in_edges()) { + if (edge == NULL) { + NGRAPH_VLOG(1) << "Replacing " << replacement_node_type + << ", found null edge: "; + continue; + } + if (edge->dst()->IsOp() && !edge->IsControlEdge() && + IsRefType(edge->dst()->input_type(edge->dst_input()))) { + input_ref = NodeBuilder::NodeOut(edge->src(), edge->src_output()); + } else { + input_val = NodeBuilder::NodeOut(edge->src(), edge->src_output()); + } + } + + TF_RETURN_IF_ERROR(NodeBuilder(replacement_node_name, replacement_node_type) + .Attr("validate_shape", true) + .Attr("use_locking", true) + .Attr("T", dtype) + .Attr("just_looking", just_looking) + .Attr("copy_to_tf", !outputs_ng_supported) + .Attr("ngraph_graph_id", graph_id) + .Input(input_ref) + .Input(input_val) + .Device(node->assigned_device_name()) + .Finalize(graph, &(*replacement))); + + (*replacement)->set_assigned_device_name(node->assigned_device_name()); + + if (is_backend_set) { + std::string backend_name; + TF_RETURN_IF_ERROR( + GetNodeAttr(node->attrs(), "_ngraph_backend", &backend_name)); + SetNodeBackend(*replacement, backend_name); + } + + NGRAPH_VLOG(4) << "Replacing Node " << node->DebugString() << " with " + << (*replacement)->DebugString(); + return Status::OK(); +} + +Status ReplaceVariable(Graph* graph, Node* node, Node** replacement, + const 
string replacement_node_name, + const string replacement_node_type, + const bool just_looking, const bool outputs_ng_supported, + const int graph_id, const bool is_backend_set) { + NGRAPH_VLOG(1) << "Replacing NGraphVariable " << node->name(); + + TensorShape shape; + DataType dtype; + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "shape", &shape)); + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "dtype", &dtype)); + + std::string container; + std::string shared_name; + + if (GetNodeAttr(node->attrs(), "container", &container) != Status::OK()) { + container = ""; + } + if (GetNodeAttr(node->attrs(), "shared_name", &shared_name) != Status::OK()) { + shared_name = ""; + } + + TF_RETURN_IF_ERROR( + NodeBuilder(replacement_node_name, replacement_node_type) + .Attr("shape", shape) + .Attr("dtype", dtype) + .Attr("container", container) + .Attr("shared_name", + (shared_name.empty() ? node->name() : shared_name)) + .Attr("just_looking", just_looking) + .Attr("copy_to_tf", !outputs_ng_supported) + .Attr("ngraph_graph_id", graph_id) + .Device(node->assigned_device_name()) + .Finalize(graph, &(*replacement))); + + (*replacement)->set_assigned_device_name(node->assigned_device_name()); + + if (is_backend_set) { + std::string backend_name; + TF_RETURN_IF_ERROR( + GetNodeAttr(node->attrs(), "_ngraph_backend", &backend_name)); + SetNodeBackend(*replacement, backend_name); + } + NGRAPH_VLOG(4) << "Replacing Node " << node->DebugString() << " with " + << (*replacement)->DebugString(); + + return Status::OK(); +} + +// Though edges will be removed when we remove the node +// we specifically remove the edges to be sure +Status ReplaceInputControlEdges(Graph* graph, Node* node, Node* replacement) { + for (auto edge : node->in_edges()) { + NGRAPH_VLOG(4) << "Replacing: " << edge->DebugString(); + if (!edge->IsControlEdge()) continue; + graph->AddEdge(edge->src(), edge->src_output(), replacement, + edge->dst_input()); + graph->RemoveEdge(edge); + } + return Status::OK(); +} + +// 
Though edges will be removed when we remove the node +// we specifically remove the edges to be sure +Status ReplaceOutputEdges(Graph* graph, Node* node, Node* replacement) { + std::vector<const Edge*> edges; + for (auto edge : node->out_edges()) { + edges.push_back(edge); + } + + for (auto edge : edges) { + NGRAPH_VLOG(4) << "Replacing: " << edge->DebugString(); + graph->AddEdge(replacement, edge->src_output(), edge->dst(), + edge->dst_input()); + graph->RemoveEdge(edge); + } + + return Status::OK(); +} + +} // namespace ngraph_bridge + +} // namespace tensorflow diff --git a/src/enable_variable_ops/ngraph_replace_op_utilities.h b/src/enable_variable_ops/ngraph_replace_op_utilities.h new file mode 100644 index 00000000..ab4d2e84 --- /dev/null +++ b/src/enable_variable_ops/ngraph_replace_op_utilities.h @@ -0,0 +1,62 @@ +/******************************************************************************* + * Copyright 2019 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *******************************************************************************/ +#ifndef NGRAPH_TF_REPLACE_OP_UTILITIES_H_ +#define NGRAPH_TF_REPLACE_OP_UTILITIES_H_ + +#pragma once + +#include "tensorflow/core/graph/graph.h" + +using namespace std; +namespace tensorflow { + +namespace ngraph_bridge { + +Status ReplaceApplyGradientDescent(Graph* graph, Node* node, Node** replacement, + const string replacement_node_name, + const string replacement_op_type, + const bool just_looking, + const bool outputs_ng_supported, + const int graph_id, + const bool is_backend_set); + +Status ReplaceAssign(Graph* graph, Node* node, Node** replacement, + const string replacement_node_name, + const string replacement_op_type, const bool just_looking, + const bool outputs_ng_supported, const int graph_id, + const bool is_backend_set); + +Status ReplaceVariable(Graph* graph, Node* node, Node** replacement, + const string replacement_node_name, + const string replacement_op_type, + const bool just_looking, const bool outputs_ng_supported, + const int graph_id, const bool is_backend_set); + +// Adds the edges that are incoming control edges to node +// as incoming control edges to the replacement node +// Removes the original edges +Status ReplaceInputControlEdges(Graph* graph, Node* node, Node* replacement); + +// Adds the edges that are outgoing from node +// as outgoing edges to the replacement node +// Removes the original edges +Status ReplaceOutputEdges(Graph* graph, Node* node, Node* replacement); + +} // namespace ngraph_bridge + +} // namespace tensorflow + +#endif // NGRAPH_TF_REPLACE_OP_UTILITIES_H_ \ No newline at end of file diff --git a/src/enable_variable_ops/ngraph_replace_variable_modifiers.cc b/src/enable_variable_ops/ngraph_replace_variable_modifiers.cc new file mode 100644 index 00000000..973b2060 --- /dev/null +++ b/src/enable_variable_ops/ngraph_replace_variable_modifiers.cc @@ -0,0 +1,187 @@ 
+/******************************************************************************* + * Copyright 2019 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ + +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/node_builder.h" + +#include "ngraph_api.h" +#include "ngraph_capture_variables.h" +#include "ngraph_replace_op_utilities.h" +#include "ngraph_replace_variable_modifiers.h" +#include "ngraph_utils.h" + +using namespace std; +namespace ng = ngraph; +namespace tensorflow { + +namespace ngraph_bridge { + +// TODO (malikshr) :: Currently we are not checking whether the new op name is +// unique +// New Op names are added in +// 1. ReplaceModifiers +// 2. 
RewriteForTracking + +Status ReplaceModifiers(Graph* graph, int graph_id) { + // Go over the nodes and replace variable modifiers + // Each Modifier is replaced with the corresponding computational TF + // graph followed by NGraphAssign Op + // If there is an incoming control edge to the Modifier Op + // It is attached to the first op in the series of the computation TF graph + vector<Node*> remove_nodes; + for (auto node : graph->op_nodes()) { + if (node->type_string() == "NGraphAssignSub" || + node->type_string() == "NGraphAssignAdd") { + NodeBuilder::NodeOut input_ref; + NodeBuilder::NodeOut input_val; + + DataType dtype; + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "T", &dtype)); + + for (auto edge : node->in_edges()) { + if (edge == NULL) { + continue; + } + if (edge->dst()->IsOp() && !edge->IsControlEdge() && + IsRefType(edge->dst()->input_type(edge->dst_input()))) { + input_ref = NodeBuilder::NodeOut(edge->src(), edge->src_output()); + } else { + input_val = NodeBuilder::NodeOut(edge->src(), edge->src_output()); + } + } + + string op_type = + (node->type_string() == "NGraphAssignSub") ? 
"Sub" : "Add"; + string op_name_suffix = "_" + op_type; + Node* compute_op; + string new_name_sub = node->name() + op_name_suffix; + TF_RETURN_IF_ERROR(NodeBuilder(new_name_sub, op_type) + .Input(input_ref) + .Input(input_val) + .Attr("T", dtype) + .Device(node->assigned_device_name()) + .Finalize(graph, &(compute_op))); + compute_op->set_assigned_device_name(node->assigned_device_name()); + NodeBuilder::NodeOut ndef_compute_op = + NodeBuilder::NodeOut(compute_op, 0); + + NGRAPH_VLOG(1) << "Compute op name: " << compute_op->name(); + NGRAPH_VLOG(1) << "Compute op assigned device: " + << compute_op->assigned_device_name(); + + Node* ngraphassign_op; + string new_name_ngassign = node->name() + "_NGraphAssign"; + + TF_RETURN_IF_ERROR(NodeBuilder(new_name_ngassign, "NGraphAssign") + .Attr("validate_shape", true) + .Attr("use_locking", true) + .Attr("T", dtype) + .Attr("ngraph_graph_id", 0) + .Input(input_ref) + .Input(ndef_compute_op) + .Device(node->assigned_device_name()) + .Finalize(graph, &ngraphassign_op)); + ngraphassign_op->set_assigned_device_name(node->assigned_device_name()); + NGRAPH_VLOG(1) << "Assign op name: " << ngraphassign_op->name(); + NGRAPH_VLOG(1) << "Assign op assigned device: " + << ngraphassign_op->assigned_device_name(); + + TF_RETURN_IF_ERROR(ReplaceInputControlEdges(graph, node, compute_op)); + TF_RETURN_IF_ERROR(ReplaceOutputEdges(graph, node, ngraphassign_op)); + + remove_nodes.push_back(node); + NGRAPH_VLOG(1) << "Removing node"; + + } // AssignSub + Assign Add + else if (node->type_string() == "NGraphApplyGradientDescent") { + NodeBuilder::NodeOut input_var; + NodeBuilder::NodeOut input_alpha; + NodeBuilder::NodeOut input_delta; + + std::vector input_edges; + TF_RETURN_IF_ERROR(node->input_edges(&input_edges)); + + NGRAPH_VLOG(1) << "No of input edges to ApplyGradientDescent " + << input_edges.size(); + + input_var = NodeBuilder::NodeOut(input_edges[0]->src(), + input_edges[0]->src_output()); + input_alpha = 
NodeBuilder::NodeOut(input_edges[1]->src(), + input_edges[1]->src_output()); + input_delta = NodeBuilder::NodeOut(input_edges[2]->src(), + input_edges[2]->src_output()); + + DataType dtype; + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "T", &dtype)); + + Node* mul_op; + string new_name_mul = node->name() + "_Mul"; + TF_RETURN_IF_ERROR(NodeBuilder(new_name_mul, "Mul") + .Input(input_alpha) + .Input(input_delta) + .Attr("T", dtype) + .Device(node->assigned_device_name()) + .Finalize(graph, &(mul_op))); + mul_op->set_assigned_device_name(node->assigned_device_name()); + NodeBuilder::NodeOut ndef_mul_op = NodeBuilder::NodeOut(mul_op, 0); + + Node* sub_op; + string new_name_sub = node->name() + "_Sub"; + TF_RETURN_IF_ERROR(NodeBuilder(new_name_sub, "Sub") + .Input(input_var) + .Input(ndef_mul_op) + .Attr("T", dtype) + .Device(node->assigned_device_name()) + .Finalize(graph, &(sub_op))); + sub_op->set_assigned_device_name(node->assigned_device_name()); + NodeBuilder::NodeOut ndef_sub_op = NodeBuilder::NodeOut(sub_op, 0); + + Node* ngraphassign_op; + string new_name_ngassign = node->name() + "_NGraphAssign"; + + TF_RETURN_IF_ERROR(NodeBuilder(new_name_ngassign, "NGraphAssign") + .Attr("validate_shape", true) + .Attr("use_locking", true) + .Attr("T", dtype) + .Attr("ngraph_graph_id", 0) + .Input(input_var) + .Input(ndef_sub_op) + .Device(node->assigned_device_name()) + .Finalize(graph, &ngraphassign_op)); + ngraphassign_op->set_assigned_device_name(node->assigned_device_name()); + NGRAPH_VLOG(1) << "Assign op name: " << ngraphassign_op->name(); + NGRAPH_VLOG(1) << "Assign op assigned device: " + << ngraphassign_op->assigned_device_name(); + + TF_RETURN_IF_ERROR(ReplaceInputControlEdges(graph, node, mul_op)); + TF_RETURN_IF_ERROR(ReplaceOutputEdges(graph, node, ngraphassign_op)); + + remove_nodes.push_back(node); + + NGRAPH_VLOG(1) << "Replaced ApplyGradientDescent"; + } // Apply Gradient Descent + } + + for (auto node : remove_nodes) { + graph->RemoveNode(node); + } + + 
return Status::OK(); +} + +} // namespace ngraph_bridge + +} // namespace tensorflow diff --git a/src/enable_variable_ops/ngraph_replace_variable_modifiers.h b/src/enable_variable_ops/ngraph_replace_variable_modifiers.h new file mode 100644 index 00000000..2ad0a04a --- /dev/null +++ b/src/enable_variable_ops/ngraph_replace_variable_modifiers.h @@ -0,0 +1,36 @@ +/******************************************************************************* + * Copyright 2019 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *******************************************************************************/ + +#pragma once + +#ifndef NGRAPH_TF_REPLACE_OPTIMIZERS_H_ +#define NGRAPH_TF_REPLACE_OPTIMIZERS_H_ + +#include "ngraph/runtime/backend.hpp" +#include "tensorflow/core/graph/graph.h" + +using namespace std; +namespace ng = ngraph; +namespace tensorflow { + +namespace ngraph_bridge { + +Status ReplaceModifiers(Graph* graph, int graph_id); + +} // ngraph_bridge +} // tensorflow + +#endif // NGRAPH_TF_REPLACE_OPTIMIZERS_H_ \ No newline at end of file diff --git a/src/enable_variable_ops/ngraph_rewrite_for_tracking.cc b/src/enable_variable_ops/ngraph_rewrite_for_tracking.cc new file mode 100644 index 00000000..a077e104 --- /dev/null +++ b/src/enable_variable_ops/ngraph_rewrite_for_tracking.cc @@ -0,0 +1,124 @@ +/******************************************************************************* + * Copyright 2017-2019 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/graph/types.h" + +#include "ngraph_replace_op_utilities.h" +#include "ngraph_rewrite_for_tracking.h" +#include "ngraph_utils.h" + +using namespace std; + +namespace tensorflow { + +namespace ngraph_bridge { + +// +// Main entry point for rewrite-for-tracking. 
+// +Status RewriteForTracking(Graph* graph, int graph_id) { + const static std::map< + const string, + const function<Status(Graph*, Node*, Node**, const string, const string, + const bool, const bool, const int, const bool)>> + REWRITE_REPLACE_OP_MAP{{"NGraphAssign", ReplaceAssign}, + {"NGraphVariable", ReplaceVariable}}; + + std::vector<Node*> replaced_nodes; + for (auto node : graph->op_nodes()) { + auto itr = REWRITE_REPLACE_OP_MAP.find(node->type_string()); + if (itr != REWRITE_REPLACE_OP_MAP.end()) { + NGRAPH_VLOG(1) << "Checking: " << DebugNode(node) << " " << node->name(); + + bool just_looking = true; + bool outputs_ng_supported = true; + + // Check if all the outputs of this node are supported by nGraph + for (auto edge : node->out_edges()) { + auto dst = edge->dst(); + NGRAPH_VLOG(1) << "dst node " << DebugNode(dst); + if (dst->IsOp() && !edge->IsControlEdge() && + !IsNGSupportedType(dst->type_string())) { + NGRAPH_VLOG(1) << "Dst node ngraph doesn't support "; + outputs_ng_supported = false; + break; + } + } + + // If any of the nodes reading from this Variable node read the data as + // reference then we dont track it, else we do + for (auto edge : node->out_edges()) { + if (edge->dst()->IsOp() && !edge->IsControlEdge() && + IsRefType(edge->dst()->input_type(edge->dst_input()))) { + // if the output reference is read by NGraph supported ops, do not + // turn off just_looking + if (!IsNGVariableType(edge->dst()->type_string())) { + NGRAPH_VLOG(1) << DebugNode(edge->dst()) + << "needs reference, setting just_looking to false"; + just_looking = false; + break; + } + } + } + + NGRAPH_VLOG(1) << "Just Looking: " << PrintBool(just_looking); + NGRAPH_VLOG(1) << "Outputs supported by nGraph: " + << PrintBool(outputs_ng_supported); + NGRAPH_VLOG(1) << "Requires Replacement " + << PrintBool(just_looking || !outputs_ng_supported); + + std::string node_new_name = node->name(); + + if (just_looking) { + node_new_name += "/peek"; + } + + if (!outputs_ng_supported) { + node_new_name += "/non_ng_outputs"; + } + + node_new_name += "/gid_" + to_string(graph_id); + NGRAPH_VLOG(1) << 
"Replacing " << node->name() << " New Node name " + << node_new_name; + + Node* replacement; + + // Create and add the replacement node + TF_RETURN_IF_ERROR((itr->second)(graph, node, &replacement, node_new_name, + node->type_string(), just_looking, + outputs_ng_supported, graph_id, true)); + + TF_RETURN_IF_ERROR(ReplaceInputControlEdges(graph, node, replacement)); + TF_RETURN_IF_ERROR(ReplaceOutputEdges(graph, node, replacement)); + + replaced_nodes.push_back(node); + + } // end of checking if it is NGVariableType + } // end of looping through the nodes in the graph + for (auto node : replaced_nodes) { + graph->RemoveNode(node); + } + + return Status::OK(); +} + +} // namespace ngraph_bridge + +} // namespace tensorflow diff --git a/src/enable_variable_ops/ngraph_rewrite_pass.cc b/src/enable_variable_ops/ngraph_rewrite_pass.cc new file mode 100644 index 00000000..b152f5e6 --- /dev/null +++ b/src/enable_variable_ops/ngraph_rewrite_pass.cc @@ -0,0 +1,330 @@ +/******************************************************************************* + * Copyright 2017-2019 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *******************************************************************************/ +#include <iomanip> + +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/graph/graph.h" + +#include "ngraph_api.h" +#include "ngraph_assign_clusters.h" +#include "ngraph_capture_variables.h" +#include "ngraph_deassign_clusters.h" +#include "ngraph_encapsulate_clusters.h" +#include "ngraph_enter_in_catalog.h" +#include "ngraph_log.h" +#include "ngraph_mark_for_clustering.h" +#include "ngraph_replace_variable_modifiers.h" +#include "ngraph_rewrite_for_tracking.h" +#include "tf_graph_writer.h" + +#if defined NGRAPH_DISTRIBUTED +#include "ngraph/distributed.hpp" +#endif + +using namespace std; + +namespace tensorflow { + +namespace ngraph_bridge { + +class NGraphRewritePass : public GraphOptimizationPass { + public: + virtual Status Run(const GraphOptimizationPassOptions& options) = 0; + + protected: + void DumpGraphs(const GraphOptimizationPassOptions& options, int idx, + std::string filename_prefix, std::string title) { + // If we have a "main" graph, dump that. + if (options.graph != nullptr) { + auto dot_filename = DotFilename(filename_prefix, idx); + auto pbtxt_filename = PbtxtFilename(filename_prefix, idx); + NGRAPH_VLOG(0) << "Dumping main graph to " << dot_filename; + NGRAPH_VLOG(0) << "Dumping main graph to " << pbtxt_filename; + + GraphToDotFile(options.graph->get(), dot_filename, title); + GraphToPbTextFile(options.graph->get(), pbtxt_filename); + } + + // If we have partition graphs (we shouldn't), dump those. 
+ if (options.partition_graphs != nullptr) { + int sub_idx = 0; + + for (auto& kv : *options.partition_graphs) { + auto dot_filename = DotFilename(filename_prefix, idx, sub_idx); + auto pbtxt_filename = PbtxtFilename(filename_prefix, idx, sub_idx); + NGRAPH_VLOG(0) << "Dumping subgraph " << sub_idx << " to " + << dot_filename; + NGRAPH_VLOG(0) << "Dumping subgraph " << sub_idx << " to " + << pbtxt_filename; + + Graph* pg = kv.second.get(); + + GraphToDotFile(pg, dot_filename, title); + GraphToPbTextFile(pg, pbtxt_filename); + + sub_idx++; + } + } + } + + // Returns a fresh "serial number" to avoid filename collisions in the graph + // dumps. + static int FreshIndex() { + mutex_lock l(s_serial_counter_mutex); + return s_serial_counter++; + } + + static bool DumpAllGraphs() { + return std::getenv("NGRAPH_TF_DUMP_GRAPHS") != nullptr; + } + + private: + static std::string DotFilename(std::string kind, int idx) { + return GraphFilenamePrefix(kind, idx) + ".dot"; + } + static std::string PbtxtFilename(std::string kind, int idx) { + return GraphFilenamePrefix(kind, idx) + ".pbtxt"; + } + static std::string DotFilename(std::string kind, int idx, int sub_idx) { + return GraphFilenamePrefix(kind, idx, sub_idx) + ".dot"; + } + static std::string PbtxtFilename(std::string kind, int idx, int sub_idx) { + return GraphFilenamePrefix(kind, idx, sub_idx) + ".pbtxt"; + } + static std::string GraphFilenamePrefix(std::string kind, int idx) { + std::stringstream ss; + ss << kind << "_" << std::setfill('0') << std::setw(4) << idx; +#if defined NGRAPH_DISTRIBUTED + ngraph::Distributed dist; + int Rank_ID = dist.get_rank(); + ss << "_" << std::setfill('0') << std::setw(4) << Rank_ID; +#endif + return ss.str(); + } + static std::string GraphFilenamePrefix(std::string kind, int idx, + int sub_idx) { + std::stringstream ss; + ss << GraphFilenamePrefix(kind, idx) << "_" << std::setfill('0') + << std::setw(4) << sub_idx; +#if defined NGRAPH_DISTRIBUTED + ngraph::Distributed dist; + int Rank_ID 
= dist.get_rank(); + ss << "_" << std::setfill('0') << std::setw(4) << Rank_ID; +#endif + return ss.str(); + } + + static int s_serial_counter GUARDED_BY(s_serial_counter_mutex); + static mutex s_serial_counter_mutex; +}; + +int NGraphRewritePass::s_serial_counter = 0; +mutex NGraphRewritePass::s_serial_counter_mutex; + +// +// The variable capture pass replaces all instances of VariableV2 with the +// NGraphVariable op. Making this replacement allows us to substitute in a +// kernel that tracks the freshness of variables (invalidating freshness when +// the reference is handed off to an "untrusted" op). +// +class NGraphVariableCapturePass : public NGraphRewritePass { + public: + Status Run(const GraphOptimizationPassOptions& options) override { + // If we don't get a main graph, log that fact and bail. + if (options.graph == nullptr) { + NGRAPH_VLOG(0) << "NGraphVariableCapturePass: options.graph == nullptr"; + return Status::OK(); + } + + // For filename generation purposes, grab a fresh index. This is just an + // arbitrary integer to avoid filename collisions resulting from subsequent + // runs of this pass. + int idx = FreshIndex(); + + // If requested, dump pre-capture graphs. + if (DumpPrecaptureGraphs()) { + DumpGraphs(options, idx, "precapture", "Pre-Capture Graph"); + } + + // If ngraph is disabled via ngraph_bridge api or NGRAPH_TF_DISABLE is set + // we will not do anything; all subsequent + // passes become a no-op. + if (config::IsEnabled() == false || + std::getenv("NGRAPH_TF_DISABLE") != nullptr) { + return Status::OK(); + } + + // Do variable capture then, if requested, dump the graphs. 
+ std::vector<string> skip_these_nodes = {}; + TF_RETURN_IF_ERROR( + CaptureVariables(options.graph->get(), skip_these_nodes)); + if (DumpCapturedGraphs()) { + DumpGraphs(options, idx, "captured", "Graph With Variables Captured"); + } + + return Status::OK(); + } + + private: + static bool DumpPrecaptureGraphs() { + return DumpAllGraphs() || + std::getenv("NGRAPH_TF_DUMP_PRE_CAPTURED_GRAPHS") != nullptr; + } + static bool DumpCapturedGraphs() { + return DumpAllGraphs() || + std::getenv("NGRAPH_TF_DUMP_CAPTURED_GRAPHS") != nullptr; + } +}; + +// +// Pass that rewrites the graph for nGraph operation. +// +// The pass has several phases, each executed in sequence: +// +// 1. Marking [ngraph_mark_for_clustering.cc] +// 2. Cluster Assignment [ngraph_assign_clusters.cc] +// 3. Cluster Deassignment [ngraph_deassign_clusters.cc] +// 4. Cluster Encapsulation [ngraph_encapsulate_clusters.cc] +// +// Between phases, graph dumps (in both .dot and .pbtxt format) may be +// requested by setting the following environment variables: +// +// NGRAPH_TF_DUMP_UNMARKED_GRAPHS=1 dumps graphs before phase 1 +// NGRAPH_TF_DUMP_MARKED_GRAPHS=1 dumps graphs after phase 1 +// NGRAPH_TF_DUMP_CLUSTERED_GRAPHS=1 dumps graphs after phase 2 +// NGRAPH_TF_DUMP_DECLUSTERED_GRAPHS=1 dumps graphs after phase 3 +// NGRAPH_TF_DUMP_ENCAPSULATED_GRAPHS=1 dumps graphs after phase 4 +// NGRAPH_TF_DUMP_GRAPHS=1 all of the above +// +class NGraphEncapsulationPass : public NGraphRewritePass { + public: + Status Run(const GraphOptimizationPassOptions& options) override { + // If we don't get a main graph, log that fact and bail. + if (options.graph == nullptr) { + NGRAPH_VLOG(0) << "NGraphEncapsulationPass: options.graph == nullptr"; + return Status::OK(); + } + + // For filename generation purposes, grab a fresh index. This is just an + // arbitrary integer to avoid filename collisions resulting from subsequent + // runs of this pass. + int idx = FreshIndex(); + + // If requested, dump unmarked graphs. 
+ if (DumpUnmarkedGraphs()) { + DumpGraphs(options, idx, "unmarked", "Unmarked Graph"); + } + + // If ngraph is disabled via ngraph_bridge api or NGRAPH_TF_DISABLE is set + // we will not do anything; all subsequent + // passes become a no-op. + if (config::IsEnabled() == false || + std::getenv("NGRAPH_TF_DISABLE") != nullptr) { + return Status::OK(); + } + + // 0. Replace optimizers then, if requested, dump the graphs. + TF_RETURN_IF_ERROR(ReplaceModifiers(options.graph->get(), idx)); + if (DumpMarkedGraphs()) { + DumpGraphs(options, idx, "replaced_modifier", + "Graph with Modifiers replaced"); + } + + // 1. Mark for clustering then, if requested, dump the graphs. + std::vector<string> skip_these_nodes = {}; + TF_RETURN_IF_ERROR( + MarkForClustering(options.graph->get(), skip_these_nodes)); + if (DumpMarkedGraphs()) { + DumpGraphs(options, idx, "marked", "Graph Marked for Clustering"); + } + + // 2. Assign clusters then, if requested, dump the graphs. + TF_RETURN_IF_ERROR(AssignClusters(options.graph->get())); + if (DumpClusteredGraphs()) { + DumpGraphs(options, idx, "clustered", "Graph with Clusters Assigned"); + } + + // 3. Deassign trivial clusters then, if requested, dump the graphs. + TF_RETURN_IF_ERROR(DeassignClusters(options.graph->get())); + if (DumpDeclusteredGraphs()) { + DumpGraphs(options, idx, "declustered", + "Graph with Trivial Clusters De-Assigned"); + } + + // 4. Encapsulate clusters then, if requested, dump the graphs. + TF_RETURN_IF_ERROR(EncapsulateClusters(options.graph->get(), idx)); + if (DumpEncapsulatedGraphs()) { + DumpGraphs(options, idx, "encapsulated", + "Graph with Clusters Encapsulated"); + } + + // Rewrite for tracking then, if requested, dump the graphs. + TF_RETURN_IF_ERROR(RewriteForTracking(options.graph->get(), idx)); + if (DumpTrackedGraphs()) { + DumpGraphs(options, idx, "tracked", + "Graph with Variables Rewritten for Tracking"); + } + + // Enter in catalog then. 
+ TF_RETURN_IF_ERROR(EnterInCatalog(options.graph->get(), idx)); + if (DumpCatalogedGraphs()) { + DumpGraphs(options, idx, "cataloged", + "Graph with Variables Inputs Entered in Catalog"); + } + + return Status::OK(); + } + + private: + static bool DumpUnmarkedGraphs() { + return DumpAllGraphs() || + std::getenv("NGRAPH_TF_DUMP_UNMARKED_GRAPHS") != nullptr; + } + static bool DumpMarkedGraphs() { + return DumpAllGraphs() || + std::getenv("NGRAPH_TF_DUMP_MARKED_GRAPHS") != nullptr; + } + static bool DumpClusteredGraphs() { + return DumpAllGraphs() || + std::getenv("NGRAPH_TF_DUMP_CLUSTERED_GRAPHS") != nullptr; + } + static bool DumpDeclusteredGraphs() { + return DumpAllGraphs() || + std::getenv("NGRAPH_TF_DUMP_DECLUSTERED_GRAPHS") != nullptr; + } + static bool DumpEncapsulatedGraphs() { + return DumpAllGraphs() || + std::getenv("NGRAPH_TF_DUMP_ENCAPSULATED_GRAPHS") != nullptr; + } + static bool DumpTrackedGraphs() { + return DumpAllGraphs() || + std::getenv("NGRAPH_TF_DUMP_TRACKED_GRAPHS") != nullptr; + } + + static bool DumpCatalogedGraphs() { + return DumpAllGraphs() || + std::getenv("NGRAPH_TF_DUMP_CATALOGED_GRAPHS") != nullptr; + } +}; + +} // namespace ngraph_bridge + +REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_PLACEMENT, 0, + ngraph_bridge::NGraphVariableCapturePass); +REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, 0, + ngraph_bridge::NGraphEncapsulationPass); +} // namespace tensorflow diff --git a/src/enable_variable_ops/ngraph_tracked_variable.cc b/src/enable_variable_ops/ngraph_tracked_variable.cc new file mode 100644 index 00000000..415f2792 --- /dev/null +++ b/src/enable_variable_ops/ngraph_tracked_variable.cc @@ -0,0 +1,281 @@ +/******************************************************************************* + * Copyright 2017-2019 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ + +#include "tensorflow/core/common_runtime/dma_helper.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/lib/strings/strcat.h" + +#include "tensorflow/core/platform/default/logging.h" + +#include "ngraph/runtime/backend.hpp" +#include "ngraph_backend_manager.h" +#include "ngraph_freshness_tracker.h" +#include "ngraph_utils.h" +#include "ngraph_var.h" + +#include "ngraph/event_tracing.hpp" + +using namespace std; +namespace ng = ngraph; + +namespace tensorflow { + +namespace ngraph_bridge { + +// +// Forked from tensorflow:tensorflow/core/kernels/variable_ops.{cc,h} +// and tensorflow:tensorflow/core/ops/state_ops.cc. +// + +// Resource stored by variables in the resource manager +// (legacy, ref-style version). +// +// (Changes: Renamed from LegacyVar, modified to take a TensorShape in +// constructor.) 
+ +/* ------------------------------------------------- +// +// NGraphVariableOp +// +---------------------------------------------------*/ +class NGraphVariableOp : public OpKernel { + public: + explicit NGraphVariableOp(OpKernelConstruction* context); + ~NGraphVariableOp() override; + void Compute(OpKernelContext* ctx) override; + + private: + int ng_graph_id_; + DataType dtype_; + TensorShape shape_; + bool just_looking_; + bool copy_to_tf_; + NGraphFreshnessTracker* tracker_; + string ng_backend_name_; + mutex init_mu_; + ContainerInfo cinfo_ GUARDED_BY(init_mu_); + bool initialized_ GUARDED_BY(init_mu_){false}; + static int s_instance_count; + int my_instance_id{0}; + + TF_DISALLOW_COPY_AND_ASSIGN(NGraphVariableOp); +}; + +int NGraphVariableOp::s_instance_count = 0; + +NGraphVariableOp::NGraphVariableOp(OpKernelConstruction* context) + : OpKernel(context), + tracker_(nullptr), + just_looking_(false), + copy_to_tf_(false), + dtype_(RemoveRefType(context->output_type(0))) { + my_instance_id = s_instance_count; + s_instance_count++; + + OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_)); + OP_REQUIRES_OK(context, context->GetAttr("just_looking", &just_looking_)); + OP_REQUIRES_OK(context, context->GetAttr("copy_to_tf", &copy_to_tf_)); + OP_REQUIRES_OK(context, context->GetAttr("ngraph_graph_id", &ng_graph_id_)); + OP_REQUIRES_OK(context, + context->GetAttr("_ngraph_backend", &ng_backend_name_)); + NGRAPH_VLOG(4) << "NGraphVariable:: Constructor called for: " << def().name() + << " ,just looking " << just_looking_ << " ,copy-to-tf " + << copy_to_tf_ << " ,Graph ID " << ng_graph_id_ + << " ,backend_name " << ng_backend_name_; +} + +NGraphVariableOp::~NGraphVariableOp() { tracker_->Unref(); } + +// (Changes: Renamed from VariableOp, modified to pass TensorShape to NGraphVar +// constructor.) 
+void NGraphVariableOp::Compute(OpKernelContext* ctx) { + NGRAPH_VLOG(4) << "NGraphVariable:: Compute called for: " << def().name() + << " ,just looking " << just_looking_ << " ,copy-to-tf " + << copy_to_tf_ << " ,Graph ID " << ng_graph_id_ + << " ,backend_name " << ng_backend_name_; + + std::ostringstream oss; + oss << "NGraphVariable: " << my_instance_id << ": " << name(); + ngraph::Event event_compute(oss.str(), name(), ""); + + bool log_copies = false; + OP_REQUIRES_OK(ctx, IsCopyLogEnabled(ng_graph_id_, log_copies)); + std::stringstream copy_log_str; + copy_log_str << "KERNEL[" << type_string() << "]: " << name() << " ,Copy_TF " + << PrintBool(copy_to_tf_) << " ,Just_Looking " + << PrintBool(just_looking_) << "\n"; + int number_of_copies = 0; + + mutex_lock l(init_mu_); + if (!initialized_) { + // Analyze the node attribute of 'ndef' and decides the container and + // resource name the kernel should use for accessing the shared + // resource. + // + // 'ndef' is expected to have node attribute "container" and + // "shared_name". Returns non-OK if they are not provided or they are + // invalid. + // + // The policy is as following: + // * If the attribute "container" is non-empty, it is used as is. + // Otherwise, uses the resource manager's default container. + // * If the attribute "shared_name" is non-empty, it is used as is. + // Otherwise, if "use_node_name_as_default" is true, the kernel's + // node name is used as the resource name. Otherwise, a string + // unique to this process is used. 
+ + // API: Status Init(ResourceMgr* rmgr, const NodeDef& ndef, + // bool use_node_name_as_default); + // + // + // We Use context's resource manager's default container + // And shared name is same as node_name + OP_REQUIRES_OK(ctx, cinfo_.Init(ctx->resource_manager(), def(), + true /* use name() */)); + initialized_ = true; + } + + auto creator = [this](NGraphVar** var) { + *var = new NGraphVar(dtype_, shape_, ng_backend_name_); + return Status::OK(); + }; + + // If "container" has a resource "name", returns it in + // "*resource". Otherwise, invokes creator() to create the resource. + // The caller takes the ownership of one ref on "*resource". + // + + // Here uses the Resource Manager's default container + NGraphVar* var; + OP_REQUIRES_OK(ctx, cinfo_.resource_manager()->LookupOrCreate( + cinfo_.container(), cinfo_.name(), &var, creator)); + + bool just_synced = false; + if (var->need_sync_ng_tensor()) { + number_of_copies++; + copy_log_str << "Var_Sync "; + NGRAPH_VLOG(4) << "in tracked variable, ng tensor behind, needs to sync " + "with tf-tensor"; + WriteNGTensor(var->ng_tensor(), var->tensor()); + var->sync_ng_tensor(false); + just_synced = true; + } + + // Output a reference to our tensor, so it may be updated. + // + // As long as the resource manager hasn't been cleared the ref we return + // here is valid because it owns a ref on var. + + // Mark the underlying tensor as stale. TODO(amprocte): Make this + // conditional on whether any reader is taking in a reference. More + // conservative condition that would work for now: invalidate if any + // reader is not NGraphEncapsulateOp. 
+ auto t_creator = [this](NGraphFreshnessTracker** tracker) { + *tracker = new NGraphFreshnessTracker(); + return Status::OK(); + }; + if (tracker_ == nullptr) { + if (NGRAPH_VLOG_IS_ON(5)) { + NGRAPH_VLOG(5) << "Variable " << ctx->op_kernel().name() + << ": getting tracker"; + } + OP_REQUIRES_OK( + ctx, ctx->resource_manager()->LookupOrCreate( + ctx->resource_manager()->default_container(), + "ngraph_freshness_tracker", &tracker_, t_creator)); + if (NGRAPH_VLOG_IS_ON(5)) { + NGRAPH_VLOG(5) << "Variable " << ctx->op_kernel().name() + << ": got tracker"; + } + } + + if (NGRAPH_VLOG_IS_ON(5)) { + NGRAPH_VLOG(5) << "Variable " << ctx->op_kernel().name() << ": adding " + << DMAHelper::base(var->tensor()); + } + tracker_->AddTensor(DMAHelper::base(var->tensor())); + if (NGRAPH_VLOG_IS_ON(5)) { + NGRAPH_VLOG(5) << "Variable " << ctx->op_kernel().name() << ": added " + << DMAHelper::base(var->tensor()); + } + + if (copy_to_tf_) { + if (!just_synced) { + number_of_copies++; + copy_log_str << " COPY_TF "; + ReadNGTensor(var->ng_tensor(), var->tensor()); + NGRAPH_VLOG(4) << "Copying to TF Tensor"; + } + + if (!just_looking_) { + // Some tf op might update the tf-tensor + // So we need to sync_it_later + var->sync_ng_tensor(true); + copy_log_str << " SET_SYNC "; + } + } + + copy_log_str << " Number of copies " << number_of_copies << "\n"; + if (log_copies) { + cout << copy_log_str.str(); + } + + if (!just_looking_) { + if (NGRAPH_VLOG_IS_ON(5)) { + NGRAPH_VLOG(5) << "Variable " << ctx->op_kernel().name() << ": marking " + << DMAHelper::base(var->tensor()); + } + tracker_->MarkStale(DMAHelper::base(var->tensor())); + if (NGRAPH_VLOG_IS_ON(5)) { + NGRAPH_VLOG(5) << "Variable " << ctx->op_kernel().name() << ": marked " + << DMAHelper::base(var->tensor()); + } + } + // To output a reference. Caller retains ownership of mu and tensor_for_ref, + // and they must outlive all uses within the step. See comment above. 
+ // REQUIRES: IsRefType(expected_output_dtype(index)) + ctx->set_output_ref(0, var->mu(), var->tensor()); + + if (ctx->track_allocations() && var->tensor()->IsInitialized()) { + AllocatorAttributes attr; + attr.set_gpu_compatible(true); + attr.set_nic_compatible(true); + ctx->record_persistent_memory_allocation(var->tensor()->AllocatedBytes()); + } + var->Unref(); + ngraph::Event::write_trace(event_compute); +} + +REGISTER_OP("NGraphVariable") + .Output("ref: Ref(dtype)") + .Attr("shape: shape") + .Attr("dtype: type") + .Attr("just_looking: bool = false") + .Attr("copy_to_tf: bool = false") + .Attr("container: string = ''") + .Attr("shared_name: string = ''") + .Attr("ngraph_graph_id: int") + .SetIsStateful() + .SetShapeFn(shape_inference::ExplicitShape); + +REGISTER_KERNEL_BUILDER(Name("NGraphVariable").Device(DEVICE_CPU), + NGraphVariableOp); + +} // namespace ngraph_bridge + +} // namespace tensorflow diff --git a/src/enable_variable_ops/ngraph_utils.cc b/src/enable_variable_ops/ngraph_utils.cc new file mode 100644 index 00000000..21db659a --- /dev/null +++ b/src/enable_variable_ops/ngraph_utils.cc @@ -0,0 +1,329 @@ +/******************************************************************************* + * Copyright 2017-2019 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *******************************************************************************/ +#include +#include +#include +#include + +#include "tensorflow/core/common_runtime/dma_helper.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/framework/attr_value_util.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/default/logging.h" +#include "tensorflow/core/platform/protobuf.h" + +#include "ngraph_utils.h" +using namespace std; +namespace ng = ngraph; + +namespace tensorflow { + +namespace ngraph_bridge { + +// static int testing_graph_id=30; + +Status IsCopyLogEnabled(int graph_id, bool& is_copy_log_enabled) { + const char* copy_env_var = std::getenv("NGRAPH_TF_LOG_COPIES"); + if (copy_env_var == nullptr) { + is_copy_log_enabled = false; + return Status::OK(); + } + int test_graph_id; + + try { + test_graph_id = stoi(string(copy_env_var)); + } catch (const std::invalid_argument& ia) { + return errors::InvalidArgument("Invalid argument for NGRAPH_TF_LOG_COPIES"); + } + + // if -1 copies are logged for all graphs + is_copy_log_enabled = (test_graph_id == -1 || test_graph_id == graph_id); + return Status::OK(); +} + +void PrintTFTensor(Tensor& T1) { + NGRAPH_VLOG(4) << "all tensor values" << (T1).SummarizeValue(64) << endl; +} + +std::string DebugNode(Node* node) { + std::string temp = node->name(); + temp += "[" + node->type_string() + "]"; + return temp; +} + +std::string PrintBool(bool var) { return (var ? 
"Yes" : "No"); } + +bool IsNGVariableType(string node_type) { + return (node_type == "NGraphVariable" || node_type == "NGraphAssign"); +}; + +bool IsNGSupportedType(string node_type) { + return (IsNGVariableType(node_type) || node_type == "NGraphEncapsulate"); +}; + +// Read from this ng_tensor into tf_tensor +void ReadNGTensor(shared_ptr ng_tensor, + Tensor* tf_tensor) { + void* tf_src_ptr = (void*)DMAHelper::base(tf_tensor); + ng_tensor->read(tf_src_ptr, 0, ng_tensor->get_element_count() * + ng_tensor->get_element_type().size()); +} + +// Write into this ng_tensor from tf_tensor +void WriteNGTensor(shared_ptr ng_tensor, + Tensor* tf_tensor) { + void* tf_src_ptr = (void*)DMAHelper::base(tf_tensor); + ng_tensor->write(tf_src_ptr, 0, ng_tensor->get_element_count() * + ng_tensor->get_element_type().size()); +} + +void SummarizeOp(OpKernelConstruction* ctx, std::ostream& out) { + auto node_def = ctx->def(); + out << "Node name: " << node_def.name() << " Op: " << node_def.op() << "\n"; + out << "Inputs: " << node_def.input().size() << "\n "; + for (const std::string& input : node_def.input()) { + out << input << "\n "; + } + out << "\n"; +} + +std::ostream& DumpNGTensor(std::ostream& s, const string& name, + const std::shared_ptr& t) { + // std::shared_ptr t{get_tensor()}; + const ngraph::Shape& shape = t->get_shape(); + s << "Tensor<" << name << ": "; + + for (size_t i = 0; i < shape.size(); ++i) { + s << shape.at(i); + if (i + 1 < shape.size()) { + s << ", "; + } + } + size_t pos = 0; + s << ">{"; + size_t rank = shape.size(); + if (rank == 0) { + s << GetScalarFromTensor(t, pos++); + } else if (rank <= 2) { + s << "["; + for (size_t i = 0; i < shape.at(0); ++i) { + if (rank == 1) { + s << GetScalarFromTensor(t, pos++); + } else if (rank == 2) { + s << "["; + for (size_t j = 0; j < shape.at(1); ++j) { + s << GetScalarFromTensor(t, pos++); + + if (j + 1 < shape.at(1)) { + s << ", "; + } + } + s << "]"; + } + if (i + 1 < shape.at(0)) { + s << ", "; + } + } + s << "]"; 
+ } + s << "}"; + return s; +} + +Status TFDataTypeToNGraphElementType(DataType tf_dt, + ngraph::element::Type* ng_et) { + switch (tf_dt) { + case DataType::DT_FLOAT: + *ng_et = ng::element::f32; + break; + case DataType::DT_DOUBLE: + *ng_et = ng::element::f64; + break; + case DataType::DT_INT32: + *ng_et = ng::element::i32; + break; + case DataType::DT_UINT8: + *ng_et = ng::element::u8; + break; + case DataType::DT_UINT16: + *ng_et = ng::element::u16; + break; + case DataType::DT_INT64: + *ng_et = ng::element::i64; + break; + case DataType::DT_UINT32: + *ng_et = ng::element::u32; + break; + case DataType::DT_UINT64: + *ng_et = ng::element::u64; + break; + case DataType::DT_BOOL: + *ng_et = ng::element::boolean; + break; + case DataType::DT_QINT8: + *ng_et = ng::element::i8; + break; + case DataType::DT_QUINT8: + *ng_et = ng::element::u8; + break; + case DataType::DT_QINT32: + *ng_et = ng::element::i32; + break; + default: + return errors::Unimplemented("Unsupported TensorFlow data type: ", + DataType_Name(tf_dt)); + } + return Status::OK(); +} + +Status TFTensorShapeToNGraphShape(const TensorShape& tf_shape, + ngraph::Shape* ng_shape) { + for (int i = 0; i < tf_shape.dims(); i++) { + if (tf_shape.dim_size(i) < 0) { + return errors::InvalidArgument( + "TensorFlow shape has a negative dimension size"); + } + } + + *ng_shape = ngraph::Shape(tf_shape.dims()); + for (int i = 0; i < tf_shape.dims(); i++) { + (*ng_shape)[i] = tf_shape.dim_size(i); + } + + return Status::OK(); +} + +void print_node_histogram(const std::unordered_map& histogram, + bool sorted) { + int histogram_size = histogram.size(); + if (histogram_size == 0) { + std::cout << "None"; + } else { + vector> vec(begin(histogram), end(histogram)); + if (sorted) { + sort(begin(vec), end(vec), + [](const pair& a, const pair& b) { + // descending sort + return a.second > b.second; + }); + } + + for (auto node : vec) { + bool endelem = node == vec.back(); + std::cout << " " << node.first << " -> " << node.second 
+ << (endelem ? " " : ","); + } + } +} + +const gtl::ArraySlice& NGraphDTypes() { + static gtl::ArraySlice result{ + DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, + DT_UINT16, DT_UINT32, DT_UINT64, DT_BOOL, DT_QINT8, DT_QUINT8}; + return result; +} + +const gtl::ArraySlice& NGraphNumericDTypes() { + static gtl::ArraySlice result{ + DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, + DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64}; + return result; +} + +const gtl::ArraySlice& NGraphNumericAndQuantizedDTypes() { + static gtl::ArraySlice result{ + DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64, + DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64, DT_QINT8, DT_QUINT8}; + return result; +} + +const gtl::ArraySlice& NGraphIndexDTypes() { + static gtl::ArraySlice result{DT_INT32, DT_INT64}; + return result; +} + +const gtl::ArraySlice& NGraphSupportedQuantizedDTypes() { + static gtl::ArraySlice result{DT_QINT8, DT_QUINT8}; + return result; +} + +const gtl::ArraySlice& NGraphRealDTypes() { + static gtl::ArraySlice result{DT_FLOAT, DT_DOUBLE}; + return result; +} + +const gtl::ArraySlice& NGraphBiasDTypes() { + static gtl::ArraySlice result{DT_FLOAT, DT_QINT32}; + return result; +} + +Status CheckAxisDimInRange(std::vector axes, size_t rank) { + for (auto i : axes) { + if (i < (int)-rank || i >= (int)rank) { + return errors::InvalidArgument("Axis Dimension is out of range. 
Got ", i, + ", should be in range [-", rank, ", ", + rank, ")"); + } + } + return Status::OK(); +} + +void NgraphSerialize(const std::string& file_name, + const std::shared_ptr& ng_function) { + NGRAPH_VLOG(0) << "Serializing graph to: " << file_name << std::endl; + std::string js = ngraph::serialize(ng_function, 4); + std::ofstream f; + f.exceptions(std::ofstream::failbit | std::ofstream::badbit); + try { + f.open(file_name); + f << js; + f.close(); + } catch (std::ofstream::failure& e) { + NGRAPH_VLOG(0) << "Exception opening/closing file " << file_name + << std::endl; + NGRAPH_VLOG(0) << e.what() << std::endl; + } +} + +void MemoryProfile(long& vm_usage, long& resident_set) { + vm_usage = 0; + resident_set = 0; + + // Get the two fields we want + long vsize; + long rss; + + std::ifstream ifs("/proc/self/stat", std::ios_base::in); + std::string mem_in; + getline(ifs, mem_in); + if (mem_in != "") { + vector mem_str = ng::split(mem_in, ' '); + vsize = std::stol(mem_str[22]); + rss = std::stol(mem_str[23]); + + long page_size_kb = sysconf(_SC_PAGE_SIZE) / + 1024; // in case x86-64 is configured to use 2MB pages + vm_usage = vsize / 1024; // unit kb + resident_set = rss * page_size_kb; + } +}; + +} // namespace ngraph_bridge + +} // namespace tensorflow diff --git a/src/enable_variable_ops/ngraph_var.h b/src/enable_variable_ops/ngraph_var.h new file mode 100644 index 00000000..c04158ce --- /dev/null +++ b/src/enable_variable_ops/ngraph_var.h @@ -0,0 +1,101 @@ +/******************************************************************************* + * Copyright 2019 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ +#ifndef NGRAPH_TF_NgraphVar_H_ +#define NGRAPH_TF_NgraphVar_H_ + +#include "tensorflow/core/common_runtime/dma_helper.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/lib/strings/strcat.h" + +#include "tensorflow/core/platform/default/logging.h" + +#include "ngraph/runtime/backend.hpp" +#include "ngraph_backend_manager.h" +#include "ngraph_freshness_tracker.h" +#include "ngraph_utils.h" + +using namespace std; +namespace ng = ngraph; + +namespace tensorflow { + +namespace ngraph_bridge { + +// THIS CLASS IS NOT BEING USED ANYWHERE +class NGraphVar : public ResourceBase { + public: + explicit NGraphVar(DataType dtype, TensorShape shape, string BackendName) + : tf_tensor_(dtype, shape), + ng_backend_name_(BackendName), + sync_ng_tensor_(false) { + // TF datatype to nGraph element type + ng::element::Type ng_element_type; + TFDataTypeToNGraphElementType(dtype, &ng_element_type); + + // TF TensorShape to nGraphShape + ng::Shape ng_shape(shape.dims()); + for (int j = 0; j < shape.dims(); ++j) { + ng_shape[j] = shape.dim_size(j); + } + + // Create Backend + BackendManager::CreateBackend(ng_backend_name_); + ng::runtime::Backend* op_backend = + BackendManager::GetBackend(ng_backend_name_); + + // Create nGTensor + ng_tensor_ = op_backend->create_tensor(ng_element_type, ng_shape); + } + // Not copyable or movable. 
+ NGraphVar(const NGraphVar&) = delete; + NGraphVar& operator=(const NGraphVar&) = delete; + + mutex* mu() { return &mu_; } + Tensor* tensor() { return &tf_tensor_; } + shared_ptr ng_tensor() { return ng_tensor_; }; + + string DebugString() override { + return strings::StrCat(DataTypeString(tf_tensor_.dtype()), "/", + tf_tensor_.shape().DebugString()); + } + + bool need_sync_ng_tensor() { return sync_ng_tensor_; } + void sync_ng_tensor(bool sync_ng_tensor) { sync_ng_tensor_ = sync_ng_tensor; } + + // TODO(malikshr): Implement syncing utility functions here + Status copy_ng_to_tf(); + Status copy_tf_to_ng(); + + private: + mutex mu_; + Tensor tf_tensor_; + shared_ptr ng_tensor_; + string ng_backend_name_; + // sync from tf to ng + bool sync_ng_tensor_; + ~NGraphVar() override { + // Release the backend + BackendManager::ReleaseBackend(ng_backend_name_); + NGRAPH_VLOG(2) << "~NGraphVar"; + } +}; + +} // namespace ng-bridge +} // namespace tf + +#endif diff --git a/src/enable_variable_ops/ngraph_variable_modifiers.cc b/src/enable_variable_ops/ngraph_variable_modifiers.cc new file mode 100644 index 00000000..94470b21 --- /dev/null +++ b/src/enable_variable_ops/ngraph_variable_modifiers.cc @@ -0,0 +1,169 @@ +/******************************************************************************* + * Copyright 2019 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *******************************************************************************/ + +#include "tensorflow/core/common_runtime/dma_helper.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/lib/strings/strcat.h" + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/platform/default/logging.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +#include "ngraph/runtime/backend.hpp" +#include "ngraph_backend_manager.h" +#include "ngraph_catalog.h" +#include "ngraph_freshness_tracker.h" +#include "ngraph_timer.h" +#include "ngraph_utils.h" +#include "ngraph_var.h" + +using namespace std; +namespace ng = ngraph; + +namespace tensorflow { + +namespace ngraph_bridge { + +/* ------------------------------------------------- +// +// NGraphApplyGradientDescentOp +// +---------------------------------------------------*/ + +class NGraphApplyGradientDescentOp : public OpKernel { + private: + public: + explicit NGraphApplyGradientDescentOp(OpKernelConstruction* context) + : OpKernel(context) { + OP_REQUIRES(context, false, + errors::Internal("This constructor should not get called", + name(), "\n")); + } + + //--------------------------------------------------------------------------- + // ~NGraphApplyGradientDescentOp() + //--------------------------------------------------------------------------- + ~NGraphApplyGradientDescentOp() override {} + + // This will never be called + void Compute(OpKernelContext* context) override { + OP_REQUIRES( + context, false, + errors::Internal("This kernel should not get called", name(), "\n")); + } // end of compute function +}; // end of NGraphApplyGradientDescent class definition + +REGISTER_OP("NGraphApplyGradientDescent") + .Input("var: Ref(T)") + .Input("alpha: T") + .Input("delta: T") + .Output("out: Ref(T)") + .Attr("T: 
numbertype") + .Attr("use_locking: bool = false") + .Attr("just_looking: bool = false") + .Attr("copy_to_tf: bool = false") + .Attr("ngraph_graph_id: int"); + +REGISTER_KERNEL_BUILDER(Name("NGraphApplyGradientDescent").Device(DEVICE_CPU), + NGraphApplyGradientDescentOp); + +/* ------------------------------------------------- +// +// NGraphAssignSubOp +// +---------------------------------------------------*/ + +// Computes *input[0] = *input[0] - input[1] +class NGraphAssignSubOp : public OpKernel { + private: + // bool use_exclusive_lock_; //TF op has this + ~NGraphAssignSubOp() override {} + + public: + explicit NGraphAssignSubOp(OpKernelConstruction* context) + : OpKernel(context) { + OP_REQUIRES(context, false, + errors::Internal("This constructor should not get called", + name(), "\n")); + } + + void Compute(OpKernelContext* context) override { + OP_REQUIRES( + context, false, + errors::Internal("This kernel should not get called", name(), "\n")); + } +}; + +REGISTER_OP("NGraphAssignSub") + .Input("ref: Ref(T)") + .Input("value: T") + .Output("output_ref: Ref(T)") + .Attr("T: type") + .Attr("validate_shape: bool = true") + .Attr("use_locking: bool = true") + .Attr("just_looking: bool = false") + .Attr("copy_to_tf: bool = false") + .Attr("ngraph_graph_id: int"); + +REGISTER_KERNEL_BUILDER(Name("NGraphAssignSub").Device(DEVICE_CPU), + NGraphAssignSubOp); + +/* ------------------------------------------------- +// +// NGraphAssignAddOp +// +---------------------------------------------------*/ + +// Computes *input[0] = *input[0] + input[1] +class NGraphAssignAddOp : public OpKernel { + public: + explicit NGraphAssignAddOp(OpKernelConstruction* context) + : OpKernel(context) { + OP_REQUIRES(context, false, + errors::Internal("This constructor should not get called", + name(), "\n")); + } + + void Compute(OpKernelContext* context) override { + OP_REQUIRES( + context, false, + errors::Internal("This kernel should not get called", name(), "\n")); + } + + private: + 
~NGraphAssignAddOp() override {} +}; + +REGISTER_OP("NGraphAssignAdd") + .Input("ref: Ref(T)") + .Input("value: T") + .Output("output_ref: Ref(T)") + .Attr("T: type") + .Attr("validate_shape: bool = true") + .Attr("use_locking: bool = true") + .Attr("just_looking: bool = false") + .Attr("copy_to_tf: bool = false") + .Attr("ngraph_graph_id: int"); + +REGISTER_KERNEL_BUILDER(Name("NGraphAssignAdd").Device(DEVICE_CPU), + NGraphAssignAddOp); + +} // namespace ngraph_bridge + +} // namespace tensorflow diff --git a/src/ngraph_builder.cc b/src/ngraph_builder.cc index 0e88baeb..00510842 100644 --- a/src/ngraph_builder.cc +++ b/src/ngraph_builder.cc @@ -2404,6 +2404,80 @@ static Status TranslateMaxPoolGradOp( return Status::OK(); } +static Status TranslateNonMaxSuppressionV4Op( + const Node* op, const std::vector& static_input_map, + Builder::OpMap& ng_op_map) { + shared_ptr ng_boxes, ng_scores; + TF_RETURN_IF_ERROR(GetInputNodes(ng_op_map, op, &ng_boxes, &ng_scores, + nullptr, nullptr, nullptr)); + + std::vector max_output_size; + TF_RETURN_IF_ERROR( + GetStaticInputVector(op, 2, static_input_map, &max_output_size)); + std::vector iou_threshold; + TF_RETURN_IF_ERROR( + GetStaticInputVector(op, 3, static_input_map, &iou_threshold)); + + std::vector score_threshold; + TF_RETURN_IF_ERROR( + GetStaticInputVector(op, 4, static_input_map, &score_threshold)); + + bool pad_to_max_output_size; + if (GetNodeAttr(op->attrs(), "pad_to_max_output_size", + &pad_to_max_output_size) != Status::OK()) { + pad_to_max_output_size = false; + } + // max_output_size must be scalar + if (max_output_size.size() != 1) { + return errors::InvalidArgument( + "NonMaxSuppressionV4 Op: max_output_size of nms must be scalar ", + max_output_size.size()); + } + // iou_threshold must be scalar + if (iou_threshold.size() != 1) { + return errors::InvalidArgument( + "NonMaxSuppressionV4 Op: iou_threshold of nms must be scalar ", + iou_threshold.size()); + } + + // score_threshold must be scalar + if 
(score_threshold.size() != 1) { + return errors::InvalidArgument( + "NonMaxSuppressionV4 Op: score_threshold of nms must be scalar ", + score_threshold.size()); + } + + std::string backend_name; + TF_RETURN_IF_ERROR(ngraph_bridge::GetNodeBackend(op, &backend_name)); + + if (backend_name != "NNPI") { + return errors::Internal("In translating NonMaxSuppressionV4 op ", + op->name(), " found requested backend ", + backend_name, " which is unsupported"); + } + + ng::runtime::Backend* backend = BackendManager::GetBackend(backend_name); + + shared_ptr ng_nmsv4 = backend->get_backend_op( + "NonMaxSuppressionV4", &ng_boxes, &ng_scores, + (size_t)(max_output_size[0]), (float)(iou_threshold[0]), + (float)score_threshold[0], (bool)pad_to_max_output_size); + if (ng_nmsv4 == nullptr) { + return errors::Internal("In translating NonMaxSuppressionV4 op ", + op->name(), + " backend could not return valid ngraph node"); + } + shared_ptr ng_selected_indices = + ConstructNgNode(op->name(), ng_nmsv4, 0); + shared_ptr ng_valid_output = + ConstructNgNode(op->name(), ng_nmsv4, 1); + + SaveNgOp(ng_op_map, op->name(), ng_selected_indices); + SaveNgOp(ng_op_map, op->name(), ng_valid_output); + + return Status::OK(); +} + static Status TranslateReduceOp( const Node* op, const std::vector& static_input_map, Builder::OpMap& ng_op_map, @@ -4357,6 +4431,7 @@ const static std::map< {"MaxPool", TranslateMaxPoolOp}, {"MaxPool3D", TranslateMaxPool3DOp}, {"MaxPoolGrad", TranslateMaxPoolGradOp}, + {"NonMaxSuppressionV4", TranslateNonMaxSuppressionV4Op}, {"Mean", TranslateMeanOp}, {"Min", TranslateDirectReduceOp}, {"Minimum", TranslateBinaryOp}, diff --git a/src/ngraph_encapsulate_clusters.cc b/src/ngraph_encapsulate_clusters.cc index b8445a09..19e866d0 100644 --- a/src/ngraph_encapsulate_clusters.cc +++ b/src/ngraph_encapsulate_clusters.cc @@ -70,7 +70,7 @@ static void AddInput(NodeDef* dst, StringPiece src_name, int src_slot) { } // ...end code copied and pasted (and modified) from graph.cc -Status 
EncapsulateClusters(Graph* graph) { +Status EncapsulateClusters(Graph* graph, int graph_id) { // A map from cluster indices to the expected device name for nodes // in that cluster. std::map device_name_map; @@ -331,6 +331,7 @@ Status EncapsulateClusters(Graph* graph) { .Attr("_ngraph_backend", cluster_backend) .Attr("Targuments", input_types) .Attr("Tresults", cluster_output_dt_map[cluster_idx]) + .Attr("ngraph_graph_id", graph_id) .Device(device_name_map[cluster_idx]) .Input(inputs) .Finalize(graph, &n); diff --git a/src/ngraph_encapsulate_clusters.h b/src/ngraph_encapsulate_clusters.h index 96806f24..2fba86f4 100644 --- a/src/ngraph_encapsulate_clusters.h +++ b/src/ngraph_encapsulate_clusters.h @@ -22,7 +22,7 @@ namespace tensorflow { namespace ngraph_bridge { -Status EncapsulateClusters(Graph* graph); +Status EncapsulateClusters(Graph* graph, int graph_id); } // namespace ngraph_bridge diff --git a/src/ngraph_encapsulate_op.cc b/src/ngraph_encapsulate_op.cc index 7ad0dee1..7979dfc5 100644 --- a/src/ngraph_encapsulate_op.cc +++ b/src/ngraph_encapsulate_op.cc @@ -60,6 +60,7 @@ REGISTER_OP("NGraphEncapsulate") .Output("results: Tresults") .Attr("Tresults: list(type) >= 0") .Attr("ngraph_cluster: int") + .Attr("ngraph_graph_id: int") .SetIsStateful() .Doc("nGraph Encapsulation Op. 
For use by the nGraph JIT only."); diff --git a/src/ngraph_mark_for_clustering.cc b/src/ngraph_mark_for_clustering.cc index c6278288..7efe6b9a 100644 --- a/src/ngraph_mark_for_clustering.cc +++ b/src/ngraph_mark_for_clustering.cc @@ -460,6 +460,8 @@ Status MarkForClustering(Graph* graph, type_constraint_map["Minimum"]["T"] = NGraphNumericDTypes(); type_constraint_map["Mul"]["T"] = NGraphNumericDTypes(); type_constraint_map["Neg"]["T"] = NGraphNumericDTypes(); + type_constraint_map["NonMaxSuppressionV4"]["T"] = { + DT_FLOAT}; // TF allows half too type_constraint_map["OneHot"]["T"] = NGraphDTypes(); type_constraint_map["Pack"]["T"] = NGraphDTypes(); type_constraint_map["Pad"]["T"] = NGraphDTypes(); @@ -574,6 +576,7 @@ Status MarkForClustering(Graph* graph, set_attributes_map["Max"] = SetStaticInputs({1}); set_attributes_map["Mean"] = SetStaticInputs({1}); set_attributes_map["Min"] = SetStaticInputs({1}); + set_attributes_map["NonMaxSuppressionV4"] = SetStaticInputs({2, 3, 4}); set_attributes_map["OneHot"] = SetStaticInputs({1}); set_attributes_map["Pad"] = SetStaticInputs({1}); set_attributes_map["Prod"] = SetStaticInputs({1}); @@ -626,7 +629,6 @@ Status MarkForClustering(Graph* graph, " is not supported"); } current_backend = backend_env; - // TODO: set backend. 
Then don't use current_backend + } + + // Right now it cannot be inside the if(!initialized) block, because it is @@ -639,13 +641,26 @@ Status MarkForClustering(Graph* graph, return Status::OK(); }; + + confirmation_function_map["NonMaxSuppressionV4"] = [&current_backend]( + Node* n, bool* result) { + *result = (current_backend == "NNPI"); + return Status::OK(); + }; + + std::unordered_map no_support_histogram; std::unordered_map fail_confirmation_histogram; std::unordered_map fail_constraint_histogram; vector nodes_marked_for_clustering; + vector variable_type_nodes; + + for (auto node : graph->op_nodes()) { bool mark_for_clustering = false; + if (IsNGVariableType(node->type_string())) { + variable_type_nodes.push_back(node); + continue; + } + + do { // check if output node bool skip_it = false; @@ -733,6 +748,10 @@ Status MarkForClustering(Graph* graph, } } + + for (auto node : variable_type_nodes) { + SetNodeBackend(node, current_backend); + } + + return Status::OK(); } diff --git a/src/ngraph_rewrite_for_tracking.cc b/src/ngraph_rewrite_for_tracking.cc index f0f4b638..e8d88f21 100644 --- a/src/ngraph_rewrite_for_tracking.cc +++ b/src/ngraph_rewrite_for_tracking.cc @@ -30,7 +30,7 @@ namespace ngraph_bridge { // // Main entry point for rewrite-for-tracking.
// -Status RewriteForTracking(Graph* graph) { +Status RewriteForTracking(Graph* graph, int graph_id) { std::vector replaced_nodes; for (auto node : graph->op_nodes()) { diff --git a/src/ngraph_rewrite_for_tracking.h b/src/ngraph_rewrite_for_tracking.h index 9e884f03..f8b716a6 100644 --- a/src/ngraph_rewrite_for_tracking.h +++ b/src/ngraph_rewrite_for_tracking.h @@ -22,7 +22,7 @@ namespace tensorflow { namespace ngraph_bridge { -Status RewriteForTracking(Graph* graph); +Status RewriteForTracking(Graph* graph, int graph_id); } // namespace ngraph_bridge diff --git a/src/ngraph_rewrite_pass.cc b/src/ngraph_rewrite_pass.cc index afc8468b..1e3ab5df 100644 --- a/src/ngraph_rewrite_pass.cc +++ b/src/ngraph_rewrite_pass.cc @@ -208,14 +208,14 @@ class NGraphEncapsulationPass : public NGraphRewritePass { } // 4. Encapsulate clusters then, if requested, dump the graphs. - TF_RETURN_IF_ERROR(EncapsulateClusters(options.graph->get())); + TF_RETURN_IF_ERROR(EncapsulateClusters(options.graph->get(), idx)); if (DumpEncapsulatedGraphs()) { DumpGraphs(options, idx, "encapsulated", "Graph with Clusters Encapsulated"); } // Rewrite for tracking then, if requested, dump the graphs. 
- TF_RETURN_IF_ERROR(RewriteForTracking(options.graph->get())); + TF_RETURN_IF_ERROR(RewriteForTracking(options.graph->get(), idx)); if (DumpTrackedGraphs()) { DumpGraphs(options, idx, "tracked", "Graph with Variables Rewritten for Tracking"); diff --git a/src/ngraph_utils.cc b/src/ngraph_utils.cc index 8b81c4e5..9bf89d45 100644 --- a/src/ngraph_utils.cc +++ b/src/ngraph_utils.cc @@ -36,6 +36,10 @@ namespace tensorflow { namespace ngraph_bridge { +bool IsNGVariableType(string node_type) { + return node_type == "NGraphVariable"; +} + void SummarizeOp(OpKernelConstruction* ctx, std::ostream& out) { auto node_def = ctx->def(); out << "Node name: " << node_def.name() << " Op: " << node_def.op() << "\n"; diff --git a/src/ngraph_utils.h b/src/ngraph_utils.h index 2d895f36..b586c1f3 100644 --- a/src/ngraph_utils.h +++ b/src/ngraph_utils.h @@ -20,22 +20,49 @@ #include #include -#include "ngraph/ngraph.hpp" - +#include "tensorflow/core/common_runtime/dma_helper.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/platform/tensor_coding.h" #include "tensorflow/core/util/saved_tensor_slice_util.h" +#include "ngraph/ngraph.hpp" #include "ngraph/serializer.hpp" #include "ngraph_log.h" +namespace ng = ngraph; +using namespace std; namespace tensorflow { namespace ngraph_bridge { +/* ------------------------------------------------- +// +// NGraphVariableMap : Map of Variable names and their backend tensors +// +---------------------------------------------------*/ + +Status IsCopyLogEnabled(int graph_id, bool& is_copy_log_enabled); + +void PrintTFTensor(Tensor& T1); +std::string DebugNode(Node* node); + +// Read from this ng_tensor into tf_tensor +void ReadNGTensor(shared_ptr ng_tensor, Tensor* tf_tensor); +std::string PrintBool(bool var); + +// Write into this ng_tensor from tf_tensor +void WriteNGTensor(shared_ptr ng_tensor, + Tensor* tf_tensor); + void SummarizeOp(OpKernelConstruction* ctx, std::ostream& out); 
+// Node-types on a variable and are executed on nGraph +bool IsNGVariableType(string node_type); + +// Node-types that are executed on nGraph +bool IsNGSupportedType(string node_type); + // Taken from: tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc // Extract values from a Const op to `values`. Returns true if succeeds. // diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d616e911..1cd8cdde 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -52,6 +52,7 @@ set(SRC test_math_ops.cpp test_nn_ops.cpp test_array_ops.cpp + test_variable_ops.cpp opexecuter.cpp ) diff --git a/test/test_variable_ops.cpp b/test/test_variable_ops.cpp new file mode 100644 index 00000000..05287f80 --- /dev/null +++ b/test/test_variable_ops.cpp @@ -0,0 +1,364 @@ +/******************************************************************************* + * Copyright 2019 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *******************************************************************************/ + +#include "tensorflow/cc/client/client_session.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/public/session.h" + +#include "gtest/gtest.h" +#include "ngraph_assign_clusters.h" +#include "ngraph_backend_manager.h" +#include "ngraph_mark_for_clustering.h" +#include "test_utilities.h" +#include "tf_graph_writer.h" + +using namespace std; +namespace ng = ngraph; + +namespace tensorflow { + +namespace ngraph_bridge { + +namespace testing { + +#define ASSERT_OK(x) ASSERT_EQ((x), ::tensorflow::Status::OK()); +#define ASSERT_NOT_OK(x) ASSERT_NE((x), ::tensorflow::Status::OK()); + +// Simple Graph +TEST(VariableTest, SmallGraph1) { + Scope root = Scope::NewRootScope(); + + PartialTensorShape varShape({2, 2}); + auto var = ops::Variable(root, varShape, DT_FLOAT); + auto init_value = ops::Const(root, {{1.f, 1.f}, {1.f, 1.f}}); + auto var_assign = ops::Assign(root, var, init_value); + + auto c = ops::Const(root, {{1.f, 1.f}, {1.f, 1.f}}); + + auto add = ops::Add(root, var, c); + + auto assign = ops::Assign(root, var, add); + + // Turn off optimizations so that all the nodes are processed + tensorflow::SessionOptions options; + options.config.mutable_graph_options() + ->mutable_optimizer_options() + ->set_opt_level(tensorflow::OptimizerOptions_Level_L0); + options.config.mutable_graph_options() + ->mutable_rewrite_options() + ->set_constant_folding(tensorflow::RewriterConfig::OFF); + + // Run on nGraph + ActivateNGraph(); + ClientSession ng_session(root, options); + std::vector ng_outputs1; + std::vector ng_outputs2; + std::vector ng_outputs3; + + ASSERT_OK(ng_session.Run( + { + var_assign, + }, + &ng_outputs1)); + for (int i = 0; i < 20; i++) { + ASSERT_OK(ng_session.Run({assign}, &ng_outputs2)); + } + + ASSERT_OK(ng_session.Run({var}, &ng_outputs3)); + + // Run on TF + DeactivateNGraph(); + ClientSession 
tf_session(root, options); + std::vector tf_outputs1; + std::vector tf_outputs2; + std::vector tf_outputs3; + + ASSERT_OK(tf_session.Run( + { + var_assign, + }, + &tf_outputs1)); + for (int i = 0; i < 20; i++) { + ASSERT_OK(tf_session.Run({assign}, &tf_outputs2)); + } + + ASSERT_OK(tf_session.Run({var}, &tf_outputs3)); + + Compare(tf_outputs1, ng_outputs1); + Compare(tf_outputs2, ng_outputs2); + Compare(tf_outputs3, ng_outputs3); + + // For other test cases + ActivateNGraph(); +} + +// Graph with AssignAdd and AssignSub +TEST(VariableTest, SmallGraph2) { + Scope root = Scope::NewRootScope(); + + PartialTensorShape varShape({2, 2}); + auto var = ops::Variable(root.WithOpName("Var1"), varShape, DT_FLOAT); + auto init_value = ops::Const(root, {{2.f, 3.f}, {4.f, 5.f}}); + auto var_assign = ops::Assign(root, var, init_value); + + auto c = ops::Const(root, {{11.f, 12.f}, {13.f, 14.f}}); + + auto add = ops::Add(root.WithOpName("Add1"), var, c); + + auto assign = ops::AssignAdd(root, var, add); + + auto add2 = ops::Add(root.WithOpName("Add2"), assign, c); + + auto assign2 = ops::AssignSub(root, var, add2); + + // Turn off optimizations so that all the nodes are processed + tensorflow::SessionOptions options; + options.config.mutable_graph_options() + ->mutable_optimizer_options() + ->set_opt_level(tensorflow::OptimizerOptions_Level_L0); + options.config.mutable_graph_options() + ->mutable_rewrite_options() + ->set_constant_folding(tensorflow::RewriterConfig::OFF); + + // Run on nGraph + ActivateNGraph(); + ClientSession ng_session(root, options); + std::vector ng_outputs1; + std::vector ng_outputs2; + std::vector ng_outputs3; + + ASSERT_OK(ng_session.Run({var_assign}, &ng_outputs1)); + + for (int i = 0; i < 10; i++) { + ASSERT_OK(ng_session.Run({assign2}, &ng_outputs2)); + } + + ASSERT_OK(ng_session.Run({var}, &ng_outputs3)); + + // Run on TF + DeactivateNGraph(); + ClientSession tf_session(root, options); + std::vector tf_outputs1; + std::vector tf_outputs2; + std::vector 
tf_outputs3; + + ASSERT_OK(tf_session.Run( + { + var_assign, + }, + &tf_outputs1)); + + for (int i = 0; i < 10; i++) { + ASSERT_OK(tf_session.Run({assign2}, &tf_outputs2)); + } + + tf_session.Run({var}, &tf_outputs3); + + Compare(tf_outputs1, ng_outputs1); + Compare(tf_outputs2, ng_outputs2); + Compare(tf_outputs3, ng_outputs3); + + ActivateNGraph(); +} + +// Graph withApplyGradientDescent +TEST(VariableTest, SmallGraph3) { + Scope root = Scope::NewRootScope(); + + PartialTensorShape varShape({2, 2}); + auto var = ops::Variable(root.WithOpName("Var1"), varShape, DT_FLOAT); + auto init_value = ops::Const(root, {{1.f, 1.f}, {1.f, 1.f}}); + auto var_assign = ops::Assign(root, var, init_value); + + auto c = ops::Const(root, {{1.f, 1.f}, {1.f, 1.f}}); + auto s = ops::Const(root, 1.f); + auto d = ops::Const(root, {{1.f, 1.f}, {1.f, 1.f}}); + + auto add = ops::Add(root, var, c); + + auto assign = ops::AssignSub(root, var, add); + + auto apply_gradient_descent = ops::ApplyGradientDescent(root, assign, s, d); + + // Turn off optimizations so that all the nodes are processed + tensorflow::SessionOptions options; + options.config.mutable_graph_options() + ->mutable_optimizer_options() + ->set_opt_level(tensorflow::OptimizerOptions_Level_L0); + options.config.mutable_graph_options() + ->mutable_rewrite_options() + ->set_constant_folding(tensorflow::RewriterConfig::OFF); + + // Run on nGraph + ActivateNGraph(); + ClientSession ng_session(root, options); + std::vector ng_outputs1; + std::vector ng_outputs2; + std::vector ng_outputs3; + std::vector ng_outputs4; + std::vector ng_outputs5; + + ASSERT_OK(ng_session.Run( + { + var_assign, + }, + &ng_outputs1)); + + for (int i = 0; i < 10; i++) { + ASSERT_OK(ng_session.Run({assign}, &ng_outputs2)); + } + + for (int i = 0; i < 10; i++) { + ASSERT_OK(ng_session.Run({apply_gradient_descent}, &ng_outputs3)); + } + + for (int i = 0; i < 10; i++) { + ASSERT_OK(ng_session.Run({assign}, &ng_outputs4)); + } + + ASSERT_OK(ng_session.Run({var}, 
&ng_outputs5)); + + // Run on TF + DeactivateNGraph(); + ClientSession tf_session(root, options); + std::vector tf_outputs1; + std::vector tf_outputs2; + std::vector tf_outputs3; + std::vector tf_outputs4; + std::vector tf_outputs5; + + ASSERT_OK(tf_session.Run( + { + var_assign, + }, + &tf_outputs1)); + + for (int i = 0; i < 10; i++) { + ASSERT_OK(tf_session.Run({assign}, &tf_outputs2)); + } + + for (int i = 0; i < 10; i++) { + ASSERT_OK(tf_session.Run({apply_gradient_descent}, &tf_outputs3)); + } + + for (int i = 0; i < 10; i++) { + ASSERT_OK(tf_session.Run({assign}, &tf_outputs4)); + } + + ASSERT_OK(tf_session.Run({var}, &tf_outputs5)); + Compare(tf_outputs1, ng_outputs1); + Compare(tf_outputs2, ng_outputs2); + Compare(tf_outputs3, ng_outputs3); + Compare(tf_outputs4, ng_outputs4); + Compare(tf_outputs5, ng_outputs5); + + ActivateNGraph(); +} + +// Graph with 2 Variables +TEST(VariableTest, SmallGraph4) { + Scope root = Scope::NewRootScope(); + + PartialTensorShape varShape({2, 2}); + auto var1 = ops::Variable(root, varShape, DT_FLOAT); + auto init_value = ops::Const(root, {{1.f, 1.f}, {1.f, 1.f}}); + auto var1_assign = ops::Assign(root, var1, init_value); + + auto var2 = ops::Variable(root, varShape, DT_FLOAT); + auto init_value2 = ops::Const(root, {{123.f, 34.f}, {0.f, 112121.f}}); + auto var2_assign = ops::Assign(root, var2, init_value2); + + auto s = ops::Const(root, 1.f); + auto d = ops::Const(root, {{1.f, 1.f}, {1.f, 1.f}}); + + auto add = ops::Add(root, var1, var2); + auto assign = ops::Assign(root, var1, add); + auto apply_gradient_descent = ops::ApplyGradientDescent(root, var2, s, d); + auto mul = ops::Mul(root, var1, var2); + auto assign2 = ops::Assign(root, var2, mul); + auto mul2 = ops::Mul(root, var1, var2); + + // Turn off optimizations so that all the nodes are processed + tensorflow::SessionOptions options; + options.config.mutable_graph_options() + ->mutable_optimizer_options() + ->set_opt_level(tensorflow::OptimizerOptions_Level_L0); + 
options.config.mutable_graph_options() + ->mutable_rewrite_options() + ->set_constant_folding(tensorflow::RewriterConfig::OFF); + + // Run on nGraph + ActivateNGraph(); + ClientSession ng_session(root, options); + std::vector ng_outputs1; + std::vector ng_outputs2; + std::vector ng_outputs3; + std::vector ng_outputs4; + std::vector ng_outputs5; + + ASSERT_OK(ng_session.Run({var1_assign, var2_assign}, &ng_outputs1)); + + for (int i = 0; i < 10; i++) { + ASSERT_OK(ng_session.Run({assign}, &ng_outputs2)); + } + + for (int i = 0; i < 5; i++) { + ASSERT_OK(ng_session.Run({apply_gradient_descent}, &ng_outputs3)); + } + + for (int i = 0; i < 10; i++) { + ASSERT_OK(ng_session.Run({mul2}, &ng_outputs4)); + } + + ASSERT_OK(ng_session.Run({var1, var2}, &ng_outputs5)); + + // Run on TF + DeactivateNGraph(); + ClientSession tf_session(root, options); + std::vector tf_outputs1; + std::vector tf_outputs2; + std::vector tf_outputs3; + std::vector tf_outputs4; + std::vector tf_outputs5; + + ASSERT_OK(tf_session.Run({var1_assign, var2_assign}, &tf_outputs1)); + + for (int i = 0; i < 10; i++) { + ASSERT_OK(tf_session.Run({assign}, &tf_outputs2)); + } + + for (int i = 0; i < 5; i++) { + ASSERT_OK(tf_session.Run({apply_gradient_descent}, &tf_outputs3)); + } + + for (int i = 0; i < 10; i++) { + ASSERT_OK(tf_session.Run({mul2}, &tf_outputs4)); + } + + ASSERT_OK(tf_session.Run({var1, var2}, &tf_outputs5)); + + Compare(tf_outputs1, ng_outputs1); + Compare(tf_outputs2, ng_outputs2); + Compare(tf_outputs3, ng_outputs3); + Compare(tf_outputs4, ng_outputs4); + Compare(tf_outputs5, ng_outputs5); + ActivateNGraph(); +} + +} // namespace testing +} // namespace ngraph_bridge +} // namespace tensorflow \ No newline at end of file