From 2f078e47b89e13a51e6fc3f1e8821fbecd90d473 Mon Sep 17 00:00:00 2001 From: Avijit <30507445+avijit-nervana@users.noreply.github.com> Date: Tue, 19 Feb 2019 17:17:26 -0800 Subject: [PATCH] More diagnostic support (#435) Added a Timer class to enable logging of time taken by various stages in NGraphEncapsulate::Compute(). Also enabled printing detailed version information --- README.md | 2 +- python/ngraph_bridge/__init__.in.py | 130 +++++++++++++++------------- src/ngraph_encapsulate_op.cc | 27 +++++- src/ngraph_timer.h | 39 +++++++++ src/version.cc | 4 + src/version.h | 3 + 6 files changed, 143 insertions(+), 62 deletions(-) create mode 100644 src/ngraph_timer.h diff --git a/README.md b/README.md index 97519cc9..d466e106 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,7 @@ The installation prerequisites are the same as described in the TensorFlow git clone https://github.com/NervanaSystems/ngraph-tf.git cd ngraph-tf - git checkout v0.10.0 + git checkout v0.11.0-rc0 2. Next run the following Python script to build TensorFlow, nGraph and the bridge. Please use Python 3.5: diff --git a/python/ngraph_bridge/__init__.in.py b/python/ngraph_bridge/__init__.in.py index 0438dcad..84a3b809 100644 --- a/python/ngraph_bridge/__init__.in.py +++ b/python/ngraph_bridge/__init__.in.py @@ -16,7 +16,7 @@ from __future__ import absolute_import from __future__ import division -from __future__ import print_function +from __future__ import print_function import importlib import os @@ -24,7 +24,7 @@ import time import getpass from platform import system - + import numpy as np import tensorflow as tf from tensorflow.python import pywrap_tensorflow as py_tf @@ -35,11 +35,12 @@ import ctypes - -__all__ = ['enable', 'disable', 'is_enabled', 'backends_len', 'list_backends', - 'set_backend', 'is_supported_backend', 'get_currently_set_backend_name' ,'start_logging_placement', 'stop_logging_placement', - 'is_logging_placement', '__version__'] - +__all__ = [ + 'enable', 'disable', 'is_enabled', 'backends_len', 'list_backends', + 'set_backend', 'is_supported_backend', 'get_currently_set_backend_name', + 'start_logging_placement', 'stop_logging_placement', + 'is_logging_placement', '__version__' +] ext = 'dylib' if system() == 'Darwin' else 'so' @@ -50,34 +51,33 @@ # converting version representations to strings if not already try: - TF_VERSION = str(TF_VERSION, 'ascii') + TF_VERSION = str(TF_VERSION, 'ascii') except TypeError: # will happen for python 2 or if already string - pass + pass try: - TF_VERSION_NEEDED = str(TF_VERSION_NEEDED, 'ascii') + TF_VERSION_NEEDED = str(TF_VERSION_NEEDED, 'ascii') except TypeError: - pass + pass try: - if TF_GIT_VERSION.startswith("b'"): # TF version can be a bytes __repr__() - TF_GIT_VERSION = eval(TF_GIT_VERSION) - TF_GIT_VERSION = str(TF_GIT_VERSION, 'ascii') + if TF_GIT_VERSION.startswith("b'"): # TF version can be a bytes __repr__() + TF_GIT_VERSION = eval(TF_GIT_VERSION) + TF_GIT_VERSION = str(TF_GIT_VERSION, 'ascii') except TypeError: - pass - + pass + try: - if TF_GIT_VERSION_BUILT_WITH.startswith("b'"): - TF_GIT_VERSION_BUILT_WITH = eval(TF_GIT_VERSION_BUILT_WITH) - TF_GIT_VERSION_BUILT_WITH = str(TF_GIT_VERSION_BUILT_WITH, 'ascii') + if TF_GIT_VERSION_BUILT_WITH.startswith("b'"): + TF_GIT_VERSION_BUILT_WITH = eval(TF_GIT_VERSION_BUILT_WITH) + TF_GIT_VERSION_BUILT_WITH = str(TF_GIT_VERSION_BUILT_WITH, 'ascii') except TypeError: - pass - -print("TensorFlow version installed: {0} ({1})".format(TF_VERSION, - TF_GIT_VERSION)) -print("nGraph bridge built with: {0} 
({1})".format(TF_VERSION_NEEDED, - TF_GIT_VERSION_BUILT_WITH)) + pass +# print("TensorFlow version installed: {0} ({1})".format(TF_VERSION, +# TF_GIT_VERSION)) +# print("nGraph bridge built with: {0} ({1})".format(TF_VERSION_NEEDED, +# TF_GIT_VERSION_BUILT_WITH)) # We need to revisit this later. We can automate that using cmake configure # command. @@ -89,16 +89,19 @@ (TF_INSTALLED_VER[1] == TF_NEEDED_VER[1]) and \ ((TF_INSTALLED_VER[2].split('-'))[0] == (TF_NEEDED_VER[2].split('-'))[0]): libpath = os.path.dirname(__file__) - ngraph_bridge_lib = ctypes.cdll.LoadLibrary(os.path.join( - libpath, 'libngraph_bridge.' + ext)) + ngraph_bridge_lib = ctypes.cdll.LoadLibrary( + os.path.join(libpath, 'libngraph_bridge.' + ext)) else: - raise ValueError("Error: Installed TensorFlow version {0}\nnGraph bridge built with: {1}".format( - TF_VERSION, TF_VERSION_NEEDED )) + raise ValueError( + "Error: Installed TensorFlow version {0}\nnGraph bridge built with: {1}" + .format(TF_VERSION, TF_VERSION_NEEDED)) def requested(): - return ops.get_default_graph()._attr_scope( - {"_ngraph_requested": attr_value_pb2.AttrValue(b=True)}) + return ops.get_default_graph()._attr_scope({ + "_ngraph_requested": + attr_value_pb2.AttrValue(b=True) + }) ngraph_bridge_lib.ngraph_is_enabled.restype = ctypes.c_bool @@ -110,7 +113,7 @@ def requested(): ngraph_bridge_lib.ngraph_get_currently_set_backend_name.restype = ctypes.c_bool ngraph_bridge_lib.ngraph_is_logging_placement.restype = ctypes.c_bool ngraph_bridge_lib.ngraph_tf_version.restype = ctypes.c_char_p - +ngraph_bridge_lib.ngraph_lib_version.restype = ctypes.c_char_p try: importlib.import_module('plaidml.settings') @@ -122,57 +125,66 @@ def requested(): def enable(): - ngraph_bridge_lib.ngraph_enable() + ngraph_bridge_lib.ngraph_enable() def disable(): - ngraph_bridge_lib.ngraph_disable() + ngraph_bridge_lib.ngraph_disable() def is_enabled(): - return ngraph_bridge_lib.ngraph_is_enabled() + return ngraph_bridge_lib.ngraph_is_enabled() def backends_len(): - return ngraph_bridge_lib.ngraph_backends_len() + return ngraph_bridge_lib.ngraph_backends_len() def list_backends(): - len_backends = backends_len() - result = (ctypes.c_char_p * len_backends)() - if not ngraph_bridge_lib.ngraph_list_backends(result, len_backends): - raise Exception("Expected " + str(len_backends) + " backends, but got some other number of backends") - list_result = list(result) - # convert bytes to string required for py3 (encode/decode bytes) - backend_list = [] - for backend in list_result: - backend_list.append(backend.decode("utf-8")) - return backend_list + len_backends = backends_len() + result = (ctypes.c_char_p * len_backends)() + if not ngraph_bridge_lib.ngraph_list_backends(result, len_backends): + raise Exception("Expected " + str(len_backends) + + " backends, but got some other number of backends") + list_result = list(result) + # convert bytes to string required for py3 (encode/decode bytes) + backend_list = [] + for backend in list_result: + backend_list.append(backend.decode("utf-8")) + return backend_list def set_backend(backend): - if not ngraph_bridge_lib.ngraph_set_backend(backend.encode("utf-8")): - raise Exception("Backend " + backend + " unavailable.") + if not ngraph_bridge_lib.ngraph_set_backend(backend.encode("utf-8")): + raise Exception("Backend " + backend + " unavailable.") + def is_supported_backend(backend): - return ngraph_bridge_lib.ngraph_is_supported_backend(backend.encode("utf-8")) + return ngraph_bridge_lib.ngraph_is_supported_backend( + backend.encode("utf-8")) + 
 def get_currently_set_backend_name():
-  result = (ctypes.c_char_p * 1)()
-  if not ngraph_bridge_lib.ngraph_get_currently_set_backend_name(result):
-    raise Exception("Cannot get currently set backend")
-  list_result = list(result)
-  return list_result[0].decode("utf-8")
+    result = (ctypes.c_char_p * 1)()
+    if not ngraph_bridge_lib.ngraph_get_currently_set_backend_name(result):
+        raise Exception("Cannot get currently set backend")
+    list_result = list(result)
+    return list_result[0].decode("utf-8")
+
 
 def start_logging_placement():
-  ngraph_bridge_lib.ngraph_start_logging_placement()
+    ngraph_bridge_lib.ngraph_start_logging_placement()
 
 
 def stop_logging_placement():
-  ngraph_bridge_lib.ngraph_stop_logging_placement()
+    ngraph_bridge_lib.ngraph_stop_logging_placement()
 
 
 def is_logging_placement():
-  return ngraph_bridge_lib.ngraph_is_logging_placement()
-
-__version__ = ngraph_bridge_lib.ngraph_tf_version()
+    return ngraph_bridge_lib.ngraph_is_logging_placement()
+
+
+__version__ = \
+    "nGraph bridge version: " + str(ngraph_bridge_lib.ngraph_tf_version()) + "\n" + \
+    "nGraph version used: " + str(ngraph_bridge_lib.ngraph_lib_version()) + "\n" + \
+    "TensorFlow version used: " + TF_GIT_VERSION_BUILT_WITH
+
diff --git a/src/ngraph_encapsulate_op.cc b/src/ngraph_encapsulate_op.cc
index c50d2609..d70f62aa 100644
--- a/src/ngraph_encapsulate_op.cc
+++ b/src/ngraph_encapsulate_op.cc
@@ -32,6 +32,7 @@
 #include "ngraph_freshness_tracker.h"
 #include "ngraph_log.h"
 #include "ngraph_mark_for_clustering.h"
+#include "ngraph_timer.h"
 #include "ngraph_utils.h"
 
 #include "ngraph/runtime/backend.hpp"
@@ -230,6 +231,7 @@ class NGraphEncapsulateOp : public OpKernel {
   // OpKernel::Compute
   //---------------------------------------------------------------------------
   void Compute(OpKernelContext* ctx) override {
+    Timer compute_time;
     std::lock_guard<std::mutex> lock(m_compute_lock);
     NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute starting for cluster "
                    << m_ngraph_cluster;
@@ -238,6 +240,7 @@ class NGraphEncapsulateOp : public OpKernel {
     ng::runtime::Backend* op_backend =
         BackendManager::GetBackend(m_op_backend_name);
 
+    Timer function_lookup_or_create;
     // Get the inputs
     std::vector<TensorShape> input_shapes;
     std::stringstream signature_ss;
@@ -370,9 +373,13 @@ class NGraphEncapsulateOp : public OpKernel {
       ng_function = it->second;
     }
 
+    int time_func_create_or_lookup = function_lookup_or_create.ElapsedInMS();
+
     NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute got graph for cluster "
                    << m_ngraph_cluster;
 
+    Timer create_or_lookup_tensors;
+
     if (m_freshness_tracker == nullptr) {
       auto creator = [](NGraphFreshnessTracker** tracker) {
         *tracker = new NGraphFreshnessTracker();
@@ -542,10 +549,11 @@ class NGraphEncapsulateOp : public OpKernel {
         << "NGraphEncapsulateOp::Compute allocated result tensors for cluster "
         << m_ngraph_cluster;
 
+    int time_create_or_lookup_tensors = create_or_lookup_tensors.ElapsedInMS();
+
     // Execute the nGraph function.
+    Timer execute_function;
     {
-      // mutex_lock l(s_ng_backend_mutex);
-      // std::lock_guard lock(backend_mutex_ptr);
       BackendManager::LockBackend(m_op_backend_name);
       NGRAPH_VLOG(4)
           << "NGraphEncapsulateOp::Compute call starting for cluster "
@@ -574,6 +582,7 @@ class NGraphEncapsulateOp : public OpKernel {
       }
       BackendManager::UnlockBackend(m_op_backend_name);
     }
+    int time_execute_function = execute_function.ElapsedInMS();
 
     long vm, rss;
     MemoryProfile(vm, rss);
@@ -590,6 +599,8 @@ class NGraphEncapsulateOp : public OpKernel {
         << m_ngraph_cluster;
 
     // Copy value to host if backend is not CPU
+    Timer copy_output_tensors_to_host;
+
     try {
       if (m_op_backend_name != "CPU") {
         for (size_t i = 0; i < output_caches.size(); ++i) {
@@ -618,10 +629,22 @@ class NGraphEncapsulateOp : public OpKernel {
       void* src_ptr = (void*)DMAHelper::base(&ctx->input(i));
       m_freshness_tracker->MarkFresh(src_ptr, ng_function);
     }
+    int time_copy_output_tensors_to_host =
+        copy_output_tensors_to_host.ElapsedInMS();
 
     NGRAPH_VLOG(4)
         << "NGraphEncapsulateOp::Compute done marking fresh for cluster "
         << m_ngraph_cluster;
+
+    NGRAPH_VLOG(1) << "NGRAPH_TF_TIMING_PROFILE: OP_ID: " << my_instance_id
+                   << " Step_ID: " << ctx->step_id()
+                   << " Cluster: " << ctx->op_kernel().name()
+                   << " Time-Compute: " << compute_time.ElapsedInMS()
+                   << " Function-Create-or-Lookup: " << time_func_create_or_lookup
+                   << " Create-and-copy-tensors: " << time_create_or_lookup_tensors
+                   << " Execute: " << time_execute_function
+                   << " Copy-outputs-to-host: " << time_copy_output_tensors_to_host;
   }  // end compute
 
  private:
diff --git a/src/ngraph_timer.h b/src/ngraph_timer.h
new file mode 100644
index 00000000..399033bd
--- /dev/null
+++ b/src/ngraph_timer.h
@@ -0,0 +1,39 @@
+/*******************************************************************************
+ * Copyright 2019 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+#ifndef NGRAPH_TF_BRIDGE_TIMER_H_
+#define NGRAPH_TF_BRIDGE_TIMER_H_
+
+#include <chrono>
+
+namespace tensorflow {
+namespace ngraph_bridge {
+
+class Timer {
+ public:
+  Timer() : m_start(std::chrono::high_resolution_clock::now()) {}
+  int ElapsedInMS() {
+    return std::chrono::duration_cast<std::chrono::milliseconds>(
+               std::chrono::high_resolution_clock::now() - m_start)
+        .count();
+  }
+
+ private:
+  const std::chrono::time_point<std::chrono::high_resolution_clock> m_start;
+};
+}  // namespace ngraph_bridge
+}  // namespace tensorflow
+
+#endif  // NGRAPH_TF_BRIDGE_TIMER_H_
diff --git a/src/version.cc b/src/version.cc
index 09a6696c..905f2868 100644
--- a/src/version.cc
+++ b/src/version.cc
@@ -17,6 +17,7 @@
 #include "version.h"
 #include
 #include
+#include "ngraph/ngraph.hpp"
 
 // nGraph-TensorFlow bridge uses semantic versioning: see http://semver.org/
 
@@ -38,8 +39,11 @@ NG_TF_MINOR_VERSION) "."
VERSION_STR(NG_TF_PATCH_VERSION) \ NG_TF_VERSION_SUFFIX) +extern "C" const char* get_ngraph_version_string(); + namespace tensorflow { namespace ngraph_bridge { const char* ngraph_tf_version() { return (NG_TF_VERSION_STRING); } +const char* ngraph_lib_version() { return get_ngraph_version_string(); } } // namespace ngraph_bridge } // namespace tensorflow diff --git a/src/version.h b/src/version.h index d90e508f..bcadc4c6 100644 --- a/src/version.h +++ b/src/version.h @@ -21,6 +21,9 @@ namespace ngraph_bridge { extern "C" { // Returns the ngraph-tensorflow library version const char* ngraph_tf_version(); + +// Returns the nGraph version this bridge was compiled with +const char* ngraph_lib_version(); } } // namespace ngraph_bridge } // namespace tensorflow
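
For illustration only, not part of the patch: a minimal sketch of how the Timer class added in src/ngraph_timer.h is meant to be used, mirroring the pattern introduced in NGraphEncapsulateOp::Compute() above — construct a Timer when a stage begins and read ElapsedInMS() when it ends. The do_work() helper and the main() driver are hypothetical stand-ins, not code from the bridge.

```cpp
#include <chrono>
#include <iostream>
#include <thread>

#include "ngraph_timer.h"  // tensorflow::ngraph_bridge::Timer from this patch

// Hypothetical stand-in for one stage of Compute(), e.g. function lookup or
// tensor allocation.
static void do_work() {
  std::this_thread::sleep_for(std::chrono::milliseconds(5));
}

int main() {
  tensorflow::ngraph_bridge::Timer stage_timer;  // starts timing on construction
  do_work();
  // ElapsedInMS() returns whole milliseconds elapsed since construction.
  std::cout << "Stage took " << stage_timer.ElapsedInMS() << " ms" << std::endl;
  return 0;
}
```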