Dnn ops #734

Draft: wants to merge 20 commits into base: main

Commits (20)
59944e7  [DAPHNE-#758] MetaDataObject for CSRMatrix (corepointer, Jun 17, 2024)
ca1210e  [MINOR] Silenced a variety of compiler and linter warnings (corepointer, Jun 17, 2024)
4e0ae52  [BUGFIX] LoadPartitioningDistributed crashed (corepointer, Jun 7, 2024)
aaac9a7  [DAPHNE-#xyz1] DNN Ops Type/Shape Inference; NN Namespace (corepointer, Jun 18, 2024)
06bff26  [DAPHNE-#xyz2] ReLU backward pass (corepointer, May 14, 2024)
d6c4e1e  [DAPHNE-#xyz3] batchnorm2d fwd test/train cpp (corepointer, Aug 19, 2024)
511e379  [DAPHNE-#xyz4] Neural Network DaphneDSL Scripts (corepointer, Aug 19, 2024)
f4651b1  [DAPHNE-#xyz5] LeNet Example Pipeline for MNIST character classification (corepointer, Aug 19, 2024)
6d322c4  [DAPHNE-#xyz6] WIP Script level tests for DNN ops (corepointer, Aug 19, 2024)
a0c7d9a  [DAPHNE-#xyz7] Make DaphneContext global (corepointer, May 22, 2024)
7a652ba  [BUGFIX] AggAll CUDA launch config (corepointer, Dec 31, 2023)
cd58be5  [BUGFIX] Only shortcut-reshape if shared_ptr is not null (corepointer, Jun 25, 2024)
f0ffae5  [MINOR] CUDA EwBinaryObjSca MIN/MAX/NEQ (corepointer, Jun 25, 2024)
f56c270  [MINOR] Comparing booleans (corepointer, Jul 29, 2024)
8d9d4d2  [MINOR] Cleanup & bugfix CUDA EwBinaryMat to handle 1x1 matrices (corepointer, Aug 19, 2024)
b761c7e  [TODO] genkernel bugfix multi return ref datatype (dirty) (corepointer, May 22, 2024)
547d168  [TODO] possible problem: ErrorHandler does not check for null string (corepointer, Jun 25, 2024)
997a324  [TEMP] dnn cpp debug statements (corepointer, Aug 19, 2024)
5b2d890  [TEMP] UserConfig.json changes while debugging (corepointer, Aug 19, 2024)
afd7502  [LOCAL] test scripts (corepointer, Aug 19, 2024)
27 changes: 18 additions & 9 deletions UserConfig.json
@@ -6,14 +6,14 @@
"matmul_unroll_factor": 1,
"matmul_unroll_jam_factor": 4,
"matmul_num_vec_registers": 16,
"use_cuda": false,
"use_cuda": true,
"use_vectorized_exec": false,
"use_obj_ref_mgnt": true,
"cuda_fuse_any": false,
"use_mlir_codegen": false,
"vectorized_single_queue": false,
"debug_llvm": false,
"explain_kernels": false,
"explain_kernels": true,
"explain_llvm": false,
"explain_parsing": false,
"explain_parsing_simplified": false,
@@ -28,24 +28,33 @@
"numberOfThreads": -1,
"minimumTaskSize": 1,
"libdir": "{exedir}/../lib",
"daphnedsl_import_paths": {},
"force_cuda": false,
"daphnedsl_import_paths": {
"default_dirs": ["scripts/algorithms", "scripts"]
},
"force_cuda": true,
"logging": [
{ "log-level-limit": "ERROR" },
{ "log-level-limit": "INFO" },
{
"comment": "This configuration controls logging in the GPU compiler pass only",
"name": "compiler::cuda",
"level": "DEBUG",
"level": "INFO",
"filename": "compiler-trace-cuda.txt",
"format": "%^[%L %n]:%$ %v"
},
{
"comment": "general runtime CUDA debug log",
"name": "runtime::cuda",
"level": "INFO",
"level": "DEBUG",
"filename": "compiler-debug-cuda.txt",
"format": "%^[%L %n]:%$ %v"
},
+{
+    "comment": "DAPHNE runtime logs",
+    "name": "runtime",
+    "level": "DEBUG",
+    "filename": "",
+    "format": "%^[%n %L]:%$ %v"
+},
{
"comment": "DAPHNE compiler logs",
"name": "compiler",
@@ -56,14 +65,14 @@
{
"comment": "DAPHNE default",
"name": "default",
"level": "INFO",
"level": "DEBUG",
"filename": "",
"format": "%^[%n %L]:%$ %v"
},
{
"comment": "DAPHNE default",
"name": "parser",
"level": "INFO",
"level": "DEBUG",
"filename": "",
"format": "%^[%n %L]:%$ %v"
}
48 changes: 48 additions & 0 deletions local/test-bn2.daph
@@ -0,0 +1,48 @@

import "nn/layers/batch_norm2d_dsl.daph" as "bn2d";
import "nn/layers/batch_norm2d_kernel.daph" as "bn2k";

msec_factor = as.f32(0.000001);

#input=\"/daphne/data/mnist20_features.csv\"
#X = readMatrix($input);

# num images
N = 1;
Himg = 8;
Wimg = 8;
#channels
C = 1;
# seed for random input generation
seed = 42;
mu = 0.1;
#eps = 1e-5;
eps = 0.00001;
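# note: the second print() argument appears to be a linebreak flag, so
# print("eps: ", 0) prints the label without a trailing newline (inferred
# from usage in these scripts, not verified against the builtin reference)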
print("eps: ",0);print(eps);
train = true;

gamma, beta, ema_mean, ema_var = bn2d.init(C);
print(gamma);
print(beta);
print(ema_mean);
print(ema_var);

X = rand(N, C * Himg * Wimg, 0.0, 256.0, 1.0, seed);
batch_size = 1;
X_batch = X[0:batch_size,];
print("X:");
print(X_batch);
#img1 = reshape(X[0,], Himg, Wimg);
#img = (img1 != 0);
#print(img1);

outBN, upd_ema_mean, upd_ema_var, cache_ema_mean, cache_ema_var =
bn2d.forward(X_batch, gamma, beta, C, Himg, Wimg, train, ema_mean, ema_var, mu, eps);

print("outBNd: ",0);
print(outBN);

outBNk, upd_ema_meank, upd_ema_vark, cache_ema_meank, cache_ema_vark =
bn2k.forward(X_batch, gamma, beta, C, Himg, Wimg, train, ema_mean, ema_var, mu, eps);
print("outBNk: ",0);
print(outBNk);
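The two forward calls above are meant to agree. For orientation, a hand-rolled sketch of the training-mode computation for this N = 1, C = 1 case, not part of the PR; it assumes `as.scalar`, `sqrt`, and `abs` are available DaphneDSL builtins and that `gamma`/`beta` from `bn2d.init` are 1x1 matrices:

# editorial sketch, not in the PR: reference batch-norm forward for N = 1, C = 1
n_px   = as.f64(nrow(X_batch) * ncol(X_batch));
m      = sum(X_batch) / n_px;                        # batch mean
v      = sum((X_batch - m) * (X_batch - m)) / n_px;  # biased batch variance
outRef = as.scalar(gamma) * (X_batch - m) / sqrt(v + eps) + as.scalar(beta);
print(sum(abs(outBN - outRef)));  # should be ~0 for both implementations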
96 changes: 96 additions & 0 deletions local/test-dnn-ops.daph
@@ -0,0 +1,96 @@

import "nn/layers/conv2d_kernel.daph" as "conv2d";
import "nn/layers/conv2d.daph" as "conv2d_dsl"; #returns segFault (issue #77)

msec_factor = as.f32(0.000001);

#X1 = readMatrix($input); # unused; X is generated via rand() below, so no $input argument is needed

# num images
N = 1;
#Himg = 28;
#Wimg = 28;
Himg = 8;
Wimg = 8;
#channels
numC = 1;
# seed for random input generation
seed = 42;
X = rand(N, numC * Himg * Wimg, 0.0, 256.0, 1.0, seed);
batch_size = 1;
X_batch = X[0:batch_size,];
print("X:");
print(X_batch);
img1 = reshape(X[0,], Himg, Wimg);
#img = (img1 != 0);
print(img1);

numF = 1;
#Hf = 5;
#Wf = 5;
Hf = 3;
Wf = 3;

#W = fill(1.0, numF, numC * Hf * Wf);
#b = fill(0.0, numF, 1);
#print(sum(W));
#print(sum(b));

W, b = conv2d.init(numF, numC, Hf, Wf, seed);
print("W:");
imgW = reshape(W[0,], Hf, Wf);
#imgW = (imgW1 != 0);
print(imgW);

stride = 1;
pad = 2;
#X_conv, Hout, Wout = conv2d(X_batch, W, batch_size, numC, Himg, Wimg, Hf, Wf, stride, stride, pad, pad, b);

t_start_conv1 = now();
X_conv1, Hout1, Wout1 = conv2d.forward(X_batch, W, b, numC, Himg, Wimg, Hf, Wf, stride, stride, pad, pad);
t_start_conv2 = now();
X_conv2, Hout2, Wout2 = conv2d_dsl.forward(X_batch, W, b, numC, Himg, Wimg, Hf, Wf, stride, stride, pad, pad);
t_end_conv = now();

print("X dims: ",0) ;print(nrow(X),0);print("x",0);print(ncol(X));
print("X_conv1 dims: ",0) ;print(nrow(X_conv1),0);print("x",0);print(ncol(X_conv1));
print("X_conv2 dims: ",0) ;print(nrow(X_conv2),0);print("x",0);print(ncol(X_conv2));
print("conv1 output dims: ",0);print(Hout1,0);print("x",0);print(Wout1);
print("conv2 output dims: ",0);print(Hout2,0);print("x",0);print(Wout2);
print("Time conv1: ",0);print(as.f32((t_start_conv1 - t_start_conv1)) * msec_factor,0);print(" ms");
print("Time conv2: ",0);print(as.f32((t_end_conv - t_start_conv2)) * msec_factor,0);print(" ms");

print(X_conv1);
print(X_conv2);
print(sum(X_conv1));
print(sum(X_conv2));
print(nrow(X_conv1));
print(nrow(X_conv2));
print(ncol(X_conv1));
print(ncol(X_conv2));
print(Hout1);
print(Hout2);
print(Wout1);
print(Wout2);
//print(X_conv[0:1,0:100]);
#f1out = reshape(X_conv[0:1,0:784], 28, 28);
#print(f1out[3:4,]);
#print(sum(f1out));
stop("stopping early");

# seed for random input generation
seed = 42;

t_start = now();
X_relu2 = rand(N, 97280, 0.0, 256.0, 1.0, seed);

t_end_relu2 = now();
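# argument order of max_pool2d as inferred from the call below (not checked
# against the kernel signature): input, batch_size, channels, img_h, img_w,
# pool_h, pool_w, stride_h, stride_w, pad_h, pad_w; 32 * 4 * 760 = 97280 = ncol(X_relu2)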
X_pool, Hpool, Wpool = max_pool2d(X_relu2, N, 32, 4, 760, 2, 2, 2, 2, 0, 0);
t_end_pool = now();


print("X_pool dims: ",0) ;print(nrow(X_pool),0);print("x",0);print(ncol(X_pool));
print("Time maxpool2d: ",0);print(as.f32((t_end_pool - t_end_relu2)) * msec_factor,0);print(" ms");
t_end = now();
print("Time total: ",0);print(as.f32((t_end - t_start)) * msec_factor,0);print(" ms");

160 changes: 160 additions & 0 deletions scripts/examples/mnist_lenet-train.daph
@@ -0,0 +1,160 @@
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Modifications Copyright 2024 The DAPHNE Consortium
#
#-------------------------------------------------------------

# This script has been manually translated from Apache SystemDS.

# MNIST LeNet - Train
#
# This script trains a convolutional net using the "LeNet" architecture
# on images of handwritten digits.
#
# Inputs:
# - train: File containing labeled MNIST training images.
# The format is "label, pixel_1, pixel_2, ..., pixel_n".
# - test: File containing labeled MNIST test images.
# The format is "label, pixel_1, pixel_2, ..., pixel_n".
# - C: Number of color channels in the images.
# - Hin: Input image height.
# - Win: Input image width.
# - epochs: [DEFAULT: 10] Total number of full training loops over
# the full data set.
# - out_dir: [DEFAULT: "."] Directory to store weights and bias
# matrices of trained model, as well as final test accuracy.
# - fmt: [DEFAULT: "csv"] File format of `train` and `test` data.
# Options include: "csv", "mm", "text", and "binary".
#
# Outputs:
# - W1, W2, W3, W4: Files containing the trained weights of the model.
# - b1, b2, b3, b4: Files containing the trained biases of the model.
# - accuracy: File containing the final accuracy on the test data.
#
# Data:
# The MNIST dataset contains labeled images of handwritten digits,
# where each example is a 28x28 pixel image of grayscale values in
# the range [0,255] stretched out as 784 pixels, and each label is
# one of 10 possible digits in [0,9].
#
# Sample invocation (carried over from the original SystemDS script; this
# DAPHNE translation currently reads hard-coded CSV paths below instead):
# 1. Download data (60,000 training examples, and 10,000 test examples)
# ```
# nn/examples/get_mnist_data.sh
# ```
#
# 2. Execute using Spark
# ```
# spark-submit --master local[*] --driver-memory 10G
# --conf spark.driver.maxResultSize=0 --conf spark.rpc.message.maxSize=128
# $SYSTEMDS_ROOT/target/SystemDS.jar -f nn/examples/mnist_lenet-train.dml
# -nvargs train=nn/examples/data/mnist/mnist_train.csv test=nn/examples/data/mnist/mnist_test.csv
# C=1 Hin=28 Win=28 epochs=10 out_dir=nn/examples/model/mnist_lenet
# ```
#

import "nn/networks/lenet-train.daph" as "lenet";

# Read training data & settings
#fmt = ifdef($fmt, "csv")
#train = read($train, format=fmt)
#test = read($test, format=fmt)
train_X = readMatrix("./data/mnist20_features.csv");
train_Y = readMatrix("./data/mnist20_labels.csv");
#test_X = readMatrix("./data/mnist20_features.csv");
#test_Y = readMatrix("./data/mnist20_labels.csv");
#C = $C
#Hin = $Hin
#Win = $Win

#train_X = readMatrix("./data/mnist/mnist_features.csv");
#train_Y = readMatrix("./data/mnist/mnist_labels.csv");
#test_X = readMatrix("./data/mnist/mnist_features.csv");
#test_Y = readMatrix("./data/mnist/mnist_labels.csv");

C = 1;
Hin = 28;
Win = 28;
#epochs = ifdef($epochs, 10)
epochs = 2;
#out_dir = ifdef($out_dir, ".")
out_dir = ".";

# Extract images and labels
#images = train[,1:ncol(train)];
#labels = train[,1];
#X_test = test[,1:ncol(test)];
#Y_test = test[,1];

#images = train_X[0:5000,];
#labels = train_Y[0:5000,];
#X_test = train_X[5001:6000,];
#Y_test = train_Y[5001:6000,];
images = train_X[0:15,];
labels = train_Y[0:15,];
X_test = train_X[16:20,];
Y_test = train_Y[16:20,];

# Scale images to [-1,1]; one-hot encoding of the labels is currently commented out
n = nrow(images);
n_test = nrow(X_test);
images = (images / 255.0) * 2 - 1;
#labels = table(seq(1, n, 1), labels+1, n, 10);
#labels = oneHot(labels, fill(as.si64(nrow(labels)),1, ncol(labels)));
X_test = (X_test / 255.0) * 2 - 1;
#Y_test = table(seq(1, n_test, 1), Y_test+1, n_test, 10);
#Y_test = oneHot(Y_test, fill(nrow(Y_test),1, ncol(Y_test)));

# Split into training and validation sets (the original 55,000/5,000 MNIST
# split is commented out below; the debug subset holds out the last 3 examples)
#X = images[5001:nrow(images),];
#X_val = images[1:5000,];
#Y = labels[5001:nrow(images),];
#Y_val = labels[1:5000,];
#X = images[1001:nrow(images) - 3000,];
#X_val = images[0:1000,];
#Y = labels[11:nrow(images),];
#Y_val = labels[0:1000,];
X = images[0:nrow(images) - 3,];
X_val = images[nrow(images) - 3:nrow(images),];
Y = labels[0:nrow(images)-3,];
Y_val = labels[nrow(images) - 3:nrow(images),];


# Train
W1, b1, W2, b2, W3, b3, W4, b4 = lenet.train(X, Y, X_val, Y_val, C, Hin, Win, epochs);

# Write model out
write(W1, out_dir + "/W1.csv");
write(b1, out_dir + "/b1.csv");
write(W2, out_dir + "/W2.csv");
write(b2, out_dir + "/b2.csv");
write(W3, out_dir + "/W3.csv");
write(b3, out_dir + "/b3.csv");
write(W4, out_dir + "/W4.csv");
write(b4, out_dir + "/b4.csv");

# Eval on test set
probs = lenet.predict(X_test, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4);
loss, accuracy = lenet.eval(probs, Y_test);

# Output results
print("Test Accuracy: " + accuracy);
#write(accuracy, out_dir + "/accuracy.csv");
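
A natural follow-up, sketched here for illustration and not part of the PR: reload the persisted model and score the test set again. This assumes `readMatrix` accepts the same paths `write()` produced above, and that `write()` emits any metadata DAPHNE needs to read the CSVs back.

# editorial sketch, not in the PR: reload the trained model and re-score
W1r = readMatrix(out_dir + "/W1.csv"); b1r = readMatrix(out_dir + "/b1.csv");
W2r = readMatrix(out_dir + "/W2.csv"); b2r = readMatrix(out_dir + "/b2.csv");
W3r = readMatrix(out_dir + "/W3.csv"); b3r = readMatrix(out_dir + "/b3.csv");
W4r = readMatrix(out_dir + "/W4.csv"); b4r = readMatrix(out_dir + "/b4.csv");
probs_r = lenet.predict(X_test, C, Hin, Win, W1r, b1r, W2r, b2r, W3r, b3r, W4r, b4r);
loss_r, accuracy_r = lenet.eval(probs_r, Y_test);
print("Reloaded-model test accuracy: " + accuracy_r);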