Dnn ops #734

Draft: wants to merge 20 commits into base: main

Commits (20)
59944e7  [DAPHNE-#758] MetaDataObject for CSRMatrix (corepointer, Jun 17, 2024)
ca1210e  [MINOR] Silenced a variety of compiler and linter warnings (corepointer, Jun 17, 2024)
4e0ae52  [BUGFIX] LoadPartitioningDistributed crashed (corepointer, Jun 7, 2024)
aaac9a7  [DAPHNE-#xyz1] DNN Ops Type/Shape Inference; NN Namespace (corepointer, Jun 18, 2024)
06bff26  [DAPHNE-#xyz2] ReLU backward pass (corepointer, May 14, 2024)
d6c4e1e  [DAPHNE-#xyz3] batchnorm2d fwd test/train cpp (corepointer, Aug 19, 2024)
511e379  [DAPHNE-#xyz4] Neural Network DaphneDSL Scripts (corepointer, Aug 19, 2024)
f4651b1  [DAPHNE-#xyz5] LeNet Example Pipeline for MNIST character classification (corepointer, Aug 19, 2024)
6d322c4  [DAPHNE-#xyz6] WIP Script level tests for DNN ops (corepointer, Aug 19, 2024)
a0c7d9a  [DAPHNE-#xyz7] Make DaphneContext global (corepointer, May 22, 2024)
7a652ba  [BUGFIX] AggAll CUDA launch config (corepointer, Dec 31, 2023)
cd58be5  [BUGFIX] Only shortcut-reshape if shared_ptr is not null (corepointer, Jun 25, 2024)
f0ffae5  [MINOR] CUDA EwBinaryObjSca MIN/MAX/NEQ (corepointer, Jun 25, 2024)
f56c270  [MINOR] Comparing booleans (corepointer, Jul 29, 2024)
8d9d4d2  [MINOR] Cleanup & bugfix CUDA EwBinaryMat to handle 1x1 matrices (corepointer, Aug 19, 2024)
b761c7e  [TODO] genkernel bugfix multi return ref datatype (dirty) (corepointer, May 22, 2024)
547d168  [TODO] possible problem: ErrorHandler does not check for null string (corepointer, Jun 25, 2024)
997a324  [TEMP] dnn cpp debug statements (corepointer, Aug 19, 2024)
5b2d890  [TEMP] UserConfig.json changes while debugging (corepointer, Aug 19, 2024)
afd7502  [LOCAL] test scripts (corepointer, Aug 19, 2024)
27 changes: 18 additions & 9 deletions UserConfig.json
@@ -6,14 +6,14 @@
"matmul_unroll_factor": 1,
"matmul_unroll_jam_factor": 4,
"matmul_num_vec_registers": 16,
"use_cuda": false,
"use_cuda": true,
"use_vectorized_exec": false,
"use_obj_ref_mgnt": true,
"cuda_fuse_any": false,
"use_mlir_codegen": false,
"vectorized_single_queue": false,
"debug_llvm": false,
"explain_kernels": false,
"explain_kernels": true,
"explain_llvm": false,
"explain_parsing": false,
"explain_parsing_simplified": false,
@@ -28,24 +28,33 @@
"numberOfThreads": -1,
"minimumTaskSize": 1,
"libdir": "{exedir}/../lib",
"daphnedsl_import_paths": {},
"force_cuda": false,
"daphnedsl_import_paths": {
"default_dirs": ["scripts/algorithms", "scripts"]
},
"force_cuda": true,
"logging": [
{ "log-level-limit": "ERROR" },
{ "log-level-limit": "INFO" },
{
"comment": "This configuration controls logging in the GPU compiler pass only",
"name": "compiler::cuda",
"level": "DEBUG",
"level": "INFO",
"filename": "compiler-trace-cuda.txt",
"format": "%^[%L %n]:%$ %v"
},
{
"comment": "general runtime CUDA debug log",
"name": "runtime::cuda",
"level": "INFO",
"level": "DEBUG",
"filename": "compiler-debug-cuda.txt",
"format": "%^[%L %n]:%$ %v"
},
+{
+    "comment": "DAPHNE runtime logs",
+    "name": "runtime",
+    "level": "DEBUG",
+    "filename": "",
+    "format": "%^[%n %L]:%$ %v"
+},
{
"comment": "DAPHNE compiler logs",
"name": "compiler",
@@ -56,14 +65,14 @@
{
"comment": "DAPHNE default",
"name": "default",
"level": "INFO",
"level": "DEBUG",
"filename": "",
"format": "%^[%n %L]:%$ %v"
},
{
"comment": "DAPHNE default",
"name": "parser",
"level": "INFO",
"level": "DEBUG",
"filename": "",
"format": "%^[%n %L]:%$ %v"
}
48 changes: 48 additions & 0 deletions local/test-bn2.daph
@@ -0,0 +1,48 @@

import "nn/layers/batch_norm2d_dsl.daph" as "bn2d";
import "nn/layers/batch_norm2d_kernel.daph" as "bn2k";

msec_factor = as.f32(0.000001);

#input=\"/daphne/data/mnist20_features.csv\"
#X = readMatrix($input);

# num images
N = 1;
Himg = 8;
Wimg = 8;
#channels
C = 1;
# seed for random input generation
seed = 42;
mu = 0.1;
#eps = 1e-5;
eps = 0.00001;
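# note: the second print() argument appears to be a linebreak flag, so
# print("eps: ", 0) prints the label without a trailing newline (inferred
# from usage in these scripts, not verified against the builtin reference)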
print("eps: ",0);print(eps);
train = true;

gamma, beta, ema_mean, ema_var = bn2d.init(C);
print(gamma);
print(beta);
print(ema_mean);
print(ema_var);

X = rand(N, C * Himg * Wimg, 0.0, 256.0, 1.0, seed);
batch_size = 1;
X_batch = X[0:batch_size,];
print("X:");
print(X_batch);
#img1 = reshape(X[0,], Himg, Wimg);
#img = (img1 != 0);
#print(img1);

outBN, upd_ema_mean, upd_ema_var, cache_ema_mean, cache_ema_var =
bn2d.forward(X_batch, gamma, beta, C, Himg, Wimg, train, ema_mean, ema_var, mu, eps);

print("outBNd: ",0);
print(outBN);

outBNk, upd_ema_meank, upd_ema_vark, cache_ema_meank, cache_ema_vark =
bn2k.forward(X_batch, gamma, beta, C, Himg, Wimg, train, ema_mean, ema_var, mu, eps);
print("outBNk: ",0);
print(outBNk);
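The two forward calls above are meant to agree. For orientation, a hand-rolled sketch of the training-mode computation for this N = 1, C = 1 case, not part of the PR; it assumes `as.scalar`, `sqrt`, and `abs` are available DaphneDSL builtins and that `gamma`/`beta` from `bn2d.init` are 1x1 matrices:

# editorial sketch, not in the PR: reference batch-norm forward for N = 1, C = 1
n_px   = as.f64(nrow(X_batch) * ncol(X_batch));
m      = sum(X_batch) / n_px;                        # batch mean
v      = sum((X_batch - m) * (X_batch - m)) / n_px;  # biased batch variance
outRef = as.scalar(gamma) * (X_batch - m) / sqrt(v + eps) + as.scalar(beta);
print(sum(abs(outBN - outRef)));  # should be ~0 for both implementations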
96 changes: 96 additions & 0 deletions local/test-dnn-ops.daph
@@ -0,0 +1,96 @@

import "nn/layers/conv2d_kernel.daph" as "conv2d";
import "nn/layers/conv2d.daph" as "conv2d_dsl"; #returns segFault (issue #77)

msec_factor = as.f32(0.000001);

#X1 = readMatrix($input); # unused; X is generated via rand() below, so no $input argument is needed

# num images
N = 1;
#Himg = 28;
#Wimg = 28;
Himg = 8;
Wimg = 8;
#channels
numC = 1;
# seed for random input generation
seed = 42;
X = rand(N, numC * Himg * Wimg, 0.0, 256.0, 1.0, seed);
batch_size = 1;
X_batch = X[0:batch_size,];
print("X:");
print(X_batch);
img1 = reshape(X[0,], Himg, Wimg);
#img = (img1 != 0);
print(img1);

numF = 1;
#Hf = 5;
#Wf = 5;
Hf = 3;
Wf = 3;

#W = fill(1.0, numF, numC * Hf * Wf);
#b = fill(0.0, numF, 1);
#print(sum(W));
#print(sum(b));

W, b = conv2d.init(numF, numC, Hf, Wf, seed);
print("W:");
imgW = reshape(W[0,], Hf, Wf);
#imgW = (imgW1 != 0);
print(imgW);

stride = 1;
pad = 2;
#X_conv, Hout, Wout = conv2d(X_batch, W, batch_size, numC, Himg, Wimg, Hf, Wf, stride, stride, pad, pad, b);

t_start_conv1 = now();
X_conv1, Hout1, Wout1 = conv2d.forward(X_batch, W, b, numC, Himg, Wimg, Hf, Wf, stride, stride, pad, pad);
t_start_conv2 = now();
X_conv2, Hout2, Wout2 = conv2d_dsl.forward(X_batch, W, b, numC, Himg, Wimg, Hf, Wf, stride, stride, pad, pad);
t_end_conv = now();

print("X dims: ",0) ;print(nrow(X),0);print("x",0);print(ncol(X));
print("X_conv1 dims: ",0) ;print(nrow(X_conv1),0);print("x",0);print(ncol(X_conv1));
print("X_conv2 dims: ",0) ;print(nrow(X_conv2),0);print("x",0);print(ncol(X_conv2));
print("conv1 output dims: ",0);print(Hout1,0);print("x",0);print(Wout1);
print("conv2 output dims: ",0);print(Hout2,0);print("x",0);print(Wout2);
print("Time conv1: ",0);print(as.f32((t_start_conv1 - t_start_conv1)) * msec_factor,0);print(" ms");
print("Time conv2: ",0);print(as.f32((t_end_conv - t_start_conv2)) * msec_factor,0);print(" ms");

print(X_conv1);
print(X_conv2);
print(sum(X_conv1));
print(sum(X_conv2));
print(nrow(X_conv1));
print(nrow(X_conv2));
print(ncol(X_conv1));
print(ncol(X_conv2));
print(Hout1);
print(Hout2);
print(Wout1);
print(Wout2);
//print(X_conv[0:1,0:100]);
#f1out = reshape(X_conv[0:1,0:784], 28, 28);
#print(f1out[3:4,]);
#print(sum(f1out));
stop("stopping early");

# seed for random input generation
seed = 42;

t_start = now();
X_relu2 = rand(N, 97280, 0.0, 256.0, 1.0, seed);

t_end_relu2 = now();
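# argument order of max_pool2d as inferred from the call below (not checked
# against the kernel signature): input, batch_size, channels, img_h, img_w,
# pool_h, pool_w, stride_h, stride_w, pad_h, pad_w; 32 * 4 * 760 = 97280 = ncol(X_relu2)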
X_pool, Hpool, Wpool = max_pool2d(X_relu2, N, 32, 4, 760, 2, 2, 2, 2, 0, 0);
t_end_pool = now();


print("X_pool dims: ",0) ;print(nrow(X_pool),0);print("x",0);print(ncol(X_pool));
print("Time maxpool2d: ",0);print(as.f32((t_end_pool - t_end_relu2)) * msec_factor,0);print(" ms");
t_end = now();
print("Time total: ",0);print(as.f32((t_end - t_start)) * msec_factor,0);print(" ms");

160 changes: 160 additions & 0 deletions scripts/examples/mnist_lenet-train.daph
@@ -0,0 +1,160 @@
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Modifications Copyright 2024 The DAPHNE Consortium
#
#-------------------------------------------------------------

# This script has been manually translated from Apache SystemDS.

# MNIST LeNet - Train
#
# This script trains a convolutional net using the "LeNet" architecture
# on images of handwritten digits.
#
# Inputs:
# - train: File containing labeled MNIST training images.
# The format is "label, pixel_1, pixel_2, ..., pixel_n".
# - test: File containing labeled MNIST test images.
# The format is "label, pixel_1, pixel_2, ..., pixel_n".
# - C: Number of color channels in the images.
# - Hin: Input image height.
# - Win: Input image width.
# - epochs: [DEFAULT: 10] Total number of full training loops over
# the full data set.
# - out_dir: [DEFAULT: "."] Directory to store weights and bias
# matrices of trained model, as well as final test accuracy.
# - fmt: [DEFAULT: "csv"] File format of `train` and `test` data.
# Options include: "csv", "mm", "text", and "binary".
#
# Outputs:
# - W1, W2, W3, W4: Files containing the trained weights of the model.
# - b1, b2, b3, b4: Files containing the trained biases of the model.
# - accuracy: File containing the final accuracy on the test data.
#
# Data:
# The MNIST dataset contains labeled images of handwritten digits,
# where each example is a 28x28 pixel image of grayscale values in
# the range [0,255] stretched out as 784 pixels, and each label is
# one of 10 possible digits in [0,9].
#
# Sample invocation (carried over from the original SystemDS script; this
# DAPHNE translation currently reads hard-coded CSV paths below instead):
# 1. Download data (60,000 training examples, and 10,000 test examples)
# ```
# nn/examples/get_mnist_data.sh
# ```
#
# 2. Execute using Spark
# ```
# spark-submit --master local[*] --driver-memory 10G
# --conf spark.driver.maxResultSize=0 --conf spark.rpc.message.maxSize=128
# $SYSTEMDS_ROOT/target/SystemDS.jar -f nn/examples/mnist_lenet-train.dml
# -nvargs train=nn/examples/data/mnist/mnist_train.csv test=nn/examples/data/mnist/mnist_test.csv
# C=1 Hin=28 Win=28 epochs=10 out_dir=nn/examples/model/mnist_lenet
# ```
#

import "nn/networks/lenet-train.daph" as "lenet";

# Read training data & settings
#fmt = ifdef($fmt, "csv")
#train = read($train, format=fmt)
#test = read($test, format=fmt)
train_X = readMatrix("./data/mnist20_features.csv");
train_Y = readMatrix("./data/mnist20_labels.csv");
#test_X = readMatrix("./data/mnist20_features.csv");
#test_Y = readMatrix("./data/mnist20_labels.csv");
#C = $C
#Hin = $Hin
#Win = $Win

#train_X = readMatrix("./data/mnist/mnist_features.csv");
#train_Y = readMatrix("./data/mnist/mnist_labels.csv");
#test_X = readMatrix("./data/mnist/mnist_features.csv");
#test_Y = readMatrix("./data/mnist/mnist_labels.csv");

C = 1;
Hin = 28;
Win = 28;
#epochs = ifdef($epochs, 10)
epochs = 2;
#out_dir = ifdef($out_dir, ".")
out_dir = ".";

# Extract images and labels
#images = train[,1:ncol(train)];
#labels = train[,1];
#X_test = test[,1:ncol(test)];
#Y_test = test[,1];

#images = train_X[0:5000,];
#labels = train_Y[0:5000,];
#X_test = train_X[5001:6000,];
#Y_test = train_Y[5001:6000,];
images = train_X[0:15,];
labels = train_Y[0:15,];
X_test = train_X[16:20,];
Y_test = train_Y[16:20,];

# Scale images to [-1,1]; one-hot encoding of the labels is currently commented out
n = nrow(images);
n_test = nrow(X_test);
images = (images / 255.0) * 2 - 1;
#labels = table(seq(1, n, 1), labels+1, n, 10);
#labels = oneHot(labels, fill(as.si64(nrow(labels)),1, ncol(labels)));
X_test = (X_test / 255.0) * 2 - 1;
#Y_test = table(seq(1, n_test, 1), Y_test+1, n_test, 10);
#Y_test = oneHot(Y_test, fill(nrow(Y_test),1, ncol(Y_test)));

# Split into training and validation sets (the original 55,000/5,000 MNIST
# split is commented out below; the debug subset holds out the last 3 examples)
#X = images[5001:nrow(images),];
#X_val = images[1:5000,];
#Y = labels[5001:nrow(images),];
#Y_val = labels[1:5000,];
#X = images[1001:nrow(images) - 3000,];
#X_val = images[0:1000,];
#Y = labels[11:nrow(images),];
#Y_val = labels[0:1000,];
X = images[0:nrow(images) - 3,];
X_val = images[nrow(images) - 3:nrow(images),];
Y = labels[0:nrow(images)-3,];
Y_val = labels[nrow(images) - 3:nrow(images),];


# Train
W1, b1, W2, b2, W3, b3, W4, b4 = lenet.train(X, Y, X_val, Y_val, C, Hin, Win, epochs);

# Write model out
write(W1, out_dir + "/W1.csv");
write(b1, out_dir + "/b1.csv");
write(W2, out_dir + "/W2.csv");
write(b2, out_dir + "/b2.csv");
write(W3, out_dir + "/W3.csv");
write(b3, out_dir + "/b3.csv");
write(W4, out_dir + "/W4.csv");
write(b4, out_dir + "/b4.csv");

# Eval on test set
probs = lenet.predict(X_test, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4);
loss, accuracy = lenet.eval(probs, Y_test);

# Output results
print("Test Accuracy: " + accuracy);
#write(accuracy, out_dir + "/accuracy.csv");
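
A natural follow-up, sketched here for illustration and not part of the PR: reload the persisted model and score the test set again. This assumes `readMatrix` accepts the same paths `write()` produced above, and that `write()` emits any metadata DAPHNE needs to read the CSVs back.

# editorial sketch, not in the PR: reload the trained model and re-score
W1r = readMatrix(out_dir + "/W1.csv"); b1r = readMatrix(out_dir + "/b1.csv");
W2r = readMatrix(out_dir + "/W2.csv"); b2r = readMatrix(out_dir + "/b2.csv");
W3r = readMatrix(out_dir + "/W3.csv"); b3r = readMatrix(out_dir + "/b3.csv");
W4r = readMatrix(out_dir + "/W4.csv"); b4r = readMatrix(out_dir + "/b4.csv");
probs_r = lenet.predict(X_test, C, Hin, Win, W1r, b1r, W2r, b2r, W3r, b3r, W4r, b4r);
loss_r, accuracy_r = lenet.eval(probs_r, Y_test);
print("Reloaded-model test accuracy: " + accuracy_r);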