
Commit

Merge pull request #392 from leondavi/new_confusion_natrix
New confusion natrix
leondavi authored Aug 12, 2024
2 parents 9bada32 + 0122573 commit cdbb932
Showing 5 changed files with 327 additions and 11 deletions.
@@ -0,0 +1,10 @@
{
"connectionsMap":
{
"r1":["mainServer", "c1", "c2", "r2"],
"r2":["c3","c4", "s1", "r3"],
"r3":["c5","c6", "r4", "r1"],
"r4":["s2","c7", "r5", "r2"],
"r5":["s3","c8", "r1"]
}
}
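
The connection map above wires the five routers into a small mesh, with the clients, sources, and main server attached to them. A minimal sketch in Python (the file path below is hypothetical, and links are assumed to be bidirectional) that loads the map and verifies every entity is reachable from mainServer:

import json
from collections import defaultdict, deque

# Hypothetical local copy of the connection-map JSON shown above.
with open("connection_map.json") as f:
    conn = json.load(f)["connectionsMap"]

# Treat each listed link as bidirectional and build an adjacency map.
adjacency = defaultdict(set)
for router, neighbors in conn.items():
    for neighbor in neighbors:
        adjacency[router].add(neighbor)
        adjacency[neighbor].add(router)

# BFS from the main server to confirm the whole topology is connected.
visited = {"mainServer"}
queue = deque(["mainServer"])
while queue:
    node = queue.popleft()
    for nxt in adjacency[node]:
        if nxt not in visited:
            visited.add(nxt)
            queue.append(nxt)

unreachable = sorted(set(adjacency) - visited)
print("unreachable entities:", unreachable or "none")
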
221 changes: 221 additions & 0 deletions inputJsonsFiles/DistributedConfig/dc_EEG_8d_8c_3s_5r_8w_RR.json
@@ -0,0 +1,221 @@
{
"nerlnetSettings": {
"frequency": "5",
"batchSize": "10"
},
"mainServer": {
"port": "8900",
"args": ""
},
"apiServer": {
"port": "8901",
"args": ""
},
"devices": [
{
"name": "c0VM5",
"ipv4": "10.0.0.11",
"entities": "apiServer,mainServer"
},
{
"name": "c0VM4",
"ipv4": "10.0.0.10",
"entities": "c3"
},
{
"name": "c0VM6",
"ipv4": "10.0.0.8",
"entities": "c1,r3"
},
{
"name": "c0VM7",
"ipv4": "10.0.0.12",
"entities": "c2,r4"
},
{
"name": "nerlSpilke0",
"ipv4": "10.0.0.32",
"entities": "s2,r5,c4"
},
{
"name": "nerlSpilke1",
"ipv4": "10.0.0.33",
"entities": "s3,c5"
},
{
"name": "nerlSpilke2",
"ipv4": "10.0.0.34",
"entities": "s1,r2,c6"
},
{
"name": "nerlSpilke3",
"ipv4": "10.0.0.35",
"entities": "r1,c7,c8"
}
],
"routers": [
{
"name": "r1",
"port": "8902",
"policy": "0"
},
{
"name": "r2",
"port": "8903",
"policy": "0"
},
{
"name": "r3",
"port": "8904",
"policy": "0"
},
{
"name": "r4",
"port": "8905",
"policy": "0"
},
{
"name": "r5",
"port": "8906",
"policy": "0"
}
],
"sources": [
{
"name": "s1",
"port": "8907",
"frequency": "5",
"policy": "0",
"epochs": "1",
"type": "0"
},
{
"name": "s2",
"port": "8908",
"frequency": "5",
"policy": "0",
"epochs": "1",
"type": "0"
},
{
"name": "s3",
"port": "8909",
"frequency": "5",
"policy": "0",
"epochs": "1",
"type": "0"
}
],
"clients": [
{
"name": "c1",
"port": "8910",
"workers": "w1"
},
{
"name": "c2",
"port": "8911",
"workers": "w2"
},
{
"name": "c3",
"port": "8912",
"workers": "w3"
},
{
"name": "c4",
"port": "8913",
"workers": "w4"
},
{
"name": "c5",
"port": "8914",
"workers": "w5"
},
{
"name": "c6",
"port": "8915",
"workers": "w6"
},
{
"name": "c7",
"port": "8916",
"workers": "w7"
},
{
"name": "c8",
"port": "8917",
"workers": "w8"
}
],
"workers": [
{
"name": "w1",
"model_sha": "d8df752e0a2e8f01de8f66e9cec941cdbc65d144ecf90ab7713e69d65e7e82aa"
},
{
"name": "w2",
"model_sha": "d8df752e0a2e8f01de8f66e9cec941cdbc65d144ecf90ab7713e69d65e7e82aa"
},
{
"name": "w3",
"model_sha": "d8df752e0a2e8f01de8f66e9cec941cdbc65d144ecf90ab7713e69d65e7e82aa"
},
{
"name": "w4",
"model_sha": "d8df752e0a2e8f01de8f66e9cec941cdbc65d144ecf90ab7713e69d65e7e82aa"
},
{
"name": "w5",
"model_sha": "d8df752e0a2e8f01de8f66e9cec941cdbc65d144ecf90ab7713e69d65e7e82aa"
},
{
"name": "w6",
"model_sha": "d8df752e0a2e8f01de8f66e9cec941cdbc65d144ecf90ab7713e69d65e7e82aa"
},
{
"name": "w7",
"model_sha": "d8df752e0a2e8f01de8f66e9cec941cdbc65d144ecf90ab7713e69d65e7e82aa"
},
{
"name": "w8",
"model_sha": "d8df752e0a2e8f01de8f66e9cec941cdbc65d144ecf90ab7713e69d65e7e82aa"
}
],
"model_sha": {
"d8df752e0a2e8f01de8f66e9cec941cdbc65d144ecf90ab7713e69d65e7e82aa": {
"modelType": "0",
"_doc_modelType": " nn:0 | approximation:1 | classification:2 | forecasting:3 | image-classification:4 | text-classification:5 | text-generation:6 | auto-association:7 | autoencoder:8 | ae-classifier:9 |",
"modelArgs": "",
"layersSizes": "70x1x1k5x1x1x64p0s1t0,66x1x64k2x1p0s1,65x1x64k5x1x64x64p0s1t0,61x1x64k2x1p0s1,60x1x64k5x1x64x32p0s1t0,1,32,16,9",
"_doc_layersSizes": "List of postive integers [L0, L1, ..., LN]",
"layerTypesList": "2,4,2,4,2,9,3,3,3",
"_doc_LayerTypes": " Default:0 | Scaling:1 | CNN:2 | Perceptron:3 | Pooling:4 | Probabilistic:5 | LSTM:6 | Reccurrent:7 | Unscaling:8 | Flatten:9 | Bounding:10 |",
"layers_functions": "11,2,11,2,11,1,6,6,11",
"_doc_layers_functions_activation": " Threshold:1 | Sign:2 | Logistic:3 | Tanh:4 | Linear:5 | ReLU:6 | eLU:7 | SeLU:8 | Soft-plus:9 | Soft-sign:10 | Hard-sigmoid:11 |",
"_doc_layer_functions_pooling": " none:1 | Max:2 | Avg:3 |",
"_doc_layer_functions_probabilistic": " Binary:1 | Logistic:2 | Competitive:3 | Softmax:4 |",
"_doc_layer_functions_scaler": " none:1 | MinMax:2 | MeanStd:3 | STD:4 | Log:5 |",
"lossMethod": "2",
"_doc_lossMethod": " SSE:1 | MSE:2 | NSE:3 | MinkowskiE:4 | WSE:5 | CEE:6 |",
"lossArgs": "",
"_doc_lossArgs": "Arguments to loss function. Regularization: reg=L2, reg=L1, reg=NoRegularization (can be also empty)",
"lr": "0.00001",
"_doc_lr": "Positve float",
"epochs": "1",
"_doc_epochs": "Positve Integer",
"optimizer": "5",
"_doc_optimizer": " GD:0 | CGD:1 | SGD:2 | QuasiNeuton:3 | LVM:4 | ADAM:5 |",
"optimizerArgs": "",
"_doc_optimizerArgs": "String",
"infraType": "0",
"_doc_infraType": " opennn:0 | wolfengine:1 |",
"distributedSystemType": "0",
"_doc_distributedSystemType": " none:0 | fedClientAvg:1 | fedServerAvg:2 |",
"distributedSystemArgs": "",
"_doc_distributedSystemArgs": "String",
"distributedSystemToken": "none",
"_doc_distributedSystemToken": "Token that associates distributed group of workers and parameter-server"
}
}
}
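
The shared model entry above encodes the architecture as three parallel comma-separated strings: layersSizes, layerTypesList, and layers_functions, with the code-to-name mappings given in the _doc_ fields. A minimal sketch (the local path is hypothetical) that zips those strings into a per-layer summary:

import json

# Hypothetical local copy of the distributed-config JSON shown above.
with open("dc_EEG_8d_8c_3s_5r_8w_RR.json") as f:
    dc = json.load(f)

# Names taken from the _doc_LayerTypes field of the config.
LAYER_TYPES = {"0": "default", "1": "scaling", "2": "cnn", "3": "perceptron",
               "4": "pooling", "5": "probabilistic", "6": "lstm", "7": "recurrent",
               "8": "unscaling", "9": "flatten", "10": "bounding"}

model = next(iter(dc["model_sha"].values()))  # this config defines a single shared model
sizes = model["layersSizes"].split(",")
types = model["layerTypesList"].split(",")
funcs = model["layers_functions"].split(",")

for i, (size, ltype, func) in enumerate(zip(sizes, types, funcs)):
    print(f"layer {i}: type={LAYER_TYPES.get(ltype, ltype):<13} function_code={func:<2} spec={size}")

For this file the zip yields nine layers: alternating CNN and pooling layers described by the kernel/stride specs, then a flatten layer and three perceptron layers of 32, 16, and 9 units.
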
69 changes: 69 additions & 0 deletions inputJsonsFiles/experimentsFlow/exp_EEG_3s_8w_3people_RR.json
@@ -0,0 +1,69 @@
{
"experimentName": "EEG_Valence_Recognition_DEAP",
"experimentType": "classification",
"batchSize": 10,
"csvFilePath": "/home/nerlnet/workspace/1_3_persons_normalize_bins_valence.csv",
"numOfFeatures": "70",
"numOfLabels": "9",
"headersNames": "1,2,3,4,5,6,7,8,9",
"Phases":

[
{
"phaseName": "training_phase",
"phaseType": "training",
"sourcePieces":
[
{
"sourceName": "s1",
"startingSample": "0",
"numOfBatches": "20",
"workers": "w1,w4,w7",
"nerltensorType": "float"
},
{
"sourceName": "s2",
"startingSample": "19520",
"numOfBatches": "20",
"workers": "w2,w5,w8",
"nerltensorType": "float"
},
{
"sourceName": "s3",
"startingSample": "39040",
"numOfBatches": "20",
"workers": "w3,w6",
"nerltensorType": "float"
}
]
},
{
"phaseName": "prediction_phase",
"phaseType": "prediction",
"sourcePieces":
[
{
"sourceName": "s1",
"startingSample": "15610",
"numOfBatches": "2",
"workers": "w1,w4,w7",
"nerltensorType": "float"
},
{
"sourceName": "s2",
"startingSample": "35130",
"numOfBatches": "2",
"workers": "w2,w5,w8",
"nerltensorType": "float"
},
{
"sourceName": "s3",
"startingSample": "54650",
"numOfBatches": "2",
"workers": "w3,w6",
"nerltensorType": "float"
}
]
}
]
}
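
Each sourcePiece streams numOfBatches batches of batchSize samples starting at startingSample, so the sample span each source feeds its workers can be derived directly from this file. A minimal sketch (local path hypothetical) that prints those spans per phase, which helps confirm that the prediction pieces do not overlap the training pieces:

import json

# Hypothetical local copy of the experiment-flow JSON shown above.
with open("exp_EEG_3s_8w_3people_RR.json") as f:
    exp = json.load(f)

batch_size = int(exp["batchSize"])
for phase in exp["Phases"]:
    print(f'{phase["phaseName"]} ({phase["phaseType"]}):')
    for piece in phase["sourcePieces"]:
        start = int(piece["startingSample"])
        count = int(piece["numOfBatches"]) * batch_size
        # Samples [start, start + count) are sent to the listed workers.
        print(f'  {piece["sourceName"]}: samples {start}-{start + count - 1} -> workers {piece["workers"]}')
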
4 changes: 2 additions & 2 deletions src_py/apiServer/experiment_flow_debug.py
@@ -20,8 +20,8 @@ def print_test(in_str : str):
api_server_instance.download_dataset(TEST_DATASET_IDX)
#api_server_instance.help()
api_server_instance.showJsons()
dc_idx = 2
conn_idx = 21
dc_idx = 6
conn_idx = 25
exp_idx = 3
api_server_instance.setJsons(dc_idx, conn_idx, exp_idx)
dc_json , connmap_json, exp_flow_json = api_server_instance.getUserJsons()
34 changes: 25 additions & 9 deletions src_py/apiServer/stats.py
@@ -225,19 +225,35 @@ def build_worker_label_df(original_df, batch_ids, batch_size):
confusion_matrix_worker_dict[(worker_name, class_name)] += confusion_matrix

else: # Multi-Class
# Take 2 lists from the df, one for the actual labels and one for the predicted labels, to build the confusion matrix
max_column_predict_index = df_worker_labels.iloc[:, num_of_labels:].idxmax(axis=1)
max_column_predict_index = max_column_predict_index.tolist()
max_column_predict_index = [int(predict_index) - num_of_labels for predict_index in max_column_predict_index] # fix the index to original labels index
max_column_labels_index = df_worker_labels.iloc[:, :num_of_labels].idxmax(axis=1)
max_column_labels_index = max_column_labels_index.tolist()
# check whether any sample has more than one predicted label
max_in_row = df_worker_labels.iloc[:, num_of_labels:].max(axis=1)
is_max = df_worker_labels.iloc[:, num_of_labels:].eq(max_in_row, axis=0) # boolean DataFrame where True marks the maximum value(s) in each row
max_counts = is_max.sum(axis=1) # number of maximum values in each row
has_multiple_max = max_counts.gt(1).any() # True if at least one row has multiple maximum values among the predicted labels

if has_multiple_max:
LOG_INFO(f"Worker {worker_name} has at least one sample with multiple predicted labels")
max_column_predict_index = is_max.apply(lambda row: list(row[row].index) if row.any() else [-1], axis=1).tolist() # list of lists; each sublist holds the column indexes of the maximum value(s) in that row
max_column_predict_index = [[int(predict_label) - num_of_labels for predict_label in predict_indexes_sublist] for predict_indexes_sublist in max_column_predict_index] # shift the indexes back to the original label indexes
max_column_labels_index = df_worker_labels.iloc[:, :num_of_labels].idxmax(axis=1).tolist() # Get the index of the maximum actual value in each row

else: # No sample with multiple predicted labels
# Take 2 lists from the df, one for the actual labels and one for the predicted labels, to build the confusion matrix
max_column_predict_index = df_worker_labels.iloc[:, num_of_labels:].idxmax(axis=1)
max_column_predict_index = max_column_predict_index.tolist()
max_column_predict_index = [int(predict_index) - num_of_labels for predict_index in max_column_predict_index] # shift the index back to the original label index
max_column_labels_index = df_worker_labels.iloc[:, :num_of_labels].idxmax(axis=1)
max_column_labels_index = max_column_labels_index.tolist()

# building confusion matrix for each class
for class_index, class_name in enumerate(self.headers_list):
if has_multiple_max:
class_predict_list = [1 if class_index in row_max_list else 0 for row_max_list in max_column_predict_index]
else:
class_predict_list = [1 if label_num == class_index else 0 for label_num in max_column_predict_index] # 1 if the label belongs to the class, 0 otherwise
class_actual_list = [1 if label_num == class_index else 0 for label_num in max_column_labels_index] # 1 if the label belongs to the class, 0 otherwise
class_predict_list = [1 if label_num == class_index else 0 for label_num in max_column_predict_index] # 1 if the label belongs to the class, 0 otherwise
confusion_matrix = metrics.confusion_matrix(class_actual_list, class_predict_list)
#confusion_matrix_np = confusion_matrix.to_numpy()
labels = [0, 1]
confusion_matrix = metrics.confusion_matrix(class_actual_list, class_predict_list, labels=labels)
confusion_matrix_source_dict[(source_name, worker_name, class_name)] = confusion_matrix
if (worker_name, class_name) not in confusion_matrix_worker_dict:
confusion_matrix_worker_dict[(worker_name, class_name)] = confusion_matrix
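
The new multi-class branch above builds one binary, one-vs-rest confusion matrix per class and, when several prediction columns tie at a row's maximum, credits every tied class for that sample. A standalone sketch of the same idea on a toy DataFrame (the column layout, true one-hot labels first and prediction scores after, is assumed from the slicing in the code above):

import pandas as pd
from sklearn import metrics

# Toy frame with 3 classes: columns 0-2 are true one-hot labels, columns 3-5 are prediction scores.
df = pd.DataFrame([
    [1, 0, 0, 0.9, 0.1, 0.0],  # clearly predicted as class 0
    [0, 1, 0, 0.5, 0.5, 0.0],  # tie between classes 0 and 1
    [0, 0, 1, 0.1, 0.2, 0.7],  # clearly predicted as class 2
])
num_of_labels = 3

scores = df.iloc[:, num_of_labels:]
is_max = scores.eq(scores.max(axis=1), axis=0)  # True wherever a row's maximum sits
# Every class tied at the maximum counts as a prediction for that sample.
predicted = is_max.apply(lambda row: [i for i, v in enumerate(row) if v], axis=1).tolist()
actual = df.iloc[:, :num_of_labels].idxmax(axis=1).tolist()

for class_index in range(num_of_labels):
    y_true = [1 if label == class_index else 0 for label in actual]
    y_pred = [1 if class_index in tied else 0 for tied in predicted]
    # Fixing labels=[0, 1] keeps the matrix 2x2 even when a class never appears.
    cm = metrics.confusion_matrix(y_true, y_pred, labels=[0, 1])
    print(f"class {class_index}:\n{cm}")

Passing labels=[0, 1] mirrors the change in the diff: it guarantees a 2x2 matrix for every class, so the per-source matrices can be summed into the per-worker dictionary safely.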