From 668a2da3978a76779b6948f49aac030853e87c26 Mon Sep 17 00:00:00 2001 From: Benjamin Cretois Date: Thu, 7 Mar 2024 11:10:53 +0100 Subject: [PATCH] [ADD] n_subsample to the support for distribution --- evaluate/_utils_compute.py | 16 ++++++------ evaluate/_utils_writing.py | 53 +++++++++++++++++++++++++++++++++++++- evaluate/evaluateDCASE.py | 31 ++++++---------------- 3 files changed, 68 insertions(+), 32 deletions(-) diff --git a/evaluate/_utils_compute.py b/evaluate/_utils_compute.py index defce1c..89d7cb7 100644 --- a/evaluate/_utils_compute.py +++ b/evaluate/_utils_compute.py @@ -71,16 +71,16 @@ def merge_preds(df, tolerence, tensor_length,frame_shift): result = df.groupby("group").agg({"Starttime": "min", "Endtime": "max"}) return result -def reshape_support(support_samples, tensor_length=128): +def reshape_support(support_samples, tensor_length=128, n_subsample=1): new_input = [] for x in support_samples: - #for _ in range(n_subsample): - if x.shape[1] > tensor_length: - rand_start = torch.randint(0, x.shape[1] - tensor_length, (1,)) - new_x = torch.tensor(x[:, rand_start : rand_start + tensor_length]) - new_input.append(new_x.unsqueeze(0)) - else: - new_input.append(torch.tensor(x)) + for _ in range(n_subsample): + if x.shape[1] > tensor_length: + rand_start = torch.randint(0, x.shape[1] - tensor_length, (1,)) + new_x = torch.tensor(x[:, rand_start : rand_start + tensor_length]) + new_input.append(new_x.unsqueeze(0)) + else: + new_input.append(torch.tensor(x)) all_supports = torch.cat([x for x in new_input]) return(all_supports) diff --git a/evaluate/_utils_writing.py b/evaluate/_utils_writing.py index cafabf2..e9d8dc6 100644 --- a/evaluate/_utils_writing.py +++ b/evaluate/_utils_writing.py @@ -134,4 +134,55 @@ def write_wav( result_wav = np.vstack( (arr, gt_labels, merged_pred, pred_labels , distances_to_pos / 10, z_scores_pos) ) - wavfile.write(output, target_fs, result_wav.T) \ No newline at end of file + wavfile.write(output, target_fs, result_wav.T) + +def plot_2_d_representation(): + import numpy as np + import matplotlib.pyplot as plt + from sklearn.manifold import TSNE + + # Assuming `prototypes`, `z_pos_supports`, `z_neg_supports`, `q_embeddings`, and `labels` are already defined + # Convert tensors to numpy arrays if they are in tensor format + # e.g., z_pos_supports = z_pos_supports.detach().numpy() + + # Create a labels array for all points + # Label for prototypes, positive supports, negative supports, and query embeddings respectively + prototypes_labels = np.array([2] * prototypes.shape[0]) # Assuming 2 is not used in `gt_labels` + pos_supports_labels = np.array([3] * z_pos_supports.shape[0]) # Assuming 3 is not used in `gt_labels` + neg_supports_labels = np.array([4] * z_neg_supports.shape[0]) # Assuming 4 is not used in `gt_labels` + q_embeddings = q_embeddings.detach().numpy() + gt_labels = labels.detach().numpy() + + # Concatenate everything into one dataset + feat = np.concatenate([prototypes, z_pos_supports, z_neg_supports, q_embeddings]) + all_labels = np.concatenate([prototypes_labels, pos_supports_labels, neg_supports_labels, gt_labels]) + + # Run t-SNE + tsne = TSNE(n_components=2, perplexity=30) + features_2d = tsne.fit_transform(feat) + + # Plot + plt.figure(figsize=(10, 8)) + # Define marker for each type of point + markers = {2: "P", 3: "o", 4: "X"} # P for prototypes, o for supports, X for negative supports + + for label in np.unique(all_labels): + # Plot each class with its own color and marker + idx = np.where(all_labels == label) + if label in markers: # Prototypes or supports + plt.scatter(features_2d[idx, 0], features_2d[idx, 1], label=label, alpha=1.0, marker=markers[label], s=100) # Larger size + else: # Query embeddings + plt.scatter(features_2d[idx, 0], features_2d[idx, 1], label=label, alpha=0.5, s=50) # Smaller size, more transparent + + plt.legend() + plt.title('t-SNE visualization of embeddings, prototypes, and supports') + plt.xlabel('Dimension 1') + plt.ylabel('Dimension 2') + plt.grid(True) + + fig_name = os.path.basename(support_spectrograms).split("data_")[1].split(".")[0] + ".png" + output = os.path.join(target_path, fig_name) + + # Save the figure + plt.savefig(output, bbox_inches="tight") + plt.show() \ No newline at end of file diff --git a/evaluate/evaluateDCASE.py b/evaluate/evaluateDCASE.py index 161dde5..5400c5d 100644 --- a/evaluate/evaluateDCASE.py +++ b/evaluate/evaluateDCASE.py @@ -100,7 +100,9 @@ def compute( # GET EMBEDDINGS FOR THE NEG SAMPLES # ###################################### support_samples_neg = df_support[df_support["category"] == "NEG"]["feature"].to_numpy() - support_samples_neg = reshape_support(support_samples_neg, tensor_length=cfg["data"]["tensor_length"]) + support_samples_neg = reshape_support(support_samples_neg, + tensor_length=cfg["data"]["tensor_length"], + n_subsample=cfg["predict"]["n_subsample"]) z_neg_supports, _ = model.get_embeddings(support_samples_neg, padding_mask=None) ### Get the query dataset ### @@ -200,28 +202,6 @@ def compute( ################################################ if cfg["plot"]["tsne"]: - from sklearn.manifold import TSNE - import seaborn as sns - - prototypes=prototypes.detach().numpy() - z_pos_supports = z_pos_supports.detach().numpy() - z_neg_supports = z_neg_supports.detach().numpy() - q_embeddings = q_embeddings.detach().numpy() - gt_labels = labels - other_labels = np.concatenate(([0,1], np.repeat(1, z_pos_supports.shape(0)), np.repeat(0, z_neg_supports.shape(0))), axis=None) - - feat = np.concatenate([q_embeddings, prototypes, z_pos_supports, z_neg_supports]) - tsne = TSNE(n_components=2, perplexity=5) - features_2d = tsne.fit_transform(feat) - - # Do the figure! - fig = sns.scatterplot(x=features_2d[:, 0], y=features_2d[:, 1], hue=labels) - sns.move_legend(fig, "upper left", bbox_to_anchor=(1, 1)) - - fig_name = os.path.basename(support_spectrograms).split("data_")[1].split(".")[0] + ".png" - output = os.path.join(target_path, fig_name) - fig.get_figure().savefig(output, bbox_inches="tight") - import numpy as np import matplotlib.pyplot as plt from sklearn.manifold import TSNE @@ -235,6 +215,8 @@ def compute( prototypes_labels = np.array([2] * prototypes.shape[0]) # Assuming 2 is not used in `gt_labels` pos_supports_labels = np.array([3] * z_pos_supports.shape[0]) # Assuming 3 is not used in `gt_labels` neg_supports_labels = np.array([4] * z_neg_supports.shape[0]) # Assuming 4 is not used in `gt_labels` + q_embeddings = q_embeddings.detach().numpy() + gt_labels = labels.detach().numpy() # Concatenate everything into one dataset feat = np.concatenate([prototypes, z_pos_supports, z_neg_supports, q_embeddings]) @@ -263,6 +245,9 @@ def compute( plt.ylabel('Dimension 2') plt.grid(True) + fig_name = os.path.basename(support_spectrograms).split("data_")[1].split(".")[0] + ".png" + output = os.path.join(target_path, fig_name) + # Save the figure plt.savefig(output, bbox_inches="tight") plt.show()