Merge pull request #25 from simon-ball/master

Modified RateMap, new PopulationVector, fixed Coverage
kavli-ntnu · Dec 10, 2019 · f48eb4b · f48eb4b
2 parents a5f25f4 + fa4eaa4
commit f48eb4b
Show file tree

Hide file tree

Showing 9 changed files with 312 additions and 23 deletions.
diff --git a/opexebo/__init__.py b/opexebo/__init__.py
@@ -18,4 +18,4 @@
 
 __author__ = """Simon Ball"""
 __email__ = 'simon.ball@ntnu.no'
-__version__ = '0.3.5'
+__version__ = '0.4.0'
diff --git a/opexebo/analysis/__init__.py b/opexebo/analysis/__init__.py
@@ -12,12 +12,16 @@
 from opexebo.analysis.tuningCurve import tuning_curve
 from opexebo.analysis.tuningCurveStats import tuning_curve_stats
 
+# Miscellanious
+from opexebo.analysis.populationVectorCorrelation import population_vector_correlation
+
 # Experimental
 from opexebo.analysis.speedScore import speed_score
 from opexebo.analysis.borderCoverage import border_coverage
 from opexebo.analysis.borderScore import border_score
 
 __all__ = ["spatial_occupancy", "rate_map", "rate_map_stats", "rate_map_coherence",
         "grid_score", "grid_score_stats", "autocorrelation", "place_field", 
-           "angular_occupancy", "tuning_curve", "tuning_curve_stats",
+           "angular_occupancy", "tuning_curve", "tuning_curve_stats", 
+           "population_vector_correlation", 
            "border_coverage", "border_score", "speed_score"]
diff --git a/opexebo/analysis/angularOccupancy.py b/opexebo/analysis/angularOccupancy.py
@@ -70,8 +70,8 @@ def angular_occupancy(time, angle, **kwargs):
     frame_duration = np.mean(np.diff(time))
     masked_angle_seconds = masked_angle_histogram * frame_duration
 
-    # Calculate the fractional coverage based on the mask. Since the mask is 
-    # False where the animal HAS gone, invert it first (just for this calculation)
-    coverage = np.sum(np.logical_not(masked_angle_seconds.mask)) / masked_angle_seconds.size
+    # Calculate the fractional coverage based on locations where the histogram
+    # is zero. If all locations are  non-zero, then coverage is 1.0
+    coverage = np.count_nonzero(angle_histogram) / masked_angle_seconds.size
 
     return masked_angle_seconds, coverage, bin_edges
diff --git a/opexebo/analysis/populationVectorCorrelation.py b/opexebo/analysis/populationVectorCorrelation.py
@@ -0,0 +1,229 @@
+"""
+Provide function for population vector correlation calculation
+"""
+import numpy as np
+
+
+def population_vector_correlation(stack_0, stack_1, **kwargs):
+    """Calculates the bin-wise correlation between two stacks of rate maps
+    
+    Each stack corresponds to a separate Task, or trial. Each layer is the 
+    ratemap for a single cell from that Task. The same units should be given in
+    the same order in each stack.
+    
+    Take a single column through the stack (i.e. 1 single bin/location in
+    arena, with a firing rate for each cell), from each stack
+    
+    In the original MatLab implementation, three output modes were supported:
+    - 1D: (1x numYbins)
+        iterate over i
+            Take a 2D slice from each stack - all cells at all X positions at a
+            single Y position i
+            Reshape from 2D to 1D 
+            Calculate the Pearson correlation coefficient between the two 1D
+            arrays
+            The value of pv_corr[i] is the Pearson correlation coefficient
+            arising from Y position i
+    - 2D (numXbins x numYbins)
+        iterate over i
+            Take a 2D slice from each stack - all cells at all X positions at a
+            single Y position i
+            Calculate the 2D array (numXbins x numYbins) where the [j,k]th
+            value is the Pearson correlation coefficient between all
+            observations at the j'th X location in stack_left and the k'th
+            location in stack_right
+            The i'th row of pv_corr is the DIAGONAL of the correlation matrix
+            i.e. where j==k i.e. the correlation of the the SAME location in
+            each stack for all observations (numCells)
+    - 3D (numXbins x numYbins x iteration(=numYbins))
+        Same as 2D BUT take the whole correlation matrix, not the diagonal
+        i.e. the full [j,k] correlatio between all X locations
+    
+    A note on correlation in Numpy vs Matlab
+        Matlab's `corr(a, b)` function returns the correlation of ab
+        Numpy's `corrcoef` function returns the normalised covariance matrix,
+        which is:
+                aa  ab
+                ba  aa
+        The normalised covariance matrix *should* be hermitian, but due to
+        floating point accuracy, this is not actually guaranteed
+        the MatLab function can be reproduced by taking either [0, 1] or [1,0]
+        of the normalised covariance matrix. 
+    
+        If `a`, `b` are 2D matricies, then they should have shape
+            (num_variables, num_observations)
+        In the case of this function, where the iterator is over the Y values
+        of the rate map, that means:
+            (x_bins, num_cells)
+            
+
+    Parameters
+    ----------
+    stack_0 : 3D array -or- list of 2D arrays
+    stack_1 : 3D array -or- list of 2D arrays
+        stack_x[i] should return the i'th ratemap. This corresponds to a 
+        constructor like:
+            np.zeros(num_layers, y_bins, x_bins)
+        Alternatively, a list or tuple of 2D arrays may be supplied:
+            stack_x = (ratemap_0, ratemap_1, ratemap_2, ...)
+    row_major : bool
+        Direction of iteration. If True, then each row is iterated over in turn
+        and correlation is calculated per row. 
+        If false, then each column is iterated over in turn, and correlation is 
+        calculated per column. 
+        Default True (same behavior as in BNT)
+            
+
+    Returns
+    -------
+    (p1, p2, p3)
+    p1 : np.ndarray (1D, iterator x 1)
+        Array of Pearson correlation coefficients. i'th value is given by the 
+        correlation of the i'th flattened slice of stack_0 to the i'th
+        flattened slice  of stack_1
+    p2 : np.ndarray (2D, iterator x non-iterator)
+        
+        
+        
+
+    See Also
+    --------
+    BNT.+analyses.populationVectorCorrelation
+
+    Copyright (C) 2019 by Simon Ball
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 3 of the License, or
+    (at your option) any later version.
+    """
+    debug = kwargs.get("debug", False)
+    row_major = kwargs.get("row_major", True)
+
+    # Perform input validation and ensure we have a pair of 3D arrays
+    stack_0, stack_1 = _handle_both_inputs(stack_0, stack_1)
+
+    # _handle_ has ensured that both arrays meet the shape/type requirements
+    # Hardcode iterating over Y for now. 
+    num_cells, y_bins, x_bins = stack_0.shape
+    if row_major:
+        iterator = y_bins
+        non_iterator = x_bins
+    else:
+        iterator = x_bins
+        non_iterator = y_bins
+
+    if debug:
+        print(f"Number of ratemaps: {num_cells}")
+        print(f"Ratemap dimensions: {y_bins} x {x_bins}")
+        print(f"Iterating over axis length {iterator} (row_major is {row_major})")
+
+    p1 = np.zeros(iterator)
+    p2 = np.zeros((iterator, non_iterator))
+    p3 = np.zeros((iterator, non_iterator, non_iterator))
+
+    for i in range(iterator):
+        if row_major:
+            left = stack_0[:, i, :].transpose()
+            right = stack_1[:, i, :].transpose()
+        else:
+            left = stack_0[:, :, i].transpose()
+            right = stack_1[:, :, i].transpose()
+
+        # 1D
+        # Reshape 2D array to a 1D array
+        correlation_value = np.corrcoef(left.flatten(), right.flatten())[0,1]
+        p1[i] = correlation_value
+
+        # 2D, 3D
+        correlation_matrix = np.corrcoef(left, right)[0:non_iterator, non_iterator:]
+        p2[i, :] = np.diagonal(correlation_matrix)
+        p3[i, :, :] = correlation_matrix
+
+    return (p1, p2, p3)
+
+
+
+
+
+
+
+
+
+
+
+
+###############################################################################
+#############
+#############           Error checking
+#############
+
+
+def _handle_both_inputs(stack_0, stack_1):
+    '''Handle error checking across both main inputs'''
+    stack_0 = _handle_single_input(stack_0, 0)
+    stack_1 = _handle_single_input(stack_1, 1)
+    if stack_0.shape[0] != stack_1.shape[0]:
+        raise ValueError("You have a different number of rate maps in each stack.")
+    if stack_0.shape[1:] != stack_1.shape[1:]:
+        raise ValueError("Your rate maps do not have matching dimensions")
+    return stack_0, stack_1
+
+def _handle_single_input(stack, i):
+    '''Handle the input stack(s) and provide a correctly formatted 3D array
+    
+    Handle error checking for a variety of conditions for a single stack
+    If not already a MaskedArray, then convert to that
+    
+    Parameters
+    ----------
+    stack : array-like
+        One of main inputs to population_vector_correlation.
+        Should be either a 3D array, where each layer (stack[j]) is a RateMap,
+        OR a list of 2D arrays, where each array is a 2D RateMap. 
+        If a list of arrays, all arrays must be the same dimension
+    i : int
+        Index of stack input, solely used for providing more meaningful error
+        message
+
+    Returns
+    -------
+    stack : np.ma.MaskedArray
+        3D array of RateMaps, masked at invalid values
+    '''
+    dims = None
+    t = type(stack)
+    if t not in (list, tuple, np.ndarray, np.ma.MaskedArray):
+        raise ValueError(f"Stack_{i} must be array-like. You provided {t}")
+    elif t in (tuple, list):
+        for element in stack:
+            e = type(element)
+            if e not in (np.ndarray, np.ma.MaskedArray):
+                raise ValueError(f"The elements of the list stack_{i} must be"\
+                                 f" NumPy arrays. You provided {e}")
+            if dims is None:
+                dims = element.shape
+            else:
+                if element.shape != dims:
+                    raise ValueError(f"Your ratemaps are not a consistent"\
+                                     f" shape in stack_{i}")
+        # Passes error handling, now convert from list to masked array
+        stack = np.ma.masked_invalid(stack)
+    elif isinstance(stack, np.ndarray):
+        # Ok, but convert to masked array
+        stack = np.ma.masked_invalid(stack)
+        dims = stack.shape[1:]
+    else:
+        # Instance is already a Masked Array
+        dims = stack.shape[1:]
+    return stack
+
+if __name__ == "__main__":
+    num_cells = 24
+    num_bins = 10
+    stack_0 = np.random.rand(num_cells, num_bins, num_bins+1)
+    stack_1 = np.random.rand(num_cells, num_bins, num_bins+1)
+    p = population_vector_correlation(stack_0, stack_1, debug=True, row_major=False)
+    print(p[2].shape)
+    plt.imshow(p[1])
+
diff --git a/opexebo/analysis/rateMap.py b/opexebo/analysis/rateMap.py
@@ -4,10 +4,10 @@
 import numpy as np
 
 import opexebo
+import opexebo.defaults as default
 
 
-
-def rate_map(occupancy_map, spikes, **kwargs):
+def rate_map(occupancy_map, spikes_tracking, **kwargs):
     ''' Calculate the spatially correlated firing rate map
     
     The rate map is calculated by the number of spikes in a bin divided by the
@@ -21,13 +21,17 @@ def rate_map(occupancy_map, spikes, **kwargs):
     Parameters
     ----------
     occupancy_map : np.ndarray or np.ma.MaskedArray
-        Nx1 or Nx2 array of time spent by animal in each bin
+        Nx1 or Nx2 array of time spent by animal in each bin, with time in bins
     spikes : np.ndarray
-        Nx2 or Nx3 array of spike positions: [t, x] or [t, x, y]. Must have the
-        same dimensionality as positions (i.e. 1d, 2d)
+        Nx3 or Nx4 array of spikes tracking: [t, speed, x] or [t, speed, x, y].
+        Speeds are used for the same purpose as in Spatialoccupancy - spikes
+        occurring with an instaneous speed below the threshold are discarded
     **kwargs
         bin_width : float. 
             Bin size in cm. Bins are always assumed square default 2.5 cm.
+        speed_cutoff    : float. 
+            Timestamps with instantaneous speed beneath this value are ignored. 
+            Default 0
         arena_size : float or tuple of floats. 
             Dimensions of arena (in cm)
             For a linear track, length
@@ -67,16 +71,16 @@ def rate_map(occupancy_map, spikes, **kwargs):
         raise ValueError("Occupancy Map not provided in usable format. Please"\
             " provide either a Numpy ndarray or Numpy MaskedArray. You"\
             " provided %s." % type(occupancy_map))
-    if type(spikes) not in (np.ndarray, np.ma.MaskedArray) :
+    if type(spikes_tracking) not in (np.ndarray, np.ma.MaskedArray) :
         raise ValueError("spikes not provided in usable format. Please"\
             " provide either a Numpy ndarray or Numpy MaskedArray. You"\
-            " provided %s." % type(spikes))
+            " provided %s." % type(spikes_tracking))
 
     dims_p = occupancy_map.ndim
-    dims_s, num_samples_s = spikes.shape
-    if dims_s-1 != dims_p:
+    dims_s, num_samples_s = spikes_tracking.shape
+    if dims_s-2 != dims_p:
         raise ValueError("Spikes must have the same number of spatial"\
-            " dimensions as positions ([t,x] or [t, x, y]). You have provided"\
+            " dimensions as positions ([t, s, x] or [t, s, x, y]). You have provided"\
             " %d columns of spikes, and %d columns of positions" % (dims_s, dims_p))
 
     if "arena_size" not in kwargs:
@@ -86,8 +90,20 @@ def rate_map(occupancy_map, spikes, **kwargs):
     if type(occupancy_map) == np.ndarray:
         occupancy_map = np.ma.MaskedArray(occupancy_map)
 
+    speed_cutoff = kwargs.get("speed_cutoff", default.speed_cutoff)
+
+    times = spikes_tracking[0, :] # never actually used
+    speeds = spikes_tracking[1, :]
+    good_speeds = speeds>speed_cutoff
+    if dims_s == 3:
+        spikes = spikes_tracking[2, :][good_speeds]
+    elif dims_s == 4:
+        spikes_x = spikes_tracking[2, :][good_speeds]
+        spikes_y = spikes_tracking[3, :][good_speeds]
+        spikes = np.array((spikes_x, spikes_y))
+
     # Histogram of spike positions
-    spike_map = opexebo.general.accumulate_spatial(spikes[1:,:], **kwargs)[0]
+    spike_map = opexebo.general.accumulate_spatial(spikes, **kwargs)[0]
 
     if spike_map.shape != occupancy_map.shape:
         raise ValueError("Rate Map and Occupancy Map must have the same"\