made type definitions in cython match numpy types explicitly
HeikoSchuett committed Jul 2, 2024
1 parent 3a95b2f commit 45998f6
Showing 3 changed files with 74 additions and 68 deletions.
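The substance of the change: the Cython memoryviews previously used the C types double and long, and the width of C long varies between platforms, so the buffer dtypes the compiled extension accepted could differ between builds. The new typedefs pin both element types to NumPy's fixed-width scalars. A small illustrative sketch of the correspondence on the Python side (not part of the commit):

    import numpy as np
    # float_t in the diff below is cnp.float64_t, int_t is cnp.int64_t
    print(np.dtype(np.float64).itemsize, np.dtype(np.int64).itemsize)  # always 8 and 8
    print(np.dtype('l').itemsize)  # 'l' is C long: 4 on Windows builds, 8 on most 64-bit Linux/macOS builds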
6 changes: 4 additions & 2 deletions setup.py
@@ -6,12 +6,14 @@
from setuptools import setup, Extension
import setuptools_scm # noqa # pylint: disable=unused-import
from Cython.Build import build_ext
import numpy


setup(
ext_modules = [
ext_modules=[
Extension(
"rsatoolbox.cengine.similarity",
["src/rsatoolbox/cengine/similarity.pyx"])],
["src/rsatoolbox/cengine/similarity.pyx"],
include_dirs=[numpy.get_include()])],
cmdclass={'build_ext': build_ext}
)
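Because similarity.pyx now cimports numpy, the extension must be compiled against NumPy's C headers, which is what include_dirs=[numpy.get_include()] supplies. A minimal sketch of checking the header location and rebuilding the extension locally (the commands are assumptions about a standard setuptools workflow, not part of the commit):

    import numpy
    # directory that contains numpy/arrayobject.h; the exact path depends on the install
    print(numpy.get_include())
    # then rebuild in place, for example with:
    #   pip install -e .
    #   # or: python setup.py build_ext --inplace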
130 changes: 67 additions & 63 deletions src/rsatoolbox/cengine/similarity.pyx
@@ -6,15 +6,20 @@ from cython.view cimport array as cvarray
from libc.math cimport log, sqrt, isnan, NAN
from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
cimport scipy.linalg.cython_blas as blas
cimport numpy as cnp

cnp.import_array()

ctypedef cnp.int64_t int_t
ctypedef cnp.float64_t float_t

@cython.boundscheck(False)
@cython.cdivision(True)
cpdef double [:] calc(
double [:, :] data, long [:] desc,
long [:] cv_desc, int n,
int method_idx, double [:, :] noise=None,
double prior_lambda=1, double prior_weight=0.1,
cpdef float_t [:] calc(
float_t [:, :] data, int_t [:] desc,
int_t [:] cv_desc, int n,
int method_idx, float_t [:, :] noise=None,
float_t prior_lambda=1, float_t prior_weight=0.1,
int weighting=1, int crossval=0):
# calculates an RDM from a double array of data with integer descriptors
# There are no checks or safeguards in this function!
@@ -37,17 +42,17 @@ cpdef double [:] calc(
# 0: each row has equal weight
# 1: rows weighted by number of valid measurements
cdef:
double [:] vec_i
double [:] vec_j
double weight, sim
double [:] weights
double [:] values
float_t [:] vec_i
float_t [:] vec_j
float_t weight, sim
float_t [:] weights
float_t [:] values
int i, j, idx
int n_rdm = (n * (n-1)) / 2
int n_dim = data.shape[1]
double prior_lambda_l = prior_lambda * prior_weight
double prior_weight_l = 1 + prior_weight
double [:, :] log_data
float_t prior_lambda_l = prior_lambda * prior_weight
float_t prior_weight_l = 1 + prior_weight
float_t [:, :] log_data
if (method_idx > 4) or (method_idx < 1):
raise ValueError('dissimilarity method not recognized!')
# precompute stuff for poisson KL
@@ -58,8 +63,8 @@
for j in range(n_dim):
data[i, j] = (data[i, j] + prior_lambda_l) / prior_weight_l
log_data[i, j] = log(data[i, j])
weights = <double [:(n_rdm+n)]> PyMem_Malloc((n_rdm+n) * sizeof(double))
values = <double [:(n_rdm+n)]> PyMem_Malloc((n_rdm+n) * sizeof(double))
weights = <float_t [:(n_rdm+n)]> PyMem_Malloc((n_rdm+n) * sizeof(float_t))
values = <float_t [:(n_rdm+n)]> PyMem_Malloc((n_rdm+n) * sizeof(float_t))
for idx in range(n_rdm + n):
weights[idx] = 0
values[idx] = 0
@@ -74,7 +79,7 @@
sim, weight = euclid(data[i], data[i], n_dim)
else:
sim = mahalanobis(data[i], data[i], n_dim, noise)
weight = <double> n_dim
weight = <float_t> n_dim
elif method_idx == 4: # method in ['poisson', 'poisson_cv']:
sim, weight = poisson_cv(data[i], data[i], log_data[i], log_data[i], n_dim)
idx = desc[i]
@@ -97,7 +102,7 @@
sim, weight = euclid(data[i], data[j], n_dim)
else:
sim = mahalanobis(data[i], data[j], n_dim, noise)
weight = <double> n_dim
weight = <float_t> n_dim
elif method_idx == 4: # method in ['poisson', 'poisson_cv']:
sim, weight = poisson_cv(data[i], data[j], log_data[i], log_data[j], n_dim)
if weight > 0:
@@ -124,25 +129,25 @@

@cython.boundscheck(False)
@cython.cdivision(True)
cpdef (double, double) calc_one(
double [:, :] data_i, double [:, :] data_j,
long [:] cv_desc_i, long [:] cv_desc_j,
cpdef (float_t, float_t) calc_one(
float_t [:, :] data_i, float_t [:, :] data_j,
int_t [:] cv_desc_i, int_t [:] cv_desc_j,
int n_i, int n_j,
int method_idx, double [:, :] noise=None,
double prior_lambda=1, double prior_weight=0.1,
int method_idx, float_t [:, :] noise=None,
float_t prior_lambda=1, float_t prior_weight=0.1,
int weighting=1):
cdef:
#double [:] values = np.zeros(n_i * n_j)
#double [:] weights = np.zeros(n_i * n_j)
double [:] vec_i
double [:] vec_j
double weight, sim, weight_sum, value
float_t [:] vec_i
float_t [:] vec_j
float_t weight, sim, weight_sum, value
int i, j
int n_dim = data_i.shape[1]
double prior_lambda_l = prior_lambda * prior_weight
double prior_weight_l = 1 + prior_weight
double [:, :] log_data_i
double [:, :] log_data_j
float_t prior_lambda_l = prior_lambda * prior_weight
float_t prior_weight_l = 1 + prior_weight
float_t [:, :] log_data_i
float_t [:, :] log_data_j
if (method_idx > 4) or (method_idx < 1):
raise ValueError('dissimilarity method not recognized!')
# precompute stuff for poisson KL
@@ -173,7 +178,7 @@ cpdef (double, double) calc_one(
sim, weight = euclid(data_i[i], data_j[j], n_dim)
else:
sim = mahalanobis(data_i[i], data_j[j], n_dim, noise)
weight = <double> n_dim
weight = <float_t> n_dim
elif method_idx == 4: # method in ['poisson', 'poisson_cv']:
sim, weight = poisson_cv(data_i[i], data_j[j], log_data_i[i], log_data_j[j], n_dim)
if weight > 0:
@@ -191,8 +196,8 @@ cpdef (double, double) calc_one(


@cython.boundscheck(False)
cpdef (double, double) similarity(double [:] vec_i, double [:] vec_j, int method_idx,
int n_dim, double [:, :] noise):
cpdef (float_t, float_t) similarity(float_t [:] vec_i, float_t [:] vec_j, int method_idx,
int n_dim, float_t [:, :] noise):
"""
double similarity(double [:] vec_i, double [:] vec_j, int method_idx,
int n_dim, double [:, :] noise=None)
@@ -203,8 +208,8 @@ cpdef (double, double) similarity(double [:] vec_i, double [:] vec_j, int method

Mahalanobis distances require full measurement vectors at the moment!
"""
cdef double sim
cdef double weight
cdef float_t sim
cdef float_t weight
if method_idx == 1: # method == 'euclidean':
sim, weight = euclid(vec_i, vec_j, n_dim)
elif method_idx == 2: # method == 'correlation':
@@ -214,15 +219,15 @@ cpdef (double, double) similarity(double [:] vec_i, double [:] vec_j, int method
sim, weight = euclid(vec_i, vec_j, n_dim)
else:
sim = mahalanobis(vec_i, vec_j, n_dim, noise)
weight = <double> n_dim
weight = <float_t> n_dim
return sim, weight


@cython.boundscheck(False)
cdef (double, double) euclid(double [:] vec_i, double [:] vec_j, int n_dim):
cdef (float_t, float_t) euclid(float_t [:] vec_i, float_t [:] vec_j, int n_dim):
cdef:
double sim = 0
double weight = 0
float_t sim = 0
float_t weight = 0
int i
for i in range(n_dim):
if not isnan(vec_i[i]) and not isnan(vec_j[i]):
@@ -233,12 +238,12 @@ cdef (double, double) euclid(double [:] vec_i, double [:] vec_j, int n_dim):

@cython.boundscheck(False)
@cython.cdivision(True)
cdef (double, double) poisson_cv(double [:] vec_i, double [:] vec_j,
double [:] log_vec_i, double [:] log_vec_j,
cdef (float_t, float_t) poisson_cv(float_t [:] vec_i, float_t [:] vec_j,
float_t [:] log_vec_i, float_t [:] log_vec_j,
int n_dim):
cdef:
double sim = 0
double weight = 0
float_t sim = 0
float_t weight = 0
int i
for i in range(n_dim):
if not isnan(vec_i[i]) and not isnan(vec_j[i]):
@@ -249,20 +254,20 @@


@cython.boundscheck(False)
cdef double mahalanobis(double [:] vec_i, double [:] vec_j, int n_dim,
double [:, :] noise):
cdef float_t mahalanobis(float_t [:] vec_i, float_t [:] vec_j, int n_dim,
float_t [:, :] noise):
cdef:
double *vec1
double *vec2
float_t *vec1
float_t *vec2
int *finite
int zero = 0
int one = 1
double onef = 1.0
double zerof = 0.0
float_t onef = 1.0
float_t zerof = 0.0
char trans = b'n'
double sim = 0.0
float_t sim = 0.0
int i, j, k, l, n_finite
double [:, :] noise_small
float_t [:, :] noise_small
finite = <int*> PyMem_Malloc(n_dim * sizeof(int))
# use finite as a bool to choose the non-nan values
n_finite = 0
Expand All @@ -272,11 +277,10 @@ cdef double mahalanobis(double [:] vec_i, double [:] vec_j, int n_dim,
n_finite += 1
else:
finite[i] = 0
vec1 = <double*> PyMem_Malloc(n_finite * sizeof(double))
vec2 = <double*> PyMem_Malloc(n_finite * sizeof(double))
vec3 = <double*> PyMem_Malloc(n_finite * sizeof(double))
#noise_small = <double [:n_finite, :n_finite]> PyMem_Malloc(n_finite * n_finite * sizeof(double))
noise_small = cvarray(shape=(n_finite, n_finite), itemsize=sizeof(double), format="d")
vec1 = <float_t*> PyMem_Malloc(n_finite * sizeof(float_t))
vec2 = <float_t*> PyMem_Malloc(n_finite * sizeof(float_t))
vec3 = <float_t*> PyMem_Malloc(n_finite * sizeof(float_t))
noise_small = cvarray(shape=(n_finite, n_finite), itemsize=sizeof(float_t), format="d")
k = 0
for i in range(n_dim):
if finite[i]:
@@ -300,15 +304,15 @@

@cython.boundscheck(False)
@cython.cdivision(True)
cdef (double, double) correlation(double [:] vec_i, double [:] vec_j, int n_dim):
cdef (float_t, float_t) correlation(float_t [:] vec_i, float_t [:] vec_j, int n_dim):
cdef:
double si = 0.0
double sj = 0.0
double si2 = 0.0
double sj2 = 0.0
double sij = 0.0
double sim
double weight = 0
float_t si = 0.0
float_t sj = 0.0
float_t si2 = 0.0
float_t sj2 = 0.0
float_t sij = 0.0
float_t sim
float_t weight = 0
int i
for i in range(n_dim):
if not isnan(vec_i[i]) and not isnan(vec_j[i]):
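One practical consequence of the new typedefs: the memoryview arguments now accept only buffers whose dtypes are exactly float64 and int64, and a float32 array or a platform-default integer array raises a buffer dtype mismatch instead of behaving differently across platforms. A minimal, hypothetical sketch of a direct call with matching dtypes (toy data; the argument order follows the signature in the diff above, and since calc itself performs no input checks, calc_rdm_unbalanced remains the normal entry point):

    import numpy as np
    from rsatoolbox.cengine.similarity import calc

    data = np.random.rand(6, 20)                         # measurements, float64 to match float_t
    desc = np.array([0, 0, 1, 1, 2, 2], dtype=np.int64)  # condition index per row, int64 to match int_t
    cv_desc = np.arange(6, dtype=np.int64)               # cross-validation label per row
    rdm = calc(data, desc, cv_desc, 3, 1)                # n=3 conditions, method_idx=1 is euclidean
    print(np.asarray(rdm))                               # 1-D array of dissimilarity values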
6 changes: 3 additions & 3 deletions src/rsatoolbox/rdm/calc_unbalanced.py
@@ -91,14 +91,14 @@ def calc_rdm_unbalanced(dataset: SingleOrMultiDataset, method='euclidean',
dataset.obs_descriptors[descriptor])
# unique_cond = set(dataset.obs_descriptors[descriptor])
if cv_descriptor is None:
cv_desc_int = np.arange(dataset.n_obs, dtype=int)
cv_desc_int = np.arange(dataset.n_obs, dtype=np.int64)
crossval = 0
else:
_, indices = np.unique(
dataset.obs_descriptors[cv_descriptor],
return_inverse=True
)
cv_desc_int = indices.astype(int)
cv_desc_int = indices.astype(np.int64)
crossval = 1
if method == 'euclidean':
method_idx = 1
@@ -114,7 +114,7 @@ def calc_rdm_unbalanced(dataset: SingleOrMultiDataset, method='euclidean',
weight_idx = 0
else:
weight_idx = 1
cond_indices_int = cond_indices.astype(int)
cond_indices_int = cond_indices.astype(np.int64)
rdm = calc(
ensure_double(dataset.measurements),
cond_indices_int,
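The Python-side change mirrors the typedefs: dtype=int and astype(int) resolve to NumPy's default integer, whose width depends on the platform and NumPy version, while np.int64 always matches the cnp.int64_t memoryviews in similarity.pyx. A small illustrative sketch (not from the commit):

    import numpy as np
    a = np.arange(5, dtype=int)       # default integer: 32-bit on Windows with NumPy < 2.0, 64-bit on most other builds
    b = np.arange(5, dtype=np.int64)  # always 64-bit, matching int_t on the Cython side
    print(a.dtype, b.dtype)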
