updates #35

Merged · 1 commit merged on Feb 20, 2024
25 changes: 25 additions & 0 deletions base/PyNucleus_base/CSR_LinearOperator_{SCALAR}.pxi
@@ -517,6 +517,31 @@ cdef class {SCALAR_label}CSR_VectorLinearOperator({SCALAR_label}VectorLinearOperator):
y[i, k] += self.data[jj, k]*x[j]
return 0

cdef INDEX_t matvecTrans({SCALAR_label}CSR_VectorLinearOperator self,
{SCALAR}_t[::1] x,
{SCALAR}_t[:, ::1] y) except -1:
cdef:
INDEX_t i, j, jj, k
y[:, :] = 0.
for i in range(self.indptr.shape[0]-1):
for jj in range(self.indptr[i], self.indptr[i+1]):
j = self.indices[jj]
for k in range(self.data.shape[1]):
y[j, k] += self.data[jj, k]*x[i]
return 0

cdef INDEX_t matvecTrans_no_overwrite({SCALAR_label}CSR_VectorLinearOperator self,
{SCALAR}_t[::1] x,
{SCALAR}_t[:, ::1] y) except -1:
cdef:
INDEX_t i, j, jj, k
for i in range(self.indptr.shape[0]-1):
for jj in range(self.indptr[i], self.indptr[i+1]):
j = self.indices[jj]
for k in range(self.data.shape[1]):
y[j, k] += self.data[jj, k]*x[i]
return 0

def isSparse(self):
return True

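
The two new CSR kernels apply the transpose of the vector-valued operator: matvec accumulates y[i, k] += data[jj, k]*x[j] over the stored entries (i, j), whereas matvecTrans scatters into y[j, k] from x[i], i.e. it multiplies by the transpose of each component matrix. A minimal NumPy/SciPy sketch of that semantics (sizes and values are illustrative, not taken from the PR):

import numpy as np
from scipy.sparse import csr_matrix

indptr = np.array([0, 2, 3])     # hypothetical 2x2 sparsity pattern with 3 stored entries
indices = np.array([0, 1, 1])
data = np.random.rand(3, 2)      # one value per stored entry and per component k
x = np.random.rand(2)

y = np.zeros((2, 2))             # matvecTrans overwrites y; the _no_overwrite variant skips the zeroing
for k in range(data.shape[1]):
    A_k = csr_matrix((data[:, k], indices, indptr), shape=(2, 2))
    y[:, k] = A_k.T @ x          # same accumulation as y[j, k] += data[jj, k]*x[i]
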
6 changes: 6 additions & 0 deletions base/PyNucleus_base/LinearOperator_decl_{SCALAR}.pxi
@@ -110,6 +110,12 @@ cdef class {SCALAR_label}VectorLinearOperator:
cdef INDEX_t matvec_no_overwrite(self,
{SCALAR}_t[::1] x,
{SCALAR}_t[:, ::1] y) except -1
cdef INDEX_t matvecTrans(self,
{SCALAR}_t[::1] x,
{SCALAR}_t[:, ::1] y) except -1
cdef INDEX_t matvecTrans_no_overwrite(self,
{SCALAR}_t[::1] x,
{SCALAR}_t[:, ::1] y) except -1
cdef void addToEntry(self, INDEX_t I, INDEX_t J, {SCALAR}_t[::1] val)
cdef void getEntry(self, INDEX_t I, INDEX_t J, {SCALAR}_t[::1] val)
cdef void setEntry(self, INDEX_t I, INDEX_t J, {SCALAR}_t[::1] val)
30 changes: 25 additions & 5 deletions base/PyNucleus_base/LinearOperator_{SCALAR}.pxi
@@ -46,7 +46,10 @@ cdef class {SCALAR_label}LinearOperator:
else:
self.matvec(x, y)
else:
self.matvecTrans(x, y)
if no_overwrite:
self.matvecTrans_no_overwrite(x, y)
else:
self.matvecTrans(x, y)

def dot(self, {SCALAR}_t[::1] x):
cdef:
@@ -545,14 +548,31 @@ cdef class {SCALAR_label}VectorLinearOperator:
{SCALAR}_t[:, ::1] y) except -1:
return -1

cdef INDEX_t matvecTrans(self,
{SCALAR}_t[::1] x,
{SCALAR}_t[:, ::1] y) except -1:
return -1

cdef INDEX_t matvecTrans_no_overwrite(self,
{SCALAR}_t[::1] x,
{SCALAR}_t[:, ::1] y) except -1:
return -1

def __call__(self,
{SCALAR}_t[::1] x,
{SCALAR}_t[:, ::1] y,
BOOL_t no_overwrite=False):
if no_overwrite:
self.matvec_no_overwrite(x, y)
BOOL_t no_overwrite=False,
BOOL_t trans=False):
if not trans:
if no_overwrite:
self.matvec_no_overwrite(x, y)
else:
self.matvec(x, y)
else:
self.matvec(x, y)
if no_overwrite:
self.matvecTrans_no_overwrite(x, y)
else:
self.matvecTrans(x, y)

def __mul__(self, x):
cdef:
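
VectorLinearOperator.__call__ now mirrors the scalar operator: trans selects the transpose kernels and no_overwrite selects the accumulating variants. A pure-Python sketch of that dispatch with a dense stand-in matrix (names are illustrative, not the compiled API):

import numpy as np

def apply_operator(A, x, y, no_overwrite=False, trans=False):
    # trans picks matvecTrans/matvecTrans_no_overwrite, otherwise matvec/matvec_no_overwrite
    result = (A.T if trans else A) @ x
    if no_overwrite:
        y += result      # accumulate into y
    else:
        y[:] = result    # overwrite y

A = np.arange(6.0).reshape(2, 3)
x = np.ones(2)
y = np.zeros(3)
apply_operator(A, x, y, trans=True)                      # y = A^T x
apply_operator(A, x, y, no_overwrite=True, trans=True)   # y += A^T x
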
10 changes: 10 additions & 0 deletions base/PyNucleus_base/SSS_LinearOperator_{SCALAR}.pxi
@@ -392,6 +392,16 @@ cdef class {SCALAR_label}SSS_VectorLinearOperator({SCALAR_label}VectorLinearOperator):
y[i, l] += self.temp[l]
return 0

cdef INDEX_t matvecTrans({SCALAR_label}SSS_VectorLinearOperator self,
{SCALAR}_t[::1] x,
{SCALAR}_t[:, ::1] y) except -1:
return self.matvec(x, y)

cdef INDEX_t matvecTrans_no_overwrite({SCALAR_label}SSS_VectorLinearOperator self,
{SCALAR}_t[::1] x,
{SCALAR}_t[:, ::1] y) except -1:
return self.matvec_no_overwrite(x, y)

cdef void setEntry({SCALAR_label}SSS_VectorLinearOperator self, INDEX_t I, INDEX_t J, {SCALAR}_t[::1] val):
cdef:
INDEX_t i, low, mid, high, l
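
For the symmetric SSS storage the transpose kernels simply forward to the plain ones, since a symmetric matrix satisfies A^T = A. A trivial dense check of that identity:

import numpy as np

A = np.array([[2.0, 1.0],
              [1.0, 3.0]])       # symmetric stand-in for an SSS-stored matrix
x = np.array([1.0, -1.0])
assert np.allclose(A.T @ x, A @ x)
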
8 changes: 4 additions & 4 deletions base/PyNucleus_base/intTuple.pyx
@@ -7,7 +7,7 @@

import numpy as np
cimport numpy as np
from libc.stdlib cimport malloc
from cpython.mem cimport PyMem_Malloc
from libc.string cimport memcpy
from . myTypes import INDEX

@@ -19,7 +19,7 @@ cdef enum:
cdef class intTuple:
cdef void set(self, INDEX_t * t, int size):
self.size = size
self.entries = <INDEX_t *>malloc(size*INDEX_SIZE)
self.entries = <INDEX_t *>PyMem_Malloc(size*INDEX_SIZE)
memcpy(&self.entries[0], &t[0], size*INDEX_SIZE)

cdef void assign(self, INDEX_t * t):
@@ -55,7 +55,7 @@ cdef class intTuple:
cdef:
intTuple t = intTuple()
t.size = 2
t.entries = <INDEX_t *>malloc(2*INDEX_SIZE)
t.entries = <INDEX_t *>PyMem_Malloc(2*INDEX_SIZE)
t.entries[0] = a
t.entries[1] = b
return t
@@ -69,7 +69,7 @@
cdef:
intTuple t = intTuple()
t.size = 3
t.entries = <INDEX_t *>malloc(3*INDEX_SIZE)
t.entries = <INDEX_t *>PyMem_Malloc(3*INDEX_SIZE)
t.entries[0] = a
t.entries[1] = b
t.entries[2] = c
29 changes: 24 additions & 5 deletions base/PyNucleus_base/opt_false_blas.pxi
@@ -54,7 +54,7 @@ cdef void scaleScalar(SCALAR_t[::1] x, SCALAR_t alpha):
for i in range(x.shape[0]):
x[i] = x[i]*alpha

cdef SCALAR_t mydot(SCALAR_t[::1] v0, SCALAR_t[::1] v1) nogil:
cdef SCALAR_t mydot(SCALAR_t[::1] v0, SCALAR_t[::1] v1):
cdef:
int i
SCALAR_t s = 0.0
@@ -110,6 +110,25 @@ cdef void gemv(SCALAR_t[:, ::1] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.):
y[i] = s


cdef void gemvF(SCALAR_t[::1, :] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.):
cdef:
INDEX_t i, j
if SCALAR_t is COMPLEX_t:
if beta != 0.:
for i in range(A.shape[0]):
y[i] = beta*y[i]
for j in range(A.shape[1]):
for i in range(A.shape[0]):
y[i] = y[i] + A[i, j]*x[j]
else:
if beta != 0.:
for i in range(A.shape[0]):
y[i] *= beta
for j in range(A.shape[1]):
for i in range(A.shape[0]):
y[i] += A[i, j]*x[j]


cdef void gemvT(SCALAR_t[:, ::1] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.):
cdef:
INDEX_t i, j
@@ -119,17 +138,17 @@ cdef void gemvT(SCALAR_t[:, ::1] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.):
y[i] = y[i]*beta
else:
y[:] = 0.
for j in range(A.shape[1]):
for i in range(A.shape[0]):
for i in range(A.shape[1]):
for j in range(A.shape[0]):
y[i] = y[i]+A[j, i]*x[j]
else:
if beta != 0.:
for i in range(A.shape[0]):
y[i] *= beta
else:
y[:] = 0.
for j in range(A.shape[1]):
for i in range(A.shape[0]):
for i in range(A.shape[1]):
for j in range(A.shape[0]):
y[i] += A[j, i]*x[j]


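Two things change in the non-BLAS fallback kernels: a new gemvF handles column-major (Fortran-ordered) matrices by keeping the column index j in the outer loop, so the innermost accesses walk contiguously down a column, and gemvT's loop bounds are corrected so that y is indexed over A.shape[1] and x over A.shape[0]; the previous bounds were only correct for square A. A short NumPy reference for the intended results (values illustrative, not from the PR):

import numpy as np

m, n = 3, 4                        # deliberately non-square to exercise the corrected bounds
A = np.random.rand(m, n)
x = np.random.rand(n)
xT = np.random.rand(m)

AF = np.asfortranarray(A)          # gemvF input: same matrix, column-major layout
assert np.allclose(AF @ x, A @ x)  # gemvF: y = A @ x, identical result, different traversal

y = A.T @ xT                       # gemvT: y = A^T @ x, so y has length n = A.shape[1]
assert y.shape == (n,)
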
6 changes: 3 additions & 3 deletions base/PyNucleus_base/opt_true_blas.pxi
@@ -215,10 +215,10 @@ cdef void gemvT(SCALAR_t[:, ::1] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.):
double complex* A_ptr_c
double complex* x_ptr_c
double complex* y_ptr_c
int m = A.shape[0]
int n = A.shape[1]
int m = A.shape[1]
int n = A.shape[0]
SCALAR_t alpha = 1.
int lda = A.shape[0]
int lda = A.shape[1]
int incx = 1
int incy = 1
if SCALAR_t is COMPLEX_t:
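
In the BLAS-backed gemvT the dimensions handed to ?gemv describe the matrix as stored in column-major order: a C-contiguous A of shape (rows, cols) reinterpreted column-major with leading dimension cols is exactly A^T, which is why m, n and lda now come from A.shape[1] and A.shape[0]. A small NumPy sketch of that buffer reinterpretation (illustrative only, not the BLAS call itself):

import numpy as np

A = np.arange(6.0).reshape(2, 3)   # C-contiguous, rows=2, cols=3
# Same buffer viewed as a column-major 3x2 matrix (m=3, n=2, lda=3) equals A.T:
At = np.ndarray((3, 2), dtype=A.dtype, buffer=A, order='F')
assert np.array_equal(At, A.T)

x = np.array([1.0, 2.0])
assert np.allclose(At @ x, A.T @ x)  # a plain ('N') gemv on the reinterpreted buffer yields A^T x
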
12 changes: 6 additions & 6 deletions base/PyNucleus_base/sparsityPattern.pyx
@@ -5,7 +5,7 @@
# If you want to use this code, please refer to the README.rst and LICENSE files. #
###################################################################################

from libc.stdlib cimport malloc, realloc, free
from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
import numpy as np
cimport numpy as np
from . myTypes import INDEX
@@ -21,10 +21,10 @@ cdef class sparsityPattern:
self.nnz = 0
self.counts = np.zeros((num_dofs), dtype=INDEX)
self.lengths = initial_length*np.ones((num_dofs), dtype=np.uint16)
self.indexL = <INDEX_t **>malloc(num_dofs*sizeof(INDEX_t *))
self.indexL = <INDEX_t **>PyMem_Malloc(num_dofs*sizeof(INDEX_t *))
# reserve initial memory for array of variable column size
for i in range(num_dofs):
self.indexL[i] = <INDEX_t *>malloc(self.initial_length*sizeof(INDEX_t))
self.indexL[i] = <INDEX_t *>PyMem_Malloc(self.initial_length*sizeof(INDEX_t))

cdef inline BOOL_t findIndex(self, INDEX_t I, INDEX_t J):
cdef:
@@ -68,7 +68,7 @@ cdef class sparsityPattern:
# J was not present
# Do we need more space?
if self.counts[I] == self.lengths[I]:
self.indexL[I] = <INDEX_t *>realloc(self.indexL[I], (self.lengths[I]+self.initial_length)*sizeof(INDEX_t))
self.indexL[I] = <INDEX_t *>PyMem_Realloc(self.indexL[I], (self.lengths[I]+self.initial_length)*sizeof(INDEX_t))
self.lengths[I] += self.initial_length
# where should we insert?
m = self.index
@@ -95,8 +95,8 @@
for j in range(self.counts[i]):
indices[k] = self.indexL[i][j]
k += 1
free(self.indexL[i])
free(self.indexL)
PyMem_Free(self.indexL[i])
PyMem_Free(self.indexL)

# fill indptr array
indptr_mem = uninitialized((self.num_dofs+1), dtype=INDEX)
1 change: 1 addition & 0 deletions base/PyNucleus_base/tupleDict.pxd
@@ -15,6 +15,7 @@ ctypedef np.uint64_t MEM_t

cdef class indexSet:
cdef BOOL_t inSet(self, INDEX_t i)
cdef INDEX_t position(self, INDEX_t i)
cpdef void fromSet(self, set s)
cpdef set toSet(self)
cpdef INDEX_t[::1] toArray(self)
40 changes: 36 additions & 4 deletions base/PyNucleus_base/tupleDict.pyx
@@ -6,7 +6,7 @@
###################################################################################

import numpy as np
from libc.stdlib cimport malloc, realloc, free
from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
from libc.stdlib cimport qsort
from . myTypes import INDEX

@@ -25,6 +25,9 @@ cdef class indexSet:
def inSet_py(self, INDEX_t i):
return self.inSet(i)

cdef INDEX_t position(self, INDEX_t i):
raise NotImplementedError()

cpdef void fromSet(self, set s):
raise NotImplementedError()

@@ -176,6 +179,27 @@ cdef class arrayIndexSet(indexSet):
high = mid
return True

cdef INDEX_t position(self, INDEX_t i):
cdef:
INDEX_t low = 0
INDEX_t high = self.indexArray.shape[0]
INDEX_t mid
if high-low < 20:
for mid in range(low, high):
if self.indexArray[mid] == i:
return mid
return -1
else:
while self.indexArray[low] != i:
if high-low <= 1:
return -1
mid = (low+high) >> 1
if self.indexArray[mid] <= i:
low = mid
else:
high = mid
return low

cpdef void fromSet(self, set s):
cdef:
INDEX_t i, k
@@ -385,6 +409,14 @@ cdef class unsortedArrayIndexSet(arrayIndexSet):
return True
return False

cdef INDEX_t position(self, INDEX_t i):
cdef:
INDEX_t j
for j in range(self.indexArray.shape[0]):
if self.indexArray[j] == i:
return j
return -1

cpdef void fromSet(self, set s):
cdef:
INDEX_t i, k
@@ -446,7 +478,7 @@ cdef class arrayIndexSetIterator(indexSetIterator):
cdef class bitArray(indexSet):
def __init__(self, size_t hintMaxLength=1, INDEX_t maxElement=0):
self.length = max(hintMaxLength, maxElement/(sizeof(MEM_t)*8)+1)
self.a = <MEM_t *>malloc(self.length*sizeof(MEM_t))
self.a = <MEM_t *>PyMem_Malloc(self.length*sizeof(MEM_t))
for j in range(self.length):
self.a[j] = 0

@@ -459,7 +491,7 @@ cdef class bitArray(indexSet):
if k >= self.length:
l = self.length
self.length = k+1
self.a = <MEM_t *>realloc(self.a, self.length * sizeof(MEM_t))
self.a = <MEM_t *>PyMem_Realloc(self.a, self.length * sizeof(MEM_t))
for j in range(l, self.length):
self.a[j] = 0
self.a[k] |= one << n
@@ -510,7 +542,7 @@ cdef class bitArray(indexSet):
self.a[j] = 0

def __dealloc__(self):
free(self.a)
PyMem_Free(self.a)

cdef indexSetIterator getIter(self):
return bitArrayIterator(self)
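
The new position() method returns the slot of an index within the set, or -1 if it is absent; arrayIndexSet uses a linear scan for short arrays and a bisection otherwise, while unsortedArrayIndexSet always scans linearly. A pure-Python equivalent of the sorted case using the bisect module (illustrative):

from bisect import bisect_left

def position(index_array, i):
    # index_array is assumed sorted, mirroring arrayIndexSet.indexArray
    k = bisect_left(index_array, i)
    return k if k < len(index_array) and index_array[k] == i else -1

assert position([2, 5, 9], 5) == 1
assert position([2, 5, 9], 7) == -1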