Skip to content

Commit 7809595

Browse files
committed
updates
1 parent e238042 commit 7809595

35 files changed

+1239
-622
lines changed

base/PyNucleus_base/CSR_LinearOperator_{SCALAR}.pxi

+25
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,31 @@ cdef class {SCALAR_label}CSR_VectorLinearOperator({SCALAR_label}VectorLinearOper
517517
y[i, k] += self.data[jj, k]*x[j]
518518
return 0
519519

520+
cdef INDEX_t matvecTrans({SCALAR_label}CSR_VectorLinearOperator self,
521+
{SCALAR}_t[::1] x,
522+
{SCALAR}_t[:, ::1] y) except -1:
523+
cdef:
524+
INDEX_t i, j, jj, k
525+
y[:, :] = 0.
526+
for i in range(self.indptr.shape[0]-1):
527+
for jj in range(self.indptr[i], self.indptr[i+1]):
528+
j = self.indices[jj]
529+
for k in range(self.data.shape[1]):
530+
y[j, k] += self.data[jj, k]*x[i]
531+
return 0
532+
533+
cdef INDEX_t matvecTrans_no_overwrite({SCALAR_label}CSR_VectorLinearOperator self,
534+
{SCALAR}_t[::1] x,
535+
{SCALAR}_t[:, ::1] y) except -1:
536+
cdef:
537+
INDEX_t i, j, jj, k
538+
for i in range(self.indptr.shape[0]-1):
539+
for jj in range(self.indptr[i], self.indptr[i+1]):
540+
j = self.indices[jj]
541+
for k in range(self.data.shape[1]):
542+
y[j, k] += self.data[jj, k]*x[i]
543+
return 0
544+
520545
def isSparse(self):
521546
return True
522547

base/PyNucleus_base/LinearOperator_decl_{SCALAR}.pxi

+6
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,12 @@ cdef class {SCALAR_label}VectorLinearOperator:
110110
cdef INDEX_t matvec_no_overwrite(self,
111111
{SCALAR}_t[::1] x,
112112
{SCALAR}_t[:, ::1] y) except -1
113+
cdef INDEX_t matvecTrans(self,
114+
{SCALAR}_t[::1] x,
115+
{SCALAR}_t[:, ::1] y) except -1
116+
cdef INDEX_t matvecTrans_no_overwrite(self,
117+
{SCALAR}_t[::1] x,
118+
{SCALAR}_t[:, ::1] y) except -1
113119
cdef void addToEntry(self, INDEX_t I, INDEX_t J, {SCALAR}_t[::1] val)
114120
cdef void getEntry(self, INDEX_t I, INDEX_t J, {SCALAR}_t[::1] val)
115121
cdef void setEntry(self, INDEX_t I, INDEX_t J, {SCALAR}_t[::1] val)

base/PyNucleus_base/LinearOperator_{SCALAR}.pxi

+25-5
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,10 @@ cdef class {SCALAR_label}LinearOperator:
4646
else:
4747
self.matvec(x, y)
4848
else:
49-
self.matvecTrans(x, y)
49+
if no_overwrite:
50+
self.matvecTrans_no_overwrite(x, y)
51+
else:
52+
self.matvecTrans(x, y)
5053

5154
def dot(self, {SCALAR}_t[::1] x):
5255
cdef:
@@ -545,14 +548,31 @@ cdef class {SCALAR_label}VectorLinearOperator:
545548
{SCALAR}_t[:, ::1] y) except -1:
546549
return -1
547550

551+
cdef INDEX_t matvecTrans(self,
552+
{SCALAR}_t[::1] x,
553+
{SCALAR}_t[:, ::1] y) except -1:
554+
return -1
555+
556+
cdef INDEX_t matvecTrans_no_overwrite(self,
557+
{SCALAR}_t[::1] x,
558+
{SCALAR}_t[:, ::1] y) except -1:
559+
return -1
560+
548561
def __call__(self,
549562
{SCALAR}_t[::1] x,
550563
{SCALAR}_t[:, ::1] y,
551-
BOOL_t no_overwrite=False):
552-
if no_overwrite:
553-
self.matvec_no_overwrite(x, y)
564+
BOOL_t no_overwrite=False,
565+
BOOL_t trans=False):
566+
if not trans:
567+
if no_overwrite:
568+
self.matvec_no_overwrite(x, y)
569+
else:
570+
self.matvec(x, y)
554571
else:
555-
self.matvec(x, y)
572+
if no_overwrite:
573+
self.matvecTrans_no_overwrite(x, y)
574+
else:
575+
self.matvecTrans(x, y)
556576

557577
def __mul__(self, x):
558578
cdef:

base/PyNucleus_base/SSS_LinearOperator_{SCALAR}.pxi

+10
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,16 @@ cdef class {SCALAR_label}SSS_VectorLinearOperator({SCALAR_label}VectorLinearOper
392392
y[i, l] += self.temp[l]
393393
return 0
394394

395+
cdef INDEX_t matvecTrans({SCALAR_label}SSS_VectorLinearOperator self,
396+
{SCALAR}_t[::1] x,
397+
{SCALAR}_t[:, ::1] y) except -1:
398+
return self.matvec(x, y)
399+
400+
cdef INDEX_t matvecTrans_no_overwrite({SCALAR_label}SSS_VectorLinearOperator self,
401+
{SCALAR}_t[::1] x,
402+
{SCALAR}_t[:, ::1] y) except -1:
403+
return self.matvec_no_overwrite(x, y)
404+
395405
cdef void setEntry({SCALAR_label}SSS_VectorLinearOperator self, INDEX_t I, INDEX_t J, {SCALAR}_t[::1] val):
396406
cdef:
397407
INDEX_t i, low, mid, high, l

base/PyNucleus_base/intTuple.pyx

+4-4
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
import numpy as np
99
cimport numpy as np
10-
from libc.stdlib cimport malloc
10+
from cpython.mem cimport PyMem_Malloc
1111
from libc.string cimport memcpy
1212
from . myTypes import INDEX
1313

@@ -19,7 +19,7 @@ cdef enum:
1919
cdef class intTuple:
2020
cdef void set(self, INDEX_t * t, int size):
2121
self.size = size
22-
self.entries = <INDEX_t *>malloc(size*INDEX_SIZE)
22+
self.entries = <INDEX_t *>PyMem_Malloc(size*INDEX_SIZE)
2323
memcpy(&self.entries[0], &t[0], size*INDEX_SIZE)
2424

2525
cdef void assign(self, INDEX_t * t):
@@ -55,7 +55,7 @@ cdef class intTuple:
5555
cdef:
5656
intTuple t = intTuple()
5757
t.size = 2
58-
t.entries = <INDEX_t *>malloc(2*INDEX_SIZE)
58+
t.entries = <INDEX_t *>PyMem_Malloc(2*INDEX_SIZE)
5959
t.entries[0] = a
6060
t.entries[1] = b
6161
return t
@@ -69,7 +69,7 @@ cdef class intTuple:
6969
cdef:
7070
intTuple t = intTuple()
7171
t.size = 3
72-
t.entries = <INDEX_t *>malloc(3*INDEX_SIZE)
72+
t.entries = <INDEX_t *>PyMem_Malloc(3*INDEX_SIZE)
7373
t.entries[0] = a
7474
t.entries[1] = b
7575
t.entries[2] = c

base/PyNucleus_base/opt_false_blas.pxi

+24-5
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ cdef void scaleScalar(SCALAR_t[::1] x, SCALAR_t alpha):
5454
for i in range(x.shape[0]):
5555
x[i] = x[i]*alpha
5656

57-
cdef SCALAR_t mydot(SCALAR_t[::1] v0, SCALAR_t[::1] v1) nogil:
57+
cdef SCALAR_t mydot(SCALAR_t[::1] v0, SCALAR_t[::1] v1):
5858
cdef:
5959
int i
6060
SCALAR_t s = 0.0
@@ -110,6 +110,25 @@ cdef void gemv(SCALAR_t[:, ::1] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t be
110110
y[i] = s
111111

112112

113+
cdef void gemvF(SCALAR_t[::1, :] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.):
114+
cdef:
115+
INDEX_t i, j
116+
if SCALAR_t is COMPLEX_t:
117+
if beta != 0.:
118+
for i in range(A.shape[0]):
119+
y[i] = beta*y[i]
120+
for j in range(A.shape[1]):
121+
for i in range(A.shape[0]):
122+
y[i] = y[i] + A[i, j]*x[j]
123+
else:
124+
if beta != 0.:
125+
for i in range(A.shape[0]):
126+
y[i] *= beta
127+
for j in range(A.shape[1]):
128+
for i in range(A.shape[0]):
129+
y[i] += A[i, j]*x[j]
130+
131+
113132
cdef void gemvT(SCALAR_t[:, ::1] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.):
114133
cdef:
115134
INDEX_t i, j
@@ -119,17 +138,17 @@ cdef void gemvT(SCALAR_t[:, ::1] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t b
119138
y[i] = y[i]*beta
120139
else:
121140
y[:] = 0.
122-
for j in range(A.shape[1]):
123-
for i in range(A.shape[0]):
141+
for i in range(A.shape[1]):
142+
for j in range(A.shape[0]):
124143
y[i] = y[i]+A[j, i]*x[j]
125144
else:
126145
if beta != 0.:
127146
for i in range(A.shape[0]):
128147
y[i] *= beta
129148
else:
130149
y[:] = 0.
131-
for j in range(A.shape[1]):
132-
for i in range(A.shape[0]):
150+
for i in range(A.shape[1]):
151+
for j in range(A.shape[0]):
133152
y[i] += A[j, i]*x[j]
134153

135154

base/PyNucleus_base/opt_true_blas.pxi

+3-3
Original file line numberDiff line numberDiff line change
@@ -215,10 +215,10 @@ cdef void gemvT(SCALAR_t[:, ::1] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t b
215215
double complex* A_ptr_c
216216
double complex* x_ptr_c
217217
double complex* y_ptr_c
218-
int m = A.shape[0]
219-
int n = A.shape[1]
218+
int m = A.shape[1]
219+
int n = A.shape[0]
220220
SCALAR_t alpha = 1.
221-
int lda = A.shape[0]
221+
int lda = A.shape[1]
222222
int incx = 1
223223
int incy = 1
224224
if SCALAR_t is COMPLEX_t:

base/PyNucleus_base/sparsityPattern.pyx

+6-6
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# If you want to use this code, please refer to the README.rst and LICENSE files. #
66
###################################################################################
77

8-
from libc.stdlib cimport malloc, realloc, free
8+
from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
99
import numpy as np
1010
cimport numpy as np
1111
from . myTypes import INDEX
@@ -21,10 +21,10 @@ cdef class sparsityPattern:
2121
self.nnz = 0
2222
self.counts = np.zeros((num_dofs), dtype=INDEX)
2323
self.lengths = initial_length*np.ones((num_dofs), dtype=np.uint16)
24-
self.indexL = <INDEX_t **>malloc(num_dofs*sizeof(INDEX_t *))
24+
self.indexL = <INDEX_t **>PyMem_Malloc(num_dofs*sizeof(INDEX_t *))
2525
# reserve initial memory for array of variable column size
2626
for i in range(num_dofs):
27-
self.indexL[i] = <INDEX_t *>malloc(self.initial_length*sizeof(INDEX_t))
27+
self.indexL[i] = <INDEX_t *>PyMem_Malloc(self.initial_length*sizeof(INDEX_t))
2828

2929
cdef inline BOOL_t findIndex(self, INDEX_t I, INDEX_t J):
3030
cdef:
@@ -68,7 +68,7 @@ cdef class sparsityPattern:
6868
# J was not present
6969
# Do we need more space?
7070
if self.counts[I] == self.lengths[I]:
71-
self.indexL[I] = <INDEX_t *>realloc(self.indexL[I], (self.lengths[I]+self.initial_length)*sizeof(INDEX_t))
71+
self.indexL[I] = <INDEX_t *>PyMem_Realloc(self.indexL[I], (self.lengths[I]+self.initial_length)*sizeof(INDEX_t))
7272
self.lengths[I] += self.initial_length
7373
# where should we insert?
7474
m = self.index
@@ -95,8 +95,8 @@ cdef class sparsityPattern:
9595
for j in range(self.counts[i]):
9696
indices[k] = self.indexL[i][j]
9797
k += 1
98-
free(self.indexL[i])
99-
free(self.indexL)
98+
PyMem_Free(self.indexL[i])
99+
PyMem_Free(self.indexL)
100100

101101
# fill indptr array
102102
indptr_mem = uninitialized((self.num_dofs+1), dtype=INDEX)

base/PyNucleus_base/tupleDict.pxd

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ ctypedef np.uint64_t MEM_t
1515

1616
cdef class indexSet:
1717
cdef BOOL_t inSet(self, INDEX_t i)
18+
cdef INDEX_t position(self, INDEX_t i)
1819
cpdef void fromSet(self, set s)
1920
cpdef set toSet(self)
2021
cpdef INDEX_t[::1] toArray(self)

base/PyNucleus_base/tupleDict.pyx

+36-4
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
###################################################################################
77

88
import numpy as np
9-
from libc.stdlib cimport malloc, realloc, free
9+
from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
1010
from libc.stdlib cimport qsort
1111
from . myTypes import INDEX
1212

@@ -25,6 +25,9 @@ cdef class indexSet:
2525
def inSet_py(self, INDEX_t i):
2626
return self.inSet(i)
2727

28+
cdef INDEX_t position(self, INDEX_t i):
29+
raise NotImplementedError()
30+
2831
cpdef void fromSet(self, set s):
2932
raise NotImplementedError()
3033

@@ -176,6 +179,27 @@ cdef class arrayIndexSet(indexSet):
176179
high = mid
177180
return True
178181

182+
cdef INDEX_t position(self, INDEX_t i):
183+
cdef:
184+
INDEX_t low = 0
185+
INDEX_t high = self.indexArray.shape[0]
186+
INDEX_t mid
187+
if high-low < 20:
188+
for mid in range(low, high):
189+
if self.indexArray[mid] == i:
190+
return mid
191+
return -1
192+
else:
193+
while self.indexArray[low] != i:
194+
if high-low <= 1:
195+
return -1
196+
mid = (low+high) >> 1
197+
if self.indexArray[mid] <= i:
198+
low = mid
199+
else:
200+
high = mid
201+
return low
202+
179203
cpdef void fromSet(self, set s):
180204
cdef:
181205
INDEX_t i, k
@@ -385,6 +409,14 @@ cdef class unsortedArrayIndexSet(arrayIndexSet):
385409
return True
386410
return False
387411

412+
cdef INDEX_t position(self, INDEX_t i):
413+
cdef:
414+
INDEX_t j
415+
for j in range(self.indexArray.shape[0]):
416+
if self.indexArray[j] == i:
417+
return j
418+
return -1
419+
388420
cpdef void fromSet(self, set s):
389421
cdef:
390422
INDEX_t i, k
@@ -446,7 +478,7 @@ cdef class arrayIndexSetIterator(indexSetIterator):
446478
cdef class bitArray(indexSet):
447479
def __init__(self, size_t hintMaxLength=1, INDEX_t maxElement=0):
448480
self.length = max(hintMaxLength, maxElement/(sizeof(MEM_t)*8)+1)
449-
self.a = <MEM_t *>malloc(self.length*sizeof(MEM_t))
481+
self.a = <MEM_t *>PyMem_Malloc(self.length*sizeof(MEM_t))
450482
for j in range(self.length):
451483
self.a[j] = 0
452484

@@ -459,7 +491,7 @@ cdef class bitArray(indexSet):
459491
if k >= self.length:
460492
l = self.length
461493
self.length = k+1
462-
self.a = <MEM_t *>realloc(self.a, self.length * sizeof(MEM_t))
494+
self.a = <MEM_t *>PyMem_Realloc(self.a, self.length * sizeof(MEM_t))
463495
for j in range(l, self.length):
464496
self.a[j] = 0
465497
self.a[k] |= one << n
@@ -510,7 +542,7 @@ cdef class bitArray(indexSet):
510542
self.a[j] = 0
511543

512544
def __dealloc__(self):
513-
free(self.a)
545+
PyMem_Free(self.a)
514546

515547
cdef indexSetIterator getIter(self):
516548
return bitArrayIterator(self)

0 commit comments

Comments
 (0)