
Commit c7f7889

Author: Aba (committed)
Pointnet works
1 parent 25c6d9b commit c7f7889

File tree: 6 files changed (+377, -75 lines)

Diff for: deepsocflow/py/hardware.py (-3)

@@ -226,9 +226,6 @@ def simulate(self, SIM='verilator', SIM_PATH=''):
         cmd = f'{SIM_PATH}verilator --binary -j 0 -O3 --relative-includes --top {self.TB_MODULE} -I../ -F ../sources.txt -CFLAGS -DSIM -CFLAGS -I../ {self.MODULE_DIR}/c/sim.c -CFLAGS -g --Mdir ./'
         print(cmd)
         assert subprocess.run(cmd.split(' '), cwd='build').returncode == 0
-
-        exit()
-
         print("\n\nSIMULATING...\n\n")
         start = time.time()

Diff for: deepsocflow/py/xbundle.py (+2)

@@ -47,6 +47,8 @@ def call(self, input_tensor, x_add=None, training=False):
         self.prev_ib = x.ib
         BUNDLES[self.prev_ib].next_ibs += [self.ib]

+        print(f"{self.ib} x: {x.shape}, prev:{self.prev_ib}")
+
         x = self.core(x)
         x = self.core.act(x)

Diff for: deepsocflow/py/xmodel.py (+2, -2)

@@ -39,15 +39,15 @@ def get_config(self):



-def export_inference(model, hw):
+def export_inference(model, hw, batch_size=1):

     for b in BUNDLES:
         b.next_ibs.clear()
         b.next_add_ibs.clear()
     BUNDLES.clear()

     user_model = model.layers[1]
-    input_shape = (hw.ROWS, *model.inputs[0].shape[1:])
+    input_shape = (batch_size, *model.inputs[0].shape[1:])
     x_keras = tf.random.uniform(input_shape)
     x_qtensor = user_model.input_quant_layer(x_keras)
     out_keras = model(x_keras)
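With the new keyword, callers choose the export batch size explicitly instead of inheriting the hardware row count. A minimal usage sketch, assuming `loaded_model` and `hw` are built as in run/param_test.py below:

    # Sketch only: export the quantized model for a single-sample batch.
    export_inference(loaded_model, hw, batch_size=1)   # batch_size defaults to 1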

Diff for: run/param_test.py (+12, -10)

@@ -11,6 +11,7 @@
 from keras.utils import to_categorical
 from qkeras.utils import load_qmodel
 import numpy as np
+import pprint
 # import tensorflow as tf
 #tf.keras.utils.set_random_seed(0)

@@ -175,24 +176,24 @@ def product_dict(**kwargs):
         yield dict(zip(kwargs.keys(), instance))

 @pytest.mark.parametrize("PARAMS", list(product_dict(
-    processing_elements = [(8,24) ],
+    processing_elements = [(32,32) ],
     frequency_mhz = [ 250 ],
     bits_input = [ 4 ],
     bits_weights = [ 4 ],
-    bits_sum = [ 32 ],
+    bits_sum = [ 20 ],
     bits_bias = [ 16 ],
     max_batch_size = [ 64 ],
     max_channels_in = [ 2048 ],
     max_kernel_size = [ 9 ],
     max_image_size = [ 512 ],
     max_n_bundles = [ 64 ],
-    ram_weights_depth = [ 20 ],
+    ram_weights_depth = [ 512 ],
     ram_edges_depth = [ 288 ],
-    axi_width = [ 128 ],
+    axi_width = [ 64 ],
     config_baseaddr = ["B0000000"],
     target_cpu_int_bits = [ 32 ],
-    valid_prob = [ 0.1 ],
-    ready_prob = [ 0.01 ],
+    valid_prob = [ 1 ],
+    ready_prob = [ 1 ],
     data_dir = ['vectors'],
 )))
 def test_dnn_engine(PARAMS):

@@ -210,9 +211,10 @@ def test_dnn_engine(PARAMS):
     '''
     VERIFY & EXPORT
     '''
-    export_inference(loaded_model, hw)
+    export_inference(loaded_model, hw, batch_size=1)
     verify_inference(loaded_model, hw, SIM=SIM, SIM_PATH=SIM_PATH)

-    seconds, bytes = predict_model_performance(hw)
-    print(f"Predicted time on hardware: {1000*seconds:.5f} ms")
-    print(f"Predicted data movement: {bytes/1000:.5f} kB")
+    d_perf = predict_model_performance(hw)
+    pp = pprint.PrettyPrinter(indent=4)
+    print(f"Predicted Performance")
+    pp.pprint(d_perf)
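For reference, product_dict (defined just above this hunk, and shown in full in run/pointnet.py below) expands the keyword lists into one parameter dict per combination for @pytest.mark.parametrize. A small illustration with a hypothetical two-key grid, not part of the commit:

    list(product_dict(bits_input=[4, 8], axi_width=[64]))
    # -> [{'bits_input': 4, 'axi_width': 64},
    #     {'bits_input': 8, 'axi_width': 64}]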

Diff for: run/pointnet.py (+298, new file)

@@ -0,0 +1,298 @@
+import os
+import pytest
+import itertools
+import sys
+sys.path.append("../../")
+from tensorflow import keras
+from keras.layers import Input
+from keras.models import Model, save_model
+from keras.datasets import mnist
+from keras.optimizers import Adam
+from keras.utils import to_categorical
+from qkeras.utils import load_qmodel
+import numpy as np
+import pprint
+#from read_point_cloud import *
+#from preprocess import *
+import tensorflow as tf
+#tf.keras.utils.set_random_seed(0)
+
+from deepsocflow import *
+
+
+(SIM, SIM_PATH) = ('xsim', "F:/Xilinx/Vivado/2022.2/bin/") if os.name=='nt' else ('verilator', '')
+np.random.seed(42)
+
+'''
+Dataset
+'''
+
+NB_EPOCH = 2
+BATCH_SIZE = 64
+VALIDATION_SPLIT = 0.1
+
+#input_shape = x_train.shape[1:]
+
+scale_factor = 80.
+## Load data
+"""
+print("loading data...")
+pmtxyz = get_pmtxyz("./work/pmt_xyz.dat")
+X, y = torch.load("./work/preprocessed_data.pt")
+X = X/100.
+y[:,:] = y[:,:]/3.0
+y[:, :3] = y[:, :3]/scale_factor
+y[:, :3] = y[:,:3]
+#print(y[0])
+X_tf = tf.convert_to_tensor(X.numpy(), dtype=tf.float32)
+y_tf = tf.convert_to_tensor(y.numpy(), dtype=tf.float32)
+X_tf = tf.expand_dims(X_tf, axis=2)
+debug = True
+if debug:
+    print("debug got called")
+    small = 5000
+    X_tf, y_tf = X_tf[:small], y_tf[:small]
+
+
+# Update batch size
+print(X_tf.shape)
+n_data, n_hits, _, F_dim = X_tf.shape
+
+## switch to match Aobo's syntax (time, charge, x, y, z) -> (x, y, z, label, time, charge)
+## insert "label" feature to tensor. This feature (0 or 1) is the activation of sensor
+new_X = X_tf #preprocess(X_tf)
+
+## Shuffle Data (w/ Seed)
+#np.random.seed(seed=args.seed)
+#set_seed(seed=args.seed)
+idx = np.random.permutation(new_X.shape[0])
+#new_X = tf.gather(new_X, idx)
+#y = tf.gather(y_tf, idx)
+## Split and Load data
+train_split = 0.7
+val_split = 0.3
+train_idx = int(new_X.shape[0] * train_split)
+val_idx = int(train_idx + new_X.shape[0] * train_split)
+train = tf.data.Dataset.from_tensor_slices((new_X[:train_idx], y_tf[:train_idx]))
+val = tf.data.Dataset.from_tensor_slices((new_X[train_idx:val_idx], y_tf[train_idx:val_idx]))
+test = tf.data.Dataset.from_tensor_slices((new_X[val_idx:], y_tf[val_idx:]))
+train_loader = train.shuffle(buffer_size=len(new_X)).batch(BATCH_SIZE)
+val_loader = val.batch(BATCH_SIZE)
+test_loader = val.batch(BATCH_SIZE)
+print(f"num. total: {len(new_X)} train: {len(train)}, val: {len(val)}, test: {len(test)}")
+#print(pmtxyz.shape, tf.shape(new_X), y_tf.shape)
+"""
+input_shape = (2126, 1, 5) #X_tf.shape[1:]
+n_hits, _, F_dim = input_shape #X_tf.shape
+
+'''
+Define Model
+'''
+
+sys_bits = SYS_BITS(x=8, k=8, b=16)
+dim = F_dim
+dim_reduce_factor = 2
+out_dim = 4 #y_tf.shape[-1]
+dimensions = dim
+nhits = 2126
+encoder_input_shapes = [dimensions, 64, int(128 / dim_reduce_factor)]
+(_, F1, F2), latent_dim = encoder_input_shapes, int(1024 / dim_reduce_factor)
+decoder_input_shapes = latent_dim, int(512/dim_reduce_factor), int(128/dim_reduce_factor)
+latent_dim, F3, F4 = decoder_input_shapes
+#print("Test", F1, F2, dim, dim_reduce_factor, out_dim, dimensions)
+@keras.saving.register_keras_serializable()
+class UserModel(XModel):
+    def __init__(self, sys_bits, x_int_bits, *args, **kwargs):
+        super().__init__(sys_bits, x_int_bits, *args, **kwargs)
+
+        self.b0 = XBundle(
+            core=XConvBN(
+                k_int_bits=0,
+                b_int_bits=0,
+                filters=F1,
+                kernel_size=1,
+                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0),),
+            #core=XDense(
+            #    k_int_bits=0,
+            #    b_int_bits=0,
+            #    units=F1,
+            #    act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0)
+            #    ),
+        )
+
+        self.b1 = XBundle(
+            core=XConvBN(
+                k_int_bits=0,
+                b_int_bits=0,
+                filters=F2,
+                kernel_size=1,
+                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0),),
+            #core=XDense(
+            #    k_int_bits=0,
+            #    b_int_bits=0,
+            #    units=F2,
+            #    act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0)),
+        )
+
+        self.b2 = XBundle(
+            core=XConvBN(
+                k_int_bits=0,
+                b_int_bits=0,
+                filters=latent_dim,
+                kernel_size=1,
+                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0),),
+            pool=XPool(
+                type='avg',
+                pool_size=(2126,1),
+                strides=(2126,1),
+                padding='same',
+                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type=None),),
+            flatten=True
+            #core=XDense(
+            #    k_int_bits=0,
+            #    b_int_bits=0,
+            #    units=latent_dim,
+            #    act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0)),
+        )
+
+
+        self.b3 = XBundle(
+            core=XDense(
+                k_int_bits=0,
+                b_int_bits=0,
+                units=F3,
+                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0)),
+        )
+
+        self.b4 = XBundle(
+            core=XDense(
+                k_int_bits=0,
+                b_int_bits=0,
+                units=F4,
+                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0)),
+        )
+
+        self.b5 = XBundle(
+            core=XDense(
+                k_int_bits=0,
+                b_int_bits=0,
+                units=out_dim,
+                act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0.125)),
+            # flatten=True
+        )
+
+    def call (self, x):
+        x = self.input_quant_layer(x)
+        print('input', x.shape)
+        x = self.b0(x)
+        x = self.b1(x)
+        x = self.b2(x)
+        x = self.b3(x)
+        x = self.b4(x)
+        x = self.b5(x)
+        return x
+
+x = x_in = Input(input_shape, name="input")
+user_model = UserModel(sys_bits=sys_bits, x_int_bits=0)
+x = user_model(x_in)
+
+model = Model(inputs=[x_in], outputs=[x])
+
+
+'''
+Train Model
+'''
+model.compile(loss="mse", optimizer=Adam(learning_rate=0.0001), metrics=["mse"])
+#history = model.fit(
+#    train_loader,
+#    #x_train,
+#    #y_train,
+#    batch_size=BATCH_SIZE,
+#    epochs=NB_EPOCH,
+#    #initial_epoch=1,
+#    verbose=True,
+#    )
+
+print(model.submodules)
+#print(y[:5], model(X_tf[:5]))
+for layer in model.submodules:
+    try:
+        print(layer.summary())
+        for w, weight in enumerate(layer.get_weights()):
+            print(layer.name, w, weight.shape)
+    except:
+        pass
+# print_qstats(model.layers[1])
+
+def summary_plus(layer, i=0):
+    if hasattr(layer, 'layers'):
+        if i != 0:
+            layer.summary()
+        for l in layer.layers:
+            i += 1
+            summary_plus(l, i=i)
+
+print(summary_plus(model)) # OK
+model.summary(expand_nested=True)
+
+
+'''
+Save & Reload
+'''
+
+save_model(model, "mnist.h5")
+loaded_model = load_qmodel("mnist.h5")
+
+#score = loaded_model.evaluate(test_loader, verbose=0)
+#print(f"Test loss:{score[0]}, Test accuracy:{score[1]}")
+
+
+
+
+def product_dict(**kwargs):
+    for instance in itertools.product(*(kwargs.values())):
+        yield dict(zip(kwargs.keys(), instance))
+
+@pytest.mark.parametrize("PARAMS", list(product_dict(
+    processing_elements = [(16,32) ],
+    frequency_mhz = [ 250 ],
+    bits_input = [ 8 ],
+    bits_weights = [ 8 ],
+    bits_sum = [ 32 ],
+    bits_bias = [ 16 ],
+    max_batch_size = [ 64 ],
+    max_channels_in = [ 2048 ],
+    max_kernel_size = [ 9 ],
+    max_image_size = [ 2126 ],
+    max_n_bundles = [ 64 ],
+    ram_weights_depth = [ 20 ],
+    ram_edges_depth = [ 288 ],
+    axi_width = [ 128 ],
+    config_baseaddr = ["B0000000"],
+    target_cpu_int_bits = [ 32 ],
+    valid_prob = [ 1 ],
+    ready_prob = [ 1 ],
+    data_dir = ['vectors'],
+)))
+def test_dnn_engine(PARAMS):
+
+    '''
+    SPECIFY HARDWARE
+    '''
+    hw = Hardware (**PARAMS)
+    hw.export_json()
+    hw = Hardware.from_json('hardware.json')
+    hw.export() # Generates: config_hw.svh, config_hw.tcl
+    hw.export_vivado_tcl(board='zcu104')
+
+
+    '''
+    VERIFY & EXPORT
+    '''
+    export_inference(loaded_model, hw, hw.ROWS)
+    verify_inference(loaded_model, hw, SIM=SIM, SIM_PATH=SIM_PATH)
+
+    d_perf = predict_model_performance(hw)
+    pp = pprint.PrettyPrinter(indent=4)
+    print(f"Predicted Performance")
+    pp.pprint(d_perf)
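With dim_reduce_factor = 2 and the 5-feature input, the layer widths defined near the top of the script work out to fixed numbers. A quick sketch of that arithmetic, derived from the expressions above rather than stated in the commit:

    # encoder_input_shapes = [5, 64, int(128/2)]           -> F1 = 64, F2 = 64
    # latent_dim = int(1024/2)                              = 512
    # decoder_input_shapes = (512, int(512/2), int(128/2))  -> F3 = 256, F4 = 64
    # regression head                                        -> out_dim = 4
    assert (F1, F2, latent_dim, F3, F4, out_dim) == (64, 64, 512, 256, 64, 4)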

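A hypothetical smoke test one could append after building `model` (not part of the commit) to check that the bundle chain maps one event of shape (2126, 1, 5) to the 4-value regression output:

    # Assumes the module-level `model` and `input_shape` from run/pointnet.py.
    x_dummy = tf.random.uniform((1, *input_shape))   # (1, 2126, 1, 5)
    y_dummy = model(x_dummy)
    print(y_dummy.shape)   # expected (1, 4) if XPool + flatten collapses to (batch, latent_dim)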