Skip to content

Commit 21ca56c

Browse files
authored
Merge pull request #835 from zeux/clust-perf
clusterizer: Improve performance and memory use of meshlet builder
2 parents 64f031b + 1eec9bd commit 21ca56c

File tree

2 files changed

+121
-8
lines changed

2 files changed

+121
-8
lines changed

demo/tests.cpp

+47
Original file line numberDiff line numberDiff line change
@@ -1040,6 +1040,50 @@ static void clusterBoundsDegenerate()
10401040
assert(bounds2.center[2] - bounds2.radius <= 0 && bounds2.center[2] + bounds2.radius >= 1);
10411041
}
10421042

1043+
static void meshletsDense()
1044+
{
1045+
const float vbd[4 * 3] = {};
1046+
const unsigned int ibd[6] = {0, 2, 1, 1, 2, 3};
1047+
1048+
meshopt_Meshlet ml[1];
1049+
unsigned int mv[4];
1050+
unsigned char mt[8];
1051+
size_t mc = meshopt_buildMeshlets(ml, mv, mt, ibd, 6, vbd, 4, sizeof(float) * 3, 64, 64, 0.f);
1052+
1053+
assert(mc == 1);
1054+
assert(ml[0].triangle_count == 2);
1055+
assert(ml[0].vertex_count == 4);
1056+
1057+
unsigned int tri0[3] = {mv[mt[0]], mv[mt[1]], mv[mt[2]]};
1058+
unsigned int tri1[3] = {mv[mt[3]], mv[mt[4]], mv[mt[5]]};
1059+
1060+
// technically triangles could also be flipped in the meshlet but for now just assume they aren't
1061+
assert(memcmp(tri0, ibd + 0, 3 * sizeof(unsigned int)) == 0);
1062+
assert(memcmp(tri1, ibd + 3, 3 * sizeof(unsigned int)) == 0);
1063+
}
1064+
1065+
static void meshletsSparse()
1066+
{
1067+
const float vbd[16 * 3] = {};
1068+
const unsigned int ibd[6] = {0, 7, 15, 15, 7, 3};
1069+
1070+
meshopt_Meshlet ml[1];
1071+
unsigned int mv[4];
1072+
unsigned char mt[8];
1073+
size_t mc = meshopt_buildMeshlets(ml, mv, mt, ibd, 6, vbd, 16, sizeof(float) * 3, 64, 64, 0.f);
1074+
1075+
assert(mc == 1);
1076+
assert(ml[0].triangle_count == 2);
1077+
assert(ml[0].vertex_count == 4);
1078+
1079+
unsigned int tri0[3] = {mv[mt[0]], mv[mt[1]], mv[mt[2]]};
1080+
unsigned int tri1[3] = {mv[mt[3]], mv[mt[4]], mv[mt[5]]};
1081+
1082+
// technically triangles could also be flipped in the meshlet but for now just assume they aren't
1083+
assert(memcmp(tri0, ibd + 0, 3 * sizeof(unsigned int)) == 0);
1084+
assert(memcmp(tri1, ibd + 3, 3 * sizeof(unsigned int)) == 0);
1085+
}
1086+
10431087
static size_t allocCount;
10441088
static size_t freeCount;
10451089

@@ -2095,6 +2139,9 @@ void runTests()
20952139

20962140
clusterBoundsDegenerate();
20972141

2142+
meshletsDense();
2143+
meshletsSparse();
2144+
20982145
customAllocator();
20992146

21002147
emptyMesh();

src/clusterizer.cpp

+74-8
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,77 @@ static void buildTriangleAdjacency(TriangleAdjacency2& adjacency, const unsigned
7070
for (size_t i = 0; i < vertex_count; ++i)
7171
{
7272
assert(adjacency.offsets[i] >= adjacency.counts[i]);
73-
7473
adjacency.offsets[i] -= adjacency.counts[i];
7574
}
7675
}
7776

77+
// Builds vertex -> triangle adjacency while only touching entries of vertices that
// actually appear in `indices`.
//
// adjacency.counts and adjacency.offsets are allocated with vertex_count entries and
// adjacency.data with index_count entries; this function writes counts/offsets only for
// referenced vertices and never writes the entries of unreferenced ones.
// Every loop below iterates over the index buffer rather than over all vertices.
static void buildTriangleAdjacencySparse(TriangleAdjacency2& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
{
	// the top bit of a per-vertex count temporarily tags the vertex as visited,
	// so genuine counts have to fit in the remaining 31 bits
	const unsigned int visited_bit = 1u << 31;
	assert(index_count < visited_bit);

	const size_t triangle_count = index_count / 3;

	// storage for the adjacency tables
	adjacency.counts = allocator.allocate<unsigned int>(vertex_count);
	adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);
	adjacency.data = allocator.allocate<unsigned int>(index_count);

	// validate the whole index buffer before anything is written
	for (size_t i = 0; i < index_count; ++i)
		assert(indices[i] < vertex_count);

	// reset counts of referenced vertices; this must finish before counting starts
	// because a vertex can appear in the index buffer more than once
	for (size_t i = 0; i < index_count; ++i)
		adjacency.counts[indices[i]] = 0;

	// count how many corners reference each vertex
	for (size_t i = 0; i < index_count; ++i)
		adjacency.counts[indices[i]] += 1;

	// assign each referenced vertex a range in adjacency.data, in order of first appearance;
	// the visited bit keeps a vertex from being assigned a range twice
	unsigned int next_offset = 0;

	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int vertex = indices[i];
		unsigned int count = adjacency.counts[vertex];

		if (count & visited_bit)
			continue;

		adjacency.offsets[vertex] = next_offset;
		next_offset += count;
		adjacency.counts[vertex] = count | visited_bit;
	}

	assert(next_offset == index_count);

	// record each triangle in the range of every one of its three corners;
	// offsets are advanced as write cursors here and restored below
	for (size_t tri = 0; tri < triangle_count; ++tri)
	{
		const unsigned int* corners = indices + tri * 3;

		for (size_t k = 0; k < 3; ++k)
			adjacency.data[adjacency.offsets[corners[k]]++] = unsigned(tri);
	}

	// undo the cursor advance and strip the visited tag, once per referenced vertex:
	// clearing the bit on the first visit makes later occurrences of the same vertex no-ops
	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int vertex = indices[i];
		unsigned int tagged = adjacency.counts[vertex];

		if ((tagged & visited_bit) == 0)
			continue;

		unsigned int count = tagged & ~visited_bit;
		adjacency.counts[vertex] = count;

		assert(adjacency.offsets[vertex] >= count);
		adjacency.offsets[vertex] -= count;
	}
}
143+
78144
static void computeBoundingSphere(float result[4], const float points[][3], size_t count)
79145
{
80146
assert(count > 0);
@@ -552,10 +618,13 @@ size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_ve
552618
meshopt_Allocator allocator;
553619

554620
TriangleAdjacency2 adjacency = {};
555-
buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
621+
if (vertex_count > index_count && index_count < (1u << 31))
622+
buildTriangleAdjacencySparse(adjacency, indices, index_count, vertex_count, allocator);
623+
else
624+
buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
556625

557-
unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
558-
memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
626+
// live triangle counts; note, we alias adjacency.counts as we remove triangles after emitting them so the counts always match
627+
unsigned int* live_triangles = adjacency.counts;
559628

560629
size_t face_count = index_count / 3;
561630

@@ -625,12 +694,9 @@ size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_ve
625694
memset(&meshlet_cone_acc, 0, sizeof(meshlet_cone_acc));
626695
}
627696

628-
live_triangles[a]--;
629-
live_triangles[b]--;
630-
live_triangles[c]--;
631-
632697
// remove emitted triangle from adjacency data
633698
// this makes sure that we spend less time traversing these lists on subsequent iterations
699+
// live triangle counts are updated as a byproduct of these adjustments
634700
for (size_t k = 0; k < 3; ++k)
635701
{
636702
unsigned int index = indices[best_triangle * 3 + k];

0 commit comments

Comments
 (0)