Skip to content

Commit 079e7c6

Browse files
committed
clusterizer: Separate sparse and dense adjacency builds
In sparse mode we still had a memset(counts) that cleared the entire array; this is not necessary in sparse mode and can be meaningfully slower, especially if the region of memory is fresh and the pages need to be committed. We can instead clear only the counts of the vertices that are actually referenced by the index buffer, in a separate pass. This results in a small further performance improvement. This also results in sparse builds using almost completely separate code, at which point it's cleaner to split it into a separate function, which is what this commit also does.
1 parent 6176b20 commit 079e7c6

File tree

1 file changed

+69
-40
lines changed

1 file changed

+69
-40
lines changed

src/clusterizer.cpp

Lines changed: 69 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,6 @@ static void buildTriangleAdjacency(TriangleAdjacency2& adjacency, const unsigned
3030
{
3131
size_t face_count = index_count / 3;
3232

33-
// sparse mode can build adjacency more quickly by ignoring unused vertices
34-
const unsigned int sparse_seen = 1u << 31;
35-
bool sparse = vertex_count > index_count && index_count < sparse_seen;
36-
3733
// allocate arrays
3834
adjacency.counts = allocator.allocate<unsigned int>(vertex_count);
3935
adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);
@@ -52,27 +48,68 @@ static void buildTriangleAdjacency(TriangleAdjacency2& adjacency, const unsigned
5248
// fill offset table
5349
unsigned int offset = 0;
5450

55-
if (sparse)
51+
for (size_t i = 0; i < vertex_count; ++i)
5652
{
57-
// when using sparse mode this pass uses sparse_seen bit to tag visited vertices
58-
for (size_t i = 0; i < index_count; ++i)
59-
{
60-
unsigned int v = indices[i];
53+
adjacency.offsets[i] = offset;
54+
offset += adjacency.counts[i];
55+
}
6156

62-
if ((adjacency.counts[v] & sparse_seen) == 0)
63-
{
64-
adjacency.offsets[v] = offset;
65-
offset += adjacency.counts[v];
66-
adjacency.counts[v] |= sparse_seen;
67-
}
68-
}
57+
assert(offset == index_count);
58+
59+
// fill triangle data
60+
for (size_t i = 0; i < face_count; ++i)
61+
{
62+
unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
63+
64+
adjacency.data[adjacency.offsets[a]++] = unsigned(i);
65+
adjacency.data[adjacency.offsets[b]++] = unsigned(i);
66+
adjacency.data[adjacency.offsets[c]++] = unsigned(i);
6967
}
70-
else
68+
69+
// fix offsets that have been disturbed by the previous pass
70+
for (size_t i = 0; i < vertex_count; ++i)
7171
{
72-
for (size_t i = 0; i < vertex_count; ++i)
72+
assert(adjacency.offsets[i] >= adjacency.counts[i]);
73+
adjacency.offsets[i] -= adjacency.counts[i];
74+
}
75+
}
76+
77+
static void buildTriangleAdjacencySparse(TriangleAdjacency2& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
78+
{
79+
size_t face_count = index_count / 3;
80+
81+
// sparse mode can build adjacency more quickly by ignoring unused vertices, using a bit to mark visited vertices
82+
const unsigned int sparse_seen = 1u << 31;
83+
assert(index_count < sparse_seen);
84+
85+
// allocate arrays
86+
adjacency.counts = allocator.allocate<unsigned int>(vertex_count);
87+
adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);
88+
adjacency.data = allocator.allocate<unsigned int>(index_count);
89+
90+
// fill triangle counts
91+
for (size_t i = 0; i < index_count; ++i)
92+
assert(indices[i] < vertex_count);
93+
94+
for (size_t i = 0; i < index_count; ++i)
95+
adjacency.counts[indices[i]] = 0;
96+
97+
for (size_t i = 0; i < index_count; ++i)
98+
adjacency.counts[indices[i]]++;
99+
100+
// fill offset table
101+
unsigned int offset = 0;
102+
103+
// when using sparse mode this pass uses sparse_seen bit to tag visited vertices
104+
for (size_t i = 0; i < index_count; ++i)
105+
{
106+
unsigned int v = indices[i];
107+
108+
if ((adjacency.counts[v] & sparse_seen) == 0)
73109
{
74-
adjacency.offsets[i] = offset;
75-
offset += adjacency.counts[i];
110+
adjacency.offsets[v] = offset;
111+
offset += adjacency.counts[v];
112+
adjacency.counts[v] |= sparse_seen;
76113
}
77114
}
78115

@@ -89,28 +126,17 @@ static void buildTriangleAdjacency(TriangleAdjacency2& adjacency, const unsigned
89126
}
90127

91128
// fix offsets that have been disturbed by the previous pass
92-
if (sparse)
129+
// when using sparse mode this pass also fixes counts (that were marked with sparse_seen)
130+
for (size_t i = 0; i < index_count; ++i)
93131
{
94-
// when using sparse mode this pass also fixes counts (that were marked with sparse_seen)
95-
for (size_t i = 0; i < index_count; ++i)
96-
{
97-
unsigned int v = indices[i];
98-
99-
if (adjacency.counts[v] & sparse_seen)
100-
{
101-
adjacency.counts[v] &= ~sparse_seen;
132+
unsigned int v = indices[i];
102133

103-
assert(adjacency.offsets[v] >= adjacency.counts[v]);
104-
adjacency.offsets[v] -= adjacency.counts[v];
105-
}
106-
}
107-
}
108-
else
109-
{
110-
for (size_t i = 0; i < vertex_count; ++i)
134+
if (adjacency.counts[v] & sparse_seen)
111135
{
112-
assert(adjacency.offsets[i] >= adjacency.counts[i]);
113-
adjacency.offsets[i] -= adjacency.counts[i];
136+
adjacency.counts[v] &= ~sparse_seen;
137+
138+
assert(adjacency.offsets[v] >= adjacency.counts[v]);
139+
adjacency.offsets[v] -= adjacency.counts[v];
114140
}
115141
}
116142
}
@@ -592,7 +618,10 @@ size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_ve
592618
meshopt_Allocator allocator;
593619

594620
TriangleAdjacency2 adjacency = {};
595-
buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
621+
if (vertex_count > index_count && index_count < (1u << 31))
622+
buildTriangleAdjacencySparse(adjacency, indices, index_count, vertex_count, allocator);
623+
else
624+
buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
596625

597626
// live triangle counts; note, we alias adjacency.counts as we remove triangles after emitting them so the counts always match
598627
unsigned int* live_triangles = adjacency.counts;

0 commit comments

Comments
 (0)