Skip to content

Commit 4c0c951

Browse files
authored
Merge pull request #114 from EleutherAI/frequency_estimation
Add per token and per context frequency to LatentRecord
2 parents 400066d + a76de99 commit 4c0c951

File tree

3 files changed

+18
-0
lines changed

3 files changed

+18
-0
lines changed

delphi/latents/constructors.py

+3
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,9 @@ def constructor(
159159
non_active_indices = mask.nonzero(as_tuple=False).squeeze()
160160
activations = activation_data.activations
161161

162+
# per context frequency
163+
record.per_context_frequency = len(unique_batch_pos) / n_windows
164+
162165
# Add activation examples to the record in place
163166
token_windows, act_windows = pool_max_activation_windows(
164167
activations=activations,

delphi/latents/latents.py

+7
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,13 @@ class LatentRecord:
146146
extra_examples: Optional[list[Example]] = None
147147
"""Extra examples to include in the record."""
148148

149+
per_token_frequency: float = 0.0
150+
"""Per-token frequency of the latent: activations divided by total tokens."""
151+
152+
per_context_frequency: float = 0.0
153+
"""Per-context frequency of the latent. Number of contexts in which the latent
154+
activates divided by the total number of contexts."""
155+
149156
@property
150157
def max_activation(self) -> float:
151158
"""

delphi/latents/loader.py

+8
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,14 @@ async def _aprocess_latent(self, latent_data: LatentData) -> LatentRecord | None
378378
if self.tokens is None:
379379
raise ValueError("Tokens are not loaded")
380380
record = LatentRecord(latent_data.latent)
381+
382+
# number of recorded activations for this latent
383+
n_active = len(latent_data.activation_data.activations)
384+
# total number of tokens in the loaded token tensor
385+
n_tokens = self.tokens.shape[1] * self.tokens.shape[0]
386+
# per-token frequency of the latent (activations / total tokens)
387+
record.per_token_frequency = n_active / n_tokens
388+
381389
if self.neighbours is not None:
382390
record.set_neighbours(
383391
self.neighbours[latent_data.module][

0 commit comments

Comments
 (0)