grafana
diff --git a/‎api/gen/proto/go/metastore/v1/raft_log/raft_log.pb.go
Lines changed: 212 additions & 133 deletions b/‎api/gen/proto/go/metastore/v1/raft_log/raft_log.pb.go
Lines changed: 212 additions & 133 deletions
diff --git a/‎api/gen/proto/go/metastore/v1/raft_log/raft_log_vtproto.pb.go
Lines changed: 256 additions & 0 deletions b/‎api/gen/proto/go/metastore/v1/raft_log/raft_log_vtproto.pb.go
Lines changed: 256 additions & 0 deletions
diff --git a/‎api/metastore/v1/raft_log/raft_log.proto
Lines changed: 5 additions & 0 deletions b/‎api/metastore/v1/raft_log/raft_log.proto
Lines changed: 5 additions & 0 deletions
diff --git a/‎pkg/experiment/metastore/compaction/README.md
Lines changed: 8 additions & 1 deletion b/‎pkg/experiment/metastore/compaction/README.md
Lines changed: 8 additions & 1 deletion
diff --git a/‎pkg/experiment/metastore/compaction/compaction.go
Lines changed: 3 additions & 0 deletions b/‎pkg/experiment/metastore/compaction/compaction.go
Lines changed: 3 additions & 0 deletions
@@ -46,6 +46,7 @@ message CompactionPlanUpdate {
   repeated AssignedCompactionJob assigned_jobs = 2;
   repeated UpdatedCompactionJob updated_jobs = 3;
   repeated CompletedCompactionJob completed_jobs = 4;
+  repeated EvictedCompactionJob evicted_jobs = 5;
 }
 
 message NewCompactionJob {
@@ -67,6 +68,10 @@ message CompletedCompactionJob {
   metastore.v1.CompactedBlocks compacted_blocks = 2;
 }
 
+message EvictedCompactionJob {
+  CompactionJobState state = 1;
+}
+
 // CompactionJobState is produced in response to
 // the compaction worker status update request.
 //
 
@@ -259,7 +259,14 @@ reports from workers, as jobs that cause workers to crash would yield no reports
 
 To avoid infinite reassignment loops, the scheduler keeps track of reassignments (failures) for each job. If the number
 of failures exceeds a set threshold, the job is not reassigned and remains at the bottom of the queue. Once the cause of
-failure is resolved, the limit can be temporarily increased to reprocess these jobs.
+failure is resolved, the error limit can be temporarily increased to reprocess these jobs.
+
+The scheduler queue has a size limit. Typically, the only scenario in which this limit is reached is when the compaction
+process is not functioning correctly (e.g., due to a bug in the compaction procedure), preventing blocks from being
+compacted and resulting in many jobs remaining in a failed state. Once the queue size limit is reached, failed jobs are
+evicted, meaning the corresponding blocks will never be compacted. This may cause read amplification for data queries
+and bloat the metadata index. Therefore, the queue size limit should be large enough. The recommended course of action is to roll
+back or fix the bug and restart the compaction process, temporarily increasing the error limit if necessary.
 
 ### Job Completion
 
 
@@ -55,6 +55,9 @@ type Schedule interface {
 	UpdateJob(*raft_log.CompactionJobStatusUpdate) *raft_log.CompactionJobState
 	// AssignJob is called on behalf of the worker to request a new job.
 	AssignJob() (*raft_log.AssignedCompactionJob, error)
+	// EvictJob is called on behalf of the planner to evict jobs that cannot
+	// be assigned to workers and to free up resources for new jobs.
+	EvictJob() *raft_log.CompactionJobState
 	// AddJob is called on behalf of the planner to add a new job to the schedule.
 	// The scheduler may decline the job by returning a nil state.
 	AddJob(*raft_log.CompactionJobPlan) *raft_log.CompactionJobState