@@ -120,7 +120,7 @@ protected void blend(final TermStates[] contexts, int maxDoc, IndexReader reader
120
120
}
121
121
int max = 0 ;
122
122
long minSumTTF = Long .MAX_VALUE ;
123
- int minDocCount = Integer . MAX_VALUE ;
123
+ int [] docCounts = new int [ contexts . length ] ;
124
124
for (int i = 0 ; i < contexts .length ; i ++) {
125
125
TermStates ctx = contexts [i ];
126
126
int df = ctx .docFreq ();
@@ -134,15 +134,12 @@ protected void blend(final TermStates[] contexts, int maxDoc, IndexReader reader
134
134
// we need to find out the minimum sumTTF to adjust the statistics
135
135
// otherwise the statistics don't match
136
136
minSumTTF = Math .min (minSumTTF , reader .getSumTotalTermFreq (terms [i ].field ()));
137
- minDocCount = Math . min ( minDocCount , reader .getDocCount (terms [i ].field () ));
137
+ docCounts [ i ] = reader .getDocCount (terms [i ].field ());
138
138
}
139
139
}
140
140
if (maxDoc > minSumTTF ) {
141
141
maxDoc = (int ) minSumTTF ;
142
142
}
143
- if (maxDoc > minDocCount ) {
144
- maxDoc = minDocCount ;
145
- }
146
143
if (max == 0 ) {
147
144
return ; // we are done that term doesn't exist at all
148
145
}
@@ -180,7 +177,11 @@ protected int compare(int i, int j) {
180
177
if (prev > current ) {
181
178
actualDf ++;
182
179
}
183
- contexts [i ] = ctx = adjustDF (reader .getContext (), ctx , Math .min (maxDoc , actualDf ));
180
+ // Per field, we want to guarantee that the adjusted df does not exceed the number of docs with the field.
181
+ // That is, in the IDF formula (log(1 + (N - n + 0.5) / (n + 0.5))), we need to make sure that n (the
182
+ // adjusted df) is never bigger than N (the number of docs with the field).
183
+ int fieldMaxDoc = Math .min (maxDoc , docCounts [i ]);
184
+ contexts [i ] = ctx = adjustDF (reader .getContext (), ctx , Math .min (fieldMaxDoc , actualDf ));
184
185
prev = current ;
185
186
sumTTF += ctx .totalTermFreq ();
186
187
}
0 commit comments