@@ -120,7 +120,7 @@ protected void blend(final TermStates[] contexts, int maxDoc, IndexReader reader
         }
         int max = 0;
         long minSumTTF = Long.MAX_VALUE;
-        int minDocCount = Integer.MAX_VALUE;
+        int[] docCounts = new int[contexts.length];
         for (int i = 0; i < contexts.length; i++) {
             TermStates ctx = contexts[i];
             int df = ctx.docFreq();
@@ -134,15 +134,12 @@ protected void blend(final TermStates[] contexts, int maxDoc, IndexReader reader
                 // we need to find out the minimum sumTTF to adjust the statistics
                 // otherwise the statistics don't match
                 minSumTTF = Math.min(minSumTTF, reader.getSumTotalTermFreq(terms[i].field()));
-                minDocCount = Math.min(minDocCount, reader.getDocCount(terms[i].field()));
+                docCounts[i] = reader.getDocCount(terms[i].field());
             }
         }
         if (maxDoc > minSumTTF) {
             maxDoc = (int) minSumTTF;
         }
-        if (maxDoc > minDocCount) {
-            maxDoc = minDocCount;
-        }
         if (max == 0) {
             return; // we are done that term doesn't exist at all
         }
@@ -180,7 +177,11 @@ protected int compare(int i, int j) {
             if (prev > current) {
                 actualDf++;
             }
-            contexts[i] = ctx = adjustDF(reader.getContext(), ctx, Math.min(maxDoc, actualDf));
+            // Per field, we want to guarantee that the adjusted df does not exceed the number of docs with the field.
+            // That is, in the IDF formula (log(1 + (N - n + 0.5) / (n + 0.5))), we need to make sure that n (the
+            // adjusted df) is never bigger than N (the number of docs with the field).
+            int fieldMaxDoc = Math.min(maxDoc, docCounts[i]);
+            contexts[i] = ctx = adjustDF(reader.getContext(), ctx, Math.min(fieldMaxDoc, actualDf));
             prev = current;
             sumTTF += ctx.totalTermFreq();
         }
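
For reference, a minimal standalone sketch (not part of this commit) of how the IDF formula quoted in the new comment behaves once the blended df exceeds the per-field doc count; the class name, helper method, and example numbers are hypothetical and only illustrate why the per-field cap matters:

public class IdfCapSketch {
    // The IDF formula quoted in the comment above: log(1 + (N - n + 0.5) / (n + 0.5)),
    // with n = adjusted doc freq and N = number of docs that contain the field.
    static double idf(long docFreq, long docCount) {
        return Math.log(1 + (docCount - docFreq + 0.5d) / (docFreq + 0.5d));
    }

    public static void main(String[] args) {
        long docCount = 100;   // N: docs that actually contain the field (illustrative value)
        long blendedDf = 150;  // n: df blended across fields, larger than N (illustrative value)

        // Without the per-field cap the IDF goes negative (about -0.399), which can flip scores.
        System.out.println(idf(blendedDf, docCount));

        // With the cap applied, as fieldMaxDoc does in the patch, IDF stays positive (about 0.005).
        System.out.println(idf(Math.min(blendedDf, docCount), docCount));
    }
}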