Skip to content

Commit 17b72c8

Browse files
Release v2.8.1 of NNCF to master
1 parent 72eb39c commit 17b72c8

33 files changed

+279
-146
lines changed

ReleaseNotes.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,16 @@
11
# Release Notes
22

3+
## New in Release 2.8.1
4+
5+
Post-training Quantization:
6+
7+
- Bugfixes:
8+
- (Common) Fixed issue with `nncf.compress_weights()` to avoid overflows on 32-bit Windows systems.
9+
- (Common) Fixed performance issue with `nncf.compress_weights()` on Llama models.
10+
- (Common) Fixed the `nncf.quantize_with_accuracy_control()` pipeline when the `tune_hyperparams=True` option is enabled.
11+
- (OpenVINO) Fixed an issue with stateful LLM models and added state restoration after inference for them.
12+
- (PyTorch) Fixed an issue with `nncf.compress_weights()` for LLM models when `is_floating_point` is executed with tracing.
13+
314
## New in Release 2.8.0
415

516
Post-training Quantization:

docs/Installation.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ as well as the supported versions of Python:
7070
| NNCF | OpenVINO | PyTorch | ONNX | TensorFlow | Python |
7171
|-----------|------------|----------|----------|------------|--------|
7272
| `develop` | `2023.3.0` | `2.1.2` | `1.13.1` | `2.12.0` | `3.8` |
73+
| `2.8.1` | `2023.3.0` | `2.1.2` | `1.13.1` | `2.12.0` | `3.8` |
7374
| `2.8.0` | `2023.3.0` | `2.1.2` | `1.13.1` | `2.12.0` | `3.8` |
7475
| `2.7.0` | `2023.2.0` | `2.1` | `1.13.1` | `2.12.0` | `3.8` |
7576
| `2.6.0` | `2023.1.0` | `2.0.1` | `1.13.1` | `2.12.0` | `3.8` |

nncf/common/factory.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,7 @@
1717
from nncf.common.graph.transformations.command_creation import CommandCreator
1818
from nncf.common.tensor_statistics import aggregator
1919
from nncf.common.utils.backend import BackendType
20-
from nncf.common.utils.backend import get_available_backends
2120
from nncf.common.utils.backend import get_backend
22-
from nncf.common.utils.backend import is_openvino_compiled_model
2321
from nncf.data.dataset import Dataset
2422

2523
TModel = TypeVar("TModel")
@@ -86,12 +84,6 @@ def create(model: TModel) -> Engine:
8684
:param model: backend-specific model instance.
8785
:return: backend-specific Engine instance.
8886
"""
89-
available_backends = get_available_backends()
90-
if BackendType.OPENVINO in available_backends and is_openvino_compiled_model(model):
91-
from nncf.openvino.engine import OVCompiledModelEngine
92-
93-
return OVCompiledModelEngine(model)
94-
9587
model_backend = get_backend(model)
9688
if model_backend == BackendType.ONNX:
9789
from nncf.onnx.engine import ONNXEngine

nncf/openvino/engine.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import openvino.runtime as ov
1616

1717
from nncf.common.engine import Engine
18+
from nncf.openvino.graph.model_utils import model_has_state
1819
from nncf.parameters import TargetDevice
1920

2021

@@ -27,11 +28,12 @@ class OVCompiledModelEngine(Engine):
2728
to infer the compiled model.
2829
"""
2930

30-
def __init__(self, model: ov.CompiledModel):
31-
self.compiled_model = model
31+
def __init__(self, compiled_model: ov.CompiledModel, stateful: bool):
32+
self.infer_request = compiled_model.create_infer_request()
33+
self.reset_state = stateful and hasattr(self.infer_request, "reset_state")
3234
self.input_tensor_names = set()
33-
self.number_of_inputs = len(model.inputs)
34-
for model_input in model.inputs:
35+
self.number_of_inputs = len(compiled_model.inputs)
36+
for model_input in compiled_model.inputs:
3537
self.input_tensor_names.update(model_input.get_names())
3638

3739
def _check_input_data_format(
@@ -63,7 +65,11 @@ def infer(
6365
:return output_data: Model's output.
6466
"""
6567
self._check_input_data_format(input_data)
66-
model_outputs = self.compiled_model(input_data)
68+
69+
if self.reset_state:
70+
self.infer_request.reset_state()
71+
72+
model_outputs = self.infer_request.infer(input_data, share_inputs=True)
6773

6874
output_data = {}
6975
for tensor, value in model_outputs.items():
@@ -86,8 +92,9 @@ def __init__(self, model: ov.Model, target_device: TargetDevice = TargetDevice.C
8692
target_device = TargetDevice.CPU
8793

8894
ie = ov.Core()
95+
stateful = model_has_state(model)
8996
compiled_model = ie.compile_model(model, target_device.value)
90-
self.engine = OVCompiledModelEngine(compiled_model)
97+
self.engine = OVCompiledModelEngine(compiled_model, stateful)
9198

9299
def infer(
93100
self, input_data: Union[np.ndarray, List[np.ndarray], Tuple[np.ndarray], Dict[str, np.ndarray]]

nncf/openvino/graph/model_utils.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,13 @@ def get_start_nodes_for_activation_path_tracing(nncf_graph: NNCFGraph) -> List[N
6060
:return: Target NNCFGraph input nodes.
6161
"""
6262
return nncf_graph.get_input_nodes() + nncf_graph.get_nodes_by_metatypes([OVReadValueMetatype])
63+
64+
65+
def model_has_state(model: ov.Model) -> bool:
66+
"""
67+
Returns True if model has state else False
68+
69+
:param model: OpenVINO model
70+
:return: True if model has state else False
71+
"""
72+
return len(model.get_sinks()) > 0

nncf/openvino/quantization/quantize_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ def native_quantize_with_accuracy_control_impl(
263263
fast_bias_correction,
264264
model_type,
265265
ignored_scope,
266-
advanced_quantization_parameters,
266+
copied_parameters,
267267
)
268268
tuned_quantized_metric_results = evaluator.collect_metric_results(
269269
tuned_quantized_model, validation_dataset, model_name="tuned"

nncf/quantization/algorithms/accuracy_control/backend.py

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from abc import abstractmethod
1414
from typing import Any, List, Optional, TypeVar
1515

16+
from nncf.common.engine import Engine
1617
from nncf.common.graph.graph import NNCFGraph
1718
from nncf.common.graph.graph import NNCFNode
1819
from nncf.common.graph.operator_metatypes import OperatorMetatype
@@ -21,6 +22,35 @@
2122
TPModel = TypeVar("TPModel")
2223

2324

25+
class PreparedModel(ABC):
26+
@property
27+
@abstractmethod
28+
def model_for_inference(self) -> TPModel:
29+
"""
30+
Returns prepared model for inference.
31+
32+
:return: Prepared model for inference.
33+
"""
34+
35+
@property
36+
@abstractmethod
37+
def engine(self) -> Engine:
38+
"""
39+
Returns the engine used to run inference on the prepared model.
40+
41+
:return: The engine used to run inference on the prepared model.
42+
"""
43+
44+
def __call__(self, input_data: Any) -> Any:
45+
"""
46+
Runs model on the provided input data and returns the raw model outputs.
47+
48+
:param input_data: inputs for the model
49+
:return: raw model outputs
50+
"""
51+
return self.engine.infer(input_data)
52+
53+
2454
class AccuracyControlAlgoBackend(ABC):
2555
# Metatypes
2656

@@ -158,15 +188,3 @@ def get_model_size(model: TModel) -> int:
158188
:param model: A model
159189
:return: Model size (in bytes)
160190
"""
161-
162-
# Preparation of model
163-
164-
@staticmethod
165-
@abstractmethod
166-
def prepare_for_inference(model: TModel) -> TPModel:
167-
"""
168-
Prepares model for inference.
169-
170-
:param model: A model that should be prepared.
171-
:return: Prepared model for inference.
172-
"""

nncf/quantization/algorithms/accuracy_control/evaluator.py

Lines changed: 26 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,14 @@
1212
from dataclasses import dataclass
1313
from typing import Any, Callable, Iterable, List, Optional, Tuple, TypeVar, Union
1414

15-
from nncf.common.factory import EngineFactory
1615
from nncf.common.logging import nncf_logger
1716
from nncf.common.utils.backend import BackendType
1817
from nncf.common.utils.backend import get_backend
1918
from nncf.common.utils.timer import timer
2019
from nncf.data.dataset import Dataset
20+
from nncf.quantization.algorithms.accuracy_control.backend import PreparedModel
2121

2222
TModel = TypeVar("TModel")
23-
TPModel = TypeVar("TPModel")
2423
TTensor = TypeVar("TTensor")
2524

2625

@@ -111,7 +110,7 @@ def is_metric_mode(self) -> bool:
111110
"""
112111
return self._metric_mode
113112

114-
def prepare_model_for_inference(self, model: TModel) -> TPModel:
113+
def prepare_model(self, model: TModel) -> PreparedModel:
115114
"""
116115
Prepares model for inference.
117116
@@ -121,21 +120,19 @@ def prepare_model_for_inference(self, model: TModel) -> TPModel:
121120
backend = get_backend(model)
122121

123122
if backend == BackendType.OPENVINO:
124-
import openvino.runtime as ov
123+
from nncf.quantization.algorithms.accuracy_control.openvino_backend import OVPreparedModel
125124

126-
return ov.compile_model(model)
125+
return OVPreparedModel(model)
127126

128-
raise NotImplementedError(
129-
f"The `prepare_model_for_inference()` method is not implemented for the {backend} backend."
130-
)
127+
raise NotImplementedError(f"The `prepare_model()` method is not implemented for the {backend} backend.")
131128

132-
def validate_model_for_inference(
133-
self, model_for_inference: TPModel, dataset: Dataset, indices: Optional[List[int]] = None
129+
def validate_prepared_model(
130+
self, prepared_model: PreparedModel, dataset: Dataset, indices: Optional[List[int]] = None
134131
):
135132
"""
136133
Validates prepared model for inference.
137134
138-
:param model: Prepared model to validate.
135+
:param prepared_model: Prepared model to validate.
139136
:param dataset: Dataset to validate the model.
140137
:param indices: Zero-based indices of data items that should be selected from
141138
the dataset.
@@ -147,7 +144,7 @@ def validate_model_for_inference(
147144
item.
148145
"""
149146
if self._metric_mode is None:
150-
self._metric_mode = Evaluator.determine_mode(model_for_inference, dataset, self._validation_fn)
147+
self._metric_mode = Evaluator.determine_mode(prepared_model, dataset, self._validation_fn)
151148

152149
if not self.is_metric_mode() and indices is not None:
153150
raise ValueError("The `indices` parameter can be used only if Evaluator.is_metric_mode() = True")
@@ -156,7 +153,7 @@ def validate_model_for_inference(
156153
if self._enable_iteration_count:
157154
validation_dataset = IterationCounter(validation_dataset)
158155

159-
metric, values_for_each_item = self._validation_fn(model_for_inference, validation_dataset)
156+
metric, values_for_each_item = self._validation_fn(prepared_model.model_for_inference, validation_dataset)
160157

161158
self._num_passed_iterations = validation_dataset.num_iterations if self._enable_iteration_count else 0
162159

@@ -189,20 +186,20 @@ def validate(
189186
Otherwise, if the condition is false, it represents list of logits for each
190187
item.
191188
"""
192-
model_for_inference = self.prepare_model_for_inference(model)
193-
return self.validate_model_for_inference(model_for_inference, dataset, indices)
189+
prepared_model = self.prepare_model(model)
190+
return self.validate_prepared_model(prepared_model, dataset, indices)
194191

195192
@staticmethod
196193
def determine_mode(
197-
model_for_inference: TPModel,
194+
prepared_model: PreparedModel,
198195
dataset: Dataset,
199196
validation_fn: Callable[[Any, Iterable[Any]], Tuple[float, Union[None, List[float], List[List[TTensor]]]]],
200197
) -> bool:
201198
"""
202199
Determines mode based on the type of returned value from the
203200
validation function.
204201
205-
:param model_for_inference: Model to validate.
202+
:param prepared_model: Model to validate.
206203
:param dataset: Dataset to validate the model.
207204
:param validation_fn: Validation function to validate model.
208205
:return: A boolean indicator where `True` means that the `Evaluator` collects
@@ -214,7 +211,7 @@ def determine_mode(
214211
data_item = dataset.get_data([0])
215212

216213
try:
217-
metric_value, values_for_each_item = validation_fn(model_for_inference, data_item)
214+
metric_value, values_for_each_item = validation_fn(prepared_model.model_for_inference, data_item)
218215
except Exception:
219216
metric_mode = False
220217

@@ -261,15 +258,15 @@ def determine_mode(
261258

262259
return metric_mode
263260

264-
def collect_values_for_each_item_using_model_for_inference(
265-
self, model_for_inference: TPModel, dataset: Dataset, indices: Optional[List[int]] = None
261+
def collect_values_for_each_item_using_prepared_model(
262+
self, prepared_model: PreparedModel, dataset: Dataset, indices: Optional[List[int]] = None
266263
) -> Union[List[float], List[List[TTensor]]]:
267264
"""
268265
Collects value for each item from the dataset using prepared model for inference.
269266
If `is_metric_mode()` returns `True` then i-th value is a metric for i-th data item.
270267
It is an output of the model for i-th data item otherwise.
271268
272-
:param model: Model to infer.
269+
:param prepared_model: Model to infer.
273270
:param dataset: Dataset to collect values.
274271
:param indices: The zero-based indices of data items that should be selected from
275272
the dataset.
@@ -278,15 +275,14 @@ def collect_values_for_each_item_using_model_for_inference(
278275
if self._metric_mode:
279276
# Collect metrics for each item
280277
values_for_each_item = [
281-
self._validation_fn(model_for_inference, [data_item])[0] for data_item in dataset.get_data(indices)
278+
self._validation_fn(prepared_model.model_for_inference, [data_item])[0]
279+
for data_item in dataset.get_data(indices)
282280
]
283281
else:
284282
# Collect outputs for each item
285-
engine = EngineFactory.create(model_for_inference)
286-
287283
values_for_each_item = []
288284
for data_item in dataset.get_inference_data(indices):
289-
logits = engine.infer(data_item)
285+
logits = prepared_model(data_item)
290286
values_for_each_item.append(list(logits.values()))
291287

292288
self._num_passed_iterations = len(values_for_each_item) if self._enable_iteration_count else 0
@@ -307,8 +303,8 @@ def collect_values_for_each_item(
307303
the dataset.
308304
:return: Collected values.
309305
"""
310-
model_for_inference = self.prepare_model_for_inference(model)
311-
return self.collect_values_for_each_item_using_model_for_inference(model_for_inference, dataset, indices)
306+
prepared_model = self.prepare_model(model)
307+
return self.collect_values_for_each_item_using_prepared_model(prepared_model, dataset, indices)
312308

313309
def collect_metric_results(self, model: TModel, dataset: Dataset, model_name: str = "") -> MetricResults:
314310
"""
@@ -322,18 +318,16 @@ def collect_metric_results(self, model: TModel, dataset: Dataset, model_name: st
322318
nncf_logger.info(f"Validation of {model_name} model was started")
323319

324320
with timer() as preparation_time:
325-
model_for_inference = self.prepare_model_for_inference(model)
321+
prepared_model = self.prepare_model(model)
326322

327323
with timer() as validation_time:
328-
metric, values_for_each_item = self.validate_model_for_inference(model_for_inference, dataset)
324+
metric, values_for_each_item = self.validate_prepared_model(prepared_model, dataset)
329325

330326
nncf_logger.info(f"Metric of {model_name} model: {metric}")
331327

332328
if values_for_each_item is None:
333329
nncf_logger.info(f"Collecting values for each data item using the {model_name} model")
334330
with timer():
335-
values_for_each_item = self.collect_values_for_each_item_using_model_for_inference(
336-
model_for_inference, dataset
337-
)
331+
values_for_each_item = self.collect_values_for_each_item_using_prepared_model(prepared_model, dataset)
338332

339333
return MetricResults(metric, values_for_each_item, preparation_time(), validation_time())

0 commit comments

Comments
 (0)