
Commit 42408aa

Thiago Crepaldi and Rayan Krishnan (rayankrish) authored
Add new PyTorch front-end (microsoft#4815)
* Add ORTTrainerOptions class for the new pytorch frontend (microsoft#4382)
  Add ORTTrainerOptions class and some placeholders
* Add _ORTTrainerModelDesc to perform validation for model description (microsoft#4416)
* Add Loss Scaler classes to the new frontend (microsoft#4306)
* Add TrainStepInfo used on the new frontend API (microsoft#4256)
* Add Optimizer classes to the new frontend (microsoft#4280)
* Add LRScheduler implementation (microsoft#4357)
* Add basic ORTTrainer API (microsoft#4435)
  This PR presents the public API for ORTTrainer for the short-term development. It also validates and saves input parameters, which will be used in the next stages, such as building the ONNX model, post-processing the model and configuring the training session.
* Add opset_version into ORTTrainerOptions and change type of ORTTrainer.loss_fn (microsoft#4592)
* Update ModelDescription and minor fix on ORTTrainer ctor (microsoft#4605)
  This PR keeps the public API intact, but changes how the model description is stored on the backend. Currently, users create a dict with two lists of tuples. One list, called 'inputs', holds tuples in the format tuple(name, shape). The second list, called 'outputs', holds tuples in either tuple(name, shape) or tuple(name, shape, is_loss) format. With this PR, when this dict is passed into ORTTrainer, it is fully validated as usual. However, tuples are internally replaced by namedtuples, and all output tuples take the tuple(name, shape, is_loss) format instead of is_loss being optionally present. In addition to that normalization of the internal representation (which eases coding), two internal methods were created to replace a namedtuple(name, shape) with namedtuple(name, shape, dtype) or namedtuple(name, shape, is_loss, dtype), depending on whether the tuple is an input or an output. This is necessary because ORTTrainer finds out the data type of each input/output during model export to ONNX. Finally, a minor fix was done on ORTTrainer: it could initialize ORTTrainerOptions incorrectly when options=None.
* Rename input name for test
* Add ONNX Model Export to New Frontend (microsoft#4612)
* Create training session + minor improvements (microsoft#4668)
* Save ONNX model in file (microsoft#4671)
* Add eval step (microsoft#4674)
* Add train_step (microsoft#4677)
* Add LR Scheduler (microsoft#4694)
* Add deterministic compute tests (microsoft#4716)
* Add legacy vs experimental ORTTrainer accuracy comparison (microsoft#4727)
* Add Mixed precision/LossScaler + several fixes (microsoft#4739)
  In addition to the mixed precision/loss scaler code, this PR includes:
  * Fix CUDA training
  * Add optimization_step into TrainStepInfo class
  * Refactor LRScheduler to use optimization_step instead of step
  * Update several default values at ORTTrainerOptions
  * Add initial Gradient Accumulation support (untested)
  * Fix ONNX model post processing
  * Refactor unit tests
* Add ONNX BERT example + minor fixes (microsoft#4757)
* Fix training issue when passing ONNX file into ORTTrainer
* Add Dynamic Shape support (microsoft#4758)
* Update DeepSpeed Zero Stage option to a separate option group (microsoft#4772)
* Add support to fetches (microsoft#4777)
* Add Gradient Accumulation Steps support (microsoft#4793)
* Fix Dynamic Axes feature and add unit test (microsoft#4795)
* Add frozen weights test (microsoft#4807)
* Move new pytorch front-end to 'experimental' namespace (microsoft#4814)
* Fix build

Co-authored-by: Thiago Crepaldi <[email protected]>
Co-authored-by: Rayan-Krishnan <[email protected]>
Co-authored-by: Rayan Krishnan <t-rakr@OrtDevTest2v100.af05slrtruoetgaxwwjv5nsq5e.px.internal.cloudapp.net>
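For illustration, a model description in the dict format described for microsoft#4605 could look like the sketch below; every name and shape is hypothetical, and only the structure (an 'inputs' list of tuple(name, shape) and an 'outputs' list with an optional is_loss flag) comes from the commit message above.

    # Hypothetical model description following the format described above
    model_desc = {
        'inputs': [
            ('x', ['batch_size', 784]),            # tuple(name, shape)
            ('target', ['batch_size'])
        ],
        'outputs': [
            ('loss', [], True),                    # tuple(name, shape, is_loss)
            ('probabilities', ['batch_size', 10])  # is_loss may be omitted
        ]
    }
    # Internally ORTTrainer validates this dict, converts the tuples to namedtuples,
    # and later adds the dtype it discovers during ONNX export.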
1 parent 5eec4f6 commit 42408aa

28 files changed: +3740 −4 lines

cmake/onnxruntime_python.cmake

+27
@@ -173,6 +173,15 @@ if (onnxruntime_ENABLE_TRAINING)
   file(GLOB onnxruntime_python_capi_training_srcs CONFIGURE_DEPENDS
     "${ORTTRAINING_SOURCE_DIR}/python/training/*.py"
   )
+  file(GLOB onnxruntime_python_root_srcs CONFIGURE_DEPENDS
+    "${ORTTRAINING_SOURCE_DIR}/python/experimental/*.py"
+  )
+  file(GLOB onnxruntime_python_amp_srcs CONFIGURE_DEPENDS
+    "${ORTTRAINING_SOURCE_DIR}/python/experimental/amp/*.py"
+  )
+  file(GLOB onnxruntime_python_optim_srcs CONFIGURE_DEPENDS
+    "${ORTTRAINING_SOURCE_DIR}/python/experimental/optim/*.py"
+  )
 else()
   file(GLOB onnxruntime_python_capi_training_srcs CONFIGURE_DEPENDS
     "${ONNXRUNTIME_ROOT}/python/training/*.py"
@@ -260,6 +269,24 @@ add_custom_command(
   $<TARGET_FILE_DIR:${test_data_target}>
 )

+if (onnxruntime_ENABLE_TRAINING)
+  add_custom_command(
+    TARGET onnxruntime_pybind11_state POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${test_data_target}>/onnxruntime/experimental
+    COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${test_data_target}>/onnxruntime/experimental/amp
+    COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${test_data_target}>/onnxruntime/experimental/optim
+    COMMAND ${CMAKE_COMMAND} -E copy
+      ${onnxruntime_python_root_srcs}
+      $<TARGET_FILE_DIR:${test_data_target}>/onnxruntime/experimental/
+    COMMAND ${CMAKE_COMMAND} -E copy
+      ${onnxruntime_python_amp_srcs}
+      $<TARGET_FILE_DIR:${test_data_target}>/onnxruntime/experimental/amp/
+    COMMAND ${CMAKE_COMMAND} -E copy
+      ${onnxruntime_python_optim_srcs}
+      $<TARGET_FILE_DIR:${test_data_target}>/onnxruntime/experimental/optim/
+  )
+endif()
+
 if (onnxruntime_USE_DNNL)
   add_custom_command(
     TARGET onnxruntime_pybind11_state POST_BUILD

dockerfiles/Dockerfile.training

+3
@@ -47,6 +47,9 @@ RUN conda install -y \
     pip install \
       onnx=="${ONNX_VERSION}"

+# install cerberus for the new pytorch front-end
+RUN pip install cerberus
+
 # build ucx suite
 # note: openmpi will not select ucx without multithreading enabled
 ARG UCX_VERSION

onnxruntime/__init__.py

+12
@@ -13,9 +13,21 @@
 from onnxruntime.capi._pybind_state import get_all_providers, get_available_providers, get_device, set_seed, \
     RunOptions, SessionOptions, set_default_logger_severity, NodeArg, ModelMetadata, GraphOptimizationLevel, \
     ExecutionMode, OrtDevice, SessionIOBinding
+
+try:
+    from onnxruntime.capi._pybind_state import set_cuda_mem_limit, set_cuda_device_id
+except ImportError:
+    pass
+
 from onnxruntime.capi.session import InferenceSession, IOBinding
 from onnxruntime.capi import onnxruntime_validation

 from onnxruntime.capi.training import *  # noqa: F403

+# TODO: thiagofc: Temporary experimental namespace for new PyTorch front-end
+try:
+    from . import experimental
+except ImportError:
+    pass
+
 onnxruntime_validation.check_distro_info()
@@ -0,0 +1,10 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
from onnxruntime.capi._pybind_state import TrainingParameters
from onnxruntime.capi.training.training_session import TrainingSession

from .orttrainer_options import ORTTrainerOptions
from .orttrainer import ORTTrainer, TrainStepInfo
from . import amp, optim, model_desc_validation
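For orientation, a minimal sketch of how the new namespace is reached once the wheel is built with training enabled; only names exported above are used, and the printed default comes from the DynamicLossScaler defaults later in this commit.

    from onnxruntime import experimental
    from onnxruntime.experimental import amp, optim, ORTTrainer, ORTTrainerOptions

    # amp ships the loss scalers added by this commit; the dynamic scaler
    # starts at 1 << 16 by default (see loss_scaler.py below)
    scaler = amp.DynamicLossScaler()
    print(scaler.loss_scale)  # 65536.0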
@@ -0,0 +1,179 @@
import importlib.util
import numpy as np
import os
import sys
import torch


def get_device_index(device):
    '''Returns device index from a device'''

    if type(device) == str:
        # Could be 'cuda:0', 'cuda:1', or 'cpu'. With cpu, set index=0
        device = torch.device(device)
    return 0 if device.index is None else device.index


def get_device_index_from_input(input):
    '''Returns device index from an input PyTorch Tensor'''

    if isinstance(input, (list, tuple)):
        device_index = get_device_index(input[0].device)
    else:
        device_index = get_device_index(input.device)
    return device_index


def get_all_gradients_finite_name_from_session(session):
    '''Find the all_gradients_finite node on the Session graph and return its name'''

    nodes = [x for x in session._outputs_meta if 'all_gradients_finite' in x.name]
    if len(nodes) != 1:
        raise RuntimeError("'all_gradients_finite' node not found within training session")
    return nodes[0].name


def get_gradient_accumulation_name_from_session(session):
    '''Find the Group_Accumulated_Gradients node on the Session graph and return its name'''

    nodes = [x for x in session._outputs_meta if 'Group_Accumulated_Gradients' in x.name]
    if len(nodes) != 1:
        raise RuntimeError("'Group_Accumulated_Gradients' node not found within training session")
    return nodes[0].name


def dtype_torch_to_numpy(torch_dtype):
    '''Converts PyTorch types to Numpy types

    Also must map to types accepted by:
        MLDataType NumpyTypeToOnnxRuntimeType(int numpy_type)

    References:
        https://docs.scipy.org/doc/numpy-1.13.0/user/basics.types.html
        https://pytorch.org/docs/stable/tensors.html
    '''
    if torch_dtype == torch.float64 or torch_dtype == torch.double:
        return np.float64
    elif torch_dtype == torch.float32 or torch_dtype == torch.float:
        return np.float32
    elif torch_dtype == torch.float16 or torch_dtype == torch.half or torch_dtype == torch.bfloat16:
        # NOTE: numpy doesn't support bfloat16
        return np.float16
    elif torch_dtype == torch.int64 or torch_dtype == torch.long:
        return np.longlong  # np.int64 doesn't work!?
    elif torch_dtype == torch.int32 or torch_dtype == torch.int:
        return np.int32
    elif torch_dtype == torch.int16 or torch_dtype == torch.short:
        return np.int16
    elif torch_dtype == torch.int8:
        return np.int8
    elif torch_dtype == torch.uint8:
        return np.uint8
    elif torch_dtype == torch.complex32 or torch_dtype == torch.complex64:
        # NOTE: numpy doesn't support complex32
        return np.complex64
    elif torch_dtype == torch.complex128 or torch_dtype == torch.cdouble:
        return np.complex128
    elif torch_dtype == torch.bool:
        return np.bool_
    else:
        raise ValueError(
            f'torch_dtype ({str(torch_dtype)}) type is not supported by Numpy')


def dtype_onnx_to_torch(onnx_type):
    '''Converts ONNX types to PyTorch types

    Reference: https://github.com/onnx/onnx/blob/master/onnx/onnx.in.proto (enum DataType)
               https://pytorch.org/docs/stable/tensors.html
    '''
    onnx_types = ['UNDEFINED', 'FLOAT', 'UINT8', 'INT8', 'UINT16', 'INT16', 'INT32', 'INT64', 'STRING',
                  'BOOL', 'FLOAT16', 'DOUBLE', 'UINT32', 'UINT64', 'COMPLEX64', 'COMPLEX128', 'BFLOAT16']

    if isinstance(onnx_type, int):
        assert onnx_type < len(onnx_types), "Invalid onnx_type integer"
    elif isinstance(onnx_type, str):
        onnx_type = onnx_type.upper()
        assert onnx_type in onnx_types, "Invalid onnx_type string"
        onnx_type = onnx_types.index(onnx_type)
    else:
        raise ValueError(
            "'onnx_type' must be an ONNX type represented by either a string or integer")

    if onnx_type == 0:
        return None
    elif onnx_type == 1:
        return torch.float
    elif onnx_type >= 2 and onnx_type <= 3:
        # NOTE: Pytorch doesn't support uint8
        return torch.int8
    elif onnx_type >= 4 and onnx_type <= 5:
        # NOTE: Pytorch doesn't support uint16
        return torch.int16
    elif onnx_type == 6 or onnx_type == 12:
        # NOTE: Pytorch doesn't support uint32
        return torch.int32
    elif onnx_type == 7 or onnx_type == 13:
        # NOTE: Pytorch doesn't support uint64
        return torch.int64
    elif onnx_type == 8:
        return str
    elif onnx_type == 9:
        return torch.bool
    elif onnx_type == 10:
        return torch.float16
    elif onnx_type == 11:
        return torch.double
    elif onnx_type == 14:
        return torch.complex64
    elif onnx_type == 15:
        return torch.complex128
    elif onnx_type == 16:
        return torch.bfloat16


def static_vars(**kwargs):
    r'''Decorator to add :py:attr:`kwargs` as static vars to 'func'

    Example:

    .. code-block:: python

        >>> @static_vars(counter=0)
        ... def myfunc():
        ...     myfunc.counter += 1
        ...     return myfunc.counter
        ...
        >>> print(myfunc())
        1
        >>> print(myfunc())
        2
        >>> print(myfunc())
        3
        >>> myfunc.counter = 100
        >>> print(myfunc())
        101
    '''
    def decorate(func):
        for k in kwargs:
            setattr(func, k, kwargs[k])
        return func
    return decorate


def import_module_from_file(file_path, module_name=None):
    '''Import a Python module from a file into the interpreter'''

    assert isinstance(file_path, str) and os.path.exists(file_path),\
        "'file_path' must be a full path string with the python file to load"
    assert module_name is None or isinstance(module_name, str) and module_name,\
        "'module_name' must be a string with the python module name to load"

    if not module_name:
        module_name = os.path.basename(file_path).split('.')[0]

    spec = importlib.util.spec_from_file_location(module_name, file_path)
    module = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = module
    spec.loader.exec_module(module)
    return module
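A short usage sketch for the type-mapping helpers above; the return values follow directly from the branches shown, and the only assumption is that the module above is importable (its path is not captured in this view).

    import numpy as np
    import torch

    # Assumes dtype_torch_to_numpy / dtype_onnx_to_torch from the module above are in scope.
    # PyTorch dtype -> NumPy type, as accepted by NumpyTypeToOnnxRuntimeType
    assert dtype_torch_to_numpy(torch.float32) is np.float32
    assert dtype_torch_to_numpy(torch.long) is np.longlong

    # ONNX type -> PyTorch dtype, given either the enum integer or its string name
    assert dtype_onnx_to_torch('FLOAT') is torch.float    # enum value 1
    assert dtype_onnx_to_torch(11) is torch.double        # 'DOUBLE'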
@@ -0,0 +1 @@
from .loss_scaler import LossScaler, DynamicLossScaler
@@ -0,0 +1,114 @@
class LossScaler(object):
    r"""Base class for implementing custom loss scaler strategies

    Once the scaler is configured, no user intervention is needed to update loss scale during training.

    Note:
        This class should not be instantiated directly; use it as an abstract base class for custom loss scaling strategies.
    """

    def __init__(self, loss_scale):
        self._input_name = None
        self._loss_scale = loss_scale

    @property
    def input_name(self):
        return self._input_name

    @input_name.setter
    def input_name(self, input_name):
        assert isinstance(input_name, str), "'input_name' must be a string"
        assert input_name is None or len(input_name) > 0, "'input_name' cannot be empty"
        self._input_name = input_name

    @property
    def loss_scale(self):
        return self._loss_scale

    @loss_scale.setter
    def loss_scale(self, loss_scale):
        assert isinstance(loss_scale, float) and loss_scale > 0, "'loss_scale' must be a positive float"
        self._loss_scale = loss_scale

    def reset(self):
        r"""Resets loss scaler internal state"""
        raise NotImplementedError

    def update(self, train_step_info):
        r"""Updates the loss scale based on user input and training session info

        Args:
            train_step_info (TrainStepInfo): last step state information

        Returns:
            Updated loss scale (float)
        """
        raise NotImplementedError


class DynamicLossScaler(LossScaler):
    r"""Default implementation of the :py:class:`.LossScaler` class, used for mixed precision

    This loss scaler starts from an initial scale, which is doubled every time a certain number of
    (stable) training steps is performed without exploding gradients (overflow or infinity).
    When at least one gradient explodes, the loss scale is divided by 2.

    Users can use this class in two ways:

        1. Enable mixed precision without setting a loss scaler class. Default values are used
        2. Enable mixed precision and instantiate this class to override default arguments

    Static loss scaling can be achieved by setting :py:attr:`.automatic_update` to :py:obj:`False`
    and not performing manual :py:meth:`update` in the train loop.

    Args:
        automatic_update (bool, default is True): boolean switch that allows :py:meth:`ORTTrainer.train_step`
            to automatically perform loss scaling. If False, an explicit call to :py:meth:`.update` must be done by the user,
            otherwise static loss scaling is performed.
        loss_scale (default is 1 << 16): A float that represents current loss scale
        up_scale_window (int, default is 2000): number of stable train steps before doubling loss scale
        min_loss_scale (float, default is 1): min value for the loss scale. Used when loss scale is decreased
        max_loss_scale (float, default is 1 << 24): max value for the loss scale. Used when loss scale is increased

    Example with default values:
        .. code-block:: python

            scaler1 = amp.DynamicLossScaler()
            print(f'Default loss scale is {scaler1.loss_scale}')

    Example with user specified values:
        .. code-block:: python

            scaler2 = amp.DynamicLossScaler(loss_scale=1<<8)
            print(f'Custom loss scale is {scaler2.loss_scale}')
    """

    def __init__(self, automatic_update=True,
                 loss_scale=float(1 << 16),
                 up_scale_window=2000,
                 min_loss_scale=1.0,
                 max_loss_scale=float(1 << 24)):
        super().__init__(loss_scale)
        self.automatic_update = automatic_update
        self.up_scale_window = up_scale_window
        self.min_loss_scale = min_loss_scale
        self.max_loss_scale = max_loss_scale

        self._initial_loss_scale = loss_scale
        self._stable_steps_count = 0

    def reset(self):
        self.loss_scale = self._initial_loss_scale
        self._stable_steps_count = 0

    def update(self, train_step_info):
        if train_step_info.all_finite:
            self._stable_steps_count += 1

            if self._stable_steps_count >= self.up_scale_window:
                self.loss_scale = min(self.max_loss_scale, self.loss_scale * 2)
                self._stable_steps_count = 0
        else:
            self.loss_scale = max(self.min_loss_scale, self.loss_scale / 2)
            self._stable_steps_count = 0
        return self.loss_scale
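A small usage sketch of the dynamic scaling loop above; TrainStepInfo is stood in for by a hypothetical namedtuple exposing only the all_finite flag that update() reads.

    from collections import namedtuple

    # Hypothetical stand-in for TrainStepInfo; update() only reads .all_finite here
    FakeStepInfo = namedtuple('FakeStepInfo', ['all_finite'])

    scaler = DynamicLossScaler(up_scale_window=2)
    print(scaler.loss_scale)                       # 65536.0, i.e. 1 << 16
    scaler.update(FakeStepInfo(all_finite=True))
    scaler.update(FakeStepInfo(all_finite=True))   # two stable steps -> scale doubles
    print(scaler.loss_scale)                       # 131072.0
    scaler.update(FakeStepInfo(all_finite=False))  # overflow -> scale is halved
    print(scaler.loss_scale)                       # 65536.0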
