Skip to content

Commit cb37c7e

Browse files
authored
Remove ISchematized interface from the codebase. (#1759)
* Remove ISchematized interface from the codebase. 1. Remove ISchematized 2. For any class that requires a Schema, we add a Schema as its field 3. Rename Schema to OutputSchema in IRowToRowMapper * Address comments * Clean redundant Schema field and point Schema to OutputSchema * Replace Schema in SingleValueRowMapper with OutputSchema
1 parent be115f4 commit cb37c7e

File tree

52 files changed

+238
-182
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+238
-182
lines changed

src/Microsoft.ML.Api/PredictionEngine.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ private protected PredictionEngineBase(IHostEnvironment env, ITransformer transf
196196
internal virtual void PredictionEngineCore(IHostEnvironment env, DataViewConstructionUtils.InputRow<TSrc> inputRow, IRowToRowMapper mapper, bool ignoreMissingColumns,
197197
SchemaDefinition inputSchemaDefinition, SchemaDefinition outputSchemaDefinition, out Action disposer, out IRowReadableAs<TDst> outputRow)
198198
{
199-
var cursorable = TypedCursorable<TDst>.Create(env, new EmptyDataView(env, mapper.Schema), ignoreMissingColumns, outputSchemaDefinition);
199+
var cursorable = TypedCursorable<TDst>.Create(env, new EmptyDataView(env, mapper.OutputSchema), ignoreMissingColumns, outputSchemaDefinition);
200200
var outputRowLocal = mapper.GetRow(_inputRow, col => true, out disposer);
201201
outputRow = cursorable.GetRow(outputRowLocal);
202202
}

src/Microsoft.ML.Api/StatefulFilterTransform.cs

+3-1
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,9 @@ private StatefulFilterTransform(IHostEnvironment env, StatefulFilterTransform<TS
9898

9999
public bool CanShuffle { get { return false; } }
100100

101-
public Schema Schema => _bindings.Schema;
101+
Schema IDataView.Schema => OutputSchema;
102+
103+
public Schema OutputSchema => _bindings.Schema;
102104

103105
public long? GetRowCount()
104106
{

src/Microsoft.ML.Core/Data/IDataView.cs

+14-13
Original file line numberDiff line numberDiff line change
@@ -60,22 +60,11 @@ public interface ISchema
6060
void GetMetadata<TValue>(string kind, int col, ref TValue value);
6161
}
6262

63-
/// <summary>
64-
/// Base interface for schematized information. IDataView and IRowCursor both derive from this.
65-
/// </summary>
66-
public interface ISchematized
67-
{
68-
/// <summary>
69-
/// Gets an instance of Schema.
70-
/// </summary>
71-
Schema Schema { get; }
72-
}
73-
7463
/// <summary>
7564
/// The input and output of Query Operators (Transforms). This is the fundamental data pipeline
7665
/// type, comparable to IEnumerable for LINQ.
7766
/// </summary>
78-
public interface IDataView : ISchematized
67+
public interface IDataView
7968
{
8069
/// <summary>
8170
/// Whether this IDataView supports shuffling of rows, to any degree.
@@ -124,6 +113,11 @@ public interface IDataView : ISchematized
124113
/// <returns></returns>
125114
IRowCursor[] GetRowCursorSet(out IRowCursorConsolidator consolidator,
126115
Func<int, bool> needCol, int n, Random rand = null);
116+
117+
/// <summary>
118+
/// Gets an instance of Schema.
119+
/// </summary>
120+
Schema Schema { get; }
127121
}
128122

129123
/// <summary>
@@ -148,7 +142,7 @@ public interface IRowCursorConsolidator
148142
/// A logical row. May be a row of an IDataView or a stand-alone row. If/when its contents
149143
/// change, its ICounted.Counter value is incremented.
150144
/// </summary>
151-
public interface IRow : ISchematized, ICounted
145+
public interface IRow : ICounted
152146
{
153147
/// <summary>
154148
/// Returns whether the given column is active in this row.
@@ -161,6 +155,13 @@ public interface IRow : ISchematized, ICounted
161155
/// <typeparamref name="TValue"/> differs from this column's type.
162156
/// </summary>
163157
ValueGetter<TValue> GetGetter<TValue>(int col);
158+
159+
/// <summary>
160+
/// Gets a <see cref="Schema"/>, which provides name and type information for variables
161+
/// (i.e., columns in ML.NET's type system) stored in this row.
162+
/// </summary>
163+
Schema Schema { get; }
164+
164165
}
165166

166167
/// <summary>

src/Microsoft.ML.Core/Data/ISchemaBindableMapper.cs

+20-5
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,18 @@ public interface ISchemaBindableMapper
3030
/// This interface is used to map a schema from input columns to output columns. The <see cref="ISchemaBoundMapper"/> should keep track
3131
/// of the input columns that are needed for the mapping.
3232
/// </summary>
33-
public interface ISchemaBoundMapper : ISchematized
33+
public interface ISchemaBoundMapper
3434
{
3535
/// <summary>
3636
/// The <see cref="RoleMappedSchema"/> that was passed to the <see cref="ISchemaBoundMapper"/> in the binding process.
3737
/// </summary>
3838
RoleMappedSchema InputRoleMappedSchema { get; }
3939

40+
/// <summary>
41+
/// Gets schema of this mapper's output.
42+
/// </summary>
43+
Schema OutputSchema { get; }
44+
4045
/// <summary>
4146
/// A property to get back the <see cref="ISchemaBindableMapper"/> that produced this <see cref="ISchemaBoundMapper"/>.
4247
/// </summary>
@@ -53,6 +58,11 @@ public interface ISchemaBoundMapper : ISchematized
5358
/// </summary>
5459
public interface ISchemaBoundRowMapper : ISchemaBoundMapper, IRowToRowMapper
5560
{
61+
/// <summary>
62+
/// There are two schemas from <see cref="ISchemaBoundMapper"/> and <see cref="IRowToRowMapper"/>.
63+
/// Since the two parent schemas are identical in all derived classes, we merge them into <see cref="OutputSchema"/>.
64+
/// </summary>
65+
new Schema OutputSchema { get; }
5666
}
5767

5868
/// <summary>
@@ -61,15 +71,20 @@ public interface ISchemaBoundRowMapper : ISchemaBoundMapper, IRowToRowMapper
6171
/// return a subset of the input columns.
6272
/// This interface is similar to <see cref="ISchemaBoundRowMapper"/>, except it does not have any input role mappings,
6373
/// so to rebind, the same input column names must be used.
64-
/// Implementing of this object are typically created using a definie input <see cref="ISchema"/>.
74+
/// Implementations of this interface are typically created over a defined input <see cref="Schema"/>.
6575
/// </summary>
66-
public interface IRowToRowMapper : ISchematized
76+
public interface IRowToRowMapper
6777
{
6878
/// <summary>
6979
/// Mappers are defined as accepting inputs with this very specific schema.
7080
/// </summary>
7181
Schema InputSchema { get; }
7282

83+
/// <summary>
84+
/// Gets an instance of <see cref="Schema"/> which describes the columns' names and types in the output generated by this mapper.
85+
/// </summary>
86+
Schema OutputSchema { get; }
87+
7388
/// <summary>
7489
/// Given a predicate specifying which columns are needed, return a predicate indicating which input columns are
7590
/// needed. The domain of the function is defined over the indices of the columns of <see cref="ISchema.ColumnCount"/>
@@ -82,9 +97,9 @@ public interface IRowToRowMapper : ISchematized
8297
/// The active columns are those for which <paramref name="active"/> returns true. Getting values on inactive
8398
/// columns of the returned row will throw. Null predicates are disallowed.
8499
///
85-
/// The <see cref="ISchematized.Schema"/> of <paramref name="input"/> should be the same object as
100+
/// The <see cref="IRow.Schema"/> of <paramref name="input"/> should be the same object as
86101
/// <see cref="InputSchema"/>. Implementors of this method should throw if that is not the case. Conversely,
87-
/// the returned value must have the same schema as <see cref="ISchematized.Schema"/>.
102+
/// the returned value must have the same schema as <see cref="OutputSchema"/>.
88103
///
89104
/// This method creates a live connection between the input <see cref="IRow"/> and the output <see
90105
/// cref="IRow"/>. In particular, when the getters of the output <see cref="IRow"/> are invoked, they invoke the

src/Microsoft.ML.Core/Data/RoleMappedSchema.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,7 @@ public RoleMappedSchema(Schema schema, string label, string feature,
467467
/// <summary>
468468
/// Encapsulates an <see cref="IDataView"/> plus a corresponding <see cref="RoleMappedSchema"/>.
469469
/// Note that the schema of <see cref="RoleMappedSchema.Schema"/> of <see cref="Schema"/> is
470-
/// guaranteed to equal the the <see cref="ISchematized.Schema"/> of <see cref="Data"/>.
470+
/// guaranteed to equal the <see cref="IDataView.Schema"/> of <see cref="Data"/>.
471471
/// </summary>
472472
public sealed class RoleMappedData
473473
{
@@ -478,7 +478,7 @@ public sealed class RoleMappedData
478478

479479
/// <summary>
480480
/// The role mapped schema. Note that <see cref="Schema"/>'s <see cref="RoleMappedSchema.Schema"/> is
481-
/// guaranteed to be the same as <see cref="Data"/>'s <see cref="ISchematized.Schema"/>.
481+
/// guaranteed to be the same as <see cref="Data"/>'s <see cref="IDataView.Schema"/>.
482482
/// </summary>
483483
public RoleMappedSchema Schema { get; }
484484

src/Microsoft.ML.Core/Data/Schema.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
namespace Microsoft.ML.Data
1515
{
1616
/// <summary>
17-
/// This class represents the schema of an <see cref="ISchematized"/> object (like an <see cref="IDataView"/> or an <see cref="IRow"/>).
17+
/// This class represents the <see cref="Schema"/> of an object like, for instance, an <see cref="IDataView"/> or an <see cref="IRow"/>.
1818
/// On the high level, the schema is a collection of 'columns'. Each column has the following properties:
1919
/// - Column name.
2020
/// - Column type.

src/Microsoft.ML.Data/Commands/ScoreCommand.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -306,8 +306,8 @@ public static TScorerFactory GetScorerComponent(
306306

307307
ComponentCatalog.LoadableClassInfo info = null;
308308
ReadOnlyMemory<char> scoreKind = default;
309-
if (mapper.Schema.Count > 0 &&
310-
mapper.Schema.TryGetMetadata(TextType.Instance, MetadataUtils.Kinds.ScoreColumnKind, 0, ref scoreKind) &&
309+
if (mapper.OutputSchema.Count > 0 &&
310+
mapper.OutputSchema.TryGetMetadata(TextType.Instance, MetadataUtils.Kinds.ScoreColumnKind, 0, ref scoreKind) &&
311311
!scoreKind.IsEmpty)
312312
{
313313
var loadName = scoreKind.ToString();

src/Microsoft.ML.Data/Data/IRowSeekable.cs

+4-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5+
using Microsoft.ML.Data;
56
using System;
67

78
namespace Microsoft.ML.Runtime.Data
@@ -11,9 +12,11 @@ namespace Microsoft.ML.Runtime.Data
1112
/// <summary>
1213
/// Represents a data view that supports random access to a specific row.
1314
/// </summary>
14-
public interface IRowSeekable : ISchematized
15+
public interface IRowSeekable
1516
{
1617
IRowSeeker GetSeeker(Func<int, bool> predicate);
18+
19+
Schema Schema { get; }
1720
}
1821

1922
/// <summary>

src/Microsoft.ML.Data/Data/ITransposeDataView.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ public interface ITransposeDataView : IDataView
2828
/// <summary>
2929
/// An enhanced schema, containing information on the transposition properties, if any,
3030
/// of each column. Note that there is no contract or suggestion that this property
31-
/// should be equal to <see cref="ISchematized.Schema"/>.
31+
/// should be equal to <see cref="IDataView.Schema"/>.
3232
/// </summary>
3333
ITransposeSchema TransposeSchema { get; }
3434

src/Microsoft.ML.Data/DataLoadSave/TransformerChain.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ public IRowToRowMapper GetRowToRowMapper(Schema inputSchema)
227227
for (int i = 0; i < mappers.Length; ++i)
228228
{
229229
mappers[i] = _transformers[i].GetRowToRowMapper(schema);
230-
schema = mappers[i].Schema;
230+
schema = mappers[i].OutputSchema;
231231
}
232232
return new CompositeRowToRowMapper(inputSchema, mappers);
233233
}

src/Microsoft.ML.Data/DataView/CompositeRowToRowMapper.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ public sealed class CompositeRowToRowMapper : IRowToRowMapper
1818
private static readonly IRowToRowMapper[] _empty = new IRowToRowMapper[0];
1919

2020
public Schema InputSchema { get; }
21-
public Schema Schema { get; }
21+
public Schema OutputSchema { get; }
2222

2323
/// <summary>
2424
/// Out of a series of mappers, construct a seemingly unitary mapper that is able to apply them in sequence.
@@ -32,7 +32,7 @@ public CompositeRowToRowMapper(Schema inputSchema, IRowToRowMapper[] mappers)
3232
Contracts.CheckValueOrNull(mappers);
3333
InnerMappers = Utils.Size(mappers) > 0 ? mappers : _empty;
3434
InputSchema = inputSchema;
35-
Schema = Utils.Size(mappers) > 0 ? mappers[mappers.Length - 1].Schema : inputSchema;
35+
OutputSchema = Utils.Size(mappers) > 0 ? mappers[mappers.Length - 1].OutputSchema : inputSchema;
3636
}
3737

3838
public Func<int, bool> GetDependencies(Func<int, bool> predicate)

src/Microsoft.ML.Data/DataView/LambdaFilter.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ private sealed class RowCursor : LinkedRowFilterCursorBase
154154
private T1 _src;
155155

156156
public RowCursor(Impl<T1, T2> parent, IRowCursor input, bool[] active)
157-
: base(parent.Host, input, parent.Schema, active)
157+
: base(parent.Host, input, parent.OutputSchema, active)
158158
{
159159
_getSrc = Input.GetGetter<T1>(parent._colSrc);
160160
if (parent._conv == null)

src/Microsoft.ML.Data/DataView/RowToRowMapperTransform.cs

+5-5
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ private static VersionInfo GetVersionInfo()
7575
loaderAssemblyName: typeof(RowToRowMapperTransform).Assembly.FullName);
7676
}
7777

78-
public override Schema Schema => _bindings.Schema;
78+
public override Schema OutputSchema => _bindings.Schema;
7979

8080
bool ICanSaveOnnx.CanSaveOnnx(OnnxContext ctx) => _mapper is ICanSaveOnnx onnxMapper ? onnxMapper.CanSaveOnnx(ctx) : false;
8181

@@ -233,7 +233,7 @@ public Func<int, bool> GetDependencies(Func<int, bool> predicate)
233233
return predicateInput;
234234
}
235235

236-
Schema IRowToRowMapper.InputSchema => Source.Schema;
236+
public Schema InputSchema => Source.Schema;
237237

238238
public IRow GetRow(IRow input, Func<int, bool> active, out Action disposer)
239239
{
@@ -245,13 +245,13 @@ public IRow GetRow(IRow input, Func<int, bool> active, out Action disposer)
245245
using (var ch = Host.Start("GetEntireRow"))
246246
{
247247
Action disp;
248-
var activeArr = new bool[Schema.ColumnCount];
249-
for (int i = 0; i < Schema.ColumnCount; i++)
248+
var activeArr = new bool[OutputSchema.ColumnCount];
249+
for (int i = 0; i < OutputSchema.ColumnCount; i++)
250250
activeArr[i] = active(i);
251251
var pred = GetActiveOutputColumns(activeArr);
252252
var getters = _mapper.CreateGetters(input, pred, out disp);
253253
disposer += disp;
254-
return new Row(input, this, Schema, getters);
254+
return new Row(input, this, OutputSchema, getters);
255255
}
256256
}
257257

src/Microsoft.ML.Data/Dirty/ChooseColumnsByIndexTransform.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ public override void Save(ModelSaveContext ctx)
245245
_bindings.Save(ctx);
246246
}
247247

248-
public override Schema Schema => _bindings.AsSchema;
248+
public override Schema OutputSchema => _bindings.AsSchema;
249249

250250
protected override bool? ShouldUseParallelCursors(Func<int, bool> predicate)
251251
{

src/Microsoft.ML.Data/EntryPoints/TransformModel.cs

+2
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,8 @@ private sealed class CompositeRowToRowMapper : IRowToRowMapper
193193

194194
public Schema Schema => _chain.Schema;
195195

196+
public Schema OutputSchema => Schema;
197+
196198
public CompositeRowToRowMapper(IExceptionContext ectx, IDataView chain, ISchema rootSchema)
197199
{
198200
Contracts.CheckValue(ectx, nameof(ectx));

src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs

+10-1
Original file line numberDiff line numberDiff line change
@@ -566,7 +566,16 @@ private static VersionInfo GetVersionInfo()
566566

567567
public bool CanShuffle { get { return _transform.CanShuffle; } }
568568

569-
public Schema Schema => _transform.Schema;
569+
/// <summary>
570+
/// Explicit implementation prevents Schema from being accessed from derived classes.
571+
/// It's our first step to separate data produced by transform from transform.
572+
/// </summary>
573+
Schema IDataView.Schema => OutputSchema;
574+
575+
/// <summary>
576+
/// Shape information of the produced output. Note that the input and the output of this transform (and their types) are identical.
577+
/// </summary>
578+
public Schema OutputSchema => _transform.OutputSchema;
570579

571580
public RankerPerInstanceTransform(IHostEnvironment env, IDataView input, string labelCol, string scoreCol, string groupCol,
572581
int truncationLevel, Double[] labelGains)

src/Microsoft.ML.Data/Prediction/Calibrator.cs

+12-12
Original file line numberDiff line numberDiff line change
@@ -528,7 +528,7 @@ private sealed class Bound : ISchemaBoundRowMapper
528528
public ISchemaBindableMapper Bindable => _parent;
529529
public RoleMappedSchema InputRoleMappedSchema => _predictor.InputRoleMappedSchema;
530530
public Schema InputSchema => _predictor.InputSchema;
531-
public Schema Schema { get; }
531+
public Schema OutputSchema { get; }
532532

533533
public Bound(IHostEnvironment env, SchemaBindableCalibratedPredictor parent, RoleMappedSchema schema)
534534
{
@@ -537,16 +537,16 @@ public Bound(IHostEnvironment env, SchemaBindableCalibratedPredictor parent, Rol
537537
_parent = parent;
538538
_predictor = _parent._bindable.Bind(env, schema) as ISchemaBoundRowMapper;
539539
env.Check(_predictor != null, "Predictor is not a row-to-row mapper");
540-
if (!_predictor.Schema.TryGetColumnIndex(MetadataUtils.Const.ScoreValueKind.Score, out _scoreCol))
540+
if (!_predictor.OutputSchema.TryGetColumnIndex(MetadataUtils.Const.ScoreValueKind.Score, out _scoreCol))
541541
throw env.Except("Predictor does not output a score");
542-
var scoreType = _predictor.Schema.GetColumnType(_scoreCol);
542+
var scoreType = _predictor.OutputSchema.GetColumnType(_scoreCol);
543543
env.Check(!scoreType.IsVector && scoreType.IsNumber);
544-
Schema = Schema.Create(new BinaryClassifierSchema());
544+
OutputSchema = Schema.Create(new BinaryClassifierSchema());
545545
}
546546

547547
public Func<int, bool> GetDependencies(Func<int, bool> predicate)
548548
{
549-
for (int i = 0; i < Schema.ColumnCount; i++)
549+
for (int i = 0; i < OutputSchema.ColumnCount; i++)
550550
{
551551
if (predicate(i))
552552
return _predictor.GetDependencies(col => true);
@@ -562,7 +562,7 @@ public Func<int, bool> GetDependencies(Func<int, bool> predicate)
562562
public IRow GetRow(IRow input, Func<int, bool> predicate, out Action disposer)
563563
{
564564
Func<int, bool> predictorPredicate = col => false;
565-
for (int i = 0; i < Schema.ColumnCount; i++)
565+
for (int i = 0; i < OutputSchema.ColumnCount; i++)
566566
{
567567
if (predicate(i))
568568
{
@@ -571,17 +571,17 @@ public IRow GetRow(IRow input, Func<int, bool> predicate, out Action disposer)
571571
}
572572
}
573573
var predictorRow = _predictor.GetRow(input, predictorPredicate, out disposer);
574-
var getters = new Delegate[Schema.ColumnCount];
575-
for (int i = 0; i < Schema.ColumnCount - 1; i++)
574+
var getters = new Delegate[OutputSchema.ColumnCount];
575+
for (int i = 0; i < OutputSchema.ColumnCount - 1; i++)
576576
{
577577
var type = predictorRow.Schema.GetColumnType(i);
578578
if (!predicate(i))
579579
continue;
580580
getters[i] = Utils.MarshalInvoke(GetPredictorGetter<int>, type.RawType, predictorRow, i);
581581
}
582-
if (predicate(Schema.ColumnCount - 1))
583-
getters[Schema.ColumnCount - 1] = GetProbGetter(predictorRow);
584-
return new SimpleRow(Schema, predictorRow, getters);
582+
if (predicate(OutputSchema.ColumnCount - 1))
583+
getters[OutputSchema.ColumnCount - 1] = GetProbGetter(predictorRow);
584+
return new SimpleRow(OutputSchema, predictorRow, getters);
585585
}
586586

587587
private Delegate GetPredictorGetter<T>(IRow input, int col)
@@ -728,7 +728,7 @@ private static bool NeedCalibration(IHostEnvironment env, IChannel ch, ICalibrat
728728

729729
var bindable = ScoreUtils.GetSchemaBindableMapper(env, predictor);
730730
var bound = bindable.Bind(env, schema);
731-
var outputSchema = bound.Schema;
731+
var outputSchema = bound.OutputSchema;
732732
int scoreCol;
733733
if (!outputSchema.TryGetColumnIndex(MetadataUtils.Const.ScoreValueKind.Score, out scoreCol))
734734
{

0 commit comments

Comments
 (0)