Skip to content

Commit a0edc5c

Browse files
authored
Polish GetColumn on IDataView (#2738)
* Remove IHostEnvironment from GetColumn's argument list
* Use Schema.Column instead of string to identify which column to load
* Add overload over string
1 parent b0baf12 commit a0edc5c

25 files changed

+174
-112
lines changed

docs/code/MlNetCookBook.md

+9-9
Original file line numberDiff line numberDiff line change
@@ -303,8 +303,8 @@ var someRows = mlContext
303303
// This will give the entire dataset: make sure to only take several rows
304304
// in case the dataset is huge. This is similar to the static API, except
305305
// you have to specify the column name and type.
306-
var featureColumns = transformedData.GetColumn<string[]>(mlContext, "AllFeatures")
307-
.Take(20).ToArray();
306+
var featureColumns = transformedData.GetColumn<string[]>(transformedData.Schema["AllFeatures"])
307+
.Take(20).ToArray();
308308
```
309309
## How do I train a regression model?
310310

@@ -637,7 +637,7 @@ var pipeline =
637637
var normalizedData = pipeline.Fit(trainData).Transform(trainData);
638638

639639
// Inspect one column of the resulting dataset.
640-
var meanVarValues = normalizedData.GetColumn<float[]>(mlContext, "MeanVarNormalized").ToArray();
640+
var meanVarValues = normalizedData.GetColumn<float[]>(normalizedData.Schema["MeanVarNormalized"]).ToArray();
641641
```
642642

643643
## How do I train my model on categorical data?
@@ -682,8 +682,8 @@ var loader = mlContext.Data.CreateTextLoader(new[]
682682
// Load the data.
683683
var data = loader.Load(dataPath);
684684

685-
// Inspect the first 10 records of the categorical columns to check that they are correctly load.
686-
var catColumns = data.GetColumn<string[]>(mlContext, "CategoricalFeatures").Take(10).ToArray();
685+
// Inspect the first 10 records of the categorical columns to check that they are correctly read.
686+
var catColumns = data.GetColumn<string[]>(data.Schema["CategoricalFeatures"]).Take(10).ToArray();
687687

688688
// Build several alternative featurization pipelines.
689689
var pipeline =
@@ -699,8 +699,8 @@ var pipeline =
699699
var transformedData = pipeline.Fit(data).Transform(data);
700700

701701
// Inspect some columns of the resulting dataset.
702-
var categoricalBags = transformedData.GetColumn<float[]>(mlContext, "CategoricalBag").Take(10).ToArray();
703-
var workclasses = transformedData.GetColumn<float[]>(mlContext, "WorkclassOneHotTrimmed").Take(10).ToArray();
702+
var categoricalBags = transformedData.GetColumn<float[]>(transformedData.Schema["CategoricalBag"]).Take(10).ToArray();
703+
var workclasses = transformedData.GetColumn<float[]>(transformedData.Schema["WorkclassOneHotTrimmed"]).Take(10).ToArray();
704704

705705
// Of course, if we want to train the model, we will need to compose a single float vector of all the features.
706706
// Here's how we could do this:
@@ -756,8 +756,8 @@ var loader = mlContext.Data.CreateTextLoader(new[]
756756
// Load the data.
757757
var data = loader.Load(dataPath);
758758

759-
// Inspect the message texts that are load from the file.
760-
var messageTexts = data.GetColumn<string>(mlContext, "Message").Take(20).ToArray();
759+
// Inspect the message texts that are read from the file.
760+
var messageTexts = data.GetColumn<string>(data.Schema["Message"]).Take(20).ToArray();
761761

762762
// Apply various kinds of text operations supported by ML.NET.
763763
var pipeline =

docs/samples/Microsoft.ML.Samples/Dynamic/FeatureSelectionTransform.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,8 @@ public static void Example()
8282
};
8383

8484
// Print the data that results from the transformations.
85-
var countSelectColumn = transformedData.GetColumn<VBuffer<float>>(ml, "FeaturesCountSelect");
86-
var MISelectColumn = transformedData.GetColumn<VBuffer<float>>(ml, "FeaturesMISelect");
85+
var countSelectColumn = transformedData.GetColumn<VBuffer<float>>(transformedData.Schema["FeaturesCountSelect"]);
86+
var MISelectColumn = transformedData.GetColumn<VBuffer<float>>(transformedData.Schema["FeaturesMISelect"]);
8787
printHelper("FeaturesCountSelect", countSelectColumn);
8888
printHelper("FeaturesMISelect", MISelectColumn);
8989

docs/samples/Microsoft.ML.Samples/Dynamic/KeyToValueValueToKey.cs

+3-3
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ public static void Example()
6060
};
6161

6262
// Preview of the DefaultKeys column obtained after processing the input.
63-
var defaultColumn = transformedData_default.GetColumn<VBuffer<uint>>(ml, defaultColumnName);
63+
var defaultColumn = transformedData_default.GetColumn<VBuffer<uint>>(transformedData_default.Schema[defaultColumnName]);
6464
printHelper(defaultColumnName, defaultColumn);
6565

6666
// DefaultKeys column obtained post-transformation.
@@ -71,7 +71,7 @@ public static void Example()
7171
// 9 10 11 12 13 6
7272

7373
// Previewing the CustomizedKeys column obtained after processing the input.
74-
var customizedColumn = transformedData_customized.GetColumn<VBuffer<uint>>(ml, customizedColumnName);
74+
var customizedColumn = transformedData_customized.GetColumn<VBuffer<uint>>(transformedData_customized.Schema[customizedColumnName]);
7575
printHelper(customizedColumnName, customizedColumn);
7676

7777
// CustomizedKeys column obtained post-transformation.
@@ -87,7 +87,7 @@ public static void Example()
8787
transformedData_default = pipeline.Fit(trainData).Transform(trainData);
8888

8989
// Preview of the DefaultColumnName column obtained.
90-
var originalColumnBack = transformedData_default.GetColumn<VBuffer<ReadOnlyMemory<char>>>(ml, defaultColumnName);
90+
var originalColumnBack = transformedData_default.GetColumn<VBuffer<ReadOnlyMemory<char>>>(transformedData_default.Schema[defaultColumnName]);
9191

9292
foreach (var row in originalColumnBack)
9393
{

docs/samples/Microsoft.ML.Samples/Dynamic/LdaTransform.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public static void Example()
3737
var transformed_data = transformer.Transform(trainData);
3838

3939
// Column obtained after processing the input.
40-
var ldaFeaturesColumn = transformed_data.GetColumn<VBuffer<float>>(ml, ldaFeatures);
40+
var ldaFeaturesColumn = transformed_data.GetColumn<VBuffer<float>>(transformed_data.Schema[ldaFeatures]);
4141

4242
Console.WriteLine($"{ldaFeatures} column obtained post-transformation.");
4343
foreach (var featureRow in ldaFeaturesColumn)

docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs

+15-15
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ public static void NgramTransform()
2626
// A pipeline to tokenize text as characters and then combine them together into ngrams
2727
// The pipeline uses the default settings to featurize.
2828

29-
var charsPipeline = ml.Transforms.Text.TokenizeCharacters("Chars", "SentimentText", useMarkerCharacters:false);
30-
var ngramOnePipeline = ml.Transforms.Text.ProduceNgrams("CharsUnigrams", "Chars", ngramLength:1);
29+
var charsPipeline = ml.Transforms.Text.TokenizeCharacters("Chars", "SentimentText", useMarkerCharacters: false);
30+
var ngramOnePipeline = ml.Transforms.Text.ProduceNgrams("CharsUnigrams", "Chars", ngramLength: 1);
3131
var ngramTwpPipeline = ml.Transforms.Text.ProduceNgrams("CharsTwograms", "Chars");
3232
var oneCharsPipeline = charsPipeline.Append(ngramOnePipeline);
3333
var twoCharsPipeline = charsPipeline.Append(ngramTwpPipeline);
@@ -38,30 +38,30 @@ public static void NgramTransform()
3838

3939
// Small helper to print the text inside the columns, in the console.
4040
Action<string, IEnumerable<VBuffer<float>>, VBuffer<ReadOnlyMemory<char>>> printHelper = (columnName, column, names) =>
41-
{
42-
Console.WriteLine($"{columnName} column obtained post-transformation.");
43-
var slots = names.GetValues();
44-
foreach (var featureRow in column)
45-
{
46-
foreach (var item in featureRow.Items())
47-
Console.Write($"'{slots[item.Key]}' - {item.Value} ");
48-
Console.WriteLine("");
49-
}
41+
{
42+
Console.WriteLine($"{columnName} column obtained post-transformation.");
43+
var slots = names.GetValues();
44+
foreach (var featureRow in column)
45+
{
46+
foreach (var item in featureRow.Items())
47+
Console.Write($"'{slots[item.Key]}' - {item.Value} ");
48+
Console.WriteLine("");
49+
}
5050

51-
Console.WriteLine("===================================================");
52-
};
51+
Console.WriteLine("===================================================");
52+
};
5353
// Preview of the CharsUnigrams column obtained after processing the input.
5454
VBuffer<ReadOnlyMemory<char>> slotNames = default;
5555
transformedData_onechars.Schema["CharsUnigrams"].GetSlotNames(ref slotNames);
56-
var charsOneGramColumn = transformedData_onechars.GetColumn<VBuffer<float>>(ml, "CharsUnigrams");
56+
var charsOneGramColumn = transformedData_onechars.GetColumn<VBuffer<float>>(transformedData_onechars.Schema["CharsUnigrams"]);
5757
printHelper("CharsUnigrams", charsOneGramColumn, slotNames);
5858

5959
// CharsUnigrams column obtained post-transformation.
6060
// 'B' - 1 'e' - 6 's' - 1 't' - 1 '<?>' - 4 'g' - 1 'a' - 2 'm' - 1 'I' - 1 ''' - 1 'v' - 2 ...
6161
// 'e' - 1 '<?>' - 2 'd' - 1 '=' - 4 'R' - 1 'U' - 1 'D' - 2 'E' - 1 'u' - 1 ',' - 1 '2' - 1
6262
// 'B' - 0 'e' - 6 's' - 3 't' - 6 '<?>' - 9 'g' - 2 'a' - 2 'm' - 2 'I' - 0 ''' - 0 'v' - 0 ...
6363
// Preview of the CharsTwoGrams column obtained after processing the input.
64-
var charsTwoGramColumn = transformedData_twochars.GetColumn<VBuffer<float>>(ml, "CharsTwograms");
64+
var charsTwoGramColumn = transformedData_twochars.GetColumn<VBuffer<float>>(transformedData_twochars.Schema["CharsTwograms"]);
6565
transformedData_twochars.Schema["CharsTwograms"].GetSlotNames(ref slotNames);
6666
printHelper("CharsTwograms", charsTwoGramColumn, slotNames);
6767

docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs

+3-3
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ public static void Example()
4444
var transformedData = transformer.Transform(trainData);
4545

4646
// Getting the data of the newly created column, so we can preview it.
47-
var normalizedColumn = transformedData.GetColumn<float>(ml, "Induced");
47+
var normalizedColumn = transformedData.GetColumn<float>(transformedData.Schema["Induced"]);
4848

4949
// A small printing utility.
5050
Action<string, IEnumerable<float>> printHelper = (colName, column) =>
@@ -72,8 +72,8 @@ public static void Example()
7272
var multiColtransformedData = multiColtransformer.Transform(trainData);
7373

7474
// Getting the newly created columns.
75-
var normalizedInduced = multiColtransformedData.GetColumn<float>(ml, "LogInduced");
76-
var normalizedSpont = multiColtransformedData.GetColumn<float>(ml, "LogSpontaneous");
75+
var normalizedInduced = multiColtransformedData.GetColumn<float>(multiColtransformedData.Schema["LogInduced"]);
76+
var normalizedSpont = multiColtransformedData.GetColumn<float>(multiColtransformedData.Schema["LogSpontaneous"]);
7777

7878
printHelper("LogInduced", normalizedInduced);
7979

docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs

+3-3
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ public static void Example()
4141
// The transformed (projected) data.
4242
var transformedData = rffPipeline.Fit(trainData).Transform(trainData);
4343
// Getting the data of the newly created column, so we can preview it.
44-
var randomFourier = transformedData.GetColumn<VBuffer<float>>(ml, nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features));
44+
var randomFourier = transformedData.GetColumn<VBuffer<float>>(transformedData.Schema[nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]);
4545

4646
printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), randomFourier);
4747

@@ -59,7 +59,7 @@ public static void Example()
5959
// The transformed (projected) data.
6060
transformedData = lpNormalizePipeline.Fit(trainData).Transform(trainData);
6161
// Getting the data of the newly created column, so we can preview it.
62-
var lpNormalize= transformedData.GetColumn<VBuffer<float>>(ml, nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features));
62+
var lpNormalize= transformedData.GetColumn<VBuffer<float>>(transformedData.Schema[nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]);
6363

6464
printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), lpNormalize);
6565

@@ -77,7 +77,7 @@ public static void Example()
7777
// The transformed (projected) data.
7878
transformedData = gcNormalizePipeline.Fit(trainData).Transform(trainData);
7979
// Getting the data of the newly created column, so we can preview it.
80-
var gcNormalize = transformedData.GetColumn<VBuffer<float>>(ml, nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features));
80+
var gcNormalize = transformedData.GetColumn<VBuffer<float>>(transformedData.Schema[nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]);
8181

8282
printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), gcNormalize);
8383

docs/samples/Microsoft.ML.Samples/Dynamic/StopWordRemoverTransform.cs

+3-3
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,14 @@ public static void Example()
5454
};
5555

5656
// Preview the result of breaking string into array of words.
57-
var originalText = transformedDataDefault.GetColumn<VBuffer<ReadOnlyMemory<char>>>(ml, originalTextColumnName);
57+
var originalText = transformedDataDefault.GetColumn<VBuffer<ReadOnlyMemory<char>>>(transformedDataDefault.Schema[originalTextColumnName]);
5858
printHelper(originalTextColumnName, originalText);
5959
// Best|game|I've|ever|played.|
6060
// == RUDE ==| Dude,| 2 |
6161
// Until | the | next | game,| this |is| the | best | Xbox | game!|
6262

6363
// Preview the result of cleaning with default stop word remover.
64-
var defaultRemoverData = transformedDataDefault.GetColumn<VBuffer<ReadOnlyMemory<char>>>(ml, "DefaultRemover");
64+
var defaultRemoverData = transformedDataDefault.GetColumn<VBuffer<ReadOnlyMemory<char>>>(transformedDataDefault.Schema["DefaultRemover"]);
6565
printHelper("DefaultRemover", defaultRemoverData);
6666
// Best|game|I've|played.|
6767
// == RUDE ==| Dude,| 2 |
@@ -70,7 +70,7 @@ public static void Example()
7070

7171

7272
// Preview the result of cleaning with default customized stop word remover.
73-
var customizeRemoverData = transformedDataCustomized.GetColumn<VBuffer<ReadOnlyMemory<char>>>(ml, "RemovedWords");
73+
var customizeRemoverData = transformedDataCustomized.GetColumn<VBuffer<ReadOnlyMemory<char>>>(transformedDataCustomized.Schema["RemovedWords"]);
7474
printHelper("RemovedWords", customizeRemoverData);
7575

7676
// Best|game|I've|ever|played.|

docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ public static void Example()
5858
};
5959

6060
// Preview of the DefaultTextFeatures column obtained after processing the input.
61-
var defaultColumn = transformedData_default.GetColumn<VBuffer<float>>(ml, defaultColumnName);
61+
var defaultColumn = transformedData_default.GetColumn<VBuffer<float>>(transformedData_default.Schema[defaultColumnName]);
6262
printHelper(defaultColumnName, defaultColumn);
6363

6464
// DefaultTextFeatures column obtained post-transformation.
@@ -68,7 +68,7 @@ public static void Example()
6868
// 0 0.1230915 0.1230915 0.1230915 0.1230915 0.246183 0.246183 0.246183 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.1230915 0 0 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.3692745 0.246183 0.246183 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.246183 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.2886751 0 0 0 0 0 0 0 0.2886751 0.5773503 0.2886751 0.2886751 0.2886751 0.2886751 0.2886751 0.2886751
6969

7070
// Preview of the CustomizedTextFeatures column obtained after processing the input.
71-
var customizedColumn = transformedData_customized.GetColumn<VBuffer<float>>(ml, customizedColumnName);
71+
var customizedColumn = transformedData_customized.GetColumn<VBuffer<float>>(transformedData_customized.Schema[customizedColumnName]);
7272
printHelper(customizedColumnName, customizedColumn);
7373

7474
// CustomizedTextFeatures column obtained post-transformation.

docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhiten.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public static void Example()
4545
// The transformed (projected) data.
4646
var transformedData = whiteningPipeline.Fit(trainData).Transform(trainData);
4747
// Getting the data of the newly created column, so we can preview it.
48-
var whitening = transformedData.GetColumn<VBuffer<float>>(ml, nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features));
48+
var whitening = transformedData.GetColumn<VBuffer<float>>(transformedData.Schema[nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]);
4949

5050
printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), whitening);
5151

docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithColumnOptions.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ public static void Example()
4444
// The transformed (projected) data.
4545
var transformedData = whiteningPipeline.Fit(trainData).Transform(trainData);
4646
// Getting the data of the newly created column, so we can preview it.
47-
var whitening = transformedData.GetColumn<VBuffer<float>>(ml, nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features));
47+
var whitening = transformedData.GetColumn<VBuffer<float>>(transformedData.Schema[nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]);
4848

4949
printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), whitening);
5050

docs/samples/Microsoft.ML.Samples/Dynamic/WordEmbeddingTransform.cs

+3-3
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ public static void Example()
3131

3232
var wordsDataview = wordsPipeline.Fit(trainData).Transform(trainData);
3333
// Preview of the CleanWords column obtained after processing SentimentText.
34-
var cleanWords = wordsDataview.GetColumn<ReadOnlyMemory<char>[]>(ml, "CleanWords");
34+
var cleanWords = wordsDataview.GetColumn<ReadOnlyMemory<char>[]>(wordsDataview.Schema["CleanWords"]);
3535
Console.WriteLine($" CleanWords column obtained post-transformation.");
3636
foreach (var featureRow in cleanWords)
3737
{
@@ -86,7 +86,7 @@ public static void Example()
8686
// And do all required transformations.
8787
var embeddingDataview = pipeline.Fit(wordsDataview).Transform(wordsDataview);
8888

89-
var customEmbeddings = embeddingDataview.GetColumn<float[]>(ml, "CustomEmbeddings");
89+
var customEmbeddings = embeddingDataview.GetColumn<float[]>(embeddingDataview.Schema["CustomEmbeddings"]);
9090
printEmbeddings("CustomEmbeddings", customEmbeddings);
9191

9292
// -1 -2 -3 -0.5 -1 8.5 0 0 20
@@ -98,7 +98,7 @@ public static void Example()
9898
// Second set of 3 floats in output represent average (for each dimension) for extracted values.
9999
// Third set of 3 floats in output represent maximum values (for each dimension) for extracted values.
100100
// Preview of GloveEmbeddings.
101-
var gloveEmbeddings = embeddingDataview.GetColumn<float[]>(ml, "GloveEmbeddings");
101+
var gloveEmbeddings = embeddingDataview.GetColumn<float[]>(embeddingDataview.Schema["GloveEmbeddings"]);
102102
printEmbeddings("GloveEmbeddings", gloveEmbeddings);
103103
// 0.23166 0.048825 0.26878 -1.3945 -0.86072 -0.026778 0.84075 -0.81987 -1.6681 -1.0658 -0.30596 0.50974 ...
104104
//-0.094905 0.61109 0.52546 - 0.2516 0.054786 0.022661 1.1801 0.33329 - 0.85388 0.15471 - 0.5984 0.4364 ...

docs/samples/Microsoft.ML.Samples/Static/FeatureSelectionTransform.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,8 @@ public static void FeatureSelectionTransform()
8383
};
8484

8585
// Print the data that results from the transformations.
86-
var countSelectColumn = transformedData.AsDynamic.GetColumn<VBuffer<float>>(ml, "FeaturesCountSelect");
87-
var MISelectColumn = transformedData.AsDynamic.GetColumn<VBuffer<float>>(ml, "FeaturesMISelect");
86+
var countSelectColumn = transformedData.AsDynamic.GetColumn<VBuffer<float>>(transformedData.AsDynamic.Schema["FeaturesCountSelect"]);
87+
var MISelectColumn = transformedData.AsDynamic.GetColumn<VBuffer<float>>(transformedData.AsDynamic.Schema["FeaturesMISelect"]);
8888
printHelper("FeaturesCountSelect", countSelectColumn);
8989
printHelper("FeaturesMISelect", MISelectColumn);
9090

0 commit comments

Comments
 (0)