Skip to content

Polish GetColumn on IDataView #2738

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Feb 27, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions docs/code/MlNetCookBook.md
Original file line number Diff line number Diff line change
Expand Up @@ -303,8 +303,8 @@ var someRows = mlContext
// This will give the entire dataset: make sure to only take several rows
// in case the dataset is huge. This is similar to the static API, except
// you have to specify the column name and type.
var featureColumns = transformedData.GetColumn<string[]>(mlContext, "AllFeatures")
.Take(20).ToArray();
var featureColumns = transformedData.GetColumn<string[]>(transformedData.Schema["AllFeatures"])

```
## How do I train a regression model?

Expand Down Expand Up @@ -637,7 +637,7 @@ var pipeline =
var normalizedData = pipeline.Fit(trainData).Transform(trainData);

// Inspect one column of the resulting dataset.
var meanVarValues = normalizedData.GetColumn<float[]>(mlContext, "MeanVarNormalized").ToArray();
var meanVarValues = normalizedData.GetColumn<float[]>(normalizedData.Schema["MeanVarNormalized"]).ToArray();
```

## How do I train my model on categorical data?
Expand Down Expand Up @@ -682,8 +682,8 @@ var loader = mlContext.Data.CreateTextLoader(new[]
// Load the data.
var data = loader.Load(dataPath);

// Inspect the first 10 records of the categorical columns to check that they are correctly load.
var catColumns = data.GetColumn<string[]>(mlContext, "CategoricalFeatures").Take(10).ToArray();
// Inspect the first 10 records of the categorical columns to check that they are correctly read.
var catColumns = data.GetColumn<string[]>(data.Schema["CategoricalFeatures"]).Take(10).ToArray();

// Build several alternative featurization pipelines.
var pipeline =
Expand All @@ -699,8 +699,8 @@ var pipeline =
var transformedData = pipeline.Fit(data).Transform(data);

// Inspect some columns of the resulting dataset.
var categoricalBags = transformedData.GetColumn<float[]>(mlContext, "CategoricalBag").Take(10).ToArray();
var workclasses = transformedData.GetColumn<float[]>(mlContext, "WorkclassOneHotTrimmed").Take(10).ToArray();
var categoricalBags = transformedData.GetColumn<float[]>(transformedData.Schema["CategoricalBag"]).Take(10).ToArray();
var workclasses = transformedData.GetColumn<float[]>(transformedData.Schema["WorkclassOneHotTrimmed"]).Take(10).ToArray();

// Of course, if we want to train the model, we will need to compose a single float vector of all the features.
// Here's how we could do this:
Expand Down Expand Up @@ -756,8 +756,8 @@ var loader = mlContext.Data.CreateTextLoader(new[]
// Load the data.
var data = loader.Load(dataPath);

// Inspect the message texts that are load from the file.
var messageTexts = data.GetColumn<string>(mlContext, "Message").Take(20).ToArray();
// Inspect the message texts that are read from the file.
var messageTexts = data.GetColumn<string>(data.Schema["Message"]).Take(20).ToArray();

// Apply various kinds of text operations supported by ML.NET.
var pipeline =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ public static void Example()
};

// Print the data that results from the transformations.
var countSelectColumn = transformedData.GetColumn<VBuffer<float>>(ml, "FeaturesCountSelect");
var MISelectColumn = transformedData.GetColumn<VBuffer<float>>(ml, "FeaturesMISelect");
var countSelectColumn = transformedData.GetColumn<VBuffer<float>>(transformedData.Schema["FeaturesCountSelect"]);
var MISelectColumn = transformedData.GetColumn<VBuffer<float>>(transformedData.Schema["FeaturesMISelect"]);
printHelper("FeaturesCountSelect", countSelectColumn);
printHelper("FeaturesMISelect", MISelectColumn);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ public static void Example()
};

// Preview of the DefaultKeys column obtained after processing the input.
var defaultColumn = transformedData_default.GetColumn<VBuffer<uint>>(ml, defaultColumnName);
var defaultColumn = transformedData_default.GetColumn<VBuffer<uint>>(transformedData_default.Schema[defaultColumnName]);
printHelper(defaultColumnName, defaultColumn);

// DefaultKeys column obtained post-transformation.
Expand All @@ -71,7 +71,7 @@ public static void Example()
// 9 10 11 12 13 6

// Previewing the CustomizedKeys column obtained after processing the input.
var customizedColumn = transformedData_customized.GetColumn<VBuffer<uint>>(ml, customizedColumnName);
var customizedColumn = transformedData_customized.GetColumn<VBuffer<uint>>(transformedData_customized.Schema[customizedColumnName]);
printHelper(customizedColumnName, customizedColumn);

// CustomizedKeys column obtained post-transformation.
Expand All @@ -87,7 +87,7 @@ public static void Example()
transformedData_default = pipeline.Fit(trainData).Transform(trainData);

// Preview of the DefaultColumnName column obtained.
var originalColumnBack = transformedData_default.GetColumn<VBuffer<ReadOnlyMemory<char>>>(ml, defaultColumnName);
var originalColumnBack = transformedData_default.GetColumn<VBuffer<ReadOnlyMemory<char>>>(transformedData_default.Schema[defaultColumnName]);

foreach (var row in originalColumnBack)
{
Expand Down
2 changes: 1 addition & 1 deletion docs/samples/Microsoft.ML.Samples/Dynamic/LdaTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public static void Example()
var transformed_data = transformer.Transform(trainData);

// Column obtained after processing the input.
var ldaFeaturesColumn = transformed_data.GetColumn<VBuffer<float>>(ml, ldaFeatures);
var ldaFeaturesColumn = transformed_data.GetColumn<VBuffer<float>>(transformed_data.Schema[ldaFeatures]);

Console.WriteLine($"{ldaFeatures} column obtained post-transformation.");
foreach (var featureRow in ldaFeaturesColumn)
Expand Down
30 changes: 15 additions & 15 deletions docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ public static void NgramTransform()
// A pipeline to tokenize text as characters and then combine them together into ngrams
// The pipeline uses the default settings to featurize.

var charsPipeline = ml.Transforms.Text.TokenizeCharacters("Chars", "SentimentText", useMarkerCharacters:false);
var ngramOnePipeline = ml.Transforms.Text.ProduceNgrams("CharsUnigrams", "Chars", ngramLength:1);
var charsPipeline = ml.Transforms.Text.TokenizeCharacters("Chars", "SentimentText", useMarkerCharacters: false);
var ngramOnePipeline = ml.Transforms.Text.ProduceNgrams("CharsUnigrams", "Chars", ngramLength: 1);
var ngramTwpPipeline = ml.Transforms.Text.ProduceNgrams("CharsTwograms", "Chars");
var oneCharsPipeline = charsPipeline.Append(ngramOnePipeline);
var twoCharsPipeline = charsPipeline.Append(ngramTwpPipeline);
Expand All @@ -38,30 +38,30 @@ public static void NgramTransform()

// Small helper to print the text inside the columns, in the console.
Action<string, IEnumerable<VBuffer<float>>, VBuffer<ReadOnlyMemory<char>>> printHelper = (columnName, column, names) =>
{
Console.WriteLine($"{columnName} column obtained post-transformation.");
var slots = names.GetValues();
foreach (var featureRow in column)
{
foreach (var item in featureRow.Items())
Console.Write($"'{slots[item.Key]}' - {item.Value} ");
Console.WriteLine("");
}
{
Console.WriteLine($"{columnName} column obtained post-transformation.");
var slots = names.GetValues();
foreach (var featureRow in column)
{
foreach (var item in featureRow.Items())
Console.Write($"'{slots[item.Key]}' - {item.Value} ");
Console.WriteLine("");
}

Console.WriteLine("===================================================");
};
Console.WriteLine("===================================================");
};
// Preview of the CharsUnigrams column obtained after processing the input.
VBuffer<ReadOnlyMemory<char>> slotNames = default;
transformedData_onechars.Schema["CharsUnigrams"].GetSlotNames(ref slotNames);
var charsOneGramColumn = transformedData_onechars.GetColumn<VBuffer<float>>(ml, "CharsUnigrams");
var charsOneGramColumn = transformedData_onechars.GetColumn<VBuffer<float>>(transformedData_onechars.Schema["CharsUnigrams"]);
printHelper("CharsUnigrams", charsOneGramColumn, slotNames);

// CharsUnigrams column obtained post-transformation.
// 'B' - 1 'e' - 6 's' - 1 't' - 1 '<?>' - 4 'g' - 1 'a' - 2 'm' - 1 'I' - 1 ''' - 1 'v' - 2 ...
// 'e' - 1 '<?>' - 2 'd' - 1 '=' - 4 'R' - 1 'U' - 1 'D' - 2 'E' - 1 'u' - 1 ',' - 1 '2' - 1
// 'B' - 0 'e' - 6 's' - 3 't' - 6 '<?>' - 9 'g' - 2 'a' - 2 'm' - 2 'I' - 0 ''' - 0 'v' - 0 ...
// Preview of the CharsTwograms column obtained after processing the input.
var charsTwoGramColumn = transformedData_twochars.GetColumn<VBuffer<float>>(ml, "CharsTwograms");
var charsTwoGramColumn = transformedData_twochars.GetColumn<VBuffer<float>>(transformedData_onechars.Schema["CharsUnigrams"]);
transformedData_twochars.Schema["CharsTwograms"].GetSlotNames(ref slotNames);
printHelper("CharsTwograms", charsTwoGramColumn, slotNames);

Expand Down
6 changes: 3 additions & 3 deletions docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public static void Example()
var transformedData = transformer.Transform(trainData);

// Getting the data of the newly created column, so we can preview it.
var normalizedColumn = transformedData.GetColumn<float>(ml, "Induced");
var normalizedColumn = transformedData.GetColumn<float>(transformedData.Schema["Induced"]);

// A small printing utility.
Action<string, IEnumerable<float>> printHelper = (colName, column) =>
Expand Down Expand Up @@ -72,8 +72,8 @@ public static void Example()
var multiColtransformedData = multiColtransformer.Transform(trainData);

// Getting the newly created columns.
var normalizedInduced = multiColtransformedData.GetColumn<float>(ml, "LogInduced");
var normalizedSpont = multiColtransformedData.GetColumn<float>(ml, "LogSpontaneous");
var normalizedInduced = multiColtransformedData.GetColumn<float>(multiColtransformedData.Schema["LogInduced"]);
var normalizedSpont = multiColtransformedData.GetColumn<float>(multiColtransformedData.Schema["LogSpontaneous"]);

printHelper("LogInduced", normalizedInduced);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public static void Example()
// The transformed (projected) data.
var transformedData = rffPipeline.Fit(trainData).Transform(trainData);
// Getting the data of the newly created column, so we can preview it.
var randomFourier = transformedData.GetColumn<VBuffer<float>>(ml, nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features));
var randomFourier = transformedData.GetColumn<VBuffer<float>>(transformedData.Schema[nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]);

printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), randomFourier);

Expand All @@ -59,7 +59,7 @@ public static void Example()
// The transformed (projected) data.
transformedData = lpNormalizePipeline.Fit(trainData).Transform(trainData);
// Getting the data of the newly created column, so we can preview it.
var lpNormalize= transformedData.GetColumn<VBuffer<float>>(ml, nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features));
var lpNormalize= transformedData.GetColumn<VBuffer<float>>(transformedData.Schema[nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]);

printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), lpNormalize);

Expand All @@ -77,7 +77,7 @@ public static void Example()
// The transformed (projected) data.
transformedData = gcNormalizePipeline.Fit(trainData).Transform(trainData);
// Getting the data of the newly created column, so we can preview it.
var gcNormalize = transformedData.GetColumn<VBuffer<float>>(ml, nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features));
var gcNormalize = transformedData.GetColumn<VBuffer<float>>(transformedData.Schema[nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]);

printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), gcNormalize);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,14 @@ public static void Example()
};

// Preview the result of breaking string into array of words.
var originalText = transformedDataDefault.GetColumn<VBuffer<ReadOnlyMemory<char>>>(ml, originalTextColumnName);
var originalText = transformedDataDefault.GetColumn<VBuffer<ReadOnlyMemory<char>>>(transformedDataDefault.Schema[originalTextColumnName]);
printHelper(originalTextColumnName, originalText);
// Best|game|I've|ever|played.|
// == RUDE ==| Dude,| 2 |
// Until | the | next | game,| this |is| the | best | Xbox | game!|

// Preview the result of cleaning with default stop word remover.
var defaultRemoverData = transformedDataDefault.GetColumn<VBuffer<ReadOnlyMemory<char>>>(ml, "DefaultRemover");
var defaultRemoverData = transformedDataDefault.GetColumn<VBuffer<ReadOnlyMemory<char>>>(transformedDataDefault.Schema["DefaultRemover"]);
printHelper("DefaultRemover", defaultRemoverData);
// Best|game|I've|played.|
// == RUDE ==| Dude,| 2 |
Expand All @@ -70,7 +70,7 @@ public static void Example()


// Preview the result of cleaning with the customized stop word remover.
var customizeRemoverData = transformedDataCustomized.GetColumn<VBuffer<ReadOnlyMemory<char>>>(ml, "RemovedWords");
var customizeRemoverData = transformedDataCustomized.GetColumn<VBuffer<ReadOnlyMemory<char>>>(transformedDataCustomized.Schema["RemovedWords"]);
printHelper("RemovedWords", customizeRemoverData);

// Best|game|I've|ever|played.|
Expand Down
4 changes: 2 additions & 2 deletions docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public static void Example()
};

// Preview of the DefaultTextFeatures column obtained after processing the input.
var defaultColumn = transformedData_default.GetColumn<VBuffer<float>>(ml, defaultColumnName);
var defaultColumn = transformedData_default.GetColumn<VBuffer<float>>(transformedData_default.Schema[defaultColumnName]);
printHelper(defaultColumnName, defaultColumn);

// DefaultTextFeatures column obtained post-transformation.
Expand All @@ -68,7 +68,7 @@ public static void Example()
// 0 0.1230915 0.1230915 0.1230915 0.1230915 0.246183 0.246183 0.246183 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.1230915 0 0 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.3692745 0.246183 0.246183 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.246183 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.2886751 0 0 0 0 0 0 0 0.2886751 0.5773503 0.2886751 0.2886751 0.2886751 0.2886751 0.2886751 0.2886751

// Preview of the CustomizedTextFeatures column obtained after processing the input.
var customizedColumn = transformedData_customized.GetColumn<VBuffer<float>>(ml, customizedColumnName);
var customizedColumn = transformedData_customized.GetColumn<VBuffer<float>>(transformedData_customized.Schema[customizedColumnName]);
printHelper(customizedColumnName, customizedColumn);

// CustomizedTextFeatures column obtained post-transformation.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public static void Example()
// The transformed (projected) data.
var transformedData = whiteningPipeline.Fit(trainData).Transform(trainData);
// Getting the data of the newly created column, so we can preview it.
var whitening = transformedData.GetColumn<VBuffer<float>>(ml, nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features));
var whitening = transformedData.GetColumn<VBuffer<float>>(transformedData.Schema[nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]);

printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), whitening);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public static void Example()
// The transformed (projected) data.
var transformedData = whiteningPipeline.Fit(trainData).Transform(trainData);
// Getting the data of the newly created column, so we can preview it.
var whitening = transformedData.GetColumn<VBuffer<float>>(ml, nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features));
var whitening = transformedData.GetColumn<VBuffer<float>>(transformedData.Schema[nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]);

printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), whitening);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public static void Example()

var wordsDataview = wordsPipeline.Fit(trainData).Transform(trainData);
// Preview of the CleanWords column obtained after processing SentimentText.
var cleanWords = wordsDataview.GetColumn<ReadOnlyMemory<char>[]>(ml, "CleanWords");
var cleanWords = wordsDataview.GetColumn<ReadOnlyMemory<char>[]>(wordsDataview.Schema["CleanWords"]);
Console.WriteLine($" CleanWords column obtained post-transformation.");
foreach (var featureRow in cleanWords)
{
Expand Down Expand Up @@ -86,7 +86,7 @@ public static void Example()
// And do all required transformations.
var embeddingDataview = pipeline.Fit(wordsDataview).Transform(wordsDataview);

var customEmbeddings = embeddingDataview.GetColumn<float[]>(ml, "CustomEmbeddings");
var customEmbeddings = embeddingDataview.GetColumn<float[]>(embeddingDataview.Schema["CustomEmbeddings"]);
printEmbeddings("GloveEmbeddings", customEmbeddings);

// -1 -2 -3 -0.5 -1 8.5 0 0 20
Expand All @@ -98,7 +98,7 @@ public static void Example()
// Second set of 3 floats in output represent average (for each dimension) for extracted values.
// Third set of 3 floats in output represent maximum values (for each dimension) for extracted values.
// Preview of GloveEmbeddings.
var gloveEmbeddings = embeddingDataview.GetColumn<float[]>(ml, "GloveEmbeddings");
var gloveEmbeddings = embeddingDataview.GetColumn<float[]>(embeddingDataview.Schema["GloveEmbeddings"]);
printEmbeddings("GloveEmbeddings", gloveEmbeddings);
// 0.23166 0.048825 0.26878 -1.3945 -0.86072 -0.026778 0.84075 -0.81987 -1.6681 -1.0658 -0.30596 0.50974 ...
// -0.094905 0.61109 0.52546 -0.2516 0.054786 0.022661 1.1801 0.33329 -0.85388 0.15471 -0.5984 0.4364 ...
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ public static void FeatureSelectionTransform()
};

// Print the data that results from the transformations.
var countSelectColumn = transformedData.AsDynamic.GetColumn<VBuffer<float>>(ml, "FeaturesCountSelect");
var MISelectColumn = transformedData.AsDynamic.GetColumn<VBuffer<float>>(ml, "FeaturesMISelect");
var countSelectColumn = transformedData.AsDynamic.GetColumn<VBuffer<float>>(transformedData.AsDynamic.Schema["FeaturesCountSelect"]);
var MISelectColumn = transformedData.AsDynamic.GetColumn<VBuffer<float>>(transformedData.AsDynamic.Schema["FeaturesMISelect"]);
printHelper("FeaturesCountSelect", countSelectColumn);
printHelper("FeaturesMISelect", MISelectColumn);

Expand Down
Loading