Skip to content

Commit 3c19ae5

Browse files
wangkuiyi authored and typhoonzero committed
Rename fieldMeta into fieldDesc (#1436)
* Update * Update * Update
1 parent 6ea5b4d commit 3c19ae5

21 files changed

+233
-234
lines changed

cmd/repl/repl_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ INTO sqlflow_models.mymodel;`
267267
a.NoError(err)
268268
pbTrain := &irpb.TrainStmt{}
269269
proto.UnmarshalText(pbtxt, pbTrain)
270-
a.Equal("class", pbTrain.GetLabel().GetNc().GetFieldMeta().GetName())
270+
a.Equal("class", pbTrain.GetLabel().GetNc().GetFieldDesc().GetName())
271271

272272
// run one train SQL statement to save the model, then test predict/analyze using that model
273273
sess := &irpb.Session{DbConnStr: dataSourceStr}

doc/design/codegen_couler_use_ir.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@ To implement the single `codegen_couler.go` to support generate code that can ru
1919
tfFiller := TFFiller{
2020
Estimator: generateTFEstimatorCode(ir),
2121
FeatureColumns: generateFeatureColumnsCode(ir),
22-
FieldMetas: generateFieldMetasCode(ir),
22+
FieldDescs: generateFieldDescsCode(ir),
2323
...
2424
}
25-
tfTrainTemplate = `couler.tensorflow.train(estimator="{{.Estimator}}", FeatureColumns="""{{.FeatureColumns}}""", FieldMetas={{.FieldMetas}})`
25+
tfTrainTemplate = `couler.tensorflow.train(estimator="{{.Estimator}}", FeatureColumns="""{{.FeatureColumns}}""", FieldDescs={{.FieldDescs}})`
2626
// Do template rendering here.
2727
else if ir.ModelType == "XGBoost":
2828
...

doc/design/couler_sqlflow.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ From the above Couler function:
4949
protobuf text format, the protobuf definition is as follows:
5050

5151
```protobuf
52-
message FieldMeta {
52+
message FieldDesc {
5353
required string name = 1;
5454
required FieldType dtype = 2;
5555
optional string delimiter = 3;
@@ -60,7 +60,7 @@ message FieldMeta {
6060
}
6161
6262
message NumericColumn {
63-
FieldMeta field_meta = 1;
63+
FieldDesc field_meta = 1;
6464
}
6565
6666
message BucketColumn {

pkg/proto/intermediate_representation.proto

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import "sqlflow.proto";
88
// All structs should be exactly the same as structs defined in `pkg/sql/ir/ir.go`
99
// You may refer to `ir.go` and `feature_column.go` for the meaning of each field.
1010

11-
message FieldMeta {
11+
message FieldDesc {
1212
string name = 1;
1313
string dtype = 2;
1414
string delimiter = 3;
@@ -19,7 +19,7 @@ message FieldMeta {
1919
}
2020

2121
message NumericColumn {
22-
FieldMeta field_meta = 1;
22+
FieldDesc field_desc = 1;
2323
}
2424

2525
message BucketColumn {
@@ -33,12 +33,12 @@ message CrossColumn {
3333
}
3434

3535
message CategoryIDColumn {
36-
FieldMeta field_meta = 1;
36+
FieldDesc field_desc = 1;
3737
int32 bucket_size = 2;
3838
}
3939

4040
message SeqCategoryIDColumn {
41-
FieldMeta field_meta = 1;
41+
FieldDesc field_desc = 1;
4242
int32 bucket_size = 2;
4343
}
4444

pkg/sql/codegen/couler/codegen_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,11 @@ INTO sqlflow_models.my_xgboost_model;
7070
"model.n_classes": 3},
7171
Features: map[string][]ir.FeatureColumn{
7272
"feature_columns": {
73-
&ir.NumericColumn{&ir.FieldMeta{"sepal_length", ir.Float, "", []int{1}, false, nil, 0}},
74-
&ir.NumericColumn{&ir.FieldMeta{"sepal_width", ir.Float, "", []int{1}, false, nil, 0}},
75-
&ir.NumericColumn{&ir.FieldMeta{"petal_length", ir.Float, "", []int{1}, false, nil, 0}},
76-
&ir.NumericColumn{&ir.FieldMeta{"petal_width", ir.Float, "", []int{1}, false, nil, 0}}}},
77-
Label: &ir.NumericColumn{&ir.FieldMeta{"class", ir.Int, "", []int{1}, false, nil, 0}}},
73+
&ir.NumericColumn{&ir.FieldDesc{"sepal_length", ir.Float, "", []int{1}, false, nil, 0}},
74+
&ir.NumericColumn{&ir.FieldDesc{"sepal_width", ir.Float, "", []int{1}, false, nil, 0}},
75+
&ir.NumericColumn{&ir.FieldDesc{"petal_length", ir.Float, "", []int{1}, false, nil, 0}},
76+
&ir.NumericColumn{&ir.FieldDesc{"petal_width", ir.Float, "", []int{1}, false, nil, 0}}}},
77+
Label: &ir.NumericColumn{&ir.FieldDesc{"class", ir.Int, "", []int{1}, false, nil, 0}}},
7878
}
7979
}
8080

pkg/sql/codegen/tensorflow/codegen.go

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ func generateFeatureColumnCode(fc ir.FeatureColumn) (string, error) {
6161
case *ir.NumericColumn:
6262
nc := fc.(*ir.NumericColumn)
6363
return fmt.Sprintf("tf.feature_column.numeric_column(\"%s\", shape=%s)",
64-
nc.FieldMeta.Name,
65-
intArrayToJSONString(nc.FieldMeta.Shape)), nil
64+
nc.FieldDesc.Name,
65+
intArrayToJSONString(nc.FieldDesc.Shape)), nil
6666
case *ir.BucketColumn:
6767
bc := fc.(*ir.BucketColumn)
6868
sourceCode, err := generateFeatureColumnCode(bc.SourceColumn)
@@ -76,11 +76,11 @@ func generateFeatureColumnCode(fc ir.FeatureColumn) (string, error) {
7676
case *ir.CategoryIDColumn:
7777
cc := fc.(*ir.CategoryIDColumn)
7878
return fmt.Sprintf("tf.feature_column.categorical_column_with_identity(key=\"%s\", num_buckets=%d)",
79-
cc.FieldMeta.Name, cc.BucketSize), nil
79+
cc.FieldDesc.Name, cc.BucketSize), nil
8080
case *ir.SeqCategoryIDColumn:
8181
cc := fc.(*ir.SeqCategoryIDColumn)
8282
return fmt.Sprintf("tf.feature_column.sequence_categorical_column_with_identity(key=\"%s\", num_buckets=%d)",
83-
cc.FieldMeta.Name, cc.BucketSize), nil
83+
cc.FieldDesc.Name, cc.BucketSize), nil
8484
case *ir.CrossColumn:
8585
cc := fc.(*ir.CrossColumn)
8686
var keysGenerated = make([]string, len(cc.Keys))
@@ -226,26 +226,25 @@ func setValidateParamDefaultValues(validateParams map[string]interface{}) {
226226
}
227227
}
228228

229-
func deriveFeatureColumnCode(trainStmt *ir.TrainStmt) (featureColumnsCode []string, fieldMetas []*ir.FieldMeta, err error) {
229+
func deriveFeatureColumnCode(trainStmt *ir.TrainStmt) (featureColumnsCode []string, fieldDescs []*ir.FieldDesc, err error) {
230230
perTargetFeatureColumnsCode := []string{}
231-
232231
for target, fcList := range trainStmt.Features {
233232
for _, fc := range fcList {
234233
fcCode, err := generateFeatureColumnCode(fc)
235234
if err != nil {
236235
return nil, nil, err
237236
}
238237
perTargetFeatureColumnsCode = append(perTargetFeatureColumnsCode, fcCode)
239-
if len(fc.GetFieldMeta()) > 0 {
240-
for _, fm := range fc.GetFieldMeta() {
241-
fieldMetas = append(fieldMetas, fm)
238+
if len(fc.GetFieldDesc()) > 0 {
239+
for _, fm := range fc.GetFieldDesc() {
240+
fieldDescs = append(fieldDescs, fm)
242241
}
243242
}
244243
}
245244
featureColumnsCode = append(featureColumnsCode,
246245
fmt.Sprintf("\"%s\": [%s]", target, strings.Join(perTargetFeatureColumnsCode, ",\n")))
247246
}
248-
return featureColumnsCode, fieldMetas, nil
247+
return featureColumnsCode, fieldDescs, nil
249248
}
250249

251250
// Train generates a Python program for training a TensorFlow model.
@@ -259,7 +258,7 @@ func Train(trainStmt *ir.TrainStmt) (string, error) {
259258
setTrainParamDefaultValues(trainParams)
260259
setValidateParamDefaultValues(validateParams)
261260

262-
featureColumnsCode, fieldMetas, err := deriveFeatureColumnCode(trainStmt)
261+
featureColumnsCode, fieldDescs, err := deriveFeatureColumnCode(trainStmt)
263262
if err != nil {
264263
return "", err
265264
}
@@ -286,9 +285,9 @@ func Train(trainStmt *ir.TrainStmt) (string, error) {
286285
ValidationSelect: trainStmt.ValidationSelect,
287286
Estimator: estimatorStr,
288287
IsKerasModel: isKeras,
289-
FieldMetas: fieldMetas,
288+
FieldDescs: fieldDescs,
290289
FeatureColumnCode: fmt.Sprintf("{%s}", strings.Join(featureColumnsCode, ",\n")),
291-
Y: trainStmt.Label.GetFieldMeta()[0], // TODO(typhoonzero): label only support numericColumn.
290+
Y: trainStmt.Label.GetFieldDesc()[0], // TODO(typhoonzero): label only support numericColumn.
292291
ModelParams: modelParams,
293292
TrainParams: trainParams,
294293
ValidationParams: validateParams,
@@ -319,29 +318,29 @@ func Pred(predStmt *ir.PredictStmt, session *pb.Session) (string, error) {
319318
}
320319
featureColumnsCode := []string{}
321320
perTargetFeatureColumnsCode := []string{}
322-
fieldMetas := []*ir.FieldMeta{}
321+
fieldDescs := []*ir.FieldDesc{}
323322
for target, fcList := range predStmt.TrainStmt.Features {
324323
for _, fc := range fcList {
325324
fcCode, err := generateFeatureColumnCode(fc)
326325
if err != nil {
327326
return "", err
328327
}
329328
perTargetFeatureColumnsCode = append(perTargetFeatureColumnsCode, fcCode)
330-
if len(fc.GetFieldMeta()) > 0 {
331-
for _, fm := range fc.GetFieldMeta() {
332-
fieldMetas = append(fieldMetas, fm)
329+
if len(fc.GetFieldDesc()) > 0 {
330+
for _, fm := range fc.GetFieldDesc() {
331+
fieldDescs = append(fieldDescs, fm)
333332
}
334333
}
335334
}
336335
featureColumnsCode = append(featureColumnsCode,
337336
fmt.Sprintf("\"%s\": [%s]", target, strings.Join(perTargetFeatureColumnsCode, ",\n")))
338337
}
339338
isKeras, estimatorStr := IsKerasModel(predStmt.TrainStmt.Estimator)
340-
labelFM := predStmt.TrainStmt.Label.GetFieldMeta()[0]
339+
labelFM := predStmt.TrainStmt.Label.GetFieldDesc()[0]
341340
if labelFM.Name == "" {
342341
log.Printf("clustering model, got result table: %s, result column: %s", predStmt.ResultTable, predStmt.ResultColumn)
343-
// no label in train SQL means a clustering model, generate a fieldmeta using result table's column
344-
labelFM = &ir.FieldMeta{
342+
// no label in train SQL means a clustering model, generate a fieldDesc using result table's column
343+
labelFM = &ir.FieldDesc{
345344
Name: predStmt.ResultColumn,
346345
Shape: []int{1},
347346
DType: ir.Int,
@@ -354,7 +353,7 @@ func Pred(predStmt *ir.PredictStmt, session *pb.Session) (string, error) {
354353
ResultTable: predStmt.ResultTable,
355354
Estimator: estimatorStr,
356355
IsKerasModel: isKeras,
357-
FieldMetas: fieldMetas,
356+
FieldDescs: fieldDescs,
358357
FeatureColumnCode: fmt.Sprintf("{%s}", strings.Join(featureColumnsCode, ",\n")),
359358
Y: labelFM,
360359
ModelParams: modelParams,

pkg/sql/codegen/tensorflow/template_pred.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ type predFiller struct {
2222
// the members below come from trainStmt
2323
Estimator string
2424
IsKerasModel bool
25-
FieldMetas []*ir.FieldMeta
25+
FieldDescs []*ir.FieldDesc
2626
FeatureColumnCode string
27-
Y *ir.FieldMeta
27+
Y *ir.FieldDesc
2828
ModelParams map[string]interface{}
2929
Save string
3030
HDFSNameNodeAddr string
@@ -41,12 +41,12 @@ try:
4141
except:
4242
pass
4343
44-
feature_column_names = [{{range .FieldMetas}}
44+
feature_column_names = [{{range .FieldDescs}}
4545
"{{.Name}}",
4646
{{end}}]
4747
4848
feature_metas = dict()
49-
{{ range $value := .FieldMetas }}
49+
{{ range $value := .FieldDescs }}
5050
feature_metas["{{$value.Name}}"] = {
5151
"feature_name": "{{$value.Name}}",
5252
"dtype": "{{$value.DType | dtypeToString}}",

pkg/sql/codegen/tensorflow/template_train.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ type trainFiller struct {
2121
ValidationSelect string
2222
Estimator string
2323
IsKerasModel bool
24-
FieldMetas []*ir.FieldMeta
24+
FieldDescs []*ir.FieldDesc
2525
FeatureColumnCode string
26-
Y *ir.FieldMeta
26+
Y *ir.FieldDesc
2727
ModelParams map[string]interface{}
2828
TrainParams map[string]interface{}
2929
ValidationParams map[string]interface{}
@@ -40,12 +40,12 @@ try:
4040
except:
4141
pass
4242
43-
feature_column_names = [{{range .FieldMetas}}
43+
feature_column_names = [{{range .FieldDescs}}
4444
"{{.Name}}",
4545
{{end}}]
4646
4747
feature_metas = dict()
48-
{{ range $value := .FieldMetas }}
48+
{{ range $value := .FieldDescs }}
4949
feature_metas["{{$value.Name}}"] = {
5050
"feature_name": "{{$value.Name}}",
5151
"dtype": "{{$value.DType | dtypeToString}}",

pkg/sql/codegen/xgboost/codegen.go

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -74,23 +74,23 @@ func parseAttribute(attrs map[string]interface{}) (map[string]map[string]interfa
7474
return params, nil
7575
}
7676

77-
func getFieldMeta(fcs []ir.FeatureColumn, l ir.FeatureColumn) ([]ir.FieldMeta, ir.FieldMeta, error) {
78-
var features []ir.FieldMeta
77+
func getFieldDesc(fcs []ir.FeatureColumn, l ir.FeatureColumn) ([]ir.FieldDesc, ir.FieldDesc, error) {
78+
var features []ir.FieldDesc
7979
for _, fc := range fcs {
8080
switch c := fc.(type) {
8181
case *ir.NumericColumn:
82-
features = append(features, *c.FieldMeta)
82+
features = append(features, *c.FieldDesc)
8383
default:
84-
return nil, ir.FieldMeta{}, fmt.Errorf("unsupported feature column type %T on %v", c, c)
84+
return nil, ir.FieldDesc{}, fmt.Errorf("unsupported feature column type %T on %v", c, c)
8585
}
8686
}
8787

88-
var label ir.FieldMeta
88+
var label ir.FieldDesc
8989
switch c := l.(type) {
9090
case *ir.NumericColumn:
91-
label = *c.FieldMeta
91+
label = *c.FieldDesc
9292
default:
93-
return nil, ir.FieldMeta{}, fmt.Errorf("unsupported label column type %T on %v", c, c)
93+
return nil, ir.FieldDesc{}, fmt.Errorf("unsupported label column type %T on %v", c, c)
9494
}
9595

9696
return features, label, nil
@@ -111,7 +111,7 @@ func Train(trainStmt *ir.TrainStmt) (string, error) {
111111
if len(trainStmt.Features) != 1 {
112112
return "", fmt.Errorf("xgboost only support 1 feature column set, received %d", len(trainStmt.Features))
113113
}
114-
featureFieldMeta, labelFieldMeta, err := getFieldMeta(trainStmt.Features["feature_columns"], trainStmt.Label)
114+
featureFieldDesc, labelFieldDesc, err := getFieldDesc(trainStmt.Features["feature_columns"], trainStmt.Label)
115115
if err != nil {
116116
return "", err
117117
}
@@ -123,11 +123,11 @@ func Train(trainStmt *ir.TrainStmt) (string, error) {
123123
if err != nil {
124124
return "", err
125125
}
126-
f, err := json.Marshal(featureFieldMeta)
126+
f, err := json.Marshal(featureFieldDesc)
127127
if err != nil {
128128
return "", err
129129
}
130-
l, err := json.Marshal(labelFieldMeta)
130+
l, err := json.Marshal(labelFieldDesc)
131131
if err != nil {
132132
return "", err
133133
}
@@ -137,7 +137,7 @@ func Train(trainStmt *ir.TrainStmt) (string, error) {
137137
ValidationSelect: trainStmt.ValidationSelect,
138138
ModelParamsJSON: string(mp),
139139
TrainParamsJSON: string(tp),
140-
FieldMetaJSON: string(f),
140+
FieldDescJSON: string(f),
141141
LabelJSON: string(l)}
142142

143143
var program bytes.Buffer
@@ -150,15 +150,15 @@ func Train(trainStmt *ir.TrainStmt) (string, error) {
150150

151151
// Pred generates a Python program for predicting with an XGBoost model.
152152
func Pred(predStmt *ir.PredictStmt, session *pb.Session) (string, error) {
153-
featureFieldMeta, labelFieldMeta, err := getFieldMeta(predStmt.TrainStmt.Features["feature_columns"], predStmt.TrainStmt.Label)
153+
featureFieldDesc, labelFieldDesc, err := getFieldDesc(predStmt.TrainStmt.Features["feature_columns"], predStmt.TrainStmt.Label)
154154
if err != nil {
155155
return "", err
156156
}
157-
f, err := json.Marshal(featureFieldMeta)
157+
f, err := json.Marshal(featureFieldDesc)
158158
if err != nil {
159159
return "", err
160160
}
161-
l, err := json.Marshal(labelFieldMeta)
161+
l, err := json.Marshal(labelFieldDesc)
162162
if err != nil {
163163
return "", err
164164
}

pkg/sql/codegen/xgboost/codegen_analyze.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ func Analyze(analyzeStmt *ir.AnalyzeStmt) (string, error) {
3636
if err != nil {
3737
return "", err
3838
}
39-
xs, y, err := getFieldMeta(analyzeStmt.TrainStmt.Features["feature_columns"], analyzeStmt.TrainStmt.Label)
39+
xs, y, err := getFieldDesc(analyzeStmt.TrainStmt.Features["feature_columns"], analyzeStmt.TrainStmt.Label)
4040
if err != nil {
4141
return "", err
4242
}
@@ -49,7 +49,7 @@ func Analyze(analyzeStmt *ir.AnalyzeStmt) (string, error) {
4949
DataSource: analyzeStmt.DataSource,
5050
DatasetSQL: analyzeStmt.Select,
5151
ShapSummaryParames: string(jsonSummary),
52-
FieldMetaJSON: string(fm),
52+
FieldDescJSON: string(fm),
5353
Label: y.Name,
5454
}
5555
var analysis bytes.Buffer

0 commit comments

Comments
 (0)