Skip to content

Commit b95b3e9

Browse files
committed
feat: mapping column + readme for oss as source
1 parent bc949c5 commit b95b3e9

File tree

4 files changed

+46
-15
lines changed

4 files changed

+46
-15
lines changed

README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,11 @@ It expects configuration from env variables. Or you can pass configuration from
6666
| MC | MC__SERVICE_ACCOUNT | Service account for MaxCompute. |
6767
| | MC__QUERY_FILE_PATH | Path to the query file. (default: /data/in/query.sql) |
6868
| | MC__EXECUTION_PROJECT | Project ID for the query execution. |
69-
69+
| OSS | OSS__SERVICE_ACCOUNT | Service account for OSS. |
70+
| | OSS__SOURCE_BUCKET_PATH | The source path in an OSS bucket to read files from. Must include the OSS bucket name. |
71+
| | OSS__FILE_FORMAT | Supported file formats: CSV, JSON. (default: JSON) |
72+
| | OSS__CSV_DELIMITER | Delimiter for CSV file format. (default: ,) |
73+
| | OSS__COLUMN_MAPPING_FILE_PATH | Path to the column mapping file applied to each record. Use "" to skip column mapping. (default: "") |
7074
## Supported Sinks
7175

7276
| Component | Configuration | Description |

ext/oss/source.go

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"strings"
1414

1515
"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss"
16+
extcommon "github.com/goto/optimus-any2any/ext/common"
1617
"github.com/goto/optimus-any2any/internal/component/option"
1718
"github.com/goto/optimus-any2any/internal/component/source"
1819
"github.com/goto/optimus-any2any/pkg/flow"
@@ -29,13 +30,15 @@ type OSSSource struct {
2930
pathPrefix string
3031
fileFormat string
3132
csvDelimiter rune
33+
columnMap map[string]string
3234
}
3335

3436
var _ flow.Source = (*OSSSource)(nil)
3537

3638
// NewSource creates a new OSSSource.
3739
func NewSource(ctx context.Context, l *slog.Logger, svcAcc string,
38-
sourceBucketPath, fileFormat string, csvDelimiter rune, opts ...option.Option) (*OSSSource, error) {
40+
sourceBucketPath, fileFormat string, csvDelimiter rune,
41+
columnMappingFilePath string, opts ...option.Option) (*OSSSource, error) {
3942
// create commonSource source
4043
commonSource := source.NewCommonSource(l, opts...)
4144

@@ -50,6 +53,11 @@ func NewSource(ctx context.Context, l *slog.Logger, svcAcc string,
5053
if err != nil {
5154
return nil, errors.WithStack(err)
5255
}
56+
// read column map
57+
columnMap, err := extcommon.GetColumnMap(columnMappingFilePath)
58+
if err != nil {
59+
return nil, errors.WithStack(err)
60+
}
5361

5462
ossSource := &OSSSource{
5563
CommonSource: commonSource,
@@ -59,6 +67,7 @@ func NewSource(ctx context.Context, l *slog.Logger, svcAcc string,
5967
pathPrefix: strings.TrimPrefix(parsedURL.Path, "/"),
6068
fileFormat: fileFormat,
6169
csvDelimiter: csvDelimiter,
70+
columnMap: columnMap,
6271
}
6372

6473
// add clean function
@@ -110,12 +119,19 @@ func (o *OSSSource) process() {
110119

111120
// send records
112121
for _, record := range records {
113-
o.Send(record)
122+
mappedRecord := extcommon.KeyMapping(o.columnMap, record)
123+
raw, err := json.Marshal(mappedRecord)
124+
if err != nil {
125+
o.Logger.Error(fmt.Sprintf("source(oss): failed to marshal record: %s", err.Error()))
126+
o.SetError(errors.WithStack(err))
127+
continue
128+
}
129+
o.Send(raw)
114130
}
115131
}
116132
}
117133

118-
func (o *OSSSource) unpackRecords(object *oss.GetObjectResult) ([][]byte, error) {
134+
func (o *OSSSource) unpackRecords(object *oss.GetObjectResult) ([]map[string]interface{}, error) {
119135
// unmarshal object based on file format
120136
var (
121137
records []map[string]interface{}
@@ -132,17 +148,7 @@ func (o *OSSSource) unpackRecords(object *oss.GetObjectResult) ([][]byte, error)
132148
if err != nil {
133149
return nil, errors.WithStack(err)
134150
}
135-
136-
// marshal records
137-
raws := make([][]byte, 0, len(records))
138-
for _, record := range records {
139-
raw, err := json.Marshal(record)
140-
if err != nil {
141-
return nil, errors.WithStack(err)
142-
}
143-
raws = append(raws, raw)
144-
}
145-
return raws, nil
151+
return records, nil
146152
}
147153

148154
func (o *OSSSource) unmarshalCSV(object *oss.GetObjectResult) ([]map[string]interface{}, error) {

internal/component/component.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,12 @@ func GetSource(ctx context.Context, l *slog.Logger, source Type, cfg *config.Con
7272
return gmail.NewSource(ctx, l, sourceCfg.Token, sourceCfg.Filter,
7373
sourceCfg.ExtractorSource, sourceCfg.ExtractorPattern, sourceCfg.ExtractorFileFormat,
7474
sourceCfg.FilenameColumn, sourceCfg.ColumnMappingFilePath, opts...)
75+
case OSS:
76+
sourceCfg, err := config.SourceOSS(envs...)
77+
if err != nil {
78+
return nil, errors.WithStack(err)
79+
}
80+
return oss.NewSource(ctx, l, sourceCfg.ServiceAccount, sourceCfg.SourceBucketPath, sourceCfg.FileFormat, sourceCfg.CSVDelimiter, sourceCfg.ColumnMappingFilePath, opts...)
7581
case IO:
7682
}
7783
return nil, fmt.Errorf("source: unknown source: %s", source)

internal/config/source_oss.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package config
2+
3+
// SourceOSSConfig is a configuration for the source oss component.
4+
type SourceOSSConfig struct {
5+
ServiceAccount string `env:"OSS__SERVICE_ACCOUNT"`
6+
SourceBucketPath string `env:"OSS__SOURCE_BUCKET_PATH"`
7+
FileFormat string `env:"OSS__FILE_FORMAT" envDefault:"json"`
8+
CSVDelimiter rune `env:"OSS__CSV_DELIMITER" envDefault:","`
9+
ColumnMappingFilePath string `env:"OSS__COLUMN_MAPPING_FILE_PATH"`
10+
}
11+
12+
// SourceOSS parses the environment variables and returns the source oss configuration.
13+
func SourceOSS(envs ...string) (*SourceOSSConfig, error) {
14+
return parse[SourceOSSConfig](envs...)
15+
}

0 commit comments

Comments
 (0)