Skip to content

Commit f161082

Browse files
parikshitgGaurav Sahil
andauthored
Destination connector implementation. (#1)
* feat: package sftp * feat: updated initial template and added config * feat: updated config * feat: destination configuration * feat: config test and error handling * feat: initial source implementation * feat: sftp client * feat: upload file * feat: handled host key callback * feat: adding test cases * fix: lint * seperated source logic into iterator * feat: acceptance * fix: small fix * feat: readme * added test cases * added integration test * remove go generate directive from source * fix: test cases * fix: teardown * added go header * added file chunk mechanism for files larger than 3 mb * added configurable chunk * fix: updated readme * fix: large file processing * feat: upload large file * fix: refactored iterator * fix: handle file modification while read * fix: source integration test * modify README file * feat: merged source * fix: source acceptance to be handled separately * fix: refactored iterator to provide record on demand * feat: handled missing filename in metadata * added source and destination directories in docker compose * fix: test workflow * fix: refactored * fix: source and destination tests * fix: linters * fix: actual filesize in source metadata and handle filename from rawdata key * fix: source raw key * fix: source middleware --------- Co-authored-by: Gaurav Sahil <[email protected]>
1 parent 1aca814 commit f161082

15 files changed

+1085
-75
lines changed

README.md

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,24 @@ The connector supports both password and private key authentication methods.
3131

3232
## Destination
3333

34+
The destination connects to a remote server. It takes an `opencdc.Record`, extracts the filename from the metadata, and uploads the file to the remote server. The connector supports both password and private key authentication methods. The connector will sync only those files that are in the source directory itself.
35+
The destination also supports large file uploads. The source can provide the content of a large file chunk by chunk (one record per chunk). Each record should have the following in its metadata:
36+
37+
* `filename`: Filename of the file with extension.
38+
* `file_size`: Integer size of the file.
39+
* `chunk_index`: Index of the chunk (starting from 1).
40+
* `total_chunks`: Total number of chunks.
41+
* `hash`: Unique hash, which is used to create a temporary file until the last chunk is uploaded.
42+
3443
### Configuration Options
3544

36-
![scarf pixel](https://static.scarf.sh/a.png?x-pxid=64b333ae-77ad-4895-a5cd-a73bb14362d9)
45+
| name | description | required |
46+
| -------------- | ----------------------------------------------------------------------------------------------------- | -------- |
47+
| `address` | Address is the address of the sftp server to connect.| **true** |
48+
| `hostKey` | HostKey is the key used for host key callback validation.| **true** |
49+
| `username`| User is the username of the SFTP user. | **true** |
50+
| `password`| Password is the SFTP password (can be used as passphrase for private key). | false |
51+
| `privateKeyPath`| PrivateKeyPath is the private key for ssh login.| false |
52+
| `directoryPath` | DirectoryPath is the path to the directory to read/write data. | **true** |
53+
54+
![scarf pixel](https://static.scarf.sh/a.png?x-pxid=64b333ae-77ad-4895-a5cd-a73bb14362d9)

acceptance_test.go

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
// Copyright © 2024 Meroxa, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package sftp
16+
17+
import (
18+
"fmt"
19+
"os/exec"
20+
"sync/atomic"
21+
"testing"
22+
"time"
23+
24+
"github.com/conduitio-labs/conduit-connector-sftp/common"
25+
"github.com/conduitio-labs/conduit-connector-sftp/config"
26+
"github.com/conduitio-labs/conduit-connector-sftp/destination"
27+
"github.com/conduitio/conduit-commons/opencdc"
28+
sdk "github.com/conduitio/conduit-connector-sdk"
29+
)
30+
31+
// driver is the acceptance-test driver used by TestAcceptance. It embeds the
// SDK's ConfigurableAcceptanceTestDriver and defines GenerateRecord and
// ReadFromDestination on top of it.
type driver struct {
32+
sdk.ConfigurableAcceptanceTestDriver
33+
// id is a counter that GenerateRecord increments to derive file names.
id int64
34+
}
35+
36+
func (d *driver) GenerateRecord(_ *testing.T, _ opencdc.Operation) opencdc.Record {
37+
atomic.AddInt64(&d.id, 1)
38+
39+
content := []byte("hello world")
40+
filename := fmt.Sprintf("%d.txt", d.id)
41+
42+
return sdk.Util.Source.NewRecordCreate(
43+
nil,
44+
map[string]string{
45+
opencdc.MetadataCollection: "upload",
46+
opencdc.MetadataCreatedAt: time.Now().UTC().Format(time.RFC3339),
47+
"filename": filename,
48+
"hash": common.GenerateFileHash(filename, time.Now(), 11),
49+
"file_size": fmt.Sprintf("%d", len(content)),
50+
"mod_time": time.Now().UTC().Format(time.RFC3339),
51+
},
52+
opencdc.StructuredData{"filename": filename},
53+
opencdc.RawData(content),
54+
)
55+
}
56+
57+
// ReadFromDestination returns the records it was given, unchanged. It does not
// read anything back from the SFTP server.
func (d *driver) ReadFromDestination(_ *testing.T, records []opencdc.Record) []opencdc.Record {
58+
return records
59+
}
60+
61+
func TestAcceptance(t *testing.T) {
62+
hostKey, err := setupHostKey()
63+
if err != nil {
64+
fmt.Println(err)
65+
return
66+
}
67+
68+
sdk.AcceptanceTest(t, &driver{
69+
ConfigurableAcceptanceTestDriver: sdk.ConfigurableAcceptanceTestDriver{
70+
Config: sdk.ConfigurableAcceptanceTestDriverConfig{
71+
Connector: sdk.Connector{
72+
NewSpecification: Specification,
73+
NewDestination: destination.NewDestination,
74+
NewSource: nil,
75+
},
76+
DestinationConfig: map[string]string{
77+
config.ConfigAddress: "localhost:2222",
78+
config.ConfigHostKey: hostKey,
79+
config.ConfigUsername: "user",
80+
config.ConfigPassword: "pass",
81+
config.ConfigDirectoryPath: "/destination",
82+
},
83+
},
84+
},
85+
})
86+
}
87+
88+
// setupHostKey runs `ssh-keyscan -t rsa -p 2222 localhost` and returns its
// standard output as a string. An error from running the command is wrapped
// and returned with an empty string.
func setupHostKey() (string, error) {
	scanned, err := exec.Command("ssh-keyscan", "-t", "rsa", "-p", "2222", "localhost").Output()
	if err == nil {
		return string(scanned), nil
	}

	return "", fmt.Errorf("error setupHostKey: %w", err)
}

common/hash.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// Copyright © 2024 Meroxa, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package common
16+
17+
import (
18+
"crypto/md5" //nolint: gosec // MD5 used for non-cryptographic unique identifier
19+
"encoding/hex"
20+
"fmt"
21+
"time"
22+
)
23+
24+
// GenerateFileHash returns the hex-encoded MD5 digest of the string
// "<fileName>|<modTime formatted as RFC 3339>|<fileSize>". The result serves
// as an identifier for a file, not as a security measure.
func GenerateFileHash(fileName string, modTime time.Time, fileSize int64) string {
	payload := fmt.Sprintf("%s|%s|%d", fileName, modTime.Format(time.RFC3339), fileSize)

	digest := md5.New() //nolint: gosec // MD5 used for non-cryptographic unique identifier
	// Writing to a hash.Hash never returns an error.
	digest.Write([]byte(payload))

	return hex.EncodeToString(digest.Sum(nil))
}

common/sshauth.go

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
// Copyright © 2024 Meroxa, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package common
16+
17+
import (
18+
"bytes"
19+
"fmt"
20+
"net"
21+
"os"
22+
23+
"golang.org/x/crypto/ssh"
24+
)
25+
26+
var ErrUntrustedKey = fmt.Errorf("host key does not match the trusted key")
27+
28+
// MismatchKeyTypeError reports that a host key's type differs from the type
// of the trusted key it was compared against.
type MismatchKeyTypeError struct {
	// got is the key type that was received; want is the type that was expected.
	got, want string
}

// Error implements the error interface.
func (e MismatchKeyTypeError) Error() string {
	const format = "host key type mismatch: got %s, want %s"
	return fmt.Sprintf(format, e.got, e.want)
}

// NewMismatchKeyTypeError builds a MismatchKeyTypeError in which key1 is the
// type that was received and key2 is the type that was expected.
func NewMismatchKeyTypeError(key1, key2 string) MismatchKeyTypeError {
	return MismatchKeyTypeError{got: key1, want: key2}
}
39+
40+
func SSHConfigAuth(remoteHostKey, username, password, privateKeyPath string) (*ssh.ClientConfig, error) {
41+
//nolint:dogsled // not required here.
42+
hostKey, _, _, _, err := ssh.ParseAuthorizedKey([]byte(remoteHostKey))
43+
if err != nil {
44+
return nil, fmt.Errorf("failed to parse host key: %w", err)
45+
}
46+
47+
hostKeyCallback := func(_ string, _ net.Addr, key ssh.PublicKey) error {
48+
if key.Type() != hostKey.Type() {
49+
return NewMismatchKeyTypeError(key.Type(), hostKey.Type())
50+
}
51+
52+
if !bytes.Equal(key.Marshal(), hostKey.Marshal()) {
53+
return ErrUntrustedKey
54+
}
55+
56+
return nil
57+
}
58+
59+
sshConfig := &ssh.ClientConfig{
60+
User: username,
61+
HostKeyCallback: hostKeyCallback,
62+
}
63+
64+
if privateKeyPath != "" {
65+
auth, err := authWithPrivateKey(privateKeyPath, password)
66+
if err != nil {
67+
return nil, err
68+
}
69+
70+
sshConfig.Auth = []ssh.AuthMethod{auth}
71+
return sshConfig, nil
72+
}
73+
74+
sshConfig.Auth = []ssh.AuthMethod{ssh.Password(password)}
75+
return sshConfig, nil
76+
}
77+
78+
func authWithPrivateKey(privateKeyPath, password string) (ssh.AuthMethod, error) {
79+
key, err := os.ReadFile(privateKeyPath)
80+
if err != nil {
81+
return nil, fmt.Errorf("failed to read private key file: %w", err)
82+
}
83+
84+
if password != "" {
85+
signer, err := ssh.ParsePrivateKeyWithPassphrase(key, []byte(password))
86+
if err != nil {
87+
return nil, fmt.Errorf("failed to parse private key: %w", err)
88+
}
89+
return ssh.PublicKeys(signer), nil
90+
}
91+
92+
signer, err := ssh.ParsePrivateKey(key)
93+
if err != nil {
94+
return nil, fmt.Errorf("failed to parse private key: %w", err)
95+
}
96+
97+
return ssh.PublicKeys(signer), nil
98+
}

config/config.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@
1616

1717
package config
1818

19-
import "fmt"
19+
import (
20+
"fmt"
21+
)
2022

2123
var ErrEmptyAuthFields = fmt.Errorf("both %q and %q can not be empty", ConfigPassword, ConfigPrivateKeyPath)
2224

@@ -26,7 +28,7 @@ type Config struct {
2628
Address string `json:"address" validate:"required"`
2729
// HostKey is the key used for host key callback validation.
2830
HostKey string `json:"hostKey" validate:"required"`
29-
// User is the SFTP user.
31+
// User is the username of the SFTP user.
3032
Username string `json:"username" validate:"required"`
3133
// Password is the SFTP password (can be used as passphrase for private key).
3234
Password string `json:"password"`

config/paramgen.go

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

connector.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,14 @@
1515
package sftp
1616

1717
import (
18+
"github.com/conduitio-labs/conduit-connector-sftp/destination"
1819
source "github.com/conduitio-labs/conduit-connector-sftp/source"
1920
sdk "github.com/conduitio/conduit-connector-sdk"
2021
)
2122

2223
// Connector combines all constructors for each plugin in one struct.
2324
var Connector = sdk.Connector{
2425
NewSpecification: Specification,
26+
NewDestination: destination.NewDestination,
2527
NewSource: source.NewSource,
26-
NewDestination: nil,
2728
}

0 commit comments

Comments
 (0)