Skip to content

Commit adec7da

Browse files
Merge pull request #169 from AlexanderMann/fix/buffer-size-calculations
Fix/buffer size calculations
2 parents 2157fba + 73117f0 commit adec7da

File tree

4 files changed

+21
-33
lines changed

4 files changed

+21
-33
lines changed

target_postgres/pysize.py

Lines changed: 0 additions & 29 deletions
This file was deleted.

target_postgres/singer_stream.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from copy import deepcopy
2+
import json
23
import uuid
34

45
import arrow
@@ -7,7 +8,22 @@
78

89
from target_postgres import json_schema, singer
910
from target_postgres.exceptions import SingerStreamError
10-
from target_postgres.pysize import get_size
11+
12+
13+
SINGER_RECEIVED_AT = '_sdc_received_at'
14+
SINGER_BATCHED_AT = '_sdc_batched_at'
15+
SINGER_SEQUENCE = '_sdc_sequence'
16+
SINGER_TABLE_VERSION = '_sdc_table_version'
17+
SINGER_PK = '_sdc_primary_key'
18+
SINGER_SOURCE_PK_PREFIX = '_sdc_source_key_'
19+
SINGER_LEVEL = '_sdc_level_{}_id'
20+
SINGER_VALUE = '_sdc_value'
21+
22+
RAW_LINE_SIZE = '__raw_line_size'
23+
24+
25+
def get_line_size(line_data):
    """Return the size to count against the buffer for one parsed message.

    If ``line_data`` carries a truthy value under ``RAW_LINE_SIZE``, that
    value is returned unchanged. Otherwise (key missing, ``None`` or ``0``)
    the size is the length of the message re-serialized with ``json.dumps``.
    """
    recorded_size = line_data.get(RAW_LINE_SIZE)
    if recorded_size:
        return recorded_size

    # No usable recorded size: fall back to the serialized length.
    return len(json.dumps(line_data))
1127

1228

1329
class BufferedSingerStream():
@@ -133,7 +149,7 @@ def add_record_message(self, record_message):
133149

134150
if add_record:
135151
self.__buffer.append(record_message)
136-
self.__size += get_size(record_message)
152+
self.__size += get_line_size(record_message)
137153
self.__count += 1
138154
elif self.invalid_records_detect \
139155
and len(self.invalid_records) >= self.invalid_records_threshold:

target_postgres/target_tools.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
from target_postgres import json_schema
1212
from target_postgres.exceptions import TargetError
13-
from target_postgres.singer_stream import BufferedSingerStream
13+
from target_postgres.singer_stream import BufferedSingerStream, RAW_LINE_SIZE
1414
from target_postgres.stream_tracker import StreamTracker
1515

1616
LOGGER = singer.get_logger()
@@ -136,6 +136,7 @@ def _line_handler(state_tracker, target, invalid_records_detect, invalid_records
136136
if 'stream' not in line_data:
137137
raise TargetError('`stream` is a required key: {}'.format(line))
138138

139+
line_data[RAW_LINE_SIZE] = len(line)
139140
state_tracker.handle_record_message(line_data['stream'], line_data)
140141
elif line_data['type'] == 'ACTIVATE_VERSION':
141142
if 'stream' not in line_data:

tests/unit/test_BufferedSingerStream.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ def test_multiple_batches__by_memory():
236236
singer_stream = BufferedSingerStream(CATS_SCHEMA['stream'],
237237
CATS_SCHEMA['schema'],
238238
CATS_SCHEMA['key_properties'],
239-
max_buffer_size=1024)
239+
max_buffer_size=10)
240240

241241
assert len(singer_stream.peek_buffer()) == 0
242242

0 commit comments

Comments
 (0)