|
15 | 15 | # See the License for the specific language governing permissions and
|
16 | 16 | # limitations under the License.
|
17 | 17 | ################################################################################
|
| 18 | +import re |
18 | 19 | import sys
|
19 | 20 |
|
20 | 21 | # force to register the operations to SDK Harness
|
| 22 | +from apache_beam.options.pipeline_options import DebugOptions, PipelineOptions |
| 23 | + |
21 | 24 | import pyflink.fn_execution.beam.beam_operations # noqa # pylint: disable=unused-import
|
22 | 25 |
|
23 | 26 | # force to register the coders to SDK Harness
|
24 | 27 | import pyflink.fn_execution.beam.beam_coders # noqa # pylint: disable=unused-import
|
25 | 28 |
|
26 |
| -import apache_beam.runners.worker.sdk_worker_main |
| 29 | +import apache_beam |
27 | 30 |
|
28 | 31 | # disable bundle processor shutdown
|
29 |
| -from apache_beam.runners.worker import sdk_worker |
| 32 | +from apache_beam.runners.worker import sdk_worker, sdk_worker_main, statecache |
30 | 33 | sdk_worker.DEFAULT_BUNDLE_PROCESSOR_CACHE_SHUTDOWN_THRESHOLD_S = 86400 * 30
|
31 | 34 |
|
32 | 35 |
|
| 36 | +# Currently PyFlink only support count-based state cache strategy |
| 37 | +def get_deep_size(*objs): |
| 38 | + return 1 |
| 39 | + |
| 40 | + |
| 41 | +def get_state_cache_size(options): |
| 42 | + """ |
| 43 | + Return the maximum size of state cache in count. |
| 44 | + """ |
| 45 | + if isinstance(options, PipelineOptions): |
| 46 | + experiments = options.view_as(DebugOptions).experiments or [] |
| 47 | + else: |
| 48 | + experiments = options |
| 49 | + |
| 50 | + for experiment in experiments: |
| 51 | + # There should only be 1 match so returning from the loop |
| 52 | + if re.match(r'state_cache_size=', experiment): |
| 53 | + return int( |
| 54 | + re.match(r'state_cache_size=(?P<state_cache_size>.*)', |
| 55 | + experiment).group('state_cache_size')) |
| 56 | + return 0 |
| 57 | + |
| 58 | + |
| 59 | +statecache.get_deep_size = get_deep_size |
| 60 | +sdk_worker_main._get_state_cache_size = get_state_cache_size |
| 61 | +# since Beam 2.52.0, _get_state_cache_size is renamed to _get_state_cache_size_bytes |
| 62 | +sdk_worker_main._get_state_cache_size_bytes = get_state_cache_size |
| 63 | + |
| 64 | + |
33 | 65 | def print_to_logging(logging_func, msg, *args, **kwargs):
|
34 | 66 | if msg != '\n':
|
35 | 67 | logging_func(msg, *args, **kwargs)
|
|
0 commit comments