From df1abb35da7138f1fead8d50bfd01288790af486 Mon Sep 17 00:00:00 2001 From: Sonal Goyal Date: Tue, 22 Apr 2025 00:47:40 +0530 Subject: [PATCH 1/7] suppressed logging in multi field preproc --- .../main/java/zingg/common/core/executor/ZinggBaseCommon.java | 2 +- .../zingg/common/core/preprocess/MultiFieldPreprocessor.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/common/core/src/main/java/zingg/common/core/executor/ZinggBaseCommon.java b/common/core/src/main/java/zingg/common/core/executor/ZinggBaseCommon.java index cbdc7e882..618a90007 100644 --- a/common/core/src/main/java/zingg/common/core/executor/ZinggBaseCommon.java +++ b/common/core/src/main/java/zingg/common/core/executor/ZinggBaseCommon.java @@ -70,11 +70,11 @@ public void postMetrics() { Analytics.track(Metric.EXEC_TIME, (System.currentTimeMillis() - startTime) / 1000, true); Analytics.track(Metric.MODEL_ID, getArgs().getModelId(), true); Analytics.track(Metric.ZINGG_VERSION, "0.5.0", true); - Analytics.trackEnv(Metric.ZINGG_HOME, true); Analytics.trackEnvValue(Metric.DATABRICKS_RUNTIME_VERSION, true); Analytics.track(Metric.COUNTRY, Locale.getDefault().getCountry(), true); Analytics.trackEnvValue(Metric.DB_INSTANCE_TYPE, collectMetrics); + Analytics.trackEnv(Metric.ZINGG_HOME, collectMetrics); Analytics.trackPropValue(Metric.JAVA_VERSION, collectMetrics); Analytics.trackPropValue(Metric.OS_ARCH, collectMetrics); Analytics.trackPropValue(Metric.OS_NAME, collectMetrics); diff --git a/common/core/src/main/java/zingg/common/core/preprocess/MultiFieldPreprocessor.java b/common/core/src/main/java/zingg/common/core/preprocess/MultiFieldPreprocessor.java index e8594052b..dbba6f35f 100644 --- a/common/core/src/main/java/zingg/common/core/preprocess/MultiFieldPreprocessor.java +++ b/common/core/src/main/java/zingg/common/core/preprocess/MultiFieldPreprocessor.java @@ -55,7 +55,7 @@ public boolean isApplicable() { public ZFrame preprocess(ZFrame df) { try { if(isApplicable()){ - 
LOG.info("Applying preprocessor on input dataframe"); + LOG.debug("Applying preprocessor on input dataframe"); return applyPreprocessor(df, relevantFields); } } catch (Exception exception) { From 324e580d7510d12c2374c957824b6d059452a891 Mon Sep 17 00:00:00 2001 From: Nitish Date: Mon, 28 Apr 2025 14:43:13 +0530 Subject: [PATCH 2/7] pass arguments to getBlocks (#1149) * pass arguments to getBlocks * pass strategy in SparkBlock constructor --- .../src/main/java/zingg/common/core/block/Block.java | 11 +++++++---- .../java/zingg/common/core/util/BlockingTreeUtil.java | 4 ++-- .../main/java/zingg/spark/core/block/SparkBlock.java | 5 +++-- .../zingg/spark/core/util/SparkBlockingTreeUtil.java | 9 ++++----- .../spark/core/block/TestSparkBlockingTreeUtil.java | 3 ++- 5 files changed, 18 insertions(+), 14 deletions(-) diff --git a/common/core/src/main/java/zingg/common/core/block/Block.java b/common/core/src/main/java/zingg/common/core/block/Block.java index b71243bcc..9d9e317e1 100644 --- a/common/core/src/main/java/zingg/common/core/block/Block.java +++ b/common/core/src/main/java/zingg/common/core/block/Block.java @@ -22,6 +22,7 @@ public abstract class Block implements Serializable { public static final Log LOG = LogFactory.getLog(Block.class); private final IHashFunctionUtility hashFunctionUtility; + private FieldDefinitionStrategy fieldDefinitionStrategy; protected ZFrame dupes; // Class[] types; @@ -46,11 +47,12 @@ public Block(ZFrame training, ZFrame dupes) { } public Block(ZFrame training, ZFrame dupes, - ListMap> functionsMap, long maxSize) { + ListMap> functionsMap, long maxSize, FieldDefinitionStrategy fieldDefinitionStrategy) { this(training, dupes); this.functionsMap = functionsMap; // functionsMap.prettyPrint(); this.maxSize = maxSize; + this.fieldDefinitionStrategy = fieldDefinitionStrategy; } /** @@ -374,12 +376,13 @@ public void printTree(Tree> tree, } public List getFieldOfInterestList(List fieldDefinitions, Canopy node) { - FieldDefinitionStrategy 
fieldDefinitionStrategy = new DefaultFieldDefinitionStrategy(); return fieldDefinitionStrategy.getAdjustedFieldDefinitions(fieldDefinitions, node); } public abstract FeatureFactory getFeatureFactory(); - - + + public void setFieldDefinitionStrategy(FieldDefinitionStrategy fieldDefinitionStrategy) { + this.fieldDefinitionStrategy = fieldDefinitionStrategy; + } } diff --git a/common/core/src/main/java/zingg/common/core/util/BlockingTreeUtil.java b/common/core/src/main/java/zingg/common/core/util/BlockingTreeUtil.java index f1b41563d..2086415fe 100644 --- a/common/core/src/main/java/zingg/common/core/util/BlockingTreeUtil.java +++ b/common/core/src/main/java/zingg/common/core/util/BlockingTreeUtil.java @@ -37,7 +37,7 @@ public void setPipeUtil(PipeUtilBase pipeUtil) { public abstract Block getBlock(ZFrame sample, ZFrame positives, - ListMap>hashFunctions, long blockSize); + ListMap>hashFunctions, long blockSize, IArguments arguments); public Tree> createBlockingTree(ZFrame testData, @@ -55,7 +55,7 @@ public Tree> createBlockingTree(ZFrame testData, LOG.info("Learning indexing rules for block size " + blockSize); positives = positives.coalesce(1); - Block cblock = getBlock(sample, positives, hashFunctions, blockSize); + Block cblock = getBlock(sample, positives, hashFunctions, blockSize, args); Canopy root = new Canopy(sample.collectAsList(), positives.collectAsList()); List fd = new ArrayList (); diff --git a/spark/core/src/main/java/zingg/spark/core/block/SparkBlock.java b/spark/core/src/main/java/zingg/spark/core/block/SparkBlock.java index 01daa4b6e..934e7d56d 100644 --- a/spark/core/src/main/java/zingg/spark/core/block/SparkBlock.java +++ b/spark/core/src/main/java/zingg/spark/core/block/SparkBlock.java @@ -8,6 +8,7 @@ import zingg.common.client.ZFrame; import zingg.common.client.util.ListMap; import zingg.common.core.block.Block; +import zingg.common.core.block.FieldDefinitionStrategy; import zingg.common.core.feature.FeatureFactory; import 
zingg.common.core.hash.HashFunction; import zingg.spark.core.feature.SparkFeatureFactory; @@ -23,8 +24,8 @@ public SparkBlock(){ public SparkBlock(ZFrame, Row, Column> training, ZFrame, Row, Column> dupes, - ListMap, Row, Column, DataType>> functionsMap, long maxSize) { - super(training, dupes, functionsMap, maxSize); + ListMap, Row, Column, DataType>> functionsMap, long maxSize, FieldDefinitionStrategy fieldDefinitionStrategy) { + super(training, dupes, functionsMap, maxSize, fieldDefinitionStrategy); } @Override diff --git a/spark/core/src/main/java/zingg/spark/core/util/SparkBlockingTreeUtil.java b/spark/core/src/main/java/zingg/spark/core/util/SparkBlockingTreeUtil.java index aec03909a..398027f9d 100644 --- a/spark/core/src/main/java/zingg/spark/core/util/SparkBlockingTreeUtil.java +++ b/spark/core/src/main/java/zingg/spark/core/util/SparkBlockingTreeUtil.java @@ -16,13 +16,12 @@ import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; +import zingg.common.client.IArguments; import zingg.common.client.ZFrame; import zingg.common.client.util.ColName; import zingg.common.client.util.ListMap; import zingg.common.client.util.PipeUtilBase; -import zingg.common.core.block.Block; -import zingg.common.core.block.Canopy; -import zingg.common.core.block.Tree; +import zingg.common.core.block.*; import zingg.common.core.hash.HashFunction; import zingg.common.core.util.BlockingTreeUtil; import zingg.spark.client.SparkFrame; @@ -86,8 +85,8 @@ public Tree> readBlockingTree(Arguments args) throws Exception, Zing public Block, Row, Column, DataType> getBlock(ZFrame, Row, Column> sample, ZFrame, Row, Column> positives, ListMap, Row, Column, DataType>> hashFunctions, - long blockSize) { + long blockSize, IArguments arguments) { // TODO Auto-generated method stub - return new SparkBlock(sample, positives, hashFunctions, blockSize); + return new SparkBlock(sample, positives, hashFunctions, blockSize, new DefaultFieldDefinitionStrategy()); } } \ 
No newline at end of file diff --git a/spark/core/src/test/java/zingg/spark/core/block/TestSparkBlockingTreeUtil.java b/spark/core/src/test/java/zingg/spark/core/block/TestSparkBlockingTreeUtil.java index 07f3cde1b..4a736ed7a 100644 --- a/spark/core/src/test/java/zingg/spark/core/block/TestSparkBlockingTreeUtil.java +++ b/spark/core/src/test/java/zingg/spark/core/block/TestSparkBlockingTreeUtil.java @@ -12,6 +12,7 @@ import zingg.common.client.util.ListMap; import zingg.common.client.util.WithSession; import zingg.common.core.block.Block; +import zingg.common.core.block.DefaultFieldDefinitionStrategy; import zingg.common.core.block.TestBlockingTreeUtil; import zingg.common.core.hash.HashFunction; import zingg.common.core.util.BlockingTreeUtil; @@ -54,6 +55,6 @@ protected void setTestDataBaseLocation() { @Override protected Block, Row, Column, DataType> getBlock(ZFrame, Row, Column> sample, ZFrame, Row, Column> positives, ListMap, Row, Column, DataType>> hashFunctions, long blockSize) { - return new SparkBlock(sample, positives, hashFunctions, blockSize); + return new SparkBlock(sample, positives, hashFunctions, blockSize, new DefaultFieldDefinitionStrategy()); } } From c5608caed4e93b8cebae52e1c121b6f04693c514 Mon Sep 17 00:00:00 2001 From: Sonal Goyal Date: Tue, 1 Jul 2025 23:15:06 +0530 Subject: [PATCH 3/7] analytics fine tune --- .../main/java/zingg/common/client/Client.java | 3 ++- .../zingg/common/core/executor/ZinggBase.java | 1 - .../zingg/common/core/util/Analytics.java | 5 +++-- .../common/core/util/BlockingTreeUtil.java | 2 +- config/zingg.conf | 1 + log4j2.properties | 4 +++- ...-a359-937b94bf9fab-c000.snappy.parquet.crc | Bin 24 -> 0 bytes ...-819c-64c113e507bc-c000.snappy.parquet.crc | Bin 0 -> 84 bytes ...4c99-a359-937b94bf9fab-c000.snappy.parquet | Bin 1572 -> 0 bytes ...4d92-819c-64c113e507bc-c000.snappy.parquet | Bin 0 -> 9586 bytes .../bestModel/metadata/.part-00000.crc | Bin 12 -> 12 bytes .../best.model/bestModel/metadata/part-00000 | 2 +- 
.../metadata/._SUCCESS.crc | Bin .../metadata/.part-00000.crc | Bin 0 -> 12 bytes .../metadata/_SUCCESS | 0 .../metadata/part-00000 | 2 +- .../metadata/.part-00000.crc | Bin 12 -> 0 bytes .../metadata/._SUCCESS.crc | Bin .../metadata/.part-00000.crc | Bin 0 -> 12 bytes .../metadata/_SUCCESS | 0 .../1_poly_2a7e28777fb4/metadata/part-00000 | 1 + .../metadata/.part-00000.crc | Bin 12 -> 0 bytes .../1_poly_f0b90cd4e059/metadata/part-00000 | 1 - .../metadata/.part-00000.crc | Bin 16 -> 0 bytes .../2_logreg_077944a9332a/metadata/part-00000 | 1 - .../data/._SUCCESS.crc | Bin ...884d-7430eeee9589-c000.snappy.parquet.crc} | Bin 156 -> 156 bytes .../data/_SUCCESS | 0 ...f0d-884d-7430eeee9589-c000.snappy.parquet} | Bin 18756 -> 18756 bytes .../metadata/._SUCCESS.crc | Bin .../metadata/.part-00000.crc | Bin 0 -> 16 bytes .../metadata/_SUCCESS | 0 .../2_logreg_529da18ddad8/metadata/part-00000 | 1 + .../estimator/metadata/.part-00000.crc | Bin 12 -> 12 bytes .../best.model/estimator/metadata/part-00000 | 2 +- .../metadata/._SUCCESS.crc | Bin .../metadata/.part-00000.crc | Bin 0 -> 12 bytes .../metadata/_SUCCESS | 0 .../metadata/part-00000 | 2 +- .../metadata/.part-00000.crc | Bin 12 -> 0 bytes .../metadata/._SUCCESS.crc | Bin .../metadata/.part-00000.crc | Bin 0 -> 12 bytes .../metadata/_SUCCESS | 0 .../1_poly_2a7e28777fb4/metadata/part-00000 | 1 + .../metadata/.part-00000.crc | Bin 12 -> 0 bytes .../1_poly_f0b90cd4e059/metadata/part-00000 | 1 - .../metadata/.part-00000.crc | Bin 16 -> 0 bytes .../2_logreg_077944a9332a/metadata/part-00000 | 1 - .../metadata/._SUCCESS.crc | Bin .../metadata/.part-00000.crc | Bin 0 -> 16 bytes .../metadata/_SUCCESS | 0 .../2_logreg_529da18ddad8/metadata/part-00000 | 1 + .../evaluator/metadata/.part-00000.crc | Bin 12 -> 12 bytes .../best.model/evaluator/metadata/part-00000 | 2 +- .../best.model/metadata/.part-00000.crc | Bin 40 -> 40 bytes .../classifier/best.model/metadata/part-00000 | 2 +- ...-8c88-4373a3db2de8-c000.snappy.parquet.crc | Bin 0 -> 
64 bytes ...44a9-8c88-4373a3db2de8-c000.snappy.parquet | Bin 0 -> 7163 bytes 58 files changed, 20 insertions(+), 16 deletions(-) delete mode 100644 models/100/model/block/zingg.block/.part-00000-ce845c02-c1e9-4c99-a359-937b94bf9fab-c000.snappy.parquet.crc create mode 100644 models/100/model/block/zingg.block/.part-00000-e4dac7ba-521e-4d92-819c-64c113e507bc-c000.snappy.parquet.crc delete mode 100644 models/100/model/block/zingg.block/part-00000-ce845c02-c1e9-4c99-a359-937b94bf9fab-c000.snappy.parquet create mode 100644 models/100/model/block/zingg.block/part-00000-e4dac7ba-521e-4d92-819c-64c113e507bc-c000.snappy.parquet rename models/100/model/classifier/best.model/bestModel/stages/{0_vecAssembler_e0c19172dee0 => 0_vecAssembler_090e68292fdb}/metadata/._SUCCESS.crc (100%) create mode 100644 models/100/model/classifier/best.model/bestModel/stages/0_vecAssembler_090e68292fdb/metadata/.part-00000.crc rename models/100/model/classifier/best.model/bestModel/stages/{0_vecAssembler_e0c19172dee0 => 0_vecAssembler_090e68292fdb}/metadata/_SUCCESS (100%) rename models/100/model/classifier/best.model/{estimator/stages/0_vecAssembler_e0c19172dee0 => bestModel/stages/0_vecAssembler_090e68292fdb}/metadata/part-00000 (59%) delete mode 100644 models/100/model/classifier/best.model/bestModel/stages/0_vecAssembler_e0c19172dee0/metadata/.part-00000.crc rename models/100/model/classifier/best.model/bestModel/stages/{1_poly_f0b90cd4e059 => 1_poly_2a7e28777fb4}/metadata/._SUCCESS.crc (100%) create mode 100644 models/100/model/classifier/best.model/bestModel/stages/1_poly_2a7e28777fb4/metadata/.part-00000.crc rename models/100/model/classifier/best.model/bestModel/stages/{1_poly_f0b90cd4e059 => 1_poly_2a7e28777fb4}/metadata/_SUCCESS (100%) create mode 100644 models/100/model/classifier/best.model/bestModel/stages/1_poly_2a7e28777fb4/metadata/part-00000 delete mode 100644 models/100/model/classifier/best.model/bestModel/stages/1_poly_f0b90cd4e059/metadata/.part-00000.crc delete mode 100644 
models/100/model/classifier/best.model/bestModel/stages/1_poly_f0b90cd4e059/metadata/part-00000 delete mode 100644 models/100/model/classifier/best.model/bestModel/stages/2_logreg_077944a9332a/metadata/.part-00000.crc delete mode 100644 models/100/model/classifier/best.model/bestModel/stages/2_logreg_077944a9332a/metadata/part-00000 rename models/100/model/classifier/best.model/bestModel/stages/{2_logreg_077944a9332a => 2_logreg_529da18ddad8}/data/._SUCCESS.crc (100%) rename models/100/model/classifier/best.model/bestModel/stages/{2_logreg_077944a9332a/data/.part-00000-ee62725c-51bd-4929-aff0-71314bf9d891-c000.snappy.parquet.crc => 2_logreg_529da18ddad8/data/.part-00000-0d8265b9-a3c1-4f0d-884d-7430eeee9589-c000.snappy.parquet.crc} (73%) rename models/100/model/classifier/best.model/bestModel/stages/{2_logreg_077944a9332a => 2_logreg_529da18ddad8}/data/_SUCCESS (100%) rename models/100/model/classifier/best.model/bestModel/stages/{2_logreg_077944a9332a/data/part-00000-ee62725c-51bd-4929-aff0-71314bf9d891-c000.snappy.parquet => 2_logreg_529da18ddad8/data/part-00000-0d8265b9-a3c1-4f0d-884d-7430eeee9589-c000.snappy.parquet} (96%) rename models/100/model/classifier/best.model/bestModel/stages/{2_logreg_077944a9332a => 2_logreg_529da18ddad8}/metadata/._SUCCESS.crc (100%) create mode 100644 models/100/model/classifier/best.model/bestModel/stages/2_logreg_529da18ddad8/metadata/.part-00000.crc rename models/100/model/classifier/best.model/bestModel/stages/{2_logreg_077944a9332a => 2_logreg_529da18ddad8}/metadata/_SUCCESS (100%) create mode 100644 models/100/model/classifier/best.model/bestModel/stages/2_logreg_529da18ddad8/metadata/part-00000 rename models/100/model/classifier/best.model/estimator/stages/{0_vecAssembler_e0c19172dee0 => 0_vecAssembler_090e68292fdb}/metadata/._SUCCESS.crc (100%) create mode 100644 models/100/model/classifier/best.model/estimator/stages/0_vecAssembler_090e68292fdb/metadata/.part-00000.crc rename 
models/100/model/classifier/best.model/estimator/stages/{0_vecAssembler_e0c19172dee0 => 0_vecAssembler_090e68292fdb}/metadata/_SUCCESS (100%) rename models/100/model/classifier/best.model/{bestModel/stages/0_vecAssembler_e0c19172dee0 => estimator/stages/0_vecAssembler_090e68292fdb}/metadata/part-00000 (59%) delete mode 100644 models/100/model/classifier/best.model/estimator/stages/0_vecAssembler_e0c19172dee0/metadata/.part-00000.crc rename models/100/model/classifier/best.model/estimator/stages/{1_poly_f0b90cd4e059 => 1_poly_2a7e28777fb4}/metadata/._SUCCESS.crc (100%) create mode 100644 models/100/model/classifier/best.model/estimator/stages/1_poly_2a7e28777fb4/metadata/.part-00000.crc rename models/100/model/classifier/best.model/estimator/stages/{1_poly_f0b90cd4e059 => 1_poly_2a7e28777fb4}/metadata/_SUCCESS (100%) create mode 100644 models/100/model/classifier/best.model/estimator/stages/1_poly_2a7e28777fb4/metadata/part-00000 delete mode 100644 models/100/model/classifier/best.model/estimator/stages/1_poly_f0b90cd4e059/metadata/.part-00000.crc delete mode 100644 models/100/model/classifier/best.model/estimator/stages/1_poly_f0b90cd4e059/metadata/part-00000 delete mode 100644 models/100/model/classifier/best.model/estimator/stages/2_logreg_077944a9332a/metadata/.part-00000.crc delete mode 100644 models/100/model/classifier/best.model/estimator/stages/2_logreg_077944a9332a/metadata/part-00000 rename models/100/model/classifier/best.model/estimator/stages/{2_logreg_077944a9332a => 2_logreg_529da18ddad8}/metadata/._SUCCESS.crc (100%) create mode 100644 models/100/model/classifier/best.model/estimator/stages/2_logreg_529da18ddad8/metadata/.part-00000.crc rename models/100/model/classifier/best.model/estimator/stages/{2_logreg_077944a9332a => 2_logreg_529da18ddad8}/metadata/_SUCCESS (100%) create mode 100644 models/100/model/classifier/best.model/estimator/stages/2_logreg_529da18ddad8/metadata/part-00000 create mode 100644 
models/100/trainingData/unmarked/.part-00000-fc0fdb27-3b64-44a9-8c88-4373a3db2de8-c000.snappy.parquet.crc create mode 100644 models/100/trainingData/unmarked/part-00000-fc0fdb27-3b64-44a9-8c88-4373a3db2de8-c000.snappy.parquet diff --git a/common/client/src/main/java/zingg/common/client/Client.java b/common/client/src/main/java/zingg/common/client/Client.java index eacc3322c..0681b4331 100644 --- a/common/client/src/main/java/zingg/common/client/Client.java +++ b/common/client/src/main/java/zingg/common/client/Client.java @@ -214,7 +214,7 @@ else if (options.get(ClientOptions.CONF).value.endsWith("env")) { client.init(); // after setting arguments etc. as some of the listeners need it client.execute(); - client.postMetrics(); + LOG.warn("Zingg processing has completed"); } catch(ZinggClientException e) { @@ -279,6 +279,7 @@ public IZArgs getArguments() { public void execute() throws ZinggClientException { zingg.execute(); + postMetrics(); } public void postMetrics() throws ZinggClientException { diff --git a/common/core/src/main/java/zingg/common/core/executor/ZinggBase.java b/common/core/src/main/java/zingg/common/core/executor/ZinggBase.java index f2617c6e0..d2e919ddf 100644 --- a/common/core/src/main/java/zingg/common/core/executor/ZinggBase.java +++ b/common/core/src/main/java/zingg/common/core/executor/ZinggBase.java @@ -80,7 +80,6 @@ public void track(boolean collectMetrics){ Analytics.track(Metric.OUTPUT_FORMAT, getPipeUtil().getPipesAsString(args.getOutput()), collectMetrics); Analytics.track(Metric.MODEL_ID, args.getModelId(), collectMetrics); Analytics.track(Metric.STOPWORDS,new StopWordUtility().getFieldDefinitionNamesWithStopwords(args), collectMetrics); - } diff --git a/common/core/src/main/java/zingg/common/core/util/Analytics.java b/common/core/src/main/java/zingg/common/core/util/Analytics.java index 5229f57b9..4455792da 100644 --- a/common/core/src/main/java/zingg/common/core/util/Analytics.java +++ 
b/common/core/src/main/java/zingg/common/core/util/Analytics.java @@ -45,6 +45,7 @@ private static Map getMetrics() { public static void track(String metricName, String metricValue, boolean collectMetrics) { if (collectMetrics) { String metricNameToSend = metricName.replace(".", "_"); + if (metricValue == null) metricValue = ""; getMetrics().put(metricNameToSend, metricValue); } } @@ -123,7 +124,7 @@ public static void postEvent(String phase, boolean collectMetrics) { rootNode.set("events", eventList); rootNode.put("user_id", getUserId()); String metricEvent = rootNode.toString(); - LOG.warn("event is " + metricEvent); + LOG.debug("event is " + metricEvent); Analytics.sendEvents(metricEvent); } @@ -142,7 +143,7 @@ private static void sendEvents(String param) { uri = builder.build(); URL url = uri.toURL(); String response = executePostRequest(url.toString(), param); - LOG.warn("Analytics event " + response); + LOG.debug("Analytics event " + response); } catch (IOException | URISyntaxException e) { if(LOG.isDebugEnabled()) e.printStackTrace(); } diff --git a/common/core/src/main/java/zingg/common/core/util/BlockingTreeUtil.java b/common/core/src/main/java/zingg/common/core/util/BlockingTreeUtil.java index 2086415fe..be84939f3 100644 --- a/common/core/src/main/java/zingg/common/core/util/BlockingTreeUtil.java +++ b/common/core/src/main/java/zingg/common/core/util/BlockingTreeUtil.java @@ -107,7 +107,7 @@ public Tree> readBlockingTree(IArguments args, IModelHelper mu) throws //byte [] byteArrayBack = (byte[]) tree.df().head().get(0); byte[] byteArrayBack = getTreeFromDF(tree); Tree> blockingTree = null; - LOG.warn("byte array back is " + byteArrayBack); + LOG.debug("byte array back is " + byteArrayBack); blockingTree = (Tree>) Util.revertObjectFromByteArray(byteArrayBack); return blockingTree; } diff --git a/config/zingg.conf b/config/zingg.conf index c2f4f1976..4bfd84149 100644 --- a/config/zingg.conf +++ b/config/zingg.conf @@ -13,6 +13,7 @@ 
spark.serializer=org.apache.spark.serializer.KryoSerializer spark.default.parallelism=8 spark.debug.maxToStringFields=200 +spark.sql.debug.maxToStringFields=200 spark.driver.memory=8g spark.executor.memory=8g #spark.jars=/home/zingg/pathto.jar diff --git a/log4j2.properties b/log4j2.properties index 9ed982118..f42141ec9 100644 --- a/log4j2.properties +++ b/log4j2.properties @@ -54,8 +54,10 @@ logger.breeze.level = fatal logger.zingg.name = zingg logger.zingg.level = info logger.zingg_analytics.name = zingg.common.core.util.Analytics -logger.zingg_analytics.level = OFF +logger.zingg_analytics.level = debug logger.codegen.name = org.apache.spark.sql.catalyst.expressions logger.codegen.level = OFF logger.codehaus.name = org.codehaus logger.codehaus.level = OFF +logger.graphframes.name = org.graphframes +logger.graphframes.level = ERROR diff --git a/models/100/model/block/zingg.block/.part-00000-ce845c02-c1e9-4c99-a359-937b94bf9fab-c000.snappy.parquet.crc b/models/100/model/block/zingg.block/.part-00000-ce845c02-c1e9-4c99-a359-937b94bf9fab-c000.snappy.parquet.crc deleted file mode 100644 index c33fd8c774779a8654ebfbaff4b6af120847386f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 24 fcmYc;N@ieSU}C6>TbZ(=(m{x2dHqG*Gu`t6Sb7Mt diff --git a/models/100/model/block/zingg.block/.part-00000-e4dac7ba-521e-4d92-819c-64c113e507bc-c000.snappy.parquet.crc b/models/100/model/block/zingg.block/.part-00000-e4dac7ba-521e-4d92-819c-64c113e507bc-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..c1eb415f8472763d5f00e1437d47e52aaf2558be GIT binary patch literal 84 zcmV-a0IUCFa$^7h00IEFCBN^# qrGQ#!)P$RAD#h3_42qWqTOf-fNWBkJWf?|j!e)5CSM2%n+eF_*gUh>>M_dd`4 zp8wCyjO3p(B>Q-=s$eNXDkWiEXXC!bOEi|GMi_K3Nwp|eYrBvi*y!fg|mVqK+Tp( zI9V4)B{5>#Y%yy%B+}V}fJ|@!>s-S$I&|UfqWIWxFj?OChMs~IlR1!x(wRjGI+EBB0);9 z`Mkl%McEOX-zVb-gpHQa4@GB!9bB|HIq!96!BvQ^Ql?qe=YtmN1IM?)TV*0&@+EuF 
zC6^;=)^j!66ALsfOF)Z5UkC}H1(Q{|Pl=%Yb2j7(d8a8y7`(fl}>UQLnd-A~LjtdhKx!_fD%;99c zA|N{XnMCI0#Hgo3PB%usAnXk31dWXO7=VlPVz%TtpoTN67Y08dK(s#~l7%%k7j@fA zT~CXz2cV2v<9Pm94{?g9xA%z=j%@bk5Gc~E%i&>| zDG$T$m*W(>g2JX&Pw~X%J~YQijmv*5*)+Hk$WtD;V-C>XlYM#;Hod-o5U} z4fdjg^{g^g0@WRdfP1vLe{e`1Zy>)E3z z7PKlCtU0-X{Ha*8`C@}HN7(hei1o$_&1K`n29KPIHRlpV{|*Heo_>%sOlB|QyIHSP zSgwMa1y*r*Z|raM*x;$tr;nb7z~ocgo2qu?sk4!kmg>_|k)B-LsOS+r*Pj~bOC`7K rTXNg_`gwlK_P(S(uq~O_8Bc9Z@&4q1uIJdsU?_xnZxDbP{6G8!;+>!t diff --git a/models/100/model/block/zingg.block/part-00000-e4dac7ba-521e-4d92-819c-64c113e507bc-c000.snappy.parquet b/models/100/model/block/zingg.block/part-00000-e4dac7ba-521e-4d92-819c-64c113e507bc-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..997120919a9fa6280a1e4545affd74c32981bb93 GIT binary patch literal 9586 zcmeHNU5p!76+U+|*E@E0RlC`-Y+@(2_cxV0wzHdbvzw&ZpIX_KH(8oQs9OrX_KbJE zvB#5{an?ylNu>%!s8XJJpbrQ^3spoa$U~8k5UP;iCE=x|2!Vun-~myI0xbf;xnp~G zyxCo}iYoCimgdg&opbIv=iWQtcYkJS`uUs;^2dkdH$w6sKf3h`5|afikYowq-6uYJ z@qQ1$+kXaXnGm~P(N|WKQoUBI>v%IYrC6<(E-M#JO@mL=zx+vCG6WC{Ag*dmwc*La z8oSCeRi>|G&KK9Tl08)bTGd$DvOzAak$ARYSE`w5(`45Q6^kSWeOMv0H5(52Pt)Ym zS!S&+G6U6o1~WC?w&75L-hqrCFM=1nlg@DSQ%wUsJVXZ;-hnq~nO--p{p-Q))8g;G zN(Jay0KZnP)Mo1q-7Y{+sjk~v6YCl(&{?^Bt6IUDGx>^ERp+#FMX%VEy3Soy8-{ky zhG6_heGj3J99(){t1*18uUvswKq`P#Zs;X)GGBriHb-U^-KBqneBTI6B`-<%k38E? 
zP|P;t=cT5Bb@ZZ}1N5H!F}(-M?peAx9qy{g}T|7iZl(w2A@U0 z6tjVrbym|tQUdKC;N7Am{|U;agLBPSe-;16&)GNk0htHUs$AFTKSs&A2ml0S)k?%RnpECAB>HV**6^HzW!WFg3ej>bbWh(ln9_qr-J` zMPUXjt!hvE2dyhr#l{A-l=&w~E-^TMclrJ7;Pu}mX%J)CUrru|+)yI<#b69WP9FJO z<=dyupIAsdTz~JA-~VCpu8@2`D`~nqud7-ULpZ!V1-bs%Z|{EhKVM(_)~|;am4}3U z1>)2FqG8b6!4NiH;JIN4&>#Tyi9YQ3h=D&FAezRabU!rFQs{--$(1*8fYY82fBLiV z%MbT~kcU9E&eS|B*>!Us{PwD;S*!J`+BBX$Lth&0UB@>QuwK}YLsKM0e*=5!9T=r4 z@^bH}%xIVy=b1IkOst&(@+kxer2!Zq0Z7rz zX^Ft)_~+pyDq^;YVrRI#j+r^0VVF6KiqG{7iLj6q;VCq!;B^$Q<9N;BHA!gpG=ER> zO`eVz(U1_0oOxU@dHgDBvJeepN5sOC{2+umz=HJIUS|k@*}@T1W3m8p?+`?KXE?&) zYv^EHSct}=v0*7D_eO_6igrJig|HC*&f)Nb(6Dqea{r0PjdUm27aO~h2nTa*m6+Z4Wl~&{T_g|+sgk$SeMgC1U#TV4!+ysEk|dL-Q6ZS_?zKD z=Mk3MATDxH<^!;W!!GznJZ}H+RUGJ!*>PCeLJ4i@EqKlCAfLI(?os6C4mi-wNd~c7 zYZv&+yuj)_c5s{{Juh&9d0V0l=ngM*AiZ#zT;Qy*BN6PPYOtI4lm}!el zAyjYaS3YTRJaZe^403~6T%^kb?h1Z`Y&soTOBl5)hibR4h7HlWktJ(*tMr zs;s+|=k}`1T*_y6QFf*rlsmb=lc#v1@Z=~@j`L)OCzCwsnx%Nejf=dE>48VwI6ge$ z-m-%8h}$FX?uDsG+&j!SJmPk?J;-NI++i5?*4!R-Z|;`zshvmN9(8+b?p^nkJ?i$T zd)FHB5w}O&9&vlbjdOT!v%Iy_{(rbxj)&;@u@Ii01BCy6Kn{mL z2+3~tD0%yxoL!sDaVv7M245Lsy3e!6BR+Ri3&N^VTaB}R*lN>xp$#qr$4ST1`) tJyv{XY+NfHJ2943C!WccRHo&Q=d|(cgsK+Vk!UQ2b8IyMu!LV*{TIU|Vc-A& literal 0 HcmV?d00001 diff --git a/models/100/model/classifier/best.model/bestModel/metadata/.part-00000.crc b/models/100/model/classifier/best.model/bestModel/metadata/.part-00000.crc index b72443667c1c78349708bd99ef63ac7232648fe9..1b3cf7914f36b52d7e38fb52864797d779ac8382 100644 GIT binary patch literal 12 TcmYc;N@ieSU}C8LkeLAh6Ltf4 literal 12 TcmYc;N@ieSU}ETGmkj^_5JLhu diff --git a/models/100/model/classifier/best.model/bestModel/metadata/part-00000 b/models/100/model/classifier/best.model/bestModel/metadata/part-00000 index 1528b03da..adf078e50 100644 --- a/models/100/model/classifier/best.model/bestModel/metadata/part-00000 +++ b/models/100/model/classifier/best.model/bestModel/metadata/part-00000 @@ -1 +1 @@ 
-{"class":"org.apache.spark.ml.PipelineModel","timestamp":1743247597718,"sparkVersion":"3.5.0","uid":"pipeline_f2e218586f66","paramMap":{"stageUids":["vecAssembler_e0c19172dee0","poly_f0b90cd4e059","logreg_077944a9332a"]},"defaultParamMap":{}} +{"class":"org.apache.spark.ml.PipelineModel","timestamp":1751390444064,"sparkVersion":"3.5.0","uid":"pipeline_af0e0bb68da1","paramMap":{"stageUids":["vecAssembler_090e68292fdb","poly_2a7e28777fb4","logreg_529da18ddad8"]},"defaultParamMap":{}} diff --git a/models/100/model/classifier/best.model/bestModel/stages/0_vecAssembler_e0c19172dee0/metadata/._SUCCESS.crc b/models/100/model/classifier/best.model/bestModel/stages/0_vecAssembler_090e68292fdb/metadata/._SUCCESS.crc similarity index 100% rename from models/100/model/classifier/best.model/bestModel/stages/0_vecAssembler_e0c19172dee0/metadata/._SUCCESS.crc rename to models/100/model/classifier/best.model/bestModel/stages/0_vecAssembler_090e68292fdb/metadata/._SUCCESS.crc diff --git a/models/100/model/classifier/best.model/bestModel/stages/0_vecAssembler_090e68292fdb/metadata/.part-00000.crc b/models/100/model/classifier/best.model/bestModel/stages/0_vecAssembler_090e68292fdb/metadata/.part-00000.crc new file mode 100644 index 0000000000000000000000000000000000000000..00b6fd57f32abfa5869390ead15f0ed739a545c3 GIT binary patch literal 12 TcmYc;N@ieSU}AX6vT->86Db3H literal 0 HcmV?d00001 diff --git a/models/100/model/classifier/best.model/bestModel/stages/0_vecAssembler_e0c19172dee0/metadata/_SUCCESS b/models/100/model/classifier/best.model/bestModel/stages/0_vecAssembler_090e68292fdb/metadata/_SUCCESS similarity index 100% rename from models/100/model/classifier/best.model/bestModel/stages/0_vecAssembler_e0c19172dee0/metadata/_SUCCESS rename to models/100/model/classifier/best.model/bestModel/stages/0_vecAssembler_090e68292fdb/metadata/_SUCCESS diff --git a/models/100/model/classifier/best.model/estimator/stages/0_vecAssembler_e0c19172dee0/metadata/part-00000 
b/models/100/model/classifier/best.model/bestModel/stages/0_vecAssembler_090e68292fdb/metadata/part-00000 similarity index 59% rename from models/100/model/classifier/best.model/estimator/stages/0_vecAssembler_e0c19172dee0/metadata/part-00000 rename to models/100/model/classifier/best.model/bestModel/stages/0_vecAssembler_090e68292fdb/metadata/part-00000 index 0992df9f0..3a618017b 100644 --- a/models/100/model/classifier/best.model/estimator/stages/0_vecAssembler_e0c19172dee0/metadata/part-00000 +++ b/models/100/model/classifier/best.model/bestModel/stages/0_vecAssembler_090e68292fdb/metadata/part-00000 @@ -1 +1 @@ -{"class":"org.apache.spark.ml.feature.VectorAssembler","timestamp":1743247597510,"sparkVersion":"3.5.0","uid":"vecAssembler_e0c19172dee0","paramMap":{"outputCol":"z_featurevector","inputCols":["z_sim0","z_sim1","z_sim2","z_sim3","z_sim4","z_sim5","z_sim6","z_sim7","z_sim8","z_sim9","z_sim10","z_sim11","z_sim12","z_sim13","z_sim14","z_sim15","z_sim16","z_sim17","z_sim18","z_sim19"]},"defaultParamMap":{"outputCol":"vecAssembler_e0c19172dee0__output","handleInvalid":"error"}} +{"class":"org.apache.spark.ml.feature.VectorAssembler","timestamp":1751390444189,"sparkVersion":"3.5.0","uid":"vecAssembler_090e68292fdb","paramMap":{"outputCol":"z_featurevector","inputCols":["z_sim0","z_sim1","z_sim2","z_sim3","z_sim4","z_sim5","z_sim6","z_sim7","z_sim8","z_sim9","z_sim10","z_sim11","z_sim12","z_sim13","z_sim14","z_sim15","z_sim16","z_sim17","z_sim18","z_sim19"]},"defaultParamMap":{"outputCol":"vecAssembler_090e68292fdb__output","handleInvalid":"error"}} diff --git a/models/100/model/classifier/best.model/bestModel/stages/0_vecAssembler_e0c19172dee0/metadata/.part-00000.crc b/models/100/model/classifier/best.model/bestModel/stages/0_vecAssembler_e0c19172dee0/metadata/.part-00000.crc deleted file mode 100644 index 24c7c635ae981d1fd9f55f42cb3d30548e998e64..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 
TcmYc;N@ieSU}Bj1hxY>j6Ve08 diff --git a/models/100/model/classifier/best.model/bestModel/stages/1_poly_f0b90cd4e059/metadata/._SUCCESS.crc b/models/100/model/classifier/best.model/bestModel/stages/1_poly_2a7e28777fb4/metadata/._SUCCESS.crc similarity index 100% rename from models/100/model/classifier/best.model/bestModel/stages/1_poly_f0b90cd4e059/metadata/._SUCCESS.crc rename to models/100/model/classifier/best.model/bestModel/stages/1_poly_2a7e28777fb4/metadata/._SUCCESS.crc diff --git a/models/100/model/classifier/best.model/bestModel/stages/1_poly_2a7e28777fb4/metadata/.part-00000.crc b/models/100/model/classifier/best.model/bestModel/stages/1_poly_2a7e28777fb4/metadata/.part-00000.crc new file mode 100644 index 0000000000000000000000000000000000000000..b7c0bda3809233b819391c6e2204764083e97834 GIT binary patch literal 12 TcmYc;N@ieSU}DJ2ooE995#0jg literal 0 HcmV?d00001 diff --git a/models/100/model/classifier/best.model/bestModel/stages/1_poly_f0b90cd4e059/metadata/_SUCCESS b/models/100/model/classifier/best.model/bestModel/stages/1_poly_2a7e28777fb4/metadata/_SUCCESS similarity index 100% rename from models/100/model/classifier/best.model/bestModel/stages/1_poly_f0b90cd4e059/metadata/_SUCCESS rename to models/100/model/classifier/best.model/bestModel/stages/1_poly_2a7e28777fb4/metadata/_SUCCESS diff --git a/models/100/model/classifier/best.model/bestModel/stages/1_poly_2a7e28777fb4/metadata/part-00000 b/models/100/model/classifier/best.model/bestModel/stages/1_poly_2a7e28777fb4/metadata/part-00000 new file mode 100644 index 000000000..91c3aa489 --- /dev/null +++ b/models/100/model/classifier/best.model/bestModel/stages/1_poly_2a7e28777fb4/metadata/part-00000 @@ -0,0 +1 @@ +{"class":"org.apache.spark.ml.feature.PolynomialExpansion","timestamp":1751390444313,"sparkVersion":"3.5.0","uid":"poly_2a7e28777fb4","paramMap":{"inputCol":"z_featurevector","degree":3,"outputCol":"z_feature"},"defaultParamMap":{"degree":2,"outputCol":"poly_2a7e28777fb4__output"}} diff 
--git a/models/100/model/classifier/best.model/bestModel/stages/1_poly_f0b90cd4e059/metadata/.part-00000.crc b/models/100/model/classifier/best.model/bestModel/stages/1_poly_f0b90cd4e059/metadata/.part-00000.crc deleted file mode 100644 index 4d882a7d3dc310bf8be66d1dfb6177476c2e20f7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}8wW7vT*660rj# diff --git a/models/100/model/classifier/best.model/bestModel/stages/1_poly_f0b90cd4e059/metadata/part-00000 b/models/100/model/classifier/best.model/bestModel/stages/1_poly_f0b90cd4e059/metadata/part-00000 deleted file mode 100644 index 494b9d0a2..000000000 --- a/models/100/model/classifier/best.model/bestModel/stages/1_poly_f0b90cd4e059/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.feature.PolynomialExpansion","timestamp":1743247598018,"sparkVersion":"3.5.0","uid":"poly_f0b90cd4e059","paramMap":{"degree":3,"inputCol":"z_featurevector","outputCol":"z_feature"},"defaultParamMap":{"degree":2,"outputCol":"poly_f0b90cd4e059__output"}} diff --git a/models/100/model/classifier/best.model/bestModel/stages/2_logreg_077944a9332a/metadata/.part-00000.crc b/models/100/model/classifier/best.model/bestModel/stages/2_logreg_077944a9332a/metadata/.part-00000.crc deleted file mode 100644 index 2e94648ffd0f0d0466c039c14a3a41c7162430ab..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}D%%JZt0QUtJ3UDp3Zp diff --git a/models/100/model/classifier/best.model/bestModel/stages/2_logreg_077944a9332a/metadata/part-00000 b/models/100/model/classifier/best.model/bestModel/stages/2_logreg_077944a9332a/metadata/part-00000 deleted file mode 100644 index 950b3745c..000000000 --- a/models/100/model/classifier/best.model/bestModel/stages/2_logreg_077944a9332a/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ 
-{"class":"org.apache.spark.ml.classification.LogisticRegressionModel","timestamp":1743247598151,"sparkVersion":"3.5.0","uid":"logreg_077944a9332a","paramMap":{"threshold":0.4,"labelCol":"z_isMatch","regParam":1.0E-4,"fitIntercept":true,"predictionCol":"z_prediction","probabilityCol":"z_probability","maxIter":100,"featuresCol":"z_feature"},"defaultParamMap":{"threshold":0.5,"tol":1.0E-6,"maxBlockSizeInMB":0.0,"labelCol":"label","aggregationDepth":2,"standardization":true,"rawPredictionCol":"rawPrediction","regParam":0.0,"fitIntercept":true,"predictionCol":"prediction","probabilityCol":"probability","elasticNetParam":0.0,"family":"auto","featuresCol":"features","maxIter":100}} diff --git a/models/100/model/classifier/best.model/bestModel/stages/2_logreg_077944a9332a/data/._SUCCESS.crc b/models/100/model/classifier/best.model/bestModel/stages/2_logreg_529da18ddad8/data/._SUCCESS.crc similarity index 100% rename from models/100/model/classifier/best.model/bestModel/stages/2_logreg_077944a9332a/data/._SUCCESS.crc rename to models/100/model/classifier/best.model/bestModel/stages/2_logreg_529da18ddad8/data/._SUCCESS.crc diff --git a/models/100/model/classifier/best.model/bestModel/stages/2_logreg_077944a9332a/data/.part-00000-ee62725c-51bd-4929-aff0-71314bf9d891-c000.snappy.parquet.crc b/models/100/model/classifier/best.model/bestModel/stages/2_logreg_529da18ddad8/data/.part-00000-0d8265b9-a3c1-4f0d-884d-7430eeee9589-c000.snappy.parquet.crc similarity index 73% rename from models/100/model/classifier/best.model/bestModel/stages/2_logreg_077944a9332a/data/.part-00000-ee62725c-51bd-4929-aff0-71314bf9d891-c000.snappy.parquet.crc rename to models/100/model/classifier/best.model/bestModel/stages/2_logreg_529da18ddad8/data/.part-00000-0d8265b9-a3c1-4f0d-884d-7430eeee9589-c000.snappy.parquet.crc index 4c463b65658170cd3c534b43b87e24a495417638..773fe936f3a0db7d78aa8100457387a6a66a820b 100644 GIT binary patch delta 35 
tcmV+;0Nnqa0h|GlfE*N@%THQKMeE7zM!I{t7Us4xyiSfuJ#tr4ROl%C3D{W&z)D2rV5T#%j3ZlyGYCzOiy95xm$6j)BfxQ=mnGB*7 d9l{}$$mBi;utr4(k;$(d3V@=UV;oyiSfuJ#tr4RObl$3D{W&z)D2rV5T#%j3ZlyGYCzOiy95xm$6j)BfxQ=mnGB*7 d9l{}$$mBi;utr4(k;$(d3V@=UV;o_~ literal 12 TcmYc;N@ieSU}C60vSTR#6Ws&b diff --git a/models/100/model/classifier/best.model/estimator/metadata/part-00000 b/models/100/model/classifier/best.model/estimator/metadata/part-00000 index 587d465dc..b075b014d 100644 --- a/models/100/model/classifier/best.model/estimator/metadata/part-00000 +++ b/models/100/model/classifier/best.model/estimator/metadata/part-00000 @@ -1 +1 @@ -{"class":"org.apache.spark.ml.Pipeline","timestamp":1743247597446,"sparkVersion":"3.5.0","uid":"pipeline_f2e218586f66","paramMap":{"stageUids":["vecAssembler_e0c19172dee0","poly_f0b90cd4e059","logreg_077944a9332a"]},"defaultParamMap":{}} +{"class":"org.apache.spark.ml.Pipeline","timestamp":1751390443654,"sparkVersion":"3.5.0","uid":"pipeline_af0e0bb68da1","paramMap":{"stageUids":["vecAssembler_090e68292fdb","poly_2a7e28777fb4","logreg_529da18ddad8"]},"defaultParamMap":{}} diff --git a/models/100/model/classifier/best.model/estimator/stages/0_vecAssembler_e0c19172dee0/metadata/._SUCCESS.crc b/models/100/model/classifier/best.model/estimator/stages/0_vecAssembler_090e68292fdb/metadata/._SUCCESS.crc similarity index 100% rename from models/100/model/classifier/best.model/estimator/stages/0_vecAssembler_e0c19172dee0/metadata/._SUCCESS.crc rename to models/100/model/classifier/best.model/estimator/stages/0_vecAssembler_090e68292fdb/metadata/._SUCCESS.crc diff --git a/models/100/model/classifier/best.model/estimator/stages/0_vecAssembler_090e68292fdb/metadata/.part-00000.crc b/models/100/model/classifier/best.model/estimator/stages/0_vecAssembler_090e68292fdb/metadata/.part-00000.crc new file mode 100644 index 0000000000000000000000000000000000000000..76874cf339156031bec0d269c2ff77dddd5296bb GIT binary patch literal 12 TcmYc;N@ieSU}CVBUGy9P5dH%o literal 0 HcmV?d00001 
diff --git a/models/100/model/classifier/best.model/estimator/stages/0_vecAssembler_e0c19172dee0/metadata/_SUCCESS b/models/100/model/classifier/best.model/estimator/stages/0_vecAssembler_090e68292fdb/metadata/_SUCCESS similarity index 100% rename from models/100/model/classifier/best.model/estimator/stages/0_vecAssembler_e0c19172dee0/metadata/_SUCCESS rename to models/100/model/classifier/best.model/estimator/stages/0_vecAssembler_090e68292fdb/metadata/_SUCCESS diff --git a/models/100/model/classifier/best.model/bestModel/stages/0_vecAssembler_e0c19172dee0/metadata/part-00000 b/models/100/model/classifier/best.model/estimator/stages/0_vecAssembler_090e68292fdb/metadata/part-00000 similarity index 59% rename from models/100/model/classifier/best.model/bestModel/stages/0_vecAssembler_e0c19172dee0/metadata/part-00000 rename to models/100/model/classifier/best.model/estimator/stages/0_vecAssembler_090e68292fdb/metadata/part-00000 index 4ca0e1c86..93b00be70 100644 --- a/models/100/model/classifier/best.model/bestModel/stages/0_vecAssembler_e0c19172dee0/metadata/part-00000 +++ b/models/100/model/classifier/best.model/estimator/stages/0_vecAssembler_090e68292fdb/metadata/part-00000 @@ -1 +1 @@ -{"class":"org.apache.spark.ml.feature.VectorAssembler","timestamp":1743247597820,"sparkVersion":"3.5.0","uid":"vecAssembler_e0c19172dee0","paramMap":{"outputCol":"z_featurevector","inputCols":["z_sim0","z_sim1","z_sim2","z_sim3","z_sim4","z_sim5","z_sim6","z_sim7","z_sim8","z_sim9","z_sim10","z_sim11","z_sim12","z_sim13","z_sim14","z_sim15","z_sim16","z_sim17","z_sim18","z_sim19"]},"defaultParamMap":{"outputCol":"vecAssembler_e0c19172dee0__output","handleInvalid":"error"}} 
+{"class":"org.apache.spark.ml.feature.VectorAssembler","timestamp":1751390443725,"sparkVersion":"3.5.0","uid":"vecAssembler_090e68292fdb","paramMap":{"outputCol":"z_featurevector","inputCols":["z_sim0","z_sim1","z_sim2","z_sim3","z_sim4","z_sim5","z_sim6","z_sim7","z_sim8","z_sim9","z_sim10","z_sim11","z_sim12","z_sim13","z_sim14","z_sim15","z_sim16","z_sim17","z_sim18","z_sim19"]},"defaultParamMap":{"outputCol":"vecAssembler_090e68292fdb__output","handleInvalid":"error"}} diff --git a/models/100/model/classifier/best.model/estimator/stages/0_vecAssembler_e0c19172dee0/metadata/.part-00000.crc b/models/100/model/classifier/best.model/estimator/stages/0_vecAssembler_e0c19172dee0/metadata/.part-00000.crc deleted file mode 100644 index 65b7444e811f2571c18b212313891ebb77e17262..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E@dJ>MGu6RrbY diff --git a/models/100/model/classifier/best.model/estimator/stages/1_poly_f0b90cd4e059/metadata/._SUCCESS.crc b/models/100/model/classifier/best.model/estimator/stages/1_poly_2a7e28777fb4/metadata/._SUCCESS.crc similarity index 100% rename from models/100/model/classifier/best.model/estimator/stages/1_poly_f0b90cd4e059/metadata/._SUCCESS.crc rename to models/100/model/classifier/best.model/estimator/stages/1_poly_2a7e28777fb4/metadata/._SUCCESS.crc diff --git a/models/100/model/classifier/best.model/estimator/stages/1_poly_2a7e28777fb4/metadata/.part-00000.crc b/models/100/model/classifier/best.model/estimator/stages/1_poly_2a7e28777fb4/metadata/.part-00000.crc new file mode 100644 index 0000000000000000000000000000000000000000..234f45add5b1437e61a98d7f7acb682e16642e5d GIT binary patch literal 12 TcmYc;N@ieSU}E^-Cu#`*5{&}K literal 0 HcmV?d00001 diff --git a/models/100/model/classifier/best.model/estimator/stages/1_poly_f0b90cd4e059/metadata/_SUCCESS b/models/100/model/classifier/best.model/estimator/stages/1_poly_2a7e28777fb4/metadata/_SUCCESS similarity index 100% rename from 
models/100/model/classifier/best.model/estimator/stages/1_poly_f0b90cd4e059/metadata/_SUCCESS rename to models/100/model/classifier/best.model/estimator/stages/1_poly_2a7e28777fb4/metadata/_SUCCESS diff --git a/models/100/model/classifier/best.model/estimator/stages/1_poly_2a7e28777fb4/metadata/part-00000 b/models/100/model/classifier/best.model/estimator/stages/1_poly_2a7e28777fb4/metadata/part-00000 new file mode 100644 index 000000000..e36c2151d --- /dev/null +++ b/models/100/model/classifier/best.model/estimator/stages/1_poly_2a7e28777fb4/metadata/part-00000 @@ -0,0 +1 @@ +{"class":"org.apache.spark.ml.feature.PolynomialExpansion","timestamp":1751390443823,"sparkVersion":"3.5.0","uid":"poly_2a7e28777fb4","paramMap":{"inputCol":"z_featurevector","degree":3,"outputCol":"z_feature"},"defaultParamMap":{"degree":2,"outputCol":"poly_2a7e28777fb4__output"}} diff --git a/models/100/model/classifier/best.model/estimator/stages/1_poly_f0b90cd4e059/metadata/.part-00000.crc b/models/100/model/classifier/best.model/estimator/stages/1_poly_f0b90cd4e059/metadata/.part-00000.crc deleted file mode 100644 index cecb50017430f759100ef73bf4a3faeeb5af1f90..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}9ii`T8mV5;6m) diff --git a/models/100/model/classifier/best.model/estimator/stages/1_poly_f0b90cd4e059/metadata/part-00000 b/models/100/model/classifier/best.model/estimator/stages/1_poly_f0b90cd4e059/metadata/part-00000 deleted file mode 100644 index 0dd578714..000000000 --- a/models/100/model/classifier/best.model/estimator/stages/1_poly_f0b90cd4e059/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.feature.PolynomialExpansion","timestamp":1743247597578,"sparkVersion":"3.5.0","uid":"poly_f0b90cd4e059","paramMap":{"degree":3,"inputCol":"z_featurevector","outputCol":"z_feature"},"defaultParamMap":{"degree":2,"outputCol":"poly_f0b90cd4e059__output"}} diff --git 
a/models/100/model/classifier/best.model/estimator/stages/2_logreg_077944a9332a/metadata/.part-00000.crc b/models/100/model/classifier/best.model/estimator/stages/2_logreg_077944a9332a/metadata/.part-00000.crc deleted file mode 100644 index e069ac933e6c7693f464fbe7e7f1343d1b323db6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}E^KZzS#@r=<@79|;3x diff --git a/models/100/model/classifier/best.model/estimator/stages/2_logreg_077944a9332a/metadata/part-00000 b/models/100/model/classifier/best.model/estimator/stages/2_logreg_077944a9332a/metadata/part-00000 deleted file mode 100644 index c981fbda3..000000000 --- a/models/100/model/classifier/best.model/estimator/stages/2_logreg_077944a9332a/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.classification.LogisticRegression","timestamp":1743247597650,"sparkVersion":"3.5.0","uid":"logreg_077944a9332a","paramMap":{"labelCol":"z_isMatch","fitIntercept":true,"predictionCol":"z_prediction","probabilityCol":"z_probability","featuresCol":"z_feature","maxIter":100},"defaultParamMap":{"threshold":0.5,"tol":1.0E-6,"maxBlockSizeInMB":0.0,"labelCol":"label","aggregationDepth":2,"standardization":true,"rawPredictionCol":"rawPrediction","regParam":0.0,"fitIntercept":true,"predictionCol":"prediction","probabilityCol":"probability","elasticNetParam":0.0,"family":"auto","featuresCol":"features","maxIter":100}} diff --git a/models/100/model/classifier/best.model/estimator/stages/2_logreg_077944a9332a/metadata/._SUCCESS.crc b/models/100/model/classifier/best.model/estimator/stages/2_logreg_529da18ddad8/metadata/._SUCCESS.crc similarity index 100% rename from models/100/model/classifier/best.model/estimator/stages/2_logreg_077944a9332a/metadata/._SUCCESS.crc rename to models/100/model/classifier/best.model/estimator/stages/2_logreg_529da18ddad8/metadata/._SUCCESS.crc diff --git 
a/models/100/model/classifier/best.model/estimator/stages/2_logreg_529da18ddad8/metadata/.part-00000.crc b/models/100/model/classifier/best.model/estimator/stages/2_logreg_529da18ddad8/metadata/.part-00000.crc new file mode 100644 index 0000000000000000000000000000000000000000..c3a2e4937d141e5034de3b97e3971c47c80312f6 GIT binary patch literal 16 XcmYc;N@ieSU}6Zg;aOJtj@uLf9|r^- literal 0 HcmV?d00001 diff --git a/models/100/model/classifier/best.model/estimator/stages/2_logreg_077944a9332a/metadata/_SUCCESS b/models/100/model/classifier/best.model/estimator/stages/2_logreg_529da18ddad8/metadata/_SUCCESS similarity index 100% rename from models/100/model/classifier/best.model/estimator/stages/2_logreg_077944a9332a/metadata/_SUCCESS rename to models/100/model/classifier/best.model/estimator/stages/2_logreg_529da18ddad8/metadata/_SUCCESS diff --git a/models/100/model/classifier/best.model/estimator/stages/2_logreg_529da18ddad8/metadata/part-00000 b/models/100/model/classifier/best.model/estimator/stages/2_logreg_529da18ddad8/metadata/part-00000 new file mode 100644 index 000000000..694f07d15 --- /dev/null +++ b/models/100/model/classifier/best.model/estimator/stages/2_logreg_529da18ddad8/metadata/part-00000 @@ -0,0 +1 @@ +{"class":"org.apache.spark.ml.classification.LogisticRegression","timestamp":1751390443915,"sparkVersion":"3.5.0","uid":"logreg_529da18ddad8","paramMap":{"fitIntercept":true,"labelCol":"z_isMatch","predictionCol":"z_prediction","maxIter":100,"probabilityCol":"z_probability","featuresCol":"z_feature"},"defaultParamMap":{"aggregationDepth":2,"standardization":true,"elasticNetParam":0.0,"family":"auto","fitIntercept":true,"labelCol":"label","tol":1.0E-6,"predictionCol":"prediction","regParam":0.0,"maxIter":100,"probabilityCol":"probability","threshold":0.5,"featuresCol":"features","rawPredictionCol":"rawPrediction","maxBlockSizeInMB":0.0}} diff --git a/models/100/model/classifier/best.model/evaluator/metadata/.part-00000.crc 
b/models/100/model/classifier/best.model/evaluator/metadata/.part-00000.crc index a0ced782577d66b6ef1407ccc9a29badaa7e63d4..9595d6fed40f48c2b5d72a3dd304e544f2d87999 100644 GIT binary patch literal 12 TcmYc;N@ieSU}DJrbvOh76Z8YO literal 12 TcmYc;N@ieSU}E4Ader~`5IO?G diff --git a/models/100/model/classifier/best.model/evaluator/metadata/part-00000 b/models/100/model/classifier/best.model/evaluator/metadata/part-00000 index 3c0a83c21..775626759 100644 --- a/models/100/model/classifier/best.model/evaluator/metadata/part-00000 +++ b/models/100/model/classifier/best.model/evaluator/metadata/part-00000 @@ -1 +1 @@ -{"class":"org.apache.spark.ml.evaluation.BinaryClassificationEvaluator","timestamp":1743247597368,"sparkVersion":"3.5.0","uid":"binEval_35d17de4c91a","paramMap":{"labelCol":"z_isMatch"},"defaultParamMap":{"numBins":1000,"rawPredictionCol":"rawPrediction","labelCol":"label","metricName":"areaUnderROC"}} +{"class":"org.apache.spark.ml.evaluation.BinaryClassificationEvaluator","timestamp":1751390443573,"sparkVersion":"3.5.0","uid":"binEval_7ec945af36a7","paramMap":{"labelCol":"z_isMatch"},"defaultParamMap":{"labelCol":"label","rawPredictionCol":"rawPrediction","metricName":"areaUnderROC","numBins":1000}} diff --git a/models/100/model/classifier/best.model/metadata/.part-00000.crc b/models/100/model/classifier/best.model/metadata/.part-00000.crc index 59e9d48f6fc1cf5547e37e1b83cc265740a0e691..1f6670dcf11495486346ebd4da1c64783cad7e39 100644 GIT binary patch literal 40 wcmYc;N@ieSU}ETRELP1*+iySXY~%KQn^H4pUyjr1JQ&9*xKUcdw`k`-05iQ1q5uE@ literal 40 wcmYc;N@ieSU}9jY3;%Y8ts>;#?c1isGTo0=dHk6rx7K>>FLx>6lMT)V03JCGc>n+a diff --git a/models/100/model/classifier/best.model/metadata/part-00000 b/models/100/model/classifier/best.model/metadata/part-00000 index b03fc59ea..749883ab3 100644 --- a/models/100/model/classifier/best.model/metadata/part-00000 +++ b/models/100/model/classifier/best.model/metadata/part-00000 @@ -1 +1 @@ 
-{"class":"org.apache.spark.ml.tuning.CrossValidatorModel","timestamp":1743247597268,"sparkVersion":"3.5.0","uid":"cv_1bf83c0e3e49","paramMap":{"numFolds":2,"seed":-1191137437,"foldCol":"","estimatorParamMaps":[[{"parent":"logreg_077944a9332a","name":"threshold","value":"0.4","isJson":"true"},{"parent":"logreg_077944a9332a","name":"regParam","value":"1.0E-4","isJson":"true"}],[{"parent":"logreg_077944a9332a","name":"threshold","value":"0.45","isJson":"true"},{"parent":"logreg_077944a9332a","name":"regParam","value":"1.0E-4","isJson":"true"}],[{"parent":"logreg_077944a9332a","name":"threshold","value":"0.5","isJson":"true"},{"parent":"logreg_077944a9332a","name":"regParam","value":"1.0E-4","isJson":"true"}],[{"parent":"logreg_077944a9332a","name":"threshold","value":"0.55","isJson":"true"},{"parent":"logreg_077944a9332a","name":"regParam","value":"1.0E-4","isJson":"true"}],[{"parent":"logreg_077944a9332a","name":"threshold","value":"0.4","isJson":"true"},{"parent":"logreg_077944a9332a","name":"regParam","value":"0.001","isJson":"true"}],[{"parent":"logreg_077944a9332a","name":"threshold","value":"0.45","isJson":"true"},{"parent":"logreg_077944a9332a","name":"regParam","value":"0.001","isJson":"true"}],[{"parent":"logreg_077944a9332a","name":"threshold","value":"0.5","isJson":"true"},{"parent":"logreg_077944a9332a","name":"regParam","value":"0.001","isJson":"true"}],[{"parent":"logreg_077944a9332a","name":"threshold","value":"0.55","isJson":"true"},{"parent":"logreg_077944a9332a","name":"regParam","value":"0.001","isJson":"true"}],[{"parent":"logreg_077944a9332a","name":"threshold","value":"0.4","isJson":"true"},{"parent":"logreg_077944a9332a","name":"regParam","value":"0.01","isJson":"true"}],[{"parent":"logreg_077944a9332a","name":"threshold","value":"0.45","isJson":"true"},{"parent":"logreg_077944a9332a","name":"regParam","value":"0.01","isJson":"true"}],[{"parent":"logreg_077944a9332a","name":"threshold","value":"0.5","isJson":"true"},{"parent":"logreg_077944a9332
a","name":"regParam","value":"0.01","isJson":"true"}],[{"parent":"logreg_077944a9332a","name":"threshold","value":"0.55","isJson":"true"},{"parent":"logreg_077944a9332a","name":"regParam","value":"0.01","isJson":"true"}],[{"parent":"logreg_077944a9332a","name":"threshold","value":"0.4","isJson":"true"},{"parent":"logreg_077944a9332a","name":"regParam","value":"0.1","isJson":"true"}],[{"parent":"logreg_077944a9332a","name":"threshold","value":"0.45","isJson":"true"},{"parent":"logreg_077944a9332a","name":"regParam","value":"0.1","isJson":"true"}],[{"parent":"logreg_077944a9332a","name":"threshold","value":"0.5","isJson":"true"},{"parent":"logreg_077944a9332a","name":"regParam","value":"0.1","isJson":"true"}],[{"parent":"logreg_077944a9332a","name":"threshold","value":"0.55","isJson":"true"},{"parent":"logreg_077944a9332a","name":"regParam","value":"0.1","isJson":"true"}],[{"parent":"logreg_077944a9332a","name":"threshold","value":"0.4","isJson":"true"},{"parent":"logreg_077944a9332a","name":"regParam","value":"1.0","isJson":"true"}],[{"parent":"logreg_077944a9332a","name":"threshold","value":"0.45","isJson":"true"},{"parent":"logreg_077944a9332a","name":"regParam","value":"1.0","isJson":"true"}],[{"parent":"logreg_077944a9332a","name":"threshold","value":"0.5","isJson":"true"},{"parent":"logreg_077944a9332a","name":"regParam","value":"1.0","isJson":"true"}],[{"parent":"logreg_077944a9332a","name":"threshold","value":"0.55","isJson":"true"},{"parent":"logreg_077944a9332a","name":"regParam","value":"1.0","isJson":"true"}]]},"defaultParamMap":{"numFolds":3,"seed":-1191137437,"foldCol":""},"avgMetrics":[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0],"persistSubModels":false} 
+{"class":"org.apache.spark.ml.tuning.CrossValidatorModel","timestamp":1751390443467,"sparkVersion":"3.5.0","uid":"cv_b644af8361a8","paramMap":{"seed":-1191137437,"foldCol":"","numFolds":2,"estimatorParamMaps":[[{"parent":"logreg_529da18ddad8","name":"regParam","value":"1.0E-4","isJson":"true"},{"parent":"logreg_529da18ddad8","name":"threshold","value":"0.4","isJson":"true"}],[{"parent":"logreg_529da18ddad8","name":"regParam","value":"0.001","isJson":"true"},{"parent":"logreg_529da18ddad8","name":"threshold","value":"0.4","isJson":"true"}],[{"parent":"logreg_529da18ddad8","name":"regParam","value":"0.01","isJson":"true"},{"parent":"logreg_529da18ddad8","name":"threshold","value":"0.4","isJson":"true"}],[{"parent":"logreg_529da18ddad8","name":"regParam","value":"0.1","isJson":"true"},{"parent":"logreg_529da18ddad8","name":"threshold","value":"0.4","isJson":"true"}],[{"parent":"logreg_529da18ddad8","name":"regParam","value":"1.0","isJson":"true"},{"parent":"logreg_529da18ddad8","name":"threshold","value":"0.4","isJson":"true"}],[{"parent":"logreg_529da18ddad8","name":"regParam","value":"1.0E-4","isJson":"true"},{"parent":"logreg_529da18ddad8","name":"threshold","value":"0.45","isJson":"true"}],[{"parent":"logreg_529da18ddad8","name":"regParam","value":"0.001","isJson":"true"},{"parent":"logreg_529da18ddad8","name":"threshold","value":"0.45","isJson":"true"}],[{"parent":"logreg_529da18ddad8","name":"regParam","value":"0.01","isJson":"true"},{"parent":"logreg_529da18ddad8","name":"threshold","value":"0.45","isJson":"true"}],[{"parent":"logreg_529da18ddad8","name":"regParam","value":"0.1","isJson":"true"},{"parent":"logreg_529da18ddad8","name":"threshold","value":"0.45","isJson":"true"}],[{"parent":"logreg_529da18ddad8","name":"regParam","value":"1.0","isJson":"true"},{"parent":"logreg_529da18ddad8","name":"threshold","value":"0.45","isJson":"true"}],[{"parent":"logreg_529da18ddad8","name":"regParam","value":"1.0E-4","isJson":"true"},{"parent":"logreg_529da18ddad8","name
":"threshold","value":"0.5","isJson":"true"}],[{"parent":"logreg_529da18ddad8","name":"regParam","value":"0.001","isJson":"true"},{"parent":"logreg_529da18ddad8","name":"threshold","value":"0.5","isJson":"true"}],[{"parent":"logreg_529da18ddad8","name":"regParam","value":"0.01","isJson":"true"},{"parent":"logreg_529da18ddad8","name":"threshold","value":"0.5","isJson":"true"}],[{"parent":"logreg_529da18ddad8","name":"regParam","value":"0.1","isJson":"true"},{"parent":"logreg_529da18ddad8","name":"threshold","value":"0.5","isJson":"true"}],[{"parent":"logreg_529da18ddad8","name":"regParam","value":"1.0","isJson":"true"},{"parent":"logreg_529da18ddad8","name":"threshold","value":"0.5","isJson":"true"}],[{"parent":"logreg_529da18ddad8","name":"regParam","value":"1.0E-4","isJson":"true"},{"parent":"logreg_529da18ddad8","name":"threshold","value":"0.55","isJson":"true"}],[{"parent":"logreg_529da18ddad8","name":"regParam","value":"0.001","isJson":"true"},{"parent":"logreg_529da18ddad8","name":"threshold","value":"0.55","isJson":"true"}],[{"parent":"logreg_529da18ddad8","name":"regParam","value":"0.01","isJson":"true"},{"parent":"logreg_529da18ddad8","name":"threshold","value":"0.55","isJson":"true"}],[{"parent":"logreg_529da18ddad8","name":"regParam","value":"0.1","isJson":"true"},{"parent":"logreg_529da18ddad8","name":"threshold","value":"0.55","isJson":"true"}],[{"parent":"logreg_529da18ddad8","name":"regParam","value":"1.0","isJson":"true"},{"parent":"logreg_529da18ddad8","name":"threshold","value":"0.55","isJson":"true"}]]},"defaultParamMap":{"seed":-1191137437,"foldCol":"","numFolds":3},"avgMetrics":[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0],"persistSubModels":false} diff --git a/models/100/trainingData/unmarked/.part-00000-fc0fdb27-3b64-44a9-8c88-4373a3db2de8-c000.snappy.parquet.crc b/models/100/trainingData/unmarked/.part-00000-fc0fdb27-3b64-44a9-8c88-4373a3db2de8-c000.snappy.parquet.crc new file mode 100644 index 
0000000000000000000000000000000000000000..1a910af21630db6a3f4bacb20bb76381e74e16eb GIT binary patch literal 64 zcmV-G0KflZa$^7h00ICo0H1pZmdW=(u?YbVDwvNkygHwj$OorMnWe9a^Of%GWXosO` zL!jT=Tj@@+OdCbR+uMDAe&3(>z2EokviY-HB*Kv0esTemlTZHMpOfYS7mrI-Br@4^5~Mhd;8=p{`r8G6amOO9Uhp1P<0YX5(AEKnCcES%yW-d{YM z4;+5_%?BTS`O3we5l>x#^pY3P9=o>ys)MePLf!gnYdtR@z+FbV$i8RaJrZofZRK(M z*_(>uhjs5@aukz4d+o;`_zEOWVJ6lMZ~fAi4{!f`E&B7)tv_$l#HeXNC?^^up}yLhY33%$qvBg+8J(n^DpsH6wWlM{pTJzWTi zaABlSEEACjO>3+tfn;_)65%(KNgzhfvx%MxlS;8&NQtcvcOj_<8r$2mm60X9H3nl! zCIWlRMo>FBic*n^%;`1(*84szos=@LRTea*;@UEz>mMYLO9IdHNRWAp-qvP64Rjsh2 z(Q$y(So{T1vtS0*ORLpX&*XRBeer~A)ofctt^Q+ z%*ZL+!sH~}!lyEripMM)VKOrAXR;z2W~3bEQI72!km+ceVx$#NjULetU;E0Rwl~FJ z(-Fmq&poi?!0MfmO|^~V8%N%SWpLj1vE-7Uy!Ai7`h9-^2nn=0?b}32Q?RG12E?8K zZqhWT9WU>5CiO9Gddf7*owy@PeWF@1I2@Qn%9Ep`)8RCV;Ld`s)+WGnLaAz%CacbJ z9PNdzN4 zJsO9Nwe_kd1a~8h?_&8>tyDIrR9&ybdVBi8-~Zb&rpa-QD29~N(tqX9)7*+a{Mn86 zUkKMX_KRK=A7#la40-RlZ+xFEkUr|Yw|s?m>b-fsD%5?|@Mf$xIyDV`4CCGb$DFjP zwj+Sg#xtELWLx7MH~}Y2vsAq-jIp0b%kc74$+XI5Q5C==rWM6qo9t?B0>W9friG#k zzAKH}Tn`@-RI3*I1Un***HjBosg*M-JF~%d;B-twENj|0p(pT2)n7+o+&%K&a4N^?ZHfJAZJpIY{UE+Dq^E;(s~2Q+$T`=GQFG*KhmD zUO)Nft$#e-;W5U)4MFRJ>~v>||BoJxT0Cn6s$xFKG}01RHglu&3YBWR1>V~4&Y}u zMpSE-M1OJY>YinbhF^n3&maLv^hgtlUc=-#CfA=ha5C&s%J0S@29YewDK-#D<)mPM zNo8>$l#=BP{3xO*vH>QOg?>q)Q_^BkfI5iH1(bA}5^-(Gn$s*DH9Au zQyEDTrC=x`Dq>y^^O2k==B02bno&eqP6vBKnXD+2LIh?>Qjm{|X)z~@!4S&fZj^_G zQXXoQaPLSyE2b2Y<})b|rKc!oJ=fj(=kLGzajskG67&xAxgTsA-8AC4?u?H-%7|%&%|`QC878tEFQ-!?_yDg2tZY`2v#?N3XB3E$`$BSB$%FwrCjxqK z>$;SjPi17Pf|3S@`SJA}yc1Xgrl}^(PEW~(UKE0r4nNUxRUgBOb8W22Y zZt8DynbD;$Z~{;x(3U#gG=ZUn0#>>d(C{-O81__*_XA|pWU5s_%GXaoQ-3RsBS}fe z_||&dHSYpLcO4|Sn_rg@=N|Qu3hXJ0#ks4@-0i^@vd-&$K0qGwZqEcs%DcTDB93=^ zAu@M+JUVxKots;BWyM_Y@s)G8msic*p6HvK`_yXkWe@k*;9T$f;@s^%xvm2keY0yr z$fg;6n_a4CP%9@OqE{iFo!K^1Eh7?w`{2yBl0FGLYpp_?;$SCfgWk~0wh2osS4&RS zG-#KPZ=C@Y)6x*})t6Q6`&6e?p)fw4h2XlkJPYCWj2dc9o7bZ+>|t!@ezSQBqn69k z!VY;}N2%&eH+KY7OH)f`dA=yOo@k!thq`6nvuzuVXV(uiHWWdnSsZR87LVQROg;)P 
zn@ns~fQfm2BR+0~`xe8!?}M>qAl^9r4NBlp-yA12dQofu7=T>37 zvny&h=JsNkl03(PVY5^;dj|@GVIS>XMBr;w;6;TR_B`&P&^q#p1t@K0K~t9}?v-U`y~S-`XWJQ`j^z{8&b0gt4~7L>dYX%gU`A6LL1 zGTaqE3?_dTrUE>c2DWx-dlATk6!LtUWKi;Wv;j+ganJf-Ecqg#z>pbuEGz=_BMNjbLwZs2%8Di+N&qg< z(H8C|17pcc-L6;XCzQoOFH+DO8BzedSI!5OUC>i4+<6wpk{99>bS@7q4)>oF?!_z- zQS$PtCOAq#F5ESSdxn9r$DY4@#csYv9f?nTx&K&zv}^Wko?YCFXDHgO9QES+ViOt=u#0xV&mHr_Q1Z|a zMVrmb{A{XTM8S6`o)6!_~-Mth{+fS@1M;MMv6 zP`6nNx0-1HEaJ60>floM(wDn5z4Ya@s9O4JO2EI-YI*6aDX}hn^=u)ybOe_!m%beM zap{YJ4wt@oj`{8gu^3x?aTnq@dJHvKvjmUQ1ZhZ8h9se{I0>KOg>o?^ Date: Wed, 2 Jul 2025 16:00:46 +0530 Subject: [PATCH 4/7] Removing inMemory Pipes (#1171) * removing inmemory pipes * moving to ent * moving to ent * access change --- .../java/zingg/common/client/pipe/Pipe.java | 14 -- .../zingg/common/client/util/PipeUtil.java | 13 +- docs/python/markdown/index.md | 3 - docs/python/markdown/zingg.md | 28 +--- python/zingg/pipes.py | 47 +----- .../spark/client/util/SparkDFReader.java | 4 +- .../spark/client/util/SparkDFWriter.java | 9 +- .../spark/client/util/SparkPipeUtil.java | 1 - test/InMemPipeDataBricks.py | 52 ------ test/InMemPipeTest.py | 60 ------- test/InMemPipeTestImages.py | 154 ----------------- test/InMemPipeTestImagesJson.py | 118 ------------- test/InMemPipeTestImagesLink.py | 158 ------------------ test/testFebrl/testArgs.py | 44 +---- 14 files changed, 15 insertions(+), 690 deletions(-) delete mode 100644 test/InMemPipeDataBricks.py delete mode 100644 test/InMemPipeTest.py delete mode 100644 test/InMemPipeTestImages.py delete mode 100644 test/InMemPipeTestImagesJson.py delete mode 100644 test/InMemPipeTestImagesLink.py diff --git a/common/client/src/main/java/zingg/common/client/pipe/Pipe.java b/common/client/src/main/java/zingg/common/client/pipe/Pipe.java index 4d8154505..1badab6bb 100644 --- a/common/client/src/main/java/zingg/common/client/pipe/Pipe.java +++ b/common/client/src/main/java/zingg/common/client/pipe/Pipe.java @@ -33,7 +33,6 @@ public class Pipe implements 
Serializable{ // St:StructType, Sv:SaveMode public static final String FORMAT_ELASTIC = "org.elasticsearch.spark.sql"; public static final String FORMAT_EXASOL = "com.exasol.spark"; public static final String FORMAT_BIGQUERY = "bigquery"; - public static final String FORMAT_INMEMORY = "inMemory"; String name; String format; @@ -44,25 +43,18 @@ public class Pipe implements Serializable{ // St:StructType, Sv:SaveMode String schema; String mode; - - - - public String getSchema() { return schema; } - public void setSchema(String schema) { this.schema = schema; } - public String getName() { return name; } - @JsonValue public void setName(String name) { this.name = name; @@ -76,7 +68,6 @@ public String getFormat() { public void setFormat(String sinkType) { this.format = sinkType; } - @JsonValue public void setProps(Map props) { @@ -102,23 +93,18 @@ public String get(String key) { return props.get(key); } - public String getPreprocessors() { return preprocessors; } - public void setPreprocessors(String preprocessors) { this.preprocessors = preprocessors; } - - public int getId() { return id; } - public void setId(int recId) { this.id = recId; } diff --git a/common/client/src/main/java/zingg/common/client/util/PipeUtil.java b/common/client/src/main/java/zingg/common/client/util/PipeUtil.java index 12b144d2d..11a68b77f 100644 --- a/common/client/src/main/java/zingg/common/client/util/PipeUtil.java +++ b/common/client/src/main/java/zingg/common/client/util/PipeUtil.java @@ -9,7 +9,6 @@ import zingg.common.client.ZFrame; import zingg.common.client.ZinggClientException; import zingg.common.client.pipe.FilePipe; -//import zingg.common.client.pipe.InMemoryPipe; import zingg.common.client.pipe.Pipe; //import com.datastax.spark.connector.cql.*; @@ -55,20 +54,18 @@ protected ZFrame read(DFReader reader, Pipe p, boolean add LOG.warn("Reading " + p); try { - if (p.getFormat().equals(Pipe.FORMAT_INMEMORY)) { - input = p.getDataset(); //.df(); - } - else { + if 
(p.getProps().containsKey(FilePipe.LOCATION)) { input = reader.load(p.get(FilePipe.LOCATION)); } else { input = reader.load(); } - } + if (addSource) { input = input.withColumn(ColName.SOURCE_COL, p.getName()); } + p.setDataset(input); } catch (Exception ex) { LOG.warn(ex.getMessage()); @@ -195,10 +192,6 @@ public void write(ZFrame toWriteOrig, LOG.warn("Writing output " + p); - if (p.getFormat().equals(Pipe.FORMAT_INMEMORY)) { - p.setDataset(toWriteOrig); - return; - } //SparkPipe sPipe = (SparkPipe) p; if (p.getMode() != null) { writer.setMode(p.getMode()); //SaveMode.valueOf(p.getMode())); diff --git a/docs/python/markdown/index.md b/docs/python/markdown/index.md index 9f14d1aaa..61affe955 100644 --- a/docs/python/markdown/index.md +++ b/docs/python/markdown/index.md @@ -102,9 +102,6 @@ Requires **python 3.6+**; **spark 3.5.0.** Otherwise, [`zingg.client.Zingg()`](z * [`CsvPipe.setDelimiter()`](zingg.md#zingg.pipes.CsvPipe.setDelimiter) * [`CsvPipe.setHeader()`](zingg.md#zingg.pipes.CsvPipe.setHeader) * [`CsvPipe.setLocation()`](zingg.md#zingg.pipes.CsvPipe.setLocation) - * [`InMemoryPipe`](zingg.md#zingg.pipes.InMemoryPipe) - * [`InMemoryPipe.getDataset()`](zingg.md#zingg.pipes.InMemoryPipe.getDataset) - * [`InMemoryPipe.setDataset()`](zingg.md#zingg.pipes.InMemoryPipe.setDataset) * [`Pipe`](zingg.md#zingg.pipes.Pipe) * [`Pipe.addProperty()`](zingg.md#zingg.pipes.Pipe.addProperty) * [`Pipe.getPipe()`](zingg.md#zingg.pipes.Pipe.getPipe) diff --git a/docs/python/markdown/zingg.md b/docs/python/markdown/zingg.md index 9467c6751..304c67cb2 100644 --- a/docs/python/markdown/zingg.md +++ b/docs/python/markdown/zingg.md @@ -643,32 +643,6 @@ Method to set location of pipe * **Parameters:** **location** (*String*) – location from where we read data -### *class* zingg.pipes.InMemoryPipe(name, df=None) - -Bases: [`Pipe`](#zingg.pipes.Pipe) - -Pipe Class for working with InMemory pipeline - -* **Parameters:** - * **name** (*String*) – name of the pipe - * **df** (*Dataset* 
*or* *None*) – provide dataset for this pipe (optional) - -#### getDataset() - -Method to get Dataset from pipe - -* **Returns:** - dataset of the pipe in the format of spark dataset -* **Return type:** - Dataset - -#### setDataset(df) - -Method to set DataFrame of the pipe - -* **Parameters:** - **df** (*DataFrame*) – pandas or spark dataframe for the pipe - ### *class* zingg.pipes.Pipe(name, format) Bases: `object` @@ -677,7 +651,7 @@ Pipe class for working with different data-pipelines. Actual pipe def in the arg * **Parameters:** * **name** (*String*) – name of the pipe - * **format** (*Format*) – formate of pipe e.g. bigquery,InMemory, etc. + * **format** (*Format*) – formate of pipe e.g. bigquery,csv, etc. #### addProperty(name, value) diff --git a/python/zingg/pipes.py b/python/zingg/pipes.py index 0553f1d10..f8d9219c3 100644 --- a/python/zingg/pipes.py +++ b/python/zingg/pipes.py @@ -38,7 +38,7 @@ class Pipe: :param name: name of the pipe :type name: String - :param format: formate of pipe e.g. bigquery,InMemory, etc. + :param format: formate of pipe e.g. bigquery,csv, etc. :type format: Format """ @@ -251,47 +251,4 @@ def setDbTable(self, dbtable): :param dbtable: provide bucket parameter. 
:type dbtable: String """ - Pipe.addProperty(self, "dbtable", dbtable) - - -class InMemoryPipe(Pipe): - """ Pipe Class for working with InMemory pipeline - - :param name: name of the pipe - :type name: String - :param df: provide dataset for this pipe (optional) - :type df: Dataset or None - """ - - def __init__(self, name, df = None): - setupPipes() - Pipe.__init__(self, name, JPipe.FORMAT_INMEMORY) - if (df is not None): - self.setDataset(df) - - def setDataset(self, df): - """ Method to set DataFrame of the pipe - - :param df: pandas or spark dataframe for the pipe - :type df: DataFrame - """ - if (isinstance(df, pd.DataFrame)): - print('schema of pandas df is ' , Pipe.getPipe(self).getSchema()) - if (Pipe.getPipe(self).getSchema() is not None): - ds = getSparkSession().createDataFrame(df, schema=Pipe.getPipe(self).getSchema()) - else: - ds = getSparkSession().createDataFrame(df) - - Pipe.getPipe(self).setDataset(ds._jdf) - elif (isinstance(df, DataFrame)): - Pipe.getPipe(self).setDataset(df._jdf) - else: - LOG.error(" setDataset(): NUll or Unsupported type: %s", type(df)) - - def getDataset(self): - """ Method to get Dataset from pipe - - :return: dataset of the pipe in the format of spark dataset - :rtype: Dataset - """ - return Pipe.getPipe(self).getDataset().df() + Pipe.addProperty(self, "dbtable", dbtable) \ No newline at end of file diff --git a/spark/client/src/main/java/zingg/spark/client/util/SparkDFReader.java b/spark/client/src/main/java/zingg/spark/client/util/SparkDFReader.java index cf67ff1f6..6b383ec7f 100644 --- a/spark/client/src/main/java/zingg/spark/client/util/SparkDFReader.java +++ b/spark/client/src/main/java/zingg/spark/client/util/SparkDFReader.java @@ -14,8 +14,8 @@ public class SparkDFReader implements DFReader, Row, Column> { - private SparkSession session; - private DataFrameReader reader; + protected SparkSession session; + protected DataFrameReader reader; public SparkDFReader(SparkSession s) { this.session = s; diff --git 
a/spark/client/src/main/java/zingg/spark/client/util/SparkDFWriter.java b/spark/client/src/main/java/zingg/spark/client/util/SparkDFWriter.java index 023a90fb6..5cf65f836 100644 --- a/spark/client/src/main/java/zingg/spark/client/util/SparkDFWriter.java +++ b/spark/client/src/main/java/zingg/spark/client/util/SparkDFWriter.java @@ -10,30 +10,33 @@ import zingg.common.client.util.DFWriter; public class SparkDFWriter implements DFWriter, Row, Column>{ - private DataFrameWriter writer; + + protected DataFrameWriter writer; public SparkDFWriter(ZFrame, Row, Column> toWriteOrig) { Dataset toWrite = toWriteOrig.df(); this.writer = toWrite.write(); - } - public void setMode(String s) { this.writer.mode(SaveMode.valueOf(s)); } + public DFWriter, Row, Column> format(String f) { writer.format(f); return this; } + public DFWriter, Row, Column> option(String k, String v) { writer.option(k,v); return this; } + public void save(String location) { writer.save(location); } + public void save() { writer.save(); } diff --git a/spark/client/src/main/java/zingg/spark/client/util/SparkPipeUtil.java b/spark/client/src/main/java/zingg/spark/client/util/SparkPipeUtil.java index bc3a70bfe..fcc7a5fe9 100644 --- a/spark/client/src/main/java/zingg/spark/client/util/SparkPipeUtil.java +++ b/spark/client/src/main/java/zingg/spark/client/util/SparkPipeUtil.java @@ -7,7 +7,6 @@ import org.apache.spark.sql.Row; import zingg.common.client.ZFrame; -//import zingg.common.client.pipe.InMemoryPipe; import zingg.common.client.util.DFReader; import zingg.common.client.util.DFWriter; import zingg.common.client.util.PipeUtil; diff --git a/test/InMemPipeDataBricks.py b/test/InMemPipeDataBricks.py deleted file mode 100644 index 795dc1a86..000000000 --- a/test/InMemPipeDataBricks.py +++ /dev/null @@ -1,52 +0,0 @@ -from zingg.client import * -from zingg.pipes import * -from pyspark.sql.types import * -import pandas - -#build the arguments for zingg -args = Arguments() -#set field definitions -id = 
FieldDefinition("id", "string", MatchType.DONT_USE) -title = FieldDefinition("title", "string", MatchType.NUMERIC) -description = FieldDefinition("description", "string", MatchType.TEXT) -description.setStopWords("dbfs:/FileStore/tables/stopWords.csv") -manufacturer = FieldDefinition("manufacturer","string", MatchType.FUZZY) -price = FieldDefinition("price", "double", MatchType.FUZZY) - -fieldDefs = [id, title, description, manufacturer, price] -args.setFieldDefinition(fieldDefs) -#set the modelid and the zingg dir -args.setModelId("103") -args.setZinggDir("dbfs:/FileStore/tables/models") -args.setNumPartitions(4) -args.setLabelDataSampleSize(0.4) - - -schema = StructType([StructField("id", StringType(), True)\ - ,StructField("title", StringType(), True)\ - ,StructField("description", StringType(), True)\ - ,StructField("manufacturer", StringType(), True)\ - ,StructField("price", DoubleType(), True)]) - - -inputPipeAmazon=InMemoryPipe("amz") -inputPipeAmazon.setDataset(getSparkSession().read.format("csv").schema(schema).load("dbfs:/FileStore/tables/Amazon.csv")) -inputPipeGoogle=InMemoryPipe("google") -inputPipeGoogle.setDataset(getSparkSession().read.format("csv").schema(schema).load("dbfs:/FileStore/tables/GoogleProducts.csv")) - -args.setData(inputPipeAmazon,inputPipeGoogle) - -#setting outputpipe in 'args' -outputPipe = CsvPipe("resultAmazonGoogle", "dbfs:/FileStore/tables/AwsGoogleOutput") - -args.setOutput(outputPipe) - -inpPhase = input("Enter phase: ") - -options = ClientOptions([ClientOptions.PHASE,inpPhase]) - -#Zingg execution for the given phase -zingg = Zingg(args, options) - -zingg.initAndExecute() - diff --git a/test/InMemPipeTest.py b/test/InMemPipeTest.py deleted file mode 100644 index 888bbe499..000000000 --- a/test/InMemPipeTest.py +++ /dev/null @@ -1,60 +0,0 @@ -from zingg.client import * -from zingg.pipes import * -from pyspark.sql.types import * -import pandas - -#build the arguments for zingg -args = Arguments() -#set field definitions -id = 
FieldDefinition("id", "string", MatchType.DONT_USE) -title = FieldDefinition("title", "string", MatchType.NUMERIC) -description = FieldDefinition("description", "string", MatchType.TEXT) -description.setStopWords("examples/amazon-google/stopWords.csv") -manufacturer = FieldDefinition("manufacturer","string", MatchType.FUZZY) -price = FieldDefinition("price", "double", MatchType.FUZZY) - -fieldDefs = [id, title, description, manufacturer, price] -args.setFieldDefinition(fieldDefs) -#set the modelid and the zingg dir -args.setModelId("103") -args.setZinggDir("models") -args.setNumPartitions(4) -args.setLabelDataSampleSize(0.4) - -#reading dataset into inputPipe and settint it up in 'args' -#below line should not be required if you are reading from in memory dataset -#in that case, replace df with input df -#schemaType = {'id': 'string', 'title': 'string', 'description': 'string', 'manufacturer': 'string', 'price': 'string'} -#amzDF = pandas.read_csv("~/zingg/examples/amazon-google/Amazon.csv", encoding="iso-8859-1", dtype=schemaType) -#amzDF.info() -#print(amzDF) -#amzDF = amzDF[~amzDF['description'].isnull()] -#amzDF.info() -schema = StructType([StructField("id", StringType(), True)\ - ,StructField("title", StringType(), True)\ - ,StructField("description", StringType(), True)\ - ,StructField("manufacturer", StringType(), True)\ - ,StructField("price", DoubleType(), True)]) - -#gDF = pandas.read_csv("~/zingg/examples/amazon-google/Amazon.csv", encoding="iso-8859-1") -#amzDF = pandas.DataFrame() -#gDF=pandas.DataFrame() -inputPipeAmazon=InMemoryPipe("amz") -#inputPipeAmazon.setSchema("id string, title string, description string, manufacturer string, price string") -inputPipeAmazon.setDataset(getSparkSession().read.format("csv").schema(schema).load("examples/amazon-google/Amazon.csv")) -inputPipeGoogle=InMemoryPipe("google") -inputPipeGoogle.setDataset(getSparkSession().read.format("csv").schema(schema).load("examples/amazon-google/GoogleProducts.csv")) - 
-args.setData(inputPipeAmazon,inputPipeGoogle) - -#setting outputpipe in 'args' -outputPipe = CsvPipe("resultAmazonGoogle", "/tmp/AwsGoogleOutput") - -args.setOutput(outputPipe) - -options = ClientOptions([ClientOptions.PHASE,"match"]) - -#Zingg execution for the given phase -zingg = Zingg(args, options) -zingg.initAndExecute() - diff --git a/test/InMemPipeTestImages.py b/test/InMemPipeTestImages.py deleted file mode 100644 index 2023e5f1f..000000000 --- a/test/InMemPipeTestImages.py +++ /dev/null @@ -1,154 +0,0 @@ -from zingg.client import * -from zingg.pipes import * -from pyspark.sql.types import * -import pandas - -import pyspark.sql.functions as fn -from sentence_transformers import SentenceTransformer, util -import torch -import pickle - -from PIL import Image - -df = (getSparkSession().read.json('/home/ubuntu/image_data/listings/metadata')) - -df = ( - df - .filter("country='US'") - .select( - 'item_id', - 'brand', - 'bullet_point', - 'domain_name', - 'marketplace', - 'item_keywords', - 'item_name', - 'product_description', - 'main_image_id', - 'other_image_id', - 'node' - ) - ) - -image_metadata = ( - getSparkSession() - .read - .csv( - path='/home/ubuntu/image_data/images/metadata', - sep=',', - header=True, - ) - ) - -@fn.udf(ArrayType(StringType())) -def get_english_values_from_array(array=None): - - # prioritized list of english language codes (biased towards us english) - english = ['en_US','en_CA','en_GB','en_AU','en_IN','en_SG','en_AE'] - - # initialize search - values = [] - if array is None: array=[] - - # for each potential english code - for e in english: - - # for each item in array - for a in array: - # if we found the english variant we want - if a['language_tag']==e: - # get value and stop - values += [a['value']] - - # if value has been found, then break - if len(values) > 0: break - - return values - -model = SentenceTransformer('clip-ViT-B-32',device='cuda') - -@fn.udf(ArrayType(DoubleType())) -#@fn.udf(StringType()) -def 
get_image_embedding(path): - - embedding = [] - - if path is not None: - - full_path = '/home/ubuntu/image_data/images/small/' + path - - # open image and convert to embedding - try: - image = Image.open(full_path).convert('RGB') - embedding = model.encode(image, batch_size=128, convert_to_tensor=False, show_progress_bar=False) - embedding = embedding.tolist() - except: - pass - - # return embedding value - return embedding - -items = ( - df - .alias('a') - .select( - 'item_id', - 'domain_name', - 'marketplace', - get_english_values_from_array('brand')[0].alias('brand'), - get_english_values_from_array('item_name')[0].alias('item_name'), - get_english_values_from_array('product_description')[0].alias('product_description'), - get_english_values_from_array('bullet_point').alias('bulletpoint'), - get_english_values_from_array('item_keywords').alias('item_keywords'), - fn.split( fn.col('node')[0]['node_name'], '/').alias('hierarchy'), - 'main_image_id' - ) - .join( - image_metadata.alias('b').select('image_id','path'), - on=fn.expr('a.main_image_id=b.image_id'), - how='left' - ) - .withColumn('main_image_embedding', get_image_embedding(fn.col('path'))) - .drop('main_image_id','image_id','bulletpoint','item_keywords','hierarchy') - ) - -items.show() - -#build the arguments for zingg -args = Arguments() -#set field definitions -item_id = FieldDefinition("item_id", "string", MatchType.DONT_USE) -domain_name = FieldDefinition("domain_name", "string", MatchType.DONT_USE) -marketplace = FieldDefinition("marketplace", "string", MatchType.DONT_USE) -brand = FieldDefinition("brand","string", MatchType.FUZZY) -item_name = FieldDefinition("item_name", "string", MatchType.TEXT) -product_description = FieldDefinition("product_description", "string", MatchType.DONT_USE) -path = FieldDefinition("path", "string", MatchType.DONT_USE) -main_image_embedding = FieldDefinition("main_image_embedding", "array", MatchType.FUZZY) - -#fieldDefs = [item_id, domain_name, marketplace, brand, 
item_name,product_description, bulletpoint, item_keywords, hierarchy,path, main_image_embedding] -fieldDefs = [item_id, domain_name, marketplace, brand, item_name,product_description,path,main_image_embedding] -args.setFieldDefinition(fieldDefs) -#set the modelid and the zingg dir -args.setModelId("9999") -args.setZinggDir("/tmp/modelSmallImages") -args.setNumPartitions(16) -args.setLabelDataSampleSize(0.2) - -inputPipeSmallImages=InMemoryPipe("smallImages") -inputPipeSmallImages.setDataset(items) - -args.setData(inputPipeSmallImages) - -#setting outputpipe in 'args' -outputPipe = Pipe("resultSmallImages", "parquet") -outputPipe.addProperty("location", "/tmp/resultSmallImages") -args.setOutput(outputPipe) - -options = ClientOptions([ClientOptions.PHASE,"findTrainingData"]) - -#Zingg execution for the given phase -zingg = Zingg(args, options) -zingg.initAndExecute() - - diff --git a/test/InMemPipeTestImagesJson.py b/test/InMemPipeTestImagesJson.py deleted file mode 100644 index 526c122c1..000000000 --- a/test/InMemPipeTestImagesJson.py +++ /dev/null @@ -1,118 +0,0 @@ -from zingg.client import * -from zingg.pipes import * -from pyspark.sql.types import * -import pandas - -import pyspark.sql.functions as fn -from sentence_transformers import SentenceTransformer, util -import torch -import pickle - -from PIL import Image - -df = (getSparkSession().read.json('/home/ubuntu/image_data/listings/metadata')) - -df = ( - df - .filter("country='US'") - .select( - 'item_id', - 'brand', - 'bullet_point', - 'domain_name', - 'marketplace', - 'item_keywords', - 'item_name', - 'product_description', - 'main_image_id', - 'other_image_id', - 'node' - ) - ) - -image_metadata = ( - getSparkSession() - .read - .csv( - path='/home/ubuntu/image_data/images/metadata', - sep=',', - header=True, - ) - ) - -@fn.udf(ArrayType(StringType())) -def get_english_values_from_array(array=None): - - # prioritized list of english language codes (biased towards us english) - english = 
['en_US','en_CA','en_GB','en_AU','en_IN','en_SG','en_AE'] - - # initialize search - values = [] - if array is None: array=[] - - # for each potential english code - for e in english: - - # for each item in array - for a in array: - # if we found the english variant we want - if a['language_tag']==e: - # get value and stop - values += [a['value']] - - # if value has been found, then break - if len(values) > 0: break - - return values - -model = SentenceTransformer('clip-ViT-B-32',device='cuda') - -@fn.udf(ArrayType(DoubleType())) -#@fn.udf(StringType()) -def get_image_embedding(path): - - embedding = [] - - if path is not None: - - full_path = '/home/ubuntu/image_data/images/small/' + path - - # open image and convert to embedding - try: - image = Image.open(full_path).convert('RGB') - embedding = model.encode(image, batch_size=128, convert_to_tensor=False, show_progress_bar=False) - embedding = embedding.tolist() - except: - pass - - # return embedding value - return embedding - -items = ( - df - .alias('a') - .select( - 'item_id', - 'domain_name', - 'marketplace', - get_english_values_from_array('brand')[0].alias('brand'), - get_english_values_from_array('item_name')[0].alias('item_name'), - get_english_values_from_array('product_description')[0].alias('product_description'), - get_english_values_from_array('bullet_point').alias('bulletpoint'), - get_english_values_from_array('item_keywords').alias('item_keywords'), - fn.split( fn.col('node')[0]['node_name'], '/').alias('hierarchy'), - 'main_image_id' - ) - .join( - image_metadata.alias('b').select('image_id','path'), - on=fn.expr('a.main_image_id=b.image_id'), - how='left' - ) - .withColumn('main_image_embedding', get_image_embedding(fn.col('path'))) - .drop('main_image_id','image_id','path','bulletpoint','item_keywords','hierarchy') - ) - -items.show() - -items.write.json('/home/ubuntu/image_data/json_data/items') - diff --git a/test/InMemPipeTestImagesLink.py b/test/InMemPipeTestImagesLink.py deleted file mode 
100644 index 4c1eeec50..000000000 --- a/test/InMemPipeTestImagesLink.py +++ /dev/null @@ -1,158 +0,0 @@ -from zingg.client import * -from zingg.pipes import * -from pyspark.sql.types import * -import pandas - -import pyspark.sql.functions as fn -from sentence_transformers import SentenceTransformer, util -import torch -import pickle - -from PIL import Image - -df = (getSparkSession().read.json('/home/ubuntu/image_data/listings/metadata')) - -df = ( - df - .filter("country='US'") - .select( - 'item_id', - 'brand', - 'bullet_point', - 'domain_name', - 'marketplace', - 'item_keywords', - 'item_name', - 'product_description', - 'main_image_id', - 'other_image_id', - 'node' - ) - ) - -image_metadata = ( - getSparkSession() - .read - .csv( - path='/home/ubuntu/image_data/images/metadata', - sep=',', - header=True, - ) - ) - -@fn.udf(ArrayType(StringType())) -def get_english_values_from_array(array=None): - - # prioritized list of english language codes (biased towards us english) - english = ['en_US','en_CA','en_GB','en_AU','en_IN','en_SG','en_AE'] - - # initialize search - values = [] - if array is None: array=[] - - # for each potential english code - for e in english: - - # for each item in array - for a in array: - # if we found the english variant we want - if a['language_tag']==e: - # get value and stop - values += [a['value']] - - # if value has been found, then break - if len(values) > 0: break - - return values - -model = SentenceTransformer('clip-ViT-B-32',device='cuda') - -@fn.udf(ArrayType(DoubleType())) -#@fn.udf(StringType()) -def get_image_embedding(path): - - embedding = [] - - if path is not None: - - full_path = '/home/ubuntu/image_data/images/small/' + path - - # open image and convert to embedding - try: - image = Image.open(full_path).convert('RGB') - embedding = model.encode(image, batch_size=128, convert_to_tensor=False, show_progress_bar=False) - embedding = embedding.tolist() - except: - pass - - # return embedding value - return embedding - 
-items = ( - df - .alias('a') - .select( - 'item_id', - 'domain_name', - 'marketplace', - get_english_values_from_array('brand')[0].alias('brand'), - get_english_values_from_array('item_name')[0].alias('item_name'), - get_english_values_from_array('product_description')[0].alias('product_description'), - get_english_values_from_array('bullet_point').alias('bulletpoint'), - get_english_values_from_array('item_keywords').alias('item_keywords'), - fn.split( fn.col('node')[0]['node_name'], '/').alias('hierarchy'), - 'main_image_id' - ) - .join( - image_metadata.alias('b').select('image_id','path'), - on=fn.expr('a.main_image_id=b.image_id'), - how='left' - ) - .withColumn('main_image_embedding', get_image_embedding(fn.col('path'))) - .drop('main_image_id','image_id','bulletpoint','item_keywords','hierarchy') - ) - -#build the arguments for zingg -args = Arguments() -#set field definitions -item_id = FieldDefinition("item_id", "string", MatchType.DONT_USE) -domain_name = FieldDefinition("domain_name", "string", MatchType.DONT_USE) -marketplace = FieldDefinition("marketplace", "string", MatchType.DONT_USE) -brand = FieldDefinition("brand","string", MatchType.FUZZY) -item_name = FieldDefinition("item_name", "string", MatchType.TEXT) -product_description = FieldDefinition("product_description", "string", MatchType.DONT_USE) -path = FieldDefinition("path", "string", MatchType.DONT_USE) -main_image_embedding = FieldDefinition("main_image_embedding", "array", MatchType.FUZZY) - -#fieldDefs = [item_id, domain_name, marketplace, brand, item_name,product_description, bulletpoint, item_keywords, hierarchy,path, main_image_embedding] -fieldDefs = [item_id, domain_name, marketplace, brand, item_name,product_description,path,main_image_embedding] -args.setFieldDefinition(fieldDefs) -#set the modelid and the zingg dir -args.setModelId("9999") -args.setZinggDir("/tmp/modelSmallImages") -args.setNumPartitions(16) -args.setLabelDataSampleSize(0.2) - -items1 = items.limit(100) -items2 = 
items1.limit(10) - -inputPipeSmallImages1=InMemoryPipe("smallImages1") -inputPipeSmallImages1.setDataset(items1) - -inputPipeSmallImages2=InMemoryPipe("smallImages2") -inputPipeSmallImages2.setDataset(items2) - -args.setData(inputPipeSmallImages1,inputPipeSmallImages2) - -#setting outputpipe in 'args' -outputPipe = Pipe("resultSmallImages", "parquet") -outputPipe.addProperty("location", "/tmp/resultSmallImages") -args.setOutput(outputPipe) - -options = ClientOptions([ClientOptions.PHASE,"link"]) - -#Zingg execution for the given phase -zingg = Zingg(args, options) -zingg.initAndExecute() - - diff --git a/test/testFebrl/testArgs.py b/test/testFebrl/testArgs.py index 6a2751d3f..5847022fd 100644 --- a/test/testFebrl/testArgs.py +++ b/test/testFebrl/testArgs.py @@ -808,46 +808,4 @@ def test_set_db_table(self): pipe = SnowflakePipe("snowflake_pipe") db_table = "my_table" pipe.setDbTable(db_table) - self.assertEqual(pipe.pipe.getProps()["dbtable"], db_table) - -class TestInMemoryPipe(TestCase): - def test_init(self): - name = "in_memory_pipe" - pipe = InMemoryPipe(name) - self.assertEqual(pipe.pipe.getName(), name) - self.assertEqual(pipe.pipe.getFormat(), "inMemory") - - # def test_set_dataset(self): - # name = "in_memory_pipe" - # pipe = InMemoryPipe(name) - # sample_data = [{"name": "John", "age": 30}, {"name": "Alice", "age": 25}] - # df = pd.DataFrame(sample_data) - # pipe.setDataset(df) - # dataset = pipe.getDataset() - # self.assertEqual(dataset.count(), len(sample_data)) - - # def test_set_dataset_with_schema(self): - # name = "in_memory_pipe" - # sample_data = [{"name": "John", "age": 30}, {"name": "Alice", "age": 25}] - # schema = "name string, age int" - # df = pd.DataFrame(sample_data) - # pipe = InMemoryPipe(name) - # pipe.setDataset(df) - # dataset = pipe.getDataset() - # self.assertEqual(dataset.count(), len(sample_data)) - - # def test_set_dataset_with_invalid_type(self): - # name = "in_memory_pipe" - # pipe = InMemoryPipe(name) - # invalid_data = 
random.randint(1, 100) - # with self.assertRaises(AttributeError): - # pipe.setDataset(invalid_data) - - # def test_get_dataset(self): - # name = "in_memory_pipe" - # sample_data = [{"name": "John", "age": 30}, {"name": "Alice", "age": 25}] - # df = pd.DataFrame(sample_data) - # pipe = InMemoryPipe(name) - # pipe.setDataset(df) - # dataset = pipe.getDataset() - # self.assertEqual(dataset.count(), len(sample_data)) \ No newline at end of file + self.assertEqual(pipe.pipe.getProps()["dbtable"], db_table) \ No newline at end of file From 246863e648f6341a190f5dd8225ade2190b480ee Mon Sep 17 00:00:00 2001 From: Sania Goyal <85894828+sania-16@users.noreply.github.com> Date: Mon, 14 Jul 2025 17:38:13 +0530 Subject: [PATCH 5/7] Pipe Code Refactor PR (#1179) * pipe code refactor * adding generic class * adding tables --- .../zingg/common/client/pipe/FilePipe.java | 3 +- .../zingg/common/client/util/PipeUtil.java | 112 ++++-------------- log4j2.properties | 2 +- .../zingg/spark/client/pipe/SparkPipe.java | 3 + 4 files changed, 30 insertions(+), 90 deletions(-) diff --git a/common/client/src/main/java/zingg/common/client/pipe/FilePipe.java b/common/client/src/main/java/zingg/common/client/pipe/FilePipe.java index 927bbcf99..67b609853 100644 --- a/common/client/src/main/java/zingg/common/client/pipe/FilePipe.java +++ b/common/client/src/main/java/zingg/common/client/pipe/FilePipe.java @@ -5,7 +5,6 @@ public class FilePipe { public static final String LOCATION = "location"; public static final String HEADER = "header"; public static final String DELIMITER = "delimiter"; - - + public static final String TABLE = "table"; } diff --git a/common/client/src/main/java/zingg/common/client/util/PipeUtil.java b/common/client/src/main/java/zingg/common/client/util/PipeUtil.java index 11a68b77f..f42e05647 100644 --- a/common/client/src/main/java/zingg/common/client/util/PipeUtil.java +++ b/common/client/src/main/java/zingg/common/client/util/PipeUtil.java @@ -49,18 +49,22 @@ public DFReader 
getReader(Pipe p) { return reader; } - protected ZFrame read(DFReader reader, Pipe p, boolean addSource) throws ZinggClientException{ + public ZFrame getInput(Pipe p, DFReader reader) throws ZinggClientException{ ZFrame input = null; - LOG.warn("Reading " + p); - try { - - if (p.getProps().containsKey(FilePipe.LOCATION)) { input = reader.load(p.get(FilePipe.LOCATION)); } else { input = reader.load(); } + return input; + } + + protected ZFrame read(DFReader reader, Pipe p, boolean addSource) throws ZinggClientException{ + ZFrame input = null; + LOG.warn("Reading " + p); + try { + input = getInput(p, reader); if (addSource) { input = input.withColumn(ColName.SOURCE_COL, p.getName()); @@ -188,7 +192,7 @@ public void write(ZFrame toWriteOrig, for (Pipe p: pipes) { //Dataset toWrite = toWriteOrig.df(); //DataFrameWriter writer = toWrite.write(); - DFWriter writer = getWriter(toWriteOrig); + DFWriter writer = getWriter(toWriteOrig); LOG.warn("Writing output " + p); @@ -199,98 +203,32 @@ public void write(ZFrame toWriteOrig, else { writer.setMode("Append"); //SaveMode.valueOf("Append")); } - /* - if (p.getFormat().equals(Pipe.FORMAT_ELASTIC)) { - ctx.getConf().set(ElasticPipe.NODE, p.getProps().get(ElasticPipe.NODE)); - ctx.getConf().set(ElasticPipe.PORT, p.getProps().get(ElasticPipe.PORT)); - ctx.getConf().set(ElasticPipe.ID, ColName.ID_COL); - ctx.getConf().set(ElasticPipe.RESOURCE, p.getName()); - } - */ - writer = writer.format(p.getFormat()); + writer = getWriterWithFormat(writer, p); for (String key: p.getProps().keySet()) { writer = writer.option(key, p.get(key)); } - if (p.getFormat() == Pipe.FORMAT_CASSANDRA) { - /* - ctx.getConf().set(CassandraPipe.HOST, p.getProps().get(CassandraPipe.HOST)); - toWrite.sparkSession().conf().set(CassandraPipe.HOST, p.getProps().get(CassandraPipe.HOST)); - //df.createCassandraTable(p.get("keyspace"), p.get("table"), opPk, opCl, CassandraConnector.apply(ctx.getConf())); - - CassandraConnector connector = 
CassandraConnector.apply(ctx.getConf()); - try (Session session = connector.openSession()) { - ResultSet rs = session.execute("SELECT table_name FROM system_schema.tables WHERE keyspace_name='" - + p.get(CassandraPipe.KEYSPACE) + "' AND table_name='" + p.get(CassandraPipe.TABLE) + "'"); - if (rs.all().size() == 0) { - List pk = new ArrayList(); - if (p.get(CassandraPipe.PRIMARY_KEY) != null) { - //pk.add(p.get(CassandraPipe.PRIMARY_KEY)); - pk = Arrays.asList(p.get(CassandraPipe.PRIMARY_KEY).split(",")); - } - Option> opPk = Option.apply(JavaConverters.asScalaIteratorConverter(pk.iterator()).asScala().toSeq()); - List cl = new ArrayList(); - - if (p.getAddProps()!= null && p.getAddProps().containsKey("clusterBy")) { - cl=Arrays.asList(p.getAddProps().get("clusterBy").split(",")); - } - Option> opCl = Option.apply(JavaConverters.asScalaIteratorConverter(cl.iterator()).asScala().toSeq()); - - DataFrameFunctions df = new DataFrameFunctions(toWrite); - LOG.warn("received cassandra table - " + p.get(CassandraPipe.KEYSPACE) + " and " + p.get(CassandraPipe.TABLE)); - df.createCassandraTable(p.get(CassandraPipe.KEYSPACE), p.get(CassandraPipe.TABLE), opPk, opCl, CassandraConnector.apply(ctx.getConf())); - if (p.getAddProps()!= null && p.getAddProps().containsKey("indexBy")) { - LOG.warn("creating index on cassandra"); - - session.execute("CREATE INDEX " + p.getAddProps().get("indexBy") + p.get(CassandraPipe.KEYSPACE) + "_" + - p.get(CassandraPipe.TABLE) + "_idx ON " + p.get(CassandraPipe.KEYSPACE) + "." 
+ - p.get(CassandraPipe.TABLE) + "(" + p.getAddProps().get("indexBy") + - ")"); - } - } - else { - LOG.warn("existing cassandra table - " + p.get(CassandraPipe.KEYSPACE) + " and " + p.get(CassandraPipe.TABLE)); - - } - - } - catch(Exception e) { - e.printStackTrace(); - LOG.warn("Writing issue"); - }*/ + save(p, writer, toWriteOrig); } - else if (p.getProps().containsKey("location")) { + } catch (Exception ex) { + throw new ZinggClientException(ex.getMessage()); + } + } + + public DFWriter getWriterWithFormat(DFWriter writer, Pipe p) { + writer = writer.format(p.getFormat()); + return writer; + } + + public void save(Pipe p, DFWriter writer, ZFrame toWriteOrig){ + if (p.getProps().containsKey("location")) { LOG.warn("Writing file"); writer.save(p.get(FilePipe.LOCATION)); } - else if (p.getFormat().equals(Pipe.FORMAT_JDBC)){ - writer = getWriter(toWriteOrig); - writer = writer.format(p.getFormat()); - - //SparkPipe sPipe = (SparkPipe) p; - if (p.getMode() != null) { - writer.setMode(p.getMode()); //SaveMode.valueOf(p.getMode())); - } - else { - writer.setMode("Append") ;//SaveMode.valueOf("Append")); - } - for (String key: p.getProps().keySet()) { - writer = writer.option(key, p.get(key)); - } - writer.save(); - } - else { + else{ writer.save(); - - } - - } - } catch (Exception ex) { - throw new ZinggClientException(ex.getMessage()); - } } - /* public void writePerSource(Dataset toWrite, Arguments args, JavaSparkContext ctx, Pipe[] pipes ) throws ZinggClientException { List sources = toWrite.select(ColName.SOURCE_COL).distinct().collectAsList(); diff --git a/log4j2.properties b/log4j2.properties index f42141ec9..807283ecf 100644 --- a/log4j2.properties +++ b/log4j2.properties @@ -54,7 +54,7 @@ logger.breeze.level = fatal logger.zingg.name = zingg logger.zingg.level = info logger.zingg_analytics.name = zingg.common.core.util.Analytics -logger.zingg_analytics.level = debug +logger.zingg_analytics.level = off logger.codegen.name = 
org.apache.spark.sql.catalyst.expressions logger.codegen.level = OFF logger.codehaus.name = org.codehaus diff --git a/spark/client/src/main/java/zingg/spark/client/pipe/SparkPipe.java b/spark/client/src/main/java/zingg/spark/client/pipe/SparkPipe.java index bc7fcb7fb..64190b0e0 100644 --- a/spark/client/src/main/java/zingg/spark/client/pipe/SparkPipe.java +++ b/spark/client/src/main/java/zingg/spark/client/pipe/SparkPipe.java @@ -57,5 +57,8 @@ public void setLocation(String fileName){ this.props.put(FilePipe.LOCATION, fileName); } + public void setTable(String tableName){ + this.props.put(FilePipe.TABLE, tableName); + } } From 05c9b06dbe35d5ee572c4cd83c1a4893eb6ae9a6 Mon Sep 17 00:00:00 2001 From: Sania Goyal <85894828+sania-16@users.noreply.github.com> Date: Thu, 17 Jul 2025 11:02:07 +0530 Subject: [PATCH 6/7] Documentation Changes PR (#1182) * pipe code refactor * adding generic class * adding tables * databricks docs * enterprise python api * formatting * updating oss docs --- docs/SUMMARY.md | 5 +- docs/dataSourcesAndSinks/databricks.md | 6 +- docs/pythonEC/doctrees/environment.pickle | Bin 0 -> 94244 bytes docs/pythonEC/doctrees/index.doctree | Bin 0 -> 13375 bytes docs/pythonEC/doctrees/zinggEC.doctree | Bin 0 -> 187031 bytes docs/pythonEC/markdown/index.md | 82 ++ docs/pythonEC/markdown/zinggEC.md | 865 ++++++++++++++++++ docs/pythonES/doctrees/environment.pickle | Bin 0 -> 22405 bytes docs/pythonES/doctrees/index.doctree | Bin 0 -> 13374 bytes docs/pythonES/doctrees/zinggES.doctree | Bin 0 -> 17351 bytes docs/pythonES/markdown/index.md | 23 + docs/pythonES/markdown/zinggES.md | 43 + .../doctrees/environment.pickle | Bin .../doctrees/index.doctree | Bin .../doctrees/zingg.doctree | Bin docs/{python => pythonOss}/markdown/index.md | 10 +- docs/{python => pythonOss}/markdown/zingg.md | 2 +- docs/running/databricks.md | 8 +- docs/working-with-python-enterprise.md | 86 ++ python/docs/index.rst | 8 +- python/zingg/pipes.py | 2 +- 21 files changed, 1122 
insertions(+), 18 deletions(-) create mode 100644 docs/pythonEC/doctrees/environment.pickle create mode 100644 docs/pythonEC/doctrees/index.doctree create mode 100644 docs/pythonEC/doctrees/zinggEC.doctree create mode 100644 docs/pythonEC/markdown/index.md create mode 100644 docs/pythonEC/markdown/zinggEC.md create mode 100644 docs/pythonES/doctrees/environment.pickle create mode 100644 docs/pythonES/doctrees/index.doctree create mode 100644 docs/pythonES/doctrees/zinggES.doctree create mode 100644 docs/pythonES/markdown/index.md create mode 100644 docs/pythonES/markdown/zinggES.md rename docs/{python => pythonOss}/doctrees/environment.pickle (100%) rename docs/{python => pythonOss}/doctrees/index.doctree (100%) rename docs/{python => pythonOss}/doctrees/zingg.doctree (100%) rename docs/{python => pythonOss}/markdown/index.md (96%) rename docs/{python => pythonOss}/markdown/zingg.md (99%) create mode 100644 docs/working-with-python-enterprise.md diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 76acddf89..df3b88bff 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -65,7 +65,10 @@ * [BigQuery](dataSourcesAndSinks/bigquery.md) * [Exasol](dataSourcesAndSinks/exasol.md) * [Working With Python](working-with-python.md) - * [Python API](python/markdown/index.md) + * [Community Python API](pythonOss/markdown/index.md) + * [Enterprise Python API](working-with-python-enterprise.md) + * [ZinggEC Python API](pythonEC/markdown/index.md) + * [ZinggES Python API](pythonES/markdown/index.md) * [Running Zingg On Cloud](running/running.md) * [Running On AWS](running/aws.md) * [Running On Azure](running/azure.md) diff --git a/docs/dataSourcesAndSinks/databricks.md b/docs/dataSourcesAndSinks/databricks.md index d5d406d90..70a3096ed 100644 --- a/docs/dataSourcesAndSinks/databricks.md +++ b/docs/dataSourcesAndSinks/databricks.md @@ -1,5 +1,7 @@ # Databricks -As a Spark-based application, Zingg Open Source works seamlessly on Databricks. 
Zingg leverages Databricks' Spark environment, and can access all the supported data sources like parquet and the delta file format. +As a Spark-based application, the **Zingg Community version** works seamlessly on Databricks. Zingg leverages Databricks' Spark environment. -Please check the various ways in which you can run Zingg on Databricks [here](../running/databricks.md) +You can access all the supported data sources like parquet, the delta file format and unity catalog supported formats while running **Zingg Enterprise version**. + +Please check the various ways in which you can run **Zingg Community version on Databricks** [here](../running/databricks.md) diff --git a/docs/pythonEC/doctrees/environment.pickle b/docs/pythonEC/doctrees/environment.pickle new file mode 100644 index 0000000000000000000000000000000000000000..24ffa6427199373e71d3a9031668e0c6ec2a1374 GIT binary patch literal 94244 zcmeHwX}la)aj&dhSNq}}+hci~Yow8Pn`>-*Ey)WbT?%!C8}cCY-g-I%mDJ!P)3+@>hDzZo6!Yc7WTMbLV^h zdWoufU%6XUXaZMwcBe}uPS__pcEfX<4X<|?_-|F5I?*Xqx^1h2nqI&^lv>?R6X^9q zO&eCLS+l9B3E-G(A1^m6px7)eH;>tEYu@hJ{_2|5nD5fFztl$OwY6s1s@WybO6d5j z#JktC{EdRplHCAAFtq2d>2~IJ-nP>tg4Z=LNnS}H53*LZ+Rdt6rdfWRxStELA0#+U z%;;*aD@@Sv1=;$(!6pte?O1+G{bX>niq~)va|(ZAcF> z$Es97UZYH6hjH3isE$npJnL_wx_YzHg?tddsBBB8SuQnaX{I3cByU8*%-5ILy|14s+0aY?kEI?*tns!Fi+|OvesE( zoVPfTmKsTno>NGIBn*NTR5x2hmPc&f(6Q?+OrGtrA|YAdsESInZMT9YG2^d4V%x3K zajV^c4AbbFy>6?;6RQi+a|`1rOG=%E7IWb|@QSAyB2C%`LSfS+qr(K|xMa87T5}$w z@EasXylr8Qkp6)Ll;&y{27VS~gHKj_0pcSVx3mS>-XvC?p+Z2~y1Lb9VM%+XZo3BX zs~c{o<(7{u5clMB$(?VIZ~&KW;UY}IJR}npD;wQ9DYKLQR`FEw>@uE86}tsxTRg{K zFU#ke?Yh-r#k~q`gvy#YD4udx(R_wxW4)AcW@TPNH)28Xt3ShOliHMfC3c4HpM%=jCDfl~rPvshzTrZibxWFH0F zAz_RnOp0npq&*wFdUmVlpHslNotjsIW^v~hsGi=+5XE-Kqp9k81b!X@h_|&KrlfPU z3(4ihV7!(r$%bV|_khT4W|dsVB!_u_jnXwj{V%P;V6Ax4DeayS{~vmU)HE+~Xgkac zj3Xu(ih{L*4b}*;Edq2LtSs-nYc<>CB``fyj`dBt zZ#lg{l!LO8v0OHCv0DM%?wo5wb+76+jx?Ibfyi2}^U4gzdEQ^m?_Iaj!;2V$ElzJz 
zzRjb~#@hyNh*iNVnC*=ir`06Mhg#;c4Nj+CE0v*4b&SD1=XdB|Zx)4u9f#e>1l{-gZgYNxuT#1gxX`B-&XudLjDd zG;I zudD|WgF^weEnRl5fD3b1>b5G-u`nrYdM}Ju5YM`uGT1H#^dK6u7Q_~cOu&KF0c)BK ztA+t+hETJW0SQ$|EO*Pi00lBF8%rJYa)j38Hf|Xdm6$w#*|*;CG}I?odaatVZ=MI*b+@55z^N1Y^z;%dd`#_gTz+#n2}{B zlo{Ed#|<2xXZ;Jruw+mLOhBoVaR5q^B(7tZ8)af@N$kjv0Q&?}n%z#TOJ-a7b1T@N zgAQQ$Fj_Fdu&GeP#7Qw5!UT|xl`f0Fw2)H}B{oWJOt{cgOUR#gc{&7~crD zLKP|ZRL&X6fhr;go((eOK$Ue#IiK{msCU{?NH6kP{~}!)EWD1@Mk#CK^Hnt*+m7~u z$(g_Ko`?4B%gs#h-^Djs+p0ru_D#>+`_S~g z=(2C$T{XARY|o?lTJp?JCYRz`LdEA3I$T8U%nD$xnLX@*8gBP1< z?Z7N!bp-C-#X+I6pYS!i0#|0B?!=oNEJW-;+%^~`9Os}vIpyRiUKkX$?W0|CTOHeC z=L5uVHfzNW0j{G_+HvbPbWyK(sablw**-$AxswgS|F6m30^ch0?uuwLizslG#q|0lN*cCvl&&X)Rr0z_DA+wua%A zEDsxs<`HbKVdNmnVk^WR#7X}mDyUmWU*wy|7t zD|ZC~@1UG>N>+sbbi8o2U^iWOp9eH;=#v#%3-kV|k$P8jAB(>zF0H)uCW-FxtZL zb1}>FOppcQO003k?o4miXqqoXK*Mbr8@O4kd;|d$&%yAQi$Gk7{lUgjsC(LHz{kaA zT1FdA`d7+E!^kekcIk23E_+BTOiL6?7=9rFW$?XuM-SFv8P;sauC-t-8x^>3u%oaW zL2<=CMjL3D6ztyY#?CH8>4KnI_EK9h|D5ZwcL=Jb>%A!`IWj9Z@x~Z!yN(T~EI&VJ zD`E{wL&DYtRdq}YNT~b-nhkS+}vvaVgN+^Nk^Be@=5M+iuov zRRuz20bnqbw)V6P5v?LVVx{m7i##S~W7r2~zm&^Kn<@_zzVT5QHZYx|P8=74BPYw` zzGy`QJA`9!D9mEhLT)A}hof=iHzDMTrMrUP(TVC{1xt0C!w1k7a1Q--Rt-DqSsVTX zc^~JiRsxEaZNRC!CtwAF7AmS{_<FR#;>3r`f7s5QicoCWxZbT7VOyn#ed8 z0uAs=+E~~ZsT#N8Aa2s35T_uKm%XZ`xS8EPhTyV@Ig%t?s48jQGg1^QqwpRn3>DNa z%{QC#5F`y3l^5trSPg8}%(_c;or-+A2y$T~j|dUkk-CYsKz!{OIRFD2c`du_LXeF{ zS87N^f>lAFkGHGrW7sDwvN1$yFk(VGihWX}s|Nuy>@Q+lp_V|MHZxm7g|HfMu@R#U zm9~k?DBTt+l{PZObj%lmlGs^$S^=J`JB0)YabsOk?s}aj!gRu$p_o}Pjbyh=Rs+<^ zs5uO}I--y=PwG?`AuP}OgCbKW{hThJYjPM~tP&Urv259u&~3=`kl{e$>?U>SAtTrx z;!o%6GT|QUD)MQgbt)(>HbK!^73NRs*;)_*a_+fN_Y7hvTZ}S}zKS&&Ll3?WLe#uo zgF4>HvcF9fV*)SH(AZHy47H32Y{D#&P8ra^>r}`M4?#ErgAGdu^#cncVjp1U#la91 zXsElMNBT@)hafXYv_b*94OQ$55WObZ_u4LCtTvQ$-r40`=P$$IVDFipbG>sz^)om( z#N&-PYNE_3d7cB_^5SakNO#~F>z1JA0m1N!L| z`sqREpsGHk@pwo-Jv^fr@QCxMYWSG*xO#lTd8KM_ST}u@?p|_Uty)@o$XTbX`cyQQ zwtkw^4d)$4^)AdfQ(%?rRQdlSx^?X&7u1~w|KD_4{QpsnLc4g50^&Ix_4t$q@LJvS 
zb-L{J8il8w?^a#jpbNi8H+-XhdPdj2$;b#9{8&O4mv`Trl4|G(3DUJ>=KN%iz&`sv-ydsN|jogbHv)lXs- zRX>G44C({U2UVL76_=^UpU`~!a3E_xsrcr6q_{-2{HXJSK?tpI83bqOAD4*!l>Gk_ z&L_G2Q_fE-x%;&9-_+x0oc}H#RoI)E>R%Z>M9%SvZw{J?Ll+{%rMl`MC~w1QRgm_@({DJxsR}=)3%{fReOZ_NmVWwe=PRnq??4|)h51#D#{bYyzpMNFo__k8e)@gq z>#Fk~IDe=f|47&WvGWa8_$M0VpE`f03je1w#Y^YUb&tOY(D_T}1=a4a^rHQ1P1oNz z-&EEA%lTXN_;=3#R*(P3`M>J%AJyPGlzM{>jH)*Oq}n+D{A~3L!i4;be){%FRrar1 z=Ksz4juP+x=lr{>{}1Os<)gn&M)?{DCqTSbw<O73C&7{XS1+otNyrML6s zo4-NxL+rSoVm4LJr$!g3E*H|6dNuxVc^(gw{4|Lt9qZ$gYf&Qj@nZGluc26-ID-=NDH4l<`NSqE zSx;g9og1v;Icg$zD45lq>IoS-5Dr8a;O$^%C{Mw(rr>}ZhXSKQ6+;3R=!WWBmLluw zvIiPA1wrkcu_eqk=W;UF2EnZTu2;5eut^BPdPrgtf>r=l)m`|*ytt0uu2*k2;6ZTn zMk>EamEVj9!LeI(i(9G1ZS+Stbk^OR9=JD%05tG<*f?l5ZaG;3c*6?Yl6)AW3| zQLvky_ZbEI@vQVqb%rV)FlrCb^DB&k2kH5sQE&*)y8lCX5d=Q0pB^zAJc<`h%VX3= zk^Q*&@`Tyvm1dvAMl=81Adn)%MtwXKe~oSl2e$eu2LDOSKVC|BslJ;2$O4O=XN`g~ zo@Zn&;sZLsg~$`6q=F12M2I6!(N2+WrC?SJ5LMfJnZwIT)ok9VF+*wCFSbO07OuKL z6S+nj5e~b;uhMlJgSw9J4g`KiidpVHgo}C#*u_By6{kYr4$Ia>Oj*-zgz0oEk$rPm zL8>|+q3Y5f$<8W0A2AAQcvckqn-C7T=p(1Q*-!Zc$wtlcjKx09{l0A_tM*stGD;j+xykq z2k7mC>g_}H_7m#u!}Ru(>g^-+_EGir0v-ekAJb1C*H1r%CxPlG^wUq%lN88qch+Ao zj=gbMiZn2s51xY)4L%5$CF0bFQUq4wh?gRXGtMr5gG{T{mT%PonV_mua#!}t@paCl zBw%d#_q-7yRAeDh_MQlas2H`ghf{_{TI6#$X;KNEt9bS9-Q9bOzlo#f6pFzTW?lXi z8m^+SJLfU4Inean69@2$L}naSQHCfdgT4`?S1(xxHeP{0{#pq#sJj%!UBH)cX$XRf zqvyQncQtjUR@F&r+MRW7p|NE$Q9+y-dWCYYxXiyma!MCEND)Tl76-KF_~%C}T@lKo zI+f$CJ0gX%0;INe3mQRK%_APe#&a*hzE+7c>9N({0l5xoLGC|`?$ad{l!Pyz)G zCJ{2F3{QV4&V~H-oKc2@H%p{TFQ-EdxchX*fD=&4Ud4tVd_C!(TSluR$l$|S8e+&5 z$}mL`@L7Ld3;Cy{#`(C1GJiyR3W_~>%2hzh1ajUtP)j33j1!XfT-oS*wd#wEEDI-N zWgM>;T)Q*J03bTM$oLQ>>KB5v!EJ;xg`A!K26SNSTRK4p1;i){`Fh>iaOq}stnJyg zIXVILiXld|V@p^dks$)lg>nrP~R`Qy7^Qy}GzeR6dN{6Oo z5~M^?BD231sTVaB$oALDa|`5}bZCWgQYM4|4VTgKcx#UrDlIc;0DLKc)TjddFPHq@xM72c2y_)`5^k)`-ko^iY@kznz0bootx9+{iwQH36Z>t` z$U&!XIIav5+ijbp1 z%In7%{2?r1(jwPAPSP&-E;v)AC+tgd5857*)<_@b4)8ZhxdTEaK+DiKQuuUSmuKGSaBAEAn{0DkIg#Y}>6l8?(uYMHWs-MLl;9h+J|C6`!ar_UEpAi3j 
zO8oa}@!x0ozi#zs@V~^Y>rd30_yB;mfx+xBM+DLYzl&6PQ~QevSUcN=ZWrmTdfLL2 zk+g6L5xK?Fx1mg4%xs zwIj{fZxU~L%Ef%y85k|4<|`V+ml=!d@8G}0DgXU`6@T2Xf|t$u=>zvep~_FoqyiO4 zUxisaC2PXpaD~-!cLwV^wnVmL=#f|+ERoAb1#!w*zIm@meaR8)AM)VJhS+}JDP#MV zD`e8mPGWzTM~<<6@2GqWSbz7ZAfl`vVZqRvQ)aoZdhEA|^3@Sn(TFC#vq)2YDTp3s zb&ifYDZ#up7(`#Ildpa|P&JEXYKXMzQVa~&XQlXWHU29uudcy=Gt~+FJy~7Lf7kKf z_4tbu_znEB5r5&gRX0=N1Z=|eXf{DukKG(s658?2fCxeBCrYDA(u>tduxaWjRG6wl zY-QC2>Cr>kTlbl_wq>;N_op1Ln)o;u0oUqMG9jlYzcL&^SYeD?1Yv%hyYE~pqP zkqI&ylZVe`M|G=5$#yDSCVau(vl4P3CO^lJ2k56oT&%x}u@e0-_NuOoO?Ux$bfVCz zy@K-n@{^(E`wxJZZTZGz;9J=lo4ALG%fJKU$v}*DC}iGz1bd>t%Z__Wq4;KI$P&3g zcl;HDSWHzvSb8o_mY(*!7ZIAvR*Wq)F9L{co_1QranB+M4M>j5%7xicPAMzrWrlpl zWo3J!tn9&uhWM6?LSzw<**Ct(+@G0CX+>uL_#$Ia9+#YZvSXf7a&~8ie8wf`@!l#&!f6#VSK?kmYGRu1!v#*f)fxPmm4oT;wj~(l^OCGmzxCN21k5@e47u9FFHS- znNMj&XI$SVN_$*--klxyl+yFg%#hEx^d$H;#KE|}`Re#W^V^x(lU8WP^=%}{aas9N zc9c`f%CBdJe8y!Z!M6e93=1FeZI-MY+a3P*%v?$r7=M~&;-gb0tz%{AE(PboK7W`^8HZbm!?5Rowv6dD0U zu>T(tI~+CCTj(F^P0B6`g^S9WB{r>|+9h1+#Aq*$WmVGCfPn9-N1gW|l6QDi5UD3^ zlz{KxsLi&alXf!mq{#&l#Ce~|%$T(EzGuJ|Dv7c}Qej-ZydgWnDb>sC8S>c>?Y%UK z6&&=8yS1MjUj{y&ouw(`yn{?lB8`cK6zNV3+FT3`c-4g&;&r78ejL|;>r-y9~Yrj+3`;)Ld!ElKI0-Z*-wOw@&qxO8efcV z&CIN{s_2o$5~BzY;-Yj@b{bNO()F1kpK(#T(qI=n0Qpe_Ga$Y)%Lk{xNy z!z4%g@5h&+zs=03v@*0fj&zv*xCs4CcKlO{&|hYTe8xp6*^ved4X-G{k-iu|IWp#+ z{sMqFHT$u}airrsh>Oy>*=a~AN?S5RKI5X4>_~%+fhQR;X!ngTMSC;zDXlVE97j4t zeq4a=&W?Uc0eV?x$bAH8ghQy{6EYF*9sxw^Bau1JKrxIuv3pxRfrbE1>jxT5lyK|o@dB?&#X@4=|hwY6nKx8MQqHM#}|R$$j;f6 zM^^{N5rHJ)<6`l}?8v7Si!Wq`9J4T=9~DGkVT5p=jTR=?-@=FsRYG=U`I@XAby_`h z`2f2j5s6uobAU}Y=UjpatjT|jI=5%gnylWt+dFmum)(S>=Gx3t6SE@MWaeku*}Zmv z3X%8rXrf}%d}VgDQYweb8FF8nkw1M#<9tPa0lqk*YxH|ib~|Dv4vjy>ugK2Jlvd)_ z^iv$7F>VZIvg4R?zKfY5OJNLF`+W@J^a;i`Q!wu1-#3d$#nZrO%qYieGxH_w!oMk- zR0sm&BJrB+2&NQ?6PY2OagjLB6p4NEhZZA3@xu5*@spW3lU68h$tDzv#JE^|C_9QN z#p3;$A@>oB5q!SDZ4~xt1Q3C~8j@hU_DTI+Jiuomf zmK~>*()SI9d^Y$c7x8S;Chh=!c@RHI*zC?bdr!2YuF2+!v5n9MfXL>}r=8-dlvA8Q zW?bj2&5mZuSznzQa!lu}U=XLzJ2^ip9^Hgvm^*>gm=4PAnOT!oHqyAQwEPX(U>f6s 
zaa(pAQwqk-nIWHX!8lh725#ZPwYmvnab$e4a56I{tytWgL1hFK#)ZPpj$ulnn9U6N zj0?q9Ary|~kuf>sw7U_n_3h(}#M?5nC9O!@m{ueV`r-odgW2&*DG<+PhJ3~aLV6c` z{5Z(F_`>*d@%hZ$Nh=qry^A=Raq;+t>}aMGkDtv9xsP~^upJ3@G{UnT0mK>H(X6|_ zu^&Hv++K)nZ2n_>x%^J%X-O-Wh2i=tM3|iJ;IQ`F*-=X=i~q!s&xZZYg(eHg$0J7t z3pHYx@@vO-VXp#+Q?tE!Nw$eLsEq5M?b)$RIp>#Uh8)vBFJcggFEivm!ZCtn35=6aOCx|dgT`s)L%U;U>HEhQ z$nVWOA!!9NKU`Z$l41tw+3ZNAl)pDJdZR)JG!Kal!eg>{zE1 zoPWp+`HTzBWnsaI{6cF)Wb*6BRxMWn#Hra6?H)yBBJ{?EWqWpfQwqyvnIWHT!ZJ9) z?V<67<-yD>N~>Q+5|#+PabbBNJH9D}WnX5UD;7hDIyD*A)j#(F`Ze&?-TYo9~)mdK9ZR)X@w)Z zGb@RVi^Wf5M>3^Yd>}LAGcFdF7|RJ9ySvsbA3=C_KYqcx7V${_XngtjdS(Wtm5=Gn z9!ZqixS;%Ac5G7$%2zW(KI4KC(H{w$;WZn^_E=W}#HsOEN75h3o8e{IkxVHTOEN<~ z<6=<=>yN~3;qGRm;wJng*)8J>%#E2jmR6w*y1T}p*?5FP8BBg$!(5jg{gk4#Gc)8f zE=uP17`%*Z7bC`~JihF_Ix~yX%1-tjPn_PkupG{gZ%Sc#JTv68O;{q^#fY%HWqe`z zzRWB-rNR=SH!du1&W>+NVR=(#$bE!mg!8}P2vHm!jQ}EbCd8?%fzPasHc>P^_d~Z%*zCWIDO~Dxg_BU0+SJEWH+!I z^UV0J%sfh~I$l1m-~?pHh31azXr~mKsmzeiLZQJPAmCm^Xj_(^U+Fp;KT!PBtvg)Pu-^;rG z25RHkWi*?$+tfr9&AIbN(I&L&v~9cOHp;bb#Q+6_mIF_V0Uqvk{Y`Uj&Gt&2W~pt@ zcHLS9Ai2qNa`3l!{`&b^a~4e)rq_F}=WiB+*_BY~M5S4`+(v~`Xn?zaPMQB(+io^H zC2yfV+pP8c3%r)&Hck{oTdz_UlKgQeR-r3wu3x@0w)4P6UXbZ7ll&DpA5?$Bg7w-lFj{WWFBYRubM*i8Q_`Ks$z zm)(v)5=2rL^pGVyqwCch=46gay8g0qt3|-7zlufcUliqGtEIVE{WY`*bFlg)6wOq> zOn?0~%&iC7546Cd4IR7Qs#zTy_iI~q4<+kV2_2AubHB}9zruf+i`C!3^GSbAzy!$D z*H8v&nyr$UOEx&ulDg_raG!7|Sw>@$<-@&v-e2Cb+;-33SZ`LkHT!mT*(KO4N_NVR z9(vByOb7k{8Kd|&Mp_pz_jbG0c0IdLZr1C~MnU`}yWIu};-`3e{taP>sXB>K{gobM zuV(lB`(ki&dB~gR<=lqCJq_T=o_|OG&pBd)B#D~(21sthVI_+(Iufbx%&vP4E^r*q9MC)Y-A0* zLO4GRoXz|47Y*g&*eC~Rg)n{$s`4zA;V5Pyybn+<|J3QC@pw!bmI#Hgd<2DttSq8_ z7*mGBc!h9&7ufPh zyZf*7q#nsXiELs>I%)VyNew$NQ3K{PH8eI|MEWJVSK667|8dSadmBl$9GoQDM&>iQMz*eRaa#H2T9QJ^%Wl-bk#Esk85i z)~518qMhJ@+}>ZSMO(*z*v7=@acJLYkHIq;Z5@Lj=-vm$z>;itbg0n$<%fHe^0znq zYlhC6Y-w`lCUsMJsyPw>Ff)_=X^#F_ggM$Y1l9QIgh6&Z0w!ShEvbhN|A>K!$DP1K z-PAu45;tZ-Lf)BgVsIBQP`9QJmc+h{xH%IJYA_{T>>Xau*L`N2`m2Wz%hw&5)b3&L 
zCvtye8pac(VCr9zZfIIm(cB;to9UolUWS|Vqgpt8h;O)uG1zN@zFxq+-BZ2m~C08ICX-7l`GhU9U65v zxq0whj5F2R@~Ty79IZDpH>YlgBx-d$#{#M^;^ zyv!?wDv)UD2>Ep>hYT*&l4G-nwqv-35Av&f{)>lNd84&S{QGRAj0%d^`|yy z*XJf66CV*HB zbT&Rv2>5erMhu84G-wQy;aj^?fC&v6!sH-n(WE&`El&ZK0h8_Y+_IL=z{NUO^tRptjOtK zm2%J!e~KuKe8o5hi|)!QUD44)xDl65zs8V*hA^SXPU2cMzSSCoZ37}Z$&xh`oe=i> z(LAz-PwyIyj~~M?vW8DPev$DNerY3P(UjISk+E1GQ$ySntxe}QM5cLgWGp&FEVWs2 zfg=EqLX$>Qya&pB1eX}}z;x~(pf-j&s z$r|Rclks5(IP;mf+egxO#7A5d*-26tyW(RlitMn`2y&%DvU5Xx%tetMJ*LPUGAHro z_>hYtI~dZKaSlV7!`>Djc2Q&p!}gJ#+v6h+`20F{+H4|!CqNk*a;)=r6Yeyj7}AW3 zerRqEwk2bq?!tc*g3wjmL<7hn{(op_XDXg15!tj(@CUvgPgZ0Y*VQCY{g6;STVp z4IUeh>`NtN8t8ML&=kBYK4NAu#fXtWZfJ=2EY1*v#PogfAqMOUqK5_{Y(EH4hRPqX zD~N)LoeqTHewa`U8TLg!GBD-yqj7#LVz}QN>Enw$5@abjJqR)Ild+My)z6@eg73eG z|Er&;25a~8r_~9;b!W<%;&^!Vi`4pOH;2OILG8C9wG>DTYA;1MvoCETgWAbREp4rW zS|?IV-bGOR!ALC$qOR?V@9dYgJ^sR89$CmjuS0hj$R2|A;|?F7|Z zaZ2H@LVvq$;Z}nc_cNF`^=9AH^H<~CLVyJl3p@3jGgbY445PYN{{S!kIx$5#6TnIj zzoS=ndey&RtP*;yJ;*I`2k?O#K zFMGq`PyZ=gKtA-j_pT*>c;#-qc5xcD5;wTx3Wc7(Mb1dKjDQd)#tI%*t~;+{RF;iAL3b1)~1?JFA(@8y`4^+;f)rn@YPy=^j^P4d}Zx zX8g^#Fa%uiT2|REc^#{Kq<0t>DwGK?B1B%g0rYTx1jwT6Im9g&lcFdLZY#A)0JxdT zqaZin(mbnMBW@Ac-i$K?0jHZHuudN#Y;i9}sbYHqN5X%L5(S{z?%?(itJAxecL4a) zeFn3)7B^m$%1vD9aDtXLZsHJ20XLaonU*?khh`BK=Z7n3${BI0J2zZcw^}X8AJtqO zu3>N`qyB<$y;Z9jLq<9ajKF1iSBrmMxHG8l;O;cWHeRKX#VsYoKDv8jbLp6CA1~wT z@sgNxUMblQKJ$NImOSGjI0*KzC| zc+OW+Yno!qq(6XGt77;^c^sfir^@gK-s zt^{FlJEcK}rl%*p{$kaoMUmn|H~6enl|iRl$=0&%-Yg%V7lE*`w*0A}$Z> zq+6RT*JM$OHxm;kBP2lw^kL_4ISMv02?7*U4wamsX(Ik|1_Lsbpoga^g1IqSVDP}X zGRD^kOeIZH&@j#H1g$mHl+yx))xj!>GIuAv0>X`x@8J_fY7XYD14n|q!(4MN*X(v$ z-A=-M@eP{Pg8YCS893;CyFxby@po9BCSs~B7d+j$`2<%U)|DoONfT${Z%_uwG;3(C zcjT^Z;Ugt);M5U?b(na!RkL&4JU7>DODav;4T{Cga^opUTk>>;wwopWvSFWvvEfbK0=;{H8dCzN~bEyp>$#berVpmNHj9;C@Uzprspq%HtH4E7S~}f zd11UE$qVNJ-L?gn0&Xato7{{zA2d6^cnO+amIy7-?Y%0)ysUUta$~@ZHNK{}6OH30 zSp?vm;%4bX;;lC5to-i6lN6R{SB^3pTe{_f&^$?b_5rWnfhFE1WI`^_=< 
zy=wOy@OBMVcWJ(onoy!*I^4MBpqbOKLXM7fL7O&K5qK7CaG!cDwHfWoZjibz08@Oa)tJsuh3u&WBs(UKL z&7{Gg4m>bxQj1w1D*sEwC{_?sZ6w!Jc32yTz9OMTt$N|0*}Gn~LXS?yvyZwHLB zM`%(5t$oFZ(BaC#BZ$eCLp)ME+_#4wBGjW)w9#W*(il4GfEFBe=+lb#+R1aTrG(q0 zq=eh1<105!KiS+omF#iLWSYQ7A$s2}$AR$uyz4 zxjv|6H*@d;@s0u6TCs09@{j%uh9O1VKh_cLv{-SD}Bip0Ut5yhKh21&d>X3lwjpcsDO5$ZAyjoXAHp zN^}-Xr$~+!;T9hUTo?rw^$a!_w~(EQHA~=(v`pKI7oo?dXb){;P|G$Zp#^KWlw%1U zmqlSgzYYH)EC1-j7Y);D;^VE9x-6Ql;&o?b>I#-1Yfe%>fkLaeb8|N3P_g8Av`}fc z=jFU^ixO7k*=_LKGdYe}q5&^yBQvJqATbPJaF!~r++;n12pJ>`YEgs}gl+)_h!*4M z5klKo$R*%F$BkZGy>W3Ym4=7Q^CneIB>**NG3hOYCIT`_8&V5EPi(JbU1Xo%zCT-j_YlA zl21N-b4fujh(Np9op*wBW7*n&gF!rkSU-D^}4m!$487;ypc^ij`Pcpiv|` zdr!350deevfJ1z7Y7Jt=)-wWfDkmr~_ygVp_j0o>qKC)Z-YxpZL;z{=#13)zPYp1}qcyQ1;p z{Ng#UxgKXBz)R9(s0rVPx+etB1}D9A;WHu=L}5RfdvR|26A>ao_I8wmKA!MGl9+JE z!$?ddnWE)vTvj`H4U`?bd?e?x{|46+3g7C(p9JE`C;>)HS$fZF|1`r;QjM+UP7LT|Rd8lWl;d~&;Y^uk zk;EJ=&MYO)K{CHn&Ptp1rJjwi1cptHaMje^`FS?T(sq#6WH&G;KjohaHohT0i_+V% zF@*Ocx5#h=S%HvyC6e| zcblvTMH4nM{ah#ImrdpJ;yW6vHmRL8aR32D2IY6$hU{4tUdjgCpuaSOSo3zgU8U>>N`j+`7yyc%8rR(;%Onj8=z$ZB;5r!={+Xf&`DX z4Y{->?0paHo<4N{12ci&9E?FwFpi`sc@%1MMP&};v;Hm6%oT)7n14BqVPanppd`e} z0ws!TGQ#6BT*|$FIcB?C?odFWK&MoD_Uu#rc?zka`X@*Rr^QT4g2kY5@GT@VcJkZF zA(JF-7mXM$%{ZzM))iN0NMLa7#k(=Jv{1$83nF9y)y@$rY`_2oJ098)h+ec~kgGYc zWm0%`qbV4F4kTNU&#{U6?~;bI#hG z+{k0T%UpU7U4Q)A-?TQxsLoXEgOtTf5 zRi;$Qp9~=zvm@tV$=L0IM2jYHR4g#HWkR7=>tA;2+ll;Zw zh?43l6)aHw1p{*VYY5Oaxm~$yxZgFwB1$6yVV>N#cY4p>2Q|6s3mx;f1Y+io6o|Be zwABe&J=Kx5ImH#2mVstUv>0fmDsxlB=404Du>m5HqoF(Wc~Xne39Od^j=D+TE^0qW zzyhgH42QD*B=cbi0S3-&FVcLYB-X1mDfiFp-@E^T2OkUO6+)8-Iv|$Z+6YcW3j0Ms zPI<9; z&CWnKKJVhEneizhlrA`c4E{<%qc>?#L12aHxJH@{iUJ9=1m~{F?T~5_D}e1RF=LX_ z3#DN)z8D~I#B?z)eLD=85rTmIKSzBI(kAGj?a^1F@Ek<^Pm{rP38e2BGF~7N1RkF_ zM|LWi29***edu4((56Irg;JHoO0=fYi9wO+(L_b(W*GFj81yJ@`yRNrV7J@Nc78j% z0BX-Rm7D1zZUJrZP$LN3l7vm=uEIBI9NYzuqn`m8E5eYY@M^%sYX!pQigbjjQZ=C|FC19QWISD6T3r z9;RK9EqoJ|P&Gxr~Q3?Bb9WIR}4aPkEYl6`zJ z|HF0?hoD(+hp0&Ucu}gutANB*V1nuA6`-%P 
zQ_e>qkU-Esk&m-MKv2eA9-3__o`*xT3xeen4l2u57ZfiIT7dyFdPb3O3O&nKx#HDm zwO)K^yW47uhL6}IwxSMldR&WD- z=11u;n^ojOZ(?PShl?d1HTf1o{w~NW&ZEgT4FR<8lN$mF#4R?_VvE`0K6aL6{n6ra z)UQ_c60?0=dam*h$pA;Od%E~WbUZ)Q1j9>>CZWHn`1V+%K<&v!?xeQ!{~JP;N>4(kFV`W1@%v}G1AyaxKCkc^oV-cd9=J)A#x=B zI3f%4xxK-`blO~)0Qc7{xcr)nLu6j*E=p9qde zX&24;7#gAmK<q#XXW(6O#DG&f5*TL!G+3Mfk@wL3f(f*I z=}eiiHW^C?A1a50Xj9pA2E)}+Vsd{tqC?M^LsJCm19qBBHxKfd9SKB+SrFXTHe7jW zvGUB5PmZaG9~sLXu@{n586U+PO=Qeke1R0_=c4oD2{8r>9wj%@hAR*muEdg(az&vM zE-D}dQ%9t`XfGTNqHEx2;w`Pi*_YT2`BQXjwrlE1B`wJvDB3>g;+@1^dW8*>mc-t% zFCl<&9qhul@@38rgbClRZe~zcqauG5i7JNQ+L1V}h8dhnCM+&y2t-Q!2on!SUr{9B zn5@{+^bGM&znDE?u0?!ooNGT4u@={Wm}7ZHaF2wVuLGe5B-p`qAmJ{7UL!4Q)&X^O zI|vn%uHBQwzGxeDSo<0pZ)TA$v1Krq6tb8r;C$POzpyC3rPhDF#jT;vJ+3xe@=N z*cv87W1Hbaabrw9sGKu!On;*d_Y2U)YqEms*IGk0Nzrx<+Q{q}STK+i{=H2hnzJ~Z zq{Q0&xX!OOP%z2jD+qK^r(Fy5Gam=gSuv88ZS$5Np`}BEOo>GG91dId?7e6Dp?!xa z@cqdBIK$JViTG!vTSA;ocTi2y28k&13qmQUx^xNb@^JHBx9$}bc}yKA1&A^83cTQw z9#wOlA|#>Cx`#q&GKiAP8w3;er%>1N(C)Qt0m;f4bHx4%CDVFh*EszCDRPPnH4C4f z2*QMs31A1lwKgz%kg$~r`h+cvZZ2?A7Dg9`c?W9|_bD>WT_O+jdN6@QSOX<@8LE;J zyPYj=TrwNU;Nu^Exv@3SHqnTo_?mNz(SSSwY))m+a6@q$f`(>2gx#n=kH6f!#OROu zd&uDqylV;S^5V4sWj2lPRAkc~#T(G{0s|_V2HO_d>ZalpT3r!tl{i(h!%O+&%pFDt zA|^=+wHGRT?kUd1agW$O+4Mm1;Yd>qlHBys;;YcqWG={rVNjpNzPdOUsRt;d-YGUv ze{q(I84+?IqUiH~ypr;Bew-*fvk;cKj( zq$V7R?$?Z?sF}y-ULnf4B$4S!HA2?8CY_{l(_vDc)F>t++-Sfe;Chn~Egf7lST2$} z8?M-GjeEjfPKp`A0vtdT4k&f_2I-x5AxO8XD(w{r&;}=*M;OFR&sc=OgdZ=YYe>*@z;w7)}a( zbCt6#d^;H6gwQkpX`@FT!FDW#K{(XtP2DPB`VGF_g5Z&{(}@Zq?LAJ zUta~ig-<006pk-*K!D~)PUIj8IqOR>jfc4Bp@Rn>Lvqif)4LCe5kO_R)}$M= zA+MPwt=|V>qRAdPJ-lx$h{-@|DhJSsw?vLHU1u+`;kONyIwZly@GC4oS&9^p#rFGQ#> zNZ|nnA2X!*9!Iet|HHs$?s{@+Se9${A8xc99&O5|%WoZreOJN3q;miw`OR=!ANA{X!7dzUD_$W2> znLbUfj4WtbMs6~Fw+0Q928OsmMm!Qz~l-jHiD+5kaE3613lTWta5xXD35K!tx z|B^5&AS)>^flz2PYD3N2j-@kx zaq;rRwjueJZLcU!qV4u1xLQ7Vl4biB7hi(*o1*RIc4LBS#RvWI7VI6S8i`vDrL|A1 U2$j^TJLoQx7^0nCw>w+-{}oF*DF6Tf literal 0 HcmV?d00001 diff --git a/docs/pythonEC/doctrees/index.doctree 
b/docs/pythonEC/doctrees/index.doctree new file mode 100644 index 0000000000000000000000000000000000000000..600555a12a287a01b9b57debf0bc81e03ae60f56 GIT binary patch literal 13375 zcmeHOdutrm6_+1+tu0x0;^5}RWnD+BWVI_hcG{@4iE7KWV_O!M-82qnJUerDXQbJg z$vh-&pwL26Y`FiFOerb!6NEs&K>sTA3xpN|{RE}ZmQX0Ph5pXHGqbas-L=&G(NYw! zvvcpc=brbu_uS(jjsNW8#WDGxm^D2kj%+8axt__xL`=~@o4Zl+QL^ykP5jR<*Qwn{{c6S%wi)~{ojPxL40Dc>DZ^aHf{~9}garx3Wme0S=E-YQR zR4cvX#mvxMrklDSvBSmQdl-2}6mb5KnRdX9$lm6)#F~7Nfa9Zg`7X)l|CP-T6H>gG@O5Z8 zN~~jHLl-4IiUJ$rgO!j~9I?$176A1-dKh95zsFs@t$t0JvETh@=*CvSn-XSvx9u#4 zzE0LOdr%WLy`sJpgaf1j+ZYh-n@ zM9;1Joyfxfus*D-b#>b{`ED%;qeL8aY?miD#^VMVZ%DfJ+}zr&achFqOqM+!*^$Ew zQ(~!T*JRe%+>Scz4i7yCegtb>mz%S7u&lSh4Vlc`f3Q#~p#oVfS?8=H)=O(vWi4*# z)-#a4bp}|U#h>T#=SBQs50cpWx>W=CD+=72w<(}8&UcjJyVh%b=nX59}954v~@B$Mq<`T%ST$ycdVDB#gOw!3zC*|)b*ks z-uycp!5J38l#{VnYp=cXEe3xMw($E>ZK-q{F*MkLb5z-~-&tUMRb2J1;P zLQSrzt!~iL%Z`nihZM}QBWrNRcR3OhH$R_IeNE%1w4&oU3 z-Y&%yJ4znQ`5irQ@pBqh*Y?a9p?eJ}rn4+F6n>Mt;!x4z$O7l~BnBB&gVZ%KOCoxnXGpxPK}p z;f;YUQ~8>hRHC|^{3OxEaoaVV*yMC{qkL}tS{GAoeb?lEWF10cNhUXi2idf%9I+(1TvP!H(Tjc|`z>~}-=Ye6i#WT;)UIb4I9*Hm z62Xm*1!`>}b7tYPyS9+E7eLn$R+v4Tu>eXq-5*%#-?c-)Pqw|1jKp z`vpYDhtCUyoRLZ3SR;0Dn${c}$ER+d%Lqdnq!|q%r(Kv9Czj!E%Mam+(xD&rMRMy4 zNqCsCm41(j3EPz?w8BmbbPk4=A}d7Ke+_4^VtpWvgprBBgukK5=9dH0Kfv+O22@l!LfD4}rAOAs1>(t;v!L=d?Ekk>`x9`MoC)}^PXhjP z>LmpHClU^n_?Jw|nO0id9jKMfd9c;1nC{R&Gc4U9=p;kc)yZa^kPJPv?oln9RbUy# z>@dx|zHiOsEEF~KtaW;G;Rz%Zl|%67_5h#h2k-;*GpoENVsEUSdLp$V41^SsoewSnxp^PQ+l16u8M3SJ` zJR(V)T;0{F2*M~?+zSg=(E&4gve^73L^IqXigA&IihQy8i+qL5T`$U%F_RuZwT9yv zTZOFfgW&ksSTpe2Os3nKArJn{#t8<+o|j97Dc|;S%@{DVdbxOwPCXNa-@;J?NB3KLWLS4`Jl!+Be1Gh%?w_>Pt2TGcD>$jSwyaE~ zqU;|p$oxqcv@UlkE-x&V&NgDy17+n5XX{k>R6dXzE@3QW-)3C~Dhs8vA&*e019>P6 zWb}n4)Dfr7UO4RaOphEOlG>l}o&K<@VZL?cT=ff?8h#?)AWK>p|xR?^Nyv zF<&S(sj@T>vz!@%`Rcv<_um1vvKZ1JBbSmXrE9>jm*S<&8 ztX;o+V{PTu>V|fAb>nV%VWH# zNqQ0-o&4-w=_*wwO|#0RZB!Ylxr#Emd0zf~0Y!HR62DVn5d?Tf>CS85cj!KleoMa8 z?~D0nsT)(p9LI9**eIu?(nsIY^Qmy2W4T2T*Ud`#{L)f+As@aEXKr1wgG#wpmq^0G z=c 
zG$Z$9k|3ssjN2Ft*p=0*EB9{RWr$mEU%#??hb=O@Nlyn538R`6g)H4gaTiFdatlc? z&^nH{6XpoA{c;|^ODPbGZeN@naGQewwhf)wl>wNh-<}l^OEo*hx zhorKgGml7wz^l*qpfd675wsEV!^k#Nu%aDuykW^~lRsZ6!pUF?u*$qdK3|mm0+f~;169^EGo`~Za1c`)$ukL% zw0YYLI@!LZv+}tJFGz+?wa$S14jwg_*37Zzf^e7 z=48CiK1c~UoD8bjj0}_6N9M~ps0w4*G{vRtH-(z?qg?8W4w?6IxJ^>5l<~|HUFD6y zq1eOKSu(6QX{1hbH6z@{lwz)+N_b@@#|U+Kag=GMh#pmy*y(OikJ`RMep|xBq`M2r zM6N*>9L66Zofkrcg06I7WeiXrt}4j%BtwT0^86Ma@m%Ix&=(?wC(Be6GgbBZw)(ET z+w)mlzqYcmS}o(0Vy?KR3_axs#Mr);BM8~9bTeA4;v?iCDXoxkRx^6i*&3bNu;Hqz zgX7QyMHzIwC5i`~Spkwk$TJY^!Ho!B;#ZT>LyCIH)l3+NsCtpm_UXI06D>^-&=i!R zej}ibQj$}-?orM`>=y9l!lE%?NCiea5NCLg!mo6LRB#0OCE=-y^4!7gBfTvh+&<(n z=HT`rLlRwF4{jemuca54gWE@TC$E$bZXY;79Naz*ZXXA?58PP}ZXXA?kAvICzHcAu zeFcvtpW?9u&6GnsvBWs;s^}(rEM_gcWjXj0X>>hA%cWJe&Wb5VcUv*N=Ab=Ob{gVw zo9Y>^7bcJJCbRhoy~n6tE>?J+>3M)UnO^7@=^-kXk3-!@sBDtaV^lUd;votzKBHd7 z(#B0|UObIUup2tE1yI9lO$$W;)k``ad3b$MEg&h3>!8kkQ~+oasY)& z9=N(g2v5in=@6v-?vNky*z`0sNdsyQ4Q^h@wc<)?B}Sxe+o7#Y_Jn5Iq26#f7)hy| z%v=R-cO1;180<>F1JaokOv_rh#542@o1hh&O3%;@VKbrfW9Y%kKW`ioC&}MnIc^Jv z=-6$v-rbs;7qdS1wfx_&q9wL-c z(Q}H%J?<}_Te=8&&IV1R4?T@^(Z7_Tr_N(C*)5J55RaO24+Mm|6}25~HXZ2=c#%o1 zYE$9`*koJFoik@ZQ|;-G#a!6Iv+%B#$71Rp_sH~*hj?I&wD)3oH3F}AN`9+P4F!K) zb;pSp3=6&21{AOS4N{gO#Okfsr#KJlWZPRB2GApi5!-FT2GY22iULI#H~PTv#My!_ zJXxexZS7kLOa}dQCOo(U3M|g4H}bRrs4WduXxq_ngrm4CsXm!@zr$9Yriy8g#S!jq zC+Lk6Q!1LkePM|xf1>RdcBdLBWh?(l5CzMMWaB%m3=RJHX9K5tM(Nd34g;Hf29O`r zy-fdp0N|4Zp?+7a-&>z;{v3Y3nb2RqrN4fIzhW^>Q1s|ZYXJ9Hf<2ZXk7Mf$4Sr_5 zBG*};%SEy?Ar>=5{mLC@aKBec2vn#Ma^nn6u~r-*+moZPhjznra+8sRPxlSx(9=B_ zr?XwnWXX4Rs9&`~m}wLgs_};6k*HzVBU0pph ztKE%%KaYC5>wNFktN(lTs_L$h*By1#yrb~Ha7DRM>UOHN_ISNf_S?O15x$u6>z&?f zdmHcW9qJXr#jWE0cB9)W`MvNcNKvX(Yvq<-@7>a?92G8X`=w5`QIFDBMk=F~g}3wy z@NuM5?bLkbQ@HV_YJKlsZ+pGtx0=2`n+ z*zu#Qhmk@BkXRvY?p z@{al4JwTMj(6Qp)RY#cOcO*n63T6_J}!mxzRm35V~ zKn@m=WI<_Te568`nl1mfZWY+;aY^#d8^7SJOT0FS%RYF1()ig+p4zGSt^FVzo4u{g zgIg!N_0rZ`xj0@w4)Oux5=Cd6<}NE`ga<){3NjjUeR`Xwoa=0uinhJsaB(n!o9{dd0XPy`0Fz%R%k;>~N%7FZ&0Q 
zq)OFU3MoK>o5gCY7p~+IbBPzp-wOU_j9yemcXfJ2?8WRc_ zNiV{)M33BBg%ER1zC2Qm{VZMa$Hyh&z{h#=aCUMBaizo&V>eW)ZI2Hl51u*=ukCld zZqozw;DPdiAy=$jQu6oiltldVUocpXCW?0V$Qh&{Sm~F>q(F~x zMKQ)(4HJK)C@Np9JmM2#i?SLdCz-WK01wC9Vj8%jZSyNX0$KeDLc%XrzJo&B1?u*t z1Hn=OMFBH)cx_@pdIdIdd(}E=5ku1xxiX{>CYu7xLo4vh zM7s*M5tACJOv7Un%>;Fty-xZJTwqUvcts(rfOSx zI{+qvQQ^04_Yv_0=kOB7qPoL(%(^MZB)BjMF)}aX5(PE}TIq@$Zq73@h#&bfNqH$p zZXJiNGPm;azY58Xj3`m4nEKd*glKB$6X8I2}?HaomBk z2?*5-mms+sdjdd5x|JA?^49h8ft8R{)@ZnBvRkV$^7FPXnE-2I0<8N^yBE9-8p2+` z1nf%oL8ip6oP}Lk7s(MwR{`GuaJLOTGwwKENt`& zc{G1QS9()EIBv^;t#%Dm&Un0KJq>I{{ilHtY=HidpmS|k2sSJ4!qXFNP1He9E9j~j zS0$Um$58Tv98Euk5-ig}a5sDpUI;(hDAK1dF$MfMDk}WI8~_&ys-m-=qm5(Ogo_?~ zkP^RDtAZzA!5s4i{ZrjjI(dGFbXgln@&v!dH>|SYK6uo{A+=P^+c((c7p~71GVTdS z7p68j%5Eek#~(mn_~aP85gvm#!H+IBZ-qDc=ELBAED7i8;O+RcWvDb_40#*SR>}1uil_z4OQ0PwhKEl2$15|zzSs64^ z2JphC`(V%3+r)|JZ%Obc=OK7-Ii_A4T!R1b)gH7Cqo(g^ln*A_r-;lL7ZPv_2sb0F z{7n@*n$&mz*2-1p#TL*gXD->d>+|#tCiJzzZpdLnOE_e9Ng8Begy3SH^a56DuoeD^ z4X5A=SVP9zHDXXh$~FFdc>3}YKxnI z!FNL|GZ@SS;RV_0NDz>!xdcI)Sq5AgoR4&rnt||?J=42TX-nO0&Qu6q3uS}7#`JdF ziW3WVJk3I|MmPsPidmR}to~hZjPa!#SiY+_=y3(TMjvMj*O8WqkM#FyNFj`|c9dvk zy+Q9N9hQXkforz|UdG;G9C&AnnF%_|&^DP|1K{o52qjy4+e3ue+Xsi)(4c|Q^+{&n z!}`h%S{+m`6qJaj3i?})@W)UxQ*}-u4}|`9R8;ss<_d&nT;X8$zZ75;_J@)s1%k(s z7&D5JPfQh-6mTqCp8&_-fZj|uI9_lxf#YTHWCX{NJQF6i4hU2)!jnYY04?(mLQ1=P zGNACOIuuS&lLl30Iyu;mgN~&Sw%7!Z&WO1;kbuIgMK6KEPXc1c(NQL1iU6%ZOVQtw zkQ?(5l91Awn8^_-Ju$&Rm;_o!!&1V9ty4r68re*8JLxi1A85njd z?HU+;n%!SP ziA4aW9!74lC8!LzxrWP0MZR;zdrdC#_4phE`YxBW84aBbgR1y>A7 zgF{Lw2gl`UH$yC|Ax8$XA;g0Hb3`n^0~Udik!et9?pC`C_MALjBbJO~CP$VmV=TFu zQJ><#k_dK$rBXr?u5oVHAgEPTt0GlYbxEXpC1VoqE4f)|B)(NA?uwyftm8_aTVi?A zb&A@t;0DNMid#tWFpuzR^K|SMrIGQDSwS)r_$wHJ(mR97UaUEw^yCtFYwhG}CiO+S z+MY>3bee3;nF^fhMprY3=*Sfg(R=vBAUb=RX^4I!;EUaCbKa00!#f5>=(1S@1N5XT z0&$hBz>LqAYiVdgh`p7{{~_S4I>>by&xOt(jEV~1KS$8HB<2voxuiqz*GQ5X1?Q(4 z!MW0JGd7ofNMQ4KLZ7A^n|~GR;Mn|I@MOg1F^MK z<<=pAMbZ<~sV@k(RmGZ%2{BJ50luRFyrGfS#mH&6x_p 
zwa`?XcNo8O=vl`n20hu+OheCSL%0K|9Pl`Bm1wxlM&e`)dGQg!*+ytufZr*@I5gKMC>l)nG3#GLZP8>x0vaa zgmP$nThvbTp*}Q{uy5TQR#iake~`pZLjG*fxdiM}K1L&tw)#qv^=Em=N|5t$%;X3; zpRTJ}ufRiYH+o}ZxThThAHAZGkvt*OY)7*=NoE29ts4+PIf&|AILf8lNxp&+OuaUk z2me55_?mFOS5Vxe_*$KM6MkRV=uA!br<#>wyV_P8Pw~IP9Hu`d*G`F5U>yv>I^Nh5 zS!2_a4uISP56V^%JF2E6VvoqBp1#D0s8MT8IO6^DU9VNQStlsECcLm;>5*iNmudy( z%$E2s=$CDpRovJ(NE~g6A3KyPYzfi}B&OCTQ=|!LZgZwWa2{~aW=r6AZcCK$iP;kD zX{K$7-5>))c3=+}*(@3~u;oxNTMii6T%-duCLn&@XfME_fQ&3;CPaTr(LV%?R0o}| z7IJy54@O1Jjp)pXI0mC&xkg+@zu?Ud<=e75HGnMybRkuqafZ8(oNhoV|Us51a|L0Z>Af& ze+25_*!|=1WW?^#SS84A2q(Fa8)J=1Z zu2@VseNOC+AhAMu2MT{$Qg~Y7`D1ygN$~tp%;X5qFO3x5Q}+a3a@JuuJuw$~;v5GA zsD)q@>WdWQGPOXGAN{2imS5!Ro&eoHgG>g{jjz=iEe^ValhtM#;*kZ~Hhhr{Uc6AR z#46ncyhkCEO;Ha5yq7}RD)2_Vk_6rn$u!`dpqK%88`7MRDh`rP@oZL@%8>QrjH`Bf ziU1LmZ%1VPj}D~@WKBbw6qmKhgse%^nllxGQ-D1-WR2fBvVJN)F=Wl2W*S*P6BM2j zfI53(xb=2T6V6t9zNv|EfkvFYBHkDtNg97GheZXLtm-IT+;gGsa#YmZh~$ow$^gDc zIjIe(Tm6ELpVbh_%Z{>}XuvtF#g2CBR5))F%{%ZUiGO!QF%#4d=QWbf^*b@k+TafO z2jVICDg&;{m0V&1j1QFZn=Q{3uV|1 z5R;uG$`*B+HQ!^x%ucA_LGfy+Jjr<9`Q@nilxQnWm+A5fjx-juGbaM(W3nl1gilDu zOskFX?L0&!8{r=?lcSArDwmD|kC0~;0e`fZ0U7G?XSv!efdBu2Oa|bOuhmJ_4*W-X z(@Ci5Pbs4#i}d2GGD;ACK4h9Mh`$iZRv|v>mL$ZFh@(OL1Z@lu-;idx&R_il}9%a%I*MHriRDtVh7?RGhHkoieDOGc(La-KiV#D?Lo#Xl)_{4BM zdzxune-;t2ZrO(f?!FoNG~KxS?NA5D-4DW(5qHN%F2Qa? 
zu)Ee@-NjD)0n~j9vdRT@E3xcQvKJ4)T@$7!kV5@_Q5#K1x_VLVJi^U$BG_FcHQPS| zl>IYF*=dE%pUOj1g3X`6OpdU5T%BaRg5*3$(A#Jk*Mg#TB_|xz`b#M!-_F$`fvLX< znGBd3U#s((9aF1u+oF-`E7`=K#fr7cF=6Y+SrgNf4&pxlcPLxM)~G*{*g7JThOHAc zGGJ>%n$=Pei1U&_@$^=(%KYYkpFtg*ssz49;dR8gyRAPyAQkIiL=w_Fu<;OeLY?5WQV!0lWE zofHU{Jzc<$xR0)wGTdfzUA@%u@p{T)EmB{-@JT!)-9+V@Pdi$hu4&}>p`ZJgf*XL4 zIQ`)zPj48;_DNH3bKJsY$}T3k=q@$OO-lSq$L&`+lant#il$#i&S2L zuE!|HtR}dAx6;|xyOrSjIFj)A^-B3OO4_y~N|~$@1Aq%!n1kK36p>lg-Gz`d+IZEH z(uoLz^w{{2*DXNz0_S3y4K3R4d8fFkN~Bn8e9!UB|_4~ zMN0QWR#+&zPOUBF;-rV|wLHUAi?}(7*OER<>vp=hEa@##ec^j&>Ew_iaNHqKQfW$e zB*}{4Q&=kRpmzJkai(riFU%ih zQ@@_k)Eu{9n48+~H4hg$+;L>=cy!ItH=%207mRFa!UgjN$mHOH$%`D=x#g8nev<<> zjJsfdiBI_FOZYtv>=~^Par7*)9Pq;%A(fL8rN3*7ej|#Hka4uzIdr(TXk-aAj;lzz zFg@ z0-KFhPBB&)b)6~Wg1%2VI*wY5o9J&jaIUZqoIJ;?UlJ8HoUMMw{<>efYs_wZ8F7eD zd_y?RQj#LrDWn}>*o;k6;W}%-6}5)*f;h}l_8~EWT>yQWt_h5(L_UX=p&CAi)!->K z!;CNzYFb0c8Ys%#N6Z0LbP@8&WdUKm4BG+0@l2E@VC0fi4#kdW0jUIi4l~%Kb9k5m z!EciUPp&HzvbWSmr>>K7qicsGMGxnpC@m?x1T#4S|M{{$%rpF zw^l`kc@Nt$w?bVEK`qD^Ll8CsGuh=rpMkd-OlQLJ$?S9_97yS0!jW$KqsWPc2^#RE z-|U&hu{)%e&6x_px1g!Ez#sh17bcEc3Qufdf<5)=*XO`(AlE*>bo03|d0g$+;0DBP zCA@dc9{OX_Ga2CZ`XPAN__5crkpqL+GmRZdkirCJSH@D&B%63I2kwc$a*I`%2i&fS ziV8nEm)uoJ4h{|EN|FQ{kUG5JnFG1T`8CmhGuV}VNr2r|(6=K1b}xr&IM}@gp3=Z> zLf8rPx(+M_{1Vduiyedfa{<36>EM?uFpq068j#jBa&Ybvb&_V!hP!ir-~p+3NK#K1 z>@DS?ETOAgF_R;@TBlB2X>iw`NXkJcPiq;Hxean;ARa<8*egdQa|=s4RG9nfJY6G3 zyaF>hGGdv^2tNLj@K#SG3IA^A)(rw{_1aZ9i+U~zXCHvK8O&uu@V4x9Bm_v&TtZ++ zrUK5=KqmcV&m`b1DOz);Lhu1-stwNKcMfO2g-;C5vZtAbv$sU(tbBE`Qye;)efm(* zENkPyVD=0!B}IaH%Ye=^ z4P;MqkgJp{GG$j1IQGZD_alH~PXG#X9P7bT8jg)iI>A`SA*G;L;ut`y|05(DT-#02w?NhYL8#PmrH?0*FbP?do`7IJN0{eDzb__uQn z?#GBc#2bwyRf7LO0?jn!-=K8WCTA(IUv?(}`@aL-I|8tO*>ZyY$H7w?>^Dh0LH`^> zO@aT!J%FX(6_TC_|4-53ze0M=1jtGzhwD1%a{6$M%@;7mt^4mW6oKX^ik?rSDR536 z;u5MH$4ri>@@%sy&@8swXbCi0X@)@lj8bH-%+o7|imrehj%EP%-Vqg@#gY!y44BB% zHDcIvFq0$0`ZNPN&h1jsa^5fwgBgGtF=+-g;ce0kz_+R@aWMm47R#ff1y%j19SdFv z*#46N6Eg$Y(@dKI&jB#2oY=C_D+~d%%UJ8D4WTpy=CX+cga6ck0$E42S4`V2 
z6T8V#1SrPd%DLd%z-V>M>mr{E?SC^WD*VV?L;I4dLj?MgAiP%#>oZSMdz<#(( zEP))3tDtk~!!f4z8JeH#Lk&p0Ty%OGf&3YH=t{V7J!W#mg|_=jg$}kQkrHr0o|ZDK zavtPx1n1ZzN33!xOFC3=etn*<5fgS}CPyZy2hF6!TdPSV(JRg^Q&DVQc&^s1y6RD* zB{6y#-e$0svA>v|jy%>#%UtYtf~qup&8Rb`vk0n^S~X`X1h+#|ZNVt`okP{P;1h$Y z>}jT4fueB(!_!ShC_!B>(s5RBkA(3=#pq%ap&FyV<&gX!ur+-q z%mqx}9~Cvnb}dERGK8DxCx0Owr|}@gFGLjN_)oVItOqH62C~}Jn0`QmI7spDoHL~y zq$pV)d;;4()6mq#U%f z%ZWLF0{;y8}-TGT{l6dCY35@{z{w5iA`h-e1Bk0*|{WcI2wT9zn270hSFpM zKlKh#p~lr$lB`e2Lso))$6+Q%*mtEm-*^Sn)WnWiG=1=fbbHa}T%8mLE*2mYbHnOa zhT>~=>Wlkf_1lZgd-nuK%pWChw#BN_6?;tN=@7=tS+moV4uFw^tDtNZ8>8Y$V&jN# z+98Yyx)}~(G^9DnCKr-a@tjs{%#isR8FW&d$SCfP$b7X!sUl3AMz|Sd$GIz<$fSqO znF_&;P`?eC<9CkC@5LvE%-PdSBlG8i{!{lX3>~2lW!Gb@kpm<3b4}9CZ%~nYD+@gK zkng23BKDPucJbqI67S`JxgXfCsxe(*zhUAmq^&`;} z-|S@LH^HTa8VHR zls7geWP`u3VR^L}ii1qkkx$6gDS_pWg-ix4kFV89(2nKx4T}Q9>o28fZ;lmh6>Y-q z8(A~clMaI2$DwQ$yQ7LpV)uwp8g@_6$$;GrX^vF|g7_~<6wi^3*;i%IN5SkU#*Uc% zw=8Ko)v;88*=gjG!m>7*yy>J_&6x_pHt3ZNv*UM;*<1L;FgtsiY0Q30q~B)o)=s^R z10(jc?1-HcBJ(CvwwDOQ!%5rm+Xm%pIZ9pzJXdv;E~mMG`n^$6b8LfWoQDRSu3kK< zBXspkA-D_fBa5!UPPu>dfrw5<7#>0qiF1|31*C6d0OHAZ}L!=;Qp^LlOx=phU-OFt>!Z5BoTo?^UENU0nOuU zb*8kVdAd(j;CT8|3hNoM;;h0-2!1_lT6)q!5c~!xTSf4wFp>y9B94aO6SOfPcte`h z>qX;f)8`;QlGg@{Gbp2Aca%v-?EW7tX*#_X>`udwM9tb{!tSIa&6(gZQs|WpyW@9` z-S^=W!|v>9rm=ewbd>^SXYbz4WIG4O@LQCQ%TU{7MFk6vKPH)+u)OJ$R4ey@9H?4K zm)%@={;sH~Iks)gM7wFm^ly%6l^fH)0kY=A^bgu=vtfE(Te@}Q4I)tH^MZLh{;QpY_eYNlB;T`)O1m}}c!lT0A2|EQ$-v|{~V zvkFm{ss=7kWCb0epWHMlVe67x+17iJCV#QenmazUwtZC^<2f_Lt zl&xZYR3b^N9}!2x`U%4KUI*dL6Fnq!+aV_Y;~`Xd?{-;3yI z1pPJ`{wm0tJDw(V&XnR>my8cyhHal|7;fH_s&m3wZ^3<2;Lrmw!_91WrPQV02Vj#7t^b zj4)Ko?MZ3*MXnAB4E{67WWeC~TAlFh7+l?7DW+2Om2BeTWA$RK0!>(a6f)V=^&nV$ zDU_{Zaa1QsEFKX_!{P}V8L+q^&FUoQC4u7Ut>Bi$xEt7N)>pV z#w#f)Ym*6&lO8o^Dg>thUu<|BzjHkPRD5E1oIMQ#k302%u1!tp0pFr5u65RL<9%d4 zwD>tu2X2Wvz@GZ-09=AS(EUV}kA>$>HOk$Ze~FiVD9g@bvstb0-C69EDmNT#`n~WR zo@p#cp33O1POpgeZ9~ny`zntHQ$Rp0 z?aOe)+AD{#FD%~6argTt<5`98i1!lo>=M0yh002L5pK#uLcRwePZp_T4gtMX-EP$# 
zl@%~0KQGELD+xRw2s;+h@qgScnj-Ua}ZiAFPOj4Gmo)$v$TUEc^LrAwCE@Tg{iK_;$ zP^y7+z&V3C6z;}ec!|577?0PfJ709&n|&Ga$?@U}_2QJj*}J`1>-ywoITt!hR0^-T z`O>oAE={zndnfk!2VoqRw|bYJ*6|N?wt5>_|2JHI`DRb1YB!4|UrFP^ciXz>P4Sl$&A+jVW^gqWm5B4Z5MF{{ei*XCn&5S6ZSy=@0fr}8%jfO2Ji}DWwQ4Qt zP(xn}JcnocU{qgt|16v746?>O93_>GXXOxHqIj-6k~iOtXk}7Pg`ffL3Yzevqq7No zeJlK+%Xts{3lbs89z2Gn@{VbzzpaXo4QhSzgKXkYGn#lew&jKJV`*Z?)!r$Q z6(j1+5NjF@)&=;Teb8fE<$Ewb`5&?i3yuSNfJ21Prw%5p2UHvdS#5TmJ_5vQ_-}z^ zFgUe_k1z#fXu0wMPtxbfWZT#E-}V#wZ@bHoPjMld<*?t&7tfpEAgR4Y49YEX&fUW2 z+${;I4o6!4WVPH2eg=#%y9Z?}6YjzHK_&;l>+>ZlblwTf>jpBa6>wrP5jW6j1 z@5Et*pKwT@g~``NO9pD(qH$EdJc7L%|?;Wn(86bNqcQK|^?hdI# z*pQpRiklmA8&C{cs^V9|qAI$ClfQ%8vAf*8xdR`W2U)RMcE-n<$FU)vLtTdPR!1vi zirX!1ujSv?t+vE5)nrR}jk;I#CaZp}%!CkRaLA=faXTdXU{?AD+&_0XD%oSW=4gm# zv%Dq`3Ca8XO3Y;SKEv70d%88)Ycx@-wmZGo_DFwlgQZY8DqM_TCaN8OD)Cv7$a4@3 zc!lf1imC5q4r}(JF)2*w$aEg_c5d6f`?_6!z*V2_zqUe$_va#k@K-+snGF7Fe67L| zsatu+^^;phdADes(4SJkKNTxZ*QYT7CqMNkSkuyzsyZ|035LOEplsDo&8D|Ep(9bu zYwT9|h&Y;`IzbzQpW2Yd%VfBuS3G+<#z7Uo88qj6GKhs!^`9aa)&}olNrMOBA6=Ri zalH^UC1}D_feqTA9!rgS2Ru2dE;R}5C@(9L4)Le#?SEmrQZa8_I)vcpx_?6MlUX18j-;3V66!Vyb3Z|=l#Vnw{tI8FZebz*fh0sj|_q_Cg&Ykr1Cqx zU=5U=Ew~YJG{H^K#vr(cG^eYAo62~}&Ug{o!u$UiFYUC|gx6YR@D7xLwZZ)krHYjt zoCq>-M{qG{2PidkC{?WF(A-DU`XKJEn}GXqP-Blyil*(W zpgBC|3yN3LR*`fM!|%{CTpLJI1TR3UaA(hq7}??J@qQZnuGnR$)$fh>BVmT`g?>$k z8P326x5QruRd7rEkKrjydci`Ya8rzq>wv=k3X7hY1uU#0-&~?}JpEFrsl>_Q4@hDs<8@#c9ZW+yY%2s;DDT3BPxm|BR=rK6 zj{cSu{YV~)lCkt5%w#o| zfItP;hteiete&cMN06leaTRNDp<4xo=$p zc`VEMR2GN#*$JiIdM?mG^%K}Z1qSWh9OVdSns9o6&KF_)pgIQ?Le67jh>sW_0gieZ*h3i%fjuHaYB-N*j11?&l7rpwGhjH+`P;8* z-_B&nC5(qR6M}a(rh6`UnsF*u_etap)!9VFh!CAxqg3ov8}-X}HR=$t(=Ju~sUrJh zmc{M76{RQDw`z^sc@t!HjN5syy%t;C4zDF0G3Y)85x4XDsJ`%?8Jxrv;o@EmEjH(g znX*Y1fI~{DLH=jdcasLm6^A3q7(9Zd@=j=RIEzT|sjc#bY|@u9nv^5%#@r;$4aE$| z&0hKtWKAvTF)ecCEJrZT53K#N2IG7Wvf8We8*MC=j&0D3P26!1&Ht#YeBxC6ltlB2DoCe5if%VkUYQ0vi`^sW!xB#^*RmlI4 zJv+%-XSf{J`-|OLXM#s7X+etNu>$f;w5N)#&TZXB2f+DZbOD@*rLGNlt6a5{g^f#< 
zDZ<3WQU0AeNq@?2KFQimt1O-aSwWGzFz_mSEj9*5o&f?%eEz))~|+44o>TAqe_by!=`XQ^_{@K z0RNS__fdEArpD~KZ%)a-u~d7;3cKmScI(E1Ao?A zjnG|DFWZm)i)8}^B80@1g7C@)t;5Ay05^rU&V^EN+-ql{0D1V zdQw%z<_%2yPbgbm+hDUO3rrK!CksrAh@-{#BxqxZ?=hrVt;Y9o4olv})2HJk^eST{ z+M^lNK?op%dH#_lwF~66WXw=3ng=nwXb-R;%0i=wZkS?iV`L9{ldZ-Ss911udi#9{ z?k$$RG94cu6I&Fg79kxOShc1?)yX2@g^Uwl18@!cLP&fYW;o^~Z`T6$7de1qkX zI7fKeA3*YK@#VzPV@MYuU&;L8e@H33rJsHZ7R1&wW3}2W%X&Yx?t@HLD5eJg<)m&i z2X#RYYsw)Zc?RWRNOM-65hfWHFDYAXY|NlrPPK74x2HjAFxPCF(qQ3?A}MYTz@-dS zfx<-B4i6!loOX=}i(Pj5UAg2%D2lhUekbBs@pT?W!FoxF|6C1KNMe)^s`YBk@zoPhlKec{$8maq(9Mp{~iiG4D6onzp zX^Nsq9aVCy#7oFl5O2+(S3*Hpi=-goZ#bY85e|fCJHuS0AmNWYlq#Y>$iN``XpkV` zZEye-+#^+;g54-BdvtJYbxg(rUyyCl3qx1jy~%6Sm)9aYnR=BS^6Sw;gH|lw=gBLe zbv12uVwu`u(ti3&DRNIfQCF$DV#^iYdudcu`0ClP{-n{&7`wYTA|xY|zQz#$s4XwV zK7zVX-bm^`Ga`v;S<334z8P))mM^c59Fa4d9vPynQWdcWp7$2`K|7!EQ0Iwwys|< z`6ve9AV8crg~tPxD$w*2w%kLE-){P)>f}Lm%p%<1)9BXA#nwSMOsKQp_v?`db~89y z%YM^`ABdG|`Fp|9z>b$eZ-UqEfM;Qf(<0_oQ1qj)6_huUP#?-gC}A%A1!OY#74UT) zbAfF`r7@D+ula>oaaOq}bK$eBY3WH-b0M!=?2AygYA&EY;vv7PTPz}uX89&)V*s>< zG`G)?KuH0KH_8fVneXj=87#p0M4(w5agJ#AEe@p$G)qc`6sEPwba)}n1LjPH;CoQN zEtVO-^HAVp*1{8uWoA!(h5~c+I?(Zd{EVv00iIq9{>Q$-_<6a&IIn78B)(Ri#YOWg ztQSTMzBuY6=@aFR6y~#lHL8Zv^+Yc8y&)QBFJq?$*9&pj-U{ zh@1YDN(hd&f6&ML8lf1^Rgz zU+$-mg8Wv8`Kq`uQT@peQW(D1PiwvdnQZf!O7Y+ekT}PB=Jz3ae-sB3L=apvMtg1r;iqz)ooapy8iu8WEaUo~6I5~7!JO+b%+(U$~b-!$Vy z{{~o4zL111N7Tq|pxg>sZK}h#9`GFJOeqG6WM6PIw*AN(C>pAdIA>lCkgZLaC|5)4 zr^Q5hDHP02l-IzMYNBX^i;R@xjRRM^35W;{sQMbDxr><+yMdDwLEJvcN+u=sO`>Mf z`ML@)878xHcmomNAc;7wrpce@AsLw_@5M}xrpc2cwyG}v=;g3O{QfRie}ob82xKxC zA^2LIQ3ffdA#OF^nA!j4GwL5ETWQl(K#3dq`ILaL-M&6x_pPoaLB(;UBZqeh@;gP3*3lXe?)e zWXIPW%Ni!Ouk#}~2|Le{D<-Z3HmZtKS6|kx>f0f!<5u-&+H0|GRp+&&%eIb7#a8ug zQGMavGinv=W?`)y)gFm`>b0ojCIcYXK6OdMU>}ysJE28_iH&u_<&xND_q>e8p_$P~FhDGLKJ?pc{>io_%)d(^)9}ncx(NcC!fn5=;}iZ#`+D&i zF_Jhpl13<)BbUP$C!6l{w0*tfxa8CxN8M%J!TT*y6JZGny2FzbJ9ra?DtmSR1qSE7a1;Qh!~u`4IiHhA!V4>D;4VyEXKF_!U0!PaJodbS%Z%?xN8e`_g3Lz 
zJXuGlxCi$9%JWL#5yI5)GPBK|-zkxQaxOJlj)}#{2%tWVJAF);d?&L<*reo4@7CT^ z^3aH!B`0Afo3kYU*4`&(ZtWFNMcegc-PwC_uC58c$&(g8lSW^4O-b*<`^a|H7n9M>A#48bM;`!ry$3X}0^dXG*;AV{T z?i88QH10ts$SJ08`co>amq4pj2}|cQc11|*3t3kH(dz5w_*yM1Lt1$sN!O2xKV{S2 z94pS=G_gPLk66>vld94hlpt}Ah3`Lx`M&LmF)+b zj=RYaG0UxF7iTXRV+K5fKUQ08(Kz0>Y;V_E1^^8un=g$x#?%CE1(-fb@%t4pNL2&+ zV9d3A{EMilus7q5A_L`&OUV6Q2_d^0jGh7nJn~DeC#1}_VP z@!eM^18KO)eFi**!lxCQ1d^6qZDNmlO_=CJbTFTOE~E~;r|*V)E@EfJD7$V>m(igx zSiB})wJh`l9_?SQ;yD~N!MP=t!)OepAeR|2dxr=Rb(8205H-0BJDe*VuFOL*GAgdX zOg5uJ;djBzVYUGZ_CDe6DL!8l&)qXfzF(iG7Yu~%h8#Bc2}-$`XR)`A^X#Q8Y4#>K z?WzWWGqrcJYt1}eBL+=jCPxO*t}WJ#SSThsV8xw$vPCa;ZjlNcbDB9(y()M_{l%Av zRa56~c$>jW#_|_rrz4gl{c^F~j_m}rqTx)?%AQGF&_IgSoT(7J7Mf~XpT+NddGo{g z#MWooQ=iM5+)*}!O-=G?mRwK-14c^QQ%x{t61_(DbNN6=<4}L&Bx0Lau;PqBY>BF9=D? z1f^SbP}=fKjaHe?*KETH{q4zv8BleQBIGbz1Y}4bW(Gfa@OdFx(A^P{8Jwc>3ax!D zc8%9oXg+LDhl4;}pSG zredNpsMl>saF?+BZMLbn-0%*{Y^c4HGWAib zOy!yj|0*gfeEG~n4vhqS!Wx2UD*G6G4!e8gXTsIoG*!E&M6BT)BF?LmO-;;$e*sOO zmU-}dP%xhde*{m_JgCVY&4NZ@R^~vWFX*lhBB@>G!1LuC2*up)HM$IpZ1gDbPs z5!aD~xwvizcVaz-MmP;WdnU1-LQ2@2sSrF9nrd55!S5Um9>OPv2HDeKXpm2CgCN1H z;jn^O_)9;4&^6d5S=+$4a3jYA$&%TUuo6T>8_u*vjboDeNE@0y$pQF!;A6VaH5Yb# zO;pruh`lip!~h{z;tIV&T}Cy1yN)@Dc9?@P39KI{-b=zi6j6&yzpiqu8<8G_thO0P z-$EzuQ+$_mrj(sLlD)w@vF%45Hj8*8182?XS++QVo?i|H7J;)=WoMPgq}@< zkU+GEH%fL(KaGxv4&dOGNPibBt%MLk@hL!x6op%2DZ$?tt)R(7m*5z#0jRWY7OMpy z>USkklaM(Zh^gl|i=YlhyzuLw}K%dWOU+ zAS(o%TsKcfL5K6A8|mOlYK{Btwb;&(;I*VnybgfH84@pz>I?6iN!PY22DBaENE{{c z!KnWxgCo~b5|Wg`2e4Fb3TOuDGQFF88;Jf=ioz!{nwH~W33Jo5RfFaHU5 zh$69ayQY3TgM1IRUHpR9f3$YOYS4WhvgS7EzQQu;rs_hyQ<{xBzdT7uA0H+4f8Kw? 
zALnhjYoS8Cd6!W&@FZ*by}gzYiqI~q$I1FmcnQc{)Up=t1EZ}K;R4&8lgrmz71)_j zkXBcCY`fcRHd>vD_EfRexvksi`0d_dY{H3z4+zZ425=J=e^(Qg&QZP8s#o*qu(NCy)EE6_gl>toI1Uw)R>xT)VJ^( z&3fTT%b%>4d%-7xYi8qGwk=^?zZfzN&$y<;Gq5S#vc3e^%UVW-p_S?eO5 z1bn*}zTi}r9R!(nc*gdt#wD4?Ihn@k;x|Mb)+05Z9Fs(8s)Xkyj?{3hN;zob53zWz zTN9`CJSA4OK8w4av~ewzVpC0uaHu9va3kc)dC~^56;9efX2MAuuIG6H-(k=9x>+`- zgXkgaGYl7WiUbwxHq4-qczctq|W@ zobqsv+sc0NOiw(2qY1LQWGe=(ijUmnqYULqh>~qJZU?KMMaQDQ2&?NnrP6uzr7%`r@`B?(?@>g;0Y8=Sy4 zFNF>a58JTGj&D@{D=3)5n@8X&)J>&Ol}1`Ou!y(uriTQk7#+~s})KCv|e_SC1#okOW1>|nkM^uZAah_cRs0ix!PXIFo7byz4*8Y66$ zsC5g7sJxP+Yzp|4?mf>15vx&Avj`$8ofb$=hlpkFX4D;%c4hd8kIv=?j?25jtH|p7aVhW3k=ZEEpzz zS2*{4ai61E zigx)WYhZfP0q|n*8z@^viKrWrC@~_D7VVOtkRjT|kmgKB#!ITj3$bE923P+ngJg=$ zf>P`VS3m1es(`CBa7hhWn@k}tq(9A>3c-rg?Qj*pbGUj1J~6n;o`wNeoq9kkJ}Eum z)=YYc%TuBb?2J0Vp8D*-a6(+pea2PHv6?HmWkaU(m6xhR>AYbr$MQl%KnpvkoAC22 z317*J2B2;kBeow+>lFILn@>zVL-J={_bI;)wT11hTw5 z>bc1{$Q8&Ui5P@fD({3A$TCVD zhQ#GV3F;U*1G<+I)Ugo1fqhF=>vNfFP{+|wifzy;!l5GjU^V2+8PtJng`f^(W*>mPx+c)Z+taF+_O;2#8(Mi%rEancnJEZj7V$|E-EeI15d9Tb9TDKND%KgIPK#K9?8Tu8E; zCAD8jat%{n+Lkl^CgrwaT5%>FcVna0oq|9llz^DeEdvPt3A9d?v~+%BmxR>5gLMhl zAH0IH{(!I5vIdmekH(6#H%(kj@?qAr^rWiP2KPYX92b*(43cL{ZA2VRY7?|ENUb5w z`F)9j9QpAgvnBif3<4%3+gc>uQL@vaR1u4S+CBp^&@%MJ4y6je9!+or0S7rY;S%uo z-+;$lI>q>LOITh_a<-#`30GHHExM;E!bjAT?5Muuc@v@n3rwiU-=x{?i)5u7OUoBh zs7^dnALsgL%e7X!CMqg?+06T6A_^J9_!Lb#93%YfsPiV5jMag#8M4}J5#9ME)@uvS znNo~!$*o`mw*APj*RG6-a!M$oY-qwFUj+>xo<+_~G#-k0BNWVy^5?=+NG`8Xjv!i} zT^;L$zAj6Y1ykr4B(lr8>`D5%ET1&M5>mqst!d=oo)mSGdeH}W3@f7THv0xHP_85? zH?8ZhcjTcK*)s<)lg*w{_AzYXye#l-2WG)zg^Ses0%t)vyrnF_&2pnh9$Ab#hotv|shw%W>` z`dqB!h)e_rI`x2Dv1vVEc1ikbD}Lr3_*K*a_S9zw23>8fd@S6|H%VUNrGwk;+gf|U zNUe9;z3?oa+;WhBlDaawtJ5ojELPrMS*tI!R=(o8S7)T!hB|xqRUQo%Z2(b*(!LCH z*NcV$I%QkOn+La+8>KeCqzXKbPUPT@Pu?@osY~=e6)G#~MR>Yfc^taCC6@Z*1fYwm zyVEr&Qf~#!vTLFovxboKHMjANH{d=zE^K)6B^=d*8vzsdWW)A8xaMNgoAisFE}V|( ziJi@U89gmG!8I3qz3tJqUw#EFxId<=c*&Cu5rs`Af&q;MXJU?%ci_R-#2!^|7laJ? 
zr7^idcZ_e)9rIz&vlyaml=!qWI?80))hjacTDTdiTY0uu`MuSAKM;SNX}I z9xFXq0!?iLzXsTFxBY4#He*j!>(zFrTH0Cclqz^9iA%(X9JR3JZmY|{8aGy^o9DWI z%C^m)Xe_g^X$re?6*WiQ9nQvci$5yqr{az%|u8VAqAQVmffYm9?|8lUo2Hui8v zV{^omm>a9blq`{Bl(kUz3B|t_0{ys;P1LJ-n-mZF+pIkZ!iDI&I30AX)U$Nl$D1K5 zj4ti)lEsl0q1u!;l5P*#Ykg?MQ0x8h5~dhltLtbJeyBIFrl!LSIYd+ZP-gD*8w@h$ zaKeY2EaWL4FN1C09`Oe^O8g zK4bGlPzVh_;NMR8;^bjEF&Pxn*c14rj`bjq$3*dD=*ShJ@EGMF549xaoQ@x1ZjNO~ zc@kx){3KSiE>5m79sdfY*!-vpe^s|Z@E?#bXG{lj1!6jo7ZB4y9YQFL2XV~uX$M3F zd8r~v!iI`2LU&-WumBz8p&XB5Uzz`M5RCU+JZ8WI^X$tna>$yeQ@J8@z0(|OYy#W* zZ)%&43In194TNl9cy_+}%qKLQkVZP157HCJ>ESeFXnOQikYidsH7Y86c-G=93r{&_ z(O^J-hdW3@1m}qH0+aU%)jRWNO9r*XC1MI2K4h*`C#E($hDsg*$H!hvgX>e1^V#np9j?(k`ZkNe| zBKSQ^+_Wn<+AzD-O}iAbI=X33w%1~F)ACx<2deY|;W!s=+J#Yl;VWi?`5^gE0b|-u zK1Xk>nA03n%h}`Hh#CX?W+h>RbFfr{Z+3~=S?-ikU&&@YIip!Q%tdpvj_=JCwpAVl^(**t$o|pyr=wg~|O8WU;z}zF_rCJqM z49BBYe5>PcQq)INlj8V0O>z8Li&i{=O)QQJ-QuVLiZiekt4_CZT%Cb+D8+_%6d_RY zSnzzvm(v-DtOI8tG7Ox7$D1k4mQm3;I2)B67+c8X3^2z$g0H|{a*tpM9|!gbUI|t+ zh(X)g0Vs7>qGrx5d`!BrE$^9pbK>rBVu7ihwx<(?O?czHwS!RSIw5aTbQH^{I zM1M=I`yN49sy#LW@;K?<9TgS6es;~WlJY}y#L3PEeXn1F7yZGo7G2%_POhL6)9C z4wJ`3ZRTNOa)!<3ApqoiRFZGH9A3Z3Lo@QK{2VhmdR5j+UZ#1uY)K?{M>cU;a^>#Y zU_RuqP5CIZ!nK7x%9!oQwe?e$bf~Va^?ABROgI%YIWj>V(JB5cHHjqph0ZNgK~r8Q zmRh&!xk7EkJy!z!GSAg{*=%K=tFyDy5&Myrx!9j>+oDhj&lL?{l305t;khD}Y0gv# zwnI~Go-6#$Jy-kiiFvNrQ=g6>4t<7jIx6Bi+^atbuImfVVWq={bG##Law-G~ILB+0 zaE|>g2h}~m7@QS}#RI^(yP~3IBb?)O7#f_Doef@t{pJN9VUIx=gLABpit&i!UqlMY z_9o!m9nk)1gL4l;;T+DrAD)tMjx{+2&fzzG%Nmgy@W>&gx(l2;Q-^b9DAZ$nnx?w7 zPPJL%r$|NzcDN>&L+HbzCFUVCN1z_#gRq_dQj&JMpxzhr(2P*V=P{Eb%6Q_oR;zdr zL+u&E;lkWb5$@k%VfO4Q_lV&RDJ8ET%hPT)^FInX9DyJ9&vEAe3`;sx;P=}+T_cwK z8Z$YvWEo@0&5ZgGSqf89L3-=N6*#a{jipjtZmZAY13i0q4FdZ_wJPF6g_T5n%b_`J zKjOTbl}6%Qbs9BwjCEYeQ;6l!r(RWWs2vMVg>3F;Nrd9!u{<5SMQLQbV^-YA1bzu4 zP*ucLMs) zp)PxxX{dWWLS0kb&p_Ds+CE&F##RgrhR={TeQG?J+1Z&MuO(Ih(i6xD;ed4z=Q^UI z6BQNy$?S87rf_d)fLiuGcnK0>W&rhR4$_q3FqIuhAnXQoXxb3=A4B0BVZRHWbcCHQ 
z!34P-hYu=k5!6rI0StZ{GRg&m=L;zm!!KVZIV2wvts+%!or-YXXl@NRp!<6y-KPtn ze>@Kn2@U=&W^zP>j^Ttt793McA^2LJ_A}h_Wys+O)UltAxaFfP=}>|C&+>GQSo1h$ za%4?PI3Z(2AHjzso4JLy zoWoje)CWSeSN4HuWAzV=Wlz=7N33`=iajPt2U@2zeUcjT>A+9bWYr}*7joSd6&1dH z_953887YEp2%BqUzk^$m1T%wKPqs=;3R;y-PoUN7pexgcR`)~U9If67PdZvP$t&U1 z%%PzmRN@67r5(r?7le9=?j!HtMF5V1|qa0&#Cw}Hx# z1M^VSuz*Joi&}W8U=g6P~e%vGZF@EqTaC@aT=0$q^o%ZWazWzoiC;gP>%9 zV316>C6>&8C{ODdE_n~+$iOdzOR$%Yxa9RL=}>X&=kj!o81re&Wku+~oQXHs#bzU`R=Os9FuoT1}vQ!t{9SQ($^5nI6nRyZ~5u1+v8jrk}3E^d@K)PpB3T*7(cJ4?qLUEwLPG zcZgPy64r;>90A@jlG8o02PB233wXaO5BUh+y&N++;=6*G-EBNNQJjRSgKzf?QV8Cj zrxy%yycKdd0&wiDBjR`|OFC2l{;@n=BL@97W^!Z@T?munDW=bo*k5yQkqSrix`wG< z6@;Q%OJeRX!P^X0GM0ZKJ000vNWWYxx8p_O3#Q>rs>+^8fK1Y|=1hg)d(c#yFBrdb zkhyT2Kqh;dX^?p>BB)k%s@OWX!#_9>zPzjtH?XXFU}&jqKfq`PS+W^bF4xk~ycc^b zhnoj%8xodW7Znx0W%jvzCEbST?Uh{)#ce8Qj>>pA<{HIIePZy*N@=%Tt%U;anh*&N|U#`k??(Y{4{^SaP z*~vR#DN<7I$kShjL=HfX48%f61nJ_4M9M7bP{HRP<>?x+@^zTWk(Gmbe;;yg;2>Bu z>HS6Zm_(uv!rP?x7vHKAcuw!{-^H@0D2A^1v278)mV16as@;2!C z3HkTgiAo0s8^^3nss~b zsNW_(?lP5(%xuvQH8*_M&KpmDhf6Z_my+CPMKt0FSSPpj_PY&`)i%58hewH1$4_(4 zlyd60WNNS;+deafhPtIyB<&e>4`(*(eD|%6lNXFMTs2H3)del+TeNXDC2cnVcE7`>RW5rs9l`hKrW7fp< zq$=9X>D&z-fU;G`E~R2|Lz>k|&PxKt(^~-`!>spYP)EV6D7=oC zwdqi*a3<4uB?V+{GF^^GI?|k}5PT5ow>gvXJIAa)#3zPX+0#s8)*YbDuBCH4+=Jt|@^Ab`1#t?Z)0p+8=+mjrLZ!dkkdF4R=>KXG(#) zlEcArZ2Qc>-IFZCBqfGgHavlQe+g1MZMgSRpc2QuSHqKzdy}J*0I(V+9+zAl2@@9p z%Kb`=J~*oG-EajE;RU{1zo1+ne35jpL*dMoPMXGJT2zKjms)W7A^O|lxZJby zr072d#RqUZ%}Ki2F(K(Wp;CYk*NL71AEp&j7xPe%AoX)GlOv=)Pg93pLAh?4-8SGA z+uqgNui5sDr`+H{i2XCKyL$W6nTW)2bxi#U%o=+uCFAy7{SwH!1(^)U8egkY7I(tx z$eMx=L<{Lp*|e9%in9tWA?o{B)6$a;f~a2(Wvhr9l}r**N5s((b%HhqL~TfOVmj|7 zq2l?hFqI+eMh2A>WQ{WHh^%jRC{-Y98n&d6tW73lO*+?{sSw-`_1lm&e&@*gEBM5a zHG7(AWR2cnex{RG$O((1({%gt2J^#>)Z3l=I7C-nou-a#YF6PwUn6ct2NbVY`hnTs zQlNhf>`@hoKAdx*O?Mlme=h&!ez0&e`Ks}gQHPk)A zSWDMo9djyUcXTLCqy`B0eWbdJQ+Z<&O6y0}vCW5N zc9JC5Eje;X3Epm%U{$tYnuO_e-K^FuK;SDyho&1N-;{@Ngpr?#nH(|lsZ8~Nzj-&C zUA(a|k3U-c%o*yck*m!D|L%iK2K 
zGLU=uX*qE;dVuZC8Q9-^sqRR#*H+k6_B%U^ol*tqVMfwVbZ9f52OUz%q4j+b6jhDs z$|M*1ek>{~eBbQzQQJ9S43J)-v&DeVO)1=P`hP~mGKOP>3lZFzub%%UWR2tW<(dG| zG!uI(xxVOJ8`rJgabA1629^*Ub*u;m`FGZ2UAENT$xJD@UCLJQbEMqN;P%aV26c+N zT=pb^=)VO$n>Iv04lL)0{sMT?5xs-PAgr(4=9jHSbt`}v2f*b~hb&kYwE))rr8YOGH?)l*@Ilp&MB4#C&z1a7xOQqG4H4NmzgrT$Q?vUH7(%3oM1 z&tnZwPdbQ|awn9nS}Ca8l2%GYI?YN+P|jeb7}Bh>3WuasJZt)t$u-2<<&Qz3W_)NeCd@H;nJK7minXkkw?ZM5tJ#2q_D zXCNyj_t|>M`Uf^kHs}Z@-aWR}2aXan`tpx4wTGJ=rcZJtJPLeORg|s@a@iRF5EV7+ z4(~8g$KdQ=L^L*-6R~faCtm*sp)c}y!!Ay?m`bnhNz-1pBK|D}gec7m6(6rQS0+-4cQYNm?)gOVOp9+}_7#d%z@@2=+ z$#dOAGm{@=b1Shb^f@pl!i1cC*4*@@gCOSs%2tsxDw8B~jtHh9=LEeB$k~wQcug*( z2uLc$Gi77ur(}>w!OSSkj+pr(hf)P*rtxcrzj4Njt&XHr&6x_pA*kPmnejWv%zuVY z3^TK**?^g2XD=Ob%p9wKV9XqIdBuunrzAD~n zbpAwC)NF*#aS{k5s0ER)Z0biS8W6NOi3X&NNdQkL-b*fgCn8;LSp6-?nj2Ps!#Pt5 ztd_hEzK(656|g$aVspfcY9BNmw0keG06OCDUOw@d9A= zBghsPSgjo22buydY<4{B&yrXQ_HoeFboCOOyk;w~j**;z^=i?#=?2yt^U#oh^_iH- z5m@g;o+~qq7B0>s$K|PyEGnwyml~}yJE^_i=y-5iJDl&+u9p3lUuH@(7Dk>>eFD%X z-b=~3JXfy;{N4(g4DcIYt5Rr(-xND8+DL!Ord=N^PMP`H)ol)u3 zb>L5;4zQ;_J5X6lq^vxwZ<4Bz!8BZ};#b)Q^Sd!gi=` z&A-I6g!HJ61i0{~7oNj2jpfKwQRCpD-7pnLZU2gE+efNxsJVAv<t#G)ty?eQwN1rZwZm>|o{V3(LYCrC%aok>MnqSWaifk^2F|bi=uofnlS90Y;KR`DHgKsg)fB7;k9&BH=D`B4c?G6i@$6;yk+ZQE zUgDm5JYJ{n(9m^PupzlWHQ21Y3pdA$E7Xfq{$_ST_zee}@U)3V&fqioGR{>CxaQ_d z%YM5w(XQ^Dfb$_itjk-yOHb>7L~r#rFmi0T{PNA7Ox11{OTLoEgYUpNP#=ZhBDH8S zQwd}z{*scq@JTkE5nK&LC1U0-gqI+vM?+RH1g=wSo9EHUf+tza8hb6zFx7IcT8lmj zs{=@exq-w6=kQFwkH%kk|16t045G(92_=<|XXTJ(qIj-6k~f>IyeR|?XjjmLAN_9X z34DDk{2<`WZiy^z`I8+u>&ox+!t*9L zc581D%596pBU?Bg*%F^fd*KL_P%Za@tw21pcT=`9;obaCm6aDEt?$NqdtFL)SzEI>;ME+wj;gb%baAUp-Dn;Uc@EI(Wn=C71jZv!pT&!x{ zE-6%M+@@aCJUC`Bs}YFa3S|2flw#9+ig2iorQq)&U$_wE5LI0$5RmEA+5N>c-4?#V zgVGyKztsW%g$E|NQluzNy&w~dz6nlz**i{}ceXW4s{h9Y70D-}4+9m`qR8X6*f}Fp zQb4Bn5SjTGa6xn;A*~Di9_emd7YN>YTzp0=1P>sStXae|65qo(vzx6I`7ep=R?`5 z_n*zaZ<;TrU>rU-fu@Uyqj~=mv@v-94QW;1PKYM4YEY=Ebt#hlD@6gDrVip7PCbY zK393@*G}P!FszNleG!RL@E$e2S^6?UWH%8Fg3OhKu|%bbItomXnMxEGQ**GcPGlg} 
zVE7{Cws4`YcXS@-3W_{EDk^+9;?hU>EjP0D^F*^7kh@Kos4bhr+=&yaZ! zeW-$a52xTMOs;^0h9OHalBZ{PS`U;YHh~&{B67?nQ1VG~{}HGaW=F_rv<)Y!wi~5t zu>(u)Y!SZ5(HJS5K66W@)D`U{{i!P@lgDJPDjZ-+OEP6z58{1k9-@*D0_*ofwChg~zb&j@XzLgxg{xSJRokqpl@nSfo{+I4p-yX29gNDvydJ$X^xy?m!A|@s;JP$w)N5q5F zaYQ_?U`ewXxs)@Kt%}ev(%#9geLPRsh@pRrnH(9qa8ILA^NV%fj+k+PaZdDIw&|PB zZBhY!PNPXytfG2Uok`2-Yw$LMn~dvU&Q3=-7fG0l>vqU0a4u;8Qg!xBV!s#ZadW0Z z@B?V7ZNC?O=Qwxi#e{R&(~RR>JVT&Wohr5t?(m1V-Y^i()tWRg&fS#2DO&l3X4HE# z;?+`O$hHw!9Gk3OZ4JCCPtD zM6=ur3>QLHn*!4TxmaL0&pA`d0)u37a4xoeb|HG5t4io8qTPGBO(J`cK=da;kER*X z7oiG{=&SH#MD%(m(h%lcK1X|8UIm4!DzCd0}CzE-D2d%RDCcSZZ6za>xJ7c0;zFdyUYYf7*VF!_U)s&a;cmJ01CF#TZZ^(yU}fxU^C!Z;c=Z-vEHu1UAm3> z1Z>CH3pKTSaN0yT(rI*xwcZXm%!$&#@i#k;>Vi;L=7lRt-Bt@)HzCT}6)vMUwQAk( z?FtvcgoRi1wtL}fIC=rPTx*X{wi>Xhywc@)X|$lE#Sqc#w>!nDCj7Cq)o65}sjye>Ju68&=B@wS<7!WTAhiSe>+@Mykma2q*AO! ze~yH!{R35?VyD>Ywkgp_I8yfaboWAeD{!k4P!UqVmB&-?W-+j%Q4(Nsi?Q zGEHzNA5??qo$Lxn`CcNVNVQA?Q5g+a6uX_qgnSjwhfRgy@^W{oIT48@5SG2D;t>mQ zdIT1@lD%QVj@*~8nr**ZZcH4QLV;T4A80n({)C#7NlB?QaeK90 zxZ?>jE~KFX!)_ULrVWFGjXU_ZTqK*Atk$4&pb3u0C#VEsnc3L%>sT6`mI4EyxF1-> z|J*SzT#e%mB&WU?1hH0~s&;xiS8WJaH2vlTj1oAts$Sxfh;R15M#Bjl8sQ0*PN&)4 zx@8NzZo~N~ol2uV-e~QG5w!+WvIaoPiH<)7?CAhI7Qy?mGtNFA=((abS&9{pZ)EXj zQt`MQ0Y!G7-x{y_ok>;)D1e8Zfug^x4KAF{#BE&|)omcuvFvaD=R~bhDhkq`T&fhI zJ-~QA-hj%Cgphk*w+U6OfYyQ91^RO}a2TWqqMz!MAOk`#z`2v94jhLOO9oCPkX_hq z!3X04rjkHpruO({7{Ldj1r;B5$Q};Ylpxi<34f|stxXimWz@T@)vK{&)W&VUHpz9` z;qYj`etU0McTc#8YZDm0Ac<9QdBfP~Oz?dNOm0>~qd>AcY=!|Tn?!9M9;3a*{VnV@ zlr|atF^Vd+%#?rS5r{?jYO(S~_~$#t;D*IV9R;C~H^M)~VCq=*M+5#T2D^@Df9!^T ziov1{?2jezPm!HV0(tP%kYYDnh_#5qgJn4BF2A(iC;2{1?EQFsj{@lC>es0Ad*100~5Xl*^r#Il!Tkz+CCGc|z{;XOGKWp&k z$N2Ma`174*@bi8A`Ra1``6m9ncLn^sAAjDk5`Nx-KQCMbKlkF#9joEzF8t{n13w4x zhY|I8NLl1zz@FZRPanjey=&m7hChttHzVDVy#afAH9ox_e=a-`elEqI^=sj01O8ln z68v0;KbNk9pKbW_ZzsdgFYxEHr@+sb@aOVV;pb}n`3wB{EBtx62S2aDpBFz7eqPFc z*2B{)@uz$m{8aJhmeb*95B{t=6Moj<4-<(sC>JPu0ejkkPrLDFZ2^8x#h>{b;b$TK zyd8hujX!s6f}gwar*Rhibnxf8&G54ee=Zw?pDXd_3FGjy4u6(zfuB|Q^Gp2s4gTOY 
zvH@N*8{qX*0bWlP;Q93do^K!Ek(&V?$;m{Pox#Kg27kgW>+B$n03%QXj8F}50hldu z2k1Bo(6Q7Fm#gZK4cz6WDkxRd5-@1_8=Unh9oGcv8=em46;Pj3dm6P!!?L8u#a>3K z7i%RhrdnC1|34uV^MMVRqLL}V>PQsbET{~y9SN+Hlk-bKH7>BxmYkoH11af%tul6J zsQ6@HY1KL<2w1-qBo+Z{DqulX3@lP17Q(A$WMPdAU<;(UvKUxP!Gz&$$_!TMsRi(( zrJ#J#9up5>$x#F*o-w zGT~l~Aml|XJ0TnkfSrJ{)FQ{C^ipv97Jo1tCNdZfkQxkGdx1I-f$)OtK!_$I5Ipme zi@>e>#2g~S;VzNkaEr2V$V%V@h6=gqA)Jh$@P%xjgD>mFpB%mu84O=242DWzg++ES z_>vI}so>j`@q1p-nz$l?52%i$A^|loWJLjMVX{3RNQUPTmmc8{0Bs@zK!d^n5CK;1 zWCs8_nE-wE6n`+d5*Z9m6b8d#VDkY>Fr@Zy!|O?KH-Q_nGZxe>NeA^Txl%yq8$)}Z IkZBw}0EXMpJOBUy literal 0 HcmV?d00001 diff --git a/docs/pythonEC/markdown/index.md b/docs/pythonEC/markdown/index.md new file mode 100644 index 000000000..d22061917 --- /dev/null +++ b/docs/pythonEC/markdown/index.md @@ -0,0 +1,82 @@ +# Enterprise Python API + +## Zingg Enterpise EC Entity Resolution Python Package + +Zingg Enterprise Python APIs for entity resolution, identity resolution, +record linkage, data mastering and deduplication using ML ([https://www.zingg.ai](https://www.zingg.ai)) + +**NOTE** + +Requires **python 3.6+**; **spark 3.5.0.** Otherwise, +[`zinggES.enterprise.spark.ESparkClient.EZingg()`](zinggES.md#zinggES.enterprise.spark.ESparkClient.EZingg) cannot be executed + +* [Zingg Enterpise Entity Resolution Package](zinggEC.md) + * [zinggEC.enterprise.common.ApproverArguments](zinggEC.md#zinggec-enterprise-common-approverarguments) + * [`ApproverArguments`](zinggEC.md#zinggEC.enterprise.common.ApproverArguments.ApproverArguments) + * [`ApproverArguments.getApprovalQuery()`](zinggEC.md#zinggEC.enterprise.common.ApproverArguments.ApproverArguments.getApprovalQuery) + * [`ApproverArguments.getArgs()`](zinggEC.md#zinggEC.enterprise.common.ApproverArguments.ApproverArguments.getArgs) + * [`ApproverArguments.getDestination()`](zinggEC.md#zinggEC.enterprise.common.ApproverArguments.ApproverArguments.getDestination) + * 
[`ApproverArguments.getParentArgs()`](zinggEC.md#zinggEC.enterprise.common.ApproverArguments.ApproverArguments.getParentArgs) + * [`ApproverArguments.setApprovalQuery()`](zinggEC.md#zinggEC.enterprise.common.ApproverArguments.ApproverArguments.setApprovalQuery) + * [`ApproverArguments.setArgs()`](zinggEC.md#zinggEC.enterprise.common.ApproverArguments.ApproverArguments.setArgs) + * [`ApproverArguments.setDestination()`](zinggEC.md#zinggEC.enterprise.common.ApproverArguments.ApproverArguments.setDestination) + * [`ApproverArguments.setParentArgs()`](zinggEC.md#zinggEC.enterprise.common.ApproverArguments.ApproverArguments.setParentArgs) + * [zinggEC.enterprise.common.IncrementalArguments](zinggEC.md#zinggec-enterprise-common-incrementalarguments) + * [`IncrementalArguments`](zinggEC.md#zinggEC.enterprise.common.IncrementalArguments.IncrementalArguments) + * [`IncrementalArguments.getArgs()`](zinggEC.md#zinggEC.enterprise.common.IncrementalArguments.IncrementalArguments.getArgs) + * [`IncrementalArguments.getDeleteAction()`](zinggEC.md#zinggEC.enterprise.common.IncrementalArguments.IncrementalArguments.getDeleteAction) + * [`IncrementalArguments.getDeletedData()`](zinggEC.md#zinggEC.enterprise.common.IncrementalArguments.IncrementalArguments.getDeletedData) + * [`IncrementalArguments.getIncrementalData()`](zinggEC.md#zinggEC.enterprise.common.IncrementalArguments.IncrementalArguments.getIncrementalData) + * [`IncrementalArguments.getParentArgs()`](zinggEC.md#zinggEC.enterprise.common.IncrementalArguments.IncrementalArguments.getParentArgs) + * [`IncrementalArguments.setArgs()`](zinggEC.md#zinggEC.enterprise.common.IncrementalArguments.IncrementalArguments.setArgs) + * [`IncrementalArguments.setDeleteAction()`](zinggEC.md#zinggEC.enterprise.common.IncrementalArguments.IncrementalArguments.setDeleteAction) + * [`IncrementalArguments.setDeletedData()`](zinggEC.md#zinggEC.enterprise.common.IncrementalArguments.IncrementalArguments.setDeletedData) + * 
[`IncrementalArguments.setIncrementalData()`](zinggEC.md#zinggEC.enterprise.common.IncrementalArguments.IncrementalArguments.setIncrementalData) + * [`IncrementalArguments.setParentArgs()`](zinggEC.md#zinggEC.enterprise.common.IncrementalArguments.IncrementalArguments.setParentArgs) + * [zinggEC.enterprise.common.MappingMatchType](zinggEC.md#zinggec-enterprise-common-mappingmatchtype) + * [`MappingMatchType`](zinggEC.md#zinggEC.enterprise.common.MappingMatchType.MappingMatchType) + * [`MappingMatchType.getMappingMatchType()`](zinggEC.md#zinggEC.enterprise.common.MappingMatchType.MappingMatchType.getMappingMatchType) + * [zinggEC.enterprise.common.epipes](zinggEC.md#zinggec-enterprise-common-epipes) + * [`ECsvPipe`](zinggEC.md#zinggEC.enterprise.common.epipes.ECsvPipe) + * [`ECsvPipe.setDelimiter()`](zinggEC.md#zinggEC.enterprise.common.epipes.ECsvPipe.setDelimiter) + * [`ECsvPipe.setHeader()`](zinggEC.md#zinggEC.enterprise.common.epipes.ECsvPipe.setHeader) + * [`ECsvPipe.setLocation()`](zinggEC.md#zinggEC.enterprise.common.epipes.ECsvPipe.setLocation) + * [`EPipe`](zinggEC.md#zinggEC.enterprise.common.epipes.EPipe) + * [`EPipe.getPassthroughExpr()`](zinggEC.md#zinggEC.enterprise.common.epipes.EPipe.getPassthroughExpr) + * [`EPipe.getPassthruData()`](zinggEC.md#zinggEC.enterprise.common.epipes.EPipe.getPassthruData) + * [`EPipe.getUsableData()`](zinggEC.md#zinggEC.enterprise.common.epipes.EPipe.getUsableData) + * [`EPipe.hasPassThru()`](zinggEC.md#zinggEC.enterprise.common.epipes.EPipe.hasPassThru) + * [`EPipe.setPassthroughExpr()`](zinggEC.md#zinggEC.enterprise.common.epipes.EPipe.setPassthroughExpr) + * [`InMemoryPipe`](zinggEC.md#zinggEC.enterprise.common.epipes.InMemoryPipe) + * [`InMemoryPipe.getDataset()`](zinggEC.md#zinggEC.enterprise.common.epipes.InMemoryPipe.getDataset) + * [`InMemoryPipe.setDataset()`](zinggEC.md#zinggEC.enterprise.common.epipes.InMemoryPipe.setDataset) + * [`UCPipe`](zinggEC.md#zinggEC.enterprise.common.epipes.UCPipe) + * 
[`UCPipe.setTable()`](zinggEC.md#zinggEC.enterprise.common.epipes.UCPipe.setTable) + * [zinggEC.enterprise.common.EArguments](zinggEC.md#zinggec-enterprise-common-earguments) + * [`DeterministicMatching`](zinggEC.md#zinggEC.enterprise.common.EArguments.DeterministicMatching) + * [`DeterministicMatching.getDeterministicMatching()`](zinggEC.md#zinggEC.enterprise.common.EArguments.DeterministicMatching.getDeterministicMatching) + * [`EArguments`](zinggEC.md#zinggEC.enterprise.common.EArguments.EArguments) + * [`EArguments.getArgs()`](zinggEC.md#zinggEC.enterprise.common.EArguments.EArguments.getArgs) + * [`EArguments.getData()`](zinggEC.md#zinggEC.enterprise.common.EArguments.EArguments.getData) + * [`EArguments.getDeterministicMatching()`](zinggEC.md#zinggEC.enterprise.common.EArguments.EArguments.getDeterministicMatching) + * [`EArguments.getFieldDefinition()`](zinggEC.md#zinggEC.enterprise.common.EArguments.EArguments.getFieldDefinition) + * [`EArguments.getPassthroughExpr()`](zinggEC.md#zinggEC.enterprise.common.EArguments.EArguments.getPassthroughExpr) + * [`EArguments.getPrimaryKey()`](zinggEC.md#zinggEC.enterprise.common.EArguments.EArguments.getPrimaryKey) + * [`EArguments.setArgs()`](zinggEC.md#zinggEC.enterprise.common.EArguments.EArguments.setArgs) + * [`EArguments.setBlockingModel()`](zinggEC.md#zinggEC.enterprise.common.EArguments.EArguments.setBlockingModel) + * [`EArguments.setData()`](zinggEC.md#zinggEC.enterprise.common.EArguments.EArguments.setData) + * [`EArguments.setDeterministicMatchingCondition()`](zinggEC.md#zinggEC.enterprise.common.EArguments.EArguments.setDeterministicMatchingCondition) + * [`EArguments.setFieldDefinition()`](zinggEC.md#zinggEC.enterprise.common.EArguments.EArguments.setFieldDefinition) + * [`EArguments.setPassthroughExpr()`](zinggEC.md#zinggEC.enterprise.common.EArguments.EArguments.setPassthroughExpr) + * [zinggEC.enterprise.common.EFieldDefinition](zinggEC.md#zinggec-enterprise-common-efielddefinition) + * 
[`EFieldDefinition`](zinggEC.md#zinggEC.enterprise.common.EFieldDefinition.EFieldDefinition) + * [`EFieldDefinition.getMatchTypeArray()`](zinggEC.md#zinggEC.enterprise.common.EFieldDefinition.EFieldDefinition.getMatchTypeArray) + * [`EFieldDefinition.getPrimaryKey()`](zinggEC.md#zinggEC.enterprise.common.EFieldDefinition.EFieldDefinition.getPrimaryKey) + * [`EFieldDefinition.setPrimaryKey()`](zinggEC.md#zinggEC.enterprise.common.EFieldDefinition.EFieldDefinition.setPrimaryKey) + +## API Reference + + * [Module Index](py-modindex.html) + * [Index](genindex.html) + * [Search Page](search.html) + diff --git a/docs/pythonEC/markdown/zinggEC.md b/docs/pythonEC/markdown/zinggEC.md new file mode 100644 index 000000000..f5152228d --- /dev/null +++ b/docs/pythonEC/markdown/zinggEC.md @@ -0,0 +1,865 @@ +# Zingg Enterprise EC Entity Resolution Package + +Zingg Enterprise Python APIs for entity resolution, record linkage, data mastering and deduplication using ML +([https://www.zingg.ai](https://www.zingg.ai)) + +requires python 3.6+; spark 3.5.0. Otherwise, +[`zinggES.enterprise.spark.ESparkClient.EZingg()`](#zinggES.enterprise.spark.ESparkClient.EZingg) cannot be executed + +## zinggEC.enterprise.common.ApproverArguments + +This module is to set up the approval feature + +_class +_zinggEC.enterprise.common.ApproverArguments.ApproverArguments[[source]](_modules/zinggEC/enterprise/common/ApproverArguments.html#ApproverArguments) + + + +Bases: `object` + +getApprovalQuery()[[source]](_modules/zinggEC/enterprise/common/ApproverArguments.html#ApproverArguments.getApprovalQuery) + + + +Method to get query for approval + +Returns: + + + +query in string format for approval condition + +Return type: + + + +String + +getArgs()[[source]](_modules/zinggEC/enterprise/common/ApproverArguments.html#ApproverArguments.getArgs) + + + +Method to get ApproverArguments + +Returns: + + + +ApproverArguments parameter value + +Return type: + + + +ApproverArguments + 
+getDestination()[[source]](_modules/zinggEC/enterprise/common/ApproverArguments.html#ApproverArguments.getDestination) + + + +Method to get the destination of output + +Returns: + + + +Array of data contained in EPipes + +Return type: + + + +Array[EPipe] + +getParentArgs()[[source]](_modules/zinggEC/enterprise/common/ApproverArguments.html#ApproverArguments.getParentArgs) + + + +Method to get EArguments + +Returns: + + + +EArguments parameter value + +Return type: + + + +EArguments + +setApprovalQuery(_approval_query_)[[source]](_modules/zinggEC/enterprise/common/ApproverArguments.html#ApproverArguments.setApprovalQuery) + + + +Method to set query for approval + +Parameters: + + + +**approval_query** (_String_) – setting a query in string format for approval +condition + +setArgs(_arguments_obj_)[[source]](_modules/zinggEC/enterprise/common/ApproverArguments.html#ApproverArguments.setArgs) + + + +Method to set ApproverArguments + +Parameters: + + + +**argumentsObj** (_ApproverArguments_) – ApproverArguments object + +setDestination(_* +pipes_)[[source]](_modules/zinggEC/enterprise/common/ApproverArguments.html#ApproverArguments.setDestination) + + + +Method to set the multiple pipes for output destination + +Parameters: + + + +**pipes** (_EPipes_) – EPipes object + +setParentArgs(_argumentsObj_)[[source]](_modules/zinggEC/enterprise/common/ApproverArguments.html#ApproverArguments.setParentArgs) + + + +Method to set EArguments + +Parameters: + + + +**argumentsObj** (_EArguments_) – EArguments object + +## zinggEC.enterprise.common.IncrementalArguments + +This module is to set up the incremental feature + +_class +_zinggEC.enterprise.common.IncrementalArguments.IncrementalArguments[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments) + + + +Bases: `object` + +getArgs()[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments.getArgs) + + + +Method to get IncrementalArguments + 
+Returns: + + + +IncrementalArguments parameter value + +Return type: + + + +IncrementalArguments + +getDeleteAction()[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments.getDeleteAction) + + + +Method to get Delete Action + +Returns: + + + +DeleteAction parameter value + +Return type: + + + +DeleteAction + +getDeletedData()[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments.getDeletedData) + + + +Method to get Deleted Data + +Returns: + + + +data that needs to be deleted from incremental run + +Return type: + + + +Array[EPipe] + +getIncrementalData()[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments.getIncrementalData) + + + +Method to get Incremental Data + +Returns: + + + +data that needs to be passed for incremental run + +Return type: + + + +Array[EPipe] + +getParentArgs()[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments.getParentArgs) + + + +Method to get EArguments + +Returns: + + + +EArguments parameter value + +Return type: + + + +EArguments + +setArgs(_argumentsObj_)[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments.setArgs) + + + +Method to set IncrementalArguments + +Parameters: + + + +**argumentsObj** (_IncrementalArguments_) – IncrementalArguments object + +setDeleteAction(_deleteAction_)[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments.setDeleteAction) + + + +Method to set Delete Action + +Parameters: + + + +**deleteAction** (_DeleteAction_) – DeleteAction object + +setDeletedData(_* +pipes_)[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments.setDeletedData) + + + +Method to set Deleted Data + +Parameters: + + + +**pipes** (_EPipes_) – EPipes object + +setIncrementalData(_* 
+pipes_)[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments.setIncrementalData) + + + +Method to set Incremental Data + +Parameters: + + + +**pipes** (_EPipes_) – EPipes object + +setParentArgs(_argumentsObj_)[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments.setParentArgs) + + + +Method to set EArguments + +Parameters: + + + +**argumentsObj** (_EArguments_) – EArguments object + +## zinggEC.enterprise.common.MappingMatchType + +This module is to work with the mapping match type which is used for +leveraging domain expertise to push matching accuracy. Also saves time +massaging data before matching. + +_class _zinggEC.enterprise.common.MappingMatchType.MappingMatchType(_name_ , +_value_)[[source]](_modules/zinggEC/enterprise/common/MappingMatchType.html#MappingMatchType) + + + +Bases: `object` + +MappingMatchType class for defining mappings required for matching on a field + +Parameters: + + + + * **name** (_String_) – name of the match type - MAPPING + + * **value** (_String_) – name of the json containing mappings + +getMappingMatchType()[[source]](_modules/zinggEC/enterprise/common/MappingMatchType.html#MappingMatchType.getMappingMatchType) + + + +Method to get mapping match type + +Returns: + + + +mapping match type containing name and value + +Return type: + + + +MappingMatchType + +## zinggEC.enterprise.common.epipes + +This module is a submodule of zingg to work with different types of Pipes +supported in Enterprise. Classes of this module inherit the EPipe class, and +use that class to create many different types of pipes. + +_class _zinggEC.enterprise.common.epipes.ECsvPipe(_name_ , _location =None_, +_schema +=None_)[[source]](_modules/zinggEC/enterprise/common/epipes.html#ECsvPipe) + + + +Bases: `EPipe` + +Class CsvPipe: used for working with text files which use a pipe symbol to +separate units of text that belong in different columns. 
+ +Parameters: + + + + * **name** (_String_) – name of the pipe. + + * **location** (_String_ _or_ _None_) – (optional) location from where we read data + + * **schema** (_Schema_ _or_ _None_) – (optional) json schema for the pipe + +setDelimiter(_delimiter_)[[source]](_modules/zinggEC/enterprise/common/epipes.html#ECsvPipe.setDelimiter) + + + +This method is used to define delimiter of CsvPipe + +Parameters: + + + +**delimiter** (_String_) – a sequence of one or more characters for specifying +the boundary between separate, independent regions in data streams + +setHeader(_header_)[[source]](_modules/zinggEC/enterprise/common/epipes.html#ECsvPipe.setHeader) + + + +Method to set header property of pipe + +Parameters: + + + +**header** (_String_ _(__'true' / 'false'__)_) – true if pipe have header, +false otherwise + +setLocation(_location_)[[source]](_modules/zinggEC/enterprise/common/epipes.html#ECsvPipe.setLocation) + + + +Method to set location of pipe + +Parameters: + + + +**location** (_String_) – location from where we read data + +_class _zinggEC.enterprise.common.epipes.EPipe(_name_ , +_format_)[[source]](_modules/zinggEC/enterprise/common/epipes.html#EPipe) + + + +Bases: `Pipe` + +EPipe class for working with different data-pipelines. Actual pipe def in the +args. One pipe can be used at multiple places with different tables, +locations, queries, etc + +Parameters: + + + + * **name** (_String_) – name of the pipe + + * **format** (_Format_) – format of pipe e.g. inMemory, delta, etc. 
+ +getPassthroughExpr()[[source]](_modules/zinggEC/enterprise/common/epipes.html#EPipe.getPassthroughExpr) + + + +Method to get pass through condition + +Returns: + + + +pass through conditions in string format + +Return type: + + + +String + +getPassthruData()[[source]](_modules/zinggEC/enterprise/common/epipes.html#EPipe.getPassthruData) + + + +Method to get records which satisfy pass through condition + +return: pandas or spark dataframe containing records which satisfy pass +through condition :rtype: DataFrame + +getUsableData()[[source]](_modules/zinggEC/enterprise/common/epipes.html#EPipe.getUsableData) + + + +Method to get records which do not satisfy pass through condition + +return: pandas or spark dataframe containing records which do not satisfy pass +through condition :rtype: DataFrame + +hasPassThru()[[source]](_modules/zinggEC/enterprise/common/epipes.html#EPipe.hasPassThru) + + + +Method to check if there is a pass through condition + +Returns: + + + +whether pass through condition is present or not + +Return type: + + + +boolean + +setPassthroughExpr(_passthroughExpr_)[[source]](_modules/zinggEC/enterprise/common/epipes.html#EPipe.setPassthroughExpr) + + + +Method to set pass through condition + +Parameters: + + + +**passthroughExpr** (_String_) – String condition for records to not be +considered + +_class _zinggEC.enterprise.common.epipes.InMemoryPipe(_name_ , _df +=None_)[[source]](_modules/zinggEC/enterprise/common/epipes.html#InMemoryPipe) + + + +Bases: `EPipe` + +Pipe Class for working with InMemory pipeline :param name: name of the pipe +:type name: String :param df: provide dataset for this pipe (optional) :type +df: Dataset or None + +getDataset()[[source]](_modules/zinggEC/enterprise/common/epipes.html#InMemoryPipe.getDataset) + + + +Method to get Dataset from pipe + +Returns: + + + +dataset of the pipe in the format of spark dataset + +Return type: + + + +Dataset + 
+setDataset(_df_)[[source]](_modules/zinggEC/enterprise/common/epipes.html#InMemoryPipe.setDataset) + + + +Method to set DataFrame of the pipe + +Parameters: + + + +**df** (_DataFrame_) – pandas or spark dataframe for the pipe + +_class _zinggEC.enterprise.common.epipes.UCPipe(_name_ , +_table_)[[source]](_modules/zinggEC/enterprise/common/epipes.html#UCPipe) + + + +Bases: `EPipe` + +Pipe Class for working with Delta tables in Unity Catalog of Databricks + +Parameters: + + + + * **name** (_String_) – name of the pipe + + * **table** (_String_) – table from where we read data in the Catalog Volumes + +setTable(_table_)[[source]](_modules/zinggEC/enterprise/common/epipes.html#UCPipe.setTable) + + + +Method to set table in pipe + +Parameters: + + + +**table** (_String_) – table from where we read data + +## zinggEC.enterprise.common.EArguments + +This module is to work with different types of features supported in Zingg +Enterprise. + +_class _zinggEC.enterprise.common.EArguments.DeterministicMatching(_* +matchCond_)[[source]](_modules/zinggEC/enterprise/common/EArguments.html#DeterministicMatching) + + + +Bases: `object` + +getDeterministicMatching()[[source]](_modules/zinggEC/enterprise/common/EArguments.html#DeterministicMatching.getDeterministicMatching) + + + +Method to get DeterministicMatching criteria + +Returns: + + + +DeterministicMatching parameter value + +Return type: + + + +DeterministicMatching + +_class +_zinggEC.enterprise.common.EArguments.EArguments[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments) + + + +Bases: `Arguments` + +getArgs()[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.getArgs) + + + +Method to get EArguments + +Returns: + + + +EArguments parameter value + +Return type: + + + +EArguments + +getData()[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.getData) + + + +Method to get data from multiple pipes + +Returns: + + + +Array of data contained in EPipes + 
+Return type: + + + +Array[EPipe] + +getDeterministicMatching()[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.getDeterministicMatching) + + + +Method to get DeterministicMatching criteria + +Returns: + + + +DeterministicMatching parameter value + +Return type: + + + +DeterministicMatching + +getFieldDefinition()[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.getFieldDefinition) + + + +Method to get all field definitions to be used for matching + +Returns: + + + +all field definitions in list format + +Return type: + + + +List[EFieldDefinition] + +getPassthroughExpr()[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.getPassthroughExpr) + + + +Method to get pass through condition + +Returns: + + + +pass through conditions in string format + +Return type: + + + +String + +getPrimaryKey()[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.getPrimaryKey) + + + +Method to get the fields containing the primary keys + +Returns: + + + +all primary keys defined for field definitions in list format + +Return type: + + + +List[EFieldDefinition] + +setArgs(_argumentsObj_)[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.setArgs) + + + +Method to set EArguments + +Parameters: + + + +**argumentsObj** (_EArguments_) – EArguments object + +setBlockingModel(_blockingModel_)[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.setBlockingModel) + + + +Method to set the Blocking Model used for creating model + +Parameters: + + + +**blockingModel** (_String_) – value as DEFAULT or WIDER + +setData(_* +pipes_)[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.setData) + + + +Method to set the multiple pipes for data + +Parameters: + + + +**pipes** (_EPipes_) – EPipes object + +setDeterministicMatchingCondition(_* 
+detMatchConds_)[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.setDeterministicMatchingCondition) + + + +Method to set the DeterministicMatchingCondition used for matching + +Parameters: + + + +**detMatchConds** (_DeterministicMatching_) – DeterministicMatching object + +setFieldDefinition(_fieldDef_)[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.setFieldDefinition) + + + +Method to set the field definitions + +Parameters: + + + +**fieldDef** (_EFieldDefinition_) – EFieldDefiniton object + +setPassthroughExpr(_passthroughExpr_)[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.setPassthroughExpr) + + + +Method to set pass through condition + +Parameters: + + + +**passthroughExpr** (_String_) – String condition for records to not be +considered + +## zinggEC.enterprise.common.EFieldDefinition + +This module is to work with the extended functionality of field definitions + +_class _zinggEC.enterprise.common.EFieldDefinition.EFieldDefinition(_name_ , +_dataType_ , _* +matchType_)[[source]](_modules/zinggEC/enterprise/common/EFieldDefinition.html#EFieldDefinition) + + + +Bases: `FieldDefinition` + +This class defines each field that we use in matching. We can use this to +configure the properties of each field we use for matching in Zingg. + +Parameters: + + + + * **name** (_String_) – name of the field + + * **dataType** (_String_) – type of the data e.g. string, float, etc. + + * **matchType** (_MatchType_) – match type of this field e.g. FUSSY, EXACT, etc. 
including user-defined mapping match types + +getMatchTypeArray(_matchType_)[[source]](_modules/zinggEC/enterprise/common/EFieldDefinition.html#EFieldDefinition.getMatchTypeArray) + + + +Method to get the match types associated with a field + +Parameters: + + + +**matchType** (_List_ _[__IMatchType_ _]_) – list of match types associated +with a field + +getPrimaryKey()[[source]](_modules/zinggEC/enterprise/common/EFieldDefinition.html#EFieldDefinition.getPrimaryKey) + + + +Method to check if the field contains the primary key + +Returns: + + + +true or false depending on if the field contains the primary key + +Return type: + + + +boolean + +setPrimaryKey(_primaryKey_)[[source]](_modules/zinggEC/enterprise/common/EFieldDefinition.html#EFieldDefinition.setPrimaryKey) + + + +Method to set the field containing the primary key + +Parameters: + + + +**primaryKey** (_boolean_) – true or false depending on if the field contains +the primary key + +[ Previous](index.html "Zingg Enterpise Entity Resolution Python Package") + +* * * + +(C) Copyright 2025, Zingg.AI. + +Built with [Sphinx](https://www.sphinx-doc.org/) using a +[theme](https://github.com/readthedocs/sphinx_rtd_theme) provided by [Read the +Docs](https://readthedocs.org). 
+ diff --git a/docs/pythonES/doctrees/environment.pickle b/docs/pythonES/doctrees/environment.pickle new file mode 100644 index 0000000000000000000000000000000000000000..000bcd8f0356daf61fe0ca4fe44c7c36f13eaf62 GIT binary patch literal 22405 zcmeHPd3YUHb+;{9tF>5mY$wk0vQ`S#ljE#7B-pVXHAIElXLt(Vdo-LZSOt=ttQp{hf1{nJ3APt&;ES*RT5H z>E3hiJ$F0XJ@?#s-pZFBxpf);r9)9uxc*Gp@ejFS;5QsU&eFl_61QI4RRBGljogzR z&4$x9yV=Yp(q5|;RO8TbYHpaNYZa0tcI#!UX~&}ORgSi9XRU2E<_65d}$Db!5GcYT!@0Q(4-lMym52J7j^G-^$2|6PjU< zGZQ<0yCxF(s0sFnU69%9gq{X93L35o>|(R#FL{} zjYc%#RX+A8vgF_~tEv8 z`?qY~a`~oGUa31T6qlu)m|CSn9Nk@M3Ggk+fn3Dc~8TRK!8Nh7_jTtoEf4MrXDLwCMVq)P=4B8 z4kKDMw2s518e^z6>N<97>dcJCKSqY{;CV2SSX&VAsDAf!zm zbo~YG(iIn9j7?otb0+Pi9$O)(hZe`K)i6E3N@RzALhz|@XadL5K`LtmwFLA*>qW4k zIH+1dg?1W*5r-~A&shkyB7udI2*GC^wV+|U*txUNreRHD+5I4Ptk9Ws&}Y>FCE4Jr zxpWmblmtV|*x#I;BIyBIi)VSvLju%NCyFt93QABD17YRGL6fEx(K7ePPNRv zw49h7HW>|iu*a6ubnC$sdf^->M&^ciMM(aD0<6ipjgF6D+E^z$oCW#Fg|w0bISxl=w72gb6t6@)FA0%fA0!%rF{%;wTH zGG#?h6)CIcG$Cupm#2LSJ{g1!J7&4vi8@kb_3V?W(uK6&lIgH7*Sk|?2CaaLF;$DhJrCei_nKv)=1 z1v~~erOjfHa}di83gaO{qv(1xwPSfUa++DXyo`S1dSpSexRbL~PPr;bF^nVHsw5)x zQy@U(dLwK}d@up!GBXsimaUed?ch#Ka&56lu`$u%aN4b9jTHYYJm{;HM})OwLjK=> zD~V}laY#Gt3v?qk7=nVOf`g@zWNm?TRkv9R2|txM_T z9)^RzN%pl<2Be(BtOC0XFxahO-tMI9Ky-H|{xqhw)u3!<&`bPHC42bd}mKoMB;zr%7LYNwm9HkbA2HeL8A$$~twyCiZ z1j_bUYAQrfB+wGKrLse?r3Ll^&tSRMG8WLA1X+jO8j<@lHD2a*a28Ru>k|<|n(z=o z8wC;19|JI5lD8^q~?MSVXw;aXH)qBl$++*KGIl9(P6}w!mQ9BZJw$< zBRbF^GT>PugAOz-A>lli4(U9dD3ld>EIq?ObHvb^<+t}E^WXzv_{VjX; zlqR;{xU2NET{mvOVb8YhJ9pl^Yu~bY zVp~XAv_t`2l^~b~5JZJDPdy03;BW+TP<^s8DC%dm781^+jI|gf-ho;McjV^Y#vG{i zCv43`j+GbCG~&Y!7!fBBH^f3o!#QtHMmZUZr{|#1Ihc^yDjb`Q4@`b_u~eQAU^*6{ zvDvi_pVCI8Elu_{t*G_N!2hB#t)ZXce$9$raBz7l)hD@v4wD$j0$VdS+e+ zXKgYn8&C#%NV~U%pO*fMLOpo~OniAJagR8}OXnBxIL#n5I3mlAaG(gLalD3-11Za+ z5L*y)=@|rQ*wfGik`(FajZ^?>4h{tuxc+2dwG#QD2DcEI1}>oA)w-lW!Cm^u1>V%f zegM-xw%t02AwlmObU`%?V11dh87)8IjP;nSf;4v8dGR%)cc1r^J8`V#Qr|g@Re@$s z+Y=uLpL7j_=@@C$2x}INOIhzNnx$t3#t&@7I41QrP_^8eh2t+9P(Tgx8zO52B7Pot 
zuxd%OWZIbpG1VflRVVBM9DF9hRb=kDJ}J$#lf-6S9>=A1Lud973cMX`2n(!>&Pq6Q zz}Q_Cix9MJv0nsD8KE6zFU`_(s@TXAA5zv=n@wqHEIbpg=Cb*dTT9TzrgttCJW+px zR0$d~nZkgBY1qIx%;hkq2lB1~JnY$rY-z1NA=)j4x-k4)?D7;($j0O>mbmhCW)5q# z&8Nem;Wi5gxQbn!h65!M=>A;kh_l!p^bbPZ(>Vh+E)LVm+c1}2pejAlw9BgA$&c+k z)k0}uVo|U#>_RxoV0-h49!m!{tUJP~H=!;48q7E7QRs~vTyqZ50U9a=r#HtRO+fq` z@?zCWAH~w;m*DJ>7c0|yFo#T6)FHmmN9Z(g;8gJSd0pvi&=L~5E-z|eTQHSIeq3Ic z7jioobkYJE7)zpXtm67(!X-azmKM68^EvIQs=M0M)dg^s<%qGEbhIZngtv;;5!}Ql zEV7t*8Cz{&wo4gK(o|cRu#FEwv0>5K>%@K`7;*}x&P6MIoDdGdpeTZi8gexmIqZ!i zy9q8=u=h&NqY;%s3tA0_-3O2sFb>mIb{!|`iUa$BtdI4&mc(Lc8+aP-475OALwnT< zcHlfMaqDIAZAH1{t$%gH#AhHy$^QjffB~bU@SYyk&ZUZfMHE@aEKMNy{yR zKOHWSyccFG@EGiA*6JG;Vo@FwIMIn0V8rMu%FhKy1FVt|jD3bKaeV>5NlY$IIYnlB zT}pm4Cp-k_vh+C;C7iAciSKz(oDTLO*VXatpwkgw| zPZv%u9OU64LOl{U3u?f>c8Cms6IP0vPSph=7b;y~Fwqq38XSFmTxA=>nXyT;&`k3l z7pX_SPpWj$iI5faFML}qH8FHL%nY>vp*3J)!$;c!4QZ7DUE2axIk?lbJnQEe^YW59b1S zuZi}f&;^dJf^Z%dV`6jKhP%P+zD!&qws;?1zHAwim*SSEos3}Sl}x+nUWi49fr;zH z4qbDT7}d$^#crJ(7khMa!VKr8V;Z*Cq@H0?H;a9`c)uCPEhhEM3B3TfiraL>XNlW& z^4a1JUEzSK`W(~T5_jsFw&}7Ws=7_hjM6cwNmFr32;I6oA+}4$GN~_%uV~Ig3Cuq>!}t@E z`cu>9&rIsiP3kYiU+TtR6MvwU*C?LU}#{iFCNUHs4DUv%#abVMX%R}l-G_QhOb0|C9m$7m=KPm z8hl7xp~E!nMCusBx{&g^@x!?uBxK!Qg0Ir>KFam$+yLbUb#58uXnc%e4dvGA+-a0s zr*dh(Sr2*QI?BuBt*1&Gbd%F5cZSYwq}-W0cNXQ&*12;iSJJt2kr-djV>pkJXLIs= zO0MMO1xU&{Ux<`<5q=mxjKm11MvyYTK89Qjh+L1S=v3NGzB+jaCHGUt73KShL(<}M z3i~g%*yGFfMsCtLy-}S)gbs)U-UV1YI2o#2Fe540;l{4O`9!r~5YRwdyoXjSTb7=4 zlkbod)F~B?gr#7zq+)I0sJqdPs!km`2`<k5D! zl-^SS+=#T6FWv+d+*BytOX+7605?;5UjeWmY194|WaI>&X;QZqE8K>Rnai`Nj-K}I z#muvdZSE+xIZ&vUo|ZdOl-uYy7E0P}YQli^p2O%ri2X;#LdLt3eiXo_bfo~OB0Zr> z;U6##E_j|mB{f7K!9zUHC>oXFRvKrmfWmW%nMq{kbhW8MNzs*t^I}svXkn_$VItPZ z41_(cu&YejKwc(IE;YVFh*{}Z;V#NFvWnjRw4QM~2zMYmu1;WIQlt`B!rmcPNjt{!7DMxt2KZk}&jz~Eip}#2 znRGxo!Ez)E%vz-yy`a#}Ya-(vq#tQEcp;^u0wAVzLO+V34^jGX0Wd@9*#h7QrSB>L zejRDUCP%5@?n3EvDgC?x;2ugpzW{gvrC(S8+)L^E3V{2OHa#3GX6BGFT=_t;=sga>am!S(YEAisW|ZFy`*PLFCTtMHX?) 
zxS*m4Q4R*Z7rlFDw_zF2$4}a;Q0HZ50h|E8!b@EcR$M&~%XSx0W2)tiP}QU&uAshE zFi~0F7)Ir4Hr|$QP%CAC0wKci+~R_EdAfdnp(|Z^RHj3`O+!R*k9V7(Z4H*of%>P< z!=HGYKPdr!l3{PTuj1cN)evv3fv2&eH>ASi#J<_ zOSjXd2F!iBW55k4MX%z(k9D0(PphKVG$QzLmj)kljUr6p1U#0mY9juW#5iB~P~?vc zPXRbmw_Ig}Od#aFpK2Cd#JC{|C#!|F7wNW$$g*)WR>k#t*>&PcMgYWFzor;ADGdQSOR?Y*ZDKzz)U`VWn}*0aF^qs=3d*!}RA!QP;p&4} zgZGO0;epPLU;OGfS8eTJo_L_M^s(ZWHGZP7v8My(=BoWv!Cb21}c45im3ya*e{ zks2=}7F(-%^F1yW?;wph!4C`j;&Pu7inIruOM$j}G!I2-m%4_kW!|mS%kB6fT1op5 zlt$46cKxtD8)XmT0BKREHif?tgW+nPf{%qmQA8VL)EX5mEbM9^a)Z`Wb#WWIz@rvP zs@zuxGiJ^N%))fT8~Gu)QVO

1HQF;1%(7X)nLCDeaX+1x}%7K6rk^FAOs8A>fNDcG^=2?I6k8 z<|1wbmnxlw3zn5q%L3YksNOt7;RFy{R6j{OV;gbe2yFIV0@O)GYQG1OmXq^dh9n$P z591%^^9uY+M^0iIE#vndK{M|${9x?fYw@2n?(6X%DBmdmy;=TyRQ`J#|4Y1g;J+GK zlJ?XC1hJE&Gv6Iv5jkkWpCH`3D8b|g^i~qcCmEt?wykw$!?I>KMRD>4&GG8teUE&h z1@)uoVYLnc#V*-Xq1`0IWYOhga_BZX<9OA0m6u8^a|uAPP-6aqax_r!FjffU!O?Z zr|JP+#C8a`1|5l1pF&^&T{46V${X?WJ(8vazfQ4iJ&-2(NI=l!n99b7#Jcp_F?z2z z#;^6pq;+e2q+_fy#-wT-BQ-uIWztw~Aj`>q6N6vUc72SNmUmokmvaj1tcd1Q2E`|M z6yB%tZ_yq9{P7CDe7pkYn!fFOZ-6{iucMUwl!IQ={v@$P+JC;?bVqZxUce!P=z2*@ zo^wdwQc*nVOTK!SjNT~G(%%xn7HV1cE1tAvA39&fjf~RrC*1RbrN4Bk>y|J5=B1*T zzw}FB7~(mtm5(bOkHmA;0`BVso@pMepWT)gmT;$FAG?Y%9+rl4L$| z${VE?_CshWQM~_@S7mW4b%DT0N9A7c+J`SGoPhGmYF-Pr{P9yC*_w!=z_cGn8hF_; zg*lfx?>c473dNI22_MFw7B$1zs2Y%~bSvUQ_pxK5NCw|R(^ETXEFgn#obr;KEE!yJ zN@S2vZ~;Ah{nUqelJxMkQ^tH8dRU@$Avsa%!X=={sq$G%ortt?l5_y)I|@ZLUq~P? z0UgZ{ZABbD-kzlWR0ljJ*^B%D5`;H@1<;^6DI&s(zvz(r3!rxS@Wu$q0W!#kIvMyW zKM-+LR)KGlJBMGYj}COG!j2|(KdqLX(Zh=Zv`0GO*}X(wndMb>d-rlwWHj5T&?A`z z_XcH47j^8Db&U6b4qI?gM=pZi{Hg5~e2@fZQ=Pxg!ACZbU&)uWNWk+7|Bw0>3eK=N z1@f7Fg6#!7iFBizonjrtM_bxmWmRVR;Q%e^+7_rrLu-w&WO$x+8+hVy)6}Wsb`h>} z#W;L0-FWDv?m}o8I#m{!WqDUsUx%QWKxm;LyrX?ONA7ikBxxI7;9(AgKeN6%)uTs40JkI4@+Itg!B4!sHQM0hehNWZEqDASDL&)Z0 zxP;S_w5z=%XsWPOko4}tkHm0l+fzg|NyAZ3f+Vgbu67ZrJ0EG{`2f-K9maoehRSErrIyUPhr}oG?Ba4MpvHs&L|*VcmkOA;_iDYf03kVprei_PI#6@ zRk{$IDDP*z*PttS{q={eR1^;6>i+z*N4P?tq5Xt~!JYEf2g~|k1}#>r%SzrSG0?PcCqCLjchb}%lAD(f;d3MO z8n&)Jx+h=;*y65_&#Up{b{btaY}%wUIv?o_eY62}D5PbcCILR{lQ-weQ-3F)m= z!F(Cur-1yiGfNp02n17a?~3n&(08AP%ZO2sBHyw@XRBvA6yecV#Vdab#*p^KM7kP= z7JZtE9tl!B2tFiq0E$Sx5H3_4;8(B#eM?DN2P3G3$Fi;H_^s?B2>Goprb=sL`tH}S z{-m{puoJ}Jc%)&$r?=8*VNdYmJlP{4Lz)yY)R4ZDV#nETesE5^ejj$P7nXv2u&6%W zfn}x7h{zawJXBY=M(O(20@`vOIPK$Utz`|w!-4*&WMgXyqr>xGQ@)|KoNm(dF6T97 zo-Ww9xXWxAD`rox#)#S%DKdkFbw|5GHp^|ci{lbk(a%3f>@D#N~Lob0W!r4+Yz{L|q+YtUENjcn~H;SVi&49bWp48e5HmtT`$hY~$nTzo80 z`mlWQsV!&-iuf#-cHlE<{Q0?(T~fn|12u_+Y$&RF2o`6pginLWuSb+@zgA*@6IOvf 
zbyqI&Aa9r~g^5q!*eJ0Usn2dIAznf)7ga0)Hy4#laynoOJcvZ!_=EuJ%KP*l#lB!Z^8NoJWqnEDXYK)>- zrusm!YTsHz?bbE)$--ZI=8eN6WhVcH)~c-7;;$U0RV||-^*=mfMo6HR2@a2pm|k0` z)0mvYl3M?d(kYXAOmu;l7Lx0fs#FZ{|1?V^Yv}oZ2hDCGo?u7cJ}FYb9G1rtO94iD zpqz#gMa4q4thp8eOJL$`e9ibeShLz^%yH1;-3;3>?`W4d^?nWmVEnxC3(<0czfq#t bK!pi)Y*4e~cVTq`CntMI!Y4zMO8LJ5P#Kk@ literal 0 HcmV?d00001 diff --git a/docs/pythonES/doctrees/index.doctree b/docs/pythonES/doctrees/index.doctree new file mode 100644 index 0000000000000000000000000000000000000000..59b1222717c15d4a65ccc5d34cd378d387cb8dad GIT binary patch literal 13374 zcmeHOdutrm6_+1+tu0x0;$RY|ysTA3xpN|{RE}ZmQX0Ph5pXHGqbas-L=&G(NYw! zvvcpc=brbu_uS(jj{oe^m=5%v z(2E0uCt?f`hGjcuz}@6wQnALwq~W!D7FgrrcMoj0)ncn|!~;LDL(Vd^dSrVpYwICq zSkrcRl^FroBW|)rrxf~@?d~qV7u&{G80kU80Q@%Q-ijS|;Z=6wwdD&}mM>jq7nd$x zsg>UGVrJ+r(@ouvSY+9uM9Dyr@9>B_9Ts`41KOEmZ-JyLYs3+=BNkd-?3hfCn8QI! z$?)7rw_V`iEbzRDJ&HUd3OIknOgrF4WN-6YVog3w!12+0e3#_&|H|fv2`OGo_&T&4 zCDyU9p^K6pMS%_R!Ai&~j@V`h3xN6^Jq$63-{Y>{R==jq*zbNcbYm;vO$jr-+jf>i zUngst@&g|7V4Ekgn2humtw^kC>$G)5Olz6RB>0-h4Dj{3<+XV|)LmPzzfV@yHL^O; z23vK%6Iq^H$MUeQmep<7sAfmuPAV9-m2W3luS-TDtar>eXGdJM@3$wi>9Qn ztlxkec>#m&i8-h!WvbHE_n!wXD!m`f~InW;y5>fz82(_NF99QNSYhVuS6r19Ig zN);=L{BXHm-`UxzNoCe_dqE1V0;zrQzd&Nr)x;Y`O~62=VbdHg0#nX{C1T&RgP6(; zVen*ki?@nf3|F?8z}9hKC)O>}`%%EuS~-1@aHrrP?h$^FPmZXU&P07o&(sB7YA%kdge z$b@B^$Q$@IldjGOrrfyQKnOLHn`5FR^KIz4P6x0hoI1N?5dXhQ`9B33Kj{~iIFYit zxPwgR^B~dtjqu}4ZVC^wX;(R7NpiWU0u-Vb{X+Iz>}J1374jBwbdjlD&oXejmhvTn z8yySO+Ct{c!ew`DA!{#yt|Q9F@~r)G7V)yj1R6$m*alfbb??BFo1IW~?@-8`2OhBQbs8bVIHFfC3j!`+r2!4suJKkAF**5{J& zFk>tI9upI`D^F;JofPOC3@t@gh_3${&R)g(KpY7p6M+eTLzB%f2Bv?2<-hmaQc*5> zV1ipv07*mA=H#I<1X>KJsC0y|4-HC>t&a-ClPzaK<$3J?w^92OaF(12_^(d`{&VUj z1pFrw4wU$pOv;&7THGC|mCbpu)vK89&_6RQ-67~CL)F#EW}T1>J+$soEt^$f8O7`{ z&Ahg6&EzZ;HS>aXdUN3^BoviH@aOjcpXmqi1N1Yiye8y&U|5ezeNH20dZyKE?n0Xn zvAtAYS5x`W9Ij9jX+cj7g8$Nb(w@TfFg-a~w=WOM7$|D#ETsPKQwS((iQwPf1AL|> zz-LQn<^lq~Ltg{$a*nLJr+{t}`YZV|<}NJ2%v*!)GlLguco%9Jsa9zeB*;~86p 
ztnmHd_}Ex8@Y+nK+nON{{>;V+2DP4-ONA-l_HoS^Ftd8Kc#WXqljqh}d_VBEd9V_+ zWc6zAD{r`{KT-WkclN*dHjWxNy5H6#!@7s#>7ME3`(tl)|D>&6x4C0p!%5AxWo052 zW&e0V=1;qzb-7D%d10w^t`VagC<|veTc^UO@`2QF31cDq7V9!lSty+gd4xh8$U|Wu zqc1F>j&Q>y*gncDWjRx>vT}%HsoTP@T;jEBch>G{_cvA-)cP85uLpKp4>~t_r*bcd z`9i5lg{6U*<;)Pw*Y7`g@D8Yz#gMkrH0l~qHKU|oI`<{e5(mAeQz_qGSzo`g_FbZ8 z?Z(xcYb&=`H?(`J8~4f!3ngbL3vy;O3t_b84P>7tMlZ!Z^}@iVG*PY&(nu;d09+il z^5W=Cai{AHTuKw8m!f2;8z^NDT%JviMuEa}GPRQ1 z8M!Bu1Tj5i+{R$QuB~2Qxqs^(L)?1%#m-F}?N`Y7;cdV4tjlic2gAvi_qks;S0Uk3vl!7Q)=gZeIhbb>^S*yD~ zB$Wl7c|;-vUVXj?m5Eo6ppB3pMz*1X744AY4NGR5{P|iDP6kteRpuq~`RY6w4E{$J zGrxc(74W$M$v4M!MW|F9Y;3_Dn@8C0_w878xj%$IXe6~?k@ic8sV3N`6RxzrUMGVj%Jo1|DN`+u^ILesbD3{JUx*Z*EK^a;RMqD@>bvr8 z&u4A@&6SPSY8j^#bHz1f=qW!W#`d)wLCAKco6%wwA0ZD(X@!iln$eTa*67rR4Odki z9ET<-%An&dQ9S6(3Xlv!o`GNwZba}BznYXDQq)7PX2Li`)r*9-Pv6CzXlZ(Yrl1V< z8v$*UlAOwQk8%!Tw}3Af7L5TzDlpoCIKz7sex)0vf+NT;2v1#<=MHWk>22xY_92fk z2e%IylIY@kaQpCiExouL+&;29d8K@C`@jj};P!EF`#89L;LdVz`#89L9Na$kefv=F zD|jsV1dk=?r5xIcCB|`AMK{?KF>Bc^%fX*Wqw66$F0Hb4PE0wv+luKm2i=*n(-4o_ zRL^j|FnNqOnaz*sJx29%eXa03)AImzGQH3*(nC}%ABVb+P}wA-$Ea*_#6uKbd`7*D zrG1;$ym%IuU^jGR3!sMAnih%xs+Dv+^6>hiT0l}5*%1=cBhw);Ju2I?0jJgdPGSC+zCcdXTG1^tKmI5)MDhA1#RYO}4o`2CT6?p;m z2%&iq1P}`_?kfnibk!jGz(J3L34n% zs8da#L7|BkCLfC#YCT2!i?$1$x0A%^}XiqGah#{y8q7S{qVCsW%QYpm8y7@=-Arafm48ocavw9qPT6ahswqb5FX599*+$14bIexdqOkqP;WRKjHFaf zX08IaI}T=040fg80qIN%re!T$;yL<-P0)!=rDy1du$fT#G4x>NpEnPQljLu(9Jd8S zbnG@-?{3e{i&>xhTK;ZL%%G>yhqZ%tat(1NYl+2hSW|(k<^?Ty)M9`_f|FI|Q_XM?8Ehn_~d=wHgvQ|B?6>=s82h(}Gi2LeLfirNk~n~wAbyvU?h zwJGr;Y_hH8&Y3fyp*2+o;kmGbXW?BfkHyqI?vd#q5AnblY464GY6M>KjQm!g8Vdfp z>W&jH8Wwu54Jcmu8>B2nh}B!MPjMd9$+ovN44_92BevUw4Wx156a|VfZuEiSiL(V= zc(O>X+S<1gm<;;qOn7hy6j+>7Z{%qMP+JOQhhS*euu3(O%>Cgh$GzH zPS6`Crc^Y6`@#}Y{zTg^>`paM%1-{1APSZh$;NkB8T$M2uLe%_jMA&690oS|3?M(K zdzt?E0Kg{+LjA5-zqdZ!{5kx5GoinJOMm?af5l>&py<(+)&TB_1bZSup2XG}8vN9H zMXs|xlZ#|$LM&#A`jtD*;C`=?5U5Zihu5r literal 0 HcmV?d00001 diff --git a/docs/pythonES/doctrees/zinggES.doctree 
b/docs/pythonES/doctrees/zinggES.doctree new file mode 100644 index 0000000000000000000000000000000000000000..539c30baf010cb12a8278911b5fd65cb8a59e5ec GIT binary patch literal 17351 zcmd^HeT*H~RrlKaZF|>tvTfto$;fH!XT0xi(l%e4DoP1Xo4VaNarm&)GM@L|yq%ey zH}f(d-d)r{g4*CEH=s6C!GHXP1QPrKf1@f0p`s#L5F!3bD+oliph$p(Q2zmb=YGuG znfJM-Di)38w=;L{J@?#mfA^eo&%GzF&3yAuo;kq(V@vI@l_stqH-fNj#~GWag03AT z+1uIq*Rxl%8e530?Kn)MmYuN!sL^sB&uYjTh}W^ub9O7?-WqmbGbTL5yY?-gVU|G-frTA!5TP?*93>M;*~}eV;H9|9ZZ{$$pUIyn~~LXblHAYmn~5> zE^~UuXD&Y%8=WvRAPW+9lnZ+m&#f?O8;}ACebs1NiDh)H81n-b4J&9HZ5u=N-Im3t zCylB6i(go*IZ4us*Vopzx3?R7JPpe|&!?^Ctg!IM%uOaI9TIN{iElWkF|Q;p%!~~n z0Kp2?&J4SI5dQ*oD?>gOT|BOLVLA5!FOC7cp25G5w=sJzu%vn#JpK=~0 z=D^Yln>s&?UwbGqqOj_maxMS|(+XIiRu>BuL+VBLE2)d=H3X83Pc|OC@T9@Ph37{a z4=p~QICiuRaI6~Zz1{Ur8no803p8G703TSE1f2$N)3|traQTey;>&fTWd%W)7){%- zcWmelyPeN=eP2CmV}C&xBwLPqjvMSWtae)}q$52$XajNP4kW2h-$VOlUn!J1-^C~` zUnA!HB3Cvm?A%0>sOgi})CW*TLXAyxuGzufF4RbGw@%{DY#kvqsn;=?UxEf2I`bSx z&`TQ|w}`1frq3h=mqY43*Z6pDF+ z&EtiuH$2laNvvqoPBQl3M1shO`)d-Ca~Q=79Nc#eLQ1iAN>e@=+=({VqkKKjx592O zOoO&Qh1}>kSD}2vRub9zQWpkksB}Slm(2m;ktC3RKc%x)x^MWgtmf(xmF|5$0LF*BjhLe2RWt556c%F~^D}OY{I@hK%L%+y~`DkRm9 z^PY1{_3+h+oeMc%YlW_t&WEn+8&C^_cpfKFZA9!Is2})-CMhCreA{Mo*W`ZDs~P*~ zz^{Pc$#36)u)q5gHaPOmOG^6)+s)!7|03FIXfUcs|3 zIc{u-rD5O+9soQ%Hz0Q~+BJHi8zh8FD!&GdD)+qNLMqqjM@6m>_kT|=Zt{Qr3k&Cv z5_{S#EJ#h_!j9_&y6fDNsoVTdi+eJebIWN6iTSe7OFV#Y2uJ4qVp7MHCE(lxjf@v0GWgBh#M!Bw`*TXJ`j?~It_U<;eC+o(iA4sqbTsO{fQJj0~sa2z>g{f`XS{(!5;eYc@=v8*CN=@%czCQbI}N|rtGfQ*kLS$I%LWjV@HWk4le#)^7%azq`d3+A;~w1?n$qZ zyk}8~8t?)iaCNk^YUxstWQD2Mg4-KJg9jc4b30Gm3RdT-hjCmcqq#i6xumc}*T(M3 zcM-18Cg*5L59CWzD3H9B-eQ_sh{?Jtgh`4=qJo!UbPs&9}?lAlj!{pK3Deuo*cpN zk0RApZu9YY61oT`~p$ zec!m@+S@HeCb0ogkERv0oG{X`GR`m8_q7dE_(8D>tc~`A5%3efytf0E^KH1ZOQZNs zbrfQy72@+gjv5vzVFVL_-i$5MstcPQ{Ni#LfXxcNCf<27Ww~bAd?)pNzJ3LlizYUT zG$~1(dGCV-Ui=0^2K+-&lLm2t25~A^Ihd6il>l^_x^0CB3~4dpBF0;InP-36LX32s zN0qqVTfhFxTqQSz)77{}uH|*P_5pWUUo(6k%ATE5a^LuJzK-svXwNvZj-~2*|4F2T 
zZk*vAYu)^?(oIE?JHCk5AT&}J-a$!uLueGR@%{r;MlCQ6EPAZQJ9H30D#A5#8i&Vg zoL}K{=)A^f(RrPn=$ERB!`yi`8-k)))xGqpQ$hlTTkowvFk(yW{j;1QK(GMV2YegD zXQZ@?$@eeV#@JB@uAs5)CeGmPOxH?qet~^>FG2{cl{AcJ!T?)yJQ|iEkZL5M+y{wR zH>%!n5d=42lK9z3%){b*Lm@-ujm76WMwA8tk@mxu<@>vkSP9}QvhR@hI-XbW3+il%?4t<#OK1k4=?b`7}*_jO#`ZwsSX_$<;~ z;dG3D?lWX`a3))FRb8}XirA?Y(h^kt!)yDJ(k}=py@WrI&I&td)HI_)#X5YQ+;++N#zs`J zX?J^$6}z#{=wdN`r2_P#VrpNhbX2xpm388Uf@z;KJ-n)Jp@j7taDKqg7rcLfZq|!R30NW>4=Q~UujIH{iXB%$joy2IazfbOFvGEz)s3zbd{q@q8fX&K-SW#sT>r+7J#32BT zhgbCXAgqfcIULId2)@(l+#Xv}#T2`a=zQ>q{HWfKqlVmx@c=ikv>b%0Ow}Vw)F0f> z1pa8YN0WF~y3{;wt@c;96`%IbkOg>J1fYH%p7s@NHMflY2dN+78<~q^t{un-VBXQF zXtj3^td;d`&0E#V*nJhY2L`CUPv}j4r15C)ahp{Bo_kqUM8wyIH^9I<;pTp#<;J% zV9*=G;OoTTua0I=29XCd_-i_Yg966o$f|JWN#5nKYEce%rRwp&HgXUn1FBy^(@~bP zJQXq|yu%xn8P9#Pe%?!;28V=y`}RkW-TnwBS>oQeF@byDU*J~{@(!NtztkrC#}dEw zF?cN`NBT&{Zwm(d;1_nQb#i9Cel>N(PwEr#CquJp4Z}lepcWYfX!akVX2sHJv9hO!1!E_)wj;x|-;rD>KdJEg*%N(X z+I@AX1#tg{IvIAj^|uuc%7(EvVFdSGP#LwNHMo?v4S$Dr`;6ey93z5Dvy2Gt{!Zrs z1-fH*2OCQulQ1#Hjpsq$e4`A;$wdq1x_8o>WIq1YSX zOb6ib>8IgHFh_hieW6$?)@h9fb0z31@mwh_^vs(T;-eXrq1c<8(cx8fNPAaMc^{$J zw@`gNq;pY~Al)aa0gxV0XP=?iDuwZo4#&u=57Hw^s!;5y38>0YEE$F|KAZ~0&QCy9 zhGLa1R5off*D@5VHadeF)dXAyg<`#rK&A1|-bd-_JuvEhEPuKOPkY`;{(CR~J;i@d z^WXdU@BQ>k!X6YdEx2drdH7P^G)Br4Jt4aenNTW~v5tGGJe85}A<2u8jp*jJ%zqS( zw3IxfXe5)!zze@`d`PTsp8tg#-Xm6)N^f*7u)A?|8pJ-oWozOl!zS1h>5Vf>&^ zr=`d`JWC~|{-)THbXUzhRq}``p-W$sJ0t_lNEwG1~adHN|` zHrqmlMfsXWKHiSfWhY9%q_1FQ1`nuKOi#10~D1b0N!ZqLji3BmFLekmxhd?L8aOZbAI*)NKE zv7NR<6PKe=>liXq!q_(TmK-I>Wx3(Tu7K>a*>+>A>C-?Ymvd^VVA&nPU}BuUh}-Qz zjjU$oIb|QACv<`<*py9%xglzlK6Ds)VfpK&DR!LJ8;}!h0*JobMMB>fkDX&nJ-b&V z*U(^)E7=3u!8To89+@F7c?+E38rHn<@E<2`V{23 z6m?n^@>EigKdZ=7$}mB8%Z?hZopg8~PzY;I()H2VLSi+sirSPuH_uK3lU*}koOyPT zydJ;Dj>Wq{V(plYyXpA&2mDU^bghFOiMzPeXugtSRmTw2UHo0Vn*3@NilJmrw;bG) zX+gz`^#&<(5u){0+M}~JaK}^eq#8ip9!lJx0}M#LaDo&CvXCm(+%93)4=u46DSLb? 
z17win*TxFoMZ%zCBY|>{t+Y^U%e0Yc^GsQYqaV;>DoW$zLcJnNr#*N#z|} z8-LewZsE`OEM%-xCHc+!fNbXb^mBs3k<0i=*#gzzr;5~`*V*IWVJZzOp$5p&e8>5a zw?IJXDMv{L4DrQrLmZtJHCh0_HK72q t{R+VQI$9LN|NemC+w5=-1-|eP%Pe9jnoUw@i*5S9G)P-QUSQg6{0}tJ8<+q9 literal 0 HcmV?d00001 diff --git a/docs/pythonES/markdown/index.md b/docs/pythonES/markdown/index.md new file mode 100644 index 000000000..cb7d181f1 --- /dev/null +++ b/docs/pythonES/markdown/index.md @@ -0,0 +1,23 @@ +# Enterprise Spark Python API + +## Zingg Enterpise ES Entity Resolution Python Package + +Zingg Enterprise Python APIs for entity resolution, identity resolution, +record linkage, data mastering and deduplication using ML ([https://www.zingg.ai](https://www.zingg.ai)) + +**NOTE** + +Requires **python 3.6+**; **spark 3.5.0.** Otherwise, +[`zinggES.enterprise.spark.ESparkClient.EZingg()`](zinggES.md#zinggES.enterprise.spark.ESparkClient.EZingg) cannot be executed + +* [Zingg Enterpise Entity Resolution Package](zinggES.md) + * [zinggES.enterprise.spark.ESparkClient](zinggES.md#zingges-enterprise-spark-esparkclient) + * [`EZingg`](zinggES.md#zinggES.enterprise.spark.ESparkClient.EZingg) + * [`EZinggWithSpark`](zinggES.md#zinggES.enterprise.spark.ESparkClient.EZinggWithSpark) + +## API Reference + + * [Module Index](py-modindex.html) + * [Index](genindex.html) + * [Search Page](search.html) + diff --git a/docs/pythonES/markdown/zinggES.md b/docs/pythonES/markdown/zinggES.md new file mode 100644 index 000000000..2233974ba --- /dev/null +++ b/docs/pythonES/markdown/zinggES.md @@ -0,0 +1,43 @@ +# Zingg Enterpise ES Entity Resolution Package + +Zingg Enterprise Python APIs for entity resolution, record linkage, data mastering and deduplication using ML +([https://www.zingg.ai](https://www.zingg.ai)) + +requires python 3.6+; spark 3.5.0 Otherwise, +[`zinggES.enterprise.spark.ESparkClient.EZingg()`](#zinggES.enterprise.spark.ESparkClient.EZingg) cannot be executed + + + + + +## zinggES.enterprise.spark.ESparkClient + +This module is the main 
entry point of the Zingg Enterprise Python API + +### *class* zinggES.enterprise.spark.ESparkClient.EZingg(args ,options) + +Bases: `object` + +This class is the main point of interface with the Zingg Enterprise matching +product. Construct a client to Zingg using provided arguments and spark +master. If running locally, set the master to local. This creates a new +session. + +* **Parameters:** + * **args** ([*Arguments*](#zinggEC.enterprise.common.EArguments)) – arguments for training and matching + * **options** ([*ClientOptions*](#zingg.client.ClientOptions)) – client option for this class object + +### *class* zinggES.enterprise.spark.ESparkClient.EZinggWithSpark(args ,options) + +Bases: `object` + +This class is the main point of interface with the Zingg Enterprise matching +product. Construct a client to Zingg using provided arguments and spark +master. If running locally, set the master to local and this uses the current +spark session. + +* **Parameters:** + * **args** ([*Arguments*](#zinggEC.enterprise.common.EArguments)) – arguments for training and matching + * **options** ([*ClientOptions*](#zingg.client.ClientOptions)) – client option for this class object + + diff --git a/docs/python/doctrees/environment.pickle b/docs/pythonOss/doctrees/environment.pickle similarity index 100% rename from docs/python/doctrees/environment.pickle rename to docs/pythonOss/doctrees/environment.pickle diff --git a/docs/python/doctrees/index.doctree b/docs/pythonOss/doctrees/index.doctree similarity index 100% rename from docs/python/doctrees/index.doctree rename to docs/pythonOss/doctrees/index.doctree diff --git a/docs/python/doctrees/zingg.doctree b/docs/pythonOss/doctrees/zingg.doctree similarity index 100% rename from docs/python/doctrees/zingg.doctree rename to docs/pythonOss/doctrees/zingg.doctree diff --git a/docs/python/markdown/index.md b/docs/pythonOss/markdown/index.md similarity index 96% rename from docs/python/markdown/index.md rename to 
docs/pythonOss/markdown/index.md index 61affe955..883f1bbff 100644 --- a/docs/python/markdown/index.md +++ b/docs/pythonOss/markdown/index.md @@ -1,8 +1,8 @@ -# Python API +# Community Python API -## Zingg Entity Resolution Python Package +## Community Zingg Entity Resolution Python Package -Zingg Python APIs for entity resolution, identity resolution, record linkage, data mastering and deduplication using ML ([https://www.zingg.ai](https://www.zingg.ai)) +Community Zingg Python APIs for entity resolution, identity resolution, record linkage, data mastering and deduplication using ML ([https://www.zingg.ai](https://www.zingg.ai)) **NOTE** @@ -158,9 +158,7 @@ args.setZinggDir("models") args.setNumPartitions(4) args.setLabelDataSampleSize(0.5) -#reading dataset into inputPipe and settint it up in 'args' -#below line should not be required if you are reading from in memory dataset -#in that case, replace df with input df +#reading dataset into inputPipe and setting it up in 'args' schema = "id string, fname string, lname string, stNo string, add1 string, add2 string, city string, areacode string, state string, dob string, ssn string" inputPipe = CsvPipe("testFebrl", "examples/febrl/test.csv", schema) args.setData(inputPipe) diff --git a/docs/python/markdown/zingg.md b/docs/pythonOss/markdown/zingg.md similarity index 99% rename from docs/python/markdown/zingg.md rename to docs/pythonOss/markdown/zingg.md index 304c67cb2..305d5e07a 100644 --- a/docs/python/markdown/zingg.md +++ b/docs/pythonOss/markdown/zingg.md @@ -651,7 +651,7 @@ Pipe class for working with different data-pipelines. Actual pipe def in the arg * **Parameters:** * **name** (*String*) – name of the pipe - * **format** (*Format*) – formate of pipe e.g. bigquery,csv, etc. + * **format** (*Format*) – format of pipe e.g. bigquery,csv, etc. 
#### addProperty(name, value) diff --git a/docs/running/databricks.md b/docs/running/databricks.md index c371e9c65..cd9efebd7 100644 --- a/docs/running/databricks.md +++ b/docs/running/databricks.md @@ -7,6 +7,10 @@ description: "Step by Step Identity Resolution with Zingg on\_Databricks" # Running On Databricks -You can run Zingg on Databricks directly using the Databricks notebook interface. All [file formats and data sources and sinks](../dataSourcesAndSinks/) are supported within Databricks. +You can run the **Zingg Community Version** on Databricks directly using the Databricks notebook interface. -This uses the Zingg Python API and an [example notebook](../../examples/databricks/FebrlExample.ipynb) and a [step by step guide](https://www.zingg.ai/documentation-article/identity-resolution-on-databricks-for-customer-360) is available. +All [file formats and data sources and sinks](../dataSourcesAndSinks/) are supported within Databricks with **Zingg Enterprise** including Unity Catalog, Delta file format and InMemory. + +## For running the Zingg Community version on Databricks: + +This uses the Zingg Community Python API and an [example notebook](../../examples/databricks/FebrlExample.ipynb) and a [step by step guide](https://www.zingg.ai/documentation-article/identity-resolution-on-databricks-for-customer-360) is available. diff --git a/docs/working-with-python-enterprise.md b/docs/working-with-python-enterprise.md new file mode 100644 index 000000000..d53d9558c --- /dev/null +++ b/docs/working-with-python-enterprise.md @@ -0,0 +1,86 @@ +--- +description: A whole new way to work with Zingg Enterprise! 
+--- + +# Working With Python + +## Example API Usage + +```python +from zingg.client import * +from zingg.pipes import * +from zinggEC.enterprise.common.ApproverArguments import * +from zinggEC.enterprise.common.IncrementalArguments import * +from zinggEC.enterprise.common.MappingMatchType import * +from zinggEC.enterprise.common.epipes import * +from zinggEC.enterprise.common.EArguments import * +from zinggEC.enterprise.common.EFieldDefinition import EFieldDefinition +from zinggES.enterprise.spark.ESparkClient import * +import os + +#build the arguments for zingg +args = EArguments() +#set field definitions +recId = EFieldDefinition("recId", "string", MatchType.DONT_USE) +recId.setPrimaryKey(True) +fname = EFieldDefinition("fname", "string", MatchType.FUZZY) +# for mapping match type +#fname = EFieldDefinition("fname", "string", MatchType.FUZZY, MappingMatchType("MAPPING", "NICKNAMES_TEST")) +lname = EFieldDefinition("lname", "string", MatchType.FUZZY) +stNo = EFieldDefinition("stNo", "string", MatchType.FUZZY) +add1 = EFieldDefinition("add1","string", MatchType.FUZZY) +add2 = EFieldDefinition("add2", "string", MatchType.FUZZY) +city = EFieldDefinition("city", "string", MatchType.FUZZY) +areacode = EFieldDefinition("areacode", "string", MatchType.FUZZY) +state = EFieldDefinition("state", "string", MatchType.FUZZY) +dob = EFieldDefinition("dob", "string", MatchType.FUZZY) +ssn = EFieldDefinition("ssn", "string", MatchType.FUZZY) + +fieldDefs = [recId, fname, lname, stNo, add1, add2, city, areacode, state, dob, ssn] +args.setFieldDefinition(fieldDefs) +#set the modelid and the zingg dir +args.setModelId("100") +args.setZinggDir("./models") +args.setNumPartitions(4) +args.setLabelDataSampleSize(0.5) + +# Set the blocking strategy for the Zingg Model as either DEFAULT or WIDER - if you do not set anything, the model follows DEFAULT strategy +args.setBlockingModel("DEFAULT") + +#setting pass thru condition +args.setPassthroughExpr("fname = 'matilda'") + +#setting 
deterministic matching conditions +dm1 = DeterministicMatching('fname','stNo','add1') +dm2 = DeterministicMatching('ssn') +dm3 = DeterministicMatching('fname','stNo','lname') +args.setDeterministicMatchingCondition(dm1,dm2,dm3) + +#reading dataset into inputPipe and setting it up in 'args' +#the line below should not be required if you are reading from an in-memory dataset +#in that case, replace df with the input df +schema = "recId string, fname string, lname string, stNo string, add1 string, add2 string, city string, areacode string, state string, dob string, ssn string" +inputPipe = ECsvPipe("testFebrl", "examples/febrl/test.csv", schema) +args.setData(inputPipe) + +outputPipe = ECsvPipe("resultFebrl", "/tmp/febrlOutput") +outputPipe.setHeader("true") +args.setOutput(outputPipe) + +# Zingg execution for the given phase +# options = ClientOptions([ClientOptions.PHASE,"findAndLabel"]) + +options = ClientOptions([ClientOptions.PHASE,"trainMatch"]) +zingg = EZingg(args, options) +zingg.initAndExecute() + +incrArgs = IncrementalArguments() +incrArgs.setParentArgs(args) +incrPipe = ECsvPipe("testFebrlIncr", "examples/febrl/test-incr.csv", schema) +incrArgs.setIncrementalData(incrPipe) + +incrOptions = ClientOptions([ClientOptions.PHASE,"runIncremental"]) +zinggIncr = EZingg(incrArgs, incrOptions) +zinggIncr.initAndExecute() +``` + diff --git a/python/docs/index.rst b/python/docs/index.rst index 0a34058ae..c071c7769 100644 --- a/python/docs/index.rst +++ b/python/docs/index.rst @@ -3,11 +3,11 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. 
-Zingg Entity Resolution Python Package +Community Zingg Entity Resolution Python Package ============================================================================ -Zingg Python APIs for entity resolution, identity resolution, record linkage, data mastering and deduplication using ML +Community Zingg Python APIs for entity resolution, identity resolution, record linkage, data mastering and deduplication using ML (https://www.zingg.ai) @@ -59,9 +59,7 @@ Example API Usage args.setNumPartitions(4) args.setLabelDataSampleSize(0.5) - #reading dataset into inputPipe and settint it up in 'args' - #below line should not be required if you are reading from in memory dataset - #in that case, replace df with input df + #reading dataset into inputPipe and setting it up in 'args' schema = "id string, fname string, lname string, stNo string, add1 string, add2 string, city string, areacode string, state string, dob string, ssn string" inputPipe = CsvPipe("testFebrl", "examples/febrl/test.csv", schema) args.setData(inputPipe) diff --git a/python/zingg/pipes.py b/python/zingg/pipes.py index f8d9219c3..2938beb08 100644 --- a/python/zingg/pipes.py +++ b/python/zingg/pipes.py @@ -38,7 +38,7 @@ class Pipe: :param name: name of the pipe :type name: String - :param format: formate of pipe e.g. bigquery,csv, etc. + :param format: format of pipe e.g. bigquery,csv, etc. 
:type format: Format """ From 993c97f19e3d801d74ec1e10fbf8a05c319f8efd Mon Sep 17 00:00:00 2001 From: Sania Goyal <85894828+sania-16@users.noreply.github.com> Date: Fri, 18 Jul 2025 15:47:11 +0530 Subject: [PATCH 7/7] Documentation Changes PR (#1184) * pipe code refactor * adding generic class * adding tables * databricks docs * enterprise python api * formatting * updating oss docs * updating docs --- docs/SUMMARY.md | 1 + docs/approval.md | 2 - docs/{runApproval.md => lookup.md} | 0 docs/pythonEC/markdown/zinggEC.md | 825 +++++------------- docs/runIncremental.md | 2 +- docs/running/databricks.md | 6 +- .../installing-from-release/README.md | 2 +- 7 files changed, 238 insertions(+), 600 deletions(-) rename docs/{runApproval.md => lookup.md} (100%) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index df3b88bff..f6d7e9cd9 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -47,6 +47,7 @@ * [Finding The Matches](setup/match.md) * [Adding Incremental Data](runIncremental.md) * [Linking Across Datasets](setup/link.md) + * [Lookup Data](lookup.md) * [Explanation of Models](modelexplain.md) * [Approval of Clusters](approval.md) * [Combining Different Match Models](relations.md) diff --git a/docs/approval.md b/docs/approval.md index 0307d89a8..f2262d17c 100644 --- a/docs/approval.md +++ b/docs/approval.md @@ -6,8 +6,6 @@ nav_order: 13 # Approval of Clusters -## - [Zingg Enterprise Feature](#user-content-fn-1)[^1] ### The approval phase is run as follows: diff --git a/docs/runApproval.md b/docs/lookup.md similarity index 100% rename from docs/runApproval.md rename to docs/lookup.md diff --git a/docs/pythonEC/markdown/zinggEC.md b/docs/pythonEC/markdown/zinggEC.md index f5152228d..b56793daf 100644 --- a/docs/pythonEC/markdown/zinggEC.md +++ b/docs/pythonEC/markdown/zinggEC.md @@ -6,412 +6,242 @@ Zingg Enterprise Python APIs for entity resolution, record linkage, data masteri requires python 3.6+; spark 3.5.0 Otherwise, 
[`zinggES.enterprise.spark.ESparkClient.EZingg()`](#zinggES.enterprise.spark.ESparkClient.EZingg) cannot be executed + + + + ## zinggEC.enterprise.common.ApproverArguments This module is to set up the approval feature -_class -_zinggEC.enterprise.common.ApproverArguments.ApproverArguments[[source]](_modules/zinggEC/enterprise/common/ApproverArguments.html#ApproverArguments) - - +### *class* zinggEC.enterprise.common.ApproverArguments.ApproverArguments Bases: `object` -getApprovalQuery()[[source]](_modules/zinggEC/enterprise/common/ApproverArguments.html#ApproverArguments.getApprovalQuery) - - +#### getApprovalQuery() Method to get query for approval -Returns: - - - -query in string format for approval condition - -Return type: - - +* **Returns:** + query in string format for approval condition +* **Return type:** + String -String - -getArgs()[[source]](_modules/zinggEC/enterprise/common/ApproverArguments.html#ApproverArguments.getArgs) - - +#### getArgs() Method to get ApproverArguments -Returns: - - - -ApproverArguments parameter value +* **Returns:** + ApproverArguments parameter value +* **Return type:** + ApproverArguments -Return type: - - - -ApproverArguments - -getDestination()[[source]](_modules/zinggEC/enterprise/common/ApproverArguments.html#ApproverArguments.getDestination) - - +#### getDestination() Method to get the destination of output -Returns: - - - -Array of data contained in EPipes - -Return type: +* **Returns:** + Array of data contained in EPipes +* **Return type:** + Array[EPipe] - - -Array[EPipe] - -getParentArgs()[[source]](_modules/zinggEC/enterprise/common/ApproverArguments.html#ApproverArguments.getParentArgs) - - +#### getParentArgs() Method to get EArguments -Returns: - - - -EArguments parameter value - -Return type: - - - -EArguments +* **Returns:** + EArguments parameter value +* **Return type:** + EArguments 
-setApprovalQuery(_approval_query_)[[source]](_modules/zinggEC/enterprise/common/ApproverArguments.html#ApproverArguments.setApprovalQuery) - - +#### setApprovalQuery(approval_query) Method to set query for approval -Parameters: - - - -**approval_query** (_String_) – setting a query in string format for approval -condition +* **Parameters:** + **approval_query** (*String*) – setting a query in string format for approval condition -setArgs(_arguments_obj_)[[source]](_modules/zinggEC/enterprise/common/ApproverArguments.html#ApproverArguments.setArgs) - - +#### setArgs(arguments_obj) Method to set ApproverArguments -Parameters: - - - -**argumentsObj** (_ApproverArguments_) – ApproverArguments object - -setDestination(_* -pipes_)[[source]](_modules/zinggEC/enterprise/common/ApproverArguments.html#ApproverArguments.setDestination) +* **Parameters:** + **argumentsObj** (*ApproverArguments*) – ApproverArguments object +#### setDestination(*pipes) - Method to set the multiple pipes for output destination -Parameters: - - - -**pipes** (_EPipes_) – EPipes object - -setParentArgs(_argumentsObj_)[[source]](_modules/zinggEC/enterprise/common/ApproverArguments.html#ApproverArguments.setParentArgs) +* **Parameters:** + **pipes** (*EPipes*) – EPipes object - +#### setParentArgs(argumentsObj) Method to set EArguments -Parameters: - - +* **Parameters:** + **argumentsObj** (*EArguments*) – EArguments object -**argumentsObj** (_EArguments_) – EArguments object + -## zinggEC.enterprise.common.IncrementalArguments +## zinggEC.enterprise.common.IncrementalArguments This module is to set up the incremental feature -_class -_zinggEC.enterprise.common.IncrementalArguments.IncrementalArguments[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments) - - +### *class* zinggEC.enterprise.common.IncrementalArguments.IncrementalArguments Bases: `object` 
-getArgs()[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments.getArgs) - - +#### getArgs() Method to get IncrementalArguments -Returns: - - - -IncrementalArguments parameter value - -Return type: +* **Returns:** + IncrementalArguments parameter value +* **Return type:** + IncrementalArguments - - -IncrementalArguments - -getDeleteAction()[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments.getDeleteAction) - - +#### getDeleteAction() Method to get Delete Action -Returns: - - - -DeleteAction parameter value - -Return type: - - - -DeleteAction - -getDeletedData()[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments.getDeletedData) +* **Returns:** + DeleteAction parameter value +* **Return type:** + DeleteAction - +#### getDeletedData() Method to get Deleted Data -Returns: - - - -data that needs to be deleted from incremental run +* **Returns:** + data that needs to be deleted from incremental run +* **Return type:** + Array[EPipe] -Return type: - - - -Array[EPipe] - -getIncrementalData()[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments.getIncrementalData) - - +#### getIncrementalData() Method to get Incremental Data -Returns: - - - -data that needs to be passed for incremental run - -Return type: +* **Returns:** + data that needs to be passed for incremental run +* **Return type:** + Array[EPipe] - - -Array[EPipe] - -getParentArgs()[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments.getParentArgs) - - +#### getParentArgs() Method to get EArguments -Returns: - - - -EArguments parameter value - -Return type: - - - -EArguments +* **Returns:** + EArguments parameter value +* **Return type:** + EArguments -setArgs(_argumentsObj_)[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments.setArgs) - - +#### setArgs(argumentsObj) 
Method to set IncrementalArguments -Parameters: - - - -**argumentsObj** (_IncrementalArguments_) – IncrementalArguments object +* **Parameters:** + **argumentsObj** (*IncrementalArguments*) – IncrementalArguments object -setDeleteAction(_deleteAction_)[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments.setDeleteAction) - - +#### setDeleteAction(deleteAction) Method to set Delete Action -Parameters: - - - -**deleteAction** (_DeleteAction_) – DeleteAction object +* **Parameters:** + **deleteAction** (*DeleteAction*) – DeleteAction object -setDeletedData(_* -pipes_)[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments.setDeletedData) - - +#### setDeletedData(*pipes) Method to set Deleted Data -Parameters: - - - -**pipes** (_EPipes_) – EPipes object - -setIncrementalData(_* -pipes_)[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments.setIncrementalData) +* **Parameters:** + **pipes** (*EPipes*) – EPipes object - +#### setIncrementalData(*pipes) Method to set Incremental Data -Parameters: - - - -**pipes** (_EPipes_) – EPipes object - -setParentArgs(_argumentsObj_)[[source]](_modules/zinggEC/enterprise/common/IncrementalArguments.html#IncrementalArguments.setParentArgs) +* **Parameters:** + **pipes** (*EPipes*) – EPipes object - +#### setParentArgs(argumentsObj) Method to set EArguments -Parameters: +* **Parameters:** + **argumentsObj** (*EArguments*) – EArguments object - - -**argumentsObj** (_EArguments_) – EArguments object + -## zinggEC.enterprise.common.MappingMatchType +## zinggEC.enterprise.common.MappingMatchType This module is to work with the mapping match type which is used for leveraging domain expertise to push matching accuracy. Also saves time massaging data before matching. 
-_class _zinggEC.enterprise.common.MappingMatchType.MappingMatchType(_name_ , -_value_)[[source]](_modules/zinggEC/enterprise/common/MappingMatchType.html#MappingMatchType) - - +### *class* zinggEC.enterprise.common.MappingMatchType.MappingMatchType(name ,value) Bases: `object` MappingMatchType class for defining mappings required for matching on a field -Parameters: - - - - * **name** (_String_) – name of the match type - MAPPING - - * **format** (_String_) – name of the json containing mappings +* **Parameters:** + * **name** (*String*) – name of the match type - MAPPING + * **format** (*String*) – name of the json containing mappings -getMappingMatchType()[[source]](_modules/zinggEC/enterprise/common/MappingMatchType.html#MappingMatchType.getMappingMatchType) - - +#### getMappingMatchType() Method to get mapping match type -Returns: - - - -mapping match type containg name and value - -Return type: - - +* **Returns:** + mapping match type containing name and value +* **Return type:** + MappingMatchType -MappingMatchType + -## zinggEC.enterprise.common.epipes +## zinggEC.enterprise.common.epipes This module is submodule of zingg to work with different types of Pipes supported in Enterprise. Classes of this module inherit the EPipe class, and use that class to create many different types of pipes. -_class _zinggEC.enterprise.common.epipes.ECsvPipe(_name_ , _location =None_, -_schema -=None_)[[source]](_modules/zinggEC/enterprise/common/epipes.html#ECsvPipe) - - +### *class* zinggEC.enterprise.common.epipes.ECsvPipe(name , location =None, schema =None) Bases: `EPipe` Class CsvPipe: used for working with text files which uses a pipe symbol to separate units of text that belong in different columns. -Parameters: - - - - * **name** (_String_) – name of the pipe. - - * **location** (_String_ _or_ _None_) – (optional) location from where we read data +* **Parameters:** + * **name** (*String*) – name of the pipe. 
+ * **location** (*String_or_None*) – (optional) location from where we read data + * **schema** (*Schema_or_None*) – (optional) json schema for the pipe - * **schema** (_Schema_ _or_ _None_) – (optional) json schema for the pipe - -setDelimiter(_delimiter_)[[source]](_modules/zinggEC/enterprise/common/epipes.html#ECsvPipe.setDelimiter) - - +#### setDelimiter(*delimiter*) This method is used to define delimiter of CsvPipe -Parameters: - - - -**delimiter** (_String_) – a sequence of one or more characters for specifying +* **Parameters:** + **delimiter** (*String*) – a sequence of one or more characters for specifying the boundary between separate, independent regions in data streams -setHeader(_header_)[[source]](_modules/zinggEC/enterprise/common/epipes.html#ECsvPipe.setHeader) - - +#### setHeader(*header*) Method to set header property of pipe -Parameters: - - - -**header** (_String_ _(__'true' / 'false'__)_) – true if pipe have header, -false otherwise +* **Parameters:** + **header** (*String('true' / 'false')*) – true if pipe has header, false otherwise -setLocation(_location_)[[source]](_modules/zinggEC/enterprise/common/epipes.html#ECsvPipe.setLocation) - - +#### setLocation(*location*) Method to set location of pipe -Parameters: - - - -**location** (_String_) – location from where we read data +* **Parameters:** + **location** (*String*) – location from where we read data -_class _zinggEC.enterprise.common.epipes.EPipe(_name_ , -_format_)[[source]](_modules/zinggEC/enterprise/common/epipes.html#EPipe) - - +### *class* zinggEC.enterprise.common.epipes.EPipe(name ,format) Bases: `Pipe` @@ -419,447 +249,254 @@ EPipe class for working with different data-pipelines. Actual pipe def in the args. One pipe can be used at multiple places with different tables, locations, queries, etc -Parameters: - - - - * **name** (_String_) – name of the pipe - - * **format** (_Format_) – format of pipe e.g. inMemory, delta, etc. 
+* **Parameters:** + * **name** (*String*) – name of the pipe + * **format** (*Format*) – format of pipe e.g. inMemory, delta, etc. -getPassthroughExpr()[[source]](_modules/zinggEC/enterprise/common/epipes.html#EPipe.getPassthroughExpr) - - +#### getPassthroughExpr() Method to get pass through condition -Returns: - - - -pass through conditions in string format +* **Returns:** + pass through conditions in string format +* **Return type:** + String -Return type: - - - -String - -getPassthruData()[[source]](_modules/zinggEC/enterprise/common/epipes.html#EPipe.getPassthruData) - - +#### getPassthruData() Method to get records which satisfy pass through condition -return: pandas or spark dataframe containing records which satisfy pass -through condition :rtype: DataFrame +* **Returns:** + pandas or spark dataframe containing records which satisfy pass through condition +* **Return type:** + DataFrame -getUsableData()[[source]](_modules/zinggEC/enterprise/common/epipes.html#EPipe.getUsableData) - - +#### getUsableData() Method to get records which do not satisfy pass through condition -return: pandas or spark dataframe containing records which do not satisfy pass -through condition :rtype: DataFrame +* **Returns:** + pandas or spark dataframe containing records which do not satisfy pass through condition +* **Return type:** + DataFrame -hasPassThru()[[source]](_modules/zinggEC/enterprise/common/epipes.html#EPipe.hasPassThru) - - +#### hasPassThru() Method to check if there is a pass through condition -Returns: - - - -whether pass through condition is present or not - -Return type: - - - -boolean +* **Returns:** + whether pass through condition is present or not +* **Return type:** + boolean -setPassthroughExpr(_passthroughExpr_)[[source]](_modules/zinggEC/enterprise/common/epipes.html#EPipe.setPassthroughExpr) - - +#### setPassthroughExpr(*passthroughExpr*) Method to set pass through condition -Parameters: - - - -**passthroughExpr** (_String_) – String condition for 
records to not be -considered +* **Parameters:** + **passthroughExpr** (*String*) – String condition for records to not be considered -_class _zinggEC.enterprise.common.epipes.InMemoryPipe(_name_ , _df -=None_)[[source]](_modules/zinggEC/enterprise/common/epipes.html#InMemoryPipe) - - +### *class* zinggEC.enterprise.common.epipes.InMemoryPipe(name , df=None) Bases: `EPipe` -Pipe Class for working with InMemory pipeline :param name: name of the pipe -:type name: String :param df: provide dataset for this pipe (optional) :type -df: Dataset or None - -getDataset()[[source]](_modules/zinggEC/enterprise/common/epipes.html#InMemoryPipe.getDataset) +Pipe Class for working with InMemory pipeline - - -Method to get Dataset from pipe -Returns: +* **Parameters:** + * **name** (*String*) – name of the pipe + * **df** (*Dataset or None*) – provide dataset for this pipe(optional) - - -dataset of the pipe in the format of spark dataset - -Return type: - - +#### getDataset() -Dataset +Method to get Dataset from pipe -setDataset(_df_)[[source]](_modules/zinggEC/enterprise/common/epipes.html#InMemoryPipe.setDataset) +* **Returns:** + dataset of the pipe in the format of spark dataset +* **Return type:** + Dataset - +#### setDataset(*df*) Method to set DataFrame of the pipe -Parameters: - - - -**df** (_DataFrame_) – pandas or spark dataframe for the pipe +* **Parameters:** + **df** (*DataFrame*) – pandas or spark dataframe for the pipe -_class _zinggEC.enterprise.common.epipes.UCPipe(_name_ , -_table_)[[source]](_modules/zinggEC/enterprise/common/epipes.html#UCPipe) - - +### *class* zinggEC.enterprise.common.epipes.UCPipe(name ,table) Bases: `EPipe` Pipe Class for working with Delta tables in Unity Catalog of Databricks -Parameters: - - - - * **name** (_String_) – name of the pipe - - * **table** (_String_) – table from where we read data in the Catalog Volumes +* **Parameters:** + * **name** (*String*) – name of the pipe + * **table** (*String*) – table from where we read data 
in the Catalog Volumes -setTable(_table_)[[source]](_modules/zinggEC/enterprise/common/epipes.html#UCPipe.setTable) - - +#### setTable(table) Method to set table in pipe -Parameters: - - +* **Parameters:** + **table** (*String*) – table from where we read data -**table** (_String_) – table from where we read data + -## zinggEC.enterprise.common.EArguments +## zinggEC.enterprise.common.EArguments This module is to work with different types of features supported in Zingg Enterprise. -_class _zinggEC.enterprise.common.EArguments.DeterministicMatching(_* -matchCond_)[[source]](_modules/zinggEC/enterprise/common/EArguments.html#DeterministicMatching) - - +### *class* zinggEC.enterprise.common.EArguments.DeterministicMatching(*matchCond) Bases: `object` -getDeterministicMatching()[[source]](_modules/zinggEC/enterprise/common/EArguments.html#DeterministicMatching.getDeterministicMatching) - - +#### getDeterministicMatching() Method to get DeterministicMatching criteria -Returns: - - - -DeterministicMatching parameter value +* **Returns:** + DeterministicMatching parameter value +* **Return type:** + DeterministicMatching -Return type: - - - -DeterministicMatching - -_class -_zinggEC.enterprise.common.EArguments.EArguments[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments) - - +### *class* zinggEC.enterprise.common.EArguments.EArguments Bases: `Arguments` -getArgs()[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.getArgs) - - +#### getArgs() Method to get EArguments -Returns: - - - -EArguments parameter value +* **Returns:** + EArguments parameter value +* **Return type:** + EArguments -Return type: - - - -EArguments - -getData()[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.getData) - - +#### getData() Method to get data from multiple pipes -Returns: - - - -Array of data contained in EPipes - -Return type: - - - -Array[EPipe] - 
-getDeterministicMatching()[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.getDeterministicMatching) +* **Returns:** + Array of data contained in EPipes +* **Return type:** + Array[EPipe] - +#### getDeterministicMatching() Method to get DeterministicMatching criteria -Returns: - - - -DeterministicMatching parameter value - -Return type: - - - -DeterministicMatching +* **Returns:** + DeterministicMatching parameter value +* **Return type:** + DeterministicMatching -getFieldDefinition()[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.getFieldDefinition) - - +#### getFieldDefinition() Method to get all field definitions to be used for matching -Returns: - - - -all field definitions in list format - -Return type: +* **Returns:** + all field definitions in list format +* **Return type:** + List[EFieldDefinition] - - -List[EFieldDefinition] - -getPassthroughExpr()[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.getPassthroughExpr) - - +#### getPassthroughExpr() Method to get pass through condition -Returns: +* **Returns:** + pass through conditions in string format +* **Return type:** + String - - -pass through conditions in string format - -Return type: - - - -String - -getPrimaryKey()[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.getPrimaryKey) - - +#### getPrimaryKey() Method to get the fields containing the primary keys -Returns: - - - -all primary keys defined for field definitions in list format - -Return type: - - - -List[EFieldDefinition] +* **Returns:** + all primary keys defined for field definitions in list format +* **Return type:** + List[EFieldDefinition] -setArgs(_argumentsObj_)[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.setArgs) - - +#### setArgs(argumentsObj) Method to set EArguments -Parameters: - - - -**argumentsObj** (_EArguments_) – EArguments object - 
-setBlockingModel(_blockingModel_)[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.setBlockingModel) +* **Parameters:** + **argumentsObj** (*EArguments*) – EArguments object - +#### setBlockingModel(blockingModel) Method to set the Blocking Model used for creating model -Parameters: - - - -**blockingModel** (_String_) – value as DEFAULT or WIDER - -setData(_* -pipes_)[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.setData) +* **Parameters:** + **blockingModel** (*String*) – value as DEFAULT or WIDER - +#### setData(*pipes) Method to set the multiple pipes for data -Parameters: +* **Parameters:** + **pipes** (*EPipes*) – EPipes object - - -**pipes** (_EPipes_) – EPipes object - -setDeterministicMatchingCondition(_* -detMatchConds_)[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.setDeterministicMatchingCondition) - - +#### setDeterministicMatchingCondition(*detMatchConds) Method to set the DeterministicMatchingCondition used for matching -Parameters: +* **Parameters:** + **detMatchConds** (*DeterministicMatching*) – DeterministicMatching object - - -**detMatchConds** (_DeterministicMatching_) – DeterministicMatching object - -setFieldDefinition(_fieldDef_)[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.setFieldDefinition) - - +#### setFieldDefinition(fieldDef) Method to set the field definitions -Parameters: +* **Parameters:** + **fieldDef** (*EFieldDefinition*) – EFieldDefinition object - - -**fieldDef** (_EFieldDefinition_) – EFieldDefiniton object - -setPassthroughExpr(_passthroughExpr_)[[source]](_modules/zinggEC/enterprise/common/EArguments.html#EArguments.setPassthroughExpr) - - +#### setPassthroughExpr(passthroughExpr) Method to set pass through condition -Parameters: - - +* **Parameters:** + **passthroughExpr** (*String*) – String condition for records to not be considered -**passthroughExpr** (_String_) – String condition for records to not be -considered + 
-## zinggEC.enterprise.common.EFieldDefinition +## zinggEC.enterprise.common.EFieldDefinition This module is to work with the extended functionality of field definitions -_class _zinggEC.enterprise.common.EFieldDefinition.EFieldDefinition(_name_ , -_dataType_ , _* -matchType_)[[source]](_modules/zinggEC/enterprise/common/EFieldDefinition.html#EFieldDefinition) - - +### *class* zinggEC.enterprise.common.EFieldDefinition.EFieldDefinition(name ,dataType , *matchType) Bases: `FieldDefinition` This class defines each field that we use in matching. We can use this to configure the properties of each field we use for matching in Zingg. -Parameters: - - - - * **name** (_String_) – name of the field +* **Parameters:** + * **name** (*String*) – name of the field + * **dataType** (*String*) – type of the data e.g. string, float, etc. + * **matchType** (*MatchType*) – match type of this field e.g. FUZZY, EXACT, etc. including user-defined mapping match types - * **dataType** (_String_) – type of the data e.g. string, float, etc. - - * **matchType** (_MatchType_) – match type of this field e.g. FUSSY, EXACT, etc. 
including user-defined mapping match types - -getMatchTypeArray(_matchType_)[[source]](_modules/zinggEC/enterprise/common/EFieldDefinition.html#EFieldDefinition.getMatchTypeArray) - - +#### getMatchTypeArray(matchType) Method to get the match types associated with a field -Parameters: - - - -**matchType** (_List_ _[__IMatchType_ _]_) – list of match types associated -with a field - -getPrimaryKey()[[source]](_modules/zinggEC/enterprise/common/EFieldDefinition.html#EFieldDefinition.getPrimaryKey) +* **Parameters:** + **matchType** (*List[IMatchType]*) – list of match types associated with a field - +#### getPrimaryKey() Method to check if the field contains the primary key -Returns: - - - -true or false depending on if the field contains the primary key - -Return type: - - - -boolean - -setPrimaryKey(_primaryKey_)[[source]](_modules/zinggEC/enterprise/common/EFieldDefinition.html#EFieldDefinition.setPrimaryKey) - - +* **Returns:** + true or false depending on if the field contains the primary key +* **Return type:** + boolean +#### setPrimaryKey(primaryKey) + Method to set the field containing the primary key -Parameters: - - - -**primaryKey** (_boolean_) – true or false depending on if the field contains -the primary key - -[ Previous](index.html "Zingg Enterpise Entity Resolution Python Package") - -* * * - -(C) Copyright 2025, Zingg.AI. - -Built with [Sphinx](https://www.sphinx-doc.org/) using a -[theme](https://github.com/readthedocs/sphinx_rtd_theme) provided by [Read the -Docs](https://readthedocs.org). 
+* **Parameters:** + **primaryKey** (*boolean*) – true or false depending on if the field contains the primary key diff --git a/docs/runIncremental.md b/docs/runIncremental.md index c05e372f2..5526ab912 100644 --- a/docs/runIncremental.md +++ b/docs/runIncremental.md @@ -52,7 +52,7 @@ from zinggEC.enterprise.common.IncrementalArguments import * from zinggEC.enterprise.common.epipes import * from zinggEC.enterprise.common.EArguments import * from zinggEC.enterprise.common.EFieldDefinition import EFieldDefinition -from zinggES.enterprise.spark.ESparkClient import EZingg +from zinggES.enterprise.spark.ESparkClient import * import os #build the arguments for zingg diff --git a/docs/running/databricks.md b/docs/running/databricks.md index cd9efebd7..01329ab12 100644 --- a/docs/running/databricks.md +++ b/docs/running/databricks.md @@ -9,8 +9,10 @@ description: "Step by Step Identity Resolution with Zingg on\_Databricks" You can run the **Zingg Community Version** on Databricks directly using the Databricks notebook interface. -All [file formats and data sources and sinks](../dataSourcesAndSinks/) are supported within Databricks with **Zingg Enterprise** including Unity Catalog, Delta file format and InMemory. - ## For running the Zingg Community version on Databricks: This uses the Zingg Community Python API and an [example notebook](../../examples/databricks/FebrlExample.ipynb) and a [step by step guide](https://www.zingg.ai/documentation-article/identity-resolution-on-databricks-for-customer-360) is available. + +## For running the Zingg Enterprise version on Databricks: + +This uses the Zingg Enterprise Python API. All [file formats and data sources and sinks](../dataSourcesAndSinks/) are supported within Databricks with **Zingg Enterprise**, including Unity Catalog, Delta file format and InMemory. 
diff --git a/docs/stepbystep/installation/installing-from-release/README.md b/docs/stepbystep/installation/installing-from-release/README.md index 860da3c21..bfadc75e2 100644 --- a/docs/stepbystep/installation/installing-from-release/README.md +++ b/docs/stepbystep/installation/installing-from-release/README.md @@ -4,7 +4,7 @@ description: From the pre-built release hosted on GitHub # Installing From Release -Zingg is prebuilt for common Spark versions so that you can use those directly. The following document assumes that we are installing **Zingg 0.3** on **Spark 3.1.2**, but you can follow the same process for other versions too. +Zingg is prebuilt for common Spark versions so that you can use those directly. The following document assumes that we are installing **Zingg 0.5.0** on **Spark 3.5.0**, but you can follow the same process for other versions too. ## Prerequisites