From 2aa6179cdd881c374c56d236fb2ae7779308d84c Mon Sep 17 00:00:00 2001
From: wecharyu
Date: Sat, 14 Sep 2024 17:36:14 +0000
Subject: [PATCH] use negative lookbehind pattern

---
 .../ql/ddl/database/show/ShowDatabasesOperation.java |  2 +-
 .../show/ShowDataConnectorsOperation.java            |  2 +-
 .../info/show/status/ShowTableStatusOperation.java   |  2 +-
 .../show/ShowMaterializedViewsOperation.java         |  2 +-
 .../apache/hadoop/hive/ql/exec/FunctionRegistry.java |  2 +-
 .../expressions/FilterStringColLikeStringScalar.java |  2 +-
 .../hive/ql/metadata/SessionHiveMetaStoreClient.java |  4 ++--
 .../java/org/apache/hadoop/hive/ql/udf/UDFLike.java  | 12 ++++++++----
 .../hadoop/hive/metastore/HiveMetaStoreClient.java   |  2 +-
 9 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/show/ShowDatabasesOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/show/ShowDatabasesOperation.java
index 6da1a841126b..cc4a4b84f117 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/show/ShowDatabasesOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/show/ShowDatabasesOperation.java
@@ -45,7 +45,7 @@ public int execute() throws HiveException {
     List<String> databases = context.getDb().getAllDatabases();
     if (desc.getPattern() != null) {
       LOG.debug("pattern: {}", desc.getPattern());
-      Pattern pattern = Pattern.compile(UDFLike.likePatternToRegExp(desc.getPattern(), true), Pattern.CASE_INSENSITIVE);
+      Pattern pattern = Pattern.compile(UDFLike.likePatternToRegExp(desc.getPattern()), Pattern.CASE_INSENSITIVE);
       databases = databases.stream().filter(name -> pattern.matcher(name).matches()).collect(Collectors.toList());
     }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/dataconnector/show/ShowDataConnectorsOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/dataconnector/show/ShowDataConnectorsOperation.java
index fb980204bb9a..b0190363781d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/dataconnector/show/ShowDataConnectorsOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/dataconnector/show/ShowDataConnectorsOperation.java
@@ -45,7 +45,7 @@ public int execute() throws HiveException {
     List<String> connectors = context.getDb().getAllDataConnectorNames();
     if (desc.getPattern() != null) {
       LOG.debug("pattern: {}", desc.getPattern());
-      Pattern pattern = Pattern.compile(UDFLike.likePatternToRegExp(desc.getPattern(), true), Pattern.CASE_INSENSITIVE);
+      Pattern pattern = Pattern.compile(UDFLike.likePatternToRegExp(desc.getPattern()), Pattern.CASE_INSENSITIVE);
       connectors = connectors.stream().filter(name -> pattern.matcher(name).matches()).collect(Collectors.toList());
     }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/ShowTableStatusOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/ShowTableStatusOperation.java
index 757532fbcd0f..8144d540484e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/ShowTableStatusOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/ShowTableStatusOperation.java
@@ -63,7 +63,7 @@ public int execute() throws HiveException {
     LOG.debug("pattern: {}", desc.getPattern());
     List<String> tableNames = context.getDb().getTablesForDb(desc.getDbName(), null);
     if (desc.getPattern() != null) {
-      Pattern pattern = Pattern.compile(UDFLike.likePatternToRegExp(desc.getPattern(), true), Pattern.CASE_INSENSITIVE);
+      Pattern pattern = Pattern.compile(UDFLike.likePatternToRegExp(desc.getPattern()), Pattern.CASE_INSENSITIVE);
       tableNames = tableNames.stream()
           .filter(name -> pattern.matcher(name).matches())
           .collect(Collectors.toList());
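
As a side note on how these call sites behave: below is a minimal, self-contained sketch (plain JDK, not Hive code) of the filtering that the operations above perform once the new single-argument overload is used. The toRegex helper is a hypothetical stand-in for UDFLike.likePatternToRegExp(pattern); it mirrors the conversion rules visible in the UDFLike hunk further down ('_' becomes '.', '%' becomes a reluctant '.*?', everything else is quoted as a literal) but skips escape handling, and the database names are made up.

import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

public class LikeFilterSketch {
  // Hypothetical stand-in for UDFLike.likePatternToRegExp(pattern):
  // '_' matches any single character, '%' matches any sequence (reluctantly),
  // every other character is quoted as a literal. Escape handling is omitted.
  static String toRegex(String likePattern) {
    StringBuilder sb = new StringBuilder();
    for (char c : likePattern.toCharArray()) {
      if (c == '_') {
        sb.append(".");
      } else if (c == '%') {
        sb.append(".*?");
      } else {
        sb.append(Pattern.quote(Character.toString(c)));
      }
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    List<String> databases = Arrays.asList("default", "sales_2024", "Sales_Archive", "hr");
    // e.g. SHOW DATABASES LIKE 'sales%' -- matched case-insensitively, as in the operations above.
    Pattern pattern = Pattern.compile(toRegex("sales%"), Pattern.CASE_INSENSITIVE);
    List<String> filtered = databases.stream()
        .filter(name -> pattern.matcher(name).matches())
        .collect(Collectors.toList());
    System.out.println(filtered); // [sales_2024, Sales_Archive]
  }
}
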
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/show/ShowMaterializedViewsOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/show/ShowMaterializedViewsOperation.java
index b3c1bad300ec..ce918c849e65 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/show/ShowMaterializedViewsOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/show/ShowMaterializedViewsOperation.java
@@ -53,7 +53,7 @@ public int execute() throws HiveException {
     List<Table> viewObjects = new ArrayList<>(
         context.getDb().getMaterializedViewObjectsByPattern(desc.getDbName(), null));
     if (desc.getPattern() != null) {
-      Pattern pattern = Pattern.compile(UDFLike.likePatternToRegExp(desc.getPattern(), true), Pattern.CASE_INSENSITIVE);
+      Pattern pattern = Pattern.compile(UDFLike.likePatternToRegExp(desc.getPattern()), Pattern.CASE_INSENSITIVE);
       viewObjects = viewObjects.stream()
           .filter(object -> pattern.matcher(object.getTableName()).matches())
           .collect(Collectors.toList());
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 01d391d96062..c54a59f95169 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -858,7 +858,7 @@ public static Set<String> getFunctionNamesByLikePattern(String funcPatternStr) {
     Set<String> allFuncs = getFunctionNames();
     String[] subpatterns = funcPatternStr.trim().split("\\|");
     for (String subpattern : subpatterns) {
-      subpattern = "(?i)" + UDFLike.likePatternToRegExp(subpattern, true);
+      subpattern = "(?i)" + UDFLike.likePatternToRegExp(subpattern);
       try {
         Pattern patternObj = Pattern.compile(subpattern);
         for (String funcName : allFuncs) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColLikeStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColLikeStringScalar.java
index 7bfb24f1f1c9..88f12a2a9fab 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColLikeStringScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColLikeStringScalar.java
@@ -82,7 +82,7 @@ String format(String pattern) {
     COMPLEX(ComplexChecker.class) {
       @Override
       String format(String pattern) {
-        return "^" + UDFLike.likePatternToRegExp(pattern, true) + "$";
+        return "^" + UDFLike.likePatternToRegExp(pattern) + "$";
       }
     },
     // Accepts chained LIKE patterns without escaping like "abc%def%ghi%" and
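
For context on the FunctionRegistry hunk above: SHOW FUNCTIONS LIKE accepts several patterns separated by '|'; each subpattern is converted to a regex, prefixed with the inline case-insensitivity flag (?i), compiled, and matched against every registered name. Below is a standalone sketch of that flow under simplifying assumptions: the function names are made up, and the inline replace() calls stand in for UDFLike.likePatternToRegExp, ignoring literal quoting and escape handling.

import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

public class FunctionPatternSketch {
  public static void main(String[] args) {
    Set<String> allFuncs = new LinkedHashSet<>(
        Arrays.asList("concat", "concat_ws", "current_date", "current_timestamp", "xpath"));
    String funcPatternStr = "concat%|current_date";

    Set<String> funcNames = new LinkedHashSet<>();
    String[] subpatterns = funcPatternStr.trim().split("\\|");
    for (String subpattern : subpatterns) {
      // Simplified LIKE-to-regex conversion for this demo only ('%' -> ".*?", '_' -> ".");
      // the real code calls UDFLike.likePatternToRegExp, which also quotes literal characters.
      subpattern = "(?i)" + subpattern.replace("%", ".*?").replace("_", ".");
      try {
        Pattern patternObj = Pattern.compile(subpattern);
        for (String funcName : allFuncs) {
          if (patternObj.matcher(funcName).matches()) {
            funcNames.add(funcName);
          }
        }
      } catch (PatternSyntaxException e) {
        // Ignore subpatterns that do not compile (the original also guards compilation with a try).
      }
    }
    System.out.println(funcNames); // [concat, concat_ws, current_date]
  }
}
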
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
index 928a23ed2602..af7aca6a3735 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
@@ -321,7 +321,7 @@ public List<String> getTables(String dbName, String tablePattern) throws MetaExc
     if (tables == null || tables.size() == 0) {
       return tableNames;
     }
-    tablePattern = tablePattern.replaceAll("\\.\\*", "\\*").replaceAll("\\*", ".*");
+    tablePattern = tablePattern.replaceAll("(?<!\\.)\\*", ".*");
     Set<String> combinedTableNames = new HashSet<>();
@@ -351,7 +351,7 @@ public List<String> getTables(String dbname, String tablePattern, TableType tabl
     if (tables == null || tables.size() == 0) {
       return tableNames;
     }
-    tablePattern = tablePattern.replaceAll("\\.\\*", "\\*").replaceAll("\\*", ".*");
+    tablePattern = tablePattern.replaceAll("(?<!\\.)\\*", ".*");
     Set<String> combinedTableNames = new HashSet<>();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java
index 600e03fbb2d4..2e9bcf9ce73c 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java
@@ -61,7 +61,11 @@ private enum PatternType {
   public UDFLike() {
   }
 
-  public static String likePatternToRegExp(String likePattern, boolean quote) {
+  public static String likePatternToRegExp(String likePattern) {
+    return likePatternToRegExp(likePattern, true);
+  }
+
+  public static String likePatternToRegExp(String likePattern, boolean literalize) {
     if (likePattern == null) {
       return null;
     }
@@ -80,9 +84,9 @@ public static String likePatternToRegExp(String likePattern, boolean quote) {
       if (n == '_') {
         sb.append(".");
       } else if (n == '%') {
-        sb.append(".*");
+        sb.append(".*?");
       } else {
-        sb.append(quote ? Pattern.quote(Character.toString(n)) : n);
+        sb.append(literalize ? Pattern.quote(Character.toString(n)) : n);
       }
     }
     return sb.toString();
@@ -187,7 +191,7 @@ public BooleanWritable evaluate(Text s, Text likePattern) {
       parseSimplePattern(strLikePattern);
       if (type == PatternType.COMPLEX) {
-        p = Pattern.compile(likePatternToRegExp(strLikePattern, true), Pattern.DOTALL);
+        p = Pattern.compile(likePatternToRegExp(strLikePattern), Pattern.DOTALL);
       }
     }
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
index b50b3c4a8cb6..f556ec78704e 100644
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
@@ -2929,7 +2929,7 @@ public List<String> getTables(String catName, String dbName, String tablePattern
     // We need unify the pattern definition, see HIVE-28297 for details.
     String[] patterns = tablePattern.split("\\|");
     for (String pattern : patterns) {
-      pattern = "(?i)" + pattern.replaceAll("\\.\\*", "\\*").replaceAll("\\*", ".*");
+      pattern = "(?i)" + pattern.replaceAll("(?<!\\.)\\*", ".*");
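
The two metastore hunks above are what the subject line refers to: rather than first collapsing ".*" to "*" and then expanding every "*" to ".*", the rewrite is done in one pass with a negative lookbehind, so a "*" is expanded only when it is not already preceded by a dot. A quick standalone comparison of the two rewrites, using made-up sample patterns:

import java.util.Arrays;
import java.util.List;

public class WildcardRewriteSketch {
  public static void main(String[] args) {
    List<String> patterns = Arrays.asList("tmp*", "db.*", "a*b.*c*");
    for (String p : patterns) {
      // Old two-step rewrite: collapse ".*" to "*", then expand every "*" to ".*".
      String oldStyle = p.replaceAll("\\.\\*", "\\*").replaceAll("\\*", ".*");
      // New single-step rewrite: expand "*" to ".*" only when it is not preceded by '.'.
      String newStyle = p.replaceAll("(?<!\\.)\\*", ".*");
      System.out.printf("%-8s -> old: %-10s new: %-10s%n", p, oldStyle, newStyle);
    }
  }
}

Both forms normalize mixed glob/regex input to the same regex; the lookbehind version avoids the intermediate mutation of the pattern string and never touches a "*" that is already part of ".*".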