Skip to content

Commit

Permalink
Merge pull request #370 from zhuowang-linkedin/zhuowang/patternMatchB…
Browse files Browse the repository at this point in the history
…ugFix

Fix PatternMatch hashCode bug
  • Loading branch information
twollnik authored Sep 7, 2021
2 parents 32be67a + 400f581 commit 4e68020
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 0 deletions.
17 changes: 17 additions & 0 deletions src/main/scala/com/amazon/deequ/analyzers/PatternMatch.scala
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,23 @@ case class PatternMatch(column: String, pattern: Regex, where: Option[String] =
/** Supplies the `hasColumn` and `isString` precondition checks for `column`, in that order. */
override protected def additionalPreconditions(): Seq[StructType => Unit] =
  List(hasColumn(column), isString(column))

// Regex compares by reference, so two PatternMatch instances built from the
// same parameters would otherwise get different hashCodes (and be unequal).
// Work around this by comparing a tuple that carries the pattern's string
// form instead of the Regex object itself.
private val internalObj: (String, String, Option[String]) =
  (column, pattern.toString(), where)

/** Hash of the (column, pattern string, where) tuple held in `internalObj`. */
override def hashCode(): Int = internalObj.hashCode()

/**
  * Equality based on the `internalObj` tuple (column, pattern string, where)
  * rather than on the Regex instance.
  *
  * @param obj the value to compare against
  * @return true only when `obj` is a PatternMatch whose `internalObj` equals this one's
  */
override def equals(obj: Any): Boolean = obj match {
  // The typed pattern already narrows `other` to PatternMatch, so no cast is needed.
  case other: PatternMatch => internalObj == other.internalObj
  case _ => false
}

}

object Patterns {
Expand Down
7 changes: 7 additions & 0 deletions src/test/scala/com/amazon/deequ/analyzers/AnalyzerTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,13 @@ class AnalyzerTests extends AnyWordSpec with Matchers with SparkContextSpec with
"Pattern compliance analyzer" should {
val someColumnName = "some"

"PatternMatch hashCode should equal for the same pattern" in {
  // Each `.r` call builds a separate Regex instance from the same source string.
  val first = PatternMatch("col1", "[a-z]".r)
  val second = PatternMatch("col1", "[a-z]".r)

  first.hashCode() shouldEqual second.hashCode()
  first shouldEqual second
}

"not match doubles in nullable column" in withSparkSession { sparkSession =>

val df = dataFrameWithColumn(someColumnName, DoubleType, sparkSession, Row(1.1),
Expand Down

0 comments on commit 4e68020

Please sign in to comment.