|
39 | 39 | )]
|
40 | 40 |
|
41 | 41 | use crate::cmp;
|
| 42 | +use crate::cmp::Ordering; |
42 | 43 | use crate::fmt;
|
43 | 44 | use crate::slice::memchr;
|
44 | 45 |
|
@@ -946,6 +947,27 @@ impl<'a, 'b> Pattern<'a> for &'b str {
|
946 | 947 | haystack.as_bytes().starts_with(self.as_bytes())
|
947 | 948 | }
|
948 | 949 |
|
| 950 | + /// Checks whether the pattern matches anywhere in the haystack |
| 951 | + #[inline] |
| 952 | + fn is_contained_in(self, haystack: &'a str) -> bool { |
| 953 | + if self.len() == 0 { |
| 954 | + return true; |
| 955 | + } |
| 956 | + |
| 957 | + match self.len().cmp(&haystack.len()) { |
| 958 | + Ordering::Less => { |
| 959 | + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] |
| 960 | + if self.as_bytes().len() <= 8 { |
| 961 | + return simd_contains(self, haystack); |
| 962 | + } |
| 963 | + |
| 964 | + self.into_searcher(haystack).next_match().is_some() |
| 965 | + } |
| 966 | + Ordering::Equal => self == haystack, |
| 967 | + Ordering::Greater => false, |
| 968 | + } |
| 969 | + } |
| 970 | + |
949 | 971 | /// Removes the pattern from the front of haystack, if it matches.
|
950 | 972 | #[inline]
|
951 | 973 | fn strip_prefix_of(self, haystack: &'a str) -> Option<&'a str> {
|
@@ -1684,3 +1706,83 @@ impl TwoWayStrategy for RejectAndMatch {
|
1684 | 1706 | SearchStep::Match(a, b)
|
1685 | 1707 | }
|
1686 | 1708 | }
|
| 1709 | + |
/// Reports whether the bytes of `needle` occur anywhere in `haystack`,
/// probing 16 candidate start positions at a time with SSE2.
///
/// For each 16-byte window of the haystack the first needle byte is compared
/// against the window and the last needle byte against the window shifted by
/// `needle.len() - 1`. Positions where both comparisons hit are candidates;
/// only those have the bytes in between compared against the needle.
///
/// Special cases handled before the vectorised path:
/// * an empty needle is contained in every haystack,
/// * a one-byte needle is a plain byte search,
/// * a haystack too short for one full probe falls back to comparing every
///   window of `needle.len()` bytes.
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
#[inline]
fn simd_contains(needle: &str, haystack: &str) -> bool {
    use crate::arch::x86_64::{
        __m128i, _mm_and_si128, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm_set1_epi8,
    };

    /// Bytes loaded per probe, and therefore candidate positions tested per probe.
    const CHUNK: usize = 16;

    /// Confirms the candidates recorded in `mask` for the probe starting at `idx`.
    ///
    /// Bit `k` of `mask` being set means the first and last needle bytes
    /// matched for a candidate starting at `idx + k`, so only the bytes in
    /// between still need to be compared.
    #[cold]
    fn check_mask(haystack: &[u8], needle: &[u8], idx: usize, mut mask: u32) -> bool {
        let inner = &needle[1..needle.len() - 1];
        while mask != 0 {
            let trailing = mask.trailing_zeros();
            // Skip the first byte of the candidate; it is already known to match.
            let offset = idx + trailing as usize + 1;
            if &haystack[offset..][..inner.len()] == inner {
                return true;
            }
            // Clear the lowest set bit and move on to the next candidate.
            mask &= mask - 1;
        }
        false
    }

    let needle = needle.as_bytes();
    let haystack = haystack.as_bytes();

    let (first_byte, last_byte) = match needle {
        // Handled up front: `windows(0)` and `needle[0]` would both panic.
        [] => return true,
        [only] => return haystack.contains(only),
        [first, .., last] => (*first, *last),
    };

    // Do a naive search if the haystack is too small to fit a single probe,
    // which needs CHUNK + needle.len() - 1 readable bytes.
    if haystack.len() < CHUNK + needle.len() - 1 {
        return haystack.windows(needle.len()).any(|window| window == needle);
    }

    // SAFETY: no preconditions other than sse2 being available
    let first: __m128i = unsafe { _mm_set1_epi8(first_byte as i8) };
    // SAFETY: no preconditions other than sse2 being available
    let last: __m128i = unsafe { _mm_set1_epi8(last_byte as i8) };

    let test_chunk = |i: usize| -> bool {
        // SAFETY: every caller passes an `i` with
        // `i + needle.len() - 1 + CHUNK <= haystack.len()` (see the loop
        // condition and the tail probe below), so the CHUNK bytes at `i` are readable.
        let a: __m128i = unsafe { _mm_loadu_si128(haystack.as_ptr().add(i) as *const _) };
        // SAFETY: same bound as above; this load ends at
        // `i + needle.len() - 1 + CHUNK`, which is within the haystack.
        let b: __m128i =
            unsafe { _mm_loadu_si128(haystack.as_ptr().add(i + needle.len() - 1) as *const _) };

        // SAFETY: no preconditions other than sse2 being available
        let eq_first: __m128i = unsafe { _mm_cmpeq_epi8(first, a) };
        // SAFETY: no preconditions other than sse2 being available
        let eq_last: __m128i = unsafe { _mm_cmpeq_epi8(last, b) };

        // SAFETY: no preconditions other than sse2 being available
        let mask: u32 = unsafe { _mm_movemask_epi8(_mm_and_si128(eq_first, eq_last)) } as u32;

        mask != 0 && check_mask(haystack, needle, i, mask)
    };

    // Left-aligned probes in CHUNK-sized steps. The condition is one byte
    // stricter than the loads require; the tail probe below covers the rest.
    let mut i = 0;
    while i + CHUNK + needle.len() <= haystack.len() {
        if test_chunk(i) {
            return true;
        }
        i += CHUNK;
    }

    // Process the tail that didn't fit into CHUNK-sized steps by repeating the
    // same procedure as a right-aligned probe. Its last byte is exactly flush
    // with the end of the haystack, so no position is missed and nothing is
    // read out of bounds. The subtraction cannot underflow because the naive
    // fallback above already handled haystacks shorter than CHUNK + needle.len() - 1.
    test_chunk(haystack.len() + 1 - needle.len() - CHUNK)
}
0 commit comments