Skip to content

Commit 4f940f6

Browse files
authored
Merge branch 'rust-lang:master' into patch-1
2 parents dabfeb0 + 837fd85 commit 4f940f6

File tree

107 files changed

+5445
-1656
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

107 files changed

+5445
-1656
lines changed

.github/workflows/ci.yml

+34-17
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,14 @@ jobs:
5454
os: ubuntu-latest
5555
rust: stable
5656
target: i686-unknown-linux-gnu
57-
- build: stable-mips
57+
- build: stable-powerpc64
5858
os: ubuntu-latest
5959
rust: stable
60-
target: mips64-unknown-linux-gnuabi64
60+
target: powerpc64-unknown-linux-gnu
61+
- build: stable-s390x
62+
os: ubuntu-latest
63+
rust: stable
64+
target: s390x-unknown-linux-gnu
6165
- build: beta
6266
os: ubuntu-latest
6367
rust: beta
@@ -77,7 +81,7 @@ jobs:
7781
- name: Checkout repository
7882
uses: actions/checkout@v3
7983
- name: Install Rust
80-
uses: dtolnay/rust-toolchain@v1
84+
uses: dtolnay/rust-toolchain@master
8185
with:
8286
toolchain: ${{ matrix.rust }}
8387
- name: Install and configure Cross
@@ -92,12 +96,6 @@ jobs:
9296
cd "$dir"
9397
curl -LO "https://github.com/cross-rs/cross/releases/download/$CROSS_VERSION/cross-x86_64-unknown-linux-musl.tar.gz"
9498
tar xf cross-x86_64-unknown-linux-musl.tar.gz
95-
96-
# We used to install 'cross' from master, but it kept failing. So now
97-
# we build from a known-good version until 'cross' becomes more stable
98-
# or we find an alternative. Notably, between v0.2.1 and current
99-
# master (2022-06-14), the number of Cross's dependencies has doubled.
100-
# cargo install --bins --git https://github.com/rust-embedded/cross --tag v0.2.1
10199
echo "CARGO=cross" >> $GITHUB_ENV
102100
echo "TARGET=--target ${{ matrix.target }}" >> $GITHUB_ENV
103101
- name: Show command used for Cargo
@@ -141,9 +139,28 @@ jobs:
141139
- name: Checkout repository
142140
uses: actions/checkout@v3
143141
- name: Install Rust
144-
uses: dtolnay/rust-toolchain@v1
142+
uses: dtolnay/rust-toolchain@master
145143
with:
146-
toolchain: 1.60.0
144+
toolchain: 1.65.0
145+
# The memchr 2.6 release purportedly bumped its MSRV to Rust 1.60, but it
146+
# turned out that on aarch64, it was using something that wasn't stabilized
147+
# until Rust 1.61[1]. (This was an oversight on my part. I had previously
148+
# thought everything I needed was on Rust 1.60.) To resolve that, I just
149+
# bumped memchr's MSRV to 1.61. Since it was so soon after the memchr 2.6
150+
# release, I treated this as a bugfix.
151+
#
152+
# But the regex crate's MSRV is at Rust 1.60, and it now depends on at
153+
# least memchr 2.6 (to make use of its `alloc` feature). So we can't set
154+
# a lower minimal version. And I can't just bump the MSRV in a patch
155+
# release as a bug fix because regex 1.9 was released quite some time ago.
156+
# I could just release regex 1.10 and bump the MSRV there, but eh, I don't
157+
# want to put out another minor version release just for this.
158+
#
159+
# So... pin memchr to 2.6.2, which at least works on x86-64 on Rust 1.60.
160+
#
161+
# [1]: https://github.com/BurntSushi/memchr/issues/136
162+
- name: Pin memchr to 2.6.2
163+
run: cargo update -p memchr --precise 2.6.2
147164
- name: Basic build
148165
run: cargo build --verbose
149166
- name: Build docs
@@ -162,7 +179,7 @@ jobs:
162179
- name: Checkout repository
163180
uses: actions/checkout@v3
164181
- name: Install Rust
165-
uses: dtolnay/rust-toolchain@v1
182+
uses: dtolnay/rust-toolchain@master
166183
with:
167184
toolchain: stable
168185
- name: Run full test suite
@@ -175,7 +192,7 @@ jobs:
175192
- name: Checkout repository
176193
uses: actions/checkout@v3
177194
- name: Install Rust
178-
uses: dtolnay/rust-toolchain@v1
195+
uses: dtolnay/rust-toolchain@master
179196
with:
180197
toolchain: stable
181198
- name: Run full test suite
@@ -188,7 +205,7 @@ jobs:
188205
- name: Checkout repository
189206
uses: actions/checkout@v3
190207
- name: Install Rust
191-
uses: dtolnay/rust-toolchain@v1
208+
uses: dtolnay/rust-toolchain@master
192209
with:
193210
toolchain: stable
194211
- name: Run full test suite
@@ -201,7 +218,7 @@ jobs:
201218
- name: Checkout repository
202219
uses: actions/checkout@v3
203220
- name: Install Rust
204-
uses: dtolnay/rust-toolchain@v1
221+
uses: dtolnay/rust-toolchain@master
205222
with:
206223
toolchain: stable
207224
- name: Run full test suite
@@ -216,7 +233,7 @@ jobs:
216233
- name: Checkout repository
217234
uses: actions/checkout@v3
218235
- name: Install Rust
219-
uses: dtolnay/rust-toolchain@v1
236+
uses: dtolnay/rust-toolchain@master
220237
with:
221238
# We use nightly here so that we can use miri I guess?
222239
# It caught me by surprise that miri seems to only be
@@ -233,7 +250,7 @@ jobs:
233250
- name: Checkout repository
234251
uses: actions/checkout@v3
235252
- name: Install Rust
236-
uses: dtolnay/rust-toolchain@v1
253+
uses: dtolnay/rust-toolchain@master
237254
with:
238255
toolchain: stable
239256
components: rustfmt

CHANGELOG.md

+195
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,198 @@
1+
1.10.2 (2023-10-16)
2+
===================
3+
This is a new patch release that fixes a search regression where incorrect
4+
matches could be reported.
5+
6+
Bug fixes:
7+
8+
* [BUG #1110](https://github.com/rust-lang/regex/issues/1110):
9+
Revert broadening of reverse suffix literal optimization introduced in 1.10.1.
10+
11+
12+
1.10.1 (2023-10-14)
13+
===================
14+
This is a new patch release with a minor increase in the number of valid
15+
patterns and a broadening of some literal optimizations.
16+
17+
New features:
18+
19+
* [FEATURE 04f5d7be](https://github.com/rust-lang/regex/commit/04f5d7be4efc542864cc400f5d43fbea4eb9bab6):
20+
Loosen ASCII-compatible rules such that regexes like `(?-u:☃)` are now allowed.
21+
22+
Performance improvements:
23+
24+
* [PERF 8a8d599f](https://github.com/rust-lang/regex/commit/8a8d599f9d2f2d78e9ad84e4084788c2d563afa5):
25+
Broaden the reverse suffix optimization to apply in more cases.
26+
27+
28+
1.10.0 (2023-10-09)
29+
===================
30+
This is a new minor release of `regex` that adds support for start and end
31+
word boundary assertions. That is, `\<` and `\>`. The minimum supported Rust
32+
version has also been raised to 1.65, which was released about one year ago.
33+
34+
The new word boundary assertions are:
35+
36+
* `\<` or `\b{start}`: a Unicode start-of-word boundary (`\W|\A` on the left,
37+
`\w` on the right).
38+
* `\>` or `\b{end}`: a Unicode end-of-word boundary (`\w` on the left, `\W|\z`
39+
on the right).
40+
* `\b{start-half}`: half of a Unicode start-of-word boundary (`\W|\A` on the
41+
left).
42+
* `\b{end-half}`: half of a Unicode end-of-word boundary (`\W|\z` on the
43+
right).
44+
45+
The `\<` and `\>` are GNU extensions to POSIX regexes. They have been added
46+
to the `regex` crate because they enjoy somewhat broad support in other regex
47+
engines as well (for example, vim). The `\b{start}` and `\b{end}` assertions
48+
are aliases for `\<` and `\>`, respectively.
49+
50+
The `\b{start-half}` and `\b{end-half}` assertions are not found in any
51+
other regex engine (although regex engines with general look-around support
52+
can certainly express them). They were added principally to support the
53+
implementation of word matching in grep programs, where one generally wants to
54+
be a bit more flexible in what is considered a word boundary.
55+
56+
New features:
57+
58+
* [FEATURE #469](https://github.com/rust-lang/regex/issues/469):
59+
Add support for `\<` and `\>` word boundary assertions.
60+
* [FEATURE(regex-automata) #1031](https://github.com/rust-lang/regex/pull/1031):
61+
DFAs now have a `start_state` method that doesn't use an `Input`.
62+
63+
Performance improvements:
64+
65+
* [PERF #1051](https://github.com/rust-lang/regex/pull/1051):
66+
Unicode character class operations have been optimized in `regex-syntax`.
67+
* [PERF #1090](https://github.com/rust-lang/regex/issues/1090):
68+
Make patterns containing lots of literal characters use less memory.
69+
70+
Bug fixes:
71+
72+
* [BUG #1046](https://github.com/rust-lang/regex/issues/1046):
73+
Fix a bug that could result in incorrect match spans when using a Unicode word
74+
boundary and searching non-ASCII strings.
75+
* [BUG(regex-syntax) #1047](https://github.com/rust-lang/regex/issues/1047):
76+
Fix panics that can occur in `Ast->Hir` translation (not reachable from `regex`
77+
crate).
78+
* [BUG(regex-syntax) #1088](https://github.com/rust-lang/regex/issues/1088):
79+
Remove guarantees in the API that connect the `u` flag with a specific HIR
80+
representation.
81+
82+
`regex-automata` breaking change release:
83+
84+
This release includes a `regex-automata 0.4.0` breaking change release, which
85+
was necessary in order to support the new word boundary assertions. For
86+
example, the `Look` enum has new variants and the `LookSet` type now uses `u32`
87+
instead of `u16` to represent a bitset of look-around assertions. These are
88+
overall very minor changes, and most users of `regex-automata` should be able
89+
to move to `0.4` from `0.3` without any changes at all.
90+
91+
`regex-syntax` breaking change release:
92+
93+
This release also includes a `regex-syntax 0.8.0` breaking change release,
94+
which, like `regex-automata`, was necessary in order to support the new word
95+
boundary assertions. This release also includes some changes to the `Ast`
96+
type to reduce heap usage in some cases. If you are using the `Ast` type
97+
directly, your code may require some minor modifications. Otherwise, users of
98+
`regex-syntax 0.7` should be able to migrate to `0.8` without any code changes.
99+
100+
`regex-lite` release:
101+
102+
The `regex-lite 0.1.1` release contains support for the new word boundary
103+
assertions. There are no breaking changes.
104+
105+
106+
1.9.6 (2023-09-30)
107+
==================
108+
This is a patch release that fixes a panic that can occur when the default
109+
regex size limit is increased to a large number.
110+
111+
* [BUG aa4e4c71](https://github.com/rust-lang/regex/commit/aa4e4c7120b0090ce0624e3c42a2ed06dd8b918a):
112+
Fix a bug where computing the maximum haystack length for the bounded
113+
backtracker could result in underflow and thus provoke a panic later in a search
114+
due to a broken invariant.
115+
116+
117+
1.9.5 (2023-09-02)
118+
==================
119+
This is a patch release that hopefully mostly fixes a performance bug that
120+
occurs when sharing a regex across multiple threads.
121+
122+
Issue [#934](https://github.com/rust-lang/regex/issues/934)
123+
explains this in more detail. It is [also noted in the crate
124+
documentation](https://docs.rs/regex/latest/regex/#sharing-a-regex-across-threads-can-result-in-contention).
125+
The bug can appear when sharing a regex across multiple threads simultaneously,
126+
as might be the case when using a regex from a `OnceLock`, `lazy_static` or
127+
similar primitive. Usually high contention only results when using many threads
128+
to execute searches on small haystacks.
129+
130+
One can avoid the contention problem entirely through one of two methods.
131+
The first is to use lower level APIs from `regex-automata` that require passing
132+
state explicitly, such as [`meta::Regex::search_with`](https://docs.rs/regex-automata/latest/regex_automata/meta/struct.Regex.html#method.search_with).
133+
The second is to clone a regex and send it to other threads explicitly. This
134+
will not use any additional memory compared to sharing the regex. The
135+
only downside of this approach is that it may be less convenient; for example,
136+
it won't work with things like `OnceLock` or `lazy_static` or `once_cell`.
137+
138+
With that said, as of this release, the contention performance problems have
139+
been greatly reduced. This was achieved by changing the free-list so that it
140+
was sharded across threads, and ensuring that each sharded mutex occupies a
141+
single cache line to mitigate false sharing. So while contention may still
142+
impact performance in some cases, it should be a lot better now.
143+
144+
Because of the changes to how the free-list works, please report any issues you
145+
find with this release. That not only includes search time regressions but also
146+
significant regressions in memory usage. Reporting improvements is welcome
147+
as well! If possible, provide a reproduction.
148+
149+
Bug fixes:
150+
151+
* [BUG #934](https://github.com/rust-lang/regex/issues/934):
152+
Fix a performance bug where high contention on a single regex led to massive
153+
slowdowns.
154+
155+
156+
1.9.4 (2023-08-26)
157+
==================
158+
This is a patch release that fixes a bug where `RegexSet::is_match(..)` could
159+
incorrectly return false (even when `RegexSet::matches(..).matched_any()`
160+
returns true).
161+
162+
Bug fixes:
163+
164+
* [BUG #1070](https://github.com/rust-lang/regex/issues/1070):
165+
Fix a bug where a prefilter was incorrectly configured for a `RegexSet`.
166+
167+
168+
1.9.3 (2023-08-05)
169+
==================
170+
This is a patch release that fixes a bug where some searches could result in
171+
incorrect match offsets being reported. It is difficult to characterize the
172+
types of regexes susceptible to this bug. They generally involve patterns
173+
that contain no prefix or suffix literals, but have an inner literal along with
174+
a regex prefix that can conditionally match.
175+
176+
Bug fixes:
177+
178+
* [BUG #1060](https://github.com/rust-lang/regex/issues/1060):
179+
Fix a bug with the reverse inner literal optimization reporting incorrect match
180+
offsets.
181+
182+
183+
1.9.2 (2023-08-05)
184+
==================
185+
This is a patch release that fixes another memory usage regression. This
186+
particular regression occurred only when using a `RegexSet`. In some cases,
187+
much more heap memory (by one or two orders of magnitude) was allocated than in
188+
versions prior to 1.9.0.
189+
190+
Bug fixes:
191+
192+
* [BUG #1059](https://github.com/rust-lang/regex/issues/1059):
193+
Fix a memory usage regression when using a `RegexSet`.
194+
195+
1196
1.9.1 (2023-07-07)
2197
==================
3198
This is a patch release which fixes a memory usage regression. In the regex

Cargo.toml

+6-5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "regex"
3-
version = "1.9.1" #:version
3+
version = "1.10.2" #:version
44
authors = ["The Rust Project Developers", "Andrew Gallant <[email protected]>"]
55
license = "MIT OR Apache-2.0"
66
readme = "README.md"
@@ -15,7 +15,7 @@ categories = ["text-processing"]
1515
autotests = false
1616
exclude = ["/scripts/*", "/.github/*"]
1717
edition = "2021"
18-
rust-version = "1.60.0"
18+
rust-version = "1.65"
1919

2020
[workspace]
2121
members = [
@@ -52,6 +52,7 @@ std = [
5252
# to actually emit the log messages somewhere.
5353
logging = [
5454
"aho-corasick?/logging",
55+
"memchr?/logging",
5556
"regex-automata/logging",
5657
]
5758
# The 'use_std' feature is DEPRECATED. It will be removed in regex 2. Until
@@ -167,20 +168,20 @@ optional = true
167168

168169
# For skipping along search text quickly when a leading byte is known.
169170
[dependencies.memchr]
170-
version = "2.5.0"
171+
version = "2.6.0"
171172
optional = true
172173

173174
# For the actual regex engines.
174175
[dependencies.regex-automata]
175176
path = "regex-automata"
176-
version = "0.3.1"
177+
version = "0.4.3"
177178
default-features = false
178179
features = ["alloc", "syntax", "meta", "nfa-pikevm"]
179180

180181
# For parsing regular expressions.
181182
[dependencies.regex-syntax]
182183
path = "regex-syntax"
183-
version = "0.7.3"
184+
version = "0.8.2"
184185
default-features = false
185186

186187
[dev-dependencies]

0 commit comments

Comments
 (0)