Skip to content

Commit 5fbf45d

Browse files
garrickevansCommit Bot
authored and
Commit Bot
committed
anonymizer_tool: Whitelist certain special purpose IPv4 subnets.
Many IP addresses reveal no personal or device identifying information. Logging and debugging can be improved by whitelisting such addresses without loss of privacy.

BUG=b:111048642
TEST=components_unittests
BUG: 908117
Change-Id: Icdc669a8030bbda64446d81d595702bf4df40b79
Reviewed-on: https://chromium-review.googlesource.com/c/1297857
Commit-Queue: Dominic Battré <[email protected]>
Reviewed-by: Thiemo Nagel <[email protected]>
Reviewed-by: Nick Harper <[email protected]>
Reviewed-by: Dominic Battré <[email protected]>
Cr-Commit-Position: refs/heads/master@{#624088}
1 parent 58d72b1 commit 5fbf45d

File tree

2 files changed

+109
-9
lines changed

2 files changed

+109
-9
lines changed

components/feedback/anonymizer_tool.cc

+55-7
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "base/strings/string_util.h"
1313
#include "base/strings/stringprintf.h"
1414
#include "content/public/browser/browser_thread.h"
15+
#include "net/base/ip_address.h"
1516
#include "third_party/re2/src/re2/re2.h"
1617

1718
using re2::RE2;
@@ -39,7 +40,7 @@ namespace {
3940
//
4041
// +? is a non-greedy (lazy) +.
4142
// \b matches a word boundary.
42-
// (?i) turns on case insensitivy for the remainder of the regex.
43+
// (?i) turns on case insensitivity for the remainder of the regex.
4344
// (?-s) turns off "dot matches newline" for the remainder of the regex.
4445
// (?:regex) denotes non-capturing parentheses group.
4546
constexpr const char* kCustomPatternsWithContext[] = {
@@ -58,6 +59,49 @@ constexpr const char* kCustomPatternsWithContext[] = {
5859
"(?i-s)(serial\\s*(?:number)?\\s*[:=]\\s*)([0-9a-zA-Z\\-\"]+)()",
5960
};
6061

62+
// Decides how the IPv4 literal |addr| may appear in anonymized output by
// checking it against a fixed table of special-purpose IPv4 ranges.
//
// Returns one of:
//  * |addr| unchanged, when it falls in a range whose |scrub| flag is false
//    (the address may be shown verbatim);
//  * "<range base address>/<prefix length>" (e.g. "10.0.0.0/8"), when it
//    falls in a range whose |scrub| flag is true (only the range is revealed);
//  * the empty string, when |addr| does not parse as an IPv4 literal or
//    matches none of the listed ranges.
63+
std::string MaybeScrubIPv4Address(const std::string& addr) {
64+
struct {
65+
// Base address of the range.
net::IPAddress ip_addr;
66+
// Number of leading bits of |ip_addr| that an input must share to match.
int prefix_length;
67+
// True: report only "base/prefix" for a match. False: return the matched
// address unchanged.
bool scrub;
68+
} static const kWhitelistedIPv4Ranges[] = {
69+
// Private.
70+
{net::IPAddress(10, 0, 0, 0), 8, true},
71+
{net::IPAddress(172, 16, 0, 0), 12, true},
72+
{net::IPAddress(192, 168, 0, 0), 16, true},
73+
// Chrome OS containers and VMs.
74+
{net::IPAddress(100, 115, 92, 0), 24, false},
75+
// Loopback.
76+
{net::IPAddress(127, 0, 0, 0), 8, true},
77+
// Any.
78+
{net::IPAddress(0, 0, 0, 0), 8, true},
79+
// DNS.
80+
{net::IPAddress(8, 8, 8, 8), 32, false},
81+
{net::IPAddress(8, 8, 4, 4), 32, false},
82+
{net::IPAddress(1, 1, 1, 1), 32, false},
83+
// Multicast.
84+
{net::IPAddress(224, 0, 0, 0), 4, true},
85+
// Link local.
86+
{net::IPAddress(169, 254, 0, 0), 16, true},
87+
// Broadcast.
88+
{net::IPAddress(255, 255, 255, 255), 32, false},
89+
};
90+
net::IPAddress input_addr;
91+
// Only strings that parse as IPv4 literals are compared against the table;
// anything else falls through to the empty-string return below.
if (input_addr.AssignFromIPLiteral(addr) && input_addr.IsIPv4()) {
92+
// The table is scanned in declaration order and the first matching range
// determines the result.
for (const auto& range : kWhitelistedIPv4Ranges) {
93+
if (IPAddressMatchesPrefix(input_addr, range.ip_addr,
94+
range.prefix_length)) {
95+
return range.scrub ? base::StringPrintf(
96+
"%s/%d", range.ip_addr.ToString().c_str(),
97+
range.prefix_length)
98+
: addr;
99+
}
100+
}
101+
}
102+
// No listed range applies (or |addr| is not an IPv4 literal).
return "";
103+
}
104+
61105
// Helper macro: Non capturing group
// Expands to "(?:" x ")", relying on adjacent string-literal concatenation,
// so |x| must itself be (or expand to) a string literal regex fragment.
62106
#define NCG(x) "(?:" x ")"
63107
// Helper macro: Optional non capturing group
@@ -377,12 +421,16 @@ std::string AnonymizerTool::AnonymizeCustomPatternWithoutContext(
377421
std::string matched_id_as_string = matched_id.as_string();
378422
std::string replacement_id = (*identifier_space)[matched_id_as_string];
379423
if (replacement_id.empty()) {
380-
// The weird Uint64toString trick is because Windows does not like to deal
381-
// with %zu and a size_t in printf, nor does it support %llu.
382-
replacement_id = base::StringPrintf(
383-
"<%s: %s>", pattern.alias,
384-
base::NumberToString(identifier_space->size()).c_str());
385-
(*identifier_space)[matched_id_as_string] = replacement_id;
424+
replacement_id = MaybeScrubIPv4Address(matched_id_as_string);
425+
if (replacement_id != matched_id_as_string) {
426+
// The weird Uint64toString trick is because Windows does not like
427+
// to deal with %zu and a size_t in printf, nor does it support %llu.
428+
replacement_id = base::StringPrintf(
429+
"<%s: %s>",
430+
replacement_id.empty() ? pattern.alias : replacement_id.c_str(),
431+
base::NumberToString(identifier_space->size()).c_str());
432+
(*identifier_space)[matched_id_as_string] = replacement_id;
433+
}
386434
}
387435

388436
skipped.AppendToString(&result);

components/feedback/anonymizer_tool_unittest.cc

+54-2
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ TEST_F(AnonymizerToolTest, AnonymizeCustomPatterns) {
133133
AnonymizeCustomPatterns("[2001:db8:0:0:0:ff00:42:8329]"));
134134
EXPECT_EQ("[<IPv6: 3>]", AnonymizeCustomPatterns("[2001:db8::ff00:42:8329]"));
135135
EXPECT_EQ("[<IPv6: 4>]", AnonymizeCustomPatterns("[::1]"));
136-
EXPECT_EQ("<IPv4: 1>", AnonymizeCustomPatterns("192.168.0.1"));
136+
EXPECT_EQ("<IPv4: 1>", AnonymizeCustomPatterns("192.160.0.1"));
137137

138138
EXPECT_EQ("<URL: 1>",
139139
AnonymizeCustomPatterns("http://example.com/foo?test=1"));
@@ -211,6 +211,32 @@ TEST_F(AnonymizerToolTest, AnonymizeChunk) {
211211
"example@@1234\n" // No PII, it is not valid email address.
212212
"255.255.155.2\n" // IP address.
213213
"255.255.155.255\n" // IP address.
214+
"127.0.0.1\n" // IPv4 loopback.
215+
"127.255.0.1\n" // IPv4 loopback.
216+
"0.0.0.0\n" // Any IPv4.
217+
"0.255.255.255\n" // Any IPv4.
218+
"10.10.10.100\n" // IPv4 private class A.
219+
"10.10.10.100\n" // Intentional duplicate.
220+
"10.10.10.101\n" // IPv4 private class A.
221+
"10.255.255.255\n" // IPv4 private class A.
222+
"172.16.0.0\n" // IPv4 private class B.
223+
"172.31.255.255\n" // IPv4 private class B.
224+
"172.11.5.5\n" // IP address.
225+
"172.111.5.5\n" // IP address.
226+
"192.168.0.0\n" // IPv4 private class C.
227+
"192.168.255.255\n" // IPv4 private class C.
228+
"192.169.2.120\n" // IP address.
229+
"169.254.0.1\n" // Link local.
230+
"169.200.0.1\n" // IP address.
231+
"224.0.0.24\n" // Multicast.
232+
"240.0.0.0\n" // IP address.
233+
"255.255.255.255\n" // Broadcast.
234+
"100.115.92.92\n" // ChromeOS.
235+
"100.115.91.92\n" // IP address.
236+
"1.1.1.1\n" // DNS
237+
"8.8.8.8\n" // DNS
238+
"8.8.4.4\n" // DNS
239+
"8.8.8.4\n" // IP address.
214240
"255.255.259.255\n" // Not an IP address.
215241
"255.300.255.255\n" // Not an IP address.
216242
"aaaa123.123.45.4aaa\n" // IP address.
@@ -225,9 +251,35 @@ TEST_F(AnonymizerToolTest, AnonymizeChunk) {
225251
"example@@1234\n"
226252
"<IPv4: 1>\n"
227253
"<IPv4: 2>\n"
254+
"<127.0.0.0/8: 3>\n"
255+
"<127.0.0.0/8: 4>\n"
256+
"<0.0.0.0/8: 5>\n"
257+
"<0.0.0.0/8: 6>\n"
258+
"<10.0.0.0/8: 7>\n"
259+
"<10.0.0.0/8: 7>\n"
260+
"<10.0.0.0/8: 8>\n"
261+
"<10.0.0.0/8: 9>\n"
262+
"<172.16.0.0/12: 10>\n"
263+
"<172.16.0.0/12: 11>\n"
264+
"<IPv4: 12>\n"
265+
"<IPv4: 13>\n"
266+
"<192.168.0.0/16: 14>\n"
267+
"<192.168.0.0/16: 15>\n"
268+
"<IPv4: 16>\n"
269+
"<169.254.0.0/16: 17>\n"
270+
"<IPv4: 18>\n"
271+
"<224.0.0.0/4: 19>\n"
272+
"<IPv4: 20>\n"
273+
"255.255.255.255\n"
274+
"100.115.92.92\n"
275+
"<IPv4: 23>\n"
276+
"1.1.1.1\n"
277+
"8.8.8.8\n"
278+
"8.8.4.4\n"
279+
"<IPv4: 27>\n"
228280
"255.255.259.255\n"
229281
"255.300.255.255\n"
230-
"aaaa<IPv4: 3>aaa\n"
282+
"aaaa<IPv4: 28>aaa\n"
231283
"11:11;<IPv6: 1>\n"
232284
"<IPv6: 1>\n"
233285
"11:11:abcdef:0:0:0:0:0\n"

0 commit comments

Comments
 (0)