Skip to content

Commit c283c3a

Browse files
nielsdos authored and derickr committed
Sanitize libxml2 globals before parsing
Fixes GHSA-3qrf-m4j2-pcrr. To parse a document with libxml2, you first need to create a parsing context. The parsing context contains parsing options (e.g. XML_NOENT to substitute entities) that the application (in this case PHP) can set. Unfortunately, libxml2 also supports setting default options through global state. For example, if you call xmlSubstituteEntitiesDefault(1) then the XML_NOENT option will be added to the parsing options every time you create a parsing context **even if the application never requested XML_NOENT**. Third-party extensions can override these globals, in particular the substitute-entities global. This causes entity substitution to be unexpectedly active. Fix it by setting the parsing options to a known, sane value. For API calls that depend on global state we introduce PHP_LIBXML_SANITIZE_GLOBALS() and PHP_LIBXML_RESTORE_GLOBALS(). For other APIs that work directly with a context we introduce php_libxml_sanitize_parse_ctxt_options().
1 parent 8031612 commit c283c3a

14 files changed

+216
-6
lines changed

ext/dom/document.c

+15
Original file line numberDiff line numberDiff line change
@@ -1254,6 +1254,7 @@ static xmlDocPtr dom_document_parser(zval *id, int mode, char *source, size_t so
12541254
options |= XML_PARSE_NOBLANKS;
12551255
}
12561256

1257+
php_libxml_sanitize_parse_ctxt_options(ctxt);
12571258
xmlCtxtUseOptions(ctxt, options);
12581259

12591260
ctxt->recovery = recover;
@@ -1548,7 +1549,9 @@ PHP_METHOD(DOMDocument, xinclude)
15481549

15491550
DOM_GET_OBJ(docp, id, xmlDocPtr, intern);
15501551

1552+
PHP_LIBXML_SANITIZE_GLOBALS(xinclude);
15511553
err = xmlXIncludeProcessFlags(docp, (int)flags);
1554+
PHP_LIBXML_RESTORE_GLOBALS(xinclude);
15521555

15531556
/* XML_XINCLUDE_START and XML_XINCLUDE_END nodes need to be removed as these
15541557
are added via xmlXIncludeProcess to mark beginning and ending of xincluded document
@@ -1586,6 +1589,7 @@ PHP_METHOD(DOMDocument, validate)
15861589

15871590
DOM_GET_OBJ(docp, id, xmlDocPtr, intern);
15881591

1592+
PHP_LIBXML_SANITIZE_GLOBALS(validate);
15891593
cvp = xmlNewValidCtxt();
15901594

15911595
cvp->userData = NULL;
@@ -1597,6 +1601,7 @@ PHP_METHOD(DOMDocument, validate)
15971601
} else {
15981602
RETVAL_FALSE;
15991603
}
1604+
PHP_LIBXML_RESTORE_GLOBALS(validate);
16001605

16011606
xmlFreeValidCtxt(cvp);
16021607

@@ -1631,14 +1636,18 @@ static void _dom_document_schema_validate(INTERNAL_FUNCTION_PARAMETERS, int type
16311636

16321637
DOM_GET_OBJ(docp, id, xmlDocPtr, intern);
16331638

1639+
PHP_LIBXML_SANITIZE_GLOBALS(new_parser_ctxt);
1640+
16341641
switch (type) {
16351642
case DOM_LOAD_FILE:
16361643
if (CHECK_NULL_PATH(source, source_len)) {
1644+
PHP_LIBXML_RESTORE_GLOBALS(new_parser_ctxt);
16371645
zend_argument_value_error(1, "must not contain any null bytes");
16381646
RETURN_THROWS();
16391647
}
16401648
valid_file = _dom_get_valid_file_path(source, resolved_path, MAXPATHLEN);
16411649
if (!valid_file) {
1650+
PHP_LIBXML_RESTORE_GLOBALS(new_parser_ctxt);
16421651
php_error_docref(NULL, E_WARNING, "Invalid Schema file source");
16431652
RETURN_FALSE;
16441653
}
@@ -1659,6 +1668,7 @@ static void _dom_document_schema_validate(INTERNAL_FUNCTION_PARAMETERS, int type
16591668
parser);
16601669
sptr = xmlSchemaParse(parser);
16611670
xmlSchemaFreeParserCtxt(parser);
1671+
PHP_LIBXML_RESTORE_GLOBALS(new_parser_ctxt);
16621672
if (!sptr) {
16631673
if (!EG(exception)) {
16641674
php_error_docref(NULL, E_WARNING, "Invalid Schema");
@@ -1679,11 +1689,13 @@ static void _dom_document_schema_validate(INTERNAL_FUNCTION_PARAMETERS, int type
16791689
valid_opts |= XML_SCHEMA_VAL_VC_I_CREATE;
16801690
}
16811691

1692+
PHP_LIBXML_SANITIZE_GLOBALS(validate);
16821693
xmlSchemaSetValidOptions(vptr, valid_opts);
16831694
xmlSchemaSetValidErrors(vptr, php_libxml_error_handler, php_libxml_error_handler, vptr);
16841695
is_valid = xmlSchemaValidateDoc(vptr, docp);
16851696
xmlSchemaFree(sptr);
16861697
xmlSchemaFreeValidCtxt(vptr);
1698+
PHP_LIBXML_RESTORE_GLOBALS(validate);
16871699

16881700
if (is_valid == 0) {
16891701
RETURN_TRUE;
@@ -1754,12 +1766,14 @@ static void _dom_document_relaxNG_validate(INTERNAL_FUNCTION_PARAMETERS, int typ
17541766
return;
17551767
}
17561768

1769+
PHP_LIBXML_SANITIZE_GLOBALS(parse);
17571770
xmlRelaxNGSetParserErrors(parser,
17581771
(xmlRelaxNGValidityErrorFunc) php_libxml_error_handler,
17591772
(xmlRelaxNGValidityWarningFunc) php_libxml_error_handler,
17601773
parser);
17611774
sptr = xmlRelaxNGParse(parser);
17621775
xmlRelaxNGFreeParserCtxt(parser);
1776+
PHP_LIBXML_RESTORE_GLOBALS(parse);
17631777
if (!sptr) {
17641778
php_error_docref(NULL, E_WARNING, "Invalid RelaxNG");
17651779
RETURN_FALSE;
@@ -1858,6 +1872,7 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */
18581872
ctxt->sax->error = php_libxml_ctx_error;
18591873
ctxt->sax->warning = php_libxml_ctx_warning;
18601874
}
1875+
php_libxml_sanitize_parse_ctxt_options(ctxt);
18611876
if (options) {
18621877
htmlCtxtUseOptions(ctxt, (int)options);
18631878
}

ext/dom/documentfragment.c

+2
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,9 @@ PHP_METHOD(DOMDocumentFragment, appendXML) {
114114
}
115115

116116
if (data) {
117+
PHP_LIBXML_SANITIZE_GLOBALS(parse);
117118
err = xmlParseBalancedChunkMemory(nodep->doc, NULL, NULL, 0, (xmlChar *) data, &lst);
119+
PHP_LIBXML_RESTORE_GLOBALS(parse);
118120
if (err != 0) {
119121
RETURN_FALSE;
120122
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
--TEST--
2+
GHSA-3qrf-m4j2-pcrr (libxml global state entity loader bypass)
3+
--SKIPIF--
4+
<?php
5+
if (!extension_loaded('libxml')) die('skip libxml extension not available');
6+
if (!extension_loaded('dom')) die('skip dom extension not available');
7+
if (!extension_loaded('zend-test')) die('skip zend-test extension not available');
8+
?>
9+
--FILE--
10+
<?php
11+
12+
// Document whose internal DTD subset declares an external parameter entity
// (%bork;, SYSTEM "php://nope") and immediately references it.
$xml = "<?xml version='1.0'?><!DOCTYPE root [<!ENTITY % bork SYSTEM \"php://nope\"> %bork;]><nothing/>";
13+
14+
// Collect libxml errors internally so they can be read with libxml_get_errors().
libxml_use_internal_errors(true);
15+
16+
// Load $xml into a fresh DOMDocument (warnings suppressed with @), append a
// fragment that references the general entity &bork;, then dump the trimmed
// message of every libxml error collected so far.
// NOTE(review): the error list is never cleared here, so a later call
// presumably prints the earlier call's errors again - confirm.
function parseXML($xml) {
17+
$doc = new DOMDocument();
18+
@$doc->loadXML($xml);
19+
$doc->createDocumentFragment()->appendXML("&bork;");
20+
foreach (libxml_get_errors() as $error) {
21+
var_dump(trim($error->message));
22+
}
23+
}
24+
25+
// First run: before the libxml global state is overridden.
parseXML($xml);
26+
// Provided by the zend-test extension (see SKIPIF); presumably flips the
// libxml2 global parser defaults the way a third-party extension could.
zend_test_override_libxml_global_state();
27+
// Second run: after the override. The EXPECT section lists only
// "Entity 'bork' not defined" messages (three in total) for both runs.
parseXML($xml);
28+
29+
echo "Done\n";
30+
31+
?>
32+
--EXPECT--
33+
string(25) "Entity 'bork' not defined"
34+
string(25) "Entity 'bork' not defined"
35+
string(25) "Entity 'bork' not defined"
36+
Done

ext/libxml/php_libxml.h

+36
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,42 @@ PHP_LIBXML_API void php_libxml_shutdown(void);
118118
ZEND_TSRMLS_CACHE_EXTERN()
119119
#endif
120120

121+
/* Other extensions may override the global state options; these global options
122+
* are copied initially to ctxt->options. Set the options to a known good value.
123+
* See libxml2 globals.c and parserInternals.c.
124+
* The unique_name argument allows multiple sanitizes and restores within the
125+
* same function, even nested if necessary. */
126+
/* PHP_LIBXML_SANITIZE_GLOBALS(unique_name)
 * Expands to a run of declarations and assignments (not a single statement):
 *  - copies xmlLoadExtDtdDefaultValue and xmlDoValidityCheckingDefaultValue
 *    into new int locals and then sets both globals to 0;
 *  - calls xmlPedanticParserDefault(0), xmlSubstituteEntitiesDefault(0),
 *    xmlLineNumbersDefault(0) and xmlKeepBlanksDefault(1), storing each
 *    return value in a new int local.
 * The locals are named xml_old_<option>_<unique_name>; the matching
 * PHP_LIBXML_RESTORE_GLOBALS(unique_name) reads exactly those names, so it
 * must be expanded where these declarations are still in scope. */
#define PHP_LIBXML_SANITIZE_GLOBALS(unique_name) \
127+
int xml_old_loadsubset_##unique_name = xmlLoadExtDtdDefaultValue; \
128+
xmlLoadExtDtdDefaultValue = 0; \
129+
int xml_old_validate_##unique_name = xmlDoValidityCheckingDefaultValue; \
130+
xmlDoValidityCheckingDefaultValue = 0; \
131+
int xml_old_pedantic_##unique_name = xmlPedanticParserDefault(0); \
132+
int xml_old_substitute_##unique_name = xmlSubstituteEntitiesDefault(0); \
133+
int xml_old_linenrs_##unique_name = xmlLineNumbersDefault(0); \
134+
int xml_old_blanks_##unique_name = xmlKeepBlanksDefault(1);
135+
136+
/* PHP_LIBXML_RESTORE_GLOBALS(unique_name)
 * Writes back the values saved by PHP_LIBXML_SANITIZE_GLOBALS(unique_name):
 * the two default-value globals are assigned directly, the other four are
 * handed back to their xml*Default() setter (return values are discarded).
 * Requires the xml_old_*_<unique_name> locals declared by the matching
 * sanitize macro to be in scope.
 * The body is wrapped in do { } while (0) so the expansion is one statement
 * and stays correct inside an unbraced if/else; invoke it with a trailing
 * semicolon. */
#define PHP_LIBXML_RESTORE_GLOBALS(unique_name) \
	do { \
		xmlLoadExtDtdDefaultValue = xml_old_loadsubset_##unique_name; \
		xmlDoValidityCheckingDefaultValue = xml_old_validate_##unique_name; \
		(void) xmlPedanticParserDefault(xml_old_pedantic_##unique_name); \
		(void) xmlSubstituteEntitiesDefault(xml_old_substitute_##unique_name); \
		(void) xmlLineNumbersDefault(xml_old_linenrs_##unique_name); \
		(void) xmlKeepBlanksDefault(xml_old_blanks_##unique_name); \
	} while (0)
143+
144+
/* Alternative for above, working directly on the context and not setting globals.
145+
* Generally faster because no locking is involved, and this has the advantage that it sets the options to a known good value. */
146+
static zend_always_inline void php_libxml_sanitize_parse_ctxt_options(xmlParserCtxtPtr ctxt)
147+
{
148+
ctxt->loadsubset = 0;
149+
ctxt->validate = 0;
150+
ctxt->pedantic = 0;
151+
ctxt->replaceEntities = 0;
152+
ctxt->linenumbers = 0;
153+
ctxt->keepBlanks = 1;
154+
ctxt->options = 0;
155+
}
156+
121157
#else /* HAVE_LIBXML */
122158
#define libxml_module_ptr NULL
123159
#endif

ext/simplexml/simplexml.c

+6
Original file line numberDiff line numberDiff line change
@@ -2278,7 +2278,9 @@ PHP_FUNCTION(simplexml_load_file)
22782278
RETURN_THROWS();
22792279
}
22802280

2281+
PHP_LIBXML_SANITIZE_GLOBALS(read_file);
22812282
docp = xmlReadFile(filename, NULL, (int)options);
2283+
PHP_LIBXML_RESTORE_GLOBALS(read_file);
22822284

22832285
if (!docp) {
22842286
RETURN_FALSE;
@@ -2331,7 +2333,9 @@ PHP_FUNCTION(simplexml_load_string)
23312333
RETURN_THROWS();
23322334
}
23332335

2336+
PHP_LIBXML_SANITIZE_GLOBALS(read_memory);
23342337
docp = xmlReadMemory(data, (int)data_len, NULL, NULL, (int)options);
2338+
PHP_LIBXML_RESTORE_GLOBALS(read_memory);
23352339

23362340
if (!docp) {
23372341
RETURN_FALSE;
@@ -2380,7 +2384,9 @@ SXE_METHOD(__construct)
23802384
RETURN_THROWS();
23812385
}
23822386

2387+
PHP_LIBXML_SANITIZE_GLOBALS(read_file_or_memory);
23832388
docp = is_url ? xmlReadFile(data, NULL, (int)options) : xmlReadMemory(data, (int)data_len, NULL, NULL, (int)options);
2389+
PHP_LIBXML_RESTORE_GLOBALS(read_file_or_memory);
23842390

23852391
if (!docp) {
23862392
((php_libxml_node_object *)sxe)->document = NULL;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
--TEST--
2+
GHSA-3qrf-m4j2-pcrr (libxml global state entity loader bypass)
3+
--SKIPIF--
4+
<?php
5+
if (!extension_loaded('libxml')) die('skip libxml extension not available');
6+
if (!extension_loaded('simplexml')) die('skip simplexml extension not available');
7+
if (!extension_loaded('zend-test')) die('skip zend-test extension not available');
8+
?>
9+
--FILE--
10+
<?php
11+
12+
// Document whose internal DTD subset declares an external parameter entity
// (%bork;, SYSTEM "php://nope") and immediately references it.
$xml = "<?xml version='1.0'?><!DOCTYPE root [<!ENTITY % bork SYSTEM \"php://nope\"> %bork;]><nothing/>";
13+
14+
// Collect libxml errors internally instead of emitting them.
libxml_use_internal_errors(true);
15+
// Provided by the zend-test extension (see SKIPIF); presumably flips the
// libxml2 global parser defaults the way a third-party extension could.
zend_test_override_libxml_global_state();
16+
17+
// Each of the three SimpleXML entry points below parses $xml after the
// override. The EXPECT section contains only the echoed section headers,
// i.e. none of the loads is expected to produce output of its own.
echo "--- String test ---\n";
18+
simplexml_load_string($xml);
19+
echo "--- Constructor test ---\n";
20+
new SimpleXMLElement($xml);
21+
echo "--- File test ---\n";
22+
// The file variant needs the document on disk; the CLEAN section removes it.
file_put_contents("libxml_global_state_entity_loader_bypass.tmp", $xml);
23+
simplexml_load_file("libxml_global_state_entity_loader_bypass.tmp");
24+
25+
echo "Done\n";
26+
27+
?>
28+
--CLEAN--
29+
<?php
30+
@unlink("libxml_global_state_entity_loader_bypass.tmp");
31+
?>
32+
--EXPECT--
33+
--- String test ---
34+
--- Constructor test ---
35+
--- File test ---
36+
Done

ext/soap/php_xml.c

+2
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ xmlDocPtr soap_xmlParseFile(const char *filename)
9191
if (ctxt) {
9292
zend_bool old;
9393

94+
php_libxml_sanitize_parse_ctxt_options(ctxt);
9495
ctxt->keepBlanks = 0;
9596
ctxt->sax->ignorableWhitespace = soap_ignorableWhitespace;
9697
ctxt->sax->comment = soap_Comment;
@@ -139,6 +140,7 @@ xmlDocPtr soap_xmlParseMemory(const void *buf, size_t buf_size)
139140
if (ctxt) {
140141
zend_bool old;
141142

143+
php_libxml_sanitize_parse_ctxt_options(ctxt);
142144
ctxt->sax->ignorableWhitespace = soap_ignorableWhitespace;
143145
ctxt->sax->comment = soap_Comment;
144146
ctxt->sax->warning = NULL;

ext/xml/compat.c

+2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "php.h"
1818
#if defined(HAVE_LIBXML) && (defined(HAVE_XML) || defined(HAVE_XMLRPC)) && !defined(HAVE_LIBEXPAT)
1919
#include "expat_compat.h"
20+
#include "ext/libxml/php_libxml.h"
2021

2122
typedef struct _php_xml_ns {
2223
xmlNsPtr nsptr;
@@ -469,6 +470,7 @@ XML_ParserCreate_MM(const XML_Char *encoding, const XML_Memory_Handling_Suite *m
469470
return NULL;
470471
}
471472

473+
php_libxml_sanitize_parse_ctxt_options(parser->parser);
472474
xmlCtxtUseOptions(parser->parser, XML_PARSE_OLDSAX);
473475

474476
parser->parser->replaceEntities = 1;

ext/xmlreader/php_xmlreader.c

+9
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,7 @@ static xmlRelaxNGPtr _xmlreader_get_relaxNG(char *source, size_t source_len, siz
284284
return NULL;
285285
}
286286

287+
PHP_LIBXML_SANITIZE_GLOBALS(parse);
287288
if (error_func || warn_func) {
288289
xmlRelaxNGSetParserErrors(parser,
289290
(xmlRelaxNGValidityErrorFunc) error_func,
@@ -292,6 +293,7 @@ static xmlRelaxNGPtr _xmlreader_get_relaxNG(char *source, size_t source_len, siz
292293
}
293294
sptr = xmlRelaxNGParse(parser);
294295
xmlRelaxNGFreeParserCtxt(parser);
296+
PHP_LIBXML_RESTORE_GLOBALS(parse);
295297

296298
return sptr;
297299
}
@@ -872,7 +874,9 @@ PHP_METHOD(XMLReader, open)
872874
valid_file = _xmlreader_get_valid_file_path(source, resolved_path, MAXPATHLEN );
873875

874876
if (valid_file) {
877+
PHP_LIBXML_SANITIZE_GLOBALS(reader_for_file);
875878
reader = xmlReaderForFile(valid_file, encoding, options);
879+
PHP_LIBXML_RESTORE_GLOBALS(reader_for_file);
876880
}
877881

878882
if (reader == NULL) {
@@ -945,7 +949,9 @@ PHP_METHOD(XMLReader, setSchema)
945949

946950
intern = Z_XMLREADER_P(id);
947951
if (intern && intern->ptr) {
952+
PHP_LIBXML_SANITIZE_GLOBALS(schema);
948953
retval = xmlTextReaderSchemaValidate(intern->ptr, source);
954+
PHP_LIBXML_RESTORE_GLOBALS(schema);
949955

950956
if (retval == 0) {
951957
RETURN_TRUE;
@@ -1068,6 +1074,7 @@ PHP_METHOD(XMLReader, XML)
10681074
}
10691075
uri = (char *) xmlCanonicPath((const xmlChar *) resolved_path);
10701076
}
1077+
PHP_LIBXML_SANITIZE_GLOBALS(text_reader);
10711078
reader = xmlNewTextReader(inputbfr, uri);
10721079

10731080
if (reader != NULL) {
@@ -1086,9 +1093,11 @@ PHP_METHOD(XMLReader, XML)
10861093
xmlFree(uri);
10871094
}
10881095

1096+
PHP_LIBXML_RESTORE_GLOBALS(text_reader);
10891097
return;
10901098
}
10911099
}
1100+
PHP_LIBXML_RESTORE_GLOBALS(text_reader);
10921101
}
10931102

10941103
if (uri) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
--TEST--
2+
GHSA-3qrf-m4j2-pcrr (libxml global state entity loader bypass)
3+
--SKIPIF--
4+
<?php
5+
if (!extension_loaded('libxml')) die('skip libxml extension not available');
6+
if (!extension_loaded('xmlreader')) die('skip xmlreader extension not available');
7+
if (!extension_loaded('zend-test')) die('skip zend-test extension not available');
8+
?>
9+
--FILE--
10+
<?php
11+
12+
// Document whose internal DTD subset declares an external parameter entity
// (%bork;, SYSTEM "php://nope") and immediately references it.
$xml = "<?xml version='1.0'?><!DOCTYPE root [<!ENTITY % bork SYSTEM \"php://nope\"> %bork;]><nothing/>";
13+
14+
// Collect libxml errors internally instead of emitting them.
libxml_use_internal_errors(true);
15+
// Provided by the zend-test extension (see SKIPIF); presumably flips the
// libxml2 global parser defaults the way a third-party extension could.
zend_test_override_libxml_global_state();
16+
17+
// Both XMLReader entry points are exercised after the override, each followed
// by a single read(). The EXPECT section contains only the echoed section
// headers, i.e. neither reader is expected to produce output of its own.
echo "--- String test ---\n";
18+
$reader = XMLReader::xml($xml);
19+
$reader->read();
20+
echo "--- File test ---\n";
21+
// The file variant needs the document on disk; the CLEAN section removes it.
file_put_contents("libxml_global_state_entity_loader_bypass.tmp", $xml);
22+
$reader = XMLReader::open("libxml_global_state_entity_loader_bypass.tmp");
23+
$reader->read();
24+
25+
echo "Done\n";
26+
27+
?>
28+
--CLEAN--
29+
<?php
30+
@unlink("libxml_global_state_entity_loader_bypass.tmp");
31+
?>
32+
--EXPECT--
33+
--- String test ---
34+
--- File test ---
35+
Done

0 commit comments

Comments
 (0)