expat 2022-09-21 (e0cbe708)

Code extracted from:

    https://gitlab.kitware.com/third-party/expat.git

at commit e0cbe708cc4d571e4134d52b92e43523958a3555 (for/vtk-20220921-2.4.8).
HTGUnlimitedGradien
Expat Upstream 9 months ago committed by Ben Boeckel
parent 7bc23cfb4b
commit ca4219f06b

@ -7,7 +7,7 @@
#
# Copyright (c) 2010 Patrick Spendrin <ps_ml@gmx.de>
# Copyright (c) 2012 Karl Waclawek <karl@waclawek.net>
# Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
# Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
# Copyright (c) 2016 Sergei Nikulov <sergey.nikulov@gmail.com>
# Copyright (c) 2016 Björn Lindahl <bjorn.lindahl@foi.se>
# Copyright (c) 2016 Tobias Taschner <github@tc84.de>
@ -29,6 +29,7 @@
# Copyright (c) 2020 Gulliver <gulliver@traumkristalle.net>
# Copyright (c) 2020 Thomas Beutlich <tc@tbeu.de>
# Copyright (c) 2021 Alex Richardson <Alexander.Richardson@cl.cam.ac.uk>
# Copyright (c) 2022 Johnny Jazeix <jazeix@gmail.com>
# Unlike most of Expat,
# this file is copyrighted under the BSD-license for buildsystem files of KDE.
@ -66,7 +67,7 @@ endif()
project(expat
VERSION
2.4.1
2.4.8
LANGUAGES
C
)
@ -109,6 +110,10 @@ if(DEFINED BUILD_SHARED_LIBS)
else()
set(_EXPAT_SHARED_LIBS_DEFAULT ON)
endif()
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE NoConfig) # so that accessing CMAKE_*_POSTFIX will be waterproof
endif()
string(TOUPPER "${CMAKE_BUILD_TYPE}" _EXPAT_BUILD_TYPE_UPPER)
#
# Configuration
@ -150,6 +155,9 @@ mark_as_advanced(EXPAT_MIN_SIZE)
if(MSVC OR _EXPAT_HELP)
set(EXPAT_MSVC_STATIC_CRT OFF CACHE BOOL "Use /MT flag (static CRT) when compiling in MSVC")
endif()
if(NOT _EXPAT_HELP)
set(_EXPAT_M32 OFF CACHE BOOL "(Unofficial!) Produce 32bit code with -m32")
endif()
else ()
set(EXPAT_BUILD_TOOLS OFF)
set(EXPAT_BUILD_EXAMPLES OFF)
@ -179,6 +187,7 @@ set(EXPAT_MIN_SIZE OFF)
if(MSVC OR _EXPAT_HELP)
set(EXPAT_MSVC_STATIC_CRT OFF)
endif()
set(_EXPAT_M32 OFF)
endif ()
if(EXPAT_BUILD_TESTS)
@ -335,10 +344,12 @@ if (MSVC)
if (EXPAT_MSVC_STATIC_CRT)
message("-- Using static CRT ${EXPAT_MSVC_STATIC_CRT}")
foreach(flag_var
CMAKE_CXX_FLAGS_${_EXPAT_BUILD_TYPE_UPPER}
CMAKE_CXX_FLAGS_DEBUG
CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL
CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS_${_EXPAT_BUILD_TYPE_UPPER}
CMAKE_C_FLAGS_DEBUG
CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL
@ -349,46 +360,68 @@ if (MSVC)
endif()
endif()
if(_EXPAT_M32 AND NOT MSVC)
foreach(flag_var
CMAKE_CXX_FLAGS_${_EXPAT_BUILD_TYPE_UPPER}
CMAKE_CXX_FLAGS_DEBUG
CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL
CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS_${_EXPAT_BUILD_TYPE_UPPER}
CMAKE_C_FLAGS_DEBUG
CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL
CMAKE_C_FLAGS_RELWITHDEBINFO
)
set(${flag_var} "${${flag_var}} -m32")
endforeach()
endif()
include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/lib)
if (FALSE) # XXX(kitware): Settings handled by VTK.
if(MSVC)
add_definitions(-D_CRT_SECURE_NO_WARNINGS -wd4996)
endif()
if(WIN32)
if(_EXPAT_UNICODE_WCHAR_T)
set(_POSTFIX_WIDE "w")
endif()
if(MSVC AND NOT EXPAT_SHARED_LIBS)
if(EXPAT_MSVC_STATIC_CRT)
set(_POSTFIX_CRT "MT")
else()
set(_POSTFIX_CRT "MD")
endif()
#
# Library filename postfix
#
if(_EXPAT_UNICODE)
set(_POSTFIX_WIDE "w")
endif()
endif ()
if(MSVC AND NOT EXPAT_SHARED_LIBS)
if(EXPAT_MSVC_STATIC_CRT)
set(_POSTFIX_CRT "MT")
else()
set(_POSTFIX_CRT "MD")
endif()
endif()
foreach(postfix_var
CMAKE_DEBUG_POSTFIX
CMAKE_RELEASE_POSTFIX
CMAKE_MINSIZEREL_POSTFIX
CMAKE_RELWITHDEBINFO_POSTFIX
)
if(postfix_var STREQUAL "CMAKE_DEBUG_POSTFIX")
set(_POSTFIX_DEBUG "d")
else()
set(_POSTFIX_DEBUG "")
endif()
if (FALSE) # XXX(kitware): Settings handled by VTK.
foreach(postfix_var
CMAKE_${_EXPAT_BUILD_TYPE_UPPER}_POSTFIX
CMAKE_DEBUG_POSTFIX
CMAKE_RELEASE_POSTFIX
CMAKE_MINSIZEREL_POSTFIX
CMAKE_RELWITHDEBINFO_POSTFIX
)
if(WIN32 AND postfix_var STREQUAL "CMAKE_DEBUG_POSTFIX")
set(_POSTFIX_DEBUG "d")
else()
set(_POSTFIX_DEBUG "") # needs a reset because of being looped
endif()
set(${postfix_var} "${_POSTFIX_WIDE}${_POSTFIX_DEBUG}${_POSTFIX_CRT}" CACHE STRING "Windows binary postfix, e.g. libexpat<postfix=[w][d][MD|MT]>.lib")
endforeach()
endif()
set(${postfix_var} "${_POSTFIX_WIDE}${_POSTFIX_DEBUG}${_POSTFIX_CRT}" CACHE STRING "Library filename postfix, e.g. libexpat<postfix=[w][d][MD|MT]>.lib")
endforeach()
endif ()
#
# C library
#
set(expat_SRCS
set(_EXPAT_C_SOURCES
lib/xmlparse.c
lib/xmlrole.c
lib/xmltok.c
@ -402,25 +435,22 @@ if(EXPAT_SHARED_LIBS)
set(_SHARED SHARED)
if(MSVC)
if (FALSE) # XXX(kitware): Exporting is handled via generate_export_header
set(expat_SRCS ${expat_SRCS} lib/libexpat.def)
set(_EXPAT_EXTRA_SOURCES ${_EXPAT_EXTRA_SOURCES} lib/libexpat.def)
endif ()
endif()
if(WIN32)
# Add DLL version
string(REPLACE "." "," _EXPAT_DLL_VERSION ${PROJECT_VERSION}.0)
set(_EXPAT_EXTRA_SOURCES ${_EXPAT_EXTRA_SOURCES} win32/version.rc)
endif()
else()
set(_SHARED STATIC)
endif()
# Avoid colliding with Expat.dll of Perl's XML::Parser::Expat
if(WIN32 AND NOT MINGW)
set(_EXPAT_OUTPUT_NAME libexpat) # CMAKE_*_POSTFIX applies, see above
else()
if(_EXPAT_UNICODE)
set(_EXPAT_OUTPUT_NAME expatw)
else()
set(_EXPAT_OUTPUT_NAME expat)
endif()
add_library(expat ${_SHARED} ${_EXPAT_C_SOURCES} ${_EXPAT_EXTRA_SOURCES})
if(_EXPAT_LIBM_FOUND)
target_link_libraries(expat m)
endif()
add_library(expat ${_SHARED} ${expat_SRCS})
if(EXPAT_WITH_LIBBSD)
target_link_libraries(expat ${LIB_BSD})
endif()
@ -432,22 +462,45 @@ set(expat_headers
lib/vtk_expat_mangle.h)
vtk_module_add_module(VTK::expat
SOURCES ${expat_SRCS}
SOURCES ${_EXPAT_C_SOURCES}
HEADERS ${expat_headers}
HEADERS_SUBDIR "vtkexpat/lib")
endif ()
if (FALSE) # XXX(kitware): VTK handles installation
set(LIBCURRENT 9) # sync
set(LIBREVISION 1) # with
set(LIBREVISION 8) # with
set(LIBAGE 8) # configure.ac!
math(EXPR LIBCURRENT_MINUS_AGE "${LIBCURRENT} - ${LIBAGE}")
set_property(TARGET expat PROPERTY OUTPUT_NAME "${_EXPAT_OUTPUT_NAME}")
if(NOT WIN32)
set_property(TARGET expat PROPERTY VERSION ${LIBCURRENT_MINUS_AGE}.${LIBAGE}.${LIBREVISION})
set_property(TARGET expat PROPERTY SOVERSION ${LIBCURRENT_MINUS_AGE})
set_property(TARGET expat PROPERTY NO_SONAME ${NO_SONAME})
if(APPLE)
if(NOT CMAKE_VERSION VERSION_GREATER_EQUAL 3.17)
message(FATAL_ERROR "Expat requires CMake >=3.17 on platform \"APPLE\".")
endif()
# NOTE: This intends to talk CMake into compatiblity with GNU Libtool
math(EXPR _EXPAT_MACHO_COMPATIBILITY_VERSION "${LIBCURRENT} + 1")
set(_EXPAT_MACHO_CURRENT_VERSION "${_EXPAT_MACHO_COMPATIBILITY_VERSION}.${LIBREVISION}")
set_property(TARGET expat PROPERTY MACHO_COMPATIBILITY_VERSION ${_EXPAT_MACHO_COMPATIBILITY_VERSION})
set_property(TARGET expat PROPERTY MACHO_CURRENT_VERSION ${_EXPAT_MACHO_CURRENT_VERSION})
endif()
endif()
if(WIN32 AND NOT MINGW)
# NOTE: This avoids a name collision with Expat.dll of Perl's XML::Parser::Expat
# on Windows by resorting to filename libexpat.dll since Expat 1.95.3.
# Everything but MSVC is already adding prefix "lib", automatically.
# NOTE: "set_property(TARGET expat PROPERTY PREFIX lib)" would only affect *.dll
# files but not *.lib files, so we have to rely on property OUTPUT_NAME, instead.
# Property CMAKE_*_POSTFIX still applies.
set(_EXPAT_OUTPUT_NAME libexpat)
set_property(TARGET expat PROPERTY OUTPUT_NAME ${_EXPAT_OUTPUT_NAME})
else()
set(_EXPAT_OUTPUT_NAME expat)
endif()
target_include_directories(expat
@ -457,8 +510,12 @@ target_include_directories(expat
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
)
if(NOT EXPAT_SHARED_LIBS AND WIN32)
target_compile_definitions(expat PUBLIC -DXML_STATIC)
if(WIN32)
if(EXPAT_SHARED_LIBS)
target_compile_definitions(expat PRIVATE VER_FILEVERSION=${_EXPAT_DLL_VERSION})
else()
target_compile_definitions(expat PUBLIC -DXML_STATIC)
endif()
endif()
expat_install(TARGETS expat EXPORT expat
@ -474,23 +531,42 @@ endif ()
#
if (FALSE) # XXX(kitware): Do not install a .pc file.
if(EXPAT_BUILD_PKGCONFIG)
set(prefix ${CMAKE_INSTALL_PREFIX})
set(exec_prefix "\${prefix}")
if(CMAKE_INSTALL_LIBDIR MATCHES "^/")
set(libdir "${CMAKE_INSTALL_LIBDIR}")
set(_expat_pkgconfig_libdir "${CMAKE_INSTALL_LIBDIR}")
else()
set(libdir "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}")
set(_expat_pkgconfig_libdir "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}")
endif()
if(CMAKE_INSTALL_INCLUDEDIR MATCHES "^/")
set(includedir "${CMAKE_INSTALL_INCLUDEDIR}")
set(_expat_pkgconfig_includedir "${CMAKE_INSTALL_INCLUDEDIR}")
else()
set(includedir "\${prefix}/${CMAKE_INSTALL_INCLUDEDIR}")
set(_expat_pkgconfig_includedir "\${prefix}/${CMAKE_INSTALL_INCLUDEDIR}")
endif()
configure_file(expat.pc.in ${CMAKE_CURRENT_BINARY_DIR}/expat.pc @ONLY)
expat_install(FILES ${CMAKE_CURRENT_BINARY_DIR}/expat.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
set_target_properties(expat PROPERTIES
pkgconfig_prefix "${CMAKE_INSTALL_PREFIX}"
pkgconfig_exec_prefix "\${prefix}"
pkgconfig_libdir "${_expat_pkgconfig_libdir}"
pkgconfig_includedir "${_expat_pkgconfig_includedir}"
pkgconfig_version "${PACKAGE_VERSION}")
foreach(_build_type ${CMAKE_BUILD_TYPE} Debug Release RelWithDebInfo MinSizeRel)
string(TOLOWER "${_build_type}" _build_type_lower)
string(TOUPPER "${_build_type}" _build_type_upper)
set_property(TARGET expat PROPERTY "pkgconfig_${_build_type_lower}_name" "expat${CMAKE_${_build_type_upper}_POSTFIX}")
set_property(TARGET expat PROPERTY "pkgconfig_${_build_type_lower}_output_name" "${_EXPAT_OUTPUT_NAME}${CMAKE_${_build_type_upper}_POSTFIX}")
if(_EXPAT_LIBM_FOUND)
set_property(TARGET expat PROPERTY "pkgconfig_libm" "-lm")
else()
set_property(TARGET expat PROPERTY "pkgconfig_libm" "")
endif()
endforeach()
file(GENERATE
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/$<CONFIG>/expat.pc
INPUT ${PROJECT_SOURCE_DIR}/expat.pc.cmake)
expat_install(FILES ${CMAKE_CURRENT_BINARY_DIR}/$<CONFIG>/expat.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
endif()
endif ()
@ -508,6 +584,9 @@ if(EXPAT_BUILD_TOOLS)
add_executable(xmlwf ${xmlwf_SRCS})
set_property(TARGET xmlwf PROPERTY RUNTIME_OUTPUT_DIRECTORY xmlwf)
target_link_libraries(xmlwf expat)
if(_EXPAT_LIBM_FOUND)
target_link_libraries(xmlwf m)
endif()
expat_install(TARGETS xmlwf DESTINATION ${CMAKE_INSTALL_BINDIR})
if(MINGW AND _EXPAT_UNICODE_WCHAR_T)
@ -555,7 +634,7 @@ if(EXPAT_BUILD_TESTS)
tests/memcheck.c
tests/minicheck.c
tests/structdata.c
${expat_SRCS}
${_EXPAT_C_SOURCES}
)
if(NOT MSVC)
@ -581,6 +660,11 @@ if(EXPAT_BUILD_TESTS)
set_property(TARGET runtestspp PROPERTY RUNTIME_OUTPUT_DIRECTORY tests)
expat_add_test(runtestspp $<TARGET_FILE:runtestspp>)
if(_EXPAT_LIBM_FOUND)
target_link_libraries(runtests m)
target_link_libraries(runtestspp m)
endif()
if(EXPAT_WITH_LIBBSD)
target_link_libraries(runtests ${LIB_BSD})
target_link_libraries(runtestspp ${LIB_BSD})
@ -619,7 +703,7 @@ if(EXPAT_BUILD_FUZZERS)
set(encoding_types UTF-16 UTF-8 ISO-8859-1 US-ASCII UTF-16BE UTF-16LE)
set(fuzz_targets xml_parse_fuzzer xml_parsebuffer_fuzzer)
add_library(fuzzpat STATIC ${expat_SRCS})
add_library(fuzzpat STATIC ${_EXPAT_C_SOURCES})
if(NOT EXPAT_OSSFUZZ_BUILD)
target_compile_options(fuzzpat PRIVATE -fsanitize=fuzzer-no-link)
endif()
@ -787,20 +871,31 @@ elseif(EXPAT_CHAR_TYPE STREQUAL "wchar_t")
else()
set(_EXPAT_CHAR_TYPE_SUMMARY "ERROR")
endif()
string(TOUPPER "${CMAKE_BUILD_TYPE}" _EXPAT_BUILD_TYPE_UPPER)
# NOTE: We're not accessing global property GENERATOR_IS_MULTI_CONFIG
# because that would require CMake >=3.9
if(CMAKE_CONFIGURATION_TYPES)
set(_EXPAT_GENERATOR_IS_MULTI_CONFIG TRUE)
else()
set(_EXPAT_GENERATOR_IS_MULTI_CONFIG FALSE)
endif()
message(STATUS "===========================================================================")
message(STATUS "")
message(STATUS "Configuration")
message(STATUS " Generator .................. ${CMAKE_GENERATOR}")
if(_EXPAT_GENERATOR_IS_MULTI_CONFIG)
message(STATUS " Build types ................ ${CMAKE_CONFIGURATION_TYPES}")
else()
message(STATUS " Build type ................. ${CMAKE_BUILD_TYPE}")
endif()
message(STATUS " Prefix ..................... ${CMAKE_INSTALL_PREFIX}")
message(STATUS " Build type ................. ${CMAKE_BUILD_TYPE}")
message(STATUS " Shared libraries ........... ${EXPAT_SHARED_LIBS}")
if(MSVC)
message(STATUS " Static CRT ................. ${EXPAT_MSVC_STATIC_CRT}")
endif()
message(STATUS " Character type ............. ${_EXPAT_CHAR_TYPE_SUMMARY}")
if(WIN32)
message(STATUS " Binary postfix ............. ${CMAKE_${_EXPAT_BUILD_TYPE_UPPER}_POSTFIX}")
if(NOT _EXPAT_GENERATOR_IS_MULTI_CONFIG)
message(STATUS " Library name postfix ....... ${CMAKE_${_EXPAT_BUILD_TYPE_UPPER}_POSTFIX}")
endif()
message(STATUS "")
message(STATUS " Build documentation ........ ${EXPAT_BUILD_DOCS}")

@ -2,6 +2,7 @@ include(CheckCCompilerFlag)
include(CheckCSourceCompiles)
include(CheckIncludeFile)
include(CheckIncludeFiles)
include(CheckLibraryExists)
include(CheckSymbolExists)
include(TestBigEndian)
@ -64,3 +65,5 @@ check_c_source_compiles("
check_c_compiler_flag("-fno-strict-aliasing" FLAG_NO_STRICT_ALIASING)
check_c_compiler_flag("-fvisibility=hidden" FLAG_VISIBILITY)
check_library_exists(m cos "" _EXPAT_LIBM_FOUND)

@ -5,7 +5,7 @@
[![Downloads GitHub](https://img.shields.io/github/downloads/libexpat/libexpat/total?label=Downloads%20GitHub)](https://github.com/libexpat/libexpat/releases)
# Expat, Release 2.4.1
# Expat, Release 2.4.8
This is Expat, a C library for parsing XML, started by
[James Clark](https://en.wikipedia.org/wiki/James_Clark_%28programmer%29) in 1997.

@ -11,10 +11,11 @@
Copyright (c) 2000-2005 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
Copyright (c) 2016 Cristian Rodríguez <crrodriguez@opensuse.org>
Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
Copyright (c) 2022 Thijs Schreijer <thijs@thijsschreijer.nl>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@ -176,8 +177,10 @@ struct XML_cp {
};
/* This is called for an element declaration. See above for
description of the model argument. It's the caller's responsibility
to free model when finished with it.
description of the model argument. It's the user code's responsibility
to free model when finished with it. See XML_FreeContentModel.
There is no need to free the model from the handler, it can be kept
around and freed at a later stage.
*/
typedef void(XMLCALL *XML_ElementDeclHandler)(void *userData,
const XML_Char *name,
@ -239,6 +242,17 @@ XML_ParserCreate(const XML_Char *encoding);
and the local part will be concatenated without any separator.
It is a programming error to use the separator '\0' with namespace
triplets (see XML_SetReturnNSTriplet).
If a namespace separator is chosen that can be part of a URI or
part of an XML name, splitting an expanded name back into its
1, 2 or 3 original parts on application level in the element handler
may end up vulnerable, so these are advised against; sane choices for
a namespace separator are e.g. '\n' (line feed) and '|' (pipe).
Note that Expat does not validate namespace URIs (beyond encoding)
against RFC 3986 today (and is not required to do so with regard to
the XML 1.0 namespaces specification) but it may start doing that
in future releases. Before that, an application using Expat must
be ready to receive namespace URIs containing non-URI characters.
*/
XMLPARSEAPI(XML_Parser)
XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);
@ -319,7 +333,7 @@ typedef void(XMLCALL *XML_StartDoctypeDeclHandler)(void *userData,
const XML_Char *pubid,
int has_internal_subset);
/* This is called for the start of the DOCTYPE declaration when the
/* This is called for the end of the DOCTYPE declaration when the
closing > is encountered, but after processing any external
subset.
*/
@ -1043,7 +1057,7 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(
*/
#define XML_MAJOR_VERSION 2
#define XML_MINOR_VERSION 4
#define XML_MICRO_VERSION 1
#define XML_MICRO_VERSION 8
#ifdef __cplusplus
}

@ -1,4 +1,4 @@
/* 8539b9040d9d901366a62560a064af7cb99811335784b363abc039c5b0ebc416 (2.4.1+)
/* 2722de33b8d95adcfb16db05afdec6ed1d40d51565cda2176c61806b5350eafe (2.4.8+)
__ __ _
___\ \/ /_ __ __ _| |_
/ _ \\ /| '_ \ / _` | __|
@ -11,9 +11,9 @@
Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
Copyright (c) 2005-2009 Steven Solie <ssolie@users.sourceforge.net>
Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
Copyright (c) 2016 Eric Rahm <erahm@mozilla.com>
Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
Copyright (c) 2016 Gaurav <g.gupta@samsung.com>
Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr>
@ -32,6 +32,9 @@
Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org>
Copyright (c) 2021 Dong-hee Na <donghee.na@python.org>
Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net>
Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@ -54,6 +57,10 @@
USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#define XML_BUILDING_EXPAT 1
#include <expat_config.h>
#if ! defined(_GNU_SOURCE)
# define _GNU_SOURCE 1 /* syscall prototype */
#endif
@ -84,14 +91,10 @@
# include <errno.h>
#endif
#define XML_BUILDING_EXPAT 1
#ifdef _WIN32
# include "winconfig.h"
#endif
#include <expat_config.h>
#include "ascii.h"
#include "expat.h"
#include "siphash.h"
@ -131,7 +134,7 @@
* BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \
* libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
* libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
* Linux (including <3.17) / BSD / macOS (including <10.7) (/dev/urandom): XML_DEV_URANDOM, \
* Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
* Windows >=Vista (rand_s): _WIN32. \
\
If insist on not using any of these, bypass this error by defining \
@ -716,11 +719,11 @@ XML_ParserCreate(const XML_Char *encodingName) {
XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
XML_Char tmp[2];
*tmp = nsSep;
XML_Char tmp[2] = {nsSep, 0};
return XML_ParserCreate_MM(encodingName, NULL, tmp);
}
// "xml=http://www.w3.org/XML/1998/namespace"
static const XML_Char implicitContext[]
= {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
@ -973,7 +976,7 @@ parserCreate(const XML_Char *encodingName,
if (memsuite) {
XML_Memory_Handling_Suite *mtemp;
parser = (XML_Parser)memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
if (parser != NULL) {
mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
mtemp->malloc_fcn = memsuite->malloc_fcn;
@ -1342,8 +1345,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
would be otherwise.
*/
if (parser->m_ns) {
XML_Char tmp[2];
*tmp = parser->m_namespaceSeparator;
XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
} else {
parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
@ -2066,6 +2068,11 @@ XML_GetBuffer(XML_Parser parser, int len) {
keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
if (keep > XML_CONTEXT_BYTES)
keep = XML_CONTEXT_BYTES;
/* Detect and prevent integer overflow */
if (keep > INT_MAX - neededSize) {
parser->m_errorCode = XML_ERROR_NO_MEMORY;
return NULL;
}
neededSize += keep;
#endif /* defined XML_CONTEXT_BYTES */
if (neededSize
@ -2556,6 +2563,7 @@ storeRawNames(XML_Parser parser) {
while (tag) {
int bufSize;
int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
size_t rawNameLen;
char *rawNameBuf = tag->buf + nameLen;
/* Stop if already stored. Since m_tagStack is a stack, we can stop
at the first entry that has already been copied; everything
@ -2567,7 +2575,11 @@ storeRawNames(XML_Parser parser) {
/* For re-use purposes we need to ensure that the
size of tag->buf is a multiple of sizeof(XML_Char).
*/
bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
/* Detect and prevent integer overflow. */
if (rawNameLen > (size_t)INT_MAX - nameLen)
return XML_FALSE;
bufSize = nameLen + (int)rawNameLen;
if (bufSize > tag->bufEnd - tag->buf) {
char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
if (temp == NULL)
@ -3260,13 +3272,38 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
/* get the attributes from the tokenizer */
n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
/* Detect and prevent integer overflow */
if (n > INT_MAX - nDefaultAtts) {
return XML_ERROR_NO_MEMORY;
}
if (n + nDefaultAtts > parser->m_attsSize) {
int oldAttsSize = parser->m_attsSize;
ATTRIBUTE *temp;
#ifdef XML_ATTR_INFO
XML_AttrInfo *temp2;
#endif
/* Detect and prevent integer overflow */
if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
|| (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
return XML_ERROR_NO_MEMORY;
}
parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
/* Detect and prevent integer overflow.
* The preprocessor guard addresses the "always false" warning
* from -Wtype-limits on platforms where
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
parser->m_attsSize = oldAttsSize;
return XML_ERROR_NO_MEMORY;
}
#endif
temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
parser->m_attsSize * sizeof(ATTRIBUTE));
if (temp == NULL) {
@ -3275,6 +3312,17 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
}
parser->m_atts = temp;
#ifdef XML_ATTR_INFO
/* Detect and prevent integer overflow.
* The preprocessor guard addresses the "always false" warning
* from -Wtype-limits on platforms where
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
# if UINT_MAX >= SIZE_MAX
if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
parser->m_attsSize = oldAttsSize;
return XML_ERROR_NO_MEMORY;
}
# endif
temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
parser->m_attsSize * sizeof(XML_AttrInfo));
if (temp2 == NULL) {
@ -3413,7 +3461,13 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
if (nPrefixes) {
int j; /* hash table index */
unsigned long version = parser->m_nsAttsVersion;
int nsAttsSize = (int)1 << parser->m_nsAttsPower;
/* Detect and prevent invalid shift */
if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
return XML_ERROR_NO_MEMORY;
}
unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
unsigned char oldNsAttsPower = parser->m_nsAttsPower;
/* size of hash table must be at least 2 * (# of prefixed attributes) */
if ((nPrefixes << 1)
@ -3424,7 +3478,28 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
;
if (parser->m_nsAttsPower < 3)
parser->m_nsAttsPower = 3;
nsAttsSize = (int)1 << parser->m_nsAttsPower;
/* Detect and prevent invalid shift */
if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
/* Restore actual size of memory in m_nsAtts */
parser->m_nsAttsPower = oldNsAttsPower;
return XML_ERROR_NO_MEMORY;
}
nsAttsSize = 1u << parser->m_nsAttsPower;
/* Detect and prevent integer overflow.
* The preprocessor guard addresses the "always false" warning
* from -Wtype-limits on platforms where
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
/* Restore actual size of memory in m_nsAtts */
parser->m_nsAttsPower = oldNsAttsPower;
return XML_ERROR_NO_MEMORY;
}
#endif
temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
nsAttsSize * sizeof(NS_ATT));
if (! temp) {
@ -3582,9 +3657,31 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
tagNamePtr->prefixLen = prefixLen;
for (i = 0; localPart[i++];)
; /* i includes null terminator */
/* Detect and prevent integer overflow */
if (binding->uriLen > INT_MAX - prefixLen
|| i > INT_MAX - (binding->uriLen + prefixLen)) {
return XML_ERROR_NO_MEMORY;
}
n = i + binding->uriLen + prefixLen;
if (n > binding->uriAlloc) {
TAG *p;
/* Detect and prevent integer overflow */
if (n > INT_MAX - EXPAND_SPARE) {
return XML_ERROR_NO_MEMORY;
}
/* Detect and prevent integer overflow.
* The preprocessor guard addresses the "always false" warning
* from -Wtype-limits on platforms where
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
return XML_ERROR_NO_MEMORY;
}
#endif
uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
if (! uri)
return XML_ERROR_NO_MEMORY;
@ -3609,12 +3706,124 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
return XML_ERROR_NONE;
}
static XML_Bool
is_rfc3986_uri_char(XML_Char candidate) {
// For the RFC 3986 ANBF grammar see
// https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
switch (candidate) {
// From rule "ALPHA" (uppercase half)
case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
case 'H':
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'O':
case 'P':
case 'Q':
case 'R':
case 'S':
case 'T':
case 'U':
case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
// From rule "ALPHA" (lowercase half)
case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
case 'u':
case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
// From rule "DIGIT"
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
// From rule "pct-encoded"
case '%':
// From rule "unreserved"
case '-':
case '.':
case '_':
case '~':
// From rule "gen-delims"
case ':':
case '/':
case '?':
case '#':
case '[':
case ']':
case '@':
// From rule "sub-delims"
case '!':
case '$':
case '&':
case '\'':
case '(':
case ')':
case '*':
case '+':
case ',':
case ';':
case '=':
return XML_TRUE;
default:
return XML_FALSE;
}
}
/* addBinding() overwrites the value of prefix->binding without checking.
Therefore one must keep track of the old value outside of addBinding().
*/
static enum XML_Error
addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
const XML_Char *uri, BINDING **bindingsPtr) {
// "http://www.w3.org/XML/1998/namespace"
static const XML_Char xmlNamespace[]
= {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
@ -3625,6 +3834,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
ASCII_e, '\0'};
static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
// "http://www.w3.org/2000/xmlns/"
static const XML_Char xmlnsNamespace[]
= {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
@ -3664,6 +3874,29 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
if (! mustBeXML && isXMLNS
&& (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
isXMLNS = XML_FALSE;
// NOTE: While Expat does not validate namespace URIs against RFC 3986
// today (and is not REQUIRED to do so with regard to the XML 1.0
// namespaces specification) we have to at least make sure, that
// the application on top of Expat (that is likely splitting expanded
// element names ("qualified names") of form
// "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
// in its element handler code) cannot be confused by an attacker
// putting additional namespace separator characters into namespace
// declarations. That would be ambiguous and not to be expected.
//
// While the HTML API docs of function XML_ParserCreateNS have been
// advising against use of a namespace separator character that can
// appear in a URI for >20 years now, some widespread applications
// are using URI characters (':' (colon) in particular) for a
// namespace separator, in practice. To keep these applications
// functional, we only reject namespaces URIs containing the
// application-chosen namespace separator if the chosen separator
// is a non-URI character with regard to RFC 3986.
if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
&& ! is_rfc3986_uri_char(uri[len])) {
return XML_ERROR_SYNTAX;
}
}
isXML = isXML && len == xmlLen;
isXMLNS = isXMLNS && len == xmlnsLen;
@ -3680,6 +3913,21 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
if (parser->m_freeBindingList) {
b = parser->m_freeBindingList;
if (len > b->uriAlloc) {
/* Detect and prevent integer overflow */
if (len > INT_MAX - EXPAND_SPARE) {
return XML_ERROR_NO_MEMORY;
}
/* Detect and prevent integer overflow.
* The preprocessor guard addresses the "always false" warning
* from -Wtype-limits on platforms where
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
return XML_ERROR_NO_MEMORY;
}
#endif
XML_Char *temp = (XML_Char *)REALLOC(
parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
if (temp == NULL)
@ -3692,6 +3940,21 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
b = (BINDING *)MALLOC(parser, sizeof(BINDING));
if (! b)
return XML_ERROR_NO_MEMORY;
/* Detect and prevent integer overflow */
if (len > INT_MAX - EXPAND_SPARE) {
return XML_ERROR_NO_MEMORY;
}
/* Detect and prevent integer overflow.
* The preprocessor guard addresses the "always false" warning
* from -Wtype-limits on platforms where
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
return XML_ERROR_NO_MEMORY;
}
#endif
b->uri
= (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
if (! b->uri) {
@ -3976,7 +4239,7 @@ initializeEncoding(XML_Parser parser) {
const char *s;
#ifdef XML_UNICODE
char encodingBuf[128];
/* See comments abount `protoclEncodingName` in parserInit() */
/* See comments about `protocolEncodingName` in parserInit() */
if (! parser->m_protocolEncodingName)
s = NULL;
else {
@ -5018,6 +5281,11 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
if (parser->m_prologState.level >= parser->m_groupSize) {
if (parser->m_groupSize) {
{
/* Detect and prevent integer overflow */
if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
return XML_ERROR_NO_MEMORY;
}
char *const new_connector = (char *)REALLOC(
parser, parser->m_groupConnector, parser->m_groupSize *= 2);
if (new_connector == NULL) {
@ -5028,6 +5296,16 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
}
if (dtd->scaffIndex) {
/* Detect and prevent integer overflow.
* The preprocessor guard addresses the "always false" warning
* from -Wtype-limits on platforms where
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
return XML_ERROR_NO_MEMORY;
}
#endif
int *const new_scaff_index = (int *)REALLOC(
parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
if (new_scaff_index == NULL)
@ -5236,7 +5514,7 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
if (dtd->in_eldecl) {
ELEMENT_TYPE *el;
const XML_Char *name;
int nameLen;
size_t nameLen;
const char *nxt
= (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
int myindex = nextScaffoldPart(parser);
@ -5252,7 +5530,13 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
nameLen = 0;
for (; name[nameLen++];)
;
dtd->contentStringLen += nameLen;
/* Detect and prevent integer overflow */
if (nameLen > UINT_MAX - dtd->contentStringLen) {
return XML_ERROR_NO_MEMORY;
}
dtd->contentStringLen += (unsigned)nameLen;
if (parser->m_elementDeclHandler)
handleDefault = XML_FALSE;
}
@ -6098,7 +6382,24 @@ defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
}
} else {
DEFAULT_ATTRIBUTE *temp;
/* Detect and prevent integer overflow */
if (type->allocDefaultAtts > INT_MAX / 2) {
return 0;
}
int count = type->allocDefaultAtts * 2;
/* Detect and prevent integer overflow.
* The preprocessor guard addresses the "always false" warning
* from -Wtype-limits on platforms where
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
return 0;
}
#endif
temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
(count * sizeof(DEFAULT_ATTRIBUTE)));
if (temp == NULL)
@ -6388,7 +6689,7 @@ normalizePublicId(XML_Char *publicId) {
static DTD *
dtdCreate(const XML_Memory_Handling_Suite *ms) {
DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD));
DTD *p = ms->malloc_fcn(sizeof(DTD));
if (p == NULL)
return p;
poolInit(&(p->pool), ms);
@ -6561,8 +6862,8 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
if (! newE)
return 0;
if (oldE->nDefaultAtts) {
newE->defaultAtts = (DEFAULT_ATTRIBUTE *)ms->malloc_fcn(
oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
newE->defaultAtts
= ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
if (! newE->defaultAtts) {
return 0;
}
@ -6724,7 +7025,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
/* table->size is a power of 2 */
table->size = (size_t)1 << INIT_POWER;
tsize = table->size * sizeof(NAMED *);
table->v = (NAMED **)table->mem->malloc_fcn(tsize);
table->v = table->mem->malloc_fcn(tsize);
if (! table->v) {
table->size = 0;
return NULL;
@ -6749,10 +7050,22 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
/* check for overflow (table is half full) */
if (table->used >> (table->power - 1)) {
unsigned char newPower = table->power + 1;
/* Detect and prevent invalid shift */
if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
return NULL;
}
size_t newSize = (size_t)1 << newPower;
unsigned long newMask = (unsigned long)newSize - 1;
/* Detect and prevent integer overflow */
if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
return NULL;
}
size_t tsize = newSize * sizeof(NAMED *);
NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
NAMED **newV = table->mem->malloc_fcn(tsize);
if (! newV)
return NULL;
memset(newV, 0, tsize);
@ -6781,7 +7094,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
}
}
}
table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize);
table->v[i] = table->mem->malloc_fcn(createSize);
if (! table->v[i])
return NULL;
memset(table->v[i], 0, createSize);
@ -7069,7 +7382,7 @@ poolGrow(STRING_POOL *pool) {
if (bytesToAllocate == 0)
return XML_FALSE;
tem = (BLOCK *)pool->mem->malloc_fcn(bytesToAllocate);
tem = pool->mem->malloc_fcn(bytesToAllocate);
if (! tem)
return XML_FALSE;
tem->size = blockSize;
@ -7100,6 +7413,20 @@ nextScaffoldPart(XML_Parser parser) {
if (dtd->scaffCount >= dtd->scaffSize) {
CONTENT_SCAFFOLD *temp;
if (dtd->scaffold) {
/* Detect and prevent integer overflow */
if (dtd->scaffSize > UINT_MAX / 2u) {
return -1;
}
/* Detect and prevent integer overflow.
* The preprocessor guard addresses the "always false" warning
* from -Wtype-limits on platforms where
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
return -1;
}
#endif
temp = (CONTENT_SCAFFOLD *)REALLOC(
parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
if (temp == NULL)
@ -7131,55 +7458,130 @@ nextScaffoldPart(XML_Parser parser) {
return next;
}
static void
build_node(XML_Parser parser, int src_node, XML_Content *dest,
XML_Content **contpos, XML_Char **strpos) {
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
dest->type = dtd->scaffold[src_node].type;
dest->quant = dtd->scaffold[src_node].quant;
if (dest->type == XML_CTYPE_NAME) {
const XML_Char *src;
dest->name = *strpos;
src = dtd->scaffold[src_node].name;
for (;;) {
*(*strpos)++ = *src;
if (! *src)
break;
src++;
}
dest->numchildren = 0;
dest->children = NULL;
} else {
unsigned int i;
int cn;
dest->numchildren = dtd->scaffold[src_node].childcnt;
dest->children = *contpos;
*contpos += dest->numchildren;
for (i = 0, cn = dtd->scaffold[src_node].firstchild; i < dest->numchildren;
i++, cn = dtd->scaffold[cn].nextsib) {
build_node(parser, cn, &(dest->children[i]), contpos, strpos);
}
dest->name = NULL;
}
}
static XML_Content *
build_model(XML_Parser parser) {
/* Function build_model transforms the existing parser->m_dtd->scaffold
* array of CONTENT_SCAFFOLD tree nodes into a new array of
* XML_Content tree nodes followed by a gapless list of zero-terminated
* strings. */
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
XML_Content *ret;
XML_Content *cpos;
XML_Char *str;
int allocsize = (dtd->scaffCount * sizeof(XML_Content)
+ (dtd->contentStringLen * sizeof(XML_Char)));
XML_Char *str; /* the current string writing location */
/* Detect and prevent integer overflow.
* The preprocessor guard addresses the "always false" warning
* from -Wtype-limits on platforms where
* sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
return NULL;
}
if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
return NULL;
}
#endif
if (dtd->scaffCount * sizeof(XML_Content)
> (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
return NULL;
}
const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
+ (dtd->contentStringLen * sizeof(XML_Char)));
ret = (XML_Content *)MALLOC(parser, allocsize);
if (! ret)
return NULL;
str = (XML_Char *)(&ret[dtd->scaffCount]);
cpos = &ret[1];
/* What follows is an iterative implementation (of what was previously done
* recursively in a dedicated function called "build_node". The old recursive
* build_node could be forced into stack exhaustion from input as small as a
* few megabyte, and so that was a security issue. Hence, a function call
* stack is avoided now by resolving recursion.)
*
* The iterative approach works as follows:
*
* - We have two writing pointers, both walking up the result array; one does
* the work, the other creates "jobs" for its colleague to do, and leads
* the way:
*
* - The faster one, pointer jobDest, always leads and writes "what job
* to do" by the other, once they reach that place in the
* array: leader "jobDest" stores the source node array index (relative
* to array dtd->scaffold) in field "numchildren".
*
<