From 8e1eae2319cd3a651941c88b46d95e8ee8507c6c Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Fri, 27 Sep 2024 20:21:31 -0700 Subject: [PATCH] vendor/expat: import 2.6.3 The 2.6.1 - 2.6.3 releases address security issues. The release notes for the 2.6.3 release can be found at https://github.com/libexpat/libexpat/blob/R_2_6_3/expat/Changes . --- Changes | 136 ++++++++++++++++++++++- Makefile.am | 6 +- Makefile.in | 8 +- README.md | 14 ++- buildconf.sh | 24 +---- configure.ac | 39 +++++-- doc/Makefile.am | 21 ++-- doc/Makefile.in | 56 +++++----- doc/reference.html | 19 ++-- doc/xmlwf.1 | 2 +- doc/xmlwf.xml | 2 +- examples/Makefile.in | 2 +- expat_config.h.in | 3 - fix-xmltest-log.sh | 12 +-- lib/Makefile.am | 19 ++-- lib/Makefile.in | 79 ++++++++++---- lib/expat.h | 5 +- lib/internal.h | 17 ++- lib/siphash.h | 3 +- lib/xmlparse.c | 73 +++++++++---- tests/Makefile.am | 11 +- tests/Makefile.in | 19 ++-- tests/README.md | 11 ++ tests/README.txt | 13 --- tests/acc_tests.c | 59 ++++++++++ tests/basic_tests.c | 208 ++++++++++++++++++++++++------------ tests/benchmark/Makefile.in | 2 +- tests/misc_tests.c | 2 +- xmlwf/Makefile.in | 2 +- 29 files changed, 604 insertions(+), 263 deletions(-) create mode 100644 tests/README.md delete mode 100644 tests/README.txt diff --git a/Changes b/Changes index a7d4caf9ac81..c1d22efa5a3c 100644 --- a/Changes +++ b/Changes @@ -1,6 +1,136 @@ -NOTE: We are looking for help with a few things: - https://github.com/libexpat/libexpat/labels/help%20wanted - If you can help, please get in touch. Thanks! + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!! Expat is UNDERSTAFFED and WITHOUT FUNDING. !! +!! ~~~~~~~~~~~~ !! +!! The following topics need *additional skilled C developers* to progress !! +!! in a timely manner or at all (loosely ordered by descending priority): !! +!! 
!! +!! - fixing a complex non-public security issue, !! +!! - teaming up on researching and fixing future security reports and !! +!! ClusterFuzz findings with few-days-max response times in communication !! +!! in order to (1) have a sound fix ready before the end of a 90 days !! +!! grace period and (2) in a sustainable manner, !! +!! - implementing and auto-testing XML 1.0r5 support !! +!! (needs discussion before pull requests), !! +!! - smart ideas on fixing the Autotools CMake files generation issue !! +!! without breaking CI (needs discussion before pull requests), !! +!! - the Windows binaries topic (needs requirements engineering first), !! +!! - pushing migration from `int` to `size_t` further !! +!! including edge-cases test coverage (needs discussion before anything). !! +!! !! +!! For details, please reach out via e-mail to sebastian@pipping.org so we !! +!! can schedule a voice call on the topic, in English or German. !! +!! !! +!! THANK YOU! Sebastian Pipping -- Berlin, 2024-03-09 !! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +Release 2.6.3 Wed September 4 2024 + Security fixes: + #887 #890 CVE-2024-45490 -- Calling function XML_ParseBuffer with + len < 0 without noticing and then calling XML_GetBuffer + will have XML_ParseBuffer fail to recognize the problem + and XML_GetBuffer corrupt memory. + With the fix, XML_ParseBuffer now complains with error + XML_ERROR_INVALID_ARGUMENT just like sibling XML_Parse + has been doing since Expat 2.2.1, and now documented. + Impact is denial of service to potentially arbitrary code + execution. + #888 #891 CVE-2024-45491 -- Internal function dtdCopy can have an + integer overflow for nDefaultAtts on 32-bit platforms + (where UINT_MAX equals SIZE_MAX). + Impact is denial of service to potentially arbitrary code + execution. 
+ #889 #892 CVE-2024-45492 -- Internal function nextScaffoldPart can + have an integer overflow for m_groupSize on 32-bit + platforms (where UINT_MAX equals SIZE_MAX). + Impact is denial of service to potentially arbitrary code + execution. + + Other changes: + #851 #879 Autotools: Sync CMake templates with CMake 3.28 + #853 Autotools: Always provide path to find(1) for portability + #861 Autotools: Ensure that the m4 directory always exists. + #870 Autotools: Simplify handling of SIZEOF_VOID_P + #869 Autotools: Support non-GNU sed + #856 Autotools|CMake: Fix main() to main(void) + #865 Autotools|CMake: Fix compile tests for HAVE_SYSCALL_GETRANDOM + #863 Autotools|CMake: Stop requiring dos2unix + #854 #855 CMake: Fix check for symbols size_t and off_t + #864 docs|tests: Convert README to Markdown and update + #741 Windows: Drop support for Visual Studio <=15.0/2017 + #886 Drop needless XML_DTD guards around is_param access + #885 Fix typo in a code comment + #894 #896 Version info bumped from 10:2:9 (libexpat*.so.1.9.2) + to 10:3:9 (libexpat*.so.1.9.3); see https://verbump.de/ + for what these numbers do + + Infrastructure: + #880 Readme: Promote the call for help + #868 CI: Fix various issues + #849 CI: Allow triggering GitHub Actions workflows manually + #851 #872 .. + #873 #879 CI: Adapt to breaking changes in GitHub Actions + + Special thanks to: + Alexander Bluhm + Berkay Eren Ürün + Dag-Erling Smørgrav + Ferenc Géczi + TaiYou + +Release 2.6.2 Wed March 13 2024 + Security fixes: + #839 #842 CVE-2024-28757 -- Prevent billion laughs attacks with + isolated use of external parsers. Please see the commit + message of commit 1d50b80cf31de87750103656f6eb693746854aa8 + for details. 
+ + Bug fixes: + #839 #841 Reject direct parameter entity recursion + and avoid the related undefined behavior + + Other changes: + #847 Autotools: Fix build for DOCBOOK_TO_MAN containing spaces + #837 Add missing #821 and #824 to 2.6.1 change log + #838 #843 Version info bumped from 10:1:9 (libexpat*.so.1.9.1) + to 10:2:9 (libexpat*.so.1.9.2); see https://verbump.de/ + for what these numbers do + + Special thanks to: + Philippe Antoine + Tomas Korbar + and + Clang UndefinedBehaviorSanitizer + OSS-Fuzz / ClusterFuzz + +Release 2.6.1 Thu February 29 2024 + Bug fixes: + #817 Make tests independent of CPU speed, and thus more robust + #828 #836 Expose billion laughs API with XML_DTD defined and + XML_GE undefined, regression from 2.6.0 + + Other changes: + #829 Hide test-only code behind new internal macro + #833 Autotools: Reject expat_config.h.in defining SIZEOF_VOID_P + #821 #824 Autotools: Fix "make clean" for case: + ./configure --without-docbook && make clean all + #819 Address compiler warnings + #832 #834 Version info bumped from 10:0:9 (libexpat*.so.1.9.0) + to 10:1:9 (libexpat*.so.1.9.1); see https://verbump.de/ + for what these numbers do + + Infrastructure: + #818 CI: Adapt to breaking changes in clang-format + + Special thanks to: + David Hall + Snild Dolkow Release 2.6.0 Tue February 6 2024 Security fixes: diff --git a/Makefile.am b/Makefile.am index 9c2259d23e63..7d8e17c2cf86 100644 --- a/Makefile.am +++ b/Makefile.am @@ -10,6 +10,8 @@ # Copyright (c) 2018 KangLin # Copyright (c) 2022 Johnny Jazeix # Copyright (c) 2023 Sony Corporation / Snild Dolkow +# Copyright (c) 2024 Alexander Bluhm +# Copyright (c) 2024 Dag-Erling Smørgrav # Licensed under the MIT license: # # Permission is hereby granted, free of charge, to any person obtaining @@ -114,10 +116,10 @@ buildlib: @echo 'ERROR: is no longer supported. INSTEAD please:' >&2 @echo 'ERROR:' >&2 @echo 'ERROR: * Mass-patch Makefile.am, e.g.' 
>&2 - @echo 'ERROR: # find -name Makefile.am -exec sed \' >&2 + @echo 'ERROR: # find . -name Makefile.am -exec sed \' >&2 @echo 'ERROR: -e "s,libexpat\.la,libexpatw.la," \' >&2 @echo 'ERROR: -e "s,libexpat_la,libexpatw_la," \' >&2 - @echo 'ERROR: -i {} +' >&2 + @echo 'ERROR: -i.bak {} +' >&2 @echo 'ERROR:' >&2 @echo 'ERROR: * Run automake to re-generate Makefile.in files' >&2 @echo 'ERROR:' >&2 diff --git a/Makefile.in b/Makefile.in index f505224f6fa8..c0fcb5dd05d1 100644 --- a/Makefile.in +++ b/Makefile.in @@ -26,6 +26,8 @@ # Copyright (c) 2018 KangLin # Copyright (c) 2022 Johnny Jazeix # Copyright (c) 2023 Sony Corporation / Snild Dolkow +# Copyright (c) 2024 Alexander Bluhm +# Copyright (c) 2024 Dag-Erling Smørgrav # Licensed under the MIT license: # # Permission is hereby granted, free of charge, to any person obtaining @@ -384,6 +386,7 @@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ +SIZEOF_VOID_P = @SIZEOF_VOID_P@ SO_MAJOR = @SO_MAJOR@ SO_MINOR = @SO_MINOR@ SO_PATCH = @SO_PATCH@ @@ -397,7 +400,6 @@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ -ac_cv_sizeof_void_p = @ac_cv_sizeof_void_p@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ @@ -1080,10 +1082,10 @@ buildlib: @echo 'ERROR: is no longer supported. INSTEAD please:' >&2 @echo 'ERROR:' >&2 @echo 'ERROR: * Mass-patch Makefile.am, e.g.' >&2 - @echo 'ERROR: # find -name Makefile.am -exec sed \' >&2 + @echo 'ERROR: # find . 
-name Makefile.am -exec sed \' >&2 @echo 'ERROR: -e "s,libexpat\.la,libexpatw.la," \' >&2 @echo 'ERROR: -e "s,libexpat_la,libexpatw_la," \' >&2 - @echo 'ERROR: -i {} +' >&2 + @echo 'ERROR: -i.bak {} +' >&2 @echo 'ERROR:' >&2 @echo 'ERROR: * Run automake to re-generate Makefile.in files' >&2 @echo 'ERROR:' >&2 diff --git a/README.md b/README.md index 43c4f4f3dbb3..180a68e4abbe 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,14 @@ [![Downloads SourceForge](https://img.shields.io/sourceforge/dt/expat?label=Downloads%20SourceForge)](https://sourceforge.net/projects/expat/files/) [![Downloads GitHub](https://img.shields.io/github/downloads/libexpat/libexpat/total?label=Downloads%20GitHub)](https://github.com/libexpat/libexpat/releases) +> [!CAUTION] +> +> Expat is **understaffed** and without funding. +> There is a [call for help with details](https://github.com/libexpat/libexpat/blob/master/expat/Changes) +> at the top of the `Changes` file. -# Expat, Release 2.6.0 + +# Expat, Release 2.6.3 This is Expat, a C99 library for parsing [XML 1.0 Fourth Edition](https://www.w3.org/TR/2006/REC-xml-20060816/), started by @@ -20,7 +26,7 @@ Expat supports the following compilers: - GNU GCC >=4.5 - LLVM Clang >=3.5 -- Microsoft Visual Studio >=15.0/2017 (rolling `${today} minus 5 years`) +- Microsoft Visual Studio >=16.0/2019 (rolling `${today} minus 5 years`) Windows users can use the [`expat-win32bin-*.*.*.{exe,zip}` download](https://github.com/libexpat/libexpat/releases), @@ -158,10 +164,10 @@ support this mode of compilation (yet): 1. Mass-patch `Makefile.am` files to use `libexpatw.la` for a library name:
- `find -name Makefile.am -exec sed + `find . -name Makefile.am -exec sed -e 's,libexpat\.la,libexpatw.la,' -e 's,libexpat_la,libexpatw_la,' - -i {} +` + -i.bak {} +` 1. Run `automake` to re-write `Makefile.in` files:
`automake` diff --git a/buildconf.sh b/buildconf.sh index 5e2b3269c256..4e506b30082b 100755 --- a/buildconf.sh +++ b/buildconf.sh @@ -8,6 +8,7 @@ # # Copyright (c) 2017-2022 Sebastian Pipping # Copyright (c) 2018 Marco Maggi +# Copyright (c) 2024 Dag-Erling Smørgrav # Licensed under the MIT license: # # Permission is hereby granted, free of charge, to any person obtaining @@ -31,25 +32,4 @@ set -e -# File expat_config.h.in (as generated by autoheader by autoreconf) contains -# macro SIZEOF_VOID_P which is (1) not really needed by Expat as of today and -# (2) a problem to "multilib" systems with one shared installed -# /usr/include/expat_config.h for two Expats with different "void *" sizes -# installed in e.g. /usr/lib32 and /usr/lib64. Hence we patch macro -# SIZEOF_VOID_P out of template expat_config.h.in so that configure will -# not put SIZEOF_VOID_P in the eventual expat_config.h. -patch_expat_config_h_in() { - local filename="$1" - local sizeof_void_p_line_number="$(grep -F -n SIZEOF_VOID_P "${filename}" | awk -F: '{print $1}')" - [[ ${sizeof_void_p_line_number} =~ ^[0-9]+$ ]] # cheap assert - local first_line_to_delete=$(( sizeof_void_p_line_number - 1 )) - local last_line_to_delete=$(( sizeof_void_p_line_number + 1 )) - # Note: Avoiding "sed -i" only for macOS portability. 
- local tempfile="$(mktemp)" - sed "${first_line_to_delete},${last_line_to_delete}d" "${filename}" > "${tempfile}" - mv "${tempfile}" "${filename}" -} - -autoreconf --warnings=all --install --verbose "$@" - -patch_expat_config_h_in expat_config.h.in +exec autoreconf --warnings=all --install --verbose "$@" diff --git a/configure.ac b/configure.ac index a5d1ff9317c8..1a930413ffe5 100644 --- a/configure.ac +++ b/configure.ac @@ -22,6 +22,8 @@ dnl Copyright (c) 2018 KangLin dnl Copyright (c) 2019 Mohammed Khajapasha dnl Copyright (c) 2019 Kishore Kunche dnl Copyright (c) 2020 Jeffrey Walton +dnl Copyright (c) 2024 Ferenc Géczi +dnl Copyright (c) 2024 Dag-Erling Smørgrav dnl Licensed under the MIT license: dnl dnl Permission is hereby granted, free of charge, to any person obtaining @@ -83,7 +85,7 @@ dnl If the API changes incompatibly set LIBAGE back to 0 dnl LIBCURRENT=10 # sync -LIBREVISION=0 # with +LIBREVISION=3 # with LIBAGE=9 # CMakeLists.txt! AC_CONFIG_HEADERS([expat_config.h]) @@ -160,7 +162,6 @@ AC_C_BIGENDIAN([AC_DEFINE([WORDS_BIGENDIAN], 1) AC_DEFINE_UNQUOTED([BYTEORDER], $BYTEORDER, [1234 = LILENDIAN, 4321 = BIGENDIAN]) AC_C_CONST -AC_TYPE_SIZE_T AC_ARG_WITH([xmlwf], [AS_HELP_STRING([--without-xmlwf], [do not build xmlwf])], @@ -215,7 +216,7 @@ AC_LINK_IFELSE([AC_LANG_SOURCE([ #else # include /* for arc4random_buf on BSD */ #endif - int main() { + int main(void) { char dummy[[123]]; // double brackets for m4 arc4random_buf(dummy, 0U); return 0; @@ -232,7 +233,7 @@ AC_LINK_IFELSE([AC_LANG_SOURCE([ #else # include #endif - int main() { + int main(void) { arc4random(); return 0; } @@ -254,7 +255,7 @@ AS_IF([test "x$with_getrandom" != xno], AC_LINK_IFELSE([AC_LANG_SOURCE([ #include /* for NULL */ #include - int main() { + int main(void) { return getrandom(NULL, 0U, 0U); } ])], @@ -275,10 +276,11 @@ AS_HELP_STRING([--without-sys-getrandom], AS_IF([test "x$with_sys_getrandom" != xno], [AC_MSG_CHECKING([for syscall SYS_getrandom (Linux 3.17+)]) 
AC_LINK_IFELSE([AC_LANG_SOURCE([ + #define _GNU_SOURCE #include /* for NULL */ #include /* for syscall */ #include /* for SYS_getrandom */ - int main() { + int main(void) { syscall(SYS_getrandom, NULL, 0, 0); return 0; } @@ -357,11 +359,22 @@ AS_IF([test "x${DOCBOOK_TO_MAN}" != x -a "x$with_docbook" != xno], page for xmlwf.])])]) dnl This will make sure that a release tarball shipping a pre-rendered xmlwf man page will -dnl get it installed, independent of whether some flavor of docbook2man is available. +dnl get it installed, when no working flavor of docbook2man is available (or wanted). dnl This relies on file xmlwf.1 being at least as recent as its source file xmlwf.xml. AS_IF([test -f "${srcdir}"/doc/xmlwf.1], - [AM_CONDITIONAL(WITH_DOCBOOK, [true])], - [AM_CONDITIONAL(WITH_DOCBOOK, [test "x${DOCBOOK_TO_MAN}" != x])]) + [AM_CONDITIONAL(WITH_MANPAGE, [true]) + AS_IF([test "x$with_docbook" = xno -o "x${DOCBOOK_TO_MAN}" = x], + [AM_CONDITIONAL(WITH_PREBUILT_MANPAGE, [true]) + AM_CONDITIONAL(WITH_DISTRIBUTABLE_MANPAGE, [false])], + [AM_CONDITIONAL(WITH_PREBUILT_MANPAGE, [false]) + AM_CONDITIONAL(WITH_DISTRIBUTABLE_MANPAGE, [true])]) + ], + [AS_IF([test "x$with_docbook" != xno -a "x${DOCBOOK_TO_MAN}" != x], + [AM_CONDITIONAL(WITH_MANPAGE, [true]) + AM_CONDITIONAL(WITH_DISTRIBUTABLE_MANPAGE, [true])], + [AM_CONDITIONAL(WITH_MANPAGE, [false]) + AM_CONDITIONAL(WITH_DISTRIBUTABLE_MANPAGE, [false])]) + AM_CONDITIONAL(WITH_PREBUILT_MANPAGE, [false])]) dnl Configure CMake file templates dnl NOTE: The *_TRUE variables read here are Automake conditionals @@ -392,7 +405,6 @@ LIBDIR_BASENAME="$(basename "${libdir}")" SO_MAJOR="$(expr "${LIBCURRENT}" - "${LIBAGE}")" SO_MINOR="${LIBAGE}" SO_PATCH="${LIBREVISION}" -AC_CHECK_SIZEOF([void *]) # sets ac_cv_sizeof_void_p AC_SUBST([EXPAT_ATTR_INFO]) AC_SUBST([EXPAT_DTD]) AC_SUBST([EXPAT_LARGE_SIZE]) @@ -405,8 +417,13 @@ AC_SUBST([LIBDIR_BASENAME]) AC_SUBST([SO_MAJOR]) AC_SUBST([SO_MINOR]) AC_SUBST([SO_PATCH]) 
-AC_SUBST([ac_cv_sizeof_void_p]) +dnl The canonical way of doing this is AC_CHECK_SIZEOF(void *), but +dnl that adds SIZEOF_VOID_P to expat_config.h.in, making it difficult +dnl to have 32-bit and 64-bit versions of libexpat installed on the +dnl same system with a single, shared copy of the header. +AC_COMPUTE_INT(SIZEOF_VOID_P, [sizeof(void *)]) +AC_SUBST([SIZEOF_VOID_P]) dnl write the Automake flags we set AC_SUBST([AM_CPPFLAGS]) diff --git a/doc/Makefile.am b/doc/Makefile.am index c3a3ce59c1b9..3bea96e9aa6f 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -6,9 +6,10 @@ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # -# Copyright (c) 2017-2022 Sebastian Pipping +# Copyright (c) 2017-2024 Sebastian Pipping # Copyright (c) 2017 Stephen Groat # Copyright (c) 2017 Joe Orton +# Copyright (c) 2024 Tomas Korbar # Licensed under the MIT license: # # Permission is hereby granted, free of charge, to any person obtaining @@ -32,26 +33,24 @@ .PHONY: dist-hook # not inside conditional to avoid automake warning -if WITH_DOCBOOK +if WITH_MANPAGE dist_man_MANS = xmlwf.1 xmlwf.1: xmlwf.xml -rm -f $@ - $(DOCBOOK_TO_MAN) $< + test "x$(DOCBOOK_TO_MAN)" != x && $(DOCBOOK_TO_MAN) $< test -f $@ || mv XMLWF.1 $@ -else +endif + +if !WITH_DISTRIBUTABLE_MANPAGE dist-hook: @echo 'ERROR: Configure with --with-docbook for "make dist".' 
1>&2 @false endif -# https://www.gnu.org/software/automake/manual/automake.html#What-Gets-Cleaned -.PHONY: clean-local -clean-local: clean-local-check - -.PHONY: clean-local-check -clean-local-check: - $(RM) xmlwf.1 +if !WITH_PREBUILT_MANPAGE +CLEANFILES = xmlwf.1 +endif EXTRA_DIST = \ ok.min.css \ diff --git a/doc/Makefile.in b/doc/Makefile.in index 18f86be3947b..72deb0565d94 100644 --- a/doc/Makefile.in +++ b/doc/Makefile.in @@ -22,9 +22,10 @@ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # -# Copyright (c) 2017-2022 Sebastian Pipping +# Copyright (c) 2017-2024 Sebastian Pipping # Copyright (c) 2017 Stephen Groat # Copyright (c) 2017 Joe Orton +# Copyright (c) 2024 Tomas Korbar # Licensed under the MIT license: # # Permission is hereby granted, free of charge, to any person obtaining @@ -285,6 +286,7 @@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ +SIZEOF_VOID_P = @SIZEOF_VOID_P@ SO_MAJOR = @SO_MAJOR@ SO_MINOR = @SO_MINOR@ SO_PATCH = @SO_PATCH@ @@ -298,7 +300,6 @@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ -ac_cv_sizeof_void_p = @ac_cv_sizeof_void_p@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ @@ -345,7 +346,8 @@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ -@WITH_DOCBOOK_TRUE@dist_man_MANS = xmlwf.1 +@WITH_MANPAGE_TRUE@dist_man_MANS = xmlwf.1 +@WITH_PREBUILT_MANPAGE_FALSE@CLEANFILES = xmlwf.1 EXTRA_DIST = \ ok.min.css \ reference.html \ @@ -439,7 +441,7 @@ ctags CTAGS: cscope cscopelist: -@WITH_DOCBOOK_TRUE@dist-hook: +@WITH_DISTRIBUTABLE_MANPAGE_TRUE@dist-hook: distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am @@ -505,6 +507,7 @@ install-strip: mostlyclean-generic: clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) @@ -515,7 +518,7 @@ 
maintainer-clean-generic: @echo "it deletes files that may require special tools to rebuild." clean: clean-am -clean-am: clean-generic clean-libtool clean-local mostlyclean-am +clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile @@ -584,38 +587,31 @@ uninstall-man: uninstall-man1 .MAKE: install-am install-strip .PHONY: all all-am check check-am clean clean-generic clean-libtool \ - clean-local cscopelist-am ctags-am dist-hook distclean \ - distclean-generic distclean-libtool distdir dvi dvi-am html \ - html-am info info-am install install-am install-data \ - install-data-am install-dvi install-dvi-am install-exec \ - install-exec-am install-html install-html-am install-info \ - install-info-am install-man install-man1 install-pdf \ - install-pdf-am install-ps install-ps-am install-strip \ - installcheck installcheck-am installdirs maintainer-clean \ - maintainer-clean-generic mostlyclean mostlyclean-generic \ - mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ - uninstall-am uninstall-man uninstall-man1 + cscopelist-am ctags-am dist-hook distclean distclean-generic \ + distclean-libtool distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-man1 install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ + ps ps-am tags-am uninstall uninstall-am uninstall-man \ + uninstall-man1 .PRECIOUS: Makefile .PHONY: dist-hook # not inside conditional to avoid automake warning -@WITH_DOCBOOK_TRUE@xmlwf.1: xmlwf.xml -@WITH_DOCBOOK_TRUE@ -rm -f $@ -@WITH_DOCBOOK_TRUE@ $(DOCBOOK_TO_MAN) $< -@WITH_DOCBOOK_TRUE@ test -f $@ || mv XMLWF.1 $@ -@WITH_DOCBOOK_FALSE@dist-hook: 
-@WITH_DOCBOOK_FALSE@ @echo 'ERROR: Configure with --with-docbook for "make dist".' 1>&2 -@WITH_DOCBOOK_FALSE@ @false +@WITH_MANPAGE_TRUE@xmlwf.1: xmlwf.xml +@WITH_MANPAGE_TRUE@ -rm -f $@ +@WITH_MANPAGE_TRUE@ test "x$(DOCBOOK_TO_MAN)" != x && $(DOCBOOK_TO_MAN) $< +@WITH_MANPAGE_TRUE@ test -f $@ || mv XMLWF.1 $@ -# https://www.gnu.org/software/automake/manual/automake.html#What-Gets-Cleaned -.PHONY: clean-local -clean-local: clean-local-check - -.PHONY: clean-local-check -clean-local-check: - $(RM) xmlwf.1 +@WITH_DISTRIBUTABLE_MANPAGE_FALSE@dist-hook: +@WITH_DISTRIBUTABLE_MANPAGE_FALSE@ @echo 'ERROR: Configure with --with-docbook for "make dist".' 1>&2 +@WITH_DISTRIBUTABLE_MANPAGE_FALSE@ @false # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. diff --git a/doc/reference.html b/doc/reference.html index 898f03a3364d..4cfb2ce9384e 100644 --- a/doc/reference.html +++ b/doc/reference.html @@ -52,7 +52,7 @@

The Expat XML Parser - Release 2.6.0 + Release 2.6.3

@@ -319,7 +319,7 @@ directions in the next section. Otherwise if you have Microsoft's Developer Studio installed, you can use CMake to generate a .sln file, e.g. -cmake -G"Visual Studio 15 2017" -DCMAKE_BUILD_TYPE=RelWithDebInfo . +cmake -G"Visual Studio 16 2019" -DCMAKE_BUILD_TYPE=RelWithDebInfo . , and build Expat using msbuild /m expat.sln after.

Alternatively, you may download the Win32 binary package that @@ -356,10 +356,7 @@ library and header would get installed in

Configuring Expat Using the Pre-Processor

Expat's feature set can be configured using a small number of -pre-processor definitions. The definition of this symbols does not -affect the set of entry points for Expat, only the behavior of the API -and the definition of character types in the case of -XML_UNICODE_WCHAR_T. The symbols are:

+pre-processor definitions. The symbols are:

XML_GE
@@ -1138,7 +1135,9 @@ containing part (or perhaps all) of the document. The number of bytes of s that are part of the document is indicated by len. This means that s doesn't have to be null-terminated. It also means that if len is larger than the number of bytes in the block of -memory that s points at, then a memory fault is likely. The +memory that s points at, then a memory fault is likely. +Negative values for len are rejected since Expat 2.2.1. +The isFinal parameter informs the parser that this is the last piece of the document. Frequently, the last piece is empty (i.e. len is zero.) @@ -1186,11 +1185,17 @@ XML_ParseBuffer(XML_Parser p, int isFinal);
+

This is just like XML_Parse, except in this case Expat provides the buffer. By obtaining the buffer from Expat with the XML_GetBuffer function, the application can avoid double copying of the input. +

+ +

+Negative values for len are rejected since Expat 2.6.3. +

XML_GetBuffer

diff --git a/doc/xmlwf.1 b/doc/xmlwf.1 index ac5ce21e6b81..347c36f06109 100644 --- a/doc/xmlwf.1 +++ b/doc/xmlwf.1 @@ -5,7 +5,7 @@ \\$2 \(la\\$1\(ra\\$3 .. .if \n(.g .mso www.tmac -.TH XMLWF 1 "February 6, 2024" "" "" +.TH XMLWF 1 "September 4, 2024" "" "" .SH NAME xmlwf \- Determines if an XML document is well-formed .SH SYNOPSIS diff --git a/doc/xmlwf.xml b/doc/xmlwf.xml index 2b3f1ccd74a8..10b29782b197 100644 --- a/doc/xmlwf.xml +++ b/doc/xmlwf.xml @@ -21,7 +21,7 @@ "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" [ Scott"> Bronson"> - February 6, 2024"> + September 4, 2024"> 1"> bronson@rinspin.com"> diff --git a/examples/Makefile.in b/examples/Makefile.in index 0ccc020dd94f..044c9089c565 100644 --- a/examples/Makefile.in +++ b/examples/Makefile.in @@ -313,6 +313,7 @@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ +SIZEOF_VOID_P = @SIZEOF_VOID_P@ SO_MAJOR = @SO_MAJOR@ SO_MINOR = @SO_MINOR@ SO_PATCH = @SO_PATCH@ @@ -326,7 +327,6 @@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ -ac_cv_sizeof_void_p = @ac_cv_sizeof_void_p@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ diff --git a/expat_config.h.in b/expat_config.h.in index 91c32340868e..67ef89c7171a 100644 --- a/expat_config.h.in +++ b/expat_config.h.in @@ -139,7 +139,4 @@ /* Define to `long int' if does not define. */ #undef off_t -/* Define to `unsigned int' if does not define. 
*/ -#undef size_t - #endif // ndef EXPAT_CONFIG_H diff --git a/fix-xmltest-log.sh b/fix-xmltest-log.sh index 7981cf3b00c8..4739acab6b02 100755 --- a/fix-xmltest-log.sh +++ b/fix-xmltest-log.sh @@ -7,6 +7,7 @@ # |_| XML parser # # Copyright (c) 2019-2022 Sebastian Pipping +# Copyright (c) 2024 Dag-Erling Smørgrav # Licensed under the MIT license: # # Permission is hereby granted, free of charge, to any person obtaining @@ -32,10 +33,10 @@ set -e filename="${1:-tests/xmltest.log}" -dos2unix "${filename}" - -tempfile="$(mktemp)" -sed \ +sed -i.bak \ + -e '# convert DOS line endings to Unix without resorting to dos2unix' \ + -e $'s/\r//' \ + \ -e 's/^wine: Call .* msvcrt\.dll\._wperror, aborting$/ibm49i02.dtd: No such file or directory/' \ \ -e '/^wine: /d' \ @@ -46,5 +47,4 @@ sed \ -e '/^wine client error:/d' \ -e '/^In ibm\/invalid\/P49\/: Unhandled exception: unimplemented .\+/d' \ \ - "${filename}" > "${tempfile}" -mv "${tempfile}" "${filename}" + "${filename}" diff --git a/lib/Makefile.am b/lib/Makefile.am index 0e0185b59120..1958f322f319 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -6,7 +6,7 @@ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # -# Copyright (c) 2017-2022 Sebastian Pipping +# Copyright (c) 2017-2024 Sebastian Pipping # Copyright (c) 2017 Tomasz Kłoczko # Copyright (c) 2019 David Loffredo # Licensed under the MIT license: @@ -36,7 +36,9 @@ include_HEADERS = \ expat_external.h lib_LTLIBRARIES = libexpat.la -noinst_LTLIBRARIES = libexpatinternal.la +if WITH_TESTS +noinst_LTLIBRARIES = libtestpat.la +endif libexpat_la_LDFLAGS = \ @AM_LDFLAGS@ \ @@ -44,17 +46,16 @@ libexpat_la_LDFLAGS = \ -no-undefined \ -version-info @LIBCURRENT@:@LIBREVISION@:@LIBAGE@ -libexpat_la_SOURCES = - -# This layer of indirection allows -# the test suite to access internal symbols -# despite compiling with -fvisibility=hidden -libexpatinternal_la_SOURCES = \ +libexpat_la_SOURCES = \ xmlparse.c \ xmltok.c \ xmlrole.c -libexpat_la_LIBADD = libexpatinternal.la +if 
WITH_TESTS +libtestpat_la_CPPFLAGS = -DXML_TESTING + +libtestpat_la_SOURCES = $(libexpat_la_SOURCES) +endif doc_DATA = \ ../AUTHORS \ diff --git a/lib/Makefile.in b/lib/Makefile.in index 29584d8bbe74..1a97e85fc41f 100644 --- a/lib/Makefile.in +++ b/lib/Makefile.in @@ -22,7 +22,7 @@ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # -# Copyright (c) 2017-2022 Sebastian Pipping +# Copyright (c) 2017-2024 Sebastian Pipping # Copyright (c) 2017 Tomasz Kłoczko # Copyright (c) 2019 David Loffredo # Licensed under the MIT license: @@ -176,8 +176,8 @@ am__uninstall_files_from_dir = { \ am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(docdir)" \ "$(DESTDIR)$(includedir)" LTLIBRARIES = $(lib_LTLIBRARIES) $(noinst_LTLIBRARIES) -libexpat_la_DEPENDENCIES = libexpatinternal.la -am_libexpat_la_OBJECTS = +libexpat_la_LIBADD = +am_libexpat_la_OBJECTS = xmlparse.lo xmltok.lo xmlrole.lo libexpat_la_OBJECTS = $(am_libexpat_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) @@ -186,9 +186,13 @@ am__v_lt_1 = libexpat_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(libexpat_la_LDFLAGS) $(LDFLAGS) -o $@ -libexpatinternal_la_LIBADD = -am_libexpatinternal_la_OBJECTS = xmlparse.lo xmltok.lo xmlrole.lo -libexpatinternal_la_OBJECTS = $(am_libexpatinternal_la_OBJECTS) +libtestpat_la_LIBADD = +am__libtestpat_la_SOURCES_DIST = xmlparse.c xmltok.c xmlrole.c +am__objects_1 = libtestpat_la-xmlparse.lo libtestpat_la-xmltok.lo \ + libtestpat_la-xmlrole.lo +@WITH_TESTS_TRUE@am_libtestpat_la_OBJECTS = $(am__objects_1) +libtestpat_la_OBJECTS = $(am_libtestpat_la_OBJECTS) +@WITH_TESTS_TRUE@am_libtestpat_la_rpath = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false @@ -204,8 +208,10 @@ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) depcomp = $(SHELL) $(top_srcdir)/conftools/depcomp am__maybe_remake_depfiles = depfiles -am__depfiles_remade = 
./$(DEPDIR)/xmlparse.Plo ./$(DEPDIR)/xmlrole.Plo \ - ./$(DEPDIR)/xmltok.Plo +am__depfiles_remade = ./$(DEPDIR)/libtestpat_la-xmlparse.Plo \ + ./$(DEPDIR)/libtestpat_la-xmlrole.Plo \ + ./$(DEPDIR)/libtestpat_la-xmltok.Plo ./$(DEPDIR)/xmlparse.Plo \ + ./$(DEPDIR)/xmlrole.Plo ./$(DEPDIR)/xmltok.Plo am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) @@ -225,8 +231,9 @@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = -SOURCES = $(libexpat_la_SOURCES) $(libexpatinternal_la_SOURCES) -DIST_SOURCES = $(libexpat_la_SOURCES) $(libexpatinternal_la_SOURCES) +SOURCES = $(libexpat_la_SOURCES) $(libtestpat_la_SOURCES) +DIST_SOURCES = $(libexpat_la_SOURCES) \ + $(am__libtestpat_la_SOURCES_DIST) am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ @@ -344,6 +351,7 @@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ +SIZEOF_VOID_P = @SIZEOF_VOID_P@ SO_MAJOR = @SO_MAJOR@ SO_MINOR = @SO_MINOR@ SO_PATCH = @SO_PATCH@ @@ -357,7 +365,6 @@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ -ac_cv_sizeof_void_p = @ac_cv_sizeof_void_p@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ @@ -410,24 +417,20 @@ include_HEADERS = \ expat_external.h lib_LTLIBRARIES = libexpat.la -noinst_LTLIBRARIES = libexpatinternal.la +@WITH_TESTS_TRUE@noinst_LTLIBRARIES = libtestpat.la libexpat_la_LDFLAGS = \ @AM_LDFLAGS@ \ @LIBM@ \ -no-undefined \ -version-info @LIBCURRENT@:@LIBREVISION@:@LIBAGE@ -libexpat_la_SOURCES = - -# This layer of indirection allows -# the test suite to access internal symbols -# despite compiling with -fvisibility=hidden -libexpatinternal_la_SOURCES = \ +libexpat_la_SOURCES = \ xmlparse.c \ xmltok.c \ xmlrole.c -libexpat_la_LIBADD = libexpatinternal.la +@WITH_TESTS_TRUE@libtestpat_la_CPPFLAGS = -DXML_TESTING 
+@WITH_TESTS_TRUE@libtestpat_la_SOURCES = $(libexpat_la_SOURCES) doc_DATA = \ ../AUTHORS \ ../Changes @@ -534,8 +537,8 @@ clean-noinstLTLIBRARIES: libexpat.la: $(libexpat_la_OBJECTS) $(libexpat_la_DEPENDENCIES) $(EXTRA_libexpat_la_DEPENDENCIES) $(AM_V_CCLD)$(libexpat_la_LINK) -rpath $(libdir) $(libexpat_la_OBJECTS) $(libexpat_la_LIBADD) $(LIBS) -libexpatinternal.la: $(libexpatinternal_la_OBJECTS) $(libexpatinternal_la_DEPENDENCIES) $(EXTRA_libexpatinternal_la_DEPENDENCIES) - $(AM_V_CCLD)$(LINK) $(libexpatinternal_la_OBJECTS) $(libexpatinternal_la_LIBADD) $(LIBS) +libtestpat.la: $(libtestpat_la_OBJECTS) $(libtestpat_la_DEPENDENCIES) $(EXTRA_libtestpat_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(am_libtestpat_la_rpath) $(libtestpat_la_OBJECTS) $(libtestpat_la_LIBADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) @@ -543,6 +546,9 @@ mostlyclean-compile: distclean-compile: -rm -f *.tab.c +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtestpat_la-xmlparse.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtestpat_la-xmlrole.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtestpat_la-xmltok.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xmlparse.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xmlrole.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xmltok.Plo@am__quote@ # am--include-marker @@ -574,6 +580,27 @@ am--depfiles: $(am__depfiles_remade) @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< +libtestpat_la-xmlparse.lo: xmlparse.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libtestpat_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT 
libtestpat_la-xmlparse.lo -MD -MP -MF $(DEPDIR)/libtestpat_la-xmlparse.Tpo -c -o libtestpat_la-xmlparse.lo `test -f 'xmlparse.c' || echo '$(srcdir)/'`xmlparse.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libtestpat_la-xmlparse.Tpo $(DEPDIR)/libtestpat_la-xmlparse.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='xmlparse.c' object='libtestpat_la-xmlparse.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libtestpat_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libtestpat_la-xmlparse.lo `test -f 'xmlparse.c' || echo '$(srcdir)/'`xmlparse.c + +libtestpat_la-xmltok.lo: xmltok.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libtestpat_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libtestpat_la-xmltok.lo -MD -MP -MF $(DEPDIR)/libtestpat_la-xmltok.Tpo -c -o libtestpat_la-xmltok.lo `test -f 'xmltok.c' || echo '$(srcdir)/'`xmltok.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libtestpat_la-xmltok.Tpo $(DEPDIR)/libtestpat_la-xmltok.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='xmltok.c' object='libtestpat_la-xmltok.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libtestpat_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libtestpat_la-xmltok.lo `test -f 'xmltok.c' || echo '$(srcdir)/'`xmltok.c + +libtestpat_la-xmlrole.lo: xmlrole.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) 
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libtestpat_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libtestpat_la-xmlrole.lo -MD -MP -MF $(DEPDIR)/libtestpat_la-xmlrole.Tpo -c -o libtestpat_la-xmlrole.lo `test -f 'xmlrole.c' || echo '$(srcdir)/'`xmlrole.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libtestpat_la-xmlrole.Tpo $(DEPDIR)/libtestpat_la-xmlrole.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='xmlrole.c' object='libtestpat_la-xmlrole.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libtestpat_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libtestpat_la-xmlrole.lo `test -f 'xmlrole.c' || echo '$(srcdir)/'`xmlrole.c + mostlyclean-libtool: -rm -f *.lo @@ -749,7 +776,10 @@ clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ clean-noinstLTLIBRARIES mostlyclean-am distclean: distclean-am - -rm -f ./$(DEPDIR)/xmlparse.Plo + -rm -f ./$(DEPDIR)/libtestpat_la-xmlparse.Plo + -rm -f ./$(DEPDIR)/libtestpat_la-xmlrole.Plo + -rm -f ./$(DEPDIR)/libtestpat_la-xmltok.Plo + -rm -f ./$(DEPDIR)/xmlparse.Plo -rm -f ./$(DEPDIR)/xmlrole.Plo -rm -f ./$(DEPDIR)/xmltok.Plo -rm -f Makefile @@ -798,7 +828,10 @@ install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am - -rm -f ./$(DEPDIR)/xmlparse.Plo + -rm -f ./$(DEPDIR)/libtestpat_la-xmlparse.Plo + -rm -f ./$(DEPDIR)/libtestpat_la-xmlrole.Plo + -rm -f ./$(DEPDIR)/libtestpat_la-xmltok.Plo + -rm -f ./$(DEPDIR)/xmlparse.Plo -rm -f ./$(DEPDIR)/xmlrole.Plo -rm -f ./$(DEPDIR)/xmltok.Plo -rm -f Makefile diff --git a/lib/expat.h b/lib/expat.h index 95464b0dd177..d0d6015a6628 100644 --- a/lib/expat.h +++ b/lib/expat.h @@ -18,6 +18,7 @@ Copyright (c) 2022 Thijs Schreijer Copyright (c) 2023 Hanno Böck Copyright 
(c) 2023 Sony Corporation / Snild Dolkow + Copyright (c) 2024 Taichi Haradaguchi <20001722@ymail.ne.jp> Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -1042,7 +1043,7 @@ typedef struct { XMLPARSEAPI(const XML_Feature *) XML_GetFeatureList(void); -#if XML_GE == 1 +#if defined(XML_DTD) || (defined(XML_GE) && XML_GE == 1) /* Added in Expat 2.4.0 for XML_DTD defined and * added in Expat 2.6.0 for XML_GE == 1. */ XMLPARSEAPI(XML_Bool) @@ -1065,7 +1066,7 @@ XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled); */ #define XML_MAJOR_VERSION 2 #define XML_MINOR_VERSION 6 -#define XML_MICRO_VERSION 0 +#define XML_MICRO_VERSION 3 #ifdef __cplusplus } diff --git a/lib/internal.h b/lib/internal.h index cce71e4c5164..167ec36804a4 100644 --- a/lib/internal.h +++ b/lib/internal.h @@ -28,10 +28,11 @@ Copyright (c) 2002-2003 Fred L. Drake, Jr. Copyright (c) 2002-2006 Karl Waclawek Copyright (c) 2003 Greg Stein - Copyright (c) 2016-2023 Sebastian Pipping + Copyright (c) 2016-2024 Sebastian Pipping Copyright (c) 2018 Yury Gribov Copyright (c) 2019 David Loffredo - Copyright (c) 2023 Sony Corporation / Snild Dolkow + Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow + Copyright (c) 2024 Taichi Haradaguchi <20001722@ymail.ne.jp> Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -155,14 +156,20 @@ extern "C" { void _INTERNAL_trim_to_complete_utf8_characters(const char *from, const char **fromLimRef); -#if XML_GE == 1 +#if defined(XML_GE) && XML_GE == 1 unsigned long long testingAccountingGetCountBytesDirect(XML_Parser parser); unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser); const char *unsignedCharToPrintable(unsigned char c); #endif -extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c -extern unsigned int g_parseAttempts; // used for testing only +extern +#if ! 
defined(XML_TESTING) + const +#endif + XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c +#if defined(XML_TESTING) +extern unsigned int g_bytesScanned; // used for testing only +#endif #ifdef __cplusplus } diff --git a/lib/siphash.h b/lib/siphash.h index a1ed99e687bd..04f6f74585b5 100644 --- a/lib/siphash.h +++ b/lib/siphash.h @@ -126,8 +126,7 @@ | ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) \ | ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) -#define SIPHASH_INITIALIZER \ - { 0, 0, 0, 0, {0}, 0, 0 } +#define SIPHASH_INITIALIZER {0, 0, 0, 0, {0}, 0, 0} struct siphash { uint64_t v0, v1, v2, v3; diff --git a/lib/xmlparse.c b/lib/xmlparse.c index aaf0fa9c8f96..d9285b213b38 100644 --- a/lib/xmlparse.c +++ b/lib/xmlparse.c @@ -1,4 +1,4 @@ -/* 628e24d4966bedbd4800f6ed128d06d29703765b4bce12d3b7f099f90f842fc9 (2.6.0+) +/* ba4cdf9bdb534f355a9def4c9e25d20ee8e72f95b0a4d930be52e563f5080196 (2.6.3+) __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| @@ -38,7 +38,8 @@ Copyright (c) 2022 Jann Horn Copyright (c) 2022 Sean McBride Copyright (c) 2023 Owain Davies - Copyright (c) 2023 Sony Corporation / Snild Dolkow + Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow + Copyright (c) 2024 Berkay Eren Ürün Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -210,7 +211,7 @@ typedef char ICHAR; #endif /* Round up n to be a multiple of sz, where sz is a power of 2. */ -#define ROUND_UP(n, sz) (((n) + ((sz)-1)) & ~((sz)-1)) +#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1)) /* Do safe (NULL-aware) pointer arithmetic */ #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0) @@ -248,7 +249,7 @@ static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key); it odd, since odd numbers are always relative prime to a power of 2. 
*/ #define SECOND_HASH(hash, mask, power) \ - ((((hash) & ~(mask)) >> ((power)-1)) & ((mask) >> 2)) + ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2)) #define PROBE_STEP(hash, mask, power) \ ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1)) @@ -294,7 +295,7 @@ typedef struct { The name of the element is stored in both the document and API encodings. The memory buffer 'buf' is a separately-allocated memory area which stores the name. During the XML_Parse()/ - XMLParseBuffer() when the element is open, the memory for the 'raw' + XML_ParseBuffer() when the element is open, the memory for the 'raw' version of the name (in the document encoding) is shared with the document buffer. If the element is open across calls to XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to @@ -629,8 +630,14 @@ static unsigned long getDebugLevel(const char *variableName, ? 0 \ : ((*((pool)->ptr)++ = c), 1)) -XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c -unsigned int g_parseAttempts = 0; // used for testing only +#if ! defined(XML_TESTING) +const +#endif + XML_Bool g_reparseDeferralEnabledDefault + = XML_TRUE; // write ONLY in runtests.c +#if defined(XML_TESTING) +unsigned int g_bytesScanned = 0; // used for testing only +#endif struct XML_ParserStruct { /* The first member must be m_userData so that the XML_GetUserData @@ -1017,7 +1024,9 @@ callProcessor(XML_Parser parser, const char *start, const char *end, return XML_ERROR_NONE; } } - g_parseAttempts += 1; +#if defined(XML_TESTING) + g_bytesScanned += (unsigned)have_now; +#endif const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr); if (ret == XML_ERROR_NONE) { // if we consumed nothing, remember what we had on this parse attempt. 
@@ -2030,6 +2039,12 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { if (parser == NULL) return XML_STATUS_ERROR; + + if (len < 0) { + parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT; + return XML_STATUS_ERROR; + } + switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: parser->m_errorCode = XML_ERROR_SUSPENDED; @@ -5838,18 +5853,17 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) { /* Set a safe default value in case 'next' does not get set */ next = textStart; -#ifdef XML_DTD if (entity->is_param) { int tok = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, tok, next, &next, XML_FALSE, XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); - } else -#endif /* XML_DTD */ + } else { result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding, textStart, textEnd, &next, XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); + } if (result == XML_ERROR_NONE) { if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { @@ -5886,18 +5900,17 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end, /* Set a safe default value in case 'next' does not get set */ next = textStart; -#ifdef XML_DTD if (entity->is_param) { int tok = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, tok, next, &next, XML_FALSE, XML_TRUE, XML_ACCOUNT_ENTITY_EXPANSION); - } else -#endif /* XML_DTD */ + } else { result = doContent(parser, openEntity->startTagLevel, parser->m_internalEncoding, textStart, textEnd, &next, XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); + } if (result != XML_ERROR_NONE) return result; @@ -5924,7 +5937,6 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end, return XML_ERROR_NONE; } -#ifdef XML_DTD if (entity->is_param) { int tok; parser->m_processor = prologProcessor; @@ -5932,9 +5944,7 
@@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end, return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, XML_ACCOUNT_DIRECT); - } else -#endif /* XML_DTD */ - { + } else { parser->m_processor = contentProcessor; /* see externalEntityContentProcessor vs contentProcessor */ result = doContent(parser, parser->m_parentParser ? 1 : 0, @@ -6232,7 +6242,7 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, dtd->keepProcessing = dtd->standalone; goto endEntityValue; } - if (entity->open) { + if (entity->open || (entity == parser->m_declEntity)) { if (enc == parser->m_encoding) parser->m_eventPtr = entityTextPtr; result = XML_ERROR_RECURSIVE_ENTITY_REF; @@ -7008,6 +7018,16 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, if (! newE) return 0; if (oldE->nDefaultAtts) { + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(int) < sizeof(size_t), e.g. on x86_64. */ +#if UINT_MAX >= SIZE_MAX + if ((size_t)oldE->nDefaultAtts + > ((size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE))) { + return 0; + } +#endif newE->defaultAtts = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); if (! newE->defaultAtts) { @@ -7550,6 +7570,15 @@ nextScaffoldPart(XML_Parser parser) { int next; if (! dtd->scaffIndex) { + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning + * from -Wtype-limits on platforms where + * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ +#if UINT_MAX >= SIZE_MAX + if (parser->m_groupSize > ((size_t)(-1) / sizeof(int))) { + return -1; + } +#endif dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int)); if (! 
dtd->scaffIndex) return -1; @@ -7779,6 +7808,8 @@ copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) { static float accountingGetCurrentAmplification(XML_Parser rootParser) { + // 1.........1.........12 => 22 + const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1; const XmlBigCount countBytesOutput = rootParser->m_accounting.countBytesDirect + rootParser->m_accounting.countBytesIndirect; @@ -7786,7 +7817,9 @@ accountingGetCurrentAmplification(XML_Parser rootParser) { = rootParser->m_accounting.countBytesDirect ? (countBytesOutput / (float)(rootParser->m_accounting.countBytesDirect)) - : 1.0f; + : ((lenOfShortestInclude + + rootParser->m_accounting.countBytesIndirect) + / (float)lenOfShortestInclude); assert(! rootParser->m_parentParser); return amplificationFactor; } diff --git a/tests/Makefile.am b/tests/Makefile.am index f949fe7f1ab0..d25376be5419 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -6,9 +6,10 @@ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # -# Copyright (c) 2017-2023 Sebastian Pipping +# Copyright (c) 2017-2024 Sebastian Pipping # Copyright (c) 2017-2022 Rhodri James # Copyright (c) 2020 Jeffrey Walton +# Copyright (c) 2024 Dag-Erling Smørgrav # Licensed under the MIT license: # # Permission is hereby granted, free of charge, to any person obtaining @@ -32,7 +33,7 @@ SUBDIRS = . 
benchmark -AM_CPPFLAGS = @AM_CPPFLAGS@ -I$(srcdir)/../lib +AM_CPPFLAGS = @AM_CPPFLAGS@ -I$(srcdir)/../lib -DXML_TESTING check_PROGRAMS = runtests runtests_cxx TESTS = runtests runtests_cxx @@ -72,8 +73,8 @@ runtests_cxx_SOURCES = \ runtests_cxx.cpp \ structdata_cxx.cpp -runtests_LDADD = ../lib/libexpatinternal.la -runtests_cxx_LDADD = ../lib/libexpatinternal.la +runtests_LDADD = ../lib/libtestpat.la +runtests_cxx_LDADD = ../lib/libtestpat.la runtests_LDFLAGS = @AM_LDFLAGS@ @LIBM@ runtests_cxx_LDFLAGS = @AM_LDFLAGS@ @LIBM@ @@ -92,7 +93,7 @@ EXTRA_DIST = \ structdata.h \ minicheck.h \ memcheck.h \ - README.txt \ + README.md \ udiffer.py \ xmltest.log.expected \ xmltest.sh diff --git a/tests/Makefile.in b/tests/Makefile.in index 4c4fd928e3c7..eb00a068cbd2 100644 --- a/tests/Makefile.in +++ b/tests/Makefile.in @@ -22,9 +22,10 @@ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # -# Copyright (c) 2017-2023 Sebastian Pipping +# Copyright (c) 2017-2024 Sebastian Pipping # Copyright (c) 2017-2022 Rhodri James # Copyright (c) 2020 Jeffrey Walton +# Copyright (c) 2024 Dag-Erling Smørgrav # Licensed under the MIT license: # # Permission is hereby granted, free of charge, to any person obtaining @@ -151,7 +152,7 @@ am_runtests_OBJECTS = acc_tests.$(OBJEXT) alloc_tests.$(OBJEXT) \ nsalloc_tests.$(OBJEXT) runtests.$(OBJEXT) \ structdata.$(OBJEXT) runtests_OBJECTS = $(am_runtests_OBJECTS) -runtests_DEPENDENCIES = ../lib/libexpatinternal.la +runtests_DEPENDENCIES = ../lib/libtestpat.la AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent @@ -168,7 +169,7 @@ am_runtests_cxx_OBJECTS = acc_tests_cxx.$(OBJEXT) \ ns_tests_cxx.$(OBJEXT) runtests_cxx.$(OBJEXT) \ structdata_cxx.$(OBJEXT) runtests_cxx_OBJECTS = $(am_runtests_cxx_OBJECTS) -runtests_cxx_DEPENDENCIES = ../lib/libexpatinternal.la +runtests_cxx_DEPENDENCIES = ../lib/libtestpat.la runtests_cxx_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CXXLD) 
$(AM_CXXFLAGS) \ $(CXXFLAGS) $(runtests_cxx_LDFLAGS) $(LDFLAGS) -o $@ @@ -485,7 +486,7 @@ TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ DIST_SUBDIRS = $(SUBDIRS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/conftools/depcomp \ - $(top_srcdir)/conftools/test-driver + $(top_srcdir)/conftools/test-driver README.md DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ @@ -515,7 +516,7 @@ am__relativize = \ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_CFLAGS = @AM_CFLAGS@ -AM_CPPFLAGS = @AM_CPPFLAGS@ -I$(srcdir)/../lib +AM_CPPFLAGS = @AM_CPPFLAGS@ -I$(srcdir)/../lib -DXML_TESTING AM_CXXFLAGS = @AM_CXXFLAGS@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ AM_LDFLAGS = @AM_LDFLAGS@ @@ -602,6 +603,7 @@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ +SIZEOF_VOID_P = @SIZEOF_VOID_P@ SO_MAJOR = @SO_MAJOR@ SO_MINOR = @SO_MINOR@ SO_PATCH = @SO_PATCH@ @@ -615,7 +617,6 @@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ -ac_cv_sizeof_void_p = @ac_cv_sizeof_void_p@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ @@ -698,8 +699,8 @@ runtests_cxx_SOURCES = \ runtests_cxx.cpp \ structdata_cxx.cpp -runtests_LDADD = ../lib/libexpatinternal.la -runtests_cxx_LDADD = ../lib/libexpatinternal.la +runtests_LDADD = ../lib/libtestpat.la +runtests_cxx_LDADD = ../lib/libtestpat.la runtests_LDFLAGS = @AM_LDFLAGS@ @LIBM@ runtests_cxx_LDFLAGS = @AM_LDFLAGS@ @LIBM@ EXTRA_DIST = \ @@ -716,7 +717,7 @@ EXTRA_DIST = \ structdata.h \ minicheck.h \ memcheck.h \ - README.txt \ + README.md \ udiffer.py \ xmltest.log.expected \ xmltest.sh diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 000000000000..010ca95e9e21 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,11 @@ +This directory contains the test suite for Expat. The tests provide +general unit testing and regression coverage. 
The tests are not +expected to be useful examples of Expat usage; see the +[examples](../examples) directory for that. + +The Expat tests use a partial internal implementation of the +[Check](https://libcheck.github.io/check/) unit testing framework for +C. + +Expat must be built and, on some platforms, installed, before the +tests can be run. diff --git a/tests/README.txt b/tests/README.txt deleted file mode 100644 index 30e1d4dabe80..000000000000 --- a/tests/README.txt +++ /dev/null @@ -1,13 +0,0 @@ -This directory contains the (fledgling) test suite for Expat. The -tests provide general unit testing and regression coverage. The tests -are not expected to be useful examples of Expat usage; see the -examples/ directory for that. - -The Expat tests use a partial internal implementation of the "Check" -unit testing framework for C. More information on Check can be found at: - - http://check.sourceforge.net/ - -Expat must be built and, depending on platform, must be installed, before "make check" can be executed. - -This test suite can all change in a later version. diff --git a/tests/acc_tests.c b/tests/acc_tests.c index e1c4b7f7eb51..f193aa58a492 100644 --- a/tests/acc_tests.c +++ b/tests/acc_tests.c @@ -378,6 +378,63 @@ START_TEST(test_helper_unsigned_char_to_printable) { fail("unsignedCharToPrintable result mistaken"); } END_TEST + +START_TEST(test_amplification_isolated_external_parser) { + // NOTE: Length 44 is precisely twice the length of "<!ENTITY a SYSTEM 'b'>" + // (22) that is used in function accountingGetCurrentAmplification in + // xmlparse.c. 
+ // 1.........1.........1.........1.........1..4 => 44 + const char doc[] = "<!ENTITY % p1 '123456789_123456789_1234567'>"; + const int docLen = (int)sizeof(doc) - 1; + const float maximumToleratedAmplification = 2.0f; + + struct TestCase { + int offsetOfThreshold; + enum XML_Status expectedStatus; + }; + + struct TestCase cases[] = { + {-2, XML_STATUS_ERROR}, {-1, XML_STATUS_ERROR}, {0, XML_STATUS_ERROR}, + {+1, XML_STATUS_OK}, {+2, XML_STATUS_OK}, + }; + + for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { + const int offsetOfThreshold = cases[i].offsetOfThreshold; + const enum XML_Status expectedStatus = cases[i].expectedStatus; + const unsigned long long activationThresholdBytes + = docLen + offsetOfThreshold; + + set_subtest("offsetOfThreshold=%d, expectedStatus=%d", offsetOfThreshold, + expectedStatus); + + XML_Parser parser = XML_ParserCreate(NULL); + assert_true(parser != NULL); + + assert_true(XML_SetBillionLaughsAttackProtectionMaximumAmplification( + parser, maximumToleratedAmplification) + == XML_TRUE); + assert_true(XML_SetBillionLaughsAttackProtectionActivationThreshold( + parser, activationThresholdBytes) + == XML_TRUE); + + XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); + assert_true(ext_parser != NULL); + + const enum XML_Status actualStatus + = _XML_Parse_SINGLE_BYTES(ext_parser, doc, docLen, XML_TRUE); + + assert_true(actualStatus == expectedStatus); + if (actualStatus != XML_STATUS_OK) { + assert_true(XML_GetErrorCode(ext_parser) + == XML_ERROR_AMPLIFICATION_LIMIT_BREACH); + } + + XML_ParserFree(ext_parser); + XML_ParserFree(parser); + } +} +END_TEST + #endif // XML_GE == 1 void @@ -390,6 +447,8 @@ make_accounting_test_case(Suite *s) { tcase_add_test(tc_accounting, test_accounting_precision); tcase_add_test(tc_accounting, test_billion_laughs_attack_protection_api); tcase_add_test(tc_accounting, test_helper_unsigned_char_to_printable); + tcase_add_test__ifdef_xml_dtd(tc_accounting, + 
test_amplification_isolated_external_parser); #else UNUSED_P(s); 
#endif /* XML_GE == 1 */ diff --git a/tests/basic_tests.c b/tests/basic_tests.c index 7112a4401879..0d97b1090c7f 100644 --- a/tests/basic_tests.c +++ b/tests/basic_tests.c @@ -1202,6 +1202,49 @@ START_TEST(test_wfc_no_recursive_entity_refs) { } END_TEST +START_TEST(test_recursive_external_parameter_entity_2) { + struct TestCase { + const char *doc; + enum XML_Status expectedStatus; + }; + + struct TestCase cases[] = { + {"", XML_STATUS_ERROR}, + {"" + "", + XML_STATUS_ERROR}, + {"" + "", + XML_STATUS_OK}, + {"", XML_STATUS_OK}, + }; + + for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { + const char *const doc = cases[i].doc; + const enum XML_Status expectedStatus = cases[i].expectedStatus; + set_subtest("%s", doc); + + XML_Parser parser = XML_ParserCreate(NULL); + assert_true(parser != NULL); + + XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); + assert_true(ext_parser != NULL); + + const enum XML_Status actualStatus + = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE); + + assert_true(actualStatus == expectedStatus); + if (actualStatus != XML_STATUS_OK) { + assert_true(XML_GetErrorCode(ext_parser) + == XML_ERROR_RECURSIVE_ENTITY_REF); + } + + XML_ParserFree(ext_parser); + XML_ParserFree(parser); + } +} +END_TEST + /* Test incomplete external entities are faulted */ START_TEST(test_ext_entity_invalid_parse) { const char *text = ""; + for (int isFinal = 0; isFinal < 2; isFinal++) { + set_subtest("isFinal=%d", isFinal); + + XML_Parser parser = XML_ParserCreate(NULL); + + if (XML_GetErrorCode(parser) != XML_ERROR_NONE) + fail("There was not supposed to be any initial parse error."); + + const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal); + + if (status != XML_STATUS_ERROR) + fail("Negative len was expected to fail the parse but did not."); + + if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT) + fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT."); + + 
XML_ParserFree(parser); + } +} +END_TEST + +/* Test XML_ParseBuffer for len < 0 */ +START_TEST(test_negative_len_parse_buffer) { + const char *const doc = ""; + for (int isFinal = 0; isFinal < 2; isFinal++) { + set_subtest("isFinal=%d", isFinal); + + XML_Parser parser = XML_ParserCreate(NULL); + + if (XML_GetErrorCode(parser) != XML_ERROR_NONE) + fail("There was not supposed to be any initial parse error."); + + void *const buffer = XML_GetBuffer(parser, (int)strlen(doc)); + + if (buffer == NULL) + fail("XML_GetBuffer failed."); + + memcpy(buffer, doc, strlen(doc)); + + const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal); + + if (status != XML_STATUS_ERROR) + fail("Negative len was expected to fail the parse but did not."); + + if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT) + fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT."); + + XML_ParserFree(parser); + } +} +END_TEST + /* Test odd corners of the XML_GetBuffer interface */ static enum XML_Status get_feature(enum XML_FeatureEnum feature_id, long *presult) { @@ -5202,13 +5300,7 @@ START_TEST(test_nested_entity_suspend) { END_TEST /* Regression test for quadratic parsing on large tokens */ -START_TEST(test_big_tokens_take_linear_time) { - const char *const too_slow_failure_message - = "Compared to the baseline runtime of the first test, this test has a " - "slowdown of more than . " - "Please keep increasing the value by 1 until it reliably passes the " - "test on your hardware and open a bug sharing that number with us. 
" - "Thanks in advance!"; +START_TEST(test_big_tokens_scale_linearly) { const struct { const char *pre; const char *post; @@ -5220,65 +5312,57 @@ START_TEST(test_big_tokens_take_linear_time) { {"<", "/>"}, // big elem name, used to be O(N²) }; const int num_cases = sizeof(text) / sizeof(text[0]); - // For the test we need a value that is: - // (1) big enough that the test passes reliably (avoiding flaky tests), and - // (2) small enough that the test actually catches regressions. - const int max_slowdown = 15; char aaaaaa[4096]; const int fillsize = (int)sizeof(aaaaaa); const int fillcount = 100; + const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post. + const unsigned max_factor = 4; + const unsigned max_scanned = max_factor * approx_bytes; memset(aaaaaa, 'a', fillsize); if (! g_reparseDeferralEnabledDefault) { return; // heuristic is disabled; we would get O(n^2) and fail. } -#if ! defined(__linux__) - if (CLOCKS_PER_SEC < 100000) { - // Skip this test if clock() doesn't have reasonably good resolution. - // This workaround is primarily targeting Windows and FreeBSD, since - // XSI requires the value to be 1.000.000 (10x the condition here), and - // we want to be very sure that at least one platform in CI can catch - // regressions (through a failing test). 
- return; - } -#endif - clock_t baseline = 0; for (int i = 0; i < num_cases; ++i) { XML_Parser parser = XML_ParserCreate(NULL); assert_true(parser != NULL); enum XML_Status status; - set_subtest("max_slowdown=%d text=\"%saaaaaa%s\"", max_slowdown, - text[i].pre, text[i].post); - const clock_t start = clock(); + set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post); // parse the start text + g_bytesScanned = 0; status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre, (int)strlen(text[i].pre), XML_FALSE); if (status != XML_STATUS_OK) { xml_failure(parser); } + // parse lots of 'a', failing the test early if it takes too long + unsigned past_max_count = 0; for (int f = 0; f < fillcount; ++f) { status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE); if (status != XML_STATUS_OK) { xml_failure(parser); } - // i == 0 means we're still calculating the baseline value - if (i > 0) { - const clock_t now = clock(); - const clock_t clocks_so_far = now - start; - const int slowdown = clocks_so_far / baseline; - if (slowdown >= max_slowdown) { - fprintf( - stderr, - "fill#%d: clocks_so_far=%d baseline=%d slowdown=%d max_slowdown=%d\n", - f, (int)clocks_so_far, (int)baseline, slowdown, max_slowdown); - fail(too_slow_failure_message); - } + if (g_bytesScanned > max_scanned) { + // We're not done, and have already passed the limit -- the test will + // definitely fail. This block allows us to save time by failing early. + const unsigned pushed + = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize; + fprintf( + stderr, + "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n", + f + 1, fillcount, pushed, g_bytesScanned, + g_bytesScanned / (double)pushed, max_scanned, max_factor); + past_max_count++; + // We are failing, but allow a few log prints first. If we don't reach + // a count of five, the test will fail after the loop instead. 
+ assert_true(past_max_count < 5); } } + // parse the end text status = _XML_Parse_SINGLE_BYTES(parser, text[i].post, (int)strlen(text[i].post), XML_TRUE); @@ -5286,18 +5370,14 @@ START_TEST(test_big_tokens_take_linear_time) { xml_failure(parser); } - // how long did it take in total? - const clock_t end = clock(); - const clock_t taken = end - start; - if (i == 0) { - assert_true(taken > 0); // just to make sure we don't div-by-0 later - baseline = taken; - } - const int slowdown = taken / baseline; - if (slowdown >= max_slowdown) { - fprintf(stderr, "taken=%d baseline=%d slowdown=%d max_slowdown=%d\n", - (int)taken, (int)baseline, slowdown, max_slowdown); - fail(too_slow_failure_message); + assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working + if (g_bytesScanned > max_scanned) { + fprintf( + stderr, + "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n", + g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned, + max_factor); + fail("scanned too many bytes"); } XML_ParserFree(parser); @@ -5774,19 +5854,17 @@ START_TEST(test_varying_buffer_fills) { fillsize[2], fillsize[3]); XML_Parser parser = XML_ParserCreate(NULL); assert_true(parser != NULL); - g_parseAttempts = 0; CharData storage; CharData_Init(&storage); XML_SetUserData(parser, &storage); XML_SetStartElementHandler(parser, start_element_event_handler); + g_bytesScanned = 0; int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call) - int scanned_bytes = 0; // sum of (buffered bytes at each actual parse) int offset = 0; while (*fillsize >= 0) { assert_true(offset + *fillsize <= document_length); // or test is invalid - const unsigned attempts_before = g_parseAttempts; const enum XML_Status status = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); if (status != XML_STATUS_OK) { @@ -5796,28 +5874,20 @@ START_TEST(test_varying_buffer_fills) { fillsize++; assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow 
worstcase_bytes += offset; // we might've tried to parse all pending bytes - if (g_parseAttempts != attempts_before) { - assert_true(g_parseAttempts == attempts_before + 1); // max 1/XML_Parse - assert_true(offset <= INT_MAX - scanned_bytes); // avoid overflow - scanned_bytes += offset; // we *did* try to parse all pending bytes - } } assert_true(storage.count == 1); // the big token should've been parsed - assert_true(scanned_bytes > 0); // test-the-test: does our counter work? + assert_true(g_bytesScanned > 0); // test-the-test: does our counter work? if (g_reparseDeferralEnabledDefault) { // heuristic is enabled; some XML_Parse calls may have deferred reparsing - const int max_bytes_scanned = -*fillsize; - if (scanned_bytes > max_bytes_scanned) { + const unsigned max_bytes_scanned = -*fillsize; + if (g_bytesScanned > max_bytes_scanned) { fprintf(stderr, - "bytes scanned in parse attempts: actual=%d limit=%d \n", - scanned_bytes, max_bytes_scanned); + "bytes scanned in parse attempts: actual=%u limit=%u \n", + g_bytesScanned, max_bytes_scanned); fail("too many bytes scanned in parse attempts"); } - assert_true(scanned_bytes <= worstcase_bytes); - } else { - // heuristic is disabled; every XML_Parse() will have reparsed - assert_true(scanned_bytes == worstcase_bytes); } + assert_true(g_bytesScanned <= (unsigned)worstcase_bytes); XML_ParserFree(parser); } @@ -5940,6 +6010,8 @@ make_basic_test_case(Suite *s) { tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter); tcase_add_test(tc_basic, test_empty_parse); + tcase_add_test(tc_basic, test_negative_len_parse); + tcase_add_test(tc_basic, test_negative_len_parse_buffer); tcase_add_test(tc_basic, test_get_buffer_1); tcase_add_test(tc_basic, test_get_buffer_2); #if XML_CONTEXT_BYTES > 0 @@ -5972,6 +6044,8 @@ make_basic_test_case(Suite *s) { tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity); 
tcase_add_test__ifdef_xml_dtd(tc_basic, test_recursive_external_parameter_entity); + tcase_add_test__ifdef_xml_dtd(tc_basic, + test_recursive_external_parameter_entity_2); tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd); tcase_add_test(tc_basic, test_suspend_xdecl); tcase_add_test(tc_basic, test_abort_epilog); @@ -6065,7 +6139,7 @@ make_basic_test_case(Suite *s) { tcase_add_test__ifdef_xml_dtd(tc_basic, test_pool_integrity_with_unfinished_attr); tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend); - tcase_add_test(tc_basic, test_big_tokens_take_linear_time); + tcase_add_test(tc_basic, test_big_tokens_scale_linearly); tcase_add_test(tc_basic, test_set_reparse_deferral); tcase_add_test(tc_basic, test_reparse_deferral_is_inherited); tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser); diff --git a/tests/benchmark/Makefile.in b/tests/benchmark/Makefile.in index 18f3b7bf7bf8..d0e6d0769db0 100644 --- a/tests/benchmark/Makefile.in +++ b/tests/benchmark/Makefile.in @@ -303,6 +303,7 @@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ +SIZEOF_VOID_P = @SIZEOF_VOID_P@ SO_MAJOR = @SO_MAJOR@ SO_MINOR = @SO_MINOR@ SO_PATCH = @SO_PATCH@ @@ -316,7 +317,6 @@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ -ac_cv_sizeof_void_p = @ac_cv_sizeof_void_p@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ diff --git a/tests/misc_tests.c b/tests/misc_tests.c index b5212f58a5bb..2ee9320b1392 100644 --- a/tests/misc_tests.c +++ b/tests/misc_tests.c @@ -208,7 +208,7 @@ START_TEST(test_misc_version) { if (! 
versions_equal(&read_version, &parsed_version)) fail("Version mismatch"); - if (xcstrcmp(version_text, XCS("expat_2.6.0"))) /* needs bump on releases */ + if (xcstrcmp(version_text, XCS("expat_2.6.3"))) /* needs bump on releases */ fail("XML_*_VERSION in expat.h out of sync?\n"); } END_TEST diff --git a/xmlwf/Makefile.in b/xmlwf/Makefile.in index a8a84dc4e6de..480fd3e04103 100644 --- a/xmlwf/Makefile.in +++ b/xmlwf/Makefile.in @@ -311,6 +311,7 @@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ +SIZEOF_VOID_P = @SIZEOF_VOID_P@ SO_MAJOR = @SO_MAJOR@ SO_MINOR = @SO_MINOR@ SO_PATCH = @SO_PATCH@ @@ -324,7 +325,6 @@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ -ac_cv_sizeof_void_p = @ac_cv_sizeof_void_p@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@