From 1a0ad8a54c5655179460fb20dd11bda73ffda00d Mon Sep 17 00:00:00 2001 From: Dawn Perchik Date: Fri, 19 Jun 2026 00:00:12 -0700 Subject: [PATCH] P2434R5 Nondeterministic pointer provenance --- source/basic.tex | 152 ++++++++++++++++++++++++++--------------- source/expressions.tex | 35 ++++++++-- source/iostreams.tex | 9 +++ source/utilities.tex | 9 +-- 4 files changed, 138 insertions(+), 67 deletions(-) diff --git a/source/basic.tex b/source/basic.tex index c1f58de60a..90cbab7069 100644 --- a/source/basic.tex +++ b/source/basic.tex @@ -5166,57 +5166,6 @@ or functions\iref{dcl.fct}. \end{note} -\pnum -\indextext{object!byte copying and|(}% -\indextext{type!trivially copyable}% -For any object (other than a potentially-overlapping subobject) of trivially copyable type -\tcode{T}, whether or not the object holds a valid value of type -\tcode{T}, the underlying bytes\iref{intro.memory} making up the -object can be copied into an array of -\keyword{char}, -\tcode{\keyword{unsigned} \keyword{char}}, or -\tcode{std::byte}\iref{cstddef.syn}. -\begin{footnote} -By using, for example, the library -functions\iref{headers} \tcode{std::memcpy} or \tcode{std::memmove}. -\end{footnote} -If the content of that array -is copied back into the object, the object shall -subsequently hold its original value. 
-\begin{example} -\begin{codeblock} -constexpr std::size_t N = sizeof(T); -char buf[N]; -T obj; // \tcode{obj} initialized to its original value -std::memcpy(buf, &obj, N); // between these two calls to \tcode{std::memcpy}, \tcode{obj} might be modified -std::memcpy(&obj, buf, N); // at this point, each subobject of \tcode{obj} of scalar type holds its original value -\end{codeblock} -\end{example} - -\pnum -For two distinct objects \tcode{obj1} and \tcode{obj2} -of trivially copyable type \tcode{T}, -where neither \tcode{obj1} nor \tcode{obj2} is a potentially-overlapping subobject, -if the underlying bytes\iref{intro.memory} making up -\tcode{obj1} are copied into \tcode{obj2}, -\begin{footnote} -By using, for example, -the library functions\iref{headers} \tcode{std::memcpy} or \tcode{std::memmove}. -\end{footnote} - \tcode{obj2} shall subsequently hold the same value as -\tcode{obj1}. -\begin{example} -\begin{codeblock} -T* t1p; -T* t2p; - // provided that \tcode{t2p} points to an initialized object ... -std::memcpy(t1p, t2p, sizeof(T)); - // at this point, every subobject of trivially copyable type in \tcode{*t1p} contains - // the same value as the corresponding subobject in \tcode{*t2p} -\end{codeblock} -\end{example} -\indextext{object!byte copying and|)} - \pnum \label{term.object.representation}% The \defnx{object representation}{representation!object} @@ -5243,10 +5192,6 @@ Bits in the object representation of a type or object that are not part of the value representation are \defn{padding bits}. -For trivially copyable types, the value representation is -a set of bits in the object representation that determines a -\defn{value}, which is one discrete element of an -\impldef{values of a trivially copyable type} set of values. \begin{footnote} The intent is that the memory model of \Cpp{} is compatible with that of the C programming language. @@ -5424,6 +5369,88 @@ Every function of consteval-only type shall be an immediate function\iref{expr.const.imm}. 
+\rSec2[basic.types.trivial]{Trivially copyable types} + +%%FIXME: This paragraph is hard to understand. +\pnum +Each trivially copyable type \tcode{T} has an +\impldef{values of a trivially copyable type} +set of discrete \defnx{values}{value}. +\indextext{value!representation}% +Each possible value representation of an object of type \tcode{T} +corresponds to a distinct +\impldef{subset of values of a trivially copyable type + for which a value representation corresponds to} +subset of this set. +%%FIXME: "*the* set of values" - what set of values? +The union of these subsets is the set of values; +for scalar types other than object pointer types, +each such subset contains no more than one value. +Certain operations cause an object to +\indextext{object!acquiring a value representation}% +\indextext{value!representation!acquire}% +\defn{acquire} a value representation, +in which case the object's value +is replaced with an unspecified member of the corresponding subset that +would result in the program having defined behavior, if any. + +\pnum +\begin{note} +A single subset for a pointer type +can contain pointers to multiple objects +in each of several regions of storage whose durations are disjoint. +\end{note} + +\pnum +\indextext{object!byte copying and|(}% +\indextext{type!trivially copyable}% +If an object of such a type \tcode{T} +is not a potentially-overlapping subobject, +whether or not the object holds a valid value of type +\tcode{T}, the underlying bytes\iref{intro.memory} making up the +object can be copied into an array of +\keyword{char}, +\tcode{\keyword{unsigned} \keyword{char}}, or +\tcode{std::byte}\iref{cstddef.syn}. +\begin{footnote} +By using, for example, the library +functions\iref{headers} \tcode{std::memcpy} or \tcode{std::memmove}. +\end{footnote} +If the content of that array +is copied back into the object, +the object acquires its original value representation. 
+\begin{example} +\begin{codeblock} +constexpr std::size_t N = sizeof(T); +char buf[N]; +T obj; // \tcode{obj} initialized to its original value +std::memcpy(buf, &obj, N); // between these two calls to \tcode{std::memcpy}, \tcode{obj} might be modified +std::memcpy(&obj, buf, N); // at this point, each subobject of \tcode{obj} of scalar type holds its original value +\end{codeblock} +\end{example} + +\pnum +For two distinct such objects \tcode{obj1} and \tcode{obj2}, +if the underlying bytes\iref{intro.memory} making up +\tcode{obj1} are copied into \tcode{obj2}, +\begin{footnote} +By using, for example, +the library functions\iref{headers} \tcode{std::memcpy} or \tcode{std::memmove}. +\end{footnote} + \tcode{obj2} acquires the value representation of +\tcode{obj1}. +\begin{example} +\begin{codeblock} +T* t1p; +T* t2p; + // provided that \tcode{t2p} points to an initialized object ... +std::memcpy(t1p, t2p, sizeof(T)); + // at this point, every subobject of trivially copyable type in \tcode{*t1p} contains + // the same value as the corresponding subobject in \tcode{*t2p} +\end{codeblock} +\end{example} +\indextext{object!byte copying and|)} + \rSec2[basic.fundamental]{Fundamental types} \pnum @@ -6076,13 +6103,26 @@ is also associated with $E$. \end{note} +\pnum +If an evaluation produces or causes an object +to have\iref{basic.types.trivial} a pointer value +to or past the end of an object $O$ and +happens before the beginning of the duration +of the region of storage for $O$, +the behavior is undefined. +\begin{note} +Relaxed atomic operations\iref{atomics.order} can produce such values. +Conversions from integers avoid producing them\iref{expr.reinterpret.cast}. 
+\end{note} + \pnum A pointer value $P$ is \indextext{value!valid in the context of an evaluation}% \defn{valid in the context of} an evaluation $E$ if $P$ is a pointer to function or a null pointer value, or if it is a pointer to or past the end of an object $O$ and -$E$ happens before the end of the duration of the region of storage for $O$. +$E$ happens after the beginning +and happens before the end of the duration of the region of storage for $O$. If a pointer value $P$ is used in an evaluation $E$ and $P$ is not valid in the context of $E$, then the behavior is undefined if $E$ is diff --git a/source/expressions.tex b/source/expressions.tex index 72f77d87bb..80d5122402 100644 --- a/source/expressions.tex +++ b/source/expressions.tex @@ -4955,7 +4955,7 @@ \indextext{cast!reinterpret!pointer to integer}% \indextext{cast!pointer to integer}% A pointer can be explicitly converted to any integral type large enough -to hold all values of its type. +to distinguish all value representations of its type. \indextext{conversion!implementation-defined pointer integer}% The mapping function is \impldef{mapping of pointer to integer}. \begin{note} @@ -4975,12 +4975,26 @@ \indextext{cast!reinterpret!integer to pointer}% \indextext{cast!integer to pointer}% A value of integral type or enumeration type can be explicitly converted -to a pointer. A pointer converted to an integer of sufficient size (if -any such exists on the implementation) and back to the same pointer type -will have its original value\iref{basic.compound}; +to a pointer. +If the value is one that can be produced +by converting one or more pointer values\iref{basic.compound} +to an integral type, +the result is an unspecified choice among all such values +that would result in the program having defined behavior. +If no such value exists, the behavior is undefined. 
+\begin{note} +It is possible for the result +to be an invalid pointer value or +to not be valid in the context of the conversion\iref{basic.compound} +because it points to an object in a region of storage +whose duration has ended or has not yet begun. +\end{note} \indextext{conversion!implementation-defined pointer integer}% -mappings between pointers and integers are otherwise +Otherwise, the result is \impldef{conversions between pointers and integers}. +\begin{note} +It can be an invalid pointer value. +\end{note} \pnum \indextext{cast!reinterpret!pointer-to-function}% @@ -7653,6 +7667,14 @@ result of type \keyword{bool}. In each case below, the operands shall have the same type after the specified conversions have been applied. +\pnum +Any two pointer values or two pointer-to-member values +either compare equal or compare unequal. +\begin{note} +Repeated comparisons are consistent +so long as neither value is an invalid pointer value. +\end{note} + \pnum \indextext{comparison!pointer}% \indextext{comparison!pointer to function}% @@ -7772,8 +7794,7 @@ If two operands compare equal, the result is \keyword{true} for the \tcode{==} operator and \keyword{false} for the \tcode{!=} operator. If two operands compare unequal, the result is \keyword{false} for the \tcode{==} operator and -\keyword{true} for the \tcode{!=} operator. Otherwise, the result of each of the -operators is unspecified. +\keyword{true} for the \tcode{!=} operator. \pnum If both operands are of arithmetic or enumeration type, the usual arithmetic diff --git a/source/iostreams.tex b/source/iostreams.tex index 615ea941ed..aae0346972 100644 --- a/source/iostreams.tex +++ b/source/iostreams.tex @@ -18997,6 +18997,15 @@ to be written to a file (\xrefc{7.23.3}) is an observable checkpoint\iref{intro.abstract}. 
+\pnum +When the input item for the \tcode{\%p} conversion +of the \tcode{fscanf} function (or equivalent) +is one that can be produced +from more than one pointer value\iref{basic.compound}, +the pointer that results is an unspecified choice among all those values +that would result in the program having defined behavior. +If no such value exists, the behavior is undefined. + \pnum Calls to the function \tcode{tmpnam} with an argument that is a null pointer value may introduce a data race\iref{res.on.data.races} with other calls to \tcode{tmpnam} with diff --git a/source/utilities.tex b/source/utilities.tex index 02e3e7b7a6..302f245fb5 100644 --- a/source/utilities.tex +++ b/source/utilities.tex @@ -16376,10 +16376,11 @@ Each bit of the value representation of the result is equal to the corresponding bit in the object representation of \tcode{from}. Padding bits of the result are unspecified. -For the result and each object created within it, -if there is no value of the object's type corresponding to the -value representation produced, the behavior is undefined. -If there are multiple such values, which value is produced is unspecified. +Every trivially copyable object +among the result and each object created within it +acquires the value representation produced; +if any such object does not receive a value, +the behavior is undefined. A bit in the value representation of the result is indeterminate if it does not correspond to a bit in the value representation of \tcode{from} or corresponds to a bit