diff --git a/.changeset/like-postgres-semantics.md b/.changeset/like-postgres-semantics.md new file mode 100644 index 0000000000..4dcd75129f --- /dev/null +++ b/.changeset/like-postgres-semantics.md @@ -0,0 +1,5 @@ +--- +"@core/sync-service": patch +--- + +Fix `LIKE`/`ILIKE` to follow Postgres semantics: `%` and `_` now match newline characters, a trailing newline in the value is no longer ignored, and a backslash-escaped `%` or `_` matches the literal character instead of the wildcard (including the backslash). diff --git a/packages/sync-service/lib/electric/replication/postgres_interop/casting.ex b/packages/sync-service/lib/electric/replication/postgres_interop/casting.ex index d677c725cf..01111cb9f9 100644 --- a/packages/sync-service/lib/electric/replication/postgres_interop/casting.ex +++ b/packages/sync-service/lib/electric/replication/postgres_interop/casting.ex @@ -94,6 +94,16 @@ defmodule Electric.Replication.PostgresInterop.Casting do @doc """ LIKE function from SQL. Case sensitive by default. + Follows Postgres semantics: + + * `%` matches any sequence of zero or more characters, + * `_` matches any single character, + * both wildcards also match newline characters, + * the pattern must match the entire string (a trailing newline in the value + is not ignored), and + * a backslash escapes the following character, so an escaped `%` or `_` + matches the literal character instead of acting as a wildcard. + ## Examples iex> like?("hello", "hell_") @@ -112,18 +122,36 @@ defmodule Electric.Replication.PostgresInterop.Casting do true """ def like?(text, pattern, ignore_case? \\ false) do - pattern - |> String.split(~r/(? Enum.map_join(fn - "%" -> ".*" - "_" -> "." - text -> Regex.escape(text) - end) - |> then(&("^" <> &1 <> "$")) - |> Regex.compile!(if ignore_case?, do: [:caseless], else: []) + # `:dotall` makes `.` (from `%`/`_`) match newlines like Postgres does, and + # `\A..\z` anchors the match to the absolute string boundaries so a trailing + # newline in the value is not silently ignored (which `^..$` would do). + options = if ignore_case?, do: [:caseless, :dotall], else: [:dotall] + + ("\\A" <> like_pattern_to_regex(pattern) <> "\\z") + |> Regex.compile!(options) |> Regex.match?(text) end + # Translate a SQL LIKE pattern into a regex source string following Postgres + # semantics: `%` -> `.*`, `_` -> `.`, a backslash escapes the next character, + # and everything else is matched literally. + defp like_pattern_to_regex(pattern), do: like_pattern_to_regex(pattern, []) + + defp like_pattern_to_regex(<<>>, acc), + do: acc |> Enum.reverse() |> IO.iodata_to_binary() + + defp like_pattern_to_regex(<>, acc), + do: like_pattern_to_regex(rest, [Regex.escape(<>) | acc]) + + defp like_pattern_to_regex(<>, acc), + do: like_pattern_to_regex(rest, [".*" | acc]) + + defp like_pattern_to_regex(<>, acc), + do: like_pattern_to_regex(rest, ["." | acc]) + + defp like_pattern_to_regex(<>, acc), + do: like_pattern_to_regex(rest, [Regex.escape(<>) | acc]) + def ilike?(text, pattern), do: like?(text, pattern, true) @doc """ diff --git a/packages/sync-service/test/electric/replication/postgres_interop/casting_test.exs b/packages/sync-service/test/electric/replication/postgres_interop/casting_test.exs index 15d46814e0..913e6af5be 100644 --- a/packages/sync-service/test/electric/replication/postgres_interop/casting_test.exs +++ b/packages/sync-service/test/electric/replication/postgres_interop/casting_test.exs @@ -1,4 +1,31 @@ defmodule Electric.Replication.PostgresInterop.CastingTest do use ExUnit.Case, async: true + import Electric.Replication.PostgresInterop.Casting doctest Electric.Replication.PostgresInterop.Casting, import: true + + describe "like?/2 Postgres compatibility" do + test "`%` and `_` match newline characters" do + # In Postgres both wildcards match any character, including newlines. + assert like?("hello\nworld", "hello%world") + assert like?("a\nb", "a_b") + end + + test "the pattern must match the whole value, including a trailing newline" do + # 'trailing\n' LIKE 'trailing' is false in Postgres; the newline is a + # real character that the (anchored) pattern must account for. + refute like?("trailing\n", "trailing") + assert like?("trailing\n", "trailing%") + end + + test "a backslash escapes `%` and `_` to match the literal character" do + assert like?("100%", "100\\%") + assert like?("a_b", "a\\_b") + refute like?("hello", "hell\\%") + end + + test "ilike?/2 keeps the corrected semantics while ignoring case" do + assert ilike?("HELLO\nWORLD", "hello%world") + assert ilike?("100%", "100\\%") + end + end end