Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/like-postgres-semantics.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@core/sync-service": patch
---

Fix `LIKE`/`ILIKE` to follow Postgres semantics: `%` and `_` now match newline characters, a trailing newline in the value is no longer ignored, and a backslash-escaped `%` or `_` now matches just that literal character (previously the backslash itself also had to be present in the value).
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,16 @@ defmodule Electric.Replication.PostgresInterop.Casting do
@doc """
LIKE function from SQL. Case sensitive by default.

Follows Postgres semantics:

* `%` matches any sequence of zero or more characters,
* `_` matches any single character,
* both wildcards also match newline characters,
* the pattern must match the entire string (a trailing newline in the value
is not ignored), and
* a backslash escapes the following character, so an escaped `%` or `_`
matches the literal character instead of acting as a wildcard.

## Examples

iex> like?("hello", "hell_")
Expand All @@ -112,18 +122,36 @@ defmodule Electric.Replication.PostgresInterop.Casting do
true
"""
# Returns `true` when the whole of `text` matches the SQL LIKE `pattern`.
#
#   * `text`         - the value being tested (expected to be valid UTF-8),
#   * `pattern`      - the LIKE pattern (`%`, `_` and backslash escapes),
#   * `ignore_case?` - when `true`, matching is case-insensitive (ILIKE).
#
# Raises if the translated pattern cannot be compiled (`Regex.compile!/2`) or
# if `pattern` is not valid UTF-8 (no helper clause matches).
def like?(text, pattern, ignore_case? \\ false) do
  # `:dotall` makes `.` (from `%`/`_`) match newlines like Postgres does.
  # `:unicode` makes `.` consume a whole code point rather than a single byte,
  # so `_` matches exactly one multi-byte character, and it lets `:caseless`
  # fold non-ASCII letters for ILIKE.
  base_options = [:dotall, :unicode]
  options = if ignore_case?, do: [:caseless | base_options], else: base_options

  # `\A..\z` anchors the match to the absolute string boundaries so a trailing
  # newline in the value is not silently ignored (which `^..$` would do).
  ("\\A" <> like_pattern_to_regex(pattern) <> "\\z")
  |> Regex.compile!(options)
  |> Regex.match?(text)
end

# Translate a SQL LIKE pattern into a regex source string following Postgres
# semantics: `%` -> `.*`, `_` -> `.`, a backslash escapes the next character,
# and everything else is matched literally.
defp like_pattern_to_regex(pattern), do: like_pattern_to_regex(pattern, [])

# End of pattern: the pieces were prepended, so restore their order and flatten.
defp like_pattern_to_regex(<<>>, acc),
  do: acc |> Enum.reverse() |> IO.iodata_to_binary()

# Backslash followed by a character: emit that character as a literal. This
# clause must stay ahead of the wildcard clauses so `\%` and `\_` never become
# wildcards. A lone trailing backslash has no `next`, so it falls through to the
# final clause and is matched as a literal backslash.
defp like_pattern_to_regex(<<?\\, next::utf8, rest::binary>>, acc),
  do: like_pattern_to_regex(rest, [Regex.escape(<<next::utf8>>) | acc])

# `%` matches any sequence of zero or more characters.
defp like_pattern_to_regex(<<?%, rest::binary>>, acc),
  do: like_pattern_to_regex(rest, [".*" | acc])

# `_` matches exactly one character.
defp like_pattern_to_regex(<<?_, rest::binary>>, acc),
  do: like_pattern_to_regex(rest, ["." | acc])

# Any other character is matched literally; escape it so regex
# metacharacters in the pattern carry no special meaning.
defp like_pattern_to_regex(<<c::utf8, rest::binary>>, acc),
  do: like_pattern_to_regex(rest, [Regex.escape(<<c::utf8>>) | acc])

@doc """
ILIKE function from SQL: the same matching as `like?/3`, but ignoring case.
"""
def ilike?(text, pattern) do
  like?(text, pattern, true)
end

@doc """
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,31 @@
defmodule Electric.Replication.PostgresInterop.CastingTest do
  use ExUnit.Case, async: true
  import Electric.Replication.PostgresInterop.Casting
  doctest Electric.Replication.PostgresInterop.Casting, import: true

  describe "like?/2 Postgres compatibility" do
    test "`%` and `_` match newline characters" do
      # Postgres wildcards are not line-oriented: a newline is just another
      # character for both `%` and `_`.
      two_lines = "hello\nworld"
      assert like?(two_lines, "hello%world")

      split_by_newline = "a\nb"
      assert like?(split_by_newline, "a_b")
    end

    test "the pattern must match the whole value, including a trailing newline" do
      # In Postgres 'trailing\n' LIKE 'trailing' is false: the final newline
      # belongs to the value, so the pattern has to consume it explicitly.
      value = "trailing\n"

      refute like?(value, "trailing")
      assert like?(value, "trailing%")
    end

    test "a backslash escapes `%` and `_` to match the literal character" do
      escaped_percent = "100\\%"
      escaped_underscore = "a\\_b"

      assert like?("100%", escaped_percent)
      assert like?("a_b", escaped_underscore)
      # An escaped `%` is no longer a wildcard, so it cannot stand in for "o".
      refute like?("hello", "hell\\%")
    end

    test "ilike?/2 keeps the corrected semantics while ignoring case" do
      upper_case_value = "HELLO\nWORLD"
      assert ilike?(upper_case_value, "hello%world")

      assert ilike?("100%", "100\\%")
    end
  end
end
Loading