|
|
|
@ -1,8 +1,10 @@ |
|
|
|
defmodule GcIndexRelay.Nostr.PublicationSearch do |
|
|
|
defmodule GcIndexRelay.Nostr.PublicationSearch do |
|
|
|
@moduledoc """ |
|
|
|
@moduledoc """ |
|
|
|
Exact-match search over kind **30040** publication index metadata (`d`, `title`, `author`, `source`). |
|
|
|
Metadata search over kind **30040** publication index tags (`d`, `title`, `author`, `source`). |
|
|
|
|
|
|
|
|
|
|
|
Matches jumble's `publicationFieldExactMatch/2` normalization (case-insensitive, hyphen/space). |
|
|
|
Matches jumble's publication metadata matching: case-insensitive, hyphen/space equivalence, |
|
|
|
|
|
|
|
substring matches for title/author/source (needle length ≥ 2), hyphen-segment matches on `d` tags, |
|
|
|
|
|
|
|
and multi-word AND when the query has two or more significant tokens. |
|
|
|
""" |
|
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
import Ecto.Query, warn: false |
|
|
|
import Ecto.Query, warn: false |
|
|
|
@ -14,27 +16,30 @@ defmodule GcIndexRelay.Nostr.PublicationSearch do |
|
|
|
|
|
|
|
|
|
|
|
@publication_kind 30_040 |
|
|
|
@publication_kind 30_040 |
|
|
|
@search_tag_names ~w(d title author source) |
|
|
|
@search_tag_names ~w(d title author source) |
|
|
|
|
|
|
|
@min_substring_needle_len 2 |
|
|
|
|
|
|
|
|
|
|
|
@doc """
Searches kind-30040 publication events whose metadata tags match `query`.

Surrounding quotes and whitespace are removed from `query` first; if nothing
is left, `{:ok, []}` is returned without searching.

## Options

  * `:limit` - maximum number of results. Defaults to `25`; integer values are
    clamped to `1..100`, and non-integer values fall back to `25`.

Results are returned newest first.
"""
@spec search(String.t(), keyword()) :: {:ok, [PubEvent.t()]} | {:error, String.t()}
def search(query, opts \\ []) when is_binary(query) do
  case query |> strip_quotes() |> String.trim() do
    "" ->
      {:ok, []}

    cleaned ->
      max_results = opts |> Keyword.get(:limit, 25) |> clamp_limit()

      # Needles drive the exact/substring/d-segment matching; tokens drive the
      # multi-word matching. Both are derived from the same cleaned query.
      do_search(query_needles(cleaned), query_tokens(cleaned), max_results)
  end
end
|
|
|
|
|
|
|
|
|
|
|
# Bounds an integer limit to the range 1..100; any non-integer value falls
# back to the default of 25.
defp clamp_limit(requested) when is_integer(requested) do
  requested
  |> max(1)
  |> min(100)
end

defp clamp_limit(_other), do: 25
|
|
|
|
|
|
|
|
|
|
|
defp do_search(needles, limit) do |
|
|
|
defp do_search(needles, tokens, limit) do |
|
|
|
tag_match = metadata_tag_match(needles) |
|
|
|
tag_match = metadata_tag_match(needles, tokens) |
|
|
|
|
|
|
|
|
|
|
|
events = |
|
|
|
events = |
|
|
|
from(e in Event, |
|
|
|
from(e in Event, |
|
|
|
@ -58,10 +63,10 @@ defmodule GcIndexRelay.Nostr.PublicationSearch do |
|
|
|
pub_events_from_db(events) |
|
|
|
pub_events_from_db(events) |
|
|
|
end |
|
|
|
end |
|
|
|
|
|
|
|
|
|
|
|
defp metadata_tag_match(needles) do |
|
|
|
defp metadata_tag_match(needles, tokens) do |
|
|
|
spaced_needles = Enum.map(needles, &spaced_form/1) |> Enum.uniq() |
|
|
|
spaced_needles = Enum.map(needles, &spaced_form/1) |> Enum.uniq() |
|
|
|
|
|
|
|
|
|
|
|
tag_match = |
|
|
|
exact = |
|
|
|
Enum.reduce(needles, dynamic(false), fn needle, acc -> |
|
|
|
Enum.reduce(needles, dynamic(false), fn needle, acc -> |
|
|
|
spaced = spaced_form(needle) |
|
|
|
spaced = spaced_form(needle) |
|
|
|
|
|
|
|
|
|
|
|
@ -73,12 +78,97 @@ defmodule GcIndexRelay.Nostr.PublicationSearch do |
|
|
|
) |
|
|
|
) |
|
|
|
end) |
|
|
|
end) |
|
|
|
|
|
|
|
|
|
|
|
Enum.reduce(spaced_needles, tag_match, fn spaced, acc -> |
|
|
|
exact = |
|
|
|
dynamic( |
|
|
|
Enum.reduce(spaced_needles, exact, fn spaced, acc -> |
|
|
|
[t], |
|
|
|
dynamic( |
|
|
|
^acc or fragment("LOWER(TRIM(REPLACE(?, '-', ' '))) = ?", t.value, ^spaced) |
|
|
|
[t], |
|
|
|
) |
|
|
|
^acc or fragment("LOWER(TRIM(REPLACE(?, '-', ' '))) = ?", t.value, ^spaced) |
|
|
|
|
|
|
|
) |
|
|
|
|
|
|
|
end) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
substring = |
|
|
|
|
|
|
|
Enum.reduce(substring_needles(needles), dynamic(false), fn needle, acc -> |
|
|
|
|
|
|
|
spaced = spaced_form(needle) |
|
|
|
|
|
|
|
pattern = like_contains(spaced) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
dynamic( |
|
|
|
|
|
|
|
[t], |
|
|
|
|
|
|
|
^acc or |
|
|
|
|
|
|
|
fragment( |
|
|
|
|
|
|
|
"LOWER(TRIM(REPLACE(?, '-', ' '))) LIKE ? ESCAPE '\\'", |
|
|
|
|
|
|
|
t.value, |
|
|
|
|
|
|
|
^pattern |
|
|
|
|
|
|
|
) |
|
|
|
|
|
|
|
) |
|
|
|
|
|
|
|
end) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
d_segment = |
|
|
|
|
|
|
|
Enum.reduce(d_segment_needles(needles), dynamic(false), fn needle, acc -> |
|
|
|
|
|
|
|
dynamic([t], ^acc or ^d_tag_segment_match(needle)) |
|
|
|
|
|
|
|
end) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
multi_word = |
|
|
|
|
|
|
|
case tokens do |
|
|
|
|
|
|
|
[_ | _] = word_tokens when length(word_tokens) >= 2 -> |
|
|
|
|
|
|
|
Enum.reduce(word_tokens, dynamic(true), fn token, acc -> |
|
|
|
|
|
|
|
pattern = like_contains(spaced_form(token)) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
dynamic( |
|
|
|
|
|
|
|
[t], |
|
|
|
|
|
|
|
^acc and |
|
|
|
|
|
|
|
fragment( |
|
|
|
|
|
|
|
"LOWER(TRIM(REPLACE(?, '-', ' '))) LIKE ? ESCAPE '\\'", |
|
|
|
|
|
|
|
t.value, |
|
|
|
|
|
|
|
^pattern |
|
|
|
|
|
|
|
) |
|
|
|
|
|
|
|
) |
|
|
|
|
|
|
|
end) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
_ -> |
|
|
|
|
|
|
|
dynamic(false) |
|
|
|
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
dynamic([t], ^exact or ^substring or ^d_segment or ^multi_word) |
|
|
|
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Distinct needles that are long enough (at least @min_substring_needle_len
# graphemes) to be used for substring matching, in first-seen order.
defp substring_needles(needles) do
  for needle <- Enum.uniq(needles),
      String.length(needle) >= @min_substring_needle_len,
      do: needle
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Expands each needle into the variants tried against `d` tags: the needle as
# given, its `spaced_form/1`, and a hyphen-joined form. Duplicates are removed
# and variants shorter than @min_substring_needle_len are dropped.
defp d_segment_needles(needles) do
  # Whitespace runs become hyphens, repeated hyphens collapse to one, and
  # leading/trailing hyphens are removed.
  to_hyphenated = fn text ->
    text
    |> String.replace(~r/\s+/, "-")
    |> String.replace(~r/-+/, "-")
    |> String.trim("-")
  end

  variants =
    for needle <- needles,
        variant <- [needle, spaced_form(needle), to_hyphenated.(needle)],
        String.length(variant) >= @min_substring_needle_len,
        do: variant

  Enum.uniq(variants)
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Builds a dynamic condition that holds for a `d` tag whose lower-cased,
# trimmed value either equals `needle` or contains it as a hyphen-delimited
# segment: at the start (`needle-…`), in the middle (`…-needle-…`) or at the
# end (`…-needle`).
defp d_tag_segment_match(needle) do
  leading = like_prefix(needle)
  inner = like_segment(needle)
  trailing = like_suffix(needle)

  dynamic(
    [t],
    t.name == "d" and
      (fragment("LOWER(TRIM(?)) = ?", t.value, ^needle) or
         fragment("LOWER(TRIM(?)) LIKE ? ESCAPE '\\'", t.value, ^leading) or
         fragment("LOWER(TRIM(?)) LIKE ? ESCAPE '\\'", t.value, ^inner) or
         fragment("LOWER(TRIM(?)) LIKE ? ESCAPE '\\'", t.value, ^trailing))
  )
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# LIKE pattern matching the escaped `value` anywhere in the column.
defp like_contains(value), do: "%" <> like_escape(value) <> "%"
|
|
|
|
|
|
|
# LIKE pattern matching the escaped `value` as a leading segment followed by a hyphen.
defp like_prefix(value), do: like_escape(value) <> "-%"
|
|
|
|
|
|
|
# LIKE pattern matching the escaped `value` as an inner segment with a hyphen on both sides.
defp like_segment(value), do: "%-" <> like_escape(value) <> "-%"
|
|
|
|
|
|
|
# LIKE pattern matching the escaped `value` as a trailing segment preceded by a hyphen.
defp like_suffix(value), do: "%-" <> like_escape(value)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Escapes the LIKE metacharacters (`\`, `%`, `_`) in `value` by prefixing each
# occurrence with a backslash, so the text is matched literally by queries
# that declare `ESCAPE '\'`.
defp like_escape(value) do
  # A single pass over the input: every matched character is replaced by
  # itself preceded by one backslash, so escapes are never double-escaped.
  String.replace(value, ["\\", "%", "_"], fn special -> "\\" <> special end)
end
|
|
|
|
|
|
|
|
|
|
|
defp pub_events_from_db(events) do |
|
|
|
defp pub_events_from_db(events) do |
|
|
|
@ -119,6 +209,19 @@ defmodule GcIndexRelay.Nostr.PublicationSearch do |
|
|
|
end |
|
|
|
end |
|
|
|
end |
|
|
|
end |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@doc false
# Splits a raw query into distinct lower-case word tokens: quotes are stripped,
# the text is trimmed and down-cased, split on whitespace runs, and tokens of
# a single character (or empty) are discarded. Order of first appearance is kept.
def query_tokens(query) do
  normalized =
    query
    |> strip_quotes()
    |> String.trim()
    |> String.downcase()

  words =
    for word <- String.split(normalized, ~r/\s+/, trim: true),
        token = String.trim(word),
        String.length(token) > 1,
        do: token

  Enum.uniq(words)
end
|
|
|
|
|
|
|
|
|
|
|
defp strip_quotes(raw) do |
|
|
|
defp strip_quotes(raw) do |
|
|
|
trimmed = String.trim(raw) |
|
|
|
trimmed = String.trim(raw) |
|
|
|
|
|
|
|
|
|
|
|
|