diff --git a/lib/gc_index_relay/nostr/publication_search.ex b/lib/gc_index_relay/nostr/publication_search.ex
index 6239bb9..2cae559 100644
--- a/lib/gc_index_relay/nostr/publication_search.ex
+++ b/lib/gc_index_relay/nostr/publication_search.ex
@@ -1,8 +1,10 @@
 defmodule GcIndexRelay.Nostr.PublicationSearch do
   @moduledoc """
-  Exact-match search over kind **30040** publication index metadata (`d`, `title`, `author`, `source`).
+  Metadata search over kind **30040** publication index tags (`d`, `title`, `author`, `source`).
 
-  Matches jumble's `publicationFieldExactMatch/2` normalization (case-insensitive, hyphen/space).
+  Matches jumble's publication metadata matching: case-insensitive, hyphen/space equivalence,
+  substring matches for title/author/source (needle length ≥ 2), hyphen-segment matches on `d` tags,
+  and multi-word AND when the query has two or more significant tokens.
   """
 
   import Ecto.Query, warn: false
@@ -14,27 +16,30 @@ defmodule GcIndexRelay.Nostr.PublicationSearch do
 
   @publication_kind 30_040
   @search_tag_names ~w(d title author source)
+  @min_substring_needle_len 2
 
   @doc """
-  Search kind-30040 events by exact metadata match. Returns newest first.
+  Search kind-30040 events by metadata match. Returns newest first.
   """
   @spec search(String.t(), keyword()) :: {:ok, [PubEvent.t()]} | {:error, String.t()}
   def search(query, opts \\ []) when is_binary(query) do
-    needles = query_needles(query)
+    trimmed = query |> strip_quotes() |> String.trim()
 
-    if needles == [] do
+    if trimmed == "" do
       {:ok, []}
     else
       limit = opts |> Keyword.get(:limit, 25) |> clamp_limit()
-      do_search(needles, limit)
+      needles = query_needles(trimmed)
+      tokens = query_tokens(trimmed)
+      do_search(needles, tokens, limit)
     end
   end
 
   defp clamp_limit(limit) when is_integer(limit), do: limit |> max(1) |> min(100)
   defp clamp_limit(_), do: 25
 
-  defp do_search(needles, limit) do
-    tag_match = metadata_tag_match(needles)
+  defp do_search(needles, tokens, limit) do
+    tag_match = metadata_tag_match(needles, tokens)
 
     events =
       from(e in Event,
@@ -58,10 +63,10 @@ defmodule GcIndexRelay.Nostr.PublicationSearch do
     pub_events_from_db(events)
   end
 
-  defp metadata_tag_match(needles) do
+  defp metadata_tag_match(needles, tokens) do
     spaced_needles = Enum.map(needles, &spaced_form/1) |> Enum.uniq()
 
-    tag_match =
+    exact =
       Enum.reduce(needles, dynamic(false), fn needle, acc ->
         spaced = spaced_form(needle)
 
@@ -73,12 +78,97 @@ defmodule GcIndexRelay.Nostr.PublicationSearch do
         )
       end)
 
-    Enum.reduce(spaced_needles, tag_match, fn spaced, acc ->
-      dynamic(
-        [t],
-        ^acc or fragment("LOWER(TRIM(REPLACE(?, '-', ' '))) = ?", t.value, ^spaced)
-      )
+    exact =
+      Enum.reduce(spaced_needles, exact, fn spaced, acc ->
+        dynamic(
+          [t],
+          ^acc or fragment("LOWER(TRIM(REPLACE(?, '-', ' '))) = ?", t.value, ^spaced)
+        )
+      end)
+
+    substring =
+      Enum.reduce(substring_needles(needles), dynamic(false), fn needle, acc ->
+        spaced = spaced_form(needle)
+        pattern = like_contains(spaced)
+
+        dynamic(
+          [t],
+          ^acc or
+            fragment(
+              "LOWER(TRIM(REPLACE(?, '-', ' '))) LIKE ? ESCAPE '\\'",
+              t.value,
+              ^pattern
+            )
+        )
+      end)
+
+    d_segment =
+      Enum.reduce(d_segment_needles(needles), dynamic(false), fn needle, acc ->
+        dynamic([t], ^acc or ^d_tag_segment_match(needle))
+      end)
+
+    multi_word =
+      case tokens do
+        [_, _ | _] = word_tokens ->
+          Enum.reduce(word_tokens, dynamic(true), fn token, acc ->
+            pattern = like_contains(spaced_form(token))
+
+            dynamic(
+              [t],
+              ^acc and
+                fragment(
+                  "LOWER(TRIM(REPLACE(?, '-', ' '))) LIKE ? ESCAPE '\\'",
+                  t.value,
+                  ^pattern
+                )
+            )
+          end)
+
+        _ ->
+          dynamic(false)
+      end
+
+    dynamic([t], ^exact or ^substring or ^d_segment or ^multi_word)
+  end
+
+  defp substring_needles(needles) do
+    needles
+    |> Enum.uniq()
+    |> Enum.filter(&(String.length(&1) >= @min_substring_needle_len))
+  end
+
+  defp d_segment_needles(needles) do
+    needles
+    |> Enum.flat_map(fn needle ->
+      spaced = spaced_form(needle)
+      hyphen = needle |> String.replace(~r/\s+/, "-") |> String.replace(~r/-+/, "-") |> String.trim("-")
+      [needle, spaced, hyphen]
     end)
+    |> Enum.uniq()
+    |> Enum.filter(&(String.length(&1) >= @min_substring_needle_len))
+  end
+
+  defp d_tag_segment_match(needle) do
+    dynamic(
+      [t],
+      t.name == "d" and
+        (fragment("LOWER(TRIM(?)) = ?", t.value, ^needle) or
+           fragment("LOWER(TRIM(?)) LIKE ? ESCAPE '\\'", t.value, ^like_prefix(needle)) or
+           fragment("LOWER(TRIM(?)) LIKE ? ESCAPE '\\'", t.value, ^like_segment(needle)) or
+           fragment("LOWER(TRIM(?)) LIKE ? ESCAPE '\\'", t.value, ^like_suffix(needle)))
+    )
+  end
+
+  defp like_contains(value), do: "%#{like_escape(value)}%"
+  defp like_prefix(value), do: "#{like_escape(value)}-%"
+  defp like_segment(value), do: "%-#{like_escape(value)}-%"
+  defp like_suffix(value), do: "%-#{like_escape(value)}"
+
+  defp like_escape(value) do
+    value
+    |> String.replace("\\", "\\\\")
+    |> String.replace("%", "\\%")
+    |> String.replace("_", "\\_")
   end
 
   defp pub_events_from_db(events) do
@@ -119,6 +209,19 @@ defmodule GcIndexRelay.Nostr.PublicationSearch do
     end
   end
 
+  @doc false
+  def query_tokens(query) do
+    query
+    |> strip_quotes()
+    |> String.trim()
+    |> String.downcase()
+    |> String.replace(~r/\s+/, " ")
+    |> String.split(" ", trim: true)
+    |> Enum.map(&String.trim/1)
+    |> Enum.filter(&(String.length(&1) >= @min_substring_needle_len))
+    |> Enum.uniq()
+  end
+
   defp strip_quotes(raw) do
     trimmed = String.trim(raw)
 
diff --git a/lib/gc_index_relay_web/controllers/publication_search_controller.ex b/lib/gc_index_relay_web/controllers/publication_search_controller.ex
index 18830d1..6c05520 100644
--- a/lib/gc_index_relay_web/controllers/publication_search_controller.ex
+++ b/lib/gc_index_relay_web/controllers/publication_search_controller.ex
@@ -11,9 +11,9 @@ defmodule GcIndexRelayWeb.PublicationSearchController do
     summary("Search kind-30040 publication indexes by metadata")
 
     description("""
-    Exact-match search over publication index metadata tags: `d`, `title`, `author`, and `source`.
-    Matching is case-insensitive and treats hyphens and spaces as equivalent. Partial substring
-    matches are not returned.
+    Metadata search over publication index tags: `d`, `title`, `author`, and `source`.
+    Matching is case-insensitive, treats hyphens and spaces as equivalent, supports substring
+    matches (needle length ≥ 2), hyphen-segment matches on `d` tags, and multi-word AND queries.
     """)
 
     tag("Publications")
@@ -23,7 +23,7 @@ defmodule GcIndexRelayWeb.PublicationSearchController do
   end
 
   @doc """
-  POST /api/publications/search — exact metadata search for kind-30040 publication indexes.
+  POST /api/publications/search — metadata search for kind-30040 publication indexes.
   """
   def search(conn, params) do
     with {:ok, query} <- fetch_query(params),
diff --git a/test/gc_index_relay/nostr/publication_search_test.exs b/test/gc_index_relay/nostr/publication_search_test.exs
index 28763ca..6330eee 100644
--- a/test/gc_index_relay/nostr/publication_search_test.exs
+++ b/test/gc_index_relay/nostr/publication_search_test.exs
@@ -76,7 +76,7 @@ defmodule GcIndexRelay.Nostr.PublicationSearchTest do
     assert {:ok, ^result} = Validator.validate_id(result)
   end
 
-  test "search rejects partial substring matches" do
+  test "search finds partial d-tag and title needles" do
     insert_publication!(
       "pg1342-pride-and-prejudice",
       "Pride and Prejudice",
@@ -84,7 +84,27 @@ defmodule GcIndexRelay.Nostr.PublicationSearchTest do
       "https://www.gutenberg.org/ebooks/1342"
     )
 
-    assert {:ok, []} = PublicationSearch.search("pg1342", limit: 10)
-    assert {:ok, []} = PublicationSearch.search("pride-and", limit: 10)
+    assert {:ok, results} = PublicationSearch.search("pg1342", limit: 10)
+    assert length(results) == 1
+
+    assert {:ok, results} = PublicationSearch.search("pride-and", limit: 10)
+    assert length(results) == 1
+
+    assert {:ok, results} = PublicationSearch.search("prejudice", limit: 10)
+    assert length(results) == 1
+
+    assert {:ok, results} = PublicationSearch.search("jane austen", limit: 10)
+    assert length(results) == 1
+  end
+
+  test "search rejects single-character needles" do
+    insert_publication!(
+      "pg1342-pride-and-prejudice",
+      "Pride and Prejudice",
+      "Jane Austen",
+      "https://www.gutenberg.org/ebooks/1342"
+    )
+
+    assert {:ok, []} = PublicationSearch.search("p", limit: 10)
   end
 end
diff --git a/test/gc_index_relay_web/controllers/publication_search_controller_test.exs b/test/gc_index_relay_web/controllers/publication_search_controller_test.exs
index 49b3a38..973115e 100644
--- a/test/gc_index_relay_web/controllers/publication_search_controller_test.exs
+++ b/test/gc_index_relay_web/controllers/publication_search_controller_test.exs
@@ -48,10 +48,20 @@ defmodule GcIndexRelayWeb.PublicationSearchControllerTest do
     assert event["kind"] == 30_040
   end
 
+  test "returns partial metadata matches", %{conn: conn} do
+    pub_event =
+      insert_publication!("pg1342-pride-and-prejudice", "Pride and Prejudice", "Jane Austen")
+
+    conn = post(conn, ~p"/api/publications/search", %{"q" => "pg1342", "limit" => 10})
+
+    assert %{"data" => [event]} = json_response(conn, 200)
+    assert event["id"] == pub_event.id
+  end
+
   test "returns empty list when nothing matches", %{conn: conn} do
     insert_publication!("pg1342-pride-and-prejudice", "Pride and Prejudice", "Jane Austen")
 
-    conn = post(conn, ~p"/api/publications/search", %{"q" => "pg1342", "limit" => 10})
+    conn = post(conn, ~p"/api/publications/search", %{"q" => "zzzznotfound", "limit" => 10})
 
     assert %{"data" => []} = json_response(conn, 200)
   end