Compare commits

..

No commits in common. 'ec9eb2995c5cc21fefe576c130f6e76b6a9e34e3' and '0bfebaa5f483192bc416b48d1d31664cfd8f9c77' have entirely different histories.

  1. 2
      lib/gc_index_relay/nostr/event.ex
  2. 14
      lib/gc_index_relay/nostr/pub_event.ex
  3. 133
      lib/gc_index_relay/nostr/publication_search.ex
  4. 8
      lib/gc_index_relay_web/controllers/publication_search_controller.ex
  5. 19
      test/gc_index_relay/nostr/pub_event_test.exs
  6. 40
      test/gc_index_relay/nostr/publication_search_test.exs
  7. 12
      test/gc_index_relay_web/controllers/publication_search_controller_test.exs

2
lib/gc_index_relay/nostr/event.ex

@ -26,7 +26,7 @@ defmodule GcIndexRelay.Nostr.Event do
field :kind, :integer field :kind, :integer
field :content, :string field :content, :string
field :sig, :binary field :sig, :binary
has_many :tags, GcIndexRelay.Nostr.Tag, preload_order: [asc: :id] has_many :tags, GcIndexRelay.Nostr.Tag
end end
@doc false @doc false

14
lib/gc_index_relay/nostr/pub_event.ex

@ -103,21 +103,11 @@ defmodule GcIndexRelay.Nostr.PubEvent do
end end
defp from_tags(tags) when is_list(tags) do defp from_tags(tags) when is_list(tags) do
tags for t <- tags do
|> sort_tags_for_read()
|> Enum.map(fn t ->
case t.value do case t.value do
nil -> [t.name] nil -> [t.name]
value -> [t.name, value | t.additional_values] value -> [t.name, value | t.additional_values]
end end
end) end
end
# Tag rows have no explicit position column; insertion id matches Nostr signing order.
defp sort_tags_for_read(tags) do
Enum.sort_by(tags, fn
%Tag{id: id} when is_integer(id) -> {0, id}
%Tag{} -> {1, 0}
end)
end end
end end

133
lib/gc_index_relay/nostr/publication_search.ex

@ -1,10 +1,8 @@
defmodule GcIndexRelay.Nostr.PublicationSearch do defmodule GcIndexRelay.Nostr.PublicationSearch do
@moduledoc """ @moduledoc """
Metadata search over kind **30040** publication index tags (`d`, `title`, `author`, `source`). Exact-match search over kind **30040** publication index metadata (`d`, `title`, `author`, `source`).
Matches jumble's publication metadata matching: case-insensitive, hyphen/space equivalence, Matches jumble's `publicationFieldExactMatch/2` normalization (case-insensitive, hyphen/space).
substring matches for title/author/source (needle length 2), hyphen-segment matches on `d` tags,
and multi-word AND when the query has two or more significant tokens.
""" """
import Ecto.Query, warn: false import Ecto.Query, warn: false
@ -16,30 +14,27 @@ defmodule GcIndexRelay.Nostr.PublicationSearch do
@publication_kind 30_040 @publication_kind 30_040
@search_tag_names ~w(d title author source) @search_tag_names ~w(d title author source)
@min_substring_needle_len 2
@doc """ @doc """
Search kind-30040 events by metadata match. Returns newest first. Search kind-30040 events by exact metadata match. Returns newest first.
""" """
@spec search(String.t(), keyword()) :: {:ok, [PubEvent.t()]} | {:error, String.t()} @spec search(String.t(), keyword()) :: {:ok, [PubEvent.t()]} | {:error, String.t()}
def search(query, opts \\ []) when is_binary(query) do def search(query, opts \\ []) when is_binary(query) do
trimmed = query |> strip_quotes() |> String.trim() needles = query_needles(query)
if trimmed == "" do if needles == [] do
{:ok, []} {:ok, []}
else else
limit = opts |> Keyword.get(:limit, 25) |> clamp_limit() limit = opts |> Keyword.get(:limit, 25) |> clamp_limit()
needles = query_needles(trimmed) do_search(needles, limit)
tokens = query_tokens(trimmed)
do_search(needles, tokens, limit)
end end
end end
defp clamp_limit(limit) when is_integer(limit), do: limit |> max(1) |> min(100) defp clamp_limit(limit) when is_integer(limit), do: limit |> max(1) |> min(100)
defp clamp_limit(_), do: 25 defp clamp_limit(_), do: 25
defp do_search(needles, tokens, limit) do defp do_search(needles, limit) do
tag_match = metadata_tag_match(needles, tokens) tag_match = metadata_tag_match(needles)
events = events =
from(e in Event, from(e in Event,
@ -63,10 +58,10 @@ defmodule GcIndexRelay.Nostr.PublicationSearch do
pub_events_from_db(events) pub_events_from_db(events)
end end
defp metadata_tag_match(needles, tokens) do defp metadata_tag_match(needles) do
spaced_needles = Enum.map(needles, &spaced_form/1) |> Enum.uniq() spaced_needles = Enum.map(needles, &spaced_form/1) |> Enum.uniq()
exact = tag_match =
Enum.reduce(needles, dynamic(false), fn needle, acc -> Enum.reduce(needles, dynamic(false), fn needle, acc ->
spaced = spaced_form(needle) spaced = spaced_form(needle)
@ -78,97 +73,12 @@ defmodule GcIndexRelay.Nostr.PublicationSearch do
) )
end) end)
exact = Enum.reduce(spaced_needles, tag_match, fn spaced, acc ->
Enum.reduce(spaced_needles, exact, fn spaced, acc -> dynamic(
dynamic( [t],
[t], ^acc or fragment("LOWER(TRIM(REPLACE(?, '-', ' '))) = ?", t.value, ^spaced)
^acc or fragment("LOWER(TRIM(REPLACE(?, '-', ' '))) = ?", t.value, ^spaced) )
)
end)
substring =
Enum.reduce(substring_needles(needles), dynamic(false), fn needle, acc ->
spaced = spaced_form(needle)
pattern = like_contains(spaced)
dynamic(
[t],
^acc or
fragment(
"LOWER(TRIM(REPLACE(?, '-', ' '))) LIKE ? ESCAPE '\\'",
t.value,
^pattern
)
)
end)
d_segment =
Enum.reduce(d_segment_needles(needles), dynamic(false), fn needle, acc ->
dynamic([t], ^acc or ^d_tag_segment_match(needle))
end)
multi_word =
case tokens do
[_ | _] = word_tokens when length(word_tokens) >= 2 ->
Enum.reduce(word_tokens, dynamic(true), fn token, acc ->
pattern = like_contains(spaced_form(token))
dynamic(
[t],
^acc and
fragment(
"LOWER(TRIM(REPLACE(?, '-', ' '))) LIKE ? ESCAPE '\\'",
t.value,
^pattern
)
)
end)
_ ->
dynamic(false)
end
dynamic([t], ^exact or ^substring or ^d_segment or ^multi_word)
end
defp substring_needles(needles) do
needles
|> Enum.uniq()
|> Enum.filter(&(String.length(&1) >= @min_substring_needle_len))
end
defp d_segment_needles(needles) do
needles
|> Enum.flat_map(fn needle ->
spaced = spaced_form(needle)
hyphen = needle |> String.replace(~r/\s+/, "-") |> String.replace(~r/-+/, "-") |> String.trim("-")
[needle, spaced, hyphen]
end) end)
|> Enum.uniq()
|> Enum.filter(&(String.length(&1) >= @min_substring_needle_len))
end
defp d_tag_segment_match(needle) do
dynamic(
[t],
t.name == "d" and
(fragment("LOWER(TRIM(?)) = ?", t.value, ^needle) or
fragment("LOWER(TRIM(?)) LIKE ? ESCAPE '\\'", t.value, ^like_prefix(needle)) or
fragment("LOWER(TRIM(?)) LIKE ? ESCAPE '\\'", t.value, ^like_segment(needle)) or
fragment("LOWER(TRIM(?)) LIKE ? ESCAPE '\\'", t.value, ^like_suffix(needle)))
)
end
defp like_contains(value), do: "%#{like_escape(value)}%"
defp like_prefix(value), do: "#{like_escape(value)}-%"
defp like_segment(value), do: "%-#{like_escape(value)}-%"
defp like_suffix(value), do: "%-#{like_escape(value)}"
defp like_escape(value) do
value
|> String.replace("\\", "\\\\")
|> String.replace("%", "\\%")
|> String.replace("_", "\\_")
end end
defp pub_events_from_db(events) do defp pub_events_from_db(events) do
@ -209,19 +119,6 @@ defmodule GcIndexRelay.Nostr.PublicationSearch do
end end
end end
@doc false
def query_tokens(query) do
query
|> strip_quotes()
|> String.trim()
|> String.downcase()
|> String.replace(~r/\s+/, " ")
|> String.split(" ", trim: true)
|> Enum.map(&String.trim/1)
|> Enum.filter(&(String.length(&1) > 1))
|> Enum.uniq()
end
defp strip_quotes(raw) do defp strip_quotes(raw) do
trimmed = String.trim(raw) trimmed = String.trim(raw)

8
lib/gc_index_relay_web/controllers/publication_search_controller.ex

@ -11,9 +11,9 @@ defmodule GcIndexRelayWeb.PublicationSearchController do
summary("Search kind-30040 publication indexes by metadata") summary("Search kind-30040 publication indexes by metadata")
description(""" description("""
Metadata search over publication index tags: `d`, `title`, `author`, and `source`. Exact-match search over publication index metadata tags: `d`, `title`, `author`, and `source`.
Matching is case-insensitive, treats hyphens and spaces as equivalent, supports substring Matching is case-insensitive and treats hyphens and spaces as equivalent. Partial substring
matches (needle length 2), hyphen-segment matches on `d` tags, and multi-word AND queries. matches are not returned.
""") """)
tag("Publications") tag("Publications")
@ -23,7 +23,7 @@ defmodule GcIndexRelayWeb.PublicationSearchController do
end end
@doc """ @doc """
POST /api/publications/search metadata search for kind-30040 publication indexes. POST /api/publications/search exact metadata search for kind-30040 publication indexes.
""" """
def search(conn, params) do def search(conn, params) do
with {:ok, query} <- fetch_query(params), with {:ok, query} <- fetch_query(params),

19
test/gc_index_relay/nostr/pub_event_test.exs

@ -59,25 +59,6 @@ defmodule GcIndexRelay.Nostr.PubEventTest do
assert {:error, :not_found} = PubEvent.from_db(nil) assert {:error, :not_found} = PubEvent.from_db(nil)
end end
test "restores Nostr tag order from shuffled tag rows by database id" do
event = %Event{
id: Base.decode16!(String.duplicate("ab", 32), case: :lower),
pubkey: Base.decode16!(String.duplicate("cd", 32), case: :lower),
created_at: ~U[2021-12-20 17:46:40Z],
kind: 30_040,
content: "",
sig: Base.decode16!(String.duplicate("ef", 64), case: :lower),
tags: [
%Tag{id: 300, name: "a", value: "chapter-2", additional_values: []},
%Tag{id: 100, name: "d", value: "book", additional_values: []},
%Tag{id: 200, name: "title", value: "Title", additional_values: []}
]
}
assert {:ok, pub_event} = PubEvent.from_db(event)
assert pub_event.tags == [["d", "book"], ["title", "Title"], ["a", "chapter-2"]]
end
test "converts Tag structs back to nested lists" do test "converts Tag structs back to nested lists" do
event = %Event{ event = %Event{
id: Base.decode16!(String.duplicate("ab", 32), case: :lower), id: Base.decode16!(String.duplicate("ab", 32), case: :lower),

40
test/gc_index_relay/nostr/publication_search_test.exs

@ -5,7 +5,6 @@ defmodule GcIndexRelay.Nostr.PublicationSearchTest do
alias GcIndexRelay.Nostr alias GcIndexRelay.Nostr
alias GcIndexRelay.Nostr.PublicationSearch alias GcIndexRelay.Nostr.PublicationSearch
alias GcIndexRelay.Nostr.Validator
@moduletag :integration @moduletag :integration
@ -63,7 +62,7 @@ defmodule GcIndexRelay.Nostr.PublicationSearchTest do
assert length(results) == 1 assert length(results) == 1
end end
test "search returns signature-valid events" do test "search rejects partial substring matches" do
insert_publication!( insert_publication!(
"pg1342-pride-and-prejudice", "pg1342-pride-and-prejudice",
"Pride and Prejudice", "Pride and Prejudice",
@ -71,40 +70,7 @@ defmodule GcIndexRelay.Nostr.PublicationSearchTest do
"https://www.gutenberg.org/ebooks/1342" "https://www.gutenberg.org/ebooks/1342"
) )
assert {:ok, [result | _]} = PublicationSearch.search("pride and prejudice", limit: 10) assert {:ok, []} = PublicationSearch.search("pg1342", limit: 10)
assert {:ok, ^result} = Validator.validate_signature(result) assert {:ok, []} = PublicationSearch.search("pride-and", limit: 10)
assert {:ok, ^result} = Validator.validate_id(result)
end
test "search finds partial d-tag and title needles" do
insert_publication!(
"pg1342-pride-and-prejudice",
"Pride and Prejudice",
"Jane Austen",
"https://www.gutenberg.org/ebooks/1342"
)
assert {:ok, results} = PublicationSearch.search("pg1342", limit: 10)
assert length(results) == 1
assert {:ok, results} = PublicationSearch.search("pride-and", limit: 10)
assert length(results) == 1
assert {:ok, results} = PublicationSearch.search("prejudice", limit: 10)
assert length(results) == 1
assert {:ok, results} = PublicationSearch.search("jane austen", limit: 10)
assert length(results) == 1
end
test "search rejects single-character needles" do
insert_publication!(
"pg1342-pride-and-prejudice",
"Pride and Prejudice",
"Jane Austen",
"https://www.gutenberg.org/ebooks/1342"
)
assert {:ok, []} = PublicationSearch.search("p", limit: 10)
end end
end end

12
test/gc_index_relay_web/controllers/publication_search_controller_test.exs

@ -48,20 +48,10 @@ defmodule GcIndexRelayWeb.PublicationSearchControllerTest do
assert event["kind"] == 30_040 assert event["kind"] == 30_040
end end
test "returns partial metadata matches", %{conn: conn} do
pub_event =
insert_publication!("pg1342-pride-and-prejudice", "Pride and Prejudice", "Jane Austen")
conn = post(conn, ~p"/api/publications/search", %{"q" => "pg1342", "limit" => 10})
assert %{"data" => [event]} = json_response(conn, 200)
assert event["id"] == pub_event.id
end
test "returns empty list when nothing matches", %{conn: conn} do test "returns empty list when nothing matches", %{conn: conn} do
insert_publication!("pg1342-pride-and-prejudice", "Pride and Prejudice", "Jane Austen") insert_publication!("pg1342-pride-and-prejudice", "Pride and Prejudice", "Jane Austen")
conn = post(conn, ~p"/api/publications/search", %{"q" => "zzzznotfound", "limit" => 10}) conn = post(conn, ~p"/api/publications/search", %{"q" => "pg1342", "limit" => 10})
assert %{"data" => []} = json_response(conn, 200) assert %{"data" => []} = json_response(conn, 200)
end end

Loading…
Cancel
Save