10 changed files with 456 additions and 10 deletions
@ -0,0 +1,150 @@
@@ -0,0 +1,150 @@
|
||||
defmodule GcIndexRelay.Nostr.PublicationSearch do |
||||
@moduledoc """ |
||||
Exact-match search over kind **30040** publication index metadata (`d`, `title`, `author`, `source`). |
||||
|
||||
Matches jumble's `publicationFieldExactMatch/2` normalization (case-insensitive, hyphen/space). |
||||
""" |
||||
|
||||
import Ecto.Query, warn: false |
||||
|
||||
alias GcIndexRelay.Nostr.Event |
||||
alias GcIndexRelay.Nostr.PubEvent |
||||
alias GcIndexRelay.Nostr.Tag |
||||
alias GcIndexRelay.Repo |
||||
|
||||
@publication_kind 30_040 |
||||
@search_tag_names ~w(d title author source) |
||||
|
||||
@doc """
Searches kind-30040 publication events whose `d`, `title`, `author`, or `source`
tag exactly matches `query` after normalization.

## Options

  * `:limit` - maximum number of events to return. Defaults to `25`; integer
    values are clamped into `1..100`, and non-integer values fall back to `25`.

Returns `{:ok, []}` without touching the database when `query` yields no search
needles (for example when it is blank). Otherwise returns `{:ok, events}` with
the newest events first, or `{:error, reason}` when a stored event cannot be
converted.
"""
@spec search(String.t(), keyword()) :: {:ok, [PubEvent.t()]} | {:error, String.t()}
def search(query, opts \\ []) when is_binary(query) do
  case query_needles(query) do
    [] ->
      {:ok, []}

    needles ->
      do_search(needles, clamp_limit(Keyword.get(opts, :limit, 25)))
  end
end
||||
|
||||
# Forces an integer limit into the 1..100 range; anything that is not an integer
# falls back to the default of 25.
defp clamp_limit(limit) when is_integer(limit) and limit < 1, do: 1
defp clamp_limit(limit) when is_integer(limit) and limit > 100, do: 100
defp clamp_limit(limit) when is_integer(limit), do: limit
defp clamp_limit(_other), do: 25
||||
|
||||
# Looks up kind-30040 events that carry a searchable tag matching one of the
# already-normalized `needles`, and returns at most `limit` of them, newest
# first, converted through `pub_events_from_db/1`.
#
# A tag matches when either
#   * its trimmed, lower-cased value equals a needle, or
#   * its trimmed, lower-cased value with hyphens replaced by spaces equals the
#     spaced form of a needle.
defp do_search(needles, limit) do
  # A needle made only of hyphens has an empty spaced form. Comparing against ""
  # would match tags whose value is blank or hyphen-only, so those are dropped.
  spaced_needles =
    needles
    |> Enum.map(&spaced_form/1)
    |> Enum.reject(&(&1 == ""))
    |> Enum.uniq()

  exact_match =
    Enum.reduce(needles, dynamic(false), fn needle, acc ->
      dynamic([t], ^acc or fragment("LOWER(TRIM(?)) = ?", t.value, ^needle))
    end)

  # Each distinct spaced form is compared once.
  tag_match =
    Enum.reduce(spaced_needles, exact_match, fn spaced, acc ->
      dynamic(
        [t],
        ^acc or fragment("LOWER(TRIM(REPLACE(?, '-', ' '))) = ?", t.value, ^spaced)
      )
    end)

  matching_event_ids =
    from(t in Tag,
      where: t.name in ^@search_tag_names,
      where: ^tag_match,
      select: t.event_id
    )

  # Ordering and limiting are applied to the events themselves. Selecting ids
  # with `distinct: e.id` plus `order_by` makes Ecto order by the distinct
  # expression first, so the limit would keep the lowest ids instead of the
  # newest events. The id is a tie-breaker so equal timestamps page stably.
  from(e in Event,
    where: e.kind == ^@publication_kind,
    where: e.id in subquery(matching_event_ids),
    order_by: [desc: e.created_at, desc: e.id],
    limit: ^limit,
    preload: [:tags]
  )
  |> Repo.all()
  |> pub_events_from_db()
end
||||
|
||||
# Converts each database event with `PubEvent.from_db/1`, keeping the input
# order. Stops at the first conversion error and returns that error tuple.
defp pub_events_from_db(events) do
  outcome =
    Enum.reduce_while(events, [], fn db_event, converted ->
      case PubEvent.from_db(db_event) do
        {:ok, pub_event} -> {:cont, [pub_event | converted]}
        {:error, _} = failure -> {:halt, failure}
      end
    end)

  case outcome do
    {:error, _} = failure -> failure
    # The accumulator was built by prepending, so restore the original order.
    converted when is_list(converted) -> {:ok, Enum.reverse(converted)}
  end
end
||||
|
||||
@doc false
# Builds the de-duplicated list of lower-cased needles for `query`, in this
# order: the raw lower-cased text, the text with whitespace runs collapsed to a
# single space, and the text with whitespace runs turned into single hyphens.
# Surrounding quotes are removed first; a blank query yields `[]`.
def query_needles(query) do
  stripped = query |> strip_quotes() |> String.trim()

  if stripped == "" do
    []
  else
    lowered = String.downcase(stripped)

    single_spaced =
      lowered
      |> String.replace(~r/\s+/, " ")
      |> String.trim()

    dashed = String.replace(lowered, ~r/\s+/, "-")

    hyphenated =
      dashed
      |> String.replace(~r/-+/, "-")
      |> String.trim("-")

    candidates =
      for candidate <- [lowered, single_spaced, hyphenated], candidate != "", do: candidate

    Enum.uniq(candidates)
  end
end
||||
|
||||
# Trims `raw` and removes one layer of surrounding quotes for each supported
# quote style, checked in order: straight double, straight single, curly double,
# curly single. The text is re-trimmed after every removal.
defp strip_quotes(raw) do
  quote_pairs = [{"\"", "\""}, {"'", "'"}, {"“", "”"}, {"‘", "’"}]

  for {opening, closing} <- quote_pairs, reduce: String.trim(raw) do
    current ->
      wrapped? =
        String.length(current) >= 2 and
          String.starts_with?(current, opening) and
          String.ends_with?(current, closing)

      if wrapped? do
        current |> String.slice(1..-2//1) |> String.trim()
      else
        current
      end
  end
end
||||
|
||||
# Lower-cases `value`, turns hyphens into spaces, collapses whitespace runs to a
# single space, and trims the result.
defp spaced_form(value) do
  lowered = String.downcase(value)
  without_hyphens = String.replace(lowered, "-", " ")
  single_spaced = String.replace(without_hyphens, ~r/\s+/, " ")

  String.trim(single_spaced)
end
||||
end |
||||
@ -0,0 +1,57 @@
@@ -0,0 +1,57 @@
|
||||
defmodule GcIndexRelayWeb.PublicationSearchController do |
||||
use GcIndexRelayWeb, :controller |
||||
use PhoenixSwagger |
||||
|
||||
alias GcIndexRelay.Nostr.PublicationSearch |
||||
|
||||
action_fallback GcIndexRelayWeb.FallbackController |
||||
|
||||
# Swagger metadata for the `search/2` action: a POST endpoint tagged
# "Publications" that responds with a `PubEventList` on success or 400 on error.
# NOTE(review): no request parameters are declared here, although `search/2`
# reads `q` and `limit` from the request params — consider documenting them.
swagger_path :search do
  post("/api/publications/search")
  summary("Search kind-30040 publication indexes by metadata")

  description("""
  Exact-match search over publication index metadata tags: `d`, `title`, `author`, and `source`.
  Matching is case-insensitive and treats hyphens and spaces as equivalent. Partial substring
  matches are not returned.
  """)

  tag("Publications")
  operation_id("search_publications")
  response(200, "OK", Schema.ref(:PubEventList))
  response(400, "Bad Request")
end
||||
|
||||
@doc """
Handles `POST /api/publications/search`.

Reads the required `"q"` string and the optional `"limit"` (defaults to `25`,
must be an integer from 1 to 100) from `params`, runs the exact metadata search
for kind-30040 publication indexes, and renders the matching events with the
`:index` template.

Any step that does not match is returned unchanged, so `{:error, message}`
tuples are left to the configured `action_fallback`.
"""
def search(conn, params) do
  requested_limit = Map.get(params, "limit", 25)

  with {:ok, query} <- fetch_query(params),
       {:ok, limit} <- parse_limit(requested_limit),
       :ok <- validate_limit(limit),
       {:ok, events} <- PublicationSearch.search(query, limit: limit) do
    render(conn, :index, events: events)
  end
end
||||
|
||||
# Extracts the trimmed search text from the "q" request parameter.
# Returns an error when "q" is absent, is not a string, or is blank.
defp fetch_query(%{"q" => q}) when is_binary(q) do
  case String.trim(q) do
    "" -> {:error, "Query q must not be empty."}
    query -> {:ok, query}
  end
end

defp fetch_query(_params), do: {:error, "Missing required field: q"}
||||
|
||||
# Normalizes the "limit" request parameter to an integer. Integers pass through
# unchanged; strings must consist of an integer with nothing left over; every
# other value is rejected. Range checking is not done here.
defp parse_limit(v) when is_integer(v), do: {:ok, v}

defp parse_limit(v) do
  parsed = if is_binary(v), do: Integer.parse(v), else: :error

  case parsed do
    {int, ""} -> {:ok, int}
    _ -> {:error, "Invalid limit: must be an integer between 1 and 100"}
  end
end
||||
|
||||
# Accepts only integer limits from 1 to 100 inclusive.
defp validate_limit(limit) when limit in 1..100, do: :ok
defp validate_limit(_limit), do: {:error, "The limit must be between 1 and 100."}
||||
end |
||||
@ -0,0 +1,19 @@
@@ -0,0 +1,19 @@
|
||||
defmodule GcIndexRelayWeb.PublicationSearchJSON do
  @moduledoc """
  JSON rendering for publication search results.
  """

  alias GcIndexRelay.Nostr.PubEvent

  @doc """
  Renders a list of `PubEvent` structs as `%{data: [...]}`, one map per event.
  """
  def index(%{events: events}) do
    %{data: for(event <- events, do: data(event))}
  end

  # Copies the event fields exposed by the API into a plain map.
  defp data(%PubEvent{
         id: id,
         pubkey: pubkey,
         created_at: created_at,
         kind: kind,
         content: content,
         sig: sig,
         tags: tags
       }) do
    %{
      id: id,
      pubkey: pubkey,
      created_at: created_at,
      kind: kind,
      content: content,
      sig: sig,
      tags: tags
    }
  end
end
||||
@ -0,0 +1,37 @@
@@ -0,0 +1,37 @@
|
||||
defmodule GcIndexRelay.Nostr.PublicationSearchQueryNeedlesTest do
  # Unit tests for `PublicationSearch.query_needles/1`, which needs no database.
  use ExUnit.Case, async: true

  alias GcIndexRelay.Nostr.PublicationSearch

  @moduletag :unit

  test "query_needles returns empty list for blank input" do
    assert PublicationSearch.query_needles("") == []
    assert PublicationSearch.query_needles(" ") == []
  end

  test "query_needles normalizes case, spaces, and hyphens" do
    assert PublicationSearch.query_needles("Pride and Prejudice") == [
             "pride and prejudice",
             "pride-and-prejudice"
           ]
  end

  test "query_needles strips surrounding quotes" do
    assert PublicationSearch.query_needles(~s("Jane Eyre")) == ["jane eyre", "jane-eyre"]
    assert PublicationSearch.query_needles("'Jane Eyre'") == ["jane eyre", "jane-eyre"]
  end

  test "query_needles collapses repeated whitespace and hyphens" do
    # The input must contain runs of whitespace. With single spaces the raw
    # lower-cased needle and the collapsed needle are identical, and
    # `query_needles/1` de-duplicates them, so three needles can never come back.
    assert PublicationSearch.query_needles("pride   and   prejudice") == [
             "pride   and   prejudice",
             "pride and prejudice",
             "pride-and-prejudice"
           ]

    assert PublicationSearch.query_needles("pg1342--pride--and--prejudice") == [
             "pg1342--pride--and--prejudice",
             "pg1342-pride-and-prejudice"
           ]
  end
end
||||
@ -0,0 +1,76 @@
@@ -0,0 +1,76 @@
|
||||
defmodule GcIndexRelay.Nostr.PublicationSearchTest do
  # Database-backed tests for `PublicationSearch.search/2`.
  use GcIndexRelay.DataCase

  import GcIndexRelay.NostrFixtures

  alias GcIndexRelay.Nostr
  alias GcIndexRelay.Nostr.PublicationSearch

  @moduletag :integration

  # Builds a kind-30040 event with the four searchable tags, stores it through
  # `Nostr.create_event/1`, and returns the event that was built.
  # `created_at` is only set on the fixture attributes when it is given.
  defp insert_publication!(d, title, author, source, created_at \\ nil) do
    attrs = %{
      kind: 30_040,
      content: "",
      tags: [
        ["d", d],
        ["title", title],
        ["author", author],
        ["source", source]
      ]
    }

    attrs = if created_at, do: Map.put(attrs, :created_at, created_at), else: attrs

    event = valid_pub_event_fixture(attrs)
    assert {:ok, _} = Nostr.create_event(event)
    event
  end

  test "search finds exact title match" do
    insert_publication!(
      "pg1342-pride-and-prejudice",
      "Pride and Prejudice",
      "Jane Austen",
      "https://www.gutenberg.org/ebooks/1342",
      1_700_000_100
    )

    insert_publication!(
      "other-book",
      "Other Book",
      "Someone",
      "https://example.com/1",
      1_700_000_200
    )

    assert {:ok, results} = PublicationSearch.search("pride and prejudice", limit: 10)
    assert length(results) == 1
    assert hd(results).kind == 30_040

    # `match?/2` returns false for other tags. A one-clause anonymous function
    # would raise FunctionClauseError as soon as a non-`d` tag is inspected.
    assert Enum.any?(hd(results).tags, &match?(["d", "pg1342-pride-and-prejudice"], &1))
  end

  test "search finds exact d-tag match" do
    insert_publication!(
      "pg1342-pride-and-prejudice",
      "Pride and Prejudice",
      "Jane Austen",
      "https://www.gutenberg.org/ebooks/1342"
    )

    assert {:ok, results} = PublicationSearch.search("pg1342-pride-and-prejudice", limit: 10)
    assert length(results) == 1
  end

  test "search rejects partial substring matches" do
    insert_publication!(
      "pg1342-pride-and-prejudice",
      "Pride and Prejudice",
      "Jane Austen",
      "https://www.gutenberg.org/ebooks/1342"
    )

    assert {:ok, []} = PublicationSearch.search("pg1342", limit: 10)
    assert {:ok, []} = PublicationSearch.search("pride-and", limit: 10)
  end
end
||||
@ -0,0 +1,78 @@
@@ -0,0 +1,78 @@
|
||||
defmodule GcIndexRelayWeb.PublicationSearchControllerTest do
  # Endpoint tests for POST /api/publications/search.
  use GcIndexRelayWeb.ConnCase

  import GcIndexRelay.NostrFixtures

  alias GcIndexRelay.Nostr

  @moduletag :integration

  # Every request in this module is sent and accepted as JSON.
  setup %{conn: conn} do
    json_conn =
      conn
      |> put_req_header("accept", "application/json")
      |> put_req_header("content-type", "application/json")

    {:ok, conn: json_conn}
  end

  # Stores a kind-30040 event with `d`, `title`, and `author` tags and returns
  # the event that was built.
  defp insert_publication!(d, title, author) do
    tags = [["d", d], ["title", title], ["author", author]]
    event = valid_pub_event_fixture(%{kind: 30_040, content: "", tags: tags})

    assert {:ok, _} = Nostr.create_event(event)
    event
  end

  # Posts `body` to the search endpoint and decodes the JSON response, asserting
  # the expected HTTP status on the way.
  defp search_json(conn, body, status) do
    conn
    |> post(~p"/api/publications/search", body)
    |> json_response(status)
  end

  describe "POST /api/publications/search" do
    test "returns matching kind-30040 events", %{conn: conn} do
      stored =
        insert_publication!("pg1342-pride-and-prejudice", "Pride and Prejudice", "Jane Austen")

      body = %{"q" => "pride and prejudice", "limit" => 10}

      assert %{"data" => [found]} = search_json(conn, body, 200)
      assert found["id"] == stored.id
      assert found["kind"] == 30_040
    end

    test "returns empty list when nothing matches", %{conn: conn} do
      insert_publication!("pg1342-pride-and-prejudice", "Pride and Prejudice", "Jane Austen")

      assert %{"data" => []} = search_json(conn, %{"q" => "pg1342", "limit" => 10}, 200)
    end

    test "returns 400 when q is missing", %{conn: conn} do
      assert %{"errors" => %{"detail" => "Missing required field: q"}} =
               search_json(conn, %{"limit" => 10}, 400)
    end

    test "returns 400 when q is empty", %{conn: conn} do
      assert %{"errors" => %{"detail" => "Query q must not be empty."}} =
               search_json(conn, %{"q" => " ", "limit" => 10}, 400)
    end

    test "returns 400 when limit is out of range", %{conn: conn} do
      assert %{"errors" => %{"detail" => "The limit must be between 1 and 100."}} =
               search_json(conn, %{"q" => "book", "limit" => 0}, 400)
    end
  end
end
||||
Loading…
Reference in new issue