Add OpenSearch search backend with hybrid neural+BM25 support

Extract a SearchBackend trait from the existing RocksDB search code and
add an OpenSearch implementation supporting cross-room search, relevance
ranking, fuzzy matching, English stemming, and optional hybrid
neural+BM25 semantic search using sentence-transformers.

Fix macOS build by gating RLIMIT_NPROC and getrusage to supported
platforms.
This commit is contained in:
2026-03-08 17:41:20 +00:00
parent 9d47ffff05
commit c9cddc80d9
15 changed files with 2328 additions and 196 deletions

View File

@@ -907,6 +907,72 @@
#
#auto_deactivate_banned_room_attempts = false
# Search backend to use for full-text message search.
#
# Available options: "rocksdb" (default) or "opensearch".
#
#search_backend = "rocksdb"
# URL of the OpenSearch instance. Required when search_backend is
# "opensearch".
#
# example: "http://localhost:9200"
#
#search_opensearch_url =
# Name of the OpenSearch index for message search.
#
#search_opensearch_index = "tuwunel_messages"
# Credentials for authenticating to OpenSearch, in "user:pass" format.
#
#search_opensearch_auth =
# Maximum number of documents to batch before flushing to OpenSearch.
#
#search_opensearch_batch_size = 100
# Maximum time in milliseconds to wait before flushing a partial batch
# to OpenSearch.
#
#search_opensearch_flush_interval_ms = 1000
# Enable hybrid neural+BM25 search in OpenSearch. Requires an ML model
# deployed in OpenSearch and an ingest pipeline that populates an
# "embedding" field.
#
# When enabled, tuwunel will:
# - Create the index with a knn_vector "embedding" field
# - Attach the ingest pipeline (search_opensearch_pipeline) to the index
# - Use hybrid queries combining BM25 + neural kNN scoring
#
# For a complete reference on configuring OpenSearch's ML plugin, model
# registration, and ingest pipeline setup, see the test helpers in
# `src/service/rooms/search/opensearch.rs` (the `ensure_neural_model`,
# `ensure_ingest_pipeline`, etc. functions in the `tests` module).
#
# See also: https://opensearch.org/docs/latest/search-plugins/neural-search/
#
#search_opensearch_hybrid = false
# The model ID registered in OpenSearch for neural search. Required when
# search_opensearch_hybrid is enabled.
#
# example: "aKV84osBBHNT0StI3MBr"
#
#search_opensearch_model_id =
# Embedding dimension for the neural search model. Must match the output
# dimension of the deployed model. Common values: 384
# (all-MiniLM-L6-v2), 768 (msmarco-distilbert-base-tas-b).
#
#search_opensearch_embedding_dim = 384
# Name of the ingest pipeline that generates embeddings for the
# "embedding" field. This pipeline must already exist in OpenSearch; it
# is attached to the index when search_opensearch_hybrid is enabled.
#
#search_opensearch_pipeline = "tuwunel_embedding_pipeline"
# RocksDB log level. This is not the same as tuwunel's log level. This
# is the log level for the RocksDB engine/library, whose logs show up in
# your database folder/path as `LOG` files. tuwunel will log RocksDB errors