Add OpenSearch search backend with hybrid neural+BM25 support
Extract a SearchBackend trait from the existing RocksDB search code and add an OpenSearch implementation supporting cross-room search, relevance ranking, fuzzy matching, English stemming, and optional hybrid neural+BM25 semantic search using sentence-transformers. Fix macOS build by gating RLIMIT_NPROC and getrusage to supported platforms.
This commit is contained in:
@@ -907,6 +907,72 @@
|
||||
#
|
||||
#auto_deactivate_banned_room_attempts = false
|
||||
|
||||
# Search backend to use for full-text message search.
|
||||
#
|
||||
# Available options: "rocksdb" (default) or "opensearch".
|
||||
#
|
||||
#search_backend = "rocksdb"
|
||||
|
||||
# URL of the OpenSearch instance. Required when search_backend is
|
||||
# "opensearch".
|
||||
#
|
||||
# example: "http://localhost:9200"
|
||||
#
|
||||
#search_opensearch_url =
|
||||
|
||||
# Name of the OpenSearch index for message search.
|
||||
#
|
||||
#search_opensearch_index = "tuwunel_messages"
|
||||
|
||||
# Credentials for authenticating to OpenSearch, in "user:pass" format.
|
||||
#
|
||||
#search_opensearch_auth =
|
||||
|
||||
# Maximum number of documents to batch before flushing to OpenSearch.
|
||||
#
|
||||
#search_opensearch_batch_size = 100
|
||||
|
||||
# Maximum time in milliseconds to wait before flushing a partial batch
|
||||
# to OpenSearch.
|
||||
#
|
||||
#search_opensearch_flush_interval_ms = 1000
|
||||
|
||||
# Enable hybrid neural+BM25 search in OpenSearch. Requires an ML model
|
||||
# deployed in OpenSearch and an ingest pipeline that populates an
|
||||
# "embedding" field.
|
||||
#
|
||||
# When enabled, tuwunel will:
|
||||
# - Create the index with a knn_vector "embedding" field
|
||||
# - Attach the ingest pipeline (search_opensearch_pipeline) to the index
|
||||
# - Use hybrid queries combining BM25 + neural kNN scoring
|
||||
#
|
||||
# For a complete reference on configuring OpenSearch's ML plugin, model
|
||||
# registration, and ingest pipeline setup, see the test helpers in
|
||||
# `src/service/rooms/search/opensearch.rs` (the `ensure_neural_model`,
|
||||
# `ensure_ingest_pipeline`, etc. functions in the `tests` module).
|
||||
#
|
||||
# See also: https://opensearch.org/docs/latest/search-plugins/neural-search/
|
||||
#
|
||||
#search_opensearch_hybrid = false
|
||||
|
||||
# The model ID registered in OpenSearch for neural search. Required when
|
||||
# search_opensearch_hybrid is enabled.
|
||||
#
|
||||
# example: "aKV84osBBHNT0StI3MBr"
|
||||
#
|
||||
#search_opensearch_model_id =
|
||||
|
||||
# Embedding dimension for the neural search model. Must match the output
|
||||
# dimension of the deployed model. Common values: 384
|
||||
# (all-MiniLM-L6-v2), 768 (msmarco-distilbert-base-tas-b).
|
||||
#
|
||||
#search_opensearch_embedding_dim = 384
|
||||
|
||||
# Name of the ingest pipeline that generates embeddings for the
|
||||
# "embedding" field. This pipeline must already exist in OpenSearch.
|
||||
#
|
||||
#search_opensearch_pipeline = "tuwunel_embedding_pipeline"
|
||||
|
||||
# RocksDB log level. This is not the same as tuwunel's log level. This
|
||||
# is the log level for the RocksDB engine/library, whose logs show up in your
|
||||
# database folder/path as `LOG` files. tuwunel will log RocksDB errors
|
||||
|
||||
Reference in New Issue
Block a user