feat(grpc): dev mode, agent prefix, system prompt, error UX

- gRPC dev_mode config: disables JWT auth, uses fixed dev identity
- Agent prefix (agents.agent_prefix): dev agents use "dev-sol-orchestrator" to avoid colliding with production on shared Mistral accounts
- Coding sessions use instructions (system prompt + coding addendum) with mistral-medium-latest for personality adherence
- Conversations API: don't send both model + agent_id (422 fix)
- GrpcState carries system_prompt + orchestrator_agent_id
- Session.end() keeps session active for reuse (not "ended")
- User messages posted as m.notice, assistant as m.text (role detection)
- History loaded from Matrix room on session resume
- Docker Compose local dev stack: OpenSearch 3 + Tuwunel + SearXNG
- Dev config: localhost URLs, dev_mode, opensearch-init.sh for ML setup
2026-03-23 17:07:50 +00:00
parent 71392cef9c
commit b8b76687a5
18 changed files with 1035 additions and 65 deletions
--- a/dev/opensearch-init.sh
+++ b/dev/opensearch-init.sh
@@ -0,0 +1,167 @@
+#!/bin/bash
+## Initialize OpenSearch ML pipelines for local dev.
+## Mirrors production: all-mpnet-base-v2 (768-dim), same pipelines.
+##
+## Run after `docker compose -f docker-compose.dev.yaml up -d`
+
+set -o errexit -o nounset -o pipefail
+
+OS='http://localhost:9200'
+
+# Block until the cluster health endpoint answers. It is polled every two
+# seconds; curl -f turns HTTP error statuses into a non-zero exit, so the loop
+# keeps waiting until a successful response arrives.
+echo "Waiting for OpenSearch..."
+while ! curl -sf "$OS/_cluster/health" >/dev/null 2>&1; do
+    sleep 2
+done
+echo "OpenSearch is ready."
+
+# --- Configure ML Commons (matches production persistent settings) ---
+# The persistent cluster settings are held in a variable so the curl call
+# itself stays short; the response body is discarded.
+ml_commons_settings='{
+    "persistent": {
+      "plugins.ml_commons.only_run_on_ml_node": false,
+      "plugins.ml_commons.native_memory_threshold": 90,
+      "plugins.ml_commons.model_access_control_enabled": false,
+      "plugins.ml_commons.allow_registering_model_via_url": true
+    }
+  }'
+echo "Configuring ML Commons..."
+curl -sf -X PUT "$OS/_cluster/settings" \
+  -H 'Content-Type: application/json' -d "$ml_commons_settings" > /dev/null
+echo "Done."
+
+# --- Check for existing deployed model ---
+EXISTING=$(curl -sf -X POST "$OS/_plugins/_ml/models/_search" \
+  -H 'Content-Type: application/json' \
+  -d '{"query":{"bool":{"must":[{"term":{"name":"huggingface/sentence-transformers/all-mpnet-base-v2"}}]}},"size":1}')
+
+MODEL_ID=$(echo "$EXISTING" | python3 -c "
+import sys, json
+hits = json.load(sys.stdin).get('hits',{}).get('hits',[])
+# Find the parent model (not chunks)
+for h in hits:
+    if '_' not in h['_id'].split('BA6N7')[0][-3:]:  # heuristic
+        print(h['_id']); break
+" 2>/dev/null || echo "")
+
+# Better: search for deployed/registered models only
+if [ -z "$MODEL_ID" ]; then
+    MODEL_ID=$(echo "$EXISTING" | python3 -c "
+import sys, json
+hits = json.load(sys.stdin).get('hits',{}).get('hits',[])
+if hits:
+    # Get the model_id field from any chunk — they all share it
+    mid = hits[0]['_source'].get('model_id', hits[0]['_id'])
+    print(mid)
+" 2>/dev/null || echo "")
+fi
+
+if [ -n "$MODEL_ID" ]; then
+    echo "Model already registered: $MODEL_ID"
+
+    STATE=$(curl -sf "$OS/_plugins/_ml/models/$MODEL_ID" 2>/dev/null \
+      | python3 -c "import sys,json; print(json.load(sys.stdin).get('model_state','UNKNOWN'))" 2>/dev/null || echo "UNKNOWN")
+
+    if [ "$STATE" = "DEPLOYED" ]; then
+        echo "Model already deployed."
+    else
+        echo "Model state: $STATE — deploying..."
+        curl -sf -X POST "$OS/_plugins/_ml/models/$MODEL_ID/_deploy" > /dev/null || true
+        for i in $(seq 1 30); do
+            STATE=$(curl -sf "$OS/_plugins/_ml/models/$MODEL_ID" \
+              | python3 -c "import sys,json; print(json.load(sys.stdin).get('model_state','UNKNOWN'))")
+            echo "  state: $STATE"
+            if [ "$STATE" = "DEPLOYED" ]; then break; fi
+            sleep 5
+        done
+    fi
+else
+    # Register all-mpnet-base-v2 via pretrained model API (same as production)
+    echo "Registering all-mpnet-base-v2 (pretrained, TORCH_SCRIPT, 768-dim)..."
+    TASK_ID=$(curl -sf -X POST "$OS/_plugins/_ml/models/_register" \
+      -H 'Content-Type: application/json' \
+      -d '{
+        "name": "huggingface/sentence-transformers/all-mpnet-base-v2",
+        "version": "1.0.1",
+        "model_format": "TORCH_SCRIPT"
+      }' | python3 -c "import sys,json; print(json.load(sys.stdin).get('task_id',''))")
+    echo "Registration task: $TASK_ID"
+
+    echo "Waiting for model download + registration..."
+    for i in $(seq 1 90); do
+        RESP=$(curl -sf "$OS/_plugins/_ml/tasks/$TASK_ID")
+        STATUS=$(echo "$RESP" | python3 -c "import sys,json; print(json.load(sys.stdin).get('state','UNKNOWN'))")
+        echo "  [$i] $STATUS"
+        if [ "$STATUS" = "COMPLETED" ]; then
+            MODEL_ID=$(echo "$RESP" | python3 -c "import sys,json; print(json.load(sys.stdin).get('model_id',''))")
+            break
+        fi
+        if [ "$STATUS" = "FAILED" ]; then
+            echo "Registration failed!"
+            echo "$RESP" | python3 -m json.tool
+            exit 1
+        fi
+        sleep 10
+    done
+    echo "Model ID: $MODEL_ID"
+
+    # Deploy
+    echo "Deploying model..."
+    curl -sf -X POST "$OS/_plugins/_ml/models/$MODEL_ID/_deploy" > /dev/null
+
+    echo "Waiting for deployment..."
+    for i in $(seq 1 30); do
+        STATE=$(curl -sf "$OS/_plugins/_ml/models/$MODEL_ID" \
+          | python3 -c "import sys,json; print(json.load(sys.stdin).get('model_state','UNKNOWN'))")
+        echo "  state: $STATE"
+        if [ "$STATE" = "DEPLOYED" ]; then break; fi
+        sleep 5
+    done
+fi
+
+if [ -z "$MODEL_ID" ]; then
+    echo "ERROR: No model ID — cannot create pipelines."
+    exit 1
+fi
+
+echo ""
+echo "Model $MODEL_ID deployed."
+
+# --- Create ingest pipeline (matches production exactly) ---
+# The body is double-quoted so that $MODEL_ID expands into the text_embedding
+# processor, which maps the "body" field to "embedding".
+ingest_pipeline_body="{
+    \"description\": \"Tuwunel message embedding pipeline\",
+    \"processors\": [{
+      \"text_embedding\": {
+        \"model_id\": \"$MODEL_ID\",
+        \"field_map\": {
+          \"body\": \"embedding\"
+        }
+      }
+    }]
+  }"
+echo "Creating ingest pipeline: tuwunel_embedding_pipeline..."
+curl -sf -X PUT "$OS/_ingest/pipeline/tuwunel_embedding_pipeline" \
+  -H 'Content-Type: application/json' -d "$ingest_pipeline_body" > /dev/null
+echo "Done."
+
+# --- Create search pipeline (matches production exactly) ---
+# Static body (single-quoted, no expansion): a normalization-processor using
+# min_max normalization and an arithmetic_mean combination weighted 0.3 / 0.7.
+search_pipeline_body='{
+    "description": "Tuwunel hybrid BM25+neural search pipeline",
+    "phase_results_processors": [{
+      "normalization-processor": {
+        "normalization": { "technique": "min_max" },
+        "combination": {
+          "technique": "arithmetic_mean",
+          "parameters": { "weights": [0.3, 0.7] }
+        }
+      }
+    }]
+  }'
+echo "Creating search pipeline: tuwunel_hybrid_pipeline..."
+curl -sf -X PUT "$OS/_search/pipeline/tuwunel_hybrid_pipeline" \
+  -H 'Content-Type: application/json' -d "$search_pipeline_body" > /dev/null
+echo "Done."
+
+# Final summary: a blank separator line, then one line per provisioned item.
+printf '%s\n' \
+  "" \
+  "OpenSearch ML init complete." \
+  "  Model: all-mpnet-base-v2 ($MODEL_ID)" \
+  "  Ingest pipeline: tuwunel_embedding_pipeline" \
+  "  Search pipeline: tuwunel_hybrid_pipeline"