feat(grpc): dev mode, agent prefix, system prompt, error UX

- gRPC dev_mode config: disables JWT auth, uses fixed dev identity
- Agent prefix (agents.agent_prefix): dev agents use "dev-sol-orchestrator"
  to avoid colliding with production on shared Mistral accounts
- Coding sessions use instructions (system prompt + coding addendum)
  with mistral-medium-latest for personality adherence
- Conversations API: don't send both model + agent_id (422 fix)
- GrpcState carries system_prompt + orchestrator_agent_id
- Session.end() keeps session active for reuse (not "ended")
- User messages posted as m.notice, assistant as m.text (role detection)
- History loaded from Matrix room on session resume
- Docker Compose local dev stack: OpenSearch 3 + Tuwunel + SearXNG
- Dev config: localhost URLs, dev_mode, opensearch-init.sh for ML setup
This commit is contained in:
2026-03-23 17:07:50 +00:00
parent 71392cef9c
commit b8b76687a5
18 changed files with 1035 additions and 65 deletions

View File

@@ -25,6 +25,8 @@ pub struct GrpcState {
pub store: Arc<Store>,
pub mistral: Arc<mistralai_client::v1::client::Client>,
pub matrix: matrix_sdk::Client,
pub system_prompt: String,
pub orchestrator_agent_id: String,
}
/// Start the gRPC server. Call from main.rs alongside the Matrix sync loop.
@@ -38,28 +40,31 @@ pub async fn start_server(state: Arc<GrpcState>) -> anyhow::Result<()> {
let addr = addr.parse()?;
let jwks_url = state
.config
.grpc
.as_ref()
.and_then(|g| g.jwks_url.clone())
.unwrap_or_else(|| {
"http://hydra-public.ory.svc.cluster.local:4444/.well-known/jwks.json".into()
});
// Initialize JWT validator (fetches JWKS from Hydra)
let jwt_validator = Arc::new(auth::JwtValidator::new(&jwks_url).await?);
let interceptor = auth::JwtInterceptor::new(jwt_validator);
let grpc_cfg = state.config.grpc.as_ref();
let dev_mode = grpc_cfg.map(|g| g.dev_mode).unwrap_or(false);
let jwks_url = grpc_cfg.and_then(|g| g.jwks_url.clone());
let svc = service::CodeAgentService::new(state);
let svc = CodeAgentServer::with_interceptor(svc, interceptor);
info!(%addr, "Starting gRPC server");
let mut builder = Server::builder();
Server::builder()
.add_service(svc)
.serve(addr)
.await?;
if dev_mode {
info!(%addr, "Starting gRPC server (dev mode — no auth)");
builder
.add_service(CodeAgentServer::new(svc))
.serve(addr)
.await?;
} else if let Some(ref url) = jwks_url {
info!(%addr, jwks_url = %url, "Starting gRPC server with JWT auth");
let jwt_validator = Arc::new(auth::JwtValidator::new(url).await?);
let interceptor = auth::JwtInterceptor::new(jwt_validator);
builder
.add_service(CodeAgentServer::with_interceptor(svc, interceptor))
.serve(addr)
.await?;
} else {
anyhow::bail!("gRPC requires either dev_mode = true or a jwks_url for JWT auth");
};
Ok(())
}

View File

@@ -31,10 +31,25 @@ impl CodeAgent for CodeAgentService {
&self,
request: Request<Streaming<ClientMessage>>,
) -> Result<Response<Self::SessionStream>, Status> {
let dev_mode = self
.state
.config
.grpc
.as_ref()
.map(|g| g.dev_mode)
.unwrap_or(false);
let claims = request
.extensions()
.get::<Claims>()
.cloned()
.or_else(|| {
dev_mode.then(|| Claims {
sub: "dev".into(),
email: Some("dev@sunbeam.local".into()),
exp: 0,
})
})
.ok_or_else(|| Status::unauthenticated("No valid authentication token"))?;
info!(
@@ -68,7 +83,7 @@ impl CodeAgent for CodeAgentService {
}
async fn run_session(
state: &GrpcState,
state: &Arc<GrpcState>,
claims: &Claims,
in_stream: &mut Streaming<ClientMessage>,
tx: &mpsc::Sender<Result<ServerMessage, Status>>,
@@ -85,18 +100,15 @@ async fn run_session(
};
// Create or resume session
let mut session = CodeSession::start(
Arc::new(GrpcState {
config: state.config.clone(),
tools: state.tools.clone(),
store: state.store.clone(),
mistral: state.mistral.clone(),
matrix: state.matrix.clone(),
}),
claims,
&start,
)
.await?;
let mut session = CodeSession::start(state.clone(), claims, &start).await?;
// Fetch history if resuming
let resumed = session.resumed();
let history = if resumed {
session.fetch_history(50).await
} else {
Vec::new()
};
// Send SessionReady
tx.send(Ok(ServerMessage {
@@ -104,6 +116,8 @@ async fn run_session(
session_id: session.session_id.clone(),
room_id: session.room_id.clone(),
model: session.model.clone(),
resumed,
history,
})),
}))
.await?;

View File

@@ -132,6 +132,98 @@ impl CodeSession {
})
}
/// Report whether this session continues an earlier connection.
///
/// A session counts as resumed exactly when it already carries a
/// conversation id.
pub fn resumed(&self) -> bool {
    let has_conversation = self.conversation_id.is_some();
    has_conversation
}
/// Fetch recent messages from the Matrix room for history display.
///
/// Requests one backward page of at most `limit` events from the session's
/// room and converts the room messages in it into [`HistoryEntry`] values,
/// returned oldest first.
///
/// Role detection is based purely on the message type: `m.text` bodies are
/// reported as `"assistant"` and `m.notice` bodies as `"user"`. Every other
/// event (non-message events, redacted messages, other message types, events
/// that fail to deserialize) is skipped, so the result may hold fewer than
/// `limit` entries.
///
/// Returns an empty vector when `limit` is zero, when the session has no
/// room, or when the homeserver request fails (the failure is logged).
pub async fn fetch_history(&self, limit: usize) -> Vec<HistoryEntry> {
    use matrix_sdk::room::MessagesOptions;
    use matrix_sdk::ruma::events::room::message::MessageType;
    use matrix_sdk::ruma::events::{
        AnySyncMessageLikeEvent, AnySyncTimelineEvent, SyncMessageLikeEvent,
    };

    // A zero limit can never yield entries; skip the round trip entirely.
    if limit == 0 {
        return Vec::new();
    }

    let Some(ref room) = self.room else {
        return Vec::new();
    };

    let mut options = MessagesOptions::backward();
    // Honour the caller's limit instead of a fixed page size. Saturate on
    // conversion so absurdly large values cannot fail or wrap.
    options.limit = u32::try_from(limit).unwrap_or(u32::MAX).into();

    let messages = match room.messages(options).await {
        Ok(m) => m,
        Err(e) => {
            warn!("Failed to fetch room history: {e}");
            return Vec::new();
        }
    };

    // The chunk is newest-first (backward pagination): keep the newest
    // `limit` usable messages, then flip them into chronological order.
    let mut entries: Vec<HistoryEntry> = messages
        .chunk
        .iter()
        .filter_map(|event| {
            let AnySyncTimelineEvent::MessageLike(AnySyncMessageLikeEvent::RoomMessage(
                SyncMessageLikeEvent::Original(original),
            )) = event.raw().deserialize().ok()?
            else {
                return None;
            };

            let (body, role) = match original.content.msgtype {
                MessageType::Text(t) => (t.body, "assistant"),
                MessageType::Notice(t) => (t.body, "user"),
                _ => return None,
            };

            Some(HistoryEntry {
                role: role.into(),
                content: body,
            })
        })
        .take(limit)
        .collect();

    entries.reverse(); // oldest first
    entries
}
/// Assemble the instructions for a coding conversation.
///
/// The result is the shared system prompt from the gRPC state followed by a
/// coding-mode addendum that lists the available tools, the working rules,
/// and the name of the current project.
fn build_instructions(&self) -> String {
    // Start from the base prompt and append the addendum to the same buffer.
    let mut instructions = self.state.system_prompt.clone();
    instructions.push_str(&format!(
        r#"
## coding mode
you are in a `sunbeam code` terminal session with a developer. you have direct access to their local filesystem through tools: file_read, file_write, search_replace, grep, bash, list_directory.
you also have access to server-side tools: search_archive, search_web, research, run_script, and gitea tools.
### how to work
- read before you edit. understand existing code before suggesting changes.
- use search_replace for targeted patches, file_write only for new files or complete rewrites.
- run tests after changes. use bash for builds, tests, git operations.
- keep changes minimal and focused. don't refactor what wasn't asked for.
- when uncertain, ask — you have an ask_user tool for that.
### project: {project}
"#,
        project = self.project_name
    ));
    instructions
}
/// Build the per-message context header for coding mode.
fn build_context_header(&self) -> String {
let tc = TimeContext::now();
@@ -161,16 +253,16 @@ impl CodeSession {
let context_header = self.build_context_header();
let input_text = format!("{context_header}\n{text}");
// Post to Matrix room
// Post user message to Matrix room (as m.notice to distinguish from assistant)
if let Some(ref room) = self.room {
let content = RoomMessageEventContent::text_plain(text);
let content = RoomMessageEventContent::notice_plain(text);
let _ = room.send(content).await;
}
// Send status
let _ = client_tx.send(Ok(ServerMessage {
payload: Some(server_message::Payload::Status(Status {
message: "thinking...".into(),
message: "generating…".into(),
kind: StatusKind::Thinking.into(),
})),
})).await;
@@ -190,6 +282,7 @@ impl CodeSession {
.await
.map_err(|e| anyhow::anyhow!("append_conversation failed: {}", e.message))?
} else {
let instructions = self.build_instructions();
let req = CreateConversationRequest {
inputs: ConversationInput::Text(input_text),
model: Some(self.model.clone()),
@@ -197,7 +290,7 @@ impl CodeSession {
agent_version: None,
name: Some(format!("code-{}", self.project_name)),
description: None,
instructions: None,
instructions: Some(instructions),
completion_args: None,
tools: Some(self.build_tool_definitions()),
handoff_execution: None,
@@ -387,10 +480,10 @@ impl CodeSession {
tools
}
/// Disconnect from the session (keeps it active for future reconnection).
///
/// As listed here, the body calls `store.end_code_session` and then
/// `store.touch_code_session` for this session id, emitting one info log
/// after each call.
// NOTE(review): this listing looks like a rendered diff with its +/- markers
// stripped, so the `end_code_session` call and the "Code session ended" log
// are presumably the removed lines and only the `touch_code_session` call
// with the "disconnected" log is live. Confirm against the real file before
// relying on either behavior.
pub fn end(&self) {
self.state.store.end_code_session(&self.session_id);
info!(session_id = self.session_id.as_str(), "Code session ended");
self.state.store.touch_code_session(&self.session_id);
info!(session_id = self.session_id.as_str(), "Code session disconnected (stays active for reuse)");
}
}